#ifndef INCLUDE_lexerlexer #define INCLUDE_lexerlexer #include "token.h" #include #include #include #include typedef struct { const char *buf; int pos; int line; int col; } _LX; /* Error reporting with line/column info */ static void perror_at(_LX *lx, const char *msg) { fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", lx->line, lx->col, msg); exit(1); } #define sic static inline char #define sii static inline int #define siv static inline void #define sit static inline _T sic lxpeek(_LX *lx) { return lx->buf[lx->pos]; } sic lxget(_LX *lx) { return lx->buf[lx->pos++]; } static inline char *lx_strdup_checked(_LX *lx, const char *s) { char *p = strdup(s); if (!p) { perror_at(lx, "out of memory"); } return p; } siv lxskipws(_LX *lx) { while (lxpeek(lx) == ' ' || lxpeek(lx) == '\t') { if (lxpeek(lx) == '\t') lx->col += 8; else lx->col++; lxget(lx); } } sit lxnext(_LX *lx) { lxskipws(lx); char c = lxpeek(lx); if (c == 0) return (_T){TK_EOF, 0, NULL}; // Track newlines if (c == '\n') { lx->line++; lx->col = 0; lxget(lx); return lxnext(lx); // recurse to get next token } if (isalpha(c) || c == '_') { int start = lx->pos; while (isalnum(lxpeek(lx)) || lxpeek(lx) == '_') lxget(lx); int len = lx->pos - start; char *text = strndup(lx->buf + start, len); if (!text) { perror_at(lx, "out of memory"); } return (_T){checkkw(text), 0, text}; } if (isdigit(c)) { int start = lx->pos; while (isdigit(lxpeek(lx))) lxget(lx); int len = lx->pos - start; char *text = strndup(lx->buf + start, len); if (!text) { perror_at(lx, "out of memory"); } return (_T){TK_NUMBER, atoi(text), text}; } lxget(lx); lx->col++; // handle multi-char tokens if (c == '=') { if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_EQ,0,lx_strdup_checked(lx,"==")}; } return (_T){TK_ASSIGN, 0, lx_strdup_checked(lx,"=")}; } if (c == '!') { if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_NE,0,lx_strdup_checked(lx,"!=")}; } return (_T){TK_BANG,0,lx_strdup_checked(lx,"!")}; } if (c == '<') { if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_LE,0,lx_strdup_checked(lx,"<=")}; } if (lxpeek(lx) == '<') { lxget(lx); lx->col++; return (_T){TK_SHL,0,lx_strdup_checked(lx,"<<")}; } return (_T){TK_LT,0,lx_strdup_checked(lx,"<")}; } if (c == '>') { if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_GE,0,lx_strdup_checked(lx,">=")}; } if (lxpeek(lx) == '>') { lxget(lx); lx->col++; return (_T){TK_SHR,0,lx_strdup_checked(lx,">>")}; } return (_T){TK_GT,0,lx_strdup_checked(lx,">")}; } if (c == '&') { if (lxpeek(lx) == '&') { lxget(lx); lx->col++; return (_T){TK_AND,0,lx_strdup_checked(lx,"&&")}; } } if (c == '|') { if (lxpeek(lx) == '|') { lxget(lx); lx->col++; return (_T){TK_OR,0,lx_strdup_checked(lx,"||")}; } } if (c == '"') { // String literal int start = lx->pos; // pos is already after the opening quote while (lxpeek(lx) != '"' && lxpeek(lx) != 0) { if (lxpeek(lx) == '\\') { lxget(lx); // consume backslash if (lxpeek(lx) != 0) lxget(lx); // consume escaped char } else { lxget(lx); } } if (lxpeek(lx) == '"') { int len = lx->pos - start; // length of content lxget(lx); // consume closing quote char *text = strndup(lx->buf + start, len); // start at content if (!text) { perror_at(lx, "out of memory"); } return (_T){TK_STRING, 0, text}; } else { perror_at(lx, "unterminated string literal"); } } switch (c) { case '(': return (_T){TK_LPAREN, 0, lx_strdup_checked(lx,"(")}; case ')': return (_T){TK_RPAREN, 0, lx_strdup_checked(lx,")")}; case '{': return (_T){TK_LBRACE, 0, lx_strdup_checked(lx,"{")}; case '}': return (_T){TK_RBRACE, 0, lx_strdup_checked(lx,"}")}; case ';': return (_T){TK_SEMI, 0, lx_strdup_checked(lx,";")}; case '+': return (_T){TK_PLUS, 0, lx_strdup_checked(lx,"+")}; case '-': return (_T){TK_MINUS, 0, lx_strdup_checked(lx,"-")}; case '*': return (_T){TK_STAR, 0, lx_strdup_checked(lx,"*")}; case '/': return (_T){TK_SLASH, 0, lx_strdup_checked(lx,"/")}; case '&': return (_T){TK_AMP, 0, lx_strdup_checked(lx,"&")}; case '|': return (_T){TK_BAR, 0, lx_strdup_checked(lx,"|")}; case '^': return (_T){TK_CARET, 0, lx_strdup_checked(lx,"^")}; case '%': return (_T){TK_PERCENT, 0, lx_strdup_checked(lx,"%")}; case ',': return (_T){TK_COMMA, 0, lx_strdup_checked(lx,",")}; case '\'': return (_T){TK_SQUOTE, 0, lx_strdup_checked(lx,"'")}; case '"': return (_T){TK_DQUOTE, 0, lx_strdup_checked(lx,"\"")}; case '[': return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx,"[")}; case ']': return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx,"]")}; default: return (_T){TK_INVALID, 0, NULL}; } } #undef sic #undef sii #undef siv #undef sit #endif