summaryrefslogtreecommitdiff
path: root/src/lexer.h
diff options
context:
space:
mode:
authorDavid Moc <personal@cdatgoose.org>2026-03-05 23:38:49 +0100
committerDavid Moc <personal@cdatgoose.org>2026-03-05 23:38:49 +0100
commit0385817bb1301a778bb33f8405a435293b9f8905 (patch)
tree53f4b6f13e393bd368c37ba4363826b46940dfd3 /src/lexer.h
parent262abf9b552a168ef3ae91f91af97683f16420a7 (diff)
Pushing to repo for safety.
Diffstat (limited to 'src/lexer.h')
-rw-r--r--src/lexer.h182
1 files changed, 182 insertions, 0 deletions
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..ca2b790
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,182 @@
+
+#ifndef INCLUDE_lexerlexer
+#define INCLUDE_lexerlexer
+
+#include "token.h"
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Lexer cursor over an in-memory, 0-terminated source buffer.
+ * NOTE(review): callers may build this with a positional initialiser, so
+ * the field order should be treated as part of the interface - confirm.
+ */
+typedef struct {
+ const char *buf; /* source text; lxnext() reports TK_EOF at the first 0 byte */
+ int pos; /* index into buf of the next unread character */
+ int line; /* line counter, incremented by lxnext() for each '\n' it skips */
+ int col; /* column counter printed by perror_at(); reset to 0 on '\n' */
+} _LX;
+
+/*
+ * Print a lexer diagnostic to stderr, tagged with the cursor's current
+ * line and column, then terminate the process with exit status 1.
+ * This function does not return.
+ */
+static void perror_at(_LX *lx, const char *msg) {
+  const int at_line = lx->line;
+  const int at_col = lx->col;
+
+  fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", at_line, at_col,
+          msg);
+  exit(1);
+}
+
+/*
+ * Local shorthands for the `static inline <type>` prefix of the lexer
+ * helpers defined below.  All four are #undef'd again at the end of this
+ * header.  `sii` has no user in the code as committed.
+ */
+#define sic static inline char
+#define sii static inline int
+#define siv static inline void
+#define sit static inline _T
+
+/* Return the character under the cursor without consuming it. */
+sic lxpeek(_LX *lx) {
+  const char *cursor = lx->buf + lx->pos;
+  return *cursor;
+}
+
+/* Return the character under the cursor and advance pos by one. */
+sic lxget(_LX *lx) {
+  char ch = lx->buf[lx->pos];
+  lx->pos += 1;
+  return ch;
+}
+
+/*
+ * Duplicate the NUL-terminated string s with strdup() and return the heap
+ * copy.  If the allocation fails, "out of memory" is reported through
+ * perror_at(), which exits the process.
+ */
+static inline char *lx_strdup_checked(_LX *lx, const char *s) {
+  char *copy = strdup(s);
+  if (copy != NULL)
+    return copy;
+
+  perror_at(lx, "out of memory");
+  return copy;
+}
+
+/*
+ * Consume a run of spaces and tabs at the cursor.  A space advances the
+ * column counter by 1 and a tab by 8; newlines are not handled here.
+ */
+siv lxskipws(_LX *lx) {
+  for (;;) {
+    char ch = lxpeek(lx);
+    if (ch != ' ' && ch != '\t')
+      break;
+    lx->col += (ch == '\t') ? 8 : 1;
+    lxget(lx);
+  }
+}
+
+/*
+ * Copy len bytes starting at src into a fresh NUL-terminated heap string.
+ * Reports "out of memory" through perror_at() (which exits) on failure.
+ */
+static inline char *lx_strndup_checked(_LX *lx, const char *src, int len) {
+  char *copy = strndup(src, (size_t)len);
+  if (!copy) {
+    perror_at(lx, "out of memory");
+  }
+  return copy;
+}
+
+/* If the next character is `want`, consume it, bump col and return 1; else 0. */
+sii lx_accept(_LX *lx, char want) {
+  if (lxpeek(lx) != want)
+    return 0;
+  lxget(lx);
+  lx->col++;
+  return 1;
+}
+
+/* Consume one character while keeping line/col in step with it. */
+siv lx_bump(_LX *lx) {
+  char ch = lxget(lx);
+  if (ch == '\n') {
+    lx->line++;
+    lx->col = 0;
+  } else if (ch == '\t') {
+    lx->col += 8; /* same tab width lxskipws() uses */
+  } else {
+    lx->col++;
+  }
+}
+
+/*
+ * Scan and return the next token at the cursor.
+ *
+ * Spaces, tabs and newlines are skipped first.  The returned _T is built
+ * positionally as {token kind, numeric value, text}:
+ *   - end of buffer (0 byte)  -> TK_EOF, text NULL
+ *   - [A-Za-z_][A-Za-z0-9_]*  -> kind from checkkw(), text = the word
+ *   - decimal digits          -> TK_NUMBER, value = parsed int, text = digits
+ *   - "..."                   -> TK_STRING, text = raw bytes between the
+ *                                quotes (escape sequences are kept as written)
+ *   - operators / punctuation -> their TK_* kind, text = the operator
+ *   - anything else           -> TK_INVALID, text NULL
+ *
+ * Every non-NULL text is heap-allocated by strdup()/strndup().
+ * NOTE(review): presumably the consumer of the token frees it - confirm.
+ *
+ * Fatal conditions (out of memory, unterminated string literal, integer
+ * literal that does not fit in int) are reported through perror_at(),
+ * which exits the process.
+ */
+sit lxnext(_LX *lx) {
+  char c;
+
+  /* Skip blank space iteratively: recursing once per newline would grow
+     the stack without bound on long runs of empty lines. */
+  for (;;) {
+    lxskipws(lx);
+    c = lxpeek(lx);
+    if (c != '\n')
+      break;
+    lx->line++;
+    lx->col = 0;
+    lxget(lx);
+  }
+
+  if (c == 0)
+    return (_T){TK_EOF, 0, NULL};
+
+  /* <ctype.h> classifiers require a value representable as unsigned char;
+     passing a negative plain char (bytes >= 0x80) is undefined behaviour. */
+  if (isalpha((unsigned char)c) || c == '_') {
+    int start = lx->pos;
+    while (isalnum((unsigned char)lxpeek(lx)) || lxpeek(lx) == '_')
+      lxget(lx);
+    int len = lx->pos - start;
+    lx->col += len;
+    char *text = lx_strndup_checked(lx, lx->buf + start, len);
+    return (_T){checkkw(text), 0, text};
+  }
+
+  if (isdigit((unsigned char)c)) {
+    int start = lx->pos;
+    while (isdigit((unsigned char)lxpeek(lx)))
+      lxget(lx);
+    int len = lx->pos - start;
+    char *text = lx_strndup_checked(lx, lx->buf + start, len);
+
+    /* atoi() has undefined behaviour on overflow; strtol() reports it.
+       col is advanced only afterwards so the error points at the literal. */
+    errno = 0;
+    long value = strtol(text, NULL, 10);
+    if (errno == ERANGE || value > INT_MAX) {
+      perror_at(lx, "integer literal out of range");
+    }
+    lx->col += len;
+    return (_T){TK_NUMBER, (int)value, text};
+  }
+
+  lxget(lx);
+  lx->col++;
+
+  /* Operators that may be one or two characters long. */
+  if (c == '=') {
+    if (lx_accept(lx, '='))
+      return (_T){TK_EQ, 0, lx_strdup_checked(lx, "==")};
+    return (_T){TK_ASSIGN, 0, lx_strdup_checked(lx, "=")};
+  }
+  if (c == '!') {
+    if (lx_accept(lx, '='))
+      return (_T){TK_NE, 0, lx_strdup_checked(lx, "!=")};
+    return (_T){TK_BANG, 0, lx_strdup_checked(lx, "!")};
+  }
+  if (c == '<') {
+    if (lx_accept(lx, '='))
+      return (_T){TK_LE, 0, lx_strdup_checked(lx, "<=")};
+    if (lx_accept(lx, '<'))
+      return (_T){TK_SHL, 0, lx_strdup_checked(lx, "<<")};
+    return (_T){TK_LT, 0, lx_strdup_checked(lx, "<")};
+  }
+  if (c == '>') {
+    if (lx_accept(lx, '='))
+      return (_T){TK_GE, 0, lx_strdup_checked(lx, ">=")};
+    if (lx_accept(lx, '>'))
+      return (_T){TK_SHR, 0, lx_strdup_checked(lx, ">>")};
+    return (_T){TK_GT, 0, lx_strdup_checked(lx, ">")};
+  }
+  /* A lone '&' or '|' falls through to the single-character switch. */
+  if (c == '&' && lx_accept(lx, '&'))
+    return (_T){TK_AND, 0, lx_strdup_checked(lx, "&&")};
+  if (c == '|' && lx_accept(lx, '|'))
+    return (_T){TK_OR, 0, lx_strdup_checked(lx, "||")};
+
+  if (c == '"') {
+    int start = lx->pos; /* first byte after the opening quote */
+    while (lxpeek(lx) != '"' && lxpeek(lx) != 0) {
+      if (lxpeek(lx) == '\\') {
+        lx_bump(lx); /* the backslash */
+        if (lxpeek(lx) != 0)
+          lx_bump(lx); /* the escaped character, even if it is '"' */
+      } else {
+        lx_bump(lx);
+      }
+    }
+    if (lxpeek(lx) == '"') {
+      int len = lx->pos - start; /* content length, quotes excluded */
+      lxget(lx);                 /* closing quote */
+      lx->col++;
+      char *text = lx_strndup_checked(lx, lx->buf + start, len);
+      return (_T){TK_STRING, 0, text};
+    }
+    perror_at(lx, "unterminated string literal");
+    /* perror_at() exits; the '"' case below is reached only if it ever
+       returns. */
+  }
+
+  switch (c) {
+  case '(':
+    return (_T){TK_LPAREN, 0, lx_strdup_checked(lx, "(")};
+  case ')':
+    return (_T){TK_RPAREN, 0, lx_strdup_checked(lx, ")")};
+  case '{':
+    return (_T){TK_LBRACE, 0, lx_strdup_checked(lx, "{")};
+  case '}':
+    return (_T){TK_RBRACE, 0, lx_strdup_checked(lx, "}")};
+  case ';':
+    return (_T){TK_SEMI, 0, lx_strdup_checked(lx, ";")};
+  case '+':
+    return (_T){TK_PLUS, 0, lx_strdup_checked(lx, "+")};
+  case '-':
+    return (_T){TK_MINUS, 0, lx_strdup_checked(lx, "-")};
+  case '*':
+    return (_T){TK_STAR, 0, lx_strdup_checked(lx, "*")};
+  case '/':
+    return (_T){TK_SLASH, 0, lx_strdup_checked(lx, "/")};
+  case '&':
+    return (_T){TK_AMP, 0, lx_strdup_checked(lx, "&")};
+  case '|':
+    return (_T){TK_BAR, 0, lx_strdup_checked(lx, "|")};
+  case '^':
+    return (_T){TK_CARET, 0, lx_strdup_checked(lx, "^")};
+  case '%':
+    return (_T){TK_PERCENT, 0, lx_strdup_checked(lx, "%")};
+  case ',':
+    return (_T){TK_COMMA, 0, lx_strdup_checked(lx, ",")};
+  case '\'':
+    return (_T){TK_SQUOTE, 0, lx_strdup_checked(lx, "'")};
+  case '"':
+    return (_T){TK_DQUOTE, 0, lx_strdup_checked(lx, "\"")};
+  case '[':
+    return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx, "[")};
+  case ']':
+    return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx, "]")};
+  default:
+    return (_T){TK_INVALID, 0, NULL};
+  }
+}
+
+/* Remove the shorthand macros so they do not leak into including files. */
+#undef sic
+#undef sii
+#undef siv
+#undef sit
+
+#endif