summaryrefslogtreecommitdiff
path: root/src/lexer.h
diff options
context:
space:
mode:
authorDavid Moc <personal@cdatgoose.org>2026-03-08 15:02:30 +0100
committerDavid Moc <personal@cdatgoose.org>2026-03-08 15:02:30 +0100
commitad035ac5d942c6448a6a0464b995c2868a8378db (patch)
tree9c2c779d544a45b4234a6a3c311aa7b612ea975e /src/lexer.h
parent0385817bb1301a778bb33f8405a435293b9f8905 (diff)
Gosh.\nFixed bugs in offsetting. Added syscalls. Cleaned up the previous commenting (used to pass the project through Claude to add comments but LLMs are dumb). Removed the LLM made test runner cuz fuck AI.HEADmaster
Diffstat (limited to 'src/lexer.h')
-rw-r--r--src/lexer.h117
1 files changed, 104 insertions, 13 deletions
diff --git a/src/lexer.h b/src/lexer.h
index ca2b790..3d36bf4 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,4 +1,3 @@
-
#ifndef INCLUDE_lexerlexer
#define INCLUDE_lexerlexer
@@ -15,7 +14,7 @@ typedef struct {
int col;
} _LX;
-/* Error reporting with line/column info */
+
static void perror_at(_LX *lx, const char *msg) {
fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", lx->line, lx->col, msg);
exit(1);
@@ -56,7 +55,32 @@ sit lxnext(_LX *lx) {
lx->line++;
lx->col = 0;
lxget(lx);
- return lxnext(lx); // recurse to get next token
+ return lxnext(lx);
+ }
+
+ // Comments: // and /* */
+ if (c == '/') {
+ // peek one ahead
+ int saved = lx->pos;
+ lxget(lx);
+ char c2 = lxpeek(lx);
+ if (c2 == '/') {
+ while (lxpeek(lx) != '\n' && lxpeek(lx) != 0) lxget(lx);
+ return lxnext(lx);
+ } else if (c2 == '*') {
+ lxget(lx); // consume '*'
+ for (;;) {
+ char ch = lxpeek(lx);
+ if (ch == 0) break; // unterminated, let it go
+ lxget(lx);
+ if (ch == '\n') { lx->line++; lx->col = 0; }
+ if (ch == '*' && lxpeek(lx) == '/') { lxget(lx); break; }
+ }
+ return lxnext(lx);
+ } else {
+ // not a comment — put position back, fall through to normal '/' handling
+ lx->pos = saved;
+ }
}
if (isalpha(c) || c == '_') {
@@ -110,28 +134,85 @@ sit lxnext(_LX *lx) {
if (lxpeek(lx) == '|') { lxget(lx); lx->col++; return (_T){TK_OR,0,lx_strdup_checked(lx,"||")}; }
}
if (c == '"') {
- // String literal
- int start = lx->pos; // pos is already after the opening quote
+ // String literal — decode escape sequences into a fresh buffer
+ int cap = 64, dlen = 0;
+ char *decoded = (char*)malloc(cap);
+ if (!decoded) perror_at(lx, "out of memory");
while (lxpeek(lx) != '"' && lxpeek(lx) != 0) {
+ char ch;
if (lxpeek(lx) == '\\') {
lxget(lx); // consume backslash
- if (lxpeek(lx) != 0) lxget(lx); // consume escaped char
+ char esc = (char)lxget(lx);
+ switch (esc) {
+ case 'n': ch = '\n'; break;
+ case 't': ch = '\t'; break;
+ case 'r': ch = '\r'; break;
+ case '0': ch = '\0'; break;
+ case '\\': ch = '\\'; break;
+ case '"': ch = '"'; break;
+ case '\'': ch = '\''; break;
+ default: ch = esc; break;
+ }
} else {
- lxget(lx);
+ ch = (char)lxget(lx);
}
+ if (dlen + 2 > cap) {
+ cap *= 2;
+ char *tmp = (char*)realloc(decoded, cap);
+ if (!tmp) { free(decoded); perror_at(lx, "out of memory"); }
+ decoded = tmp;
+ }
+ decoded[dlen++] = ch;
}
+ decoded[dlen] = '\0';
if (lxpeek(lx) == '"') {
- int len = lx->pos - start; // length of content
lxget(lx); // consume closing quote
- char *text = strndup(lx->buf + start, len); // start at content
- if (!text) {
- perror_at(lx, "out of memory");
- }
- return (_T){TK_STRING, 0, text};
+ return (_T){TK_STRING, 0, decoded};
} else {
+ free(decoded);
perror_at(lx, "unterminated string literal");
}
}
+ if (c == '\'') {
+ /* Character literal: decode a single char or escape sequence and emit
+ * TK_CHARLIT with the integer value in .val so the parser never needs
+ * to reason about escape sequences. */
+ int char_val = 0;
+ if (lxpeek(lx) == '\\') {
+ lxget(lx); /* consume backslash */
+ char esc = lxget(lx);
+ switch (esc) {
+ case 'n': char_val = '\n'; break;
+ case 't': char_val = '\t'; break;
+ case 'r': char_val = '\r'; break;
+ case '0': char_val = '\0'; break;
+ case '\\': char_val = '\\'; break;
+ case '\'': char_val = '\''; break;
+ case '"': char_val = '"'; break;
+ case 'a': char_val = '\a'; break;
+ case 'b': char_val = '\b'; break;
+ case 'f': char_val = '\f'; break;
+ case 'v': char_val = '\v'; break;
+ case 'x': {
+ int h = 0;
+ while (isxdigit(lxpeek(lx))) {
+ char hc = lxget(lx);
+ h = h * 16 + (isdigit(hc) ? hc - '0' : tolower(hc) - 'a' + 10);
+ }
+ char_val = h;
+ break;
+ }
+ default: char_val = (unsigned char)esc; break;
+ }
+ } else if (lxpeek(lx) != '\'') {
+ char_val = (unsigned char)lxget(lx);
+ }
+ if (lxpeek(lx) != '\'') {
+ perror_at(lx, "unterminated or multi-character char literal");
+ }
+ lxget(lx); /* consume closing ' */
+ return (_T){TK_CHARLIT, char_val, NULL};
+ }
switch (c) {
case '(':
return (_T){TK_LPAREN, 0, lx_strdup_checked(lx,"(")};
@@ -144,12 +225,18 @@ sit lxnext(_LX *lx) {
case ';':
return (_T){TK_SEMI, 0, lx_strdup_checked(lx,";")};
case '+':
+ if (lxpeek(lx) == '+') { lxget(lx); lx->col++; return (_T){TK_INC, 0, lx_strdup_checked(lx,"++")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_PLUS_EQ,0, lx_strdup_checked(lx,"+=")}; }
return (_T){TK_PLUS, 0, lx_strdup_checked(lx,"+")};
case '-':
+ if (lxpeek(lx) == '-') { lxget(lx); lx->col++; return (_T){TK_DEC, 0, lx_strdup_checked(lx,"--")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_MINUS_EQ,0, lx_strdup_checked(lx,"-=")}; }
return (_T){TK_MINUS, 0, lx_strdup_checked(lx,"-")};
case '*':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_STAR_EQ, 0, lx_strdup_checked(lx,"*=")}; }
return (_T){TK_STAR, 0, lx_strdup_checked(lx,"*")};
case '/':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_SLASH_EQ,0, lx_strdup_checked(lx,"/=")}; }
return (_T){TK_SLASH, 0, lx_strdup_checked(lx,"/")};
case '&':
return (_T){TK_AMP, 0, lx_strdup_checked(lx,"&")};
@@ -169,6 +256,10 @@ sit lxnext(_LX *lx) {
return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx,"[")};
case ']':
return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx,"]")};
+ case '?':
+ return (_T){TK_QUESTION, 0, lx_strdup_checked(lx,"?")};
+ case ':':
+ return (_T){TK_COLON, 0, lx_strdup_checked(lx,":")};
default:
return (_T){TK_INVALID, 0, NULL};
}