summaryrefslogtreecommitdiff
path: root/src/parser.h
diff options
context:
space:
mode:
authorDavid Moc <personal@cdatgoose.org>2026-03-05 23:38:49 +0100
committerDavid Moc <personal@cdatgoose.org>2026-03-05 23:38:49 +0100
commit0385817bb1301a778bb33f8405a435293b9f8905 (patch)
tree53f4b6f13e393bd368c37ba4363826b46940dfd3 /src/parser.h
parent262abf9b552a168ef3ae91f91af97683f16420a7 (diff)
Pushing to repo for safety.
Diffstat (limited to 'src/parser.h')
-rw-r--r--src/parser.h595
1 files changed, 595 insertions, 0 deletions
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..154e0ba
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,595 @@
+/* Parser: recursive-descent front end that builds an AST `_FN` list
+ * from the token stream, handling expressions, statements, and functions. */
+#ifndef INCLUDE_parser
+#define INCLUDE_parser
+
+#include "ast.h"
+#include "lexer.h"
+#include "token.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Parser state */
+typedef struct {
+ _LX *lx;
+ _T cur;
+} _P;
+
+/* Error reporting with line/column info */
+static void perror_expected(_LX *lx, const char *expected, const char *got) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n",
+ lx->line, lx->col, expected, got);
+ exit(1);
+}
+
+static void perror_unexpected(_LX *lx, const char *context, const char *got) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: unexpected %s in %s\n",
+ lx->line, lx->col, got, context);
+ exit(1);
+}
+
+static void pnext(_P *p) {
+ if (p->cur.lxem) {
+ free(p->cur.lxem);
+ p->cur.lxem = NULL;
+ }
+ p->cur = lxnext(p->lx);
+}
+
+/* Expect a particular token kind; on mismatch print helpful error and exit */
+static void pexpect(_P *p, _TK tk) {
+ if (p->cur.kind != tk) {
+ const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>";
+ perror_expected(p->lx, _TN[tk], got);
+ }
+}
+
+static _STN *pstmt(_P *p); // forward
+static _STN *pblock(_P *p) {
+ pexpect(p, TK_LBRACE);
+ pnext(p); /* consume '{' */
+
+ _STN *head = NULL;
+ _STN **cur = &head;
+
+ while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) {
+ _STN *stmt = pstmt(p);
+ *cur = stmt;
+ cur = &stmt->n;
+ }
+
+ pexpect(p, TK_RBRACE);
+ pnext(p); /* consume '}' */
+
+ return st_block(head);
+}
+static _EX *pexpr(_P *p); // forward
+static _EX *pterm(_P *p); // forward
+static _EX *punary(_P *p); // forward
+static _EX *pfact(_P *p); // fwd
+
+/* Precedence climbing layers */
+static _EX *pmul(_P *p);
+static _EX *padd(_P *p);
+static _EX *pshift(_P *p);
+static _EX *pbitand(_P *p);
+static _EX *pbitxor(_P *p);
+static _EX *pbitor(_P *p);
+static _EX *prel(_P *p);
+static _EX *peq(_P *p);
+static _EX *plogand(_P *p);
+static _EX *plogor(_P *p);
+
+static _STN *passign_or_expr_stmt(_P *p) {
+ _EX *lhs = pexpr(p);
+ if (p->cur.kind == TK_ASSIGN) {
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target in expression context\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ return st_assign(lhs, rhs);
+ }
+ return st_expr(lhs);
+}
+
+static _EX *ppostfix_from_expr(_P *p, _EX *e) {
+ // Handle array indexing: expr[expr]
+ while (p->cur.kind == TK_LBRACKET) {
+ pnext(p); // consume '['
+ _EX *index = pexpr(p);
+ pexpect(p, TK_RBRACKET);
+ pnext(p); // consume ']'
+ e = ex_index(e, index);
+ }
+
+ return e;
+}
+
+static _EX *ppostfix(_P *p) {
+ _EX *e = pfact(p);
+ return ppostfix_from_expr(p, e);
+}
+
+/* ---- FACTOR ---- */
+static _EX *pfact(_P *p) {
+ if (p->cur.kind == TK_NUMBER) {
+ _EX *n = ex_number(p->cur.val);
+ pnext(p);
+ return n;
+
+ } else if (p->cur.kind == TK_IDENT) {
+ char *name = strdup(p->cur.lxem);
+ if (!name) {
+ fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n");
+ exit(1);
+ }
+ pnext(p); /* consume identifier */
+
+ if (p->cur.kind == TK_LPAREN) {
+ /* function call */
+ pnext(p); /* consume '(' */
+
+ _EX **args = NULL;
+ int argc = 0;
+
+ if (p->cur.kind != TK_RPAREN) {
+ do {
+ args = realloc(args, sizeof(_EX *) * (argc + 1));
+ if (!args) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function arguments\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ args[argc++] = pexpr(p);
+
+ if (p->cur.kind == TK_COMMA) {
+ pnext(p); /* skip comma */
+ } else {
+ break;
+ }
+ } while (1);
+ }
+
+ pexpect(p, TK_RPAREN);
+ pnext(p); /* consume ')' */
+
+ return ex_call(name, args, argc);
+ }
+
+ _EX *var_expr = ex_var(name);
+ return ppostfix_from_expr(p, var_expr);
+
+ } else if (p->cur.kind == TK_LPAREN) {
+ pnext(p); /* consume '(' */
+ _EX *n = pexpr(p);
+ pexpect(p, TK_RPAREN);
+ pnext(p); /* consume ')' */
+ return n;
+
+ } else if (p->cur.kind == TK_STRING) {
+ char *str = strdup(p->cur.lxem);
+ if (!str) {
+ fprintf(stderr, "[PARSER] Error: strdup failed for string literal\n");
+ exit(1);
+ }
+ pnext(p);
+ return ex_string(str);
+
+ } else {
+ perror_unexpected(p->lx, "factor", _TN[p->cur.kind]);
+ }
+}
+
+/* ---- TERM ---- */
+static _EX *pmul(_P *p) {
+ _EX *n = punary(p);
+ while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = punary(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *padd(_P *p) {
+ _EX *n = pmul(p);
+ while (p->cur.kind == TK_PLUS || p->cur.kind == TK_MINUS) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = pmul(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *pshift(_P *p) {
+ _EX *n = padd(p);
+ while (p->cur.kind == TK_SHL || p->cur.kind == TK_SHR) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = padd(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *pbitand(_P *p) {
+ _EX *n = peq(p);
+ while (p->cur.kind == TK_AMP) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = peq(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *pbitxor(_P *p) {
+ _EX *n = pbitand(p);
+ while (p->cur.kind == TK_CARET) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = pbitand(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *pbitor(_P *p) {
+ _EX *n = pbitxor(p);
+ while (p->cur.kind == TK_BAR) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = pbitxor(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *prel(_P *p) {
+ _EX *n = pshift(p);
+ while (p->cur.kind == TK_LT || p->cur.kind == TK_LE || p->cur.kind == TK_GT || p->cur.kind == TK_GE) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = pshift(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *peq(_P *p) {
+ _EX *n = prel(p);
+ while (p->cur.kind == TK_EQ || p->cur.kind == TK_NE) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *r = prel(p);
+ n = ex_binop(n, op, r);
+ }
+ return n;
+}
+
+static _EX *plogand(_P *p) {
+ _EX *n = pbitor(p);
+ while (p->cur.kind == TK_AND) {
+ pnext(p);
+ _EX *r = pbitor(p);
+ // keep as a binary op node TK_AND; codegen will short-circuit
+ n = ex_binop(n, TK_AND, r);
+ }
+ return n;
+}
+
+static _EX *plogor(_P *p) {
+ _EX *n = plogand(p);
+ while (p->cur.kind == TK_OR) {
+ pnext(p);
+ _EX *r = plogand(p);
+ n = ex_binop(n, TK_OR, r);
+ }
+ return n;
+}
+
+/* ---- EXPR ---- */
+static _EX *pexpr(_P *p) { return plogor(p); }
+
+/* ---- UNARY ---- */
+static _EX *punary(_P *p) {
+ if (p->cur.kind == TK_AMP) { // &expr
+ pnext(p);
+ _EX *sub = punary(p);
+ NEW_EX(EX_ADDR);
+ e->addr.expr = sub;
+ return e;
+ }
+ if (p->cur.kind == TK_STAR) { // *expr
+ pnext(p);
+ _EX *sub = punary(p);
+ NEW_EX(EX_DEREF);
+ e->deref.expr = sub;
+ return e;
+ }
+ if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0)
+ pnext(p);
+ _EX *sub = punary(p);
+ _EX *zero = ex_number(0);
+ return ex_binop(sub, TK_EQ, zero);
+ }
+ return ppostfix(p);
+}
+char parse_char_literal(_P *p) {
+ /* assume current token is TK_SQUOTE */
+ pnext(p); // consume opening '
+ if (p->cur.kind != TK_IDENT) {
+ perror_expected(p->lx, "character literal", _TN[p->cur.kind]);
+ }
+ char c = p->cur.lxem[0];
+ pnext(p); // consume char
+ pexpect(p, TK_SQUOTE);
+ pnext(p); // consume closing '
+ return c;
+}
+_EX *ex_charlit(char c) {
+ _EX *e = malloc(sizeof(_EX));
+ if (!e) {
+ fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n");
+ exit(1);
+ }
+ e->kind = EX_NUMBER;
+ e->value = c;
+ return e;
+}
+
+static _STN *pstmt(_P *p) {
+ switch (p->cur.kind) {
+ case TK_IF: {
+ pnext(p); // consume if
+ pexpect(p, TK_LPAREN); pnext(p);
+ _EX *cond = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ _STN *thenb = pstmt(p);
+ _STN *elseb = NULL;
+ if (p->cur.kind == TK_ELSE) { pnext(p); elseb = pstmt(p); }
+ return st_if(cond, thenb, elseb);
+ }
+ case TK_WHILE: {
+ pnext(p);
+ pexpect(p, TK_LPAREN); pnext(p);
+ _EX *cond = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ _STN *body = pstmt(p);
+ return st_while(cond, body);
+ }
+ case TK_FOR: {
+ pnext(p);
+ pexpect(p, TK_LPAREN); pnext(p);
+ _STN *init = NULL; _EX *cond = NULL; _STN *step = NULL;
+ if (p->cur.kind != TK_SEMI) {
+ init = pstmt(p);
+ } else { pnext(p); }
+ if (p->cur.kind != TK_SEMI) {
+ cond = pexpr(p);
+ }
+ pexpect(p, TK_SEMI); pnext(p);
+ if (p->cur.kind != TK_RPAREN) {
+ step = passign_or_expr_stmt(p);
+ }
+ pexpect(p, TK_RPAREN); pnext(p);
+ _STN *body = pstmt(p);
+ return st_for(init, cond, step, body);
+ }
+ case TK_INT:
+ case TK_CHAR: {
+ _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 };
+ pnext(p); /* consume type */
+ while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+
+ if (p->cur.kind != TK_IDENT) {
+ perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]);
+ }
+ char *name = strdup(p->cur.lxem);
+ if (!name) {
+ fprintf(stderr, "[PARSER] Error: strdup failed for variable name\n");
+ exit(1);
+ }
+ pnext(p);
+
+ // Parse array size: [N] or []
+ if (p->cur.kind == TK_LBRACKET) {
+ pnext(p); // consume '['
+ if (p->cur.kind == TK_NUMBER) {
+ vtype.array_size = p->cur.val;
+ pnext(p); // consume number
+ } else {
+ vtype.array_size = 0; // unknown size []
+ }
+ pexpect(p, TK_RBRACKET);
+ pnext(p); // consume ']'
+ }
+
+ _EX *init = NULL;
+ if (p->cur.kind == TK_ASSIGN) {
+ pnext(p);
+
+ if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) {
+ /* parse char literal */
+ char c = parse_char_literal(p); // implement this to consume quotes and return char
+ init = ex_charlit(c);
+ } else {
+ init = pexpr(p);
+ }
+ }
+
+ pexpect(p, TK_SEMI);
+ pnext(p);
+
+ _STN *decl = st_var_decl(name, init);
+ decl->var_decl.type = vtype;
+ return decl;
+ }
+
+ case TK_IDENT: {
+ _EX *lhs_or_call = pfact(p); /* starts from ident path */
+ if (p->cur.kind == TK_ASSIGN) {
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI);
+ pnext(p);
+ return st_assign(lhs_or_call, rhs);
+ }
+ pexpect(p, TK_SEMI);
+ pnext(p);
+ return st_expr(lhs_or_call);
+ }
+ case TK_RETURN: {
+ pnext(p); /* consume 'return' */
+ _EX *expr = pexpr(p);
+ pexpect(p, TK_SEMI);
+ pnext(p); /* consume ';' */
+
+ return st_return(expr);
+ }
+ case TK_LBRACE: {
+ /* block statement */
+ return pblock(p); /* pblock will consume the braces */
+ }
+ default: {
+ /* General expression or assignment starting with unary, paren, etc. */
+ _EX *lhs = pexpr(p);
+ if (p->cur.kind == TK_ASSIGN) {
+ /* only allow assignment to var or *expr */
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI);
+ pnext(p);
+ return st_assign(lhs, rhs);
+ }
+ pexpect(p, TK_SEMI);
+ pnext(p);
+ return st_expr(lhs);
+ }
+ }
+}
+
+static _FN *pfunc(_P *p) {
+ pexpect(p, TK_INT);
+ pnext(p); /* consume 'int' */
+
+ if (p->cur.kind != TK_IDENT) {
+ perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]);
+ }
+ char *name = NULL;
+ if (p->cur.lxem)
+ name = strdup(p->cur.lxem);
+ if (p->cur.lxem && !name) {
+ fprintf(stderr, "[PARSER] Error: strdup failed for function name\n");
+ exit(1);
+ }
+ pnext(p); /* consume function name */
+
+ /* expect '(' then consume it */
+ pexpect(p, TK_LPAREN);
+ pnext(p); /* consume '(' */
+
+ /* parse optional parameter list */
+ char **params = NULL;
+ _TY *params_types = NULL;
+ int pac = 0;
+
+ if (p->cur.kind != TK_RPAREN) {
+ /* at least one parameter expected */
+ while (1) {
+ /* first token should be a type, e.g. int */
+ _TY vtype;
+ if (p->cur.kind == TK_INT)
+ vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1};
+ else if (p->cur.kind == TK_CHAR)
+ vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1};
+ else {
+ perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]);
+ }
+ pnext(p); /* consume type */
+ while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+
+ /* next should be an identifier (variable name) */
+ if (p->cur.kind != TK_IDENT) {
+ perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]);
+ }
+
+ // Parse array size for parameters after the identifier
+ char *param_name = strdup(p->cur.lxem);
+ if (!param_name) {
+ fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n");
+ exit(1);
+ }
+ pnext(p); // consume identifier
+ if (p->cur.kind == TK_LBRACKET) {
+ pnext(p); // consume '['
+ if (p->cur.kind == TK_NUMBER) {
+ vtype.array_size = p->cur.val;
+ pnext(p); // consume number
+ } else {
+ vtype.array_size = 0; // unknown size []
+ }
+ pexpect(p, TK_RBRACKET);
+ pnext(p); // consume ']'
+ }
+
+ char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1));
+ _TY *new_param_types = (_TY *)realloc(params_types, sizeof(_TY) * (pac + 1));
+ if (!new_params || !new_param_types) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function parameters\n", p->lx->line, p->lx->col);
+ free(new_params);
+ free(new_param_types);
+ exit(1);
+ }
+ params = new_params;
+ params_types = new_param_types;
+ params[pac] = param_name;
+ params_types[pac] = vtype;
+ pac++;
+
+ if (p->cur.kind == TK_COMMA) {
+ pnext(p); /* consume comma and continue */
+ continue;
+ } else {
+ break;
+ }
+ }
+ }
+
+ pexpect(p, TK_RPAREN);
+ pnext(p); /* consume ')' */
+
+ _STN *body = pblock(p); /* pblock consumes the block braces and returns */
+
+ return fn_new(name, params, params_types, pac, body);
+}
+
+static _FN *parse_program(_LX *lx) {
+ _P pstate = {.lx = lx, .cur = lxnext(lx)};
+
+ _FN *head = NULL;
+ _FN **cur = &head;
+
+ while (pstate.cur.kind != TK_EOF) {
+ _FN *f = pfunc(&pstate);
+ *cur = f;
+ cur = &f->n;
+ }
+
+ if (pstate.cur.lxem) {
+ free(pstate.cur.lxem);
+ pstate.cur.lxem = NULL;
+ }
+
+ return head;
+}
+
+#endif /* INCLUDE_parser */