diff options
| author | David Moc <personal@cdatgoose.org> | 2026-03-05 23:38:49 +0100 |
|---|---|---|
| committer | David Moc <personal@cdatgoose.org> | 2026-03-05 23:38:49 +0100 |
| commit | 0385817bb1301a778bb33f8405a435293b9f8905 (patch) | |
| tree | 53f4b6f13e393bd368c37ba4363826b46940dfd3 /src/parser.h | |
| parent | 262abf9b552a168ef3ae91f91af97683f16420a7 (diff) | |
Pushing to repo for safety.
Diffstat (limited to 'src/parser.h')
| -rw-r--r-- | src/parser.h | 595 |
1 files changed, 595 insertions, 0 deletions
diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..154e0ba --- /dev/null +++ b/src/parser.h @@ -0,0 +1,595 @@ +/* Parser: recursive-descent front end that builds an AST `_FN` list + * from the token stream, handling expressions, statements, and functions. */ +#ifndef INCLUDE_parser +#define INCLUDE_parser + +#include "ast.h" +#include "lexer.h" +#include "token.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Parser state */ +typedef struct { + _LX *lx; + _T cur; +} _P; + +/* Error reporting with line/column info */ +static void perror_expected(_LX *lx, const char *expected, const char *got) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n", + lx->line, lx->col, expected, got); + exit(1); +} + +static void perror_unexpected(_LX *lx, const char *context, const char *got) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: unexpected %s in %s\n", + lx->line, lx->col, got, context); + exit(1); +} + +static void pnext(_P *p) { + if (p->cur.lxem) { + free(p->cur.lxem); + p->cur.lxem = NULL; + } + p->cur = lxnext(p->lx); +} + +/* Expect a particular token kind; on mismatch print helpful error and exit */ +static void pexpect(_P *p, _TK tk) { + if (p->cur.kind != tk) { + const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>"; + perror_expected(p->lx, _TN[tk], got); + } +} + +static _STN *pstmt(_P *p); // forward +static _STN *pblock(_P *p) { + pexpect(p, TK_LBRACE); + pnext(p); /* consume '{' */ + + _STN *head = NULL; + _STN **cur = &head; + + while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) { + _STN *stmt = pstmt(p); + *cur = stmt; + cur = &stmt->n; + } + + pexpect(p, TK_RBRACE); + pnext(p); /* consume '}' */ + + return st_block(head); +} +static _EX *pexpr(_P *p); // forward +static _EX *pterm(_P *p); // forward +static _EX *punary(_P *p); // forward +static _EX *pfact(_P *p); // fwd + +/* Precedence climbing layers */ +static _EX *pmul(_P *p); +static _EX *padd(_P *p); +static _EX *pshift(_P *p); +static _EX *pbitand(_P *p); +static _EX *pbitxor(_P *p); +static _EX *pbitor(_P *p); +static _EX *prel(_P *p); +static _EX *peq(_P *p); +static _EX *plogand(_P *p); +static _EX *plogor(_P *p); + +static _STN *passign_or_expr_stmt(_P *p) { + _EX *lhs = pexpr(p); + if (p->cur.kind == TK_ASSIGN) { + if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target in expression context\n", p->lx->line, p->lx->col); + exit(1); + } + pnext(p); + _EX *rhs = pexpr(p); + return st_assign(lhs, rhs); + } + return st_expr(lhs); +} + +static _EX *ppostfix_from_expr(_P *p, _EX *e) { + // Handle array indexing: expr[expr] + while (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + _EX *index = pexpr(p); + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + e = ex_index(e, index); + } + + return e; +} + +static _EX *ppostfix(_P *p) { + _EX *e = pfact(p); + return ppostfix_from_expr(p, e); +} + +/* ---- FACTOR ---- */ +static _EX *pfact(_P *p) { + if (p->cur.kind == TK_NUMBER) { + _EX *n = ex_number(p->cur.val); + pnext(p); + return n; + + } else if (p->cur.kind == TK_IDENT) { + char *name = strdup(p->cur.lxem); + if (!name) { + fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n"); + exit(1); + } + pnext(p); /* consume identifier */ + + if (p->cur.kind == TK_LPAREN) { + /* function call */ + pnext(p); /* consume '(' */ + + _EX **args = NULL; + int argc = 0; + + if (p->cur.kind != TK_RPAREN) { + do { + args = realloc(args, sizeof(_EX *) * (argc + 1)); + if (!args) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function arguments\n", p->lx->line, p->lx->col); + exit(1); + } + args[argc++] = pexpr(p); + + if (p->cur.kind == TK_COMMA) { + pnext(p); /* skip comma */ + } else { + break; + } + } while (1); + } + + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + + return ex_call(name, args, argc); + } + + _EX *var_expr = ex_var(name); + return ppostfix_from_expr(p, var_expr); + + } else if (p->cur.kind == TK_LPAREN) { + pnext(p); /* consume '(' */ + _EX *n = pexpr(p); + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + return n; + + } else if (p->cur.kind == TK_STRING) { + char *str = strdup(p->cur.lxem); + if (!str) { + fprintf(stderr, "[PARSER] Error: strdup failed for string literal\n"); + exit(1); + } + pnext(p); + return ex_string(str); + + } else { + perror_unexpected(p->lx, "factor", _TN[p->cur.kind]); + } +} + +/* ---- TERM ---- */ +static _EX *pmul(_P *p) { + _EX *n = punary(p); + while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = punary(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *padd(_P *p) { + _EX *n = pmul(p); + while (p->cur.kind == TK_PLUS || p->cur.kind == TK_MINUS) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pmul(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pshift(_P *p) { + _EX *n = padd(p); + while (p->cur.kind == TK_SHL || p->cur.kind == TK_SHR) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = padd(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitand(_P *p) { + _EX *n = peq(p); + while (p->cur.kind == TK_AMP) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = peq(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitxor(_P *p) { + _EX *n = pbitand(p); + while (p->cur.kind == TK_CARET) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pbitand(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitor(_P *p) { + _EX *n = pbitxor(p); + while (p->cur.kind == TK_BAR) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pbitxor(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *prel(_P *p) { + _EX *n = pshift(p); + while (p->cur.kind == TK_LT || p->cur.kind == TK_LE || p->cur.kind == TK_GT || p->cur.kind == TK_GE) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pshift(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *peq(_P *p) { + _EX *n = prel(p); + while (p->cur.kind == TK_EQ || p->cur.kind == TK_NE) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = prel(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *plogand(_P *p) { + _EX *n = pbitor(p); + while (p->cur.kind == TK_AND) { + pnext(p); + _EX *r = pbitor(p); + // keep as a binary op node TK_AND; codegen will short-circuit + n = ex_binop(n, TK_AND, r); + } + return n; +} + +static _EX *plogor(_P *p) { + _EX *n = plogand(p); + while (p->cur.kind == TK_OR) { + pnext(p); + _EX *r = plogand(p); + n = ex_binop(n, TK_OR, r); + } + return n; +} + +/* ---- EXPR ---- */ +static _EX *pexpr(_P *p) { return plogor(p); } + +/* ---- UNARY ---- */ +static _EX *punary(_P *p) { + if (p->cur.kind == TK_AMP) { // &expr + pnext(p); + _EX *sub = punary(p); + NEW_EX(EX_ADDR); + e->addr.expr = sub; + return e; + } + if (p->cur.kind == TK_STAR) { // *expr + pnext(p); + _EX *sub = punary(p); + NEW_EX(EX_DEREF); + e->deref.expr = sub; + return e; + } + if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0) + pnext(p); + _EX *sub = punary(p); + _EX *zero = ex_number(0); + return ex_binop(sub, TK_EQ, zero); + } + return ppostfix(p); +} +char parse_char_literal(_P *p) { + /* assume current token is TK_SQUOTE */ + pnext(p); // consume opening ' + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "character literal", _TN[p->cur.kind]); + } + char c = p->cur.lxem[0]; + pnext(p); // consume char + pexpect(p, TK_SQUOTE); + pnext(p); // consume closing ' + return c; +} +_EX *ex_charlit(char c) { + _EX *e = malloc(sizeof(_EX)); + if (!e) { + fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n"); + exit(1); + } + e->kind = EX_NUMBER; + e->value = c; + return e; +} + +static _STN *pstmt(_P *p) { + switch (p->cur.kind) { + case TK_IF: { + pnext(p); // consume if + pexpect(p, TK_LPAREN); pnext(p); + _EX *cond = pexpr(p); + pexpect(p, TK_RPAREN); pnext(p); + _STN *thenb = pstmt(p); + _STN *elseb = NULL; + if (p->cur.kind == TK_ELSE) { pnext(p); elseb = pstmt(p); } + return st_if(cond, thenb, elseb); + } + case TK_WHILE: { + pnext(p); + pexpect(p, TK_LPAREN); pnext(p); + _EX *cond = pexpr(p); + pexpect(p, TK_RPAREN); pnext(p); + _STN *body = pstmt(p); + return st_while(cond, body); + } + case TK_FOR: { + pnext(p); + pexpect(p, TK_LPAREN); pnext(p); + _STN *init = NULL; _EX *cond = NULL; _STN *step = NULL; + if (p->cur.kind != TK_SEMI) { + init = pstmt(p); + } else { pnext(p); } + if (p->cur.kind != TK_SEMI) { + cond = pexpr(p); + } + pexpect(p, TK_SEMI); pnext(p); + if (p->cur.kind != TK_RPAREN) { + step = passign_or_expr_stmt(p); + } + pexpect(p, TK_RPAREN); pnext(p); + _STN *body = pstmt(p); + return st_for(init, cond, step, body); + } + case TK_INT: + case TK_CHAR: { + _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 }; + pnext(p); /* consume type */ + while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } + + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]); + } + char *name = strdup(p->cur.lxem); + if (!name) { + fprintf(stderr, "[PARSER] Error: strdup failed for variable name\n"); + exit(1); + } + pnext(p); + + // Parse array size: [N] or [] + if (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + if (p->cur.kind == TK_NUMBER) { + vtype.array_size = p->cur.val; + pnext(p); // consume number + } else { + vtype.array_size = 0; // unknown size [] + } + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + } + + _EX *init = NULL; + if (p->cur.kind == TK_ASSIGN) { + pnext(p); + + if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) { + /* parse char literal */ + char c = parse_char_literal(p); // implement this to consume quotes and return char + init = ex_charlit(c); + } else { + init = pexpr(p); + } + } + + pexpect(p, TK_SEMI); + pnext(p); + + _STN *decl = st_var_decl(name, init); + decl->var_decl.type = vtype; + return decl; + } + + case TK_IDENT: { + _EX *lhs_or_call = pfact(p); /* starts from ident path */ + if (p->cur.kind == TK_ASSIGN) { + pnext(p); + _EX *rhs = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); + return st_assign(lhs_or_call, rhs); + } + pexpect(p, TK_SEMI); + pnext(p); + return st_expr(lhs_or_call); + } + case TK_RETURN: { + pnext(p); /* consume 'return' */ + _EX *expr = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); /* consume ';' */ + + return st_return(expr); + } + case TK_LBRACE: { + /* block statement */ + return pblock(p); /* pblock will consume the braces */ + } + default: { + /* General expression or assignment starting with unary, paren, etc. */ + _EX *lhs = pexpr(p); + if (p->cur.kind == TK_ASSIGN) { + /* only allow assignment to var or *expr */ + if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col); + exit(1); + } + pnext(p); + _EX *rhs = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); + return st_assign(lhs, rhs); + } + pexpect(p, TK_SEMI); + pnext(p); + return st_expr(lhs); + } + } +} + +static _FN *pfunc(_P *p) { + pexpect(p, TK_INT); + pnext(p); /* consume 'int' */ + + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]); + } + char *name = NULL; + if (p->cur.lxem) + name = strdup(p->cur.lxem); + if (p->cur.lxem && !name) { + fprintf(stderr, "[PARSER] Error: strdup failed for function name\n"); + exit(1); + } + pnext(p); /* consume function name */ + + /* expect '(' then consume it */ + pexpect(p, TK_LPAREN); + pnext(p); /* consume '(' */ + + /* parse optional parameter list */ + char **params = NULL; + _TY *params_types = NULL; + int pac = 0; + + if (p->cur.kind != TK_RPAREN) { + /* at least one parameter expected */ + while (1) { + /* first token should be a type, e.g. int */ + _TY vtype; + if (p->cur.kind == TK_INT) + vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1}; + else if (p->cur.kind == TK_CHAR) + vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1}; + else { + perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]); + } + pnext(p); /* consume type */ + while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } + + /* next should be an identifier (variable name) */ + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]); + } + + // Parse array size for parameters after the identifier + char *param_name = strdup(p->cur.lxem); + if (!param_name) { + fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n"); + exit(1); + } + pnext(p); // consume identifier + if (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + if (p->cur.kind == TK_NUMBER) { + vtype.array_size = p->cur.val; + pnext(p); // consume number + } else { + vtype.array_size = 0; // unknown size [] + } + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + } + + char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1)); + _TY *new_param_types = (_TY *)realloc(params_types, sizeof(_TY) * (pac + 1)); + if (!new_params || !new_param_types) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function parameters\n", p->lx->line, p->lx->col); + free(new_params); + free(new_param_types); + exit(1); + } + params = new_params; + params_types = new_param_types; + params[pac] = param_name; + params_types[pac] = vtype; + pac++; + + if (p->cur.kind == TK_COMMA) { + pnext(p); /* consume comma and continue */ + continue; + } else { + break; + } + } + } + + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + + _STN *body = pblock(p); /* pblock consumes the block braces and returns */ + + return fn_new(name, params, params_types, pac, body); +} + +static _FN *parse_program(_LX *lx) { + _P pstate = {.lx = lx, .cur = lxnext(lx)}; + + _FN *head = NULL; + _FN **cur = &head; + + while (pstate.cur.kind != TK_EOF) { + _FN *f = pfunc(&pstate); + *cur = f; + cur = &f->n; + } + + if (pstate.cur.lxem) { + free(pstate.cur.lxem); + pstate.cur.lxem = NULL; + } + + return head; +} + +#endif /* INCLUDE_parser */ |
