/* Parser: recursive-descent front end that builds an AST `_FN` list * from the token stream, handling expressions, statements, and functions. */ #ifndef INCLUDE_parser #define INCLUDE_parser #include "ast.h" #include "lexer.h" #include "token.h" #include #include #include /* Parser state */ typedef struct { _LX *lx; _T cur; } _P; /* Error reporting with line/column info */ static void perror_expected(_LX *lx, const char *expected, const char *got) { fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n", lx->line, lx->col, expected, got); exit(1); } static void perror_unexpected(_LX *lx, const char *context, const char *got) { fprintf(stderr, "[PARSER] Error at line %d, column %d: unexpected %s in %s\n", lx->line, lx->col, got, context); exit(1); } static void pnext(_P *p) { if (p->cur.lxem) { free(p->cur.lxem); p->cur.lxem = NULL; } p->cur = lxnext(p->lx); } /* Expect a particular token kind; on mismatch print helpful error and exit */ static void pexpect(_P *p, _TK tk) { if (p->cur.kind != tk) { const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : ""; perror_expected(p->lx, _TN[tk], got); } } static _STN *pstmt(_P *p); // forward static _STN *pblock(_P *p) { pexpect(p, TK_LBRACE); pnext(p); /* consume '{' */ _STN *head = NULL; _STN **cur = &head; while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) { _STN *stmt = pstmt(p); *cur = stmt; cur = &stmt->n; } pexpect(p, TK_RBRACE); pnext(p); /* consume '}' */ return st_block(head); } static _EX *pexpr(_P *p); // forward static _EX *pterm(_P *p); // forward static _EX *punary(_P *p); // forward static _EX *pfact(_P *p); // fwd /* Precedence climbing layers */ static _EX *pmul(_P *p); static _EX *padd(_P *p); static _EX *pshift(_P *p); static _EX *pbitand(_P *p); static _EX *pbitxor(_P *p); static _EX *pbitor(_P *p); static _EX *prel(_P *p); static _EX *peq(_P *p); static _EX *plogand(_P *p); static _EX *plogor(_P *p); static _STN *passign_or_expr_stmt(_P *p) { _EX *lhs = pexpr(p); if (p->cur.kind == TK_ASSIGN) { if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target in expression context\n", p->lx->line, p->lx->col); exit(1); } pnext(p); _EX *rhs = pexpr(p); return st_assign(lhs, rhs); } return st_expr(lhs); } static _EX *ppostfix_from_expr(_P *p, _EX *e) { // Handle array indexing: expr[expr] while (p->cur.kind == TK_LBRACKET) { pnext(p); // consume '[' _EX *index = pexpr(p); pexpect(p, TK_RBRACKET); pnext(p); // consume ']' e = ex_index(e, index); } return e; } static _EX *ppostfix(_P *p) { _EX *e = pfact(p); return ppostfix_from_expr(p, e); } /* ---- FACTOR ---- */ static _EX *pfact(_P *p) { if (p->cur.kind == TK_NUMBER) { _EX *n = ex_number(p->cur.val); pnext(p); return n; } else if (p->cur.kind == TK_IDENT) { char *name = strdup(p->cur.lxem); if (!name) { fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n"); exit(1); } pnext(p); /* consume identifier */ if (p->cur.kind == TK_LPAREN) { /* function call */ pnext(p); /* consume '(' */ _EX **args = NULL; int argc = 0; if (p->cur.kind != TK_RPAREN) { do { args = realloc(args, sizeof(_EX *) * (argc + 1)); if (!args) { fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function arguments\n", p->lx->line, p->lx->col); exit(1); } args[argc++] = pexpr(p); if (p->cur.kind == TK_COMMA) { pnext(p); /* skip comma */ } else { break; } } while (1); } pexpect(p, TK_RPAREN); pnext(p); /* consume ')' */ return ex_call(name, args, argc); } _EX *var_expr = ex_var(name); return ppostfix_from_expr(p, var_expr); } else if (p->cur.kind == TK_LPAREN) { pnext(p); /* consume '(' */ _EX *n = pexpr(p); pexpect(p, TK_RPAREN); pnext(p); /* consume ')' */ return n; } else if (p->cur.kind == TK_STRING) { char *str = strdup(p->cur.lxem); if (!str) { fprintf(stderr, "[PARSER] Error: strdup failed for string literal\n"); exit(1); } pnext(p); return ex_string(str); } else { perror_unexpected(p->lx, "factor", _TN[p->cur.kind]); } } /* ---- TERM ---- */ static _EX *pmul(_P *p) { _EX *n = punary(p); while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) { _TK op = p->cur.kind; pnext(p); _EX *r = punary(p); n = ex_binop(n, op, r); } return n; } static _EX *padd(_P *p) { _EX *n = pmul(p); while (p->cur.kind == TK_PLUS || p->cur.kind == TK_MINUS) { _TK op = p->cur.kind; pnext(p); _EX *r = pmul(p); n = ex_binop(n, op, r); } return n; } static _EX *pshift(_P *p) { _EX *n = padd(p); while (p->cur.kind == TK_SHL || p->cur.kind == TK_SHR) { _TK op = p->cur.kind; pnext(p); _EX *r = padd(p); n = ex_binop(n, op, r); } return n; } static _EX *pbitand(_P *p) { _EX *n = peq(p); while (p->cur.kind == TK_AMP) { _TK op = p->cur.kind; pnext(p); _EX *r = peq(p); n = ex_binop(n, op, r); } return n; } static _EX *pbitxor(_P *p) { _EX *n = pbitand(p); while (p->cur.kind == TK_CARET) { _TK op = p->cur.kind; pnext(p); _EX *r = pbitand(p); n = ex_binop(n, op, r); } return n; } static _EX *pbitor(_P *p) { _EX *n = pbitxor(p); while (p->cur.kind == TK_BAR) { _TK op = p->cur.kind; pnext(p); _EX *r = pbitxor(p); n = ex_binop(n, op, r); } return n; } static _EX *prel(_P *p) { _EX *n = pshift(p); while (p->cur.kind == TK_LT || p->cur.kind == TK_LE || p->cur.kind == TK_GT || p->cur.kind == TK_GE) { _TK op = p->cur.kind; pnext(p); _EX *r = pshift(p); n = ex_binop(n, op, r); } return n; } static _EX *peq(_P *p) { _EX *n = prel(p); while (p->cur.kind == TK_EQ || p->cur.kind == TK_NE) { _TK op = p->cur.kind; pnext(p); _EX *r = prel(p); n = ex_binop(n, op, r); } return n; } static _EX *plogand(_P *p) { _EX *n = pbitor(p); while (p->cur.kind == TK_AND) { pnext(p); _EX *r = pbitor(p); // keep as a binary op node TK_AND; codegen will short-circuit n = ex_binop(n, TK_AND, r); } return n; } static _EX *plogor(_P *p) { _EX *n = plogand(p); while (p->cur.kind == TK_OR) { pnext(p); _EX *r = plogand(p); n = ex_binop(n, TK_OR, r); } return n; } /* ---- EXPR ---- */ static _EX *pexpr(_P *p) { return plogor(p); } /* ---- UNARY ---- */ static _EX *punary(_P *p) { if (p->cur.kind == TK_AMP) { // &expr pnext(p); _EX *sub = punary(p); NEW_EX(EX_ADDR); e->addr.expr = sub; return e; } if (p->cur.kind == TK_STAR) { // *expr pnext(p); _EX *sub = punary(p); NEW_EX(EX_DEREF); e->deref.expr = sub; return e; } if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0) pnext(p); _EX *sub = punary(p); _EX *zero = ex_number(0); return ex_binop(sub, TK_EQ, zero); } return ppostfix(p); } char parse_char_literal(_P *p) { /* assume current token is TK_SQUOTE */ pnext(p); // consume opening ' if (p->cur.kind != TK_IDENT) { perror_expected(p->lx, "character literal", _TN[p->cur.kind]); } char c = p->cur.lxem[0]; pnext(p); // consume char pexpect(p, TK_SQUOTE); pnext(p); // consume closing ' return c; } _EX *ex_charlit(char c) { _EX *e = malloc(sizeof(_EX)); if (!e) { fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n"); exit(1); } e->kind = EX_NUMBER; e->value = c; return e; } static _STN *pstmt(_P *p) { switch (p->cur.kind) { case TK_IF: { pnext(p); // consume if pexpect(p, TK_LPAREN); pnext(p); _EX *cond = pexpr(p); pexpect(p, TK_RPAREN); pnext(p); _STN *thenb = pstmt(p); _STN *elseb = NULL; if (p->cur.kind == TK_ELSE) { pnext(p); elseb = pstmt(p); } return st_if(cond, thenb, elseb); } case TK_WHILE: { pnext(p); pexpect(p, TK_LPAREN); pnext(p); _EX *cond = pexpr(p); pexpect(p, TK_RPAREN); pnext(p); _STN *body = pstmt(p); return st_while(cond, body); } case TK_FOR: { pnext(p); pexpect(p, TK_LPAREN); pnext(p); _STN *init = NULL; _EX *cond = NULL; _STN *step = NULL; if (p->cur.kind != TK_SEMI) { init = pstmt(p); } else { pnext(p); } if (p->cur.kind != TK_SEMI) { cond = pexpr(p); } pexpect(p, TK_SEMI); pnext(p); if (p->cur.kind != TK_RPAREN) { step = passign_or_expr_stmt(p); } pexpect(p, TK_RPAREN); pnext(p); _STN *body = pstmt(p); return st_for(init, cond, step, body); } case TK_INT: case TK_CHAR: { _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 }; pnext(p); /* consume type */ while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } if (p->cur.kind != TK_IDENT) { perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]); } char *name = strdup(p->cur.lxem); if (!name) { fprintf(stderr, "[PARSER] Error: strdup failed for variable name\n"); exit(1); } pnext(p); // Parse array size: [N] or [] if (p->cur.kind == TK_LBRACKET) { pnext(p); // consume '[' if (p->cur.kind == TK_NUMBER) { vtype.array_size = p->cur.val; pnext(p); // consume number } else { vtype.array_size = 0; // unknown size [] } pexpect(p, TK_RBRACKET); pnext(p); // consume ']' } _EX *init = NULL; if (p->cur.kind == TK_ASSIGN) { pnext(p); if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) { /* parse char literal */ char c = parse_char_literal(p); // implement this to consume quotes and return char init = ex_charlit(c); } else { init = pexpr(p); } } pexpect(p, TK_SEMI); pnext(p); _STN *decl = st_var_decl(name, init); decl->var_decl.type = vtype; return decl; } case TK_IDENT: { _EX *lhs_or_call = pfact(p); /* starts from ident path */ if (p->cur.kind == TK_ASSIGN) { pnext(p); _EX *rhs = pexpr(p); pexpect(p, TK_SEMI); pnext(p); return st_assign(lhs_or_call, rhs); } pexpect(p, TK_SEMI); pnext(p); return st_expr(lhs_or_call); } case TK_RETURN: { pnext(p); /* consume 'return' */ _EX *expr = pexpr(p); pexpect(p, TK_SEMI); pnext(p); /* consume ';' */ return st_return(expr); } case TK_LBRACE: { /* block statement */ return pblock(p); /* pblock will consume the braces */ } default: { /* General expression or assignment starting with unary, paren, etc. */ _EX *lhs = pexpr(p); if (p->cur.kind == TK_ASSIGN) { /* only allow assignment to var or *expr */ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col); exit(1); } pnext(p); _EX *rhs = pexpr(p); pexpect(p, TK_SEMI); pnext(p); return st_assign(lhs, rhs); } pexpect(p, TK_SEMI); pnext(p); return st_expr(lhs); } } } static _FN *pfunc(_P *p) { pexpect(p, TK_INT); pnext(p); /* consume 'int' */ if (p->cur.kind != TK_IDENT) { perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]); } char *name = NULL; if (p->cur.lxem) name = strdup(p->cur.lxem); if (p->cur.lxem && !name) { fprintf(stderr, "[PARSER] Error: strdup failed for function name\n"); exit(1); } pnext(p); /* consume function name */ /* expect '(' then consume it */ pexpect(p, TK_LPAREN); pnext(p); /* consume '(' */ /* parse optional parameter list */ char **params = NULL; _TY *params_types = NULL; int pac = 0; if (p->cur.kind != TK_RPAREN) { /* at least one parameter expected */ while (1) { /* first token should be a type, e.g. int */ _TY vtype; if (p->cur.kind == TK_INT) vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1}; else if (p->cur.kind == TK_CHAR) vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1}; else { perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]); } pnext(p); /* consume type */ while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } /* next should be an identifier (variable name) */ if (p->cur.kind != TK_IDENT) { perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]); } // Parse array size for parameters after the identifier char *param_name = strdup(p->cur.lxem); if (!param_name) { fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n"); exit(1); } pnext(p); // consume identifier if (p->cur.kind == TK_LBRACKET) { pnext(p); // consume '[' if (p->cur.kind == TK_NUMBER) { vtype.array_size = p->cur.val; pnext(p); // consume number } else { vtype.array_size = 0; // unknown size [] } pexpect(p, TK_RBRACKET); pnext(p); // consume ']' } char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1)); _TY *new_param_types = (_TY *)realloc(params_types, sizeof(_TY) * (pac + 1)); if (!new_params || !new_param_types) { fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function parameters\n", p->lx->line, p->lx->col); free(new_params); free(new_param_types); exit(1); } params = new_params; params_types = new_param_types; params[pac] = param_name; params_types[pac] = vtype; pac++; if (p->cur.kind == TK_COMMA) { pnext(p); /* consume comma and continue */ continue; } else { break; } } } pexpect(p, TK_RPAREN); pnext(p); /* consume ')' */ _STN *body = pblock(p); /* pblock consumes the block braces and returns */ return fn_new(name, params, params_types, pac, body); } static _FN *parse_program(_LX *lx) { _P pstate = {.lx = lx, .cur = lxnext(lx)}; _FN *head = NULL; _FN **cur = &head; while (pstate.cur.kind != TK_EOF) { _FN *f = pfunc(&pstate); *cur = f; cur = &f->n; } if (pstate.cur.lxem) { free(pstate.cur.lxem); pstate.cur.lxem = NULL; } return head; } #endif /* INCLUDE_parser */