diff options
| author | David Moc <personal@cdatgoose.org> | 2026-03-05 23:38:49 +0100 |
|---|---|---|
| committer | David Moc <personal@cdatgoose.org> | 2026-03-05 23:38:49 +0100 |
| commit | 0385817bb1301a778bb33f8405a435293b9f8905 (patch) | |
| tree | 53f4b6f13e393bd368c37ba4363826b46940dfd3 /src | |
| parent | 262abf9b552a168ef3ae91f91af97683f16420a7 (diff) | |
Pushing to repo for safety.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ast.h | 410 | ||||
| -rw-r--r-- | src/codegen_jit.h | 827 | ||||
| -rw-r--r-- | src/lexer.h | 182 | ||||
| -rw-r--r-- | src/main.c | 289 | ||||
| -rw-r--r-- | src/parser.h | 595 | ||||
| -rw-r--r-- | src/token.h | 104 |
6 files changed, 2407 insertions, 0 deletions
diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..6c326cc --- /dev/null +++ b/src/ast.h @@ -0,0 +1,410 @@ +#ifndef INCLUDE_ast +#define INCLUDE_ast + +#include "token.h" +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + + +typedef enum { EX_NUMBER, EX_VAR, EX_BINOP, EX_CALL, EX_ADDR, EX_DEREF, EX_STRING, EX_INDEX } _EK; + +typedef struct _EX { + _EK kind; + union { + int value; // for number + char *name; // for var + struct { + struct _EX *l; + _TK op; + struct _EX *r; + } binop; + struct { + char *func_name; + struct _EX **args; + int argc; + } call; + struct { + struct _EX *expr; // &expr + } addr; + struct { + struct _EX *expr; // *expr + } deref; + char *string; // for string literal + struct { + struct _EX *array; // array expression + struct _EX *index; // index expression + } index; + }; +} _EX; + + +typedef enum { STK_RETURN, STK_VAR_DECL, STK_ASSIGN, STK_EXPR, STK_BLOCK, STK_IF, STK_WHILE, STK_FOR } _STK; + +typedef struct _STN { + _STK kind; + union { + struct { + char *name; + _EX *init; + _TY type; + } var_decl; + struct { + _EX *lhs; // var or *expr + _EX *expr; + } assign; + _EX *expr; // STK_EXPR + _EX *return_expr; // STK_RETURN + struct _STN *body; // STK_BLOCK + struct { // STK_IF + _EX *cond; + struct _STN *thenb; + struct _STN *elseb; // may be NULL + } ifs; + struct { // STK_WHILE + _EX *cond; + struct _STN *body; + } whl; + struct { // STK_FOR + struct _STN *init; // may be NULL (a stmt like decl/assign) + _EX *cond; // may be NULL + struct _STN *step; // may be NULL (an expr stmt) + struct _STN *body; + } fr; + }; + struct _STN *n; // linked list +} _STN; + + + +typedef struct _FN { + char *name; + char **params; + _TY *param_types; + int pac; + _STN *body; + struct _FN *n; +} _FN; + +typedef struct NameEntry { + char *name; + struct NameEntry *next; +} NameEntry; + +#define HASH_SIZE 128 + +static unsigned long hash_str(const char *s) { + unsigned long h = 5381; + int c; + while ((c = *s++)) + h = ((h << 5) + h) + (unsigned long)c; + return h; +} + +static int name_seen(NameEntry **table, const char *name) { + unsigned long h = hash_str(name) % HASH_SIZE; + for (NameEntry *e = table[h]; e; e = e->next) { + if (strcmp(e->name, name) == 0) + return 1; + } + NameEntry *ne = malloc(sizeof(*ne)); + if (!ne) { + fprintf(stderr, "Error: malloc failed\n"); + exit(1); + } + ne->name = strdup(name); + ne->next = table[h]; + table[h] = ne; + return 0; +} + +static void free_hash_table(NameEntry **table) { + for (int i = 0; i < HASH_SIZE; i++) { + NameEntry *e = table[i]; + while (e) { + NameEntry *next = e->next; + free(e->name); + free(e); + e = next; + } + table[i] = NULL; + } +} + +_FN *fnlist_prepare(_FN *head) { + if (!head) return NULL; + + NameEntry *table[HASH_SIZE] = {0}; + + _FN *main_node = NULL; + _FN *main_prev = NULL; + _FN *prev = NULL; + + for (_FN *cur = head; cur; cur = cur->n) { + if (!cur->name) { + fprintf(stderr, "[fnlist prepare] error: function with NULL name\n"); + free_hash_table(table); + exit(1); + } + + if (name_seen(table, cur->name)) { + fprintf(stderr, "[fnlist prepare] error: duplicate function name '%s'\n", cur->name); + free_hash_table(table); + exit(1); + } + + if (strcmp(cur->name, "main") == 0) { + if (main_node) { + fprintf(stderr, "[fnlist prepare] error: multiple 'main' functions found\n"); + free_hash_table(table); + exit(1); + } + main_node = cur; + main_prev = prev; + } + + prev = cur; + } + + free_hash_table(table); + + if (!main_node) return head; + if (!main_prev) return head; + + main_prev->n = main_node->n; + main_node->n = head; + return main_node; +} + + + + + +/* Generic alloc macros */ +#define NEW_EX(k) \ + _EX *e = (_EX *)calloc(1, sizeof(_EX)); \ + e->kind = k +#define NEW_ST(k) \ + _STN *s = (_STN *)calloc(1, sizeof(_STN)); \ + s->kind = k +#define NEW_FN() _FN *f = (_FN *)calloc(1, sizeof(_FN)) + +/* Constructor declaration macros */ +#define DEFINE_EX_CONSTRUCTOR(name, kind, ...) \ + static inline _EX *ex_##name(__VA_ARGS__) + +#define DEFINE_ST_CONSTRUCTOR(name, kind, ...) \ + static inline _STN *st_##name(__VA_ARGS__) + +#define DEFINE_FN_CONSTRUCTOR(name, ...) \ + static inline _FN *fn_##name(__VA_ARGS__) + + + +DEFINE_EX_CONSTRUCTOR(number, EX_NUMBER, int value) { + NEW_EX(EX_NUMBER); + e->value = value; + return e; +} + +DEFINE_EX_CONSTRUCTOR(var, EX_VAR, char *name) { + NEW_EX(EX_VAR); + e->name = name; + return e; +} + +DEFINE_EX_CONSTRUCTOR(binop, EX_BINOP, _EX *l, _TK op, _EX *r) { + NEW_EX(EX_BINOP); + e->binop.l = l; + e->binop.r = r; + e->binop.op = op; + return e; +} + +DEFINE_EX_CONSTRUCTOR(call, EX_CALL, char *func_name, _EX **args, int argc) { + NEW_EX(EX_CALL); + e->call.func_name = func_name; + e->call.args = args; + e->call.argc = argc; + return e; +} + +DEFINE_EX_CONSTRUCTOR(string, EX_STRING, char *str) { + NEW_EX(EX_STRING); + e->string = str; + return e; +} + +DEFINE_EX_CONSTRUCTOR(index, EX_INDEX, _EX *array, _EX *idx) { + NEW_EX(EX_INDEX); + e->index.array = array; + e->index.index = idx; + return e; +} + + +DEFINE_ST_CONSTRUCTOR(return, STK_RETURN, _EX *expr) { + NEW_ST(STK_RETURN); + s->return_expr = expr; + return s; +} + +DEFINE_ST_CONSTRUCTOR(var_decl, STK_VAR_DECL, char *name, _EX *init) { + NEW_ST(STK_VAR_DECL); + s->var_decl.name = name; + s->var_decl.init = init; + return s; +} + +DEFINE_ST_CONSTRUCTOR(assign, STK_ASSIGN, _EX *lhs, _EX *expr) { + NEW_ST(STK_ASSIGN); + s->assign.lhs = lhs; + s->assign.expr = expr; + return s; +} + +DEFINE_ST_CONSTRUCTOR(expr, STK_EXPR, _EX *expr) { + NEW_ST(STK_EXPR); + s->expr = expr; + return s; +} + +DEFINE_ST_CONSTRUCTOR(block, STK_BLOCK, _STN *body) { + NEW_ST(STK_BLOCK); + s->body = body; + return s; +} + +DEFINE_ST_CONSTRUCTOR(if, STK_IF, _EX *cond, _STN *thenb, _STN *elseb) { + NEW_ST(STK_IF); + s->ifs.cond = cond; + s->ifs.thenb = thenb; + s->ifs.elseb = elseb; + return s; +} + +DEFINE_ST_CONSTRUCTOR(while, STK_WHILE, _EX *cond, _STN *body) { + NEW_ST(STK_WHILE); + s->whl.cond = cond; + s->whl.body = body; + return s; +} + +DEFINE_ST_CONSTRUCTOR(for, STK_FOR, _STN *init, _EX *cond, _STN *step, _STN *body) { + NEW_ST(STK_FOR); + s->fr.init = init; + s->fr.cond = cond; + s->fr.step = step; + s->fr.body = body; + return s; +} + + + +DEFINE_FN_CONSTRUCTOR(new, char *name, char **params, _TY* params_types, int pac, _STN *body) { + NEW_FN(); + f->name = name; + f->params = params; + f->param_types = params_types; + f->pac = pac; + f->body = body; + return f; +} + + +static inline void ex_free(_EX *e) { + if (!e) + return; + switch (e->kind) { + case EX_NUMBER: + break; + case EX_VAR: + free(e->name); + break; + case EX_BINOP: + ex_free(e->binop.l); + ex_free(e->binop.r); + break; + case EX_CALL: + for (int i = 0; i < e->call.argc; i++) + ex_free(e->call.args[i]); + free(e->call.args); + free(e->call.func_name); + break; + case EX_ADDR: + ex_free(e->addr.expr); + break; + case EX_DEREF: + ex_free(e->deref.expr); + break; + case EX_STRING: + free(e->string); + break; + case EX_INDEX: + ex_free(e->index.array); + ex_free(e->index.index); + break; + } + free(e); +} + +static inline void st_free(_STN *s) { + if (!s) + return; + switch (s->kind) { + case STK_RETURN: + ex_free(s->return_expr); + break; + case STK_VAR_DECL: + ex_free(s->var_decl.init); + free(s->var_decl.name); + break; + case STK_ASSIGN: + ex_free(s->assign.expr); + break; + case STK_EXPR: + ex_free(s->expr); + break; + case STK_BLOCK: + st_free(s->body); + break; + case STK_IF: + ex_free(s->ifs.cond); + st_free(s->ifs.thenb); + st_free(s->ifs.elseb); + break; + case STK_WHILE: + ex_free(s->whl.cond); + st_free(s->whl.body); + break; + case STK_FOR: + st_free(s->fr.init); + ex_free(s->fr.cond); + st_free(s->fr.step); + st_free(s->fr.body); + break; + } + _STN *next = s->n; + free(s); + st_free(next); +} + +static inline void fn_free(_FN *f) { + while (f) { + _FN *next = f->n; + if (f->name) + free(f->name); + if (f->params) { + for (int i = 0; i < f->pac; i++) { + free(f->params[i]); + } + free(f->params); + } + free(f->param_types); + st_free(f->body); + free(f); + f = next; + } +} + +#endif diff --git a/src/codegen_jit.h b/src/codegen_jit.h new file mode 100644 index 0000000..ff1d687 --- /dev/null +++ b/src/codegen_jit.h @@ -0,0 +1,827 @@ +#ifndef CODEGEN_JIT_H +#define CODEGEN_JIT_H + +#include "ast.h" +#include "token.h" + +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> +#include <limits.h> +#include <ctype.h> + +enum { + RAX = 0, RCX = 1, RDX = 2, RBX = 3, + RSP = 4, RBP = 5, RSI = 6, RDI = 7, + R8 = 8, R9 = 9, R10 = 10, R11 = 11, + R12 = 12, R13 = 13, R14 = 14, R15 = 15 +}; + +typedef struct { + uint8_t *buf; + size_t len; + size_t cap; +} CodeBuf; + +typedef struct FuncMap { + char *name; + void *addr; + size_t size; + size_t alloc_size; + struct FuncMap *next; +} FuncMap; + +typedef struct PatchEntry { + size_t offset; // offset of the imm64 within owning function's code + char *func_name; // target function to patch in + char *owning_func; + struct PatchEntry *next; +} PatchEntry; + +typedef struct VarMap { + char *name; + int offset; // RBP-relative offset + _TY type; + struct VarMap *next; +} VarMap; + +typedef struct { + FuncMap *func_list; + VarMap *var_list; + int next_local_offset; + CodeBuf cb; + char *current_func_name; + PatchEntry *patch_list; +} JIT; + +static void jit_init(JIT *jit) { + jit->func_list = NULL; + jit->var_list = NULL; + jit->next_local_offset = -8; + jit->cb.buf = NULL; + jit->cb.len = jit->cb.cap = 0; + jit->current_func_name = NULL; + jit->patch_list = NULL; +} + +static void jit_free(JIT *jit) { + for (VarMap *v = jit->var_list; v;) { + VarMap *n = v->next; free(v->name); free(v); v = n; + } + jit->var_list = NULL; + + for (PatchEntry *p = jit->patch_list; p;) { + PatchEntry *n = p->next; free(p->func_name); free(p->owning_func); free(p); p = n; + } + jit->patch_list = NULL; + + for (FuncMap *f = jit->func_list; f;) { + FuncMap *n = f->next; + if (f->addr && f->alloc_size > 0) munmap(f->addr, f->alloc_size); + free(f->name); free(f); f = n; + } + jit->func_list = NULL; + + free(jit->cb.buf); + jit->cb.buf = NULL; + jit->cb.len = jit->cb.cap = 0; +} + +/* --- Code buffer --- */ + +static void cb_init(CodeBuf *c) { + c->cap = 1024; c->len = 0; + c->buf = (uint8_t *)malloc(c->cap); + if (!c->buf) { fprintf(stderr, "[JIT] failed to allocate code buffer\n"); exit(1); } +} +static void cb_free(CodeBuf *c) { + free(c->buf); c->buf = NULL; c->len = c->cap = 0; +} +static void cb_grow(CodeBuf *c, size_t need) { + if (c->len + need > c->cap) { + while (c->len + need > c->cap) c->cap *= 2; + uint8_t *nb = (uint8_t *)realloc(c->buf, c->cap); + if (!nb) { fprintf(stderr, "[JIT] failed to grow code buffer\n"); free(c->buf); exit(1); } + c->buf = nb; + } +} +static void emit8(CodeBuf *c, uint8_t b) { cb_grow(c,1); c->buf[c->len++] = b; } +static void emit32(CodeBuf *c, uint32_t v) { cb_grow(c,4); memcpy(c->buf+c->len,&v,4); c->len+=4; } +static void emit64(CodeBuf *c, uint64_t v) { cb_grow(c,8); memcpy(c->buf+c->len,&v,8); c->len+=8; } +static void emitN(CodeBuf *c, const void *p, size_t n) { cb_grow(c,n); memcpy(c->buf+c->len,p,n); c->len+=n; } + +/* --- x86-64 encoding helpers --- */ + +static void emit_rex(CodeBuf *c, int reg, int rm, int w) { + uint8_t rex = 0x40; + if (w) rex |= 0x08; + if (reg&8) rex |= 0x04; + if (rm&8) rex |= 0x01; + emit8(c, (rex != 0x40 || w) ? rex : 0x48); +} +static void emit_modrm(CodeBuf *c, uint8_t mod, uint8_t reg, uint8_t rm) { + emit8(c, (mod<<6)|((reg&7)<<3)|(rm&7)); +} + +static void emit_movabs_rax_imm64(CodeBuf *c, uint64_t imm) { + emit8(c,0x48); emit8(c,0xB8); emit64(c,imm); +} +static void emit_mov_reg_imm64(CodeBuf *c, int reg, int64_t imm) { + emit_rex(c,reg,0,1); emit8(c,0xB8+(reg&7)); emit64(c,imm); +} +static void emit_mov_mem64_reg(CodeBuf *c, int disp32, int reg) { + emit_rex(c,reg,RBP,1); emit8(c,0x89); emit_modrm(c,2,reg,RBP); emit32(c,(uint32_t)disp32); +} +static void emit_mov_reg_mem64(CodeBuf *c, int reg, int disp32) { + emit_rex(c,reg,RBP,1); emit8(c,0x8B); emit_modrm(c,2,reg,RBP); emit32(c,(uint32_t)disp32); +} +static void emit_mov_reg_mem_reg(CodeBuf *c, int dst, int base) { + emit_rex(c,dst,base,1); emit8(c,0x8B); emit_modrm(c,0,dst,base); +} +static void emit_mov_mem_reg_reg(CodeBuf *c, int base, int src) { + emit_rex(c,src,base,1); emit8(c,0x89); emit_modrm(c,0,src,base); +} +static void emit_movzx_rax_mem8(CodeBuf *c, int disp32) { + emit8(c,0x48); emit8(c,0x0F); emit8(c,0xB6); emit_modrm(c,2,RAX,RBP); emit32(c,disp32); +} +static void emit_mov_mem8_rax(CodeBuf *c, int disp32) { + emit_rex(c,RAX,RBP,0); emit8(c,0x88); emit_modrm(c,2,RAX,RBP); emit32(c,disp32); +} +static void emit_mov_reg_reg(CodeBuf *c, int dst, int src) { + emit_rex(c,src,dst,1); emit8(c,0x89); emit_modrm(c,3,src,dst); +} +static void emit_add_reg_reg(CodeBuf *c, int dst, int src) { + emit_rex(c,src,dst,1); emit8(c,0x01); emit_modrm(c,3,src,dst); +} +static void emit_sub_reg_reg(CodeBuf *c, int dst, int src) { + emit_rex(c,src,dst,1); emit8(c,0x29); emit_modrm(c,3,src,dst); +} +static void emit_add_reg_imm32(CodeBuf *c, int dst, int32_t imm) { + emit_rex(c,0,dst,1); emit8(c,0x81); emit_modrm(c,3,0,dst); emit32(c,(uint32_t)imm); +} +static void emit_push_reg(CodeBuf *c, int reg) { + if (reg&8) emit8(c,0x41); emit8(c,0x50+(reg&7)); +} +static void emit_pop_reg(CodeBuf *c, int reg) { + if (reg&8) emit8(c,0x41); emit8(c,0x58+(reg&7)); +} + +static void emit_add_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x01,0xD8},3); } +static void emit_imul_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x0F,0xAF,0xC3},4); } +static void emit_idiv_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); } +static void emit_imod(CodeBuf *c) { + emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); // cqo; idiv rbx + emit_mov_reg_reg(c, RAX, RDX); // remainder -> RAX +} +static void emit_or_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x09,0xD8},3); } +static void emit_xor_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x31,0xD8},3); } +static void emit_and_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x21,0xD8},3); } +static void emit_shl_rax_cl(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0xD3,0xE0},3); } +static void emit_shr_rax_cl(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0xD3,0xE8},3); } +static void emit_cmp_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x39,0xD8},3); } +static void emit_setcc_al(CodeBuf *c, uint8_t cc) { emitN(c,(uint8_t[]){0x0F,0x90|cc,0xC0},3); } +static void emit_movzx_rax_al(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x0F,0xB6,0xC0},4); } +static void emit_jmp_rel32(CodeBuf *c, int32_t rel) { emit8(c,0xE9); emit32(c,(uint32_t)rel); } +static void emit_jcc_rel32(CodeBuf *c, uint8_t cc, int32_t rel) { + emitN(c,(uint8_t[]){0x0F,(uint8_t)(0x80|cc)},2); emit32(c,(uint32_t)rel); +} +static void emit_test_rax_rax(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x85,0xC0},3); } + +static void emit_prologue(CodeBuf *c, int total_stack_size) { + emit8(c,0x55); // push rbp + emitN(c,(uint8_t[]){0x48,0x89,0xE5},3); // mov rbp, rsp + emit_push_reg(c, RBX); // save callee-saved RBX + int stack_bytes = ((total_stack_size+15)/16)*16; + if (stack_bytes > 0) { + emitN(c,(uint8_t[]){0x48,0x81,0xEC},3); // sub rsp, imm32 + emit32(c,(uint32_t)stack_bytes); + } +} +static void emit_epilogue(CodeBuf *c) { + emit_pop_reg(c, RBX); emit8(c,0xC9); emit8(c,0xC3); // restore RBX; leave; ret +} +static void emit_lea_rax_rbp_disp(CodeBuf *c, int disp32) { + emit8(c,0x48); emit8(c,0x8D); emit_modrm(c,2,RAX,RBP); emit32(c,(uint32_t)disp32); +} +static void emit_load_rax_from_mem(CodeBuf *c, int disp32, int size) { + if (size == 1) emit_movzx_rax_mem8(c, disp32); else emit_mov_reg_mem64(c, RAX, disp32); +} +static void emit_store_rax_to_mem(CodeBuf *c, int disp32, int size) { + if (size == 1) emit_mov_mem8_rax(c, disp32); else emit_mov_mem64_reg(c, disp32, RAX); +} + +/* --- Variable and type helpers --- */ + +static int calculate_type_size(_TY type) { + if (type.ptr_level > 0) return 8; + int base_size = (type.base == TY_CHAR) ? 1 : 8; + size_t total = (size_t)base_size; + if (type.array_size > 0) { + total *= (size_t)type.array_size; + if (total > (size_t)INT_MAX) { fprintf(stderr, "[JIT] array size too large\n"); exit(1); } + } + return (int)total; +} + +static int align_offset(int offset, _TY type) { + int align = (type.base == TY_CHAR) ? 1 : 8; + if (offset % align == 0) return offset; + return (offset / align) * align; +} + +static void reset_varmap(JIT *jit) { + for (VarMap *v = jit->var_list; v;) { + VarMap *n = v->next; free(v->name); free(v); v = n; + } + jit->var_list = NULL; + jit->next_local_offset = -16; // after saved RBX at RBP-8, 16-byte aligned +} + +static void add_var(JIT *jit, const char *name, _TY type) { + VarMap *v = (VarMap *)malloc(sizeof(VarMap)); + if (!v) { fprintf(stderr, "[JIT] malloc failed in add_var\n"); exit(1); } + v->name = strdup(name); + if (!v->name) { fprintf(stderr, "[JIT] strdup failed in add_var\n"); free(v); exit(1); } + v->type = type; + int type_size = calculate_type_size(type); + jit->next_local_offset = align_offset(jit->next_local_offset, type); + v->offset = jit->next_local_offset; + jit->next_local_offset -= type_size; + v->next = jit->var_list; + jit->var_list = v; +} + +static int get_var_offset(JIT *jit, const char *name) { + for (VarMap *v = jit->var_list; v; v = v->next) + if (strcmp(v->name, name) == 0) return v->offset; + fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1); +} + +static _TY get_var_type(JIT *jit, const char *name) { + for (VarMap *v = jit->var_list; v; v = v->next) + if (strcmp(v->name, name) == 0) return v->type; + fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1); +} + +/* --- Type checking --- */ + +static _TY get_expr_type(JIT *jit, _EX *expr) { + switch (expr->kind) { + case EX_NUMBER: return (_TY){TY_INT, 0, -1}; + case EX_STRING: return (_TY){TY_CHAR, 1, -1}; + case EX_VAR: return get_var_type(jit, expr->name); + case EX_BINOP: { + _TY left_type = get_expr_type(jit, expr->binop.l); + // All arithmetic, comparison, bitwise ops produce int + (void)get_expr_type(jit, expr->binop.r); + switch (expr->binop.op) { + case TK_PLUS: case TK_MINUS: case TK_STAR: case TK_SLASH: case TK_PERCENT: + case TK_EQ: case TK_NE: case TK_LT: case TK_LE: case TK_GT: case TK_GE: + case TK_AND: case TK_OR: + case TK_AMP: case TK_BAR: case TK_CARET: case TK_SHL: case TK_SHR: + return (_TY){TY_INT, 0, -1}; + default: return left_type; + } + } + case EX_CALL: return (_TY){TY_INT, 0, -1}; + case EX_INDEX: { + _TY t = get_expr_type(jit, expr->index.array); + if (t.array_size > 0) return (_TY){t.base, t.ptr_level, -1}; + if (t.ptr_level > 0) return (_TY){t.base, t.ptr_level-1, -1}; + return (_TY){TY_INT, 0, -1}; + } + case EX_DEREF: { + _TY t = get_expr_type(jit, expr->deref.expr); + if (t.ptr_level > 0) return (_TY){t.base, t.ptr_level-1, -1}; + return (_TY){TY_INT, 0, -1}; + } + case EX_ADDR: { + _TY t = get_expr_type(jit, expr->addr.expr); + return (_TY){t.base, t.ptr_level+1, -1}; + } + default: return (_TY){TY_INT, 0, -1}; + } +} + +static int types_compatible(_TY expected, _TY actual) { + if (expected.base == actual.base && + expected.ptr_level == actual.ptr_level && + expected.array_size == actual.array_size) return 1; + // Allow untyped int literals to be assigned anywhere + if (actual.base == TY_INT && actual.ptr_level == 0 && actual.array_size == -1) return 1; + return 0; +} + +/* --- Function registry --- */ + +static void register_func(JIT *jit, const char *name) { + FuncMap *f = (FuncMap *)malloc(sizeof(FuncMap)); + if (!f) { fprintf(stderr, "[JIT] malloc failed in register_func\n"); exit(1); } + f->name = strdup(name); + if (!f->name) { fprintf(stderr, "[JIT] strdup failed in register_func\n"); free(f); exit(1); } + f->addr = NULL; f->size = 0; f->alloc_size = 0; + f->next = jit->func_list; + jit->func_list = f; +} + +static void set_func_addr(JIT *jit, const char *name, void *addr, size_t size, size_t alloc_size) { + for (FuncMap *f = jit->func_list; f; f = f->next) { + if (strcmp(f->name, name) != 0) continue; + f->addr = addr; f->size = size; f->alloc_size = alloc_size; + + fprintf(stderr, "\n[JIT] %s @ %p (%zu bytes)\n", name, addr, size); + uint8_t *p = (uint8_t *)addr; + for (size_t i = 0; i < size; i += 16) { + fprintf(stderr, "\t%04zx ", i); + for (size_t j = 0; j < 16; j++) + (i+j < size) ? fprintf(stderr, "%02x ", p[i+j]) : fprintf(stderr, " "); + fprintf(stderr, " |"); + for (size_t j = 0; j < 16 && i+j < size; j++) + fprintf(stderr, "%c", isprint(p[i+j]) ? p[i+j] : '.'); + fprintf(stderr, "\n"); + } + return; + } + fprintf(stderr, "[JIT] set_func_addr: unknown function '%s'\n", name); exit(1); +} + +static void *get_func_addr(JIT *jit, const char *name) { + for (FuncMap *f = jit->func_list; f; f = f->next) { + if (strcmp(f->name, name) == 0) + return f->addr ? f->addr : (void*)0xDEADBEEF; // placeholder; patched later + } + fprintf(stderr, "[JIT] get_func_addr: unknown function '%s'\n", name); exit(1); +} + +/* --- Stack size calculation --- */ + +static int calculate_stack_size(_STN *s) { + int total = 0; + while (s) { + if (s->kind == STK_VAR_DECL) total += calculate_type_size(s->var_decl.type); + if (s->kind == STK_BLOCK) total += calculate_stack_size(s->body); + if (s->kind == STK_FOR) { + if (s->fr.init) total += calculate_stack_size(s->fr.init); + total += calculate_stack_size(s->fr.body); + } + s = s->n; + } + return total; +} + +/* --- Code generation (forward declarations) --- */ + +static void gen_expr_jit(JIT *jit, _EX *e); +static int gen_stmt_jit(JIT *jit, _STN *s); + +/* --- Statement generation --- */ + +static int gen_stmt_jit(JIT *jit, _STN *s) { + while (s) { + switch (s->kind) { + + case STK_VAR_DECL: + add_var(jit, s->var_decl.name, s->var_decl.type); + if (s->var_decl.init) { + _TY init_type = get_expr_type(jit, s->var_decl.init); + if (!types_compatible(s->var_decl.type, init_type)) { + fprintf(stderr, "[JIT] Type mismatch: cannot assign %s to %s\n", + tybase_name(init_type.base), tybase_name(s->var_decl.type.base)); + exit(1); + } + gen_expr_jit(jit, s->var_decl.init); + int sz = (s->var_decl.type.ptr_level > 0) ? 8 + : (s->var_decl.type.base == TY_CHAR) ? 1 : 8; + emit_store_rax_to_mem(&jit->cb, get_var_offset(jit, s->var_decl.name), sz); + } + break; + + case STK_ASSIGN: { + _EX *lhs = s->assign.lhs; + if (lhs->kind == EX_VAR) { + int offset = get_var_offset(jit, lhs->name); + _TY type = get_var_type(jit, lhs->name); + _TY expr_type = get_expr_type(jit, s->assign.expr); + if (!types_compatible(type, expr_type)) { + fprintf(stderr, "[JIT] Type mismatch: cannot assign %s to %s\n", + tybase_name(expr_type.base), tybase_name(type.base)); + exit(1); + } + gen_expr_jit(jit, s->assign.expr); + int sz = (type.ptr_level > 0) ? 8 : (type.base == TY_CHAR) ? 1 : 8; + emit_store_rax_to_mem(&jit->cb, offset, sz); + + } else if (lhs->kind == EX_DEREF) { + gen_expr_jit(jit, lhs->deref.expr); + emit_mov_reg_reg(&jit->cb, RBX, RAX); // RBX = address + gen_expr_jit(jit, s->assign.expr); // RAX = value + emit_mov_mem_reg_reg(&jit->cb, RBX, RAX); + + } else if (lhs->kind == EX_INDEX) { + if (lhs->index.array->kind != EX_VAR) { + gen_expr_jit(jit, lhs->index.array); break; + } + _TY var_type = get_var_type(jit, lhs->index.array->name); + if (var_type.array_size <= 0) { + fprintf(stderr, "[JIT] Cannot index non-array '%s'\n", lhs->index.array->name); exit(1); + } + int array_offset = get_var_offset(jit, lhs->index.array->name); + int element_size = (var_type.base == TY_CHAR) ? 1 : 8; + gen_expr_jit(jit, lhs->index.index); // RAX = index + emit_mov_reg_imm64(&jit->cb, RBX, element_size); + emit_imul_rax_rbx(&jit->cb); // RAX = byte offset + emit_mov_reg_imm64(&jit->cb, RBX, array_offset); + emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_mov_reg_reg(&jit->cb, RBX, RBP); + emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index] + emit_mov_reg_reg(&jit->cb, RBX, RAX); + gen_expr_jit(jit, s->assign.expr); // RAX = value + emit_mov_mem_reg_reg(&jit->cb, RBX, RAX); + + } else { + fprintf(stderr, "[JIT] Unsupported assignment LHS kind %d\n", lhs->kind); exit(1); + } + break; + } + + case STK_RETURN: + if (s->return_expr) gen_expr_jit(jit, s->return_expr); + emit_epilogue(&jit->cb); + return 1; + + case STK_EXPR: + gen_expr_jit(jit, s->expr); + break; + + case STK_BLOCK: { + int r = gen_stmt_jit(jit, s->body); + if (r) return 1; + break; + } + + case STK_IF: { + gen_expr_jit(jit, s->ifs.cond); + emit_test_rax_rax(&jit->cb); + size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0); // JZ else + gen_stmt_jit(jit, s->ifs.thenb); + size_t jmp_pos = jit->cb.len; emit_jmp_rel32(&jit->cb, 0); // JMP end + int32_t rel_else = (int32_t)(jit->cb.len - (jz_pos + 6)); + memcpy(jit->cb.buf + jz_pos + 2, &rel_else, 4); + if (s->ifs.elseb) gen_stmt_jit(jit, s->ifs.elseb); + int32_t rel_end = (int32_t)(jit->cb.len - (jmp_pos + 5)); + memcpy(jit->cb.buf + jmp_pos + 1, &rel_end, 4); + break; + } + + case STK_WHILE: { + size_t loop_start = jit->cb.len; + gen_expr_jit(jit, s->whl.cond); + emit_test_rax_rax(&jit->cb); + size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0); + gen_stmt_jit(jit, s->whl.body); + emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5))); + int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6)); + memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4); + break; + } + + case STK_FOR: { + if (s->fr.init) gen_stmt_jit(jit, s->fr.init); + size_t loop_start = jit->cb.len; + if (s->fr.cond) { + gen_expr_jit(jit, s->fr.cond); + emit_test_rax_rax(&jit->cb); + size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0); + gen_stmt_jit(jit, s->fr.body); + if (s->fr.step) gen_stmt_jit(jit, s->fr.step); + emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5))); + int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6)); + memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4); + } else { + gen_stmt_jit(jit, s->fr.body); + if (s->fr.step) gen_stmt_jit(jit, s->fr.step); + emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5))); + } + break; + } + + default: + fprintf(stderr, "[JIT] Unsupported statement kind %d\n", s->kind); exit(1); + } + s = s->n; + } + return 0; +} + +/* --- Expression generation --- */ + +static void gen_expr_jit(JIT *jit, _EX *e) { + if (!e) return; + switch (e->kind) { + + case EX_NUMBER: + emit_movabs_rax_imm64(&jit->cb, (uint64_t)e->value); + break; + + case EX_VAR: { + int off = get_var_offset(jit, e->name); + _TY ty = get_var_type(jit, e->name); + int sz = (ty.ptr_level > 0) ? 8 : (ty.base == TY_CHAR) ? 1 : 8; + if (sz == 1) emit_movzx_rax_mem8(&jit->cb, off); + else emit_mov_reg_mem64(&jit->cb, RAX, off); + break; + } + + case EX_BINOP: { + // Short-circuit AND + if (e->binop.op == TK_AND) { + gen_expr_jit(jit, e->binop.l); + emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb); + size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0); + gen_expr_jit(jit, e->binop.r); + emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb); + emit_setcc_al(&jit->cb, 0x05); emit_movzx_rax_al(&jit->cb); + size_t jmp_pos = jit->cb.len; emit_jmp_rel32(&jit->cb, 0); + int32_t rel = (int32_t)(jit->cb.len - (jz_pos + 6)); + memcpy(jit->cb.buf + jz_pos + 2, &rel, 4); + emit_movabs_rax_imm64(&jit->cb, 0); + rel = (int32_t)(jit->cb.len - (jmp_pos + 5)); + memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4); + break; + } + // Short-circuit OR + if (e->binop.op == TK_OR) { + gen_expr_jit(jit, e->binop.l); + emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb); + size_t jnz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x05, 0); + gen_expr_jit(jit, e->binop.r); + emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb); + emit_setcc_al(&jit->cb, 0x05); emit_movzx_rax_al(&jit->cb); + size_t jmp_pos = jit->cb.len; emit_jmp_rel32(&jit->cb, 0); + int32_t rel = (int32_t)(jit->cb.len - (jnz_pos + 6)); + memcpy(jit->cb.buf + jnz_pos + 2, &rel, 4); + emit_movabs_rax_imm64(&jit->cb, 1); + rel = (int32_t)(jit->cb.len - (jmp_pos + 5)); + memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4); + break; + } + // All other binary ops: eval left -> push; eval right -> RBX = left + gen_expr_jit(jit, e->binop.l); + emit_push_reg(&jit->cb, RAX); + gen_expr_jit(jit, e->binop.r); + emit_pop_reg(&jit->cb, RBX); // RBX = left, RAX = right + switch (e->binop.op) { + case TK_PLUS: emit_add_rax_rbx(&jit->cb); break; + case TK_MINUS: + emit_mov_reg_reg(&jit->cb, RCX, RAX); + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_rex(&jit->cb, RCX, RAX, 1); emit8(&jit->cb, 0x29); emit_modrm(&jit->cb, 3, RCX, RAX); + break; + case TK_STAR: emit_imul_rax_rbx(&jit->cb); break; + case TK_SLASH: + emit_mov_reg_reg(&jit->cb, RCX, RAX); + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_mov_reg_reg(&jit->cb, RBX, RCX); + emit_idiv_rbx(&jit->cb); break; + case TK_PERCENT: + emit_mov_reg_reg(&jit->cb, RCX, RAX); + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_mov_reg_reg(&jit->cb, RBX, RCX); + emit_imod(&jit->cb); break; + case TK_BAR: emit_or_rax_rbx(&jit->cb); break; + case TK_CARET: emit_xor_rax_rbx(&jit->cb); break; + case TK_AMP: emit_and_rax_rbx(&jit->cb); break; + case TK_SHL: + emit_mov_reg_reg(&jit->cb, RCX, RAX); + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_shl_rax_cl(&jit->cb); break; + case TK_SHR: + emit_mov_reg_reg(&jit->cb, RCX, RAX); + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_shr_rax_cl(&jit->cb); break; + case TK_EQ: case TK_NE: case TK_LT: case TK_LE: case TK_GT: case TK_GE: { + emit_mov_reg_reg(&jit->cb, RCX, RAX); // RCX = right + emit_mov_reg_reg(&jit->cb, RAX, RBX); // RAX = left + emit_mov_reg_reg(&jit->cb, RBX, RCX); // RBX = right + emit_cmp_rax_rbx(&jit->cb); + uint8_t cc; + switch (e->binop.op) { + case TK_EQ: cc=0x04; break; case TK_NE: cc=0x05; break; + case TK_LT: cc=0x0C; break; case TK_LE: cc=0x0E; break; + case TK_GT: cc=0x0F; break; case TK_GE: cc=0x0D; break; + default: cc=0x04; break; + } + emit_setcc_al(&jit->cb, cc); emit_movzx_rax_al(&jit->cb); + break; + } + default: + fprintf(stderr, "[JIT] Unsupported binop %d\n", e->binop.op); exit(1); + } + break; + } + + case EX_CALL: { + int total_args = e->call.argc; + int stack_args = total_args > 6 ? total_args - 6 : 0; + int padding = (stack_args % 2) ? 8 : 0; + const int arg_regs[6] = {RDI, RSI, RDX, RCX, R8, R9}; + + for (int i = total_args-1; i >= 6; i--) { + gen_expr_jit(jit, e->call.args[i]); emit_push_reg(&jit->cb, RAX); + } + for (int i = 0; i < total_args && i < 6; i++) { + gen_expr_jit(jit, e->call.args[i]); emit_mov_reg_reg(&jit->cb, arg_regs[i], RAX); + } + if (padding) { + emit8(&jit->cb,0x48); emit8(&jit->cb,0x83); emit8(&jit->cb,0xEC); emit8(&jit->cb,0x08); + } + + void *addr = get_func_addr(jit, e->call.func_name); + if (addr == (void*)0xDEADBEEF) { + // Forward call: emit placeholder imm64, record offset for later patching + emit_movabs_rax_imm64(&jit->cb, (uint64_t)0xDEADBEEF); + PatchEntry *patch = (PatchEntry*)malloc(sizeof(PatchEntry)); + if (!patch) { fprintf(stderr, "[JIT] malloc failed (patch)\n"); exit(1); } + patch->func_name = strdup(e->call.func_name); + patch->owning_func = strdup(jit->current_func_name); + if (!patch->func_name || !patch->owning_func) { + fprintf(stderr, "[JIT] strdup failed (patch)\n"); + free(patch->func_name); free(patch->owning_func); free(patch); exit(1); + } + patch->offset = jit->cb.len - 8; // points at the imm64 in the buffer + patch->next = jit->patch_list; + jit->patch_list = patch; + } else { + emit_movabs_rax_imm64(&jit->cb, (uint64_t)addr); + } + emit8(&jit->cb, 0xFF); emit8(&jit->cb, 0xD0); // CALL RAX + + if (stack_args * 8 + padding > 0) { + emit8(&jit->cb,0x48); emit8(&jit->cb,0x81); emit8(&jit->cb,0xC4); + emit32(&jit->cb, (uint32_t)(stack_args*8 + padding)); + } + break; + } + + case EX_ADDR: + if (e->addr.expr->kind != EX_VAR) { + fprintf(stderr, "[JIT] &expr: only &var supported\n"); exit(1); + } + emit_lea_rax_rbp_disp(&jit->cb, get_var_offset(jit, e->addr.expr->name)); + break; + + case EX_DEREF: + gen_expr_jit(jit, e->deref.expr); + emit_rex(&jit->cb, RAX, RAX, 1); emit8(&jit->cb, 0x8B); emit_modrm(&jit->cb, 0, RAX, RAX); + break; + + case EX_STRING: { + char *s = strdup(e->string); + if (!s) { fprintf(stderr, "[JIT] strdup failed (string literal)\n"); exit(1); } + emit_movabs_rax_imm64(&jit->cb, (uint64_t)s); + break; + } + + case EX_INDEX: { + if (e->index.array->kind != EX_VAR) { + gen_expr_jit(jit, e->index.array); break; + } + _TY var_type = get_var_type(jit, e->index.array->name); + int element_size = (var_type.base == TY_CHAR) ? 1 : 8; + + if (var_type.ptr_level > 0) { + // Pointer indexing: load pointer, add index*element_size + int ptr_offset = get_var_offset(jit, e->index.array->name); + emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer + gen_expr_jit(jit, e->index.index); // RAX = index + if (element_size > 1) { + emit_mov_reg_reg(&jit->cb, RCX, RBX); // save pointer + emit_mov_reg_imm64(&jit->cb, RBX, element_size); + emit_imul_rax_rbx(&jit->cb); // RAX = byte offset + emit_mov_reg_reg(&jit->cb, RBX, RCX); // restore pointer + } + emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &ptr[index] + if (element_size == 1) { + emit_rex(&jit->cb, RAX, RBX, 0); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6); + emit_modrm(&jit->cb, 0, RAX, RBX); + } else { + emit_mov_reg_mem_reg(&jit->cb, RAX, RBX); + } + + } else if (var_type.array_size > 0) { + // Array indexing: compute RBP-relative address = array_offset - index*element_size + int array_offset = get_var_offset(jit, e->index.array->name); + gen_expr_jit(jit, e->index.index); // RAX = index + emit_mov_reg_imm64(&jit->cb, RBX, element_size); + emit_imul_rax_rbx(&jit->cb); // RAX = byte offset + emit_mov_reg_imm64(&jit->cb, RBX, array_offset); + emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset + emit_mov_reg_reg(&jit->cb, RAX, RBX); + emit_mov_reg_reg(&jit->cb, RBX, RBP); + emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index] + emit_mov_reg_reg(&jit->cb, RBX, RAX); + if (element_size == 1) { + emit_rex(&jit->cb, RAX, RBX, 0); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6); + emit_modrm(&jit->cb, 0, RAX, RBX); + } else { + emit_mov_reg_mem_reg(&jit->cb, RAX, RBX); + } + + } else { + fprintf(stderr, "[JIT] Cannot index non-array/non-pointer '%s'\n", e->index.array->name); exit(1); + } + break; + } + + default: + fprintf(stderr, "[JIT] Unsupported expression kind %d\n", e->kind); exit(1); + } +} + +/* --- Compile one function --- */ + +static void *gen_function_jit(JIT *jit, _FN *f, size_t *out_size) { + reset_varmap(jit); + jit->current_func_name = f->name; + + int total_stack_size = calculate_stack_size(f->body); + cb_init(&jit->cb); + emit_prologue(&jit->cb, total_stack_size); + + const int param_regs[6] = {RDI, RSI, RDX, RCX, R8, R9}; + for (int i = 0; i < f->pac && i < 6; i++) { + add_var(jit, f->params[i], f->param_types[i]); + emit_mov_mem64_reg(&jit->cb, get_var_offset(jit, f->params[i]), param_regs[i]); + } + + int did_return = gen_stmt_jit(jit, f->body); + if (!did_return) { + emit_movabs_rax_imm64(&jit->cb, 0); + emit_epilogue(&jit->cb); + } + + size_t pagesz = (size_t)sysconf(_SC_PAGESIZE); + size_t alloc_size = ((jit->cb.len + pagesz - 1) / pagesz) * pagesz; + void *mem = mmap(NULL, alloc_size, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { perror("mmap"); cb_free(&jit->cb); return NULL; } + memcpy(mem, jit->cb.buf, jit->cb.len); + if (out_size) *out_size = jit->cb.len; + set_func_addr(jit, f->name, mem, jit->cb.len, alloc_size); + cb_free(&jit->cb); + return mem; +} + +/* --- Patch forward calls after all functions are compiled --- */ + +static void patch_function_calls(JIT *jit) { + for (PatchEntry *patch = jit->patch_list; patch; patch = patch->next) { + void *target = NULL; + for (FuncMap *f = jit->func_list; f; f = f->next) + if (strcmp(f->name, patch->func_name) == 0 && f->addr) { target = f->addr; break; } + if (!target) { + fprintf(stderr, "[JIT] patch: function '%s' not found\n", patch->func_name); exit(1); + } + void *owner = NULL; + for (FuncMap *f = jit->func_list; f; f = f->next) + if (strcmp(f->name, patch->owning_func) == 0 && f->addr) { owner = f->addr; break; } + if (!owner) { + fprintf(stderr, "[JIT] patch: owning function '%s' not found\n", patch->owning_func); exit(1); + } + *(void **)((uint8_t *)owner + patch->offset) = target; + } +} + +/* --- Compile all functions and patch forward calls --- */ + +static void jit_compile_all(JIT *jit, _FN *fn_list) { + for (_FN *cur = fn_list; cur; cur = cur->n) + register_func(jit, cur->name); + + // Compile in reverse order so callees are typically compiled before callers + _FN *functions[64]; + int count = 0; + for (_FN *cur = fn_list; cur; cur = cur->n) { + if (count >= 64) { fprintf(stderr, "[JIT] Too many functions (max 64)\n"); exit(1); } + functions[count++] = cur; + } + for (int i = count-1; i >= 0; i--) + gen_function_jit(jit, functions[i], NULL); + + patch_function_calls(jit); +} + +/* --- Entry point --- */ + +static int jit_run(JIT *jit, int argc, char **argv) { + int (*main_func)(int, char **) = get_func_addr(jit, "main"); + fprintf(stderr, "[JIT] Running main (argc=%d argv=%p)\n", argc, (void*)argv); + return main_func(argc, argv); +} + +#endif
\ No newline at end of file diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..ca2b790 --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,182 @@ + +#ifndef INCLUDE_lexerlexer +#define INCLUDE_lexerlexer + +#include "token.h" +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +typedef struct { + const char *buf; + int pos; + int line; + int col; +} _LX; + +/* Error reporting with line/column info */ +static void perror_at(_LX *lx, const char *msg) { + fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", lx->line, lx->col, msg); + exit(1); +} + +#define sic static inline char +#define sii static inline int +#define siv static inline void +#define sit static inline _T + +sic lxpeek(_LX *lx) { return lx->buf[lx->pos]; } + +sic lxget(_LX *lx) { return lx->buf[lx->pos++]; } + +static inline char *lx_strdup_checked(_LX *lx, const char *s) { + char *p = strdup(s); + if (!p) { + perror_at(lx, "out of memory"); + } + return p; +} + +siv lxskipws(_LX *lx) { + while (lxpeek(lx) == ' ' || lxpeek(lx) == '\t') { + if (lxpeek(lx) == '\t') lx->col += 8; else lx->col++; + lxget(lx); + } +} + +sit lxnext(_LX *lx) { + lxskipws(lx); + char c = lxpeek(lx); + if (c == 0) + return (_T){TK_EOF, 0, NULL}; + + // Track newlines + if (c == '\n') { + lx->line++; + lx->col = 0; + lxget(lx); + return lxnext(lx); // recurse to get next token + } + + if (isalpha(c) || c == '_') { + int start = lx->pos; + while (isalnum(lxpeek(lx)) || lxpeek(lx) == '_') + lxget(lx); + int len = lx->pos - start; + char *text = strndup(lx->buf + start, len); + if (!text) { + perror_at(lx, "out of memory"); + } + return (_T){checkkw(text), 0, text}; + } + if (isdigit(c)) { + int start = lx->pos; + while (isdigit(lxpeek(lx))) + lxget(lx); + int len = lx->pos - start; + char *text = strndup(lx->buf + start, len); + if (!text) { + perror_at(lx, "out of memory"); + } + return (_T){TK_NUMBER, atoi(text), text}; + } + + lxget(lx); + lx->col++; + // handle multi-char tokens + if (c == '=') { + if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_EQ,0,lx_strdup_checked(lx,"==")}; } + return (_T){TK_ASSIGN, 0, lx_strdup_checked(lx,"=")}; + } + if (c == '!') { + if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_NE,0,lx_strdup_checked(lx,"!=")}; } + return (_T){TK_BANG,0,lx_strdup_checked(lx,"!")}; + } + if (c == '<') { + if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_LE,0,lx_strdup_checked(lx,"<=")}; } + if (lxpeek(lx) == '<') { lxget(lx); lx->col++; return (_T){TK_SHL,0,lx_strdup_checked(lx,"<<")}; } + return (_T){TK_LT,0,lx_strdup_checked(lx,"<")}; + } + if (c == '>') { + if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_GE,0,lx_strdup_checked(lx,">=")}; } + if (lxpeek(lx) == '>') { lxget(lx); lx->col++; return (_T){TK_SHR,0,lx_strdup_checked(lx,">>")}; } + return (_T){TK_GT,0,lx_strdup_checked(lx,">")}; + } + if (c == '&') { + if (lxpeek(lx) == '&') { lxget(lx); lx->col++; return (_T){TK_AND,0,lx_strdup_checked(lx,"&&")}; } + } + if (c == '|') { + if (lxpeek(lx) == '|') { lxget(lx); lx->col++; return (_T){TK_OR,0,lx_strdup_checked(lx,"||")}; } + } + if (c == '"') { + // String literal + int start = lx->pos; // pos is already after the opening quote + while (lxpeek(lx) != '"' && lxpeek(lx) != 0) { + if (lxpeek(lx) == '\\') { + lxget(lx); // consume backslash + if (lxpeek(lx) != 0) lxget(lx); // consume escaped char + } else { + lxget(lx); + } + } + if (lxpeek(lx) == '"') { + int len = lx->pos - start; // length of content + lxget(lx); // consume closing quote + char *text = strndup(lx->buf + start, len); // start at content + if (!text) { + perror_at(lx, "out of memory"); + } + return (_T){TK_STRING, 0, text}; + } else { + perror_at(lx, "unterminated string literal"); + } + } + switch (c) { + case '(': + return (_T){TK_LPAREN, 0, lx_strdup_checked(lx,"(")}; + case ')': + return (_T){TK_RPAREN, 0, lx_strdup_checked(lx,")")}; + case '{': + return (_T){TK_LBRACE, 0, lx_strdup_checked(lx,"{")}; + case '}': + return (_T){TK_RBRACE, 0, lx_strdup_checked(lx,"}")}; + case ';': + return (_T){TK_SEMI, 0, lx_strdup_checked(lx,";")}; + case '+': + return (_T){TK_PLUS, 0, lx_strdup_checked(lx,"+")}; + case '-': + return (_T){TK_MINUS, 0, lx_strdup_checked(lx,"-")}; + case '*': + return (_T){TK_STAR, 0, lx_strdup_checked(lx,"*")}; + case '/': + return (_T){TK_SLASH, 0, lx_strdup_checked(lx,"/")}; + case '&': + return (_T){TK_AMP, 0, lx_strdup_checked(lx,"&")}; + case '|': + return (_T){TK_BAR, 0, lx_strdup_checked(lx,"|")}; + case '^': + return (_T){TK_CARET, 0, lx_strdup_checked(lx,"^")}; + case '%': + return (_T){TK_PERCENT, 0, lx_strdup_checked(lx,"%")}; + case ',': + return (_T){TK_COMMA, 0, lx_strdup_checked(lx,",")}; + case '\'': + return (_T){TK_SQUOTE, 0, lx_strdup_checked(lx,"'")}; + case '"': + return (_T){TK_DQUOTE, 0, lx_strdup_checked(lx,"\"")}; + case '[': + return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx,"[")}; + case ']': + return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx,"]")}; + default: + return (_T){TK_INVALID, 0, NULL}; + } +} + +#undef sic +#undef sii +#undef siv +#undef sit + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..0f18359 --- /dev/null +++ b/src/main.c @@ -0,0 +1,289 @@ +#include "token.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#ifndef RELEASE +#define _CB_IMPLEMENTATION +#include <cbuild/cbuild.h> +static _CB_PROJECT *this = NULL; +#endif +#include "ast.h" +#include "codegen_jit.h" +#include "lexer.h" +#include "parser.h" + +static void indentf(int indent) { + for (int i = 0; i < indent; i++) + printf(" "); +} + +void print_expr(_EX *e, int indent) { + if (!e) { + indentf(indent); + printf("(null expr)\n"); + return; + } + + indentf(indent); + switch (e->kind) { + case EX_NUMBER: + printf("Number(%d)\n", e->value); + break; + case EX_VAR: + printf("Var(%s)\n", e->name); + break; + case EX_BINOP: + printf("BinOp(%s)\n", _TN[e->binop.op]); + indentf(indent + 1); + printf("Left:\n"); + print_expr(e->binop.l, indent + 2); + indentf(indent + 1); + printf("Right:\n"); + print_expr(e->binop.r, indent + 2); + break; + case EX_CALL: + printf("Call(%s) argc=%d\n", e->call.func_name, e->call.argc); + for (int i = 0; i < e->call.argc; i++) { + indentf(indent + 1); + printf("Arg[%d]:\n", i); + print_expr(e->call.args[i], indent + 2); + } + break; + case EX_ADDR: + printf("AddressOf(&)\n"); + indentf(indent + 1); + printf("Operand:\n"); + print_expr(e->addr.expr, indent + 2); + break; + case EX_DEREF: + printf("Dereference(*)\n"); + indentf(indent + 1); + printf("Operand:\n"); + print_expr(e->deref.expr, indent + 2); + break; + case EX_STRING: + printf("String(\"%s\")\n", e->string); + break; + case EX_INDEX: + printf("ArrayIndex([])\n"); + indentf(indent + 1); + printf("Array:\n"); + print_expr(e->index.array, indent + 2); + indentf(indent + 1); + printf("Index:\n"); + print_expr(e->index.index, indent + 2); + break; + default: + printf("UnknownExpr(kind=%d)\n", e->kind); + break; + } +} + +void print_stmt(_STN *s, int indent) { + if (!s) + return; + + indentf(indent); + switch (s->kind) { + case STK_RETURN: + printf("Return\n"); + if (s->return_expr) { + indentf(indent + 1); + printf("Value:\n"); + print_expr(s->return_expr, indent + 2); + } + break; + case STK_VAR_DECL: + printf("VarDecl(%s) type=[%s", s->var_decl.name, + tybase_name(s->var_decl.type.base)); + for (int i = 0; i < s->var_decl.type.ptr_level; i++) + printf("*"); + if (s->var_decl.type.array_size >= 0) { + if (s->var_decl.type.array_size == 0) { + printf("[]"); + } else { + printf("[%d]", s->var_decl.type.array_size); + } + } + printf("]\n"); + if (s->var_decl.init) { + indentf(indent + 1); + printf("Init:\n"); + print_expr(s->var_decl.init, indent + 2); + } + break; + case STK_ASSIGN: + printf("Assign\n"); + indentf(indent + 1); + printf("LHS:\n"); + print_expr(s->assign.lhs, indent + 2); + indentf(indent + 1); + printf("RHS:\n"); + print_expr(s->assign.expr, indent + 2); + break; + case STK_EXPR: + printf("ExprStmt\n"); + print_expr(s->expr, indent + 1); + break; + case STK_BLOCK: + printf("Block\n"); + print_stmt(s->body, indent + 1); + break; + case STK_IF: + printf("If\n"); + indentf(indent + 1); + printf("Condition:\n"); + print_expr(s->ifs.cond, indent + 2); + indentf(indent + 1); + printf("Then:\n"); + print_stmt(s->ifs.thenb, indent + 2); + if (s->ifs.elseb) { + indentf(indent + 1); + printf("Else:\n"); + print_stmt(s->ifs.elseb, indent + 2); + } + break; + case STK_WHILE: + printf("While\n"); + indentf(indent + 1); + printf("Condition:\n"); + print_expr(s->whl.cond, indent + 2); + indentf(indent + 1); + printf("Body:\n"); + print_stmt(s->whl.body, indent + 2); + break; + case STK_FOR: + printf("For\n"); + if (s->fr.init) { + indentf(indent + 1); + printf("Init:\n"); + print_stmt(s->fr.init, indent + 2); + } + if (s->fr.cond) { + indentf(indent + 1); + printf("Condition:\n"); + print_expr(s->fr.cond, indent + 2); + } + if (s->fr.step) { + indentf(indent + 1); + printf("Step:\n"); + print_stmt(s->fr.step, indent + 2); + } + indentf(indent + 1); + printf("Body:\n"); + print_stmt(s->fr.body, indent + 2); + break; + default: + printf("UnknownStmt(kind=%d)\n", s->kind); + break; + } + + if (s->n) + print_stmt(s->n, indent); +} + +void print_func(_FN *f, int indent) { + if (!f) + return; + + indentf(indent); + printf("Function(%s) params=[", f->name); + for (int i = 0; i < f->pac; i++) { + printf("%s", tybase_name(f->param_types[i].base)); + for (int j = 0; j < f->param_types[i].ptr_level; j++) + printf("*"); + if (f->param_types[i].array_size >= 0) { + if (f->param_types[i].array_size == 0) { + printf("[]"); + } else { + printf("[%d]", f->param_types[i].array_size); + } + } + printf(" %s", f->params[i]); + if (i < f->pac - 1) + printf(", "); + } + printf("]\n"); + + indentf(indent + 1); + printf("Body:\n"); + print_stmt(f->body, indent + 2); + + if (f->n) + print_func(f->n, indent); +} + +int main(int argc, char *argv[]) { +#ifndef RELEASE + _CB_CREATE_PROJECT(this, .name = "ccdjit", .build_type = BUILD_EXEC, + .files = CB_STRLIST("src/main.c"), .is_rebuild = 1, + .output = "bin/ccdjit"); + _CB_PROJECT_BUILD(.projects = CB_PROJECT_LIST(this), .run = 1); +#endif + if (argc == 1) { + fprintf(stderr, "Usage: %s [file.c] [-- [args]] (optional)\n", argv[0]); + fprintf(stderr, "%s add2.c -- 1 2\n", argv[0]); + return 1; + } + const char *filename = argv[1]; + int _in_arg_c = 0; + char **_in_arg_v = NULL; + if (argc > 1) { + for (int i = 2; i < argc; i++) { + if (strcmp(argv[i], "--") == 0) { + _in_arg_c = i + 1; + _in_arg_v = &argv[i + 1]; + break; + } + } + } + + printf("Input File Name: %s\n\tInput Args:\n", filename); + for (int i = 0; i < _in_arg_c; i++) + printf("\t%s\n", _in_arg_v[i]); + + FILE *file = fopen(filename, "r"); + if (file == NULL) { + fprintf(stderr, "Failed to open file %s\n", filename); + return 1; + } + fseek(file, 0, SEEK_END); + size_t file_size = ftell(file); + fseek(file, 0, SEEK_SET); + char *src = malloc(file_size + 1); + if (src == NULL) { + fprintf(stderr, "Failed to allocate memory for file contents\n"); + fclose(file); + return 1; + } + fread(src, file_size, 1, file); + fclose(file); + src[file_size] = '\0'; + + _LX lx = {src, 0, 1, 0}; // line=1, col=0 + _FN *ast = parse_program(&lx); + _FN *clean_ast = fnlist_prepare(ast); + +#ifndef RELEASE + print_func(clean_ast, 0); +#endif /* ifndef RELEASE */ + JIT jit; + jit_init(&jit); + + jit_compile_all(&jit, clean_ast); + + int result = jit_run(&jit, argc, argv); + +#ifndef RELEASE + printf("JIT returned: %d\n", result); +#endif /* ifndef RELEASE */ + jit_free(&jit); + fn_free(clean_ast); + free(src); + +#ifndef RELEASE + return 0; +#else + return result; +#endif +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..154e0ba --- /dev/null +++ b/src/parser.h @@ -0,0 +1,595 @@ +/* Parser: recursive-descent front end that builds an AST `_FN` list + * from the token stream, handling expressions, statements, and functions. */ +#ifndef INCLUDE_parser +#define INCLUDE_parser + +#include "ast.h" +#include "lexer.h" +#include "token.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* Parser state */ +typedef struct { + _LX *lx; + _T cur; +} _P; + +/* Error reporting with line/column info */ +static void perror_expected(_LX *lx, const char *expected, const char *got) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n", + lx->line, lx->col, expected, got); + exit(1); +} + +static void perror_unexpected(_LX *lx, const char *context, const char *got) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: unexpected %s in %s\n", + lx->line, lx->col, got, context); + exit(1); +} + +static void pnext(_P *p) { + if (p->cur.lxem) { + free(p->cur.lxem); + p->cur.lxem = NULL; + } + p->cur = lxnext(p->lx); +} + +/* Expect a particular token kind; on mismatch print helpful error and exit */ +static void pexpect(_P *p, _TK tk) { + if (p->cur.kind != tk) { + const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>"; + perror_expected(p->lx, _TN[tk], got); + } +} + +static _STN *pstmt(_P *p); // forward +static _STN *pblock(_P *p) { + pexpect(p, TK_LBRACE); + pnext(p); /* consume '{' */ + + _STN *head = NULL; + _STN **cur = &head; + + while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) { + _STN *stmt = pstmt(p); + *cur = stmt; + cur = &stmt->n; + } + + pexpect(p, TK_RBRACE); + pnext(p); /* consume '}' */ + + return st_block(head); +} +static _EX *pexpr(_P *p); // forward +static _EX *pterm(_P *p); // forward +static _EX *punary(_P *p); // forward +static _EX *pfact(_P *p); // fwd + +/* Precedence climbing layers */ +static _EX *pmul(_P *p); +static _EX *padd(_P *p); +static _EX *pshift(_P *p); +static _EX *pbitand(_P *p); +static _EX *pbitxor(_P *p); +static _EX *pbitor(_P *p); +static _EX *prel(_P *p); +static _EX *peq(_P *p); +static _EX *plogand(_P *p); +static _EX *plogor(_P *p); + +static _STN *passign_or_expr_stmt(_P *p) { + _EX *lhs = pexpr(p); + if (p->cur.kind == TK_ASSIGN) { + if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target in expression context\n", p->lx->line, p->lx->col); + exit(1); + } + pnext(p); + _EX *rhs = pexpr(p); + return st_assign(lhs, rhs); + } + return st_expr(lhs); +} + +static _EX *ppostfix_from_expr(_P *p, _EX *e) { + // Handle array indexing: expr[expr] + while (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + _EX *index = pexpr(p); + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + e = ex_index(e, index); + } + + return e; +} + +static _EX *ppostfix(_P *p) { + _EX *e = pfact(p); + return ppostfix_from_expr(p, e); +} + +/* ---- FACTOR ---- */ +static _EX *pfact(_P *p) { + if (p->cur.kind == TK_NUMBER) { + _EX *n = ex_number(p->cur.val); + pnext(p); + return n; + + } else if (p->cur.kind == TK_IDENT) { + char *name = strdup(p->cur.lxem); + if (!name) { + fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n"); + exit(1); + } + pnext(p); /* consume identifier */ + + if (p->cur.kind == TK_LPAREN) { + /* function call */ + pnext(p); /* consume '(' */ + + _EX **args = NULL; + int argc = 0; + + if (p->cur.kind != TK_RPAREN) { + do { + args = realloc(args, sizeof(_EX *) * (argc + 1)); + if (!args) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function arguments\n", p->lx->line, p->lx->col); + exit(1); + } + args[argc++] = pexpr(p); + + if (p->cur.kind == TK_COMMA) { + pnext(p); /* skip comma */ + } else { + break; + } + } while (1); + } + + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + + return ex_call(name, args, argc); + } + + _EX *var_expr = ex_var(name); + return ppostfix_from_expr(p, var_expr); + + } else if (p->cur.kind == TK_LPAREN) { + pnext(p); /* consume '(' */ + _EX *n = pexpr(p); + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + return n; + + } else if (p->cur.kind == TK_STRING) { + char *str = strdup(p->cur.lxem); + if (!str) { + fprintf(stderr, "[PARSER] Error: strdup failed for string literal\n"); + exit(1); + } + pnext(p); + return ex_string(str); + + } else { + perror_unexpected(p->lx, "factor", _TN[p->cur.kind]); + } +} + +/* ---- TERM ---- */ +static _EX *pmul(_P *p) { + _EX *n = punary(p); + while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = punary(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *padd(_P *p) { + _EX *n = pmul(p); + while (p->cur.kind == TK_PLUS || p->cur.kind == TK_MINUS) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pmul(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pshift(_P *p) { + _EX *n = padd(p); + while (p->cur.kind == TK_SHL || p->cur.kind == TK_SHR) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = padd(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitand(_P *p) { + _EX *n = peq(p); + while (p->cur.kind == TK_AMP) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = peq(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitxor(_P *p) { + _EX *n = pbitand(p); + while (p->cur.kind == TK_CARET) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pbitand(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *pbitor(_P *p) { + _EX *n = pbitxor(p); + while (p->cur.kind == TK_BAR) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pbitxor(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *prel(_P *p) { + _EX *n = pshift(p); + while (p->cur.kind == TK_LT || p->cur.kind == TK_LE || p->cur.kind == TK_GT || p->cur.kind == TK_GE) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = pshift(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *peq(_P *p) { + _EX *n = prel(p); + while (p->cur.kind == TK_EQ || p->cur.kind == TK_NE) { + _TK op = p->cur.kind; + pnext(p); + _EX *r = prel(p); + n = ex_binop(n, op, r); + } + return n; +} + +static _EX *plogand(_P *p) { + _EX *n = pbitor(p); + while (p->cur.kind == TK_AND) { + pnext(p); + _EX *r = pbitor(p); + // keep as a binary op node TK_AND; codegen will short-circuit + n = ex_binop(n, TK_AND, r); + } + return n; +} + +static _EX *plogor(_P *p) { + _EX *n = plogand(p); + while (p->cur.kind == TK_OR) { + pnext(p); + _EX *r = plogand(p); + n = ex_binop(n, TK_OR, r); + } + return n; +} + +/* ---- EXPR ---- */ +static _EX *pexpr(_P *p) { return plogor(p); } + +/* ---- UNARY ---- */ +static _EX *punary(_P *p) { + if (p->cur.kind == TK_AMP) { // &expr + pnext(p); + _EX *sub = punary(p); + NEW_EX(EX_ADDR); + e->addr.expr = sub; + return e; + } + if (p->cur.kind == TK_STAR) { // *expr + pnext(p); + _EX *sub = punary(p); + NEW_EX(EX_DEREF); + e->deref.expr = sub; + return e; + } + if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0) + pnext(p); + _EX *sub = punary(p); + _EX *zero = ex_number(0); + return ex_binop(sub, TK_EQ, zero); + } + return ppostfix(p); +} +char parse_char_literal(_P *p) { + /* assume current token is TK_SQUOTE */ + pnext(p); // consume opening ' + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "character literal", _TN[p->cur.kind]); + } + char c = p->cur.lxem[0]; + pnext(p); // consume char + pexpect(p, TK_SQUOTE); + pnext(p); // consume closing ' + return c; +} +_EX *ex_charlit(char c) { + _EX *e = malloc(sizeof(_EX)); + if (!e) { + fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n"); + exit(1); + } + e->kind = EX_NUMBER; + e->value = c; + return e; +} + +static _STN *pstmt(_P *p) { + switch (p->cur.kind) { + case TK_IF: { + pnext(p); // consume if + pexpect(p, TK_LPAREN); pnext(p); + _EX *cond = pexpr(p); + pexpect(p, TK_RPAREN); pnext(p); + _STN *thenb = pstmt(p); + _STN *elseb = NULL; + if (p->cur.kind == TK_ELSE) { pnext(p); elseb = pstmt(p); } + return st_if(cond, thenb, elseb); + } + case TK_WHILE: { + pnext(p); + pexpect(p, TK_LPAREN); pnext(p); + _EX *cond = pexpr(p); + pexpect(p, TK_RPAREN); pnext(p); + _STN *body = pstmt(p); + return st_while(cond, body); + } + case TK_FOR: { + pnext(p); + pexpect(p, TK_LPAREN); pnext(p); + _STN *init = NULL; _EX *cond = NULL; _STN *step = NULL; + if (p->cur.kind != TK_SEMI) { + init = pstmt(p); + } else { pnext(p); } + if (p->cur.kind != TK_SEMI) { + cond = pexpr(p); + } + pexpect(p, TK_SEMI); pnext(p); + if (p->cur.kind != TK_RPAREN) { + step = passign_or_expr_stmt(p); + } + pexpect(p, TK_RPAREN); pnext(p); + _STN *body = pstmt(p); + return st_for(init, cond, step, body); + } + case TK_INT: + case TK_CHAR: { + _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 }; + pnext(p); /* consume type */ + while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } + + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]); + } + char *name = strdup(p->cur.lxem); + if (!name) { + fprintf(stderr, "[PARSER] Error: strdup failed for variable name\n"); + exit(1); + } + pnext(p); + + // Parse array size: [N] or [] + if (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + if (p->cur.kind == TK_NUMBER) { + vtype.array_size = p->cur.val; + pnext(p); // consume number + } else { + vtype.array_size = 0; // unknown size [] + } + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + } + + _EX *init = NULL; + if (p->cur.kind == TK_ASSIGN) { + pnext(p); + + if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) { + /* parse char literal */ + char c = parse_char_literal(p); // implement this to consume quotes and return char + init = ex_charlit(c); + } else { + init = pexpr(p); + } + } + + pexpect(p, TK_SEMI); + pnext(p); + + _STN *decl = st_var_decl(name, init); + decl->var_decl.type = vtype; + return decl; + } + + case TK_IDENT: { + _EX *lhs_or_call = pfact(p); /* starts from ident path */ + if (p->cur.kind == TK_ASSIGN) { + pnext(p); + _EX *rhs = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); + return st_assign(lhs_or_call, rhs); + } + pexpect(p, TK_SEMI); + pnext(p); + return st_expr(lhs_or_call); + } + case TK_RETURN: { + pnext(p); /* consume 'return' */ + _EX *expr = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); /* consume ';' */ + + return st_return(expr); + } + case TK_LBRACE: { + /* block statement */ + return pblock(p); /* pblock will consume the braces */ + } + default: { + /* General expression or assignment starting with unary, paren, etc. */ + _EX *lhs = pexpr(p); + if (p->cur.kind == TK_ASSIGN) { + /* only allow assignment to var or *expr */ + if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col); + exit(1); + } + pnext(p); + _EX *rhs = pexpr(p); + pexpect(p, TK_SEMI); + pnext(p); + return st_assign(lhs, rhs); + } + pexpect(p, TK_SEMI); + pnext(p); + return st_expr(lhs); + } + } +} + +static _FN *pfunc(_P *p) { + pexpect(p, TK_INT); + pnext(p); /* consume 'int' */ + + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]); + } + char *name = NULL; + if (p->cur.lxem) + name = strdup(p->cur.lxem); + if (p->cur.lxem && !name) { + fprintf(stderr, "[PARSER] Error: strdup failed for function name\n"); + exit(1); + } + pnext(p); /* consume function name */ + + /* expect '(' then consume it */ + pexpect(p, TK_LPAREN); + pnext(p); /* consume '(' */ + + /* parse optional parameter list */ + char **params = NULL; + _TY *params_types = NULL; + int pac = 0; + + if (p->cur.kind != TK_RPAREN) { + /* at least one parameter expected */ + while (1) { + /* first token should be a type, e.g. int */ + _TY vtype; + if (p->cur.kind == TK_INT) + vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1}; + else if (p->cur.kind == TK_CHAR) + vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1}; + else { + perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]); + } + pnext(p); /* consume type */ + while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); } + + /* next should be an identifier (variable name) */ + if (p->cur.kind != TK_IDENT) { + perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]); + } + + // Parse array size for parameters after the identifier + char *param_name = strdup(p->cur.lxem); + if (!param_name) { + fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n"); + exit(1); + } + pnext(p); // consume identifier + if (p->cur.kind == TK_LBRACKET) { + pnext(p); // consume '[' + if (p->cur.kind == TK_NUMBER) { + vtype.array_size = p->cur.val; + pnext(p); // consume number + } else { + vtype.array_size = 0; // unknown size [] + } + pexpect(p, TK_RBRACKET); + pnext(p); // consume ']' + } + + char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1)); + _TY *new_param_types = (_TY *)realloc(params_types, sizeof(_TY) * (pac + 1)); + if (!new_params || !new_param_types) { + fprintf(stderr, "[PARSER] Error at line %d, column %d: out of memory while parsing function parameters\n", p->lx->line, p->lx->col); + free(new_params); + free(new_param_types); + exit(1); + } + params = new_params; + params_types = new_param_types; + params[pac] = param_name; + params_types[pac] = vtype; + pac++; + + if (p->cur.kind == TK_COMMA) { + pnext(p); /* consume comma and continue */ + continue; + } else { + break; + } + } + } + + pexpect(p, TK_RPAREN); + pnext(p); /* consume ')' */ + + _STN *body = pblock(p); /* pblock consumes the block braces and returns */ + + return fn_new(name, params, params_types, pac, body); +} + +static _FN *parse_program(_LX *lx) { + _P pstate = {.lx = lx, .cur = lxnext(lx)}; + + _FN *head = NULL; + _FN **cur = &head; + + while (pstate.cur.kind != TK_EOF) { + _FN *f = pfunc(&pstate); + *cur = f; + cur = &f->n; + } + + if (pstate.cur.lxem) { + free(pstate.cur.lxem); + pstate.cur.lxem = NULL; + } + + return head; +} + +#endif /* INCLUDE_parser */ diff --git a/src/token.h b/src/token.h new file mode 100644 index 0000000..e99294e --- /dev/null +++ b/src/token.h @@ -0,0 +1,104 @@ +#ifndef INCLUDE_token +#define INCLUDE_token + +#include <string.h> + +/* Token and type definitions shared by lexer, parser, and JIT. + * The type system uses a base kind plus pointer/array decorations. */ +typedef enum { + TY_INT = 0, + TY_CHAR = 1, + TY_BOOL = 2, +} _TYBASE; + +typedef struct { + _TYBASE base; /* TY_INT, TY_CHAR */ + int ptr_level; /* 0 for non-pointer, 1 for T*, 2 for T**, ... */ + int array_size; /* -1 for non-array, 0 for unknown size [], >0 for fixed size [N] */ +} _TY; + +static inline const char *tybase_name(_TYBASE b) { + switch (b) { + case TY_INT: return "int"; + case TY_CHAR: return "char"; + case TY_BOOL: return "bool"; + default: return "?"; + } +} + +#define _TL(_) \ + _(TK_BEGIN, "begin") \ + _(TK_INT, "int") \ + _(TK_CHAR, "char") \ + _(TK_ASSIGN, "=") \ + _(TK_EQ, "==") \ + _(TK_NE, "!=") \ + _(TK_LT, "<") \ + _(TK_LE, "<=") \ + _(TK_GT, ">") \ + _(TK_GE, ">=") \ + _(TK_RETURN, "return") \ + _(TK_IF, "if") \ + _(TK_ELSE, "else") \ + _(TK_FOR, "for") \ + _(TK_WHILE, "while") \ + _(TK_BOOL, "bool") \ + _(TK_IDENT, "ident") \ + _(TK_NUMBER, "number") \ + _(TK_CHARLIT, "charlit") \ + _(TK_LPAREN, "(") \ + _(TK_RPAREN, ")") \ + _(TK_LBRACE, "{") \ + _(TK_RBRACE, "}") \ + _(TK_SEMI, ";") \ + _(TK_PLUS, "+") \ + _(TK_MINUS, "-") \ + _(TK_STAR, "*") \ + _(TK_SLASH, "/") \ + _(TK_PERCENT, "%") \ + _(TK_AMP, "&") \ + _(TK_AND, "&&") \ + _(TK_BAR, "|") \ + _(TK_OR, "||") \ + _(TK_CARET, "^") \ + _(TK_SHL, "<<") \ + _(TK_SHR, ">>") \ + _(TK_BANG, "!") \ + _(TK_SQUOTE, "'") \ + _(TK_DQUOTE, "\"") \ + _(TK_LBRACKET, "[") \ + _(TK_RBRACKET, "]") \ + _(TK_STRING, "string") \ + _(TK_EOF, "eof") \ + _(TK_COMMA, ",") \ + _(TK_INVALID, "invalid") \ + _(TK__COUNT, "<count>") + +typedef enum { +#define MAKE_ENUM(_, s) _, + _TL(MAKE_ENUM) +#undef MAKE_ENUM +} _TK; + +static const char *_TN[] = { +#define MAKE_STR(_, s) s, + _TL(MAKE_STR) +#undef MAKE_STR +}; + +typedef struct { + _TK kind; + int val; // only valid if kind == TK_NUMBER + char *lxem; // malloc’d lexeme string, or NULL +} _T; + +static _TK checkkw(const char *kw) { +#define _(k, s) \ + if (strcmp(kw, s) == 0) \ + return k; + _TL(_) +#undef _ + return TK_IDENT; +} + +#endif /* INCLUDE_token */ |
