summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Moc <personal@cdatgoose.org>2026-03-08 15:02:30 +0100
committerDavid Moc <personal@cdatgoose.org>2026-03-08 15:02:30 +0100
commitad035ac5d942c6448a6a0464b995c2868a8378db (patch)
tree9c2c779d544a45b4234a6a3c311aa7b612ea975e
parent0385817bb1301a778bb33f8405a435293b9f8905 (diff)
Gosh. Fixed bugs in offsetting. Added syscalls. Cleaned up the previous commenting (used to pass the project through Claude to add comments but LLMs are dumb). Removed the LLM-made test runner cuz fuck AI. (HEAD, master)
-rw-r--r--src/ast.h92
-rw-r--r--src/codegen_jit.h853
-rw-r--r--src/lexer.h117
-rw-r--r--src/main.c5
-rw-r--r--src/parser.h472
-rw-r--r--src/token.h42
-rwxr-xr-xtest_runner.sh334
-rw-r--r--tests/getc.c131
-rw-r--r--tests/new_arrays.c31
-rw-r--r--tests/printf.c164
-rw-r--r--tests/syscalls.c30
11 files changed, 1683 insertions, 588 deletions
diff --git a/src/ast.h b/src/ast.h
index 6c326cc..5cfe5d6 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -7,7 +7,7 @@
#include <stdio.h>
-typedef enum { EX_NUMBER, EX_VAR, EX_BINOP, EX_CALL, EX_ADDR, EX_DEREF, EX_STRING, EX_INDEX } _EK;
+typedef enum { EX_NUMBER, EX_VAR, EX_BINOP, EX_CALL, EX_ADDR, EX_DEREF, EX_STRING, EX_INDEX, EX_TERNARY, EX_CAST } _EK;
typedef struct _EX {
_EK kind;
@@ -35,11 +35,20 @@ typedef struct _EX {
struct _EX *array; // array expression
struct _EX *index; // index expression
} index;
+ struct { // EX_TERNARY: cond ? then : else
+ struct _EX *cond;
+ struct _EX *then_expr;
+ struct _EX *else_expr;
+ } ternary;
+ struct { // EX_CAST: (type)expr
+ _TY to;
+ struct _EX *expr;
+ } cast;
};
} _EX;
-typedef enum { STK_RETURN, STK_VAR_DECL, STK_ASSIGN, STK_EXPR, STK_BLOCK, STK_IF, STK_WHILE, STK_FOR } _STK;
+typedef enum { STK_RETURN, STK_VAR_DECL, STK_ASSIGN, STK_EXPR, STK_BLOCK, STK_IF, STK_WHILE, STK_FOR, STK_DOWHILE, STK_BREAK, STK_CONTINUE, STK_GLOBAL } _STK;
typedef struct _STN {
_STK kind;
@@ -71,6 +80,16 @@ typedef struct _STN {
struct _STN *step; // may be NULL (an expr stmt)
struct _STN *body;
} fr;
+ struct { // STK_DOWHILE
+ struct _STN *body;
+ _EX *cond;
+ } dowhl;
+ /* STK_BREAK and STK_CONTINUE carry no payload */
+ struct { // STK_GLOBAL: a global variable declaration
+ char *name;
+ _EX *init; /* may be NULL; must be constant (EX_NUMBER/EX_STRING) */
+ _TY type;
+ } global;
};
struct _STN *n; // linked list
} _STN;
@@ -83,6 +102,7 @@ typedef struct _FN {
_TY *param_types;
int pac;
_STN *body;
+ _TY ret_type; /* return type of this function */
struct _FN *n;
} _FN;
@@ -180,7 +200,6 @@ _FN *fnlist_prepare(_FN *head) {
-/* Generic alloc macros */
#define NEW_EX(k) \
_EX *e = (_EX *)calloc(1, sizeof(_EX)); \
e->kind = k
@@ -189,7 +208,6 @@ _FN *fnlist_prepare(_FN *head) {
s->kind = k
#define NEW_FN() _FN *f = (_FN *)calloc(1, sizeof(_FN))
-/* Constructor declaration macros */
#define DEFINE_EX_CONSTRUCTOR(name, kind, ...) \
static inline _EX *ex_##name(__VA_ARGS__)
@@ -299,15 +317,60 @@ DEFINE_ST_CONSTRUCTOR(for, STK_FOR, _STN *init, _EX *cond, _STN *step, _STN *bod
return s;
}
+DEFINE_ST_CONSTRUCTOR(dowhile, STK_DOWHILE, _STN *body, _EX *cond) {
+ NEW_ST(STK_DOWHILE);
+ s->dowhl.body = body;
+ s->dowhl.cond = cond;
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(break, STK_BREAK) {
+ NEW_ST(STK_BREAK);
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(continue, STK_CONTINUE) {
+ NEW_ST(STK_CONTINUE);
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(global, STK_GLOBAL, char *name, _TY type, _EX *init) {
+ NEW_ST(STK_GLOBAL);
+ s->global.name = name;
+ s->global.type = type;
+ s->global.init = init;
+ return s;
+}
+
+static inline _EX *ex_ternary(_EX *cond, _EX *then_expr, _EX *else_expr) {
+ _EX *e = (_EX *)calloc(1, sizeof(_EX));
+ if (!e) { fprintf(stderr, "[AST] OOM\n"); exit(1); }
+ e->kind = EX_TERNARY;
+ e->ternary.cond = cond;
+ e->ternary.then_expr = then_expr;
+ e->ternary.else_expr = else_expr;
+ return e;
+}
+
+static inline _EX *ex_cast(_TY to, _EX *expr) {
+ _EX *e = (_EX *)calloc(1, sizeof(_EX));
+ if (!e) { fprintf(stderr, "[AST] OOM\n"); exit(1); }
+ e->kind = EX_CAST;
+ e->cast.to = to;
+ e->cast.expr = expr;
+ return e;
+}
-DEFINE_FN_CONSTRUCTOR(new, char *name, char **params, _TY* params_types, int pac, _STN *body) {
+
+DEFINE_FN_CONSTRUCTOR(new, char *name, char **params, _TY* params_types, int pac, _STN *body, _TY ret_type) {
NEW_FN();
f->name = name;
f->params = params;
f->param_types = params_types;
f->pac = pac;
f->body = body;
+ f->ret_type = ret_type;
return f;
}
@@ -344,6 +407,14 @@ static inline void ex_free(_EX *e) {
ex_free(e->index.array);
ex_free(e->index.index);
break;
+ case EX_TERNARY:
+ ex_free(e->ternary.cond);
+ ex_free(e->ternary.then_expr);
+ ex_free(e->ternary.else_expr);
+ break;
+ case EX_CAST:
+ ex_free(e->cast.expr);
+ break;
}
free(e);
}
@@ -383,6 +454,17 @@ static inline void st_free(_STN *s) {
st_free(s->fr.step);
st_free(s->fr.body);
break;
+ case STK_DOWHILE:
+ st_free(s->dowhl.body);
+ ex_free(s->dowhl.cond);
+ break;
+ case STK_BREAK:
+ case STK_CONTINUE:
+ break;
+ case STK_GLOBAL:
+ free(s->global.name);
+ ex_free(s->global.init);
+ break;
}
_STN *next = s->n;
free(s);
diff --git a/src/codegen_jit.h b/src/codegen_jit.h
index ff1d687..2cd4aa2 100644
--- a/src/codegen_jit.h
+++ b/src/codegen_jit.h
@@ -32,6 +32,7 @@ typedef struct FuncMap {
void *addr;
size_t size;
size_t alloc_size;
+ _TY ret_type; /* return type of this function */
struct FuncMap *next;
} FuncMap;
@@ -49,6 +50,14 @@ typedef struct VarMap {
struct VarMap *next;
} VarMap;
+typedef struct GlobalVar {
+ char *name;
+ uint8_t *addr; /* pointer into globals_buf */
+ _TY type;
+ int size; /* total bytes allocated */
+ struct GlobalVar *next;
+} GlobalVar;
+
typedef struct {
FuncMap *func_list;
VarMap *var_list;
@@ -56,6 +65,17 @@ typedef struct {
CodeBuf cb;
char *current_func_name;
PatchEntry *patch_list;
+ /* Globals data segment */
+ uint8_t *globals_buf; /* mmap'd RW page(s) for global variables */
+ size_t globals_cap;
+ size_t globals_used;
+ GlobalVar *global_list;
+ /* Break/continue patch stacks (up to 64 nesting levels) */
+ size_t break_patches[64][256]; /* offsets to patch */
+ int break_patch_count[64];
+ size_t cont_patches[64][256];
+ int cont_patch_count[64];
+ int loop_depth;
} JIT;
static void jit_init(JIT *jit) {
@@ -66,6 +86,19 @@ static void jit_init(JIT *jit) {
jit->cb.len = jit->cb.cap = 0;
jit->current_func_name = NULL;
jit->patch_list = NULL;
+ jit->globals_cap = 1024 * 1024;
+ jit->globals_used = 0;
+ jit->globals_buf = (uint8_t *)mmap(NULL, jit->globals_cap,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (jit->globals_buf == MAP_FAILED) {
+ perror("mmap globals"); exit(1);
+ }
+ memset(jit->globals_buf, 0, jit->globals_cap);
+ jit->global_list = NULL;
+ jit->loop_depth = 0;
+ memset(jit->break_patch_count, 0, sizeof(jit->break_patch_count));
+ memset(jit->cont_patch_count, 0, sizeof(jit->cont_patch_count));
}
static void jit_free(JIT *jit) {
@@ -79,6 +112,14 @@ static void jit_free(JIT *jit) {
}
jit->patch_list = NULL;
+ for (GlobalVar *g = jit->global_list; g;) {
+ GlobalVar *n = g->next; free(g->name); free(g); g = n;
+ }
+ jit->global_list = NULL;
+
+ if (jit->globals_buf && jit->globals_buf != MAP_FAILED)
+ munmap(jit->globals_buf, jit->globals_cap);
+
for (FuncMap *f = jit->func_list; f;) {
FuncMap *n = f->next;
if (f->addr && f->alloc_size > 0) munmap(f->addr, f->alloc_size);
@@ -91,8 +132,6 @@ static void jit_free(JIT *jit) {
jit->cb.len = jit->cb.cap = 0;
}
-/* --- Code buffer --- */
-
static void cb_init(CodeBuf *c) {
c->cap = 1024; c->len = 0;
c->buf = (uint8_t *)malloc(c->cap);
@@ -114,8 +153,6 @@ static void emit32(CodeBuf *c, uint32_t v) { cb_grow(c,4); memcpy(c->buf+c->
static void emit64(CodeBuf *c, uint64_t v) { cb_grow(c,8); memcpy(c->buf+c->len,&v,8); c->len+=8; }
static void emitN(CodeBuf *c, const void *p, size_t n) { cb_grow(c,n); memcpy(c->buf+c->len,p,n); c->len+=n; }
-/* --- x86-64 encoding helpers --- */
-
static void emit_rex(CodeBuf *c, int reg, int rm, int w) {
uint8_t rex = 0x40;
if (w) rex |= 0x08;
@@ -145,9 +182,18 @@ static void emit_mov_reg_mem_reg(CodeBuf *c, int dst, int base) {
static void emit_mov_mem_reg_reg(CodeBuf *c, int base, int src) {
emit_rex(c,src,base,1); emit8(c,0x89); emit_modrm(c,0,src,base);
}
+static void emit_mov_mem8_reg_reg(CodeBuf *c, int base) {
+ if (base & 8) emit8(c, 0x41); /* REX.B for extended base registers */
+ emit8(c, 0x88); emit_modrm(c, 0, RAX, base);
+}
static void emit_movzx_rax_mem8(CodeBuf *c, int disp32) {
emit8(c,0x48); emit8(c,0x0F); emit8(c,0xB6); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
}
+static void emit_movzx_rax_mem8_base(CodeBuf *c, int base) {
+ if (base & 8) emit8(c, 0x49); else emit8(c, 0x48);
+ emit8(c, 0x0F); emit8(c, 0xB6);
+ emit_modrm(c, 0, RAX, base);
+}
static void emit_mov_mem8_rax(CodeBuf *c, int disp32) {
emit_rex(c,RAX,RBP,0); emit8(c,0x88); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
}
@@ -174,8 +220,8 @@ static void emit_add_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x01,0xD8},
static void emit_imul_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x0F,0xAF,0xC3},4); }
static void emit_idiv_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); }
static void emit_imod(CodeBuf *c) {
- emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); // cqo; idiv rbx
- emit_mov_reg_reg(c, RAX, RDX); // remainder -> RAX
+ emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5);
+ emit_mov_reg_reg(c, RAX, RDX);
}
static void emit_or_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x09,0xD8},3); }
static void emit_xor_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x31,0xD8},3); }
@@ -192,53 +238,97 @@ static void emit_jcc_rel32(CodeBuf *c, uint8_t cc, int32_t rel) {
static void emit_test_rax_rax(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x85,0xC0},3); }
static void emit_prologue(CodeBuf *c, int total_stack_size) {
- emit8(c,0x55); // push rbp
- emitN(c,(uint8_t[]){0x48,0x89,0xE5},3); // mov rbp, rsp
- emit_push_reg(c, RBX); // save callee-saved RBX
+ emit8(c,0x55);
+ emitN(c,(uint8_t[]){0x48,0x89,0xE5},3);
+ emit_push_reg(c, RBX);
int stack_bytes = ((total_stack_size+15)/16)*16;
if (stack_bytes > 0) {
- emitN(c,(uint8_t[]){0x48,0x81,0xEC},3); // sub rsp, imm32
+ emitN(c,(uint8_t[]){0x48,0x81,0xEC},3);
emit32(c,(uint32_t)stack_bytes);
}
}
static void emit_epilogue(CodeBuf *c) {
- emit_pop_reg(c, RBX); emit8(c,0xC9); emit8(c,0xC3); // restore RBX; leave; ret
+ emit_pop_reg(c, RBX); emit8(c,0xC9); emit8(c,0xC3);
}
static void emit_lea_rax_rbp_disp(CodeBuf *c, int disp32) {
emit8(c,0x48); emit8(c,0x8D); emit_modrm(c,2,RAX,RBP); emit32(c,(uint32_t)disp32);
}
+static void emit_movzx_rax_mem16(CodeBuf *c, int disp32) {
+ emit8(c,0x48); emit8(c,0x0F); emit8(c,0xB7); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
+}
+static void emit_mov_mem16_rax(CodeBuf *c, int disp32) {
+ emit8(c,0x66); emit_rex(c,RAX,RBP,0); emit8(c,0x89); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
+}
static void emit_load_rax_from_mem(CodeBuf *c, int disp32, int size) {
- if (size == 1) emit_movzx_rax_mem8(c, disp32); else emit_mov_reg_mem64(c, RAX, disp32);
+ if (size == 1) emit_movzx_rax_mem8(c, disp32);
+ else if (size == 2) emit_movzx_rax_mem16(c, disp32);
+ else emit_mov_reg_mem64(c, RAX, disp32);
}
static void emit_store_rax_to_mem(CodeBuf *c, int disp32, int size) {
- if (size == 1) emit_mov_mem8_rax(c, disp32); else emit_mov_mem64_reg(c, disp32, RAX);
+ if (size == 1) emit_mov_mem8_rax(c, disp32);
+ else if (size == 2) emit_mov_mem16_rax(c, disp32);
+ else emit_mov_mem64_reg(c, disp32, RAX);
}
-/* --- Variable and type helpers --- */
-
static int calculate_type_size(_TY type) {
if (type.ptr_level > 0) return 8;
- int base_size = (type.base == TY_CHAR) ? 1 : 8;
- size_t total = (size_t)base_size;
+ int slot;
+ switch (type.base) {
+ case TY_CHAR: slot = 1; break;
+ case TY_BOOL: slot = 1; break;
+ case TY_SHORT: slot = 2; break;
+ case TY_INT: slot = 8; break;
+ case TY_FLOAT: slot = 8; break;
+ case TY_LONG: slot = 8; break;
+ case TY_VOID: slot = 0; break;
+ default: slot = 8; break;
+ }
if (type.array_size > 0) {
- total *= (size_t)type.array_size;
+ size_t total = (size_t)slot * (size_t)type.array_size;
if (total > (size_t)INT_MAX) { fprintf(stderr, "[JIT] array size too large\n"); exit(1); }
+ return (int)total;
}
- return (int)total;
+ return slot;
}
static int align_offset(int offset, _TY type) {
- int align = (type.base == TY_CHAR) ? 1 : 8;
+ int align;
+ if (type.ptr_level > 0) { align = 8; }
+ else switch (type.base) {
+ case TY_CHAR: align = 1; break;
+ case TY_BOOL: align = 1; break;
+ case TY_SHORT: align = 2; break;
+ case TY_INT: align = 8; break;
+ case TY_FLOAT: align = 8; break;
+ case TY_LONG: align = 8; break;
+ default: align = 8; break;
+ }
if (offset % align == 0) return offset;
+ if (offset < 0)
+ return ((offset - (align - 1)) / align) * align;
return (offset / align) * align;
}
+static int ty_slot_size(_TY ty) {
+ if (ty.ptr_level > 0) return 8;
+ switch (ty.base) {
+ case TY_CHAR: return 1;
+ case TY_BOOL: return 1;
+ case TY_SHORT: return 2;
+ case TY_INT: return 8; /* promoted to 64-bit slot */
+ case TY_FLOAT: return 8; /* stored in 64-bit slot (integer bits) */
+ case TY_LONG: return 8;
+ case TY_VOID: return 0;
+ default: return 8;
+ }
+}
+
static void reset_varmap(JIT *jit) {
for (VarMap *v = jit->var_list; v;) {
VarMap *n = v->next; free(v->name); free(v); v = n;
}
jit->var_list = NULL;
- jit->next_local_offset = -16; // after saved RBX at RBP-8, 16-byte aligned
+ jit->next_local_offset = -16;
}
static void add_var(JIT *jit, const char *name, _TY type) {
@@ -248,9 +338,9 @@ static void add_var(JIT *jit, const char *name, _TY type) {
if (!v->name) { fprintf(stderr, "[JIT] strdup failed in add_var\n"); free(v); exit(1); }
v->type = type;
int type_size = calculate_type_size(type);
+ jit->next_local_offset -= type_size;
jit->next_local_offset = align_offset(jit->next_local_offset, type);
v->offset = jit->next_local_offset;
- jit->next_local_offset -= type_size;
v->next = jit->var_list;
jit->var_list = v;
}
@@ -261,23 +351,60 @@ static int get_var_offset(JIT *jit, const char *name) {
fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1);
}
+static GlobalVar *find_global(JIT *jit, const char *name) {
+ for (GlobalVar *g = jit->global_list; g; g = g->next)
+ if (strcmp(g->name, name) == 0) return g;
+ return NULL;
+}
+
+static GlobalVar *register_global(JIT *jit, const char *name, _TY type) {
+ if (find_global(jit, name)) {
+ fprintf(stderr, "[JIT] Duplicate global '%s'\n", name); exit(1);
+ }
+ int elem_sz = ty_slot_size((_TY){type.base, type.ptr_level > 0 ? type.ptr_level : 0, -1});
+ int n_elems = (type.array_size > 0) ? type.array_size : 1;
+ int total_sz = elem_sz * n_elems;
+ size_t align = (elem_sz < 8) ? elem_sz : 8;
+ size_t off = (jit->globals_used + align - 1) & ~(align - 1);
+ if (off + (size_t)total_sz > jit->globals_cap) {
+ fprintf(stderr, "[JIT] Globals segment full\n"); exit(1);
+ }
+ GlobalVar *g = (GlobalVar *)malloc(sizeof(GlobalVar));
+ if (!g) { fprintf(stderr, "[JIT] OOM\n"); exit(1); }
+ g->name = strdup(name);
+ g->addr = jit->globals_buf + off;
+ g->type = type;
+ g->size = total_sz;
+ g->next = jit->global_list;
+ jit->global_list = g;
+ jit->globals_used = off + total_sz;
+ return g;
+}
+
static _TY get_var_type(JIT *jit, const char *name) {
for (VarMap *v = jit->var_list; v; v = v->next)
if (strcmp(v->name, name) == 0) return v->type;
+ /* Fall back to globals */
+ GlobalVar *g = find_global(jit, name);
+ if (g) return g->type;
fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1);
}
-/* --- Type checking --- */
+static _TY get_func_ret_type(JIT *jit, const char *name);
+static void reset_varmap(JIT *jit);
static _TY get_expr_type(JIT *jit, _EX *expr) {
switch (expr->kind) {
case EX_NUMBER: return (_TY){TY_INT, 0, -1};
case EX_STRING: return (_TY){TY_CHAR, 1, -1};
- case EX_VAR: return get_var_type(jit, expr->name);
+ case EX_VAR: {
+ _TY t = get_var_type(jit, expr->name);
+ if (t.array_size > 0) return (_TY){t.base, t.ptr_level + 1, -1};
+ return t;
+ }
case EX_BINOP: {
_TY left_type = get_expr_type(jit, expr->binop.l);
- // All arithmetic, comparison, bitwise ops produce int
- (void)get_expr_type(jit, expr->binop.r);
+ (void)get_expr_type(jit, expr->binop.r);
switch (expr->binop.op) {
case TK_PLUS: case TK_MINUS: case TK_STAR: case TK_SLASH: case TK_PERCENT:
case TK_EQ: case TK_NE: case TK_LT: case TK_LE: case TK_GT: case TK_GE:
@@ -287,7 +414,13 @@ static _TY get_expr_type(JIT *jit, _EX *expr) {
default: return left_type;
}
}
- case EX_CALL: return (_TY){TY_INT, 0, -1};
+ case EX_CALL:
+ if (strcmp(expr->call.func_name, "syscall") == 0)
+ return (_TY){TY_LONG, 0, -1};
+ if (strcmp(expr->call.func_name, "__initlist__") == 0 ||
+ strcmp(expr->call.func_name, "__sizeof__") == 0)
+ return (_TY){TY_INT, 0, -1};
+ return get_func_ret_type(jit, expr->call.func_name);
case EX_INDEX: {
_TY t = get_expr_type(jit, expr->index.array);
if (t.array_size > 0) return (_TY){t.base, t.ptr_level, -1};
@@ -303,31 +436,53 @@ static _TY get_expr_type(JIT *jit, _EX *expr) {
_TY t = get_expr_type(jit, expr->addr.expr);
return (_TY){t.base, t.ptr_level+1, -1};
}
+ case EX_TERNARY:
+ return get_expr_type(jit, expr->ternary.then_expr);
+ case EX_CAST:
+ return expr->cast.to;
default: return (_TY){TY_INT, 0, -1};
}
}
+static int type_is_integer(_TY ty) {
+ if (ty.ptr_level > 0) return 0;
+ switch (ty.base) {
+ case TY_INT: case TY_CHAR: case TY_SHORT:
+ case TY_LONG: case TY_BOOL: return 1;
+ default: return 0;
+ }
+}
+
static int types_compatible(_TY expected, _TY actual) {
if (expected.base == actual.base &&
expected.ptr_level == actual.ptr_level &&
expected.array_size == actual.array_size) return 1;
- // Allow untyped int literals to be assigned anywhere
if (actual.base == TY_INT && actual.ptr_level == 0 && actual.array_size == -1) return 1;
+ if (type_is_integer(expected) && type_is_integer(actual)) return 1;
+ if (expected.ptr_level > 0 && actual.ptr_level > 0) return 1;
+ if (expected.ptr_level > 0 && actual.array_size > 0 &&
+ expected.base == actual.base &&
+ expected.ptr_level == actual.ptr_level + 1) return 1;
return 0;
}
-/* --- Function registry --- */
-
-static void register_func(JIT *jit, const char *name) {
+static void register_func(JIT *jit, const char *name, _TY ret_type) {
FuncMap *f = (FuncMap *)malloc(sizeof(FuncMap));
if (!f) { fprintf(stderr, "[JIT] malloc failed in register_func\n"); exit(1); }
f->name = strdup(name);
if (!f->name) { fprintf(stderr, "[JIT] strdup failed in register_func\n"); free(f); exit(1); }
f->addr = NULL; f->size = 0; f->alloc_size = 0;
+ f->ret_type = ret_type;
f->next = jit->func_list;
jit->func_list = f;
}
+static _TY get_func_ret_type(JIT *jit, const char *name) {
+ for (FuncMap *f = jit->func_list; f; f = f->next)
+ if (strcmp(f->name, name) == 0) return f->ret_type;
+ return (_TY){TY_INT, 0, -1};
+}
+
static void set_func_addr(JIT *jit, const char *name, void *addr, size_t size, size_t alloc_size) {
for (FuncMap *f = jit->func_list; f; f = f->next) {
if (strcmp(f->name, name) != 0) continue;
@@ -352,17 +507,19 @@ static void set_func_addr(JIT *jit, const char *name, void *addr, size_t size, s
static void *get_func_addr(JIT *jit, const char *name) {
for (FuncMap *f = jit->func_list; f; f = f->next) {
if (strcmp(f->name, name) == 0)
- return f->addr ? f->addr : (void*)0xDEADBEEF; // placeholder; patched later
+ return f->addr ? f->addr : (void*)0xDEADBEEF;
}
fprintf(stderr, "[JIT] get_func_addr: unknown function '%s'\n", name); exit(1);
}
-/* --- Stack size calculation --- */
static int calculate_stack_size(_STN *s) {
int total = 0;
while (s) {
- if (s->kind == STK_VAR_DECL) total += calculate_type_size(s->var_decl.type);
+ if (s->kind == STK_VAR_DECL) {
+ int sz = calculate_type_size(s->var_decl.type);
+ total += (sz < 8 && s->var_decl.type.array_size <= 0) ? 8 : sz;
+ }
if (s->kind == STK_BLOCK) total += calculate_stack_size(s->body);
if (s->kind == STK_FOR) {
if (s->fr.init) total += calculate_stack_size(s->fr.init);
@@ -370,15 +527,21 @@ static int calculate_stack_size(_STN *s) {
}
s = s->n;
}
- return total;
+ return total + 8;
+}
+
+static int calculate_total_stack_size(_FN *f) {
+ int param_space = 0;
+ for (int i = 0; i < f->pac && i < 6; i++)
+ param_space += (calculate_type_size(f->param_types[i]) < 8) ? 8
+ : calculate_type_size(f->param_types[i]);
+ return calculate_stack_size(f->body) + param_space;
}
-/* --- Code generation (forward declarations) --- */
static void gen_expr_jit(JIT *jit, _EX *e);
static int gen_stmt_jit(JIT *jit, _STN *s);
-/* --- Statement generation --- */
static int gen_stmt_jit(JIT *jit, _STN *s) {
while (s) {
@@ -387,6 +550,19 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
case STK_VAR_DECL:
add_var(jit, s->var_decl.name, s->var_decl.type);
if (s->var_decl.init) {
+ if (s->var_decl.init->kind == EX_CALL &&
+ strcmp(s->var_decl.init->call.func_name, "__initlist__") == 0) {
+ _TY vty = s->var_decl.type;
+ int elem_sz = ty_slot_size((_TY){vty.base, 0, -1});
+ int arr_off = get_var_offset(jit, s->var_decl.name);
+ for (int ii = 0; ii < s->var_decl.init->call.argc; ii++) {
+ gen_expr_jit(jit, s->var_decl.init->call.args[ii]);
+ /* address of arr[ii] = RBP + arr_off + ii*elem_sz */
+ int elem_off = arr_off + ii * elem_sz;
+ emit_store_rax_to_mem(&jit->cb, elem_off, elem_sz);
+ }
+ break;
+ }
_TY init_type = get_expr_type(jit, s->var_decl.init);
if (!types_compatible(s->var_decl.type, init_type)) {
fprintf(stderr, "[JIT] Type mismatch: cannot assign %s to %s\n",
@@ -394,8 +570,7 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
exit(1);
}
gen_expr_jit(jit, s->var_decl.init);
- int sz = (s->var_decl.type.ptr_level > 0) ? 8
- : (s->var_decl.type.base == TY_CHAR) ? 1 : 8;
+ int sz = ty_slot_size(s->var_decl.type);
emit_store_rax_to_mem(&jit->cb, get_var_offset(jit, s->var_decl.name), sz);
}
break;
@@ -403,6 +578,15 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
case STK_ASSIGN: {
_EX *lhs = s->assign.lhs;
if (lhs->kind == EX_VAR) {
+ GlobalVar *gv = find_global(jit, lhs->name);
+ if (gv) {
+ gen_expr_jit(jit, s->assign.expr);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(gv->type);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ break;
+ }
int offset = get_var_offset(jit, lhs->name);
_TY type = get_var_type(jit, lhs->name);
_TY expr_type = get_expr_type(jit, s->assign.expr);
@@ -412,36 +596,74 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
exit(1);
}
gen_expr_jit(jit, s->assign.expr);
- int sz = (type.ptr_level > 0) ? 8 : (type.base == TY_CHAR) ? 1 : 8;
+ int sz = ty_slot_size(type);
emit_store_rax_to_mem(&jit->cb, offset, sz);
} else if (lhs->kind == EX_DEREF) {
gen_expr_jit(jit, lhs->deref.expr);
- emit_mov_reg_reg(&jit->cb, RBX, RAX); // RBX = address
- gen_expr_jit(jit, s->assign.expr); // RAX = value
- emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ emit_push_reg(&jit->cb, RAX); // save address on stack
+ gen_expr_jit(jit, s->assign.expr); // RAX = value (may clobber RBX)
+ emit_pop_reg(&jit->cb, RBX); // RBX = address
+ _TY ptr_ty = get_expr_type(jit, lhs->deref.expr);
+ int dsz = (ptr_ty.ptr_level > 1) ? 8 : ty_slot_size((_TY){ptr_ty.base, 0, -1});
+ if (dsz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
} else if (lhs->kind == EX_INDEX) {
if (lhs->index.array->kind != EX_VAR) {
gen_expr_jit(jit, lhs->index.array); break;
}
_TY var_type = get_var_type(jit, lhs->index.array->name);
- if (var_type.array_size <= 0) {
- fprintf(stderr, "[JIT] Cannot index non-array '%s'\n", lhs->index.array->name); exit(1);
+ int element_size = ty_slot_size((_TY){var_type.base, 0, -1});
+
+ if (var_type.ptr_level > 0) {
+ GlobalVar *gv_ptr2 = find_global(jit, lhs->index.array->name);
+ if (gv_ptr2) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_ptr2->addr);
+ emit_mov_reg_mem_reg(&jit->cb, RBX, RBX); /* deref global ptr */
+ } else {
+ int ptr_offset = get_var_offset(jit, lhs->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ }
+ gen_expr_jit(jit, lhs->index.index); // RAX = index
+ if (element_size > 1) {
+ emit_mov_reg_reg(&jit->cb, RCX, RBX);
+ emit_mov_reg_imm64(&jit->cb, RBX, element_size);
+ emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
+ emit_mov_reg_reg(&jit->cb, RBX, RCX);
+ }
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &ptr[index]
+ emit_push_reg(&jit->cb, RBX); // save address
+ gen_expr_jit(jit, s->assign.expr); // RAX = value
+ emit_pop_reg(&jit->cb, RBX); // restore address
+ if (element_size == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+
+ } else if (var_type.array_size > 0) {
+ GlobalVar *gv_arr2 = find_global(jit, lhs->index.array->name);
+ gen_expr_jit(jit, lhs->index.index); // RAX = index
+ emit_mov_reg_imm64(&jit->cb, RBX, element_size);
+ emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
+ if (gv_arr2) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_arr2->addr);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &arr[index]
+ } else {
+ int array_offset = get_var_offset(jit, lhs->index.array->name);
+ emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = RBP + array_offset + byte_offset
+ }
+ emit_push_reg(&jit->cb, RBX); // save address
+ gen_expr_jit(jit, s->assign.expr); // RAX = value
+ emit_pop_reg(&jit->cb, RBX); // RBX = address
+ if (element_size == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+
+ } else {
+ fprintf(stderr, "[JIT] Cannot index non-array/non-pointer '%s'\n", lhs->index.array->name); exit(1);
}
- int array_offset = get_var_offset(jit, lhs->index.array->name);
- int element_size = (var_type.base == TY_CHAR) ? 1 : 8;
- gen_expr_jit(jit, lhs->index.index); // RAX = index
- emit_mov_reg_imm64(&jit->cb, RBX, element_size);
- emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
- emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
- emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset
- emit_mov_reg_reg(&jit->cb, RAX, RBX);
- emit_mov_reg_reg(&jit->cb, RBX, RBP);
- emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index]
- emit_mov_reg_reg(&jit->cb, RBX, RAX);
- gen_expr_jit(jit, s->assign.expr); // RAX = value
- emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
} else {
fprintf(stderr, "[JIT] Unsupported assignment LHS kind %d\n", lhs->kind); exit(1);
@@ -479,38 +701,151 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
}
case STK_WHILE: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
size_t loop_start = jit->cb.len;
gen_expr_jit(jit, s->whl.cond);
emit_test_rax_rax(&jit->cb);
size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
gen_stmt_jit(jit, s->whl.body);
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
- int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6));
+ size_t break_target = jit->cb.len;
+ int32_t rel_end = (int32_t)(break_target - (jz_pos + 6));
memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4);
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
break;
}
case STK_FOR: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
if (s->fr.init) gen_stmt_jit(jit, s->fr.init);
size_t loop_start = jit->cb.len;
- if (s->fr.cond) {
+ size_t jz_pos_for = 0;
+ int has_cond = (s->fr.cond != NULL);
+ if (has_cond) {
gen_expr_jit(jit, s->fr.cond);
emit_test_rax_rax(&jit->cb);
- size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
- gen_stmt_jit(jit, s->fr.body);
- if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
- emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
- int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6));
- memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4);
- } else {
- gen_stmt_jit(jit, s->fr.body);
- if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
- emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
+ jz_pos_for = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
+ }
+ gen_stmt_jit(jit, s->fr.body);
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
}
+ if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
+ emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
+ size_t break_target = jit->cb.len;
+ if (has_cond) {
+ int32_t rel_end = (int32_t)(break_target - (jz_pos_for + 6));
+ memcpy(jit->cb.buf + jz_pos_for + 2, &rel_end, 4);
+ }
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
break;
}
- default:
+ case STK_DOWHILE: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
+ size_t loop_start = jit->cb.len;
+ gen_stmt_jit(jit, s->dowhl.body);
+ /* continue → jump to condition check */
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ gen_expr_jit(jit, s->dowhl.cond);
+ emit_test_rax_rax(&jit->cb);
+ emit_jcc_rel32(&jit->cb, 0x05, (int32_t)(loop_start - (jit->cb.len + 6)));
+ size_t break_target = jit->cb.len;
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
+ break;
+ }
+
+ case STK_BREAK: {
+ if (jit->loop_depth == 0) {
+ fprintf(stderr, "[JIT] 'break' outside loop\n"); exit(1);
+ }
+ int depth = jit->loop_depth - 1;
+ size_t pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int cnt = jit->break_patch_count[depth];
+ if (cnt >= 256) { fprintf(stderr, "[JIT] Too many breaks\n"); exit(1); }
+ jit->break_patches[depth][cnt] = pos;
+ jit->break_patch_count[depth]++;
+ break;
+ }
+
+ case STK_CONTINUE: {
+ if (jit->loop_depth == 0) {
+ fprintf(stderr, "[JIT] 'continue' outside loop\n"); exit(1);
+ }
+ int depth = jit->loop_depth - 1;
+ size_t pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int cnt = jit->cont_patch_count[depth];
+ if (cnt >= 256) { fprintf(stderr, "[JIT] Too many continues\n"); exit(1); }
+ jit->cont_patches[depth][cnt] = pos;
+ jit->cont_patch_count[depth]++;
+ break;
+ }
+
+ case STK_GLOBAL: {
+ GlobalVar *gv = find_global(jit, s->global.name);
+ if (!gv) {
+ fprintf(stderr, "[JIT] Global '%s' not registered\n", s->global.name); exit(1);
+ }
+ if (s->global.init) {
+ if (s->global.init->kind == EX_CALL &&
+ strcmp(s->global.init->call.func_name, "__initlist__") == 0) {
+ int esz = ty_slot_size((_TY){gv->type.base, 0, -1});
+ for (int ii = 0; ii < s->global.init->call.argc; ii++) {
+ gen_expr_jit(jit, s->global.init->call.args[ii]);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)(gv->addr + ii * esz));
+ if (esz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ } else {
+ gen_expr_jit(jit, s->global.init);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(gv->type);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ }
+ break;
+ }
fprintf(stderr, "[JIT] Unsupported statement kind %d\n", s->kind); exit(1);
}
s = s->n;
@@ -518,7 +853,6 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
return 0;
}
-/* --- Expression generation --- */
static void gen_expr_jit(JIT *jit, _EX *e) {
if (!e) return;
@@ -529,16 +863,110 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
break;
case EX_VAR: {
- int off = get_var_offset(jit, e->name);
- _TY ty = get_var_type(jit, e->name);
- int sz = (ty.ptr_level > 0) ? 8 : (ty.base == TY_CHAR) ? 1 : 8;
- if (sz == 1) emit_movzx_rax_mem8(&jit->cb, off);
- else emit_mov_reg_mem64(&jit->cb, RAX, off);
- break;
+ GlobalVar *gv = NULL;
+ for (VarMap *v = jit->var_list; v; v = v->next) {
+ if (strcmp(v->name, e->name) == 0) goto local_var;
+ }
+ gv = find_global(jit, e->name);
+ if (gv) {
+ _TY ty = gv->type;
+ if (ty.array_size > 0) {
+ /* Global array: load its absolute address */
+ emit_mov_reg_imm64(&jit->cb, RAX, (uint64_t)gv->addr);
+ break;
+ }
+ /* Global scalar: absolute address → load through it */
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(ty);
+ if (sz == 1) emit_movzx_rax_mem8_base(&jit->cb, RBX);
+ else emit_mov_reg_mem_reg(&jit->cb, RAX, RBX);
+ break;
+ }
+ local_var: {
+ int off = get_var_offset(jit, e->name);
+ _TY ty = get_var_type(jit, e->name);
+ if (ty.array_size > 0) {
+ emit_lea_rax_rbp_disp(&jit->cb, off);
+ break;
+ }
+ int sz = ty_slot_size(ty);
+ if (sz == 1) emit_movzx_rax_mem8(&jit->cb, off);
+ else emit_mov_reg_mem64(&jit->cb, RAX, off);
+ break;
+ }
}
case EX_BINOP: {
- // Short-circuit AND
+ if (e->binop.op == TK_INC || e->binop.op == TK_DEC) {
+ int is_pre = (e->binop.r->value == -1);
+ int is_inc = (e->binop.op == TK_INC);
+ _EX *lval = e->binop.l;
+
+ _TY lty = get_expr_type(jit, lval);
+ int step = (lty.ptr_level > 0) ? ty_slot_size((_TY){lty.base,0,-1}) : 1;
+
+ gen_expr_jit(jit, lval); /* RAX = old value */
+ emit_mov_reg_reg(&jit->cb, RCX, RAX); /* RCX = old value */
+ emit_mov_reg_imm64(&jit->cb, RBX, step);
+ if (is_inc) emit_add_reg_reg(&jit->cb, RCX, RBX);
+ else emit_sub_reg_reg(&jit->cb, RCX, RBX);
+ if (lval->kind == EX_VAR) {
+ GlobalVar *gv_inc = find_global(jit, lval->name);
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ if (gv_inc) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_inc->addr);
+ int sz = ty_slot_size(lty);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ } else {
+ int off = get_var_offset(jit, lval->name);
+ int sz = ty_slot_size(lty);
+ emit_store_rax_to_mem(&jit->cb, off, sz);
+ }
+ } else {
+ emit_push_reg(&jit->cb, RCX); /* save new value */
+ if (lval->kind == EX_DEREF) {
+ gen_expr_jit(jit, lval->deref.expr); /* RAX = address */
+ } else { /* EX_INDEX */
+ _TY vty = get_var_type(jit, lval->index.array->name);
+ int esz = ty_slot_size((_TY){vty.base,0,-1});
+ if (vty.ptr_level > 0) {
+ int poff = get_var_offset(jit, lval->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RAX, poff);
+ emit_push_reg(&jit->cb, RAX);
+ gen_expr_jit(jit, lval->index.index);
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RBX, esz);
+ emit_imul_rax_rbx(&jit->cb);
+ }
+ emit_pop_reg(&jit->cb, RBX);
+ emit_add_reg_reg(&jit->cb, RAX, RBX);
+ } else {
+ int aoff = get_var_offset(jit, lval->index.array->name);
+ gen_expr_jit(jit, lval->index.index);
+ emit_mov_reg_imm64(&jit->cb, RBX, esz);
+ emit_imul_rax_rbx(&jit->cb);
+ emit_mov_reg_imm64(&jit->cb, RBX, aoff);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RAX, RBX);
+ }
+ }
+ emit_mov_reg_reg(&jit->cb, RBX, RAX); /* RBX = address */
+ emit_pop_reg(&jit->cb, RCX); /* RCX = new val */
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ if (is_pre) emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ if (!is_pre) {
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ emit_mov_reg_imm64(&jit->cb, RBX, step);
+ if (is_inc) emit_sub_reg_reg(&jit->cb, RAX, RBX);
+ else emit_add_reg_reg(&jit->cb, RAX, RBX);
+ }
+ break;
+ }
if (e->binop.op == TK_AND) {
gen_expr_jit(jit, e->binop.l);
emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb);
@@ -554,7 +982,6 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4);
break;
}
- // Short-circuit OR
if (e->binop.op == TK_OR) {
gen_expr_jit(jit, e->binop.l);
emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb);
@@ -570,18 +997,54 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4);
break;
}
- // All other binary ops: eval left -> push; eval right -> RBX = left
gen_expr_jit(jit, e->binop.l);
emit_push_reg(&jit->cb, RAX);
gen_expr_jit(jit, e->binop.r);
emit_pop_reg(&jit->cb, RBX); // RBX = left, RAX = right
switch (e->binop.op) {
- case TK_PLUS: emit_add_rax_rbx(&jit->cb); break;
- case TK_MINUS:
+ case TK_PLUS: {
+ _TY lt = get_expr_type(jit, e->binop.l);
+ _TY rt = get_expr_type(jit, e->binop.r);
+ int lptr = (lt.ptr_level > 0 || lt.array_size > 0);
+ int rptr = (rt.ptr_level > 0 || rt.array_size > 0);
+ if (lptr && !rptr) {
+ int esz = ty_slot_size((_TY){lt.base, 0, -1});
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ } else if (rptr && !lptr) {
+ int esz = ty_slot_size((_TY){rt.base, 0, -1});
+ emit_mov_reg_reg(&jit->cb, RCX, RAX); /* RCX = ptr */
+ emit_mov_reg_reg(&jit->cb, RAX, RBX); /* RAX = int */
+ emit_mov_reg_reg(&jit->cb, RBX, RCX); /* RBX = ptr */
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ }
+ emit_add_rax_rbx(&jit->cb);
+ break;
+ }
+ case TK_MINUS: {
+ _TY lt = get_expr_type(jit, e->binop.l);
+ _TY rt = get_expr_type(jit, e->binop.r);
+ int lptr = (lt.ptr_level > 0 || lt.array_size > 0);
+ int rptr = (rt.ptr_level > 0 || rt.array_size > 0);
+ if (lptr && !rptr) {
+ /* ptr - int: scale int by element size */
+ int esz = ty_slot_size((_TY){lt.base, 0, -1});
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ }
+ /* RAX=right(scaled), RBX=left: result = left - right */
emit_mov_reg_reg(&jit->cb, RCX, RAX);
emit_mov_reg_reg(&jit->cb, RAX, RBX);
emit_rex(&jit->cb, RCX, RAX, 1); emit8(&jit->cb, 0x29); emit_modrm(&jit->cb, 3, RCX, RAX);
break;
+ }
case TK_STAR: emit_imul_rax_rbx(&jit->cb); break;
case TK_SLASH:
emit_mov_reg_reg(&jit->cb, RCX, RAX);
@@ -626,24 +1089,84 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
case EX_CALL: {
+ if (strcmp(e->call.func_name, "__sizeof__") == 0) {
+ _EX *arg = e->call.args[0];
+ _TY ty;
+ if (arg->kind == EX_VAR) {
+ ty = get_var_type(jit, arg->name);
+ } else {
+ ty = get_expr_type(jit, arg);
+ }
+ int sz;
+ if (ty.array_size > 0) {
+ int esz = (ty.base == TY_CHAR) ? 1 : (ty.base == TY_SHORT) ? 2 :
+ (ty.base == TY_LONG) ? 8 : 4;
+ sz = esz * ty.array_size;
+ } else if (ty.ptr_level > 0) {
+ sz = 8;
+ } else {
+ switch (ty.base) {
+ case TY_CHAR: sz = 1; break;
+ case TY_SHORT: sz = 2; break;
+ case TY_LONG: sz = 8; break;
+ default: sz = 4; break; /* int, bool, float */
+ }
+ }
+ emit_mov_reg_imm64(&jit->cb, RAX, sz);
+ break;
+ }
+
+ if (strcmp(e->call.func_name, "syscall") == 0) {
+ int argc = e->call.argc;
+ if (argc < 1) {
+ fprintf(stderr, "[JIT] syscall() requires at least 1 argument (number)\n"); exit(1);
+ }
+ if (argc > 7) {
+ fprintf(stderr, "[JIT] syscall() supports at most 7 arguments\n"); exit(1);
+ }
+ const int sc_regs[6] = {RDI, RSI, RDX, R10, R8, R9};
+ int nargs = argc - 1; /* number of args after the syscall number */
+
+ for (int i = argc - 1; i >= 0; i--) {
+ gen_expr_jit(jit, e->call.args[i]);
+ emit_push_reg(&jit->cb, RAX);
+ }
+
+ emit_pop_reg(&jit->cb, RAX);
+
+ for (int i = 0; i < nargs; i++) {
+ emit_pop_reg(&jit->cb, sc_regs[i]);
+ }
+
+ emit8(&jit->cb, 0x0F);
+ emit8(&jit->cb, 0x05);
+ break;
+ }
+
int total_args = e->call.argc;
int stack_args = total_args > 6 ? total_args - 6 : 0;
+ int reg_args = total_args < 6 ? total_args : 6;
int padding = (stack_args % 2) ? 8 : 0;
const int arg_regs[6] = {RDI, RSI, RDX, RCX, R8, R9};
+ /* Push stack-passed args right-to-left (args[total-1] first). */
for (int i = total_args-1; i >= 6; i--) {
gen_expr_jit(jit, e->call.args[i]); emit_push_reg(&jit->cb, RAX);
}
- for (int i = 0; i < total_args && i < 6; i++) {
- gen_expr_jit(jit, e->call.args[i]); emit_mov_reg_reg(&jit->cb, arg_regs[i], RAX);
+ for (int i = reg_args-1; i >= 0; i--) {
+ gen_expr_jit(jit, e->call.args[i]); emit_push_reg(&jit->cb, RAX);
+ }
+
+ for (int i = 0; i < reg_args; i++) {
+ emit_pop_reg(&jit->cb, arg_regs[i]);
}
+
if (padding) {
emit8(&jit->cb,0x48); emit8(&jit->cb,0x83); emit8(&jit->cb,0xEC); emit8(&jit->cb,0x08);
}
void *addr = get_func_addr(jit, e->call.func_name);
if (addr == (void*)0xDEADBEEF) {
- // Forward call: emit placeholder imm64, record offset for later patching
emit_movabs_rax_imm64(&jit->cb, (uint64_t)0xDEADBEEF);
PatchEntry *patch = (PatchEntry*)malloc(sizeof(PatchEntry));
if (!patch) { fprintf(stderr, "[JIT] malloc failed (patch)\n"); exit(1); }
@@ -659,7 +1182,7 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
} else {
emit_movabs_rax_imm64(&jit->cb, (uint64_t)addr);
}
- emit8(&jit->cb, 0xFF); emit8(&jit->cb, 0xD0); // CALL RAX
+ emit8(&jit->cb, 0xFF); emit8(&jit->cb, 0xD0);
if (stack_args * 8 + padding > 0) {
emit8(&jit->cb,0x48); emit8(&jit->cb,0x81); emit8(&jit->cb,0xC4);
@@ -669,12 +1192,56 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
case EX_ADDR:
- if (e->addr.expr->kind != EX_VAR) {
+ if (e->addr.expr->kind == EX_VAR) {
+ GlobalVar *gv_addr = find_global(jit, e->addr.expr->name);
+ if (gv_addr) {
+ emit_mov_reg_imm64(&jit->cb, RAX, (uint64_t)gv_addr->addr);
+ } else {
+ emit_lea_rax_rbp_disp(&jit->cb, get_var_offset(jit, e->addr.expr->name));
+ }
+ } else {
fprintf(stderr, "[JIT] &expr: only &var supported\n"); exit(1);
}
- emit_lea_rax_rbp_disp(&jit->cb, get_var_offset(jit, e->addr.expr->name));
break;
+ case EX_TERNARY: {
+ gen_expr_jit(jit, e->ternary.cond);
+ emit_mov_reg_reg(&jit->cb, RBX, RAX);
+ emit_or_rax_rbx(&jit->cb);
+ size_t jz_pos = jit->cb.len;
+ emit_jcc_rel32(&jit->cb, 0x04, 0);
+ gen_expr_jit(jit, e->ternary.then_expr);
+ size_t jmp_pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int32_t rel_jz = (int32_t)(jit->cb.len - (jz_pos + 6));
+ memcpy(jit->cb.buf + jz_pos + 2, &rel_jz, 4);
+ gen_expr_jit(jit, e->ternary.else_expr);
+ int32_t rel_jmp = (int32_t)(jit->cb.len - (jmp_pos + 5));
+ memcpy(jit->cb.buf + jmp_pos + 1, &rel_jmp, 4);
+ break;
+ }
+
+ case EX_CAST: {
+ gen_expr_jit(jit, e->cast.expr);
+ _TY to = e->cast.to;
+ if (to.ptr_level > 0) break; /* pointer cast: value unchanged */
+ switch (to.base) {
+ case TY_CHAR:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6);
+ emit_modrm(&jit->cb, 3, RAX, RAX); /* movzx rax, al */
+ break;
+ case TY_SHORT:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB7);
+ emit_modrm(&jit->cb, 3, RAX, RAX); /* movzx rax, ax */
+ break;
+ case TY_INT:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x63); emit_modrm(&jit->cb, 3, RAX, RAX);
+ break;
+ default: break; /* long/void/etc: no-op */
+ }
+ break;
+ }
+
case EX_DEREF:
gen_expr_jit(jit, e->deref.expr);
emit_rex(&jit->cb, RAX, RAX, 1); emit8(&jit->cb, 0x8B); emit_modrm(&jit->cb, 0, RAX, RAX);
@@ -692,12 +1259,17 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
gen_expr_jit(jit, e->index.array); break;
}
_TY var_type = get_var_type(jit, e->index.array->name);
- int element_size = (var_type.base == TY_CHAR) ? 1 : 8;
+ int element_size = ty_slot_size((_TY){var_type.base, 0, -1});
if (var_type.ptr_level > 0) {
- // Pointer indexing: load pointer, add index*element_size
- int ptr_offset = get_var_offset(jit, e->index.array->name);
- emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ GlobalVar *gv_ptr = find_global(jit, e->index.array->name);
+ if (gv_ptr) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_ptr->addr);
+ emit_mov_reg_mem_reg(&jit->cb, RBX, RBX); /* deref: RBX = *addr = pointer value */
+ } else {
+ int ptr_offset = get_var_offset(jit, e->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ }
gen_expr_jit(jit, e->index.index); // RAX = index
if (element_size > 1) {
emit_mov_reg_reg(&jit->cb, RCX, RBX); // save pointer
@@ -714,17 +1286,22 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
} else if (var_type.array_size > 0) {
- // Array indexing: compute RBP-relative address = array_offset - index*element_size
- int array_offset = get_var_offset(jit, e->index.array->name);
+ GlobalVar *gv_arr = find_global(jit, e->index.array->name);
gen_expr_jit(jit, e->index.index); // RAX = index
emit_mov_reg_imm64(&jit->cb, RBX, element_size);
emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
- emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
- emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset
- emit_mov_reg_reg(&jit->cb, RAX, RBX);
- emit_mov_reg_reg(&jit->cb, RBX, RBP);
- emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index]
- emit_mov_reg_reg(&jit->cb, RBX, RAX);
+ if (gv_arr) {
+ /* Global array: base is absolute address */
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_arr->addr);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &arr[index]
+ } else {
+ int array_offset = get_var_offset(jit, e->index.array->name);
+ emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = RBP + array_offset + byte_offset
+ }
if (element_size == 1) {
emit_rex(&jit->cb, RAX, RBX, 0); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6);
emit_modrm(&jit->cb, 0, RAX, RBX);
@@ -743,22 +1320,35 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
}
-/* --- Compile one function --- */
-
static void *gen_function_jit(JIT *jit, _FN *f, size_t *out_size) {
reset_varmap(jit);
jit->current_func_name = f->name;
- int total_stack_size = calculate_stack_size(f->body);
+ int total_stack_size = calculate_total_stack_size(f);
cb_init(&jit->cb);
emit_prologue(&jit->cb, total_stack_size);
const int param_regs[6] = {RDI, RSI, RDX, RCX, R8, R9};
+
for (int i = 0; i < f->pac && i < 6; i++) {
add_var(jit, f->params[i], f->param_types[i]);
emit_mov_mem64_reg(&jit->cb, get_var_offset(jit, f->params[i]), param_regs[i]);
}
+ int num_stack_params = f->pac > 6 ? f->pac - 6 : 0;
+ int stack_arg_padding = (num_stack_params % 2) ? 8 : 0;
+ int stack_arg_base = 16 + stack_arg_padding; /* offset of last-pushed (highest-index) stack arg */
+ for (int i = 6; i < f->pac; i++) {
+ VarMap *v = (VarMap *)malloc(sizeof(VarMap));
+ if (!v) { fprintf(stderr, "[JIT] malloc failed (stack param)\n"); exit(1); }
+ v->name = strdup(f->params[i]);
+ if (!v->name) { fprintf(stderr, "[JIT] strdup failed (stack param)\n"); free(v); exit(1); }
+ v->type = f->param_types[i];
+ v->offset = stack_arg_base + (i - 6) * 8;
+ v->next = jit->var_list;
+ jit->var_list = v;
+ }
+
int did_return = gen_stmt_jit(jit, f->body);
if (!did_return) {
emit_movabs_rax_imm64(&jit->cb, 0);
@@ -777,7 +1367,6 @@ static void *gen_function_jit(JIT *jit, _FN *f, size_t *out_size) {
return mem;
}
-/* --- Patch forward calls after all functions are compiled --- */
static void patch_function_calls(JIT *jit) {
for (PatchEntry *patch = jit->patch_list; patch; patch = patch->next) {
@@ -797,16 +1386,36 @@ static void patch_function_calls(JIT *jit) {
}
}
-/* --- Compile all functions and patch forward calls --- */
-
static void jit_compile_all(JIT *jit, _FN *fn_list) {
- for (_FN *cur = fn_list; cur; cur = cur->n)
- register_func(jit, cur->name);
+ /* First pass: register all globals so type lookups work during codegen */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) == 0) {
+ /* Extract variable name from __global_NAME__ */
+ const char *start = cur->name + 9;
+ size_t len = strlen(start);
+ if (len >= 2 && start[len-2] == '_' && start[len-1] == '_') len -= 2;
+ char *vname = strndup(start, len);
+ if (!vname) { fprintf(stderr, "[JIT] OOM\n"); exit(1); }
+ /* Get type from the STK_GLOBAL node */
+ _STN *gdecl = cur->body;
+ if (gdecl && gdecl->kind == STK_GLOBAL) {
+ register_global(jit, vname, gdecl->global.type);
+ }
+ free(vname);
+ }
+ }
+
+ /* Second pass: register all real functions (so forward calls work) */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) != 0)
+ register_func(jit, cur->name, cur->ret_type);
+ }
- // Compile in reverse order so callees are typically compiled before callers
+ /* Compile real functions in reverse order */
_FN *functions[64];
int count = 0;
for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) == 0) continue;
if (count >= 64) { fprintf(stderr, "[JIT] Too many functions (max 64)\n"); exit(1); }
functions[count++] = cur;
}
@@ -814,9 +1423,27 @@ static void jit_compile_all(JIT *jit, _FN *fn_list) {
gen_function_jit(jit, functions[i], NULL);
patch_function_calls(jit);
-}
-/* --- Entry point --- */
+ /* Run global initialisers in declaration order */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) != 0) continue;
+ reset_varmap(jit);
+ jit->current_func_name = cur->name;
+ cb_init(&jit->cb);
+ emit8(&jit->cb, 0x55); /* push rbp */
+ emitN(&jit->cb, (uint8_t[]){0x48,0x89,0xE5}, 3); /* mov rbp, rsp */
+ gen_stmt_jit(jit, cur->body);
+ emit8(&jit->cb, 0xC9); /* leave */
+ emit8(&jit->cb, 0xC3); /* ret */
+ size_t isz = jit->cb.len;
+ void *ibuf = mmap(NULL, isz, PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (ibuf == MAP_FAILED) { perror("mmap init"); exit(1); }
+ memcpy(ibuf, jit->cb.buf, isz);
+ ((void(*)(void))ibuf)();
+ munmap(ibuf, isz);
+ }
+}
static int jit_run(JIT *jit, int argc, char **argv) {
int (*main_func)(int, char **) = get_func_addr(jit, "main");
@@ -824,4 +1451,4 @@ static int jit_run(JIT *jit, int argc, char **argv) {
return main_func(argc, argv);
}
-#endif \ No newline at end of file
+#endif
diff --git a/src/lexer.h b/src/lexer.h
index ca2b790..3d36bf4 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,4 +1,3 @@
-
#ifndef INCLUDE_lexerlexer
#define INCLUDE_lexerlexer
@@ -15,7 +14,7 @@ typedef struct {
int col;
} _LX;
-/* Error reporting with line/column info */
+
static void perror_at(_LX *lx, const char *msg) {
fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", lx->line, lx->col, msg);
exit(1);
@@ -56,7 +55,32 @@ sit lxnext(_LX *lx) {
lx->line++;
lx->col = 0;
lxget(lx);
- return lxnext(lx); // recurse to get next token
+ return lxnext(lx);
+ }
+
+ // Comments: // and /* */
+ if (c == '/') {
+ // peek one ahead
+ int saved = lx->pos;
+ lxget(lx);
+ char c2 = lxpeek(lx);
+ if (c2 == '/') {
+ while (lxpeek(lx) != '\n' && lxpeek(lx) != 0) lxget(lx);
+ return lxnext(lx);
+ } else if (c2 == '*') {
+ lxget(lx); // consume '*'
+ for (;;) {
+ char ch = lxpeek(lx);
+ if (ch == 0) break; // unterminated, let it go
+ lxget(lx);
+ if (ch == '\n') { lx->line++; lx->col = 0; }
+ if (ch == '*' && lxpeek(lx) == '/') { lxget(lx); break; }
+ }
+ return lxnext(lx);
+ } else {
+ // not a comment — put position back, fall through to normal '/' handling
+ lx->pos = saved;
+ }
}
if (isalpha(c) || c == '_') {
@@ -110,28 +134,85 @@ sit lxnext(_LX *lx) {
if (lxpeek(lx) == '|') { lxget(lx); lx->col++; return (_T){TK_OR,0,lx_strdup_checked(lx,"||")}; }
}
if (c == '"') {
- // String literal
- int start = lx->pos; // pos is already after the opening quote
+ // String literal — decode escape sequences into a fresh buffer
+ int cap = 64, dlen = 0;
+ char *decoded = (char*)malloc(cap);
+ if (!decoded) perror_at(lx, "out of memory");
while (lxpeek(lx) != '"' && lxpeek(lx) != 0) {
+ char ch;
if (lxpeek(lx) == '\\') {
lxget(lx); // consume backslash
- if (lxpeek(lx) != 0) lxget(lx); // consume escaped char
+ char esc = (char)lxget(lx);
+ switch (esc) {
+ case 'n': ch = '\n'; break;
+ case 't': ch = '\t'; break;
+ case 'r': ch = '\r'; break;
+ case '0': ch = '\0'; break;
+ case '\\': ch = '\\'; break;
+ case '"': ch = '"'; break;
+ case '\'': ch = '\''; break;
+ default: ch = esc; break;
+ }
} else {
- lxget(lx);
+ ch = (char)lxget(lx);
}
+ if (dlen + 2 > cap) {
+ cap *= 2;
+ char *tmp = (char*)realloc(decoded, cap);
+ if (!tmp) { free(decoded); perror_at(lx, "out of memory"); }
+ decoded = tmp;
+ }
+ decoded[dlen++] = ch;
}
+ decoded[dlen] = '\0';
if (lxpeek(lx) == '"') {
- int len = lx->pos - start; // length of content
lxget(lx); // consume closing quote
- char *text = strndup(lx->buf + start, len); // start at content
- if (!text) {
- perror_at(lx, "out of memory");
- }
- return (_T){TK_STRING, 0, text};
+ return (_T){TK_STRING, 0, decoded};
} else {
+ free(decoded);
perror_at(lx, "unterminated string literal");
}
}
+ if (c == '\'') {
+ /* Character literal: decode a single char or escape sequence and emit
+ * TK_CHARLIT with the integer value in .val so the parser never needs
+ * to reason about escape sequences. */
+ int char_val = 0;
+ if (lxpeek(lx) == '\\') {
+ lxget(lx); /* consume backslash */
+ char esc = lxget(lx);
+ switch (esc) {
+ case 'n': char_val = '\n'; break;
+ case 't': char_val = '\t'; break;
+ case 'r': char_val = '\r'; break;
+ case '0': char_val = '\0'; break;
+ case '\\': char_val = '\\'; break;
+ case '\'': char_val = '\''; break;
+ case '"': char_val = '"'; break;
+ case 'a': char_val = '\a'; break;
+ case 'b': char_val = '\b'; break;
+ case 'f': char_val = '\f'; break;
+ case 'v': char_val = '\v'; break;
+ case 'x': {
+ int h = 0;
+ while (isxdigit(lxpeek(lx))) {
+ char hc = lxget(lx);
+ h = h * 16 + (isdigit(hc) ? hc - '0' : tolower(hc) - 'a' + 10);
+ }
+ char_val = h;
+ break;
+ }
+ default: char_val = (unsigned char)esc; break;
+ }
+ } else if (lxpeek(lx) != '\'') {
+ char_val = (unsigned char)lxget(lx);
+ }
+ if (lxpeek(lx) != '\'') {
+ perror_at(lx, "unterminated or multi-character char literal");
+ }
+ lxget(lx); /* consume closing ' */
+ return (_T){TK_CHARLIT, char_val, NULL};
+ }
switch (c) {
case '(':
return (_T){TK_LPAREN, 0, lx_strdup_checked(lx,"(")};
@@ -144,12 +225,18 @@ sit lxnext(_LX *lx) {
case ';':
return (_T){TK_SEMI, 0, lx_strdup_checked(lx,";")};
case '+':
+ if (lxpeek(lx) == '+') { lxget(lx); lx->col++; return (_T){TK_INC, 0, lx_strdup_checked(lx,"++")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_PLUS_EQ,0, lx_strdup_checked(lx,"+=")}; }
return (_T){TK_PLUS, 0, lx_strdup_checked(lx,"+")};
case '-':
+ if (lxpeek(lx) == '-') { lxget(lx); lx->col++; return (_T){TK_DEC, 0, lx_strdup_checked(lx,"--")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_MINUS_EQ,0, lx_strdup_checked(lx,"-=")}; }
return (_T){TK_MINUS, 0, lx_strdup_checked(lx,"-")};
case '*':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_STAR_EQ, 0, lx_strdup_checked(lx,"*=")}; }
return (_T){TK_STAR, 0, lx_strdup_checked(lx,"*")};
case '/':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_SLASH_EQ,0, lx_strdup_checked(lx,"/=")}; }
return (_T){TK_SLASH, 0, lx_strdup_checked(lx,"/")};
case '&':
return (_T){TK_AMP, 0, lx_strdup_checked(lx,"&")};
@@ -169,6 +256,10 @@ sit lxnext(_LX *lx) {
return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx,"[")};
case ']':
return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx,"]")};
+ case '?':
+ return (_T){TK_QUESTION, 0, lx_strdup_checked(lx,"?")};
+ case ':':
+ return (_T){TK_COLON, 0, lx_strdup_checked(lx,":")};
default:
return (_T){TK_INVALID, 0, NULL};
}
diff --git a/src/main.c b/src/main.c
index 0f18359..0e4c9c3 100644
--- a/src/main.c
+++ b/src/main.c
@@ -187,7 +187,10 @@ void print_func(_FN *f, int indent) {
return;
indentf(indent);
- printf("Function(%s) params=[", f->name);
+ /* Print return type */
+ printf("Function(%s) -> [%s", f->name, tybase_name(f->ret_type.base));
+ for (int j = 0; j < f->ret_type.ptr_level; j++) printf("*");
+ printf("] params=[");
for (int i = 0; i < f->pac; i++) {
printf("%s", tybase_name(f->param_types[i].base));
for (int j = 0; j < f->param_types[i].ptr_level; j++)
diff --git a/src/parser.h b/src/parser.h
index 154e0ba..bd35323 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,5 +1,3 @@
-/* Parser: recursive-descent front end that builds an AST `_FN` list
- * from the token stream, handling expressions, statements, and functions. */
#ifndef INCLUDE_parser
#define INCLUDE_parser
@@ -10,13 +8,13 @@
#include <stdlib.h>
#include <string.h>
-/* Parser state */
+
typedef struct {
_LX *lx;
_T cur;
} _P;
-/* Error reporting with line/column info */
+
static void perror_expected(_LX *lx, const char *expected, const char *got) {
fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n",
lx->line, lx->col, expected, got);
@@ -37,7 +35,7 @@ static void pnext(_P *p) {
p->cur = lxnext(p->lx);
}
-/* Expect a particular token kind; on mismatch print helpful error and exit */
+
static void pexpect(_P *p, _TK tk) {
if (p->cur.kind != tk) {
const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>";
@@ -45,10 +43,35 @@ static void pexpect(_P *p, _TK tk) {
}
}
-static _STN *pstmt(_P *p); // forward
+
+/* Report whether token kind `k` is one of the type keywords this parser
+ * accepts (TK_INT, TK_CHAR, TK_VOID, TK_FLOAT, TK_LONG, TK_SHORT).
+ * Returns 1 for those kinds and 0 for every other kind. */
+static int is_type_token(_TK k) {
+    switch (k) {
+    case TK_INT:
+    case TK_CHAR:
+    case TK_VOID:
+    case TK_FLOAT:
+    case TK_LONG:
+    case TK_SHORT:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+
+/* Parse a type at the current token: exactly one base-type keyword followed
+ * by zero or more '*' tokens, each of which adds one pointer level.
+ *
+ * Every token that belongs to the type is consumed with pnext().  The
+ * returned _TY always has array_size == -1; this function never parses an
+ * array suffix.  If the current token is not a type keyword the error is
+ * reported through perror_expected(). */
+static _TY pparse_type(_P *p) {
+    _TY ty = {.ptr_level = 0, .array_size = -1};
+    switch (p->cur.kind) {
+    case TK_INT:   ty.base = TY_INT;   break;
+    case TK_CHAR:  ty.base = TY_CHAR;  break;
+    case TK_VOID:  ty.base = TY_VOID;  break;
+    case TK_FLOAT: ty.base = TY_FLOAT; break;
+    case TK_LONG:  ty.base = TY_LONG;  break;
+    case TK_SHORT: ty.base = TY_SHORT; break;
+    default: {
+        /* Guard the name-table lookup exactly as pexpect() does, so a kind
+         * outside the table can never index past the end of _TN. */
+        const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>";
+        perror_expected(p->lx, "type keyword", got);
+        ty.base = TY_INT; /* fallback; the original marks this path unreachable */
+        break;
+    }
+    }
+    pnext(p);
+    /* Each trailing '*' deepens the pointer by one level. */
+    while (p->cur.kind == TK_STAR) {
+        ty.ptr_level++;
+        pnext(p);
+    }
+    return ty;
+}
+
+static _STN *pstmt(_P *p);
static _STN *pblock(_P *p) {
pexpect(p, TK_LBRACE);
- pnext(p); /* consume '{' */
+ pnext(p);
_STN *head = NULL;
_STN **cur = &head;
@@ -60,16 +83,17 @@ static _STN *pblock(_P *p) {
}
pexpect(p, TK_RBRACE);
- pnext(p); /* consume '}' */
+ pnext(p);
return st_block(head);
}
-static _EX *pexpr(_P *p); // forward
-static _EX *pterm(_P *p); // forward
-static _EX *punary(_P *p); // forward
-static _EX *pfact(_P *p); // fwd
+static _EX *pexpr(_P *p); /* full expression, including the ?: conditional */
+static _EX *pterm(_P *p);
+static _EX *punary(_P *p); /* prefix operators and sizeof */
+static _EX *pfact(_P *p); /* numbers, identifiers and calls, parenthesised or cast expressions, string and char literals */
+static _EX *pparse_charlit(_P *p); /* called by pfact() for a TK_CHARLIT token */
+
-/* Precedence climbing layers */
static _EX *pmul(_P *p);
static _EX *padd(_P *p);
static _EX *pshift(_P *p);
@@ -92,19 +116,42 @@ static _STN *passign_or_expr_stmt(_P *p) {
_EX *rhs = pexpr(p);
return st_assign(lhs, rhs);
}
+
+ _TK compound_op = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) compound_op = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) compound_op = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) compound_op = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) compound_op = TK_SLASH;
+ if (compound_op != TK_INVALID) {
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid compound-assignment target\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ _EX *combined = ex_binop(lhs, compound_op, rhs);
+ return st_assign(lhs, combined);
+ }
return st_expr(lhs);
}
static _EX *ppostfix_from_expr(_P *p, _EX *e) {
- // Handle array indexing: expr[expr]
- while (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
- _EX *index = pexpr(p);
- pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
- e = ex_index(e, index);
- }
-
+ for (;;) {
+ if (p->cur.kind == TK_LBRACKET) {
+ pnext(p);
+ _EX *index = pexpr(p);
+ pexpect(p, TK_RBRACKET);
+ pnext(p);
+ e = ex_index(e, index);
+ } else if (p->cur.kind == TK_INC || p->cur.kind == TK_DEC) {
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *one = ex_number(1);
+ e = ex_binop(e, op, one);
+ } else {
+ break;
+ }
+ }
return e;
}
@@ -113,7 +160,7 @@ static _EX *ppostfix(_P *p) {
return ppostfix_from_expr(p, e);
}
-/* ---- FACTOR ---- */
+
static _EX *pfact(_P *p) {
if (p->cur.kind == TK_NUMBER) {
_EX *n = ex_number(p->cur.val);
@@ -126,11 +173,11 @@ static _EX *pfact(_P *p) {
fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n");
exit(1);
}
- pnext(p); /* consume identifier */
+ pnext(p);
if (p->cur.kind == TK_LPAREN) {
- /* function call */
- pnext(p); /* consume '(' */
+
+ pnext(p);
_EX **args = NULL;
int argc = 0;
@@ -145,7 +192,7 @@ static _EX *pfact(_P *p) {
args[argc++] = pexpr(p);
if (p->cur.kind == TK_COMMA) {
- pnext(p); /* skip comma */
+ pnext(p);
} else {
break;
}
@@ -153,7 +200,7 @@ static _EX *pfact(_P *p) {
}
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
+ pnext(p);
return ex_call(name, args, argc);
}
@@ -162,11 +209,21 @@ static _EX *pfact(_P *p) {
return ppostfix_from_expr(p, var_expr);
} else if (p->cur.kind == TK_LPAREN) {
- pnext(p); /* consume '(' */
+ pnext(p);
+
+ if (p->cur.kind == TK_INT || p->cur.kind == TK_CHAR ||
+ p->cur.kind == TK_SHORT || p->cur.kind == TK_LONG ||
+ p->cur.kind == TK_VOID || p->cur.kind == TK_FLOAT) {
+ _TY to = pparse_type(p);
+ pexpect(p, TK_RPAREN);
+ pnext(p);
+ _EX *sub = punary(p);
+ return ppostfix_from_expr(p, ex_cast(to, sub));
+ }
_EX *n = pexpr(p);
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
- return n;
+ pnext(p);
+ return ppostfix_from_expr(p, n);
} else if (p->cur.kind == TK_STRING) {
char *str = strdup(p->cur.lxem);
@@ -177,12 +234,14 @@ static _EX *pfact(_P *p) {
pnext(p);
return ex_string(str);
+ } else if (p->cur.kind == TK_CHARLIT) {
+ return pparse_charlit(p);
+
} else {
perror_unexpected(p->lx, "factor", _TN[p->cur.kind]);
}
}
-/* ---- TERM ---- */
static _EX *pmul(_P *p) {
_EX *n = punary(p);
while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) {
@@ -276,7 +335,6 @@ static _EX *plogand(_P *p) {
while (p->cur.kind == TK_AND) {
pnext(p);
_EX *r = pbitor(p);
- // keep as a binary op node TK_AND; codegen will short-circuit
n = ex_binop(n, TK_AND, r);
}
return n;
@@ -292,60 +350,115 @@ static _EX *plogor(_P *p) {
return n;
}
-/* ---- EXPR ---- */
-static _EX *pexpr(_P *p) { return plogor(p); }
+/* Parse a full expression.  The leading operand is read with plogor(); if
+ * a '?' follows, the construct is a conditional `cond ? a : b` and both
+ * arms are parsed by recursive calls to pexpr().  A missing ':' prints a
+ * diagnostic with the lexer's line and column and terminates the process. */
+static _EX *pexpr(_P *p) {
+    _EX *cond = plogor(p);
+    if (p->cur.kind != TK_QUESTION) {
+        return cond;
+    }
+
+    pnext(p); /* skip '?' */
+    _EX *on_true = pexpr(p);
+    if (p->cur.kind != TK_COLON) {
+        fprintf(stderr, "[PARSER] Error at line %d, column %d: expected ':' in ternary\n",
+                p->lx->line, p->lx->col);
+        exit(1);
+    }
+    pnext(p); /* skip ':' */
+    _EX *on_false = pexpr(p);
+    return ex_ternary(cond, on_true, on_false);
+}
-/* ---- UNARY ---- */
static _EX *punary(_P *p) {
- if (p->cur.kind == TK_AMP) { // &expr
+ if (p->cur.kind == TK_AMP) { /* address-of: operand is stored in an EX_ADDR node */
pnext(p);
_EX *sub = punary(p);
NEW_EX(EX_ADDR);
e->addr.expr = sub;
return e;
}
- if (p->cur.kind == TK_STAR) { // *expr
+ if (p->cur.kind == TK_STAR) { /* dereference: operand is stored in an EX_DEREF node */
pnext(p);
_EX *sub = punary(p);
NEW_EX(EX_DEREF);
e->deref.expr = sub;
return e;
}
- if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0)
+ if (p->cur.kind == TK_BANG) { /* logical not, lowered to (operand == 0) */
pnext(p);
_EX *sub = punary(p);
_EX *zero = ex_number(0);
return ex_binop(sub, TK_EQ, zero);
}
+ if (p->cur.kind == TK_MINUS) { /* unary minus, lowered to (0 - operand) */
+ pnext(p);
+ _EX *sub = punary(p);
+ return ex_binop(ex_number(0), TK_MINUS, sub);
+ }
+ if (p->cur.kind == TK_INC || p->cur.kind == TK_DEC) { /* prefix ++ / -- */
+ _TK op = (p->cur.kind == TK_INC) ? TK_INC : TK_DEC;
+ pnext(p);
+ _EX *sub = punary(p);
+ return ex_binop(sub, op, ex_number(-1)); /* NOTE(review): the -1 operand looks like a marker for codegen rather than an arithmetic value - confirm */
+ }
+ if (p->cur.kind == TK_SIZEOF) { /* only the parenthesised form sizeof(...) is accepted */
+ pnext(p); /* consume sizeof */
+ pexpect(p, TK_LPAREN); pnext(p);
+ int sz = 0; /* byte size, filled in only when the operand is a type name */
+ if (p->cur.kind == TK_INT || p->cur.kind == TK_CHAR ||
+ p->cur.kind == TK_SHORT || p->cur.kind == TK_LONG ||
+ p->cur.kind == TK_VOID || p->cur.kind == TK_FLOAT) {
+ _TY ty = pparse_type(p);
+ if (ty.ptr_level > 0) sz = 8; /* every pointer type counts as 8 bytes */
+ else {
+ switch (ty.base) {
+ case TY_CHAR: sz = 1; break;
+ case TY_SHORT: sz = 2; break;
+ case TY_INT: sz = 4; break;
+ case TY_LONG: sz = 8; break;
+ default: sz = 4; break; /* void, float and any other base fall back to 4 */
+ }
+ }
+ if (ty.array_size > 0) sz *= ty.array_size; /* array: element size times element count */
+ } else {
+ /* sizeof(expr) — resolved at JIT time via __sizeof__ built-in */
+ _EX *inner = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ _EX **args = (_EX **)malloc(sizeof(_EX *)); /* one-slot argument vector for the synthetic call */
+ if (!args) { fprintf(stderr, "[PARSER] OOM in sizeof\n"); exit(1); }
+ args[0] = inner;
+ return ex_call(strdup("__sizeof__"), args, 1);
+ }
+ pexpect(p, TK_RPAREN); pnext(p);
+ return ex_number(sz); /* sizeof(type) folds to a constant at parse time */
+ }
return ppostfix(p);
}
-char parse_char_literal(_P *p) {
- /* assume current token is TK_SQUOTE */
- pnext(p); // consume opening '
- if (p->cur.kind != TK_IDENT) {
- perror_expected(p->lx, "character literal", _TN[p->cur.kind]);
- }
- char c = p->cur.lxem[0];
- pnext(p); // consume char
- pexpect(p, TK_SQUOTE);
- pnext(p); // consume closing '
- return c;
+static _EX *pparse_charlit(_P *p) {   /* consume the current token and return an EX_NUMBER node carrying its val */
+    const int code = p->cur.val;      /* read before pnext() moves to the next token */
+    pnext(p);
+    _EX *lit = calloc(1, sizeof *lit);
+    if (lit == NULL) { fprintf(stderr, "[PARSER] Error: calloc failed in pparse_charlit\n"); exit(1); }
+    lit->kind = EX_NUMBER;
+    lit->value = code;
+    return lit;
}
-_EX *ex_charlit(char c) {
- _EX *e = malloc(sizeof(_EX));
- if (!e) {
- fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n");
- exit(1);
- }
- e->kind = EX_NUMBER;
- e->value = c;
- return e;
+
+static char parse_char_literal(_P *p) {   /* return the current token's val narrowed to char, then advance */
+    const char ch = (char)p->cur.val;     /* NOTE(review): the only call site visible in this diff was removed - possibly unused now */
+    pnext(p);
+    return ch;
+}
+static _EX *ex_charlit(char c) {   /* build a zero-initialised EX_NUMBER node for character c */
+    _EX *lit = calloc(1, sizeof *lit);
+    if (lit == NULL) { fprintf(stderr, "[PARSER] Error: calloc failed in ex_charlit\n"); exit(1); }
+    lit->kind = EX_NUMBER;
+    lit->value = (unsigned char)c;   /* stored as 0..255 regardless of char signedness */
+    return lit;
}
static _STN *pstmt(_P *p) {
switch (p->cur.kind) {
case TK_IF: {
- pnext(p); // consume if
+ pnext(p);
pexpect(p, TK_LPAREN); pnext(p);
_EX *cond = pexpr(p);
pexpect(p, TK_RPAREN); pnext(p);
@@ -362,6 +475,31 @@ static _STN *pstmt(_P *p) {
_STN *body = pstmt(p);
return st_while(cond, body);
}
+ case TK_DO: {
+ pnext(p); /* consume 'do' */
+ _STN *body = pstmt(p);
+ if (p->cur.kind != TK_WHILE) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: expected 'while' after do body\n",
+ p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p); /* consume 'while' */
+ pexpect(p, TK_LPAREN); pnext(p);
+ _EX *cond = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_dowhile(body, cond);
+ }
+ case TK_BREAK: {
+ pnext(p); /* consume 'break' */
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_break();
+ }
+ case TK_CONTINUE: {
+ pnext(p); /* consume 'continue' */
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_continue();
+ }
case TK_FOR: {
pnext(p);
pexpect(p, TK_LPAREN); pnext(p);
@@ -380,11 +518,13 @@ static _STN *pstmt(_P *p) {
_STN *body = pstmt(p);
return st_for(init, cond, step, body);
}
+ case TK_VOID:
+ case TK_FLOAT:
+ case TK_LONG:
+ case TK_SHORT:
case TK_INT:
case TK_CHAR: {
- _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 };
- pnext(p); /* consume type */
- while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+ _TY vtype = pparse_type(p);
if (p->cur.kind != TK_IDENT) {
perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]);
@@ -396,27 +536,36 @@ static _STN *pstmt(_P *p) {
}
pnext(p);
- // Parse array size: [N] or []
if (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
+ pnext(p);
if (p->cur.kind == TK_NUMBER) {
vtype.array_size = p->cur.val;
- pnext(p); // consume number
+ pnext(p);
} else {
vtype.array_size = 0; // unknown size []
}
pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
+ pnext(p);
}
_EX *init = NULL;
if (p->cur.kind == TK_ASSIGN) {
pnext(p);
- if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) {
- /* parse char literal */
- char c = parse_char_literal(p); // implement this to consume quotes and return char
- init = ex_charlit(c);
+ if (p->cur.kind == TK_LBRACE) {
+ pnext(p);
+ _EX **elems = NULL;
+ int nelems = 0;
+ while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) {
+ elems = (_EX **)realloc(elems, sizeof(_EX *) * (nelems + 1));
+ if (!elems) { fprintf(stderr, "[PARSER] OOM in initializer list\n"); exit(1); }
+ elems[nelems++] = pexpr(p);
+ if (p->cur.kind == TK_COMMA) pnext(p);
+ }
+ pexpect(p, TK_RBRACE); pnext(p);
+ init = ex_call(strdup("__initlist__"), elems, nelems);
+ } else if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_CHARLIT) {
+ init = pparse_charlit(p);
} else {
init = pexpr(p);
}
@@ -435,16 +584,25 @@ static _STN *pstmt(_P *p) {
if (p->cur.kind == TK_ASSIGN) {
pnext(p);
_EX *rhs = pexpr(p);
- pexpect(p, TK_SEMI);
- pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
return st_assign(lhs_or_call, rhs);
}
- pexpect(p, TK_SEMI);
- pnext(p);
+ _TK cop = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) cop = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) cop = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) cop = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) cop = TK_SLASH;
+ if (cop != TK_INVALID) {
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_assign(lhs_or_call, ex_binop(lhs_or_call, cop, rhs));
+ }
+ pexpect(p, TK_SEMI); pnext(p);
return st_expr(lhs_or_call);
}
case TK_RETURN: {
- pnext(p); /* consume 'return' */
+ pnext(p);
_EX *expr = pexpr(p);
pexpect(p, TK_SEMI);
pnext(p); /* consume ';' */
@@ -452,24 +610,35 @@ static _STN *pstmt(_P *p) {
return st_return(expr);
}
case TK_LBRACE: {
- /* block statement */
- return pblock(p); /* pblock will consume the braces */
+ return pblock(p);
}
default: {
- /* General expression or assignment starting with unary, paren, etc. */
_EX *lhs = pexpr(p);
if (p->cur.kind == TK_ASSIGN) {
- /* only allow assignment to var or *expr */
if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col);
exit(1);
}
pnext(p);
_EX *rhs = pexpr(p);
- pexpect(p, TK_SEMI);
- pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
return st_assign(lhs, rhs);
}
+ _TK dcop = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) dcop = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) dcop = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) dcop = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) dcop = TK_SLASH;
+ if (dcop != TK_INVALID) {
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid compound-assignment target\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_assign(lhs, ex_binop(lhs, dcop, rhs));
+ }
pexpect(p, TK_SEMI);
pnext(p);
return st_expr(lhs);
@@ -478,11 +647,13 @@ static _STN *pstmt(_P *p) {
}
static _FN *pfunc(_P *p) {
- pexpect(p, TK_INT);
- pnext(p); /* consume 'int' */
+ if (!is_type_token(p->cur.kind)) {
+ perror_expected(p->lx, "return type for function", _TN[p->cur.kind]);
+ }
+ _TY ret_type = pparse_type(p);
if (p->cur.kind != TK_IDENT) {
- perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]);
+ perror_expected(p->lx, "function name after return type", _TN[p->cur.kind]);
}
char *name = NULL;
if (p->cur.lxem)
@@ -491,54 +662,42 @@ static _FN *pfunc(_P *p) {
fprintf(stderr, "[PARSER] Error: strdup failed for function name\n");
exit(1);
}
- pnext(p); /* consume function name */
+ pnext(p);
- /* expect '(' then consume it */
pexpect(p, TK_LPAREN);
- pnext(p); /* consume '(' */
+ pnext(p);
- /* parse optional parameter list */
char **params = NULL;
_TY *params_types = NULL;
int pac = 0;
if (p->cur.kind != TK_RPAREN) {
- /* at least one parameter expected */
while (1) {
- /* first token should be a type, e.g. int */
- _TY vtype;
- if (p->cur.kind == TK_INT)
- vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1};
- else if (p->cur.kind == TK_CHAR)
- vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1};
- else {
+ if (!is_type_token(p->cur.kind)) {
perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]);
}
- pnext(p); /* consume type */
- while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+ _TY vtype = pparse_type(p);
- /* next should be an identifier (variable name) */
if (p->cur.kind != TK_IDENT) {
perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]);
}
- // Parse array size for parameters after the identifier
char *param_name = strdup(p->cur.lxem);
if (!param_name) {
fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n");
exit(1);
}
- pnext(p); // consume identifier
+ pnext(p);
if (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
+ pnext(p);
if (p->cur.kind == TK_NUMBER) {
vtype.array_size = p->cur.val;
- pnext(p); // consume number
+ pnext(p);
} else {
vtype.array_size = 0; // unknown size []
}
pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
+ pnext(p);
}
char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1));
@@ -556,7 +715,7 @@ static _FN *pfunc(_P *p) {
pac++;
if (p->cur.kind == TK_COMMA) {
- pnext(p); /* consume comma and continue */
+ pnext(p);
continue;
} else {
break;
@@ -565,11 +724,11 @@ static _FN *pfunc(_P *p) {
}
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
+ pnext(p);
- _STN *body = pblock(p); /* pblock consumes the block braces and returns */
+ _STN *body = pblock(p);
- return fn_new(name, params, params_types, pac, body);
+ return fn_new(name, params, params_types, pac, body, ret_type);
}
static _FN *parse_program(_LX *lx) {
@@ -579,9 +738,98 @@ static _FN *parse_program(_LX *lx) {
_FN **cur = &head;
while (pstate.cur.kind != TK_EOF) {
- _FN *f = pfunc(&pstate);
- *cur = f;
- cur = &f->n;
+ if (!is_type_token(pstate.cur.kind)) {
+ perror_unexpected(pstate.lx, "top-level declaration", _TN[pstate.cur.kind]);
+ }
+
+ /* Peek ahead: type ident ';'/'='/'[' → global var; type ident '(' → function */
+ _TY gtype = pparse_type(&pstate);
+
+ if (pstate.cur.kind != TK_IDENT) {
+ perror_expected(pstate.lx, "identifier after type", _TN[pstate.cur.kind]);
+ }
+ char *gname = strdup(pstate.cur.lxem);
+ if (!gname) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ pnext(&pstate); /* consume ident */
+
+ if (pstate.cur.kind == TK_LBRACKET) {
+ pnext(&pstate);
+ if (pstate.cur.kind == TK_NUMBER) {
+ gtype.array_size = pstate.cur.val; pnext(&pstate);
+ } else {
+ gtype.array_size = 0;
+ }
+ pexpect(&pstate, TK_RBRACKET); pnext(&pstate);
+ }
+
+ if (pstate.cur.kind == TK_LPAREN) {
+ pnext(&pstate); /* consume '(' */
+
+ char **params = NULL;
+ _TY *param_types = NULL;
+ int pac = 0;
+
+ if (pstate.cur.kind != TK_RPAREN) {
+ do {
+ if (!is_type_token(pstate.cur.kind)) break;
+ _TY ptype = pparse_type(&pstate);
+ char *pname = NULL;
+ if (pstate.cur.kind == TK_IDENT) {
+ pname = strdup(pstate.cur.lxem);
+ if (!pname) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ pnext(&pstate);
+ } else {
+ pname = strdup("_anon");
+ }
+ params = realloc(params, sizeof(char*) * (pac+1));
+ param_types = realloc(param_types, sizeof(_TY) * (pac+1));
+ if (!params || !param_types) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ params[pac] = pname;
+ param_types[pac] = ptype;
+ pac++;
+ if (pstate.cur.kind == TK_COMMA) pnext(&pstate); else break;
+ } while (1);
+ }
+ pexpect(&pstate, TK_RPAREN); pnext(&pstate);
+
+ _STN *body = pblock(&pstate);
+
+ _FN *f = fn_new(gname, params, param_types, pac, body, gtype);
+ *cur = f;
+ cur = &f->n;
+ } else {
+ _EX *init = NULL;
+ if (pstate.cur.kind == TK_ASSIGN) {
+ pnext(&pstate);
+ if (pstate.cur.kind == TK_LBRACE) {
+ /* initializer list for global array */
+ pnext(&pstate);
+ _EX **elems = NULL; int nelems = 0;
+ while (pstate.cur.kind != TK_RBRACE && pstate.cur.kind != TK_EOF) {
+ elems = realloc(elems, sizeof(_EX*) * (nelems+1));
+ if (!elems) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ elems[nelems++] = pexpr(&pstate);
+ if (pstate.cur.kind == TK_COMMA) pnext(&pstate);
+ }
+ pexpect(&pstate, TK_RBRACE); pnext(&pstate);
+ init = ex_call(strdup("__initlist__"), elems, nelems);
+ } else {
+ init = pexpr(&pstate);
+ }
+ }
+ pexpect(&pstate, TK_SEMI); pnext(&pstate);
+
+
+ char *gfunc_name = malloc(strlen(gname) + 12);
+ if (!gfunc_name) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ sprintf(gfunc_name, "__global_%s__", gname);
+
+ _STN *gdecl = st_global(gname, gtype, init); /* gdecl takes ownership of gname */
+ _FN *gf = fn_new(gfunc_name, NULL, NULL, 0, gdecl,
+ (_TY){TY_VOID,0,-1});
+ *cur = gf;
+ cur = &gf->n;
+ }
}
if (pstate.cur.lxem) {
diff --git a/src/token.h b/src/token.h
index e99294e..1e57662 100644
--- a/src/token.h
+++ b/src/token.h
@@ -3,12 +3,14 @@
#include <string.h>
-/* Token and type definitions shared by lexer, parser, and JIT.
- * The type system uses a base kind plus pointer/array decorations. */
typedef enum {
- TY_INT = 0,
- TY_CHAR = 1,
- TY_BOOL = 2,
+ TY_INT = 0, /* parser's sizeof(type) reports 4 */
+ TY_CHAR = 1, /* parser's sizeof(type) reports 1 */
+ TY_BOOL = 2,
+ TY_VOID = 3, /* also used as the return type of the synthetic __global_*__ functions */
+ TY_FLOAT = 4, /* no dedicated sizeof case in the parser; takes the default of 4 */
+ TY_LONG = 5, /* parser's sizeof(type) reports 8 */
+ TY_SHORT = 6, /* parser's sizeof(type) reports 2 */
} _TYBASE;
typedef struct {
@@ -19,9 +21,13 @@ typedef struct {
static inline const char *tybase_name(_TYBASE b) {
switch (b) {
- case TY_INT: return "int";
- case TY_CHAR: return "char";
- case TY_BOOL: return "bool";
+ case TY_INT: return "int"; /* each base kind maps to its keyword spelling */
+ case TY_CHAR: return "char";
+ case TY_BOOL: return "bool";
+ case TY_VOID: return "void";
+ case TY_FLOAT: return "float";
+ case TY_LONG: return "long";
+ case TY_SHORT: return "short"; /* any value not listed here yields "?" */
default: return "?";
}
}
@@ -30,6 +36,10 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_BEGIN, "begin") \
_(TK_INT, "int") \
_(TK_CHAR, "char") \
+ _(TK_VOID, "void") \
+ _(TK_FLOAT, "float") \
+ _(TK_LONG, "long") \
+ _(TK_SHORT, "short") \
_(TK_ASSIGN, "=") \
_(TK_EQ, "==") \
_(TK_NE, "!=") \
@@ -42,6 +52,9 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_ELSE, "else") \
_(TK_FOR, "for") \
_(TK_WHILE, "while") \
+ _(TK_DO, "do") \
+ _(TK_BREAK, "break") \
+ _(TK_CONTINUE, "continue") \
_(TK_BOOL, "bool") \
_(TK_IDENT, "ident") \
_(TK_NUMBER, "number") \
@@ -69,6 +82,15 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_LBRACKET, "[") \
_(TK_RBRACKET, "]") \
_(TK_STRING, "string") \
+ _(TK_PLUS_EQ, "+=") \
+ _(TK_MINUS_EQ, "-=") \
+ _(TK_STAR_EQ, "*=") \
+ _(TK_SLASH_EQ, "/=") \
+ _(TK_INC, "++") \
+ _(TK_DEC, "--") \
+ _(TK_SIZEOF, "sizeof") \
+ _(TK_QUESTION, "?") \
+ _(TK_COLON, ":") \
_(TK_EOF, "eof") \
_(TK_COMMA, ",") \
_(TK_INVALID, "invalid") \
@@ -88,8 +110,8 @@ static const char *_TN[] = {
typedef struct {
_TK kind;
- int val; // only valid if kind == TK_NUMBER
- char *lxem; // malloc’d lexeme string, or NULL
+ int val; /* numeric payload; the parser reads it for TK_NUMBER and for char-literal tokens */
+ char *lxem; /* lexeme text; may be NULL (the parser checks before use) */
} _T;
static _TK checkkw(const char *kw) {
diff --git a/test_runner.sh b/test_runner.sh
deleted file mode 100755
index d478f55..0000000
--- a/test_runner.sh
+++ /dev/null
@@ -1,334 +0,0 @@
-#!/bin/bash
-
-# CCDJIT Comprehensive Test Runner
-# This script provides a more advanced testing interface with better organization
-
-# Colors for pretty output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-PURPLE='\033[0;35m'
-CYAN='\033[0;36m'
-WHITE='\033[1;37m'
-GRAY='\033[0;37m'
-NC='\033[0m' # No Color
-
-# Test configuration
-declare -A test_map
-test_map["./tests/add2.c"]=3
-test_map["./tests/arithmetic.c"]=28
-test_map["./tests/comparison.c"]=5
-test_map["./tests/logical.c"]=3
-test_map["./tests/if_else.c"]=12
-test_map["./tests/while_loop.c"]=10
-test_map["./tests/for_loop.c"]=6
-test_map["./tests/function_call.c"]=17
-test_map["./tests/recursive.c"]=24
-test_map["./tests/bitwise.c"]=58
-test_map["./tests/pointers.c"]=184
-test_map["./tests/arrays.c"]=150
-test_map["./tests/strings.c"]=5
-test_map["./tests/edge_cases.c"]=12
-
-# Error tests (expected to fail with non-zero exit code)
-declare -A error_test_map
-error_test_map["./tests/error_syntax.c"]="syntax"
-error_test_map["./tests/error_undefined_var.c"]="undefined"
-error_test_map["./tests/error_type_mismatch.c"]="type"
-
-# Test categories
-declare -A test_categories
-test_categories["./tests/add2.c"]="Basic"
-test_categories["./tests/arithmetic.c"]="Arithmetic"
-test_categories["./tests/comparison.c"]="Comparison"
-test_categories["./tests/logical.c"]="Logical"
-test_categories["./tests/if_else.c"]="Control Flow"
-test_categories["./tests/while_loop.c"]="Control Flow"
-test_categories["./tests/for_loop.c"]="Control Flow"
-test_categories["./tests/function_call.c"]="Functions"
-test_categories["./tests/recursive.c"]="Functions"
-test_categories["./tests/bitwise.c"]="Bitwise"
-test_categories["./tests/pointers.c"]="Pointers"
-test_categories["./tests/arrays.c"]="Arrays"
-test_categories["./tests/strings.c"]="Strings"
-test_categories["./tests/edge_cases.c"]="Edge Cases"
-test_categories["./tests/error_syntax.c"]="Error Tests"
-test_categories["./tests/error_undefined_var.c"]="Error Tests"
-test_categories["./tests/error_type_mismatch.c"]="Error Tests"
-
-print_header() {
- echo -e "${CYAN}╔══════════════════════════════════════════════════════════════════════════════╗${NC}"
- echo -e "${CYAN}║${WHITE} CCDJIT Comprehensive Test Suite ${CYAN}║${NC}"
- echo -e "${CYAN}╚══════════════════════════════════════════════════════════════════════════════╝${NC}"
- echo
-}
-
-print_test_header() {
- local test_file="$1"
- local category="${test_categories[$test_file]}"
- local test_name=$(basename "$test_file" .c)
- echo -e "${BLUE}┌─ ${WHITE}$test_name${BLUE} (${YELLOW}$category${BLUE})${NC}"
- echo -e "${BLUE}│${NC} File: $test_file"
-
- if [ -n "${test_map[$test_file]}" ]; then
- echo -e "${BLUE}│${NC} Expected: ${test_map[$test_file]}"
- elif [ -n "${error_test_map[$test_file]}" ]; then
- echo -e "${BLUE}│${NC} Expected: ${RED}ERROR${NC} (${error_test_map[$test_file]})"
- fi
-}
-
-print_test_result() {
- local test_file="$1"
- local expected="$2"
- local actual="$3"
- local status="$4"
- local is_error="$5"
-
- if [ "$status" == "PASS" ]; then
- if [ "$is_error" == "true" ]; then
- echo -e "${BLUE}│${NC} Result: ${GREEN}✓ PASSED${NC} (correctly failed with exit code $actual)"
- else
- echo -e "${BLUE}│${NC} Result: ${GREEN}✓ PASSED${NC} (got $actual)"
- fi
- echo -e "${BLUE}└─${GREEN} SUCCESS${NC}"
- else
- if [ "$is_error" == "true" ]; then
- echo -e "${BLUE}│${NC} Result: ${RED}✗ FAILED${NC} (expected error, got exit code $actual)"
- else
- echo -e "${BLUE}│${NC} Result: ${RED}✗ FAILED${NC} (expected $expected, got $actual)"
- fi
- echo -e "${BLUE}└─${RED} FAILURE${NC}"
- fi
- echo
-}
-
-print_summary() {
- local passed="$1"
- local failed="$2"
- local total="$3"
- local error_passed="$4"
- local error_failed="$5"
- local error_total="$6"
-
- echo -e "${CYAN}╔══════════════════════════════════════════════════════════════════════════════╗${NC}"
- echo -e "${CYAN}║${WHITE} Test Summary ${CYAN}║${NC}"
- echo -e "${CYAN}╠══════════════════════════════════════════════════════════════════════════════╣${NC}"
-
- if [ "$failed" -eq 0 ] && [ "$error_failed" -eq 0 ]; then
- echo -e "${CYAN}║${GREEN} All tests passed! ${WHITE}(${passed}/${total} functional, ${error_passed}/${error_total} error)${CYAN} ║${NC}"
- else
- echo -e "${CYAN}║${GREEN} Functional Tests: ${passed}/${total}${CYAN} │ ${RED}Failed: ${failed}${CYAN} ║${NC}"
- echo -e "${CYAN}║${GREEN} Error Tests: ${error_passed}/${error_total}${CYAN} │ ${RED}Failed: ${error_failed}${CYAN} ║${NC}"
- fi
-
- echo -e "${CYAN}╚══════════════════════════════════════════════════════════════════════════════╝${NC}"
-}
-
-run_functional_tests() {
- local count=0
- local passed=0
- local failed=0
-
- echo -e "${PURPLE}Running Functional Tests...${NC}" >&2
- echo >&2
-
- for key in "${!test_map[@]}"; do
- print_test_header "$key" >&2
-
- local output
- local exit_code
- local actual_result
- output=$(./bin/ccdjit "$key" 2>&1)
- exit_code=$?
-
- # Extract the actual result from "JIT returned: X" line
- actual_result=$(echo "$output" | grep "JIT returned:" | sed 's/.*JIT returned: //' | tail -1)
- if [ -z "$actual_result" ]; then
- actual_result=$exit_code
- fi
-
- if [ "${test_map[$key]}" = "$actual_result" ]; then
- print_test_result "$key" "${test_map[$key]}" "$actual_result" "PASS" "false" >&2
- passed=$((passed+1))
- else
- print_test_result "$key" "${test_map[$key]}" "$actual_result" "FAIL" "false" >&2
- failed=$((failed+1))
- fi
- count=$((count+1))
- done
-
- echo "$passed $failed $count"
-}
-
-run_error_tests() {
- local count=0
- local passed=0
- local failed=0
-
- echo -e "${PURPLE}Running Error Tests...${NC}" >&2
- echo >&2
-
- for key in "${!error_test_map[@]}"; do
- print_test_header "$key" >&2
-
- local output
- local exit_code
- output=$(./bin/ccdjit "$key" 2>&1)
- exit_code=$?
-
- if [ $exit_code -ne 0 ]; then
- print_test_result "$key" "ERROR" "$exit_code" "PASS" "true" >&2
- passed=$((passed+1))
- else
- print_test_result "$key" "ERROR" "$exit_code" "FAIL" "true" >&2
- failed=$((failed+1))
- fi
- count=$((count+1))
- done
-
- echo "$passed $failed $count"
-}
-
-run_all_tests() {
- print_header
-
- local func_results
- local error_results
-
- func_results=($(run_functional_tests))
- error_results=($(run_error_tests))
-
- # Ensure all stderr output is flushed before showing summary
- sleep 0.1
-
- local func_passed=${func_results[0]}
- local func_failed=${func_results[1]}
- local func_total=${func_results[2]}
-
- local error_passed=${error_results[0]}
- local error_failed=${error_results[1]}
- local error_total=${error_results[2]}
-
- print_summary $func_passed $func_failed $func_total $error_passed $error_failed $error_total
-}
-
-run_single_test() {
- local test_file="$1"
-
- if [ -z "${test_map[$test_file]}" ] && [ -z "${error_test_map[$test_file]}" ]; then
- echo -e "${RED}Error: Test file '$test_file' not found in test suite${NC}"
- exit 1
- fi
-
- print_header
- print_test_header "$test_file"
-
- local output
- local exit_code
- local actual_result
- output=$(./bin/ccdjit "$test_file" 2>&1)
- exit_code=$?
-
- # Extract the actual result from "JIT returned: X" line
- actual_result=$(echo "$output" | grep "JIT returned:" | sed 's/.*JIT returned: //' | tail -1)
- if [ -z "$actual_result" ]; then
- actual_result=$exit_code
- fi
-
- if [ -n "${test_map[$test_file]}" ]; then
- # Functional test
- if [ "${test_map[$test_file]}" = "$actual_result" ]; then
- print_test_result "$test_file" "${test_map[$test_file]}" "$actual_result" "PASS" "false"
- else
- print_test_result "$test_file" "${test_map[$test_file]}" "$actual_result" "FAIL" "false"
- fi
- else
- # Error test
- if [ $exit_code -ne 0 ]; then
- print_test_result "$test_file" "ERROR" "$exit_code" "PASS" "true"
- else
- print_test_result "$test_file" "ERROR" "$exit_code" "FAIL" "true"
- fi
- fi
-}
-
-list_tests() {
- echo -e "${CYAN}Available Tests:${NC}"
- echo
-
- echo -e "${GREEN}Functional Tests:${NC}"
- for key in "${!test_map[@]}"; do
- local category="${test_categories[$key]}"
- local test_name=$(basename "$key" .c)
- echo -e " ${WHITE}$test_name${NC} (${YELLOW}$category${NC}) - Expected: ${test_map[$key]}"
- done
-
- echo
- echo -e "${RED}Error Tests:${NC}"
- for key in "${!error_test_map[@]}"; do
- local category="${test_categories[$key]}"
- local test_name=$(basename "$key" .c)
- echo -e " ${WHITE}$test_name${NC} (${YELLOW}$category${NC}) - Expected: ERROR"
- done
-}
-
-show_help() {
- echo -e "${CYAN}CCDJIT Test Runner${NC}"
- echo
- echo "Usage: $0 [COMMAND] [OPTIONS]"
- echo
- echo "Commands:"
- echo " all Run all tests (functional + error)"
- echo " functional Run only functional tests"
- echo " error Run only error tests"
- echo " test <file> Run a specific test file"
- echo " list List all available tests"
- echo " help Show this help message"
- echo
- echo "Examples:"
- echo " $0 all"
- echo " $0 functional"
- echo " $0 test ./tests/arithmetic.c"
- echo " $0 list"
-}
-
-# Main script logic
-case "$1" in
- "all")
- run_all_tests
- ;;
- "functional")
- print_header
- func_results=($(run_functional_tests))
- sleep 0.1
- print_summary ${func_results[0]} ${func_results[1]} ${func_results[2]} 0 0 0
- ;;
- "error")
- print_header
- error_results=($(run_error_tests))
- sleep 0.1
- print_summary 0 0 0 ${error_results[0]} ${error_results[1]} ${error_results[2]}
- ;;
- "test")
- if [ -z "$2" ]; then
- echo -e "${RED}Error: Please specify a test file${NC}"
- exit 1
- fi
- run_single_test "$2"
- ;;
- "list")
- list_tests
- ;;
- "help"|"-h"|"--help")
- show_help
- ;;
- "")
- show_help
- ;;
- *)
- echo -e "${RED}Error: Unknown command '$1'${NC}"
- echo "Use '$0 help' for usage information"
- exit 1
- ;;
-esac
diff --git a/tests/getc.c b/tests/getc.c
new file mode 100644
index 0000000..ef3b860
--- /dev/null
+++ b/tests/getc.c
@@ -0,0 +1,131 @@
+int g_passed = 0; /* count of check() calls whose values matched */
+int g_failed = 0; /* count of check() calls whose values differed */
+
+int getc() { /* read one byte from fd 0; returns it as 0..255, or -1 when nothing was read */
+    char c;
+    int n = syscall(0, 0, &c, 1); /* presumably read(0, &c, 1) under Linux x86-64 syscall numbering - confirm */
+    if (n <= 0) return -1;
+    return (int)c & 255; /* mask: a byte >= 0x80 must not come back negative, and 0xFF must not look like the -1 sentinel */
+}
+
+int write_char(int c) { /* emit c, narrowed to char, on fd 1; always returns 1 */
+ char b;
+ b = (char)c;
+ syscall(1, 1, &b, 1); /* presumably write(1, &b, 1); the result is ignored */
+ return 1;
+}
+
+int write_str(char *s) { /* emit the NUL-terminated string s on fd 1; returns its length */
+ int n = 0;
+ while (s[n] != 0) n++; /* measure the string first so it goes out in a single syscall */
+ syscall(1, 1, s, n);
+ return n;
+}
+
+int write_int(int n) { /* print n in decimal via write_char(); returns 1 for zero, otherwise sign plus digit count */
+ char tmp[24];
+ int len = 0;
+ int neg = n < 0;
+ if (neg) { write_char('-'); n = -n; } /* NOTE(review): -n is not representable for the most negative int */
+ if (n == 0) { write_char('0'); return 1; }
+ while (n > 0) { tmp[len] = (char)('0' + n % 10); n = n / 10; len++; } /* digits collected least-significant first */
+ int i = len - 1;
+ while (i >= 0) { write_char(tmp[i]); i--; } /* emitted in reverse to restore reading order */
+ return neg + len;
+}
+
+int check(char *name, int got, int want) { /* report PASS/FAIL for one named comparison; returns 1 on match, 0 otherwise */
+ if (got == want) {
+ write_str("PASS "); write_str(name); write_char('\n');
+ g_passed++;
+ return 1;
+ }
+ write_str("FAIL "); write_str(name); /* mismatch: print both values so the failure is self-explanatory */
+ write_str(": got "); write_int(got);
+ write_str(" want "); write_int(want);
+ write_char('\n');
+ g_failed++;
+ return 0;
+}
+
+int read_line(char *buf, int cap) { /* read into buf until newline, end of input, or cap - 1 bytes; returns bytes stored */
+ int i = 0;
+ int c;
+ while (i < cap - 1) { /* one slot is kept for the terminating 0 */
+ c = getc();
+ if (c == -1 || c == '\n') break; /* the newline is consumed but not stored */
+ buf[i] = (char)c;
+ i++;
+ }
+ buf[i] = 0;
+ return i;
+}
+
+int test1() { /* first byte of input must be 65 ('A') */
+ int c = getc();
+ int result = c;
+ while (c != '\n' && c != -1) c = getc(); /* drain the rest of the line so the next test starts on a fresh one */
+ return check("single_char_is_65", result, 65);
+}
+
+int test2() { /* next input line must be exactly 5 characters long */
+ char buf[64];
+ int n = read_line(buf, 64);
+ write_str(buf); write_char('\n'); /* echo the line that was read */
+ return check("readline_length", n, 5);
+}
+
+int test3() { /* consume input until two newlines have been seen */
+ int newlines = 0;
+ int c;
+ while (newlines < 2) {
+ c = getc();
+ if (c == -1) break; /* input ran out early; the check below then fails */
+ if (c == '\n') newlines++;
+ }
+ return check("count_lines", newlines, 2);
+}
+
+int test4() { /* parse a run of decimal digits from input; expects the value 42 */
+ int value = 0;
+ int c;
+ while (1) {
+ c = getc();
+ if (c < '0' || c > '9') break; /* the first non-digit (including -1) ends the number and is consumed */
+ value = value * 10 + (c - '0');
+ }
+ return check("parse_int", value, 42);
+}
+
+int test5() { /* read a line, upper-case it in place, echo it; expects length 5 */
+ char buf[64];
+ int n = read_line(buf, 64);
+ int i = 0;
+ while (i < n) {
+ char ch = buf[i];
+ if (ch >= 'a' && ch <= 'z') buf[i] = ch - 'a' + 'A'; /* only lowercase a-z is changed */
+ i++;
+ }
+ write_str(buf); write_char('\n');
+ return check("toupper_length", n, 5);
+}
+
+int test6() { /* input must be exhausted at this point: getc() has to return -1 */
+ int c = getc();
+ return check("eof_is_minus1", c, -1);
+}
+
+int main() { /* run the six tests in order, print totals, return 0 only when none failed */
+ test1(); /* the tests share one input stream, so the order of calls matters */
+ test2();
+ test3();
+ test4();
+ test5();
+ test6();
+
+ write_char('\n');
+ write_int(g_passed); write_str(" passed, ");
+ write_int(g_failed); write_str(" failed\n");
+
+ return g_failed == 0 ? 0 : 1;
+}
diff --git a/tests/new_arrays.c b/tests/new_arrays.c
new file mode 100644
index 0000000..5b7b59b
--- /dev/null
+++ b/tests/new_arrays.c
@@ -0,0 +1,31 @@
+int strlen_p(char *s) { /* length of a NUL-terminated string; not called from main in this file */
+ int n = 0;
+ while (s[n] != 0) { n = n + 1; }
+ return n;
+}
+
+int sum(int *arr, int n) { /* add up the first n elements of arr */
+ int s = 0;
+ int i = 0;
+ while (i < n) { s += arr[i]; i++; } /* uses the compound-assignment and ++ statement forms */
+ return s;
+}
+
+int main() { /* returns how many of five checks hold, so 5 means everything passed */
+ int nums[5] = {10, 20, 30, 40, 50}; /* brace initializer list on a local array */
+ int total = sum(nums, 5);
+
+ char msg[4] = {'O', 'K', '\n', 0};
+ syscall(1, 1, msg, 3); /* presumably write(1, msg, 3): prints "OK" and a newline */
+
+ int *p = nums;
+ p += 2; /* afterwards *p must be nums[2], i.e. the step is scaled by the element size */
+ int mid = *p;
+
+ int n = sizeof(nums); /* expected 20: five 4-byte ints */
+ int c = sizeof(char);
+ int ip = sizeof(int);
+
+ int score = (total == 150) + (mid == 30) + (n == 20) + (c == 1) + (ip == 4);
+ return score;
+}
diff --git a/tests/printf.c b/tests/printf.c
new file mode 100644
index 0000000..fc0d68d
--- /dev/null
+++ b/tests/printf.c
@@ -0,0 +1,164 @@
+char out_buf[4096]; /* pending output, written out by buf_putc() when full and by buf_flush() */
+int out_pos = 0; /* number of bytes currently held in out_buf */
+
+int buf_putc(char c) { /* append c to out_buf, writing the buffer out once it is full; always returns 1 */
+ out_buf[out_pos] = c;
+ out_pos++;
+ if (out_pos >= 4096) { /* 4096 is the declared size of out_buf */
+ syscall(1, 1, out_buf, out_pos);
+ out_pos = 0;
+ }
+ return 1;
+}
+
+int buf_flush() { /* write out whatever is pending in out_buf; always returns 0 */
+ if (out_pos > 0) { /* nothing is written when the buffer is empty */
+ syscall(1, 1, out_buf, out_pos);
+ out_pos = 0;
+ }
+ return 0;
+}
+
+
+int my_strlen(char *s) { /* length of a NUL-terminated string; no caller appears in this file */
+ int n = 0;
+ while (s[n] != 0) { n++; }
+ return n;
+}
+
+int print_str(char *s) { /* queue every character of s via buf_putc(); returns the count queued */
+ int i = 0;
+ while (s[i] != 0) { buf_putc(s[i]); i++; }
+ return i;
+}
+
+
+int print_int(int n) { /* queue n in decimal via buf_putc(); returns the number of characters queued */
+ char tmp[24];
+ int len = 0;
+ int neg = n < 0;
+ if (neg) { buf_putc('-'); n = -n; } /* NOTE(review): -n is not representable for the most negative int */
+ if (n == 0) { buf_putc('0'); return neg + 1; }
+ while (n > 0) { /* digits collected least-significant first */
+ tmp[len] = (char)('0' + n % 10);
+ n = n / 10;
+ len++;
+ }
+
+ int i = len - 1;
+ while (i >= 0) { buf_putc(tmp[i]); i--; } /* emitted in reverse to restore reading order */
+ return neg + len;
+}
+
+
+int print_hex(int n) { /* queue n in lowercase hex without a prefix; returns the number of digits queued */
+ char hex[16] = {'0','1','2','3','4','5','6','7',
+ '8','9','a','b','c','d','e','f'}; /* digit lookup table indexed by nibble value */
+ if (n == 0) { buf_putc('0'); return 1; }
+ char tmp[18];
+ int len = 0;
+ int u = n;
+ while (u != 0) {
+ tmp[len] = hex[u & 15];
+ u = u / 16; /* NOTE(review): signed division; a negative n would not print as its two's-complement digits - the only call here passes 255 */
+ len++;
+ }
+ int i = len - 1;
+ while (i >= 0) { buf_putc(tmp[i]); i--; }
+ return len;
+}
+
+
+int my_printf(char *fmt, int a, int b, int c) { /* tiny printf: %d %s %c %x take a, b, c in order; returns characters queued */
+    int arg_idx = 0;
+    int i = 0;
+    int total = 0;
+    while (fmt[i] != 0) {
+        if (fmt[i] != '%' || fmt[i + 1] == 0) { /* ordinary char, or a lone '%' at the very end: emit as-is instead of reading past the terminator */
+            buf_putc(fmt[i]);
+            total++;
+            i++;
+            continue;
+        }
+        i++; /* skip '%' */
+        int arg = arg_idx == 0 ? a : (arg_idx == 1 ? b : c); /* a fourth and later conversion reuses c */
+        arg_idx++;
+        if (fmt[i] == 'd') {
+            total += print_int(arg);
+        } else if (fmt[i] == 's') {
+            total += print_str((char*)arg); /* NOTE(review): relies on a char* surviving the trip through an int argument */
+        } else if (fmt[i] == 'c') {
+            buf_putc((char)arg);
+            total++;
+        } else if (fmt[i] == 'x') {
+            total += print_hex(arg);
+        } else if (fmt[i] == '%') {
+            buf_putc('%');
+            total++;
+            arg_idx--; /* %% doesn't consume an arg */
+        } else { /* unknown conversion: echoed verbatim, and it still uses up an argument slot */
+            buf_putc('%'); buf_putc(fmt[i]);
+            total += 2;
+        }
+        i++;
+    }
+    return total;
+}
+
+
+int fact(int n) { /* recursive factorial; any n <= 1 yields 1 */
+ return n <= 1 ? 1 : n * fact(n - 1); /* written with the ternary operator and a recursive call in one arm */
+}
+
+int fib(int n) { /* naive doubly-recursive Fibonacci; returns n itself for n <= 1 */
+ if (n <= 1) return n;
+ return fib(n - 1) + fib(n - 2);
+}
+
+int main() { /* demo driver: every call passes all three int slots, unused ones as 0 */
+ my_printf("=== JIT printf demo ===\n", 0, 0, 0);
+
+ my_printf("Hello, %s!\n", "world", 0, 0); /* a string literal passed through an int parameter */
+ my_printf("int: %d neg: %d\n", 42, -7, 0);
+ my_printf("hex: 0x%x\n", 255, 0, 0);
+ my_printf("char: %c\n", 'A', 0, 0);
+ my_printf("percent: 100%%\n", 0, 0, 0);
+
+ my_printf("\nFactorials:\n", 0, 0, 0);
+ int i = 1;
+ while (i <= 8) {
+ my_printf(" %d! = %d\n", i, fact(i), 0);
+ i++;
+ }
+
+ my_printf("\nFibonacci:\n ", 0, 0, 0);
+ i = 0;
+ while (i < 10) {
+ my_printf("%d ", fib(i), 0, 0);
+ i++;
+ }
+ my_printf("\n", 0, 0, 0);
+
+
+ my_printf("\nCountdown: ", 0, 0, 0);
+ int n = 5;
+ do { /* do/while form: body runs before the first test of n */
+ my_printf("%d ", n, 0, 0);
+ n--;
+ } while (n > 0);
+ my_printf("\n", 0, 0, 0);
+
+
+ my_printf("Odd 1-9: ", 0, 0, 0);
+ i = 0;
+ while (i < 10) {
+ i++;
+ if (i % 2 == 0) continue; /* even values are skipped */
+ my_printf("%d ", i, 0, 0);
+ if (i == 9) break; /* leaves the loop before i reaches 10 */
+ }
+ my_printf("\n", 0, 0, 0);
+
+ buf_flush(); /* output is buffered, so nothing below 4096 bytes appears without this */
+ return 0;
+}
diff --git a/tests/syscalls.c b/tests/syscalls.c
new file mode 100644
index 0000000..285e539
--- /dev/null
+++ b/tests/syscalls.c
@@ -0,0 +1,30 @@
+int strlen(char *s) { /* length of a NUL-terminated string, defined locally by this test */
+ int n = 0;
+ while (s[n] != 0) { n = n + 1; }
+ return n;
+}
+
+int puts_fd(int fd, char *s) { /* send the whole string s to fd; returns whatever the syscall returns */
+ return syscall(1, fd, s, strlen(s)); /* presumably write(fd, s, len) under Linux x86-64 numbering - confirm */
+}
+
+int main() { /* returns how many of three checks hold, so 3 means everything passed */
+ puts_fd(1, "Hello from JIT syscall!\n");
+
+ int pid = syscall(39); /* presumably getpid on Linux x86-64 - confirm; single-argument call form */
+ int pid_ok = pid > 0;
+
+ char msg[32];
+ msg[0] = 'P'; msg[1] = 'I'; msg[2] = 'D'; msg[3] = ' ';
+ msg[4] = 'o'; msg[5] = 'k'; msg[6] = ':'; msg[7] = ' ';
+ msg[8] = '0' + pid_ok; /* '0' or '1' depending on the comparison above */
+ msg[9] = '\n';
+ syscall(1, 1, msg, 10); /* only the ten bytes filled in above are written */
+
+ int n1 = syscall(1, 1, "write test 1\n", 13);
+ int n2 = syscall(1, 1, "write test 2\n", 13);
+
+ int result = pid_ok + (n1 == 13) + (n2 == 13); /* each write is expected to report all 13 bytes */
+
+ return result;
+}