summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/ast.h 92
-rw-r--r-- src/codegen_jit.h 853
-rw-r--r-- src/lexer.h 117
-rw-r--r-- src/main.c 5
-rw-r--r-- src/parser.h 472
-rw-r--r-- src/token.h 42
-rwxr-xr-x test_runner.sh 334
-rw-r--r-- tests/getc.c 131
-rw-r--r-- tests/new_arrays.c 31
-rw-r--r-- tests/printf.c 164
-rw-r--r-- tests/syscalls.c 30
11 files changed, 1683 insertions, 588 deletions
diff --git a/src/ast.h b/src/ast.h
index 6c326cc..5cfe5d6 100644
--- a/src/ast.h
+++ b/src/ast.h
@@ -7,7 +7,7 @@
#include <stdio.h>
-typedef enum { EX_NUMBER, EX_VAR, EX_BINOP, EX_CALL, EX_ADDR, EX_DEREF, EX_STRING, EX_INDEX } _EK;
+typedef enum { EX_NUMBER, EX_VAR, EX_BINOP, EX_CALL, EX_ADDR, EX_DEREF, EX_STRING, EX_INDEX, EX_TERNARY, EX_CAST } _EK;
typedef struct _EX {
_EK kind;
@@ -35,11 +35,20 @@ typedef struct _EX {
struct _EX *array; // array expression
struct _EX *index; // index expression
} index;
+ struct { // EX_TERNARY: cond ? then : else
+ struct _EX *cond;
+ struct _EX *then_expr;
+ struct _EX *else_expr;
+ } ternary;
+ struct { // EX_CAST: (type)expr
+ _TY to;
+ struct _EX *expr;
+ } cast;
};
} _EX;
-typedef enum { STK_RETURN, STK_VAR_DECL, STK_ASSIGN, STK_EXPR, STK_BLOCK, STK_IF, STK_WHILE, STK_FOR } _STK;
+typedef enum { STK_RETURN, STK_VAR_DECL, STK_ASSIGN, STK_EXPR, STK_BLOCK, STK_IF, STK_WHILE, STK_FOR, STK_DOWHILE, STK_BREAK, STK_CONTINUE, STK_GLOBAL } _STK;
typedef struct _STN {
_STK kind;
@@ -71,6 +80,16 @@ typedef struct _STN {
struct _STN *step; // may be NULL (an expr stmt)
struct _STN *body;
} fr;
+ struct { // STK_DOWHILE
+ struct _STN *body;
+ _EX *cond;
+ } dowhl;
+ /* STK_BREAK and STK_CONTINUE carry no payload */
+ struct { // STK_GLOBAL: a global variable declaration
+ char *name;
+ _EX *init; /* may be NULL; must be constant (EX_NUMBER/EX_STRING) */
+ _TY type;
+ } global;
};
struct _STN *n; // linked list
} _STN;
@@ -83,6 +102,7 @@ typedef struct _FN {
_TY *param_types;
int pac;
_STN *body;
+ _TY ret_type; /* return type of this function */
struct _FN *n;
} _FN;
@@ -180,7 +200,6 @@ _FN *fnlist_prepare(_FN *head) {
-/* Generic alloc macros */
#define NEW_EX(k) \
_EX *e = (_EX *)calloc(1, sizeof(_EX)); \
e->kind = k
@@ -189,7 +208,6 @@ _FN *fnlist_prepare(_FN *head) {
s->kind = k
#define NEW_FN() _FN *f = (_FN *)calloc(1, sizeof(_FN))
-/* Constructor declaration macros */
#define DEFINE_EX_CONSTRUCTOR(name, kind, ...) \
static inline _EX *ex_##name(__VA_ARGS__)
@@ -299,15 +317,60 @@ DEFINE_ST_CONSTRUCTOR(for, STK_FOR, _STN *init, _EX *cond, _STN *step, _STN *bod
return s;
}
+DEFINE_ST_CONSTRUCTOR(dowhile, STK_DOWHILE, _STN *body, _EX *cond) { /* Build a STK_DOWHILE node from a loop body and its condition. */
+ NEW_ST(STK_DOWHILE); /* presumably declares the node `s` used below — the macro is only partly visible here */
+ s->dowhl.body = body; /* stored as given, not copied; st_free() releases it via st_free() */
+ s->dowhl.cond = cond; /* stored as given, not copied; st_free() releases it via ex_free() */
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(break, STK_BREAK) { /* Build a STK_BREAK node; the node carries no payload. */
+ NEW_ST(STK_BREAK); /* presumably declares the node `s` returned below — the macro is only partly visible here */
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(continue, STK_CONTINUE) { /* Build a STK_CONTINUE node; the node carries no payload. */
+ NEW_ST(STK_CONTINUE); /* presumably declares the node `s` returned below — the macro is only partly visible here */
+ return s;
+}
+
+DEFINE_ST_CONSTRUCTOR(global, STK_GLOBAL, char *name, _TY type, _EX *init) { /* Build a STK_GLOBAL node describing one global variable declaration. */
+ NEW_ST(STK_GLOBAL); /* presumably declares the node `s` used below — the macro is only partly visible here */
+ s->global.name = name; /* pointer is kept, not duplicated; st_free() later passes it to free() */
+ s->global.type = type;
+ s->global.init = init; /* may be NULL; st_free() releases it via ex_free() */
+ return s;
+}
+
+static inline _EX *ex_ternary(_EX *cond, _EX *then_expr, _EX *else_expr) { /* Build an EX_TERNARY node for `cond ? then_expr : else_expr`. */
+ _EX *e = (_EX *)calloc(1, sizeof(_EX)); /* zero-initialised node */
+ if (!e) { fprintf(stderr, "[AST] OOM\n"); exit(1); } /* allocation failure terminates the process */
+ e->kind = EX_TERNARY;
+ e->ternary.cond = cond; /* the three operands are stored as given; ex_free() frees each of them */
+ e->ternary.then_expr = then_expr;
+ e->ternary.else_expr = else_expr;
+ return e;
+}
+
+static inline _EX *ex_cast(_TY to, _EX *expr) { /* Build an EX_CAST node for `(to)expr`. */
+ _EX *e = (_EX *)calloc(1, sizeof(_EX)); /* zero-initialised node */
+ if (!e) { fprintf(stderr, "[AST] OOM\n"); exit(1); } /* allocation failure terminates the process */
+ e->kind = EX_CAST;
+ e->cast.to = to; /* target type, copied by value */
+ e->cast.expr = expr; /* operand is stored as given; ex_free() frees it */
+ return e;
+}
-DEFINE_FN_CONSTRUCTOR(new, char *name, char **params, _TY* params_types, int pac, _STN *body) {
+
+DEFINE_FN_CONSTRUCTOR(new, char *name, char **params, _TY* params_types, int pac, _STN *body, _TY ret_type) {
NEW_FN();
f->name = name;
f->params = params;
f->param_types = params_types;
f->pac = pac;
f->body = body;
+ f->ret_type = ret_type;
return f;
}
@@ -344,6 +407,14 @@ static inline void ex_free(_EX *e) {
ex_free(e->index.array);
ex_free(e->index.index);
break;
+ case EX_TERNARY:
+ ex_free(e->ternary.cond);
+ ex_free(e->ternary.then_expr);
+ ex_free(e->ternary.else_expr);
+ break;
+ case EX_CAST:
+ ex_free(e->cast.expr);
+ break;
}
free(e);
}
@@ -383,6 +454,17 @@ static inline void st_free(_STN *s) {
st_free(s->fr.step);
st_free(s->fr.body);
break;
+ case STK_DOWHILE:
+ st_free(s->dowhl.body);
+ ex_free(s->dowhl.cond);
+ break;
+ case STK_BREAK:
+ case STK_CONTINUE:
+ break;
+ case STK_GLOBAL:
+ free(s->global.name);
+ ex_free(s->global.init);
+ break;
}
_STN *next = s->n;
free(s);
diff --git a/src/codegen_jit.h b/src/codegen_jit.h
index ff1d687..2cd4aa2 100644
--- a/src/codegen_jit.h
+++ b/src/codegen_jit.h
@@ -32,6 +32,7 @@ typedef struct FuncMap {
void *addr;
size_t size;
size_t alloc_size;
+ _TY ret_type; /* return type of this function */
struct FuncMap *next;
} FuncMap;
@@ -49,6 +50,14 @@ typedef struct VarMap {
struct VarMap *next;
} VarMap;
+typedef struct GlobalVar { /* One registered global variable; nodes form a singly linked list headed by JIT.global_list. */
+ char *name; /* heap copy made by register_global(); released with free() in jit_free() */
+ uint8_t *addr; /* pointer into globals_buf */
+ _TY type; /* declared type as passed to register_global(), including any array_size */
+ int size; /* total bytes allocated */
+ struct GlobalVar *next; /* next entry in the list, NULL at the end */
+} GlobalVar;
+
typedef struct {
FuncMap *func_list;
VarMap *var_list;
@@ -56,6 +65,17 @@ typedef struct {
CodeBuf cb;
char *current_func_name;
PatchEntry *patch_list;
+ /* Globals data segment */
+ uint8_t *globals_buf; /* mmap'd RW page(s) for global variables */
+ size_t globals_cap;
+ size_t globals_used;
+ GlobalVar *global_list;
+ /* Break/continue patch stacks (up to 64 nesting levels) */
+ size_t break_patches[64][256]; /* offsets to patch */
+ int break_patch_count[64];
+ size_t cont_patches[64][256];
+ int cont_patch_count[64];
+ int loop_depth;
} JIT;
static void jit_init(JIT *jit) {
@@ -66,6 +86,19 @@ static void jit_init(JIT *jit) {
jit->cb.len = jit->cb.cap = 0;
jit->current_func_name = NULL;
jit->patch_list = NULL;
+ jit->globals_cap = 1024 * 1024;
+ jit->globals_used = 0;
+ jit->globals_buf = (uint8_t *)mmap(NULL, jit->globals_cap,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (jit->globals_buf == MAP_FAILED) {
+ perror("mmap globals"); exit(1);
+ }
+ memset(jit->globals_buf, 0, jit->globals_cap);
+ jit->global_list = NULL;
+ jit->loop_depth = 0;
+ memset(jit->break_patch_count, 0, sizeof(jit->break_patch_count));
+ memset(jit->cont_patch_count, 0, sizeof(jit->cont_patch_count));
}
static void jit_free(JIT *jit) {
@@ -79,6 +112,14 @@ static void jit_free(JIT *jit) {
}
jit->patch_list = NULL;
+ for (GlobalVar *g = jit->global_list; g;) {
+ GlobalVar *n = g->next; free(g->name); free(g); g = n;
+ }
+ jit->global_list = NULL;
+
+ if (jit->globals_buf && jit->globals_buf != MAP_FAILED)
+ munmap(jit->globals_buf, jit->globals_cap);
+
for (FuncMap *f = jit->func_list; f;) {
FuncMap *n = f->next;
if (f->addr && f->alloc_size > 0) munmap(f->addr, f->alloc_size);
@@ -91,8 +132,6 @@ static void jit_free(JIT *jit) {
jit->cb.len = jit->cb.cap = 0;
}
-/* --- Code buffer --- */
-
static void cb_init(CodeBuf *c) {
c->cap = 1024; c->len = 0;
c->buf = (uint8_t *)malloc(c->cap);
@@ -114,8 +153,6 @@ static void emit32(CodeBuf *c, uint32_t v) { cb_grow(c,4); memcpy(c->buf+c->
static void emit64(CodeBuf *c, uint64_t v) { cb_grow(c,8); memcpy(c->buf+c->len,&v,8); c->len+=8; }
static void emitN(CodeBuf *c, const void *p, size_t n) { cb_grow(c,n); memcpy(c->buf+c->len,p,n); c->len+=n; }
-/* --- x86-64 encoding helpers --- */
-
static void emit_rex(CodeBuf *c, int reg, int rm, int w) {
uint8_t rex = 0x40;
if (w) rex |= 0x08;
@@ -145,9 +182,18 @@ static void emit_mov_reg_mem_reg(CodeBuf *c, int dst, int base) {
static void emit_mov_mem_reg_reg(CodeBuf *c, int base, int src) {
emit_rex(c,src,base,1); emit8(c,0x89); emit_modrm(c,0,src,base);
}
+static void emit_mov_mem8_reg_reg(CodeBuf *c, int base) { /* Emit a one-byte store of RAX's low byte to the address held in register `base`. */
+ if (base & 8) emit8(c, 0x41); /* REX.B for extended base registers */
+ emit8(c, 0x88); emit_modrm(c, 0, RAX, base); /* source register is hard-wired to RAX despite the _reg_reg name. NOTE(review): mod=0 has no plain [base] form for RSP/RBP-class bases; visible callers pass only RBX — confirm before reuse */
+}
static void emit_movzx_rax_mem8(CodeBuf *c, int disp32) {
emit8(c,0x48); emit8(c,0x0F); emit8(c,0xB6); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
}
+static void emit_movzx_rax_mem8_base(CodeBuf *c, int base) { /* Emit a zero-extending byte load into RAX from the address held in register `base`. */
+ if (base & 8) emit8(c, 0x49); else emit8(c, 0x48); /* REX prefix: 0x48 normally, 0x49 when `base` is an extended register (bit 3 set) */
+ emit8(c, 0x0F); emit8(c, 0xB6); /* two-byte opcode, same bytes as emit_movzx_rax_mem8 */
+ emit_modrm(c, 0, RAX, base); /* mod=0: no displacement follows, unlike the RBP-relative variant */
+}
static void emit_mov_mem8_rax(CodeBuf *c, int disp32) {
emit_rex(c,RAX,RBP,0); emit8(c,0x88); emit_modrm(c,2,RAX,RBP); emit32(c,disp32);
}
@@ -174,8 +220,8 @@ static void emit_add_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x01,0xD8},
static void emit_imul_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x0F,0xAF,0xC3},4); }
static void emit_idiv_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); }
static void emit_imod(CodeBuf *c) {
- emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5); // cqo; idiv rbx
- emit_mov_reg_reg(c, RAX, RDX); // remainder -> RAX
+ emitN(c,(uint8_t[]){0x48,0x99,0x48,0xF7,0xFB},5);
+ emit_mov_reg_reg(c, RAX, RDX);
}
static void emit_or_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x09,0xD8},3); }
static void emit_xor_rax_rbx(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x31,0xD8},3); }
@@ -192,53 +238,97 @@ static void emit_jcc_rel32(CodeBuf *c, uint8_t cc, int32_t rel) {
static void emit_test_rax_rax(CodeBuf *c) { emitN(c,(uint8_t[]){0x48,0x85,0xC0},3); }
static void emit_prologue(CodeBuf *c, int total_stack_size) {
- emit8(c,0x55); // push rbp
- emitN(c,(uint8_t[]){0x48,0x89,0xE5},3); // mov rbp, rsp
- emit_push_reg(c, RBX); // save callee-saved RBX
+ emit8(c,0x55);
+ emitN(c,(uint8_t[]){0x48,0x89,0xE5},3);
+ emit_push_reg(c, RBX);
int stack_bytes = ((total_stack_size+15)/16)*16;
if (stack_bytes > 0) {
- emitN(c,(uint8_t[]){0x48,0x81,0xEC},3); // sub rsp, imm32
+ emitN(c,(uint8_t[]){0x48,0x81,0xEC},3);
emit32(c,(uint32_t)stack_bytes);
}
}
static void emit_epilogue(CodeBuf *c) {
- emit_pop_reg(c, RBX); emit8(c,0xC9); emit8(c,0xC3); // restore RBX; leave; ret
+ emit_pop_reg(c, RBX); emit8(c,0xC9); emit8(c,0xC3);
}
static void emit_lea_rax_rbp_disp(CodeBuf *c, int disp32) {
emit8(c,0x48); emit8(c,0x8D); emit_modrm(c,2,RAX,RBP); emit32(c,(uint32_t)disp32);
}
+static void emit_movzx_rax_mem16(CodeBuf *c, int disp32) { /* Emit a zero-extending 16-bit load into RAX from [RBP + disp32]. */
+ emit8(c,0x48); emit8(c,0x0F); emit8(c,0xB7); emit_modrm(c,2,RAX,RBP); emit32(c,disp32); /* same shape as emit_movzx_rax_mem8 with opcode byte 0xB7 instead of 0xB6; mod=2 is followed by the 32-bit displacement */
+}
+static void emit_mov_mem16_rax(CodeBuf *c, int disp32) { /* Emit a 16-bit store of RAX's low word to [RBP + disp32]. */
+ emit8(c,0x66); emit_rex(c,RAX,RBP,0); emit8(c,0x89); emit_modrm(c,2,RAX,RBP); emit32(c,disp32); /* 0x66 prefix first, then a REX byte built with w=0, opcode 0x89, and a mod=2 ModRM followed by the 32-bit displacement */
+}
static void emit_load_rax_from_mem(CodeBuf *c, int disp32, int size) {
- if (size == 1) emit_movzx_rax_mem8(c, disp32); else emit_mov_reg_mem64(c, RAX, disp32);
+ if (size == 1) emit_movzx_rax_mem8(c, disp32);
+ else if (size == 2) emit_movzx_rax_mem16(c, disp32);
+ else emit_mov_reg_mem64(c, RAX, disp32);
}
static void emit_store_rax_to_mem(CodeBuf *c, int disp32, int size) {
- if (size == 1) emit_mov_mem8_rax(c, disp32); else emit_mov_mem64_reg(c, disp32, RAX);
+ if (size == 1) emit_mov_mem8_rax(c, disp32);
+ else if (size == 2) emit_mov_mem16_rax(c, disp32);
+ else emit_mov_mem64_reg(c, disp32, RAX);
}
-/* --- Variable and type helpers --- */
-
static int calculate_type_size(_TY type) {
if (type.ptr_level > 0) return 8;
- int base_size = (type.base == TY_CHAR) ? 1 : 8;
- size_t total = (size_t)base_size;
+ int slot;
+ switch (type.base) {
+ case TY_CHAR: slot = 1; break;
+ case TY_BOOL: slot = 1; break;
+ case TY_SHORT: slot = 2; break;
+ case TY_INT: slot = 8; break;
+ case TY_FLOAT: slot = 8; break;
+ case TY_LONG: slot = 8; break;
+ case TY_VOID: slot = 0; break;
+ default: slot = 8; break;
+ }
if (type.array_size > 0) {
- total *= (size_t)type.array_size;
+ size_t total = (size_t)slot * (size_t)type.array_size;
if (total > (size_t)INT_MAX) { fprintf(stderr, "[JIT] array size too large\n"); exit(1); }
+ return (int)total;
}
- return (int)total;
+ return slot;
}
static int align_offset(int offset, _TY type) {
- int align = (type.base == TY_CHAR) ? 1 : 8;
+ int align;
+ if (type.ptr_level > 0) { align = 8; }
+ else switch (type.base) {
+ case TY_CHAR: align = 1; break;
+ case TY_BOOL: align = 1; break;
+ case TY_SHORT: align = 2; break;
+ case TY_INT: align = 8; break;
+ case TY_FLOAT: align = 8; break;
+ case TY_LONG: align = 8; break;
+ default: align = 8; break;
+ }
if (offset % align == 0) return offset;
+ if (offset < 0)
+ return ((offset - (align - 1)) / align) * align;
return (offset / align) * align;
}
+static int ty_slot_size(_TY ty) { /* Bytes used to load/store one scalar of type `ty`; array_size is not consulted here. */
+ if (ty.ptr_level > 0) return 8; /* every pointer is 8 bytes whatever its base type */
+ switch (ty.base) {
+ case TY_CHAR: return 1;
+ case TY_BOOL: return 1;
+ case TY_SHORT: return 2;
+ case TY_INT: return 8; /* promoted to 64-bit slot */
+ case TY_FLOAT: return 8; /* stored in 64-bit slot (integer bits) */
+ case TY_LONG: return 8;
+ case TY_VOID: return 0; /* void occupies no storage */
+ default: return 8; /* any other base type is treated as a full 8-byte slot */
+ }
+}
+
static void reset_varmap(JIT *jit) {
for (VarMap *v = jit->var_list; v;) {
VarMap *n = v->next; free(v->name); free(v); v = n;
}
jit->var_list = NULL;
- jit->next_local_offset = -16; // after saved RBX at RBP-8, 16-byte aligned
+ jit->next_local_offset = -16;
}
static void add_var(JIT *jit, const char *name, _TY type) {
@@ -248,9 +338,9 @@ static void add_var(JIT *jit, const char *name, _TY type) {
if (!v->name) { fprintf(stderr, "[JIT] strdup failed in add_var\n"); free(v); exit(1); }
v->type = type;
int type_size = calculate_type_size(type);
+ jit->next_local_offset -= type_size;
jit->next_local_offset = align_offset(jit->next_local_offset, type);
v->offset = jit->next_local_offset;
- jit->next_local_offset -= type_size;
v->next = jit->var_list;
jit->var_list = v;
}
@@ -261,23 +351,60 @@ static int get_var_offset(JIT *jit, const char *name) {
fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1);
}
+static GlobalVar *find_global(JIT *jit, const char *name) { /* Look up a registered global by exact name. */
+ for (GlobalVar *g = jit->global_list; g; g = g->next) /* linear walk of the list, head first */
+ if (strcmp(g->name, name) == 0) return g;
+ return NULL; /* no global with that name */
+}
+
+/* Reserve storage for a new global inside jit->globals_buf (zero-filled by
+ * jit_init) and push a GlobalVar describing it onto jit->global_list.
+ *   name: global's name; duplicated, the caller keeps ownership of its copy.
+ *   type: declared type; array_size > 0 reserves that many element slots.
+ * Returns the new entry. Prints a message and exits on a duplicate name, a
+ * full or oversized data segment, or an allocation failure. */
+static GlobalVar *register_global(JIT *jit, const char *name, _TY type) {
+ if (find_global(jit, name)) {
+ fprintf(stderr, "[JIT] Duplicate global '%s'\n", name); exit(1);
+ }
+ int elem_sz = ty_slot_size((_TY){type.base, type.ptr_level > 0 ? type.ptr_level : 0, -1});
+ int n_elems = (type.array_size > 0) ? type.array_size : 1;
+ /* Size math is done in size_t: elem_sz * n_elems can overflow int for large arrays. */
+ size_t total_sz = (size_t)elem_sz * (size_t)n_elems;
+ /* The mask below needs a non-zero power of two. A 0-byte slot (TY_VOID) used to
+ * give align == 0, which forced off to 0 and rewound globals_used over the
+ * globals registered earlier. */
+ size_t align = (elem_sz >= 8) ? 8 : (elem_sz > 0 ? (size_t)elem_sz : 1);
+ size_t off = (jit->globals_used + align - 1) & ~(align - 1);
+ /* Compare against the remaining space so off + total_sz cannot wrap; the
+ * INT_MAX bound keeps the value representable in GlobalVar.size. */
+ if (total_sz > (size_t)INT_MAX || off > jit->globals_cap || total_sz > jit->globals_cap - off) {
+ fprintf(stderr, "[JIT] Globals segment full\n"); exit(1);
+ }
+ GlobalVar *g = (GlobalVar *)malloc(sizeof(GlobalVar));
+ if (!g) { fprintf(stderr, "[JIT] OOM\n"); exit(1); }
+ g->name = strdup(name);
+ /* Checked like add_var/register_func: find_global() would strcmp a NULL name. */
+ if (!g->name) { fprintf(stderr, "[JIT] strdup failed in register_global\n"); free(g); exit(1); }
+ g->addr = jit->globals_buf + off;
+ g->type = type;
+ g->size = (int)total_sz;
+ g->next = jit->global_list;
+ jit->global_list = g;
+ jit->globals_used = off + total_sz;
+ return g;
+}
+
static _TY get_var_type(JIT *jit, const char *name) {
for (VarMap *v = jit->var_list; v; v = v->next)
if (strcmp(v->name, name) == 0) return v->type;
+ /* Fall back to globals */
+ GlobalVar *g = find_global(jit, name);
+ if (g) return g->type;
fprintf(stderr, "[JIT] Unknown variable '%s'\n", name); exit(1);
}
-/* --- Type checking --- */
+static _TY get_func_ret_type(JIT *jit, const char *name);
+static void reset_varmap(JIT *jit);
static _TY get_expr_type(JIT *jit, _EX *expr) {
switch (expr->kind) {
case EX_NUMBER: return (_TY){TY_INT, 0, -1};
case EX_STRING: return (_TY){TY_CHAR, 1, -1};
- case EX_VAR: return get_var_type(jit, expr->name);
+ case EX_VAR: {
+ _TY t = get_var_type(jit, expr->name);
+ if (t.array_size > 0) return (_TY){t.base, t.ptr_level + 1, -1};
+ return t;
+ }
case EX_BINOP: {
_TY left_type = get_expr_type(jit, expr->binop.l);
- // All arithmetic, comparison, bitwise ops produce int
- (void)get_expr_type(jit, expr->binop.r);
+ (void)get_expr_type(jit, expr->binop.r);
switch (expr->binop.op) {
case TK_PLUS: case TK_MINUS: case TK_STAR: case TK_SLASH: case TK_PERCENT:
case TK_EQ: case TK_NE: case TK_LT: case TK_LE: case TK_GT: case TK_GE:
@@ -287,7 +414,13 @@ static _TY get_expr_type(JIT *jit, _EX *expr) {
default: return left_type;
}
}
- case EX_CALL: return (_TY){TY_INT, 0, -1};
+ case EX_CALL:
+ if (strcmp(expr->call.func_name, "syscall") == 0)
+ return (_TY){TY_LONG, 0, -1};
+ if (strcmp(expr->call.func_name, "__initlist__") == 0 ||
+ strcmp(expr->call.func_name, "__sizeof__") == 0)
+ return (_TY){TY_INT, 0, -1};
+ return get_func_ret_type(jit, expr->call.func_name);
case EX_INDEX: {
_TY t = get_expr_type(jit, expr->index.array);
if (t.array_size > 0) return (_TY){t.base, t.ptr_level, -1};
@@ -303,31 +436,53 @@ static _TY get_expr_type(JIT *jit, _EX *expr) {
_TY t = get_expr_type(jit, expr->addr.expr);
return (_TY){t.base, t.ptr_level+1, -1};
}
+ case EX_TERNARY:
+ return get_expr_type(jit, expr->ternary.then_expr);
+ case EX_CAST:
+ return expr->cast.to;
default: return (_TY){TY_INT, 0, -1};
}
}
+static int type_is_integer(_TY ty) { /* Return 1 when `ty` is a non-pointer integer-like type, 0 otherwise. */
+ if (ty.ptr_level > 0) return 0; /* pointers never count as integers here */
+ switch (ty.base) {
+ case TY_INT: case TY_CHAR: case TY_SHORT:
+ case TY_LONG: case TY_BOOL: return 1;
+ default: return 0; /* every other base type, e.g. TY_FLOAT or TY_VOID */
+ }
+}
+
static int types_compatible(_TY expected, _TY actual) {
if (expected.base == actual.base &&
expected.ptr_level == actual.ptr_level &&
expected.array_size == actual.array_size) return 1;
- // Allow untyped int literals to be assigned anywhere
if (actual.base == TY_INT && actual.ptr_level == 0 && actual.array_size == -1) return 1;
+ if (type_is_integer(expected) && type_is_integer(actual)) return 1;
+ if (expected.ptr_level > 0 && actual.ptr_level > 0) return 1;
+ if (expected.ptr_level > 0 && actual.array_size > 0 &&
+ expected.base == actual.base &&
+ expected.ptr_level == actual.ptr_level + 1) return 1;
return 0;
}
-/* --- Function registry --- */
-
-static void register_func(JIT *jit, const char *name) {
+static void register_func(JIT *jit, const char *name, _TY ret_type) {
FuncMap *f = (FuncMap *)malloc(sizeof(FuncMap));
if (!f) { fprintf(stderr, "[JIT] malloc failed in register_func\n"); exit(1); }
f->name = strdup(name);
if (!f->name) { fprintf(stderr, "[JIT] strdup failed in register_func\n"); free(f); exit(1); }
f->addr = NULL; f->size = 0; f->alloc_size = 0;
+ f->ret_type = ret_type;
f->next = jit->func_list;
jit->func_list = f;
}
+static _TY get_func_ret_type(JIT *jit, const char *name) { /* Return the recorded return type of the function called `name`. */
+ for (FuncMap *f = jit->func_list; f; f = f->next) /* linear walk of the registered functions */
+ if (strcmp(f->name, name) == 0) return f->ret_type;
+ return (_TY){TY_INT, 0, -1}; /* unknown name: fall back to plain int instead of reporting an error */
+}
+
static void set_func_addr(JIT *jit, const char *name, void *addr, size_t size, size_t alloc_size) {
for (FuncMap *f = jit->func_list; f; f = f->next) {
if (strcmp(f->name, name) != 0) continue;
@@ -352,17 +507,19 @@ static void set_func_addr(JIT *jit, const char *name, void *addr, size_t size, s
static void *get_func_addr(JIT *jit, const char *name) {
for (FuncMap *f = jit->func_list; f; f = f->next) {
if (strcmp(f->name, name) == 0)
- return f->addr ? f->addr : (void*)0xDEADBEEF; // placeholder; patched later
+ return f->addr ? f->addr : (void*)0xDEADBEEF;
}
fprintf(stderr, "[JIT] get_func_addr: unknown function '%s'\n", name); exit(1);
}
-/* --- Stack size calculation --- */
static int calculate_stack_size(_STN *s) {
int total = 0;
while (s) {
- if (s->kind == STK_VAR_DECL) total += calculate_type_size(s->var_decl.type);
+ if (s->kind == STK_VAR_DECL) {
+ int sz = calculate_type_size(s->var_decl.type);
+ total += (sz < 8 && s->var_decl.type.array_size <= 0) ? 8 : sz;
+ }
if (s->kind == STK_BLOCK) total += calculate_stack_size(s->body);
if (s->kind == STK_FOR) {
if (s->fr.init) total += calculate_stack_size(s->fr.init);
@@ -370,15 +527,21 @@ static int calculate_stack_size(_STN *s) {
}
s = s->n;
}
- return total;
+ return total + 8;
+}
+
+static int calculate_total_stack_size(_FN *f) { /* Stack bytes for function f: calculate_stack_size(f->body) plus space for its parameters. */
+ int param_space = 0;
+ for (int i = 0; i < f->pac && i < 6; i++) /* only the first six parameters are counted — presumably the register-passed ones; confirm against the call sequence */
+ param_space += (calculate_type_size(f->param_types[i]) < 8) ? 8 /* each counted parameter gets at least 8 bytes */
+ : calculate_type_size(f->param_types[i]);
+ return calculate_stack_size(f->body) + param_space;
+}
-/* --- Code generation (forward declarations) --- */
static void gen_expr_jit(JIT *jit, _EX *e);
static int gen_stmt_jit(JIT *jit, _STN *s);
-/* --- Statement generation --- */
static int gen_stmt_jit(JIT *jit, _STN *s) {
while (s) {
@@ -387,6 +550,19 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
case STK_VAR_DECL:
add_var(jit, s->var_decl.name, s->var_decl.type);
if (s->var_decl.init) {
+ if (s->var_decl.init->kind == EX_CALL &&
+ strcmp(s->var_decl.init->call.func_name, "__initlist__") == 0) {
+ _TY vty = s->var_decl.type;
+ int elem_sz = ty_slot_size((_TY){vty.base, 0, -1});
+ int arr_off = get_var_offset(jit, s->var_decl.name);
+ for (int ii = 0; ii < s->var_decl.init->call.argc; ii++) {
+ gen_expr_jit(jit, s->var_decl.init->call.args[ii]);
+ /* address of arr[ii] = RBP + arr_off + ii*elem_sz */
+ int elem_off = arr_off + ii * elem_sz;
+ emit_store_rax_to_mem(&jit->cb, elem_off, elem_sz);
+ }
+ break;
+ }
_TY init_type = get_expr_type(jit, s->var_decl.init);
if (!types_compatible(s->var_decl.type, init_type)) {
fprintf(stderr, "[JIT] Type mismatch: cannot assign %s to %s\n",
@@ -394,8 +570,7 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
exit(1);
}
gen_expr_jit(jit, s->var_decl.init);
- int sz = (s->var_decl.type.ptr_level > 0) ? 8
- : (s->var_decl.type.base == TY_CHAR) ? 1 : 8;
+ int sz = ty_slot_size(s->var_decl.type);
emit_store_rax_to_mem(&jit->cb, get_var_offset(jit, s->var_decl.name), sz);
}
break;
@@ -403,6 +578,15 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
case STK_ASSIGN: {
_EX *lhs = s->assign.lhs;
if (lhs->kind == EX_VAR) {
+ GlobalVar *gv = find_global(jit, lhs->name);
+ if (gv) {
+ gen_expr_jit(jit, s->assign.expr);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(gv->type);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ break;
+ }
int offset = get_var_offset(jit, lhs->name);
_TY type = get_var_type(jit, lhs->name);
_TY expr_type = get_expr_type(jit, s->assign.expr);
@@ -412,36 +596,74 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
exit(1);
}
gen_expr_jit(jit, s->assign.expr);
- int sz = (type.ptr_level > 0) ? 8 : (type.base == TY_CHAR) ? 1 : 8;
+ int sz = ty_slot_size(type);
emit_store_rax_to_mem(&jit->cb, offset, sz);
} else if (lhs->kind == EX_DEREF) {
gen_expr_jit(jit, lhs->deref.expr);
- emit_mov_reg_reg(&jit->cb, RBX, RAX); // RBX = address
- gen_expr_jit(jit, s->assign.expr); // RAX = value
- emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ emit_push_reg(&jit->cb, RAX); // save address on stack
+ gen_expr_jit(jit, s->assign.expr); // RAX = value (may clobber RBX)
+ emit_pop_reg(&jit->cb, RBX); // RBX = address
+ _TY ptr_ty = get_expr_type(jit, lhs->deref.expr);
+ int dsz = (ptr_ty.ptr_level > 1) ? 8 : ty_slot_size((_TY){ptr_ty.base, 0, -1});
+ if (dsz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
} else if (lhs->kind == EX_INDEX) {
if (lhs->index.array->kind != EX_VAR) {
gen_expr_jit(jit, lhs->index.array); break;
}
_TY var_type = get_var_type(jit, lhs->index.array->name);
- if (var_type.array_size <= 0) {
- fprintf(stderr, "[JIT] Cannot index non-array '%s'\n", lhs->index.array->name); exit(1);
+ int element_size = ty_slot_size((_TY){var_type.base, 0, -1});
+
+ if (var_type.ptr_level > 0) {
+ GlobalVar *gv_ptr2 = find_global(jit, lhs->index.array->name);
+ if (gv_ptr2) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_ptr2->addr);
+ emit_mov_reg_mem_reg(&jit->cb, RBX, RBX); /* deref global ptr */
+ } else {
+ int ptr_offset = get_var_offset(jit, lhs->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ }
+ gen_expr_jit(jit, lhs->index.index); // RAX = index
+ if (element_size > 1) {
+ emit_mov_reg_reg(&jit->cb, RCX, RBX);
+ emit_mov_reg_imm64(&jit->cb, RBX, element_size);
+ emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
+ emit_mov_reg_reg(&jit->cb, RBX, RCX);
+ }
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &ptr[index]
+ emit_push_reg(&jit->cb, RBX); // save address
+ gen_expr_jit(jit, s->assign.expr); // RAX = value
+ emit_pop_reg(&jit->cb, RBX); // restore address
+ if (element_size == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+
+ } else if (var_type.array_size > 0) {
+ GlobalVar *gv_arr2 = find_global(jit, lhs->index.array->name);
+ gen_expr_jit(jit, lhs->index.index); // RAX = index
+ emit_mov_reg_imm64(&jit->cb, RBX, element_size);
+ emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
+ if (gv_arr2) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_arr2->addr);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &arr[index]
+ } else {
+ int array_offset = get_var_offset(jit, lhs->index.array->name);
+ emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = RBP + array_offset + byte_offset
+ }
+ emit_push_reg(&jit->cb, RBX); // save address
+ gen_expr_jit(jit, s->assign.expr); // RAX = value
+ emit_pop_reg(&jit->cb, RBX); // RBX = address
+ if (element_size == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+
+ } else {
+ fprintf(stderr, "[JIT] Cannot index non-array/non-pointer '%s'\n", lhs->index.array->name); exit(1);
}
- int array_offset = get_var_offset(jit, lhs->index.array->name);
- int element_size = (var_type.base == TY_CHAR) ? 1 : 8;
- gen_expr_jit(jit, lhs->index.index); // RAX = index
- emit_mov_reg_imm64(&jit->cb, RBX, element_size);
- emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
- emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
- emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset
- emit_mov_reg_reg(&jit->cb, RAX, RBX);
- emit_mov_reg_reg(&jit->cb, RBX, RBP);
- emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index]
- emit_mov_reg_reg(&jit->cb, RBX, RAX);
- gen_expr_jit(jit, s->assign.expr); // RAX = value
- emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
} else {
fprintf(stderr, "[JIT] Unsupported assignment LHS kind %d\n", lhs->kind); exit(1);
@@ -479,38 +701,151 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
}
case STK_WHILE: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
size_t loop_start = jit->cb.len;
gen_expr_jit(jit, s->whl.cond);
emit_test_rax_rax(&jit->cb);
size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
gen_stmt_jit(jit, s->whl.body);
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
- int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6));
+ size_t break_target = jit->cb.len;
+ int32_t rel_end = (int32_t)(break_target - (jz_pos + 6));
memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4);
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
break;
}
case STK_FOR: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
if (s->fr.init) gen_stmt_jit(jit, s->fr.init);
size_t loop_start = jit->cb.len;
- if (s->fr.cond) {
+ size_t jz_pos_for = 0;
+ int has_cond = (s->fr.cond != NULL);
+ if (has_cond) {
gen_expr_jit(jit, s->fr.cond);
emit_test_rax_rax(&jit->cb);
- size_t jz_pos = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
- gen_stmt_jit(jit, s->fr.body);
- if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
- emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
- int32_t rel_end = (int32_t)(jit->cb.len - (jz_pos + 6));
- memcpy(jit->cb.buf + jz_pos + 2, &rel_end, 4);
- } else {
- gen_stmt_jit(jit, s->fr.body);
- if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
- emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
+ jz_pos_for = jit->cb.len; emit_jcc_rel32(&jit->cb, 0x04, 0);
+ }
+ gen_stmt_jit(jit, s->fr.body);
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
}
+ if (s->fr.step) gen_stmt_jit(jit, s->fr.step);
+ emit_jmp_rel32(&jit->cb, (int32_t)(loop_start - (jit->cb.len + 5)));
+ size_t break_target = jit->cb.len;
+ if (has_cond) {
+ int32_t rel_end = (int32_t)(break_target - (jz_pos_for + 6));
+ memcpy(jit->cb.buf + jz_pos_for + 2, &rel_end, 4);
+ }
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
break;
}
- default:
+ case STK_DOWHILE: {
+ int depth = jit->loop_depth++;
+ jit->break_patch_count[depth] = 0;
+ jit->cont_patch_count[depth] = 0;
+
+ size_t loop_start = jit->cb.len;
+ gen_stmt_jit(jit, s->dowhl.body);
+ /* continue → jump to condition check */
+ size_t cont_target = jit->cb.len;
+ for (int i = 0; i < jit->cont_patch_count[depth]; i++) {
+ size_t p = jit->cont_patches[depth][i];
+ int32_t r = (int32_t)(cont_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ gen_expr_jit(jit, s->dowhl.cond);
+ emit_test_rax_rax(&jit->cb);
+ emit_jcc_rel32(&jit->cb, 0x05, (int32_t)(loop_start - (jit->cb.len + 6)));
+ size_t break_target = jit->cb.len;
+ for (int i = 0; i < jit->break_patch_count[depth]; i++) {
+ size_t p = jit->break_patches[depth][i];
+ int32_t r = (int32_t)(break_target - (p + 5));
+ memcpy(jit->cb.buf + p + 1, &r, 4);
+ }
+ jit->loop_depth--;
+ break;
+ }
+
+ case STK_BREAK: {
+ if (jit->loop_depth == 0) {
+ fprintf(stderr, "[JIT] 'break' outside loop\n"); exit(1);
+ }
+ int depth = jit->loop_depth - 1;
+ size_t pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int cnt = jit->break_patch_count[depth];
+ if (cnt >= 256) { fprintf(stderr, "[JIT] Too many breaks\n"); exit(1); }
+ jit->break_patches[depth][cnt] = pos;
+ jit->break_patch_count[depth]++;
+ break;
+ }
+
+ case STK_CONTINUE: {
+ if (jit->loop_depth == 0) {
+ fprintf(stderr, "[JIT] 'continue' outside loop\n"); exit(1);
+ }
+ int depth = jit->loop_depth - 1;
+ size_t pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int cnt = jit->cont_patch_count[depth];
+ if (cnt >= 256) { fprintf(stderr, "[JIT] Too many continues\n"); exit(1); }
+ jit->cont_patches[depth][cnt] = pos;
+ jit->cont_patch_count[depth]++;
+ break;
+ }
+
+ case STK_GLOBAL: {
+ GlobalVar *gv = find_global(jit, s->global.name);
+ if (!gv) {
+ fprintf(stderr, "[JIT] Global '%s' not registered\n", s->global.name); exit(1);
+ }
+ if (s->global.init) {
+ if (s->global.init->kind == EX_CALL &&
+ strcmp(s->global.init->call.func_name, "__initlist__") == 0) {
+ int esz = ty_slot_size((_TY){gv->type.base, 0, -1});
+ for (int ii = 0; ii < s->global.init->call.argc; ii++) {
+ gen_expr_jit(jit, s->global.init->call.args[ii]);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)(gv->addr + ii * esz));
+ if (esz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ } else {
+ gen_expr_jit(jit, s->global.init);
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(gv->type);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ }
+ break;
+ }
fprintf(stderr, "[JIT] Unsupported statement kind %d\n", s->kind); exit(1);
}
s = s->n;
@@ -518,7 +853,6 @@ static int gen_stmt_jit(JIT *jit, _STN *s) {
return 0;
}
-/* --- Expression generation --- */
static void gen_expr_jit(JIT *jit, _EX *e) {
if (!e) return;
@@ -529,16 +863,110 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
break;
case EX_VAR: {
- int off = get_var_offset(jit, e->name);
- _TY ty = get_var_type(jit, e->name);
- int sz = (ty.ptr_level > 0) ? 8 : (ty.base == TY_CHAR) ? 1 : 8;
- if (sz == 1) emit_movzx_rax_mem8(&jit->cb, off);
- else emit_mov_reg_mem64(&jit->cb, RAX, off);
- break;
+ GlobalVar *gv = NULL;
+ for (VarMap *v = jit->var_list; v; v = v->next) {
+ if (strcmp(v->name, e->name) == 0) goto local_var;
+ }
+ gv = find_global(jit, e->name);
+ if (gv) {
+ _TY ty = gv->type;
+ if (ty.array_size > 0) {
+ /* Global array: load its absolute address */
+ emit_mov_reg_imm64(&jit->cb, RAX, (uint64_t)gv->addr);
+ break;
+ }
+ /* Global scalar: absolute address → load through it */
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv->addr);
+ int sz = ty_slot_size(ty);
+ if (sz == 1) emit_movzx_rax_mem8_base(&jit->cb, RBX);
+ else emit_mov_reg_mem_reg(&jit->cb, RAX, RBX);
+ break;
+ }
+ local_var: {
+ int off = get_var_offset(jit, e->name);
+ _TY ty = get_var_type(jit, e->name);
+ if (ty.array_size > 0) {
+ emit_lea_rax_rbp_disp(&jit->cb, off);
+ break;
+ }
+ int sz = ty_slot_size(ty);
+ if (sz == 1) emit_movzx_rax_mem8(&jit->cb, off);
+ else emit_mov_reg_mem64(&jit->cb, RAX, off);
+ break;
+ }
}
case EX_BINOP: {
- // Short-circuit AND
+ if (e->binop.op == TK_INC || e->binop.op == TK_DEC) {
+ int is_pre = (e->binop.r->value == -1);
+ int is_inc = (e->binop.op == TK_INC);
+ _EX *lval = e->binop.l;
+
+ _TY lty = get_expr_type(jit, lval);
+ int step = (lty.ptr_level > 0) ? ty_slot_size((_TY){lty.base,0,-1}) : 1;
+
+ gen_expr_jit(jit, lval); /* RAX = old value */
+ emit_mov_reg_reg(&jit->cb, RCX, RAX); /* RCX = old value */
+ emit_mov_reg_imm64(&jit->cb, RBX, step);
+ if (is_inc) emit_add_reg_reg(&jit->cb, RCX, RBX);
+ else emit_sub_reg_reg(&jit->cb, RCX, RBX);
+ if (lval->kind == EX_VAR) {
+ GlobalVar *gv_inc = find_global(jit, lval->name);
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ if (gv_inc) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_inc->addr);
+ int sz = ty_slot_size(lty);
+ if (sz == 1) emit_mov_mem8_reg_reg(&jit->cb, RBX);
+ else emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ } else {
+ int off = get_var_offset(jit, lval->name);
+ int sz = ty_slot_size(lty);
+ emit_store_rax_to_mem(&jit->cb, off, sz);
+ }
+ } else {
+ emit_push_reg(&jit->cb, RCX); /* save new value */
+ if (lval->kind == EX_DEREF) {
+ gen_expr_jit(jit, lval->deref.expr); /* RAX = address */
+ } else { /* EX_INDEX */
+ _TY vty = get_var_type(jit, lval->index.array->name);
+ int esz = ty_slot_size((_TY){vty.base,0,-1});
+ if (vty.ptr_level > 0) {
+ int poff = get_var_offset(jit, lval->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RAX, poff);
+ emit_push_reg(&jit->cb, RAX);
+ gen_expr_jit(jit, lval->index.index);
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RBX, esz);
+ emit_imul_rax_rbx(&jit->cb);
+ }
+ emit_pop_reg(&jit->cb, RBX);
+ emit_add_reg_reg(&jit->cb, RAX, RBX);
+ } else {
+ int aoff = get_var_offset(jit, lval->index.array->name);
+ gen_expr_jit(jit, lval->index.index);
+ emit_mov_reg_imm64(&jit->cb, RBX, esz);
+ emit_imul_rax_rbx(&jit->cb);
+ emit_mov_reg_imm64(&jit->cb, RBX, aoff);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RAX, RBX);
+ }
+ }
+ emit_mov_reg_reg(&jit->cb, RBX, RAX); /* RBX = address */
+ emit_pop_reg(&jit->cb, RCX); /* RCX = new val */
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ emit_mov_mem_reg_reg(&jit->cb, RBX, RAX);
+ }
+ if (is_pre) emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ if (!is_pre) {
+ emit_mov_reg_reg(&jit->cb, RAX, RCX);
+ emit_mov_reg_imm64(&jit->cb, RBX, step);
+ if (is_inc) emit_sub_reg_reg(&jit->cb, RAX, RBX);
+ else emit_add_reg_reg(&jit->cb, RAX, RBX);
+ }
+ break;
+ }
if (e->binop.op == TK_AND) {
gen_expr_jit(jit, e->binop.l);
emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb);
@@ -554,7 +982,6 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4);
break;
}
- // Short-circuit OR
if (e->binop.op == TK_OR) {
gen_expr_jit(jit, e->binop.l);
emit_mov_reg_reg(&jit->cb, RBX, RAX); emit_or_rax_rbx(&jit->cb);
@@ -570,18 +997,54 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
memcpy(jit->cb.buf + jmp_pos + 1, &rel, 4);
break;
}
- // All other binary ops: eval left -> push; eval right -> RBX = left
gen_expr_jit(jit, e->binop.l);
emit_push_reg(&jit->cb, RAX);
gen_expr_jit(jit, e->binop.r);
emit_pop_reg(&jit->cb, RBX); // RBX = left, RAX = right
switch (e->binop.op) {
- case TK_PLUS: emit_add_rax_rbx(&jit->cb); break;
- case TK_MINUS:
+ case TK_PLUS: {
+ _TY lt = get_expr_type(jit, e->binop.l);
+ _TY rt = get_expr_type(jit, e->binop.r);
+ int lptr = (lt.ptr_level > 0 || lt.array_size > 0);
+ int rptr = (rt.ptr_level > 0 || rt.array_size > 0);
+ if (lptr && !rptr) {
+ int esz = ty_slot_size((_TY){lt.base, 0, -1});
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ } else if (rptr && !lptr) {
+ int esz = ty_slot_size((_TY){rt.base, 0, -1});
+ emit_mov_reg_reg(&jit->cb, RCX, RAX); /* RCX = ptr */
+ emit_mov_reg_reg(&jit->cb, RAX, RBX); /* RAX = int */
+ emit_mov_reg_reg(&jit->cb, RBX, RCX); /* RBX = ptr */
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ }
+ emit_add_rax_rbx(&jit->cb);
+ break;
+ }
+ case TK_MINUS: {
+ _TY lt = get_expr_type(jit, e->binop.l);
+ _TY rt = get_expr_type(jit, e->binop.r);
+ int lptr = (lt.ptr_level > 0 || lt.array_size > 0);
+ int rptr = (rt.ptr_level > 0 || rt.array_size > 0);
+ if (lptr && !rptr) {
+ /* ptr - int: scale int by element size */
+ int esz = ty_slot_size((_TY){lt.base, 0, -1});
+ if (esz > 1) {
+ emit_mov_reg_imm64(&jit->cb, RCX, esz);
+ emitN(&jit->cb, (uint8_t[]){0x48,0x0F,0xAF,0xC1}, 4);
+ }
+ }
+ /* RAX=right(scaled), RBX=left: result = left - right */
emit_mov_reg_reg(&jit->cb, RCX, RAX);
emit_mov_reg_reg(&jit->cb, RAX, RBX);
emit_rex(&jit->cb, RCX, RAX, 1); emit8(&jit->cb, 0x29); emit_modrm(&jit->cb, 3, RCX, RAX);
break;
+ }
case TK_STAR: emit_imul_rax_rbx(&jit->cb); break;
case TK_SLASH:
emit_mov_reg_reg(&jit->cb, RCX, RAX);
@@ -626,24 +1089,84 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
case EX_CALL: {
+ if (strcmp(e->call.func_name, "__sizeof__") == 0) {
+ _EX *arg = e->call.args[0];
+ _TY ty;
+ if (arg->kind == EX_VAR) {
+ ty = get_var_type(jit, arg->name);
+ } else {
+ ty = get_expr_type(jit, arg);
+ }
+ int sz;
+ if (ty.array_size > 0) {
+ int esz = (ty.base == TY_CHAR) ? 1 : (ty.base == TY_SHORT) ? 2 :
+ (ty.base == TY_LONG) ? 8 : 4;
+ sz = esz * ty.array_size;
+ } else if (ty.ptr_level > 0) {
+ sz = 8;
+ } else {
+ switch (ty.base) {
+ case TY_CHAR: sz = 1; break;
+ case TY_SHORT: sz = 2; break;
+ case TY_LONG: sz = 8; break;
+ default: sz = 4; break; /* int, bool, float */
+ }
+ }
+ emit_mov_reg_imm64(&jit->cb, RAX, sz);
+ break;
+ }
+
+ if (strcmp(e->call.func_name, "syscall") == 0) {
+ int argc = e->call.argc;
+ if (argc < 1) {
+ fprintf(stderr, "[JIT] syscall() requires at least 1 argument (number)\n"); exit(1);
+ }
+ if (argc > 7) {
+ fprintf(stderr, "[JIT] syscall() supports at most 7 arguments\n"); exit(1);
+ }
+ const int sc_regs[6] = {RDI, RSI, RDX, R10, R8, R9};
+ int nargs = argc - 1; /* number of args after the syscall number */
+
+ for (int i = argc - 1; i >= 0; i--) {
+ gen_expr_jit(jit, e->call.args[i]);
+ emit_push_reg(&jit->cb, RAX);
+ }
+
+ emit_pop_reg(&jit->cb, RAX);
+
+ for (int i = 0; i < nargs; i++) {
+ emit_pop_reg(&jit->cb, sc_regs[i]);
+ }
+
+ emit8(&jit->cb, 0x0F);
+ emit8(&jit->cb, 0x05);
+ break;
+ }
+
int total_args = e->call.argc;
int stack_args = total_args > 6 ? total_args - 6 : 0;
+ int reg_args = total_args < 6 ? total_args : 6;
int padding = (stack_args % 2) ? 8 : 0;
const int arg_regs[6] = {RDI, RSI, RDX, RCX, R8, R9};
+ /* Push stack-passed args right-to-left (args[total-1] first). */
for (int i = total_args-1; i >= 6; i--) {
gen_expr_jit(jit, e->call.args[i]); emit_push_reg(&jit->cb, RAX);
}
- for (int i = 0; i < total_args && i < 6; i++) {
- gen_expr_jit(jit, e->call.args[i]); emit_mov_reg_reg(&jit->cb, arg_regs[i], RAX);
+ for (int i = reg_args-1; i >= 0; i--) {
+ gen_expr_jit(jit, e->call.args[i]); emit_push_reg(&jit->cb, RAX);
+ }
+
+ for (int i = 0; i < reg_args; i++) {
+ emit_pop_reg(&jit->cb, arg_regs[i]);
}
+
if (padding) {
emit8(&jit->cb,0x48); emit8(&jit->cb,0x83); emit8(&jit->cb,0xEC); emit8(&jit->cb,0x08);
}
void *addr = get_func_addr(jit, e->call.func_name);
if (addr == (void*)0xDEADBEEF) {
- // Forward call: emit placeholder imm64, record offset for later patching
emit_movabs_rax_imm64(&jit->cb, (uint64_t)0xDEADBEEF);
PatchEntry *patch = (PatchEntry*)malloc(sizeof(PatchEntry));
if (!patch) { fprintf(stderr, "[JIT] malloc failed (patch)\n"); exit(1); }
@@ -659,7 +1182,7 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
} else {
emit_movabs_rax_imm64(&jit->cb, (uint64_t)addr);
}
- emit8(&jit->cb, 0xFF); emit8(&jit->cb, 0xD0); // CALL RAX
+ emit8(&jit->cb, 0xFF); emit8(&jit->cb, 0xD0);
if (stack_args * 8 + padding > 0) {
emit8(&jit->cb,0x48); emit8(&jit->cb,0x81); emit8(&jit->cb,0xC4);
@@ -669,12 +1192,56 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
case EX_ADDR:
- if (e->addr.expr->kind != EX_VAR) {
+ if (e->addr.expr->kind == EX_VAR) {
+ GlobalVar *gv_addr = find_global(jit, e->addr.expr->name);
+ if (gv_addr) {
+ emit_mov_reg_imm64(&jit->cb, RAX, (uint64_t)gv_addr->addr);
+ } else {
+ emit_lea_rax_rbp_disp(&jit->cb, get_var_offset(jit, e->addr.expr->name));
+ }
+ } else {
fprintf(stderr, "[JIT] &expr: only &var supported\n"); exit(1);
}
- emit_lea_rax_rbp_disp(&jit->cb, get_var_offset(jit, e->addr.expr->name));
break;
+ case EX_TERNARY: {
+ gen_expr_jit(jit, e->ternary.cond);
+ emit_mov_reg_reg(&jit->cb, RBX, RAX);
+ emit_or_rax_rbx(&jit->cb);
+ size_t jz_pos = jit->cb.len;
+ emit_jcc_rel32(&jit->cb, 0x04, 0);
+ gen_expr_jit(jit, e->ternary.then_expr);
+ size_t jmp_pos = jit->cb.len;
+ emit_jmp_rel32(&jit->cb, 0);
+ int32_t rel_jz = (int32_t)(jit->cb.len - (jz_pos + 6));
+ memcpy(jit->cb.buf + jz_pos + 2, &rel_jz, 4);
+ gen_expr_jit(jit, e->ternary.else_expr);
+ int32_t rel_jmp = (int32_t)(jit->cb.len - (jmp_pos + 5));
+ memcpy(jit->cb.buf + jmp_pos + 1, &rel_jmp, 4);
+ break;
+ }
+
+ case EX_CAST: {
+ gen_expr_jit(jit, e->cast.expr);
+ _TY to = e->cast.to;
+ if (to.ptr_level > 0) break; /* pointer cast: value unchanged */
+ switch (to.base) {
+ case TY_CHAR:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6);
+ emit_modrm(&jit->cb, 3, RAX, RAX); /* movzx rax, al */
+ break;
+ case TY_SHORT:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB7);
+ emit_modrm(&jit->cb, 3, RAX, RAX); /* movzx rax, ax */
+ break;
+ case TY_INT:
+ emit8(&jit->cb, 0x48); emit8(&jit->cb, 0x63); emit_modrm(&jit->cb, 3, RAX, RAX);
+ break;
+ default: break; /* long/void/etc: no-op */
+ }
+ break;
+ }
+
case EX_DEREF:
gen_expr_jit(jit, e->deref.expr);
emit_rex(&jit->cb, RAX, RAX, 1); emit8(&jit->cb, 0x8B); emit_modrm(&jit->cb, 0, RAX, RAX);
@@ -692,12 +1259,17 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
gen_expr_jit(jit, e->index.array); break;
}
_TY var_type = get_var_type(jit, e->index.array->name);
- int element_size = (var_type.base == TY_CHAR) ? 1 : 8;
+ int element_size = ty_slot_size((_TY){var_type.base, 0, -1});
if (var_type.ptr_level > 0) {
- // Pointer indexing: load pointer, add index*element_size
- int ptr_offset = get_var_offset(jit, e->index.array->name);
- emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ GlobalVar *gv_ptr = find_global(jit, e->index.array->name);
+ if (gv_ptr) {
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_ptr->addr);
+ emit_mov_reg_mem_reg(&jit->cb, RBX, RBX); /* deref: RBX = *addr = pointer value */
+ } else {
+ int ptr_offset = get_var_offset(jit, e->index.array->name);
+ emit_mov_reg_mem64(&jit->cb, RBX, ptr_offset); // RBX = pointer
+ }
gen_expr_jit(jit, e->index.index); // RAX = index
if (element_size > 1) {
emit_mov_reg_reg(&jit->cb, RCX, RBX); // save pointer
@@ -714,17 +1286,22 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
} else if (var_type.array_size > 0) {
- // Array indexing: compute RBP-relative address = array_offset - index*element_size
- int array_offset = get_var_offset(jit, e->index.array->name);
+ GlobalVar *gv_arr = find_global(jit, e->index.array->name);
gen_expr_jit(jit, e->index.index); // RAX = index
emit_mov_reg_imm64(&jit->cb, RBX, element_size);
emit_imul_rax_rbx(&jit->cb); // RAX = byte offset
- emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
- emit_sub_reg_reg(&jit->cb, RBX, RAX); // RBX = array_offset - byte_offset
- emit_mov_reg_reg(&jit->cb, RAX, RBX);
- emit_mov_reg_reg(&jit->cb, RBX, RBP);
- emit_add_reg_reg(&jit->cb, RAX, RBX); // RAX = &arr[index]
- emit_mov_reg_reg(&jit->cb, RBX, RAX);
+ if (gv_arr) {
+ /* Global array: base is absolute address */
+ emit_mov_reg_imm64(&jit->cb, RBX, (uint64_t)gv_arr->addr);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = &arr[index]
+ } else {
+ int array_offset = get_var_offset(jit, e->index.array->name);
+ emit_mov_reg_imm64(&jit->cb, RBX, array_offset);
+ emit_add_reg_reg(&jit->cb, RBX, RAX);
+ emit_mov_reg_reg(&jit->cb, RAX, RBX);
+ emit_mov_reg_reg(&jit->cb, RBX, RBP);
+ emit_add_reg_reg(&jit->cb, RBX, RAX); // RBX = RBP + array_offset + byte_offset
+ }
if (element_size == 1) {
emit_rex(&jit->cb, RAX, RBX, 0); emit8(&jit->cb, 0x0F); emit8(&jit->cb, 0xB6);
emit_modrm(&jit->cb, 0, RAX, RBX);
@@ -743,22 +1320,35 @@ static void gen_expr_jit(JIT *jit, _EX *e) {
}
}
-/* --- Compile one function --- */
-
static void *gen_function_jit(JIT *jit, _FN *f, size_t *out_size) {
reset_varmap(jit);
jit->current_func_name = f->name;
- int total_stack_size = calculate_stack_size(f->body);
+ int total_stack_size = calculate_total_stack_size(f);
cb_init(&jit->cb);
emit_prologue(&jit->cb, total_stack_size);
const int param_regs[6] = {RDI, RSI, RDX, RCX, R8, R9};
+
for (int i = 0; i < f->pac && i < 6; i++) {
add_var(jit, f->params[i], f->param_types[i]);
emit_mov_mem64_reg(&jit->cb, get_var_offset(jit, f->params[i]), param_regs[i]);
}
+ int num_stack_params = f->pac > 6 ? f->pac - 6 : 0;
+ int stack_arg_padding = (num_stack_params % 2) ? 8 : 0;
+ int stack_arg_base = 16 + stack_arg_padding; /* offset of last-pushed (highest-index) stack arg */
+ for (int i = 6; i < f->pac; i++) {
+ VarMap *v = (VarMap *)malloc(sizeof(VarMap));
+ if (!v) { fprintf(stderr, "[JIT] malloc failed (stack param)\n"); exit(1); }
+ v->name = strdup(f->params[i]);
+ if (!v->name) { fprintf(stderr, "[JIT] strdup failed (stack param)\n"); free(v); exit(1); }
+ v->type = f->param_types[i];
+ v->offset = stack_arg_base + (i - 6) * 8;
+ v->next = jit->var_list;
+ jit->var_list = v;
+ }
+
int did_return = gen_stmt_jit(jit, f->body);
if (!did_return) {
emit_movabs_rax_imm64(&jit->cb, 0);
@@ -777,7 +1367,6 @@ static void *gen_function_jit(JIT *jit, _FN *f, size_t *out_size) {
return mem;
}
-/* --- Patch forward calls after all functions are compiled --- */
static void patch_function_calls(JIT *jit) {
for (PatchEntry *patch = jit->patch_list; patch; patch = patch->next) {
@@ -797,16 +1386,36 @@ static void patch_function_calls(JIT *jit) {
}
}
-/* --- Compile all functions and patch forward calls --- */
-
static void jit_compile_all(JIT *jit, _FN *fn_list) {
- for (_FN *cur = fn_list; cur; cur = cur->n)
- register_func(jit, cur->name);
+ /* First pass: register all globals so type lookups work during codegen */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) == 0) {
+ /* Extract variable name from __global_NAME__ */
+ const char *start = cur->name + 9;
+ size_t len = strlen(start);
+ if (len >= 2 && start[len-2] == '_' && start[len-1] == '_') len -= 2;
+ char *vname = strndup(start, len);
+ if (!vname) { fprintf(stderr, "[JIT] OOM\n"); exit(1); }
+ /* Get type from the STK_GLOBAL node */
+ _STN *gdecl = cur->body;
+ if (gdecl && gdecl->kind == STK_GLOBAL) {
+ register_global(jit, vname, gdecl->global.type);
+ }
+ free(vname);
+ }
+ }
+
+ /* Second pass: register all real functions (so forward calls work) */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) != 0)
+ register_func(jit, cur->name, cur->ret_type);
+ }
- // Compile in reverse order so callees are typically compiled before callers
+ /* Compile real functions in reverse order */
_FN *functions[64];
int count = 0;
for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) == 0) continue;
if (count >= 64) { fprintf(stderr, "[JIT] Too many functions (max 64)\n"); exit(1); }
functions[count++] = cur;
}
@@ -814,9 +1423,27 @@ static void jit_compile_all(JIT *jit, _FN *fn_list) {
gen_function_jit(jit, functions[i], NULL);
patch_function_calls(jit);
-}
-/* --- Entry point --- */
+ /* Run global initialisers in declaration order */
+ for (_FN *cur = fn_list; cur; cur = cur->n) {
+ if (strncmp(cur->name, "__global_", 9) != 0) continue;
+ reset_varmap(jit);
+ jit->current_func_name = cur->name;
+ cb_init(&jit->cb);
+ emit8(&jit->cb, 0x55); /* push rbp */
+ emitN(&jit->cb, (uint8_t[]){0x48,0x89,0xE5}, 3); /* mov rbp, rsp */
+ gen_stmt_jit(jit, cur->body);
+ emit8(&jit->cb, 0xC9); /* leave */
+ emit8(&jit->cb, 0xC3); /* ret */
+ size_t isz = jit->cb.len;
+ void *ibuf = mmap(NULL, isz, PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (ibuf == MAP_FAILED) { perror("mmap init"); exit(1); }
+ memcpy(ibuf, jit->cb.buf, isz);
+ ((void(*)(void))ibuf)();
+ munmap(ibuf, isz);
+ }
+}
static int jit_run(JIT *jit, int argc, char **argv) {
int (*main_func)(int, char **) = get_func_addr(jit, "main");
@@ -824,4 +1451,4 @@ static int jit_run(JIT *jit, int argc, char **argv) {
return main_func(argc, argv);
}
-#endif \ No newline at end of file
+#endif
diff --git a/src/lexer.h b/src/lexer.h
index ca2b790..3d36bf4 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -1,4 +1,3 @@
-
#ifndef INCLUDE_lexerlexer
#define INCLUDE_lexerlexer
@@ -15,7 +14,7 @@ typedef struct {
int col;
} _LX;
-/* Error reporting with line/column info */
+
static void perror_at(_LX *lx, const char *msg) {
fprintf(stderr, "[LEXER] Error at line %d, column %d: %s\n", lx->line, lx->col, msg);
exit(1);
@@ -56,7 +55,32 @@ sit lxnext(_LX *lx) {
lx->line++;
lx->col = 0;
lxget(lx);
- return lxnext(lx); // recurse to get next token
+ return lxnext(lx);
+ }
+
+ // Comments: // and /* */
+ if (c == '/') {
+ // peek one ahead
+ int saved = lx->pos;
+ lxget(lx);
+ char c2 = lxpeek(lx);
+ if (c2 == '/') {
+ while (lxpeek(lx) != '\n' && lxpeek(lx) != 0) lxget(lx);
+ return lxnext(lx);
+ } else if (c2 == '*') {
+ lxget(lx); // consume '*'
+ for (;;) {
+ char ch = lxpeek(lx);
+ if (ch == 0) break; // unterminated, let it go
+ lxget(lx);
+ if (ch == '\n') { lx->line++; lx->col = 0; }
+ if (ch == '*' && lxpeek(lx) == '/') { lxget(lx); break; }
+ }
+ return lxnext(lx);
+ } else {
+ // not a comment — put position back, fall through to normal '/' handling
+ lx->pos = saved;
+ }
}
if (isalpha(c) || c == '_') {
@@ -110,28 +134,85 @@ sit lxnext(_LX *lx) {
if (lxpeek(lx) == '|') { lxget(lx); lx->col++; return (_T){TK_OR,0,lx_strdup_checked(lx,"||")}; }
}
if (c == '"') {
- // String literal
- int start = lx->pos; // pos is already after the opening quote
+ // String literal — decode escape sequences into a fresh buffer
+ int cap = 64, dlen = 0;
+ char *decoded = (char*)malloc(cap);
+ if (!decoded) perror_at(lx, "out of memory");
while (lxpeek(lx) != '"' && lxpeek(lx) != 0) {
+ char ch;
if (lxpeek(lx) == '\\') {
lxget(lx); // consume backslash
- if (lxpeek(lx) != 0) lxget(lx); // consume escaped char
+ char esc = (char)lxget(lx);
+ switch (esc) {
+ case 'n': ch = '\n'; break;
+ case 't': ch = '\t'; break;
+ case 'r': ch = '\r'; break;
+ case '0': ch = '\0'; break;
+ case '\\': ch = '\\'; break;
+ case '"': ch = '"'; break;
+ case '\'': ch = '\''; break;
+ default: ch = esc; break;
+ }
} else {
- lxget(lx);
+ ch = (char)lxget(lx);
}
+ if (dlen + 2 > cap) {
+ cap *= 2;
+ char *tmp = (char*)realloc(decoded, cap);
+ if (!tmp) { free(decoded); perror_at(lx, "out of memory"); }
+ decoded = tmp;
+ }
+ decoded[dlen++] = ch;
}
+ decoded[dlen] = '\0';
if (lxpeek(lx) == '"') {
- int len = lx->pos - start; // length of content
lxget(lx); // consume closing quote
- char *text = strndup(lx->buf + start, len); // start at content
- if (!text) {
- perror_at(lx, "out of memory");
- }
- return (_T){TK_STRING, 0, text};
+ return (_T){TK_STRING, 0, decoded};
} else {
+ free(decoded);
perror_at(lx, "unterminated string literal");
}
}
+ if (c == '\'') {
+ /* Character literal: decode a single char or escape sequence and emit
+ * TK_CHARLIT with the integer value in .val so the parser never needs
+ * to reason about escape sequences. */
+ int char_val = 0;
+ if (lxpeek(lx) == '\\') {
+ lxget(lx); /* consume backslash */
+ char esc = lxget(lx);
+ switch (esc) {
+ case 'n': char_val = '\n'; break;
+ case 't': char_val = '\t'; break;
+ case 'r': char_val = '\r'; break;
+ case '0': char_val = '\0'; break;
+ case '\\': char_val = '\\'; break;
+ case '\'': char_val = '\''; break;
+ case '"': char_val = '"'; break;
+ case 'a': char_val = '\a'; break;
+ case 'b': char_val = '\b'; break;
+ case 'f': char_val = '\f'; break;
+ case 'v': char_val = '\v'; break;
+ case 'x': {
+ int h = 0;
+ while (isxdigit(lxpeek(lx))) {
+ char hc = lxget(lx);
+ h = h * 16 + (isdigit(hc) ? hc - '0' : tolower(hc) - 'a' + 10);
+ }
+ char_val = h;
+ break;
+ }
+ default: char_val = (unsigned char)esc; break;
+ }
+ } else if (lxpeek(lx) != '\'') {
+ char_val = (unsigned char)lxget(lx);
+ }
+ if (lxpeek(lx) != '\'') {
+ perror_at(lx, "unterminated or multi-character char literal");
+ }
+ lxget(lx); /* consume closing ' */
+ return (_T){TK_CHARLIT, char_val, NULL};
+ }
switch (c) {
case '(':
return (_T){TK_LPAREN, 0, lx_strdup_checked(lx,"(")};
@@ -144,12 +225,18 @@ sit lxnext(_LX *lx) {
case ';':
return (_T){TK_SEMI, 0, lx_strdup_checked(lx,";")};
case '+':
+ if (lxpeek(lx) == '+') { lxget(lx); lx->col++; return (_T){TK_INC, 0, lx_strdup_checked(lx,"++")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_PLUS_EQ,0, lx_strdup_checked(lx,"+=")}; }
return (_T){TK_PLUS, 0, lx_strdup_checked(lx,"+")};
case '-':
+ if (lxpeek(lx) == '-') { lxget(lx); lx->col++; return (_T){TK_DEC, 0, lx_strdup_checked(lx,"--")}; }
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_MINUS_EQ,0, lx_strdup_checked(lx,"-=")}; }
return (_T){TK_MINUS, 0, lx_strdup_checked(lx,"-")};
case '*':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_STAR_EQ, 0, lx_strdup_checked(lx,"*=")}; }
return (_T){TK_STAR, 0, lx_strdup_checked(lx,"*")};
case '/':
+ if (lxpeek(lx) == '=') { lxget(lx); lx->col++; return (_T){TK_SLASH_EQ,0, lx_strdup_checked(lx,"/=")}; }
return (_T){TK_SLASH, 0, lx_strdup_checked(lx,"/")};
case '&':
return (_T){TK_AMP, 0, lx_strdup_checked(lx,"&")};
@@ -169,6 +256,10 @@ sit lxnext(_LX *lx) {
return (_T){TK_LBRACKET, 0, lx_strdup_checked(lx,"[")};
case ']':
return (_T){TK_RBRACKET, 0, lx_strdup_checked(lx,"]")};
+ case '?':
+ return (_T){TK_QUESTION, 0, lx_strdup_checked(lx,"?")};
+ case ':':
+ return (_T){TK_COLON, 0, lx_strdup_checked(lx,":")};
default:
return (_T){TK_INVALID, 0, NULL};
}
diff --git a/src/main.c b/src/main.c
index 0f18359..0e4c9c3 100644
--- a/src/main.c
+++ b/src/main.c
@@ -187,7 +187,10 @@ void print_func(_FN *f, int indent) {
return;
indentf(indent);
- printf("Function(%s) params=[", f->name);
+ /* Print return type */
+ printf("Function(%s) -> [%s", f->name, tybase_name(f->ret_type.base));
+ for (int j = 0; j < f->ret_type.ptr_level; j++) printf("*");
+ printf("] params=[");
for (int i = 0; i < f->pac; i++) {
printf("%s", tybase_name(f->param_types[i].base));
for (int j = 0; j < f->param_types[i].ptr_level; j++)
diff --git a/src/parser.h b/src/parser.h
index 154e0ba..bd35323 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -1,5 +1,3 @@
-/* Parser: recursive-descent front end that builds an AST `_FN` list
- * from the token stream, handling expressions, statements, and functions. */
#ifndef INCLUDE_parser
#define INCLUDE_parser
@@ -10,13 +8,13 @@
#include <stdlib.h>
#include <string.h>
-/* Parser state */
+
typedef struct {
_LX *lx;
_T cur;
} _P;
-/* Error reporting with line/column info */
+
static void perror_expected(_LX *lx, const char *expected, const char *got) {
fprintf(stderr, "[PARSER] Error at line %d, column %d: expected %s, got %s\n",
lx->line, lx->col, expected, got);
@@ -37,7 +35,7 @@ static void pnext(_P *p) {
p->cur = lxnext(p->lx);
}
-/* Expect a particular token kind; on mismatch print helpful error and exit */
+
static void pexpect(_P *p, _TK tk) {
if (p->cur.kind != tk) {
const char *got = (p->cur.kind < TK__COUNT) ? _TN[p->cur.kind] : "<?>";
@@ -45,10 +43,35 @@ static void pexpect(_P *p, _TK tk) {
}
}
-static _STN *pstmt(_P *p); // forward
+
+ /* Return 1 when `k` is one of the six type keywords (int, char, void,
+ * float, long, short) and 0 for every other token kind. */
+ static int is_type_token(_TK k) {
+ switch (k) {
+ case TK_INT:
+ case TK_CHAR:
+ case TK_VOID:
+ case TK_FLOAT:
+ case TK_LONG:
+ case TK_SHORT:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+
+ /* Parse a type: one base-type keyword followed by zero or more '*'.
+ * p->cur must be the keyword on entry; every '*' that follows is consumed
+ * and counted in ptr_level. array_size is always -1 in the result.
+ * Any other token is reported through perror_expected(). */
+ static _TY pparse_type(_P *p) {
+ _TY ty = { .ptr_level = 0, .array_size = -1 };
+ switch (p->cur.kind) {
+ case TK_INT: ty.base = TY_INT; break;
+ case TK_CHAR: ty.base = TY_CHAR; break;
+ case TK_VOID: ty.base = TY_VOID; break;
+ case TK_FLOAT: ty.base = TY_FLOAT; break;
+ case TK_LONG: ty.base = TY_LONG; break;
+ case TK_SHORT: ty.base = TY_SHORT; break;
+ default:
+ perror_expected(p->lx, "type keyword", _TN[p->cur.kind]);
+ ty.base = TY_INT; /* unreachable */
+ }
+ pnext(p);
+ for (; p->cur.kind == TK_STAR; pnext(p)) {
+ ty.ptr_level++;
+ }
+ return ty;
+ }
+
+static _STN *pstmt(_P *p);
static _STN *pblock(_P *p) {
pexpect(p, TK_LBRACE);
- pnext(p); /* consume '{' */
+ pnext(p);
_STN *head = NULL;
_STN **cur = &head;
@@ -60,16 +83,17 @@ static _STN *pblock(_P *p) {
}
pexpect(p, TK_RBRACE);
- pnext(p); /* consume '}' */
+ pnext(p);
return st_block(head);
}
-static _EX *pexpr(_P *p); // forward
-static _EX *pterm(_P *p); // forward
-static _EX *punary(_P *p); // forward
-static _EX *pfact(_P *p); // fwd
+static _EX *pexpr(_P *p);
+static _EX *pterm(_P *p);
+static _EX *punary(_P *p);
+static _EX *pfact(_P *p);
+static _EX *pparse_charlit(_P *p);
+
-/* Precedence climbing layers */
static _EX *pmul(_P *p);
static _EX *padd(_P *p);
static _EX *pshift(_P *p);
@@ -92,19 +116,42 @@ static _STN *passign_or_expr_stmt(_P *p) {
_EX *rhs = pexpr(p);
return st_assign(lhs, rhs);
}
+
+ _TK compound_op = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) compound_op = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) compound_op = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) compound_op = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) compound_op = TK_SLASH;
+ if (compound_op != TK_INVALID) {
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid compound-assignment target\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ _EX *combined = ex_binop(lhs, compound_op, rhs);
+ return st_assign(lhs, combined);
+ }
return st_expr(lhs);
}
+ /* Apply postfix operators to an already-parsed expression `e`.
+ * Repeats until neither form applies:
+ * - `[expr]` wraps e in an index node via ex_index();
+ * - `++` / `--` wraps e in ex_binop(e, TK_INC/TK_DEC, ex_number(1)); the
+ * literal 1 marks the postfix form (punary uses -1 for prefix).
+ * Increment/decrement is only accepted on a variable, dereference or index
+ * expression, the same targets compound assignment accepts; anything else
+ * prints an error and exits.
+ * Returns the wrapped expression, or `e` unchanged. */
static _EX *ppostfix_from_expr(_P *p, _EX *e) {
- // Handle array indexing: expr[expr]
- while (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
- _EX *index = pexpr(p);
- pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
- e = ex_index(e, index);
- }
-
+ for (;;) {
+ if (p->cur.kind == TK_LBRACKET) {
+ pnext(p);
+ _EX *index = pexpr(p);
+ pexpect(p, TK_RBRACKET);
+ pnext(p);
+ e = ex_index(e, index);
+ } else if (p->cur.kind == TK_INC || p->cur.kind == TK_DEC) {
+ /* The code generator stores the new value back through the operand,
+ * so reject operands that are not storage locations (`5++`, `f()++`). */
+ if (!(e->kind == EX_VAR || e->kind == EX_DEREF || e->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid increment/decrement target\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ _TK op = p->cur.kind;
+ pnext(p);
+ _EX *one = ex_number(1);
+ e = ex_binop(e, op, one);
+ } else {
+ break;
+ }
+ }
return e;
}
@@ -113,7 +160,7 @@ static _EX *ppostfix(_P *p) {
return ppostfix_from_expr(p, e);
}
-/* ---- FACTOR ---- */
+
static _EX *pfact(_P *p) {
if (p->cur.kind == TK_NUMBER) {
_EX *n = ex_number(p->cur.val);
@@ -126,11 +173,11 @@ static _EX *pfact(_P *p) {
fprintf(stderr, "[PARSER] Error: strdup failed for identifier\n");
exit(1);
}
- pnext(p); /* consume identifier */
+ pnext(p);
if (p->cur.kind == TK_LPAREN) {
- /* function call */
- pnext(p); /* consume '(' */
+
+ pnext(p);
_EX **args = NULL;
int argc = 0;
@@ -145,7 +192,7 @@ static _EX *pfact(_P *p) {
args[argc++] = pexpr(p);
if (p->cur.kind == TK_COMMA) {
- pnext(p); /* skip comma */
+ pnext(p);
} else {
break;
}
@@ -153,7 +200,7 @@ static _EX *pfact(_P *p) {
}
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
+ pnext(p);
return ex_call(name, args, argc);
}
@@ -162,11 +209,21 @@ static _EX *pfact(_P *p) {
return ppostfix_from_expr(p, var_expr);
} else if (p->cur.kind == TK_LPAREN) {
- pnext(p); /* consume '(' */
+ pnext(p);
+
+ if (p->cur.kind == TK_INT || p->cur.kind == TK_CHAR ||
+ p->cur.kind == TK_SHORT || p->cur.kind == TK_LONG ||
+ p->cur.kind == TK_VOID || p->cur.kind == TK_FLOAT) {
+ _TY to = pparse_type(p);
+ pexpect(p, TK_RPAREN);
+ pnext(p);
+ _EX *sub = punary(p);
+ return ppostfix_from_expr(p, ex_cast(to, sub));
+ }
_EX *n = pexpr(p);
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
- return n;
+ pnext(p);
+ return ppostfix_from_expr(p, n);
} else if (p->cur.kind == TK_STRING) {
char *str = strdup(p->cur.lxem);
@@ -177,12 +234,14 @@ static _EX *pfact(_P *p) {
pnext(p);
return ex_string(str);
+ } else if (p->cur.kind == TK_CHARLIT) {
+ return pparse_charlit(p);
+
} else {
perror_unexpected(p->lx, "factor", _TN[p->cur.kind]);
}
}
-/* ---- TERM ---- */
static _EX *pmul(_P *p) {
_EX *n = punary(p);
while (p->cur.kind == TK_STAR || p->cur.kind == TK_SLASH || p->cur.kind == TK_PERCENT) {
@@ -276,7 +335,6 @@ static _EX *plogand(_P *p) {
while (p->cur.kind == TK_AND) {
pnext(p);
_EX *r = pbitor(p);
- // keep as a binary op node TK_AND; codegen will short-circuit
n = ex_binop(n, TK_AND, r);
}
return n;
@@ -292,60 +350,115 @@ static _EX *plogor(_P *p) {
return n;
}
-/* ---- EXPR ---- */
-static _EX *pexpr(_P *p) { return plogor(p); }
+ /* Parse a full expression: a logical-OR expression, optionally followed by
+ * `? then : else`. Both arms are parsed with pexpr itself, so a ternary may
+ * appear inside either arm. A missing ':' prints an error and exits. */
+ static _EX *pexpr(_P *p) {
+ _EX *cond = plogor(p);
+ if (p->cur.kind != TK_QUESTION) {
+ return cond;
+ }
+ pnext(p); /* skip '?' */
+ _EX *on_true = pexpr(p);
+ if (p->cur.kind != TK_COLON) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: expected ':' in ternary\n",
+ p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p); /* skip ':' */
+ _EX *on_false = pexpr(p);
+ return ex_ternary(cond, on_true, on_false);
+ }
-/* ---- UNARY ---- */
static _EX *punary(_P *p) {
- if (p->cur.kind == TK_AMP) { // &expr
+ if (p->cur.kind == TK_AMP) {
pnext(p);
_EX *sub = punary(p);
NEW_EX(EX_ADDR);
e->addr.expr = sub;
return e;
}
- if (p->cur.kind == TK_STAR) { // *expr
+ if (p->cur.kind == TK_STAR) {
pnext(p);
_EX *sub = punary(p);
NEW_EX(EX_DEREF);
e->deref.expr = sub;
return e;
}
- if (p->cur.kind == TK_BANG) { // !expr -> (expr == 0)
+ if (p->cur.kind == TK_BANG) {
pnext(p);
_EX *sub = punary(p);
_EX *zero = ex_number(0);
return ex_binop(sub, TK_EQ, zero);
}
+ if (p->cur.kind == TK_MINUS) {
+ pnext(p);
+ _EX *sub = punary(p);
+ return ex_binop(ex_number(0), TK_MINUS, sub);
+ }
+ if (p->cur.kind == TK_INC || p->cur.kind == TK_DEC) {
+ _TK op = (p->cur.kind == TK_INC) ? TK_INC : TK_DEC;
+ pnext(p);
+ _EX *sub = punary(p);
+ return ex_binop(sub, op, ex_number(-1));
+ }
+ if (p->cur.kind == TK_SIZEOF) {
+ pnext(p); /* consume sizeof */
+ pexpect(p, TK_LPAREN); pnext(p);
+ int sz = 0;
+ if (p->cur.kind == TK_INT || p->cur.kind == TK_CHAR ||
+ p->cur.kind == TK_SHORT || p->cur.kind == TK_LONG ||
+ p->cur.kind == TK_VOID || p->cur.kind == TK_FLOAT) {
+ _TY ty = pparse_type(p);
+ if (ty.ptr_level > 0) sz = 8;
+ else {
+ switch (ty.base) {
+ case TY_CHAR: sz = 1; break;
+ case TY_SHORT: sz = 2; break;
+ case TY_INT: sz = 4; break;
+ case TY_LONG: sz = 8; break;
+ default: sz = 4; break;
+ }
+ }
+ if (ty.array_size > 0) sz *= ty.array_size;
+ } else {
+ /* sizeof(expr) — resolved at JIT time via __sizeof__ built-in */
+ _EX *inner = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ _EX **args = (_EX **)malloc(sizeof(_EX *));
+ if (!args) { fprintf(stderr, "[PARSER] OOM in sizeof\n"); exit(1); }
+ args[0] = inner;
+ return ex_call(strdup("__sizeof__"), args, 1);
+ }
+ pexpect(p, TK_RPAREN); pnext(p);
+ return ex_number(sz);
+ }
return ppostfix(p);
}
-char parse_char_literal(_P *p) {
- /* assume current token is TK_SQUOTE */
- pnext(p); // consume opening '
- if (p->cur.kind != TK_IDENT) {
- perror_expected(p->lx, "character literal", _TN[p->cur.kind]);
- }
- char c = p->cur.lxem[0];
- pnext(p); // consume char
- pexpect(p, TK_SQUOTE);
- pnext(p); // consume closing '
- return c;
+static _EX *pparse_charlit(_P *p) { /* turn the current token's val into an EX_NUMBER node and advance */
+    const int code = p->cur.val; /* read the value before pnext() replaces p->cur */
+    pnext(p);
+    _EX *lit = calloc(1, sizeof *lit);
+    if (lit == NULL) { fprintf(stderr, "[PARSER] Error: calloc failed in pparse_charlit\n"); exit(1); }
+    lit->value = code;
+    lit->kind = EX_NUMBER;
+    return lit;
+}
-_EX *ex_charlit(char c) {
- _EX *e = malloc(sizeof(_EX));
- if (!e) {
- fprintf(stderr, "[PARSER] Error: malloc failed in ex_charlit\n");
- exit(1);
- }
- e->kind = EX_NUMBER;
- e->value = c;
- return e;
+
+static char parse_char_literal(_P *p) { /* returns the current token's val narrowed to char, then advances */
+ char c = (char)p->cur.val; /* NOTE(review): no check that p->cur is a TK_CHARLIT - callers presumably guarantee it */
+ pnext(p);
+ return c;
+}
+static _EX *ex_charlit(char c) { /* build a zero-initialised EX_NUMBER node holding c; exits on allocation failure */
+ _EX *e = (_EX *)calloc(1, sizeof(_EX));
+ if (!e) { fprintf(stderr, "[PARSER] Error: calloc failed in ex_charlit\n"); exit(1); }
+ e->kind = EX_NUMBER;
+ e->value = (unsigned char)c; /* goes through unsigned char, so the stored value is never negative */
+ return e;
+}
static _STN *pstmt(_P *p) {
switch (p->cur.kind) {
case TK_IF: {
- pnext(p); // consume if
+ pnext(p);
pexpect(p, TK_LPAREN); pnext(p);
_EX *cond = pexpr(p);
pexpect(p, TK_RPAREN); pnext(p);
@@ -362,6 +475,31 @@ static _STN *pstmt(_P *p) {
_STN *body = pstmt(p);
return st_while(cond, body);
}
+ case TK_DO: {
+ pnext(p); /* consume 'do' */
+ _STN *body = pstmt(p);
+ if (p->cur.kind != TK_WHILE) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: expected 'while' after do body\n",
+ p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p); /* consume 'while' */
+ pexpect(p, TK_LPAREN); pnext(p);
+ _EX *cond = pexpr(p);
+ pexpect(p, TK_RPAREN); pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_dowhile(body, cond);
+ }
+ case TK_BREAK: {
+ pnext(p); /* consume 'break' */
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_break();
+ }
+ case TK_CONTINUE: {
+ pnext(p); /* consume 'continue' */
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_continue();
+ }
case TK_FOR: {
pnext(p);
pexpect(p, TK_LPAREN); pnext(p);
@@ -380,11 +518,13 @@ static _STN *pstmt(_P *p) {
_STN *body = pstmt(p);
return st_for(init, cond, step, body);
}
+ case TK_VOID:
+ case TK_FLOAT:
+ case TK_LONG:
+ case TK_SHORT:
case TK_INT:
case TK_CHAR: {
- _TY vtype = { .base = (p->cur.kind == TK_INT) ? TY_INT : TY_CHAR, .ptr_level = 0, .array_size = -1 };
- pnext(p); /* consume type */
- while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+ _TY vtype = pparse_type(p);
if (p->cur.kind != TK_IDENT) {
perror_expected(p->lx, "variable name after type", _TN[p->cur.kind]);
@@ -396,27 +536,36 @@ static _STN *pstmt(_P *p) {
}
pnext(p);
- // Parse array size: [N] or []
if (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
+ pnext(p);
if (p->cur.kind == TK_NUMBER) {
vtype.array_size = p->cur.val;
- pnext(p); // consume number
+ pnext(p);
} else {
vtype.array_size = 0; // unknown size []
}
pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
+ pnext(p);
}
_EX *init = NULL;
if (p->cur.kind == TK_ASSIGN) {
pnext(p);
- if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_SQUOTE) {
- /* parse char literal */
- char c = parse_char_literal(p); // implement this to consume quotes and return char
- init = ex_charlit(c);
+ if (p->cur.kind == TK_LBRACE) {
+ pnext(p);
+ _EX **elems = NULL;
+ int nelems = 0;
+ while (p->cur.kind != TK_RBRACE && p->cur.kind != TK_EOF) {
+ elems = (_EX **)realloc(elems, sizeof(_EX *) * (nelems + 1));
+ if (!elems) { fprintf(stderr, "[PARSER] OOM in initializer list\n"); exit(1); }
+ elems[nelems++] = pexpr(p);
+ if (p->cur.kind == TK_COMMA) pnext(p);
+ }
+ pexpect(p, TK_RBRACE); pnext(p);
+ init = ex_call(strdup("__initlist__"), elems, nelems);
+ } else if (vtype.ptr_level == 0 && vtype.base == TY_CHAR && p->cur.kind == TK_CHARLIT) {
+ init = pparse_charlit(p);
} else {
init = pexpr(p);
}
@@ -435,16 +584,25 @@ static _STN *pstmt(_P *p) {
if (p->cur.kind == TK_ASSIGN) {
pnext(p);
_EX *rhs = pexpr(p);
- pexpect(p, TK_SEMI);
- pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
return st_assign(lhs_or_call, rhs);
}
- pexpect(p, TK_SEMI);
- pnext(p);
+ _TK cop = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) cop = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) cop = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) cop = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) cop = TK_SLASH;
+ if (cop != TK_INVALID) {
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_assign(lhs_or_call, ex_binop(lhs_or_call, cop, rhs));
+ }
+ pexpect(p, TK_SEMI); pnext(p);
return st_expr(lhs_or_call);
}
case TK_RETURN: {
- pnext(p); /* consume 'return' */
+ pnext(p);
_EX *expr = pexpr(p);
pexpect(p, TK_SEMI);
pnext(p); /* consume ';' */
@@ -452,24 +610,35 @@ static _STN *pstmt(_P *p) {
return st_return(expr);
}
case TK_LBRACE: {
- /* block statement */
- return pblock(p); /* pblock will consume the braces */
+ return pblock(p);
}
default: {
- /* General expression or assignment starting with unary, paren, etc. */
_EX *lhs = pexpr(p);
if (p->cur.kind == TK_ASSIGN) {
- /* only allow assignment to var or *expr */
if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid assignment target - only variables, dereferenced expressions, and array indexing allowed\n", p->lx->line, p->lx->col);
exit(1);
}
pnext(p);
_EX *rhs = pexpr(p);
- pexpect(p, TK_SEMI);
- pnext(p);
+ pexpect(p, TK_SEMI); pnext(p);
return st_assign(lhs, rhs);
}
+ _TK dcop = TK_INVALID;
+ if (p->cur.kind == TK_PLUS_EQ) dcop = TK_PLUS;
+ else if (p->cur.kind == TK_MINUS_EQ) dcop = TK_MINUS;
+ else if (p->cur.kind == TK_STAR_EQ) dcop = TK_STAR;
+ else if (p->cur.kind == TK_SLASH_EQ) dcop = TK_SLASH;
+ if (dcop != TK_INVALID) {
+ if (!(lhs->kind == EX_VAR || lhs->kind == EX_DEREF || lhs->kind == EX_INDEX)) {
+ fprintf(stderr, "[PARSER] Error at line %d, column %d: invalid compound-assignment target\n", p->lx->line, p->lx->col);
+ exit(1);
+ }
+ pnext(p);
+ _EX *rhs = pexpr(p);
+ pexpect(p, TK_SEMI); pnext(p);
+ return st_assign(lhs, ex_binop(lhs, dcop, rhs));
+ }
pexpect(p, TK_SEMI);
pnext(p);
return st_expr(lhs);
@@ -478,11 +647,13 @@ static _STN *pstmt(_P *p) {
}
static _FN *pfunc(_P *p) {
- pexpect(p, TK_INT);
- pnext(p); /* consume 'int' */
+ if (!is_type_token(p->cur.kind)) {
+ perror_expected(p->lx, "return type for function", _TN[p->cur.kind]);
+ }
+ _TY ret_type = pparse_type(p);
if (p->cur.kind != TK_IDENT) {
- perror_expected(p->lx, "function name after 'int'", _TN[p->cur.kind]);
+ perror_expected(p->lx, "function name after return type", _TN[p->cur.kind]);
}
char *name = NULL;
if (p->cur.lxem)
@@ -491,54 +662,42 @@ static _FN *pfunc(_P *p) {
fprintf(stderr, "[PARSER] Error: strdup failed for function name\n");
exit(1);
}
- pnext(p); /* consume function name */
+ pnext(p);
- /* expect '(' then consume it */
pexpect(p, TK_LPAREN);
- pnext(p); /* consume '(' */
+ pnext(p);
- /* parse optional parameter list */
char **params = NULL;
_TY *params_types = NULL;
int pac = 0;
if (p->cur.kind != TK_RPAREN) {
- /* at least one parameter expected */
while (1) {
- /* first token should be a type, e.g. int */
- _TY vtype;
- if (p->cur.kind == TK_INT)
- vtype = (_TY){.base=TY_INT,.ptr_level=0,.array_size=-1};
- else if (p->cur.kind == TK_CHAR)
- vtype = (_TY){.base=TY_CHAR,.ptr_level=0,.array_size=-1};
- else {
+ if (!is_type_token(p->cur.kind)) {
perror_expected(p->lx, "type in parameter list", _TN[p->cur.kind]);
}
- pnext(p); /* consume type */
- while (p->cur.kind == TK_STAR) { vtype.ptr_level++; pnext(p); }
+ _TY vtype = pparse_type(p);
- /* next should be an identifier (variable name) */
if (p->cur.kind != TK_IDENT) {
perror_expected(p->lx, "identifier after type in parameter list", _TN[p->cur.kind]);
}
- // Parse array size for parameters after the identifier
char *param_name = strdup(p->cur.lxem);
if (!param_name) {
fprintf(stderr, "[PARSER] Error: strdup failed for parameter name\n");
exit(1);
}
- pnext(p); // consume identifier
+ pnext(p);
if (p->cur.kind == TK_LBRACKET) {
- pnext(p); // consume '['
+ pnext(p);
if (p->cur.kind == TK_NUMBER) {
vtype.array_size = p->cur.val;
- pnext(p); // consume number
+ pnext(p);
} else {
vtype.array_size = 0; // unknown size []
}
pexpect(p, TK_RBRACKET);
- pnext(p); // consume ']'
+ pnext(p);
}
char **new_params = (char **)realloc(params, sizeof(char *) * (pac + 1));
@@ -556,7 +715,7 @@ static _FN *pfunc(_P *p) {
pac++;
if (p->cur.kind == TK_COMMA) {
- pnext(p); /* consume comma and continue */
+ pnext(p);
continue;
} else {
break;
@@ -565,11 +724,11 @@ static _FN *pfunc(_P *p) {
}
pexpect(p, TK_RPAREN);
- pnext(p); /* consume ')' */
+ pnext(p);
- _STN *body = pblock(p); /* pblock consumes the block braces and returns */
+ _STN *body = pblock(p);
- return fn_new(name, params, params_types, pac, body);
+ return fn_new(name, params, params_types, pac, body, ret_type);
}
static _FN *parse_program(_LX *lx) {
@@ -579,9 +738,98 @@ static _FN *parse_program(_LX *lx) {
_FN **cur = &head;
while (pstate.cur.kind != TK_EOF) {
- _FN *f = pfunc(&pstate);
- *cur = f;
- cur = &f->n;
+ if (!is_type_token(pstate.cur.kind)) {
+ perror_unexpected(pstate.lx, "top-level declaration", _TN[pstate.cur.kind]);
+ }
+
+ /* Consume "type ident" plus an optional "[N]" first, then decide: a following '(' starts a function, anything else is a global variable */
+ _TY gtype = pparse_type(&pstate);
+
+ if (pstate.cur.kind != TK_IDENT) {
+ perror_expected(pstate.lx, "identifier after type", _TN[pstate.cur.kind]);
+ }
+ char *gname = strdup(pstate.cur.lxem);
+ if (!gname) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ pnext(&pstate); /* consume ident */
+
+ if (pstate.cur.kind == TK_LBRACKET) {
+ pnext(&pstate);
+ if (pstate.cur.kind == TK_NUMBER) {
+ gtype.array_size = pstate.cur.val; pnext(&pstate);
+ } else {
+ gtype.array_size = 0;
+ }
+ pexpect(&pstate, TK_RBRACKET); pnext(&pstate);
+ }
+
+ if (pstate.cur.kind == TK_LPAREN) {
+ pnext(&pstate); /* consume '(' */
+
+ char **params = NULL;
+ _TY *param_types = NULL;
+ int pac = 0;
+
+ if (pstate.cur.kind != TK_RPAREN) {
+ do {
+ if (!is_type_token(pstate.cur.kind)) break;
+ _TY ptype = pparse_type(&pstate);
+ char *pname = NULL;
+ if (pstate.cur.kind == TK_IDENT) {
+ pname = strdup(pstate.cur.lxem);
+ if (!pname) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ pnext(&pstate);
+ } else {
+ pname = strdup("_anon");
+ }
+ params = realloc(params, sizeof(char*) * (pac+1));
+ param_types = realloc(param_types, sizeof(_TY) * (pac+1));
+ if (!params || !param_types) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ params[pac] = pname;
+ param_types[pac] = ptype;
+ pac++;
+ if (pstate.cur.kind == TK_COMMA) pnext(&pstate); else break;
+ } while (1);
+ }
+ pexpect(&pstate, TK_RPAREN); pnext(&pstate);
+
+ _STN *body = pblock(&pstate);
+
+ _FN *f = fn_new(gname, params, param_types, pac, body, gtype);
+ *cur = f;
+ cur = &f->n;
+ } else {
+ _EX *init = NULL;
+ if (pstate.cur.kind == TK_ASSIGN) {
+ pnext(&pstate);
+ if (pstate.cur.kind == TK_LBRACE) {
+ /* initializer list for global array */
+ pnext(&pstate);
+ _EX **elems = NULL; int nelems = 0;
+ while (pstate.cur.kind != TK_RBRACE && pstate.cur.kind != TK_EOF) {
+ elems = realloc(elems, sizeof(_EX*) * (nelems+1));
+ if (!elems) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ elems[nelems++] = pexpr(&pstate);
+ if (pstate.cur.kind == TK_COMMA) pnext(&pstate);
+ }
+ pexpect(&pstate, TK_RBRACE); pnext(&pstate);
+ init = ex_call(strdup("__initlist__"), elems, nelems);
+ } else {
+ init = pexpr(&pstate);
+ }
+ }
+ pexpect(&pstate, TK_SEMI); pnext(&pstate);
+
+
+ char *gfunc_name = malloc(strlen(gname) + 12);
+ if (!gfunc_name) { fprintf(stderr, "[PARSER] OOM\n"); exit(1); }
+ sprintf(gfunc_name, "__global_%s__", gname);
+
+ _STN *gdecl = st_global(gname, gtype, init); /* gdecl takes ownership of gname */
+ _FN *gf = fn_new(gfunc_name, NULL, NULL, 0, gdecl,
+ (_TY){TY_VOID,0,-1});
+ *cur = gf;
+ cur = &gf->n;
+ }
}
if (pstate.cur.lxem) {
diff --git a/src/token.h b/src/token.h
index e99294e..1e57662 100644
--- a/src/token.h
+++ b/src/token.h
@@ -3,12 +3,14 @@
#include <string.h>
-/* Token and type definitions shared by lexer, parser, and JIT.
- * The type system uses a base kind plus pointer/array decorations. */
typedef enum {
- TY_INT = 0,
- TY_CHAR = 1,
- TY_BOOL = 2,
+ TY_INT = 0,
+ TY_CHAR = 1,
+ TY_BOOL = 2,
+ TY_VOID = 3,
+ TY_FLOAT = 4,
+ TY_LONG = 5,
+ TY_SHORT = 6,
} _TYBASE;
typedef struct {
@@ -19,9 +21,13 @@ typedef struct {
static inline const char *tybase_name(_TYBASE b) {
switch (b) {
- case TY_INT: return "int";
- case TY_CHAR: return "char";
- case TY_BOOL: return "bool";
+ case TY_INT: return "int";
+ case TY_CHAR: return "char";
+ case TY_BOOL: return "bool";
+ case TY_VOID: return "void";
+ case TY_FLOAT: return "float";
+ case TY_LONG: return "long";
+ case TY_SHORT: return "short";
default: return "?";
}
}
@@ -30,6 +36,10 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_BEGIN, "begin") \
_(TK_INT, "int") \
_(TK_CHAR, "char") \
+ _(TK_VOID, "void") \
+ _(TK_FLOAT, "float") \
+ _(TK_LONG, "long") \
+ _(TK_SHORT, "short") \
_(TK_ASSIGN, "=") \
_(TK_EQ, "==") \
_(TK_NE, "!=") \
@@ -42,6 +52,9 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_ELSE, "else") \
_(TK_FOR, "for") \
_(TK_WHILE, "while") \
+ _(TK_DO, "do") \
+ _(TK_BREAK, "break") \
+ _(TK_CONTINUE, "continue") \
_(TK_BOOL, "bool") \
_(TK_IDENT, "ident") \
_(TK_NUMBER, "number") \
@@ -69,6 +82,15 @@ static inline const char *tybase_name(_TYBASE b) {
_(TK_LBRACKET, "[") \
_(TK_RBRACKET, "]") \
_(TK_STRING, "string") \
+ _(TK_PLUS_EQ, "+=") \
+ _(TK_MINUS_EQ, "-=") \
+ _(TK_STAR_EQ, "*=") \
+ _(TK_SLASH_EQ, "/=") \
+ _(TK_INC, "++") \
+ _(TK_DEC, "--") \
+ _(TK_SIZEOF, "sizeof") \
+ _(TK_QUESTION, "?") \
+ _(TK_COLON, ":") \
_(TK_EOF, "eof") \
_(TK_COMMA, ",") \
_(TK_INVALID, "invalid") \
@@ -88,8 +110,8 @@ static const char *_TN[] = {
typedef struct {
_TK kind;
- int val; // only valid if kind == TK_NUMBER
- char *lxem; // malloc’d lexeme string, or NULL
+ int val;
+ char *lxem;
} _T;
static _TK checkkw(const char *kw) {
diff --git a/test_runner.sh b/test_runner.sh
deleted file mode 100755
index d478f55..0000000
--- a/test_runner.sh
+++ /dev/null
@@ -1,334 +0,0 @@
-#!/bin/bash
-
-# CCDJIT Comprehensive Test Runner
-# This script provides a more advanced testing interface with better organization
-
-# Colors for pretty output
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-BLUE='\033[0;34m'
-PURPLE='\033[0;35m'
-CYAN='\033[0;36m'
-WHITE='\033[1;37m'
-GRAY='\033[0;37m'
-NC='\033[0m' # No Color
-
-# Test configuration
-declare -A test_map
-test_map["./tests/add2.c"]=3
-test_map["./tests/arithmetic.c"]=28
-test_map["./tests/comparison.c"]=5
-test_map["./tests/logical.c"]=3
-test_map["./tests/if_else.c"]=12
-test_map["./tests/while_loop.c"]=10
-test_map["./tests/for_loop.c"]=6
-test_map["./tests/function_call.c"]=17
-test_map["./tests/recursive.c"]=24
-test_map["./tests/bitwise.c"]=58
-test_map["./tests/pointers.c"]=184
-test_map["./tests/arrays.c"]=150
-test_map["./tests/strings.c"]=5
-test_map["./tests/edge_cases.c"]=12
-
-# Error tests (expected to fail with non-zero exit code)
-declare -A error_test_map
-error_test_map["./tests/error_syntax.c"]="syntax"
-error_test_map["./tests/error_undefined_var.c"]="undefined"
-error_test_map["./tests/error_type_mismatch.c"]="type"
-
-# Test categories
-declare -A test_categories
-test_categories["./tests/add2.c"]="Basic"
-test_categories["./tests/arithmetic.c"]="Arithmetic"
-test_categories["./tests/comparison.c"]="Comparison"
-test_categories["./tests/logical.c"]="Logical"
-test_categories["./tests/if_else.c"]="Control Flow"
-test_categories["./tests/while_loop.c"]="Control Flow"
-test_categories["./tests/for_loop.c"]="Control Flow"
-test_categories["./tests/function_call.c"]="Functions"
-test_categories["./tests/recursive.c"]="Functions"
-test_categories["./tests/bitwise.c"]="Bitwise"
-test_categories["./tests/pointers.c"]="Pointers"
-test_categories["./tests/arrays.c"]="Arrays"
-test_categories["./tests/strings.c"]="Strings"
-test_categories["./tests/edge_cases.c"]="Edge Cases"
-test_categories["./tests/error_syntax.c"]="Error Tests"
-test_categories["./tests/error_undefined_var.c"]="Error Tests"
-test_categories["./tests/error_type_mismatch.c"]="Error Tests"
-
-print_header() {
- echo -e "${CYAN}╔══════════════════════════════════════════════════════════════════════════════╗${NC}"
- echo -e "${CYAN}║${WHITE} CCDJIT Comprehensive Test Suite ${CYAN}║${NC}"
- echo -e "${CYAN}╚══════════════════════════════════════════════════════════════════════════════╝${NC}"
- echo
-}
-
-print_test_header() {
- local test_file="$1"
- local category="${test_categories[$test_file]}"
- local test_name=$(basename "$test_file" .c)
- echo -e "${BLUE}┌─ ${WHITE}$test_name${BLUE} (${YELLOW}$category${BLUE})${NC}"
- echo -e "${BLUE}│${NC} File: $test_file"
-
- if [ -n "${test_map[$test_file]}" ]; then
- echo -e "${BLUE}│${NC} Expected: ${test_map[$test_file]}"
- elif [ -n "${error_test_map[$test_file]}" ]; then
- echo -e "${BLUE}│${NC} Expected: ${RED}ERROR${NC} (${error_test_map[$test_file]})"
- fi
-}
-
-print_test_result() {
- local test_file="$1"
- local expected="$2"
- local actual="$3"
- local status="$4"
- local is_error="$5"
-
- if [ "$status" == "PASS" ]; then
- if [ "$is_error" == "true" ]; then
- echo -e "${BLUE}│${NC} Result: ${GREEN}✓ PASSED${NC} (correctly failed with exit code $actual)"
- else
- echo -e "${BLUE}│${NC} Result: ${GREEN}✓ PASSED${NC} (got $actual)"
- fi
- echo -e "${BLUE}└─${GREEN} SUCCESS${NC}"
- else
- if [ "$is_error" == "true" ]; then
- echo -e "${BLUE}│${NC} Result: ${RED}✗ FAILED${NC} (expected error, got exit code $actual)"
- else
- echo -e "${BLUE}│${NC} Result: ${RED}✗ FAILED${NC} (expected $expected, got $actual)"
- fi
- echo -e "${BLUE}└─${RED} FAILURE${NC}"
- fi
- echo
-}
-
-print_summary() {
- local passed="$1"
- local failed="$2"
- local total="$3"
- local error_passed="$4"
- local error_failed="$5"
- local error_total="$6"
-
- echo -e "${CYAN}╔══════════════════════════════════════════════════════════════════════════════╗${NC}"
- echo -e "${CYAN}║${WHITE} Test Summary ${CYAN}║${NC}"
- echo -e "${CYAN}╠══════════════════════════════════════════════════════════════════════════════╣${NC}"
-
- if [ "$failed" -eq 0 ] && [ "$error_failed" -eq 0 ]; then
- echo -e "${CYAN}║${GREEN} All tests passed! ${WHITE}(${passed}/${total} functional, ${error_passed}/${error_total} error)${CYAN} ║${NC}"
- else
- echo -e "${CYAN}║${GREEN} Functional Tests: ${passed}/${total}${CYAN} │ ${RED}Failed: ${failed}${CYAN} ║${NC}"
- echo -e "${CYAN}║${GREEN} Error Tests: ${error_passed}/${error_total}${CYAN} │ ${RED}Failed: ${error_failed}${CYAN} ║${NC}"
- fi
-
- echo -e "${CYAN}╚══════════════════════════════════════════════════════════════════════════════╝${NC}"
-}
-
-run_functional_tests() {
- local count=0
- local passed=0
- local failed=0
-
- echo -e "${PURPLE}Running Functional Tests...${NC}" >&2
- echo >&2
-
- for key in "${!test_map[@]}"; do
- print_test_header "$key" >&2
-
- local output
- local exit_code
- local actual_result
- output=$(./bin/ccdjit "$key" 2>&1)
- exit_code=$?
-
- # Extract the actual result from "JIT returned: X" line
- actual_result=$(echo "$output" | grep "JIT returned:" | sed 's/.*JIT returned: //' | tail -1)
- if [ -z "$actual_result" ]; then
- actual_result=$exit_code
- fi
-
- if [ "${test_map[$key]}" = "$actual_result" ]; then
- print_test_result "$key" "${test_map[$key]}" "$actual_result" "PASS" "false" >&2
- passed=$((passed+1))
- else
- print_test_result "$key" "${test_map[$key]}" "$actual_result" "FAIL" "false" >&2
- failed=$((failed+1))
- fi
- count=$((count+1))
- done
-
- echo "$passed $failed $count"
-}
-
-run_error_tests() {
- local count=0
- local passed=0
- local failed=0
-
- echo -e "${PURPLE}Running Error Tests...${NC}" >&2
- echo >&2
-
- for key in "${!error_test_map[@]}"; do
- print_test_header "$key" >&2
-
- local output
- local exit_code
- output=$(./bin/ccdjit "$key" 2>&1)
- exit_code=$?
-
- if [ $exit_code -ne 0 ]; then
- print_test_result "$key" "ERROR" "$exit_code" "PASS" "true" >&2
- passed=$((passed+1))
- else
- print_test_result "$key" "ERROR" "$exit_code" "FAIL" "true" >&2
- failed=$((failed+1))
- fi
- count=$((count+1))
- done
-
- echo "$passed $failed $count"
-}
-
-run_all_tests() {
- print_header
-
- local func_results
- local error_results
-
- func_results=($(run_functional_tests))
- error_results=($(run_error_tests))
-
- # Ensure all stderr output is flushed before showing summary
- sleep 0.1
-
- local func_passed=${func_results[0]}
- local func_failed=${func_results[1]}
- local func_total=${func_results[2]}
-
- local error_passed=${error_results[0]}
- local error_failed=${error_results[1]}
- local error_total=${error_results[2]}
-
- print_summary $func_passed $func_failed $func_total $error_passed $error_failed $error_total
-}
-
-run_single_test() {
- local test_file="$1"
-
- if [ -z "${test_map[$test_file]}" ] && [ -z "${error_test_map[$test_file]}" ]; then
- echo -e "${RED}Error: Test file '$test_file' not found in test suite${NC}"
- exit 1
- fi
-
- print_header
- print_test_header "$test_file"
-
- local output
- local exit_code
- local actual_result
- output=$(./bin/ccdjit "$test_file" 2>&1)
- exit_code=$?
-
- # Extract the actual result from "JIT returned: X" line
- actual_result=$(echo "$output" | grep "JIT returned:" | sed 's/.*JIT returned: //' | tail -1)
- if [ -z "$actual_result" ]; then
- actual_result=$exit_code
- fi
-
- if [ -n "${test_map[$test_file]}" ]; then
- # Functional test
- if [ "${test_map[$test_file]}" = "$actual_result" ]; then
- print_test_result "$test_file" "${test_map[$test_file]}" "$actual_result" "PASS" "false"
- else
- print_test_result "$test_file" "${test_map[$test_file]}" "$actual_result" "FAIL" "false"
- fi
- else
- # Error test
- if [ $exit_code -ne 0 ]; then
- print_test_result "$test_file" "ERROR" "$exit_code" "PASS" "true"
- else
- print_test_result "$test_file" "ERROR" "$exit_code" "FAIL" "true"
- fi
- fi
-}
-
-list_tests() {
- echo -e "${CYAN}Available Tests:${NC}"
- echo
-
- echo -e "${GREEN}Functional Tests:${NC}"
- for key in "${!test_map[@]}"; do
- local category="${test_categories[$key]}"
- local test_name=$(basename "$key" .c)
- echo -e " ${WHITE}$test_name${NC} (${YELLOW}$category${NC}) - Expected: ${test_map[$key]}"
- done
-
- echo
- echo -e "${RED}Error Tests:${NC}"
- for key in "${!error_test_map[@]}"; do
- local category="${test_categories[$key]}"
- local test_name=$(basename "$key" .c)
- echo -e " ${WHITE}$test_name${NC} (${YELLOW}$category${NC}) - Expected: ERROR"
- done
-}
-
-show_help() {
- echo -e "${CYAN}CCDJIT Test Runner${NC}"
- echo
- echo "Usage: $0 [COMMAND] [OPTIONS]"
- echo
- echo "Commands:"
- echo " all Run all tests (functional + error)"
- echo " functional Run only functional tests"
- echo " error Run only error tests"
- echo " test <file> Run a specific test file"
- echo " list List all available tests"
- echo " help Show this help message"
- echo
- echo "Examples:"
- echo " $0 all"
- echo " $0 functional"
- echo " $0 test ./tests/arithmetic.c"
- echo " $0 list"
-}
-
-# Main script logic
-case "$1" in
- "all")
- run_all_tests
- ;;
- "functional")
- print_header
- func_results=($(run_functional_tests))
- sleep 0.1
- print_summary ${func_results[0]} ${func_results[1]} ${func_results[2]} 0 0 0
- ;;
- "error")
- print_header
- error_results=($(run_error_tests))
- sleep 0.1
- print_summary 0 0 0 ${error_results[0]} ${error_results[1]} ${error_results[2]}
- ;;
- "test")
- if [ -z "$2" ]; then
- echo -e "${RED}Error: Please specify a test file${NC}"
- exit 1
- fi
- run_single_test "$2"
- ;;
- "list")
- list_tests
- ;;
- "help"|"-h"|"--help")
- show_help
- ;;
- "")
- show_help
- ;;
- *)
- echo -e "${RED}Error: Unknown command '$1'${NC}"
- echo "Use '$0 help' for usage information"
- exit 1
- ;;
-esac
diff --git a/tests/getc.c b/tests/getc.c
new file mode 100644
index 0000000..ef3b860
--- /dev/null
+++ b/tests/getc.c
@@ -0,0 +1,131 @@
+int g_passed = 0;
+int g_failed = 0;
+
+int getc() { /* fetch one byte from fd 0 through syscall 0; -1 when the call returns <= 0 */
+    char byte;
+    int got = syscall(0, 0, &byte, 1);
+    if (got > 0) return (int)byte;
+    return -1;
+}
+
+int write_char(int c) {
+    /* the syscall takes an address, so c (cast to char) is parked in a local first */
+    char out = (char)c;
+    syscall(1, 1, &out, 1);
+    return 1;
+}
+
+int write_str(char *s) { /* send the 0-terminated string s to fd 1; returns its length */
+    int len = 0;
+    while (s[len] != 0) { len = len + 1; }
+    syscall(1, 1, s, len);
+    return len;
+}
+
+int write_int(int n) { /* print n in decimal; returns the number of characters written */
+    char digits[24]; /* filled least-significant digit first */
+    int count = 0;
+    int neg = n < 0;
+    if (neg) { write_char('-'); n = -n; }
+    if (n == 0) { write_char('0'); return 1; }
+    while (n > 0) { digits[count] = (char)('0' + n % 10); count++; n = n / 10; }
+    int pos = count;
+    while (pos > 0) { pos--; write_char(digits[pos]); }
+    return neg + count;
+}
+
+int check(char *name, int got, int want) { /* print PASS/FAIL for one result and bump g_passed or g_failed */
+    if (got != want) {
+        write_str("FAIL "); write_str(name);
+        write_str(": got "); write_int(got);
+        write_str(" want "); write_int(want);
+        write_char('\n');
+        g_failed++;
+        return 0;
+    }
+    write_str("PASS "); write_str(name); write_char('\n');
+    g_passed++;
+    return 1;
+}
+
+int read_line(char *buf, int cap) { /* store up to cap-1 chars in buf, stopping at newline or -1 from getc */
+    int len = 0;
+    int ch;
+    while (len < cap - 1) {
+        ch = getc();
+        if (ch == '\n' || ch == -1) break; /* the terminator is consumed but not stored */
+        buf[len] = (char)ch;
+        len = len + 1;
+    }
+    buf[len] = 0; /* always 0-terminate */
+    return len;
+}
+
+int test1() { /* the first byte read must be 65; the remainder of that line is discarded */
+    int first = getc();
+    int c = first;
+    while (c != -1 && c != '\n') { c = getc(); }
+    return check("single_char_is_65", first, 65);
+}
+
+int test2() { /* echo the next input line; it must be 5 characters long */
+    char line[64];
+    int len = read_line(line, 64);
+    write_str(line); write_char('\n');
+    return check("readline_length", len, 5);
+}
+
+int test3() { /* consume input until two newlines have been seen (or getc gives -1) */
+    int seen = 0;
+    int ch;
+    while (seen < 2) {
+        ch = getc();
+        if (ch == -1) break;
+        if (ch == '\n') { seen = seen + 1; }
+    }
+    return check("count_lines", seen, 2);
+}
+
+int test4() { /* accumulate leading decimal digits from the input; expects the number 42 */
+    int acc = 0;
+    int ch;
+    while (1) {
+        ch = getc();
+        if (ch > '9' || ch < '0') break; /* first non-digit (including -1) ends the number */
+        acc = acc * 10 + (ch - '0');
+    }
+    return check("parse_int", acc, 42);
+}
+
+int test5() { /* upper-case a-z in the next input line, echo it, and expect length 5 */
+    char line[64];
+    int len = read_line(line, 64);
+    int k = 0;
+    while (k < len) {
+        char ch = line[k];
+        if (ch <= 'z' && ch >= 'a') { line[k] = ch - 'a' + 'A'; }
+        k = k + 1;
+    }
+    write_str(line); write_char('\n');
+    return check("toupper_length", len, 5);
+}
+
+int test6() {
+    /* one more getc() here is expected to report -1 */
+    return check("eof_is_minus1", getc(), -1);
+}
+
+int main() { /* run test1..test6 in order, print the tallies, return 0 only when nothing failed */
+    test1();
+    test2();
+    test3();
+    test4();
+    test5();
+    test6();
+
+    write_char('\n');
+    write_int(g_passed); write_str(" passed, ");
+    write_int(g_failed); write_str(" failed\n");
+    if (g_failed == 0) return 0;
+    return 1;
+}
diff --git a/tests/new_arrays.c b/tests/new_arrays.c
new file mode 100644
index 0000000..5b7b59b
--- /dev/null
+++ b/tests/new_arrays.c
@@ -0,0 +1,31 @@
+int strlen_p(char *s) { /* number of chars before the terminating 0 */
+    int len = 0;
+    while (s[len] != 0) len++;
+    return len;
+}
+
+int sum(int *arr, int n) { /* total of arr[0] .. arr[n-1] */
+    int idx = 0;
+    int total = 0;
+    while (idx < n) { total += arr[idx]; idx++; }
+    return total;
+}
+
+int main() { /* returns how many of the five checks in `score` hold (5 when all do) */
+ int nums[5] = {10, 20, 30, 40, 50}; /* brace initializer list on a local array */
+ int total = sum(nums, 5); /* 10+20+30+40+50 = 150 */
+
+ char msg[4] = {'O', 'K', '\n', 0};
+ syscall(1, 1, msg, 3); /* writes the 3 bytes "OK\n" to fd 1 */
+
+ int *p = nums;
+ p += 2; /* compound assignment on a pointer; the check below expects it to land on nums[2] */
+ int mid = *p;
+
+ int n = sizeof(nums); /* sizeof(expr) form; the check below expects 20 */
+ int c = sizeof(char); /* sizeof(type) form */
+ int ip = sizeof(int);
+
+ int score = (total == 150) + (mid == 30) + (n == 20) + (c == 1) + (ip == 4);
+ return score;
+}
diff --git a/tests/printf.c b/tests/printf.c
new file mode 100644
index 0000000..fc0d68d
--- /dev/null
+++ b/tests/printf.c
@@ -0,0 +1,164 @@
+char out_buf[4096];
+int out_pos = 0;
+
+int buf_putc(char c) {
+    /* append c to out_buf; once all 4096 slots are used, write them to fd 1 and start over */
+    out_buf[out_pos] = c;
+    out_pos = out_pos + 1;
+    if (out_pos < 4096) return 1; /* still room: nothing to write yet */
+    syscall(1, 1, out_buf, out_pos);
+    out_pos = 0;
+    return 1;
+}
+
+int buf_flush() {
+    /* write whatever is pending in out_buf to fd 1; does nothing when the buffer is empty */
+    if (out_pos <= 0) return 0;
+    syscall(1, 1, out_buf, out_pos);
+    out_pos = 0;
+    return 0;
+}
+
+
+int my_strlen(char *s) { /* count chars up to (not including) the terminating 0 */
+    int len = 0;
+    while (s[len] != 0) { len = len + 1; }
+    return len;
+}
+
+int print_str(char *s) { /* queue every char of s via buf_putc; returns how many were queued */
+    int n = 0;
+    while (s[n] != 0) { buf_putc(s[n]); n = n + 1; }
+    return n;
+}
+
+
+int print_int(int n) { /* queue n in decimal; returns the number of characters queued */
+    char digits[24]; /* least-significant digit first */
+    int count = 0;
+    int neg = n < 0;
+    if (neg) { buf_putc('-'); n = -n; }
+    if (n == 0) { buf_putc('0'); return neg + 1; }
+    while (n > 0) {
+        digits[count] = (char)('0' + n % 10);
+        count++;
+        n = n / 10;
+    }
+
+    int pos = count;
+    while (pos > 0) { pos--; buf_putc(digits[pos]); }
+    return neg + count;
+}
+
+
+int print_hex(int n) { /* queue n in lowercase hex without a prefix; returns the digit count */
+    char digit_of[16] = {'0','1','2','3','4','5','6','7',
+                         '8','9','a','b','c','d','e','f'};
+    if (n == 0) { buf_putc('0'); return 1; }
+    char rev[18]; /* digits, least-significant first */
+    int count = 0;
+    int rest = n;
+    while (rest != 0) {
+        rev[count] = digit_of[rest & 15];
+        rest = rest / 16; /* NOTE(review): mask + signed divide only agree for n >= 0 */
+        count++;
+    }
+    int pos = count;
+    while (pos > 0) { pos--; buf_putc(rev[pos]); }
+    return count;
+}
+
+
+int my_printf(char *fmt, int a, int b, int c) { /* %d %s %c %x take a, b, c in order; %% prints '%'; returns chars queued */
+    int arg_idx = 0;
+    int i = 0;
+    int total = 0;
+    while (fmt[i] != 0) {
+        if (fmt[i] != '%') {
+            buf_putc(fmt[i]);
+            total++;
+            i++;
+            continue;
+        }
+        i++; /* skip '%' */
+        if (fmt[i] == 0) { buf_putc('%'); total++; break; } /* lone '%' at the end: stop, or the i++ below would step past the 0 */
+        int arg = arg_idx == 0 ? a : (arg_idx == 1 ? b : c);
+        arg_idx++;
+        if (fmt[i] == 'd') {
+            total += print_int(arg);
+        } else if (fmt[i] == 's') {
+            total += print_str((char*)arg);
+        } else if (fmt[i] == 'c') {
+            buf_putc((char)arg);
+            total++;
+        } else if (fmt[i] == 'x') {
+            total += print_hex(arg);
+        } else if (fmt[i] == '%') {
+            buf_putc('%');
+            total++;
+            arg_idx--; /* %% doesn't consume an arg */
+        } else {
+            buf_putc('%'); buf_putc(fmt[i]); total += 2; /* unknown conversion: echoed verbatim, still uses up an arg slot */
+        }
+        i++;
+    }
+    return total;
+}
+
+
+int fact(int n) { /* n! with fact(n) = 1 for every n <= 1 */
+    return n > 1 ? n * fact(n - 1) : 1;
+}
+
+int fib(int n) { /* naive recursive Fibonacci: fib(0) = 0, fib(1) = 1 */
+    if (n > 1) return fib(n - 1) + fib(n - 2);
+    return n;
+}
+
+int main() { /* demo driver: runs my_printf through each conversion plus while / do-while / continue / break, then flushes */
+ my_printf("=== JIT printf demo ===\n", 0, 0, 0); /* unused argument slots are filled with 0 */
+
+ my_printf("Hello, %s!\n", "world", 0, 0); /* the string is handed over through an int parameter */
+ my_printf("int: %d neg: %d\n", 42, -7, 0);
+ my_printf("hex: 0x%x\n", 255, 0, 0);
+ my_printf("char: %c\n", 'A', 0, 0);
+ my_printf("percent: 100%%\n", 0, 0, 0);
+
+ my_printf("\nFactorials:\n", 0, 0, 0);
+ int i = 1;
+ while (i <= 8) { /* 1! through 8! */
+ my_printf(" %d! = %d\n", i, fact(i), 0);
+ i++;
+ }
+
+ my_printf("\nFibonacci:\n ", 0, 0, 0);
+ i = 0;
+ while (i < 10) { /* fib(0) through fib(9) */
+ my_printf("%d ", fib(i), 0, 0);
+ i++;
+ }
+ my_printf("\n", 0, 0, 0);
+
+
+ my_printf("\nCountdown: ", 0, 0, 0);
+ int n = 5;
+ do { /* do-while: body runs for n = 5, 4, 3, 2, 1 */
+ my_printf("%d ", n, 0, 0);
+ n--;
+ } while (n > 0);
+ my_printf("\n", 0, 0, 0);
+
+
+ my_printf("Odd 1-9: ", 0, 0, 0);
+ i = 0;
+ while (i < 10) {
+ i++;
+ if (i % 2 == 0) continue; /* even values are skipped */
+ my_printf("%d ", i, 0, 0);
+ if (i == 9) break; /* leaves the loop right after printing 9 */
+ }
+ my_printf("\n", 0, 0, 0);
+
+ buf_flush(); /* output is buffered in out_buf; push out what is still pending */
+ return 0;
+}
diff --git a/tests/syscalls.c b/tests/syscalls.c
new file mode 100644
index 0000000..285e539
--- /dev/null
+++ b/tests/syscalls.c
@@ -0,0 +1,30 @@
+int strlen(char *s) { /* length of s up to, but not counting, the terminating 0 */
+    int len = 0;
+    while (s[len] != 0) { len = len + 1; }
+    return len;
+}
+
+int puts_fd(int fd, char *s) { /* hand all of s to syscall 1 on fd; returns the syscall's result */
+    int len = strlen(s); return syscall(1, fd, s, len);
+}
+
+int main() { /* returns how many of the three checks in `result` hold (3 when all do) */
+ puts_fd(1, "Hello from JIT syscall!\n");
+
+ int pid = syscall(39); /* NOTE(review): 39 is presumably getpid on x86-64 Linux - confirm for other targets */
+ int pid_ok = pid > 0;
+
+ char msg[32];
+ msg[0] = 'P'; msg[1] = 'I'; msg[2] = 'D'; msg[3] = ' ';
+ msg[4] = 'o'; msg[5] = 'k'; msg[6] = ':'; msg[7] = ' ';
+ msg[8] = '0' + pid_ok; /* '1' when pid_ok is 1, '0' otherwise */
+ msg[9] = '\n';
+ syscall(1, 1, msg, 10); /* only the 10 bytes filled in above are written */
+
+ int n1 = syscall(1, 1, "write test 1\n", 13); /* 13 = length of the literal */
+ int n2 = syscall(1, 1, "write test 2\n", 13);
+
+ int result = pid_ok + (n1 == 13) + (n2 == 13);
+ return result;
+}