Add user-defined functions with call frames

Implement fn/return across the full pipeline:
- Lexer: TOK_FN, TOK_RETURN keywords
- Parser: NODE_FN_DEF, NODE_RETURN AST nodes
- Compiler: FunctionEntry table, inline compilation with jump-over and backpatching
- VM: CallFrame stack with variable snapshot for scoped calls and OP_RETURN
This commit is contained in:
Jose Luis Montañes Ojados
2026-02-18 03:16:54 +01:00
parent da9bb6ca62
commit f2e90efc16
6 changed files with 421 additions and 191 deletions

4
projects/custom_fn.j Normal file
View File

@@ -0,0 +1,4 @@
fn greet(name):
println("Hola, " + name)
greet("mundo!")

View File

@@ -1,226 +1,310 @@
#ifndef JLANG_COMPILER_H #ifndef JLANG_COMPILER_H
#define JLANG_COMPILER_H #define JLANG_COMPILER_H
#include "../../frontend/parser.h"
#include "opcodes.h" #include "opcodes.h"
#include <string.h> #include <string.h>
#include "../../frontend/parser.h"
typedef struct { typedef struct {
Instruction code[4096]; // bytecodes char *name;
int code_count; int entry_point; // indice de la primera instruccion
char *constants[256]; // pool de strings literales int param_count;
int const_count; char **param_names; // nombres de parametros
char *names[256]; // tabla de nombres (variables + funciones) } FunctionEntry;
int name_count;
typedef struct {
Instruction code[4096]; // bytecodes
int code_count;
char *constants[256]; // pool de strings literales
int const_count;
char *names[256]; // tabla de nombres (variables + funciones)
int name_count;
FunctionEntry functions[64];
int func_count;
} Chunk; } Chunk;
int emit(Chunk* chunk, Instruction instr) { int emit(Chunk *chunk, Instruction instr) {
chunk->code[chunk->code_count++] = instr; chunk->code[chunk->code_count++] = instr;
return chunk->code_count -1; return chunk->code_count - 1;
} }
int add_constant(Chunk* chunk, char* str) { int add_constant(Chunk *chunk, char *str) {
for (int i=0;i<chunk->const_count; i++){ for (int i = 0; i < chunk->const_count; i++) {
if (strcmp(chunk->constants[i], str) == 0) { if (strcmp(chunk->constants[i], str) == 0) {
return i; return i;
}
} }
}
chunk->constants[chunk->const_count++] = str; chunk->constants[chunk->const_count++] = str;
return chunk->const_count - 1; return chunk->const_count - 1;
} }
Instruction make_instruction(OpCode op) { Instruction make_instruction(OpCode op) {
Instruction instr; Instruction instr;
instr.op = op; instr.op = op;
return instr; return instr;
} }
int add_name(Chunk* chunk, char* name) { int add_name(Chunk *chunk, char *name) {
for (int i=0;i<chunk->name_count; i++){ for (int i = 0; i < chunk->name_count; i++) {
if (strcmp(chunk->names[i], name) == 0) { if (strcmp(chunk->names[i], name) == 0) {
return i; return i;
} }
}
chunk->names[chunk->name_count++] = name;
return chunk->name_count - 1;
}
int compile_node(Chunk *chunk, ASTNode *node) {
switch (node->type) {
case NODE_INT_LIT: {
Instruction instr = make_instruction(OP_CONST_INT);
instr.operand.int_val = node->data.int_val;
return emit(chunk, instr);
}
case NODE_STRING_LIT: {
Instruction instr = make_instruction(OP_CONST_STRING);
instr.operand.str_index = add_constant(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_VAR: {
Instruction instr = make_instruction(OP_LOAD_VAR);
instr.operand.var_index = add_name(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_ASSIGN: {
compile_node(chunk, node->data.assign.value);
Instruction instr = make_instruction(OP_STORE_VAR);
instr.operand.var_index = add_name(chunk, node->data.assign.name);
return emit(chunk, instr);
}
case NODE_CALL: {
// Compilar cada argumento y pushear al stack
for (int i = 0; i < node->data.call.arg_count; i++) {
compile_node(chunk, node->data.call.args[i]);
} }
chunk->names[chunk->name_count++] = name; // Registrar el nombre de la funcion
return chunk->name_count - 1; Instruction instr = make_instruction(OP_CALL);
} instr.operand.call.arg_count = node->data.call.arg_count;
instr.operand.call.name_index = add_name(chunk, node->data.call.name);
return emit(chunk, instr);
}
case NODE_BLOCK: {
int n = node->data.block.count;
int compile_node(Chunk *chunk, ASTNode* node) { // NOP for gc
switch (node->type) { emit(chunk, make_instruction(OP_NOP));
case NODE_INT_LIT: {
Instruction instr = make_instruction(OP_CONST_INT);
instr.operand.int_val = node->data.int_val;
return emit(chunk, instr);
}
case NODE_STRING_LIT: {
Instruction instr = make_instruction(OP_CONST_STRING);
instr.operand.str_index = add_constant(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_VAR: {
Instruction instr = make_instruction(OP_LOAD_VAR);
instr.operand.var_index = add_name(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_ASSIGN: {
compile_node(chunk, node->data.assign.value);
Instruction instr = make_instruction(OP_STORE_VAR);
instr.operand.var_index = add_name(chunk, node->data.assign.name);
return emit(chunk, instr);
}
case NODE_CALL: {
// Compilar cada argumento y pushear al stack
for (int i=0; i<node->data.call.arg_count; i++){
compile_node(chunk, node->data.call.args[i]);
}
// Registrar el nombre de la funcion for (int i = 0; i < n; i++) {
Instruction instr = make_instruction(OP_CALL); compile_node(chunk, node->data.block.stmts[i]);
instr.operand.call.arg_count = node->data.call.arg_count;
instr.operand.call.name_index = add_name(chunk, node->data.call.name);
return emit(chunk, instr);
}
case NODE_BLOCK: {
int n = node->data.block.count;
// NOP for gc
emit(chunk, make_instruction(OP_NOP));
for (int i=0; i<n; i++){
compile_node(chunk, node->data.block.stmts[i]);
}
return 0;
}
case NODE_BINOP: {
int leftOffset = compile_node(chunk, node->data.binop.left);
int rightOffset = compile_node(chunk, node->data.binop.right);
OpCode opCode;
switch (node->data.binop.op) {
case '+':
opCode = OP_ADD;
break;
case '-':
opCode = OP_SUB;
break;
case '*':
opCode = OP_MUL;
break;
case '/':
opCode = OP_DIV;
break;
case '>':
opCode = OP_CMP_GT;
break;
case '<':
opCode = OP_CMP_LT;
break;
default:
break;
}
emit(chunk, make_instruction(opCode));
return 0;
}
case NODE_WHILE: {
int loop_start = chunk->code_count;
compile_node(chunk, node->data.while_loop.cond);
// jump if zero, zero = false
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// compile body
compile_node(chunk, node->data.while_loop.body);
instr = make_instruction(OP_JUMP);
instr.operand.jump_target = loop_start;
emit(chunk, instr);
// Bachpatching
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
case NODE_IF: {
// compile condition
compile_node(chunk, node->data.if_statement.cond);
// add jump if zero
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// compile body
compile_node(chunk, node->data.if_statement.body);
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
default:
break;
} }
return 0; return 0;
} }
case NODE_BINOP: {
int leftOffset = compile_node(chunk, node->data.binop.left);
int rightOffset = compile_node(chunk, node->data.binop.right);
Chunk* compile(ASTNode* root) { OpCode opCode;
// Create chunk switch (node->data.binop.op) {
Chunk* chunk = (Chunk*) malloc(sizeof(Chunk)); case '+':
opCode = OP_ADD;
break;
case '-':
opCode = OP_SUB;
break;
case '*':
opCode = OP_MUL;
break;
case '/':
opCode = OP_DIV;
break;
case '>':
opCode = OP_CMP_GT;
break;
case '<':
opCode = OP_CMP_LT;
break;
default:
break;
}
emit(chunk, make_instruction(opCode));
return 0;
}
case NODE_WHILE: {
int loop_start = chunk->code_count;
compile_node(chunk, node->data.while_loop.cond);
// jump if zero, zero = false
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// Set arrays to 0 // compile body
memset(chunk, 0, sizeof(Chunk)); compile_node(chunk, node->data.while_loop.body);
compile_node(chunk, root); instr = make_instruction(OP_JUMP);
instr.operand.jump_target = loop_start;
Instruction instr;
instr.op = OP_HALT;
emit(chunk, instr); emit(chunk, instr);
return chunk; // Bachpatching
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
case NODE_IF: {
// compile condition
compile_node(chunk, node->data.if_statement.cond);
// add jump if zero
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// compile body
compile_node(chunk, node->data.if_statement.body);
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
case NODE_FN_DEF:
case NODE_RETURN: {
  // Number of function bodies currently being compiled. The two cases share
  // one block so that NODE_RETURN can see this counter and reject a `return`
  // that appears outside any function (there would be no call frame to pop).
  static int fn_depth = 0;

  if (node->type == NODE_RETURN) {
    if (fn_depth == 0) {
      printf("ERROR: return fuera de una funcion\n");
      exit(1);
    }
    // Compile the optional return expression so its value is on top of the
    // stack when OP_RETURN executes; a bare `return` emits OP_RETURN alone.
    if (node->data.ret.value != NULL) {
      compile_node(chunk, node->data.ret.value);
    }
    emit(chunk, make_instruction(OP_RETURN));
    break;
  }

  // Refuse to write past the fixed-size function table.
  int max_functions =
      (int)(sizeof chunk->functions / sizeof chunk->functions[0]);
  if (chunk->func_count >= max_functions) {
    printf("ERROR: demasiadas funciones (max %d)\n", max_functions);
    exit(1);
  }

  // Function bodies are compiled inline, so emit a jump that skips the body
  // during normal top-to-bottom execution. Its target is patched below.
  Instruction jump = make_instruction(OP_JUMP);
  jump.operand.jump_target = -1;
  int jump_idx = emit(chunk, jump);

  // Register the function: its entry point is the next instruction emitted.
  FunctionEntry *fn = &chunk->functions[chunk->func_count++];
  fn->name = node->data.fn_def.name;
  fn->entry_point = chunk->code_count;
  fn->param_count = node->data.fn_def.param_count;
  fn->param_names = node->data.fn_def.params;

  // Prologue: one STORE_VAR per parameter, last parameter first, because the
  // caller pushed the arguments left to right.
  for (int i = node->data.fn_def.param_count - 1; i >= 0; i--) {
    Instruction store = make_instruction(OP_STORE_VAR);
    store.operand.var_index = add_name(chunk, node->data.fn_def.params[i]);
    emit(chunk, store);
  }

  // Compile the body, tracking nesting so `return` statements are accepted.
  fn_depth++;
  compile_node(chunk, node->data.fn_def.body);
  fn_depth--;

  // Implicit return for bodies that fall off the end.
  emit(chunk, make_instruction(OP_RETURN));

  // Backpatch the skip-over jump to the first instruction after the body.
  chunk->code[jump_idx].operand.jump_target = chunk->code_count;
  break;
}
default:
break;
}
return 0;
} }
void print_chunk(Chunk* chunk) { Chunk *compile(ASTNode *root) {
printf("=== Names (%d) ===\n", chunk->name_count); // Create chunk
for (int i = 0; i < chunk->name_count; i++) { Chunk *chunk = (Chunk *)malloc(sizeof(Chunk));
printf(" [%d] %s\n", i, chunk->names[i]);
}
printf("=== Constants (%d) ===\n", chunk->const_count); // Set arrays to 0
for (int i = 0; i < chunk->const_count; i++) { memset(chunk, 0, sizeof(Chunk));
printf(" [%d] \"%s\"\n", i, chunk->constants[i]);
}
printf("=== Bytecode (%d instructions) ===\n", chunk->code_count); compile_node(chunk, root);
for (int i = 0; i < chunk->code_count; i++) {
Instruction instr = chunk->code[i]; Instruction instr;
printf("%04d ", i); instr.op = OP_HALT;
switch (instr.op) { emit(chunk, instr);
case OP_CONST_INT: printf("CONST_INT %d", instr.operand.int_val); break;
case OP_CONST_STRING: printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index, chunk->constants[instr.operand.str_index]); break; return chunk;
case OP_POP: printf("POP"); break; }
case OP_ADD: printf("ADD"); break;
case OP_SUB: printf("SUB"); break; void print_chunk(Chunk *chunk) {
case OP_MUL: printf("MUL"); break; printf("=== Names (%d) ===\n", chunk->name_count);
case OP_DIV: printf("DIV"); break; for (int i = 0; i < chunk->name_count; i++) {
case OP_NEG: printf("NEG"); break; printf(" [%d] %s\n", i, chunk->names[i]);
case OP_CMP_LT: printf("CMP_LT"); break; }
case OP_CMP_GT: printf("CMP_GT"); break;
case OP_LOAD_VAR: printf("LOAD_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break; printf("=== Constants (%d) ===\n", chunk->const_count);
case OP_STORE_VAR: printf("STORE_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break; for (int i = 0; i < chunk->const_count; i++) {
case OP_JUMP: printf("JUMP -> %04d", instr.operand.jump_target); break; printf(" [%d] \"%s\"\n", i, chunk->constants[i]);
case OP_JUMP_IF_ZERO: printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target); break; }
case OP_CALL: printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index], instr.operand.call.arg_count); break;
case OP_NOP: printf("NOP"); break; printf("=== Bytecode (%d instructions) ===\n", chunk->code_count);
case OP_HALT: printf("HALT"); break; for (int i = 0; i < chunk->code_count; i++) {
default: printf("UNKNOWN op=%d", instr.op); break; Instruction instr = chunk->code[i];
} printf("%04d ", i);
printf("\n"); switch (instr.op) {
case OP_CONST_INT:
printf("CONST_INT %d", instr.operand.int_val);
break;
case OP_CONST_STRING:
printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index,
chunk->constants[instr.operand.str_index]);
break;
case OP_POP:
printf("POP");
break;
case OP_ADD:
printf("ADD");
break;
case OP_SUB:
printf("SUB");
break;
case OP_MUL:
printf("MUL");
break;
case OP_DIV:
printf("DIV");
break;
case OP_NEG:
printf("NEG");
break;
case OP_CMP_LT:
printf("CMP_LT");
break;
case OP_CMP_GT:
printf("CMP_GT");
break;
case OP_LOAD_VAR:
printf("LOAD_VAR [%d] %s", instr.operand.var_index,
chunk->names[instr.operand.var_index]);
break;
case OP_STORE_VAR:
printf("STORE_VAR [%d] %s", instr.operand.var_index,
chunk->names[instr.operand.var_index]);
break;
case OP_JUMP:
printf("JUMP -> %04d", instr.operand.jump_target);
break;
case OP_JUMP_IF_ZERO:
printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target);
break;
case OP_CALL:
printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index],
instr.operand.call.arg_count);
break;
case OP_RETURN:
printf("RETURN");
break;
case OP_NOP:
printf("NOP");
break;
case OP_HALT:
printf("HALT");
break;
default:
printf("UNKNOWN op=%d", instr.op);
break;
} }
printf("=== End ===\n"); printf("\n");
}
printf("=== End ===\n");
} }
#endif #endif

View File

@@ -13,6 +13,7 @@ typedef enum {
OP_JUMP, // salto incondicional OP_JUMP, // salto incondicional
OP_JUMP_IF_ZERO, // pop -> si false, saltar OP_JUMP_IF_ZERO, // pop -> si false, saltar
OP_CALL, // llamar built-in por indice de nombre OP_CALL, // llamar built-in por indice de nombre
OP_RETURN, // retornar de funcion (pop call frame)
OP_NOP, OP_HALT, OP_NOP, OP_HALT,
} OpCode; } OpCode;

View File

@@ -5,6 +5,13 @@
#include "compiler.h" #include "compiler.h"
#include "value.h" #include "value.h"
// Saved caller state for one active user-function call. OP_CALL fills a
// frame before jumping to the callee's entry point, and OP_RETURN restores
// the VM from it.
typedef struct {
int return_ip; // instruction index to resume at once the callee returns
int saved_sp; // stack pointer minus the callee's parameter count
Value saved_vars[256]; // snapshot of the caller's variables
int saved_var_set[256]; // snapshot of the caller's "variable defined" flags
} CallFrame;
typedef struct { typedef struct {
Chunk *chunk; Chunk *chunk;
int ip; // instruction pointer int ip; // instruction pointer
@@ -12,6 +19,10 @@ typedef struct {
int sp; // stack pointer int sp; // stack pointer
Value vars[256]; // variables por indice Value vars[256]; // variables por indice
int var_set[256]; // 0=no definida, 1=definida int var_set[256]; // 0=no definida, 1=definida
CallFrame frames[64];
int frame_count;
JLANG_memory_allocator *allocator; JLANG_memory_allocator *allocator;
} VM; } VM;
@@ -88,8 +99,30 @@ void run_vm(VM *vm) {
case OP_CALL: { case OP_CALL: {
int nameIdx = instr.operand.call.name_index; int nameIdx = instr.operand.call.name_index;
char *name = vm->chunk->names[nameIdx]; char *name = vm->chunk->names[nameIdx];
// Check whether the callee is a user-defined function.
FunctionEntry *fn = NULL;
for (int i = 0; i < vm->chunk->func_count; i++) {
  if (strcmp(vm->chunk->functions[i].name, name) == 0) {
    fn = &vm->chunk->functions[i];
    break;
  }
}

if (fn != NULL) {
  int max_frames = (int)(sizeof vm->frames / sizeof vm->frames[0]);

  // saved_sp below is derived from param_count, so a call that pushed a
  // different number of arguments would restore the wrong stack height.
  if (instr.operand.call.arg_count != fn->param_count) {
    printf("ERROR: %s espera %d argumentos, recibio %d\n", name,
           fn->param_count, instr.operand.call.arg_count);
    return;
  }

  // Deep (e.g. recursive) calls must not write past the frame array.
  if (vm->frame_count >= max_frames) {
    printf("ERROR: demasiadas llamadas anidadas (max %d)\n", max_frames);
    return;
  }

  // Save the caller's state in a new call frame.
  CallFrame *frame = &vm->frames[vm->frame_count++];
  frame->return_ip = vm->ip + 1; // resume at the instruction after the call
  frame->saved_sp = vm->sp - fn->param_count;
  memcpy(frame->saved_vars, vm->vars, sizeof(vm->vars));
  memcpy(frame->saved_var_set, vm->var_set, sizeof(vm->var_set));

  // Jump to the function's entry point; `continue` skips the usual ip++.
  vm->ip = fn->entry_point;
  continue;
}
if (strcmp(name, "print") == 0 || strcmp(name, "println") == 0) { if (strcmp(name, "print") == 0 || strcmp(name, "println") == 0) {
int nParams = instr.operand.call.arg_count; int nParams = instr.operand.call.arg_count;
@@ -141,6 +174,30 @@ void run_vm(VM *vm) {
break; break;
} }
case OP_RETURN: {
  // A RETURN with no active call frame would index frames[-1]; stop the VM
  // instead of reading out of bounds.
  if (vm->frame_count <= 0) {
    printf("ERROR: return sin call frame activo\n");
    return;
  }

  // Pop the call frame of the function that is returning.
  CallFrame *frame = &vm->frames[--vm->frame_count];

  // Anything left above the frame's saved stack height is treated as the
  // return value.
  Value return_val = {0};
  int has_return = 0;
  if (vm->sp > frame->saved_sp) {
    return_val = vm->stack[--vm->sp];
    has_return = 1;
  }

  // Restore the caller's instruction pointer, stack height and variables.
  vm->ip = frame->return_ip;
  vm->sp = frame->saved_sp;
  memcpy(vm->vars, frame->saved_vars, sizeof(vm->vars));
  memcpy(vm->var_set, frame->saved_var_set, sizeof(vm->var_set));

  // Hand the return value, if any, to the caller.
  if (has_return) {
    vm->stack[vm->sp++] = return_val;
  }
  continue; // ip already points at the resume location; skip ip++
}
case OP_ADD: { case OP_ADD: {
// Pop from stack // Pop from stack
Value var2 = vm->stack[--vm->sp]; Value var2 = vm->stack[--vm->sp];
@@ -278,6 +335,7 @@ void run_vm(VM *vm) {
break; break;
} }
default: default:
break; break;
} }

View File

@@ -17,6 +17,8 @@ typedef enum {
TOK_ID, // x, foo, mi_var TOK_ID, // x, foo, mi_var
TOK_IF, // if TOK_IF, // if
TOK_WHILE, // while TOK_WHILE, // while
TOK_FN, // fn
TOK_RETURN, // return
// Operadores // Operadores
TOK_ASSIGN, // = TOK_ASSIGN, // =
@@ -153,6 +155,10 @@ Token *tokenize(const char *source, int *token_count) {
tokens[count++] = make_token(TOK_IF, word); tokens[count++] = make_token(TOK_IF, word);
} else if (strcmp(word, "while") == 0) { } else if (strcmp(word, "while") == 0) {
tokens[count++] = make_token(TOK_WHILE, word); tokens[count++] = make_token(TOK_WHILE, word);
} else if (strcmp(word, "fn") == 0) {
tokens[count++] = make_token(TOK_FN, word);
} else if (strcmp(word, "return") == 0) {
tokens[count++] = make_token(TOK_RETURN, word);
} else { } else {
tokens[count++] = make_token(TOK_ID, word); tokens[count++] = make_token(TOK_ID, word);
} }

View File

@@ -17,6 +17,8 @@ typedef enum {
NODE_WHILE, // while cond: bloque NODE_WHILE, // while cond: bloque
NODE_BLOCK, // secuencia de statements NODE_BLOCK, // secuencia de statements
NODE_CALL, NODE_CALL,
NODE_FN_DEF, // definicion de funcion
NODE_RETURN, // return expr
} NodeType; } NodeType;
typedef struct ASTNode { typedef struct ASTNode {
@@ -53,6 +55,15 @@ typedef struct ASTNode {
struct ASTNode **args; struct ASTNode **args;
int arg_count; int arg_count;
} call; } call;
struct {
char *name;
char **params;
int param_count;
struct ASTNode *body;
} fn_def; // NODE_FN_DEF
struct {
struct ASTNode *value; // expresion de retorno
} ret; // NODE_RETURN
} data; } data;
} ASTNode; } ASTNode;
@@ -68,7 +79,6 @@ ASTNode *parse_expr(Token *tokens);
ASTNode *parse_term(Token *tokens); ASTNode *parse_term(Token *tokens);
ASTNode *parse_factor(Token *tokens); ASTNode *parse_factor(Token *tokens);
ASTNode *parse_factor(Token *tokens) { ASTNode *parse_factor(Token *tokens) {
if (tokens[pos].type == TOK_INT) { if (tokens[pos].type == TOK_INT) {
ASTNode *node = make_node(NODE_INT_LIT); ASTNode *node = make_node(NODE_INT_LIT);
@@ -274,6 +284,57 @@ ASTNode *parse_statement(Token *tokens) {
return node; return node;
} }
if (tokens[pos].type == TOK_FN) {
  // Parses `fn name(p1, p2, ...):` followed by an indented block and returns
  // a NODE_FN_DEF node. Capacity limits are enforced so that oversized input
  // is reported instead of overflowing the fixed-size arrays.
  enum { MAX_FN_PARAMS = 16, MAX_FN_BODY_STMTS = 256 };

  pos++; // consume "fn"
  if (tokens[pos].type != TOK_ID) {
    printf("ERROR: se esperaba el nombre de la funcion\n");
    exit(1);
  }
  char *name = tokens[pos].value;
  pos++; // consume name
  pos++; // consume "("

  // Parse the comma-separated parameter list.
  char **params = malloc(sizeof *params * MAX_FN_PARAMS);
  if (params == NULL) {
    printf("ERROR: sin memoria\n");
    exit(1);
  }
  int param_count = 0;

  if (tokens[pos].type != TOK_RPAREN) {
    for (;;) {
      if (tokens[pos].type != TOK_ID) {
        printf("ERROR: se esperaba un nombre de parametro\n");
        exit(1);
      }
      if (param_count >= MAX_FN_PARAMS) {
        printf("ERROR: demasiados parametros (max %d)\n", MAX_FN_PARAMS);
        exit(1);
      }
      params[param_count++] = tokens[pos].value;
      pos++;
      if (tokens[pos].type != TOK_COMMA) {
        break;
      }
      pos++; // consume ","
    }
  }

  pos++; // consume ")"
  pos++; // consume ":"
  pos++; // consume NEWLINE
  pos++; // consume INDENT

  // Parse the body statements up to the matching DEDENT.
  ASTNode *body = make_node(NODE_BLOCK);
  body->data.block.stmts =
      malloc(sizeof *body->data.block.stmts * MAX_FN_BODY_STMTS);
  if (body->data.block.stmts == NULL) {
    printf("ERROR: sin memoria\n");
    exit(1);
  }
  body->data.block.count = 0;

  while (tokens[pos].type != TOK_DEDENT) {
    if (body->data.block.count >= MAX_FN_BODY_STMTS) {
      printf("ERROR: demasiados statements en la funcion (max %d)\n",
             MAX_FN_BODY_STMTS);
      exit(1);
    }
    body->data.block.stmts[body->data.block.count++] =
        parse_statement(tokens);
    if (tokens[pos].type == TOK_NEWLINE) {
      pos++;
    }
  }

  pos++; // consume DEDENT

  ASTNode *node = make_node(NODE_FN_DEF);
  node->data.fn_def.name = name;
  node->data.fn_def.params = params;
  node->data.fn_def.param_count = param_count;
  node->data.fn_def.body = body;
  return node;
}
if (tokens[pos].type == TOK_RETURN) {
  // Parses `return` or `return <expr>` into a NODE_RETURN node.
  pos++; // consume "return"
  ASTNode *node = make_node(NODE_RETURN);

  // A bare `return` ends the statement immediately; record it as a NULL
  // value (which ast_print already handles) instead of trying to parse an
  // expression from the end-of-statement token.
  if (tokens[pos].type == TOK_NEWLINE || tokens[pos].type == TOK_DEDENT) {
    node->data.ret.value = NULL;
  } else {
    node->data.ret.value = parse_expr(tokens);
  }
  return node;
}
printf("ERROR: statement inesperado\n"); printf("ERROR: statement inesperado\n");
exit(1); exit(1);
} }
@@ -366,6 +427,22 @@ void ast_print(ASTNode *node, const char *prefix, int is_last) {
} }
break; break;
case NODE_FN_DEF: {
  // Header line: the function name followed by each parameter name, then the
  // body printed as the only (last) child.
  int n_params = node->data.fn_def.param_count;
  int p = 0;

  printf("NODE_FN_DEF(\"%s\"", node->data.fn_def.name);
  while (p < n_params) {
    printf(", %s", node->data.fn_def.params[p]);
    p++;
  }
  printf(")\n");
  ast_print(node->data.fn_def.body, new_prefix, 1);
  break;
}

case NODE_RETURN: {
  // The returned expression is optional; print it as a child when present.
  struct ASTNode *returned = node->data.ret.value;

  printf("NODE_RETURN\n");
  if (returned != NULL) {
    ast_print(returned, new_prefix, 1);
  }
  break;
}
default: default:
printf("UNKNOWN\n"); printf("UNKNOWN\n");
break; break;