Add user-defined functions with call frames

Implement fn/return across the full pipeline:
- Lexer: TOK_FN, TOK_RETURN keywords
- Parser: NODE_FN_DEF, NODE_RETURN AST nodes
- Compiler: FunctionEntry table, inline compilation with jump-over and backpatching
- VM: CallFrame stack with variable snapshot for scoped calls and OP_RETURN
This commit is contained in:
Jose Luis Montañes Ojados
2026-02-18 03:16:54 +01:00
parent da9bb6ca62
commit f2e90efc16
6 changed files with 421 additions and 191 deletions

4
projects/custom_fn.j Normal file
View File

@@ -0,0 +1,4 @@
fn greet(name):
println("Hola, " + name)
greet("mundo!")

View File

@@ -1,9 +1,17 @@
#ifndef JLANG_COMPILER_H
#define JLANG_COMPILER_H
#include "../../frontend/parser.h"
#include "opcodes.h"
#include <string.h>
#include "../../frontend/parser.h"
// Compile-time record for one user-defined function. The compiler fills one
// of these per function definition; the VM looks entries up by name when it
// executes a call.
typedef struct {
char *name; // function name (pointer taken from the AST node, not copied)
int entry_point; // index of the function's first instruction
int param_count; // number of declared parameters
char **param_names; // parameter names (pointer taken from the AST node)
} FunctionEntry;
typedef struct {
Instruction code[4096]; // bytecodes
@@ -12,15 +20,17 @@ typedef struct {
int const_count;
char *names[256]; // tabla de nombres (variables + funciones)
int name_count;
FunctionEntry functions[64];
int func_count;
} Chunk;
/*
 * Append one instruction to the chunk's code array.
 *
 * chunk: chunk being compiled.
 * instr: instruction to append (copied by value).
 *
 * Returns the index at which the instruction was stored, so callers can
 * backpatch it later.
 *
 * The code array has a fixed capacity; rather than writing past its end,
 * compilation is aborted with an error message when the array is full.
 */
int emit(Chunk *chunk, Instruction instr) {
    int capacity = (int)(sizeof chunk->code / sizeof chunk->code[0]);
    if (chunk->code_count >= capacity) {
        printf("ERROR: demasiadas instrucciones (max %d)\n", capacity);
        exit(1);
    }
    chunk->code[chunk->code_count] = instr;
    return chunk->code_count++;
}
int add_constant(Chunk* chunk, char* str) {
for (int i=0;i<chunk->const_count; i++){
int add_constant(Chunk *chunk, char *str) {
for (int i = 0; i < chunk->const_count; i++) {
if (strcmp(chunk->constants[i], str) == 0) {
return i;
}
@@ -36,8 +46,8 @@ Instruction make_instruction(OpCode op) {
return instr;
}
int add_name(Chunk* chunk, char* name) {
for (int i=0;i<chunk->name_count; i++){
int add_name(Chunk *chunk, char *name) {
for (int i = 0; i < chunk->name_count; i++) {
if (strcmp(chunk->names[i], name) == 0) {
return i;
}
@@ -47,7 +57,7 @@ int add_name(Chunk* chunk, char* name) {
return chunk->name_count - 1;
}
int compile_node(Chunk *chunk, ASTNode* node) {
int compile_node(Chunk *chunk, ASTNode *node) {
switch (node->type) {
case NODE_INT_LIT: {
Instruction instr = make_instruction(OP_CONST_INT);
@@ -72,7 +82,7 @@ int compile_node(Chunk *chunk, ASTNode* node) {
}
case NODE_CALL: {
// Compilar cada argumento y pushear al stack
for (int i=0; i<node->data.call.arg_count; i++){
for (int i = 0; i < node->data.call.arg_count; i++) {
compile_node(chunk, node->data.call.args[i]);
}
@@ -88,7 +98,7 @@ int compile_node(Chunk *chunk, ASTNode* node) {
// NOP for gc
emit(chunk, make_instruction(OP_NOP));
for (int i=0; i<n; i++){
for (int i = 0; i < n; i++) {
compile_node(chunk, node->data.block.stmts[i]);
}
@@ -160,6 +170,37 @@ int compile_node(Chunk *chunk, ASTNode* node) {
break;
}
case NODE_FN_DEF: {
    // The function table has a fixed size; refuse to register past its end
    // instead of writing outside the array.
    int max_functions =
        (int)(sizeof chunk->functions / sizeof chunk->functions[0]);
    if (chunk->func_count >= max_functions) {
        printf("ERROR: demasiadas funciones definidas (max %d)\n",
               max_functions);
        exit(1);
    }

    // The body is compiled inline, so emit a jump that skips over it when
    // execution simply falls through the definition. The target is not
    // known yet and is backpatched once the body has been emitted.
    Instruction jump = make_instruction(OP_JUMP);
    jump.operand.jump_target = -1; // placeholder, backpatched below
    int jump_idx = emit(chunk, jump);

    // Register the function: its entry point is the next instruction.
    FunctionEntry *fn = &chunk->functions[chunk->func_count++];
    fn->name = node->data.fn_def.name;
    fn->entry_point = chunk->code_count;
    fn->param_count = node->data.fn_def.param_count;
    fn->param_names = node->data.fn_def.params;

    // Call sites compile their arguments in order, so the last argument is
    // on top of the stack: store into the parameters in reverse order.
    for (int i = node->data.fn_def.param_count - 1; i >= 0; i--) {
        Instruction store = make_instruction(OP_STORE_VAR);
        store.operand.var_index = add_name(chunk, node->data.fn_def.params[i]);
        emit(chunk, store);
    }

    // Compile the function body.
    compile_node(chunk, node->data.fn_def.body);

    // Implicit return, in case the body has no explicit one.
    emit(chunk, make_instruction(OP_RETURN));

    // Backpatch the skip-jump to land just past the function.
    chunk->code[jump_idx].operand.jump_target = chunk->code_count;
    break;
}
default:
break;
}
@@ -167,9 +208,9 @@ int compile_node(Chunk *chunk, ASTNode* node) {
return 0;
}
Chunk* compile(ASTNode* root) {
Chunk *compile(ASTNode *root) {
// Create chunk
Chunk* chunk = (Chunk*) malloc(sizeof(Chunk));
Chunk *chunk = (Chunk *)malloc(sizeof(Chunk));
// Set arrays to 0
memset(chunk, 0, sizeof(Chunk));
@@ -183,7 +224,7 @@ Chunk* compile(ASTNode* root) {
return chunk;
}
void print_chunk(Chunk* chunk) {
void print_chunk(Chunk *chunk) {
printf("=== Names (%d) ===\n", chunk->name_count);
for (int i = 0; i < chunk->name_count; i++) {
printf(" [%d] %s\n", i, chunk->names[i]);
@@ -199,24 +240,67 @@ void print_chunk(Chunk* chunk) {
Instruction instr = chunk->code[i];
printf("%04d ", i);
switch (instr.op) {
case OP_CONST_INT: printf("CONST_INT %d", instr.operand.int_val); break;
case OP_CONST_STRING: printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index, chunk->constants[instr.operand.str_index]); break;
case OP_POP: printf("POP"); break;
case OP_ADD: printf("ADD"); break;
case OP_SUB: printf("SUB"); break;
case OP_MUL: printf("MUL"); break;
case OP_DIV: printf("DIV"); break;
case OP_NEG: printf("NEG"); break;
case OP_CMP_LT: printf("CMP_LT"); break;
case OP_CMP_GT: printf("CMP_GT"); break;
case OP_LOAD_VAR: printf("LOAD_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break;
case OP_STORE_VAR: printf("STORE_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break;
case OP_JUMP: printf("JUMP -> %04d", instr.operand.jump_target); break;
case OP_JUMP_IF_ZERO: printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target); break;
case OP_CALL: printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index], instr.operand.call.arg_count); break;
case OP_NOP: printf("NOP"); break;
case OP_HALT: printf("HALT"); break;
default: printf("UNKNOWN op=%d", instr.op); break;
case OP_CONST_INT:
printf("CONST_INT %d", instr.operand.int_val);
break;
case OP_CONST_STRING:
printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index,
chunk->constants[instr.operand.str_index]);
break;
case OP_POP:
printf("POP");
break;
case OP_ADD:
printf("ADD");
break;
case OP_SUB:
printf("SUB");
break;
case OP_MUL:
printf("MUL");
break;
case OP_DIV:
printf("DIV");
break;
case OP_NEG:
printf("NEG");
break;
case OP_CMP_LT:
printf("CMP_LT");
break;
case OP_CMP_GT:
printf("CMP_GT");
break;
case OP_LOAD_VAR:
printf("LOAD_VAR [%d] %s", instr.operand.var_index,
chunk->names[instr.operand.var_index]);
break;
case OP_STORE_VAR:
printf("STORE_VAR [%d] %s", instr.operand.var_index,
chunk->names[instr.operand.var_index]);
break;
case OP_JUMP:
printf("JUMP -> %04d", instr.operand.jump_target);
break;
case OP_JUMP_IF_ZERO:
printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target);
break;
case OP_CALL:
printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index],
instr.operand.call.arg_count);
break;
case OP_RETURN:
printf("RETURN");
break;
case OP_NOP:
printf("NOP");
break;
case OP_HALT:
printf("HALT");
break;
default:
printf("UNKNOWN op=%d", instr.op);
break;
}
printf("\n");
}

View File

@@ -13,6 +13,7 @@ typedef enum {
OP_JUMP, // salto incondicional
OP_JUMP_IF_ZERO, // pop -> si false, saltar
OP_CALL, // llamar built-in por indice de nombre
OP_RETURN, // retornar de funcion (pop call frame)
OP_NOP, OP_HALT,
} OpCode;

View File

@@ -5,6 +5,13 @@
#include "compiler.h"
#include "value.h"
// Caller state saved when a user-defined function is called and restored
// when it returns.
typedef struct {
int return_ip; // instruction index to resume at after the call
int saved_sp; // stack base: the caller's sp minus the callee's arguments
Value saved_vars[256]; // snapshot of the caller's variables
int saved_var_set[256]; // snapshot of the caller's "variable defined" flags
} CallFrame;
typedef struct {
Chunk *chunk;
int ip; // instruction pointer
@@ -12,6 +19,10 @@ typedef struct {
int sp; // stack pointer
Value vars[256]; // variables por indice
int var_set[256]; // 0=no definida, 1=definida
CallFrame frames[64];
int frame_count;
JLANG_memory_allocator *allocator;
} VM;
@@ -88,8 +99,30 @@ void run_vm(VM *vm) {
case OP_CALL: {
int nameIdx = instr.operand.call.name_index;
char *name = vm->chunk->names[nameIdx];
// Check whether the callee is a user-defined function.
FunctionEntry *fn = NULL;
for (int i = 0; i < vm->chunk->func_count; i++) {
    if (strcmp(vm->chunk->functions[i].name, name) == 0) {
        fn = &vm->chunk->functions[i];
        break;
    }
}

if (fn != NULL) {
    // The frame stack is fixed-size: stop on overflow (e.g. runaway
    // recursion) instead of writing past the end of the array.
    int max_frames = (int)(sizeof vm->frames / sizeof vm->frames[0]);
    if (vm->frame_count >= max_frames) {
        printf("ERROR: desbordamiento de la pila de llamadas (max %d)\n",
               max_frames);
        exit(1);
    }

    // The callee pops exactly param_count values and saved_sp is derived
    // from param_count, so a mismatched argument count would leave the
    // stack misaligned.
    if (instr.operand.call.arg_count != fn->param_count) {
        printf("ERROR: %s espera %d argumentos, recibio %d\n", name,
               fn->param_count, instr.operand.call.arg_count);
        exit(1);
    }

    // Save the caller's state in a new call frame.
    CallFrame *frame = &vm->frames[vm->frame_count++];
    frame->return_ip = vm->ip + 1; // resume at the next instruction
    frame->saved_sp = vm->sp - fn->param_count;
    memcpy(frame->saved_vars, vm->vars, sizeof(vm->vars));
    memcpy(frame->saved_var_set, vm->var_set, sizeof(vm->var_set));

    // Jump to the function's entry point.
    vm->ip = fn->entry_point;
    continue; // ip was set explicitly; skip the usual increment
}
if (strcmp(name, "print") == 0 || strcmp(name, "println") == 0) {
int nParams = instr.operand.call.arg_count;
@@ -141,6 +174,30 @@ void run_vm(VM *vm) {
break;
}
case OP_RETURN: {
    // A return with no active call frame would index frames[-1]; treat it
    // as a fatal error instead.
    if (vm->frame_count <= 0) {
        printf("ERROR: return fuera de una funcion\n");
        exit(1);
    }

    // Pop the call frame of the function that is returning.
    CallFrame *frame = &vm->frames[--vm->frame_count];

    // Capture the return value, if the function left one on the stack
    // above the base recorded at call time.
    Value return_val = {0};
    int has_return = 0;
    if (vm->sp > frame->saved_sp) {
        return_val = vm->stack[--vm->sp];
        has_return = 1;
    }

    // Restore the caller's instruction pointer, stack base and variables.
    vm->ip = frame->return_ip;
    vm->sp = frame->saved_sp;
    memcpy(vm->vars, frame->saved_vars, sizeof(vm->vars));
    memcpy(vm->var_set, frame->saved_var_set, sizeof(vm->var_set));

    // Hand the return value to the caller.
    if (has_return) {
        vm->stack[vm->sp++] = return_val;
    }
    continue; // ip was set explicitly; skip the usual increment
}
case OP_ADD: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
@@ -278,6 +335,7 @@ void run_vm(VM *vm) {
break;
}
default:
break;
}

View File

@@ -17,6 +17,8 @@ typedef enum {
TOK_ID, // x, foo, mi_var
TOK_IF, // if
TOK_WHILE, // while
TOK_FN, // fn
TOK_RETURN, // return
// Operadores
TOK_ASSIGN, // =
@@ -153,6 +155,10 @@ Token *tokenize(const char *source, int *token_count) {
tokens[count++] = make_token(TOK_IF, word);
} else if (strcmp(word, "while") == 0) {
tokens[count++] = make_token(TOK_WHILE, word);
} else if (strcmp(word, "fn") == 0) {
tokens[count++] = make_token(TOK_FN, word);
} else if (strcmp(word, "return") == 0) {
tokens[count++] = make_token(TOK_RETURN, word);
} else {
tokens[count++] = make_token(TOK_ID, word);
}

View File

@@ -17,6 +17,8 @@ typedef enum {
NODE_WHILE, // while cond: bloque
NODE_BLOCK, // secuencia de statements
NODE_CALL,
NODE_FN_DEF, // definicion de funcion
NODE_RETURN, // return expr
} NodeType;
typedef struct ASTNode {
@@ -53,6 +55,15 @@ typedef struct ASTNode {
struct ASTNode **args;
int arg_count;
} call;
struct {
char *name;
char **params;
int param_count;
struct ASTNode *body;
} fn_def; // NODE_FN_DEF
struct {
struct ASTNode *value; // expresion de retorno
} ret; // NODE_RETURN
} data;
} ASTNode;
@@ -68,7 +79,6 @@ ASTNode *parse_expr(Token *tokens);
ASTNode *parse_term(Token *tokens);
ASTNode *parse_factor(Token *tokens);
ASTNode *parse_factor(Token *tokens) {
if (tokens[pos].type == TOK_INT) {
ASTNode *node = make_node(NODE_INT_LIT);
@@ -274,6 +284,57 @@ ASTNode *parse_statement(Token *tokens) {
return node;
}
// Function definition:
//   fn NAME(PARAM, PARAM, ...):
//       <indented block of statements>
// Produces a NODE_FN_DEF whose body is a NODE_BLOCK. The name and parameter
// strings are the token values themselves (not copied).
if (tokens[pos].type == TOK_FN) {
    const int max_params = 16;  // capacity of the params array
    const int max_stmts = 256;  // capacity of the body's statement array

    pos++; // consume "fn"
    if (tokens[pos].type != TOK_ID) {
        printf("ERROR: se esperaba el nombre de la funcion\n");
        exit(1);
    }
    char *name = tokens[pos].value;
    pos++; // consume name
    pos++; // consume "("

    // Parse the comma-separated parameter list.
    char **params = malloc(sizeof(char *) * max_params);
    if (params == NULL) {
        printf("ERROR: sin memoria\n");
        exit(1);
    }
    int param_count = 0;
    if (tokens[pos].type != TOK_RPAREN) {
        for (;;) {
            if (tokens[pos].type != TOK_ID) {
                printf("ERROR: se esperaba un nombre de parametro\n");
                exit(1);
            }
            // Never write past the end of the params array.
            if (param_count >= max_params) {
                printf("ERROR: demasiados parametros (max %d)\n", max_params);
                exit(1);
            }
            params[param_count++] = tokens[pos].value;
            pos++;
            if (tokens[pos].type != TOK_COMMA) {
                break;
            }
            pos++; // consume ","
        }
    }
    pos++; // consume ")"
    pos++; // consume ":"
    pos++; // consume NEWLINE
    pos++; // consume INDENT

    // Parse the block of statements up to the matching DEDENT.
    ASTNode *body = make_node(NODE_BLOCK);
    body->data.block.stmts = (ASTNode **)malloc(sizeof(ASTNode *) * max_stmts);
    if (body->data.block.stmts == NULL) {
        printf("ERROR: sin memoria\n");
        exit(1);
    }
    body->data.block.count = 0;
    while (tokens[pos].type != TOK_DEDENT) {
        // Never write past the end of the statement array.
        if (body->data.block.count >= max_stmts) {
            printf("ERROR: demasiados statements en la funcion (max %d)\n",
                   max_stmts);
            exit(1);
        }
        body->data.block.stmts[body->data.block.count++] =
            parse_statement(tokens);
        if (tokens[pos].type == TOK_NEWLINE) {
            pos++;
        }
    }
    pos++; // consume DEDENT

    ASTNode *node = make_node(NODE_FN_DEF);
    node->data.fn_def.name = name;
    node->data.fn_def.params = params;
    node->data.fn_def.param_count = param_count;
    node->data.fn_def.body = body;
    return node;
}
// Return statement: `return` optionally followed by an expression.
// Produces a NODE_RETURN whose value is the parsed expression, or NULL for
// a bare `return`.
if (tokens[pos].type == TOK_RETURN) {
    pos++; // consume "return"
    ASTNode *node = make_node(NODE_RETURN);
    if (tokens[pos].type == TOK_NEWLINE || tokens[pos].type == TOK_DEDENT) {
        // Nothing follows on this line: there is no expression to parse.
        node->data.ret.value = NULL;
    } else {
        node->data.ret.value = parse_expr(tokens);
    }
    return node;
}
printf("ERROR: statement inesperado\n");
exit(1);
}
@@ -366,6 +427,22 @@ void ast_print(ASTNode *node, const char *prefix, int is_last) {
}
break;
case NODE_FN_DEF: {
    // Header line: the function name followed by each parameter name,
    // then the body printed as the only child.
    printf("NODE_FN_DEF(\"%s\"", node->data.fn_def.name);
    int remaining = node->data.fn_def.param_count;
    char **param = node->data.fn_def.params;
    while (remaining-- > 0) {
        printf(", %s", *param++);
    }
    printf(")\n");
    ast_print(node->data.fn_def.body, new_prefix, 1);
    break;
}

case NODE_RETURN: {
    // The returned expression, when present, is printed as the only child.
    printf("NODE_RETURN\n");
    ASTNode *returned = node->data.ret.value;
    if (returned != NULL) {
        ast_print(returned, new_prefix, 1);
    }
    break;
}
default:
printf("UNKNOWN\n");
break;