From f2e90efc16268bc2191f9ef61ec258de52d6fbd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jose=20Luis=20Monta=C3=B1es=20Ojados?= Date: Wed, 18 Feb 2026 03:16:54 +0100 Subject: [PATCH] Add user-defined functions with call frames Implement fn/return across the full pipeline: - Lexer: TOK_FN, TOK_RETURN keywords - Parser: NODE_FN_DEF, NODE_RETURN AST nodes - Compiler: FunctionEntry table, inline compilation with jump-over and backpatching - VM: CallFrame stack with variable snapshot for scoped calls and OP_RETURN --- projects/custom_fn.j | 4 + src/backend/bytecode/compiler.h | 460 +++++++++++++++++++------------- src/backend/bytecode/opcodes.h | 1 + src/backend/bytecode/vm.h | 62 ++++- src/frontend/lexer.h | 6 + src/frontend/parser.h | 79 +++++- 6 files changed, 421 insertions(+), 191 deletions(-) create mode 100644 projects/custom_fn.j diff --git a/projects/custom_fn.j b/projects/custom_fn.j new file mode 100644 index 0000000..74c80dc --- /dev/null +++ b/projects/custom_fn.j @@ -0,0 +1,4 @@ +fn greet(name): + println("Hola, " + name) + +greet("mundo!") \ No newline at end of file diff --git a/src/backend/bytecode/compiler.h b/src/backend/bytecode/compiler.h index 06e9aa0..e1ee711 100644 --- a/src/backend/bytecode/compiler.h +++ b/src/backend/bytecode/compiler.h @@ -1,226 +1,310 @@ #ifndef JLANG_COMPILER_H #define JLANG_COMPILER_H +#include "../../frontend/parser.h" #include "opcodes.h" #include -#include "../../frontend/parser.h" + typedef struct { - Instruction code[4096]; // bytecodes - int code_count; - char *constants[256]; // pool de strings literales - int const_count; - char *names[256]; // tabla de nombres (variables + funciones) - int name_count; + char *name; + int entry_point; // indice de la primera instruccion + int param_count; + char **param_names; // nombres de parametros +} FunctionEntry; + +typedef struct { + Instruction code[4096]; // bytecodes + int code_count; + char *constants[256]; // pool de strings literales + int const_count; + char *names[256]; // tabla de nombres (variables + funciones) + int name_count; + FunctionEntry functions[64]; + int func_count; } Chunk; -int emit(Chunk* chunk, Instruction instr) { - chunk->code[chunk->code_count++] = instr; - return chunk->code_count -1; +int emit(Chunk *chunk, Instruction instr) { + chunk->code[chunk->code_count++] = instr; + return chunk->code_count - 1; } -int add_constant(Chunk* chunk, char* str) { - for (int i=0;iconst_count; i++){ - if (strcmp(chunk->constants[i], str) == 0) { - return i; - } +int add_constant(Chunk *chunk, char *str) { + for (int i = 0; i < chunk->const_count; i++) { + if (strcmp(chunk->constants[i], str) == 0) { + return i; } + } - chunk->constants[chunk->const_count++] = str; - return chunk->const_count - 1; + chunk->constants[chunk->const_count++] = str; + return chunk->const_count - 1; } Instruction make_instruction(OpCode op) { - Instruction instr; - instr.op = op; - return instr; + Instruction instr; + instr.op = op; + return instr; } -int add_name(Chunk* chunk, char* name) { - for (int i=0;iname_count; i++){ - if (strcmp(chunk->names[i], name) == 0) { - return i; - } +int add_name(Chunk *chunk, char *name) { + for (int i = 0; i < chunk->name_count; i++) { + if (strcmp(chunk->names[i], name) == 0) { + return i; + } + } + + chunk->names[chunk->name_count++] = name; + return chunk->name_count - 1; +} + +int compile_node(Chunk *chunk, ASTNode *node) { + switch (node->type) { + case NODE_INT_LIT: { + Instruction instr = make_instruction(OP_CONST_INT); + instr.operand.int_val = node->data.int_val; + return emit(chunk, instr); + } + case NODE_STRING_LIT: { + Instruction instr = make_instruction(OP_CONST_STRING); + instr.operand.str_index = add_constant(chunk, node->data.string_val); + return emit(chunk, instr); + } + case NODE_VAR: { + Instruction instr = make_instruction(OP_LOAD_VAR); + instr.operand.var_index = add_name(chunk, node->data.string_val); + return emit(chunk, instr); + } + case NODE_ASSIGN: { + compile_node(chunk, node->data.assign.value); + Instruction instr = make_instruction(OP_STORE_VAR); + instr.operand.var_index = add_name(chunk, node->data.assign.name); + return emit(chunk, instr); + } + case NODE_CALL: { + // Compilar cada argumento y pushear al stack + for (int i = 0; i < node->data.call.arg_count; i++) { + compile_node(chunk, node->data.call.args[i]); } - chunk->names[chunk->name_count++] = name; - return chunk->name_count - 1; -} + // Registrar el nombre de la funcion + Instruction instr = make_instruction(OP_CALL); + instr.operand.call.arg_count = node->data.call.arg_count; + instr.operand.call.name_index = add_name(chunk, node->data.call.name); + return emit(chunk, instr); + } + case NODE_BLOCK: { + int n = node->data.block.count; -int compile_node(Chunk *chunk, ASTNode* node) { - switch (node->type) { - case NODE_INT_LIT: { - Instruction instr = make_instruction(OP_CONST_INT); - instr.operand.int_val = node->data.int_val; - return emit(chunk, instr); - } - case NODE_STRING_LIT: { - Instruction instr = make_instruction(OP_CONST_STRING); - instr.operand.str_index = add_constant(chunk, node->data.string_val); - return emit(chunk, instr); - } - case NODE_VAR: { - Instruction instr = make_instruction(OP_LOAD_VAR); - instr.operand.var_index = add_name(chunk, node->data.string_val); - return emit(chunk, instr); - } - case NODE_ASSIGN: { - compile_node(chunk, node->data.assign.value); - Instruction instr = make_instruction(OP_STORE_VAR); - instr.operand.var_index = add_name(chunk, node->data.assign.name); - return emit(chunk, instr); - } - case NODE_CALL: { - // Compilar cada argumento y pushear al stack - for (int i=0; idata.call.arg_count; i++){ - compile_node(chunk, node->data.call.args[i]); - } + // NOP for gc + emit(chunk, make_instruction(OP_NOP)); - // Registrar el nombre de la funcion - Instruction instr = make_instruction(OP_CALL); - instr.operand.call.arg_count = node->data.call.arg_count; - instr.operand.call.name_index = add_name(chunk, node->data.call.name); - return emit(chunk, instr); - } - case NODE_BLOCK: { - int n = node->data.block.count; - - // NOP for gc - emit(chunk, make_instruction(OP_NOP)); - - for (int i=0; idata.block.stmts[i]); - } - - return 0; - } - case NODE_BINOP: { - int leftOffset = compile_node(chunk, node->data.binop.left); - int rightOffset = compile_node(chunk, node->data.binop.right); - - OpCode opCode; - switch (node->data.binop.op) { - case '+': - opCode = OP_ADD; - break; - case '-': - opCode = OP_SUB; - break; - case '*': - opCode = OP_MUL; - break; - case '/': - opCode = OP_DIV; - break; - case '>': - opCode = OP_CMP_GT; - break; - case '<': - opCode = OP_CMP_LT; - break; - default: - break; - } - emit(chunk, make_instruction(opCode)); - return 0; - } - case NODE_WHILE: { - int loop_start = chunk->code_count; - compile_node(chunk, node->data.while_loop.cond); - // jump if zero, zero = false - Instruction instr = make_instruction(OP_JUMP_IF_ZERO); - instr.operand.jump_target = -1; - int jump_offset = emit(chunk, instr); - - // compile body - compile_node(chunk, node->data.while_loop.body); - - instr = make_instruction(OP_JUMP); - instr.operand.jump_target = loop_start; - emit(chunk, instr); - - // Bachpatching - chunk->code[jump_offset].operand.jump_target = chunk->code_count; - break; - } - - case NODE_IF: { - // compile condition - compile_node(chunk, node->data.if_statement.cond); - - // add jump if zero - Instruction instr = make_instruction(OP_JUMP_IF_ZERO); - instr.operand.jump_target = -1; - int jump_offset = emit(chunk, instr); - - // compile body - compile_node(chunk, node->data.if_statement.body); - - chunk->code[jump_offset].operand.jump_target = chunk->code_count; - break; - } - - default: - break; + for (int i = 0; i < n; i++) { + compile_node(chunk, node->data.block.stmts[i]); } return 0; -} + } + case NODE_BINOP: { + int leftOffset = compile_node(chunk, node->data.binop.left); + int rightOffset = compile_node(chunk, node->data.binop.right); -Chunk* compile(ASTNode* root) { - // Create chunk - Chunk* chunk = (Chunk*) malloc(sizeof(Chunk)); + OpCode opCode; + switch (node->data.binop.op) { + case '+': + opCode = OP_ADD; + break; + case '-': + opCode = OP_SUB; + break; + case '*': + opCode = OP_MUL; + break; + case '/': + opCode = OP_DIV; + break; + case '>': + opCode = OP_CMP_GT; + break; + case '<': + opCode = OP_CMP_LT; + break; + default: + break; + } + emit(chunk, make_instruction(opCode)); + return 0; + } + case NODE_WHILE: { + int loop_start = chunk->code_count; + compile_node(chunk, node->data.while_loop.cond); + // jump if zero, zero = false + Instruction instr = make_instruction(OP_JUMP_IF_ZERO); + instr.operand.jump_target = -1; + int jump_offset = emit(chunk, instr); - // Set arrays to 0 - memset(chunk, 0, sizeof(Chunk)); + // compile body + compile_node(chunk, node->data.while_loop.body); - compile_node(chunk, root); - - Instruction instr; - instr.op = OP_HALT; + instr = make_instruction(OP_JUMP); + instr.operand.jump_target = loop_start; emit(chunk, instr); - return chunk; + // Bachpatching + chunk->code[jump_offset].operand.jump_target = chunk->code_count; + break; + } + + case NODE_IF: { + // compile condition + compile_node(chunk, node->data.if_statement.cond); + + // add jump if zero + Instruction instr = make_instruction(OP_JUMP_IF_ZERO); + instr.operand.jump_target = -1; + int jump_offset = emit(chunk, instr); + + // compile body + compile_node(chunk, node->data.if_statement.body); + + chunk->code[jump_offset].operand.jump_target = chunk->code_count; + break; + } + + case NODE_FN_DEF: { + // emitir jmp para ignorar la funcion por defecto + Instruction jump = make_instruction(OP_JUMP); + jump.operand.jump_target = -1; // backpatch despues + int jump_idx = emit(chunk, jump); + + // registrar entrypoint de la funcion + int entry = chunk->code_count; + FunctionEntry *fn = &chunk->functions[chunk->func_count++]; + fn->name = node->data.fn_def.name; + fn->entry_point = entry; + fn->param_count = node->data.fn_def.param_count; + fn->param_names = node->data.fn_def.params; + + // emitir store_var para cada parametro (orden inverso al stack) + for (int i = node->data.fn_def.param_count - 1; i >= 0; i--) { + Instruction store = make_instruction(OP_STORE_VAR); + store.operand.var_index = add_name(chunk, node->data.fn_def.params[i]); + emit(chunk, store); + } + + // compilar el cuerpo + compile_node(chunk, node->data.fn_def.body); + + // emitir el return implicito (por si no hay return explicito) + emit(chunk, make_instruction(OP_RETURN)); + + // backpatch jump + chunk->code[jump_idx].operand.jump_target = chunk->code_count; + break; + } + default: + break; + } + + return 0; } -void print_chunk(Chunk* chunk) { - printf("=== Names (%d) ===\n", chunk->name_count); - for (int i = 0; i < chunk->name_count; i++) { - printf(" [%d] %s\n", i, chunk->names[i]); - } +Chunk *compile(ASTNode *root) { + // Create chunk + Chunk *chunk = (Chunk *)malloc(sizeof(Chunk)); - printf("=== Constants (%d) ===\n", chunk->const_count); - for (int i = 0; i < chunk->const_count; i++) { - printf(" [%d] \"%s\"\n", i, chunk->constants[i]); - } + // Set arrays to 0 + memset(chunk, 0, sizeof(Chunk)); - printf("=== Bytecode (%d instructions) ===\n", chunk->code_count); - for (int i = 0; i < chunk->code_count; i++) { - Instruction instr = chunk->code[i]; - printf("%04d ", i); - switch (instr.op) { - case OP_CONST_INT: printf("CONST_INT %d", instr.operand.int_val); break; - case OP_CONST_STRING: printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index, chunk->constants[instr.operand.str_index]); break; - case OP_POP: printf("POP"); break; - case OP_ADD: printf("ADD"); break; - case OP_SUB: printf("SUB"); break; - case OP_MUL: printf("MUL"); break; - case OP_DIV: printf("DIV"); break; - case OP_NEG: printf("NEG"); break; - case OP_CMP_LT: printf("CMP_LT"); break; - case OP_CMP_GT: printf("CMP_GT"); break; - case OP_LOAD_VAR: printf("LOAD_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break; - case OP_STORE_VAR: printf("STORE_VAR [%d] %s", instr.operand.var_index, chunk->names[instr.operand.var_index]); break; - case OP_JUMP: printf("JUMP -> %04d", instr.operand.jump_target); break; - case OP_JUMP_IF_ZERO: printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target); break; - case OP_CALL: printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index], instr.operand.call.arg_count); break; - case OP_NOP: printf("NOP"); break; - case OP_HALT: printf("HALT"); break; - default: printf("UNKNOWN op=%d", instr.op); break; - } - printf("\n"); + compile_node(chunk, root); + + Instruction instr; + instr.op = OP_HALT; + emit(chunk, instr); + + return chunk; +} + +void print_chunk(Chunk *chunk) { + printf("=== Names (%d) ===\n", chunk->name_count); + for (int i = 0; i < chunk->name_count; i++) { + printf(" [%d] %s\n", i, chunk->names[i]); + } + + printf("=== Constants (%d) ===\n", chunk->const_count); + for (int i = 0; i < chunk->const_count; i++) { + printf(" [%d] \"%s\"\n", i, chunk->constants[i]); + } + + printf("=== Bytecode (%d instructions) ===\n", chunk->code_count); + for (int i = 0; i < chunk->code_count; i++) { + Instruction instr = chunk->code[i]; + printf("%04d ", i); + switch (instr.op) { + case OP_CONST_INT: + printf("CONST_INT %d", instr.operand.int_val); + break; + case OP_CONST_STRING: + printf("CONST_STRING [%d] \"%s\"", instr.operand.str_index, + chunk->constants[instr.operand.str_index]); + break; + case OP_POP: + printf("POP"); + break; + case OP_ADD: + printf("ADD"); + break; + case OP_SUB: + printf("SUB"); + break; + case OP_MUL: + printf("MUL"); + break; + case OP_DIV: + printf("DIV"); + break; + case OP_NEG: + printf("NEG"); + break; + case OP_CMP_LT: + printf("CMP_LT"); + break; + case OP_CMP_GT: + printf("CMP_GT"); + break; + case OP_LOAD_VAR: + printf("LOAD_VAR [%d] %s", instr.operand.var_index, + chunk->names[instr.operand.var_index]); + break; + case OP_STORE_VAR: + printf("STORE_VAR [%d] %s", instr.operand.var_index, + chunk->names[instr.operand.var_index]); + break; + case OP_JUMP: + printf("JUMP -> %04d", instr.operand.jump_target); + break; + case OP_JUMP_IF_ZERO: + printf("JUMP_IF_ZERO -> %04d", instr.operand.jump_target); + break; + case OP_CALL: + printf("CALL %s(%d args)", chunk->names[instr.operand.call.name_index], + instr.operand.call.arg_count); + break; + case OP_RETURN: + printf("RETURN"); + break; + case OP_NOP: + printf("NOP"); + break; + case OP_HALT: + printf("HALT"); + break; + default: + printf("UNKNOWN op=%d", instr.op); + break; } - printf("=== End ===\n"); + printf("\n"); + } + printf("=== End ===\n"); } #endif \ No newline at end of file diff --git a/src/backend/bytecode/opcodes.h b/src/backend/bytecode/opcodes.h index 22efe42..b2b62ff 100644 --- a/src/backend/bytecode/opcodes.h +++ b/src/backend/bytecode/opcodes.h @@ -13,6 +13,7 @@ typedef enum { OP_JUMP, // salto incondicional OP_JUMP_IF_ZERO, // pop -> si false, saltar OP_CALL, // llamar built-in por indice de nombre + OP_RETURN, // retornar de funcion (pop call frame) OP_NOP, OP_HALT, } OpCode; diff --git a/src/backend/bytecode/vm.h b/src/backend/bytecode/vm.h index 2a54002..5726236 100644 --- a/src/backend/bytecode/vm.h +++ b/src/backend/bytecode/vm.h @@ -5,6 +5,13 @@ #include "compiler.h" #include "value.h" +typedef struct { + int return_ip; // a donde volver + int saved_sp; // base del stack + Value saved_vars[256]; // variables del caller (snapshot) + int saved_var_set[256]; +} CallFrame; + typedef struct { Chunk *chunk; int ip; // instruction pointer @@ -12,6 +19,10 @@ typedef struct { int sp; // stack pointer Value vars[256]; // variables por indice int var_set[256]; // 0=no definida, 1=definida + + CallFrame frames[64]; + int frame_count; + JLANG_memory_allocator *allocator; } VM; @@ -88,8 +99,30 @@ void run_vm(VM *vm) { case OP_CALL: { int nameIdx = instr.operand.call.name_index; - char *name = vm->chunk->names[nameIdx]; + + // check if is an user function + FunctionEntry *fn = NULL; + for (int i = 0; i < vm->chunk->func_count; i++) { + if (strcmp(vm->chunk->functions[i].name, name) == 0) { + fn = &vm->chunk->functions[i]; + break; + } + } + + if (fn != NULL) { + // Guardar estado actual en call frame + CallFrame *frame = &vm->frames[vm->frame_count++]; + frame->return_ip = vm->ip + 1; // volver a la siguiente instruccion + frame->saved_sp = vm->sp - fn->param_count; + memcpy(frame->saved_vars, vm->vars, sizeof(vm->vars)); + memcpy(frame->saved_var_set, vm->var_set, sizeof(vm->var_set)); + + // Saltar al entrypoint + vm->ip = fn->entry_point; + continue; // no hacer ip++ + } + if (strcmp(name, "print") == 0 || strcmp(name, "println") == 0) { int nParams = instr.operand.call.arg_count; @@ -141,6 +174,30 @@ void run_vm(VM *vm) { break; } + case OP_RETURN: { + // Captrurar valor de retorno si hay alguno en el stack + Value return_val = {0}; + int has_return = 0; + if (vm->sp > vm->frames[vm->frame_count-1].saved_sp) { + return_val = vm->stack[--vm->sp]; + has_return = 1; + } + + // Restaurar call frame + CallFrame *frame = &vm->frames[--vm->frame_count]; + vm->ip = frame->return_ip; + vm->sp = frame->saved_sp; + memcpy(vm->vars, frame->saved_vars, sizeof(vm->vars)); + memcpy(vm->var_set, frame->saved_var_set, sizeof(vm->var_set)); + + // Push return value + if (has_return) { + vm->stack[vm->sp++] = return_val; + } + + continue; + } + case OP_ADD: { // Pop from stack Value var2 = vm->stack[--vm->sp]; @@ -275,9 +332,10 @@ void run_vm(VM *vm) { } gc_collect(vm->allocator, roots, root_count); - + break; } + default: break; } diff --git a/src/frontend/lexer.h b/src/frontend/lexer.h index 9a7598e..d1300ad 100644 --- a/src/frontend/lexer.h +++ b/src/frontend/lexer.h @@ -17,6 +17,8 @@ typedef enum { TOK_ID, // x, foo, mi_var TOK_IF, // if TOK_WHILE, // while + TOK_FN, // fn + TOK_RETURN, // return // Operadores TOK_ASSIGN, // = @@ -153,6 +155,10 @@ Token *tokenize(const char *source, int *token_count) { tokens[count++] = make_token(TOK_IF, word); } else if (strcmp(word, "while") == 0) { tokens[count++] = make_token(TOK_WHILE, word); + } else if (strcmp(word, "fn") == 0) { + tokens[count++] = make_token(TOK_FN, word); + } else if (strcmp(word, "return") == 0) { + tokens[count++] = make_token(TOK_RETURN, word); } else { tokens[count++] = make_token(TOK_ID, word); } diff --git a/src/frontend/parser.h b/src/frontend/parser.h index f9fc03c..cf200aa 100644 --- a/src/frontend/parser.h +++ b/src/frontend/parser.h @@ -17,6 +17,8 @@ typedef enum { NODE_WHILE, // while cond: bloque NODE_BLOCK, // secuencia de statements NODE_CALL, + NODE_FN_DEF, // definicion de funcion + NODE_RETURN, // return expr } NodeType; typedef struct ASTNode { @@ -53,6 +55,15 @@ typedef struct ASTNode { struct ASTNode **args; int arg_count; } call; + struct { + char *name; + char **params; + int param_count; + struct ASTNode *body; + } fn_def; // NODE_FN_DEF + struct { + struct ASTNode *value; // expresion de retorno + } ret; // NODE_RETURN } data; } ASTNode; @@ -68,7 +79,6 @@ ASTNode *parse_expr(Token *tokens); ASTNode *parse_term(Token *tokens); ASTNode *parse_factor(Token *tokens); - ASTNode *parse_factor(Token *tokens) { if (tokens[pos].type == TOK_INT) { ASTNode *node = make_node(NODE_INT_LIT); @@ -274,6 +284,57 @@ ASTNode *parse_statement(Token *tokens) { return node; } + if (tokens[pos].type == TOK_FN) { + pos++; // consumir "fn" + char* name = tokens[pos].value; + pos++; // consumir name + pos++; // consumir "(" + + // Parsear parametros (max 16) + char **params = malloc(sizeof(char *) * 16); + int param_count = 0; + if (tokens[pos].type != TOK_RPAREN) { + params[param_count++] = tokens[pos].value; + pos++; + while (tokens[pos].type == TOK_COMMA) { + pos++; // consumir "," + params[param_count++] = tokens[pos].value; + pos++; + } + } + pos++; // consumir ")" + pos++; // consumir ":" + pos++; // consumir NEWLINE + pos++; // consumir INDENT + + // Parsear bloque de statements hasta DEDENT + ASTNode *body = make_node(NODE_BLOCK); + body->data.block.stmts = (ASTNode **)malloc(sizeof(ASTNode *) * 256); + body->data.block.count = 0; + while (tokens[pos].type != TOK_DEDENT) { + body->data.block.stmts[body->data.block.count++] = + parse_statement(tokens); + if (tokens[pos].type == TOK_NEWLINE) { + pos++; + } + } + pos++; // Consumir DEDENT + + ASTNode*node = make_node(NODE_FN_DEF); + node->data.fn_def.name = name; + node->data.fn_def.params = params; + node->data.fn_def.param_count = param_count; + node->data.fn_def.body = body; + return node; + } + + if (tokens[pos].type == TOK_RETURN) { + pos++; + ASTNode *node = make_node(NODE_RETURN); + node->data.ret.value = parse_expr(tokens); + return node; + } + printf("ERROR: statement inesperado\n"); exit(1); } @@ -366,6 +427,22 @@ void ast_print(ASTNode *node, const char *prefix, int is_last) { } break; + case NODE_FN_DEF: + printf("NODE_FN_DEF(\"%s\"", node->data.fn_def.name); + for (int i = 0; i < node->data.fn_def.param_count; i++) { + printf(", %s", node->data.fn_def.params[i]); + } + printf(")\n"); + ast_print(node->data.fn_def.body, new_prefix, 1); + break; + + case NODE_RETURN: + printf("NODE_RETURN\n"); + if (node->data.ret.value) { + ast_print(node->data.ret.value, new_prefix, 1); + } + break; + default: printf("UNKNOWN\n"); break;