Add bytecode VM backend (compile AST to bytecodes + stack-based VM)

New execution mode: ./run vm <file.j> compiles AST to bytecodes and
runs them in a while/switch loop. Ints/floats live on the stack (no
heap allocation), ~7.7x faster than the tree-walking interpreter.

Implements: opcodes, compiler with backpatching (if/while), stack VM
with arithmetic, comparisons, variables, strings, and print/println.
Reorganizes backend into src/backend/eval/ and src/backend/bytecode/.
This commit is contained in:
Jose Luis Montañes Ojados
2026-02-18 01:01:22 +01:00
parent 2c91cbb561
commit 4442886afa
11 changed files with 776 additions and 58 deletions

View File

@@ -0,0 +1,222 @@
#ifndef JLANG_COMPILER_H
#define JLANG_COMPILER_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "opcodes.h"
#include "../../frontend/parser.h"
/*
 * A compiled program: the instruction stream plus the two tables that
 * instruction operands index into (string constants and names).
 * All three arrays have fixed capacity; the *_count fields say how many
 * leading slots of each array are in use.
 */
typedef struct {
Instruction code[4096]; // bytecode instructions, in execution order
int code_count; // number of instructions stored in `code`
char *constants[256]; // pool of string literals
int const_count; // number of entries stored in `constants`
char *names[256]; // name table (variables + functions)
int name_count; // number of entries stored in `names`
} Chunk;
/*
 * Append `instr` to the end of the chunk's instruction stream.
 *
 * chunk: chunk being built.
 * instr: instruction to append (copied by value).
 *
 * Returns the index of the instruction just written; the compiler keeps
 * this index around to backpatch jump targets later.
 *
 * The instruction array has a fixed capacity. Writing past it would
 * overwrite the fields that follow it inside Chunk, so running out of room
 * is reported and treated as a fatal compile error instead.
 */
int emit(Chunk* chunk, Instruction instr) {
    const int capacity = (int)(sizeof chunk->code / sizeof chunk->code[0]);

    if (chunk->code_count >= capacity) {
        fprintf(stderr, "error: program too large (more than %d instructions)\n",
                capacity);
        exit(1);
    }

    chunk->code[chunk->code_count] = instr;
    return chunk->code_count++;
}
/*
 * Intern a string literal in the chunk's constant pool.
 *
 * chunk: chunk being built.
 * str:   NUL-terminated string. The pointer itself is stored (no copy is
 *        made), so it must stay valid for as long as the chunk is used.
 *
 * Returns the pool index of the string. If an equal string (by strcmp) is
 * already in the pool, its existing index is returned and nothing is added.
 *
 * The pool has a fixed capacity; exceeding it is reported and treated as a
 * fatal compile error rather than writing past the end of the array.
 */
int add_constant(Chunk* chunk, char* str) {
    const int capacity =
        (int)(sizeof chunk->constants / sizeof chunk->constants[0]);

    // Reuse an existing slot when the same literal was seen before.
    for (int i = 0; i < chunk->const_count; i++) {
        if (strcmp(chunk->constants[i], str) == 0) {
            return i;
        }
    }

    if (chunk->const_count >= capacity) {
        fprintf(stderr, "error: too many string constants (limit is %d)\n",
                capacity);
        exit(1);
    }

    chunk->constants[chunk->const_count] = str;
    return chunk->const_count++;
}
/*
 * Build an instruction with the given opcode and an all-zero operand.
 *
 * Callers that need an operand fill in the relevant `operand` member
 * afterwards. Zero-initialising first means opcodes without an operand
 * (ADD, HALT, ...) do not carry indeterminate bytes into the chunk.
 */
Instruction make_instruction(OpCode op) {
    Instruction instr = {0};
    instr.op = op;
    return instr;
}
/*
 * Intern an identifier (variable or function name) in the chunk's name
 * table.
 *
 * chunk: chunk being built.
 * name:  NUL-terminated identifier. The pointer itself is stored (no copy
 *        is made), so it must stay valid for as long as the chunk is used.
 *
 * Returns the table index of the name. If an equal name (by strcmp) is
 * already present, its existing index is returned and nothing is added.
 *
 * The table has a fixed capacity; exceeding it is reported and treated as a
 * fatal compile error rather than writing past the end of the array.
 */
int add_name(Chunk* chunk, char* name) {
    const int capacity = (int)(sizeof chunk->names / sizeof chunk->names[0]);

    // Reuse an existing slot when the same identifier was seen before.
    for (int i = 0; i < chunk->name_count; i++) {
        if (strcmp(chunk->names[i], name) == 0) {
            return i;
        }
    }

    if (chunk->name_count >= capacity) {
        fprintf(stderr, "error: too many distinct names (limit is %d)\n",
                capacity);
        exit(1);
    }

    chunk->names[chunk->name_count] = name;
    return chunk->name_count++;
}
int compile_node(Chunk *chunk, ASTNode* node) {
switch (node->type) {
case NODE_INT_LIT: {
Instruction instr = make_instruction(OP_CONST_INT);
instr.operand.int_val = node->data.int_val;
return emit(chunk, instr);
}
case NODE_STRING_LIT: {
Instruction instr = make_instruction(OP_CONST_STRING);
instr.operand.str_index = add_constant(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_VAR: {
Instruction instr = make_instruction(OP_LOAD_VAR);
instr.operand.var_index = add_name(chunk, node->data.string_val);
return emit(chunk, instr);
}
case NODE_ASSIGN: {
compile_node(chunk, node->data.assign.value);
Instruction instr = make_instruction(OP_STORE_VAR);
instr.operand.var_index = add_name(chunk, node->data.assign.name);
return emit(chunk, instr);
}
case NODE_CALL: {
// Compilar cada argumento y pushear al stack
for (int i=0; i<node->data.call.arg_count; i++){
compile_node(chunk, node->data.call.args[i]);
}
// Registrar el nombre de la funcion
Instruction instr = make_instruction(OP_CALL);
instr.operand.call.arg_count = node->data.call.arg_count;
instr.operand.call.name_index = add_name(chunk, node->data.call.name);
return emit(chunk, instr);
}
case NODE_BLOCK: {
int n = node->data.block.count;
for (int i=0; i<n; i++){
compile_node(chunk, node->data.block.stmts[i]);
}
return 0;
}
case NODE_BINOP: {
int leftOffset = compile_node(chunk, node->data.binop.left);
int rightOffset = compile_node(chunk, node->data.binop.right);
OpCode opCode;
switch (node->data.binop.op) {
case '+':
opCode = OP_ADD;
break;
case '-':
opCode = OP_SUB;
break;
case '*':
opCode = OP_MUL;
break;
case '/':
opCode = OP_DIV;
break;
case '>':
opCode = OP_CMP_GT;
break;
case '<':
opCode = OP_CMP_LT;
break;
default:
break;
}
emit(chunk, make_instruction(opCode));
return 0;
}
case NODE_WHILE: {
int loop_start = chunk->code_count;
compile_node(chunk, node->data.while_loop.cond);
// jump if zero, zero = false
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// compile body
compile_node(chunk, node->data.while_loop.body);
instr = make_instruction(OP_JUMP);
instr.operand.jump_target = loop_start;
emit(chunk, instr);
// Bachpatching
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
case NODE_IF: {
// compile condition
compile_node(chunk, node->data.if_statement.cond);
// add jump if zero
Instruction instr = make_instruction(OP_JUMP_IF_ZERO);
instr.operand.jump_target = -1;
int jump_offset = emit(chunk, instr);
// compile body
compile_node(chunk, node->data.if_statement.body);
chunk->code[jump_offset].operand.jump_target = chunk->code_count;
break;
}
default:
break;
}
return 0;
}
/*
 * Compile a whole AST into a freshly allocated chunk.
 *
 * root: root node of the program's AST.
 *
 * Returns a heap-allocated Chunk whose instruction stream ends with
 * OP_HALT. The caller owns it and releases it with free().
 *
 * Allocation failure is reported and treated as fatal; previously the
 * NULL result was passed straight to memset.
 */
Chunk* compile(ASTNode* root) {
    // calloc zero-fills, so every count starts at 0 and every slot is empty.
    Chunk *chunk = calloc(1, sizeof *chunk);
    if (chunk == NULL) {
        fprintf(stderr, "error: out of memory allocating bytecode chunk\n");
        exit(1);
    }

    compile_node(chunk, root);

    // Terminate the program so the VM loop has a defined stopping point.
    Instruction halt = {0};
    halt.op = OP_HALT;
    emit(chunk, halt);

    return chunk;
}
/*
 * Dump a chunk to stdout in human-readable form: the name table, the
 * string-constant pool, and then one line per instruction showing its
 * index, mnemonic and operand (with the referenced name or constant
 * resolved where the operand is a table index).
 */
void print_chunk(Chunk* chunk) {
    int pos;

    printf("=== Names (%d) ===\n", chunk->name_count);
    for (pos = 0; pos < chunk->name_count; pos++) {
        printf(" [%d] %s\n", pos, chunk->names[pos]);
    }

    printf("=== Constants (%d) ===\n", chunk->const_count);
    for (pos = 0; pos < chunk->const_count; pos++) {
        printf(" [%d] \"%s\"\n", pos, chunk->constants[pos]);
    }

    printf("=== Bytecode (%d instructions) ===\n", chunk->code_count);
    pos = 0;
    while (pos < chunk->code_count) {
        const Instruction *ins = &chunk->code[pos];

        printf("%04d ", pos);
        switch (ins->op) {
        case OP_CONST_INT:
            printf("CONST_INT %d", ins->operand.int_val);
            break;
        case OP_CONST_STRING: {
            int slot = ins->operand.str_index;
            printf("CONST_STRING [%d] \"%s\"", slot, chunk->constants[slot]);
            break;
        }
        case OP_POP:
            printf("POP");
            break;
        case OP_ADD:
            printf("ADD");
            break;
        case OP_SUB:
            printf("SUB");
            break;
        case OP_MUL:
            printf("MUL");
            break;
        case OP_DIV:
            printf("DIV");
            break;
        case OP_NEG:
            printf("NEG");
            break;
        case OP_CMP_LT:
            printf("CMP_LT");
            break;
        case OP_CMP_GT:
            printf("CMP_GT");
            break;
        case OP_LOAD_VAR: {
            int slot = ins->operand.var_index;
            printf("LOAD_VAR [%d] %s", slot, chunk->names[slot]);
            break;
        }
        case OP_STORE_VAR: {
            int slot = ins->operand.var_index;
            printf("STORE_VAR [%d] %s", slot, chunk->names[slot]);
            break;
        }
        case OP_JUMP:
            printf("JUMP -> %04d", ins->operand.jump_target);
            break;
        case OP_JUMP_IF_ZERO:
            printf("JUMP_IF_ZERO -> %04d", ins->operand.jump_target);
            break;
        case OP_CALL: {
            const char *callee = chunk->names[ins->operand.call.name_index];
            printf("CALL %s(%d args)", callee, ins->operand.call.arg_count);
            break;
        }
        case OP_NOP:
            printf("NOP");
            break;
        case OP_HALT:
            printf("HALT");
            break;
        default:
            printf("UNKNOWN op=%d", ins->op);
            break;
        }
        printf("\n");

        pos++;
    }

    printf("=== End ===\n");
}
#endif

View File

@@ -0,0 +1,36 @@
#ifndef JLANG_OPCODES_H
#define JLANG_OPCODES_H
/* Operation codes understood by the bytecode compiler and VM. */
typedef enum {
OP_CONST_INT, // push an immediate integer
OP_CONST_STRING, // push a string from the constant pool (allocated on the heap)
OP_POP, // discard the top of the stack
OP_ADD, OP_SUB, OP_MUL, OP_DIV, // arithmetic
OP_NEG, // unary negation
OP_CMP_LT, OP_CMP_GT, // comparison -> push 0 or 1
OP_LOAD_VAR, // push a variable, selected by index
OP_STORE_VAR, // pop -> store into a variable, selected by index
OP_JUMP, // unconditional jump
OP_JUMP_IF_ZERO, // pop -> jump if the value is false (zero)
OP_CALL, // call a built-in, selected by name index
OP_NOP, OP_HALT, // OP_HALT stops the VM loop
} OpCode;
/*
 * One bytecode instruction: an opcode plus a single operand.
 * Which member of `operand` is meaningful depends on `op`, as noted on
 * each member; opcodes not listed there take no operand.
 */
typedef struct
{
OpCode op;
union
{
int int_val; // OP_CONST_INT: the integer to push
int str_index; // OP_CONST_STRING: index into the constant pool
int var_index; // OP_LOAD_VAR, OP_STORE_VAR: index of the variable
int jump_target; // OP_JUMP, OP_JUMP_IF_ZERO: instruction index to continue at
struct
{
int name_index; // index of the callee's name in the name table
int arg_count; // number of arguments compiled before the call
} call; // OP_CALL
} operand;
} Instruction;
#endif

View File

@@ -0,0 +1,26 @@
#ifndef JLANG_VALUE_H
#define JLANG_VALUE_H
#include "opcodes.h"
#include <stdlib.h>
/*
 * Tag saying which member of Value's `as` union is in use.
 * VAL_INT is the first enumerator (0), so a zero-initialised Value is an
 * integer 0.
 */
typedef enum
{
VAL_INT, // as.int_val holds the value
VAL_FLOAT, // presumably as.float_val holds the value — no visible code produces it yet
VAL_OBJ, // as.heap_offset refers to an object in the allocator's heap
VAL_NONE, // NOTE(review): not used by any visible code; presumably "no value"
} ValueType;
/*
 * Tagged runtime value. `type` says which member of `as` is meaningful.
 * Integers are stored inline; objects are referred to by heap offset.
 */
typedef struct
{
ValueType type;
union
{
int int_val; // used when type is VAL_INT
double float_val; // presumably used when type is VAL_FLOAT — TODO confirm
size_t heap_offset; // used when type is VAL_OBJ: for strings, lists
} as;
} Value;
#endif

220
src/backend/bytecode/vm.h Normal file
View File

@@ -0,0 +1,220 @@
#ifndef JLANG_VM_H
#define JLANG_VM_H
#include "../../memory/gc.h"
#include "compiler.h"
#include "value.h"
/* Execution state of the bytecode virtual machine. */
typedef struct {
Chunk *chunk; // compiled program being executed
int ip; // instruction pointer: index into chunk->code
Value stack[1024]; // operand stack
int sp; // stack pointer: index of the next free stack slot
Value vars[256]; // variables, by index
int var_set[256]; // 0 = not defined, 1 = defined
JLANG_memory_allocator *allocator; // allocator handed to the object helpers (strings, printing)
} VM;
void run_vm(VM *vm) {
while (1) {
Instruction instr = vm->chunk->code[vm->ip];
switch (instr.op) {
case OP_HALT:
// Stop vm
return;
case OP_JUMP: {
// Go to instruction
vm->ip = instr.operand.jump_target;
continue;
}
case OP_JUMP_IF_ZERO: {
// pop from stack
Value var1 = vm->stack[--vm->sp];
if (var1.as.int_val == 0) {
vm->ip = instr.operand.jump_target;
continue;
}
break;
}
case OP_CONST_INT: {
// push value to stack
Value v = {0};
v.type = VAL_INT;
v.as.int_val = instr.operand.int_val;
vm->stack[vm->sp++] = v;
break;
}
case OP_CONST_STRING: {
// Create obj
size_t strOffsetHeap = obj_new_string(
vm->allocator, vm->chunk->constants[instr.operand.str_index]);
// Push to stack
Value v = {0};
v.type = VAL_OBJ;
v.as.heap_offset = strOffsetHeap;
vm->stack[vm->sp++] = v;
break;
}
case OP_STORE_VAR: {
// pop del stack
Value v = vm->stack[--vm->sp];
int idx = instr.operand.var_index;
// store vm->vars and mark vm->var_set
vm->vars[idx] = v;
vm->var_set[idx] = 1;
break;
}
case OP_LOAD_VAR: {
// get from vm->var
int idx = instr.operand.var_index;
Value v = vm->vars[idx];
// push to stack
vm->stack[vm->sp++] = v;
break;
}
case OP_CALL: {
int nameIdx = instr.operand.call.name_index;
char *name = vm->chunk->names[nameIdx];
if (strcmp(name, "print") == 0) {
Value v = vm->stack[--vm->sp];
switch (v.type) {
case VAL_INT:
printf("%d", v.as.int_val);
break;
case VAL_OBJ: {
// Get object from heap
obj_print(vm->allocator, v.as.heap_offset, "", "");
}
default:
break;
}
} else if (strcmp(name, "println") == 0) {
Value v = vm->stack[--vm->sp];
switch (v.type) {
case VAL_INT:
printf("%d\n", v.as.int_val);
break;
case VAL_OBJ: {
// Get object from heap
obj_print(vm->allocator, v.as.heap_offset, "", "\n");
}
default:
break;
}
} else if (strcmp(name, "debug_heap") == 0) {
JLANG_visualize(vm->allocator);
break;
} else {
printf("error: function '%s' not found!\n", name);
return;
}
break;
}
case OP_ADD: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val + var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
case OP_SUB: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val - var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
case OP_MUL: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val * var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
case OP_DIV: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val / var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
case OP_CMP_GT: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val > var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
case OP_CMP_LT: {
// Pop from stack
Value var2 = vm->stack[--vm->sp];
Value var1 = vm->stack[--vm->sp];
Value result = {0};
result.type = VAL_INT;
result.as.int_val = var1.as.int_val < var2.as.int_val;
// Push to stack
vm->stack[vm->sp++] = result;
break;
}
default:
break;
}
// go to next instruction
vm->ip++;
}
}
#endif

View File

@@ -1,8 +1,8 @@
#ifndef JLANG_EVAL_H
#define JLANG_EVAL_H
#include "../frontend/parser.h"
#include "../memory/gc.h"
#include "../../frontend/parser.h"
#include "../../memory/gc.h"
typedef struct {
char *name;
@@ -139,7 +139,7 @@ size_t eval(ASTNode *node, Environment *env, void *allocator, int debug,
if (strcmp(node->data.call.name, "print") == 0) {
if (node->data.call.arg_count > 0) {
size_t val = eval(node->data.call.args[0], env, allocator, debug, gc);
obj_print(allocator, val, "");
obj_print(allocator, val, "", "");
return val;
}
@@ -149,7 +149,7 @@ size_t eval(ASTNode *node, Environment *env, void *allocator, int debug,
if (strcmp(node->data.call.name, "println") == 0) {
if (node->data.call.arg_count > 0) {
size_t val = eval(node->data.call.args[0], env, allocator, debug, gc);
obj_print(allocator, val, "");
obj_print(allocator, val, "", "");
printf("\n");
return val;
}

View File

@@ -335,6 +335,9 @@ void ast_print(ASTNode *node, const char *prefix, int is_last) {
ast_print(node->data.while_loop.cond, new_prefix, 0);
ast_print(node->data.while_loop.body, new_prefix, 1);
break;
case NODE_NOP:
printf("NODE_NOOP\n");
break;
case NODE_CALL:
printf("NODE_CALL(\"%s\")\n", node->data.call.name);

View File

@@ -1,15 +1,17 @@
#include "vm/eval.h"
#include "backend/eval/eval.h"
#include "backend/bytecode/compiler.h"
#include "backend/bytecode/vm.h"
int main(int argc, char **argv) {
if (argc != 2) {
printf("usage: %s <path to .j file>\n", argv[0]);
if (argc != 3) {
printf("usage: %s eval|vm|asm <path to .j file>\n", argv[0]);
exit(1);
}
// Creamos un allocator
JLANG_memory_allocator *allocPtr = JLANG_CreateAllocator();
// Read file from argv
FILE *fptr = fopen(argv[1], "r");
FILE *fptr = fopen(argv[2], "r");
if (fptr == NULL) {
printf("error leyendo: %s\n", argv[1]);
exit(1);
@@ -32,12 +34,27 @@ int main(int argc, char **argv) {
printf("totalTokens=%d\n", totalTokens);
ASTNode *block = parse(tokens, totalTokens);
ast_debug(block);
Environment env = {0};
eval(block, &env, allocPtr, 0, 1);
if (strcmp(argv[1], "eval") == 0) {
Environment env = {0};
eval(block, &env, allocPtr, 0, 1);
// printf("\nheapSize=%zu\n", allocPtr->size);
// JLANG_visualize(allocPtr);
} else if (strcmp(argv[1], "vm") == 0){
Chunk* chunk = compile(block);
VM vm = {0};
vm.chunk = chunk;
vm.allocator = allocPtr;
print_chunk(chunk);
run_vm(&vm);
// printf("\n");
// JLANG_visualize(allocPtr);
} else {
printf("panic: WIP\n");
}
printf("heapSize=%zu\n", allocPtr->size);
JLANG_visualize(allocPtr);
return 0;
}

View File

@@ -94,7 +94,7 @@ void obj_free(void *allocator, size_t offset) {
JLANG_free(allocator, offset);
}
void obj_print(void *allocator, size_t offset, const char *preffix) {
void obj_print(void *allocator, size_t offset, const char *preffix, const char *suffix) {
Object *obj = (Object *)JLANG_RESOLVE(allocator, offset);
switch (obj->type) {
@@ -117,7 +117,7 @@ void obj_print(void *allocator, size_t offset, const char *preffix) {
if (items[i] == offset) {
printf("<self:0x%zu>", offset);
} else {
obj_print(allocator, items[i], "\"");
obj_print(allocator, items[i], "\"", "\"");
}
if (i < obj->data.list_val.capacity - 1) {
@@ -133,8 +133,8 @@ void obj_print(void *allocator, size_t offset, const char *preffix) {
}
printf("%s", (char *)JLANG_RESOLVE(allocator, obj->data.string_val.chars));
if (strcmp(preffix, "") != 0) {
printf("%s", preffix);
if (strcmp(suffix, "") != 0) {
printf("%s", suffix);
}
break;
default: