Files
j-lang/src/frontend/parser.h
Jose Luis Montañes Ojados 4442886afa Add bytecode VM backend (compile AST to bytecodes + stack-based VM)
New execution mode: ./run vm <file.j> compiles AST to bytecodes and
runs them in a while/switch loop. Ints/floats live on the stack (no
heap allocation), ~7.7x faster than the tree-walking interpreter.

Implements: opcodes, compiler with backpatching (if/while), stack VM
with arithmetic, comparisons, variables, strings, and print/println.
Reorganizes backend into src/backend/eval/ and src/backend/bytecode/.
2026-02-18 01:01:22 +01:00

359 lines
9.4 KiB
C

#ifndef JLANG_PARSER_H
#define JLANG_PARSER_H
/*
 * Parser: converts the token array produced by the lexer into an abstract
 * syntax tree (AST).
 */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include "lexer.h"
/* Kinds of AST node; the kind selects which member of ASTNode.data is used. */
typedef enum {
NODE_INT_LIT, // integer literal
NODE_STRING_LIT, // string literal
NODE_VAR, // variable reference
NODE_ASSIGN, // assignment: x = expr
NODE_BINOP, // binary operation: a + b
NODE_NOP, // no-op (produced by the parser for `//` comment lines)
NODE_IF, // if cond: block
NODE_WHILE, // while cond: block
NODE_BLOCK, // sequence of statements
NODE_CALL, // function call: name(arg, ...)
} NodeType;
/*
 * One node of the syntax tree. `type` tells which member of `data` is valid.
 * The char pointers below are assigned straight from Token.value by the
 * parser in this header (no copy is made).
 */
typedef struct ASTNode {
NodeType type;
union {
int int_val; // NODE_INT_LIT
char *string_val; // NODE_STRING_LIT text; the parser also stores the NODE_VAR name here
struct {
char *name; // variable being assigned
struct ASTNode *value; // expression on the right-hand side
} assign; // NODE_ASSIGN
struct {
char op; // first character of the operator token, e.g. '+', '<'
struct ASTNode *left;
struct ASTNode *right;
} binop; // NODE_BINOP
struct {
struct ASTNode *expr;
} print; // NOTE(review): labelled NODE_PRINT, but NodeType has no such value and the parser in this header never sets it
struct {
struct ASTNode **stmts; // array of statement nodes
int count; // number of entries used in stmts
} block; // NODE_BLOCK
struct {
struct ASTNode *cond;
struct ASTNode *body; // NODE_BLOCK holding the loop body
} while_loop; // NODE_WHILE
struct {
struct ASTNode *cond;
struct ASTNode *body; // NODE_BLOCK holding the conditional body
} if_statement; // NODE_IF (same member layout as while_loop)
struct {
char *name; // name of the function being called
struct ASTNode **args; // array of argument expressions
int arg_count; // number of entries used in args
} call; // NODE_CALL
} data;
} ASTNode;
/*
 * Allocates a new AST node of the given type.
 *
 * The payload union is zero-filled, so child pointers and counts start out
 * as 0 until the caller fills them in. If the allocation fails the function
 * prints an error and exits with status 1, the same way the parser reports
 * its other fatal errors; it never returns NULL.
 *
 * This header provides no function to release nodes.
 */
ASTNode *make_node(NodeType type) {
    ASTNode *node = calloc(1, sizeof *node);
    if (node == NULL) {
        printf("ERROR: sin memoria para crear un nodo del AST\n");
        exit(1);
    }
    node->type = type;
    return node;
}
// Index of the next unconsumed token; shared by every parse function below.
// Nothing in this header resets it, so a second call to parse() continues
// from wherever the previous call stopped.
int pos = 0;
// Forward declaration: parse_term() and parse_expr() call each other.
ASTNode *parse_expr(Token *tokens);
/* Maximum number of arguments accepted in one call expression. */
enum { PARSE_TERM_MAX_ARGS = 16 };

/*
 * Consumes the token at `pos` when its type equals `expected`; otherwise
 * prints an error that names `what` and exits with status 1.
 */
static void parse_term_expect(Token *tokens, int expected, const char *what) {
    if ((int)tokens[pos].type != expected) {
        printf("ERROR: esperaba %s, encontre tipo %d value: %s\n", what,
               tokens[pos].type, tokens[pos].value);
        exit(1);
    }
    pos++;
}

/*
 * Parses `name(arg, ...)` with `pos` on the identifier and returns a
 * NODE_CALL node. The caller has already checked that "(" follows.
 */
static ASTNode *parse_term_call(Token *tokens) {
    char *name = tokens[pos].value;
    pos += 2; /* consume the identifier and the "(" */

    ASTNode **args = malloc(sizeof *args * PARSE_TERM_MAX_ARGS);
    if (args == NULL) {
        printf("ERROR: sin memoria para los argumentos de %s\n", name);
        exit(1);
    }

    int arg_count = 0;
    if (tokens[pos].type != TOK_RPAREN) {
        for (;;) {
            /* Refuse to write past the end of the argument array. */
            if (arg_count == PARSE_TERM_MAX_ARGS) {
                printf("ERROR: demasiados argumentos en la llamada a %s (max %d)\n",
                       name, PARSE_TERM_MAX_ARGS);
                exit(1);
            }
            args[arg_count++] = parse_expr(tokens);
            if (tokens[pos].type != TOK_COMMA) {
                break;
            }
            pos++; /* consume "," */
        }
    }
    parse_term_expect(tokens, TOK_RPAREN, "')'");

    ASTNode *node = make_node(NODE_CALL);
    node->data.call.name = name;
    node->data.call.args = args;
    node->data.call.arg_count = arg_count;
    return node;
}

/*
 * Parses one operand starting at tokens[pos] and advances `pos` past it.
 *
 * Accepted forms: integer literal, string literal, variable reference,
 * function call, parenthesised expression, and unary minus (represented as
 * the binary operation `0 - operand`).
 *
 * String and identifier text is not copied; the node points at the token's
 * value. Any unexpected token, a missing ")", an out-of-range integer
 * literal, or more than PARSE_TERM_MAX_ARGS call arguments prints an error
 * and exits with status 1.
 *
 * NOTE(review): tokens[pos + 1] is read after an identifier without any
 * length information, so the token array presumably always has at least one
 * token after an identifier -- confirm against the lexer.
 */
ASTNode *parse_term(Token *tokens) {
    if (tokens[pos].type == TOK_INT) {
        /* strtol instead of atoi: atoi is undefined for out-of-range text. */
        errno = 0;
        long value = strtol(tokens[pos].value, NULL, 10);
        if (errno == ERANGE || value < INT_MIN || value > INT_MAX) {
            printf("ERROR: literal entero fuera de rango: %s\n",
                   tokens[pos].value);
            exit(1);
        }
        ASTNode *node = make_node(NODE_INT_LIT);
        node->data.int_val = (int)value;
        pos++;
        return node;
    }

    if (tokens[pos].type == TOK_STRING) {
        ASTNode *node = make_node(NODE_STRING_LIT);
        node->data.string_val = tokens[pos].value;
        pos++;
        return node;
    }

    if (tokens[pos].type == TOK_ID) {
        if (tokens[pos + 1].type == TOK_LPAREN) {
            return parse_term_call(tokens);
        }
        /* Plain variable reference; the name lives in string_val. */
        ASTNode *node = make_node(NODE_VAR);
        node->data.string_val = tokens[pos].value;
        pos++;
        return node;
    }

    if (tokens[pos].type == TOK_LPAREN) {
        pos++; /* consume "(" */
        ASTNode *inner = parse_expr(tokens);
        parse_term_expect(tokens, TOK_RPAREN, "')'");
        return inner;
    }

    if (tokens[pos].type == TOK_MINUS) {
        pos++; /* consume "-" */
        ASTNode *operand = parse_term(tokens);
        ASTNode *zero = make_node(NODE_INT_LIT);
        zero->data.int_val = 0;
        ASTNode *neg = make_node(NODE_BINOP);
        neg->data.binop.op = '-';
        neg->data.binop.left = zero;
        neg->data.binop.right = operand;
        return neg;
    }

    printf("ERROR: esperaba INT o ID, encontre tipo %d value: %s\n",
           tokens[pos].type, tokens[pos].value);
    exit(1);
}
/* Builds a NODE_BINOP node for `left op right`. */
static ASTNode *parse_expr_binop(char op, ASTNode *left, ASTNode *right) {
    ASTNode *node = make_node(NODE_BINOP);
    node->data.binop.op = op;
    node->data.binop.left = left;
    node->data.binop.right = right;
    return node;
}

/* Parses a left-associative chain of `*` and `/` over operands. */
static ASTNode *parse_expr_product(Token *tokens) {
    ASTNode *left = parse_term(tokens);
    while (tokens[pos].type == TOK_STAR || tokens[pos].type == TOK_SLASH) {
        char op = tokens[pos].value[0];
        pos++;
        ASTNode *right = parse_term(tokens);
        left = parse_expr_binop(op, left, right);
    }
    return left;
}

/* Parses a left-associative chain of `+` and `-` over products. */
static ASTNode *parse_expr_sum(Token *tokens) {
    ASTNode *left = parse_expr_product(tokens);
    while (tokens[pos].type == TOK_PLUS || tokens[pos].type == TOK_MINUS) {
        char op = tokens[pos].value[0];
        pos++;
        ASTNode *right = parse_expr_product(tokens);
        left = parse_expr_binop(op, left, right);
    }
    return left;
}

/*
 * Parses an expression starting at tokens[pos] and advances `pos` past it.
 *
 * Operators are grouped by precedence, lowest first:
 *   comparison      <  >
 *   additive        +  -
 *   multiplicative  *  /
 * so `a + b * c` yields a + (b * c) and `i < n - 1` yields i < (n - 1).
 * Operators of the same level associate to the left: (a + b) + c.
 *
 * Returns the root node of the expression. The operator stored in each
 * NODE_BINOP is the first character of the operator token's value.
 */
ASTNode *parse_expr(Token *tokens) {
    ASTNode *left = parse_expr_sum(tokens);
    while (tokens[pos].type == TOK_LT || tokens[pos].type == TOK_GT) {
        char op = tokens[pos].value[0];
        pos++;
        ASTNode *right = parse_expr_sum(tokens);
        left = parse_expr_binop(op, left, right);
    }
    return left;
}
/* Declared up front because block bodies are parsed recursively. */
ASTNode *parse_statement(Token *tokens);

/*
 * Appends `stmt` to the NODE_BLOCK `block`, doubling the statement array
 * when it is full. `capacity` tracks the current size of that array.
 * Exits with status 1 if the array cannot grow.
 */
static void parse_statement_append(ASTNode *block, int *capacity,
                                   ASTNode *stmt) {
    if (block->data.block.count == *capacity) {
        ASTNode **grown = NULL;
        if (*capacity <= INT_MAX / 2) {
            /* Keep the old pointer until realloc is known to have worked. */
            grown = realloc(block->data.block.stmts,
                            sizeof *grown * (size_t)(*capacity) * 2);
        }
        if (grown == NULL) {
            printf("ERROR: sin memoria para ampliar el bloque\n");
            exit(1);
        }
        block->data.block.stmts = grown;
        *capacity *= 2;
    }
    block->data.block.stmts[block->data.block.count++] = stmt;
}

/*
 * Parses the indented body that follows an `if`/`while` condition and
 * returns it as a NODE_BLOCK. Statements are read until a DEDENT token,
 * which is consumed as well.
 */
static ASTNode *parse_statement_body(Token *tokens) {
    /* Skip ":", NEWLINE and INDENT. These three tokens are not validated. */
    pos += 3;

    int capacity = 256;
    ASTNode *body = make_node(NODE_BLOCK);
    body->data.block.count = 0;
    body->data.block.stmts =
        malloc(sizeof *body->data.block.stmts * (size_t)capacity);
    if (body->data.block.stmts == NULL) {
        printf("ERROR: sin memoria para crear el bloque\n");
        exit(1);
    }

    while (tokens[pos].type != TOK_DEDENT) {
        ASTNode *stmt = parse_statement(tokens);
        parse_statement_append(body, &capacity, stmt);
        if (tokens[pos].type == TOK_NEWLINE) {
            pos++;
        }
    }
    pos++; /* consume DEDENT */
    return body;
}

/*
 * Parses one statement starting at tokens[pos] and advances `pos` past it.
 *
 * Recognised statements:
 *   - call:        name(arg, ...)      -> NODE_CALL (parsed by parse_term)
 *   - assignment:  name = expr         -> NODE_ASSIGN
 *   - comment:     // ... NEWLINE      -> NODE_NOP (the newline is consumed)
 *   - loop:        while cond: block   -> NODE_WHILE
 *   - conditional: if cond: block      -> NODE_IF
 * Anything else prints "ERROR: statement inesperado" and exits with status 1.
 *
 * Blocks may hold any number of statements; the array grows as needed.
 *
 * NOTE(review): the token after an assignment's identifier is skipped
 * without checking that it is "=", and comment/body scanning relies on a
 * NEWLINE / DEDENT token eventually appearing. This function has no token
 * count to guard against running off the end of the array.
 */
ASTNode *parse_statement(Token *tokens) {
    if (tokens[pos].type == TOK_ID) {
        if (tokens[pos + 1].type == TOK_LPAREN) {
            /* A call used as a statement is parsed exactly like a call
             * operand, so reuse that code instead of duplicating it. */
            return parse_term(tokens);
        }
        char *name = tokens[pos].value;
        pos += 2; /* consume the identifier and the "=" */
        ASTNode *value = parse_expr(tokens);
        ASTNode *node = make_node(NODE_ASSIGN);
        node->data.assign.name = name;
        node->data.assign.value = value;
        return node;
    }

    /* Line comment: two consecutive "/" tokens, up to and including NEWLINE. */
    if (tokens[pos].type == TOK_SLASH && tokens[pos + 1].type == TOK_SLASH) {
        pos += 2;
        while (tokens[pos].type != TOK_NEWLINE) {
            pos++;
        }
        pos++; /* consume NEWLINE */
        return make_node(NODE_NOP);
    }

    if (tokens[pos].type == TOK_WHILE) {
        pos++; /* consume "while" */
        ASTNode *cond = parse_expr(tokens);
        ASTNode *body = parse_statement_body(tokens);
        ASTNode *node = make_node(NODE_WHILE);
        node->data.while_loop.cond = cond;
        node->data.while_loop.body = body;
        return node;
    }

    if (tokens[pos].type == TOK_IF) {
        pos++; /* consume "if" */
        ASTNode *cond = parse_expr(tokens);
        ASTNode *body = parse_statement_body(tokens);
        ASTNode *node = make_node(NODE_IF);
        /* if_statement has the same layout as while_loop, so code that still
         * reads NODE_IF through while_loop sees the same values. */
        node->data.if_statement.cond = cond;
        node->data.if_statement.body = body;
        return node;
    }

    printf("ERROR: statement inesperado\n");
    exit(1);
}
/*
 * Parses statements from the current `pos` up to `token_count` and returns
 * them as a single NODE_BLOCK.
 *
 * tokens:      token array to read from.
 * token_count: number of tokens in the array; parsing stops when `pos`
 *              reaches it.
 *
 * Stray NEWLINE tokens between statements are skipped. The statement array
 * grows as needed, so the program length is not limited. Running out of
 * memory, or any syntax error found by parse_statement(), prints an error
 * and exits with status 1.
 *
 * `pos` is not reset here; parsing starts at whatever index it currently
 * holds.
 */
ASTNode *parse(Token *tokens, int token_count) {
    int capacity = 256;
    int count = 0;
    ASTNode **stmts = malloc(sizeof *stmts * (size_t)capacity);
    if (stmts == NULL) {
        printf("ERROR: sin memoria para crear el bloque\n");
        exit(1);
    }

    while (pos < token_count) {
        if (tokens[pos].type == TOK_NEWLINE) {
            pos++; /* skip blank lines */
            continue;
        }

        ASTNode *stmt = parse_statement(tokens);

        if (count == capacity) {
            ASTNode **grown = NULL;
            if (capacity <= INT_MAX / 2) {
                /* Keep `stmts` valid until realloc is known to have worked. */
                grown = realloc(stmts, sizeof *stmts * (size_t)capacity * 2);
            }
            if (grown == NULL) {
                printf("ERROR: sin memoria para ampliar el bloque\n");
                exit(1);
            }
            stmts = grown;
            capacity *= 2;
        }
        stmts[count++] = stmt;

        /* Consume the newline that ends the statement, if there is one. */
        if (pos < token_count && tokens[pos].type == TOK_NEWLINE) {
            pos++;
        }
    }

    ASTNode *block = make_node(NODE_BLOCK);
    block->data.block.stmts = stmts;
    block->data.block.count = count;
    return block;
}
/*
 * Prints `node` and its descendants to stdout as an ASCII tree.
 *
 * node:    subtree to print; NULL prints nothing.
 * prefix:  text printed before this node's branch marker (the accumulated
 *          indentation of its ancestors).
 * is_last: non-zero when `node` is the last child of its parent; selects the
 *          "`-- " marker instead of "|-- ".
 *
 * The prefix passed to children is built in a 256-byte buffer; deeper trees
 * get a truncated prefix rather than overflowing it.
 */
void ast_print(ASTNode *node, const char *prefix, int is_last) {
    if (node == NULL) {
        return;
    }

    /* The marker goes through "%s" so printf never sees a computed format. */
    printf("%s%s", prefix, is_last ? "`-- " : "|-- ");

    /* The continuation is 4 columns wide, the same as the branch marker, so
     * every nesting level lines up under its parent. */
    char child_prefix[256];
    snprintf(child_prefix, sizeof child_prefix, "%s%s", prefix,
             is_last ? "    " : "|   ");

    switch (node->type) {
    case NODE_INT_LIT:
        printf("NODE_INT_LIT(%d)\n", node->data.int_val);
        break;
    case NODE_STRING_LIT:
        printf("NODE_STRING_LIT(\"%s\")\n", node->data.string_val);
        break;
    case NODE_VAR:
        /* Variable names are stored in string_val. */
        printf("NODE_VAR(\"%s\")\n", node->data.string_val);
        break;
    case NODE_ASSIGN:
        printf("NODE_ASSIGN { name:\"%s\" }\n", node->data.assign.name);
        ast_print(node->data.assign.value, child_prefix, 1);
        break;
    case NODE_BINOP:
        printf("NODE_BINOP('%c')\n", node->data.binop.op);
        ast_print(node->data.binop.left, child_prefix, 0);
        ast_print(node->data.binop.right, child_prefix, 1);
        break;
    case NODE_NOP:
        printf("NODE_NOOP\n");
        break;
    case NODE_IF:
        printf("NODE_IF\n");
        ast_print(node->data.if_statement.cond, child_prefix, 0);
        ast_print(node->data.if_statement.body, child_prefix, 1);
        break;
    case NODE_WHILE:
        printf("NODE_WHILE\n");
        ast_print(node->data.while_loop.cond, child_prefix, 0);
        ast_print(node->data.while_loop.body, child_prefix, 1);
        break;
    case NODE_BLOCK: {
        printf("NODE_BLOCK\n");
        int total = node->data.block.count;
        for (int i = 0; i < total; i++) {
            ast_print(node->data.block.stmts[i], child_prefix, i == total - 1);
        }
        break;
    }
    case NODE_CALL: {
        printf("NODE_CALL(\"%s\")\n", node->data.call.name);
        int total = node->data.call.arg_count;
        for (int i = 0; i < total; i++) {
            ast_print(node->data.call.args[i], child_prefix, i == total - 1);
        }
        break;
    }
    default:
        printf("UNKNOWN\n");
        break;
    }
}
/*
 * Prints the tree rooted at `node` to stdout, starting with an empty prefix
 * and marking the root as the last child.
 */
void ast_debug(ASTNode *node) {
    const char *root_prefix = "";
    const int root_is_last = 1;

    ast_print(node, root_prefix, root_is_last);
}
#endif