#ifndef JLANG_PARSER_H #define JLANG_PARSER_H /* Convierte tokens en un arbol */ #include "lexer.h" typedef enum { NODE_INT_LIT, // literal entero NODE_STRING_LIT, // literal string NODE_VAR, // referencia a variable NODE_ASSIGN, // asignacion: x = expr NODE_BINOP, // operacion binaria: a + b NODE_PRINT, // print(expr) NODE_PRINTLN, NODE_IF, // if cond: bloque NODE_WHILE, // while cond: bloque NODE_BLOCK, // secuencia de statements NODE_CALL, } NodeType; typedef struct ASTNode { NodeType type; union { int int_val; // NODE_INT_LIT char *string_val; // NODE_STRING_LIT struct { char *name; struct ASTNode *value; } assign; // NODE_ASSIGN struct { char op; struct ASTNode *left; struct ASTNode *right; } binop; // NODE_BINOP struct { struct ASTNode *expr; } print; // NODE_PRINT struct { struct ASTNode **stmts; int count; } block; // NODE_BLOCK struct { struct ASTNode *cond; struct ASTNode *body; } while_loop; // NODE_WHILE struct { struct ASTNode *cond; struct ASTNode *body; } if_statement; // NODE_IF struct { char *name; struct ASTNode **args; int arg_count; } call; } data; } ASTNode; ASTNode *make_node(NodeType type) { ASTNode *node = (ASTNode *)malloc(sizeof(ASTNode)); node->type = type; return node; } int pos = 0; ASTNode *parse_term(Token *tokens) { if (tokens[pos].type == TOK_INT) { ASTNode *node = make_node(NODE_INT_LIT); node->data.int_val = atoi(tokens[pos].value); pos++; return node; } else if (tokens[pos].type == TOK_STRING) { ASTNode *node = make_node(NODE_STRING_LIT); node->data.string_val = tokens[pos].value; pos++; return node; } else if (tokens[pos].type == TOK_ID) { printf("Parsing token: %s\n", tokens[pos].value); ASTNode *node = make_node(NODE_VAR); node->data.string_val = tokens[pos].value; pos++; return node; } else if (tokens[pos].type == TOK_MINUS) { pos++; // consumir '-' ASTNode *term = parse_term(tokens); ASTNode *neg = make_node(NODE_BINOP); neg->data.binop.op = '-'; neg->data.binop.left = make_node(NODE_INT_LIT); neg->data.binop.left->data.int_val = 0; neg->data.binop.right = term; return neg; } printf("ERROR: esperaba INT o ID, encontre tipo %d value: %s\n", tokens[pos].type, tokens[pos].value); exit(1); } ASTNode *parse_expr(Token *tokens) { ASTNode *left = parse_term(tokens); while (tokens[pos].type == TOK_PLUS || tokens[pos].type == TOK_MINUS || tokens[pos].type == TOK_STAR || tokens[pos].type == TOK_SLASH || tokens[pos].type == TOK_LT || tokens[pos].type == TOK_GT) { char op = tokens[pos].value[0]; // +,-,*,/ pos++; ASTNode *right = parse_term(tokens); ASTNode *binop = make_node(NODE_BINOP); binop->data.binop.op = op; binop->data.binop.left = left; binop->data.binop.right = right; left = binop; // encadenar: (a + b) + c } return left; } ASTNode *parse_statement(Token *tokens) { if (tokens[pos].type == TOK_ID) { if (tokens[pos + 1].type == TOK_LPAREN) { // Es una funcion char *name = tokens[pos].value; pos++; // consumir ID pos++; // consumir "(" // Parsear argumentos ASTNode **args = (ASTNode **)malloc(sizeof(ASTNode *) * 16); // Max 16 parametros int arg_count = 0; if (tokens[pos].type != TOK_RPAREN) { args[arg_count++] = parse_expr(tokens); while (tokens[pos].type == TOK_COMMA) { pos++; // Consumir "," args[arg_count++] = parse_expr(tokens); } } pos++; // consumir ")" ASTNode *node = make_node(NODE_CALL); node->data.call.name = name; node->data.call.args = args; node->data.call.arg_count = arg_count; return node; } char *name = tokens[pos].value; pos++; // consumir ID pos++; // consumir "=" ASTNode *value = parse_expr(tokens); ASTNode *node = make_node(NODE_ASSIGN); node->data.assign.name = name; node->data.assign.value = value; return node; } if (tokens[pos].type == TOK_WHILE) { pos++; // consumir while ASTNode *cond = parse_expr(tokens); pos++; // consumir ":" pos++; // consumir NEWLINE pos++; // consumir INDENT // Parsear bloque de statements hasta DEDENT ASTNode *body = make_node(NODE_BLOCK); body->data.block.stmts = (ASTNode **)malloc(sizeof(ASTNode *) * 256); body->data.block.count = 0; while (tokens[pos].type != TOK_DEDENT) { body->data.block.stmts[body->data.block.count++] = parse_statement(tokens); if (tokens[pos].type == TOK_NEWLINE) { pos++; } } pos++; // Consumir DEDENT ASTNode *node = make_node(NODE_WHILE); node->data.while_loop.cond = cond; node->data.while_loop.body = body; return node; } if (tokens[pos].type == TOK_IF) { pos++; // consumir if ASTNode *cond = parse_expr(tokens); pos++; // consumir : pos++; // consumir NEWLINE pos++; // consumir INDENT // Parsear bloque de statements hasta DEDENT ASTNode *body = make_node(NODE_BLOCK); body->data.block.stmts = (ASTNode **)malloc(sizeof(ASTNode *) * 256); body->data.block.count = 0; while (tokens[pos].type != TOK_DEDENT) { body->data.block.stmts[body->data.block.count++] = parse_statement(tokens); if (tokens[pos].type == TOK_NEWLINE) { pos++; } } pos++; // Consumir DEDENT ASTNode *node = make_node(NODE_IF); node->data.while_loop.cond = cond; node->data.while_loop.body = body; return node; } printf("ERROR: statement inesperado\n"); exit(1); } ASTNode *parse(Token *tokens, int token_count) { ASTNode *block = make_node(NODE_BLOCK); block->data.block.stmts = (ASTNode **)malloc(sizeof(ASTNode *) * 256); block->data.block.count = 0; while (pos < token_count) { if (tokens[pos].type == TOK_NEWLINE) { pos++; // Saltar newlines sueltos continue; } block->data.block.stmts[block->data.block.count++] = parse_statement(tokens); // Consumir newline despues del statement if (pos < token_count && tokens[pos].type == TOK_NEWLINE) { pos++; } } return block; } void ast_print(ASTNode *node, const char *prefix, int is_last) { if (!node) return; printf("%s", prefix); printf(is_last ? "`-- " : "|-- "); // Construir nuevo prefijo para hijos char new_prefix[256]; snprintf(new_prefix, sizeof(new_prefix), "%s%s", prefix, is_last ? " " : "| "); switch (node->type) { case NODE_WHILE: printf("NODE_WHILE\n"); ast_print(node->data.while_loop.cond, new_prefix, 0); ast_print(node->data.while_loop.body, new_prefix, 1); break; case NODE_INT_LIT: printf("NODE_INT_LIT(%d)\n", node->data.int_val); break; case NODE_STRING_LIT: printf("NODE_STRING_LIT(\"%s\")\n", node->data.string_val); break; case NODE_VAR: printf("NODE_VAR(\"%s\")\n", node->data.string_val); break; case NODE_ASSIGN: printf("NODE_ASSIGN { name:\"%s\" }\n", node->data.assign.name); ast_print(node->data.assign.value, new_prefix, 1); break; case NODE_BINOP: printf("NODE_BINOP('%c')\n", node->data.binop.op); ast_print(node->data.binop.left, new_prefix, 0); ast_print(node->data.binop.right, new_prefix, 1); break; case NODE_BLOCK: printf("NODE_BLOCK\n"); for (int i = 0; i < node->data.block.count; i++) { ast_print(node->data.block.stmts[i], new_prefix, i == node->data.block.count - 1); } break; case NODE_IF: printf("NODE_IF\n"); ast_print(node->data.while_loop.cond, new_prefix, 0); ast_print(node->data.while_loop.body, new_prefix, 1); break; case NODE_CALL: printf("NODE_CALL(\"%s\")\n", node->data.call.name); for (int i = 0; i < node->data.call.arg_count; i++) { ast_print(node->data.call.args[i], new_prefix, i == node->data.call.arg_count - 1); } break; default: printf("UNKNOWN\n"); break; } } void ast_debug(ASTNode *node) { ast_print(node, "", 1); } #endif