/* ===================================================================== */
/* src/frontend/lexer.h  (new file in the change set)                    */
/* ===================================================================== */
/*
  Turns source text into a list of tokens.
*/
#ifndef JLANG_FRONTEND_LEXER_H
#define JLANG_FRONTEND_LEXER_H

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef enum {
    // Literals
    TOK_INT,    // 42
    TOK_STRING, // "hola"

    // Identifiers and keywords
    TOK_ID,    // x, foo, mi_var
    TOK_PRINT, // print
    TOK_IF,    // if
    TOK_WHILE, // while

    // Operators
    TOK_ASSIGN, // =
    TOK_PLUS,   // +
    TOK_MINUS,  // -
    TOK_STAR,   // *
    TOK_SLASH,  // /
    TOK_EQ,     // ==
    TOK_NEQ,    // !=
    TOK_LT,     // <
    TOK_GT,     // >

    // Delimiters
    TOK_LPAREN,  // (
    TOK_RPAREN,  // )
    TOK_COLON,   // :
    TOK_NEWLINE, // \n (significant, as in Python)
    TOK_INDENT,  // indentation increase
    TOK_DEDENT,  // indentation decrease

    TOK_EOF

} TokenType;

typedef struct {
    TokenType type;
    char *value; // token text; tokenize() always stores a heap copy here
    int line;    // 1-based source line set by tokenize(); 0 when unknown
} Token;

/* Reports an allocation failure on stderr and terminates the process. */
static void frontend_fatal_oom(void) {
    fprintf(stderr, "ERROR: sin memoria\n");
    exit(1);
}

/*
 * Builds a Token by value.
 *
 * The `value` pointer is stored as-is (not copied), so the caller keeps
 * responsibility for its lifetime. `line` is initialised to 0 so the
 * struct never carries an indeterminate field; tokenize() overwrites it.
 */
Token make_token(TokenType type, const char *value) {
    Token t;
    t.type = type;
    t.value = (char *)value;
    t.line = 0;
    return t;
}

/*
 * Returns a newly allocated, NUL-terminated copy of src[start, end).
 *
 * An empty or inverted range (end <= start) yields an empty string.
 * The caller owns the result and releases it with free().
 * Terminates the process if memory cannot be allocated.
 */
char *substr(const char *src, int start, int end) {
    int len = end > start ? end - start : 0;
    char *s = malloc((size_t)len + 1);
    if (s == NULL) {
        frontend_fatal_oom();
    }
    memcpy(s, src + start, (size_t)len);
    s[len] = '\0';
    return s;
}

/*
 * Splits `source` into tokens.
 *
 * Recognised input: decimal integers, identifiers (letters, digits and
 * '_', not starting with a digit), the keyword `print`, the operators
 * `+`, `-`, `=` and newlines. Spaces, tabs and carriage returns are
 * skipped. Any other character is a fatal error (message on stderr,
 * exit status 1).
 *
 * On return `*token_count` (if non-NULL) holds the number of tokens
 * produced. One extra TOK_EOF sentinel is stored at index `*token_count`
 * and is NOT included in the count, so a consumer may safely look one
 * token past the last real one.
 *
 * Ownership: the array and every `value` in it (sentinel included) are
 * heap allocated; the caller frees each `value` and then the array.
 */
Token *tokenize(const char *source, int *token_count) {
    int capacity = 1024; // initial size; the array grows on demand
    int count = 0;
    int pos = 0;
    int line = 1;

    if (source == NULL) {
        source = "";
    }

    Token *tokens = malloc(sizeof *tokens * (size_t)capacity);
    if (tokens == NULL) {
        frontend_fatal_oom();
    }

    while (source[pos] != '\0') {
        char c = source[pos];
        int start = pos;
        TokenType type;

        if (c == ' ' || c == '\t' || c == '\r') {
            pos++;
            continue;
        }

        if (c == '\n') {
            type = TOK_NEWLINE;
            pos++;
        } else if (c == '+') {
            type = TOK_PLUS;
            pos++;
        } else if (c == '-') {
            type = TOK_MINUS;
            pos++;
        } else if (c == '=') {
            type = TOK_ASSIGN;
            pos++;
        } else if (c >= '0' && c <= '9') {
            // Read every consecutive digit
            while (source[pos] >= '0' && source[pos] <= '9') {
                pos++;
            }
            type = TOK_INT;
        } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') {
            // Read the rest of the word. <ctype.h> functions require a value
            // representable as unsigned char, hence the cast.
            while (isalnum((unsigned char)source[pos]) || source[pos] == '_') {
                pos++;
            }
            type = TOK_ID;
        } else {
            fprintf(stderr, "ERROR: caracter no reconocido '%c' en pos %d (linea %d)\n",
                    c, pos, line);
            exit(1);
        }

        char *text = substr(source, start, pos);

        // Check whether the word is a reserved keyword
        if (type == TOK_ID && strcmp(text, "print") == 0) {
            type = TOK_PRINT;
        }

        // Keep one free slot after the new token for the TOK_EOF sentinel.
        if (count + 1 >= capacity) {
            if (capacity > (int)(((size_t)-1 / sizeof *tokens) / 2) || capacity > 0x3fffffff) {
                frontend_fatal_oom();
            }
            Token *grown = realloc(tokens, sizeof *tokens * (size_t)capacity * 2);
            if (grown == NULL) {
                frontend_fatal_oom();
            }
            tokens = grown;
            capacity *= 2;
        }

        Token tok = make_token(type, text);
        tok.line = line;
        tokens[count++] = tok;

        if (type == TOK_NEWLINE) {
            line++;
        }
    }

    // Sentinel: lets the parser peek past the last real token safely.
    tokens[count] = make_token(TOK_EOF, substr(source, pos, pos));
    tokens[count].line = line;

    if (token_count != NULL) {
        *token_count = count;
    }
    return tokens;
}

#endif /* JLANG_FRONTEND_LEXER_H */

/* ===================================================================== */
/* src/frontend/parser.h  (new file in the change set)                   */
/* ===================================================================== */
/*
  Turns tokens into a tree.

  As a standalone header this file depends on lexer.h (the original starts
  with `#include "lexer.h"`); in this single listing the lexer is directly
  above.
*/
#ifndef JLANG_FRONTEND_PARSER_H
#define JLANG_FRONTEND_PARSER_H

#include <errno.h>
#include <limits.h>

typedef enum {
    NODE_INT_LIT,    // integer literal
    NODE_STRING_LIT, // string literal
    NODE_VAR,        // variable reference
    NODE_ASSIGN,     // assignment: x = expr
    NODE_BINOP,      // binary operation: a + b
    NODE_PRINT,      // print(expr)
    NODE_IF,         // if cond: block
    NODE_WHILE,      // while cond: block
    NODE_BLOCK,      // sequence of statements
} NodeType;

typedef struct ASTNode {
    NodeType type;
    union {
        int int_val;      // NODE_INT_LIT
        char *string_val; // NODE_STRING_LIT (also used by NODE_VAR for the name)
        struct {
            char *name;
            struct ASTNode *value;
        } assign; // NODE_ASSIGN
        struct {
            char op;
            struct ASTNode *left;
            struct ASTNode *right;
        } binop; // NODE_BINOP
        struct {
            struct ASTNode *expr;
        } print; // NODE_PRINT
        struct {
            struct ASTNode **stmts;
            int count;
        } block; // NODE_BLOCK
    } data;
} ASTNode;

/*
 * Allocates a zero-initialised AST node of the given type.
 * Terminates the process if memory cannot be allocated.
 */
ASTNode *make_node(NodeType type) {
    ASTNode *node = calloc(1, sizeof *node);
    if (node == NULL) {
        frontend_fatal_oom();
    }
    node->type = type;
    return node;
}

/* Index of the next token to consume; shared by all parse_* functions. */
int pos = 0;

/*
 * Number of valid tokens for the parse in progress. parse() sets it for
 * the duration of the call; outside of parse() it is INT_MAX so that
 * direct calls to parse_term/parse_expr/parse_statement are not limited.
 */
static int parser_token_limit = INT_MAX;

/* Type of the current token, or TOK_EOF when `pos` is outside the input. */
static TokenType parser_peek(const Token *tokens) {
    if (pos < 0 || pos >= parser_token_limit) {
        return TOK_EOF;
    }
    return tokens[pos].type;
}

/*
 * term := INT | ID
 *
 * Consumes one token and returns a NODE_INT_LIT or NODE_VAR node.
 * NODE_VAR borrows the token's `value` pointer (no copy is made).
 * Any other token, or an integer that does not fit in `int`, is a fatal
 * error.
 */
ASTNode *parse_term(Token *tokens) {
    TokenType type = parser_peek(tokens);

    if (type == TOK_INT) {
        const char *text = tokens[pos].value;
        char *end = NULL;

        errno = 0;
        long parsed = strtol(text, &end, 10);
        if (end == text || errno == ERANGE || parsed > INT_MAX || parsed < INT_MIN) {
            fprintf(stderr, "ERROR: literal entero invalido o fuera de rango '%s' (linea %d)\n",
                    text, tokens[pos].line);
            exit(1);
        }

        ASTNode *node = make_node(NODE_INT_LIT);
        node->data.int_val = (int)parsed;
        pos++;
        return node;
    }

    if (type == TOK_ID) {
        ASTNode *node = make_node(NODE_VAR);
        node->data.string_val = tokens[pos].value;
        pos++;
        return node;
    }

    fprintf(stderr, "ERROR: esperaba INT o ID, encontré tipo %d\n", (int)type);
    exit(1);
}

/*
 * expr := term (('+' | '-') term)*
 *
 * Builds a left-associative chain of NODE_BINOP nodes.
 */
ASTNode *parse_expr(Token *tokens) {
    ASTNode *left = parse_term(tokens);

    for (;;) {
        TokenType type = parser_peek(tokens);
        if (type != TOK_PLUS && type != TOK_MINUS) {
            break;
        }

        char op = tokens[pos].value[0]; // '+' or '-'
        pos++;
        ASTNode *right = parse_term(tokens);

        ASTNode *binop = make_node(NODE_BINOP);
        binop->data.binop.op = op;
        binop->data.binop.left = left;
        binop->data.binop.right = right;
        left = binop; // chain: (a + b) + c
    }
    return left;
}

/*
 * statement := ID '=' expr
 *            | 'print' expr
 *
 * Anything else is a fatal error. The assignment node borrows the
 * identifier token's `value` pointer as its name.
 */
ASTNode *parse_statement(Token *tokens) {
    TokenType type = parser_peek(tokens);

    if (type == TOK_ID) {
        char *name = tokens[pos].value;
        int name_line = tokens[pos].line;
        pos++; // consume ID

        // The token after the identifier must really be "=".
        if (parser_peek(tokens) != TOK_ASSIGN) {
            fprintf(stderr, "ERROR: esperaba '=' despues de '%s' (linea %d)\n",
                    name, name_line);
            exit(1);
        }
        pos++; // consume "="

        ASTNode *value = parse_expr(tokens);

        ASTNode *node = make_node(NODE_ASSIGN);
        node->data.assign.name = name;
        node->data.assign.value = value;
        return node;
    }

    if (type == TOK_PRINT) {
        pos++; // consume "print"
        ASTNode *expr = parse_expr(tokens);

        ASTNode *node = make_node(NODE_PRINT);
        node->data.print.expr = expr;
        return node;
    }

    fprintf(stderr, "ERROR: statement inesperado\n");
    exit(1);
}

/*
 * Parses `token_count` tokens into a NODE_BLOCK holding one child per
 * statement.
 *
 * Parsing always starts at the first token (the shared `pos` cursor is
 * reset), so the function can be called more than once. Blank lines are
 * skipped and a newline after a statement is consumed when present.
 * The statement array grows as needed. Errors are fatal.
 */
ASTNode *parse(Token *tokens, int token_count) {
    int capacity = 256;

    ASTNode *block = make_node(NODE_BLOCK);
    block->data.block.stmts = malloc(sizeof *block->data.block.stmts * (size_t)capacity);
    if (block->data.block.stmts == NULL) {
        frontend_fatal_oom();
    }
    block->data.block.count = 0;

    pos = 0;
    parser_token_limit = token_count;

    for (;;) {
        TokenType type = parser_peek(tokens);
        if (type == TOK_EOF) {
            break;
        }
        if (type == TOK_NEWLINE) {
            pos++; // skip stray newlines
            continue;
        }

        if (block->data.block.count == capacity) {
            if (capacity > 0x3fffffff) {
                frontend_fatal_oom();
            }
            ASTNode **grown = realloc(block->data.block.stmts,
                                      sizeof *grown * (size_t)capacity * 2);
            if (grown == NULL) {
                frontend_fatal_oom();
            }
            block->data.block.stmts = grown;
            capacity *= 2;
        }

        block->data.block.stmts[block->data.block.count++] = parse_statement(tokens);

        // Consume the newline that follows the statement
        if (parser_peek(tokens) == TOK_NEWLINE) {
            pos++;
        }
    }

    parser_token_limit = INT_MAX;
    return block;
}

/*
 * Prints `node` and its children as an ASCII tree on stdout.
 *
 * `prefix` is the text printed before this node's branch marker and
 * `is_last` tells whether the node is the last child of its parent.
 * A NULL node prints nothing. Prefixes longer than the internal 256-byte
 * buffer are truncated by snprintf.
 */
void ast_print(ASTNode *node, const char *prefix, int is_last) {
    if (!node) {
        return;
    }

    printf("%s%s", prefix, is_last ? "`-- " : "|-- ");

    // Prefix for the children; same width as the branch markers above so
    // nested levels line up.
    char new_prefix[256];
    snprintf(new_prefix, sizeof(new_prefix), "%s%s", prefix, is_last ? "    " : "|   ");

    switch (node->type) {
    case NODE_INT_LIT:
        printf("NODE_INT_LIT(%d)\n", node->data.int_val);
        break;

    case NODE_STRING_LIT:
        printf("NODE_STRING_LIT(\"%s\")\n", node->data.string_val);
        break;

    case NODE_VAR:
        printf("NODE_VAR(\"%s\")\n", node->data.string_val);
        break;

    case NODE_ASSIGN:
        printf("NODE_ASSIGN { name:\"%s\" }\n", node->data.assign.name);
        ast_print(node->data.assign.value, new_prefix, 1);
        break;

    case NODE_BINOP:
        printf("NODE_BINOP('%c')\n", node->data.binop.op);
        ast_print(node->data.binop.left, new_prefix, 0);
        ast_print(node->data.binop.right, new_prefix, 1);
        break;

    case NODE_PRINT:
        printf("NODE_PRINT\n");
        ast_print(node->data.print.expr, new_prefix, 1);
        break;

    case NODE_BLOCK:
        printf("NODE_BLOCK\n");
        for (int i = 0; i < node->data.block.count; i++) {
            ast_print(node->data.block.stmts[i], new_prefix, i == node->data.block.count - 1);
        }
        break;

    default:
        printf("UNKNOWN\n");
        break;
    }
}

/* Prints the whole tree rooted at `node` with an empty prefix. */
void ast_debug(ASTNode *node) {
    ast_print(node, "", 1);
}

#endif /* JLANG_FRONTEND_PARSER_H */

/* ===================================================================== */
/* src/main.c  (modified file; index 89e6f30..1d1523a 100644)            */
/* ===================================================================== */
/*
 * Only two hunks of this file are part of the change set. The text below
 * is reflowed from the collapsed patch: original indentation, blank
 * context lines and whitespace-only differences could not be recovered,
 * so it documents the change but is not an applicable patch.
 *
 * @@ -1,3 +1,4 @@
 * +#include "frontend/parser.h"
 *  #include "objects/object.h"
 *
 *  int main() {
 *
 * @@ -13,20 +14,25 @@ int main() {
 *    items[0] = floatVar1;
 *    items[1] = stringVar1;
 *    items[2] = listVar1;
 * -
 * +
 *    obj_print(allocPtr, listVar1, "");
 *    obj_free(allocPtr, stringVar1);
 * -  stringVar1 = obj_new_string(allocPtr, "Hola Mundo!");
 * +  stringVar1 = obj_new_string(allocPtr, "Hola Mundo!");
 *    items[1] = stringVar1;
 *    items[2] = stringVar1;
 * -
 * -  obj_print(allocPtr, listVar1, "");
 *    JLANG_visualize(allocPtr);
 *
 * +  // Lexer test
 * +  int totalTokens = 0;
 * +  Token *tokens = tokenize("x = 10\ny = 5\nz = x + y\nprint z", &totalTokens);
 * +  printf("totalTokens=%d\n", totalTokens);
 * +  ASTNode* block = parse(tokens, totalTokens);
 * +  ast_debug(block);
 * +
 *    return 0;
 *  }
 * \ No newline at end of file
 */