#include "lexer.h"
#include "../utils/safe_alloc.h"
/*
 * NOTE(review): the names of the four system headers below were missing from
 * the reviewed text. They are the headers required by the library calls made
 * in this file (isdigit, snprintf, malloc/free, strlen/memcpy) -- confirm
 * against the original include list.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Literal/keyword scanners implemented in another translation unit. */
extern RavaTokenType_e rava_lexer_lookup_keyword(const char *identifier);
extern RavaToken_t* rava_lexer_parse_string(RavaLexer_t *lexer);
extern RavaToken_t* rava_lexer_parse_character(RavaLexer_t *lexer);
extern RavaToken_t* rava_lexer_parse_number(RavaLexer_t *lexer);

/**
 * Allocate a lexer positioned at the start of `source`.
 *
 * The source text is NOT copied: the lexer keeps the pointer, so the caller
 * must keep the buffer alive for as long as the lexer is used.
 *
 * @param source NUL-terminated input text.
 * @return a new lexer (release with rava_lexer_destroy), or NULL if `source`
 *         is NULL or the allocation fails.
 */
RavaLexer_t* rava_lexer_create(const char *source) {
    /* strlen(NULL) is undefined behaviour; reject it up front. */
    if (!source) return NULL;

    RavaLexer_t *lexer = malloc(sizeof *lexer);
    if (!lexer) return NULL;

    lexer->source = source;
    lexer->source_length = strlen(source);
    lexer->current = 0;
    lexer->start = 0;
    lexer->line = 1;
    lexer->column = 1;
    lexer->start_column = 1;
    lexer->error_message = NULL;
    return lexer;
}

/**
 * Release a lexer and its pending error message. Accepts NULL.
 * The source buffer passed to rava_lexer_create is not freed.
 */
void rava_lexer_destroy(RavaLexer_t *lexer) {
    if (!lexer) return;
    free(lexer->error_message); /* free(NULL) is a no-op */
    free(lexer);
}

/**
 * Release a token and the memory it owns. Accepts NULL.
 * The string payload is freed only for RAVA_TOKEN_LITERAL_STRING tokens.
 */
void rava_token_destroy(RavaToken_t *token) {
    if (!token) return;
    free(token->lexeme);
    if (token->type == RAVA_TOKEN_LITERAL_STRING) {
        free(token->value.string_value);
    }
    free(token);
}

/** True once the read position has reached the end of the source. */
bool _rava_lexer_is_at_end(RavaLexer_t *lexer) {
    return lexer->current >= lexer->source_length;
}

/** Current character without consuming it; '\0' at end of input. */
char _rava_lexer_peek(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) return '\0';
    return lexer->source[lexer->current];
}

/** Character after the current one without consuming; '\0' if there is none. */
char _rava_lexer_peek_next(RavaLexer_t *lexer) {
    if (lexer->current + 1 >= lexer->source_length) return '\0';
    return lexer->source[lexer->current + 1];
}

/**
 * Consume and return the current character, updating line/column tracking.
 * Returns '\0' (and consumes nothing) at end of input.
 */
char _rava_lexer_advance(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) return '\0';

    char c = lexer->source[lexer->current++];
    if (c == '\n') {
        lexer->line++;
        lexer->column = 1;
    } else {
        lexer->column++;
    }
    return c;
}

/**
 * Consume the current character only if it equals `expected`.
 *
 * The column is always incremented and the line counter is never touched,
 * so this helper is meant for non-newline characters.
 *
 * @return true if the character was consumed.
 */
bool _rava_lexer_match(RavaLexer_t *lexer, char expected) {
    if (_rava_lexer_is_at_end(lexer)) return false;
    if (lexer->source[lexer->current] != expected) return false;

    lexer->current++;
    lexer->column++;
    return true;
}

/**
 * Skip whitespace, line comments and block comments.
 *
 * Stops at the first character that starts a token (including a lone '/').
 * An unterminated block comment is consumed up to end of input without
 * reporting an error.
 */
void _rava_lexer_skip_whitespace(RavaLexer_t *lexer) {
    while (!_rava_lexer_is_at_end(lexer)) {
        char c = _rava_lexer_peek(lexer);
        switch (c) {
            case ' ':
            case '\t':
            case '\r':
            case '\n':
            case '\f':
                _rava_lexer_advance(lexer);
                break;

            case '/':
                if (_rava_lexer_peek_next(lexer) == '/') {
                    /* Line comment: stop before the newline; the outer loop
                     * then consumes it as ordinary whitespace. */
                    while (!_rava_lexer_is_at_end(lexer) &&
                           _rava_lexer_peek(lexer) != '\n') {
                        _rava_lexer_advance(lexer);
                    }
                } else if (_rava_lexer_peek_next(lexer) == '*') {
                    /* Block comment: skip the opening delimiter, then scan
                     * for the closing one. */
                    _rava_lexer_advance(lexer);
                    _rava_lexer_advance(lexer);
                    while (!_rava_lexer_is_at_end(lexer)) {
                        if (_rava_lexer_peek(lexer) == '*' &&
                            _rava_lexer_peek_next(lexer) == '/') {
                            _rava_lexer_advance(lexer);
                            _rava_lexer_advance(lexer);
                            break;
                        }
                        _rava_lexer_advance(lexer);
                    }
                } else {
                    /* A '/' that is not a comment is an operator token. */
                    return;
                }
                break;

            default:
                return;
        }
    }
}

/**
 * Build a token of `type` whose lexeme is source[start, current).
 *
 * The token records the lexer's current line (the line on which the lexeme
 * ends) and the column at which the lexeme started. The value union is
 * initialised to integer 0.
 *
 * @return a heap-allocated token (release with rava_token_destroy), or NULL
 *         if an allocation fails.
 */
RavaToken_t* _rava_lexer_create_token(RavaLexer_t *lexer, RavaTokenType_e type) {
    RavaToken_t *token = malloc(sizeof *token);
    if (!token) return NULL;

    token->type = type;

    size_t length = lexer->current - lexer->start;
    token->lexeme = malloc(length + 1);
    if (!token->lexeme) {
        free(token);
        return NULL;
    }
    memcpy(token->lexeme, lexer->source + lexer->start, length);
    token->lexeme[length] = '\0';

    token->line = lexer->line;
    token->column = lexer->start_column;
    token->value.int_value = 0;
    return token;
}

/** Identifier start character: ASCII letter, '_' or '$'. */
static bool _rava_is_alpha(char c) {
    return (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z') ||
           c == '_' || c == '$';
}

/** Identifier continuation character: identifier start or ASCII digit. */
static bool _rava_is_alnum(char c) {
    return _rava_is_alpha(c) || (c >= '0' && c <= '9');
}

/**
 * Scan the rest of an identifier whose first character has already been
 * consumed. The token type is whatever rava_lexer_lookup_keyword returns for
 * the lexeme; true/false literals also get their integer value set.
 *
 * @return the token, or NULL if allocation fails.
 */
static RavaToken_t* _rava_lexer_identifier(RavaLexer_t *lexer) {
    while (_rava_is_alnum(_rava_lexer_peek(lexer))) {
        _rava_lexer_advance(lexer);
    }

    RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_IDENTIFIER);
    /* Token creation can fail on OOM; do not dereference a NULL token. */
    if (!token) return NULL;

    RavaTokenType_e keyword_type = rava_lexer_lookup_keyword(token->lexeme);
    token->type = keyword_type;

    if (keyword_type == RAVA_TOKEN_LITERAL_TRUE) {
        token->value.int_value = 1;
    } else if (keyword_type == RAVA_TOKEN_LITERAL_FALSE) {
        token->value.int_value = 0;
    }
    return token;
}

/**
 * Scan and return the next token from the source.
 *
 * Leading whitespace and comments are skipped first. At end of input a
 * RAVA_TOKEN_EOF token is returned. For a character that starts no token, a
 * RAVA_TOKEN_ERROR token is returned and lexer->error_message is replaced
 * with a description (or set to NULL if that allocation fails).
 *
 * @return a heap-allocated token owned by the caller (release with
 *         rava_token_destroy), or NULL if token allocation fails.
 */
RavaToken_t* rava_lexer_next_token(RavaLexer_t *lexer) {
    _rava_lexer_skip_whitespace(lexer);

    lexer->start = lexer->current;
    lexer->start_column = lexer->column;

    if (_rava_lexer_is_at_end(lexer)) {
        return _rava_lexer_create_token(lexer, RAVA_TOKEN_EOF);
    }

    char c = _rava_lexer_advance(lexer);

    if (_rava_is_alpha(c)) {
        return _rava_lexer_identifier(lexer);
    }

    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char may be negative for bytes >= 0x80. */
    if (isdigit((unsigned char)c)) {
        return rava_lexer_parse_number(lexer);
    }

    switch (c) {
        case '(': return _rava_lexer_create_token(lexer, RAVA_TOKEN_LPAREN);
        case ')': return _rava_lexer_create_token(lexer, RAVA_TOKEN_RPAREN);
        case '{': return _rava_lexer_create_token(lexer, RAVA_TOKEN_LBRACE);
        case '}': return _rava_lexer_create_token(lexer, RAVA_TOKEN_RBRACE);
        case '[': return _rava_lexer_create_token(lexer, RAVA_TOKEN_LBRACKET);
        case ']': return _rava_lexer_create_token(lexer, RAVA_TOKEN_RBRACKET);
        case ';': return _rava_lexer_create_token(lexer, RAVA_TOKEN_SEMICOLON);
        case ',': return _rava_lexer_create_token(lexer, RAVA_TOKEN_COMMA);
        case '~': return _rava_lexer_create_token(lexer, RAVA_TOKEN_TILDE);
        case '?': return _rava_lexer_create_token(lexer, RAVA_TOKEN_QUESTION);
        case '@': return _rava_lexer_create_token(lexer, RAVA_TOKEN_AT);

        case '.':
            /* "..." needs two more dots; anything else is a single dot. */
            if (_rava_lexer_peek(lexer) == '.' &&
                _rava_lexer_peek_next(lexer) == '.') {
                _rava_lexer_advance(lexer);
                _rava_lexer_advance(lexer);
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_ELLIPSIS);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_DOT);

        case ':':
            if (_rava_lexer_match(lexer, ':')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_COLONCOLON);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_COLON);

        case '+':
            if (_rava_lexer_match(lexer, '+')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_INC);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_PLUSASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_PLUS);

        case '-':
            if (_rava_lexer_match(lexer, '-')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_DEC);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_MINUSASSIGN);
            }
            if (_rava_lexer_match(lexer, '>')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_ARROW);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_MINUS);

        case '*':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_STARASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_STAR);

        case '/':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_SLASHASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_SLASH);

        case '%':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_PERCENTASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_PERCENT);

        case '&':
            if (_rava_lexer_match(lexer, '&')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_AND);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_ANDASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_AMP);

        case '|':
            if (_rava_lexer_match(lexer, '|')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_OR);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_ORASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_PIPE);

        case '^':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_CARETASSIGN);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_CARET);

        case '!':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_NE);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_BANG);

        case '=':
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_EQUAL);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_ASSIGN);

        case '<':
            if (_rava_lexer_match(lexer, '<')) {
                if (_rava_lexer_match(lexer, '=')) {
                    return _rava_lexer_create_token(lexer, RAVA_TOKEN_LSHIFTASSIGN);
                }
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_LSHIFT);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_LE);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_LT);

        case '>':
            /* Longest match first: >>>=, >>>, >>=, >>, >=, >. */
            if (_rava_lexer_match(lexer, '>')) {
                if (_rava_lexer_match(lexer, '>')) {
                    if (_rava_lexer_match(lexer, '=')) {
                        return _rava_lexer_create_token(lexer, RAVA_TOKEN_URSHIFTASSIGN);
                    }
                    return _rava_lexer_create_token(lexer, RAVA_TOKEN_URSHIFT);
                }
                if (_rava_lexer_match(lexer, '=')) {
                    return _rava_lexer_create_token(lexer, RAVA_TOKEN_RSHIFTASSIGN);
                }
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_RSHIFT);
            }
            if (_rava_lexer_match(lexer, '=')) {
                return _rava_lexer_create_token(lexer, RAVA_TOKEN_GE);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_GT);

        case '"':
            return rava_lexer_parse_string(lexer);

        case '\'':
            return rava_lexer_parse_character(lexer);

        default:
            /* Drop any earlier message first so repeated errors do not leak
             * the previous buffer. */
            free(lexer->error_message);
            lexer->error_message = malloc(RAVA_ERROR_BUFFER_SIZE);
            if (lexer->error_message) {
                snprintf(lexer->error_message, RAVA_ERROR_BUFFER_SIZE,
                         "Unexpected character '%c'", c);
            }
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
    }
}