#define _POSIX_C_SOURCE 200809L #include "lexer.h" #include "../utils/safe_alloc.h" #include #include #include #include extern RavaToken_t* _rava_lexer_create_token(RavaLexer_t *lexer, RavaTokenType_e type); extern char _rava_lexer_peek(RavaLexer_t *lexer); extern char _rava_lexer_peek_next(RavaLexer_t *lexer); extern char _rava_lexer_advance(RavaLexer_t *lexer); extern bool _rava_lexer_is_at_end(RavaLexer_t *lexer); static char _rava_unescape_char(char c) { switch (c) { case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'b': return '\b'; case 'f': return '\f'; case '\\': return '\\'; case '\'': return '\''; case '"': return '"'; default: return c; } } RavaToken_t* rava_lexer_parse_string(RavaLexer_t *lexer) { size_t capacity = 128; size_t length = 0; char *str = malloc(capacity); while (!_rava_lexer_is_at_end(lexer) && _rava_lexer_peek(lexer) != '"') { char c = _rava_lexer_advance(lexer); if (c == '\\') { if (_rava_lexer_is_at_end(lexer)) { free(str); lexer->error_message = strdup("Unterminated string escape"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } char next = _rava_lexer_advance(lexer); if (next == 'u') { int codepoint = 0; for (int i = 0; i < 4; i++) { if (_rava_lexer_is_at_end(lexer) || !isxdigit(_rava_lexer_peek(lexer))) { free(str); lexer->error_message = strdup("Invalid Unicode escape"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } char hex = _rava_lexer_advance(lexer); codepoint = codepoint * 16 + (isdigit(hex) ? hex - '0' : tolower(hex) - 'a' + 10); } if (codepoint < 128) { c = (char)codepoint; } else { c = '?'; } } else if (next >= '0' && next <= '7') { int octal = next - '0'; if (!_rava_lexer_is_at_end(lexer) && _rava_lexer_peek(lexer) >= '0' && _rava_lexer_peek(lexer) <= '7') { octal = octal * 8 + (_rava_lexer_advance(lexer) - '0'); if (!_rava_lexer_is_at_end(lexer) && _rava_lexer_peek(lexer) >= '0' && _rava_lexer_peek(lexer) <= '7') { octal = octal * 8 + (_rava_lexer_advance(lexer) - '0'); } } c = (char)octal; } else { c = _rava_unescape_char(next); } } if (length + 1 >= capacity) { capacity *= 2; char *new_str = rava_safe_realloc(str, capacity); if (!new_str) { lexer->error_message = strdup("Out of memory"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } str = new_str; } str[length++] = c; } if (_rava_lexer_is_at_end(lexer)) { free(str); lexer->error_message = strdup("Unterminated string"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } _rava_lexer_advance(lexer); str[length] = '\0'; RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_STRING); token->value.string_value = str; return token; } RavaToken_t* rava_lexer_parse_character(RavaLexer_t *lexer) { if (_rava_lexer_is_at_end(lexer)) { lexer->error_message = strdup("Unterminated character literal"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } char c = _rava_lexer_advance(lexer); if (c == '\\') { if (_rava_lexer_is_at_end(lexer)) { lexer->error_message = strdup("Unterminated character escape"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } char next = _rava_lexer_advance(lexer); c = _rava_unescape_char(next); } if (_rava_lexer_is_at_end(lexer) || _rava_lexer_peek(lexer) != '\'') { lexer->error_message = strdup("Unterminated character literal"); return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR); } _rava_lexer_advance(lexer); RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_CHARACTER); token->value.char_value = c; return token; } RavaToken_t* rava_lexer_parse_number(RavaLexer_t *lexer) { bool is_hex = false; bool is_octal = false; bool is_binary = false; bool is_float = false; bool has_exponent = false; char first_char = lexer->source[lexer->start]; if (first_char == '0' && !_rava_lexer_is_at_end(lexer)) { char next = _rava_lexer_peek(lexer); if (next == 'x' || next == 'X') { is_hex = true; _rava_lexer_advance(lexer); _rava_lexer_advance(lexer); } else if (next == 'b' || next == 'B') { is_binary = true; _rava_lexer_advance(lexer); _rava_lexer_advance(lexer); } else if (next >= '0' && next <= '7') { is_octal = true; _rava_lexer_advance(lexer); } } while (!_rava_lexer_is_at_end(lexer)) { char c = _rava_lexer_peek(lexer); if (is_hex && isxdigit(c)) { _rava_lexer_advance(lexer); } else if (is_binary && (c == '0' || c == '1')) { _rava_lexer_advance(lexer); } else if (is_octal && c >= '0' && c <= '7') { _rava_lexer_advance(lexer); } else if (!is_hex && !is_binary && !is_octal && isdigit(c)) { _rava_lexer_advance(lexer); } else if (c == '.' && !is_float && !is_hex && !is_binary && !is_octal) { is_float = true; _rava_lexer_advance(lexer); } else if ((c == 'e' || c == 'E') && !has_exponent && !is_hex && !is_binary && !is_octal) { is_float = true; has_exponent = true; _rava_lexer_advance(lexer); if (!_rava_lexer_is_at_end(lexer) && (_rava_lexer_peek(lexer) == '+' || _rava_lexer_peek(lexer) == '-')) { _rava_lexer_advance(lexer); } } else if (c == '_') { _rava_lexer_advance(lexer); } else { break; } } RavaTokenType_e type = RAVA_TOKEN_LITERAL_INTEGER; if (!_rava_lexer_is_at_end(lexer)) { char suffix = _rava_lexer_peek(lexer); if (suffix == 'L' || suffix == 'l') { type = RAVA_TOKEN_LITERAL_LONG; _rava_lexer_advance(lexer); } else if (suffix == 'F' || suffix == 'f') { is_float = true; type = RAVA_TOKEN_LITERAL_FLOAT; _rava_lexer_advance(lexer); } else if (suffix == 'D' || suffix == 'd') { is_float = true; type = RAVA_TOKEN_LITERAL_DOUBLE; _rava_lexer_advance(lexer); } } if (is_float && type == RAVA_TOKEN_LITERAL_INTEGER) { type = RAVA_TOKEN_LITERAL_DOUBLE; } RavaToken_t *token = _rava_lexer_create_token(lexer, type); if (is_float) { token->value.float_value = strtod(token->lexeme, NULL); } else { char *endptr; if (is_hex) { token->value.int_value = strtoll(token->lexeme, &endptr, 16); } else if (is_octal) { token->value.int_value = strtoll(token->lexeme, &endptr, 8); } else if (is_binary) { token->value.int_value = strtoll(token->lexeme + 2, &endptr, 2); } else { token->value.int_value = strtoll(token->lexeme, &endptr, 10); } } return token; }