#define _POSIX_C_SOURCE 200809L
#include "lexer.h"
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
/*
 * Lexer primitives used by the literal parsers in this file. They are only
 * declared here; no definition is visible in this part of the source.
 *
 * As used below (NOTE(review): confirm against the definitions):
 *   - _rava_lexer_create_token: builds the token of the given type that the
 *     parsers return; its lexeme is read by rava_lexer_parse_number.
 *   - _rava_lexer_peek:         looks at the next character without consuming it.
 *   - _rava_lexer_peek_next:    not called by the parsers in this file.
 *   - _rava_lexer_advance:      consumes and returns the next character.
 *   - _rava_lexer_is_at_end:    checked before every peek/advance in the parsers.
 */
extern RavaToken_t* _rava_lexer_create_token(RavaLexer_t *lexer, RavaTokenType_e type);
extern char _rava_lexer_peek(RavaLexer_t *lexer);
extern char _rava_lexer_peek_next(RavaLexer_t *lexer);
extern char _rava_lexer_advance(RavaLexer_t *lexer);
extern bool _rava_lexer_is_at_end(RavaLexer_t *lexer);
/*
 * Map the character that follows a backslash to the character it denotes.
 *
 * The letters n, t, r, b and f become newline, tab, carriage return,
 * backspace and form feed. Every other character (including backslash,
 * single quote and double quote) stands for itself and is returned unchanged.
 */
static char _rava_unescape_char(char c) {
    static const struct {
        char letter;
        char value;
    } control_escapes[] = {
        { 'n', '\n' },
        { 't', '\t' },
        { 'r', '\r' },
        { 'b', '\b' },
        { 'f', '\f' },
    };
    const size_t count = sizeof control_escapes / sizeof control_escapes[0];

    for (size_t i = 0; i < count; i++) {
        if (control_escapes[i].letter == c) {
            return control_escapes[i].value;
        }
    }
    /* Not a control escape: the escaped character is its own value. */
    return c;
}
/* Numeric value of a hexadecimal digit. The caller has already verified
 * the character with isxdigit(). */
static int _rava_string_hex_digit_value(char digit) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    unsigned char d = (unsigned char)digit;
    if (isdigit(d)) {
        return d - '0';
    }
    return tolower(d) - 'a' + 10;
}

/* True when input remains and the next character is an octal digit. */
static bool _rava_string_peek_is_octal(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return false;
    }
    char c = _rava_lexer_peek(lexer);
    return c >= '0' && c <= '7';
}

/* Release the partially built string, store `message` as the lexer's error
 * message and return an error token. */
static RavaToken_t* _rava_string_fail(RavaLexer_t *lexer, char *buffer, const char *message) {
    free(buffer);
    lexer->error_message = strdup(message);
    return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
}

/*
 * Scan the body of a string literal up to and including the closing '"'.
 * The loop starts directly on the string contents, so the opening quote is
 * expected to have been consumed by the caller.
 *
 * Escape handling:
 *   - \uXXXX  four hex digits; code points below 128 are stored as-is,
 *             anything larger is replaced by '?' (the buffer is byte-based).
 *   - \o, \oo, \ooo  octal escapes; a third digit is only taken when the
 *             first digit is 0-3, so the value always fits in one byte.
 *   - anything else is translated by _rava_unescape_char().
 *
 * Returns a RAVA_TOKEN_LITERAL_STRING token whose value.string_value points
 * at a malloc'd, NUL-terminated buffer (this function does not free it on
 * success). On malformed input or allocation failure the buffer is freed,
 * lexer->error_message is set and a RAVA_TOKEN_ERROR token is returned.
 * If token creation itself yields NULL, NULL is returned.
 */
RavaToken_t* rava_lexer_parse_string(RavaLexer_t *lexer) {
    size_t capacity = 128;
    size_t length = 0;
    char *str = malloc(capacity);
    if (str == NULL) {
        return _rava_string_fail(lexer, NULL, "Out of memory");
    }

    while (!_rava_lexer_is_at_end(lexer) && _rava_lexer_peek(lexer) != '"') {
        char c = _rava_lexer_advance(lexer);
        if (c == '\\') {
            if (_rava_lexer_is_at_end(lexer)) {
                return _rava_string_fail(lexer, str, "Unterminated string escape");
            }
            char next = _rava_lexer_advance(lexer);
            if (next == 'u') {
                int codepoint = 0;
                for (int i = 0; i < 4; i++) {
                    if (_rava_lexer_is_at_end(lexer) ||
                        !isxdigit((unsigned char)_rava_lexer_peek(lexer))) {
                        return _rava_string_fail(lexer, str, "Invalid Unicode escape");
                    }
                    codepoint = codepoint * 16 +
                                _rava_string_hex_digit_value(_rava_lexer_advance(lexer));
                }
                c = (codepoint < 128) ? (char)codepoint : '?';
            } else if (next >= '0' && next <= '7') {
                int octal = next - '0';
                if (_rava_string_peek_is_octal(lexer)) {
                    octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                    /* Only \0xx..\3xx may have a third digit; otherwise the
                     * value would exceed 255 and wrap (e.g. \400 -> NUL). */
                    if (next <= '3' && _rava_string_peek_is_octal(lexer)) {
                        octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                    }
                }
                c = (char)(unsigned char)octal;
            } else {
                c = _rava_unescape_char(next);
            }
        }

        /* Keep one spare byte for the terminating NUL. */
        if (length + 1 >= capacity) {
            size_t grown = capacity * 2;
            char *resized = realloc(str, grown);
            if (resized == NULL) {
                /* str is still valid here and is released by the helper. */
                return _rava_string_fail(lexer, str, "Out of memory");
            }
            str = resized;
            capacity = grown;
        }
        str[length++] = c;
    }

    if (_rava_lexer_is_at_end(lexer)) {
        return _rava_string_fail(lexer, str, "Unterminated string");
    }

    _rava_lexer_advance(lexer); /* consume the closing quote */
    str[length] = '\0';

    RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_STRING);
    if (token == NULL) {
        free(str);
        return NULL;
    }
    token->value.string_value = str;
    return token;
}
/* Store `message` as the lexer's error message and return an error token. */
static RavaToken_t* _rava_character_fail(RavaLexer_t *lexer, const char *message) {
    lexer->error_message = strdup(message);
    return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
}

/* True when input remains and the next character is an octal digit. */
static bool _rava_character_peek_is_octal(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return false;
    }
    char c = _rava_lexer_peek(lexer);
    return c >= '0' && c <= '7';
}

/*
 * Scan the body of a character literal up to and including the closing '\''.
 * Scanning starts on the character itself, so the opening quote is expected
 * to have been consumed by the caller.
 *
 * Escapes are handled the same way as in rava_lexer_parse_string():
 *   - \uXXXX  four hex digits; code points below 128 are stored as-is,
 *             larger ones become '?' because the result is kept in a char.
 *   - \o, \oo, \ooo  octal escapes (third digit only after a leading 0-3),
 *             so '\0' yields the NUL character rather than the digit '0'.
 *   - anything else is translated by _rava_unescape_char().
 *
 * Returns a RAVA_TOKEN_LITERAL_CHARACTER token with value.char_value set.
 * On malformed input lexer->error_message is set and a RAVA_TOKEN_ERROR
 * token is returned. If token creation yields NULL, NULL is returned.
 */
RavaToken_t* rava_lexer_parse_character(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return _rava_character_fail(lexer, "Unterminated character literal");
    }

    char c = _rava_lexer_advance(lexer);
    if (c == '\\') {
        if (_rava_lexer_is_at_end(lexer)) {
            return _rava_character_fail(lexer, "Unterminated character escape");
        }
        char next = _rava_lexer_advance(lexer);
        if (next == 'u') {
            int codepoint = 0;
            for (int i = 0; i < 4; i++) {
                if (_rava_lexer_is_at_end(lexer) ||
                    !isxdigit((unsigned char)_rava_lexer_peek(lexer))) {
                    return _rava_character_fail(lexer, "Invalid Unicode escape");
                }
                /* <ctype.h> functions require an unsigned char value. */
                unsigned char hex = (unsigned char)_rava_lexer_advance(lexer);
                codepoint = codepoint * 16 +
                            (isdigit(hex) ? hex - '0' : tolower(hex) - 'a' + 10);
            }
            c = (codepoint < 128) ? (char)codepoint : '?';
        } else if (next >= '0' && next <= '7') {
            int octal = next - '0';
            if (_rava_character_peek_is_octal(lexer)) {
                octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                /* A third digit is only valid after 0-3 (max value 0377). */
                if (next <= '3' && _rava_character_peek_is_octal(lexer)) {
                    octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                }
            }
            c = (char)(unsigned char)octal;
        } else {
            c = _rava_unescape_char(next);
        }
    }

    if (_rava_lexer_is_at_end(lexer) || _rava_lexer_peek(lexer) != '\'') {
        return _rava_character_fail(lexer, "Unterminated character literal");
    }
    _rava_lexer_advance(lexer); /* consume the closing quote */

    RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_CHARACTER);
    if (token == NULL) {
        return NULL;
    }
    token->value.char_value = c;
    return token;
}
/*
 * Return a malloc'd copy of `lexeme` with every '_' digit separator removed,
 * or NULL if the allocation fails. The caller frees the result.
 */
static char *_rava_number_strip_separators(const char *lexeme) {
    char *clean = malloc(strlen(lexeme) + 1);
    if (clean == NULL) {
        return NULL;
    }
    size_t out = 0;
    for (const char *p = lexeme; *p != '\0'; p++) {
        if (*p != '_') {
            clean[out++] = *p;
        }
    }
    clean[out] = '\0';
    return clean;
}

/*
 * Scan a numeric literal. The first digit is read from
 * lexer->source[lexer->start] and the next unread character is obtained
 * with peek, so the caller is expected to have consumed that first digit.
 *
 * Recognised forms:
 *   - 0x / 0X hexadecimal, 0b / 0B binary, leading-0 octal, decimal
 *   - a single '.' and a single e/E exponent (optional sign) for decimals
 *   - '_' digit separators anywhere in the digit run
 *   - suffixes L/l (long), F/f (float), D/d (double)
 *
 * The token type is INTEGER unless a suffix or a '.'/exponent says
 * otherwise; a float-looking literal without suffix becomes DOUBLE.
 * value.float_value or value.int_value is filled from the token's lexeme
 * after the '_' separators have been removed (strtod/strtoll would stop at
 * the first '_' otherwise).
 *
 * A 0x/0b prefix that is not followed by at least one valid digit sets
 * lexer->error_message and yields a RAVA_TOKEN_ERROR token. If token
 * creation yields NULL, NULL is returned.
 */
RavaToken_t* rava_lexer_parse_number(RavaLexer_t *lexer) {
    bool is_hex = false;
    bool is_octal = false;
    bool is_binary = false;
    bool is_float = false;
    bool has_exponent = false;

    char first_char = lexer->source[lexer->start];
    if (first_char == '0' && !_rava_lexer_is_at_end(lexer)) {
        char next = _rava_lexer_peek(lexer);
        if (next == 'x' || next == 'X') {
            is_hex = true;
            _rava_lexer_advance(lexer); /* consume only the prefix letter */
        } else if (next == 'b' || next == 'B') {
            is_binary = true;
            _rava_lexer_advance(lexer); /* consume only the prefix letter */
        } else if (next >= '0' && next <= '7') {
            /* The digit itself is consumed by the scanning loop below. */
            is_octal = true;
        }
    }

    if (is_hex || is_binary) {
        /* Validate the first digit instead of blindly consuming it, which
         * could swallow an unrelated character or run past the input. */
        bool has_digit = false;
        if (!_rava_lexer_is_at_end(lexer)) {
            char d = _rava_lexer_peek(lexer);
            has_digit = is_hex ? (isxdigit((unsigned char)d) != 0)
                               : (d == '0' || d == '1');
        }
        if (!has_digit) {
            lexer->error_message = strdup(is_hex ? "Invalid hexadecimal literal"
                                                 : "Invalid binary literal");
            return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
        }
    }

    bool is_decimal = !is_hex && !is_binary && !is_octal;
    while (!_rava_lexer_is_at_end(lexer)) {
        char c = _rava_lexer_peek(lexer);
        /* <ctype.h> functions require an unsigned char value. */
        unsigned char uc = (unsigned char)c;
        if (is_hex && isxdigit(uc)) {
            _rava_lexer_advance(lexer);
        } else if (is_binary && (c == '0' || c == '1')) {
            _rava_lexer_advance(lexer);
        } else if (is_octal && c >= '0' && c <= '7') {
            _rava_lexer_advance(lexer);
        } else if (is_decimal && isdigit(uc)) {
            _rava_lexer_advance(lexer);
        } else if (c == '.' && !is_float && is_decimal) {
            is_float = true;
            _rava_lexer_advance(lexer);
        } else if ((c == 'e' || c == 'E') && !has_exponent && is_decimal) {
            is_float = true;
            has_exponent = true;
            _rava_lexer_advance(lexer);
            if (!_rava_lexer_is_at_end(lexer) &&
                (_rava_lexer_peek(lexer) == '+' || _rava_lexer_peek(lexer) == '-')) {
                _rava_lexer_advance(lexer);
            }
        } else if (c == '_') {
            _rava_lexer_advance(lexer);
        } else {
            break;
        }
    }

    RavaTokenType_e type = RAVA_TOKEN_LITERAL_INTEGER;
    if (!_rava_lexer_is_at_end(lexer)) {
        char suffix = _rava_lexer_peek(lexer);
        if (suffix == 'L' || suffix == 'l') {
            type = RAVA_TOKEN_LITERAL_LONG;
            _rava_lexer_advance(lexer);
        } else if (suffix == 'F' || suffix == 'f') {
            is_float = true;
            type = RAVA_TOKEN_LITERAL_FLOAT;
            _rava_lexer_advance(lexer);
        } else if (suffix == 'D' || suffix == 'd') {
            is_float = true;
            type = RAVA_TOKEN_LITERAL_DOUBLE;
            _rava_lexer_advance(lexer);
        }
    }
    if (is_float && type == RAVA_TOKEN_LITERAL_INTEGER) {
        type = RAVA_TOKEN_LITERAL_DOUBLE;
    }

    RavaToken_t *token = _rava_lexer_create_token(lexer, type);
    if (token == NULL) {
        return NULL;
    }

    /* Convert from a separator-free copy; if the copy cannot be allocated,
     * fall back to the raw lexeme (conversion then stops at the first '_'). */
    char *digits = _rava_number_strip_separators(token->lexeme);
    const char *text = (digits != NULL) ? digits : token->lexeme;

    if (is_float) {
        token->value.float_value = strtod(text, NULL);
    } else if (is_hex) {
        /* Base 16 lets strtoll skip the "0x" prefix itself. */
        token->value.int_value = strtoll(text, NULL, 16);
    } else if (is_octal) {
        token->value.int_value = strtoll(text, NULL, 8);
    } else if (is_binary) {
        /* strtoll has no notion of a "0b" prefix, so skip it by hand. */
        token->value.int_value = strtoll(text + 2, NULL, 2);
    } else {
        token->value.int_value = strtoll(text, NULL, 10);
    }

    free(digits);
    return token;
}