|
#define _POSIX_C_SOURCE 200809L
|
|
#include "lexer.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
|
|
extern RavaToken_t* _rava_lexer_create_token(RavaLexer_t *lexer, RavaTokenType_e type);
|
|
extern char _rava_lexer_peek(RavaLexer_t *lexer);
|
|
extern char _rava_lexer_peek_next(RavaLexer_t *lexer);
|
|
extern char _rava_lexer_advance(RavaLexer_t *lexer);
|
|
extern bool _rava_lexer_is_at_end(RavaLexer_t *lexer);
|
|
|
|
/*
 * Map the character that follows a backslash to the character the escape
 * sequence stands for.
 *
 * Recognised escape letters: n, t, r, b, f, backslash, single quote and
 * double quote. Any other character is returned unchanged.
 */
static char _rava_unescape_char(char c) {
    /* Parallel tables: escape_keys[i] translates to escape_values[i]. */
    static const char escape_keys[] = { 'n', 't', 'r', 'b', 'f', '\\', '\'', '"' };
    static const char escape_values[] = { '\n', '\t', '\r', '\b', '\f', '\\', '\'', '"' };

    for (size_t slot = 0; slot < sizeof escape_keys; slot++) {
        if (escape_keys[slot] == c) {
            return escape_values[slot];
        }
    }

    /* Not a known escape letter: pass the character through as-is. */
    return c;
}
|
|
|
|
/* Numeric value of one hexadecimal digit; the caller has already checked isxdigit(). */
static int _rava_string_hex_value(char digit) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    unsigned char d = (unsigned char)digit;
    if (isdigit(d)) {
        return d - '0';
    }
    return tolower(d) - 'a' + 10;
}

/* True when input remains and the next character is an octal digit ('0'..'7'). */
static bool _rava_string_octal_ahead(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return false;
    }
    char ahead = _rava_lexer_peek(lexer);
    return ahead >= '0' && ahead <= '7';
}

/* Free the partially built literal, record `message` on the lexer and return an error token. */
static RavaToken_t *_rava_string_fail(RavaLexer_t *lexer, char *partial, const char *message) {
    free(partial);
    lexer->error_message = strdup(message);
    return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
}

/*
 * Scan the body of a double-quoted string literal up to and including the
 * closing quote, decoding escape sequences along the way.
 *
 * The scan starts at the lexer's current position and looks for the closing
 * '"' immediately, so the opening quote is presumably consumed by the caller.
 *
 * Escapes handled:
 *   - \uXXXX  exactly four hex digits; code points below 128 are stored
 *             directly, anything larger is replaced by '?'.
 *   - \o, \oo, \ooo  octal; a third digit is taken only when the first digit
 *             is 0..3 so the value always fits in one byte.
 *   - everything else goes through _rava_unescape_char().
 *
 * Returns a RAVA_TOKEN_LITERAL_STRING token whose value.string_value is the
 * heap-allocated, NUL-terminated decoded text. On malformed input or
 * allocation failure the buffer is freed, lexer->error_message is set to a
 * strdup'ed message and a RAVA_TOKEN_ERROR token is returned.
 */
RavaToken_t* rava_lexer_parse_string(RavaLexer_t *lexer) {
    size_t capacity = 128;
    size_t length = 0;
    char *str = malloc(capacity);
    if (str == NULL) {
        return _rava_string_fail(lexer, NULL, "Out of memory");
    }

    while (!_rava_lexer_is_at_end(lexer) && _rava_lexer_peek(lexer) != '"') {
        char c = _rava_lexer_advance(lexer);

        if (c == '\\') {
            if (_rava_lexer_is_at_end(lexer)) {
                return _rava_string_fail(lexer, str, "Unterminated string escape");
            }

            char next = _rava_lexer_advance(lexer);
            if (next == 'u') {
                int codepoint = 0;
                for (int i = 0; i < 4; i++) {
                    if (_rava_lexer_is_at_end(lexer) ||
                        !isxdigit((unsigned char)_rava_lexer_peek(lexer))) {
                        return _rava_string_fail(lexer, str, "Invalid Unicode escape");
                    }
                    codepoint = codepoint * 16 + _rava_string_hex_value(_rava_lexer_advance(lexer));
                }
                /* The buffer holds single bytes only; non-ASCII code points are not representable. */
                c = (codepoint < 128) ? (char)codepoint : '?';
            } else if (next >= '0' && next <= '7') {
                int octal = next - '0';
                if (_rava_string_octal_ahead(lexer)) {
                    octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                    /* A third digit would push the value past 0377 unless the first digit is 0..3. */
                    if (next <= '3' && _rava_string_octal_ahead(lexer)) {
                        octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                    }
                }
                c = (char)octal;
            } else {
                c = _rava_unescape_char(next);
            }
        }

        /* Keep one spare byte for the terminating NUL. */
        if (length + 1 >= capacity) {
            size_t grown_capacity = capacity * 2;
            /* Use a temporary so the original buffer can still be freed if realloc fails. */
            char *grown = realloc(str, grown_capacity);
            if (grown == NULL) {
                return _rava_string_fail(lexer, str, "Out of memory");
            }
            str = grown;
            capacity = grown_capacity;
        }
        str[length++] = c;
    }

    /* The loop stopped without seeing a closing quote. */
    if (_rava_lexer_is_at_end(lexer)) {
        return _rava_string_fail(lexer, str, "Unterminated string");
    }

    /* Consume the closing quote. */
    _rava_lexer_advance(lexer);

    str[length] = '\0';

    RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_STRING);
    if (token == NULL) {
        /* Defensive: avoid a NULL dereference and a leak if token creation can fail. */
        free(str);
        return NULL;
    }
    token->value.string_value = str;
    return token;
}
|
|
|
|
/* Numeric value of one hexadecimal digit; the caller has already checked isxdigit(). */
static int _rava_char_hex_value(char digit) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    unsigned char d = (unsigned char)digit;
    if (isdigit(d)) {
        return d - '0';
    }
    return tolower(d) - 'a' + 10;
}

/* True when input remains and the next character is an octal digit ('0'..'7'). */
static bool _rava_char_octal_ahead(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return false;
    }
    char ahead = _rava_lexer_peek(lexer);
    return ahead >= '0' && ahead <= '7';
}

/* Record `message` on the lexer and return an error token. */
static RavaToken_t *_rava_char_fail(RavaLexer_t *lexer, const char *message) {
    lexer->error_message = strdup(message);
    return _rava_lexer_create_token(lexer, RAVA_TOKEN_ERROR);
}

/*
 * Scan the body of a single-quoted character literal up to and including the
 * closing quote.
 *
 * The first character read is treated as the literal's content, so the
 * opening quote is presumably consumed by the caller.
 *
 * Escapes are decoded the same way the string-literal parser decodes them:
 *   - \uXXXX  exactly four hex digits; code points below 128 are stored
 *             directly, anything larger is replaced by '?'.
 *   - \o, \oo, \ooo  octal (so '\0' is NUL, not the digit '0'); a third digit
 *             is taken only when the first digit is 0..3.
 *   - everything else goes through _rava_unescape_char().
 *
 * Returns a RAVA_TOKEN_LITERAL_CHARACTER token with value.char_value set. On
 * malformed input lexer->error_message is set to a strdup'ed message and a
 * RAVA_TOKEN_ERROR token is returned.
 */
RavaToken_t* rava_lexer_parse_character(RavaLexer_t *lexer) {
    if (_rava_lexer_is_at_end(lexer)) {
        return _rava_char_fail(lexer, "Unterminated character literal");
    }

    char c = _rava_lexer_advance(lexer);

    if (c == '\\') {
        if (_rava_lexer_is_at_end(lexer)) {
            return _rava_char_fail(lexer, "Unterminated character escape");
        }

        char next = _rava_lexer_advance(lexer);
        if (next == 'u') {
            int codepoint = 0;
            for (int i = 0; i < 4; i++) {
                if (_rava_lexer_is_at_end(lexer) ||
                    !isxdigit((unsigned char)_rava_lexer_peek(lexer))) {
                    return _rava_char_fail(lexer, "Invalid Unicode escape");
                }
                codepoint = codepoint * 16 + _rava_char_hex_value(_rava_lexer_advance(lexer));
            }
            /* Mirrors the string parser: only ASCII code points are kept verbatim. */
            c = (codepoint < 128) ? (char)codepoint : '?';
        } else if (next >= '0' && next <= '7') {
            int octal = next - '0';
            if (_rava_char_octal_ahead(lexer)) {
                octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                /* A third digit would push the value past 0377 unless the first digit is 0..3. */
                if (next <= '3' && _rava_char_octal_ahead(lexer)) {
                    octal = octal * 8 + (_rava_lexer_advance(lexer) - '0');
                }
            }
            c = (char)octal;
        } else {
            c = _rava_unescape_char(next);
        }
    }

    /* Exactly one (possibly escaped) character must be followed by the closing quote. */
    if (_rava_lexer_is_at_end(lexer) || _rava_lexer_peek(lexer) != '\'') {
        return _rava_char_fail(lexer, "Unterminated character literal");
    }

    /* Consume the closing quote. */
    _rava_lexer_advance(lexer);

    RavaToken_t *token = _rava_lexer_create_token(lexer, RAVA_TOKEN_LITERAL_CHARACTER);
    if (token == NULL) {
        /* Defensive: avoid a NULL dereference if token creation can fail. */
        return NULL;
    }
    token->value.char_value = c;
    return token;
}
|
|
|
|
/*
 * Return a heap copy of `lexeme` with every '_' digit separator removed.
 * Returns NULL when the lexeme contains no separator or when the copy cannot
 * be allocated; in both cases the caller should use the lexeme as-is.
 */
static char *_rava_number_strip_separators(const char *lexeme) {
    if (strchr(lexeme, '_') == NULL) {
        return NULL;
    }

    char *clean = malloc(strlen(lexeme) + 1);
    if (clean == NULL) {
        return NULL;
    }

    size_t out = 0;
    for (const char *in = lexeme; *in != '\0'; in++) {
        if (*in != '_') {
            clean[out++] = *in;
        }
    }
    clean[out] = '\0';
    return clean;
}

/*
 * Scan the remainder of a numeric literal and build its token.
 *
 * The first character is read from lexer->source[lexer->start] while the
 * following one is obtained with peek, so the leading digit appears to have
 * been consumed by the caller already.
 *
 * Accepted forms: decimal, 0x/0X hexadecimal, 0b/0B binary, leading-zero
 * octal, a fractional part and an exponent (decimal only), '_' digit
 * separators, and an optional L/l, F/f or D/d suffix.
 *
 * Token type: LONG for an L suffix, FLOAT for F, DOUBLE for D or for any
 * unsuffixed literal with a '.' or exponent, INTEGER otherwise. Floating
 * literals store strtod() of the lexeme in value.float_value; all others store
 * strtoll() in the detected radix in value.int_value. Digit separators are
 * stripped before conversion so they do not cut the value short.
 */
RavaToken_t* rava_lexer_parse_number(RavaLexer_t *lexer) {
    bool is_hex = false;
    bool is_octal = false;
    bool is_binary = false;
    bool is_float = false;
    bool has_exponent = false;

    char first_char = lexer->source[lexer->start];
    if (first_char == '0' && !_rava_lexer_is_at_end(lexer)) {
        char next = _rava_lexer_peek(lexer);
        if (next == 'x' || next == 'X') {
            is_hex = true;
            /* Consume only the radix marker; the digit loop validates what follows. */
            _rava_lexer_advance(lexer);
        } else if (next == 'b' || next == 'B') {
            is_binary = true;
            /* Consume only the radix marker; the digit loop validates what follows. */
            _rava_lexer_advance(lexer);
        } else if (next >= '0' && next <= '7') {
            is_octal = true;
            _rava_lexer_advance(lexer);
        }
    }

    while (!_rava_lexer_is_at_end(lexer)) {
        char c = _rava_lexer_peek(lexer);
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char uc = (unsigned char)c;
        bool plain_decimal = !is_hex && !is_binary && !is_octal;

        if (is_hex && isxdigit(uc)) {
            _rava_lexer_advance(lexer);
        } else if (is_binary && (c == '0' || c == '1')) {
            _rava_lexer_advance(lexer);
        } else if (is_octal && c >= '0' && c <= '7') {
            _rava_lexer_advance(lexer);
        } else if (plain_decimal && isdigit(uc)) {
            _rava_lexer_advance(lexer);
        } else if (c == '.' && !is_float && plain_decimal) {
            is_float = true;
            _rava_lexer_advance(lexer);
        } else if ((c == 'e' || c == 'E') && !has_exponent && plain_decimal) {
            is_float = true;
            has_exponent = true;
            _rava_lexer_advance(lexer);
            /* Optional sign directly after the exponent marker. */
            if (!_rava_lexer_is_at_end(lexer) &&
                (_rava_lexer_peek(lexer) == '+' || _rava_lexer_peek(lexer) == '-')) {
                _rava_lexer_advance(lexer);
            }
        } else if (c == '_') {
            /* Digit separator: part of the lexeme, removed again before conversion. */
            _rava_lexer_advance(lexer);
        } else {
            break;
        }
    }

    RavaTokenType_e type = RAVA_TOKEN_LITERAL_INTEGER;

    if (!_rava_lexer_is_at_end(lexer)) {
        char suffix = _rava_lexer_peek(lexer);
        if (suffix == 'L' || suffix == 'l') {
            type = RAVA_TOKEN_LITERAL_LONG;
            _rava_lexer_advance(lexer);
        } else if (suffix == 'F' || suffix == 'f') {
            is_float = true;
            type = RAVA_TOKEN_LITERAL_FLOAT;
            _rava_lexer_advance(lexer);
        } else if (suffix == 'D' || suffix == 'd') {
            is_float = true;
            type = RAVA_TOKEN_LITERAL_DOUBLE;
            _rava_lexer_advance(lexer);
        }
    }

    /* A fractional part or exponent without a suffix makes the literal a double. */
    if (is_float && type == RAVA_TOKEN_LITERAL_INTEGER) {
        type = RAVA_TOKEN_LITERAL_DOUBLE;
    }

    RavaToken_t *token = _rava_lexer_create_token(lexer, type);
    if (token == NULL) {
        /* Defensive: avoid a NULL dereference if token creation can fail. */
        return NULL;
    }

    /* strtod/strtoll stop at the first '_', so convert from a separator-free copy. */
    char *clean = _rava_number_strip_separators(token->lexeme);
    const char *digits = (clean != NULL) ? clean : token->lexeme;

    if (is_float) {
        token->value.float_value = strtod(digits, NULL);
    } else if (is_hex) {
        /* Base 16 lets strtoll skip the leading "0x"/"0X" itself. */
        token->value.int_value = strtoll(digits, NULL, 16);
    } else if (is_octal) {
        token->value.int_value = strtoll(digits, NULL, 8);
    } else if (is_binary) {
        /* strtoll has no notion of a "0b" prefix, so step over it. */
        token->value.int_value = strtoll(digits + 2, NULL, 2);
    } else {
        token->value.int_value = strtoll(digits, NULL, 10);
    }

    free(clean);
    return token;
}
|