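/*
 * Small lexer and re-indenter: rlex() binds an input buffer, rlex_next()
 * returns one token per call, and rlex_format() rebuilds the text with
 * brace-based indentation.
 */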
#ifndef RLEXER_H
#define RLEXER_H

#include "rmalloc.h"
#include "rstring.h"
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/* Capacity in bytes (terminating NUL included) of rtoken_t.value. */
#define RTOKEN_VALUE_SIZE 1024

/*
 * Token categories produced by the lexer.
 *
 * The numeric values are part of the interface: the first group counts up
 * from 0, RT_EOF is pinned to 10 and the grouping tokens follow it.
 */
typedef enum rtoken_type_t {
    RT_UNKNOWN = 0,       /* Default type of a freshly zeroed token. */
    RT_SYMBOL,            /* Run of letters and underscores. */
    RT_NUMBER,            /* Digits with optional leading '-' and one '.'. */
    RT_STRING,            /* Quoted text with the quotes removed. */
    RT_PUNCT,             /* Punctuation that is neither operator nor grouping. */
    RT_OPERATOR,          /* Run of operator characters. */
    RT_EOF = 10,          /* End of input. */
    RT_BRACE_OPEN,        /* "(" */
    RT_CURLY_BRACE_OPEN,  /* "{" */
    RT_BRACKET_OPEN,      /* "[" */
    RT_BRACE_CLOSE,       /* ")" */
    RT_CURLY_BRACE_CLOSE, /* "}" */
    RT_BRACKET_CLOSE      /* "]" */
} rtoken_type_t;

typedef struct rtoken_t {
|
||
|
rtoken_type_t type;
|
||
|
char value[RTOKEN_VALUE_SIZE];
|
||
|
unsigned int line;
|
||
|
unsigned int col;
|
||
|
} rtoken_t;
|
||
|
|
||
|
/*
 * Lexer cursor state. All four are (re)initialised by rlex(); being static
 * in a header, every translation unit that includes this file gets its own
 * copy.
 */
static char *_content;              /* Buffer being tokenized; pointer stored by rlex(). */
static unsigned int _content_ptr;   /* Index of the next unread byte in _content. */
static unsigned int _content_line;  /* Current line counter (rlex() starts it at 1). */
static unsigned int _content_col;   /* Current column counter (rlex() starts it at 1). */

/*
 * Returns 1 when c is a bracket of any kind or a quote character
 * ({ } ( ) [ ] " '), otherwise 0.
 */
static int isgroupingchar(char c) {
    switch (c) {
    case '{':
    case '}':
    case '(':
    case ')':
    case '[':
    case ']':
    case '"':
    case '\'':
        return 1;
    default:
        return 0;
    }
}

/*
 * Returns 1 when c is one of the operator characters
 * + - / * = > < | &, otherwise 0.
 */
static int isoperator(char c) {
    switch (c) {
    case '+':
    case '-':
    case '/':
    case '*':
    case '=':
    case '>':
    case '<':
    case '|':
    case '&':
        return 1;
    default:
        return 0;
    }
}

static rtoken_t rtoken_new() {
|
||
|
rtoken_t token;
|
||
|
memset(&token, 0, sizeof(token));
|
||
|
token.type = RT_UNKNOWN;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
rtoken_t rlex_number() {
|
||
|
rtoken_t token = rtoken_new();
|
||
|
token.col = _content_col;
|
||
|
token.line = _content_line;
|
||
|
bool first_char = true;
|
||
|
int dot_count = 0;
|
||
|
char c;
|
||
|
while (isdigit(c = _content[_content_ptr]) || (first_char && _content[_content_ptr] == '-') ||
|
||
|
(dot_count == 0 && _content[_content_ptr] == '.')) {
|
||
|
if (c == '.')
|
||
|
dot_count++;
|
||
|
first_char = false;
|
||
|
char chars[] = {c, 0};
|
||
|
strcat(token.value, chars);
|
||
|
_content_ptr++;
|
||
|
_content_col++;
|
||
|
}
|
||
|
token.type = RT_NUMBER;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
static rtoken_t rlex_symbol() {
|
||
|
rtoken_t token = rtoken_new();
|
||
|
|
||
|
token.col = _content_col;
|
||
|
token.line = _content_line;
|
||
|
char c;
|
||
|
while (isalpha(_content[_content_ptr]) || _content[_content_ptr] == '_') {
|
||
|
c = _content[_content_ptr];
|
||
|
char chars[] = {c, 0};
|
||
|
strcat(token.value, chars);
|
||
|
_content_ptr++;
|
||
|
_content_col++;
|
||
|
}
|
||
|
token.type = RT_SYMBOL;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
static rtoken_t rlex_operator() {
|
||
|
|
||
|
rtoken_t token = rtoken_new();
|
||
|
|
||
|
token.col = _content_col;
|
||
|
token.line = _content_line;
|
||
|
char c;
|
||
|
bool is_first = true;
|
||
|
while (isoperator(_content[_content_ptr])) {
|
||
|
if (!is_first) {
|
||
|
if (_content[_content_ptr - 1] == '=' && _content[_content_ptr] == '-') {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
c = _content[_content_ptr];
|
||
|
char chars[] = {c, 0};
|
||
|
strcat(token.value, chars);
|
||
|
_content_ptr++;
|
||
|
_content_col++;
|
||
|
is_first = false;
|
||
|
}
|
||
|
token.type = RT_OPERATOR;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
static rtoken_t rlex_punct() {
|
||
|
|
||
|
rtoken_t token = rtoken_new();
|
||
|
|
||
|
token.col = _content_col;
|
||
|
token.line = _content_line;
|
||
|
char c;
|
||
|
bool is_first = true;
|
||
|
while (ispunct(_content[_content_ptr])) {
|
||
|
if (!is_first) {
|
||
|
if (_content[_content_ptr] == '"') {
|
||
|
break;
|
||
|
}
|
||
|
if (_content[_content_ptr] == '\'') {
|
||
|
break;
|
||
|
}
|
||
|
if (isgroupingchar(_content[_content_ptr])) {
|
||
|
break;
|
||
|
}
|
||
|
if (isoperator(_content[_content_ptr])) {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
c = _content[_content_ptr];
|
||
|
char chars[] = {c, 0};
|
||
|
strcat(token.value, chars);
|
||
|
_content_ptr++;
|
||
|
_content_col++;
|
||
|
is_first = false;
|
||
|
}
|
||
|
token.type = RT_PUNCT;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
static rtoken_t rlex_string() {
|
||
|
rtoken_t token = rtoken_new();
|
||
|
char c;
|
||
|
token.col = _content_col;
|
||
|
token.line = _content_line;
|
||
|
char str_chr = _content[_content_ptr];
|
||
|
_content_ptr++;
|
||
|
while (_content[_content_ptr] != str_chr) {
|
||
|
c = _content[_content_ptr];
|
||
|
if (c == '\\') {
|
||
|
_content_ptr++;
|
||
|
c = _content[_content_ptr];
|
||
|
if (c == 'n') {
|
||
|
c = '\n';
|
||
|
} else if (c == 'r') {
|
||
|
c = '\r';
|
||
|
} else if (c == 't') {
|
||
|
c = '\t';
|
||
|
} else if (c == str_chr) {
|
||
|
c = str_chr;
|
||
|
}
|
||
|
|
||
|
_content_col++;
|
||
|
}
|
||
|
char chars[] = {c, 0};
|
||
|
strcat(token.value, chars);
|
||
|
_content_ptr++;
|
||
|
_content_col++;
|
||
|
}
|
||
|
_content_ptr++;
|
||
|
token.type = RT_STRING;
|
||
|
return token;
|
||
|
}
|
||
|
|
||
|
void rlex(char *content) {
|
||
|
_content = content;
|
||
|
_content_ptr = 0;
|
||
|
_content_col = 1;
|
||
|
_content_line = 1;
|
||
|
}
|
||
|
|
||
|
/*
 * Appends `times` copies of the string src to the NUL-terminated string in
 * dest.
 *
 * dest must already have room for strlen(dest) + times * strlen(src) + 1
 * bytes; no bounds are checked. src must not overlap dest.
 *
 * The end of dest is located once and the copies are written from there,
 * instead of rescanning dest for every copy.
 */
static void rlex_repeat_str(char *dest, const char *src, unsigned int times) {
    const size_t src_len = strlen(src);
    char *end = dest + strlen(dest);
    for (unsigned int i = 0; i < times; i++) {
        memcpy(end, src, src_len);
        end += src_len;
    }
    *end = '\0';
}

/*
 * Builds a token of the given type whose text is a copy of value and whose
 * line/col are the lexer's current counters.
 *
 * value may be NULL (the token text is then empty). Text longer than
 * sizeof(token.value) - 1 bytes is truncated rather than written past the
 * end of the token buffer.
 */
rtoken_t rtoken_create(rtoken_type_t type, char *value) {
    rtoken_t token = rtoken_new();
    token.type = type;
    token.col = _content_col;
    token.line = _content_line;

    if (value != NULL) {
        size_t len = strlen(value);
        if (len > sizeof token.value - 1) {
            len = sizeof token.value - 1;
        }
        memcpy(token.value, value, len);
        token.value[len] = '\0';
    }
    return token;
}

rtoken_t rlex_next() {
|
||
|
while (true) {
|
||
|
|
||
|
_content_col++;
|
||
|
|
||
|
if (_content[_content_ptr] == 0) {
|
||
|
return rtoken_create(RT_EOF, "eof");
|
||
|
} else if (_content[_content_ptr] == '\n') {
|
||
|
_content_line++;
|
||
|
_content_col = 1;
|
||
|
_content_ptr++;
|
||
|
} else if (isspace(_content[_content_ptr])) {
|
||
|
_content_ptr++;
|
||
|
} else if (isdigit(_content[_content_ptr]) || (_content[_content_ptr] == '-' && isdigit(_content[_content_ptr + 1]))) {
|
||
|
return rlex_number();
|
||
|
} else if (isalpha(_content[_content_ptr]) || _content[_content_ptr] == '_') {
|
||
|
return rlex_symbol();
|
||
|
} else if (_content[_content_ptr] == '"' || _content[_content_ptr] == '\'') {
|
||
|
return rlex_string();
|
||
|
} else if (isoperator(_content[_content_ptr])) {
|
||
|
return rlex_operator();
|
||
|
} else if (ispunct(_content[_content_ptr])) {
|
||
|
if (_content[_content_ptr] == '{') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_CURLY_BRACE_OPEN, "{");
|
||
|
}
|
||
|
if (_content[_content_ptr] == '}') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_CURLY_BRACE_CLOSE, "}");
|
||
|
}
|
||
|
if (_content[_content_ptr] == '(') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_BRACE_OPEN, "(");
|
||
|
}
|
||
|
if (_content[_content_ptr] == ')') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_BRACE_CLOSE, ")");
|
||
|
}
|
||
|
if (_content[_content_ptr] == '[') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_BRACKET_OPEN, "[");
|
||
|
}
|
||
|
if (_content[_content_ptr] == ']') {
|
||
|
|
||
|
_content_ptr++;
|
||
|
return rtoken_create(RT_BRACKET_CLOSE, "]");
|
||
|
}
|
||
|
return rlex_punct();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
char *rlex_format(char *content) {
|
||
|
rlex(content);
|
||
|
char *result = (char *)malloc(strlen(content) + 4096);
|
||
|
result[0] = 0;
|
||
|
unsigned int tab_index = 0;
|
||
|
char *tab_chars = " ";
|
||
|
unsigned int col = 0;
|
||
|
rtoken_t token_previous;
|
||
|
token_previous.value[0] = 0;
|
||
|
token_previous.type = RT_UNKNOWN;
|
||
|
while (true) {
|
||
|
rtoken_t token = rlex_next();
|
||
|
if (token.type == RT_EOF) {
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// col = strlen(token.value);
|
||
|
|
||
|
if (col == 0) {
|
||
|
rlex_repeat_str(result, tab_chars, tab_index);
|
||
|
// col = strlen(token.value);// strlen(tab_chars) * tab_index;
|
||
|
}
|
||
|
|
||
|
if (token.type == RT_STRING) {
|
||
|
strcat(result, "\"");
|
||
|
|
||
|
char string_with_slashes[strlen(token.value) * 2 + 1];
|
||
|
rstraddslashes(token.value, string_with_slashes);
|
||
|
strcat(result, string_with_slashes);
|
||
|
|
||
|
strcat(result, "\"");
|
||
|
// col+= strlen(token.value) + 2;
|
||
|
// printf("\n");
|
||
|
// printf("<<<%s>>>\n",token.value);
|
||
|
|
||
|
memcpy(&token_previous, &token, sizeof(token));
|
||
|
continue;
|
||
|
}
|
||
|
if (!(strcmp(token.value, "{"))) {
|
||
|
if (col != 0) {
|
||
|
strcat(result, "\n");
|
||
|
rlex_repeat_str(result, " ", tab_index);
|
||
|
}
|
||
|
strcat(result, token.value);
|
||
|
|
||
|
tab_index++;
|
||
|
|
||
|
strcat(result, "\n");
|
||
|
|
||
|
col = 0;
|
||
|
|
||
|
memcpy(&token_previous, &token, sizeof(token));
|
||
|
continue;
|
||
|
} else if (!(strcmp(token.value, "}"))) {
|
||
|
unsigned int tab_indexed = 0;
|
||
|
if (tab_index)
|
||
|
tab_index--;
|
||
|
strcat(result, "\n");
|
||
|
|
||
|
rlex_repeat_str(result, tab_chars, tab_index);
|
||
|
tab_indexed++;
|
||
|
|
||
|
strcat(result, token.value);
|
||
|
strcat(result, "\n");
|
||
|
col = 0;
|
||
|
|
||
|
memcpy(&token_previous, &token, sizeof(token));
|
||
|
continue;
|
||
|
}
|
||
|
if ((token_previous.type == RT_SYMBOL && token.type == RT_NUMBER) ||
|
||
|
(token_previous.type == RT_NUMBER && token.type == RT_SYMBOL) || (token_previous.type == RT_PUNCT && token.type == RT_SYMBOL) ||
|
||
|
(token_previous.type == RT_BRACE_CLOSE && token.type == RT_SYMBOL) ||
|
||
|
(token_previous.type == RT_SYMBOL && token.type == RT_SYMBOL)) {
|
||
|
if (token_previous.value[0] != ',' && token_previous.value[0] != '.') {
|
||
|
if (token.type != RT_OPERATOR && token.value[0] != '.') {
|
||
|
strcat(result, "\n");
|
||
|
rlex_repeat_str(result, tab_chars, tab_index);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (token.type == RT_OPERATOR) {
|
||
|
strcat(result, " ");
|
||
|
}
|
||
|
if (token.type == RT_STRING) {
|
||
|
strcat(result, "\"");
|
||
|
}
|
||
|
strcat(result, token.value);
|
||
|
if (token.type == RT_STRING) {
|
||
|
strcat(result, "\"");
|
||
|
}
|
||
|
|
||
|
if (token.type == RT_OPERATOR) {
|
||
|
strcat(result, " ");
|
||
|
}
|
||
|
if (!strcmp(token.value, ",")) {
|
||
|
strcat(result, " ");
|
||
|
}
|
||
|
col += strlen(token.value);
|
||
|
memcpy(&token_previous, &token, sizeof(token));
|
||
|
}
|
||
|
return result;
|
||
|
}
|
||
|
#endif /* RLEXER_H */