196 lines
4.9 KiB
C
196 lines
4.9 KiB
C
|
|
/* retoor <retoor@molodetz.nl> */
|
||
|
|
#include "../include/lexer.h"
|
||
|
|
#include <stdio.h>
|
||
|
|
#include <assert.h>
|
||
|
|
#include <string.h>
|
||
|
|
|
||
|
|
/* Outcome counters for this test binary; main() prints both at the end. */
static int tests_failed = 0; /* incremented by ASSERT when a condition is false */
static int tests_passed = 0; /* incremented by RUN_TEST */
|
||
|
|
|
||
|
|
/*
 * Opens the definition of a test case: expands to the header of a file-local
 * function named test_<name> that takes no arguments and returns nothing.
 * The braces of the test body follow the macro invocation.
 */
#define TEST(name) \
    static void test_##name(void)
|
||
|
|
/*
 * Runs test_<name>() and reports its outcome.
 *
 * ASSERT reports a failure by incrementing tests_failed and returning from
 * the test function, so the call itself gives no pass/fail result. The
 * failure counter is therefore sampled before the call and compared
 * afterwards: "ok" is printed and tests_passed is incremented only when no
 * assertion failed, so a failing test is never also counted as passed.
 */
#define RUN_TEST(name) do { \
    int failed_before_run_ = tests_failed; \
    printf(" %s... ", #name); \
    test_##name(); \
    if (tests_failed == failed_before_run_) { \
        printf("ok\n"); \
        tests_passed++; \
    } \
} while(0)
|
||
|
|
|
||
|
|
/*
 * Checks a condition inside a test function.
 *
 * When the condition holds, nothing happens. Otherwise the source line and
 * the text of the condition are printed, tests_failed is incremented, and the
 * enclosing function returns immediately. Because of the bare `return`, this
 * macro can only be used in functions returning void.
 */
#define ASSERT(cond) do { \
    if (cond) { \
        break; \
    } \
    printf("FAILED at line %d: %s\n", __LINE__, #cond); \
    tests_failed++; \
    return; \
} while(0)
|
||
|
|
|
||
|
|
TEST(simple_chars) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "abc");
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'c');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_EOF);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(meta_chars) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, ".*+?|()^$");
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_DOT);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_STAR);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_PLUS);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_QUESTION);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_PIPE);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_LPAREN);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_RPAREN);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CARET);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_DOLLAR);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_EOF);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(escaped_chars) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "\\*\\+\\.");
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '*');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '+');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '.');
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(character_classes) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "\\d\\w\\s\\D\\W\\S");
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_DIGIT);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_WORD);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_SPACE);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NDIGIT);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NWORD);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CLASS_NSPACE);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(bracket_expression) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "[abc]");
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'c');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(bracket_range) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "[a-z]");
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_DASH);
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'z');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(negated_bracket) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "[^a]");
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_LBRACKET);
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_CARET);
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_RBRACKET);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(quantifier_braces) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "a{3}");
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_LBRACE);
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '3');
|
||
|
|
|
||
|
|
ASSERT(lexer_next(&lexer).type == TOKEN_RBRACE);
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(peek) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "ab");
|
||
|
|
|
||
|
|
token_t t = lexer_peek(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
t = lexer_peek(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'a');
|
||
|
|
|
||
|
|
t = lexer_peek(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == 'b');
|
||
|
|
}
|
||
|
|
|
||
|
|
TEST(escape_sequences) {
|
||
|
|
lexer_t lexer;
|
||
|
|
lexer_init(&lexer, "\\n\\t\\r");
|
||
|
|
|
||
|
|
token_t t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '\n');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '\t');
|
||
|
|
|
||
|
|
t = lexer_next(&lexer);
|
||
|
|
ASSERT(t.type == TOKEN_CHAR && t.value == '\r');
|
||
|
|
}
|
||
|
|
|
||
|
|
int main(void) {
|
||
|
|
printf("lexer tests:\n");
|
||
|
|
|
||
|
|
RUN_TEST(simple_chars);
|
||
|
|
RUN_TEST(meta_chars);
|
||
|
|
RUN_TEST(escaped_chars);
|
||
|
|
RUN_TEST(character_classes);
|
||
|
|
RUN_TEST(bracket_expression);
|
||
|
|
RUN_TEST(bracket_range);
|
||
|
|
RUN_TEST(negated_bracket);
|
||
|
|
RUN_TEST(quantifier_braces);
|
||
|
|
RUN_TEST(peek);
|
||
|
|
RUN_TEST(escape_sequences);
|
||
|
|
|
||
|
|
printf("\nlexer: %d passed, %d failed\n", tests_passed, tests_failed);
|
||
|
|
return tests_failed > 0 ? 1 : 0;
|
||
|
|
}
|