|
/* retoor <retoor@molodetz.nl> */
|
|
#ifndef LOREX_NFA_H
|
|
#define LOREX_NFA_H
|
|
|
|
#include "ast.h"
|
|
#include "lorex.h"
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
|
|
#define EPSILON '\0'
|
|
#define NFA_MAX_TRANSITIONS 256
|
|
|
|
typedef struct nfa_state nfa_state_t;
|
|
|
|
typedef enum {
|
|
TRANS_CHAR,
|
|
TRANS_EPSILON,
|
|
TRANS_DOT,
|
|
TRANS_BRACKET,
|
|
TRANS_CLASS_DIGIT,
|
|
TRANS_CLASS_WORD,
|
|
TRANS_CLASS_SPACE,
|
|
TRANS_CLASS_NDIGIT,
|
|
TRANS_CLASS_NWORD,
|
|
TRANS_CLASS_NSPACE,
|
|
TRANS_GROUP_START,
|
|
TRANS_GROUP_END,
|
|
TRANS_ANCHOR_START,
|
|
TRANS_ANCHOR_END
|
|
} transition_type_t;
|
|
|
|
typedef struct {
|
|
nfa_state_t *target;
|
|
bracket_class_t *bracket;
|
|
transition_type_t type;
|
|
int group_id;
|
|
char value;
|
|
} transition_t;
|
|
|
|
struct nfa_state {
|
|
transition_t *transitions;
|
|
size_t trans_count;
|
|
size_t trans_capacity;
|
|
int id;
|
|
bool accepting;
|
|
};
|
|
|
|
typedef struct {
|
|
nfa_state_t *start;
|
|
nfa_state_t *accept;
|
|
} nfa_fragment_t;
|
|
|
|
typedef struct {
|
|
nfa_state_t **states;
|
|
nfa_state_t *start;
|
|
char *literal_prefix;
|
|
char *literal_suffix;
|
|
size_t state_count;
|
|
size_t capacity;
|
|
size_t prefix_len;
|
|
size_t suffix_len;
|
|
int group_count;
|
|
char single_first_char;
|
|
bool anchored_start;
|
|
bool anchored_end;
|
|
bool first_chars_valid;
|
|
bool is_pure_literal;
|
|
bool has_alt_dispatch;
|
|
unsigned char first_chars[32];
|
|
unsigned char alt_dispatch[256];
|
|
} nfa_t;
|
|
|
|
nfa_t *nfa_create(void);
|
|
void nfa_free(nfa_t *nfa);
|
|
nfa_state_t *nfa_add_state(nfa_t *nfa);
|
|
void nfa_add_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, char value);
|
|
void nfa_add_bracket_transition(nfa_state_t *from, nfa_state_t *to, bracket_class_t *bracket);
|
|
void nfa_add_group_transition(nfa_state_t *from, nfa_state_t *to, transition_type_t type, int group_id);
|
|
nfa_t *nfa_from_ast(ast_node_t *ast, lorex_error_t *error);
|
|
|
|
#endif
|