|
@ -5,6 +5,7 @@ |
|
|
#include <stdbool.h> |
|
|
#include <stdbool.h> |
|
|
#include <string.h> |
|
|
#include <string.h> |
|
|
#include <assert.h> |
|
|
#include <assert.h> |
|
|
|
|
|
#include "maa.h" |
|
|
#include "tokenize.h" |
|
|
#include "tokenize.h" |
|
|
|
|
|
|
|
|
/*
|
|
|
/*
|
|
@ -22,9 +23,7 @@ static const token_t left_paren = {.token_type=PAREN, .token={.parenthesis="("} |
|
|
|
|
|
|
|
|
static const token_t right_paren = {.token_type=PAREN, .token={.parenthesis=")"} }; |
|
|
static const token_t right_paren = {.token_type=PAREN, .token={.parenthesis=")"} }; |
|
|
|
|
|
|
|
|
static |
|
|
static inline char * |
|
|
inline |
|
|
|
|
|
char * |
|
|
|
|
|
string_head(uint32_t n, char *in, char *out) { |
|
|
string_head(uint32_t n, char *in, char *out) { |
|
|
/* out must be large enough to store the number of characters
|
|
|
/* out must be large enough to store the number of characters
|
|
|
* you want to select from in, plus a byte for the null terminator |
|
|
* you want to select from in, plus a byte for the null terminator |
|
@ -44,9 +43,7 @@ string_head(uint32_t n, char *in, char *out) { |
|
|
return out; |
|
|
return out; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline token_t |
|
|
inline |
|
|
|
|
|
token_t |
|
|
|
|
|
make_token(token_val_t val, tok_t toktype) { |
|
|
make_token(token_val_t val, tok_t toktype) { |
|
|
token_t result; |
|
|
token_t result; |
|
|
result.token_type = toktype; |
|
|
result.token_type = toktype; |
|
@ -102,29 +99,11 @@ pop_token(token_stream *tokens) { |
|
|
len--; |
|
|
len--; |
|
|
assert(tokens->tokens != NULL); |
|
|
assert(tokens->tokens != NULL); |
|
|
|
|
|
|
|
|
switch (tokens->tokens[len].token_type) { |
|
|
|
|
|
case SYMBOL: |
|
|
|
|
|
free(tokens->tokens[len].token.symbol); |
|
|
|
|
|
break; |
|
|
|
|
|
case IDENTIFIER: |
|
|
|
|
|
free(tokens->tokens[len].token.identifier); |
|
|
|
|
|
break; |
|
|
|
|
|
case INTEGER: |
|
|
|
|
|
free(tokens->tokens[len].token.integer); |
|
|
|
|
|
break; |
|
|
|
|
|
case FLOATING: |
|
|
|
|
|
free(tokens->tokens[len].token.floating); |
|
|
|
|
|
break; |
|
|
|
|
|
default: |
|
|
|
|
|
break; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
tokens->length--; |
|
|
tokens->length--; |
|
|
return true; |
|
|
return true; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
inline |
|
|
inline token_t |
|
|
token_t |
|
|
|
|
|
peek_token(token_stream *tokens) { |
|
|
peek_token(token_stream *tokens) { |
|
|
/*
|
|
|
/*
|
|
|
* Check if tokens points to NULL |
|
|
* Check if tokens points to NULL |
|
@ -140,9 +119,7 @@ peek_token(token_stream *tokens) { |
|
|
return tokens->tokens[len-1]; |
|
|
return tokens->tokens[len-1]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline uint32_t |
|
|
inline |
|
|
|
|
|
uint32_t |
|
|
|
|
|
match_int(source_t source, uint32_t begin, const uint32_t length) { |
|
|
match_int(source_t source, uint32_t begin, const uint32_t length) { |
|
|
/* Return false if there is no match
|
|
|
/* Return false if there is no match
|
|
|
* otherwise return the position of the end of the match + 1 |
|
|
* otherwise return the position of the end of the match + 1 |
|
@ -166,9 +143,7 @@ match_int(source_t source, uint32_t begin, const uint32_t length) { |
|
|
return i; |
|
|
return i; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline uint32_t |
|
|
inline |
|
|
|
|
|
uint32_t |
|
|
|
|
|
match_float(source_t source, uint32_t begin, const uint32_t length) { |
|
|
match_float(source_t source, uint32_t begin, const uint32_t length) { |
|
|
/* Return false if there is no match
|
|
|
/* Return false if there is no match
|
|
|
* otherwise: |
|
|
* otherwise: |
|
@ -218,9 +193,7 @@ match_float(source_t source, uint32_t begin, const uint32_t length) { |
|
|
return false; |
|
|
return false; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline uint32_t |
|
|
inline |
|
|
|
|
|
uint32_t |
|
|
|
|
|
match_identifier(source_t source, uint32_t begin, const uint32_t length) { |
|
|
match_identifier(source_t source, uint32_t begin, const uint32_t length) { |
|
|
|
|
|
|
|
|
/* Return false if there is no match
|
|
|
/* Return false if there is no match
|
|
@ -251,9 +224,7 @@ match_identifier(source_t source, uint32_t begin, const uint32_t length) { |
|
|
return i; |
|
|
return i; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline uint32_t |
|
|
inline |
|
|
|
|
|
uint32_t |
|
|
|
|
|
match_symbol(source_t source, uint32_t begin, const uint32_t length) { |
|
|
match_symbol(source_t source, uint32_t begin, const uint32_t length) { |
|
|
uint32_t i, identifier_match; |
|
|
uint32_t i, identifier_match; |
|
|
assert(source != NULL); |
|
|
assert(source != NULL); |
|
@ -273,9 +244,7 @@ match_symbol(source_t source, uint32_t begin, const uint32_t length) { |
|
|
return false; |
|
|
return false; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static |
|
|
static inline void |
|
|
inline |
|
|
|
|
|
void |
|
|
|
|
|
extract_token(uint32_t position, |
|
|
extract_token(uint32_t position, |
|
|
uint32_t begin, |
|
|
uint32_t begin, |
|
|
source_t source, |
|
|
source_t source, |
|
@ -300,6 +269,8 @@ tokenize(source_t source, uint32_t begin, const uint32_t length) { |
|
|
token_val_t current_token; |
|
|
token_val_t current_token; |
|
|
token_t *tokens = calloc(STACK_SIZE, sizeof(token_t)); |
|
|
token_t *tokens = calloc(STACK_SIZE, sizeof(token_t)); |
|
|
|
|
|
|
|
|
|
|
|
hsh_HashTable token_memo = hsh_create(NULL, NULL); |
|
|
|
|
|
|
|
|
assert(begin == 0); |
|
|
assert(begin == 0); |
|
|
assert(length > 0); |
|
|
assert(length > 0); |
|
|
assert(source != NULL); |
|
|
assert(source != NULL); |
|
@ -307,9 +278,10 @@ tokenize(source_t source, uint32_t begin, const uint32_t length) { |
|
|
token_stack.length = 0; |
|
|
token_stack.length = 0; |
|
|
token_stack.max_length = STACK_SIZE; |
|
|
token_stack.max_length = STACK_SIZE; |
|
|
token_stack.tokens = tokens; |
|
|
token_stack.tokens = tokens; |
|
|
|
|
|
token_stack.memo = token_memo; |
|
|
|
|
|
char lookahead = '\0'; |
|
|
assert(STACK_SIZE > 0); |
|
|
assert(STACK_SIZE > 0); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while (begin <= length && source[begin]) { |
|
|
while (begin <= length && source[begin]) { |
|
|
if (source[begin] == '(') { |
|
|
if (source[begin] == '(') { |
|
|
/*Matched a left paren */ |
|
|
/*Matched a left paren */ |
|
@ -333,48 +305,86 @@ tokenize(source_t source, uint32_t begin, const uint32_t length) { |
|
|
} |
|
|
} |
|
|
else if ((position = match_float(source, begin, length))) { |
|
|
else if ((position = match_float(source, begin, length))) { |
|
|
/* Matched a float */ |
|
|
/* Matched a float */ |
|
|
|
|
|
lookahead = source[position]; |
|
|
|
|
|
source[position] = '\0'; |
|
|
|
|
|
if ((current_token_val = (char *)hsh_retrieve(token_stack.memo, source+begin))) { |
|
|
|
|
|
current_token.floating = current_token_val; |
|
|
|
|
|
source[position] = lookahead; |
|
|
|
|
|
} |
|
|
|
|
|
else { |
|
|
|
|
|
source[position] = lookahead; |
|
|
assert(position > begin); |
|
|
assert(position > begin); |
|
|
|
|
|
|
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
assert(current_token_val != NULL); |
|
|
assert(current_token_val != NULL); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
|
|
|
hsh_insert(token_stack.memo, current_token_val, current_token_val); |
|
|
current_token.floating = current_token_val; |
|
|
current_token.floating = current_token_val; |
|
|
|
|
|
} |
|
|
push_token(&token_stack, make_token(current_token, FLOATING)); |
|
|
push_token(&token_stack, make_token(current_token, FLOATING)); |
|
|
} |
|
|
} |
|
|
else if ((position = match_int(source, begin, length))) { |
|
|
else if ((position = match_int(source, begin, length))) { |
|
|
/* Matched an int */ |
|
|
/* Matched an int */ |
|
|
|
|
|
lookahead = source[position]; |
|
|
|
|
|
source[position] = '\0'; |
|
|
|
|
|
if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) { |
|
|
|
|
|
current_token.integer = (char *)current_token_val; |
|
|
|
|
|
source[position] = lookahead; |
|
|
|
|
|
} |
|
|
|
|
|
else { |
|
|
assert(position > begin); |
|
|
assert(position > begin); |
|
|
assert(position <= length); |
|
|
assert(position <= length); |
|
|
|
|
|
|
|
|
|
|
|
source[position] = lookahead; |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
assert(current_token_val != NULL); |
|
|
assert(current_token_val != NULL); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
|
|
|
hsh_insert(token_stack.memo, current_token_val, current_token_val); |
|
|
current_token.integer = current_token_val; |
|
|
current_token.integer = current_token_val; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
push_token(&token_stack, make_token(current_token, INTEGER)); |
|
|
push_token(&token_stack, make_token(current_token, INTEGER)); |
|
|
} |
|
|
} |
|
|
else if ((position = match_symbol(source, begin, length))) { |
|
|
else if ((position = match_symbol(source, begin, length))) { |
|
|
/* Matched a symbol */ |
|
|
/* Matched a symbol */ |
|
|
|
|
|
lookahead = source[position]; |
|
|
|
|
|
source[position] = '\0'; |
|
|
|
|
|
if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) { |
|
|
|
|
|
current_token.symbol = (char *)current_token_val; |
|
|
|
|
|
source[position] = lookahead; |
|
|
|
|
|
} |
|
|
|
|
|
else { |
|
|
assert(position > begin); |
|
|
assert(position > begin); |
|
|
assert(position <= length); |
|
|
assert(position <= length); |
|
|
|
|
|
|
|
|
|
|
|
source[position] = lookahead; |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
assert(current_token_val != NULL); |
|
|
assert(current_token_val != NULL); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
|
|
|
hsh_insert(token_stack.memo, current_token_val, current_token_val); |
|
|
current_token.symbol = current_token_val; |
|
|
current_token.symbol = current_token_val; |
|
|
|
|
|
} |
|
|
push_token(&token_stack, make_token(current_token, SYMBOL)); |
|
|
push_token(&token_stack, make_token(current_token, SYMBOL)); |
|
|
|
|
|
|
|
|
} |
|
|
} |
|
|
else if ((position = match_identifier(source, begin, length))) { |
|
|
else if ((position = match_identifier(source, begin, length))) { |
|
|
|
|
|
/* Matched an identifier */ |
|
|
|
|
|
lookahead = source[position]; |
|
|
|
|
|
source[position] = '\0'; |
|
|
|
|
|
if ((current_token_val = hsh_retrieve(token_stack.memo, source+begin))) { |
|
|
|
|
|
current_token.identifier = (char *)current_token_val; |
|
|
|
|
|
source[position] = lookahead; |
|
|
|
|
|
} |
|
|
|
|
|
else { |
|
|
|
|
|
|
|
|
assert(position > begin); |
|
|
assert(position > begin); |
|
|
assert(position <= length); |
|
|
assert(position <= length); |
|
|
|
|
|
|
|
|
|
|
|
source[position] = lookahead; |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
current_token_val = calloc(((position - begin) + 1), sizeof(char)); |
|
|
assert(current_token_val != NULL); |
|
|
assert(current_token_val != NULL); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
extract_token(position, begin, source, current_token_val); |
|
|
|
|
|
hsh_insert(token_stack.memo, current_token_val, current_token_val); |
|
|
current_token.identifier = current_token_val; |
|
|
current_token.identifier = current_token_val; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
push_token(&token_stack, make_token(current_token, IDENTIFIER)); |
|
|
push_token(&token_stack, make_token(current_token, IDENTIFIER)); |
|
|
/* Matched an identifier */ |
|
|
/* Matched an identifier */ |
|
@ -385,9 +395,17 @@ tokenize(source_t source, uint32_t begin, const uint32_t length) { |
|
|
} |
|
|
} |
|
|
begin = position; |
|
|
begin = position; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return token_stack; |
|
|
return token_stack; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/*
 * hsh_iterate() callback: release one memoized token string.
 *
 * tokenize() inserts each token string into the memo table using the same
 * pointer as both key and datum, so exactly one of the two is freed here.
 *
 * Returns 0 so the iteration continues with the next entry.  libmaa's
 * hsh_iterate() stops at the first callback that returns non-zero, so
 * returning a true value would leave every remaining string unfreed.
 */
int free_token(const void *key, const void *val) {
    /* key aliases val; it must not be freed a second time. */
    (void)key;
    free((void *)val);
    return 0;
}
|
|
|
|
|
|
|
|
bool |
|
|
bool |
|
|
release_tokens(token_stream *tokens) { |
|
|
release_tokens(token_stream *tokens) { |
|
|
/* Iterate through the stack, release each token
|
|
|
/* Iterate through the stack, release each token
|
|
@ -396,10 +414,9 @@ release_tokens(token_stream *tokens) { |
|
|
assert(tokens != NULL); |
|
|
assert(tokens != NULL); |
|
|
assert(tokens->tokens != NULL); |
|
|
assert(tokens->tokens != NULL); |
|
|
assert(tokens->max_length > 0); |
|
|
assert(tokens->max_length > 0); |
|
|
|
|
|
|
|
|
while(tokens->length > 0) { |
|
|
|
|
|
pop_token(tokens); |
|
|
|
|
|
} |
|
|
|
|
|
free(tokens->tokens); |
|
|
free(tokens->tokens); |
|
|
|
|
|
hsh_iterate(tokens->memo, free_token); |
|
|
|
|
|
|
|
|
|
|
|
hsh_destroy(tokens->memo); |
|
|
return true; |
|
|
return true; |
|
|
} |
|
|
} |
|
|