diff options
Diffstat (limited to 'src/lex.c')
| -rw-r--r-- | src/lex.c | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/src/lex.c b/src/lex.c new file mode 100644 index 0000000..ffd804e --- /dev/null +++ b/src/lex.c @@ -0,0 +1,201 @@ +#include "lex.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <backend.h> + +#define KEYWORD(id, str) \ + (struct token) { \ + .type = id, \ + .me = { \ + .buf = str, \ + .len = sizeof(str) \ + } \ + } + +static struct token keywords[] = { + KEYWORD(T_TYPE_G64, "g64"), + KEYWORD(T_TYPE_G32, "g32"), + KEYWORD(T_TYPE_G16, "g16"), + KEYWORD(T_TYPE_G8, "g8"), + KEYWORD(T_ERR, NULL) +}; + +int lex_keyword_lookup(struct string_view sv, uint64_t *res) +{ + for(struct token *ptr = keywords; ptr->type != T_ERR; ptr++) { // Increase safety + if(strcmp(ptr->me.buf, sv.buf) == 0) { + *res = ptr->type; + return 0; + } + } + return 1; +} + +int lex_within_bounds(struct lex_config *lc) +{ + return lc->pos < lc->size; +} + + +int lex_within_obounds(struct lex_config *lc, size_t o) +{ + return (lc->pos + o) < lc->size; +} + +char lex_peek(struct lex_config *lc, size_t o) +{ + if(!lex_within_obounds(lc, o)) return lc->src[lc->pos]; + return lc->src[lc->pos + o]; +} + +char lex_advance(struct lex_config *lc) +{ + if(!lex_within_bounds(lc)) return lc->src[lc->pos]; + lc->pos++; + return lc->src[lc->pos]; +} + +int lex_is_char(char c) +{ + return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; +} + +int lex_is_digit(char c) +{ + return c >= '0' && c <= '9'; +} + +int lex_is_space(char c) +{ + return (c == ' ') || + (c == '\t') || + (c == '\r') || + (c == '\n'); +} + +void lex_skip_whitespace(struct lex_config *lc) +{ + while(lex_is_space(lc->src[lc->pos])) { + lex_advance(lc); + } +} + +struct token lex_word(struct lex_config *lc) +{ + assert(lc != NULL); + + int i = 0; + int start_pos = lc->pos; + + do { + lex_advance(lc); + i++; + } while(lex_is_char(lc->src[lc->pos]) || lex_is_digit(lc->src[lc->pos])); + + DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+i); + + uint64_t tt; + struct string_view str = sv_create(lc->sp, &lc->src[start_pos], i); + + if(lex_keyword_lookup(str, &tt) == 1) { + tt = T_ID; + } + + return (struct token) { + .type = tt, + .me = str, + }; +} + +struct token lex_number(struct lex_config *lc) +{ + assert(lc != NULL); + + int i = 0; + int start_pos = lc->pos; + + while(lex_is_digit(lc->src[lc->pos])) { + lex_advance(lc); + i++; + } + + DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+i); + + return (struct token) { + .type = T_NUMBER, + .me = sv_create(lc->sp, &lc->src[start_pos], i) + }; +} + +struct token lex_opcode(struct lex_config *lc) +{ + assert(lc != NULL); + + switch(lc->src[lc->pos]) { + case ';': + lex_advance(lc); + DEBUG("Tokenized semicolon.\n"); + return TOKEN_SEMICOLON; + case '=': + lex_advance(lc); + DEBUG("Tokenized equal sign.\n"); + return TOKEN_EQUAL_SIGN; + case '{': + lex_advance(lc); + DEBUG("Tokenized scope start.\n"); + return TOKEN_SCOPE_START; + case '}': + lex_advance(lc); + DEBUG("Tokenized scope end.\n"); + return TOKEN_SCOPE_END; + + default: + DIE("ERR: Unknown char at position: %d, char: %d\n", lc->pos, lc->src[lc->pos]); + } +} + +void lex_skip_comment(struct lex_config *lc) +{ + assert(lc != NULL); + lex_advance(lc); + if(lc->src[lc->pos] != '/') + DIE("ERR: Comment invalid, it needs to be //."); + + lex_advance(lc); + + while(lc->src[lc->pos] != '\n') + lex_advance(lc); + + lex_advance(lc); +} + +struct token lex_next(struct lex_config *lc) +{ + assert(lc != NULL); + + if(!lex_within_bounds(lc)) + return TOKEN_EOF; + + lex_skip_whitespace(lc); + + while(lc->src[lc->pos] == '/') { + lex_skip_comment(lc); + lex_skip_whitespace(lc); + } + + if(lc->src[lc->pos] == '\0') { + lex_advance(lc); + return TOKEN_EOF; + } + + if(lex_is_char(lc->src[lc->pos])) + return lex_word(lc); + + if(lex_is_digit(lc->src[lc->pos])) + return lex_number(lc); + return lex_opcode(lc); +} |
