#include "lex.h"

/*
 * NOTE(review): the names of the five system headers were missing from the
 * source as received. The list below is reconstructed from the identifiers
 * this file uses (assert, uint64_t, strcmp, NULL, size_t) -- confirm it
 * against the original file.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Initializer for one keyword table entry.
 *
 * A plain brace initializer is used (no compound literal) so the entry is a
 * valid constant initializer for an object with static storage duration.
 *
 * NOTE(review): .len is sizeof(str), which counts the terminating NUL of the
 * literal. Nothing in this file reads it; confirm the intended convention of
 * struct string_view before relying on it.
 */
#define KEYWORD(id, str)            \
    {                               \
        .type = (id),               \
        .me = {                     \
            .buf = (str),           \
            .len = sizeof(str)      \
        }                           \
    }

/* Keyword table, terminated by an entry whose type is T_ERR. */
static const struct token keywords[] = {
    KEYWORD(T_TYPE_G64, "g64"),
    KEYWORD(T_TYPE_G32, "g32"),
    KEYWORD(T_TYPE_G16, "g16"),
    KEYWORD(T_TYPE_G8, "g8"),
    /* Sentinel written out by hand: sizeof(NULL) is not a string length. */
    { .type = T_ERR, .me = { .buf = NULL, .len = 0 } }
};

/**
 * Look up a word in the keyword table.
 *
 * @param sv   Word to look up. The comparison uses strcmp, so sv.buf must be
 *             NUL-terminated.
 * @param res  Receives the keyword's token type on a match.
 * @return 0 when a keyword matched and *res was written; 1 otherwise
 *         (including when sv.buf or res is NULL).
 *
 * NOTE(review): a length-bounded comparison would remove the dependency on
 * NUL termination, but the meaning of sv.len is not visible from this file.
 */
int lex_keyword_lookup(struct string_view sv, uint64_t *res)
{
    if (sv.buf == NULL || res == NULL) {
        return 1;
    }

    for (const struct token *kw = keywords; kw->type != T_ERR; kw++) {
        if (strcmp(kw->me.buf, sv.buf) == 0) {
            *res = kw->type;
            return 0;
        }
    }
    return 1;
}

/** Return non-zero when the cursor is before the end of the source. */
int lex_within_bounds(struct lex_config *lc)
{
    return lc->pos < lc->size;
}

/**
 * Return non-zero when the position `o` characters past the cursor is before
 * the end of the source.
 *
 * Written as a subtraction so a large `o` cannot wrap the sum around.
 */
int lex_within_obounds(struct lex_config *lc, size_t o)
{
    if (!lex_within_bounds(lc)) {
        return 0;
    }
    return o < (size_t)(lc->size - lc->pos);
}

/**
 * Return the character `o` positions past the cursor without consuming it.
 *
 * Returns '\0' when that position is outside the source, so a peek past the
 * end cannot be mistaken for real input.
 */
char lex_peek(struct lex_config *lc, size_t o)
{
    if (!lex_within_obounds(lc, o)) {
        return '\0';
    }
    return lc->src[lc->pos + o];
}

/**
 * Move the cursor forward by one character, if it is not already at the end.
 *
 * @return The character under the new cursor position, or '\0' when the
 *         cursor is at the end of the source (the buffer is not indexed at
 *         or beyond lc->size).
 */
char lex_advance(struct lex_config *lc)
{
    if (lex_within_bounds(lc)) {
        lc->pos++;
    }
    return lex_within_bounds(lc) ? lc->src[lc->pos] : '\0';
}

/** Return non-zero for ASCII letters a-z and A-Z. */
int lex_is_char(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/** Return non-zero for ASCII digits 0-9. */
int lex_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/** Return non-zero for space, tab, carriage return and newline. */
int lex_is_space(char c)
{
    return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n');
}

/** Advance the cursor past any run of whitespace, stopping at end of input. */
void lex_skip_whitespace(struct lex_config *lc)
{
    while (lex_within_bounds(lc) && lex_is_space(lc->src[lc->pos])) {
        lex_advance(lc);
    }
}

/**
 * Tokenize a word: the character under the cursor followed by any letters or
 * digits. The caller is expected to have checked that the first character is
 * a letter.
 *
 * @return A token of the matching keyword type, or T_ID when the word is not
 *         in the keyword table.
 */
struct token lex_word(struct lex_config *lc)
{
    assert(lc != NULL);

    int i = 0;
    int start_pos = (int)lc->pos;

    /* The first character is consumed unconditionally. */
    do {
        lex_advance(lc);
        i++;
    } while (lex_within_bounds(lc) &&
             (lex_is_char(lc->src[lc->pos]) || lex_is_digit(lc->src[lc->pos])));

    DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+i);

    uint64_t tt;
    struct string_view str = sv_create(lc->sp, &lc->src[start_pos], i);

    if (lex_keyword_lookup(str, &tt) == 1) {
        tt = T_ID;
    }

    return (struct token) {
        .type = tt,
        .me = str,
    };
}

/**
 * Tokenize a run of decimal digits starting at the cursor.
 *
 * @return A T_NUMBER token whose text is the digit run.
 */
struct token lex_number(struct lex_config *lc)
{
    assert(lc != NULL);

    int i = 0;
    int start_pos = (int)lc->pos;

    while (lex_within_bounds(lc) && lex_is_digit(lc->src[lc->pos])) {
        lex_advance(lc);
        i++;
    }

    DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+i);

    return (struct token) {
        .type = T_NUMBER,
        .me = sv_create(lc->sp, &lc->src[start_pos], i)
    };
}

/**
 * Tokenize a single punctuation character: ';', '=', '{' or '}'.
 *
 * Any other character is reported through DIE.
 */
struct token lex_opcode(struct lex_config *lc)
{
    assert(lc != NULL);

    switch (lc->src[lc->pos]) {
    case ';':
        lex_advance(lc);
        DEBUG("Tokenized semicolon.\n");
        return TOKEN_SEMICOLON;
    case '=':
        lex_advance(lc);
        DEBUG("Tokenized equal sign.\n");
        return TOKEN_EQUAL_SIGN;
    case '{':
        lex_advance(lc);
        DEBUG("Tokenized scope start.\n");
        return TOKEN_SCOPE_START;
    case '}':
        lex_advance(lc);
        DEBUG("Tokenized scope end.\n");
        return TOKEN_SCOPE_END;
    default:
        /* The type of lc->pos is not visible here; cast to match %d. */
        DIE("ERR: Unknown char at position: %d, char: %d\n", (int)lc->pos, lc->src[lc->pos]);
    }

    /*
     * Only reached if DIE returns (presumably it does not -- confirm).
     * Returning a token keeps control from running off the end of a
     * non-void function.
     */
    return (struct token) { .type = T_ERR };
}

/**
 * Skip a `//` comment. The cursor must be on the first '/'.
 *
 * Consumes everything up to and including the terminating newline. A comment
 * that runs to the end of the input (no trailing newline) ends at the end of
 * the input instead of looping forever.
 */
void lex_skip_comment(struct lex_config *lc)
{
    assert(lc != NULL);

    /* Step over the first '/', then require the second one. */
    if (lex_advance(lc) != '/') {
        DIE("ERR: Comment invalid, it needs to be //.");
    }
    lex_advance(lc);

    while (lex_within_bounds(lc) &&
           lc->src[lc->pos] != '\n' &&
           lc->src[lc->pos] != '\0') {
        lex_advance(lc);
    }

    /* Consume the newline only if that is what stopped the scan. */
    if (lex_within_bounds(lc) && lc->src[lc->pos] == '\n') {
        lex_advance(lc);
    }
}

/**
 * Produce the next token from the source.
 *
 * Skips whitespace and `//` comments first. Returns TOKEN_EOF at the end of
 * the input or on a NUL character; otherwise dispatches to lex_word,
 * lex_number or lex_opcode based on the character under the cursor.
 */
struct token lex_next(struct lex_config *lc)
{
    assert(lc != NULL);

    if (!lex_within_bounds(lc)) {
        return TOKEN_EOF;
    }

    lex_skip_whitespace(lc);
    while (lex_within_bounds(lc) && lc->src[lc->pos] == '/') {
        lex_skip_comment(lc);
        lex_skip_whitespace(lc);
    }

    /* Skipping may have consumed the rest of the input. */
    if (!lex_within_bounds(lc)) {
        return TOKEN_EOF;
    }

    if (lc->src[lc->pos] == '\0') {
        lex_advance(lc);
        return TOKEN_EOF;
    }

    if (lex_is_char(lc->src[lc->pos])) {
        return lex_word(lc);
    }
    if (lex_is_digit(lc->src[lc->pos])) {
        return lex_number(lc);
    }
    return lex_opcode(lc);
}