summary refs log tree commit diff
path: root/src/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lex.c')
-rw-r--r-- src/lex.c 201
1 files changed, 201 insertions, 0 deletions
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..ffd804e
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,201 @@
+#include "lex.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <backend.h>
+
+/*
+ * Build a `struct token` compound literal for one keyword-table entry:
+ * `id` becomes the token type and `str` the token text (me.buf).
+ *
+ * me.len is sizeof(str): for a string literal that counts the terminating
+ * NUL as well, and for the NULL sentinel it is the size of the null pointer
+ * constant rather than a text length.
+ * NOTE(review): lex_word() passes a plain character count to sv_create(), so
+ * confirm whether keyword lengths are really meant to include the NUL.
+ */
+#define KEYWORD(id, str) \
+ (struct token) { \
+ .type = id, \
+ .me = { \
+ .buf = str, \
+ .len = sizeof(str) \
+ } \
+ }
+
+/*
+ * Keyword table scanned by lex_keyword_lookup().
+ * The final T_ERR entry (NULL text) is the end-of-table sentinel: the lookup
+ * loop stops there, so it must stay last.
+ */
+static struct token keywords[] = {
+ KEYWORD(T_TYPE_G64, "g64"),
+ KEYWORD(T_TYPE_G32, "g32"),
+ KEYWORD(T_TYPE_G16, "g16"),
+ KEYWORD(T_TYPE_G8, "g8"),
+ KEYWORD(T_ERR, NULL)
+};
+
+/*
+ * Look up `sv` in the keyword table.
+ *
+ * Walks `keywords` up to its T_ERR sentinel and compares each entry's text
+ * with sv.buf using strcmp().  On a match the keyword's token type is stored
+ * in *res.
+ *
+ * Returns 0 when a keyword matched, 1 otherwise (*res is then left untouched).
+ *
+ * NOTE(review): strcmp() needs both buffers to be NUL-terminated and ignores
+ * sv.len -- confirm that sv_create() guarantees termination.
+ */
+int lex_keyword_lookup(struct string_view sv, uint64_t *res)
+{
+    struct token *kw = keywords;
+
+    while(kw->type != T_ERR) {
+        if(!strcmp(kw->me.buf, sv.buf)) {
+            *res = kw->type;
+            return 0;
+        }
+        kw++;
+    }
+
+    return 1;
+}
+
+/*
+ * Report whether the current position still lies inside the input.
+ *
+ * Returns 1 while lc->pos is below lc->size, 0 otherwise.
+ */
+int lex_within_bounds(struct lex_config *lc)
+{
+    if(lc->pos >= lc->size)
+        return 0;
+
+    return 1;
+}
+
+
+/*
+ * Report whether the position `o` characters ahead of the current one still
+ * lies inside the input.
+ *
+ * Returns 1 while lc->pos + o is below lc->size, 0 otherwise.
+ */
+int lex_within_obounds(struct lex_config *lc, size_t o)
+{
+    return ((lc->pos + o) < lc->size) ? 1 : 0;
+}
+
+/*
+ * Look at the character `o` positions ahead without moving the cursor.
+ *
+ * When that position is outside the input (per lex_within_obounds) the
+ * character at the current position is returned instead.
+ */
+char lex_peek(struct lex_config *lc, size_t o)
+{
+    return lex_within_obounds(lc, o) ? lc->src[lc->pos + o]
+                                     : lc->src[lc->pos];
+}
+
+/*
+ * Move the cursor forward by one character, unless it is already at or past
+ * the end of the input, and return the character at the resulting position.
+ *
+ * NOTE(review): stepping from the last in-bounds position lands on
+ * lc->src[lc->size]; presumably the buffer carries a terminator there --
+ * confirm against whoever fills lex_config.
+ */
+char lex_advance(struct lex_config *lc)
+{
+    if(lex_within_bounds(lc))
+        lc->pos++;
+
+    return lc->src[lc->pos];
+}
+
+/*
+ * Return 1 when `c` is an ASCII letter ('a'..'z' or 'A'..'Z'), 0 otherwise.
+ * Underscore and digits are not letters here.
+ */
+int lex_is_char(char c)
+{
+    if((c >= 'a') && (c <= 'z'))
+        return 1;
+
+    return (c >= 'A') && (c <= 'Z');
+}
+
+/*
+ * Return 1 when `c` is an ASCII decimal digit ('0'..'9'), 0 otherwise.
+ */
+int lex_is_digit(char c)
+{
+    return !(c < '0' || c > '9');
+}
+
+/*
+ * Return 1 when `c` is a blank the lexer skips -- space, tab, carriage
+ * return or newline -- and 0 for everything else.
+ */
+int lex_is_space(char c)
+{
+    switch(c) {
+    case ' ':
+    case '\t':
+    case '\r':
+    case '\n':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Advance the cursor past any run of whitespace (see lex_is_space).
+ *
+ * The loop also stops at the end of the input.  lex_advance() refuses to move
+ * once lc->pos reaches lc->size, so without the bounds check a whitespace
+ * byte sitting at that position would keep the loop spinning forever.
+ */
+void lex_skip_whitespace(struct lex_config *lc)
+{
+    while(lex_within_bounds(lc) && lex_is_space(lc->src[lc->pos])) {
+        lex_advance(lc);
+    }
+}
+
+/*
+ * Lex an identifier or keyword starting at the current position.
+ *
+ * lex_next() only calls this when the current character is a letter, so that
+ * character is consumed unconditionally; the scan then continues over letters
+ * and digits.  The scanned text is handed to sv_create() and looked up in the
+ * keyword table: a hit yields that keyword's token type, a miss yields T_ID.
+ *
+ * The scan is bounded by lex_within_bounds(): lex_advance() stops moving once
+ * lc->pos reaches lc->size, so an alphanumeric byte at that position would
+ * otherwise make the loop (and the length counter) run forever.
+ */
+struct token lex_word(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    int i = 0;
+    int start_pos = lc->pos;
+
+    do {
+        lex_advance(lc);
+        i++;
+    } while(lex_within_bounds(lc) &&
+            (lex_is_char(lc->src[lc->pos]) || lex_is_digit(lc->src[lc->pos])));
+
+    DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+i);
+
+    uint64_t tt;
+    struct string_view str = sv_create(lc->sp, &lc->src[start_pos], i);
+
+    /* Anything that is not a known keyword is a plain identifier. */
+    if(lex_keyword_lookup(str, &tt) != 0) {
+        tt = T_ID;
+    }
+
+    return (struct token) {
+        .type = tt,
+        .me = str,
+    };
+}
+
+/*
+ * Lex a run of decimal digits starting at the current position and return it
+ * as a T_NUMBER token whose text is created with sv_create().
+ *
+ * The scan is bounded by lex_within_bounds(): lex_advance() stops moving once
+ * lc->pos reaches lc->size, so a digit byte at that position would otherwise
+ * make the loop (and the length counter) run forever.
+ */
+struct token lex_number(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    int i = 0;
+    int start_pos = lc->pos;
+
+    while(lex_within_bounds(lc) && lex_is_digit(lc->src[lc->pos])) {
+        lex_advance(lc);
+        i++;
+    }
+
+    DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+i);
+
+    return (struct token) {
+        .type = T_NUMBER,
+        .me = sv_create(lc->sp, &lc->src[start_pos], i)
+    };
+}
+
+/*
+ * Lex a single-character punctuation token at the current position.
+ *
+ * Recognised characters and the tokens they produce:
+ *   ';' -> TOKEN_SEMICOLON     '=' -> TOKEN_EQUAL_SIGN
+ *   '{' -> TOKEN_SCOPE_START   '}' -> TOKEN_SCOPE_END
+ * The character is consumed before the token is returned.  Any other
+ * character is reported through DIE() together with its position.
+ *
+ * NOTE(review): nothing is returned after DIE(); presumably DIE() does not
+ * return -- confirm, otherwise the caller receives an indeterminate token.
+ */
+struct token lex_opcode(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    const char cur = lc->src[lc->pos];
+
+    if(cur == ';') {
+        lex_advance(lc);
+        DEBUG("Tokenized semicolon.\n");
+        return TOKEN_SEMICOLON;
+    }
+
+    if(cur == '=') {
+        lex_advance(lc);
+        DEBUG("Tokenized equal sign.\n");
+        return TOKEN_EQUAL_SIGN;
+    }
+
+    if(cur == '{') {
+        lex_advance(lc);
+        DEBUG("Tokenized scope start.\n");
+        return TOKEN_SCOPE_START;
+    }
+
+    if(cur == '}') {
+        lex_advance(lc);
+        DEBUG("Tokenized scope end.\n");
+        return TOKEN_SCOPE_END;
+    }
+
+    DIE("ERR: Unknown char at position: %d, char: %d\n", lc->pos, lc->src[lc->pos]);
+}
+
+/*
+ * Skip a `//` line comment.
+ *
+ * Called with the cursor on the first '/'.  The second character must also be
+ * '/', otherwise DIE() is invoked.  Everything up to and including the next
+ * newline is then consumed.
+ *
+ * The scan for the newline is bounded by lex_within_bounds(): lex_advance()
+ * stops moving once lc->pos reaches lc->size, so a comment on the last line
+ * of the input with no trailing '\n' would otherwise never terminate.
+ */
+void lex_skip_comment(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    lex_advance(lc);
+    if(lc->src[lc->pos] != '/') {
+        DIE("ERR: Comment invalid, it needs to be //.");
+    }
+
+    lex_advance(lc);
+
+    while(lex_within_bounds(lc) && lc->src[lc->pos] != '\n') {
+        lex_advance(lc);
+    }
+
+    /* Consume the newline itself; a no-op when the input ended first. */
+    lex_advance(lc);
+}
+
+/*
+ * Produce the next token from the input.
+ *
+ * Returns TOKEN_EOF when the cursor is already outside the input or when a
+ * NUL character is reached after skipping (the NUL is stepped over via
+ * lex_advance).  Otherwise whitespace and `//` comments are skipped and the
+ * first remaining character selects the sub-lexer: a letter goes to
+ * lex_word(), a digit to lex_number(), anything else to lex_opcode().
+ *
+ * Any '/' at token position is treated as the start of a comment, so a lone
+ * '/' ends up in lex_skip_comment()'s error path.
+ */
+struct token lex_next(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    if(!lex_within_bounds(lc))
+        return TOKEN_EOF;
+
+    /* Alternate between blanks and comments until neither is left. */
+    for(;;) {
+        lex_skip_whitespace(lc);
+        if(lc->src[lc->pos] != '/')
+            break;
+        lex_skip_comment(lc);
+    }
+
+    const char cur = lc->src[lc->pos];
+
+    if(cur == '\0') {
+        lex_advance(lc);
+        return TOKEN_EOF;
+    } else if(lex_is_char(cur)) {
+        return lex_word(lc);
+    } else if(lex_is_digit(cur)) {
+        return lex_number(lc);
+    }
+
+    return lex_opcode(lc);
+}