1 files changed, 201 insertions, 0 deletions
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..ffd804e
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,201 @@
+#include "lex.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <backend.h>
+
+/*
+ * Build a keyword table entry from a token type and a string literal.
+ *
+ * `str` must be a string literal: its length is measured with sizeof, minus
+ * one so that the terminating NUL is not counted.  This keeps .len in line
+ * with the character counts the lexer passes to sv_create().
+ */
+#define KEYWORD(id, str)							\
+	(struct token) {							\
+		.type = (id),							\
+		.me = {								\
+			.buf = (str),						\
+			.len = sizeof(str) - 1					\
+		}								\
+	}
+
+/*
+ * Reserved words recognised by lex_keyword_lookup().
+ * The table is terminated by an entry whose type is T_ERR.
+ */
+static struct token keywords[] = {
+	KEYWORD(T_TYPE_G64, "g64"),
+	KEYWORD(T_TYPE_G32, "g32"),
+	KEYWORD(T_TYPE_G16, "g16"),
+	KEYWORD(T_TYPE_G8, "g8"),
+	/* Sentinel: spelled out by hand because sizeof cannot measure NULL as a string. */
+	(struct token) { .type = T_ERR, .me = { .buf = NULL, .len = 0 } }
+};
+
+/*
+ * Look `sv` up in the keyword table.
+ *
+ * On a match the keyword's token type is stored in *res and 0 is returned.
+ * Otherwise *res is left untouched and 1 is returned; this is also the
+ * result when sv.buf or res is NULL.
+ *
+ * The comparison is bounded by sv.len rather than done with strcmp(), so it
+ * never reads past the view when sv.buf is not NUL-terminated.
+ * NOTE(review): assumes sv.len is the character count handed to sv_create()
+ * (terminating NUL not included) -- confirm against the string_view definition.
+ */
+int lex_keyword_lookup(struct string_view sv, uint64_t *res)
+{
+	if(sv.buf == NULL || res == NULL)
+		return 1;
+
+	for(struct token *kw = keywords; kw->type != T_ERR; kw++) {
+		/* Keyword spellings are NUL-terminated literals, so strlen() is safe here. */
+		size_t kw_len = strlen(kw->me.buf);
+
+		if(kw_len == (size_t)sv.len && memcmp(kw->me.buf, sv.buf, kw_len) == 0) {
+			*res = kw->type;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/* Return 1 while lc->pos is below lc->size, 0 otherwise. */
+int lex_within_bounds(struct lex_config *lc)
+{
+	int in_range = (lc->pos < lc->size);
+
+	return in_range;
+}
+
+
+/* Return 1 when the position `o` characters past the cursor is below lc->size, 0 otherwise. */
+int lex_within_obounds(struct lex_config *lc, size_t o)
+{
+	if((lc->pos + o) < lc->size)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Return the character `o` positions ahead of the cursor without moving it.
+ *
+ * When that position lies outside the input, '\0' is returned.  Handing back
+ * the character under the cursor instead would make "nothing there" look
+ * like real lookahead, and would itself index at lc->size once the cursor
+ * has reached the end.
+ */
+char lex_peek(struct lex_config *lc, size_t o)
+{
+	if(!lex_within_obounds(lc, o))
+		return '\0';
+
+	return lc->src[lc->pos + o];
+}
+
+/*
+ * Move the cursor one character forward and return the character it then
+ * points at.  When the cursor is already outside the bounds it is left in
+ * place and the character at that position is returned.
+ */
+char lex_advance(struct lex_config *lc)
+{
+	if(lex_within_bounds(lc))
+		lc->pos++;
+
+	return lc->src[lc->pos];
+}
+
+/* Return 1 when `c` lies in 'a'..'z' or 'A'..'Z', 0 otherwise. */
+int lex_is_char(char c)
+{
+	if(c >= 'a' && c <= 'z')
+		return 1;
+	if(c >= 'A' && c <= 'Z')
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 when `c` lies in '0'..'9', 0 otherwise. */
+int lex_is_digit(char c)
+{
+	if(c < '0' || c > '9')
+		return 0;
+
+	return 1;
+}
+
+/* Return 1 when `c` is a space, tab, carriage return or newline, 0 otherwise. */
+int lex_is_space(char c)
+{
+	switch(c) {
+	case ' ':
+	case '\t':
+	case '\r':
+	case '\n':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/* Advance the cursor for as long as the character under it satisfies lex_is_space(). */
+void lex_skip_whitespace(struct lex_config *lc)
+{
+	for(;;) {
+		if(!lex_is_space(lc->src[lc->pos]))
+			break;
+
+		lex_advance(lc);
+	}
+}
+
+/*
+ * Lex a word (identifier or keyword) starting at the cursor.
+ *
+ * The character under the cursor is consumed unconditionally, followed by
+ * every letter or digit after it.  The resulting text is passed to
+ * sv_create() and looked up in the keyword table; a word that is not a
+ * keyword gets type T_ID.
+ */
+struct token lex_word(struct lex_config *lc)
+{
+	assert(lc != NULL);
+
+	int start_pos = lc->pos;
+	int i = 1;
+
+	/* The first character always belongs to the word. */
+	lex_advance(lc);
+
+	while(lex_is_char(lc->src[lc->pos]) || lex_is_digit(lc->src[lc->pos])) {
+		lex_advance(lc);
+		i++;
+	}
+
+	DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+i);
+
+	struct string_view str = sv_create(lc->sp, &lc->src[start_pos], i);
+	uint64_t tt;
+
+	/* A lookup result of 1 means "not a keyword". */
+	if(lex_keyword_lookup(str, &tt) == 1)
+		tt = T_ID;
+
+	struct token word = {
+		.type = tt,
+		.me = str,
+	};
+
+	return word;
+}
+
+/*
+ * Lex a run of decimal digits starting at the cursor and return it as a
+ * T_NUMBER token whose text comes from sv_create().
+ */
+struct token lex_number(struct lex_config *lc)
+{
+	assert(lc != NULL);
+
+	int start_pos = lc->pos;
+	int i;
+
+	/* One lex_advance() call per digit; i counts the digits seen. */
+	for(i = 0; lex_is_digit(lc->src[lc->pos]); i++)
+		lex_advance(lc);
+
+	DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+i);
+
+	struct token number = {
+		.type = T_NUMBER,
+		.me = sv_create(lc->sp, &lc->src[start_pos], i)
+	};
+
+	return number;
+}
+
+/*
+ * Lex a single-character token at the cursor: ';', '=', '{' or '}'.
+ * The character is consumed and the matching TOKEN_* value is returned.
+ * Any other character is reported through DIE().
+ * NOTE(review): nothing is returned after DIE(); presumably DIE() does not
+ * return -- confirm against its definition.
+ */
+struct token lex_opcode(struct lex_config *lc)
+{
+	assert(lc != NULL);
+
+	const char c = lc->src[lc->pos];
+
+	if(c == ';') {
+		lex_advance(lc);
+		DEBUG("Tokenized semicolon.\n");
+		return TOKEN_SEMICOLON;
+	}
+
+	if(c == '=') {
+		lex_advance(lc);
+		DEBUG("Tokenized equal sign.\n");
+		return TOKEN_EQUAL_SIGN;
+	}
+
+	if(c == '{') {
+		lex_advance(lc);
+		DEBUG("Tokenized scope start.\n");
+		return TOKEN_SCOPE_START;
+	}
+
+	if(c == '}') {
+		lex_advance(lc);
+		DEBUG("Tokenized scope end.\n");
+		return TOKEN_SCOPE_END;
+	}
+
+	DIE("ERR: Unknown char at position: %d, char: %d\n", lc->pos, lc->src[lc->pos]);
+}
+
+/*
+ * Skip a "//" line comment.  The cursor must be on the first '/'.
+ *
+ * The first '/' is consumed; if the next character is not '/', the error is
+ * reported through DIE().  Everything up to and including the next '\n' is
+ * then skipped.  A comment that runs to the end of the input without a
+ * trailing newline simply ends there.
+ */
+void lex_skip_comment(struct lex_config *lc)
+{
+	assert(lc != NULL);
+	lex_advance(lc);
+	if(lc->src[lc->pos] != '/')
+		DIE("ERR: Comment invalid, it needs to be //.");
+
+	lex_advance(lc);
+
+	/*
+	 * lex_advance() stops moving once the cursor leaves the bounds, so the
+	 * bounds check is what ends the loop for a comment on the last line
+	 * that has no trailing '\n'.
+	 */
+	while(lex_within_bounds(lc) && lc->src[lc->pos] != '\n')
+		lex_advance(lc);
+
+	/* Consume the newline; a no-op when the input ended first. */
+	lex_advance(lc);
+}
+
+/*
+ * Return the next token from the input.
+ *
+ * TOKEN_EOF is returned when the cursor is already outside the bounds, or
+ * when a '\0' is found after skipping whitespace and "//" comments.
+ * Otherwise the first character picks the sub-lexer: a letter goes to
+ * lex_word(), a digit to lex_number(), anything else to lex_opcode().
+ */
+struct token lex_next(struct lex_config *lc)
+{
+	assert(lc != NULL);
+
+	if(!lex_within_bounds(lc))
+		return TOKEN_EOF;
+
+	/* Skip any mix of whitespace and comments in front of the token. */
+	for(lex_skip_whitespace(lc); lc->src[lc->pos] == '/'; lex_skip_whitespace(lc))
+		lex_skip_comment(lc);
+
+	const char c = lc->src[lc->pos];
+
+	if(c == '\0') {
+		lex_advance(lc);
+		return TOKEN_EOF;
+	}
+
+	if(lex_is_char(c))
+		return lex_word(lc);
+
+	if(lex_is_digit(c))
+		return lex_number(lc);
+
+	return lex_opcode(lc);
+}