summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author 0x221E <0x221E@0xinfinity.dev> 2026-04-12 16:24:06 +0200
committer 0x221E <0x221E@0xinfinity.dev> 2026-04-12 16:24:06 +0200
commit 4946ca67cf04845737f0f7f70b5ed27bcfe9a18b (patch)
tree e0ce4c11f5b81828da7680143ea444003dd355b3
Initial commit (HEAD, master)
-rw-r--r-- .gitignore 1
-rw-r--r-- Makefile 9
-rw-r--r-- code.m 11
-rw-r--r-- code.o 1
-rw-r--r-- include/backend.h 25
-rw-r--r-- include/x86.h 36
-rwxr-xr-x m bin 0 -> 60528 bytes
-rw-r--r-- src/backend/linux.c 92
-rw-r--r-- src/backend/x86.c 75
-rw-r--r-- src/elf.h 11
-rw-r--r-- src/irm.c 247
-rw-r--r-- src/irm.h 58
-rw-r--r-- src/lex.c 201
-rw-r--r-- src/lex.h 76
-rw-r--r-- src/main.c 85
-rw-r--r-- src/parser.c 170
-rw-r--r-- src/parser.h 25
-rw-r--r-- src/sv.c 27
-rw-r--r-- src/sv.h 22
-rw-r--r-- src/symtab.c 55
-rw-r--r-- src/symtab.h 40
21 files changed, 1267 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..df328f7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+code.o
+m
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..877666c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+# Build script for the "m" compiler: every C file under ./src/ is compiled
+# and linked in a single gcc invocation.
+
+# NOTE(review): BUILD_DIR is defined but not referenced by the rule below.
+BUILD_DIR = ./build
+
+SRC_DIRS := ./src/
+
+# Collect every *.c file found under SRC_DIRS.
+SRCS := $(shell find $(SRC_DIRS) -name '*.c')
+
+# Default target: build ./m with debug info; include/ is on the header path.
+all:
+ @gcc -g $(SRCS) -o ./m -Iinclude/
+ @echo [CC] Compiled "m".
diff --git a/code.m b/code.m
new file mode 100644
index 0000000..2c36ce2
--- /dev/null
+++ b/code.m
@@ -0,0 +1,11 @@
+g64 test = 15;
+
+g32 test2 = 15;
+
+//test = 15;
+
+//g32 test5 = 20;
+
+//{
+// test = 150;
+//}
diff --git a/code.o b/code.o
new file mode 100644
index 0000000..d1cf3e7
--- /dev/null
+++ b/code.o
@@ -0,0 +1 @@
+48b80f0000000000000050680f000000
\ No newline at end of file
diff --git a/include/backend.h b/include/backend.h
new file mode 100644
index 0000000..42c6e8c
--- /dev/null
+++ b/include/backend.h
@@ -0,0 +1,25 @@
+#ifndef BACKEND_H
+#define BACKEND_H
+
+/*
+ * Backend interface shared by the compiler stages: diagnostics, output
+ * emitters and source/destination stream handling.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Only the tag is needed for the prototype below; defined in src/sv.h. */
+struct string_view;
+
+/* Logging / abort helpers that capture the call site automatically. */
+#define DEBUG(fmt, ...) debug(__func__, __FILE__, __LINE__, fmt , ## __VA_ARGS__)
+#define DIE(fmt, ...) die(__func__, __FILE__, __LINE__, fmt , ## __VA_ARGS__)
+
+/* printf-style log line prefixed with "LOG name(file:line): ". */
+void debug(const char *name, const char *file, size_t line, const char *fmt, ...);
+/* printf-style error prefixed with "ERROR name(file:line): "; exits with status 1. */
+void die(const char *name, const char *file, size_t line, const char *fmt, ...);
+
+/* Append a value to the destination stream, least significant byte first. */
+void emit8(uint8_t out);
+void emit16(uint16_t out);
+void emit32(uint32_t out);
+void emit64(uint64_t out);
+
+/* Open `path` for reading and return its whole content as a string view. */
+struct string_view backend_stream_init_src(const char *path);
+/* Open `path` as the destination the emit functions write to. */
+void backend_stream_init_dst(const char *path);
+/* Close the streams opened by the two functions above. */
+void backend_stream_close_src(void);
+void backend_stream_close_dst(void);
+
+/* NOTE(review): no definition of save() is visible in this change. */
+void save(const char* path);
+
+#endif
diff --git a/include/x86.h b/include/x86.h
new file mode 100644
index 0000000..6eb8831
--- /dev/null
+++ b/include/x86.h
@@ -0,0 +1,36 @@
+#ifndef X86_H
+#define X86_H
+
+/*
+ * x86-64 encoding helpers: register numbers, REX prefix values, a ModRM
+ * builder and the instruction emitters implemented in src/backend/x86.c.
+ */
+
+#include <stdint.h>
+
+/* Register numbers as used in instruction encodings. */
+#define RAX 0
+#define RCX 1
+#define RDX 2
+#define RBX 3
+#define RSP 4
+#define RBP 5
+#define RSI 6
+#define RDI 7
+#define R8 8
+#define R9 9
+#define R10 10
+#define R11 11
+#define R12 12
+#define R13 13
+#define R14 14
+#define R15 15
+
+/* REX prefixes, written in hex (same values as the former binary literals). */
+#define REXW 0x48  /* 0100 1000: REX.W */
+#define REXWR 0x4C /* 0100 1100: REX.W + REX.R */
+
+/*
+ * Build a ModRM byte: mod in bits 7-6, reg in bits 5-3, rm in bits 2-0.
+ * Arguments and the whole expansion are parenthesized so the macro can be
+ * used with arbitrary expressions and inside larger expressions.
+ */
+#define MODRM(mod, reg, rm) (((uint8_t)(mod) << 6) | ((uint8_t)(reg) << 3) \
+    | ((uint8_t)(rm)))
+
+void x86_mov_r_i64(uint8_t reg, uint64_t value);
+void x86_push_i32(uint32_t value);
+void x86_push_i16(uint16_t value);
+void x86_push_i8(uint8_t value);
+void x86_push_r64(uint8_t reg);
+void x86_push_rm64(uint8_t mode, uint8_t r, uint64_t m);
+
+#endif
diff --git a/m b/m
new file mode 100755
index 0000000..e35f6ed
--- /dev/null
+++ b/m
Binary files differ
diff --git a/src/backend/linux.c b/src/backend/linux.c
new file mode 100644
index 0000000..dfa5a80
--- /dev/null
+++ b/src/backend/linux.c
@@ -0,0 +1,92 @@
+#include <backend.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "../sv.h"
+
+static FILE *src;
+static FILE *dst;
+
+/*
+ * Print "ERROR name(file:line): " followed by the printf-style message and
+ * "Compilation failed!", then terminate the process with exit status 1.
+ */
+void die(const char *name, const char *file, size_t line, const char *fmt, ...)
+{
+    /* line is a size_t, so it needs %zu (it was printed with %d). */
+    printf("ERROR %s(%s:%zu): ", name, file, line);
+    va_list args;
+    va_start(args, fmt);
+    vprintf(fmt, args);
+    va_end(args);
+    printf("\nCompilation failed!\n");
+    exit(1);
+}
+
+/*
+ * Print "LOG name(file:line): " followed by the printf-style message.
+ * No newline is appended; callers put it in fmt.
+ */
+void debug(const char *name, const char *file, size_t line, const char *fmt, ...)
+{
+    /* line is a size_t, so it needs %zu (it was printed with %d). */
+    printf("LOG %s(%s:%zu): ", name, file, line);
+    va_list args;
+    va_start(args, fmt);
+    vprintf(fmt, args);
+    va_end(args);
+}
+
+/*
+ * Open `path` as the source stream and read it completely.
+ *
+ * Returns a view over a freshly malloc'ed, NUL-terminated buffer; len is the
+ * file size without the terminator. The stream stays open until
+ * backend_stream_close_src(). Any failure aborts compilation through DIE.
+ */
+struct string_view backend_stream_init_src(const char *path)
+{
+    assert(path != NULL);
+
+    src = fopen(path, "r");
+    if(src == NULL)
+        DIE("Failed to open '%s' for reading!\n", path);
+
+    if(fseek(src, 0, SEEK_END) != 0)
+        DIE("Failed to seek in '%s'!\n", path);
+    /* ftell reports errors as -1, so keep it signed until checked. */
+    long end = ftell(src);
+    if(end < 0)
+        DIE("Failed to determine the size of '%s'!\n", path);
+    if(fseek(src, 0, SEEK_SET) != 0)
+        DIE("Failed to seek in '%s'!\n", path);
+
+    size_t size = (size_t)end;
+    char *buf = malloc(size + 1);
+    if(buf == NULL)
+        DIE("Out of memory while reading '%s'!\n", path);
+
+    /* Count bytes, not items, so an empty file is not a read failure. */
+    if(fread(buf, 1, size, src) != size)
+        DIE("Failed to read the file!\n");
+    buf[size] = '\0';
+    return (struct string_view){.buf = buf, .len = size};
+}
+
+/*
+ * Open (create or truncate) `path` as the destination stream that the
+ * emit functions write to. Aborts compilation if the file cannot be opened,
+ * because every later emit would otherwise write through a NULL stream.
+ */
+void backend_stream_init_dst(const char *path)
+{
+    dst = fopen(path, "w");
+    if(dst == NULL)
+        DIE("Failed to open '%s' for writing!\n", path);
+}
+
+/*
+ * Close the source stream. Safe to call when no source is open and safe to
+ * call twice: the handle is cleared after closing.
+ */
+void backend_stream_close_src(void)
+{
+    if(src == NULL)
+        return;
+    fclose(src);
+    src = NULL;
+}
+
+/*
+ * Flush and close the destination stream. Safe to call when no destination
+ * is open. A failing fclose means buffered output was lost, so it is
+ * reported instead of being ignored.
+ */
+void backend_stream_close_dst(void)
+{
+    if(dst == NULL)
+        return;
+    FILE *stream = dst;
+    dst = NULL;
+    if(fclose(stream) != 0)
+        DIE("Failed to flush and close the output file!\n");
+}
+
+/* Write one byte to the destination stream as two lowercase hex digits. */
+void emit8(uint8_t out)
+{
+    const unsigned byte = out;
+    fprintf(dst, "%02x", byte);
+}
+
+/*
+ * Write a 16-bit value to the destination stream as hex text, one byte at a
+ * time, least significant byte first.
+ */
+void emit16(uint16_t out)
+{
+    for(unsigned shift = 0; shift < 16; shift += 8) {
+        const uint8_t byte = (uint8_t)((out >> shift) & 0xFF);
+        fprintf(dst, "%02x", byte);
+    }
+}
+
+/*
+ * Write a 32-bit value to the destination stream as hex text, one byte at a
+ * time, least significant byte first.
+ */
+void emit32(uint32_t out)
+{
+    for(unsigned shift = 0; shift < 32; shift += 8) {
+        const uint8_t byte = (uint8_t)((out >> shift) & 0xFF);
+        fprintf(dst, "%02x", byte);
+    }
+}
+
+/*
+ * Write a 64-bit value to the destination stream as hex text, one byte at a
+ * time, least significant byte first.
+ */
+void emit64(uint64_t out)
+{
+    for(unsigned shift = 0; shift < 64; shift += 8) {
+        const uint8_t byte = (uint8_t)((out >> shift) & 0xFF);
+        fprintf(dst, "%02x", byte);
+    }
+}
diff --git a/src/backend/x86.c b/src/backend/x86.c
new file mode 100644
index 0000000..05aafc1
--- /dev/null
+++ b/src/backend/x86.c
@@ -0,0 +1,75 @@
+#include <x86.h>
+
+#include <backend.h>
+
+/*
+ * Abort compilation when `reg` is not a valid register number (0-15).
+ * The argument is parenthesized so expressions can be passed safely.
+ */
+#define REG_SAFETY(reg) \
+    do { \
+        if((reg) > 15) DIE("Compiler error! Register %d is invalid!" \
+            , (reg)); \
+    } while(0)
+
+// TODO: Consider implementing a modrm backend that is rex-aware.
+
+//void x86_calc_modrm(uint16_t reg, uint16_t mod)
+
+/*
+ * Emit `mov reg, imm64` (REX.W + B8+rd io).
+ *
+ * reg:   register number 0-15; anything larger aborts via REG_SAFETY.
+ * value: 64-bit immediate.
+ *
+ * Only the low three bits of the register fit into the opcode byte; for
+ * R8-R15 the fourth bit is carried by REX.B (bit 0 of the REX prefix).
+ */
+void x86_mov_r_i64(uint8_t reg, uint64_t value)
+{
+    REG_SAFETY(reg);
+    emit8(reg > 7 ? (REXW | 0x01) : REXW);
+    emit8(0xB8 + (reg & 7));
+    emit64(value);
+}
+
+/* Placeholder for a mov to r/m operand; not implemented, always aborts. */
+void x86_mov_rm_i64(uint8_t mode, uint16_t reg)
+{
+    (void)mode;
+    (void)reg;
+    DIE("TODO");
+}
+
+/* Emit `push imm32`: opcode 0x68 followed by the 32-bit immediate. */
+void x86_push_i32(uint32_t value)
+{
+    const uint8_t opcode = 0x68;
+
+    emit8(opcode);
+    emit32(value);
+}
+
+/*
+ * Push a 16-bit immediate. Not implemented yet: always aborts via DIE.
+ *
+ * @todo Planned encoding: 66h prefix, PUSH opcode 68, then the immediate.
+ */
+void x86_push_i16(uint16_t value)
+{
+    (void)value;
+    DIE("NOT IMPLEMENTED");
+}
+
+/* Emit `push imm8`: opcode 0x6A followed by the 8-bit immediate. */
+void x86_push_i8(uint8_t value)
+{
+    const uint8_t opcode = 0x6A;
+
+    emit8(opcode);
+    emit8(value);
+}
+
+/*
+ * Emit `push r64` (50+rd).
+ *
+ * reg: register number 0-15; anything larger aborts via REG_SAFETY.
+ *
+ * The opcode byte only holds the low three register bits. R8-R15 need a
+ * REX prefix with the B bit set (0x41); without it, 0x50 + reg would run
+ * into the neighbouring opcodes.
+ */
+void x86_push_r64(uint8_t reg)
+{
+    REG_SAFETY(reg);
+    if(reg > 7)
+        emit8(0x41);
+    emit8(0x50 + (reg & 7));
+}
+
+/*
+ * Start emitting `push r/m64` (opcode FF). Unfinished: only the REX prefix
+ * and the opcode byte are written; the ModRM byte and displacement are
+ * still to do, and `mode` and `m` are not used yet.
+ *
+ * r: register number 0-15; anything larger aborts via REG_SAFETY.
+ *
+ * The prefix choice used to be `r > 7` / `r < 7`, which emitted no prefix
+ * at all for r == 7; every register now gets one.
+ * NOTE(review): once the ModRM byte exists, confirm whether `r` lands in the
+ * reg or the rm field, because that decides between REX.R and REX.B.
+ */
+void x86_push_rm64(uint8_t mode, uint8_t r, uint64_t m)
+{
+    REG_SAFETY(r);
+
+    if(r > 7) {
+        emit8(REXWR);
+    } else {
+        emit8(REXW);
+    }
+
+    emit8(0xFF);
+
+    // Calculate MODRM
+
+    // emit MODRM byte
+    // if mode = 1:
+    // disp
+    // done
+}
diff --git a/src/elf.h b/src/elf.h
new file mode 100644
index 0000000..8f98056
--- /dev/null
+++ b/src/elf.h
@@ -0,0 +1,11 @@
+#ifndef ELF_H
+#define ELF_H
+
+#include <stdint.h>
+
+/*
+ * Start of an ELF description; it currently holds a single one-byte field.
+ * NOTE(review): presumably a placeholder to be extended — confirm.
+ */
+struct Elf {
+ uint8_t magic;
+
+};
+
+#endif
diff --git a/src/irm.c b/src/irm.c
new file mode 100644
index 0000000..6260ae0
--- /dev/null
+++ b/src/irm.c
@@ -0,0 +1,247 @@
+#include "irm.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <backend.h>
+#include <x86.h>
+
+/*
+ * Reserve sizeof(type) bytes of stack for a symbol: grow irm->stackdepth and
+ * record the new depth and the size in the symbol entry.
+ * The body was missing the closing brace of its do-block, so the macro could
+ * not be expanded; arguments are parenthesized as well.
+ */
+#define STACK_SUB(irm, syment, type) \
+    do { \
+        (irm)->stackdepth += sizeof(type); \
+        (syment)->loc = (irm)->stackdepth; \
+        (syment)->len = sizeof(type); \
+    } while(0)
+
+// TODO: Add scopes.
+
+/*
+ * Initialise an IR machine: allocate the 1 MiB slot buffer, reset every
+ * counter and create the symbol table.
+ *
+ * stackdepth and scopedepth are zeroed explicitly: the struct may live in
+ * uninitialised storage, and stackdepth is accumulated with += later on.
+ */
+void irm_init(struct irm *irm)
+{
+    assert(irm != NULL);
+    const size_t bytes = 1024 * 1024; // TODO: platform-agnostic
+    irm->storage = malloc(bytes);
+    if(irm->storage == NULL)
+        DIE("Failed to allocate the IR buffer!");
+    irm->size = bytes / sizeof *irm->storage; /* capacity in 64-bit slots */
+    irm->curr_pos = 0;
+    irm->state = IRM_GRACEFUL;
+    irm->stackdepth = 0;
+    irm->scopedepth = 0;
+    symtab_init(&irm->symtab);
+}
+
+/*
+ * Put the IR machine into the panic state. irm_stmt_done() returns
+ * immediately while this state is set.
+ */
+void irm_panic(struct irm *irm)
+{
+ irm->state = IRM_PANIC;
+}
+
+/* Scope entry hook. Currently a stub: it only checks its argument. */
+void irm_stmt_enter_scope(struct irm *irm)
+{
+ assert(irm != NULL);
+ // Append 1 level.
+}
+
+/* Scope exit hook. Currently a stub: it only checks its argument. */
+void irm_stmt_leave_scope(struct irm *irm)
+{
+ assert(irm != NULL);
+}
+
+/*
+ * Generate code for one variable declaration stored in the IR buffer.
+ *
+ * Slots consumed, starting at *offset:
+ *   [declaration type] [SYM_VAR] [symbol index] [value tag] [value]
+ * On return *offset points at the slot after the value.
+ *
+ * The symbol gets the next 8-byte stack slot (loc) and the size of its
+ * declared type (len); the initial value is pushed onto the stack.
+ * Malformed IR aborts compilation through DIE.
+ */
+void irm_stmt_var_decl(struct irm *irm, size_t *offset)
+{
+    assert(irm != NULL);
+    assert(offset != NULL);
+
+    uint64_t *mem = irm->storage;
+
+    uint64_t ltype = mem[*offset];
+    (*offset)++;
+
+    uint64_t symbol_type = mem[*offset];
+    if(symbol_type != SYM_VAR) {
+        DIE("Symbol type invalid! sym:'%llx'", (unsigned long long)symbol_type);
+    }
+    (*offset)++;
+    uint64_t symid = mem[*offset];
+
+    /* Valid indices are 0..count-1, so symid == count is out of range too. */
+    if(symid >= irm->symtab.count) {
+        DIE("Symbol id: '%llu' not found! This is a compiler bug.\n",
+            (unsigned long long)symid);
+    }
+
+    struct symbol_entry *syment = &irm->symtab.entries[symid];
+    irm->stackdepth += sizeof(uint64_t);
+    syment->loc = irm->stackdepth;
+
+    DEBUG("Symbol id: %llu\n", (unsigned long long)symid);
+    (*offset)++; /* step past the symbol index onto the value tag */
+    (*offset)++; /* skip the value tag, landing on the value itself */
+
+    uint64_t value = mem[*offset];
+    (*offset)++;
+
+    switch(ltype) {
+    case VAR_DECL_QWORD:
+        syment->len = sizeof(uint64_t);
+        x86_mov_r_i64(RAX, value);
+        x86_push_r64(RAX);
+        break;
+    case VAR_DECL_DWORD:
+        syment->len = sizeof(uint32_t);
+        x86_push_i32((uint32_t)value);
+        break;
+    case VAR_DECL_WORD:
+        /* A word is two bytes (len was recorded as sizeof(uint32_t)). */
+        syment->len = sizeof(uint16_t);
+        x86_push_i32((uint32_t)value); // TODO: Replace with immediate 16 instruction
+        break;
+    case VAR_DECL_BYTE:
+        /* A byte is one byte (len was recorded as sizeof(uint16_t)). */
+        syment->len = sizeof(uint8_t);
+        x86_push_i8((uint8_t)value);
+        break;
+    default:
+        DIE("Invalid type supplied! This is a compiler bug. type: %llx.\n"
+            , (unsigned long long)ltype);
+        break;
+    }
+}
+
+/*
+ * Generate code for one variable assignment stored in the IR buffer.
+ *
+ * Slots read, starting at *offset:
+ *   [VAR_ASSIGN] [SYM_VAR] [symbol index] [value tag] [value]
+ *
+ * Only NUMBER64 values are recognised, and that path still aborts with a
+ * TODO before emitting anything; the emit sequence below it is kept as the
+ * draft of the intended code.
+ *
+ * Fixed here: the symbol index is range-checked before use, and the
+ * printf-style arguments match their conversion specifiers.
+ */
+void irm_stmt_var_assign(struct irm *irm, size_t *offset)
+{
+    assert(irm != NULL);
+    if(offset == NULL) {
+        DIE("offset must not be null!");
+    }
+
+    (*offset)++;
+    uint64_t sym_kind = irm->storage[*offset];
+    if(sym_kind != SYM_VAR) {
+        DIE("Variable assignment only supports a symbol of type variable!");
+    }
+
+    (*offset)++;
+    uint64_t symtab_id = irm->storage[*offset];
+    if(symtab_id >= irm->symtab.count) {
+        DIE("Symbol id: '%llu' not found! This is a compiler bug.\n",
+            (unsigned long long)symtab_id);
+    }
+    struct symbol_entry *ent = &irm->symtab.entries[symtab_id];
+    if(ent->symtype != SYM_VAR) {
+        /* The precision argument of %.*s must be an int. */
+        DIE("Symbol '%.*s' is not of type variable!", (int)ent->name.len, ent->name.buf);
+    }
+    (*offset)++;
+    uint64_t value_type = irm->storage[*offset];
+    switch(value_type) {
+    case NUMBER64: { // FIX STACK ISSUE
+        DIE("TODO:: STACK ALGINMENT ISSUE!!");
+        (*offset)++;
+        uint64_t value = irm->storage[*offset];
+        DEBUG("Assigned nr64 literal %llu to '%.*s'.\n", (unsigned long long)value,
+              (int)ent->name.len, ent->name.buf);
+        (*offset)++;
+        // TODO: mov [rsp - X], value
+        // mov rax, rsp
+        emit8(0x48);
+        emit8(0x8B); // RM form: mov r64, r/m64
+        emit8(0xC4); // rsp -> rax
+        // sub rax, <symbol location>
+        emit8(0x48);
+        emit8(0x2d);
+        emit32((uint32_t)ent->loc);
+        // mov rbx, value
+        emit8(0x48);
+        emit8(0xBB); // b8 + rd
+        emit64(value); // FALSE VALUE!! FIX!
+        // mov qword ptr [rax], rbx
+        emit8(0x48);
+        emit8(0x89); // MR
+        emit8(0x18); // rbx -> [rax]
+        break;
+    }
+    default:
+        DIE("Variable assignment only supports a number literal!");
+    }
+}
+
+/*
+ * Process the statement(s) accumulated in the IR buffer and reset it.
+ *
+ * Does nothing while the machine is in the panic state. Otherwise each
+ * statement is dispatched on the upper bits of its first slot, the final
+ * slot must be STMT_DONE, and the used part of the buffer is cleared.
+ *
+ * Fixed here: the buffer is cleared by slot count times slot size (it was
+ * cleared by slot count in bytes), an empty buffer no longer underflows
+ * `curr_pos - 1`, and the log arguments match their format specifiers.
+ */
+void irm_stmt_done(struct irm *irm)
+{
+    assert(irm != NULL);
+    if(irm->state == IRM_PANIC) return;
+    if(irm->curr_pos == 0) {
+        DIE("Statement done was not received! This is a compiler bug.");
+    }
+    size_t offset = 0;
+
+    while(offset < irm->curr_pos - 1) {
+        uint64_t type = irm->storage[offset];
+        type &= 0xFFFF0000;
+
+        switch(type) {
+        case VAR_DECL:
+            irm_stmt_var_decl(irm, &offset);
+            break;
+        case VAR_ASSIGN:
+            irm_stmt_var_assign(irm, &offset);
+            break;
+        default:
+            DIE("Statement identifier not found: %llu.", (unsigned long long)type);
+            break;
+        }
+        DEBUG("Offset: %zu, Size: %zu\n", offset, irm->curr_pos);
+    }
+    if(irm->storage[offset] != STMT_DONE) {
+        DIE("Statement done was not received! This is a compiler bug.");
+    }
+    memset(irm->storage, 0, irm->curr_pos * sizeof *irm->storage);
+    irm->curr_pos = 0;
+}
+
+/*
+ * Append a single slot holding `type` to the IR buffer.
+ * Aborts with "IRM OVERFLOW!" when the buffer is full, matching
+ * irm_push_64v().
+ */
+void irm_push(struct irm *irm, uint64_t type)
+{
+    assert(irm != NULL);
+    if(irm->curr_pos + 1 > irm->size)
+        DIE("IRM OVERFLOW!");
+    uint64_t *mem = irm->storage + irm->curr_pos;
+    mem[0] = type;
+    irm->curr_pos += 1;
+    DEBUG("added %llx to irm.\n", (unsigned long long)type);
+}
+
+/*
+ * Append two slots to the IR buffer: `type` followed by `value`.
+ * Aborts with "IRM OVERFLOW!" when fewer than two slots are free.
+ */
+void irm_push_64v(struct irm *irm, uint64_t type, uint64_t value)
+{
+    assert(irm != NULL);
+    if(irm->curr_pos + 2 > irm->size)
+        DIE("IRM OVERFLOW!");
+    uint64_t* mem = irm->storage + irm->curr_pos;
+    mem[0] = type;
+    mem[1] = value;
+    irm->curr_pos += 2;
+    /* Both arguments are 64 bits wide; %d would read them as int. */
+    DEBUG("added %llu to irm with value %llu.\n",
+          (unsigned long long)type, (unsigned long long)value);
+}
+
+/*
+ * Look up an existing symbol by name and append two slots to the IR buffer:
+ * `type` followed by the symbol's table index.
+ * Aborts when the symbol is unknown or the buffer is full. The lookup runs
+ * before anything is written, so a failed lookup leaves the buffer as is.
+ */
+void irm_push_lookup_sym(struct irm *irm, uint64_t type, struct string_view sym)
+{
+    assert(irm != NULL);
+
+    struct symlookup_result ent = symtab_lookup(&irm->symtab, sym);
+    if(ent.ent == NULL) {
+        /* The precision argument of %.*s must be an int. */
+        DIE("'%.*s' not found!", (int)sym.len, sym.buf);
+    }
+
+    if(irm->curr_pos + 2 > irm->size)
+        DIE("IRM OVERFLOW!");
+
+    uint64_t *mem = irm->storage + irm->curr_pos;
+    mem[0] = type;
+    mem[1] = ent.i;
+    irm->curr_pos += 2;
+}
+
+/*
+ * Register a new symbol and append two slots to the IR buffer: `type`
+ * followed by the index of the new symbol-table entry.
+ * Aborts when the name already exists or the buffer is full.
+ *
+ * Every field of the new entry is initialised; `len` used to be copied into
+ * the table uninitialised.
+ */
+void irm_push_sym(struct irm *irm, uint64_t type, struct string_view sym)
+{
+    assert(irm != NULL);
+    if(symtab_lookup(&irm->symtab, sym).ent != NULL) {
+        /* The precision argument of %.*s must be an int. */
+        DIE("Symbol '%.*s' already exists.\n", (int)sym.len, sym.buf);
+    }
+    if(irm->curr_pos + 2 > irm->size)
+        DIE("IRM OVERFLOW!");
+
+    struct symbol_entry entry;
+    entry.name = sym;
+    entry.loc = 0;
+    entry.len = 0;
+    entry.symtype = type;
+
+    size_t symindex = symtab_append(&irm->symtab, entry);
+    uint64_t* mem = irm->storage + irm->curr_pos;
+    mem[0] = type;
+    mem[1] = symindex;
+    irm->curr_pos += 2;
+    DEBUG("added %.*s to symbol table.\n", (int)sym.len, sym.buf);
+}
diff --git a/src/irm.h b/src/irm.h
new file mode 100644
index 0000000..d9342f3
--- /dev/null
+++ b/src/irm.h
@@ -0,0 +1,58 @@
+/**
+ * @file irm.h
+ * @description A very basic intermediate representation storage.
+ *
+ * @note Every intermediate-representation slot is one uint64_t
+ *       (see struct irm::storage).
+ */
+
+#ifndef IRMACHINE_H
+#define IRMACHINE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "sv.h"
+#include "symtab.h"
+
+/*
+ * All constants are fully parenthesized so they behave as single values in
+ * any expression (e.g. `x == VAR_DECL_QWORD`).
+ */
+
+// STMT (statement class, kept in the upper bits of the first slot)
+#define VAR_DECL ((uint64_t)0x10000000)
+#define VAR_ASSIGN ((uint64_t)0x01000000)
+
+// SYM
+#define SYM ((uint64_t)0x00100000)
+
+#define NUMBER64 ((uint64_t)0)
+#define VAR_DECL_QWORD (VAR_DECL | (uint64_t)0x1)
+#define VAR_DECL_DWORD (VAR_DECL | (uint64_t)0x2)
+#define VAR_DECL_WORD (VAR_DECL | (uint64_t)0x3)
+#define VAR_DECL_BYTE (VAR_DECL | (uint64_t)0x4)
+#define STMT_DONE ((uint64_t)5)
+#define SYM_VAR ((uint64_t)6)
+#define SYM_FUNC ((uint64_t)7)
+
+enum irm_state {
+    IRM_PANIC,
+    IRM_GRACEFUL
+};
+
+struct irm {
+    uint64_t *storage; // performance, easier to manage.
+    size_t size;       // capacity of storage, in slots
+    size_t curr_pos;   // index of the next free slot
+    enum irm_state state;
+    struct symbol_table symtab;
+    size_t stackdepth; // bytes of stack handed out to declared variables
+    size_t scopedepth;
+};
+
+void irm_init(struct irm *irm);
+void irm_panic(struct irm *irm);
+void irm_stmt_enter_scope(struct irm *irm);
+void irm_stmt_leave_scope(struct irm *irm);
+void irm_stmt_done(struct irm *irm);
+void irm_push(struct irm *irm, uint64_t type);
+void irm_push_64v(struct irm *irm, uint64_t type, uint64_t value);
+void irm_push_lookup_sym(struct irm *irm, uint64_t type, struct string_view sym);
+void irm_push_sym(struct irm *irm, uint64_t type, struct string_view sym);
+
+#endif
diff --git a/src/lex.c b/src/lex.c
new file mode 100644
index 0000000..ffd804e
--- /dev/null
+++ b/src/lex.c
@@ -0,0 +1,201 @@
+#include "lex.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <backend.h>
+
+/*
+ * Initializer for one keyword-table entry.
+ * len is the keyword length without the terminating NUL. The macro expands
+ * to a plain brace initializer so it is valid in a static initializer.
+ */
+#define KEYWORD(id, str) \
+    { \
+        .type = (id), \
+        .me = { \
+            .buf = (str), \
+            .len = sizeof(str) - 1 \
+        } \
+    }
+
+/* Keyword table; the T_ERR entry marks the end and must stay last. */
+static struct token keywords[] = {
+    KEYWORD(T_TYPE_G64, "g64"),
+    KEYWORD(T_TYPE_G32, "g32"),
+    KEYWORD(T_TYPE_G16, "g16"),
+    KEYWORD(T_TYPE_G8, "g8"),
+    { .type = T_ERR, .me = { .buf = NULL, .len = 0 } }
+};
+
+/*
+ * Look `sv` up in the keyword table.
+ *
+ * Returns 0 and stores the keyword's token type in *res on a match;
+ * returns 1 and leaves *res untouched otherwise.
+ *
+ * The comparison is length-bounded: views created by sv_create() are not
+ * NUL-terminated, so strcmp() could read past the end of the view.
+ */
+int lex_keyword_lookup(struct string_view sv, uint64_t *res)
+{
+    for(struct token *ptr = keywords; ptr->type != T_ERR; ptr++) {
+        const size_t kwlen = strlen(ptr->me.buf);
+        if(kwlen == sv.len && memcmp(ptr->me.buf, sv.buf, kwlen) == 0) {
+            *res = ptr->type;
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Non-zero while the cursor is still before the end of the source. */
+int lex_within_bounds(struct lex_config *lc)
+{
+    const size_t cursor = lc->pos;
+    return cursor < lc->size;
+}
+
+
+/* Non-zero when the position `o` characters past the cursor is in range. */
+int lex_within_obounds(struct lex_config *lc, size_t o)
+{
+    const size_t target = lc->pos + o;
+    return target < lc->size;
+}
+
+/*
+ * Return the character `o` positions past the cursor without moving it.
+ * When that position is out of range, the character at the cursor is
+ * returned instead.
+ */
+char lex_peek(struct lex_config *lc, size_t o)
+{
+    const size_t idx = lex_within_obounds(lc, o) ? lc->pos + o : lc->pos;
+    return lc->src[idx];
+}
+
+/*
+ * Move the cursor one character forward (unless it is already at the end)
+ * and return the character the cursor then points at.
+ */
+char lex_advance(struct lex_config *lc)
+{
+    if(lex_within_bounds(lc))
+        lc->pos++;
+    return lc->src[lc->pos];
+}
+
+/* 1 when c is an ASCII letter (a-z or A-Z), 0 otherwise. */
+int lex_is_char(char c)
+{
+    const int lower = (c >= 'a') && (c <= 'z');
+    const int upper = (c >= 'A') && (c <= 'Z');
+    return lower || upper;
+}
+
+/* 1 when c is an ASCII decimal digit, 0 otherwise. */
+int lex_is_digit(char c)
+{
+    return !(c < '0' || c > '9');
+}
+
+/* 1 when c is a space, tab, carriage return or newline, 0 otherwise. */
+int lex_is_space(char c)
+{
+    switch(c) {
+    case ' ':
+    case '\t':
+    case '\r':
+    case '\n':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* Advance the cursor past any run of whitespace characters. */
+void lex_skip_whitespace(struct lex_config *lc)
+{
+    for(;;) {
+        if(!lex_is_space(lc->src[lc->pos]))
+            break;
+        lex_advance(lc);
+    }
+}
+
+/*
+ * Lex a word starting at the cursor: the first character is taken as is,
+ * followed by any run of letters and digits. The text is copied into the
+ * string pool; the token type is the keyword's type when the word is a
+ * keyword and T_ID otherwise.
+ */
+struct token lex_word(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    const int start_pos = lc->pos;
+    int length = 0;
+    char next;
+
+    do {
+        /* lex_advance returns the character at the new cursor position. */
+        next = lex_advance(lc);
+        length++;
+    } while(lex_is_char(next) || lex_is_digit(next));
+
+    DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+length);
+
+    struct string_view text = sv_create(lc->sp, &lc->src[start_pos], length);
+
+    uint64_t kind = T_ID;
+    uint64_t keyword_kind;
+    if(lex_keyword_lookup(text, &keyword_kind) == 0) {
+        kind = keyword_kind;
+    }
+
+    return (struct token) {
+        .type = kind,
+        .me = text,
+    };
+}
+
+/*
+ * Lex a run of decimal digits starting at the cursor and return it as a
+ * T_NUMBER token whose text is copied into the string pool.
+ */
+struct token lex_number(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    const int start_pos = lc->pos;
+    int digits = 0;
+
+    for(; lex_is_digit(lc->src[lc->pos]); digits++)
+        lex_advance(lc);
+
+    DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+digits);
+
+    struct token number = {
+        .type = T_NUMBER,
+        .me = sv_create(lc->sp, &lc->src[start_pos], digits)
+    };
+    return number;
+}
+
+/*
+ * Lex a single-character operator at the cursor (';', '=', '{' or '}') and
+ * advance past it. Any other character aborts compilation.
+ *
+ * Fixed here: lc->pos is a size_t and is printed with %zu, and the function
+ * no longer runs off its end without a return value after the default case.
+ */
+struct token lex_opcode(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    switch(lc->src[lc->pos]) {
+    case ';':
+        lex_advance(lc);
+        DEBUG("Tokenized semicolon.\n");
+        return TOKEN_SEMICOLON;
+    case '=':
+        lex_advance(lc);
+        DEBUG("Tokenized equal sign.\n");
+        return TOKEN_EQUAL_SIGN;
+    case '{':
+        lex_advance(lc);
+        DEBUG("Tokenized scope start.\n");
+        return TOKEN_SCOPE_START;
+    case '}':
+        lex_advance(lc);
+        DEBUG("Tokenized scope end.\n");
+        return TOKEN_SCOPE_END;
+
+    default:
+        DIE("ERR: Unknown char at position: %zu, char: %d\n", lc->pos, lc->src[lc->pos]);
+    }
+
+    /* DIE exits the process; this only gives every path a return value. */
+    return TOKEN_EOF;
+}
+
+/*
+ * Skip a `//` comment. The cursor must be on the first '/'; afterwards it
+ * is just past the newline that ends the comment, or at the end of the
+ * input when the comment is the last thing in the file.
+ *
+ * The scan also stops at the end of the input: lex_advance() does not move
+ * past the end, so waiting only for '\n' looped forever on a final comment
+ * line without a trailing newline.
+ */
+void lex_skip_comment(struct lex_config *lc)
+{
+    assert(lc != NULL);
+    lex_advance(lc);
+    if(lc->src[lc->pos] != '/')
+        DIE("ERR: Comment invalid, it needs to be //.");
+
+    lex_advance(lc);
+
+    while(lex_within_bounds(lc) &&
+          lc->src[lc->pos] != '\n' &&
+          lc->src[lc->pos] != '\0')
+        lex_advance(lc);
+
+    /* Consume the newline only when the comment really ended on one. */
+    if(lex_within_bounds(lc) && lc->src[lc->pos] == '\n')
+        lex_advance(lc);
+}
+
+/*
+ * Produce the next token: skips whitespace and `//` comments, returns
+ * TOKEN_EOF at the end of the input, and otherwise dispatches on the first
+ * character to the word, number or operator lexer.
+ */
+struct token lex_next(struct lex_config *lc)
+{
+    assert(lc != NULL);
+
+    if(!lex_within_bounds(lc))
+        return TOKEN_EOF;
+
+    lex_skip_whitespace(lc);
+
+    /* Each comment may be followed by more whitespace and more comments. */
+    for(; lc->src[lc->pos] == '/'; lex_skip_whitespace(lc))
+        lex_skip_comment(lc);
+
+    const char first = lc->src[lc->pos];
+
+    if(first == '\0') {
+        lex_advance(lc);
+        return TOKEN_EOF;
+    }
+
+    if(lex_is_char(first))
+        return lex_word(lc);
+
+    if(lex_is_digit(first))
+        return lex_number(lc);
+
+    return lex_opcode(lc);
+}
diff --git a/src/lex.h b/src/lex.h
new file mode 100644
index 0000000..02e427e
--- /dev/null
+++ b/src/lex.h
@@ -0,0 +1,76 @@
+#ifndef LEX_H
+#define LEX_H
+
+/*
+ * Lexer interface: token type constants, ready-made tokens for the fixed
+ * single-character symbols, and the lexer state and functions.
+ */
+
+#include "sv.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define TOKEN_EOF (struct token){ \
+    .type = T_EOF, \
+    .me = (struct string_view){.buf = NULL, .len = 0} \
+    }
+
+#define TOKEN_EQUAL_SIGN (struct token){ \
+    .type = T_EQUAL_SIGN, \
+    .me = (struct string_view){.buf = "=", .len = 1} \
+    }
+
+#define TOKEN_SEMICOLON (struct token){ \
+    .type = T_SEMICOL, \
+    .me = (struct string_view){.buf = ";", .len = 1} \
+    }
+
+#define TOKEN_SCOPE_START (struct token){ \
+    .type = T_SCOPESTART, \
+    .me = (struct string_view){.buf = "{", .len = 1} \
+    }
+
+#define TOKEN_SCOPE_END (struct token){ \
+    .type = T_SCOPEEND, \
+    .me = (struct string_view){.buf = "}", .len = 1} \
+    }
+
+/*
+ * Token type constants. Each one is fully parenthesized so it behaves as a
+ * single value in any expression (e.g. `t.type == T_TYPE_G64`).
+ */
+
+/* Mask set on every type-keyword token. */
+#define T_TYPE ((uint64_t)0xFF000000)
+
+#define T_ERR ((uint32_t)0)
+#define T_ID ((uint32_t)1)
+#define T_STRING ((uint32_t)2)
+#define T_SEMICOL ((uint32_t)3)
+#define T_NUMBER ((uint32_t)4)
+#define T_EQUAL_SIGN ((uint32_t)5)
+#define T_SCOPESTART ((uint32_t)6)
+#define T_SCOPEEND ((uint32_t)7)
+#define T_TYPE_G64 (T_TYPE | (uint64_t)8)
+#define T_TYPE_G32 (T_TYPE | (uint64_t)9)
+#define T_TYPE_G16 (T_TYPE | (uint64_t)10)
+#define T_TYPE_G8 (T_TYPE | (uint64_t)11)
+#define T_EOF ((uint32_t)12)
+
+struct token {
+    uint64_t type;         /* one of the T_* constants */
+    struct string_view me; /* the token's text */
+};
+
+struct lex_config {
+    char *src;              /* source text */
+    struct string_pool *sp; /* pool that token texts are copied into */
+    size_t size;            /* length of src */
+    size_t pos;             /* current cursor position in src */
+};
+
+int lex_keyword_lookup(struct string_view sv, uint64_t *res);
+int lex_is_char(char c);
+int lex_is_digit(char c);
+int lex_within_bounds(struct lex_config *lc);
+int lex_within_obounds(struct lex_config *lc, size_t o);
+char lex_peek(struct lex_config *lc, size_t o);
+char lex_advance(struct lex_config *lc);
+int lex_is_space(char c);
+void lex_skip_whitespace(struct lex_config *lc);
+void lex_skip_comment(struct lex_config *lc);
+struct token lex_word(struct lex_config *lc);
+struct token lex_number(struct lex_config *lc);
+struct token lex_opcode(struct lex_config *lc);
+
+struct token lex_next(struct lex_config *lc);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..720c390
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,85 @@
+// The M Programming Language
+// It is a subset of C. Like C, its name is a single letter: the first
+// letter of my OS project, Project "Metal".
+
+// This compiler directly produces x86 machine code.
+// It both compiles the OS and will be part of it in an integrated manner.
+// This compiler will be the JIT shell of the userspace (and kernelspace), and
+// hot reloading will solely be supported through this language.
+
+// This is an extremely simple stack-machine one-pass compiler.
+
+// This will produce binary results that are slower than gcc or other production
+// compilers. However, it remains reasonable, which is the goal for a learning
+// compiler and OS development.
+
+// AST node system is not used so that the compiler can be up and running as
+// soon as possible.
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <backend.h>
+
+#include "lex.h"
+#include "sv.h"
+#include "parser.h"
+
+/*
+ * Compiler driver: reads "code.m", tokenizes it, then parses it while
+ * writing the generated code to "code.o".
+ *
+ * Returns 0 when parsing finished in the graceful state, 1 otherwise.
+ *
+ * Fixed here: the token buffer is checked for allocation failure and
+ * overflow, log arguments match their format specifiers, the parser state
+ * starts zero-initialised, and a NULL token text is never handed to printf.
+ */
+int main(void)
+{
+    struct string_pool sp;
+    sp_init(&sp);
+
+    const size_t token_cap = (1024 * 1024) / sizeof(struct token);
+    struct token *tokens = malloc(token_cap * sizeof *tokens);
+    if(tokens == NULL)
+        DIE("Failed to allocate the token buffer!");
+
+    struct string_view src_file = backend_stream_init_src("code.m");
+
+    DEBUG("File, size: %zu, content:\n%s", src_file.len, src_file.buf);
+
+    struct lex_config lc;
+    lc.src = src_file.buf;
+    lc.size = src_file.len;
+    lc.sp = &sp;
+    lc.pos = 0;
+
+    size_t index = 0;
+
+    DEBUG("---TOKENIZER START---\n");
+    do {
+        if(index >= token_cap)
+            DIE("Too many tokens! The limit is %zu.", token_cap);
+        tokens[index] = lex_next(&lc);
+        index++;
+    } while(tokens[index - 1].type != T_EOF);
+
+    DEBUG("---TOKENIZER END---\n");
+    DEBUG("Tokenizer ran %zu times!\n", index);
+
+    for(size_t i = 0; i < index; i++) {
+        /* TOKEN_EOF carries a NULL text; print it as an empty string. */
+        const char *text = tokens[i].me.buf != NULL ? tokens[i].me.buf : "";
+        DEBUG("Token %zu: type|%llx|content|%.*s.\n",
+              i, (unsigned long long)tokens[i].type,
+              (int)tokens[i].me.len, text);
+    }
+
+    backend_stream_init_dst("code.o");
+
+    DEBUG("---PARSER START---\n");
+    struct parser_config pc = {0};
+    pc.tokens = tokens;
+    pc.size = index;
+    pc.pos = 0;
+    parser_parse(&pc);
+
+    DEBUG("---PARSER END---\n");
+
+    if(pc.state == PARSER_GRACEFUL) {
+        printf("\nBinary compiled!\n");
+        return 0;
+    }
+
+    printf("\nCompilation failed!\n");
+    return 1;
+}
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..5e5485d
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,170 @@
+#include "parser.h"
+
+#include <backend.h>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "irm.h"
+#include "lex.h"
+
+/* Non-zero while the cursor is on a token that exists and is not T_EOF. */
+int parser_within_bounds(struct parser_config *pc)
+{
+    if(pc->pos >= pc->size)
+        return 0;
+    return pc->tokens[pc->pos].type != T_EOF;
+}
+
+/* Like parser_within_bounds(), for the token `o` positions past the cursor. */
+int parser_within_obounds(struct parser_config *pc, size_t o)
+{
+    const size_t idx = pc->pos + o;
+    if(idx >= pc->size)
+        return 0;
+    return pc->tokens[idx].type != T_EOF;
+}
+
+/*
+ * Return the token `o` positions past the cursor without moving it; when
+ * that position is out of range (or T_EOF), the token at the cursor is
+ * returned instead.
+ */
+struct token parser_peek(struct parser_config *pc, size_t o)
+{
+    assert(pc != NULL);
+    const size_t idx = parser_within_obounds(pc, o) ? pc->pos + o : pc->pos;
+    return pc->tokens[idx];
+}
+
+/*
+ * Report a parse error: log the message and put both the parser and its IR
+ * machine into the panic state.
+ */
+void parser_throw(struct parser_config *pc, const char *err)
+{
+    DEBUG("%s", err);
+    pc->state = PARSER_PANIC;
+    irm_panic(&pc->irm);
+}
+
+/*
+ * Check that the current token has the given type.
+ * Returns 0 on a match; otherwise reports `err` through parser_throw() and
+ * returns 1. The cursor is not moved.
+ */
+int parser_expect(struct parser_config *pc, uint64_t type, const char *err)
+{
+    assert(pc != NULL);
+    const uint64_t found = parser_peek(pc, 0).type;
+    if(found != type) {
+        parser_throw(pc, err);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Like parser_expect(), but an empty `err` string makes a mismatch silent:
+ * 1 is still returned, but nothing is thrown.
+ */
+int parser_expect_condthrow(struct parser_config *pc, uint64_t type, const char *err)
+{
+    assert(pc != NULL);
+    const uint64_t found = parser_peek(pc, 0).type;
+    if(found != type) {
+        if(err[0] != '\0')
+            parser_throw(pc, err);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Move the cursor to the next token (unless it is already at the end or on
+ * T_EOF) and return the token the cursor then points at.
+ */
+struct token parser_advance(struct parser_config *pc)
+{
+    assert(pc != NULL);
+    if(parser_within_bounds(pc))
+        pc->pos++;
+    return pc->tokens[pc->pos];
+}
+
+/*
+ * Parse a literal at the cursor. Only number literals are supported: the
+ * value is pushed to the IR machine as NUMBER64 and the token is consumed.
+ * Anything else is reported as an invalid expression.
+ *
+ * The digits are converted by hand over the token's length. The token text
+ * is not NUL-terminated, so atoi() could read past it, and atoi() only
+ * yields an int. A literal that does not fit in 64 bits is a parse error.
+ */
+void parser_literal(struct parser_config *pc)
+{
+    assert(pc != NULL);
+
+    struct token t = parser_peek(pc, 0);
+
+    if(t.type != T_NUMBER) {
+        parser_throw(pc, "Invalid expression\n");
+        return;
+    }
+
+    uint64_t value = 0;
+    for(size_t i = 0; i < t.me.len; i++) {
+        const uint64_t digit = (uint64_t)(t.me.buf[i] - '0');
+        /* value * 10 + digit must stay within UINT64_MAX. */
+        if(value > (UINT64_MAX - digit) / 10) {
+            parser_throw(pc, "Number literal does not fit in 64 bits\n");
+            parser_advance(pc);
+            return;
+        }
+        value = value * 10 + digit;
+    }
+
+    irm_push_64v(&pc->irm, NUMBER64, value);
+    DEBUG("Pushed into the irm: %llu.\n", (unsigned long long)value);
+    parser_advance(pc);
+}
+
+/*
+ * Map a type-keyword token type to the matching IR declaration
+ * instruction. An unknown type is reported as a parse error and mapped to
+ * VAR_DECL_QWORD.
+ */
+uint64_t parser_type_to_irm_inst(struct parser_config *pc, uint64_t type)
+{
+    uint64_t inst = VAR_DECL_QWORD;
+
+    switch(type) {
+    case T_TYPE_G64:
+        break;
+    case T_TYPE_G32:
+        inst = VAR_DECL_DWORD;
+        break;
+    case T_TYPE_G16:
+        inst = VAR_DECL_WORD;
+        break;
+    case T_TYPE_G8:
+        inst = VAR_DECL_BYTE;
+        break;
+    default:
+        parser_throw(pc, "An invalid type detected... defaulting storage to QWORD.");
+        break;
+    }
+
+    return inst;
+}
+
+/*
+ * Parse `<identifier> = <literal>`: pushes VAR_ASSIGN and the looked-up
+ * symbol to the IR machine, checks for the equal sign, then parses the
+ * literal.
+ */
+void parser_assign_stmt(struct parser_config *pc)
+{
+    const struct token target = parser_peek(pc, 0);
+
+    irm_push(&pc->irm, VAR_ASSIGN);
+    irm_push_lookup_sym(&pc->irm, SYM_VAR, target.me);
+    parser_advance(pc);
+
+    parser_expect(pc, T_EQUAL_SIGN, "Assignment operation requires an equal sign after the symbol.");
+    parser_advance(pc);
+
+    parser_literal(pc);
+}
+
+/*
+ * Parse `<type> <identifier> = <literal>`: pushes the declaration
+ * instruction and the newly registered symbol to the IR machine, then
+ * parses the initial value.
+ *
+ * Parsing stops at the first failed expectation. Previously the token after
+ * the type was registered as a symbol even when it was not an identifier
+ * (for example "=" in `g64 = 5;`).
+ */
+void parser_decl_stmt(struct parser_config *pc)
+{
+    struct token t = parser_peek(pc, 0);
+    if(!(t.type & T_TYPE)) {
+        parser_throw(pc, "Declaration statement must begin with a type!");
+        parser_advance(pc);
+        return;
+    }
+
+    irm_push(&pc->irm, parser_type_to_irm_inst(pc, t.type));
+    parser_advance(pc);
+
+    if(parser_expect(pc, T_ID, "An identifier must follow a type in a declaration statement.") != 0)
+        return;
+    struct string_view symbol_name = parser_peek(pc, 0).me;
+    irm_push_sym(&pc->irm, SYM_VAR, symbol_name);
+    parser_advance(pc);
+
+    if(parser_expect(pc, T_EQUAL_SIGN, "Equal sign must be used to assign a value to a locator") != 0)
+        return;
+    parser_advance(pc);
+    parser_literal(pc);
+}
+
+/*
+ * Parse one statement: a declaration when it starts with a type keyword, an
+ * assignment when it starts with an identifier, an error otherwise. The
+ * statement is then handed to the IR machine, and the terminating semicolon
+ * is checked and consumed.
+ */
+void parser_stmt(struct parser_config *pc)
+{
+    const uint64_t kind = parser_peek(pc, 0).type;
+
+    if(kind & T_TYPE) {
+        parser_decl_stmt(pc);
+    } else if(kind == T_ID) {
+        parser_assign_stmt(pc);
+    } else {
+        parser_throw(pc,
+                     "Invalid statement, can only be declaration "
+                     "or assignment.");
+    }
+
+    /* Mark the end of the statement, then let the IR machine process it. */
+    irm_push(&pc->irm, STMT_DONE);
+    irm_stmt_done(&pc->irm);
+
+    parser_expect(pc, T_SEMICOL, "Statement must end with a semicolon.\n");
+    parser_advance(pc);
+}
+
+/*
+ * Compound statement handling. Currently a stub: it enters and immediately
+ * leaves a scope on the IR machine without parsing anything in between.
+ */
+void parse_compound_stmt(struct parser_config *pc)
+{
+ assert(pc != NULL);
+
+ irm_stmt_enter_scope(&pc->irm);
+
+ irm_stmt_leave_scope(&pc->irm);
+}
+
+/*
+ * Parse the whole token stream: resets the parser state to graceful,
+ * initialises the IR machine and parses statements until the cursor leaves
+ * the token range or reaches T_EOF.
+ */
+void parser_parse(struct parser_config *pc)
+{
+    assert(pc != NULL);
+    pc->state = PARSER_GRACEFUL;
+
+    irm_init(&pc->irm);
+
+    for(;;) {
+        if(!parser_within_bounds(pc))
+            break;
+        parser_stmt(pc);
+    }
+}
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..38c8f94
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,25 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+// Parser interface: parser state plus the single entry point parser_parse().
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lex.h"
+#include "irm.h"
+
+// Overall result of parsing; PARSER_PANIC is set by the first reported error.
+enum parser_state {
+ PARSER_PANIC = -1,
+ PARSER_GRACEFUL
+};
+
+struct parser_config {
+ enum parser_state state; // set by parser_parse() and on errors
+ struct token *tokens; // token array to parse
+ struct irm irm; // IR machine, initialised by parser_parse()
+ size_t size; // number of tokens in `tokens`
+ size_t pos; // index of the current token
+};
+
+// Parse all tokens in pc; the caller fills in tokens, size and pos first.
+void parser_parse(struct parser_config *pc);
+
+#endif
diff --git a/src/sv.c b/src/sv.c
new file mode 100644
index 0000000..d3debb3
--- /dev/null
+++ b/src/sv.c
@@ -0,0 +1,27 @@
+#include "sv.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * Initialise a string pool with a fresh 1 MiB buffer and an empty offset.
+ * Exits the process when the buffer cannot be allocated, so that
+ * sv_create() never copies into a NULL pool.
+ */
+void sp_init(struct string_pool *sp)
+{
+    assert(sp != NULL);
+    void *pool = malloc(1024*1024);
+    if(pool == NULL) {
+        printf("Error: cannot allocate the string pool!");
+        exit(1);
+    }
+    sp->mem = (uintptr_t)pool;
+    sp->offset = 0;
+}
+
+/*
+ * Copy `s` bytes from `buf` into the pool and return a view of the copy.
+ *
+ * The copy is followed by a NUL byte that is not counted in len, so the
+ * view's buffer can also be used as a C string (callers pass it to %s and
+ * to string functions). Exits the process when buf is NULL or the pool
+ * cannot hold the copy.
+ */
+struct string_view sv_create(struct string_pool *sp, const char *buf, size_t s)
+{
+    /* Must mirror the size allocated in sp_init(). */
+    const size_t pool_bytes = 1024 * 1024;
+
+    assert(sp != NULL);
+    if(buf == NULL) {
+        printf("Error: cannot create string! Buffer null!");
+        exit(1);
+    }
+
+    /* Room is needed for s bytes plus the terminating NUL. */
+    if(sp->offset > pool_bytes || s >= pool_bytes - sp->offset) {
+        printf("Error: cannot create string! String pool exhausted!");
+        exit(1);
+    }
+
+    char* ptr = (char*)sp->mem + sp->offset;
+    memcpy((void*)ptr, buf, s);
+    ptr[s] = '\0';
+    sp->offset += s + 1;
+    return (struct string_view) {.buf = ptr, .len = s};
+}
diff --git a/src/sv.h b/src/sv.h
new file mode 100644
index 0000000..c4c8750
--- /dev/null
+++ b/src/sv.h
@@ -0,0 +1,22 @@
+#ifndef STRING_H
+#define STRING_H
+
+// String pool and string view types used for token and symbol text.
+
+#include <stdint.h>
+#include <stddef.h>
+
+// Create a view by copying `s` into the pool `sp`.
+// NOTE(review): sizeof(s) is the full array size when `s` is a string
+// literal, so the resulting len includes the terminating NUL — confirm that
+// this is intended.
+#define SV(sp, s) sv_create(sp, s, sizeof(s))
+
+struct string_pool {
+ uintptr_t mem; // address of the pool buffer (set by sp_init)
+ size_t offset; // bytes of the pool already handed out
+};
+
+struct string_view {
+ size_t len; // number of bytes in buf that belong to the view
+ char *buf; // start of the text
+};
+
+// Allocate the pool buffer and reset the offset.
+void sp_init(struct string_pool *sp);
+// Copy `s` bytes from `buf` into the pool and return a view of the copy.
+struct string_view sv_create(struct string_pool *sp, const char *buf, size_t s);
+
+#endif
diff --git a/src/symtab.c b/src/symtab.c
new file mode 100644
index 0000000..0034480
--- /dev/null
+++ b/src/symtab.c
@@ -0,0 +1,55 @@
+#include "symtab.h"
+
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+#include "sv.h"
+
+/*
+ * Initialise an empty symbol table with room for 150 entries.
+ *
+ * The allocation is sized in entries: it used to request 150 bytes while
+ * recording a capacity of 150 entries, so appends wrote past the buffer.
+ */
+void symtab_init(struct symbol_table *symtab)
+{
+    assert(symtab != NULL);
+    symtab->cap = 150;
+    symtab->count = 0;
+    symtab->entries = malloc(symtab->cap * sizeof *symtab->entries);
+    if(symtab->entries == NULL)
+        abort();
+}
+
+/*
+ * Double the capacity of the symbol table, keeping the existing entries.
+ * Pointers into the old entry array are invalid afterwards.
+ *
+ * Sizes are computed in entries: the old code allocated and copied `cap`
+ * bytes instead of `cap` entries. The process is aborted when the larger
+ * buffer cannot be allocated.
+ */
+void symtab_expand(struct symbol_table *symtab)
+{
+    assert(symtab != NULL);
+    size_t newcap = symtab->cap * 2;
+    struct symbol_entry *newbuf =
+        realloc(symtab->entries, newcap * sizeof *newbuf);
+    if(newbuf == NULL)
+        abort();
+    symtab->cap = newcap;
+    symtab->entries = newbuf;
+}
+
+/*
+ * Release the entry array and reset the table to an empty state.
+ * The entries pointer is cleared so that a second call, or a later use of
+ * the table, cannot touch freed memory.
+ */
+void symtab_free(struct symbol_table *symtab)
+{
+    assert(symtab != NULL);
+    free(symtab->entries);
+    symtab->entries = NULL;
+    symtab->count = 0;
+    symtab->cap = 0;
+}
+
+/*
+ * Find the first entry whose name has the same length and characters as
+ * `ent`. Returns the entry and its index, or {NULL, 0} when there is none.
+ */
+struct symlookup_result symtab_lookup(struct symbol_table *symtab, struct string_view ent)
+{
+    struct symbol_entry *cur = symtab->entries;
+
+    for(size_t idx = 0; idx < symtab->count; idx++, cur++) {
+        if(cur->name.len == ent.len &&
+           strncmp(ent.buf, cur->name.buf, ent.len) == 0) {
+            return (struct symlookup_result){.ent = cur, .i = idx};
+        }
+    }
+
+    return (struct symlookup_result){.ent = NULL, .i = 0};
+}
+
+/*
+ * Append an entry, growing the table first when it is about to fill up.
+ * Returns the index of the new entry.
+ */
+size_t symtab_append(struct symbol_table *symtab, struct symbol_entry syment)
+{
+    const size_t slot = symtab->count;
+
+    if(slot + 1 >= symtab->cap)
+        symtab_expand(symtab);
+
+    symtab->entries[slot] = syment;
+    symtab->count = slot + 1;
+    return slot;
+}
diff --git a/src/symtab.h b/src/symtab.h
new file mode 100644
index 0000000..ca5a726
--- /dev/null
+++ b/src/symtab.h
@@ -0,0 +1,40 @@
+/**
+ * @file symtab.h
+ * @description A basic symbol table implementation, dynamic expansion.
+ *
+ * @todo Implement hash map.
+ */
+
+#ifndef SYMTAB_H
+#define SYMTAB_H
+
+#include "sv.h"
+
+// One symbol: its kind, its name, and the stack slot and size assigned to it
+// during code generation.
+struct symbol_entry {
+ uint64_t symtype; // symbol kind as passed to irm_push_sym (e.g. SYM_VAR)
+ struct string_view name;
+ uintptr_t loc; // stack depth recorded when the variable was declared
+ size_t len; // size of the variable in bytes
+};
+
+// Growable array of symbols.
+struct symbol_table {
+ struct symbol_entry *entries;
+ size_t count; // number of entries in use
+ size_t cap; // capacity of `entries`
+};
+
+void symtab_init(struct symbol_table *symtab);
+void symtab_expand(struct symbol_table *symtab);
+void symtab_free(struct symbol_table *symtab);
+
+// Result of a lookup: `ent` is NULL when nothing was found, otherwise `i` is
+// the index of the entry in the table.
+struct symlookup_result {
+ struct symbol_entry *ent;
+ size_t i;
+};
+
+// Returns a pointer, should be on lookout, will reset on expansion.
+struct symlookup_result symtab_lookup(struct symbol_table *symtab,
+ struct string_view ent);
+// Appends a copy of `syment` and returns its index.
+size_t symtab_append(struct symbol_table *symtab, struct symbol_entry syment);
+
+#endif
+#endif