diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/linux.c | 92 | ||||
| -rw-r--r-- | src/backend/x86.c | 75 | ||||
| -rw-r--r-- | src/elf.h | 11 | ||||
| -rw-r--r-- | src/irm.c | 247 | ||||
| -rw-r--r-- | src/irm.h | 58 | ||||
| -rw-r--r-- | src/lex.c | 201 | ||||
| -rw-r--r-- | src/lex.h | 76 | ||||
| -rw-r--r-- | src/main.c | 85 | ||||
| -rw-r--r-- | src/parser.c | 170 | ||||
| -rw-r--r-- | src/parser.h | 25 | ||||
| -rw-r--r-- | src/sv.c | 27 | ||||
| -rw-r--r-- | src/sv.h | 22 | ||||
| -rw-r--r-- | src/symtab.c | 55 | ||||
| -rw-r--r-- | src/symtab.h | 40 |
14 files changed, 1184 insertions, 0 deletions
diff --git a/src/backend/linux.c b/src/backend/linux.c new file mode 100644 index 0000000..dfa5a80 --- /dev/null +++ b/src/backend/linux.c @@ -0,0 +1,92 @@ +#include <backend.h> + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <assert.h> + +#include "../sv.h" + +static FILE *src; +static FILE *dst; + +void die(const char *name, const char *file, size_t line, const char *fmt, ...) +{ + printf("ERROR %s(%s:%d): ", name, file, line); + va_list args; + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + printf("\nCompilation failed!\n"); + exit(1); +} + +void debug(const char *name, const char *file, size_t line, const char *fmt, ...) +{ + printf("LOG %s(%s:%d): ", name, file, line); + va_list args; + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); +} + +struct string_view backend_stream_init_src(const char *path) +{ + assert(path != NULL); + src = fopen(path, "r"); + fseek(src, 0, SEEK_END); + size_t size = ftell(src); + fseek(src, 0, SEEK_SET); + char *buf = malloc(size + 1); + if(fread(buf, size, 1, src) == 0) + DIE("Failed to read the file!\n"); + buf[size] = '\0'; + return (struct string_view){.buf = buf, .len = size}; +} + +void backend_stream_init_dst(const char *path) +{ + dst = fopen(path, "w"); +} + +void backend_stream_close_src() +{ + fclose(src); +} + +void backend_stream_close_dst() +{ + fclose(dst); +} + +void emit8(uint8_t out) +{ + fprintf(dst, "%02x", out); +} + +void emit16(uint16_t out) +{ + uint8_t test = 0; + for(int i = 0; i < 2; i++) { + test = (out >> (i * 8)) & 0xFF; + fprintf(dst, "%02x", test); + } +} + +void emit32(uint32_t out) +{ + uint8_t test = 0; + for(int i = 0; i < 4; i++) { + test = (out >> (i * 8)) & 0xFF; + fprintf(dst, "%02x", test); + } +} + +void emit64(uint64_t out) +{ + uint8_t test = 0; + for(int i = 0; i < 8; i++) { + test = (out >> (i * 8)) & 0xFF; + fprintf(dst, "%02x", test); + } +} diff --git a/src/backend/x86.c b/src/backend/x86.c new file mode 100644 index 0000000..05aafc1 --- /dev/null +++ b/src/backend/x86.c @@ -0,0 +1,75 @@ +#include <x86.h> + +#include <backend.h> + +#define REG_SAFETY(reg) \ + do { \ + if(reg > 15) DIE("Compiler error! Register %d is invalid!" \ + , reg); \ + } while(0) + +// TODO: Consider implementing a modrm backend that is rex-aware. + +//void x86_calc_modrm(uint16_t reg, uint16_t mod) + +void x86_mov_r_i64(uint8_t reg, uint64_t value) +{ + REG_SAFETY(reg); + emit8(REXW); + emit8(0xB8 + reg); + emit64(value); +} + +void x86_mov_rm_i64(uint8_t mode, uint16_t reg) +{ + DIE("TODO"); +} + +void x86_push_i32(uint32_t value) +{ + emit8(0x68); + emit32(value); +} + +/* + * @todo + */ +void x86_push_i16(uint16_t value) +{ + DIE("NOT IMPLEMENTED"); + // 66h + // PUSH 68 + // IMM value +} + +void x86_push_i8(uint8_t value) +{ + emit8(0x6A); + emit8(value); +} + +void x86_push_r64(uint8_t reg) +{ + REG_SAFETY(reg); + emit8(0x50 + reg); +} + +void x86_push_rm64(uint8_t mode, uint8_t r, uint64_t m) +{ + REG_SAFETY(r); + + if(r > 7) { + emit8(REXWR); + } else if (r < 7) { + emit8(REXW); + } + + emit8(0xFF); + + // Calculate MODRM + + // emit MODRM byte + // if mode = 1: + // disp + // done +} diff --git a/src/elf.h b/src/elf.h new file mode 100644 index 0000000..8f98056 --- /dev/null +++ b/src/elf.h @@ -0,0 +1,11 @@ +#ifndef ELF_H +#define ELF_H + +#include <stdint.h> + +struct Elf { + uint8_t magic; + +}; + +#endif diff --git a/src/irm.c b/src/irm.c new file mode 100644 index 0000000..6260ae0 --- /dev/null +++ b/src/irm.c @@ -0,0 +1,247 @@ +#include "irm.h" + +#include <assert.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> + +#include <backend.h> +#include <x86.h> + +#define STACK_SUB(irm, syment, type) \ + do { \ + irm->stackdepth += sizeof(type); \ + syment->loc = irm->stackdepth; \ + syment->len = sizeof(type); \ + while(0) + +// TODO: Add scopes. + +void irm_init(struct irm *irm) +{ + assert(irm != NULL); + irm->storage = malloc(1024 * 1024); // TODO: platform-agnostic + irm->size = (1024 * 1024) / 8; + irm->curr_pos = 0; + irm->state = IRM_GRACEFUL; + symtab_init(&irm->symtab); +} + +void irm_panic(struct irm *irm) +{ + irm->state = IRM_PANIC; +} + +void irm_stmt_enter_scope(struct irm *irm) +{ + assert(irm != NULL); + // Append 1 level. +} + +void irm_stmt_leave_scope(struct irm *irm) +{ + assert(irm != NULL); +} + +void irm_stmt_var_decl(struct irm *irm, size_t *offset) +{ + uint64_t* mem = irm->storage; + + uint64_t ltype = mem[*offset]; + (*offset)++; + + uint64_t symbol_type = mem[*offset]; + if(symbol_type != SYM_VAR) { + DIE("Symbol type invalid! sym:'%x'", symbol_type); + } + (*offset)++; + uint64_t symid = mem[*offset]; + + if(irm->symtab.count < symid) { + DIE("Symbol id: '%d' not found! This is a compiler bug.\n", + symid); + } + + struct symbol_entry *syment = &irm->symtab.entries[symid]; + irm->stackdepth += sizeof(uint64_t); + syment->loc = irm->stackdepth; + + DEBUG("Symbol id: %d\n", symid); + (*offset)++; + (*offset)++; + switch(ltype) { + case VAR_DECL_QWORD: { + uint64_t value = mem[*offset]; + syment->len = sizeof(uint64_t); + (*offset)++; + x86_mov_r_i64(RAX, value); + x86_push_r64(RAX); + break; + } + case VAR_DECL_DWORD: { + uint64_t value = mem[*offset]; + syment->len = sizeof(uint32_t); + (*offset)++; + x86_push_i32(value); + break; + } + case VAR_DECL_WORD: { + uint64_t value = mem[*offset]; + syment->len = sizeof(uint32_t); + (*offset)++; + x86_push_i32(value); // TODO: Replace with immediate 16 instruction + break; + } + case VAR_DECL_BYTE: { + uint64_t value = mem[*offset]; + (*offset)++; + syment->len = sizeof(uint16_t); + x86_push_i8(value); + break; + } + default: + DIE("Invalid type supplied! This is a compiler bug. type: %x.\n" + , ltype); + break; + } +} + +void irm_stmt_var_assign(struct irm *irm, size_t *offset) +{ + assert(irm != NULL); + if(offset == NULL) { + DIE("offset must not be null!"); + } + + (*offset)++; + uint64_t sym_id = irm->storage[*offset]; + if(sym_id != SYM_VAR) { + DIE("Variable assignment only supports a symbol of type variable!"); + } + + (*offset)++; + uint64_t symtab_id = irm->storage[*offset]; + struct symbol_entry *ent = &irm->symtab.entries[symtab_id]; + if(ent->symtype != SYM_VAR) { + DIE("Symbol '%.*s' is not of type variable!", ent->name.len, ent->name.buf); + } + (*offset)++; + uint64_t value_type = irm->storage[*offset]; + switch(value_type) { + case NUMBER64: { // FIX STACK ISSUE + DIE("TODO:: STACK ALGINMENT ISSUE!!"); + (*offset)++; + uint64_t value = irm->storage[*offset]; + DEBUG("Assigned nr64 literal %d to '%.*s'.\n", value, ent->name.len, ent->name.buf); + (*offset)++; + // TODO: mov [rsp - X], value + // mov rax, rsp + emit8(0x48); + emit8(0x8B); // MR + emit8(0xC4); // rsp -> rax + // sub rax, 8 + emit8(0x48); + emit8(0x2d); + emit32(ent->loc); + // mov rbx, value + emit8(0x48); + emit8(0xBB); // b8 + rd + emit64(value); // FALSE VALUE!! FIX! + // mov qword ptr [rax], rbx + emit8(0x48); + emit8(0x89); // MR + emit8(0x18); // rbx -> [rax] + break; + } + default: + DIE("Variable assignment only supports a number literal!"); + } +} + +void irm_stmt_done(struct irm *irm) +{ + if(irm->state == IRM_PANIC) return; + size_t offset = 0; + + while(offset < irm->curr_pos - 1) { + uint64_t type = irm->storage[offset]; + type &= 0xFFFF0000; + + switch(type) { + case VAR_DECL: + irm_stmt_var_decl(irm, &offset); + break; + case VAR_ASSIGN: + irm_stmt_var_assign(irm, &offset); + break; + default: + DIE("Statement identifier not found: %d.", type); + break; + } + DEBUG("Offset: %d, Size: %d\n", offset, irm->curr_pos); + } + if(irm->storage[offset] != STMT_DONE) { + DIE("Statement done was not received! This is a compiler bug."); + } + memset(irm->storage, 0, irm->curr_pos); + irm->curr_pos = 0; +} + +void irm_push(struct irm *irm, uint64_t type) +{ + assert(irm != NULL); + uint64_t *mem = irm->storage + irm->curr_pos; + mem[0] = type; + irm->curr_pos += 1; + DEBUG("added %x to irm.\n", type); +} + +void irm_push_64v(struct irm *irm, uint64_t type, uint64_t value) +{ + assert(irm != NULL); + if(irm->curr_pos + 2 > irm->size) + DIE("IRM OVERFLOW!"); + uint64_t* mem = irm->storage + irm->curr_pos; + mem[0] = type; + mem[1] = value; + irm->curr_pos += 2; + DEBUG("added %d to irm with value %d.\n", type, value); +} + +void irm_push_lookup_sym(struct irm *irm, uint64_t type, struct string_view sym) +{ + assert(irm != NULL); + + uint64_t *mem = irm->storage + irm->curr_pos; + mem[0] = type; + struct symlookup_result ent = symtab_lookup(&irm->symtab, sym); + + if(ent.ent == NULL) { + DIE("'%.*s' not found!", sym.len, sym.buf); + } + + mem[1] = ent.i; + irm->curr_pos+=2; +} + +// TODO: +// 1. Push SYM_TYPE +// 2. Push SYMTAB offset (after adding the sym to it) +void irm_push_sym(struct irm *irm, uint64_t type, struct string_view sym) +{ + assert(irm != NULL); + struct symbol_entry entry; + entry.name = sym; + entry.loc = 0; + entry.symtype = type; + if(symtab_lookup(&irm->symtab, sym).ent != NULL) { + DIE("Symbol '%.*s' already exists.\n", sym.len, sym.buf); + } + size_t symindex = symtab_append(&irm->symtab, entry); + uint64_t* mem = irm->storage + irm->curr_pos; + mem[0] = type; + mem[1] = symindex; + irm->curr_pos += 2; + DEBUG("added %.*s to symbol table.\n", sym.len, sym.buf); +} diff --git a/src/irm.h b/src/irm.h new file mode 100644 index 0000000..d9342f3 --- /dev/null +++ b/src/irm.h @@ -0,0 +1,58 @@ +/** + * @file ir_machine.h + * @description A very basic intermediate representation storage. + * + * @note All intermediate reps are sizeof uint8_t + */ + +#ifndef IRMACHINE_H +#define IRMACHINE_H + +#include <stddef.h> +#include <stdint.h> + +#include "sv.h" +#include "symtab.h" + +// STMT +#define VAR_DECL (uint64_t)0x10000000 +#define VAR_ASSIGN (uint64_t)0x01000000 + +// SYM +#define SYM (uint64_t)0x00100000 + +#define NUMBER64 (uint64_t)0 +#define VAR_DECL_QWORD VAR_DECL | (uint64_t)0x1 +#define VAR_DECL_DWORD VAR_DECL | (uint64_t)0x2 +#define VAR_DECL_WORD VAR_DECL | (uint64_t)0x3 +#define VAR_DECL_BYTE VAR_DECL | (uint64_t)0x4 +#define STMT_DONE (uint64_t)5 +#define SYM_VAR (uint64_t)6 +#define SYM_FUNC (uint64_t)7 + +enum irm_state { + IRM_PANIC, + IRM_GRACEFUL +}; + +struct irm { + uint64_t *storage; // performance, easier to manage. + size_t size; + size_t curr_pos; + enum irm_state state; + struct symbol_table symtab; + size_t stackdepth; + size_t scopedepth; +}; + +void irm_init(struct irm *irm); +void irm_panic(struct irm *irm); +void irm_stmt_enter_scope(struct irm *irm); +void irm_stmt_leave_scope(struct irm *irm); +void irm_stmt_done(struct irm *irm); +void irm_push(struct irm *irm, uint64_t type); +void irm_push_64v(struct irm *irm, uint64_t type, uint64_t value); +void irm_push_lookup_sym(struct irm *irm, uint64_t type, struct string_view sym); +void irm_push_sym(struct irm *irm, uint64_t type, struct string_view sym); + +#endif diff --git a/src/lex.c b/src/lex.c new file mode 100644 index 0000000..ffd804e --- /dev/null +++ b/src/lex.c @@ -0,0 +1,201 @@ +#include "lex.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <backend.h> + +#define KEYWORD(id, str) \ + (struct token) { \ + .type = id, \ + .me = { \ + .buf = str, \ + .len = sizeof(str) \ + } \ + } + +static struct token keywords[] = { + KEYWORD(T_TYPE_G64, "g64"), + KEYWORD(T_TYPE_G32, "g32"), + KEYWORD(T_TYPE_G16, "g16"), + KEYWORD(T_TYPE_G8, "g8"), + KEYWORD(T_ERR, NULL) +}; + +int lex_keyword_lookup(struct string_view sv, uint64_t *res) +{ + for(struct token *ptr = keywords; ptr->type != T_ERR; ptr++) { // Increase safety + if(strcmp(ptr->me.buf, sv.buf) == 0) { + *res = ptr->type; + return 0; + } + } + return 1; +} + +int lex_within_bounds(struct lex_config *lc) +{ + return lc->pos < lc->size; +} + + +int lex_within_obounds(struct lex_config *lc, size_t o) +{ + return (lc->pos + o) < lc->size; +} + +char lex_peek(struct lex_config *lc, size_t o) +{ + if(!lex_within_obounds(lc, o)) return lc->src[lc->pos]; + return lc->src[lc->pos + o]; +} + +char lex_advance(struct lex_config *lc) +{ + if(!lex_within_bounds(lc)) return lc->src[lc->pos]; + lc->pos++; + return lc->src[lc->pos]; +} + +int lex_is_char(char c) +{ + return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'; +} + +int lex_is_digit(char c) +{ + return c >= '0' && c <= '9'; +} + +int lex_is_space(char c) +{ + return (c == ' ') || + (c == '\t') || + (c == '\r') || + (c == '\n'); +} + +void lex_skip_whitespace(struct lex_config *lc) +{ + while(lex_is_space(lc->src[lc->pos])) { + lex_advance(lc); + } +} + +struct token lex_word(struct lex_config *lc) +{ + assert(lc != NULL); + + int i = 0; + int start_pos = lc->pos; + + do { + lex_advance(lc); + i++; + } while(lex_is_char(lc->src[lc->pos]) || lex_is_digit(lc->src[lc->pos])); + + DEBUG("word starts at: %d, ends at %d\n", start_pos, start_pos+i); + + uint64_t tt; + struct string_view str = sv_create(lc->sp, &lc->src[start_pos], i); + + if(lex_keyword_lookup(str, &tt) == 1) { + tt = T_ID; + } + + return (struct token) { + .type = tt, + .me = str, + }; +} + +struct token lex_number(struct lex_config *lc) +{ + assert(lc != NULL); + + int i = 0; + int start_pos = lc->pos; + + while(lex_is_digit(lc->src[lc->pos])) { + lex_advance(lc); + i++; + } + + DEBUG("number starts at: %d, ends at %d\n", start_pos, start_pos+i); + + return (struct token) { + .type = T_NUMBER, + .me = sv_create(lc->sp, &lc->src[start_pos], i) + }; +} + +struct token lex_opcode(struct lex_config *lc) +{ + assert(lc != NULL); + + switch(lc->src[lc->pos]) { + case ';': + lex_advance(lc); + DEBUG("Tokenized semicolon.\n"); + return TOKEN_SEMICOLON; + case '=': + lex_advance(lc); + DEBUG("Tokenized equal sign.\n"); + return TOKEN_EQUAL_SIGN; + case '{': + lex_advance(lc); + DEBUG("Tokenized scope start.\n"); + return TOKEN_SCOPE_START; + case '}': + lex_advance(lc); + DEBUG("Tokenized scope end.\n"); + return TOKEN_SCOPE_END; + + default: + DIE("ERR: Unknown char at position: %d, char: %d\n", lc->pos, lc->src[lc->pos]); + } +} + +void lex_skip_comment(struct lex_config *lc) +{ + assert(lc != NULL); + lex_advance(lc); + if(lc->src[lc->pos] != '/') + DIE("ERR: Comment invalid, it needs to be //."); + + lex_advance(lc); + + while(lc->src[lc->pos] != '\n') + lex_advance(lc); + + lex_advance(lc); +} + +struct token lex_next(struct lex_config *lc) +{ + assert(lc != NULL); + + if(!lex_within_bounds(lc)) + return TOKEN_EOF; + + lex_skip_whitespace(lc); + + while(lc->src[lc->pos] == '/') { + lex_skip_comment(lc); + lex_skip_whitespace(lc); + } + + if(lc->src[lc->pos] == '\0') { + lex_advance(lc); + return TOKEN_EOF; + } + + if(lex_is_char(lc->src[lc->pos])) + return lex_word(lc); + + if(lex_is_digit(lc->src[lc->pos])) + return lex_number(lc); + return lex_opcode(lc); +} diff --git a/src/lex.h b/src/lex.h new file mode 100644 index 0000000..02e427e --- /dev/null +++ b/src/lex.h @@ -0,0 +1,76 @@ +#ifndef LEX_H +#define LEX_H + +#include "sv.h" + +#include <stddef.h> + +#define TOKEN_EOF (struct token){ \ + .type = T_EOF, \ + .me = (struct string_view){.buf = NULL, .len = 0} \ + } + +#define TOKEN_EQUAL_SIGN (struct token){ \ + .type = T_EQUAL_SIGN, \ + .me = (struct string_view){.buf = "=", .len = 1} \ + } + +#define TOKEN_SEMICOLON (struct token){ \ + .type = T_SEMICOL, \ + .me = (struct string_view){.buf = ";", .len = 1} \ + } + +#define TOKEN_SCOPE_START (struct token){ \ + .type = T_SCOPESTART, \ + .me = (struct string_view){.buf = "{", .len = 1} \ + } + +#define TOKEN_SCOPE_END (struct token){ \ + .type = T_SCOPEEND, \ + .me = (struct string_view){.buf = "}", .len = 1} \ + } + +#define T_TYPE (uint64_t)0xFF000000 + +#define T_ERR (uint32_t)0 +#define T_ID (uint32_t)1 +#define T_STRING (uint32_t)2 +#define T_SEMICOL (uint32_t)3 +#define T_NUMBER (uint32_t)4 +#define T_EQUAL_SIGN (uint32_t)5 +#define T_SCOPESTART (uint32_t)6 +#define T_SCOPEEND (uint32_t)7 +#define T_TYPE_G64 (uint64_t)T_TYPE | (uint32_t)8 +#define T_TYPE_G32 (uint64_t)T_TYPE | (uint32_t)9 +#define T_TYPE_G16 (uint64_t)T_TYPE | (uint32_t)10 +#define T_TYPE_G8 (uint64_t)T_TYPE | (uint32_t)11 +#define T_EOF (uint32_t)12 + +struct token { + uint64_t type; + struct string_view me; +}; + +struct lex_config { + char *src; + struct string_pool *sp; + size_t size; + size_t pos; +}; + +int lex_keyword_lookup(struct string_view sv, uint64_t *res); +int lex_is_char(char c); +int lex_is_digit(char c); +int lex_within_bounds(struct lex_config *lc); +int lex_within_obounds(struct lex_config *lc, size_t o); +char lex_peek(struct lex_config *lc, size_t o); +char lex_advance(struct lex_config *lc); +int lex_is_space(char c); +void lex_skip_whitespace(struct lex_config *lc); +struct token lex_word(struct lex_config *lc); +struct token lex_number(struct lex_config *lc); +struct token lex_opcode(struct lex_config *lc); + +struct token lex_next(struct lex_config *lc); + +#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..720c390 --- /dev/null +++ b/src/main.c @@ -0,0 +1,85 @@ +// The M Programming Language +// It is a subset of C, and named after it as a single letter and from the +// first letter of my OS project Project "Metal". + +// This compiler directly produces x86 machine code. +// It boths compiles the OS, and (will be) part of it in an integrated manner. +// This compiler will be the JIT shell of the userspace (and kernelspace), and +// hot reloading will solely be supported through this language. + +// This is an extremely simple stack-machine one-pass compiler. + +// This will produce binary results that are slower than gcc or other production +// compilers. However, it remains reasonable, which is the goal for a learning +// compiler and OS development. + +// AST node system is not used so that the compiler can be up and running as +// soon as possible. + +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <stddef.h> +#include <stdlib.h> + +#include <backend.h> + +#include "lex.h" +#include "sv.h" +#include "parser.h" + +int main() +{ + struct string_pool sp; + sp_init(&sp); + + struct token* tokens = malloc(1024*1024); + + struct string_view src_file = backend_stream_init_src("code.m"); + + DEBUG("File, size: %d, content:\n%s", src_file.len, src_file.buf); + + struct lex_config lc; + lc.src = src_file.buf; + lc.size = src_file.len; + lc.sp = &sp; + lc.pos = 0; + + int index = 0; + + DEBUG("---TOKENIZER START---\n"); + do { + tokens[index] = lex_next(&lc); + index++; + } while(tokens[index - 1].type != T_EOF); + + DEBUG("---TOKENIZER END---\n"); + DEBUG("Tokenizer ran %d times!\n", index); + + for(int i = 0; i < index; i++) { + DEBUG("Token %d: type|%x|content|%.*s.\n", + i, tokens[i].type, + tokens[i].me.len, tokens[i].me.buf); + } + + backend_stream_init_dst("code.o"); + + DEBUG("---PARSER START---\n"); + struct parser_config pc; + pc.tokens = tokens; + pc.size = index; + pc.pos = 0; + parser_parse(&pc); + + DEBUG("---PARSER END---\n"); + + if(pc.state == PARSER_GRACEFUL) { + printf("\nBinary compiled!\n"); + exit(0); + } + else { + printf("\nCompilation failed!\n"); + exit(1); + } + return 0; +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..5e5485d --- /dev/null +++ b/src/parser.c @@ -0,0 +1,170 @@ +#include "parser.h" + +#include <backend.h> + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> + +#include "irm.h" +#include "lex.h" + +int parser_within_bounds(struct parser_config *pc) +{ + return pc->pos < pc->size && pc->tokens[pc->pos].type != T_EOF; +} + +int parser_within_obounds(struct parser_config *pc, size_t o) +{ + return (pc->pos + o) < pc->size && pc->tokens[pc->pos + o].type != T_EOF; +} + +struct token parser_peek(struct parser_config *pc, size_t o) +{ + assert(pc != NULL); + if(!parser_within_obounds(pc, o)) return pc->tokens[pc->pos]; + return pc->tokens[pc->pos + o]; +} + +void parser_throw(struct parser_config *pc, const char *err) +{ + pc->state = PARSER_PANIC; + DEBUG("%s", err); + irm_panic(&pc->irm); +} + +int parser_expect(struct parser_config *pc, uint64_t type, const char *err) +{ + assert(pc != NULL); + if(parser_peek(pc, 0).type == type) + return 0; + parser_throw(pc, err); + return 1; +} + +int parser_expect_condthrow(struct parser_config *pc, uint64_t type, const char *err) +{ + assert(pc != NULL); + if(parser_peek(pc, 0).type == type) + return 0; + if(err[0] != '\0') + parser_throw(pc, err); + return 1; +} + +struct token parser_advance(struct parser_config *pc) +{ + assert(pc != NULL); + if(!parser_within_bounds(pc)) return pc->tokens[pc->pos]; + pc->pos++; + return pc->tokens[pc->pos]; +} + +void parser_literal(struct parser_config *pc) +{ + assert(pc != NULL); + + struct token t = parser_peek(pc, 0); + + if(t.type == T_NUMBER) { + uint64_t a = atoi(t.me.buf); + irm_push_64v(&pc->irm, NUMBER64, a); + DEBUG("Pushed into the irm: %lld.\n", a); + parser_advance(pc); + return; + } + + parser_throw(pc, "Invalid expression\n"); +} + +uint64_t parser_type_to_irm_inst(struct parser_config *pc, uint64_t type) +{ + switch(type) { + case T_TYPE_G64: + return VAR_DECL_QWORD; + case T_TYPE_G32: + return VAR_DECL_DWORD; + case T_TYPE_G16: + return VAR_DECL_WORD; + case T_TYPE_G8: + return VAR_DECL_BYTE; + default: + parser_throw(pc, "An invalid type detected... defaulting storage to QWORD."); + return VAR_DECL_QWORD; + } +} + +void parser_assign_stmt(struct parser_config *pc) +{ + struct string_view symbol = parser_peek(pc, 0).me; + irm_push(&pc->irm, VAR_ASSIGN); + irm_push_lookup_sym(&pc->irm, SYM_VAR, symbol); + parser_advance(pc); + parser_expect(pc, T_EQUAL_SIGN, "Assignment operation requires an equal sign after the symbol."); + parser_advance(pc); + parser_literal(pc); +} + +void parser_decl_stmt(struct parser_config *pc) +{ + struct token t = parser_peek(pc, 0); + if(!(t.type & T_TYPE)) { + parser_throw(pc, "Declaration statement must begin with a type!"); + parser_advance(pc); + return; + } + + uint64_t type = parser_peek(pc, 0).type; + irm_push(&pc->irm, parser_type_to_irm_inst(pc, type)); + parser_advance(pc); + + parser_expect(pc, T_ID, "An identifier must follow a type in a declaration statement."); + struct string_view symbol_name = parser_peek(pc, 0).me; + irm_push_sym(&pc->irm, SYM_VAR, symbol_name); + parser_advance(pc); + + parser_expect(pc, T_EQUAL_SIGN, "Equal sign must be used to assign a value to a locator"); + parser_advance(pc); + parser_literal(pc); +} + +void parser_stmt(struct parser_config *pc) +{ + struct token t = parser_peek(pc, 0); + if(t.type & T_TYPE) { + parser_decl_stmt(pc); + } else if(t.type == T_ID) { + parser_assign_stmt(pc); + } else { + parser_throw(pc, + "Invalid statement, can only be declaration " + "or assignment."); + } + + irm_push(&pc->irm, STMT_DONE); // PUSH STATEMENT DONE SIGNAL + irm_stmt_done(&pc->irm); // TRIGGER SIGNAL + + parser_expect(pc, T_SEMICOL, "Statement must end with a semicolon.\n"); + parser_advance(pc); +} + +void parse_compound_stmt(struct parser_config *pc) +{ + assert(pc != NULL); + + irm_stmt_enter_scope(&pc->irm); + + irm_stmt_leave_scope(&pc->irm); +} + +void parser_parse(struct parser_config *pc) +{ + assert(pc != NULL); + pc->state = PARSER_GRACEFUL; + + irm_init(&pc->irm); + + while(parser_within_bounds(pc)) { + parser_stmt(pc); + } +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..38c8f94 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,25 @@ +#ifndef PARSER_H +#define PARSER_H + +#include <stddef.h> +#include <stdint.h> + +#include "lex.h" +#include "irm.h" + +enum parser_state { + PARSER_PANIC = -1, + PARSER_GRACEFUL +}; + +struct parser_config { + enum parser_state state; + struct token *tokens; + struct irm irm; + size_t size; + size_t pos; +}; + +void parser_parse(struct parser_config *pc); + +#endif diff --git a/src/sv.c b/src/sv.c new file mode 100644 index 0000000..d3debb3 --- /dev/null +++ b/src/sv.c @@ -0,0 +1,27 @@ +#include "sv.h" + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +void sp_init(struct string_pool *sp) +{ + assert(sp != NULL); + sp->mem = (uintptr_t)malloc(1024*1024); + sp->offset = 0; +} + +struct string_view sv_create(struct string_pool *sp, const char *buf, size_t s) +{ + assert(sp != NULL); + if(buf == NULL) { + printf("Error: cannot create string! Buffer null!"); + exit(1); + } + + char* ptr = (char*)sp->mem + sp->offset; + memcpy((void*)ptr, buf, s); + sp->offset += s; + return (struct string_view) {.buf = ptr, .len = s}; +} diff --git a/src/sv.h b/src/sv.h new file mode 100644 index 0000000..c4c8750 --- /dev/null +++ b/src/sv.h @@ -0,0 +1,22 @@ +#ifndef STRING_H +#define STRING_H + +#include <stdint.h> +#include <stddef.h> + +#define SV(sp, s) sv_create(sp, s, sizeof(s)) + +struct string_pool { + uintptr_t mem; + size_t offset; +}; + +struct string_view { + size_t len; + char *buf; +}; + +void sp_init(struct string_pool *sp); +struct string_view sv_create(struct string_pool *sp, const char *buf, size_t s); + +#endif diff --git a/src/symtab.c b/src/symtab.c new file mode 100644 index 0000000..0034480 --- /dev/null +++ b/src/symtab.c @@ -0,0 +1,55 @@ +#include "symtab.h" + +#include <stdlib.h> +#include <assert.h> +#include <string.h> + +#include "sv.h" + +void symtab_init(struct symbol_table *symtab) +{ + assert(symtab != NULL); + symtab->cap = 150; + symtab->count = 0; + symtab->entries = malloc(150); +} + +void symtab_expand(struct symbol_table *symtab) +{ + assert(symtab != NULL); + size_t newcap = symtab->cap * 2; + void* newbuf = malloc(newcap); + memcpy(newbuf, symtab->entries, symtab->cap); + free(symtab->entries); + symtab->cap = newcap; + symtab->entries = newbuf; +} + +void symtab_free(struct symbol_table *symtab) +{ + assert(symtab != NULL); + symtab->count = 0; + symtab->cap = 0; + free(symtab->entries); +} + +struct symlookup_result symtab_lookup(struct symbol_table *symtab, struct string_view ent) +{ + for(size_t i = 0; i < symtab->count; i++) { + struct symbol_entry *syment = &symtab->entries[i]; + + if(ent.len != syment->name.len) continue; + + if(strncmp(ent.buf, syment->name.buf, ent.len) == 0) { + return (struct symlookup_result){.ent = syment, .i = i}; + } + } + return (struct symlookup_result){.ent = NULL, .i = 0}; +} + +size_t symtab_append(struct symbol_table *symtab, struct symbol_entry syment) +{ + if(symtab->count + 1 >= symtab->cap) symtab_expand(symtab); + symtab->entries[symtab->count++] = syment; + return symtab->count - 1; +} diff --git a/src/symtab.h b/src/symtab.h new file mode 100644 index 0000000..ca5a726 --- /dev/null +++ b/src/symtab.h @@ -0,0 +1,40 @@ +/** + * @file symtab.h + * @description A basic symbol table implementation, dynamic expansion. + * + * @todo Implement hash map. + */ + +#ifndef SYMTAB_H +#define SYMTAB_H + +#include "sv.h" + +struct symbol_entry { + uint64_t symtype; + struct string_view name; + uintptr_t loc; + size_t len; +}; + +struct symbol_table { + struct symbol_entry *entries; + size_t count; + size_t cap; +}; + +void symtab_init(struct symbol_table *symtab); +void symtab_expand(struct symbol_table *symtab); +void symtab_free(struct symbol_table *symtab); + +struct symlookup_result { + struct symbol_entry *ent; + size_t i; +}; + +// Returns a pointer, should be on lookout, will reset on expansion. +struct symlookup_result symtab_lookup(struct symbol_table *symtab, + struct string_view ent); +size_t symtab_append(struct symbol_table *symtab, struct symbol_entry syment); + +#endif |
