From 58d405b3975043e8680a28741eae38a20af0babe Mon Sep 17 00:00:00 2001 From: 0x221E Date: Fri, 16 Jan 2026 00:53:35 +0100 Subject: [PATCH] Proj-Rewrite: arena allocator with mmap --- .gitmodules | 3 - build.sh | 1 + deps/utils | 1 - ibuild.c | 700 --------------------------------------------------- src/main.c | 26 ++ src/memory.c | 68 +++++ src/memory.h | 42 ++++ src/utils.c | 29 +++ src/utils.h | 17 ++ 9 files changed, 183 insertions(+), 704 deletions(-) delete mode 100644 .gitmodules create mode 100755 build.sh delete mode 160000 deps/utils delete mode 100644 ibuild.c create mode 100644 src/main.c create mode 100644 src/memory.c create mode 100644 src/memory.h create mode 100644 src/utils.c create mode 100644 src/utils.h diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 917e069..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "deps/utils"] - path = deps/utils - url = git@github.com:0x221E/utils.git diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..a6639d3 --- /dev/null +++ b/build.sh @@ -0,0 +1 @@ +gcc -o ibuild src/main.c src/memory.c src/utils.c -fsanitize=address -g -DDEBUG diff --git a/deps/utils b/deps/utils deleted file mode 160000 index 07c0bfe..0000000 --- a/deps/utils +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 07c0bfe478b30873ed23e01c4191f082f55371c5 diff --git a/ibuild.c b/ibuild.c deleted file mode 100644 index 57651d4..0000000 --- a/ibuild.c +++ /dev/null @@ -1,700 +0,0 @@ -#define CONFIG_FILE "IBUILD" -#define MAX_STATEMENTS 50 -#define MAX_TOKENS 200 -#define MAX_FILES_IN_DIR 100 -#define MAX_C_FILES 200 -#define MAX_H_FILES 200 -#define MAX_CPP_FILES 200 - -#include "deps/utils/src/arena_alloc.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DIE(fmt, ...) die_t(__func__, __LINE__, fmt, ##__VA_ARGS__) -#define LOG_ERROR(fmt, ...) log_write(__func__, fmt, ## __VA_ARGS__) -#define LOG_USER(fmt, ...) 
log_write(NULL, fmt, ## __VA_ARGS__) - -/*** allocated memory ***/ -// There shall never be another heap allocation outside of thy block. -// Each system and/or type of allocation shall have its own memory. -// There shall be no arena managed in this state, if the allocated memory is redundant and will not affect performance. -// Arenas shall never be arena_free'd during program execution, only before exit. Use arena_reset if you wish to achieve -// the same functionality without free() calls. - -typedef struct -{ - Arena file_contents; - Arena configuration; - Arena strings; - Arena parser; - Arena discovery; -} Memory; - -static Memory memory; - -void memory_free() -{ - arena_free(&memory.file_contents); - arena_free(&memory.configuration); - arena_free(&memory.strings); - arena_free(&memory.parser); - arena_free(&memory.discovery); -} - -/*** logging ***/ - -// Developer-reporting error interface, along with steps to reproduce the error. -// Include this log in your issues -void die_t(const char* func, int line, const char* fmt, ...) -{ - assert(func != NULL); - assert(fmt != NULL); - va_list args; - va_start(args, fmt); - printf("ibuild exception(f:%s-l:%d): ", func, line); - vprintf(fmt, args); - printf("\n"); - printf("******Last syscall error: %s\n", strerror(errno)); - va_end(args); - exit(1); -} - -// User-reporting interface -// func_name: MSG -void log_write(const char* func, const char* fmt, ...) 
-{ - if(fmt == NULL) DIE("fmt must not be null!"); - va_list args; - va_start(args, fmt); - if(func != NULL) - printf("%s: ", func); - vprintf(fmt, args); - printf("\n"); - va_end(args); -} - -/*** file management ***/ - -typedef struct -{ - const char* path; - unsigned char type; -} DirEntry; - -int dirent_get_next(DIR *d, DirEntry *e, const char *p) -{ - assert(d != NULL); - - struct dirent* file = readdir(d); - if(file == NULL) - return 1; - - switch(file->d_type) - { - case DT_DIR: - e->type = DT_DIR; - break; - case DT_REG: - e->type = DT_REG; - break; - default: - return 1; - } - - size_t fs = strlen(file->d_name); - char* buf = (char*)arena_alloc(&memory.strings, fs + 1); - memcpy(buf, file->d_name, fs + 1); - buf[fs] = '\0'; - e->path = buf; - return 0; -} - -size_t dirent_get_recursive(const char* p) -{ - if(p == NULL) DIE("p must not be NULL"); - DIR* dir = opendir(p); - DirEntry current; - - size_t count = 0; - - while(dirent_get_next(dir, ¤t, p) == 0) - { - if(current.type == DT_DIR && (strcmp(current.path, ".") == 0 || strcmp(current.path, "..") == 0 || strcmp(current.path, ".git") == 0)) - continue; - - DirEntry* ptr = (DirEntry*)arena_alloc(&memory.discovery, sizeof(DirEntry)); - size_t size = strlen(current.path) + 2 + strlen(p); - char* buf = (char*)arena_alloc(&memory.strings, size); - snprintf(buf, size, "%s/%s", p, current.path); - current.path = buf; - *ptr = current; - - count++; - if(current.type == DT_DIR) - { - count += dirent_get_recursive(buf); - continue; - } - } - - closedir(dir); - return count; -} - -long file_get_size(FILE* fd) -{ - if(fd == NULL) DIE("fd cannot be NULL."); - if(fseek(fd, 0, SEEK_END) != 0) DIE("file exists, fseek SEEK_END fail."); - long size = ftell(fd); - if(fseek(fd, 0, SEEK_SET) != 0) DIE("file exists, fseek SEEK_SET fail."); - return size; -} - -char* file_get_content(Arena* a, const char* fp) -{ - if(fp == NULL) DIE("fp cannot be NULL"); - FILE* fd = fopen(fp, "r"); - long size = file_get_size(fd); - char* buf 
= (char*)arena_alloc(a, size + 1); - size_t ret = fread(buf, size, 1, fd); - if (ret != 1) DIE("fread returned %d", ret); - buf[size] = '\0'; - return buf; -} - -bool file_has_r_access(const char* f) -{ - if(access(f, F_OK | R_OK) == 0) - return true; - return false; -} - -/*** discovery phase ***/ - -typedef struct -{ - DirEntry** c_files; - DirEntry** h_files; - DirEntry** cpp_files; - DirEntry** o_files; -} Discovery; - -void discover(Discovery *d) -{ - size_t amount = dirent_get_recursive("."); - if(amount == 0) DIE("Directory is empty! If you have symlinks in the directory, ibuild does not support them."); - - size_t c_c = 0; - size_t h_c = 0; - size_t cpp_c = 0; - - for(size_t i = 0; i < amount; i++) - { - char* current = NULL; - if((current = strrchr(((DirEntry*)memory.discovery.start)[i].path, '.')) == NULL) - continue; - - DirEntry* current_dirent = &((DirEntry*)memory.discovery.start)[i]; - - if(strcmp(current, ".c") == 0) - { - if(c_c > MAX_C_FILES) - DIE("Maximum C file limit was reached with the ibuild project. To extend, you must edit the source code and decide whether there is enough memory."); - - d->c_files[c_c] = current_dirent; - c_c++; - } - else if(strcmp(current, ".h") == 0) - { - if(h_c > MAX_H_FILES) - DIE("Maximum H file limit was reached with the ibuild project. To extend, you must edit the source code and decide whether there is enough memory."); - - d->h_files[h_c] = current_dirent; - h_c++; - - }else if(strcmp(current, ".cpp") == 0) - { - if(cpp_c > MAX_CPP_FILES) - DIE("Maximum CPP file limit was reached with the ibuild project. 
To extend, you must edit the source code and decide whether there is enough memory."); - - d->cpp_files[cpp_c] = current_dirent; - cpp_c++; - } - } - - LOG_USER("Found %d C files, %d H files, %d CPP files.", c_c, h_c, cpp_c); -} - -/*** compilation process ***/ - -typedef struct -{ - char* compiler_path; - char* build_dir; - char* target_exec; - char* src_dir; - char** src_files; - char* version; - char* dep_searcher; - char* dep_searcher_flags; -} Configuration; - -void launch_compile(Configuration* co) -{ - if(co == NULL) DIE("co has to be valid"); - if(co->compiler_path == NULL) DIE("requires a valid compiler path"); - - int fildes[2]; - pipe(fildes); - pid_t p = fork(); - - if(p<0) - { - DIE("fork error"); - } - else if (p==0) - { - char* args[] = {co->compiler_path, "ibuild.c", "-o", co->target_exec, NULL}; - close(STDOUT_FILENO); - dup(fildes[1]); - close(fildes[0]); - close(fildes[1]); - - if((execvp(co->compiler_path, args)) == -1) - DIE("launch_compile() execvp error"); - exit(0); - } -} - -/*** configuration lexer ***/ - -typedef struct -{ - char *src; -} Tokenizer; - -typedef enum -{ - T_INVALID = -1, - T_IDENTIFIER = 0, - T_STRING = 1, - T_IS = 2, - T_EOF = 3, -} TokenType; - -typedef struct -{ - TokenType type; - char* value; - int line; -} Token; - -#define TOKEN_CONST {T_INVALID, NULL, 0} - -bool is_alpha_uppercase(char s) -{ - return (s >= 'A' && s <= 'Z'); -} - -bool is_alpha_lowercase(char s) -{ - return (s >= 'a' && s <= 'z'); -} - -bool is_whitespace(char s) -{ - return s == ' ' || s == '\t' || s == '\r' || s == '\n'; -} - -bool is_part_of_key(char s) -{ - return is_alpha_uppercase(s) || s == '_'; -} - -size_t skip_group(Tokenizer* t, bool (*func)(char)) -{ - assert(func != NULL); - size_t len = 0; - while(func(*t->src)) - { - len++; - t->src += 1; - } - return len; -} - -Token tokenize_identifier(Tokenizer *t) -{ - char* temp = t->src; - size_t len = skip_group(t, &is_part_of_key); - char* buf = (char*)arena_alloc(&memory.strings, sizeof(char) 
* len + 1); - memcpy(buf, temp, len); - buf[len] = '\0'; - return (Token){T_IDENTIFIER, buf}; -} - -Token tokenize_string(Tokenizer *t) -{ - t->src += 1; - char* temp = t->src; - size_t len = 0; - - while(*t->src != '"' && *t->src != '\0') - { - len++; - t->src += 1; - } - - if(*t->src == '"') t->src++; - - char* buf = (char*)arena_alloc(&memory.strings, (sizeof(char) * len) + 1); - memcpy(buf, temp, len); - buf[len] = '\0'; - return (Token){T_STRING, buf}; -} - -Token tokenizer_next(Tokenizer* t) -{ - if(is_whitespace(*t->src)) skip_group(t, &is_whitespace); - - if(is_alpha_uppercase(*t->src)) - { - return tokenize_identifier(t); - } - - switch(*t->src) - { - case '"': - return tokenize_string(t); - case '=': - t->src++; - return (Token){T_IS, NULL}; - } - - if(*t->src == '\0') return (Token){T_EOF, NULL}; - - t->src++; - return (Token){T_INVALID, NULL}; -} - -size_t tokenizer_tokenize(Tokenizer* t, Token* ts) -{ - size_t t_count = 0; - while(1) - { - if(t_count > MAX_TOKENS) DIE("Maximum number of tokens reached! 
\n Could not read configuration file."); - Token token = tokenizer_next(t); - if(token.type == T_INVALID) DIE("illegal token!"); - ts[t_count] = token; - t_count++; - if(token.type == T_EOF) break; - } - return t_count; -} - -/*** configuration parser ***/ - -typedef enum -{ - K_NA = -1, - K_UNKNOWN = 0, - K_COMPILER_PATH = 1, - K_SRC_DIR = 2, - K_SRC_FILES = 3, - K_BUILD_DIR = 4, - K_TARGET_EXEC = 5, - K_DEP_SEARCHER = 6, - K_DEP_SEARCHER_FLAGS = 7, - K_VERSION = 8, -} Key; - -typedef enum -{ - N_STRING = 0, - N_ARRAY = 1, - N_PAIR = 2, -} NodeType; - -typedef struct -{ - NodeType type; - Key key; - void* value; -} Node; - -typedef struct -{ - Token* tokens; - size_t loc; - size_t cap; -} Parser; - -typedef struct -{ - Key key; - const char* value; -} KeyMap; - -static KeyMap keyword_mappings[] = { - {K_COMPILER_PATH, "COMPILER_PATH"}, - {K_SRC_DIR, "SRC_DIR"}, - {K_SRC_FILES, "SRC_FILES"}, - {K_BUILD_DIR, "BUILD_DIR"}, - {K_TARGET_EXEC, "TARGET_EXEC"}, - {K_DEP_SEARCHER, "DEP_SEARCHER"}, - {K_DEP_SEARCHER_FLAGS, "DEP_SEARCHER_FLAGS"}, - {K_UNKNOWN, NULL}, -}; - -Key key_lookup(char* s) -{ - if(s == NULL) return K_UNKNOWN; - - for(const KeyMap* ptr = keyword_mappings; ptr->value != NULL; ptr++) - { - if(strcmp(ptr->value, s) == 0) - { - return ptr->key; - } - } - return K_UNKNOWN; -} - -Token parser_peek(Parser* p, size_t o) -{ - if (p->loc + o >= p->cap) - { - return p->tokens[p->loc - 1]; - } - Token t = p->tokens[p->loc + o]; - return t; -} - -Token parser_previous(Parser* p) -{ - if(p->loc - 1 < 0) - { - DIE("parser logic error."); - } - - return p->tokens[p->loc - 1]; -} - -Token parser_advance(Parser* p) -{ - if(p->loc + 1 >= p->cap) - { - return p->tokens[p->loc]; - } - p->loc++; - return p->tokens[p->loc]; -} - -Token parser_expect(Parser* p, TokenType tt, char* s) -{ - assert(p != NULL); - assert(s != NULL); - - if(parser_peek(p, 1).type != tt) - DIE(s); - - return parser_advance(p); -} - -Node* parser_expression(Parser* p) -{ - Token t = 
parser_advance(p); - if(t.type == T_STRING) - { - Node* node = (Node*)arena_alloc(&memory.parser, sizeof(Node)); - node->key = K_NA; - node->value = (void*)t.value; - node->type = N_STRING; - parser_advance(p); - return node; - } - - DIE("Syntax error: Invalid expression."); -} - -Node* parser_statement(Parser* p) -{ - Token t = parser_peek(p, 0); - if(t.type == T_IDENTIFIER) - { - Key k = key_lookup(t.value); - if(k == K_UNKNOWN) DIE("Syntax error: Unexpected identifer encountered"); - parser_expect(p, T_IS, "Syntax error: Expected '=' after identifier"); - Node* expression = parser_expression(p); - Node* node = (Node*)arena_alloc(&memory.parser, sizeof(Node)); - node->key = k; - node->value = (void*)expression; - node->type = N_PAIR; - return node; - } - else - { - DIE("Syntax error: expected an identifier, got: %d", t.type); - } -} - -Node* parser_parse_next(Parser* p) -{ - return parser_statement(p); -} - -size_t parser_parse(Parser* p, Node** s) -{ - assert(p != NULL); - assert(s != NULL); - - size_t p_count = 0; - - while(p_count < MAX_STATEMENTS && p->loc < p->cap - 1) - { - s[p_count] = parser_parse_next(p); - p_count++; - } - return p_count; -} - -#ifdef DEBUG - -void debug_parser(Node* n, int indent) -{ - for(int i = 0; i < indent; i++) - { - printf(" "); - } - - switch(n->type) - { - case N_PAIR: - printf("PAIR STATEMENT"); - printf(" Key: %10d\n", n->key); - if(n->value != NULL) - debug_parser((Node*)n->value, 2); - break; - case N_STRING: - printf("STRING EXPRESSION"); - printf(" Value: %s\n", (char*)n->value); - break; - } -} - -#endif - -/*** configuration process ***/ - -void configuration_set_option(Configuration* co, Key k, char* val) -{ - switch(k) - { - case K_NA: DIE("Node with an NA key hit configuration_set_option"); break; - case K_UNKNOWN: DIE("Unknown node encountered!"); break; - case K_COMPILER_PATH: co->compiler_path = val; break; - case K_BUILD_DIR: co->build_dir = val; break; - case K_TARGET_EXEC: co->target_exec = val; break; - case 
K_DEP_SEARCHER: co->dep_searcher = val; break; - case K_DEP_SEARCHER_FLAGS: co->dep_searcher_flags = val; break; - case K_VERSION: co->version = val; break; - case K_SRC_DIR: co->src_dir = val; break; - case K_SRC_FILES: DIE("Not implemented!"); break; - default: - DIE("Invalid compiler option was supplied!"); - } -} - -void configuration_process_node(Configuration* co, Node* n) -{ - switch(n->type) - { - case N_PAIR: - Key k = n->key; - configuration_set_option(co, k, (char*)((Node*)n->value)->value); - break; - } -} - -Configuration configuration_struct_setup(int len, Node** st) -{ - Configuration config; - config.compiler_path = "/usr/bin/gcc"; - config.build_dir = "."; - config.src_dir = "."; - config.target_exec = "iexec"; - config.dep_searcher = "/usr/bin/gcc"; - config.dep_searcher_flags = "-MM"; - - for (size_t i = 0; i < len; i++) - configuration_process_node(&config, st[i]); - - LOG_USER("Configured IBUILD options."); - return config; -} - -/*** configuration file management ***/ - -Configuration process_config(const char* fn) -{ - if(!file_has_r_access(fn)) - { - return configuration_struct_setup(0, NULL); - } - - LOG_USER("IBUILD Configuration file detected.\n"); - char* config_mem = file_get_content(&memory.file_contents, fn); - - Token tokens[MAX_TOKENS]; - Tokenizer t = { .src = config_mem }; - - size_t t_count = tokenizer_tokenize(&t, tokens); - - if(t_count == 0) - { - return configuration_struct_setup(0, NULL); - } - - Parser parser = {.tokens = tokens, .loc = 0, .cap = t_count }; - Node* statements[MAX_STATEMENTS]; - size_t p_count = parser_parse(&parser, statements); - -#ifdef DEBUG - - for (size_t i = 0; i < p_count; i++) - debug_parser(statements[i], 0); - -#endif - - return configuration_struct_setup(p_count, statements); -} - -#ifndef TESTING -int main(int argc, char** argv) -{ - LOG_USER("Build version: %s", VERSION); - Configuration c = process_config(CONFIG_FILE); - - dirent_get_recursive("."); - - Discovery d; - d.c_files = 
(DirEntry**)arena_alloc(&memory.discovery, MAX_C_FILES * sizeof(DirEntry*)); - d.h_files = (DirEntry**)arena_alloc(&memory.discovery, MAX_H_FILES * sizeof(DirEntry*)); - discover(&d); - - // Hand over discovery to the dependency checker. - - // Compile - //launch_compile(&c); - memory_free(); - return 0; -} -#endif diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..393f6ac --- /dev/null +++ b/src/main.c @@ -0,0 +1,26 @@ +#include "memory.h" + +#include +#include +#include + +#include "utils.h" + +int main(void) { + Backend b = {NULL, 0}; + backend_initialize(&b); + + Arena a = {0}; /* zeroed: arena_alloc tests mem/last against NULL to decide whether to run arena_init */ + a.b = &b; + + char* test = (char*)arena_alloc(&a, 6); + char* test2 = "Noice"; + + memcpy(test, test2, 5); + + test[5] = '\0'; + + printf("That's right. %s\n", test); + + return 0; +} diff --git a/src/memory.c b/src/memory.c new file mode 100644 index 0000000..9f86164 --- /dev/null +++ b/src/memory.c @@ -0,0 +1,68 @@ +#include "memory.h" +#include "utils.h" + +#include +#include +#include +#include + +void backend_initialize(Backend* b) +{ + assert(b != NULL); + size_t cap = MEM_BACKEND_MAX_CAP; + void* mem = mmap(NULL, cap, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if(mem == MAP_FAILED) DIE("mmap failed!"); + b->mem = mem; + b->last = b->mem; + b->len = 0; + b->cap = cap; +} + +void backend_free(Backend* b) +{ + if(munmap(b->mem, b->cap) == -1) DIE("munmap failed!"); +} + +void* backend_reserve(Backend* b) +{ + assert(b != NULL); + void* ptr = b->last; + if((size_t)(MEM_ARENA_MAX_CAP) > b->cap - b->len) /* an exact fit is allowed; checked against the size actually mapped */ + DIE("Out of memory, requested %zu (current usage %zu out of %zu).", (size_t)(MEM_ARENA_MAX_CAP), b->len, b->cap); + b->last = (char*)b->last + (size_t)(MEM_ARENA_MAX_CAP); + b->len += MEM_ARENA_MAX_CAP; + return ptr; +} + +void arena_init(Arena* a) +{ + assert(a != NULL); + assert(a->b != NULL); + void* mem = backend_reserve(a->b); + a->mem = mem; + a->cap = MEM_ARENA_MAX_CAP; + a->len = 0; + a->last = a->mem; + a->next = NULL; + LOG_DEBUG("Arena initialized 
with block @ %p.", a->mem); +} + +void* arena_alloc(Arena* a, size_t len) +{ + assert(a != NULL); + assert(a->b != NULL); + if(a->mem == NULL || a->last == NULL) arena_init(a); + + size_t len_aligned = (len + 7) & ~(size_t)7; /* round up to a multiple of 8; size_t avoids truncating large requests */ + + if(len_aligned < len || len_aligned > a->cap - a->len) /* first test catches wrap-around of len + 7 */ + { + DIE("Arena exceeded memory, requested: %zu (%zu out of %zu used)", len, a->len, a->cap); + } + + void* res = a->last; + a->len = len_aligned + a->len; + a->last = (char*)a->last + len_aligned; + LOG_DEBUG("Arena with block @ %p, allocated %zu bytes of memory, arena last is now @ %p.", a->mem, len_aligned, a->last); + return res; +} diff --git a/src/memory.h b/src/memory.h new file mode 100644 index 0000000..1e665bf --- /dev/null +++ b/src/memory.h @@ -0,0 +1,42 @@ +#ifndef MEMORY_H +#define MEMORY_H + + +#ifndef MEM_ARENA_MAX_CAP +#define MEM_ARENA_MAX_CAP (1024 * 1024) +#endif + +#ifndef MEM_BACKEND_MAX_CAP +#define MEM_BACKEND_MAX_CAP (1024 * 1024 * 256) +#endif + +#include + +typedef struct +{ + void* mem; + void* last; + size_t len; + size_t cap; +} Backend; + +typedef struct +{ + void* mem; + void* last; + size_t len; + size_t cap; + void* next; + Backend* b; +} Arena; + +void backend_initialize(Backend* b); +void backend_free(Backend* b); + +void* backend_reserve(Backend* b); + +void arena_init(Arena* a); +//void arena_reset(Arena* a); +void* arena_alloc(Arena* a, size_t len); + +#endif diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..f5fbbd8 --- /dev/null +++ b/src/utils.c @@ -0,0 +1,29 @@ +#include "utils.h" + +#include +#include +#include +#include +#include +#include + +void die_f(const char* func, int l, const char* fmt,...) +{ + va_list args; + va_start(args, fmt); /* the last named parameter is required before C23 */ + printf("IBUILD Error (%d:%s): ", l, func); + vprintf(fmt, args); + printf("\nlast syscall error: %s\n", strerror(errno)); + va_end(args); + exit(1); +} + +void log_f(const char* func, int l, const char* fmt, ...) 
+{ + va_list args; + va_start(args, fmt); /* the last named parameter is required before C23 */ + printf("IBUILD Log (%d:%s): ", l, func); + vprintf(fmt, args); + printf("\n"); + va_end(args); +} diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..c522986 --- /dev/null +++ b/src/utils.h @@ -0,0 +1,17 @@ +#ifndef UTILS_H +#define UTILS_H + +#define DIE(fmt, ...) die_f(__func__, __LINE__, fmt, ## __VA_ARGS__) + +#define LOG(fmt, ...) log_f(__func__, __LINE__, fmt, ## __VA_ARGS__) + +#ifdef DEBUG +#define LOG_DEBUG(fmt, ...) log_f(__func__, __LINE__, fmt, ## __VA_ARGS__) +#else +#define LOG_DEBUG(...) +#endif + +void die_f(const char* func, int l, const char* fmt, ...); +void log_f(const char* func, int l, const char* fmt, ...); + +#endif