From efd764b9077b101fb02b4b484766be8ae81f2a51 Mon Sep 17 00:00:00 2001
From: Ludovic 'Archivist' Lagouardette
Date: Tue, 21 May 2024 19:24:55 +0200
Subject: [PATCH] Added garbage collection (and a memory corruption bug)

---
 ink.h          |  83 +++++++++++++++++--
 lib.c          | 246 ++++++++++++++++++++++++++++++++++++++++++++++++-
 main.c         |   3 +-
 test/test02.nk |   2 +
 test/test03.nk |  10 ++-
 test/test04.nk |   2 +-
 6 files changed, 333 insertions(+), 13 deletions(-)

diff --git a/ink.h b/ink.h
index 8846c9b..0ef9278 100644
--- a/ink.h
+++ b/ink.h
@@ -1,6 +1,7 @@
 #pragma once
 #include "stddef.h"
 
+
 /**
  * Represents the natively defined type of integers
  */
@@ -85,16 +86,31 @@ struct ink_routine {
     void *routine_userdata;
 };
 
+/**
+ * Contains a list of elements on which garbage collection must not be performed
+ */
 struct ink_collection_list {
-    struct elem* elems;
-    size_t size;
+    struct elem* elements;
+    size_t count;
 };
 
+struct element_slab {
+    void* data;
+    int uses;
+    int in_use;
+};
+
+/**
+ * Contains all the data for every element of any type and its garbage collection information.
+ */
 struct ink_type {
-    const char* name;
-    void* elements;
-    void (*collect)(void*);
-    struct ink_collection_list (*gc)(void*);
+    const char* name; //< The name of the type
+    int element_size; //< The size of individual elements of the type, 0 for int adjacent, negative for unmanaged size
+    struct element_slab* elements; //< The elements that are still live
+    int elements_top; //< The top of the elements list
+    int elements_capacity; //< The allocated capacity of the elements list
+    void (*collect)(struct context*,void*); //< The "destructor" of the object
+    struct ink_collection_list (*gc)(struct context*,void*); //< A function that returns an in-interpreter allocated list of elem references within the object
 };
 
 /**
@@ -136,8 +152,9 @@ struct context {
     char **lex_reserved_words;
     int lex_reserved_words_capacity;
     int lex_reserved_words_top;
-	
-	unsigned int steps;
+
+    unsigned int steps;
+    unsigned int collections;
 
     /**
      * Can be set to any data that is convenient to the user to track and use within natively defined functions
@@ -251,6 +268,56 @@ void ink_pop_fn(struct context *ctx);
  */
 void ink_pop(struct context *ctx);
 
+/**
+ * Declares a new type that can be stored within the interpreter
+ * @param ctx The context in which to add the type
+ * @param type_name The name of the type we want to add
+ * @param size The size in bytes of the type to add, a size of 0 means no size, in which case the type is adjacent to C int, a negative size means that the memory is not managed by the interpreter.
+ * @param collect A "destructor" function for the data
+ * @param gc A function that returns a list (allocated with the `inner_malloc`) of all the elements this element holds references to
+ * @return if positive, a new type id, if negative an error value
+ * @internal user defined type ids minimal value is 16, we keep the first 16 types as reserved, just like negative type ids
+ */
+int ink_new_type(
+    struct context* ctx,
+    const char* type_name,
+    int size,
+    void (*collect)(struct context*,void*),
+    struct ink_collection_list (*gc)(struct context*,void*)
+);
+
+/**
+ * Fetches the native value behind an in-interpreter reference
+ * @param ctx The context of the interpreter
+ * @param ref The in-interpreter reference
+ * @return A pointer to the created value or NULL if it can't be found or has a size of 0
+ */
+void* ink_get_value(struct context* ctx, struct elem ref);
+
+/**
+ * Builds a native type from the provided memory by copying it using memcpy
+ * @param ctx The context in which we operate
+ * @param type_id The type_id to use
+ * @param ptr The pointer from which to copy
+ * @return The in-interpreter reference of the newly created element
+ */
+struct elem ink_make_native(struct context* ctx, int type_id, void* ptr);
+
+/**
+ * Builds a transparent type from the provided pointer
+ * @param ctx The context in which we operate
+ * @param type_id The type_id to use
+ * @param ptr The pointer
+ * @return The in-interpreter reference of the newly created element
+ */
+struct elem ink_make_transparent(struct context* ctx, int type_id, void* ptr);
+
+/**
+ * Launch the mark and sweep garbage collection
+ * @param ctx The context to clean
+ */
+void ink_gc(struct context* ctx);
+
 #ifdef __cplusplus
 };
 #endif
\ No newline at end of file
diff --git a/lib.c b/lib.c
index e0cc0a4..c5c5835 100644
--- a/lib.c
+++ b/lib.c
@@ -21,6 +21,9 @@
 #define _KEYWORD_INK_END "end"
 #define _KEYWORD_INK_RETURN "return"
 
+#define min(x, y) ((x) > (y) ? (y) : (x))
+#define max(x, y) ((x) < (y) ? (y) : (x))
+
 struct label {
     int active;
     int dest;
@@ -271,6 +274,9 @@ struct context* ink_make_context(void*(*malloc)(size_t), void*(*realloc)(void*,
     ctx->routines = NULL;
     ctx->routines_capacity = 0;
     ctx->routines_top = 0;
+    ctx->types = NULL;
+    ctx->types_capacity = 0;
+    ctx->types_top = 0;
     ctx->native_words = NULL;
     ctx->native_words_capacity = 0;
     ctx->native_words_top = 0;
@@ -280,7 +286,8 @@ struct context* ink_make_context(void*(*malloc)(size_t), void*(*realloc)(void*,
     ctx->lex_reserved_words = NULL;
     ctx->lex_reserved_words_capacity = 0;
     ctx->lex_reserved_words_top = 0;
-	ctx->steps = 0;
+    ctx->collections = 0;
+    ctx->steps = 0;
     return ctx;
 }
 
@@ -791,6 +798,189 @@ int ink_step_everyone(struct context* pContext) {
     return 0;
 }
 
+int ink_new_type(
+    struct context* ctx,
+    const char* type_name,
+    int size,
+    void (*collect)(struct context*,void*),
+    struct ink_collection_list (*gc)(struct context*,void*)
+) {
+    if(ctx->panic) return -128;
+    if(ctx->types == NULL) {
+        ctx->types = ctx->inner_malloc(sizeof(struct ink_type) * 8);
+        if(ctx->types == NULL) return -129;
+        ctx->types_top = 0;
+        ctx->types_capacity = 8;
+    } else if(ctx->types_top == ctx->types_capacity) {
+        int new_count = (ctx->types_capacity + ctx->types_capacity/2);
+        void* renewed = ctx->inner_realloc(ctx->types, sizeof(struct ink_type) * new_count);
+        if(renewed == NULL) {
+            return -129;
+        } else {
+            ctx->types = renewed;
+            ctx->types_capacity = new_count;
+        }
+    }
+    ctx->types[ctx->types_top].name = type_name;
+    ctx->types[ctx->types_top].element_size = size;
+    ctx->types[ctx->types_top].elements = NULL;
+    ctx->types[ctx->types_top].elements_top = 0;
+    ctx->types[ctx->types_top].elements_capacity = 0;
+    ctx->types[ctx->types_top].collect = collect;
+    ctx->types[ctx->types_top].gc = gc;
+
+    ctx->types_top++;
+    // Satisfying the minimal value requirement
+    return ctx->types_top - 1 + 16;
+}
+
+// Resolves a reference to its slab entry, returns NULL unless it names a live element of a sized user type
+static struct element_slab* ink_get_value_link(struct context* ctx, struct elem ref) {
+    if(ref.type < 16) return NULL;
+    int type_id = ref.type - 16;
+    if(type_id >= ctx->types_top) return NULL;
+    if(ctx->types[type_id].element_size == 0) return NULL;
+    if(ref.value < 0) return NULL;
+    if(ref.value >= ctx->types[type_id].elements_top) return NULL;
+    if(! ctx->types[type_id].elements[ref.value].in_use) return NULL;
+    return ctx->types[type_id].elements + ref.value;
+}
+
+void* ink_get_value(struct context* ctx, struct elem ref) {
+    struct element_slab* s;
+    s = ink_get_value_link(ctx, ref);
+    if(s == NULL) return NULL;
+    return s->data;
+}
+
+struct elem ink_make_native(struct context* ctx, int type, void* ptr) {
+    if(type < 16) {
+        struct elem ret;
+        ret.type = 0;
+        ret.value = -130;
+        return ret;
+    }
+    int type_id = type - 16;
+    if(type_id >= ctx->types_top) {
+        struct elem ret;
+        ret.type = 0;
+        ret.value = -129;
+        return ret;
+    }
+
+    if(ctx->panic) {
+        struct elem ret;
+        ret.type = 0;
+        ret.value = -135;
+        return ret;
+    }
+    if(ctx->types[type_id].elements == NULL) {
+        ctx->types[type_id].elements = ctx->inner_malloc(sizeof(struct element_slab) * 8);
+        if(ctx->types[type_id].elements == NULL) {
+            struct elem ret;
+            ret.type = 0;
+            ret.value = -129;
+            return ret;
+        }
+        ctx->types[type_id].elements_top = 0;
+        ctx->types[type_id].elements_capacity = 8;
+        // memset counts bytes, not slabs: every fresh slot must start with in_use == 0
+        memset(ctx->types[type_id].elements + ctx->types[type_id].elements_top, 0, sizeof(struct element_slab) * (ctx->types[type_id].elements_capacity - ctx->types[type_id].elements_top));
+    } else if(ctx->types[type_id].elements_top == ctx->types[type_id].elements_capacity) {
+        int new_count = (ctx->types[type_id].elements_capacity + ctx->types[type_id].elements_capacity/2);
+        void* renewed = ctx->inner_realloc(ctx->types[type_id].elements, sizeof(struct element_slab) * new_count);
+        if(renewed == NULL) {
+            struct elem ret;
+            ret.type = 0;
+            ret.value = -129;
+            return ret;
+        } else {
+            ctx->types[type_id].elements = renewed;
+            ctx->types[type_id].elements_capacity = new_count;
+            memset(ctx->types[type_id].elements + ctx->types[type_id].elements_top, 0, sizeof(struct element_slab) * (ctx->types[type_id].elements_capacity - ctx->types[type_id].elements_top));
+        }
+    }
+    int g = ctx->types[type_id].elements_capacity;
+    int i;
+    for(i = 0; i < g; ++i) {
+        if(! ctx->types[type_id].elements[i].in_use) {
+            if(ctx->types[type_id].element_size < 0) {
+                ctx->types[type_id].elements[i].data = ptr;
+            } else {
+                void* new_ptr = ctx->malloc(ctx->types[type_id].element_size);
+                if(new_ptr == NULL) {
+                    struct elem ret;
+                    ret.type = 0;
+                    ret.value = -139;
+                    return ret;
+                }
+                memcpy(new_ptr, ptr, ctx->types[type_id].element_size);
+                // The slot owns the copy: storing the caller's pointer would later free memory we never allocated
+                ctx->types[type_id].elements[i].data = new_ptr;
+            }
+            // Claim the slot only once its data is set so that a failed allocation leaves it free
+            ctx->types[type_id].elements[i].in_use = 1;
+            ctx->types[type_id].elements[i].uses = 1;
+            // elements_top is a high-water mark, reusing a slot below it must not raise it
+            ctx->types[type_id].elements_top = max(ctx->types[type_id].elements_top, i+1);
+            struct elem ret;
+            ret.type = type;
+            ret.value = i;
+            return ret;
+        }
+    }
+    struct elem ret;
+    ret.type = 0;
+    ret.value = -140;
+    return ret;
+}
+
+void ink_gc(struct context* ctx) {
+    int i, j, k;
+    // Reset phase: every slot starts unreferenced
+    for(i = 0; i < ctx->types_top; ++i) {
+        for(j = 0; j < ctx->types[i].elements_top; ++j) {
+            ctx->types[i].elements[j].uses = 0;
+        }
+    }
+
+    // Mark phase: count the references held by in-use elements, then the ones held by routine stacks
+    for(i = 0; i < ctx->types_top; ++i) {
+        for(j = 0; j < ctx->types[i].elements_top; ++j) {
+            // Free slots have no data, handing them to the gc callback would dereference NULL
+            if(! ctx->types[i].elements[j].in_use) continue;
+            struct ink_collection_list c = ctx->types[i].gc(ctx, ctx->types[i].elements[j].data);
+            for(k = 0; k < c.count; ++k) {
+                struct element_slab* v = ink_get_value_link(ctx, c.elements[k]);
+                if(v != NULL) ++v->uses;
+            }
+            if(c.elements != NULL) ctx->inner_free(c.elements);
+        }
+    }
+    for(i = 0; i < ctx->routines_top; ++i) {
+        for(j = 0; j < ctx->routines[i].top; ++j) {
+            struct element_slab* v = ink_get_value_link(ctx, ctx->routines[i].stack[j]);
+            if(v != NULL) ++v->uses;
+        }
+    }
+
+    // Sweep phase: release the in-use elements nothing refers to, slots that are already free are left alone
+    for(i = 0; i < ctx->types_top; ++i) {
+        for(j = 0; j < ctx->types[i].elements_top; ++j) {
+            if(ctx->types[i].elements[j].in_use && ctx->types[i].elements[j].uses == 0) {
+                ctx->collections++;
+                ctx->types[i].collect(ctx, ctx->types[i].elements[j].data);
+                if(ctx->types[i].element_size > 0) {
+                    ctx->free(ctx->types[i].elements[j].data);
+                }
+                ctx->types[i].elements[j].data = NULL;
+                ctx->types[i].elements[j].uses = 0;
+                ctx->types[i].elements[j].in_use = 0;
+            }
+        }
+    }
+}
+
 /**********************************************************************************************************************/
 
 static void print_stacktrace(struct context* _) {
@@ -1069,10 +1259,62 @@ static void print_as_utf8(struct context* ctx) {
     ink_pop(ctx);
 }
 
+struct ink_array {
+    int top;
+    int capacity;
+    struct elem* elements;
+};
+
+// Finds a registered type by name and returns its public type id, or -1 when no such type exists
+static int get_type_by_name(struct context* ctx, const char* name) {
+    int i;
+    for(i = 0; i < ctx->types_top; ++i) {
+        if(strcmp(ctx->types[i].name, name) == 0) {
+            return i + 16;
+        }
+    }
+    return -1;
+}
+
+// Destructor of arrays: releases the element storage, the array struct itself is freed by ink_gc
+static void collect_array(struct context* ctx, void* array) {
+    struct ink_array* ary = array;
+    ctx->free(ary->elements);
+}
+
+// Reports every element stored in the array, the returned list is released by ink_gc
+static struct ink_collection_list gc_array(struct context* ctx, void* array) {
+    struct ink_array* ary = array;
+    struct ink_collection_list c;
+    c.elements = NULL;
+    c.count = 0;
+    // An empty array has nothing to report, and memcpy must not be handed its NULL elements pointer
+    if(ary->top <= 0) return c;
+    c.elements = ctx->inner_malloc(sizeof(struct elem)*ary->top);
+    c.count = ary->top;
+    memcpy(c.elements, ary->elements, sizeof(struct elem)*ary->top);
+    return c;
+}
+
+// Native word "array": builds an empty array element and hands its reference to ink_push
+static void new_array(struct context* ctx) {
+    int tid = get_type_by_name(ctx, "array");
+    struct ink_array ary;
+    ary.elements = NULL;
+    ary.top = 0;
+    ary.capacity = 0;
+    struct elem e = ink_make_native(ctx, tid, &ary);
+    ink_push(ctx, e);
+}
+
+
+
 int ink_std_library(struct context* ctx) {
     int v;
     v = 0;
-	v += ink_add_native(ctx, "trace", print_stacktrace);
+    int array_t = ink_new_type(ctx, "array", sizeof(struct ink_array), collect_array, gc_array);
+    v += ink_add_native(ctx, "array", new_array);
+    v += ink_add_native(ctx, "trace", print_stacktrace);
     v += ink_add_native(ctx, "print_int", print_int);
     v += ink_add_native(ctx, "print_utf8", print_as_utf8);
     v += ink_add_native(ctx, "+", add_int);
diff --git a/main.c b/main.c
index b0ae3f8..cbe4d79 100644
--- a/main.c
+++ b/main.c
@@ -24,6 +24,7 @@ int main(int argc, char** argv) {
     while(ink_can_run(ctx)) {
         ink_step_everyone(ctx);
     }
-    printf("\nExecuted in %u steps\n", ctx->steps);
+    ink_gc(ctx);
+    printf("\nExecuted in %u steps\nCollected %u times\n", ctx->steps, ctx->collections);
     return ctx->panic;
 }
\ No newline at end of file
diff --git a/test/test02.nk b/test/test02.nk
index 3fc8712..5dea0ed 100644
--- a/test/test02.nk
+++ b/test/test02.nk
@@ -1,3 +1,5 @@
 fn print_n_utf8 do
     print_n_utf8_impl drop
 end
+
+array drop array drop
\ No newline at end of file
diff --git a/test/test03.nk b/test/test03.nk
index 51f2f9f..c850e4b 100644
--- a/test/test03.nk
+++ b/test/test03.nk
@@ -1 +1,9 @@
-4 potato2
\ No newline at end of file
+4 potato2
+
+fn u do
+
+loop:
+    65 print_int
+loop 1 jump_if
+
+end
\ No newline at end of file
diff --git a/test/test04.nk b/test/test04.nk
index c31557e..79f5b5e 100644
--- a/test/test04.nk
+++ b/test/test04.nk
@@ -1 +1 @@
-4 potato3
\ No newline at end of file
+4 potato3