#include "mem.h"

#include "util.js.h"

#include "uv.h"
#include "quickjs.h"
#include "sqlite3.h"
#include "uv.h"

#include <openssl/crypto.h>

#include <string.h>
#include <stdbool.h>

typedef struct _tf_mem_node_t tf_mem_node_t;

static uv_mutex_t s_tracking_mutex;
static bool s_mem_tracking;
static tf_mem_node_t* s_mem_tracked;
static int64_t s_tf_malloc_size;
static int64_t s_uv_malloc_size;
static int64_t s_tls_malloc_size;
static int64_t s_js_malloc_size;
static int64_t s_sqlite_malloc_size;

extern uint32_t fnv32a(const void* buffer, int length, uint32_t start);

static size_t _tf_mem_round_up(size_t size)
{
	return (size + 7) & ~7;
}

void tf_mem_startup(bool tracking)
{
	s_mem_tracking = tracking;
	uv_mutex_init(&s_tracking_mutex);
}

void tf_mem_shutdown()
{
	s_mem_tracking = false;
	s_mem_tracked = NULL;
	uv_mutex_destroy(&s_tracking_mutex);
}

typedef struct _tf_mem_node_t
{
	void* ptr;
	tf_mem_node_t* next;
	tf_mem_node_t* previous;
	int frames_count;
	void* frames[];
} tf_mem_node_t;

static void _tf_mem_add_tracked_allocation(tf_mem_node_t* node)
{
	if (s_mem_tracking)
	{
		uv_mutex_lock(&s_tracking_mutex);
		if (s_mem_tracked)
		{
			node->next = s_mem_tracked;
			node->previous = s_mem_tracked->previous;
			s_mem_tracked->previous->next = node;
			s_mem_tracked->previous = node;
		}
		else
		{
			s_mem_tracked = node;
			node->next = node->previous = node;
		}
		uv_mutex_unlock(&s_tracking_mutex);
	}
}

static void _tf_mem_remove_tracked_allocation(tf_mem_node_t* node)
{
	if (s_mem_tracking)
	{
		uv_mutex_lock(&s_tracking_mutex);
		tf_mem_node_t* previous = node->previous;
		tf_mem_node_t* next = node->next;
		next->previous = previous;
		previous->next = next;
		node->next = NULL;
		node->previous = NULL;
		if (node == s_mem_tracked)
		{
			s_mem_tracked = next != node ? next : NULL;
		}
		uv_mutex_unlock(&s_tracking_mutex);
	}
}

void tf_mem_walk_allocations(void (*callback)(void* ptr, size_t size, int frames_count, void* const* frames, void* user_data), void* user_data)
{
	uv_mutex_lock(&s_tracking_mutex);
	for (tf_mem_node_t* node = s_mem_tracked ? s_mem_tracked->next : NULL; node; node = node->next)
	{
		size_t size = 0;
		void* frames[32];
		memcpy(&size, node->ptr, sizeof(size));
		if (s_mem_tracking)
		{
			if (node->frames_count)
			{
				memcpy(frames, node->frames, sizeof(void*) * node->frames_count);
			}
		}
		callback((void*)((intptr_t)node->ptr + sizeof(size_t)), size, node->frames_count, node->frames_count ? frames : NULL, user_data);
		if (node == s_mem_tracked)
		{
			break;
		}
	}
	uv_mutex_unlock(&s_tracking_mutex);
}

typedef struct _summary_t
{
	tf_mem_allocation_t* allocations;
	int count;
	int capacity;
} summary_t;

static int _tf_mem_hash_stack_compare(const void* a, const void* b)
{
	const tf_mem_allocation_t* aa = a;
	const tf_mem_allocation_t* ab = b;
	if (aa->stack_hash != ab->stack_hash)
	{
		return aa->stack_hash < ab->stack_hash ? -1 : 1;
	}
	if (aa->frames_count != ab->frames_count)
	{
		return aa->frames_count < ab->frames_count ? -1 : 1;
	}
	return memcmp(aa->frames, ab->frames, sizeof(void*) * aa->frames_count);
}

static int _tf_mem_size_compare(const void* a, const void* b)
{
	const tf_mem_allocation_t* aa = a;
	const tf_mem_allocation_t* ab = b;
	if (aa->size > ab->size)
	{
		return -1;
	}
	else if (ab->size > aa->size)
	{
		return 1;
	}
	return 0;
}

static void _tf_mem_summarize(void* ptr, size_t size, int frames_count, void* const* frames, void* user_data)
{
	summary_t* summary = user_data;
	tf_mem_allocation_t allocation = {
		.stack_hash = fnv32a(frames, sizeof(void*) * frames_count, 0),
		.count = 1,
		.size = size,
		.frames_count = frames_count,
	};
	memcpy(allocation.frames, frames, sizeof(void*) * frames_count);

	int index = tf_util_insert_index(&allocation, summary->allocations, summary->count, sizeof(tf_mem_allocation_t), _tf_mem_hash_stack_compare);
	if (index < summary->count && allocation.stack_hash == summary->allocations[index].stack_hash && allocation.frames_count == summary->allocations[index].frames_count &&
	    memcmp(frames, summary->allocations[index].frames, sizeof(void*) * frames_count) == 0)
	{
		summary->allocations[index].count++;
		summary->allocations[index].size += size;
	}
	else
	{
		if (summary->count + 1 >= summary->capacity)
		{
			summary->capacity = summary->capacity ? summary->capacity * 2 : 256;
			summary->allocations = realloc(summary->allocations, sizeof(tf_mem_allocation_t) * summary->capacity);
		}
		if (index < summary->count)
		{
			memmove(summary->allocations + index + 1, summary->allocations + index, sizeof(tf_mem_allocation_t) * (summary->count - index));
		}
		summary->allocations[index] = allocation;
		summary->count++;
	}
}

tf_mem_allocation_t* tf_mem_summarize_allocations(int* out_count)
{
	summary_t summary = { 0 };
	tf_mem_walk_allocations(_tf_mem_summarize, &summary);
	if (summary.count)
	{
		qsort(summary.allocations, summary.count, sizeof(tf_mem_allocation_t), _tf_mem_size_compare);
	}
	*out_count = summary.count;
	tf_mem_allocation_t* result = tf_malloc(sizeof(tf_mem_allocation_t) * summary.count);
	if (result && summary.count)
	{
		memcpy(result, summary.allocations, sizeof(tf_mem_allocation_t) * summary.count);
	}
	free(summary.allocations);
	return result;
}

#if defined(__GNUC__) && !defined(__APPLE__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wanalyzer-malloc-leak"
#endif
static void* _tf_alloc(int64_t* total, size_t size)
{
	size_t overhead = sizeof(size_t);
	void* buffer[32];
	int count = 0;
	if (s_mem_tracking)
	{
		count = tf_util_backtrace(buffer, sizeof(buffer) / sizeof(*buffer));
		overhead += sizeof(tf_mem_node_t) + sizeof(void*) * count;
	}

	size_t rounded_up_size = _tf_mem_round_up(size);
	void* ptr = malloc(rounded_up_size + overhead);
	if (ptr)
	{
		__atomic_add_fetch(total, size, __ATOMIC_RELAXED);
		memcpy(ptr, &size, sizeof(size_t));
		if (s_mem_tracking)
		{
			tf_mem_node_t* node = (tf_mem_node_t*)((intptr_t)ptr + sizeof(size_t) + rounded_up_size);
			memcpy(node, &(tf_mem_node_t) { .ptr = ptr, .frames_count = count }, sizeof(tf_mem_node_t));
			if (count)
			{
				memcpy(node->frames, buffer, sizeof(void*) * count);
			}
			_tf_mem_add_tracked_allocation(node);
		}
		return (void*)((intptr_t)ptr + sizeof(size_t));
	}
	else
	{
		return NULL;
	}
}

static void* _tf_realloc(int64_t* total, void* ptr, size_t size)
{
	if (!ptr && !size)
	{
		return NULL;
	}

	void* buffer[32];
	int count = 0;
	size_t overhead = sizeof(size_t);
	if (s_mem_tracking)
	{
		count = tf_util_backtrace(buffer, sizeof(buffer) / sizeof(*buffer));
		overhead += sizeof(tf_mem_node_t) + sizeof(void*) * count;
	}

	void* old_ptr = ptr ? (void*)((intptr_t)ptr - sizeof(size_t)) : NULL;
	size_t old_size = 0;
	if (old_ptr)
	{
		memcpy(&old_size, old_ptr, sizeof(size_t));
	}
	void* new_ptr = NULL;
	tf_mem_node_t* node = (void*)((intptr_t)ptr + _tf_mem_round_up(old_size));
	size_t rounded_up_size = _tf_mem_round_up(size);
	if (old_ptr && !size)
	{
		_tf_mem_remove_tracked_allocation(node);
		free(old_ptr);
	}
	else
	{
		if (old_ptr)
		{
			_tf_mem_remove_tracked_allocation(node);
		}
		new_ptr = realloc(old_ptr, rounded_up_size + overhead);
	}
	if (new_ptr)
	{
		__atomic_add_fetch(total, (int64_t)size - (int64_t)old_size, __ATOMIC_RELAXED);
		memcpy(new_ptr, &size, sizeof(size_t));
		if (s_mem_tracking)
		{
			tf_mem_node_t* node = (tf_mem_node_t*)((intptr_t)new_ptr + sizeof(size_t) + rounded_up_size);
			memcpy(node, &(tf_mem_node_t) { .ptr = new_ptr, .frames_count = count }, sizeof(tf_mem_node_t));
			if (count)
			{
				memcpy(node->frames, buffer, sizeof(void*) * count);
			}
			_tf_mem_add_tracked_allocation(node);
		}
		return (void*)((intptr_t)new_ptr + sizeof(size_t));
	}
	else
	{
		__atomic_sub_fetch(total, old_size, __ATOMIC_RELAXED);
		return NULL;
	}
}
#if defined(__GNUC__) && !defined(__APPLE__)
#pragma GCC diagnostic pop
#endif

static void _tf_free(int64_t* total, void* ptr)
{
	if (ptr)
	{
		void* old_ptr = (void*)((intptr_t)ptr - sizeof(size_t));
		size_t size = 0;
		memcpy(&size, old_ptr, sizeof(size_t));
		tf_mem_node_t* node = (void*)((intptr_t)ptr + _tf_mem_round_up(size));
		__atomic_sub_fetch(total, size, __ATOMIC_RELAXED);
		_tf_mem_remove_tracked_allocation(node);
		free(old_ptr);
	}
}

static void* _tf_uv_alloc(size_t size)
{
	return _tf_alloc(&s_uv_malloc_size, size);
}

static void* _tf_uv_realloc(void* ptr, size_t size)
{
	return _tf_realloc(&s_uv_malloc_size, ptr, size);
}

#if defined(__GNUC__) && !defined(__APPLE__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wanalyzer-malloc-leak"
#endif
static void* _tf_uv_calloc(size_t nmemb, size_t size)
{
	size_t total_size = nmemb * size;
	size_t rounded_up_size = _tf_mem_round_up(total_size);
	size_t overhead = sizeof(size_t);
	void* buffer[32];
	int count = 0;
	if (s_mem_tracking)
	{
		count = tf_util_backtrace(buffer, sizeof(buffer) / sizeof(*buffer));
		overhead += sizeof(tf_mem_node_t) + sizeof(void*) * count;
	}
	void* ptr = calloc(1, rounded_up_size + overhead);
	if (ptr)
	{
		__atomic_add_fetch(&s_uv_malloc_size, total_size, __ATOMIC_RELAXED);
		memcpy(ptr, &total_size, sizeof(size_t));
		if (s_mem_tracking)
		{
			tf_mem_node_t* node = (tf_mem_node_t*)((intptr_t)ptr + sizeof(size_t) + rounded_up_size);
			memcpy(node, &(tf_mem_node_t) { .ptr = ptr, .frames_count = count }, sizeof(tf_mem_node_t));
			if (count)
			{
				memcpy(node->frames, buffer, sizeof(void*) * count);
			}
			_tf_mem_add_tracked_allocation(node);
		}
		return (void*)((intptr_t)ptr + sizeof(size_t));
	}
	else
	{
		return NULL;
	}
}
#if defined(__GNUC__) && !defined(__APPLE__)
#pragma GCC diagnostic pop
#endif

static void _tf_uv_free(void* ptr)
{
	_tf_free(&s_uv_malloc_size, ptr);
}

void tf_mem_replace_uv_allocator()
{
	uv_replace_allocator(_tf_uv_alloc, _tf_uv_realloc, _tf_uv_calloc, _tf_uv_free);
}

size_t tf_mem_get_uv_malloc_size()
{
	return s_uv_malloc_size;
}

#if defined(__OpenBSD__)
static void* _tf_tls_alloc(size_t size)
#else
static void* _tf_tls_alloc(size_t size, const char* file, int line)
#endif
{
	return _tf_alloc(&s_tls_malloc_size, size);
}

#if defined(__OpenBSD__)
static void* _tf_tls_realloc(void* ptr, size_t size)
#else
static void* _tf_tls_realloc(void* ptr, size_t size, const char* file, int line)
#endif
{
	return _tf_realloc(&s_tls_malloc_size, ptr, size);
}

#if defined(__OpenBSD__)
static void _tf_tls_free(void* ptr)
#else
static void _tf_tls_free(void* ptr, const char* file, int line)
#endif
{
	_tf_free(&s_tls_malloc_size, ptr);
}

void tf_mem_replace_tls_allocator()
{
	CRYPTO_set_mem_functions(_tf_tls_alloc, _tf_tls_realloc, _tf_tls_free);
}

size_t tf_mem_get_tls_malloc_size()
{
	return s_tls_malloc_size;
}

void* tf_malloc(size_t size)
{
	return _tf_alloc(&s_tf_malloc_size, size);
}

void* tf_realloc(void* ptr, size_t size)
{
	return _tf_realloc(&s_tf_malloc_size, ptr, size);
}

void tf_free(void* ptr)
{
	_tf_free(&s_tf_malloc_size, ptr);
}

char* tf_strdup(const char* string)
{
	if (!string)
	{
		return NULL;
	}
	size_t len = strlen(string);
	char* buffer = tf_malloc(len + 1);
	if (buffer)
	{
		memcpy(buffer, string, len + 1);
	}
	return buffer;
}

void* tf_resize_vec(void* ptr, size_t size)
{
	void* alloc_ptr = ptr ? (void*)((intptr_t)ptr - sizeof(size_t)) : NULL;
	size_t alloc_size = 0;
	if (alloc_ptr)
	{
		memcpy(&alloc_size, alloc_ptr, sizeof(size_t));
	}

	if ((alloc_size >= 16 * size + sizeof(size_t)) || !size)
	{
		/* If we've dropped significantly in size or are freeing, resize down. */
		return tf_realloc(ptr, size);
	}
	else if (alloc_size >= size + sizeof(size_t))
	{
		/* Otherwise, if we're big enough, stay the same size. */
		return ptr;
	}
	else
	{
		/* If we need to grow, overallocate 2x to give room to continue growing. */
		return tf_realloc(ptr, size * 2);
	}
}

size_t tf_mem_get_tf_malloc_size()
{
	return s_tf_malloc_size;
}

static void* _tf_js_malloc(JSMallocState* state, size_t size)
{
	int64_t delta = 0;
	void* ptr = _tf_alloc(&delta, size);
	if (ptr)
	{
		__atomic_add_fetch(&s_js_malloc_size, delta, __ATOMIC_RELAXED);
		state->malloc_count++;
		state->malloc_size += delta;
	}
	return ptr;
}

static void _tf_js_free(JSMallocState* state, void* ptr)
{
	if (ptr)
	{
		int64_t delta = 0;
		_tf_free(&delta, ptr);
		__atomic_add_fetch(&s_js_malloc_size, delta, __ATOMIC_RELAXED);
		state->malloc_count--;
		state->malloc_size += delta;
	}
}

static void* _tf_js_realloc(JSMallocState* state, void* ptr, size_t size)
{
	int64_t delta = 0;
	void* result = _tf_realloc(&delta, ptr, size);
	__atomic_add_fetch(&s_js_malloc_size, delta, __ATOMIC_RELAXED);
	state->malloc_count += (ptr ? -1 : 0) + (result ? 1 : 0);
	state->malloc_size += delta;
	return result;
}

static size_t _tf_js_malloc_usable_size(const void* ptr)
{
	void* old_ptr = ptr ? (void*)((intptr_t)ptr - sizeof(size_t)) : NULL;
	size_t old_size = 0;
	if (old_ptr)
	{
		memcpy(&old_size, old_ptr, sizeof(size_t));
	}
	return old_size;
}

void tf_get_js_malloc_functions(JSMallocFunctions* out)
{
	*out = (JSMallocFunctions) {
		.js_malloc = _tf_js_malloc,
		.js_free = _tf_js_free,
		.js_realloc = _tf_js_realloc,
		.js_malloc_usable_size = _tf_js_malloc_usable_size,
	};
}

size_t tf_mem_get_js_malloc_size()
{
	return s_js_malloc_size;
}

static void* _tf_sqlite_malloc(int size)
{
	return _tf_alloc(&s_sqlite_malloc_size, size);
}

static void _tf_sqlite_free(void* ptr)
{
	_tf_free(&s_sqlite_malloc_size, ptr);
}

static void* _tf_sqlite_realloc(void* ptr, int size)
{
	return _tf_realloc(&s_sqlite_malloc_size, ptr, size);
}

static int _tf_sqlite_size(void* ptr)
{
	void* old_ptr = ptr ? (void*)((intptr_t)ptr - sizeof(size_t)) : NULL;
	size_t old_size = 0;
	if (old_ptr)
	{
		memcpy(&old_size, old_ptr, sizeof(size_t));
	}
	return (int)old_size;
}

static int _tf_sqlite_roundup(int size)
{
	return (size + 7) & ~7;
}

static int _tf_sqlite_init(void* user_data)
{
	return SQLITE_OK;
}

static void _tf_sqlite_shutdown(void* user_data)
{
}

void tf_mem_replace_sqlite_allocator()
{
	sqlite3_mem_methods methods = {
		.xMalloc = _tf_sqlite_malloc,
		.xFree = _tf_sqlite_free,
		.xRealloc = _tf_sqlite_realloc,
		.xSize = _tf_sqlite_size,
		.xRoundup = _tf_sqlite_roundup,
		.xInit = _tf_sqlite_init,
		.xShutdown = _tf_sqlite_shutdown,
	};
	sqlite3_config(SQLITE_CONFIG_MALLOC, &methods);
}

size_t tf_mem_get_sqlite_malloc_size()
{
	return s_sqlite_malloc_size;
}