#include "mem.h"

#include "util.js.h"

#include <uv.h>

#include <openssl/crypto.h>

#include <quickjs.h>
#include <sqlite3.h>
#include <uv.h>

#include <string.h>
#include <stdbool.h>
#include <stdint.h>

/* Guards s_mem_tracked, s_mem_tracked_count and s_mem_tracked_capacity. */
static uv_mutex_t s_tracking_mutex;
/* When true, allocations record a backtrace trailer and are added to s_mem_tracked. */
static bool s_mem_tracking;
/* Array of raw block pointers (pointing at the size_t header, not the user pointer). */
static void** s_mem_tracked;
/* Number of valid entries in s_mem_tracked. */
static int s_mem_tracked_count;
/* Number of entries s_mem_tracked has room for. */
static int s_mem_tracked_capacity;
/* Per-subsystem running totals of requested bytes; updated with __atomic builtins. */
static int64_t s_tf_malloc_size;
static int64_t s_uv_malloc_size;
static int64_t s_tls_malloc_size;
static int64_t s_js_malloc_size;
static int64_t s_sqlite_malloc_size;

/* Hash helper defined elsewhere; used here to hash an array of backtrace frames. */
extern uint32_t fnv32a(const void* buffer, int length, uint32_t start);

/**
** Initialize the allocation tracking state.
**
** The mutex is created before the tracking flag is published so that no
** allocation can observe tracking as enabled while the mutex is still
** uninitialized.  If the mutex cannot be created, tracking stays disabled.
**
** @param tracking Whether allocations should record backtraces and be tracked.
*/
void tf_mem_startup(bool tracking)
{
	int result = uv_mutex_init(&s_tracking_mutex);
	s_mem_tracking = tracking && result == 0;
}

void tf_mem_shutdown()
{
	s_mem_tracking = false;
	free(s_mem_tracked);
	s_mem_tracked = NULL;
	s_mem_tracked_capacity = 0;
	uv_mutex_destroy(&s_tracking_mutex);
}

/**
** Record a raw block pointer in the tracked-allocation table.
**
** Does nothing when tracking is disabled.  The table doubles in size (starting
** at 256 entries) when it is about to fill.  If the table cannot be grown and
** has no free slot, the allocation is simply left untracked rather than
** dereferencing a NULL table.
**
** @param ptr Raw block pointer (address of the size header).
*/
static void _tf_mem_add_tracked_allocation(void* ptr)
{
	if (!s_mem_tracking)
	{
		return;
	}

	uv_mutex_lock(&s_tracking_mutex);
	bool has_room = true;
	if (s_mem_tracked_count + 1 >= s_mem_tracked_capacity)
	{
		int new_capacity = s_mem_tracked_capacity ? (s_mem_tracked_capacity * 2) : 256;
		void** grown = realloc(s_mem_tracked, sizeof(void*) * new_capacity);
		if (grown)
		{
			s_mem_tracked = grown;
			s_mem_tracked_capacity = new_capacity;
		}
		else
		{
			/* The old table is still valid; use its last slot if one remains. */
			has_room = s_mem_tracked_count < s_mem_tracked_capacity;
		}
	}
	if (has_room)
	{
		s_mem_tracked[s_mem_tracked_count++] = ptr;
	}
	uv_mutex_unlock(&s_tracking_mutex);
}

/**
** Drop a raw block pointer from the tracked-allocation table.
**
** Does nothing when tracking is disabled or the pointer is not present.  The
** first matching entry is overwritten with the last entry, so table order is
** not preserved.
**
** @param ptr Raw block pointer (address of the size header).
*/
static void _tf_mem_remove_tracked_allocation(void* ptr)
{
	if (!s_mem_tracking)
	{
		return;
	}

	uv_mutex_lock(&s_tracking_mutex);
	int index = 0;
	while (index < s_mem_tracked_count && s_mem_tracked[index] != ptr)
	{
		index++;
	}
	if (index < s_mem_tracked_count)
	{
		/* Swap-remove: move the tail entry into the vacated slot. */
		s_mem_tracked_count--;
		s_mem_tracked[index] = s_mem_tracked[s_mem_tracked_count];
	}
	uv_mutex_unlock(&s_tracking_mutex);
}

/**
** Invoke a callback for every tracked allocation.
**
** Each tracked block is laid out as: size_t size, user data, then (when
** tracking) an int frame count followed by that many frame pointers.  The
** frame count is read back from heap memory, so it is validated against the
** local buffer before being used as a copy length; an out-of-range value is
** reported as "no frames" instead of overrunning the stack.
**
** The tracking mutex is held for the whole walk, including while the callback
** runs, so the callback must not call anything that adds or removes tracked
** allocations (those paths take the same mutex).
**
** @param callback Called with the user pointer, its requested size, the frame
**        count, the frames (NULL when there are none) and user_data.
** @param user_data Passed through to the callback unchanged.
*/
void tf_mem_walk_allocations(void (*callback)(void* ptr, size_t size, int frames_count, void* const* frames, void* user_data), void* user_data)
{
	uv_mutex_lock(&s_tracking_mutex);
	for (int i = 0; i < s_mem_tracked_count; i++)
	{
		size_t size = 0;
		int frames_count = 0;
		void* frames[32];
		const int max_frames = (int)(sizeof(frames) / sizeof(*frames));
		char* base = s_mem_tracked[i];
		memcpy(&size, base, sizeof(size));
		if (s_mem_tracking)
		{
			const char* trailer = base + sizeof(size_t) + size;
			memcpy(&frames_count, trailer, sizeof(frames_count));
			if (frames_count < 0 || frames_count > max_frames)
			{
				/* Not a count this file could have written; don't trust the frames. */
				frames_count = 0;
			}
			if (frames_count)
			{
				memcpy(frames, trailer + sizeof(frames_count), sizeof(void*) * frames_count);
			}
		}
		callback(
			(void*)(base + sizeof(size_t)),
			size,
			frames_count,
			frames_count ? frames : NULL,
			user_data);
	}
	uv_mutex_unlock(&s_tracking_mutex);
}

/* Accumulator passed as user_data to _tf_mem_summarize while walking allocations. */
typedef struct _summary_t
{
	/* Array grown with realloc; new entries are inserted at the index tf_util_insert_index reports. */
	tf_mem_allocation_t* allocations;
	/* Number of entries currently in use. */
	int count;
	/* Number of entries the array has room for. */
	int capacity;
} summary_t;

/**
** Order two tf_mem_allocation_t entries by call stack.
**
** Compares the stack hash first, then the frame count, and finally the raw
** frame pointers byte-wise.
**
** @param a Pointer to the first tf_mem_allocation_t.
** @param b Pointer to the second tf_mem_allocation_t.
** @return Negative, zero or positive as a sorts before, equal to or after b.
*/
static int _tf_mem_hash_stack_compare(const void* a, const void* b)
{
	const tf_mem_allocation_t* left = a;
	const tf_mem_allocation_t* right = b;

	if (left->stack_hash < right->stack_hash)
	{
		return -1;
	}
	if (left->stack_hash > right->stack_hash)
	{
		return 1;
	}
	if (left->frames_count < right->frames_count)
	{
		return -1;
	}
	if (left->frames_count > right->frames_count)
	{
		return 1;
	}
	return memcmp(left->frames, right->frames, sizeof(void*) * left->frames_count);
}

/**
** qsort comparator that orders tf_mem_allocation_t entries by size, largest first.
**
** @param a Pointer to the first tf_mem_allocation_t.
** @param b Pointer to the second tf_mem_allocation_t.
** @return -1 if a is larger, 1 if b is larger, 0 if the sizes are equal.
*/
static int _tf_mem_size_compare(const void* a, const void* b)
{
	const tf_mem_allocation_t* left = a;
	const tf_mem_allocation_t* right = b;
	/* Descending: the larger entry sorts first. */
	return (left->size < right->size) - (left->size > right->size);
}

static void _tf_mem_summarize(void* ptr, size_t size, int frames_count, void* const* frames, void* user_data)
{
	summary_t* summary = user_data;
	tf_mem_allocation_t allocation =
	{
		.stack_hash = fnv32a(frames, sizeof(void*) * frames_count, 0),
		.count = 1,
		.size = size,
		.frames_count = frames_count,
	};
	memcpy(allocation.frames, frames, sizeof(void*) * frames_count);

	int index = tf_util_insert_index(&allocation, summary->allocations, summary->count, sizeof(tf_mem_allocation_t), _tf_mem_hash_stack_compare);
	if (index < summary->count &&
		allocation.stack_hash == summary->allocations[index].stack_hash &&
		allocation.frames_count == summary->allocations[index].frames_count &&
		memcmp(frames, summary->allocations[index].frames, sizeof(void*) * frames_count) == 0)
	{
		summary->allocations[index].count++;
		summary->allocations[index].size += size;
	}
	else
	{
		if (summary->count + 1 >= summary->capacity)
		{
			summary->capacity = summary->capacity ? summary->capacity * 2 : 256;
			summary->allocations = realloc(summary->allocations, sizeof(tf_mem_allocation_t) * summary->capacity);
		}
		if (index < summary->count)
		{
			memmove(summary->allocations + index + 1, summary->allocations + index, sizeof(tf_mem_allocation_t) * (summary->count - index));
		}
		summary->allocations[index] = allocation;
		summary->count++;
	}
}

/**
** Build a per-call-stack summary of all tracked allocations, largest first.
**
** @param out_count Receives the number of entries in the returned array
**        (0 when the result allocation fails).  May be NULL.
** @return An array allocated with tf_malloc that the caller releases with
**         tf_free, or NULL if that allocation fails.
*/
tf_mem_allocation_t* tf_mem_summarize_allocations(int* out_count)
{
	summary_t summary = { 0 };
	tf_mem_walk_allocations(_tf_mem_summarize, &summary);
	if (summary.count > 0)
	{
		qsort(summary.allocations, summary.count, sizeof(tf_mem_allocation_t), _tf_mem_size_compare);
	}

	tf_mem_allocation_t* result = tf_malloc(sizeof(tf_mem_allocation_t) * summary.count);
	/* summary.allocations is NULL when nothing was tracked; skip the copy then. */
	if (result && summary.count > 0)
	{
		memcpy(result, summary.allocations, sizeof(tf_mem_allocation_t) * summary.count);
	}
	if (out_count)
	{
		*out_count = result ? summary.count : 0;
	}
	free(summary.allocations);
	return result;
}

/**
** Allocate a block with a size header and, when tracking, a backtrace trailer.
**
** Layout: size_t size | user data (size bytes) | int frame count | frames.
** The trailer is only present when tracking was enabled at allocation time.
** The frame count is always written when the trailer is present, including a
** count of zero, because tf_mem_walk_allocations reads it back.
**
** @param total Counter to which the requested size is atomically added.
** @param size Requested user size in bytes.
** @return Pointer to the user data, or NULL on failure or size overflow.
*/
static void* _tf_alloc(int64_t* total, size_t size)
{
	size_t overhead = sizeof(size_t);
	void* buffer[32];
	int count = 0;
	/* Sample the flag once so the reserved space and the trailer write agree. */
	bool tracking = s_mem_tracking;
	if (tracking)
	{
		count = tf_util_backtrace(buffer, sizeof(buffer) / sizeof(*buffer));
		if (count < 0)
		{
			count = 0;
		}
		overhead += sizeof(count) + sizeof(void*) * count;
	}

	if (size > SIZE_MAX - overhead)
	{
		/* size + overhead would wrap and under-allocate. */
		return NULL;
	}

	void* ptr = malloc(size + overhead);
	if (!ptr)
	{
		return NULL;
	}

	__atomic_add_fetch(total, size, __ATOMIC_RELAXED);
	memcpy(ptr, &size, sizeof(size_t));
	if (tracking)
	{
		char* trailer = (char*)ptr + sizeof(size_t) + size;
		memcpy(trailer, &count, sizeof(count));
		if (count)
		{
			memcpy(trailer + sizeof(count), buffer, sizeof(void*) * count);
		}
	}
	_tf_mem_add_tracked_allocation(ptr);
	return (void*)((char*)ptr + sizeof(size_t));
}

/**
** Resize a block created by _tf_alloc/_tf_realloc, keeping header and trailer.
**
** - ptr != NULL and size == 0 frees the block and returns NULL.
** - Otherwise the block is resized (or created when ptr is NULL) and the
**   counter is adjusted by the difference between new and old sizes.
** - If the resize fails, NULL is returned, the old block is still valid and
**   owned by the caller, and the counter is left unchanged.  The old block is
**   no longer in the tracking table in that case; it cannot safely be re-added
**   here because this function cannot tell whether it was tracked before.
**
** The old block is removed from the tracking table before realloc so that a
** concurrent walk never sees a pointer that realloc may have released.
**
** @param total Counter adjusted atomically by the change in requested size.
** @param ptr User pointer previously returned by this allocator, or NULL.
** @param size New requested user size in bytes.
** @return Pointer to the user data, or NULL on free, failure or size overflow.
*/
static void* _tf_realloc(int64_t* total, void* ptr, size_t size)
{
	void* buffer[32];
	int count = 0;
	size_t overhead = sizeof(size_t);
	/* Sample the flag once so the reserved space and the trailer write agree. */
	bool tracking = s_mem_tracking;
	if (tracking)
	{
		count = tf_util_backtrace(buffer, sizeof(buffer) / sizeof(*buffer));
		if (count < 0)
		{
			count = 0;
		}
		overhead += sizeof(count) + sizeof(void*) * count;
	}

	void* old_ptr = ptr ? (void*)((char*)ptr - sizeof(size_t)) : NULL;
	size_t old_size = 0;
	if (old_ptr)
	{
		memcpy(&old_size, old_ptr, sizeof(size_t));
	}

	if (old_ptr && !size)
	{
		/* realloc(ptr, 0) is treated as free. */
		_tf_mem_remove_tracked_allocation(old_ptr);
		free(old_ptr);
		__atomic_sub_fetch(total, old_size, __ATOMIC_RELAXED);
		return NULL;
	}

	if (size > SIZE_MAX - overhead)
	{
		/* size + overhead would wrap; the old block is left untouched. */
		return NULL;
	}

	if (old_ptr)
	{
		_tf_mem_remove_tracked_allocation(old_ptr);
	}
	void* new_ptr = realloc(old_ptr, size + overhead);
	if (!new_ptr)
	{
		/* The old block is still allocated, so its bytes remain accounted for. */
		return NULL;
	}

	__atomic_add_fetch(total, (int64_t)size - (int64_t)old_size, __ATOMIC_RELAXED);
	memcpy(new_ptr, &size, sizeof(size_t));
	if (tracking)
	{
		char* trailer = (char*)new_ptr + sizeof(size_t) + size;
		memcpy(trailer, &count, sizeof(count));
		if (count)
		{
			memcpy(trailer + sizeof(count), buffer, sizeof(void*) * count);
		}
	}
	_tf_mem_add_tracked_allocation(new_ptr);
	return (void*)((char*)new_ptr + sizeof(size_t));
}

/**
** Release a block created by _tf_alloc/_tf_realloc.
**
** Reads the requested size from the header that precedes the user pointer,
** subtracts it from the counter, drops the block from the tracking table and
** frees it.  A NULL pointer is ignored.
**
** @param total Counter from which the block's size is atomically subtracted.
** @param ptr User pointer previously returned by this allocator, or NULL.
*/
static void _tf_free(int64_t* total, void* ptr)
{
	if (!ptr)
	{
		return;
	}

	void* base = (void*)((char*)ptr - sizeof(size_t));
	size_t released = 0;
	memcpy(&released, base, sizeof(released));
	__atomic_sub_fetch(total, released, __ATOMIC_RELAXED);
	_tf_mem_remove_tracked_allocation(base);
	free(base);
}

/**
** malloc hook handed to uv_replace_allocator; counts against the uv total.
** @param size Requested size in bytes.
** @return The new allocation, or NULL on failure.
*/
static void* _tf_uv_alloc(size_t size)
{
	void* result = _tf_alloc(&s_uv_malloc_size, size);
	return result;
}

/**
** realloc hook handed to uv_replace_allocator; counts against the uv total.
** @param ptr Existing allocation from this allocator, or NULL.
** @param size New requested size in bytes.
** @return The resized allocation, or NULL.
*/
static void* _tf_uv_realloc(void* ptr, size_t size)
{
	void* result = _tf_realloc(&s_uv_malloc_size, ptr, size);
	return result;
}

/**
** calloc hook handed to uv_replace_allocator; counts against the uv total.
**
** Goes through _tf_alloc so the block gets the same header/trailer layout and
** tracking as every other uv allocation, then zeroes the user data.  The
** element count times element size is checked for overflow first.
**
** @param nmemb Number of elements.
** @param size Size of each element in bytes.
** @return Zero-filled allocation, or NULL on failure or overflow.
*/
static void* _tf_uv_calloc(size_t nmemb, size_t size)
{
	if (size && nmemb > SIZE_MAX / size)
	{
		return NULL;
	}

	size_t total_size = nmemb * size;
	void* ptr = _tf_alloc(&s_uv_malloc_size, total_size);
	if (ptr)
	{
		memset(ptr, 0, total_size);
	}
	return ptr;
}

/**
** free hook handed to uv_replace_allocator; credits the uv total.
** @param ptr Allocation from this allocator, or NULL.
*/
static void _tf_uv_free(void* ptr)
{
	int64_t* counter = &s_uv_malloc_size;
	_tf_free(counter, ptr);
}

/**
** Install this file's accounting hooks as libuv's allocator.
** The status returned by uv_replace_allocator is not inspected.
*/
void tf_mem_replace_uv_allocator()
{
	(void)uv_replace_allocator(
		_tf_uv_alloc,
		_tf_uv_realloc,
		_tf_uv_calloc,
		_tf_uv_free);
}

/**
** Get the number of bytes currently requested through the libuv hooks.
** The counter is written with atomic builtins from allocation paths, so it is
** read atomically here as well.
** @return The current uv allocation total in bytes.
*/
size_t tf_mem_get_uv_malloc_size()
{
	return (size_t)__atomic_load_n(&s_uv_malloc_size, __ATOMIC_RELAXED);
}

/**
** malloc hook passed to CRYPTO_set_mem_functions; counts against the TLS total.
** @param size Requested size in bytes.
** @param file Caller's source file (ignored).
** @param line Caller's source line (ignored).
** @return The new allocation, or NULL on failure.
*/
void* _tf_tls_alloc(size_t size, const char* file, int line)
{
	(void)file;
	(void)line;
	return _tf_alloc(&s_tls_malloc_size, size);
}

/**
** realloc hook passed to CRYPTO_set_mem_functions; counts against the TLS total.
** @param ptr Existing allocation from this allocator, or NULL.
** @param size New requested size in bytes.
** @param file Caller's source file (ignored).
** @param line Caller's source line (ignored).
** @return The resized allocation, or NULL.
*/
void* _tf_tls_realloc(void* ptr, size_t size, const char* file, int line)
{
	(void)file;
	(void)line;
	return _tf_realloc(&s_tls_malloc_size, ptr, size);
}

/**
** free hook passed to CRYPTO_set_mem_functions; credits the TLS total.
** @param ptr Allocation from this allocator, or NULL.
** @param file Caller's source file (ignored).
** @param line Caller's source line (ignored).
*/
void _tf_tls_free(void* ptr, const char* file, int line)
{
	(void)file;
	(void)line;
	_tf_free(&s_tls_malloc_size, ptr);
}

/**
** Install this file's accounting hooks as OpenSSL's memory functions.
** The status returned by CRYPTO_set_mem_functions is not inspected.
*/
void tf_mem_replace_tls_allocator()
{
	(void)CRYPTO_set_mem_functions(
		_tf_tls_alloc,
		_tf_tls_realloc,
		_tf_tls_free);
}

/**
** Get the number of bytes currently requested through the OpenSSL hooks.
** The counter is written with atomic builtins from allocation paths, so it is
** read atomically here as well.
** @return The current TLS allocation total in bytes.
*/
size_t tf_mem_get_tls_malloc_size()
{
	return (size_t)__atomic_load_n(&s_tls_malloc_size, __ATOMIC_RELAXED);
}

/**
** Allocate memory counted against the general-purpose total.
** @param size Requested size in bytes.
** @return The new allocation (release with tf_free), or NULL on failure.
*/
void* tf_malloc(size_t size)
{
	void* result = _tf_alloc(&s_tf_malloc_size, size);
	return result;
}

/**
** Resize memory counted against the general-purpose total.
** @param ptr Existing allocation from tf_malloc/tf_realloc, or NULL.
** @param size New requested size in bytes.
** @return The resized allocation, or NULL.
*/
void* tf_realloc(void* ptr, size_t size)
{
	void* result = _tf_realloc(&s_tf_malloc_size, ptr, size);
	return result;
}

/**
** Release memory obtained from tf_malloc/tf_realloc.
** @param ptr Allocation to release, or NULL.
*/
void tf_free(void* ptr)
{
	int64_t* counter = &s_tf_malloc_size;
	_tf_free(counter, ptr);
}

/**
** Duplicate a NUL-terminated string using tf_malloc.
** @param string String to copy; NULL yields NULL.
** @return A copy the caller releases with tf_free, or NULL if string is NULL
**         or the allocation fails.
*/
char* tf_strdup(const char* string)
{
	if (!string)
	{
		return NULL;
	}

	size_t bytes = strlen(string) + 1;
	char* buffer = tf_malloc(bytes);
	if (buffer)
	{
		memcpy(buffer, string, bytes);
	}
	return buffer;
}

/**
** Resize a growable buffer while avoiding a reallocation on every call.
**
** The current size is read from the header preceding ptr.  The thresholds are
** the same as before (both include a sizeof(size_t) margin), but they are
** evaluated by subtraction/division so that very large sizes cannot wrap, and
** the 2x growth request falls back to the exact size when doubling would
** overflow and yield a buffer smaller than requested.
**
** @param ptr Buffer from tf_malloc/tf_realloc/tf_resize_vec, or NULL.
** @param size Number of bytes the caller needs; 0 frees an existing buffer.
** @return The (possibly moved) buffer, or whatever tf_realloc returns.
*/
void* tf_resize_vec(void* ptr, size_t size)
{
	size_t alloc_size = 0;
	if (ptr)
	{
		memcpy(&alloc_size, (const char*)ptr - sizeof(size_t), sizeof(size_t));
	}

	/* alloc_size >= N * size + sizeof(size_t), rewritten without overflow. */
	bool has_margin = alloc_size >= sizeof(size_t);
	size_t spare = has_margin ? alloc_size - sizeof(size_t) : 0;

	if (!size || (has_margin && spare / 16 >= size))
	{
		/* If we've dropped significantly in size or are freeing, resize down. */
		return tf_realloc(ptr, size);
	}
	else if (has_margin && spare >= size)
	{
		/* Otherwise, if we're big enough, stay the same size. */
		return ptr;
	}
	else
	{
		/* If we need to grow, overallocate 2x to give room to continue growing. */
		size_t target = size > SIZE_MAX / 2 ? size : size * 2;
		return tf_realloc(ptr, target);
	}
}

/**
** Get the number of bytes currently requested through tf_malloc/tf_realloc.
** The counter is written with atomic builtins from allocation paths, so it is
** read atomically here as well.
** @return The current general-purpose allocation total in bytes.
*/
size_t tf_mem_get_tf_malloc_size()
{
	return (size_t)__atomic_load_n(&s_tf_malloc_size, __ATOMIC_RELAXED);
}

/**
** QuickJS malloc hook.
**
** Allocates through _tf_alloc using a local counter to learn how many bytes
** were added, then applies that amount to both the global JS total and the
** runtime's JSMallocState.
**
** @param state QuickJS allocator state to update.
** @param size Requested size in bytes.
** @return The new allocation, or NULL on failure (state is left untouched).
*/
static void* _tf_js_malloc(JSMallocState* state, size_t size)
{
	int64_t added = 0;
	void* result = _tf_alloc(&added, size);
	if (!result)
	{
		return NULL;
	}

	__atomic_add_fetch(&s_js_malloc_size, added, __ATOMIC_RELAXED);
	state->malloc_count++;
	state->malloc_size += added;
	return result;
}

/**
** QuickJS free hook.
**
** Frees through _tf_free using a local counter, which ends up holding the
** negated block size, and applies that to the global JS total and the
** runtime's JSMallocState.  A NULL pointer is ignored.
**
** @param state QuickJS allocator state to update.
** @param ptr Allocation from this allocator, or NULL.
*/
static void _tf_js_free(JSMallocState* state, void* ptr)
{
	if (!ptr)
	{
		return;
	}

	int64_t change = 0;
	_tf_free(&change, ptr);
	__atomic_add_fetch(&s_js_malloc_size, change, __ATOMIC_RELAXED);
	state->malloc_count--;
	state->malloc_size += change;
}

/**
** QuickJS realloc hook.
**
** Resizes through _tf_realloc using a local counter to learn the change in
** bytes, then applies it to the global JS total and the JSMallocState.
**
** A NULL result with a nonzero size means the resize failed and the original
** block (if any) is still live, so no accounting is changed in that case.
** A NULL result with size == 0 is a free and is accounted normally.
**
** @param state QuickJS allocator state to update.
** @param ptr Existing allocation from this allocator, or NULL.
** @param size New requested size in bytes.
** @return The resized allocation, or NULL.
*/
static void* _tf_js_realloc(JSMallocState* state, void* ptr, size_t size)
{
	int64_t delta = 0;
	void* result = _tf_realloc(&delta, ptr, size);
	if (!result && size)
	{
		/* Failed resize: nothing was allocated or released. */
		return NULL;
	}

	__atomic_add_fetch(&s_js_malloc_size, delta, __ATOMIC_RELAXED);
	state->malloc_count += (ptr ? -1 : 0) + (result ? 1 : 0);
	state->malloc_size += delta;
	return result;
}

/**
** QuickJS usable-size hook.
** @param ptr Allocation from this allocator, or NULL.
** @return The requested size stored in the header just before ptr, or 0 for NULL.
*/
static size_t _tf_js_malloc_usable_size(const void* ptr)
{
	size_t stored = 0;
	if (ptr)
	{
		const char* header = (const char*)ptr - sizeof(size_t);
		memcpy(&stored, header, sizeof(stored));
	}
	return stored;
}

/**
** Fill in the QuickJS allocator table with this file's accounting hooks.
** Any fields not listed here are zeroed.
** @param out Receives the function table.
*/
void tf_get_js_malloc_functions(JSMallocFunctions* out)
{
	JSMallocFunctions functions = { 0 };
	functions.js_malloc = _tf_js_malloc;
	functions.js_free = _tf_js_free;
	functions.js_realloc = _tf_js_realloc;
	functions.js_malloc_usable_size = _tf_js_malloc_usable_size;
	*out = functions;
}

/**
** Get the number of bytes currently requested through the QuickJS hooks.
** The counter is written with atomic builtins from allocation paths, so it is
** read atomically here as well.
** @return The current JS allocation total in bytes.
*/
size_t tf_mem_get_js_malloc_size()
{
	return (size_t)__atomic_load_n(&s_js_malloc_size, __ATOMIC_RELAXED);
}

/**
** SQLite xMalloc hook; counts against the SQLite total.
** @param size Requested size in bytes.
** @return The new allocation, or NULL on failure.
*/
static void* _tf_sqlite_malloc(int size)
{
	void* result = _tf_alloc(&s_sqlite_malloc_size, size);
	return result;
}

/**
** SQLite xFree hook; credits the SQLite total.
** @param ptr Allocation from this allocator, or NULL.
*/
static void _tf_sqlite_free(void* ptr)
{
	int64_t* counter = &s_sqlite_malloc_size;
	_tf_free(counter, ptr);
}

/**
** SQLite xRealloc hook; counts against the SQLite total.
** @param ptr Existing allocation from this allocator, or NULL.
** @param size New requested size in bytes.
** @return The resized allocation, or NULL.
*/
static void* _tf_sqlite_realloc(void* ptr, int size)
{
	void* result = _tf_realloc(&s_sqlite_malloc_size, ptr, size);
	return result;
}

/**
** SQLite xSize hook.
** @param ptr Allocation from this allocator, or NULL.
** @return The requested size stored in the header just before ptr (converted
**         to int), or 0 for NULL.
*/
static int _tf_sqlite_size(void* ptr)
{
	size_t stored = 0;
	if (ptr)
	{
		const char* header = (const char*)ptr - sizeof(size_t);
		memcpy(&stored, header, sizeof(stored));
	}
	return (int)stored;
}

/**
** SQLite xRoundup hook.
** @param size Requested size in bytes.
** @return size rounded up to the next multiple of 8.
*/
static int _tf_sqlite_roundup(int size)
{
	const int mask = 7;
	int bumped = size + mask;
	return bumped & ~mask;
}

/**
** SQLite xInit hook; there is nothing to set up.
** @param user_data Unused.
** @return Always SQLITE_OK.
*/
static int _tf_sqlite_init(void* user_data)
{
	(void)user_data;
	return SQLITE_OK;
}

/**
** SQLite xShutdown hook; there is nothing to tear down.
** @param user_data Unused.
*/
static void _tf_sqlite_shutdown(void* user_data)
{
	(void)user_data;
}

void tf_mem_replace_sqlite_allocator()
{
	sqlite3_mem_methods methods =
	{
		.xMalloc = _tf_sqlite_malloc,
		.xFree = _tf_sqlite_free,
		.xRealloc = _tf_sqlite_realloc,
		.xSize = _tf_sqlite_size,
		.xRoundup = _tf_sqlite_roundup,
		.xInit = _tf_sqlite_init,
		.xShutdown = _tf_sqlite_shutdown,
	};
	sqlite3_config(SQLITE_CONFIG_MALLOC, &methods);
}

/**
** Get the number of bytes currently requested through the SQLite hooks.
** The counter is written with atomic builtins from allocation paths, so it is
** read atomically here as well.
** @return The current SQLite allocation total in bytes.
*/
size_t tf_mem_get_sqlite_malloc_size()
{
	return (size_t)__atomic_load_n(&s_sqlite_malloc_size, __ATOMIC_RELAXED);
}