/**
 * \todo This is extremely bad: much of this code is dangerous and error
 * prone, and when something goes wrong it has no way to report the failure
 * or roll back partial state. It needs to be rewritten with proper error
 * handling.
 */

#include <lcrash/mm/phys.h>

#include <lcrash/types.h>

#include <lcrash/elf/elf.h>
#include <lcrash/efi/efi.h>
#include <lcrash/efi/memmap.h>
#include <lcrash/lnxboot.h>
#include <lcrash/cmdline.h>
#include <lcrash/math.h>

/// Index of the order-_ORDER block containing physical address _ADDR
#define BUDDY_BLK_(_ORDER, _ADDR) ((_ADDR) >> (_ORDER))

/**
 * Root table for the allocator's bookkeeping data. Once it has been placed
 * in memory, it never needs additional space.
 */
struct BuddyTable {
	/// Minimum block size (log2)
	u32 StartWidth;

	/// Maximum block size (log2)
	u32 EndWidth;

	/// Per-order tables, indexed by block width (log2)
	struct BuddyOrder* Order[64];
};

/**
 * The order of a buddy block
 */
struct BuddyOrder {
	/// Number of blocks
	u64 Count;

	/// Length of Free, in bytes
	u64 Length;

	/// Free-block bitmap, one bit per block (1 = free)
	u8 Free[];
};
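
/*
 * Sizing sketch (illustrative; assumes the StartWidth = 12 and EndWidth = 20
 * configured in PmemInitialize): managing 1 GiB takes 2^18 bits (32 KiB) of
 * bitmap for the 4 KiB order, halving each order up to 2^10 bits (128 bytes)
 * for the 1 MiB order -- roughly 64 KiB of bitmaps in total.
 */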

/**
 * Pointer to the buddy root table
 */
struct BuddyTable* PmemBuddyTable = 0;

/**
 * Mark every descendant of the block at (Ordr, Address) as unavailable, so
 * no sub-block of a merged or allocated block still appears free. Note that
 * MergeTo is currently unused.
 *
 * \todo Optimize
 */
void PmemBuddyBlkMerge(u32 MergeTo, u32 Ordr, u64 Address) {
	// Nothing to merge
	if (Ordr <= PmemBuddyTable->StartWidth) return;

	// Index the two halves of this block one order down
	struct BuddyOrder* Order = PmemBuddyTable->Order[Ordr - 1];
	u64 LBlock = BUDDY_BLK_(Ordr - 1, Address & ~(1ULL << (Ordr - 1)));
	u64 RBlock = BUDDY_BLK_(Ordr - 1, Address |  (1ULL << (Ordr - 1)));
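	// e.g. with Ordr = 13 and Address = 0x5000, the block spans
	// [0x4000, 0x5FFF] and its order-12 halves are blocks 4 and 5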

	// Mark as used
	Order->Free[LBlock >> 3] &= ~(1ULL << (LBlock & 7));
	Order->Free[RBlock >> 3] &= ~(1ULL << (RBlock & 7));

	// Recurse to mark the rest of the subtree as used
	PmemBuddyBlkMerge(MergeTo, Ordr - 1, Address & ~(1ULL << (Ordr - 1)));
	PmemBuddyBlkMerge(MergeTo, Ordr - 1, Address |  (1ULL << (Ordr - 1)));
}

/**
 * Split the block at (Ordr, Address): mark it used and mark both of its
 * order-(Ordr - 1) halves free
 */
void PmemBuddyBlkSplit(u32 Ordr, u64 Address) {
	struct BuddyOrder* UOrder = PmemBuddyTable->Order[Ordr];
	struct BuddyOrder* LOrder = PmemBuddyTable->Order[Ordr - 1];
	u64  Block = BUDDY_BLK_(Ordr, Address);
	u64 LBlock = BUDDY_BLK_(Ordr - 1, Address & ~(1ULL << (Ordr - 1)));
	u64 RBlock = BUDDY_BLK_(Ordr - 1, Address |  (1ULL << (Ordr - 1)));

	// Split
	UOrder->Free[Block >> 3] &= ~(1ULL << (Block & 7));
	LOrder->Free[LBlock >> 3] |= 1ULL << (LBlock & 7);
	LOrder->Free[RBlock >> 3] |= 1ULL << (RBlock & 7);
}
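
/*
 * For example (values hypothetical), splitting the order-13 block at 0x4000
 * marks it used and marks the order-12 blocks at 0x4000 and 0x5000 free.
 */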

/**
 * Mark a block as allocated
 *
 * \todo Optimize
 */
void PmemBuddyBlkMarkAlloc(u32 Ordr, u64 Address) {
	struct BuddyOrder* Order = PmemBuddyTable->Order[Ordr];

	u64 Block = BUDDY_BLK_(Ordr, Address);

	// Mark in use
	Order->Free[Block >> 3] &= ~(1ULL << (Block & 7));

	// Merge lower orders
	PmemBuddyBlkMerge(Ordr, Ordr, Address);
}

/**
 * Mark a block as free
 *
 * \todo Optimize
 * \todo Freeing never coalesces a block with its buddy back into a larger
 * block, so repeated allocation and freeing fragments memory
 */
void PmemBuddyBlkMarkFree(u32 Ordr, u64 Address) {
	struct BuddyOrder* Order = PmemBuddyTable->Order[Ordr];

	u64 Block = BUDDY_BLK_(Ordr, Address);

	// Mark free
	Order->Free[Block >> 3] |= 1ULL << (Block & 7);

	// Merge
	PmemBuddyBlkMerge(Ordr, Ordr, Address);
}

/**
 * Recursively split parent blocks until the block at (Ordr, Address) is
 * free, then split it so a free block exists one order below
 *
 * \todo Optimize
 */
void PmemBuddyBlkSplitUp(u32 Ordr, u64 Address) {
	struct BuddyOrder* Order = PmemBuddyTable->Order[Ordr];
	u64 Block = BUDDY_BLK_(Ordr, Address);

	// If this block is not free, split its parent so that it becomes free
	if (!(Order->Free[Block >> 3] & (1ULL << (Block & 7)))) {
		if (Ordr < PmemBuddyTable->EndWidth) {
			PmemBuddyBlkSplitUp(Ordr + 1, Address & (((u64)-1) << Ordr));
		}
	}

	// The block should be free by now; split it
	PmemBuddyBlkSplit(Ordr, Address);
}

/**
 * Allocates a block and splits parent blocks if necessary
 *
 * \todo Optimize
 */
void PmemBuddyBlkAlloc(u32 Ordr, u64 Address) {

	// Split the block above us
	if (Ordr < PmemBuddyTable->EndWidth) {
		PmemBuddyBlkSplitUp(Ordr + 1, Address & (((u64)-1) << Ordr));
	}

	PmemBuddyBlkMarkAlloc(Ordr, Address);
}
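
/*
 * For example, allocating a 4 KiB block when only a whole 1 MiB block is
 * free splits at every order from 20 down to 13, leaving a free buddy
 * behind at each order from 19 down to 12 on the way down.
 */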

/**
 * Mark the inclusive address range [From, To] as allocated. Blocks of the
 * minimum order that are only partially covered are reserved whole.
 *
 * \todo Optimize
 */
void PmemBuddyMarkAllocRange(void* From, void* To) {
	// Find the smallest order whose block size covers the inclusive range
	u64 OwningOrder = 0;
	for (OwningOrder = PmemBuddyTable->StartWidth; OwningOrder < PmemBuddyTable->EndWidth; OwningOrder++) {
		if ((1ULL << OwningOrder) >= (uptr)(To - From) + 1) break;
	}

	// Select a "pivot" block to work relative to
	void* CentralBlockStartAddress = (void*)((uptr)From & (-1ULL << OwningOrder));
	void* CentralBlockMidAddress   = CentralBlockStartAddress + (1ULL << (OwningOrder - 1));
	void* CentralBlockEndAddress   = CentralBlockStartAddress + (1ULL << OwningOrder) - 1;
	void* BigAreaBlockEndAddress   = (void*)((uptr)To & (-1ULL << OwningOrder));

	if (From <= CentralBlockStartAddress && To >= CentralBlockEndAddress) {
		// The pivot block is fully covered, it's merging time
		PmemBuddyBlkAlloc(OwningOrder, (u64)CentralBlockStartAddress);

		// The rest of the <BIG AREA>
		for (void* Block = CentralBlockEndAddress + 1; Block < BigAreaBlockEndAddress; Block += 1ULL << OwningOrder) {
			PmemBuddyBlkAlloc(OwningOrder, (u64)Block);
		}

		// The (possibly partial) block at the tail of the range
		if (BigAreaBlockEndAddress > CentralBlockEndAddress) {
			PmemBuddyMarkAllocRange(BigAreaBlockEndAddress, To);
		}
	} else if (OwningOrder == PmemBuddyTable->StartWidth) {
		// The range is smaller than the minimum block size: reserve the
		// whole containing block, then handle any spill into the next one
		PmemBuddyBlkAlloc(OwningOrder, (u64)CentralBlockStartAddress);
		if (To > CentralBlockEndAddress) {
			PmemBuddyMarkAllocRange(CentralBlockEndAddress + 1, To);
		}
	} else if (From < CentralBlockMidAddress) {
		// The range straddles the pivot's midpoint, looks like a split
		PmemBuddyMarkAllocRange(From, CentralBlockMidAddress - 1);
		PmemBuddyMarkAllocRange(CentralBlockMidAddress, To);
	} else {
		// The range begins in the pivot's upper half
		PmemBuddyMarkAllocRange(From, CentralBlockEndAddress);
		PmemBuddyMarkAllocRange(CentralBlockEndAddress + 1, To);
	}
}
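
/*
 * Decomposition sketch (hypothetical addresses, StartWidth = 12): marking
 * [0x2800, 0x7FFF] reserves the 4 KiB blocks at 0x2000 and 0x3000 (the
 * partially covered head block rounds outward) plus the 16 KiB block at
 * 0x4000.
 */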

int PmemInitialize() {
	// Allocate a sizable chunk of stack memory for scanning, we lack
	// a heap so we need to be creative with how we store this.
	void* BeginAddresses[0x80];
	void* EndAddresses[0x80];
	u32   NextAddress = 0;

	/*
	// Find our memory address
	if (EfiPresent()) {
		// Scan the EFI memory map for usable memory
		struct EfiMemoryDescription *Entry;
		int Index;
		EFI_MEMORY_MAP_FOREACH_(Index, Entry) {
			if (Entry->Type == EFI_MEMORY_TYPE_CONVENTIONAL_MEMORY) {
				BeginAddresses[NextAddress] = Entry->PhysicalStart;
				EndAddresses[NextAddress] = Entry->NumberOfPages * 0x1000 + Entry->PhysicalStart;
				NextAddress += 1;
			}
		}
	} else while (true) {} // die
	*/

	// Scan the BIOS e820 table provided by the inferior kernel
	for (int i = 0; i < BootBootParams->e820_entries; i++) {
		// Type 1 is usable RAM; stop taking entries once our fixed-size
		// stack arrays are full
		if (BootBootParams->e820_table[i].type == 1 && NextAddress < 0x80) {
			BeginAddresses[NextAddress] = (void*)BootBootParams->e820_table[i].base;
			EndAddresses[NextAddress] = (void*)BootBootParams->e820_table[i].base + BootBootParams->e820_table[i].length;

			NextAddress += 1;
		}
	}
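
	// Note that EndAddresses holds exclusive ends: usable RAM at
	// [0x100000, 0x7FFE0000), say, is stored as Begin = 0x100000,
	// End = 0x7FFE0000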

	// Mark kernel memory as reserved
	const c8* CoreHeaderParam;
	if ((CoreHeaderParam = CmdlineGet("elfcorehdr"))) {
		/// TODO: Should this be its own function?
		if (CoreHeaderParam[0] == '0' && CoreHeaderParam[1] == 'x') {
			uptr CoreHeaderAddr = 0;

			for (int i = 2; CoreHeaderParam[i] != 0; i++) {
				c8 c = CoreHeaderParam[i];

				CoreHeaderAddr *= 16;

				if (c >= '0' && c <= '9') {
					CoreHeaderAddr += c - '0';
				} else if (c >= 'a' && c <= 'f') {
					CoreHeaderAddr += c - 'a' + 10;
				} else if (c >= 'A' && c <= 'F') {
					CoreHeaderAddr += c - 'A' + 10;
				} else break;
			}

			// TODO: 32-bit machines?
			struct Elf64_Ehdr* CoreHeader = (struct Elf64_Ehdr*)CoreHeaderAddr;

			// We don't check the ELF signature or version; if the
			// kernel didn't hand us anything valid, there is nothing
			// we can do about it anyway
			if (CoreHeader->e_type != ET_CORE) goto sort;

			// Scan core dump for kernel memory
			void* CoreSegmentLocation = (void*)CoreHeader + CoreHeader->e_phoff;
			for (int i = 0; i < CoreHeader->e_phnum; i++) {
				struct Elf64_Phdr* Segment = (struct Elf64_Phdr*)CoreSegmentLocation;

				// Only take LOAD areas
				if (Segment->p_type == PT_LOAD) {
					void* BeginAddr = (void*)Segment->p_paddr;
					void* EndAddr   = (void*)Segment->p_paddr + Segment->p_memsz;
					
					long EndSplitScan = NextAddress;
					for (int j = 0; j < EndSplitScan; j++) {
						// Clip the segment [BeginAddr, EndAddr) out of
						// usable block j (ends are exclusive)
						if (EndAddr <= BeginAddresses[j] || BeginAddr >= EndAddresses[j]) continue;

						if (BeginAddr > BeginAddresses[j] && EndAddr < EndAddresses[j]) {
							// Segment sits in the middle: split the block
							if (NextAddress < 0x80) {
								BeginAddresses[NextAddress] = EndAddr;
								EndAddresses[NextAddress] = EndAddresses[j];
								NextAddress += 1;
							}

							// Shrink the original block down to the part
							// before the segment (if the table was full,
							// the part after it is simply lost, which is
							// the safe direction to fail in)
							EndAddresses[j] = BeginAddr;
						} else if (BeginAddr <= BeginAddresses[j] && EndAddr >= EndAddresses[j]) {
							// Segment covers the whole block: delete it
							for (int k = j; k < NextAddress - 1; k++) {
								BeginAddresses[k] = BeginAddresses[k + 1];
								EndAddresses[k] = EndAddresses[k + 1];
							}

							NextAddress -= 1;
							EndSplitScan -= 1;
							j -= 1; // Re-examine the block shifted into slot j
						} else if (BeginAddr > BeginAddresses[j]) {
							// Segment overlaps the tail: shrink the block
							EndAddresses[j] = BeginAddr;
						} else {
							// Segment overlaps the head: shrink the block
							BeginAddresses[j] = EndAddr;
						}
					}
				}

				CoreSegmentLocation += CoreHeader->e_phentsize;
			}
		}
	}

	// TODO: Please replace this with a real sorting algorithm.
	// Bubble sort the usable ranges by base address
sort:	while (true) {
		bool KeepGoing = false;

		for (int i = 0; i < NextAddress - 1; i++) {
			if (BeginAddresses[i] > BeginAddresses[i + 1]) {
				void* Temp = BeginAddresses[i];
				BeginAddresses[i] = BeginAddresses[i + 1];
				BeginAddresses[i + 1] = Temp;
				Temp = EndAddresses[i];
				EndAddresses[i] = EndAddresses[i + 1];
				EndAddresses[i + 1] = Temp;
				KeepGoing = true;
			}
		}

		if (!KeepGoing) break;
	}

	// Set up our physical memory table. EndAddresses holds exclusive ends,
	// so the last one is the total range we have to manage.
	u64 MemorySize = (u64)EndAddresses[NextAddress - 1];
	u32 BuddyStartWidth = 12; // 4 KiB minimum block
	u32 BuddyEndWidth = 20;   // 1 MiB maximum block

	// Round the managed range up to a whole top-order block
	u64 BuddyRange = (MemorySize + (1ULL << BuddyEndWidth) - 1) & ~((1ULL << BuddyEndWidth) - 1);
	u32 BuddyTableSize = 0;

	// Calculate the table size, keeping everything 8-byte aligned
	BuddyTableSize += sizeof(struct BuddyTable);
	BuddyTableSize  = (BuddyTableSize + 7) & ~7u;
	for (u32 Width = BuddyStartWidth; Width <= BuddyEndWidth; Width++) {
		u64 BlockCount  = BuddyRange >> Width;
		u64 BitmapBytes = (BlockCount + 7) / 8; // One bit per block
		u64 OrderSize   = sizeof(struct BuddyOrder) + BitmapBytes;
		BuddyTableSize += (OrderSize + 7) & ~7ULL;
	}

	// Find a safe area for the allocation tables
	for (int i = 0; i < NextAddress; i++) {
		if (EndAddresses[i] - BeginAddresses[i] >= BuddyTableSize) {
			PmemBuddyTable = (struct BuddyTable*)BeginAddresses[i];
			break;
		}
	}

	// Uh oh. TODO: We should probably panic here when we figure out how to do that.
	if (PmemBuddyTable == 0) while (true) {}

	// Fill out the table
	PmemBuddyTable->StartWidth = BuddyStartWidth;
	PmemBuddyTable->EndWidth = BuddyEndWidth;
	for (int i = 0; i < 64; i++) PmemBuddyTable->Order[i] = 0;
	
	// Build our order tables
	void* NextOrderLocation = (void*)PmemBuddyTable + sizeof(struct BuddyTable);
	NextOrderLocation = (void*)(((uptr)NextOrderLocation + 7) & ~(uptr)7);
	for (int i = BuddyStartWidth; i <= BuddyEndWidth; i++) {
		struct BuddyOrder* Order = (struct BuddyOrder*)NextOrderLocation;

		// I sure hope this gets optimized into a rep
		Order->Count  = BuddyRange / (1ULL << i);
		Order->Length = (Order->Count + 7) / 8;
		if (i == BuddyEndWidth) {
			// Everything starts out free at the top order
			for (int j = 0; j < Order->Length; j++) Order->Free[j] = 255;

			// Clear any bits in the last byte beyond Count, so blocks
			// past the end of the range never look free
			if (Order->Count % 8) {
				Order->Free[Order->Length - 1] = (1 << (Order->Count % 8)) - 1;
			}
		} else {
			for (int j = 0; j < Order->Length; j++) Order->Free[j] = 0;
		}

		// Save it
		PmemBuddyTable->Order[i] = Order;

		// Calculate next order address
		NextOrderLocation += sizeof(struct BuddyOrder);
		NextOrderLocation += Order->Length;
		NextOrderLocation = (void*)(((uptr)NextOrderLocation + 7) & ~(uptr)7);
	}

	// Mark anything we can't touch as used
	for (int i = 0; i < NextAddress; i++) {
		if (i == 0 && BeginAddresses[i] != 0) {
			PmemBuddyMarkAllocRange(0, BeginAddresses[i] - 1);
		}

		if (i + 1 == NextAddress) {
			void* RangeEnd = (void*)((1ULL << BuddyEndWidth) * PmemBuddyTable->Order[BuddyEndWidth]->Count);
			if (EndAddresses[i] < RangeEnd) {
				PmemBuddyMarkAllocRange(EndAddresses[i], RangeEnd - 1);
			}
		} else if (EndAddresses[i] < BeginAddresses[i + 1]) {
			PmemBuddyMarkAllocRange(EndAddresses[i], BeginAddresses[i + 1] - 1);
		}
	}

	// Reserve the allocator's own tables so we never hand them out
	PmemBuddyMarkAllocRange((void*)PmemBuddyTable, (void*)PmemBuddyTable + BuddyTableSize - 1);

	return 0;
}

/**
 * Find a free block of the specified order by scanning the order's bitmap
 * for the first set bit (e.g. bit 4 of Free[2] yields block index 20)
 *
 * \return Returns 0 on success, -1 if no block of this order is free
 */
int PmemBuddyFindBlock(u32 Ordr, u64* Result) {
	struct BuddyOrder* Order = PmemBuddyTable->Order[Ordr];

	// Find our block
	for (uptr i = 0; i < Order->Length; i++) {
		if (Order->Free[i]) {
			for (int sub = 0; sub < 8; sub++) {
				if (Order->Free[i] & (1 << sub)) {
					// We got it
					*Result = (i << 3) | sub;
					return 0;
				}
			}
		}
	}

	*Result = 0;
	return -1;
}

/**
 * Allocate a physical block of at least RequiredSize bytes
 *
 * \return Returns 0 on success
 */
[[gnu::nonnull(2)]]
int PmemAllocateBlock(uptr RequiredSize, struct PmemAllocation* Allocated) {
	// Don't function if the PMM subsystem isn't ready
	if (PmemBuddyTable == 0) return -1;

	// We can't make massive allocations
	if (RequiredSize > (1ULL << PmemBuddyTable->EndWidth)) return -1;

	// Find the smallest order whose blocks can hold the request
	u32 Order = PmemBuddyTable->StartWidth;
	for (; RequiredSize > (1ULL << Order) && Order < PmemBuddyTable->EndWidth; Order++);

	// Find a block
	u64 Block = 0;
	if (PmemBuddyFindBlock(Order, &Block)) {
		// We need to split a block to get ours
		bool Found = false;
		u32  OrderAttempt = Order + 1;
		
		// Find (a) block(s) to split
		u64 ToSplit = 0;
		for (; OrderAttempt <= PmemBuddyTable->EndWidth; OrderAttempt++) {
			if (!PmemBuddyFindBlock(OrderAttempt, &ToSplit)) {
				Found = true;
				break;
			}
		}

		// Out of memory
		if (!Found) return -1;

		// Split
		for (; OrderAttempt > Order; OrderAttempt--) {
			PmemBuddyBlkSplit(OrderAttempt, ToSplit << OrderAttempt);
			ToSplit <<= 1;
		}

		// Here's our block
		Block = ToSplit;
	}

	// TODO: This isn't optimal, space is wasted, fix this as soon as possible
	PmemBuddyBlkMarkAlloc(Order, Block << Order);

	// Hand the results back to the caller
	Allocated->Address = (void*)(Block << Order);
	Allocated->AllocationOrder = Order;
	Allocated->AllocationBlock = Block;

	return 0;
}

/**
 * Free a block previously returned by PmemAllocateBlock
 *
 * \return Returns 0 on success
 */
[[gnu::nonnull(1)]]
int PmemFreeBlock(struct PmemAllocation* Block) {
	// Don't function if the PMM subsystem isn't ready
	if (PmemBuddyTable == 0) return -1;

	// Deallocate the block
	PmemBuddyBlkMarkFree(Block->AllocationOrder, Block->AllocationBlock << Block->AllocationOrder);

	return 0;
}
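
/*
 * Usage sketch (hypothetical caller; PmemInitialize must have run first):
 *
 *     struct PmemAllocation Page;
 *     if (PmemAllocateBlock(0x1000, &Page) == 0) {
 *         // Page.Address is the physical address of a block of at least
 *         // 0x1000 bytes
 *         PmemFreeBlock(&Page);
 *     }
 */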