// Minimal gzip (RFC 1952) / DEFLATE (RFC 1951) decompressor.
#include "gzip.h"
// we build a table of uh everything. this uses a lot of stack space,
|
|
void GzipPopulateTable(
|
|
u8* InputLengths, // Lengths of symbol bits
|
|
uptr InputCount, // Number of symbols
|
|
u8* OutputLengths, // Output bit lengths
|
|
u32* OutputSymbols, // Output symbols
|
|
uptr OutputCount, // Number of outputs
|
|
uptr OutputBits // Highest bit length
|
|
) {
|
|
u32 NextSym[15]; /// Next symbol for a specific bit length
|
|
for (int i = 0; i < 15; i++) NextSym[i] = -1;
|
|
|
|
// Count the symbols in each bit length
|
|
u32 CodeCounts[15] = {};
|
|
for (int i = 0; i < InputCount; i++) {
|
|
CodeCounts[InputLengths[i]]++;
|
|
if (NextSym[InputLengths[i]] == -1) NextSym[InputLengths[i]] = i;
|
|
}
|
|
|
|
// Ignore null symbols
|
|
CodeCounts[0] = 0;
|
|
|
|
// Find lowest code for each bit length
|
|
u32 Code = 0;
|
|
u32 NextCode[15] = {}; // Next code to add to the table
|
|
for (u32 Bits = 1; Bits < 15; Bits++) {
|
|
Code = (Code + CodeCounts[Bits - 1]) << 1;
|
|
NextCode[Bits] = Code;
|
|
}
|
|
|
|
// Fill in the table in a horribly inefficient manner
|
|
Code = 0;
|
|
for (u32 Bits = 0; Bits < 15; Bits++) {
|
|
const uptr OutputMask = ~((uptr)-1 << Bits);
|
|
for (int i = 0; i < CodeCounts[Bits]; i++) {
|
|
// Take the next symbol
|
|
Code = NextSym[Bits];
|
|
|
|
// Set the next next symbol
|
|
for (int j = Code + 1; j < InputCount; j++) {
|
|
if (InputLengths[j] == Bits) {
|
|
NextSym[Bits] = j;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// reverse the bits i dont fucking know it doesn't work if i dont
|
|
u32 Reversed = 0;
|
|
for (int j = 0; j < Bits; j++) {
|
|
Reversed |= ((NextCode[Bits] >> j) & 1) << (Bits - j - 1);
|
|
}
|
|
|
|
// this is horrible
|
|
for (int j = 0; j < OutputCount; j++) {
|
|
if ((j & OutputMask) == Reversed) {
|
|
OutputLengths[j] = Bits;
|
|
OutputSymbols[j] = Code;
|
|
}
|
|
}
|
|
|
|
// Next
|
|
NextCode[Bits]++;
|
|
Code++;
|
|
}
|
|
}
|
|
|
|
// "Good Practice" insists that i inform you that this is in fact, a return statement.
|
|
return;
|
|
}
// Reads Length bits from Stream, starting at bit offset *Location, and
// advances *Location past them.
//
// Bits are taken from each byte starting at its least significant bit. The
// first bit read becomes bit 0 of the result, the next one bit 1, and so on.
// Length must not exceed the width of u64 (assumed to be 64 bits). No bounds
// checking is done on Stream.
u64 GzipFetchBits(u8* Stream, u64* Location, uptr Length) {
    u64 Output = 0;
    for (uptr i = 0; i < Length; i++) {
        // Widen before shifting: the masked bit would otherwise be an int,
        // which cannot be shifted left by 31 or more.
        const u64 Bit = (Stream[*Location / 8] >> (*Location % 8)) & 1;
        Output |= Bit << i;
        *Location += 1;
    }
    return Output;
}
// Inflates every gzip member found in Input[0 .. InputSize) into Output and
// returns the total number of bytes written.
//
// Each member is parsed as a 10-byte header, the optional FEXTRA / FNAME /
// FCOMMENT / FHCRC fields selected by the flag byte, a DEFLATE stream, and an
// 8-byte trailer (layout per RFC 1952). The DEFLATE stream is decoded block by
// block; stored, fixed-Huffman and dynamic-Huffman blocks are handled.
//
// Nothing is validated: the magic bytes, compression method, NLEN, CRC32 and
// ISIZE are read and ignored, and Output is assumed to be large enough.
// There is no error channel to the caller, so symbols that can only come from
// corrupt data make the function spin forever.
uptr GzipDecompress(c8* Input, uptr InputSize, c8* Output) {
    // Reads a value of type _TYPE at Location and advances Location past it.
    // NOTE(review): this loads through a cast pointer at an arbitrary byte
    // offset, so it relies on the target tolerating unaligned accesses.
    #define TAKE_(_TYPE) (\
        (Location += sizeof(_TYPE)), \
        *(_TYPE*)(Location - sizeof(_TYPE)) \
    )

    // Base match length for length symbols 257..285 (RFC 1951, section 3.2.5)
    const u32 LengthAddends[29] = {
        3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
        15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
        67, 83, 99, 115, 131, 163, 195, 227, 258
    };

    // Base distance for distance symbols 0..29 (RFC 1951, section 3.2.5)
    const u32 DistanceAddends[30] = {
        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025,
        1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
    };

    void* Location = (void*)Input;
    void* End = Location + InputSize;
    void* BinaryStart = (void*)Output;

    // One iteration per gzip member. The header is not checked for validity;
    // there is no way to report a problem to the caller.
    while (Location < End) {
        [[maybe_unused]] c8 Id1 = TAKE_(c8);
        [[maybe_unused]] c8 Id2 = TAKE_(c8);

        [[maybe_unused]] u8 Method = TAKE_(u8);
        u8 Flags = TAKE_(u8);
        [[maybe_unused]] u32 LastModified = TAKE_(u32);
        [[maybe_unused]] u8 ExtraFlags = TAKE_(u8);
        [[maybe_unused]] u8 OperatingSystem = TAKE_(u8);

        // Optional extra field: 16-bit length followed by that many bytes
        [[maybe_unused]] u16 XLen = 0;
        [[maybe_unused]] void* XBuf = 0;
        if (Flags & 0x04) { // FLG.FEXTRA
            XLen = TAKE_(u16);
            XBuf = Location;
            Location += XLen;
        }

        // Optional zero-terminated file name
        [[maybe_unused]] c8* FileName = 0;
        if (Flags & 0x08) { // FLG.FNAME
            FileName = Location;
            while (TAKE_(c8) != 0) {}
        }

        // Optional zero-terminated comment
        [[maybe_unused]] c8* FileComment = 0;
        if (Flags & 0x10) { // FLG.FCOMMENT
            FileComment = Location;
            while (TAKE_(c8) != 0) {}
        }

        // Optional header checksum
        [[maybe_unused]] u16 CRC16 = 0;
        if (Flags & 0x02) { // FLG.FHCRC
            CRC16 = TAKE_(u16);
        }

        // The DEFLATE stream starts here; BitLocation is a bit offset into it
        u8* Stream = Location;
        u64 BitLocation = 0;
        while (true) {
            // Bit 0 is BFINAL, bits 1-2 are BTYPE
            u8 head = GzipFetchBits(Stream, &BitLocation, 3);
            switch ((head & 0x06) >> 1) {
                case 0: { // NO COMPRESSION
                    // Stored data starts at the next byte boundary with
                    // LEN (16 bits) and NLEN (its complement, not verified).
                    BitLocation = (BitLocation + 7) & ~(u64)7;
                    u64 StoredLength = GzipFetchBits(Stream, &BitLocation, 16);
                    BitLocation += 16;

                    const u8* Source = Stream + BitLocation / 8;
                    for (u64 i = 0; i < StoredLength; i++) {
                        *Output++ = Source[i];
                    }
                    BitLocation += StoredLength * 8;
                    break;
                }
                case 1: // FIXED HUFFMAN
                case 2: { // DYNAMIC HUFFMAN
                    // BTYPE 1 sets bit 1 of head, BTYPE 2 does not
                    const u8 IsFixed = head & 2;

                    // Get huffman code counts. The fixed code has 288
                    // literal/length symbols and 32 distance symbols.
                    u64 OperationCount = IsFixed ? 288 : GzipFetchBits(Stream, &BitLocation, 5) + 257;
                    u64 DistanceCount = IsFixed ? 32 : GzipFetchBits(Stream, &BitLocation, 5) + 1;
                    u64 CodeCount = IsFixed ? 0 : GzipFetchBits(Stream, &BitLocation, 4) + 4;
                    const u64 TotalCount = OperationCount + DistanceCount;

                    // Literal/length code lengths followed by distance code lengths
                    u8 Lengths[TotalCount] = {};

                    if (IsFixed) { // Build fixed huffman lengths
                        for (u64 i = 0; i <= 143; i++) Lengths[i] = 8;
                        for (u64 i = 144; i <= 255; i++) Lengths[i] = 9;
                        for (u64 i = 256; i <= 279; i++) Lengths[i] = 7;
                        for (u64 i = 280; i <= 287; i++) Lengths[i] = 8;
                        for (u64 i = 288; i <= 319; i++) Lengths[i] = 5;
                    } else { // Build dynamic huffman lengths
                        // Order in which the code-length code lengths are stored
                        const u8 CodeLengthLengthOffset[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
                        u8 CodeLengthLengthProto[19] = {};
                        for (u64 i = 0; i < CodeCount; i++) {
                            CodeLengthLengthProto[CodeLengthLengthOffset[i]] = GzipFetchBits(Stream, &BitLocation, 3);
                        }

                        // Build the decoding table for the code-length code
                        // (3-bit lengths, so at most 7 bits per code)
                        u8 CodeLengthLengths[0x80] = {};
                        u32 CodeLengthSymbols[0x80] = {};
                        GzipPopulateTable(CodeLengthLengthProto, 19, CodeLengthLengths, CodeLengthSymbols, 0x80, 7);

                        // Collect lengths
                        uptr CurrentLen = 0;
                        while (CurrentLen < TotalCount) {
                            // Fetch 7 bits, look up the symbol, then give
                            // back the bits its code did not use
                            u64 Code = GzipFetchBits(Stream, &BitLocation, 7);
                            u64 Error = 7 - CodeLengthLengths[Code];
                            BitLocation -= Error;
                            Code = CodeLengthSymbols[Code];

                            if (Code < 16) { // Literal length
                                Lengths[CurrentLen++] = Code;
                            } else { // Repeat
                                u64 RepeatCount = 0;
                                u64 RepeatedLen = 0;

                                if (Code == 16) { // Repeat previous length 3-6 times
                                    RepeatCount = 3 + GzipFetchBits(Stream, &BitLocation, 2);
                                    RepeatedLen = CurrentLen > 0 ? Lengths[CurrentLen - 1] : 0;
                                } else if (Code == 17) { // 3-10 zeros
                                    RepeatCount = 3 + GzipFetchBits(Stream, &BitLocation, 3);
                                } else if (Code == 18) { // 11-138 zeros
                                    RepeatCount = 11 + GzipFetchBits(Stream, &BitLocation, 7);
                                } else {} // Should be unreachable

                                // Never run past the end of Lengths, even if
                                // the stream asks for too many repeats
                                for (u64 i = 0; i < RepeatCount && CurrentLen < TotalCount; i++) {
                                    Lengths[CurrentLen++] = RepeatedLen;
                                }
                            }
                        }
                    }

                    // Build operation (literal/length) table
                    u32 OperationMaxBits = 0;
                    for (u64 i = 0; i < OperationCount; i++) {
                        if (Lengths[i] > OperationMaxBits) OperationMaxBits = Lengths[i];
                    }
                    u32 OperationTableLength = (u32)1 << OperationMaxBits;

                    // Tables (the extra 0x10 entries are padding kept from the original)
                    u32 OperationSymbols[OperationTableLength + 0x10];
                    u8 OperationLengths[OperationTableLength + 0x10];
                    GzipPopulateTable(Lengths, OperationCount,
                        OperationLengths, OperationSymbols, OperationTableLength,
                        OperationMaxBits);

                    // Build distance tables
                    u32 DistanceMaxBits = 0;
                    for (u64 i = 0; i < DistanceCount; i++) {
                        if (Lengths[OperationCount + i] > DistanceMaxBits) {
                            DistanceMaxBits = Lengths[OperationCount + i];
                        }
                    }
                    u32 DistanceTableLength = (u32)1 << DistanceMaxBits;

                    u32 DistanceSymbols[DistanceTableLength + 0x10];
                    u8 DistanceLengths[DistanceTableLength + 0x10];
                    GzipPopulateTable(Lengths + OperationCount, DistanceCount,
                        DistanceLengths, DistanceSymbols, DistanceTableLength,
                        DistanceMaxBits);

                    // Inflate
                    while (Location + BitLocation / 8 < End) {
                        // Fetch the longest possible code, look it up, and
                        // give back the bits the actual code did not use
                        u32 Code = GzipFetchBits(Stream, &BitLocation, OperationMaxBits);
                        u64 Error = OperationMaxBits - OperationLengths[Code];
                        BitLocation -= Error;
                        Code = OperationSymbols[Code];

                        if (Code < 256) { // Literal
                            *Output++ = Code;
                        } else if (Code == 256) { // End block
                            break;
                        } else if (Code <= 285) { // Copy
                            // Symbols 265..284 carry 1..5 extra length bits,
                            // four symbols per bit count; the rest carry none
                            u32 CopyLength = GzipFetchBits(Stream, &BitLocation,
                                    Code <= 284 && Code > 264
                                        ? (Code - 1) / 4 - 65
                                        : 0)
                                + LengthAddends[Code - 257];

                            // Decode the distance symbol the same way as above
                            Code = GzipFetchBits(Stream, &BitLocation, DistanceMaxBits);
                            Error = DistanceMaxBits - DistanceLengths[Code];
                            BitLocation -= Error;
                            Code = DistanceSymbols[Code];

                            // Distance symbols 30 and 31 have no base distance
                            if (Code > 29) { while (1) {} } // Invalid data

                            // Symbols 4..29 carry 1..13 extra distance bits,
                            // two symbols per bit count
                            s32 Distance = GzipFetchBits(Stream, &BitLocation,
                                    Code > 3 && Code <= 29
                                        ? (Code - 2) / 2
                                        : 0)
                                + DistanceAddends[Code];

                            // Copy forwards one byte at a time: source and
                            // destination may overlap when Distance < CopyLength,
                            // in which case freshly written bytes are re-read
                            const c8* Source = Output - Distance;
                            for (u32 i = 0; i < CopyLength; i++) {
                                Output[i] = Source[i];
                            }

                            Output += CopyLength;
                        } else { while (1) {} } // Unreachable unless invalid
                    }

                    break;
                }
                case 3: {
                    // Reserved block type; should never happen
                    break;
                }
            }
            if (head & 0x01) break; // BFINAL
        }

        // The trailer starts at the first byte boundary after the last block
        Location += (BitLocation + 7) / 8;

        // Trailer: CRC-32 and size of the uncompressed data (neither is checked)
        [[maybe_unused]] u32 CRC32 = TAKE_(u32);
        [[maybe_unused]] u32 UncompressedSize = TAKE_(u32);
    }

    return (void*)Output - BinaryStart;

    #undef TAKE_
}