From 52fa0201f97808d518c64bcb9696f2a350678aa5 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 17 Jan 2017 16:01:12 +0300 Subject: [PATCH] Partial data recovery (-D flag). This feature allows to partially recover data from a given segment file in format suitable for using in COPY FROM statement. List of supported data types is currently not full and TOAST is not yet supported, but it's better than nothing. Hopefully data recovery will be improved in the future. Implemented by Aleksander Alekseev, reviewed by Dmitry Ivanov, tested by Dmitry Ivanov and Grigoriy Smolkin. --- .gitignore | 2 +- Makefile | 15 +- README.pg_filedump | 4 +- decode.c | 814 +++++++++++++++++++++++++++++++++++++++++++++ decode.h | 10 + pg_filedump.c | 39 ++- pg_filedump.h | 3 +- pg_lzcompress.c | 778 +++++++++++++++++++++++++++++++++++++++++++ stringinfo.c | 170 ++++++++++ 9 files changed, 1828 insertions(+), 7 deletions(-) create mode 100644 decode.c create mode 100644 decode.h create mode 100644 pg_lzcompress.c create mode 100644 stringinfo.c diff --git a/.gitignore b/.gitignore index 013b7bb..002d812 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -/pg_filedump.o +/*.o /pg_filedump diff --git a/Makefile b/Makefile index 7894a49..6835ddd 100644 --- a/Makefile +++ b/Makefile @@ -14,16 +14,25 @@ PGSQL_LIB_DIR=$(shell $(PG_CONFIG) --libdir) PGSQL_BIN_DIR=$(shell $(PG_CONFIG) --bindir) DISTFILES= README.pg_filedump Makefile Makefile.contrib \ - pg_filedump.h pg_filedump.c + pg_filedump.h pg_filedump.c decode.h decode.c stringinfo.c pg_lzcompress.c all: pg_filedump -pg_filedump: pg_filedump.o - ${CC} ${PGSQL_LDFLAGS} ${LDFLAGS} -o pg_filedump pg_filedump.o -L${PGSQL_LIB_DIR} -lpgport +pg_filedump: pg_filedump.o decode.o stringinfo.o pg_lzcompress.o + ${CC} ${PGSQL_LDFLAGS} ${LDFLAGS} -o pg_filedump pg_filedump.o decode.o stringinfo.o pg_lzcompress.o -L${PGSQL_LIB_DIR} -lpgport pg_filedump.o: pg_filedump.c ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} pg_filedump.c -c 
+decode.o: decode.c + ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} decode.c -c + +stringinfo.o: stringinfo.c + ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} stringinfo.c -c + +pg_lzcompress.o: pg_lzcompress.c + ${CC} ${PGSQL_CFLAGS} ${CFLAGS} -I${PGSQL_INCLUDE_DIR} pg_lzcompress.c -c + dist: rm -rf pg_filedump-${FD_VERSION} pg_filedump-${FD_VERSION}.tar.gz mkdir pg_filedump-${FD_VERSION} diff --git a/README.pg_filedump b/README.pg_filedump index 1c950ae..723ae55 100644 --- a/README.pg_filedump +++ b/README.pg_filedump @@ -59,7 +59,7 @@ not require any manual adjustments of the Makefile. ------------------------------------------------------------------------ Invocation: -pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-S blocksize] [-s segsize] [-n segnumber] file +pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file Defaults are: relative addressing, range of the entire file, block size as listed on block 0 in the file @@ -71,6 +71,8 @@ The following options are valid for heap and index files: off all formatting options) -d Display formatted block content dump (Option will turn off all other formatting options) + -D Try to decode tuples using provided list of attribute types. 
+ [attrlist] should be something like int,timestamp,bool,uuid -f Display formatted block content dump along with interpretation -h Display this information -i Display interpreted item details diff --git a/decode.c b/decode.c new file mode 100644 index 0000000..4da4c7c --- /dev/null +++ b/decode.c @@ -0,0 +1,814 @@ +#include "postgres.h" +#include "decode.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ATTRTYPES_STR_MAX_LEN (1024-1) + +typedef int (*decode_callback_t)(const char* buffer, unsigned int buff_size, + unsigned int* out_size); + +static int +decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int +decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size); + +static int ncallbacks = 0; +static decode_callback_t callbacks[ATTRTYPES_STR_MAX_LEN / 2] = { NULL }; + +typedef struct { + 
char* name; + decode_callback_t callback; +} ParseCallbackTableItem; + +static ParseCallbackTableItem callback_table[] = { + { "smallserial", &decode_smallint }, + { "smallint", &decode_smallint }, + { "int", &decode_int }, + { "serial", &decode_int }, + { "bigint", &decode_bigint }, + { "bigserial", &decode_bigint }, + { "time", &decode_time }, + { "timetz", &decode_timetz }, + { "date", &decode_date }, + { "timestamp", &decode_timestamp }, + { "float4", &decode_float4 }, + { "float8", &decode_float8 }, + { "float", &decode_float8 }, + { "bool", &decode_bool }, + { "uuid", &decode_uuid }, + { "macaddr", &decode_macaddr }, + + /* internally all string types are stored the same way */ + { "char", &decode_string }, + { "varchar", &decode_string }, + { "text", &decode_string }, + { "json", &decode_string }, + { "xml", &decode_string }, + { NULL, NULL}, +}; + +static StringInfoData copyString; +static bool copyStringInitDone = false; + +/* + * Temporary buffer for storing decompressed data. + * + * 64K should be enough in most cases. If it's not user can manually change + * this limit. Unfortunately there is no way to know how much memory user + * is willing to allocate. + */ +static char decompress_tmp_buff[64*1024]; + +/* Used by some PostgreSQL macro definitions */ +void +ExceptionalCondition(const char *conditionName, + const char *errorType, + const char *fileName, + int lineNumber) +{ + printf("Exceptional condition: name = %s, type = %s, fname = %s, line = %d\n", + conditionName ? conditionName : "(NULL)", + errorType ? errorType : "(NULL)", + fileName ? 
fileName : "(NULL)", + lineNumber); + exit(1); +} + +/* Append given string to current COPY line */ +static void +CopyAppend(const char* str) +{ + if(!copyStringInitDone) + { + initStringInfo(&copyString); + copyStringInitDone = true; + } + + /* Caller probably wanted just to init copyString */ + if(str == NULL) + return; + + if(copyString.data[0] != '\0') + appendStringInfoString(&copyString, "\t"); + + appendStringInfoString(&copyString, str); +} + +/* + * Append given string to current COPY line and encode special symbols + * like \r, \n, \t and \\. + */ +static void +CopyAppendEncode(const char* str, int orig_len) +{ + /* + * Should be enough in most cases. If it's not user can manually change + * this limit. Unfortunately there is no way to know how much memory user + * is willing to allocate. + */ + static char tmp_buff[64*1024]; + /* Reserve one byte for a trailing zero. */ + const int max_offset = sizeof(tmp_buff) - 2; + int curr_offset = 0; + int len = orig_len; + + while(len > 0) + { + /* + * Make sure there is enough free space for at least one special symbol + * and a trailing zero. + */ + if(curr_offset > max_offset - 2) + { + printf("ERROR: Unable to properly encode a string since it's too " + "large (%d bytes). Try to increase tmp_buff size in CopyAppendEncode " + "procedure.\n", orig_len); + exit(1); + } + + /* + * Since we are working with potentially corrupted data we can encounter + * \0 as well.
+ */ + if(*str == '\0') + { + tmp_buff[curr_offset] = '\\'; + tmp_buff[curr_offset+1] = '0'; + curr_offset += 2; + } + else if(*str == '\r') + { + tmp_buff[curr_offset] = '\\'; + tmp_buff[curr_offset+1] = 'r'; + curr_offset += 2; + } + else if(*str == '\n') + { + tmp_buff[curr_offset] = '\\'; + tmp_buff[curr_offset+1] = 'n'; + curr_offset += 2; + } + else if(*str == '\t') + { + tmp_buff[curr_offset] = '\\'; + tmp_buff[curr_offset+1] = 't'; + curr_offset += 2; + } + else if(*str == '\\') + { + tmp_buff[curr_offset] = '\\'; + tmp_buff[curr_offset+1] = '\\'; + curr_offset += 2; + } + else + { + /* It's a regular symbol. */ + tmp_buff[curr_offset] = *str; + curr_offset++; + } + + str++; + len--; + } + + tmp_buff[curr_offset] = '\0'; + CopyAppend(tmp_buff); +} + +/* CopyAppend version with format string support */ +#define CopyAppendFmt(fmt, ...) do { \ + char __copy_format_buff[512]; \ + snprintf(__copy_format_buff, sizeof(__copy_format_buff), fmt, ##__VA_ARGS__); \ + CopyAppend(__copy_format_buff); \ + } while(0) + +/* Discard accumulated COPY line */ +static void +CopyClear(void) +{ + /* Make sure init is done */ + CopyAppend(NULL); + + resetStringInfo(&copyString); +} + +/* Output and then clear accumulated COPY line */ +static void +CopyFlush(void) +{ + /* Make sure init is done */ + CopyAppend(NULL); + + printf("COPY: %s\n", copyString.data); + CopyClear(); +} + +/* + * Add a callback to `callbacks` table for given type name + * + * Arguments: + * type - name of a single type, always lowercase + * + * Return value is: + * == 0 - no error + * < 0 - invalid type name + */ +static int +AddTypeCallback(const char* type) +{ + int idx = 0; + + if(*type == '\0') /* ignore empty strings */ + return 0; + + while(callback_table[idx].name != NULL) + { + if(strcmp(callback_table[idx].name, type) == 0) + { + callbacks[ncallbacks] = callback_table[idx].callback; + ncallbacks++; + return 0; + } + idx++; + } + + printf("Error: type <%s> doesn't exist or is not currently supported\n",
type); + printf("Full list of known types: "); + idx = 0; + while(callback_table[idx].name != NULL) + { + printf("%s ", callback_table[idx].name); + idx++; + } + printf("\n"); + return -1; +} + +/* + * Decode attribute types string like "int,timestamp,bool,uuid" + * + * Arguments: + * str - types string + * Return value is: + * == 0 - if string is valid + * < 0 - if string is invalid + */ +int +ParseAttributeTypesString(const char* str) +{ + char *curr_type, *next_type; + char attrtypes[ATTRTYPES_STR_MAX_LEN+1]; + int i, len = strlen(str); + + if(len > ATTRTYPES_STR_MAX_LEN) + { + printf("Error: attribute types string is longer than %u characters!\n", + ATTRTYPES_STR_MAX_LEN); + return -1; + } + + strcpy(attrtypes, str); + for(i = 0; i < len; i++) + attrtypes[i] = tolower(attrtypes[i]); + + curr_type = attrtypes; + while(curr_type) + { + next_type = strstr(curr_type, ","); + if(next_type) + { + *next_type = '\0'; + next_type++; + } + + if(AddTypeCallback(curr_type) < 0) + return -1; + + curr_type = next_type; + } + + return 0; +} + +/* + * Convert Julian day number (JDN) to a date. + * Copy-pasted from src/backend/utils/adt/datetime.c + */ +static void +j2date(int jd, int *year, int *month, int *day) +{ + unsigned int julian; + unsigned int quad; + unsigned int extra; + int y; + + julian = jd; + julian += 32044; + quad = julian / 146097; + extra = (julian - quad * 146097) * 4 + 3; + julian += 60 + quad * 3 + extra / 146097; + quad = julian / 1461; + julian -= quad * 1461; + y = julian * 4 / 1461; + julian = ((y != 0) ?
((julian + 305) % 365) : ((julian + 306) % 366)) + + 123; + y += quad * 4; + *year = y - 4800; + quad = julian * 2141 / 65536; + *day = julian - 7834 * quad / 256; + *month = (quad + 10) % MONTHS_PER_YEAR + 1; +} + +/* Decode a smallint type */ +static int +decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int16), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int16)) + return -2; + + CopyAppendFmt("%d", (int)(*(int16*)buffer)); + *out_size = sizeof(int16) + delta; + return 0; +} + + +/* Decode an int type */ +static int +decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int32)) + return -2; + + CopyAppendFmt("%d", *(int32*)buffer); + *out_size = sizeof(int32) + delta; + return 0; +} + +/* Decode a bigint type */ +static int +decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int64)) + return -2; + + CopyAppendFmt("%ld", *(int64*)buffer); + *out_size = sizeof(int64) + delta; + return 0; +} + +/* Decode a time type */ +static int +decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), 
(uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + int64 timestamp, timestamp_sec; + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int64)) + return -2; + + timestamp = *(int64*)buffer; + timestamp_sec = timestamp / 1000000; + *out_size = sizeof(int64) + delta; + + CopyAppendFmt("%02ld:%02ld:%02ld.%06ld", + timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, + timestamp % 1000000); + + return 0; +} + +/* Decode a timetz type */ +static int +decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + int64 timestamp, timestamp_sec; + int32 tz_sec, tz_min; + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < (sizeof(int64) + sizeof(int32))) + return -2; + + timestamp = *(int64*)buffer; + tz_sec = *(int32*)(buffer + sizeof(int64)); + timestamp_sec = timestamp / 1000000; + tz_min = - (tz_sec / 60); + *out_size = sizeof(int64) + sizeof(int32) + delta; + + CopyAppendFmt("%02ld:%02ld:%02ld.%06ld%c%02d:%02d", + timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, + timestamp % 1000000, (tz_min > 0 ? 
'+' : '-'), abs(tz_min / 60), abs(tz_min % 60)); + + return 0; +} + +/* Decode a date type */ +static int +decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + int32 jd, year, month, day; + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int32)) + return -2; + + *out_size = sizeof(int32) + delta; + + jd = *(int32*)buffer + POSTGRES_EPOCH_JDATE; + j2date(jd, &year, &month, &day); + + CopyAppendFmt("%04d-%02d-%02d%s", (year <= 0) ? -year + 1 : year, month, day, (year <= 0) ? " BC" : ""); + + return 0; +} + +/* Decode a timestamp type */ +static int +decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + int64 timestamp, timestamp_sec; + int32 jd, year, month, day; + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(int64)) + return -2; + + *out_size = sizeof(int64) + delta; + timestamp = *(int64*)buffer; + + jd = timestamp / USECS_PER_DAY; + if (jd != 0) + timestamp -= jd * USECS_PER_DAY; + + if (timestamp < INT64CONST(0)) + { + timestamp += USECS_PER_DAY; + jd -= 1; + } + + /* add offset to go from J2000 back to standard Julian date */ + jd += POSTGRES_EPOCH_JDATE; + + j2date(jd, &year, &month, &day); + timestamp_sec = timestamp / 1000000; + + CopyAppendFmt("%04d-%02d-%02d %02ld:%02ld:%02ld.%06ld%s", + (year <= 0) ? -year + 1 : year, month, day, + timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60, + timestamp % 1000000, + (year <= 0) ? 
" BC" : ""); + + return 0; +} + +/* Decode a float4 type */ +static int +decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(float), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(float)) + return -2; + + CopyAppendFmt("%.12f", *(float*)buffer); + *out_size = sizeof(float) + delta; + return 0; +} + +/* Decode a float8 type */ +static int +decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + const char* new_buffer = (const char*)TYPEALIGN(sizeof(double), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= delta; + buffer = new_buffer; + + if(buff_size < sizeof(double)) + return -2; + + CopyAppendFmt("%.12lf", *(double*)buffer); + *out_size = sizeof(double) + delta; + return 0; +} + +/* Decode an uuid type */ +static int +decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + unsigned char uuid[16]; + + if(buff_size < sizeof(uuid)) + return -1; + + memcpy(uuid, buffer, sizeof(uuid)); + CopyAppendFmt("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid[0], uuid[1], uuid[2], uuid[3], uuid[4], uuid[5], uuid[6], uuid[7], + uuid[8], uuid[9], uuid[10], uuid[11], uuid[12], uuid[13], uuid[14], uuid[15] + ); + *out_size = sizeof(uuid); + return 0; +} + +/* Decode a macaddr type */ +static int +decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + unsigned char macaddr[6]; + const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer); + unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer ); + + if(buff_size < delta) + return -1; + + buff_size -= 
delta; + buffer = new_buffer; + + if(buff_size < sizeof(macaddr)) + return -2; + + memcpy(macaddr, buffer, sizeof(macaddr)); + CopyAppendFmt("%02x:%02x:%02x:%02x:%02x:%02x", + macaddr[0], macaddr[1], macaddr[2], macaddr[3], macaddr[4], macaddr[5] + ); + *out_size = sizeof(macaddr) + delta; + return 0; +} + +/* Decode a bool type */ +static int +decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + if(buff_size < sizeof(bool)) + return -1; + + CopyAppend(*(bool*)buffer ? "t" : "f"); + *out_size = sizeof(bool); + return 0; +} + +/* Decode char(N), varchar(N), text, json or xml types */ +static int +decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size) +{ + int padding = 0; + + /* Skip padding bytes. */ + while(*buffer == 0x00) + { + if(buff_size == 0) + return -1; + + buff_size--; + buffer++; + padding++; + } + + if(VARATT_IS_1B_E(buffer)) + { + /* + * 00000001 1-byte length word, unaligned, TOAST pointer + */ + uint8 tag = VARTAG_1B_E(buffer); + uint32 len = VARTAG_SIZE(tag); + if(len > buff_size) + return -1; + + CopyAppend("(TOASTED)"); + *out_size = padding + len; + return 0; + } + + if(VARATT_IS_1B(buffer)) + { + /* + * xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to 126b) + * xxxxxxx is 1 + string length + */ + uint8 len = VARSIZE_1B(buffer); + if(len > buff_size) + return -1; + + CopyAppendEncode(buffer + 1, len - 1); + *out_size = padding + len; + return 0; + } + + if(VARATT_IS_4B_U(buffer) && buff_size >= 4) + { + /* + * xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G) + */ + uint32 len = VARSIZE_4B(buffer); + if(len > buff_size) + return -1; + + CopyAppendEncode(buffer + 4, len - 4); + *out_size = padding + len; + return 0; + } + + if(VARATT_IS_4B_C(buffer) && buff_size >= 8) + { + /* + * xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G) + */ + int decompress_ret; + uint32 len = VARSIZE_4B(buffer); + uint32 decompressed_len = 
VARRAWSIZE_4B_C(buffer); + + if(len > buff_size) + return -1; + + if(decompressed_len > sizeof(decompress_tmp_buff)) + { + printf("WARNING: Unable to decompress a string since it's too " + "large (%d bytes after decompressing). Consider increasing " + "decompress_tmp_buff size.\n", decompressed_len); + + CopyAppend("(COMPRESSED)"); + *out_size = padding + len; + return 0; + } + + decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2*sizeof(uint32), + decompress_tmp_buff, decompressed_len); + if((decompress_ret != decompressed_len) || (decompress_ret < 0)) + { + printf("WARNING: Unable to decompress a string. Data is corrupted.\n"); + CopyAppend("(COMPRESSED)"); + *out_size = padding + len; + return 0; + } + + CopyAppendEncode(decompress_tmp_buff, decompressed_len); + *out_size = padding + len; + return 0; + } + + return -9; +} + +/* + * Try to decode a tuple using a types string provided previously. + * + * Arguments: + * tupleData - pointer to the tuple data + * tupleSize - tuple size in bytes + */ +void +FormatDecode(const char* tupleData, unsigned int tupleSize) +{ + HeapTupleHeader header = (HeapTupleHeader)tupleData; + const char* data = tupleData + header->t_hoff; + unsigned int size = tupleSize - header->t_hoff; + int curr_attr; + + CopyClear(); + + for(curr_attr = 0; curr_attr < ncallbacks; curr_attr++) + { + int ret; + unsigned int processed_size = 0; + + if( (header->t_infomask & HEAP_HASNULL) && att_isnull(curr_attr, header->t_bits) ) + { + CopyAppend("\\N"); + continue; + } + + if(size <= 0) + { + printf("Error: unable to decode a tuple, no more bytes left. Partial data: %s\n", + copyString.data); + return; + } + + ret = callbacks[curr_attr](data, size, &processed_size); + if(ret < 0) + { + printf("Error: unable to decode a tuple, callback #%d returned %d. 
Partial data: %s\n", + curr_attr+1, ret, copyString.data); + return; + } + + size -= processed_size; + data += processed_size; + } + + if(size != 0) + { + printf("Error: unable to decode a tuple, %d bytes left, 0 expected. Partial data: %s\n", + size, copyString.data); + return; + } + + CopyFlush(); +} diff --git a/decode.h b/decode.h new file mode 100644 index 0000000..a91b883 --- /dev/null +++ b/decode.h @@ -0,0 +1,10 @@ +#ifndef _PG_FILEDUMP_DECODE_H_ +#define _PG_FILEDUMP_DECODE_H_ + +int +ParseAttributeTypesString(const char* str); + +void +FormatDecode(const char* tupleData, unsigned int tupleSize); + +#endif diff --git a/pg_filedump.c b/pg_filedump.c index ba55711..fc02cc6 100644 --- a/pg_filedump.c +++ b/pg_filedump.c @@ -32,6 +32,7 @@ #include "storage/checksum.h" #include "storage/checksum_impl.h" +#include "decode.h" /*** * Global variables for ease of use mostly @@ -104,7 +105,7 @@ DisplayOptions(unsigned int validOptions) FD_VERSION, FD_PG_VERSION); printf - ("\nUsage: pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-S blocksize] [-s segsize] [-n segnumber] file\n\n" + ("\nUsage: pg_filedump [-abcdfhikxy] [-R startblock [endblock]] [-D attrlist] [-S blocksize] [-s segsize] [-n segnumber] file\n\n" "Display formatted contents of a PostgreSQL heap/index/control file\n" "Defaults are: relative addressing, range of the entire file, block\n" " size as listed on block 0 in the file\n\n" @@ -115,6 +116,8 @@ DisplayOptions(unsigned int validOptions) " off all formatting options)\n" " -d Display formatted block content dump (Option will turn off\n" " all other formatting options)\n" + " -D Try to decode tuples using provided list of attribute types.\n" + " [attrlist] should be something like int,timestamp,bool,uuid\n" " -f Display formatted block content dump along with interpretation\n" " -h Display this information\n" " -i Display interpreted item details\n" @@ -308,6 +311,36 @@ ConsumeOptions(int numOptions, char **options) break; } } + /* Check for 
the special case where the user forces tuples decoding. */ + else if((optionStringLength == 2) + && (strcmp(optionString, "-D") == 0)) + { + SET_OPTION(blockOptions, BLOCK_DECODE, 'D'); + /* Only accept the decode option once */ + if (rc == OPT_RC_DUPLICATE) + break; + + /* The token immediately following -D is attrubute types string */ + if (x >= (numOptions - 2)) + { + rc = OPT_RC_INVALID; + printf("Error: Missing attribute types string.\n"); + exitCode = 1; + break; + } + + /* Next option encountered must be attribute types string */ + optionString = options[++x]; + + if(ParseAttributeTypesString(optionString) < 0) + { + rc = OPT_RC_INVALID; + printf("Error: Invalid attribute types string <%s>.\n", + optionString); + exitCode = 1; + break; + } + } /* Check for the special case where the user forces a segment number */ /* instead of having the tool determine it by file name. */ else if ((optionStringLength == 2) @@ -968,6 +1001,10 @@ FormatItemBlock(Page page) if (blockOptions & BLOCK_FORMAT) FormatBinary(itemSize, itemOffset); + /* Decode tuple data */ + if(blockOptions & BLOCK_DECODE) + FormatDecode(&buffer[itemOffset], itemSize); + if (x == maxOffset) printf("\n"); } diff --git a/pg_filedump.h b/pg_filedump.h index d0634a8..345f6c0 100644 --- a/pg_filedump.h +++ b/pg_filedump.h @@ -52,7 +52,8 @@ typedef enum blockSwitches BLOCK_FORCED = 0x00000008, /* -S: Block size forced */ BLOCK_NO_INTR = 0x00000010, /* -d: Dump straight blocks */ BLOCK_RANGE = 0x00000020, /* -R: Specific block range to dump */ - BLOCK_CHECKSUMS = 0x00000040 /* -k: verify block checksums */ + BLOCK_CHECKSUMS = 0x00000040, /* -k: verify block checksums */ + BLOCK_DECODE = 0x00000080 /* -D: Try to decode tuples */ } blockSwitches; /* Segment-related options */ diff --git a/pg_lzcompress.c b/pg_lzcompress.c new file mode 100644 index 0000000..5ec93ec --- /dev/null +++ b/pg_lzcompress.c @@ -0,0 +1,778 @@ +/* ---------- + * pg_lzcompress.c - + * + * This is an implementation of LZ compression 
for PostgreSQL. + * It uses a simple history table and generates 2-3 byte tags + * capable of backward copy information for 3-273 bytes with + * a max offset of 4095. + * + * Entry routines: + * + * int32 + * pglz_compress(const char *source, int32 slen, char *dest, + * const PGLZ_Strategy *strategy); + * + * source is the input data to be compressed. + * + * slen is the length of the input data. + * + * dest is the output area for the compressed result. + * It must be at least as big as PGLZ_MAX_OUTPUT(slen). + * + * strategy is a pointer to some information controlling + * the compression algorithm. If NULL, the compiled + * in default strategy is used. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if compression fails; in the latter + * case the contents of dest are undefined. + * + * int32 + * pglz_decompress(const char *source, int32 slen, char *dest, + * int32 rawsize) + * + * source is the compressed input. + * + * slen is the length of the compressed input. + * + * dest is the area where the uncompressed data will be + * written to. It is the callers responsibility to + * provide enough space. + * + * The data is written to buff exactly as it was handed + * to pglz_compress(). No terminating zero byte is added. + * + * rawsize is the length of the uncompressed data. + * + * The return value is the number of bytes written in the + * buffer dest, or -1 if decompression fails. + * + * The decompression algorithm and internal data format: + * + * It is made with the compressed data itself. + * + * The data representation is easiest explained by describing + * the process of decompression. + * + * If compressed_size == rawsize, then the data + * is stored uncompressed as plain bytes. Thus, the decompressor + * simply copies rawsize bytes to the destination. + * + * Otherwise the first byte tells what to do the next 8 times. + * We call this the control byte. 
+ * + * An unset bit in the control byte means, that one uncompressed + * byte follows, which is copied from input to output. + * + * A set bit in the control byte means, that a tag of 2-3 bytes + * follows. A tag contains information to copy some bytes, that + * are already in the output buffer, to the current location in + * the output. Let's call the three tag bytes T1, T2 and T3. The + * position of the data to copy is coded as an offset from the + * actual output position. + * + * The offset is in the upper nibble of T1 and in T2. + * The length is in the lower nibble of T1. + * + * So the 16 bits of a 2 byte tag are coded as + * + * 7---T1--0 7---T2--0 + * OOOO LLLL OOOO OOOO + * + * This limits the offset to 1-4095 (12 bits) and the length + * to 3-18 (4 bits) because 3 is always added to it. To emit + * a tag of 2 bytes with a length of 2 only saves one control + * bit. But we lose one byte in the possible length of a tag. + * + * In the actual implementation, the 2 byte tag's length is + * limited to 3-17, because the value 0xF in the length nibble + * has special meaning. It means, that the next following + * byte (T3) has to be added to the length value of 18. That + * makes total limits of 1-4095 for offset and 3-273 for length. + * + * Now that we have successfully decoded a tag. We simply copy + * the output that occurred bytes back to the current + * output location in the specified . Thus, a + * sequence of 200 spaces (think about bpchar fields) could be + * coded in 4 bytes. One literal space and a three byte tag to + * copy 199 bytes with a -1 offset. Whow - that's a compression + * rate of 98%! Well, the implementation needs to save the + * original data size too, so we need another 4 bytes for it + * and end up with a total compression rate of 96%, what's still + * worth a Whow. + * + * The compression algorithm + * + * The following uses numbers used in the default strategy. 
+ * + * The compressor works best for attributes of a size between + * 1K and 1M. For smaller items there's not that much chance of + * redundancy in the character sequence (except for large areas + * of identical bytes like trailing spaces) and for bigger ones + * our 4K maximum look-back distance is too small. + * + * The compressor creates a table for lists of positions. + * For each input position (except the last 3), a hash key is + * built from the 4 next input bytes and the position remembered + * in the appropriate list. Thus, the table points to linked + * lists of likely to be at least in the first 4 characters + * matching strings. This is done on the fly while the input + * is compressed into the output area. Table entries are only + * kept for the last 4096 input positions, since we cannot use + * back-pointers larger than that anyway. The size of the hash + * table is chosen based on the size of the input - a larger table + * has a larger startup cost, as it needs to be initialized to + * zero, but reduces the number of hash collisions on long inputs. + * + * For each byte in the input, its hash key (built from this + * byte and the next 3) is used to find the appropriate list + * in the table. The lists remember the positions of all bytes + * that had the same hash key in the past in increasing backward + * offset order. Now for all entries in the used lists, the + * match length is computed by comparing the characters from the + * entries position with the characters from the actual input + * position. + * + * The compressor starts with a so called "good_match" of 128. + * It is a "prefer speed against compression ratio" optimizer. + * So if the first entry looked at already has 128 or more + * matching characters, the lookup stops and that position is + * used for the next tag in the output. + * + * For each subsequent entry in the history list, the "good_match" + * is lowered by 10%. 
So the compressor will be more happy with
+ * short matches the farer it has to go back in the history.
+ * Another "speed against ratio" preference characteristic of
+ * the algorithm.
+ *
+ * Thus there are 3 stop conditions for the lookup of matches:
+ *
+ * - a match >= good_match is found
+ * - there are no more history entries to look at
+ * - the next history entry is already too far back
+ * to be coded into a tag.
+ *
+ * Finally the match algorithm checks that at least a match
+ * of 3 or more bytes has been found, because that is the smallest
+ * amount of copy information to code into a tag. If so, a tag
+ * is omitted and all the input bytes covered by that are just
+ * scanned for the history add's, otherwise a literal character
+ * is omitted and only his history entry added.
+ *
+ * Acknowledgements:
+ *
+ * Many thanks to Adisak Pochanayon, who's article about SLZ
+ * inspired me to write the PostgreSQL compression this way.
+ *
+ * Jan Wieck
+ *
+ * Copyright (c) 1999-2017, PostgreSQL Global Development Group
+ *
+ * src/common/pg_lzcompress.c
+ * ----------
+ */
+#ifndef FRONTEND
+#include "postgres.h"
+#else
+#include "postgres_fe.h"
+#endif
+
+#include <limits.h>
+
+#include "common/pg_lzcompress.h"
+
+
+/* ----------
+ * Local definitions
+ * ----------
+ */
+#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */
+#define PGLZ_HISTORY_SIZE 4096
+#define PGLZ_MAX_MATCH 273
+
+
+/* ----------
+ * PGLZ_HistEntry -
+ *
+ * Linked list for the backward history lookup
+ *
+ * All the entries sharing a hash key are linked in a doubly linked list.
+ * This makes it easy to remove an entry when it's time to recycle it
+ * (because it's more than 4K positions old).
+ * ---------- + */ +typedef struct PGLZ_HistEntry +{ + struct PGLZ_HistEntry *next; /* links for my hash key's list */ + struct PGLZ_HistEntry *prev; + int hindex; /* my current hash key */ + const char *pos; /* my input position */ +} PGLZ_HistEntry; + + +/* ---------- + * The provided standard strategies + * ---------- + */ +static const PGLZ_Strategy strategy_default_data = { + 32, /* Data chunks less than 32 bytes are not + * compressed */ + INT_MAX, /* No upper limit on what we'll try to + * compress */ + 25, /* Require 25% compression rate, or not worth + * it */ + 1024, /* Give up if no compression in the first 1KB */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 10 /* Lower good match size by 10% at every loop + * iteration */ +}; +const PGLZ_Strategy *const PGLZ_strategy_default = &strategy_default_data; + + +static const PGLZ_Strategy strategy_always_data = { + 0, /* Chunks of any size are compressed */ + INT_MAX, + 0, /* It's enough to save one single byte */ + INT_MAX, /* Never give up early */ + 128, /* Stop history lookup if a match of 128 bytes + * is found */ + 6 /* Look harder for a good match */ +}; +const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data; + + +/* ---------- + * Statically allocated work arrays for history + * ---------- + */ +static int16 hist_start[PGLZ_MAX_HISTORY_LISTS]; +static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1]; + +/* + * Element 0 in hist_entries is unused, and means 'invalid'. Likewise, + * INVALID_ENTRY_PTR in next/prev pointers mean 'invalid'. + */ +#define INVALID_ENTRY 0 +#define INVALID_ENTRY_PTR (&hist_entries[INVALID_ENTRY]) + +/* ---------- + * pglz_hist_idx - + * + * Computes the history table slot for the lookup by the next 4 + * characters in the input. + * + * NB: because we use the next 4 characters, we are not guaranteed to + * find 3-character matches; they very possibly will be in the wrong + * hash list. 
This seems an acceptable tradeoff for spreading out the + * hash keys more. + * ---------- + */ +#define pglz_hist_idx(_s,_e, _mask) ( \ + ((((_e) - (_s)) < 4) ? (int) (_s)[0] : \ + (((_s)[0] << 6) ^ ((_s)[1] << 4) ^ \ + ((_s)[2] << 2) ^ (_s)[3])) & (_mask) \ + ) + + +/* ---------- + * pglz_hist_add - + * + * Adds a new entry to the history table. + * + * If _recycle is true, then we are recycling a previously used entry, + * and must first delink it from its old hashcode's linked list. + * + * NOTE: beware of multiple evaluations of macro's arguments, and note that + * _hn and _recycle are modified in the macro. + * ---------- + */ +#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _mask) \ +do { \ + int __hindex = pglz_hist_idx((_s),(_e), (_mask)); \ + int16 *__myhsp = &(_hs)[__hindex]; \ + PGLZ_HistEntry *__myhe = &(_he)[_hn]; \ + if (_recycle) { \ + if (__myhe->prev == NULL) \ + (_hs)[__myhe->hindex] = __myhe->next - (_he); \ + else \ + __myhe->prev->next = __myhe->next; \ + if (__myhe->next != NULL) \ + __myhe->next->prev = __myhe->prev; \ + } \ + __myhe->next = &(_he)[*__myhsp]; \ + __myhe->prev = NULL; \ + __myhe->hindex = __hindex; \ + __myhe->pos = (_s); \ + /* If there was an existing entry in this hash slot, link */ \ + /* this new entry to it. However, the 0th entry in the */ \ + /* entries table is unused, so we can freely scribble on it. */ \ + /* So don't bother checking if the slot was used - we'll */ \ + /* scribble on the unused entry if it was not, but that's */ \ + /* harmless. Avoiding the branch in this critical path */ \ + /* speeds this up a little bit. */ \ + /* if (*__myhsp != INVALID_ENTRY) */ \ + (_he)[(*__myhsp)].prev = __myhe; \ + *__myhsp = _hn; \ + if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) { \ + (_hn) = 1; \ + (_recycle) = true; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_ctrl - + * + * Outputs the last and allocates a new control byte if needed. 
+ * ---------- + */ +#define pglz_out_ctrl(__ctrlp,__ctrlb,__ctrl,__buf) \ +do { \ + if ((__ctrl & 0xff) == 0) \ + { \ + *(__ctrlp) = __ctrlb; \ + __ctrlp = (__buf)++; \ + __ctrlb = 0; \ + __ctrl = 1; \ + } \ +} while (0) + + +/* ---------- + * pglz_out_literal - + * + * Outputs a literal byte to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_literal(_ctrlp,_ctrlb,_ctrl,_buf,_byte) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + *(_buf)++ = (unsigned char)(_byte); \ + _ctrl <<= 1; \ +} while (0) + + +/* ---------- + * pglz_out_tag - + * + * Outputs a backward reference tag of 2-4 bytes (depending on + * offset and length) to the destination buffer including the + * appropriate control bit. + * ---------- + */ +#define pglz_out_tag(_ctrlp,_ctrlb,_ctrl,_buf,_len,_off) \ +do { \ + pglz_out_ctrl(_ctrlp,_ctrlb,_ctrl,_buf); \ + _ctrlb |= _ctrl; \ + _ctrl <<= 1; \ + if (_len > 17) \ + { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | 0x0f); \ + (_buf)[1] = (unsigned char)(((_off) & 0xff)); \ + (_buf)[2] = (unsigned char)((_len) - 18); \ + (_buf) += 3; \ + } else { \ + (_buf)[0] = (unsigned char)((((_off) & 0xf00) >> 4) | ((_len) - 3)); \ + (_buf)[1] = (unsigned char)((_off) & 0xff); \ + (_buf) += 2; \ + } \ +} while (0) + + +/* ---------- + * pglz_find_match - + * + * Lookup the history table if the actual input stream matches + * another sequence of characters, starting somewhere earlier + * in the input buffer. + * ---------- + */ +static inline int +pglz_find_match(int16 *hstart, const char *input, const char *end, + int *lenp, int *offp, int good_match, int good_drop, int mask) +{ + PGLZ_HistEntry *hent; + int16 hentno; + int32 len = 0; + int32 off = 0; + + /* + * Traverse the linked history list until a good enough match is found. 
+ */ + hentno = hstart[pglz_hist_idx(input, end, mask)]; + hent = &hist_entries[hentno]; + while (hent != INVALID_ENTRY_PTR) + { + const char *ip = input; + const char *hp = hent->pos; + int32 thisoff; + int32 thislen; + + /* + * Stop if the offset does not fit into our tag anymore. + */ + thisoff = ip - hp; + if (thisoff >= 0x0fff) + break; + + /* + * Determine length of match. A better match must be larger than the + * best so far. And if we already have a match of 16 or more bytes, + * it's worth the call overhead to use memcmp() to check if this match + * is equal for the same size. After that we must fallback to + * character by character comparison to know the exact position where + * the diff occurred. + */ + thislen = 0; + if (len >= 16) + { + if (memcmp(ip, hp, len) == 0) + { + thislen = len; + ip += len; + hp += len; + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + } + else + { + while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) + { + thislen++; + ip++; + hp++; + } + } + + /* + * Remember this match as the best (if it is) + */ + if (thislen > len) + { + len = thislen; + off = thisoff; + } + + /* + * Advance to the next history entry + */ + hent = hent->next; + + /* + * Be happy with lesser good matches the more entries we visited. But + * no point in doing calculation if we're at end of list. + */ + if (hent != INVALID_ENTRY_PTR) + { + if (len >= good_match) + break; + good_match -= (good_match * good_drop) / 100; + } + } + + /* + * Return match information only if it results at least in one byte + * reduction. + */ + if (len > 2) + { + *lenp = len; + *offp = off; + return 1; + } + + return 0; +} + + +/* ---------- + * pglz_compress - + * + * Compresses source into dest using strategy. Returns the number of + * bytes written in buffer dest, or -1 if compression fails. 
+ * ---------- + */ +int32 +pglz_compress(const char *source, int32 slen, char *dest, + const PGLZ_Strategy *strategy) +{ + unsigned char *bp = (unsigned char *) dest; + unsigned char *bstart = bp; + int hist_next = 1; + bool hist_recycle = false; + const char *dp = source; + const char *dend = source + slen; + unsigned char ctrl_dummy = 0; + unsigned char *ctrlp = &ctrl_dummy; + unsigned char ctrlb = 0; + unsigned char ctrl = 0; + bool found_match = false; + int32 match_len; + int32 match_off; + int32 good_match; + int32 good_drop; + int32 result_size; + int32 result_max; + int32 need_rate; + int hashsz; + int mask; + + /* + * Our fallback strategy is the default. + */ + if (strategy == NULL) + strategy = PGLZ_strategy_default; + + /* + * If the strategy forbids compression (at all or if source chunk size out + * of range), fail. + */ + if (strategy->match_size_good <= 0 || + slen < strategy->min_input_size || + slen > strategy->max_input_size) + return -1; + + /* + * Limit the match parameters to the supported range. + */ + good_match = strategy->match_size_good; + if (good_match > PGLZ_MAX_MATCH) + good_match = PGLZ_MAX_MATCH; + else if (good_match < 17) + good_match = 17; + + good_drop = strategy->match_size_drop; + if (good_drop < 0) + good_drop = 0; + else if (good_drop > 100) + good_drop = 100; + + need_rate = strategy->min_comp_rate; + if (need_rate < 0) + need_rate = 0; + else if (need_rate > 99) + need_rate = 99; + + /* + * Compute the maximum result size allowed by the strategy, namely the + * input size minus the minimum wanted compression rate. This had better + * be <= slen, else we might overrun the provided output buffer. + */ + if (slen > (INT_MAX / 100)) + { + /* Approximate to avoid overflow */ + result_max = (slen / 100) * (100 - need_rate); + } + else + result_max = (slen * (100 - need_rate)) / 100; + + /* + * Experiments suggest that these hash sizes work pretty well. A large + * hash table minimizes collision, but has a higher startup cost. 
For a + * small input, the startup cost dominates. The table size must be a power + * of two. + */ + if (slen < 128) + hashsz = 512; + else if (slen < 256) + hashsz = 1024; + else if (slen < 512) + hashsz = 2048; + else if (slen < 1024) + hashsz = 4096; + else + hashsz = 8192; + mask = hashsz - 1; + + /* + * Initialize the history lists to empty. We do not need to zero the + * hist_entries[] array; its entries are initialized as they are used. + */ + memset(hist_start, 0, hashsz * sizeof(int16)); + + /* + * Compress the source directly into the output buffer. + */ + while (dp < dend) + { + /* + * If we already exceeded the maximum result size, fail. + * + * We check once per loop; since the loop body could emit as many as 4 + * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better + * allow 4 slop bytes. + */ + if (bp - bstart >= result_max) + return -1; + + /* + * If we've emitted more than first_success_by bytes without finding + * anything compressible at all, fail. This lets us fall out + * reasonably quickly when looking at incompressible input (such as + * pre-compressed data). + */ + if (!found_match && bp - bstart >= strategy->first_success_by) + return -1; + + /* + * Try to find a match in the history + */ + if (pglz_find_match(hist_start, dp, dend, &match_len, + &match_off, good_match, good_drop, mask)) + { + /* + * Create the tag and add history entries for all matched + * characters. + */ + pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off); + while (match_len--) + { + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! */ + /* The macro would do it four times - Jan. */ + } + found_match = true; + } + else + { + /* + * No match found. Copy one literal byte. + */ + pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp); + pglz_hist_add(hist_start, hist_entries, + hist_next, hist_recycle, + dp, dend, mask); + dp++; /* Do not do this ++ in the line above! 
*/ + /* The macro would do it four times - Jan. */ + } + } + + /* + * Write out the last control byte and check that we haven't overrun the + * output size allowed by the strategy. + */ + *ctrlp = ctrlb; + result_size = bp - bstart; + if (result_size >= result_max) + return -1; + + /* success */ + return result_size; +} + + +/* ---------- + * pglz_decompress - + * + * Decompresses source into dest. Returns the number of bytes + * decompressed in the destination buffer, or -1 if decompression + * fails. + * ---------- + */ +int32 +pglz_decompress(const char *source, int32 slen, char *dest, + int32 rawsize) +{ + const unsigned char *sp; + const unsigned char *srcend; + unsigned char *dp; + unsigned char *destend; + + sp = (const unsigned char *) source; + srcend = ((const unsigned char *) source) + slen; + dp = (unsigned char *) dest; + destend = dp + rawsize; + + while (sp < srcend && dp < destend) + { + /* + * Read one control byte and process the next 8 items (or as many as + * remain in the compressed input). + */ + unsigned char ctrl = *sp++; + int ctrlc; + + for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++) + { + if (ctrl & 1) + { + /* + * Otherwise it contains the match length minus 3 and the + * upper 4 bits of the offset. The next following byte + * contains the lower 8 bits of the offset. If the length is + * coded as 18, another extension tag byte tells how much + * longer the match really was (0-255). + */ + int32 len; + int32 off; + + len = (sp[0] & 0x0f) + 3; + off = ((sp[0] & 0xf0) << 4) | sp[1]; + sp += 2; + if (len == 18) + len += *sp++; + + /* + * Check for output buffer overrun, to ensure we don't clobber + * memory in case of corrupt input. Note: we must advance dp + * here to ensure the error is detected below the loop. We + * don't simply put the elog inside the loop since that will + * probably interfere with optimization. 
+ */
+ if (dp + len > destend)
+ {
+ dp += len;
+ break;
+ }
+
+ /*
+ * Now we copy the bytes specified by the tag from OUTPUT to
+ * OUTPUT. It is dangerous and platform dependent to use
+ * memcpy() here, because the copied areas could overlap
+ * extremely!
+ */
+ while (len--)
+ {
+ *dp = dp[-off];
+ dp++;
+ }
+ }
+ else
+ {
+ /*
+ * An unset control bit means LITERAL BYTE. So we just copy
+ * one from INPUT to OUTPUT.
+ */
+ if (dp >= destend) /* check for buffer overrun */
+ break; /* do not clobber memory */
+
+ *dp++ = *sp++;
+ }
+
+ /*
+ * Advance the control bit
+ */
+ ctrl >>= 1;
+ }
+ }
+
+ /*
+ * Check we decompressed the right amount.
+ */
+ if (dp != destend || sp != srcend)
+ return -1;
+
+ /*
+ * That's it.
+ */
+ return rawsize;
+}
diff --git a/stringinfo.c b/stringinfo.c
new file mode 100644
index 0000000..4cdc8c5
--- /dev/null
+++ b/stringinfo.c
@@ -0,0 +1,170 @@
+/*
+ * Code mostly borrowed from PostgreSQL's stringinfo.c
+ * palloc replaced to malloc, etc.
+ */
+
+#include "postgres.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* 64 Kb - until pg_filedump doesn't support TOAST it doesn't need more */
+#define MaxAllocSize ((Size) (64*1024))
+
+/*-------------------------
+ * StringInfoData holds information about an extensible string.
+ * data is the current buffer for the string.
+ * len is the current string length. There is guaranteed to be
+ * a terminating '\0' at data[len], although this is not very
+ * useful when the string holds binary data rather than text.
+ * maxlen is the allocated size in bytes of 'data', i.e. the maximum
+ * string size (including the terminating '\0' char) that we can
+ * currently store in 'data' without having to reallocate
+ * more space. We must always have maxlen > len.
+ * cursor is initialized to zero by makeStringInfo or initStringInfo,
+ * but is not otherwise touched by the stringinfo.c routines.
+ * Some routines use it to scan through a StringInfo.
+ *------------------------- + */ + +/* + * initStringInfo + * + * Initialize a StringInfoData struct (with previously undefined contents) + * to describe an empty string. + */ +void +initStringInfo(StringInfo str) +{ + int size = 1024; /* initial default buffer size */ + + str->data = (char *) malloc(size); + str->maxlen = size; + resetStringInfo(str); +} + +/* + * resetStringInfo + * + * Reset the StringInfo: the data buffer remains valid, but its + * previous content, if any, is cleared. + */ +void +resetStringInfo(StringInfo str) +{ + str->data[0] = '\0'; + str->len = 0; + str->cursor = 0; +} + +/* + * appendStringInfoString + * + * Append a null-terminated string to str. + */ +void +appendStringInfoString(StringInfo str, const char *s) +{ + appendBinaryStringInfo(str, s, strlen(s)); +} + +/* + * appendBinaryStringInfo + * + * Append arbitrary binary data to a StringInfo, allocating more space + * if necessary. + */ +void +appendBinaryStringInfo(StringInfo str, const char *data, int datalen) +{ + assert(str != NULL); + + /* Make more room if needed */ + enlargeStringInfo(str, datalen); + + /* OK, append the data */ + memcpy(str->data + str->len, data, datalen); + str->len += datalen; + + /* + * Keep a trailing null in place, even though it's probably useless for + * binary data. (Some callers are dealing with text but call this because + * their input isn't null-terminated.) + */ + str->data[str->len] = '\0'; +} + +/* + * enlargeStringInfo + * + * Make sure there is enough space for 'needed' more bytes + * ('needed' does not include the terminating null). + * + * External callers usually need not concern themselves with this, since + * all stringinfo.c routines do it automatically. However, if a caller + * knows that a StringInfo will eventually become X bytes large, it + * can save some malloc overhead by enlarging the buffer before starting + * to store data in it. 
+ */ +void +enlargeStringInfo(StringInfo str, int needed) +{ + Size newlen; + Size limit; + char* old_data; + + limit = MaxAllocSize; + + /* + * Guard against out-of-range "needed" values. Without this, we can get + * an overflow or infinite loop in the following. + */ + if (needed < 0) /* should not happen */ + { + printf("Error: invalid string enlargement request size: %d", needed); + exit(1); + } + + if (((Size) needed) >= (limit - (Size) str->len)) + { + printf("Error: cannot enlarge string buffer containing %d bytes by %d more bytes.", + str->len, needed); + exit(1); + } + + needed += str->len + 1; /* total space required now */ + + /* Because of the above test, we now have needed <= limit */ + + if (needed <= str->maxlen) + return; /* got enough space already */ + + /* + * We don't want to allocate just a little more space with each append; + * for efficiency, double the buffer size each time it overflows. + * Actually, we might need to more than double it if 'needed' is big... + */ + newlen = 2 * str->maxlen; + while (needed > newlen) + newlen = 2 * newlen; + + /* + * Clamp to the limit in case we went past it. Note we are assuming here + * that limit <= INT_MAX/2, else the above loop could overflow. We will + * still have newlen >= needed. + */ + if (newlen > limit) + newlen = limit; + + old_data = str->data; + str->data = (char *) realloc(str->data, (Size) newlen); + if(str->data == NULL) + { + free(old_data); + printf("Error: realloc() failed!\n"); + exit(1); + } + + str->maxlen = newlen; +} +