pg_filedump/decode.c
Teodor Sigaev 52fa0201f9 Partial data recovery (-D flag).
This feature makes it possible to partially recover data from a given segment
file in a format suitable for use in a COPY FROM statement. The list of
supported data types is not yet complete and TOAST is not yet supported, but
it's better than nothing. Hopefully data recovery will be improved in
the future.

Implemented by Aleksander Alekseev, reviewed by Dmitry Ivanov, tested
by Dmitry Ivanov and Grigoriy Smolkin.
2017-01-17 16:01:12 +03:00

815 lines
19 KiB
C

#include "postgres.h"
#include "decode.h"
#include <lib/stringinfo.h>
#include <access/htup_details.h>
#include <access/tupmacs.h>
#include <datatype/timestamp.h>
#include <common/pg_lzcompress.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <assert.h>
/*
 * Maximum accepted length of the attribute types string, not counting the
 * trailing zero.
 */
#define ATTRTYPES_STR_MAX_LEN (1024-1)
/*
 * Signature shared by all type decoders.
 *
 * Arguments:
 * buffer - pointer to the not yet decoded part of the tuple data
 * buff_size - number of bytes available in buffer
 * out_size - on success receives the number of bytes consumed
 *
 * Return value is:
 * == 0 - no error
 * < 0 - the value could not be decoded
 */
typedef int (*decode_callback_t)(const char* buffer, unsigned int buff_size,
unsigned int* out_size);
/*
 * Forward declarations of the per-type decoders. They are referenced by
 * callback_table below and all follow the decode_callback_t signature.
 */
static int
decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size);
static int
decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size);
/* Number of entries of `callbacks` filled in so far by AddTypeCallback() */
static int ncallbacks = 0;
/*
 * Decoders to apply to the tuple attributes, one per attribute, stored in
 * the order in which the types were registered.
 */
static decode_callback_t callbacks[ATTRTYPES_STR_MAX_LEN / 2] = { NULL };
/* Associates a (lowercase) type name with the decoder handling that type */
typedef struct {
char* name;
decode_callback_t callback;
} ParseCallbackTableItem;
/*
 * Table of all supported type names. Several names may share one decoder.
 * The table is terminated by an entry whose name is NULL.
 */
static ParseCallbackTableItem callback_table[] = {
{ "smallserial", &decode_smallint },
{ "smallint", &decode_smallint },
{ "int", &decode_int },
{ "serial", &decode_int },
{ "bigint", &decode_bigint },
{ "bigserial", &decode_bigint },
{ "time", &decode_time },
{ "timetz", &decode_timetz },
{ "date", &decode_date },
{ "timestamp", &decode_timestamp },
{ "float4", &decode_float4 },
{ "float8", &decode_float8 },
{ "float", &decode_float8 },
{ "bool", &decode_bool },
{ "uuid", &decode_uuid },
{ "macaddr", &decode_macaddr },
/* internally all string types are stored the same way */
{ "char", &decode_string },
{ "varchar", &decode_string },
{ "text", &decode_string },
{ "json", &decode_string },
{ "xml", &decode_string },
{ NULL, NULL},
};
/* Accumulates the COPY line for the tuple currently being decoded */
static StringInfoData copyString;
/* Set once copyString has been initialized (done lazily by CopyAppend) */
static bool copyStringInitDone = false;
/*
 * Temporary buffer for storing decompressed data.
 *
 * 64K should be enough in most cases. If it is not, the user can change this
 * limit manually. Unfortunately there is no way to know how much memory the
 * user is willing to allocate.
 */
static char decompress_tmp_buff[64*1024];
/*
 * Used by some PostgreSQL macro definitions.
 *
 * Prints the details of the failed condition to stdout and terminates the
 * program with exit code 1. Any string argument that is NULL is printed
 * as "(NULL)".
 */
void
ExceptionalCondition(const char *conditionName,
                     const char *errorType,
                     const char *fileName,
                     int lineNumber)
{
    const char *name = conditionName;
    const char *type = errorType;
    const char *fname = fileName;

    if (name == NULL)
        name = "(NULL)";
    if (type == NULL)
        type = "(NULL)";
    if (fname == NULL)
        fname = "(NULL)";

    printf("Exceptional condition: name = %s, type = %s, fname = %s, line = %d\n",
           name, type, fname, lineNumber);
    exit(1);
}
/*
 * Append given string to current COPY line.
 *
 * The COPY buffer is initialized lazily on the first call. Passing NULL
 * performs only that initialization. A tab separator is emitted before the
 * new string whenever the buffer is not empty; note that this means no
 * separator follows a leading empty field.
 */
static void
CopyAppend(const char* str)
{
    if (copyStringInitDone == false)
    {
        initStringInfo(&copyString);
        copyStringInitDone = true;
    }

    /* A NULL argument means the caller only wanted the initialization */
    if (str != NULL)
    {
        const bool line_has_data = (copyString.data[0] != '\0');

        if (line_has_data)
            appendStringInfoString(&copyString, "\t");

        appendStringInfoString(&copyString, str);
    }
}
/*
 * Append given string to current COPY line and encode special symbols
 * like \0, \r, \n, \t and \\.
 *
 * Arguments:
 * str - data to append; it may contain embedded zero bytes
 * orig_len - number of bytes of str to process
 *
 * The encoded text is assembled in a fixed-size static buffer. If the input
 * does not fit, an error is printed and the program exits with code 1.
 */
static void
CopyAppendEncode(const char* str, int orig_len)
{
    /*
     * Should be enough in most cases. If it is not, the user can change this
     * limit manually. Unfortunately there is no way to know how much memory
     * the user is willing to allocate.
     */
    static char tmp_buff[64*1024];
    /* Reserve one byte for a trailing zero. */
    const int max_offset = sizeof(tmp_buff) - 2;
    int curr_offset = 0;
    int len;

    for (len = orig_len; len > 0; len--, str++)
    {
        /* Second character of the escape sequence, or 0 for regular symbols */
        char escape;

        /*
         * Make sure there is enough free space for at least one special
         * symbol and a trailing zero.
         */
        if (curr_offset > max_offset - 2)
        {
            printf("ERROR: Unable to properly encode a string since it's too "
                   "large (%d bytes). Try to increase tmp_buff size in CopyAppendEncode "
                   "procedure.\n", orig_len);
            exit(1);
        }

        switch (*str)
        {
            case '\0':
                /*
                 * Since we are working with potentially corrupted data we
                 * can encounter \0 as well.
                 */
                escape = '0';
                break;
            case '\r':
                escape = 'r';
                break;
            case '\n':
                escape = 'n';
                break;
            case '\t':
                /* A tab must become "\t"; a raw tab would split the field */
                escape = 't';
                break;
            case '\\':
                escape = '\\';
                break;
            default:
                escape = '\0';
                break;
        }

        if (escape != '\0')
        {
            tmp_buff[curr_offset++] = '\\';
            tmp_buff[curr_offset++] = escape;
        }
        else
        {
            /* It's a regular symbol. */
            tmp_buff[curr_offset++] = *str;
        }
    }

    tmp_buff[curr_offset] = '\0';
    CopyAppend(tmp_buff);
}
/*
 * CopyAppend version with format string support.
 *
 * The text is formatted with snprintf() into a 512-byte local buffer, so a
 * longer result is truncated before it is passed to CopyAppend().
 */
#define CopyAppendFmt(fmt, ...) do { \
char __copy_format_buff[512]; \
snprintf(__copy_format_buff, sizeof(__copy_format_buff), fmt, ##__VA_ARGS__); \
CopyAppend(__copy_format_buff); \
} while(0)
/*
 * Discard accumulated COPY line.
 *
 * The buffer is reset to an empty string; its lazy initialization is
 * triggered first so that the reset always operates on a valid buffer.
 */
static void
CopyClear(void)
{
    /* CopyAppend(NULL) does nothing except the one-time initialization */
    CopyAppend(NULL);

    resetStringInfo(&copyString);
}
/*
 * Output and then clear accumulated COPY line.
 *
 * The line is written to stdout with a "COPY: " prefix.
 */
static void
CopyFlush(void)
{
    /* CopyAppend(NULL) does nothing except the one-time initialization */
    CopyAppend(NULL);

    printf("COPY: %s\n", copyString.data);

    CopyClear();
}
/*
* Add a callback to `callbacks` table for given type name
*
* Arguments:
* type - name of a single type, always lowercase
*
* Return value is:
* == 0 - no error
* < 0 - invalid type name
*/
static int
AddTypeCallback(const char* type)
{
int idx = 0;
if(*type == '\0') /* ignore empty strings */
return 0;
while(callback_table[idx].name != NULL)
{
if(strcmp(callback_table[idx].name, type) == 0)
{
callbacks[ncallbacks] = callback_table[idx].callback;
ncallbacks++;
return 0;
}
idx++;
}
printf("Error: type <%s> doesn't exist or is not currently supported\n", type);
printf("Full list of known types: ");
idx = 0;
while(callback_table[idx].name != NULL)
{
printf("%s ", callback_table[idx].name);
idx++;
}
printf("\n");
return -1;
}
/*
 * Decode attribute types string like "int,timestamp,bool,uuid"
 *
 * The string is lowercased, split at commas and every item is registered
 * through AddTypeCallback(). Empty items are ignored by AddTypeCallback().
 *
 * Arguments:
 * str - types string
 * Return value is:
 * == 0 - if string is valid
 * < 0 - if string is invalid (too long, or contains an unsupported type)
 */
int
ParseAttributeTypesString(const char* str)
{
    char attrtypes[ATTRTYPES_STR_MAX_LEN+1];
    char *curr_type;
    size_t i;
    const size_t len = strlen(str);

    if (len > (size_t) ATTRTYPES_STR_MAX_LEN)
    {
        printf("Error: attribute types string is longer then %u characters!\n",
               (unsigned int) ATTRTYPES_STR_MAX_LEN);
        return -1;
    }

    /* The length was checked above, so the copy (with its zero) fits */
    memcpy(attrtypes, str, len + 1);

    /*
     * tolower() is only defined for values representable as unsigned char
     * (and EOF), hence the cast for bytes with the high bit set.
     */
    for (i = 0; i < len; i++)
        attrtypes[i] = (char) tolower((unsigned char) attrtypes[i]);

    curr_type = attrtypes;
    while (curr_type != NULL)
    {
        char *next_type = strchr(curr_type, ',');

        if (next_type != NULL)
        {
            /* Terminate the current item and step over the comma */
            *next_type = '\0';
            next_type++;
        }

        if (AddTypeCallback(curr_type) < 0)
            return -1;

        curr_type = next_type;
    }

    return 0;
}
/*
 * Convert Julian day number (JDN) to a date.
 * Copy-pasted from src/backend/utils/adt/datetime.c
 *
 * Arguments:
 * jd - Julian day number to convert
 * year - receives the year; callers in this file treat values <= 0 as BC
 * month - receives the month, in range 1 .. MONTHS_PER_YEAR
 * day - receives the day of the month
 */
static void
j2date(int jd, int *year, int *month, int *day)
{
unsigned int julian;
unsigned int quad;
unsigned int extra;
int y;
/* All intermediate arithmetic below is done in unsigned int */
julian = jd;
julian += 32044;
quad = julian / 146097;
extra = (julian - quad * 146097) * 4 + 3;
julian += 60 + quad * 3 + extra / 146097;
quad = julian / 1461;
julian -= quad * 1461;
y = julian * 4 / 1461;
julian = ((y != 0) ? ((julian + 305) % 365) : ((julian + 306) % 366))
+ 123;
y += quad * 4;
*year = y - 4800;
quad = julian * 2141 / 65536;
*day = julian - 7834 * quad / 256;
*month = (quad + 10) % MONTHS_PER_YEAR + 1;
}
/*
 * Decode a smallint type
 *
 * The value is read from the first int16-aligned address at or after buffer.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to alignment padding plus value size
 * -1 - the alignment padding does not fit into the buffer
 * -2 - not enough bytes left for the value itself
 */
static int
decode_smallint(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    const char* aligned = (const char*)TYPEALIGN(sizeof(int16), (uintptr_t)buffer);
    const unsigned int skipped = (unsigned int)( (uintptr_t)aligned - (uintptr_t)buffer );
    unsigned int remaining;

    if (skipped > buff_size)
        return -1;

    remaining = buff_size - skipped;
    if (remaining < sizeof(int16))
        return -2;

    CopyAppendFmt("%d", (int)(*(int16*)aligned));
    *out_size = sizeof(int16) + skipped;
    return 0;
}
/*
 * Decode an int type
 *
 * The value is read from the first int32-aligned address at or after buffer.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to alignment padding plus value size
 * -1 - the alignment padding does not fit into the buffer
 * -2 - not enough bytes left for the value itself
 */
static int
decode_int(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    const char* value_ptr = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer);
    const unsigned int pad = (unsigned int)( (uintptr_t)value_ptr - (uintptr_t)buffer );

    if (pad > buff_size)
        return -1;

    if (buff_size - pad < sizeof(int32))
        return -2;

    CopyAppendFmt("%d", *(int32*)value_ptr);
    *out_size = sizeof(int32) + pad;
    return 0;
}
/* Decode a bigint type */
static int
decode_bigint(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer);
unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer );
if(buff_size < delta)
return -1;
buff_size -= delta;
buffer = new_buffer;
if(buff_size < sizeof(int64))
return -2;
CopyAppendFmt("%ld", *(int64*)buffer);
*out_size = sizeof(int64) + delta;
return 0;
}
/* Decode a time type */
static int
decode_time(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer);
unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer );
int64 timestamp, timestamp_sec;
if(buff_size < delta)
return -1;
buff_size -= delta;
buffer = new_buffer;
if(buff_size < sizeof(int64))
return -2;
timestamp = *(int64*)buffer;
timestamp_sec = timestamp / 1000000;
*out_size = sizeof(int64) + delta;
CopyAppendFmt("%02ld:%02ld:%02ld.%06ld",
timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
timestamp % 1000000);
return 0;
}
/* Decode a timetz type */
static int
decode_timetz(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer);
unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer );
int64 timestamp, timestamp_sec;
int32 tz_sec, tz_min;
if(buff_size < delta)
return -1;
buff_size -= delta;
buffer = new_buffer;
if(buff_size < (sizeof(int64) + sizeof(int32)))
return -2;
timestamp = *(int64*)buffer;
tz_sec = *(int32*)(buffer + sizeof(int64));
timestamp_sec = timestamp / 1000000;
tz_min = - (tz_sec / 60);
*out_size = sizeof(int64) + sizeof(int32) + delta;
CopyAppendFmt("%02ld:%02ld:%02ld.%06ld%c%02d:%02d",
timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
timestamp % 1000000, (tz_min > 0 ? '+' : '-'), abs(tz_min / 60), abs(tz_min % 60));
return 0;
}
/* Decode a date type */
static int
decode_date(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
const char* new_buffer = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer);
unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer );
int32 jd, year, month, day;
if(buff_size < delta)
return -1;
buff_size -= delta;
buffer = new_buffer;
if(buff_size < sizeof(int32))
return -2;
*out_size = sizeof(int32) + delta;
jd = *(int32*)buffer + POSTGRES_EPOCH_JDATE;
j2date(jd, &year, &month, &day);
CopyAppendFmt("%04d-%02d-%02d%s", (year <= 0) ? -year + 1 : year, month, day, (year <= 0) ? " BC" : "");
return 0;
}
/* Decode a timestamp type */
static int
decode_timestamp(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
const char* new_buffer = (const char*)TYPEALIGN(sizeof(int64), (uintptr_t)buffer);
unsigned int delta = (unsigned int)( (uintptr_t)new_buffer - (uintptr_t)buffer );
int64 timestamp, timestamp_sec;
int32 jd, year, month, day;
if(buff_size < delta)
return -1;
buff_size -= delta;
buffer = new_buffer;
if(buff_size < sizeof(int64))
return -2;
*out_size = sizeof(int64) + delta;
timestamp = *(int64*)buffer;
jd = timestamp / USECS_PER_DAY;
if (jd != 0)
timestamp -= jd * USECS_PER_DAY;
if (timestamp < INT64CONST(0))
{
timestamp += USECS_PER_DAY;
jd -= 1;
}
/* add offset to go from J2000 back to standard Julian date */
jd += POSTGRES_EPOCH_JDATE;
j2date(jd, &year, &month, &day);
timestamp_sec = timestamp / 1000000;
CopyAppendFmt("%04d-%02d-%02d %02ld:%02ld:%02ld.%06ld%s",
(year <= 0) ? -year + 1 : year, month, day,
timestamp_sec / 60 / 60, (timestamp_sec / 60) % 60, timestamp_sec % 60,
timestamp % 1000000,
(year <= 0) ? " BC" : "");
return 0;
}
/*
 * Decode a float4 type
 *
 * The value is read from the first float-aligned address at or after buffer
 * and printed with 12 digits after the decimal point.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to alignment padding plus value size
 * -1 - the alignment padding does not fit into the buffer
 * -2 - not enough bytes left for the value itself
 */
static int
decode_float4(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    const char* value_ptr = (const char*)TYPEALIGN(sizeof(float), (uintptr_t)buffer);
    const unsigned int pad = (unsigned int)( (uintptr_t)value_ptr - (uintptr_t)buffer );

    if (pad > buff_size)
        return -1;

    if (buff_size - pad < sizeof(float))
        return -2;

    CopyAppendFmt("%.12f", *(float*)value_ptr);
    *out_size = sizeof(float) + pad;
    return 0;
}
/*
 * Decode a float8 type
 *
 * The value is read from the first double-aligned address at or after
 * buffer and printed with 12 digits after the decimal point.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to alignment padding plus value size
 * -1 - the alignment padding does not fit into the buffer
 * -2 - not enough bytes left for the value itself
 */
static int
decode_float8(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    const char* value_ptr = (const char*)TYPEALIGN(sizeof(double), (uintptr_t)buffer);
    const unsigned int pad = (unsigned int)( (uintptr_t)value_ptr - (uintptr_t)buffer );

    if (pad > buff_size)
        return -1;

    if (buff_size - pad < sizeof(double))
        return -2;

    CopyAppendFmt("%.12lf", *(double*)value_ptr);
    *out_size = sizeof(double) + pad;
    return 0;
}
/*
 * Decode an uuid type
 *
 * The 16 bytes at buffer are printed as lowercase hex digits grouped
 * 8-4-4-4-12. No alignment is applied.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to 16
 * -1 - fewer than 16 bytes are available
 */
static int
decode_uuid(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    enum { UUID_BYTES = 16 };
    const unsigned char* b = (const unsigned char*)buffer;

    if (buff_size < UUID_BYTES)
        return -1;

    CopyAppendFmt("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                  b[0], b[1], b[2], b[3],
                  b[4], b[5],
                  b[6], b[7],
                  b[8], b[9],
                  b[10], b[11], b[12], b[13], b[14], b[15]
    );

    *out_size = UUID_BYTES;
    return 0;
}
/*
 * Decode a macaddr type
 *
 * Six bytes are read from the first int32-aligned address at or after
 * buffer and printed as colon-separated lowercase hex pairs.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to alignment padding plus 6
 * -1 - the alignment padding does not fit into the buffer
 * -2 - not enough bytes left for the value itself
 */
static int
decode_macaddr(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    enum { MACADDR_BYTES = 6 };
    const char* value_ptr = (const char*)TYPEALIGN(sizeof(int32), (uintptr_t)buffer);
    const unsigned int pad = (unsigned int)( (uintptr_t)value_ptr - (uintptr_t)buffer );
    const unsigned char* m = (const unsigned char*)value_ptr;

    if (pad > buff_size)
        return -1;

    if (buff_size - pad < MACADDR_BYTES)
        return -2;

    CopyAppendFmt("%02x:%02x:%02x:%02x:%02x:%02x",
                  m[0], m[1], m[2], m[3], m[4], m[5]
    );

    *out_size = MACADDR_BYTES + pad;
    return 0;
}
/*
 * Decode a bool type
 *
 * Appends "t" when the stored value is true and "f" otherwise.
 *
 * Return value is:
 * == 0 - no error, *out_size is set to sizeof(bool)
 * -1 - not enough bytes left for the value
 */
static int
decode_bool(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
    if (buff_size < sizeof(bool))
        return -1;

    if (*(bool*)buffer)
        CopyAppend("t");
    else
        CopyAppend("f");

    *out_size = sizeof(bool);
    return 0;
}
/* Decode char(N), varchar(N), text, json or xml types */
static int
decode_string(const char* buffer, unsigned int buff_size, unsigned int* out_size)
{
int padding = 0;
/* Skip padding bytes. */
while(*buffer == 0x00)
{
if(buff_size == 0)
return -1;
buff_size--;
buffer++;
padding++;
}
if(VARATT_IS_1B_E(buffer))
{
/*
* 00000001 1-byte length word, unaligned, TOAST pointer
*/
uint8 tag = VARTAG_1B_E(buffer);
uint32 len = VARTAG_SIZE(tag);
if(len > buff_size)
return -1;
CopyAppend("(TOASTED)");
*out_size = padding + len;
return 0;
}
if(VARATT_IS_1B(buffer))
{
/*
* xxxxxxx1 1-byte length word, unaligned, uncompressed data (up to 126b)
* xxxxxxx is 1 + string length
*/
uint8 len = VARSIZE_1B(buffer);
if(len > buff_size)
return -1;
CopyAppendEncode(buffer + 1, len - 1);
*out_size = padding + len;
return 0;
}
if(VARATT_IS_4B_U(buffer) && buff_size >= 4)
{
/*
* xxxxxx00 4-byte length word, aligned, uncompressed data (up to 1G)
*/
uint32 len = VARSIZE_4B(buffer);
if(len > buff_size)
return -1;
CopyAppendEncode(buffer + 4, len - 4);
*out_size = padding + len;
return 0;
}
if(VARATT_IS_4B_C(buffer) && buff_size >= 8)
{
/*
* xxxxxx10 4-byte length word, aligned, *compressed* data (up to 1G)
*/
int decompress_ret;
uint32 len = VARSIZE_4B(buffer);
uint32 decompressed_len = VARRAWSIZE_4B_C(buffer);
if(len > buff_size)
return -1;
if(decompressed_len > sizeof(decompress_tmp_buff))
{
printf("WARNING: Unable to decompress a string since it's too "
"large (%d bytes after decompressing). Consider increasing "
"decompress_tmp_buff size.\n", decompressed_len);
CopyAppend("(COMPRESSED)");
*out_size = padding + len;
return 0;
}
decompress_ret = pglz_decompress(VARDATA_4B_C(buffer), len - 2*sizeof(uint32),
decompress_tmp_buff, decompressed_len);
if((decompress_ret != decompressed_len) || (decompress_ret < 0))
{
printf("WARNING: Unable to decompress a string. Data is corrupted.\n");
CopyAppend("(COMPRESSED)");
*out_size = padding + len;
return 0;
}
CopyAppendEncode(decompress_tmp_buff, decompressed_len);
*out_size = padding + len;
return 0;
}
return -9;
}
/*
 * Try to decode a tuple using a types string provided previously.
 *
 * Every registered callback is applied in turn to the tuple data that
 * follows the header. Attributes marked as NULL in the tuple's null bitmap
 * are emitted as \N without consuming any data. On success the resulting
 * COPY line is printed; on failure an error with the partial data is
 * printed instead.
 *
 * Arguments:
 * tupleData - pointer to the tuple data
 * tupleSize - tuple size in bytes
 */
void
FormatDecode(const char* tupleData, unsigned int tupleSize)
{
    HeapTupleHeader header = (HeapTupleHeader)tupleData;
    const char* data;
    unsigned int size;
    int curr_attr;

    CopyClear();

    /*
     * With corrupted data the header may claim to be larger than the whole
     * tuple; the subtraction below would then wrap around.
     */
    if (header->t_hoff > tupleSize)
    {
        printf("Error: unable to decode a tuple, header size %u exceeds tuple size %u.\n",
               (unsigned int) header->t_hoff, tupleSize);
        return;
    }

    data = tupleData + header->t_hoff;
    size = tupleSize - header->t_hoff;

    for (curr_attr = 0; curr_attr < ncallbacks; curr_attr++)
    {
        int ret;
        unsigned int processed_size = 0;

        if ( (header->t_infomask & HEAP_HASNULL) && att_isnull(curr_attr, header->t_bits) )
        {
            CopyAppend("\\N");
            continue;
        }

        if (size == 0)
        {
            printf("Error: unable to decode a tuple, no more bytes left. Partial data: %s\n",
                   copyString.data);
            return;
        }

        ret = callbacks[curr_attr](data, size, &processed_size);
        if (ret < 0)
        {
            printf("Error: unable to decode a tuple, callback #%d returned %d. Partial data: %s\n",
                   curr_attr+1, ret, copyString.data);
            return;
        }

        /* A callback must never claim more bytes than it was given */
        if (processed_size > size)
        {
            printf("Error: unable to decode a tuple, callback #%d consumed %u bytes but only %u were left. Partial data: %s\n",
                   curr_attr+1, processed_size, size, copyString.data);
            return;
        }

        size -= processed_size;
        data += processed_size;
    }

    if (size != 0)
    {
        printf("Error: unable to decode a tuple, %u bytes left, 0 expected. Partial data: %s\n",
               size, copyString.data);
        return;
    }

    CopyFlush();
}