pg_repack/bin/pg_reorg.c

647 lines
16 KiB
C
Raw Normal View History

2008-12-08 04:32:10 +00:00
/*
 * pg_reorg.c: bin/pg_reorg.c
 *
 * Copyright (c) 2008-2009, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 */

/**
 * @brief Client Modules
 */
/*
 * Program identification strings.  They have external linkage and are not
 * referenced in the visible part of this file; presumably the pgut
 * option/help layer reads them -- TODO confirm.
 */
const char *PROGRAM_VERSION = "1.0.6";
const char *PROGRAM_URL = "http://reorg.projects.postgresql.org/";
const char *PROGRAM_EMAIL = "reorg-general@lists.pgfoundry.org";
2008-12-08 04:32:10 +00:00
2009-04-22 07:26:12 +00:00
#include "pgut/pgut.h"
2008-12-08 04:32:10 +00:00
2009-04-22 07:26:12 +00:00
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
2008-12-08 04:32:10 +00:00
/*
 * Value passed as the "count" argument of reorg.reorg_apply() on every pass
 * of the catch-up loop in reorg_one_table().
 */
#define APPLY_COUNT     1000

/*
 * Queries returning, as one array, the lock identifiers of transactions held
 * by other backends (everything in pg_locks except our own pid).
 *
 * Servers >= 8.3 are queried by virtual transaction id; older servers by
 * transaction id.
 */
#define SQL_XID_SNAPSHOT_80300 \
    "SELECT reorg.array_accum(virtualtransaction) FROM pg_locks"\
    " WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid()"

#define SQL_XID_SNAPSHOT_80200 \
    "SELECT reorg.array_accum(transactionid) FROM pg_locks"\
    " WHERE locktype = 'transactionid' AND pid <> pg_backend_pid()"

/*
 * Queries returning one row while at least one of the transactions listed in
 * $1 (an array produced by the matching SQL_XID_SNAPSHOT_* query) still holds
 * its lock, and no row otherwise.
 */
#define SQL_XID_ALIVE_80300 \
    "SELECT 1 FROM pg_locks WHERE locktype = 'virtualxid'"\
    " AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1) LIMIT 1"

#define SQL_XID_ALIVE_80200 \
    "SELECT 1 FROM pg_locks WHERE locktype = 'transactionid'"\
    " AND pid <> pg_backend_pid() AND transactionid = ANY($1) LIMIT 1"

/*
 * Pick the variant matching the connected server.  Both macros read the
 * "connection" variable, which is not defined in this file.
 */
#define SQL_XID_SNAPSHOT \
    (PQserverVersion(connection) >= 80300 \
    ? SQL_XID_SNAPSHOT_80300 \
    : SQL_XID_SNAPSHOT_80200)

#define SQL_XID_ALIVE \
    (PQserverVersion(connection) >= 80300 \
    ? SQL_XID_ALIVE_80300 \
    : SQL_XID_ALIVE_80200)
/*
 * per-table information
 *
 * One instance is filled by reorg_one_database() from a single row of
 * "SELECT * FROM reorg.tables"; the members are read in column order.
 * String members are not copied: they point either into that PGresult or,
 * for create_table when an ORDER BY clause is appended, into the caller's
 * StringInfo buffer, so they are only valid while those stay alive.
 */
typedef struct reorg_table
{
    const char *target_name; /* target: relname */
    Oid target_oid; /* target: OID */
    Oid target_toast; /* target: toast OID */
    Oid target_tidx; /* target: toast index OID */
    Oid pkid; /* target: PK OID */
    Oid ckid; /* target: CK OID */
    const char *create_pktype; /* CREATE TYPE pk */
    const char *create_log; /* CREATE TABLE log */
    const char *create_trigger; /* CREATE TRIGGER z_reorg_trigger */
    const char *create_table; /* CREATE TABLE table AS SELECT */
    const char *delete_log; /* DELETE FROM log */
    const char *lock_table; /* LOCK TABLE table */
    /* The five statements below are handed to reorg.reorg_apply() as $1..$5. */
    const char *sql_peek; /* SQL used in flush */
    const char *sql_insert; /* SQL used in flush */
    const char *sql_delete; /* SQL used in flush */
    const char *sql_update; /* SQL used in flush */
    const char *sql_pop; /* SQL used in flush */
} reorg_table;
/*
 * per-index information
 *
 * Filled in reorg_one_table() from one row of the pg_index query:
 * indexrelid and the text returned by reorg.reorg_indexdef().
 */
typedef struct reorg_index
{
    Oid target_oid; /* target: OID */
    const char *create_index; /* CREATE INDEX */
} reorg_index;
/* Forward declarations of the file-local helpers defined below. */
static void reorg_all_databases(const char *orderby);
static bool reorg_one_database(const char *orderby, const char *table);
static void reorg_one_table(const reorg_table *table, const char *orderby);

static char *getstr(PGresult *res, int row, int col);
static Oid getoid(PGresult *res, int row, int col);
/* SQLSTATE codes this program reacts to. */
#define SQLSTATE_INVALID_SCHEMA_NAME    "3F000"
#define SQLSTATE_LOCK_NOT_AVAILABLE     "55P03"

/*
 * Return true when the SQLSTATE carried by res equals state.
 *
 * PQresultErrorField() returns NULL when the result carries no SQLSTATE
 * (for example when it is not an error result), and passing NULL to
 * strcmp() is undefined behaviour.  A missing SQLSTATE is therefore
 * reported as "not equal".
 */
static bool
sqlstate_equals(PGresult *res, const char *state)
{
    const char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);

    return sqlstate != NULL && strcmp(sqlstate, state) == 0;
}
/* -v/--verbose: print per-step diagnostics to stderr. */
static bool verbose = false;

/* Cleared by -Z/--no-analyze: run ANALYZE on each table after the swap. */
static bool analyze = true;
/*
 * The table being re-organized. If not null, we need to clean up temp
 * objects before the program exits (see pgut_cleanup).
 */
static const reorg_table *current_table = NULL;
/*
 * Write the decimal representation of value into buffer and return buffer.
 * The caller must supply room for every digit plus the terminating NUL
 * (11 bytes for a 32-bit unsigned int).
 */
static char *
utoa(unsigned int value, char *buffer)
{
    char    reversed[sizeof(unsigned int) * 3 + 1];
    int     ndigits = 0;
    char   *out = buffer;

    /* Produce the digits least-significant first; zero yields one digit. */
    do
    {
        reversed[ndigits++] = (char) ('0' + (value % 10));
        value /= 10;
    } while (value != 0);

    /* Emit them most-significant first. */
    while (ndigits > 0)
        *out++ = reversed[--ndigits];
    *out = '\0';

    return buffer;
}
/*
 * Long options specific to this program; each maps to the short option
 * letter handled in pgut_argument().  The all-zero entry terminates the
 * table.
 */
const struct option pgut_options[] = {
    {"verbose", no_argument, NULL, 'v'},
    {"all", no_argument, NULL, 'a'},
    {"table", required_argument, NULL, 't'},
    {"no-order", no_argument, NULL, 'n'},
    {"order-by", required_argument, NULL, 'o'},
    {"no-analyze", no_argument, NULL, 'Z'},
    {NULL, 0, NULL, 0}
};
/* Values collected from the command line by pgut_argument(). */
bool alldb = false;             /* -a: process every connectable database */
const char *table = NULL;       /* -t: restrict the run to this table */
const char *orderby = NULL;     /* NULL: cluster key, "": no order (-n),
                                 * otherwise the -o column list */
bool
2009-04-22 07:26:12 +00:00
pgut_argument(int c, const char *arg)
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
switch (c)
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
case 'v':
verbose = true;
break;
case 'a':
alldb = true;
break;
case 't':
assign_option(&table, c, arg);
2008-12-08 04:32:10 +00:00
break;
2009-04-22 07:26:12 +00:00
case 'n':
assign_option(&orderby, c, "");
2009-04-22 07:26:12 +00:00
break;
case 'o':
assign_option(&orderby, c, arg);
2008-12-08 04:32:10 +00:00
break;
case 'Z':
analyze = false;
break;
2008-12-08 04:32:10 +00:00
default:
2009-04-22 07:26:12 +00:00
return false;
2008-12-08 04:32:10 +00:00
}
2009-04-22 07:26:12 +00:00
return true;
}
int
main(int argc, char *argv[])
{
parse_options(argc, argv);
2008-12-08 04:32:10 +00:00
if (alldb)
{
if (table)
elog(ERROR, "cannot reorg a specific table in all databases");
2008-12-08 04:32:10 +00:00
reorg_all_databases(orderby);
}
else
{
if (!reorg_one_database(orderby, table))
elog(ERROR, "%s is not installed", PROGRAM_NAME);
2008-12-08 04:32:10 +00:00
}
return 0;
}
/*
 * Call reorg_one_database for each database.
 *
 * The database list (all rows of pg_database with datallowconn, ordered by
 * name) is fetched through a connection to "postgres".  Unless "quiet" is
 * set, one progress line per database is written to stdout; databases for
 * which reorg_one_database() returns false are marked " ... skipped".
 *
 * NOTE(review): the global dbname is left pointing into "result", which is
 * released by PQclear() below.  Nothing visible here reads dbname afterwards,
 * but confirm that no later reconnect() relies on it.
 */
static void
reorg_all_databases(const char *orderby)
{
    PGresult   *result;
    int         i;

    dbname = "postgres";
    reconnect();
    result = execute("SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;", 0, NULL);
    disconnect();

    for (i = 0; i < PQntuples(result); i++)
    {
        bool    ret;

        /* reorg_one_database() reconnects using the global dbname. */
        dbname = PQgetvalue(result, i, 0);

        if (!quiet)
        {
            printf("%s: reorg database \"%s\"", PROGRAM_NAME, dbname);
            fflush(stdout);
        }

        ret = reorg_one_database(orderby, NULL);

        if (!quiet)
        {
            if (ret)
                printf("\n");
            else
                printf(" ... skipped\n");
            fflush(stdout);
        }
    }

    PQclear(result);
}
/*
 * Fetch a field as a C string, or NULL when the field is SQL NULL.
 * The returned pointer refers to storage owned by res; it is not copied.
 */
static char *
getstr(PGresult *res, int row, int col)
{
    return PQgetisnull(res, row, col) ? NULL : PQgetvalue(res, row, col);
}
/*
 * Fetch a field as an Oid by parsing its text as a base-10 unsigned number.
 * A SQL NULL field yields InvalidOid.
 */
static Oid
getoid(PGresult *res, int row, int col)
{
    const char *text;

    if (PQgetisnull(res, row, col))
        return InvalidOid;

    text = PQgetvalue(res, row, col);
    return (Oid) strtoul(text, NULL, 10);
}
/*
* Call reorg_one_table for the target table or each table in a database.
*/
static bool
2008-12-08 04:32:10 +00:00
reorg_one_database(const char *orderby, const char *table)
{
bool ret = true;
2008-12-08 04:32:10 +00:00
PGresult *res;
int i;
int num;
StringInfoData sql;
2008-12-08 04:32:10 +00:00
initStringInfo(&sql);
2008-12-08 04:32:10 +00:00
reconnect();
/* Restrict search_path to system catalog. */
command("SET search_path = pg_catalog, pg_temp, public", 0, NULL);
2008-12-08 04:32:10 +00:00
/* To avoid annoying "create implicit ..." messages. */
command("SET client_min_messages = warning", 0, NULL);
/* acquire target tables */
appendStringInfoString(&sql, "SELECT * FROM reorg.tables WHERE ");
2008-12-08 04:32:10 +00:00
if (table)
{
appendStringInfoString(&sql, "relid = $1::regclass");
res = execute_elevel(sql.data, 1, &table, LOG);
2008-12-08 04:32:10 +00:00
}
else
{
appendStringInfoString(&sql, "pkid IS NOT NULL");
2008-12-08 04:32:10 +00:00
if (!orderby)
appendStringInfoString(&sql, " AND ckid IS NOT NULL");
res = execute_elevel(sql.data, 0, NULL, LOG);
2008-12-08 04:32:10 +00:00
}
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
if (sqlstate_equals(res, SQLSTATE_INVALID_SCHEMA_NAME))
2008-12-08 04:32:10 +00:00
{
/* Schema reorg does not exist. Skip the database. */
ret = false;
goto cleanup;
}
else
{
/* exit otherwise */
printf("%s", PQerrorMessage(connection));
2008-12-08 04:32:10 +00:00
PQclear(res);
2009-04-22 07:26:12 +00:00
exit(1);
2008-12-08 04:32:10 +00:00
}
}
num = PQntuples(res);
for (i = 0; i < num; i++)
{
reorg_table table;
const char *create_table;
const char *ckey;
int c = 0;
table.target_name = getstr(res, i, c++);
table.target_oid = getoid(res, i, c++);
table.target_toast = getoid(res, i, c++);
table.target_tidx = getoid(res, i, c++);
table.pkid = getoid(res, i, c++);
table.ckid = getoid(res, i, c++);
if (table.pkid == 0)
elog(ERROR, "relation \"%s\" has no primary key", table.target_name);
2008-12-08 04:32:10 +00:00
table.create_pktype = getstr(res, i, c++);
table.create_log = getstr(res, i, c++);
table.create_trigger = getstr(res, i, c++);
create_table = getstr(res, i, c++);
table.delete_log = getstr(res, i, c++);
table.lock_table = getstr(res, i, c++);
ckey = getstr(res, i, c++);
resetStringInfo(&sql);
2008-12-08 04:32:10 +00:00
if (!orderby)
{
/* CLUSTER mode */
if (ckey == NULL)
elog(ERROR, "relation \"%s\" has no cluster key", table.target_name);
appendStringInfo(&sql, "%s ORDER BY %s", create_table, ckey);
2008-12-08 04:32:10 +00:00
table.create_table = sql.data;
}
else if (!orderby[0])
{
/* VACUUM FULL mode */
table.create_table = create_table;
}
else
{
/* User specified ORDER BY */
appendStringInfo(&sql, "%s ORDER BY %s", create_table, orderby);
2008-12-08 04:32:10 +00:00
table.create_table = sql.data;
}
table.sql_peek = getstr(res, i, c++);
table.sql_insert = getstr(res, i, c++);
table.sql_delete = getstr(res, i, c++);
table.sql_update = getstr(res, i, c++);
table.sql_pop = getstr(res, i, c++);
reorg_one_table(&table, orderby);
}
cleanup:
PQclear(res);
disconnect();
termStringInfo(&sql);
2008-12-08 04:32:10 +00:00
return ret;
}
/*
 * Run reorg.reorg_apply() once for the given table.
 *
 * The table's five "flush" statements are passed as $1..$5 and count, in
 * decimal text form, as $6.  Returns the first column of the first result
 * row converted with atoi(); the caller treats a positive value as "there
 * may be more log rows to apply".
 */
static int
apply_log(const reorg_table *table, int count)
{
    char        count_text[12];
    const char *args[6];
    PGresult   *rs;
    int         applied;

    args[0] = table->sql_peek;
    args[1] = table->sql_insert;
    args[2] = table->sql_delete;
    args[3] = table->sql_update;
    args[4] = table->sql_pop;
    args[5] = utoa(count, count_text);

    rs = execute("SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)",
                 6, args);
    applied = atoi(PQgetvalue(rs, 0, 0));
    PQclear(rs);

    return applied;
}
/*
* Re-organize one table.
*/
static void
reorg_one_table(const reorg_table *table, const char *orderby)
{
PGresult *res;
const char *params[1];
int num;
int i;
char *vxid;
char buffer[12];
StringInfoData sql;
initStringInfo(&sql);
2008-12-08 04:32:10 +00:00
if (verbose)
{
fprintf(stderr, "---- reorg_one_table ----\n");
fprintf(stderr, "target_name : %s\n", table->target_name);
fprintf(stderr, "target_oid : %u\n", table->target_oid);
fprintf(stderr, "target_toast : %u\n", table->target_toast);
fprintf(stderr, "target_tidx : %u\n", table->target_tidx);
fprintf(stderr, "pkid : %u\n", table->pkid);
fprintf(stderr, "ckid : %u\n", table->ckid);
fprintf(stderr, "create_pktype : %s\n", table->create_pktype);
fprintf(stderr, "create_log : %s\n", table->create_log);
fprintf(stderr, "create_trigger : %s\n", table->create_trigger);
fprintf(stderr, "create_table : %s\n", table->create_table);
fprintf(stderr, "delete_log : %s\n", table->delete_log);
fprintf(stderr, "lock_table : %s\n", table->lock_table);
fprintf(stderr, "sql_peek : %s\n", table->sql_peek);
fprintf(stderr, "sql_insert : %s\n", table->sql_insert);
fprintf(stderr, "sql_delete : %s\n", table->sql_delete);
fprintf(stderr, "sql_update : %s\n", table->sql_update);
fprintf(stderr, "sql_pop : %s\n", table->sql_pop);
}
/*
* 1. Setup workspaces and a trigger.
*/
if (verbose)
fprintf(stderr, "---- setup ----\n");
command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
/*
* Check z_reorg_trigger is the trigger executed at last so that
* other before triggers cannot modify triggered tuples.
*/
params[0] = utoa(table->target_oid, buffer);
res = execute("SELECT reorg.conflicted_triggers($1)", 1, params);
2008-12-08 04:32:10 +00:00
if (PQntuples(res) > 0)
elog(ERROR, "trigger %s conflicted for %s",
PQgetvalue(res, 0, 0), table->target_name);
2008-12-08 04:32:10 +00:00
command(table->create_pktype, 0, NULL);
command(table->create_log, 0, NULL);
command(table->create_trigger, 0, NULL);
printfStringInfo(&sql, "SELECT reorg.disable_autovacuum('reorg.log_%u')", table->target_oid);
command(sql.data, 0, NULL);
2008-12-08 04:32:10 +00:00
command("COMMIT", 0, NULL);
/*
* Register the table to be dropped on error. We use pktype as
* an advisory lock. The registration should be done after
* the first command is succeeded.
*/
current_table = table;
/*
* 2. Copy tuples into temp table.
*/
if (verbose)
fprintf(stderr, "---- copy tuples ----\n");
command("BEGIN ISOLATION LEVEL SERIALIZABLE", 0, NULL);
/* SET work_mem = maintenance_work_mem */
command("SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)", 0, NULL);
if (PQserverVersion(connection) >= 80300 && orderby && !orderby[0])
2008-12-08 04:32:10 +00:00
command("SET LOCAL synchronize_seqscans = off", 0, NULL);
res = execute(SQL_XID_SNAPSHOT, 0, NULL);
vxid = strdup(PQgetvalue(res, 0, 0));
PQclear(res);
command(table->delete_log, 0, NULL);
command(table->create_table, 0, NULL);
printfStringInfo(&sql, "SELECT reorg.disable_autovacuum('reorg.table_%u')", table->target_oid);
command(sql.data, 0, NULL);
2008-12-08 04:32:10 +00:00
command("COMMIT", 0, NULL);
/*
* 3. Create indexes on temp table.
*/
if (verbose)
fprintf(stderr, "---- create indexes ----\n");
params[0] = utoa(table->target_oid, buffer);
res = execute("SELECT indexrelid,"
" reorg.reorg_indexdef(indexrelid, indrelid)"
" FROM pg_index WHERE indrelid = $1", 1, params);
num = PQntuples(res);
for (i = 0; i < num; i++)
{
reorg_index index;
int c = 0;
index.target_oid = getoid(res, i, c++);
index.create_index = getstr(res, i, c++);
if (verbose)
{
fprintf(stderr, "[%d]\n", i);
fprintf(stderr, "target_oid : %u\n", index.target_oid);
fprintf(stderr, "create_index : %s\n", index.create_index);
}
/*
* NOTE: If we want to create multiple indexes in parallel,
* we need to call create_index in multiple connections.
*/
command(index.create_index, 0, NULL);
}
PQclear(res);
/*
* 4. Apply log to temp table until no tuples left in the log
* and all of old transactions are finished.
*/
for (;;)
{
num = apply_log(table, APPLY_COUNT);
if (num > 0)
continue; /* there might be still some tuples, repeat. */
/* old transactions still alive ? */
params[0] = vxid;
res = execute(SQL_XID_ALIVE, 1, params);
num = PQntuples(res);
PQclear(res);
if (num > 0)
{
sleep(1);
continue; /* wait for old transactions */
}
/* ok, go next step. */
break;
}
/*
* 5. Swap.
2008-12-08 04:32:10 +00:00
*/
if (verbose)
fprintf(stderr, "---- swap ----\n");
2008-12-08 04:32:10 +00:00
for (;;)
{
command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
res = execute_elevel(table->lock_table, 0, NULL, NOTICE);
if (PQresultStatus(res) == PGRES_COMMAND_OK)
{
PQclear(res);
break;
}
else if (sqlstate_equals(res, SQLSTATE_LOCK_NOT_AVAILABLE))
{
/* retry if lock conflicted */
PQclear(res);
command("ROLLBACK", 0, NULL);
sleep(1);
continue;
}
else
{
/* exit otherwise */
printf("%s", PQerrorMessage(connection));
PQclear(res);
2009-04-22 07:26:12 +00:00
exit(1);
}
}
2008-12-08 04:32:10 +00:00
apply_log(table, 0);
params[0] = utoa(table->target_oid, buffer);
command("SELECT reorg.reorg_swap($1)", 1, params);
command("COMMIT", 0, NULL);
/*
* 6. Drop.
*/
if (verbose)
fprintf(stderr, "---- drop ----\n");
command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
params[0] = utoa(table->target_oid, buffer);
2008-12-08 04:32:10 +00:00
command("SELECT reorg.reorg_drop($1)", 1, params);
command("COMMIT", 0, NULL);
2008-12-08 04:32:10 +00:00
current_table = NULL;
free(vxid);
/*
* 7. Analyze.
* Note that current_table is already set to NULL here because analyze
* is an unimportant operation; No clean up even if failed.
*/
if (analyze)
{
if (verbose)
fprintf(stderr, "---- analyze ----\n");
command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
printfStringInfo(&sql, "ANALYZE %s%s",
(verbose ? "VERBOSE " : ""), table->target_name);
command(sql.data, 0, NULL);
command("COMMIT", 0, NULL);
}
termStringInfo(&sql);
2008-12-08 04:32:10 +00:00
}
2009-04-22 07:26:12 +00:00
void
pgut_cleanup(bool fatal)
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
if (fatal)
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
if (current_table)
fprintf(stderr, "!!!FATAL ERROR!!! Please refer to a manual.\n\n");
2008-12-08 04:32:10 +00:00
}
2009-04-22 07:26:12 +00:00
else
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
char buffer[12];
const char *params[1];
if (current_table == NULL)
return; /* no needs to cleanup */
2008-12-08 04:32:10 +00:00
/* Rollback current transaction */
if (connection)
2009-04-22 07:26:12 +00:00
command("ROLLBACK", 0, NULL);
2008-12-08 04:32:10 +00:00
/* Try reconnection if not available. */
if (PQstatus(connection) != CONNECTION_OK)
2008-12-08 04:32:10 +00:00
reconnect();
2009-04-22 07:26:12 +00:00
/* do cleanup */
params[0] = utoa(current_table->target_oid, buffer);
command("SELECT reorg.reorg_drop($1)", 1, params);
current_table = NULL;
2008-12-08 04:32:10 +00:00
}
}
void
2009-04-22 07:26:12 +00:00
pgut_help(void)
2008-12-08 04:32:10 +00:00
{
2009-04-22 07:26:12 +00:00
fprintf(stderr,
"%s re-organizes a PostgreSQL database.\n\n"
"Usage:\n"
" %s [OPTION]... [DBNAME]\n"
"\nOptions:\n"
" -a, --all reorg all databases\n"
" -t, --table=TABLE reorg specific table only\n"
" -n, --no-order do vacuum full instead of cluster\n"
" -o, --order-by=columns order by columns instead of cluster keys\n"
" -Z, --no-analyze don't analyze at end\n"
" -v, --verbose display detailed information during processing\n",
PROGRAM_NAME, PROGRAM_NAME);
2008-12-08 04:32:10 +00:00
}