pg_repack/bin/pg_reorg.c
2008-12-08 04:32:10 +00:00

947 lines
22 KiB
C
Executable File

/*
* pg_reorg.c: bin/pg_reorg.c
*
* Copyright (c) 2008, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
*/
/**
* @brief Client Modules
*/
#include "postgres_fe.h"
#include "common.h"
#include "libpq/pqsignal.h"
#include <unistd.h>
#include <signal.h>
/* Version string printed by PrintVersion(). */
#define REORG_VERSION "1.0.2"
/* Website and bug-report address appended to the PrintHelp() output. */
#define REORG_URL "http://reorg.projects.postgresql.org/"
#define REORG_EMAIL "reorg-general@lists.pgfoundry.org"
/* Count passed to apply_log() on each pass of the catch-up loop in reorg_one_table(). */
#define APPLY_COUNT 1000
/*
 * SQL_XID_SNAPSHOT aggregates, into a single array value, the transaction
 * identifiers that backends other than our own currently hold in pg_locks.
 * SQL_XID_ALIVE takes that array as $1 and returns one row while at least
 * one of those identifiers is still present in pg_locks.
 *
 * With PG_VERSION_NUM >= 80300 the queries look at 'virtualxid' locks;
 * otherwise they look at 'transactionid' locks.
 */
#if PG_VERSION_NUM >= 80300
#define SQL_XID_SNAPSHOT \
"SELECT reorg.array_accum(virtualtransaction) FROM pg_locks"\
" WHERE locktype = 'virtualxid' AND pid <> pg_backend_pid()"
#define SQL_XID_ALIVE \
"SELECT 1 FROM pg_locks WHERE locktype = 'virtualxid'"\
" AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1) LIMIT 1"
#else
#define SQL_XID_SNAPSHOT \
"SELECT reorg.array_accum(transactionid) FROM pg_locks"\
" WHERE locktype = 'transactionid' AND pid <> pg_backend_pid()"
#define SQL_XID_ALIVE \
"SELECT 1 FROM pg_locks WHERE locktype = 'transactionid'"\
" AND pid <> pg_backend_pid() AND transactionid = ANY($1) LIMIT 1"
#endif
/*
 * per-table information
 *
 * One instance is filled in reorg_one_database() from a single row of
 * the reorg.tables query, in column order.  The string members point
 * either into that PGresult or into the caller's query buffer; they are
 * not copied, so they are only valid while those objects are alive.
 */
typedef struct reorg_table
{
const char *target_name; /* target: relname */
Oid target_oid; /* target: OID */
Oid target_toast; /* target: toast OID */
Oid target_tidx; /* target: toast index OID */
Oid pkid; /* target: PK OID (0 when the table has no primary key) */
Oid ckid; /* target: CK OID */
const char *create_pktype; /* CREATE TYPE pk */
const char *create_log; /* CREATE TABLE log */
const char *create_trigger; /* CREATE TRIGGER z_reorg_trigger */
const char *create_table; /* CREATE TABLE table AS SELECT, optionally with ORDER BY appended */
const char *delete_log; /* DELETE FROM log */
const char *lock_table; /* LOCK TABLE table */
const char *sql_peek; /* SQL passed to reorg.reorg_apply() as $1 */
const char *sql_insert; /* SQL passed to reorg.reorg_apply() as $2 */
const char *sql_delete; /* SQL passed to reorg.reorg_apply() as $3 */
const char *sql_update; /* SQL passed to reorg.reorg_apply() as $4 */
const char *sql_pop; /* SQL passed to reorg.reorg_apply() as $5 */
} reorg_table;
/*
 * per-index information
 *
 * Filled in reorg_one_table() from one row of the pg_index query; the
 * create_index string points into that PGresult and is not copied.
 */
typedef struct reorg_index
{
Oid target_oid; /* target: OID of the existing index (indexrelid) */
const char *create_index; /* CREATE INDEX statement returned by reorg.reorg_indexdef() */
} reorg_index;
/* Drivers: all databases -> one database -> one table. */
static void reorg_all_databases(const char *orderby);
static bool reorg_one_database(const char *orderby, const char *table);
static void reorg_one_table(const reorg_table *table, const char* orderby);
/* Connection management for the single global connection. */
static void reconnect(void);
static void disconnect(void);
/* Query helpers: execute() and command() terminate the program on failure. */
static PGresult *execute_nothrow(const char *query, int nParams, const char **params);
static PGresult *execute(const char *query, int nParams, const char **params);
static void command(const char *query, int nParams, const char **params);
/* Error-exit path that drops temporary objects of the table in progress. */
static void cleanup(void);
static void exit_with_cleanup(int exitcode);
/* Query-cancel support (Ctrl+C). */
static void reorg_setup_cancel_handler(void);
static void reorg_command_begin(PGconn *conn);
static void reorg_command_end(void);
/* Command-line help and version output. */
static void PrintHelp(const char *progname);
static void PrintVersion(void);
/* NULL-aware accessors for PGresult cells. */
static char *getstr(PGresult *res, int row, int col);
static Oid getoid(PGresult *res, int row, int col);
/* Program name used as a prefix in messages; set at the top of main(). */
static const char *progname = NULL;

/* Output switches set from the command line (-e, -v, -q). */
static bool echo = false;
static bool verbose = false;
static bool quiet = false;

/* connection parameters */
static const char *dbname = NULL;
static char *host = NULL;
static char *port = NULL;
static char *username = NULL;
static bool password = false;

/*
 * The table being re-organized. If not null, we need to clean up temp
 * objects before the program exits.
 */
static const reorg_table *current_table = NULL;

/* Current connection, initialized with the connection parameters above. */
static PGconn *current_conn = NULL;

/*
 * Interrupted by SIGINT (Ctrl+C) ?
 *
 * This flag is written from the signal handler (reorg_cancel) and read
 * from normal code (execute), so it has to be a volatile sig_atomic_t:
 * that is the only object type the C standard lets a signal handler
 * assign to safely.
 */
static volatile sig_atomic_t interrupted = false;

/* Not null during executing some SQL commands. */
static PGcancel *volatile cancelConn = NULL;
#ifdef WIN32
/* Serializes access to cancelConn between the main thread and the console handler. */
static CRITICAL_SECTION cancelConnLock;

/*
 * Minimal stand-in for POSIX sleep() on Windows: waits for the given
 * number of seconds using Sleep() and always reports 0.
 */
static unsigned int
sleep(unsigned int seconds)
{
	unsigned int millis = seconds * 1000;

	Sleep(millis);
	return 0;
}
#endif
/*
 * Format an unsigned integer as a decimal string.
 *
 * The text is written into buffer, which must have room for every digit
 * plus the terminating NUL (at least 11 bytes for a 32-bit value).
 * Returns buffer so that the call can be used directly as an argument.
 */
static char *
utoa(unsigned int value, char *buffer)
{
	/* three decimal digits per byte is always enough */
	char		reversed[sizeof(unsigned int) * 3 + 1];
	int			ndigits = 0;
	char	   *out = buffer;

	/* collect digits least-significant first; zero still yields one digit */
	do
	{
		reversed[ndigits++] = (char) ('0' + value % 10);
		value /= 10;
	} while (value != 0);

	/* emit them most-significant first */
	while (ndigits > 0)
		*out++ = reversed[--ndigits];
	*out = '\0';

	return buffer;
}
/*
 * atexit() hook: if the process is terminating while a table is still
 * registered as being re-organized, tell the user that temporary
 * objects were left behind.
 */
static void
warn_if_unclean(void)
{
	if (current_table == NULL)
		return;

	fprintf(stderr, _("!!!FATAL ERROR!!! Please refer to a manual.\n\n"));
}
/*
 * Program entry point.
 *
 * Parses the command line into the file-scope option variables, installs
 * the cancel handler and the atexit warning hook, and then re-organizes
 * either every database (-a) or a single one.  Returns 0 on success and
 * 1 when the single target database has no reorg schema; usage errors
 * terminate through exit(1).
 */
int
main(int argc, char *argv[])
{
static struct option long_options[] = {
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
{"username", required_argument, NULL, 'U'},
{"password", no_argument, NULL, 'W'},
{"echo", no_argument, NULL, 'e'},
{"quiet", no_argument, NULL, 'q'},
{"verbose", no_argument, NULL, 'v'},
{"dbname", required_argument, NULL, 'd'},
{"all", no_argument, NULL, 'a'},
{"table", required_argument, NULL, 't'},
{"no-order", no_argument, NULL, 'n'},
{"order-by", required_argument, NULL, 'o'},
{NULL, 0, NULL, 0}
};
int optindex;
int c;
bool alldb = false;
const char *table = NULL;
/*
 * orderby encodes the mode: NULL = order by the cluster key,
 * "" = no ordering (-n), anything else = user-supplied ORDER BY (-o).
 */
const char *orderby = NULL;
progname = get_progname(argv[0]);
set_pglocale_pgservice(argv[0], "pgscripts");
/*
* Help message and version are handled at first.
* They are only recognized as the first argument.
*/
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
PrintHelp(progname);
return 0;
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
PrintVersion();
return 0;
}
}
while ((c = getopt_long(argc, argv, "h:p:U:Weqvd:at:no:", long_options, &optindex)) != -1)
{
switch (c)
{
case 'h':
host = optarg;
break;
case 'p':
port = optarg;
break;
case 'U':
username = optarg;
break;
case 'W':
password = true;
break;
case 'e':
echo = true;
break;
case 'q':
quiet = true;
break;
case 'v':
verbose = true;
break;
case 'd':
dbname = optarg;
break;
case 'a':
alldb = true;
break;
case 't':
table = optarg;
break;
case 'n':
/* empty string, not NULL: selects the unordered mode */
orderby = "";
break;
case 'o':
orderby = optarg;
break;
default:
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
}
/* At most one non-option argument is accepted; it names the database. */
switch (argc - optind)
{
case 0:
break;
case 1:
dbname = argv[optind];
break;
default:
fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
progname, argv[optind + 1]);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
reorg_setup_cancel_handler();
atexit(warn_if_unclean);
if (alldb)
{
/* -a cannot be combined with a database name or with -t */
if (dbname)
{
fprintf(stderr, _("%s: cannot reorg all databases and a specific one at the same time\n"),
progname);
exit(1);
}
if (table)
{
fprintf(stderr, _("%s: cannot reorg a specific table in all databases\n"),
progname);
exit(1);
}
reorg_all_databases(orderby);
}
else
{
/*
 * Pick the first available database name: explicit argument,
 * then PGDATABASE, then PGUSER, then the current user name.
 * The || chain stops at the first non-NULL assignment.
 */
(void) (dbname ||
(dbname = getenv("PGDATABASE")) ||
(dbname = getenv("PGUSER")) ||
(dbname = get_user_name(progname)));
/* false means the reorg schema was not found in that database */
if (!reorg_one_database(orderby, table))
{
fprintf(stderr, _("ERROR: %s is not installed\n"), progname);
return 1;
}
}
return 0;
}
/*
 * Call reorg_one_database for each database.
 *
 * Connects to the "postgres" database to list every database that
 * allows connections, then processes them in name order.  Unless quiet
 * mode is on, one progress line is printed per database; databases for
 * which reorg_one_database() returns false are reported as skipped.
 *
 * orderby is passed through unchanged to reorg_one_database().
 */
static void
reorg_all_databases(const char *orderby)
{
	PGresult   *result;
	int			ndatabases;
	int			i;

	dbname = "postgres";
	reconnect();
	result = execute("SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;", 0, NULL);
	disconnect();

	ndatabases = PQntuples(result);
	for (i = 0; i < ndatabases; i++)
	{
		bool		ret;

		/* reconnect() inside reorg_one_database() reads the global dbname */
		dbname = PQgetvalue(result, i, 0);

		if (!quiet)
		{
			printf(_("%s: reorg database \"%s\""), progname, dbname);
			fflush(stdout);
		}

		ret = reorg_one_database(orderby, NULL);

		if (!quiet)
		{
			if (ret)
				printf("\n");
			else
				printf(_(" ... skipped\n"));
			fflush(stdout);
		}
	}

	/*
	 * dbname points into result's storage; reset it before that storage
	 * is released so the global is never left dangling.
	 */
	dbname = NULL;
	PQclear(result);
}
/*
 * Fetch one cell of a result as a string, mapping SQL NULL to a NULL
 * pointer.  The returned pointer refers to storage owned by res; it is
 * not copied.
 */
static char *
getstr(PGresult *res, int row, int col)
{
	return PQgetisnull(res, row, col) ? NULL : PQgetvalue(res, row, col);
}
/*
 * Fetch one cell of a result as an Oid, mapping SQL NULL to InvalidOid.
 * The cell text is parsed as a base-10 unsigned number.
 */
static Oid
getoid(PGresult *res, int row, int col)
{
	const char *text;

	if (PQgetisnull(res, row, col))
		return InvalidOid;

	text = PQgetvalue(res, row, col);
	return (Oid) strtoul(text, NULL, 10);
}
/*
* Call reorg_one_table for the target table or each table in a database.
*/
static bool
reorg_one_database(const char *orderby, const char *table)
{
bool ret = true;
PGresult *res;
int i;
int num;
PQExpBufferData sql;
initPQExpBuffer(&sql);
reconnect();
/* Restrict search_path to system catalog. */
command("SET search_path = pg_catalog, pg_temp", 0, NULL);
/* To avoid annoying "create implicit ..." messages. */
command("SET client_min_messages = warning", 0, NULL);
/* acquire target tables */
appendPQExpBufferStr(&sql, "SELECT * FROM reorg.tables WHERE ");
if (table)
{
appendPQExpBufferStr(&sql, "relid = $1::regclass");
res = execute_nothrow(sql.data, 1, &table);
}
else
{
appendPQExpBufferStr(&sql, "pkid IS NOT NULL");
if (!orderby)
appendPQExpBufferStr(&sql, " AND ckid IS NOT NULL");
res = execute_nothrow(sql.data, 0, NULL);
}
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
const char *state = PQresultErrorField(res, PG_DIAG_SQLSTATE);
if (state && strcmp(state, "3F000") == 0)
{
/* Schema reorg does not exist. Skip the database. */
ret = false;
goto cleanup;
}
else
{
/* exit otherwise */
printf("%s", PQerrorMessage(current_conn));
PQclear(res);
exit_with_cleanup(1);
}
}
num = PQntuples(res);
for (i = 0; i < num; i++)
{
reorg_table table;
const char *create_table;
const char *ckey;
int c = 0;
table.target_name = getstr(res, i, c++);
table.target_oid = getoid(res, i, c++);
table.target_toast = getoid(res, i, c++);
table.target_tidx = getoid(res, i, c++);
table.pkid = getoid(res, i, c++);
table.ckid = getoid(res, i, c++);
if (table.pkid == 0)
{
fprintf(stderr, _("ERROR: relation \"%s\" has no primary key\n"), table.target_name);
exit_with_cleanup(1);
}
table.create_pktype = getstr(res, i, c++);
table.create_log = getstr(res, i, c++);
table.create_trigger = getstr(res, i, c++);
create_table = getstr(res, i, c++);
table.delete_log = getstr(res, i, c++);
table.lock_table = getstr(res, i, c++);
ckey = getstr(res, i, c++);
resetPQExpBuffer(&sql);
if (!orderby)
{
/* CLUSTER mode */
if (ckey == NULL)
{
fprintf(stderr, _("ERROR: relation \"%s\" has no cluster key\n"), table.target_name);
exit_with_cleanup(1);
}
appendPQExpBuffer(&sql, "%s ORDER BY %s", create_table, ckey);
table.create_table = sql.data;
}
else if (!orderby[0])
{
/* VACUUM FULL mode */
table.create_table = create_table;
}
else
{
/* User specified ORDER BY */
appendPQExpBuffer(&sql, "%s ORDER BY %s", create_table, orderby);
table.create_table = sql.data;
}
table.sql_peek = getstr(res, i, c++);
table.sql_insert = getstr(res, i, c++);
table.sql_delete = getstr(res, i, c++);
table.sql_update = getstr(res, i, c++);
table.sql_pop = getstr(res, i, c++);
reorg_one_table(&table, orderby);
}
cleanup:
PQclear(res);
disconnect();
termPQExpBuffer(&sql);
return ret;
}
/*
 * Run reorg.reorg_apply() once for the given table.
 *
 * The five flush statements stored in table and the decimal text of
 * count are passed as parameters $1..$6.  Returns the first column of
 * the first result row converted with atoi().
 */
static int
apply_log(const reorg_table *table, int count)
{
	char		countbuf[12];
	const char *args[6];
	PGresult   *applied;
	int			napplied;

	args[0] = table->sql_peek;
	args[1] = table->sql_insert;
	args[2] = table->sql_delete;
	args[3] = table->sql_update;
	args[4] = table->sql_pop;
	args[5] = utoa(count, countbuf);

	applied = execute("SELECT reorg.reorg_apply($1, $2, $3, $4, $5, $6)",
					  6, args);
	napplied = atoi(PQgetvalue(applied, 0, 0));
	PQclear(applied);

	return napplied;
}
/*
 * Re-organize one table.
 *
 * Runs the five phases below against the current connection, using the
 * statements prepared in table:
 *   1. create the pk type, the log table and the trigger
 *   2. copy the tuples into a new table
 *   3. create the indexes on the new table
 *   4. apply the log until it is empty and all transactions that were
 *      running at copy time have finished
 *   5. lock the table, apply the remaining log, swap and drop
 *
 * orderby is only inspected to detect the unordered mode (empty
 * string), in which synchronize_seqscans is turned off for the copy.
 * Any failing statement terminates the program via exit_with_cleanup().
 */
static void
reorg_one_table(const reorg_table *table, const char *orderby)
{
	PGresult   *res;
	const char *params[1];
	int			num;
	int			i;
	char	   *vxid;
	char		buffer[12];

	if (verbose)
	{
		fprintf(stderr, "---- reorg_one_table ----\n");
		fprintf(stderr, "target_name : %s\n", table->target_name);
		fprintf(stderr, "target_oid : %u\n", table->target_oid);
		fprintf(stderr, "target_toast : %u\n", table->target_toast);
		fprintf(stderr, "target_tidx : %u\n", table->target_tidx);
		fprintf(stderr, "pkid : %u\n", table->pkid);
		fprintf(stderr, "ckid : %u\n", table->ckid);
		fprintf(stderr, "create_pktype : %s\n", table->create_pktype);
		fprintf(stderr, "create_log : %s\n", table->create_log);
		fprintf(stderr, "create_trigger : %s\n", table->create_trigger);
		fprintf(stderr, "create_table : %s\n", table->create_table);
		fprintf(stderr, "delete_log : %s\n", table->delete_log);
		fprintf(stderr, "lock_table : %s\n", table->lock_table);
		fprintf(stderr, "sql_peek : %s\n", table->sql_peek);
		fprintf(stderr, "sql_insert : %s\n", table->sql_insert);
		fprintf(stderr, "sql_delete : %s\n", table->sql_delete);
		fprintf(stderr, "sql_update : %s\n", table->sql_update);
		fprintf(stderr, "sql_pop : %s\n", table->sql_pop);
	}

	/*
	 * 1. Setup workspaces and a trigger.
	 */
	if (verbose)
		fprintf(stderr, "---- setup ----\n");

	command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);

	/*
	 * Check z_reorg_trigger is the trigger executed at last so that
	 * other before triggers cannot modify triggered tuples.
	 */
	params[0] = utoa(table->target_oid, buffer);
	res = execute(
		"SELECT 1 FROM pg_trigger"
		" WHERE tgrelid = $1 AND tgname >= 'z_reorg_trigger' LIMIT 1",
		1, params);
	num = PQntuples(res);
	/* release the result on both paths; res is reused further down */
	PQclear(res);
	if (num > 0)
	{
		fprintf(stderr, _("%s: trigger conflicted for %s\n"),
				progname, table->target_name);
		exit_with_cleanup(1);
	}

	command(table->create_pktype, 0, NULL);
	command(table->create_log, 0, NULL);
	command(table->create_trigger, 0, NULL);
	command("COMMIT", 0, NULL);

	/*
	 * Register the table to be dropped on error. We use pktype as
	 * an advisory lock. The registration should be done after
	 * the first command is succeeded.
	 */
	current_table = table;

	/*
	 * 2. Copy tuples into temp table.
	 */
	if (verbose)
		fprintf(stderr, "---- copy tuples ----\n");

	command("BEGIN ISOLATION LEVEL SERIALIZABLE", 0, NULL);
	if (orderby && !orderby[0])
		command("SET LOCAL synchronize_seqscans = off", 0, NULL);

	/* remember which transactions were running when the copy started */
	res = execute(SQL_XID_SNAPSHOT, 0, NULL);
	vxid = strdup(PQgetvalue(res, 0, 0));
	PQclear(res);
	if (vxid == NULL)
	{
		/*
		 * A NULL here would be sent as SQL NULL in step 4 and make the
		 * "old transactions still alive" check pass vacuously.
		 */
		fprintf(stderr, _("%s: out of memory\n"), progname);
		exit_with_cleanup(1);
	}

	command(table->delete_log, 0, NULL);
	command(table->create_table, 0, NULL);
	command("COMMIT", 0, NULL);

	/*
	 * 3. Create indexes on temp table.
	 */
	if (verbose)
		fprintf(stderr, "---- create indexes ----\n");

	params[0] = utoa(table->target_oid, buffer);
	res = execute("SELECT indexrelid,"
		" reorg.reorg_indexdef(indexrelid, indrelid)"
		" FROM pg_index WHERE indrelid = $1", 1, params);

	num = PQntuples(res);
	for (i = 0; i < num; i++)
	{
		reorg_index index;
		int			c = 0;

		index.target_oid = getoid(res, i, c++);
		index.create_index = getstr(res, i, c++);

		if (verbose)
		{
			fprintf(stderr, "[%d]\n", i);
			fprintf(stderr, "target_oid : %u\n", index.target_oid);
			fprintf(stderr, "create_index : %s\n", index.create_index);
		}

		/*
		 * NOTE: If we want to create multiple indexes in parallel,
		 * we need to call create_index in multiple connections.
		 */
		command(index.create_index, 0, NULL);
	}
	PQclear(res);

	/*
	 * 4. Apply log to temp table until no tuples left in the log
	 * and all of old transactions are finished.
	 */
	for (;;)
	{
		num = apply_log(table, APPLY_COUNT);
		if (num > 0)
			continue;	/* there might be still some tuples, repeat. */

		/* old transactions still alive ? */
		params[0] = vxid;
		res = execute(SQL_XID_ALIVE, 1, params);
		num = PQntuples(res);
		PQclear(res);

		if (num > 0)
		{
			sleep(1);
			continue;	/* wait for old transactions */
		}

		/* ok, go next step. */
		break;
	}

	/*
	 * 5. Cleanup.
	 */
	if (verbose)
		fprintf(stderr, "---- cleanup ----\n");

	command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
	command(table->lock_table, 0, NULL);
	apply_log(table, 0);
	params[0] = utoa(table->target_oid, buffer);
	command("SELECT reorg.reorg_swap($1)", 1, params);
	command("SELECT reorg.reorg_drop($1)", 1, params);
	command("COMMIT", 0, NULL);

	/* nothing left to clean up for this table */
	current_table = NULL;

	free(vxid);
}
/*
 * Drop the temporary objects of the table currently being re-organized.
 *
 * Does nothing when no table is registered in current_table.  On
 * success the registration is cleared, which also silences the atexit
 * warning.  A failing drop terminates the program through the normal
 * command() error path.
 */
static void
cleanup(void)
{
	char		buffer[12];
	const char *params[1];

	if (!current_table)
		return;

	params[0] = utoa(current_table->target_oid, buffer);
	/* command() frees the result; a bare execute() call would leak it */
	command("SELECT reorg.reorg_drop($1)", 1, params);
	current_table = NULL;
}
static void
reconnect(void)
{
disconnect();
current_conn = connectDatabase(dbname, host, port, username, password, progname);
}
/*
 * Close the current connection and forget it.  Safe to call when no
 * connection is open.
 */
static void
disconnect(void)
{
	if (current_conn == NULL)
		return;

	PQfinish(current_conn);
	current_conn = NULL;
}
/*
 * Terminate the program with exitcode, first removing the temporary
 * objects of a table whose re-organization is in progress.
 *
 * When a table is registered, the open transaction is rolled back, the
 * connection is re-established if it is not usable, and cleanup() is
 * run.  If the ROLLBACK itself fails the process exits with status 1
 * straight away, leaving the table registered.
 */
static void
exit_with_cleanup(int exitcode)
{
	if (current_table != NULL)
	{
		/* Rollback current transaction */
		if (current_conn != NULL)
		{
			PGresult   *rollback = PQexec(current_conn, "ROLLBACK");

			if (PQresultStatus(rollback) != PGRES_COMMAND_OK)
				exit(1);		/* fatal error */
			PQclear(rollback);
		}

		/* Try reconnection if not available. */
		if (PQstatus(current_conn) != CONNECTION_OK)
			reconnect();

		cleanup();
	}

	disconnect();
	exit(exitcode);
}
/*
 * execute_nothrow - Send a SQL statement on the current connection and
 * return its result without checking the status.
 *
 * The statement is echoed to stderr first when echo mode is on.  While
 * it runs, a cancel handle is registered so that an interrupt can
 * cancel it.  With nParams == 0 the statement is sent with PQexec,
 * otherwise with PQexecParams using text parameters.  The caller owns
 * the returned result.
 */
static PGresult *
execute_nothrow(const char *query, int nParams, const char **params)
{
	PGresult   *result;

	if (echo)
		fprintf(stderr, _("%s: executing %s\n"), progname, query);

#ifdef DEBUG_REORG
	fprintf(stderr, "debug: suspend in execute. (sql='%s')\npush enter key: ", query);
	fgetc(stdin);
#endif

	reorg_command_begin(current_conn);
	result = (nParams == 0)
		? PQexec(current_conn, query)
		: PQexecParams(current_conn, query, nParams, NULL, params, NULL, NULL, 0);
	reorg_command_end();

	return result;
}
/*
 * execute - Execute a SQL statement and return its result, or exit()
 * if it failed.
 *
 * If an interrupt was recorded before the call, the flag is reset, a
 * message is printed and the program exits through exit_with_cleanup()
 * without sending the statement.  A result whose status is neither
 * PGRES_TUPLES_OK nor PGRES_COMMAND_OK is reported together with the
 * statement text and also ends the program.  On success the caller
 * owns the returned result and must PQclear() it.
 */
static PGresult *
execute(const char *query, int nParams, const char **params)
{
	PGresult   *result;

	if (interrupted)
	{
		interrupted = false;
		fprintf(stderr, _("%s: interrupted\n"), progname);
		exit_with_cleanup(1);
		return NULL;	/* keep compiler quiet */
	}

	result = execute_nothrow(query, nParams, params);
	switch (PQresultStatus(result))
	{
		case PGRES_TUPLES_OK:
		case PGRES_COMMAND_OK:
			return result;
		default:
			break;
	}

	fprintf(stderr, _("%s: query failed: %s"),
			progname, PQerrorMessage(current_conn));
	fprintf(stderr, _("%s: query was: %s\n"),
			progname, query);
	PQclear(result);

	exit_with_cleanup(1);
	return NULL;	/* keep compiler quiet */
}
/*
 * command - Execute a SQL statement whose result is of no interest.
 * The result is released immediately; failure ends the program in the
 * same way as execute().
 */
static void
command(const char *query, int nParams, const char **params)
{
	PQclear(execute(query, nParams, params));
}
/*
 * Print the usage text to stdout.
 *
 * progname is substituted into the summary and usage lines (the
 * parameter shadows the file-scope variable of the same name).  The
 * website and bug-report lines are emitted only when REORG_URL and
 * REORG_EMAIL are defined.
 */
static void
PrintHelp(const char *progname)
{
printf(_("%s re-organizes a PostgreSQL database.\n\n"), progname);
printf(_("Usage:\n"));
printf(_(" %s [OPTION]... [DBNAME]\n"), progname);
printf(_("\nOptions:\n"));
printf(_(" -a, --all reorg all databases\n"));
printf(_(" -d, --dbname=DBNAME database to reorg\n"));
printf(_(" -t, --table=TABLE reorg specific table only\n"));
printf(_(" -n, --no-order do vacuum full instead of cluster\n"));
printf(_(" -o, --order-by=columns order by columns instead of cluster keys\n"));
printf(_(" -e, --echo show the commands being sent to the server\n"));
printf(_(" -q, --quiet don't write any messages\n"));
printf(_(" -v, --verbose display detailed information during processing\n"));
printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n"));
printf(_("\nConnection options:\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n"));
printf(_(" -U, --username=USERNAME user name to connect as\n"));
printf(_(" -W, --password force password prompt\n"));
#ifdef REORG_URL
printf(_("\nRead the website for details. <" REORG_URL ">\n"));
#endif
#ifdef REORG_EMAIL
printf(_("\nReport bugs to <" REORG_EMAIL ">.\n"));
#endif
}
/*
 * Print the program name and version.
 *
 * The version is output the user asked for, not a diagnostic, so it is
 * written to stdout, the same stream PrintHelp() uses.
 */
static void
PrintVersion(void)
{
	printf("pg_reorg " REORG_VERSION "\n");
}
/*
 * reorg_command_begin
 *
 * Make cancelConn refer to a cancel handle for conn, releasing any
 * handle that was still registered.  On Windows the update is done
 * while holding cancelConnLock.
 */
static void
reorg_command_begin(PGconn *conn)
{
	PGcancel   *stale;

#ifdef WIN32
	EnterCriticalSection(&cancelConnLock);
#endif

	/*
	 * Unpublish the old handle before freeing it, so the interrupt
	 * handler can never pick up a pointer that is being released.
	 */
	stale = cancelConn;
	cancelConn = NULL;
	if (stale)
		PQfreeCancel(stale);

	cancelConn = PQgetCancel(conn);

#ifdef WIN32
	LeaveCriticalSection(&cancelConnLock);
#endif
}
/*
* reorg_command_end
*
* Free the current cancel connection, if any, and set to NULL.
*/
static void
reorg_command_end(void)
{
PGcancel *oldCancelConn;
#ifdef WIN32
EnterCriticalSection(&cancelConnLock);
#endif
oldCancelConn = cancelConn;
/* be sure handle_sigint doesn't use pointer while freeing */
cancelConn = NULL;
if (oldCancelConn != NULL)
PQfreeCancel(oldCancelConn);
#ifdef WIN32
LeaveCriticalSection(&cancelConnLock);
#endif
}
/*
 * Handle interrupt signals by cancelling the current command.
 *
 * Records the interrupt in the interrupted flag and, when a cancel
 * handle is registered, sends a cancel request through it.  errno is
 * saved on entry and restored on exit.
 */
static void
reorg_cancel(void)
{
	char		errbuf[256];
	int			saved_errno = errno;

	/* Set interrupted flag */
	interrupted = true;

	/* Send QueryCancel if we are processing a database query */
	if (cancelConn != NULL)
	{
		if (PQcancel(cancelConn, errbuf, sizeof(errbuf)))
			fprintf(stderr, _("Cancel request sent\n"));
	}

	errno = saved_errno;	/* just in case the write changed it */
}
#ifndef WIN32
/* SIGINT handler: delegates to reorg_cancel(). */
static void
handle_sigint(SIGNAL_ARGS)
{
	reorg_cancel();
}
/* Install handle_sigint() as the SIGINT handler. */
static void
reorg_setup_cancel_handler(void)
{
	pqsignal(SIGINT, handle_sigint);
}
#else /* WIN32 */
/*
 * Console control handler for Win32. Note that the control handler will
 * execute on a *different thread* than the main one, so we need to do
 * proper locking around those structures.
 *
 * Ctrl+C and Ctrl+Break trigger reorg_cancel() under cancelConnLock and
 * are reported as handled; every other event is reported as unhandled.
 */
static BOOL WINAPI
consoleHandler(DWORD dwCtrlType)
{
	switch (dwCtrlType)
	{
		case CTRL_C_EVENT:
		case CTRL_BREAK_EVENT:
			EnterCriticalSection(&cancelConnLock);
			reorg_cancel();
			LeaveCriticalSection(&cancelConnLock);
			return TRUE;

		default:
			/* Return FALSE for any signals not being handled */
			return FALSE;
	}
}
/*
 * Win32 variant: initialize the lock protecting cancelConn, then
 * register consoleHandler() for console control events.
 */
static void
reorg_setup_cancel_handler(void)
{
	/* the lock must exist before the handler can possibly run */
	InitializeCriticalSection(&cancelConnLock);
	SetConsoleCtrlHandler(consoleHandler, TRUE);
}
#endif /* WIN32 */