/*
 * Fix table locking so that race conditions don't exist between lock release
 * in the primary connection and lock acquisition in conn2.  Also, have conn2
 * be in charge of performing the table-swap step, to avoid a similar race.
 * Part of work for Issue #8.
 */
/*
 * pg_repack.c: bin/pg_repack.c
 *
 * Portions Copyright (c) 2008-2011, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 * Portions Copyright (c) 2011, Itagaki Takahiro
 * Portions Copyright (c) 2012, The Reorg Development Team
 */

/**
 * @brief Client Modules
 */
|
|
|
|
/* Identification strings consumed by pgut for --help/--version output. */
const char *PROGRAM_URL = "http://reorg.github.com/pg_repack";
const char *PROGRAM_EMAIL = "reorg-general@lists.pgfoundry.org";

#ifdef REPACK_VERSION
/* macro trick to stringify a macro expansion */
#define xstr(s) str(s)
#define str(s) #s
const char *PROGRAM_VERSION = xstr(REPACK_VERSION);
#else
const char *PROGRAM_VERSION = "unknown";
#endif
|
|
|
|
#include "pgut/pgut-fe.h"

#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>

/*
 * APPLY_COUNT: Number of applied logs per transaction. Larger values
 * could be faster, but will be long transactions in the REDO phase.
 */
#define APPLY_COUNT 1000
|
|
|
|
/* Record the PIDs of any possibly-conflicting transactions. Ignore the PID
 * of our primary connection, and our second connection holding an
 * ACCESS SHARE table lock.
 * $1 is the backend PID of conn2 (see lock_conn_pid in repack_one_table).
 * The '1/1, -1/0' lock skipped is from the bgwriter on newly promoted
 * servers. See GH ticket #1.
 */
#define SQL_XID_SNAPSHOT \
	"SELECT repack.array_accum(virtualtransaction) FROM pg_locks"\
	" WHERE locktype = 'virtualxid' AND pid NOT IN (pg_backend_pid(), $1)"\
	" AND (virtualxid, virtualtransaction) <> ('1/1', '-1/0')"

/* Later, check whether any of the transactions we saw before are still
 * alive, and wait for them to go away.
 * $1 is the text array of virtual transaction IDs captured by
 * SQL_XID_SNAPSHOT.
 */
#define SQL_XID_ALIVE \
	"SELECT pid FROM pg_locks WHERE locktype = 'virtualxid'"\
	" AND pid <> pg_backend_pid() AND virtualtransaction = ANY($1)"
|
|
|
|
/* To be run while our main connection holds an AccessExclusive lock on the
 * target table, and our secondary conn is attempting to grab an AccessShare
 * lock. We know that "granted" must be false for these queries because
 * we already hold the AccessExclusive lock. Also, we only care about other
 * transactions trying to grab an ACCESS EXCLUSIVE lock, because that lock
 * level is needed for any of the disallowed DDL commands, e.g. ALTER TABLE
 * or TRUNCATE.
 * %u is substituted with the target table's OID via printfStringInfo().
 */
#define CANCEL_COMPETING_LOCKS \
	"SELECT pg_cancel_backend(pid) FROM pg_locks WHERE locktype = 'relation'"\
	" AND granted = false AND relation = %u"\
	" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
|
|
|
|
/* Terminate any backends still waiting on an ACCESS EXCLUSIVE lock after a
 * cancel request has been issued.  Unlike CANCEL_COMPETING_LOCKS this uses
 * pg_terminate_backend() (available since PostgreSQL 8.4; the caller gates
 * on PQserverVersion() >= 80400), which kills the backend process rather
 * than merely canceling its current query.  Previously this macro was an
 * accidental copy of CANCEL_COMPETING_LOCKS and re-sent pg_cancel_backend(),
 * so the "kill" step never actually terminated anything.
 * %u is substituted with the target table's OID via printfStringInfo().
 */
#define KILL_COMPETING_LOCKS \
	"SELECT pg_terminate_backend(pid) FROM pg_locks WHERE locktype = 'relation'"\
	" AND granted = false AND relation = %u"\
	" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
|
|
|
|
/*
 * per-table information
 *
 * All string members are SQL text fetched from the repack.tables view.
 * They point into the PGresult they were read from (getstr() does not
 * copy), so a repack_table must not outlive that result set.
 */
typedef struct repack_table
{
	const char *target_name;	/* target: relname */
	Oid			target_oid;		/* target: OID */
	Oid			target_toast;	/* target: toast OID */
	Oid			target_tidx;	/* target: toast index OID */
	Oid			pkid;			/* target: PK OID */
	Oid			ckid;			/* target: CK OID */
	const char *create_pktype;	/* CREATE TYPE pk */
	const char *create_log;		/* CREATE TABLE log */
	const char *create_trigger;	/* CREATE TRIGGER z_repack_trigger */
	const char *enable_trigger;	/* ALTER TABLE ENABLE ALWAYS TRIGGER z_repack_trigger */
	const char *create_table;	/* CREATE TABLE table AS SELECT */
	const char *drop_columns;	/* ALTER TABLE DROP COLUMNs */
	const char *delete_log;		/* DELETE FROM log */
	const char *lock_table;		/* LOCK TABLE table */
	const char *sql_peek;		/* SQL used in flush */
	const char *sql_insert;		/* SQL used in flush */
	const char *sql_delete;		/* SQL used in flush */
	const char *sql_update;		/* SQL used in flush */
	const char *sql_pop;		/* SQL used in flush */
} repack_table;
|
|
|
|
/*
 * per-index information
 *
 * create_index points into the PGresult it was read from; same lifetime
 * caveat as repack_table.
 */
typedef struct repack_index
{
	Oid			target_oid;		/* target: OID */
	const char *create_index;	/* CREATE INDEX */
} repack_index;
|
|
|
|
/* Top-level driver routines. */
static void repack_all_databases(const char *order_by);
static bool repack_one_database(const char *order_by, const char *table);
static void repack_one_table(const repack_table *table, const char *order_by);
static void repack_cleanup(bool fatal, void *userdata);

/* Result-set accessors and lock helper. */
static char *getstr(PGresult *res, int row, int col);
static Oid getoid(PGresult *res, int row, int col);
static void lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool start_xact);

/* SQLSTATE codes compared against PQresultErrorField(..., PG_DIAG_SQLSTATE). */
#define SQLSTATE_INVALID_SCHEMA_NAME "3F000"
#define SQLSTATE_QUERY_CANCELED "57014"
|
|
|
|
/*
 * Return true iff the result's SQLSTATE matches the given code.
 *
 * PQresultErrorField() returns NULL when the result carries no error field
 * (e.g. for a successful result, or one not produced by the server), so we
 * must guard against it before calling strcmp(); the previous code passed
 * NULL straight to strcmp(), which is undefined behavior.
 */
static bool sqlstate_equals(PGresult *res, const char *state)
{
	const char *sqlstate = PQresultErrorField(res, PG_DIAG_SQLSTATE);

	return sqlstate != NULL && strcmp(sqlstate, state) == 0;
}
|
|
|
|
/* Command-line option state, filled in by pgut_getopt() via options[]. */
static bool analyze = true;			/* run ANALYZE after repacking; -Z clears it */
static bool alldb = false;			/* -a: repack every database */
static bool noorder = false;		/* -n: VACUUM FULL mode (no ORDER BY) */
static char *table = NULL;			/* -t: repack only this table */
static char *orderby = NULL;		/* -o: user-specified ORDER BY columns */
static int wait_timeout = 60;		/* in seconds */
|
|
|
|
/*
 * Format an unsigned int as decimal text into caller-supplied storage.
 * The buffer must hold at least 11 bytes (10 digits plus the terminator).
 * Returns the buffer for convenient inline use.
 */
static char *
utoa(unsigned int value, char *buffer)
{
	char *out = buffer;

	sprintf(out, "%u", value);
	return out;
}
|
|
|
|
/* Option table for pgut_getopt().  First field is the pgut type code:
 * presumably 'b' = set bool, 'B' = clear bool, 's' = string, 'i' = int
 * -- TODO confirm against pgut-fe.h. */
static pgut_option options[] =
{
	{ 'b', 'a', "all", &alldb },
	{ 's', 't', "table", &table },
	{ 'b', 'n', "no-order", &noorder },
	{ 's', 'o', "order-by", &orderby },
	{ 'i', 'T', "wait-timeout", &wait_timeout },
	{ 'B', 'Z', "no-analyze", &analyze },
	{ 0 },
};
|
|
|
|
/*
 * Entry point: parse options, then repack either every database (-a)
 * or a single database (optionally a single table with -t).
 */
int
main(int argc, char *argv[])
{
	int i;

	i = pgut_getopt(argc, argv, options);

	/* At most one non-option argument is accepted: the database name. */
	if (i == argc - 1)
		dbname = argv[i];
	else if (i < argc)
		ereport(ERROR,
			(errcode(EINVAL),
			 errmsg("too many arguments")));

	/* An empty orderby string selects VACUUM FULL mode downstream. */
	if (noorder)
		orderby = "";

	if (alldb)
	{
		/* -t and -a are mutually exclusive. */
		if (table)
			ereport(ERROR,
				(errcode(EINVAL),
				 errmsg("cannot repack a specific table in all databases")));
		repack_all_databases(orderby);
	}
	else
	{
		/* false return means the repack schema is not installed there. */
		if (!repack_one_database(orderby, table))
			ereport(ERROR,
				(errcode(ENOENT),
				 errmsg("%s is not installed", PROGRAM_NAME)));
	}

	return 0;
}
|
|
|
|
/*
|
|
* Call repack_one_database for each database.
|
|
*/
|
|
static void
|
|
repack_all_databases(const char *orderby)
|
|
{
|
|
PGresult *result;
|
|
int i;
|
|
|
|
dbname = "postgres";
|
|
reconnect(ERROR);
|
|
result = execute("SELECT datname FROM pg_database WHERE datallowconn ORDER BY 1;", 0, NULL);
|
|
disconnect();
|
|
|
|
for (i = 0; i < PQntuples(result); i++)
|
|
{
|
|
bool ret;
|
|
|
|
dbname = PQgetvalue(result, i, 0);
|
|
|
|
if (pgut_log_level >= INFO)
|
|
{
|
|
printf("%s: repack database \"%s\"", PROGRAM_NAME, dbname);
|
|
fflush(stdout);
|
|
}
|
|
|
|
ret = repack_one_database(orderby, NULL);
|
|
|
|
if (pgut_log_level >= INFO)
|
|
{
|
|
if (ret)
|
|
printf("\n");
|
|
else
|
|
printf(" ... skipped\n");
|
|
fflush(stdout);
|
|
}
|
|
}
|
|
|
|
PQclear(result);
|
|
}
|
|
|
|
/* result is not copied */
|
|
static char *
|
|
getstr(PGresult *res, int row, int col)
|
|
{
|
|
if (PQgetisnull(res, row, col))
|
|
return NULL;
|
|
else
|
|
return PQgetvalue(res, row, col);
|
|
}
|
|
|
|
static Oid
|
|
getoid(PGresult *res, int row, int col)
|
|
{
|
|
if (PQgetisnull(res, row, col))
|
|
return InvalidOid;
|
|
else
|
|
return (Oid)strtoul(PQgetvalue(res, row, col), NULL, 10);
|
|
}
|
|
|
|
/*
 * Call repack_one_table for the target table or each table in a database.
 *
 * Returns false when the repack schema does not exist in this database
 * (the caller reports it as skipped), true otherwise.  Exits the process
 * outright on unexpected query failures.
 */
static bool
repack_one_database(const char *orderby, const char *table)
{
	bool ret = true;
	PGresult *res;
	int i;
	int num;
	StringInfoData sql;

	initStringInfo(&sql);

	reconnect(ERROR);

	/* Disable statement timeout. */
	command("SET statement_timeout = 0", 0, NULL);

	/* Restrict search_path to system catalog. */
	command("SET search_path = pg_catalog, pg_temp, public", 0, NULL);

	/* To avoid annoying "create implicit ..." messages. */
	command("SET client_min_messages = warning", 0, NULL);

	/* acquire target tables */
	appendStringInfoString(&sql, "SELECT * FROM repack.tables WHERE ");
	if (table)
	{
		/* A single table was requested with -t. */
		appendStringInfoString(&sql, "relid = $1::regclass");
		res = execute_elevel(sql.data, 1, &table, DEBUG2);
	}
	else
	{
		/* All tables with a PK; CLUSTER mode additionally needs a cluster key. */
		appendStringInfoString(&sql, "pkid IS NOT NULL");
		if (!orderby)
			appendStringInfoString(&sql, " AND ckid IS NOT NULL");
		res = execute_elevel(sql.data, 0, NULL, DEBUG2);
	}

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		if (sqlstate_equals(res, SQLSTATE_INVALID_SCHEMA_NAME))
		{
			/* Schema repack does not exist. Skip the database. */
			ret = false;
			goto cleanup;
		}
		else
		{
			/* exit otherwise */
			printf("%s", PQerrorMessage(connection));
			PQclear(res);
			exit(1);
		}
	}

	num = PQntuples(res);

	for (i = 0; i < num; i++)
	{
		/* NOTE: this local deliberately shadows the "table" parameter. */
		repack_table table;
		const char *create_table;
		const char *ckey;
		int c = 0;

		/* Column order must match the repack.tables view definition. */
		table.target_name = getstr(res, i, c++);
		table.target_oid = getoid(res, i, c++);
		table.target_toast = getoid(res, i, c++);
		table.target_tidx = getoid(res, i, c++);
		table.pkid = getoid(res, i, c++);
		table.ckid = getoid(res, i, c++);

		if (table.pkid == 0)
			ereport(ERROR,
				(errcode(E_PG_COMMAND),
				 errmsg("relation \"%s\" must have a primary key or not-null unique keys", table.target_name)));

		table.create_pktype = getstr(res, i, c++);
		table.create_log = getstr(res, i, c++);
		table.create_trigger = getstr(res, i, c++);
		table.enable_trigger = getstr(res, i, c++);

		create_table = getstr(res, i, c++);
		table.drop_columns = getstr(res, i, c++);
		table.delete_log = getstr(res, i, c++);
		table.lock_table = getstr(res, i, c++);
		ckey = getstr(res, i, c++);

		/* Build the CREATE TABLE ... AS SELECT, choosing the ORDER BY
		 * clause (or none) according to the requested mode. */
		resetStringInfo(&sql);
		if (!orderby)
		{
			/* CLUSTER mode */
			if (ckey == NULL)
				ereport(ERROR,
					(errcode(E_PG_COMMAND),
					 errmsg("relation \"%s\" has no cluster key", table.target_name)));
			appendStringInfo(&sql, "%s ORDER BY %s", create_table, ckey);
			table.create_table = sql.data;
		}
		else if (!orderby[0])
		{
			/* VACUUM FULL mode */
			table.create_table = create_table;
		}
		else
		{
			/* User specified ORDER BY */
			appendStringInfo(&sql, "%s ORDER BY %s", create_table, orderby);
			table.create_table = sql.data;
		}

		table.sql_peek = getstr(res, i, c++);
		table.sql_insert = getstr(res, i, c++);
		table.sql_delete = getstr(res, i, c++);
		table.sql_update = getstr(res, i, c++);
		table.sql_pop = getstr(res, i, c++);

		repack_one_table(&table, orderby);
	}

cleanup:
	PQclear(res);
	disconnect();
	termStringInfo(&sql);
	return ret;
}
|
|
|
|
static int
|
|
apply_log(PGconn *conn, const repack_table *table, int count)
|
|
{
|
|
int result;
|
|
PGresult *res;
|
|
const char *params[6];
|
|
char buffer[12];
|
|
|
|
params[0] = table->sql_peek;
|
|
params[1] = table->sql_insert;
|
|
params[2] = table->sql_delete;
|
|
params[3] = table->sql_update;
|
|
params[4] = table->sql_pop;
|
|
params[5] = utoa(count, buffer);
|
|
|
|
res = pgut_execute(conn,
|
|
"SELECT repack.repack_apply($1, $2, $3, $4, $5, $6)",
|
|
6, params);
|
|
result = atoi(PQgetvalue(res, 0, 0));
|
|
PQclear(res);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * Re-organize one table.
 *
 * Uses two connections: "connection" (the primary) does the setup, copy and
 * log-replay work; "conn2" holds an ACCESS SHARE lock across the primary's
 * COMMIT and then performs the swap, so no DDL can sneak in between the
 * primary releasing its lock and the swap taking place (Issue #8).
 * Failure modes: ereport(ERROR) for precondition failures, exit(1) for
 * unexpected libpq-level errors (cleanup then happens via repack_cleanup
 * if it was registered).
 */
static void
repack_one_table(const repack_table *table, const char *orderby)
{
	PGresult *res;
	const char *params[1];
	int num;
	int i;
	int num_waiting = 0;
	char *vxid;
	char *lock_conn_pid;
	char buffer[12];		/* scratch for utoa(); reused throughout */
	StringInfoData sql;

	initStringInfo(&sql);

	/* Dump the whole work plan at DEBUG2 for troubleshooting. */
	elog(DEBUG2, "---- repack_one_table ----");
	elog(DEBUG2, "target_name : %s", table->target_name);
	elog(DEBUG2, "target_oid : %u", table->target_oid);
	elog(DEBUG2, "target_toast : %u", table->target_toast);
	elog(DEBUG2, "target_tidx : %u", table->target_tidx);
	elog(DEBUG2, "pkid : %u", table->pkid);
	elog(DEBUG2, "ckid : %u", table->ckid);
	elog(DEBUG2, "create_pktype : %s", table->create_pktype);
	elog(DEBUG2, "create_log : %s", table->create_log);
	elog(DEBUG2, "create_trigger : %s", table->create_trigger);
	elog(DEBUG2, "enable_trigger : %s", table->enable_trigger);
	elog(DEBUG2, "create_table : %s", table->create_table);
	elog(DEBUG2, "drop_columns : %s", table->drop_columns ? table->drop_columns : "(skipped)");
	elog(DEBUG2, "delete_log : %s", table->delete_log);
	elog(DEBUG2, "lock_table : %s", table->lock_table);
	elog(DEBUG2, "sql_peek : %s", table->sql_peek);
	elog(DEBUG2, "sql_insert : %s", table->sql_insert);
	elog(DEBUG2, "sql_delete : %s", table->sql_delete);
	elog(DEBUG2, "sql_update : %s", table->sql_update);
	elog(DEBUG2, "sql_pop : %s", table->sql_pop);

	/*
	 * 1. Setup workspaces and a trigger.
	 *
	 * Take an ACCESS EXCLUSIVE lock on the target in the primary
	 * connection so the log table and trigger can be installed without
	 * concurrent writes.
	 */
	elog(DEBUG2, "---- setup ----");
	lock_exclusive(connection, utoa(table->target_oid, buffer), table->lock_table, TRUE);

	/*
	 * Check z_repack_trigger is the trigger executed at last so that
	 * other before triggers cannot modify triggered tuples.
	 */
	params[0] = utoa(table->target_oid, buffer);

	res = execute("SELECT repack.conflicted_triggers($1)", 1, params);
	if (PQntuples(res) > 0)
		ereport(ERROR,
			(errcode(E_PG_COMMAND),
			 errmsg("trigger %s conflicted for %s",
					PQgetvalue(res, 0, 0), table->target_name)));
	PQclear(res);

	/* Install the log infrastructure (type, log table, change trigger). */
	command(table->create_pktype, 0, NULL);
	command(table->create_log, 0, NULL);
	command(table->create_trigger, 0, NULL);
	command(table->enable_trigger, 0, NULL);
	printfStringInfo(&sql, "SELECT repack.disable_autovacuum('repack.log_%u')", table->target_oid);
	command(sql.data, 0, NULL);

	/* While we are still holding an AccessExclusive lock on the table, submit
	 * the request for an AccessShare lock asynchronously from conn2.
	 * We want to submit this query in conn2 while connection's
	 * transaction still holds its lock, so that no DDL may sneak in
	 * between the time that connection commits and conn2 gets its lock.
	 */
	pgut_command(conn2, "BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);

	/* grab the backend PID of conn2; we'll need this when querying
	 * pg_locks momentarily.
	 */
	res = pgut_execute(conn2, "SELECT pg_backend_pid()", 0, NULL);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		printf("%s", PQerrorMessage(conn2));
		PQclear(res);
		exit(1);
	}
	/* NOTE(review): strdup() result is not checked for NULL here. */
	lock_conn_pid = strdup(PQgetvalue(res, 0, 0));
	PQclear(res);

	/* Async so we don't block; conn2 will queue behind our own lock. */
	printfStringInfo(&sql, "LOCK TABLE %s IN ACCESS SHARE MODE",
		table->target_name);
	elog(DEBUG2, "LOCK TABLE %s IN ACCESS SHARE MODE", table->target_name);
	if (!(PQsendQuery(conn2, sql.data))) {
		printf("Error sending async query: %s\n%s", sql.data, PQerrorMessage(conn2));
		exit(1);
	}

	/* Now that we've submitted the LOCK TABLE request through conn2,
	 * look for and cancel any (potentially dangerous) DDL commands which
	 * might also be waiting on our table lock at this point --
	 * it's not safe to let them wait, because they may grab their
	 * AccessExclusive lock before conn2 gets its AccessShare lock,
	 * and perform unsafe DDL on the table.
	 *
	 * XXX: maybe we should use a loop canceling queries, as in
	 * lock_exclusive().
	 */
	printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, table->target_oid);
	res = execute(sql.data, 0, NULL);
	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		printf("Error canceling competing queries: %s", PQerrorMessage(connection));
		PQclear(res);
		exit(1);
	}
	if (PQntuples(res) > 0)
	{
		elog(WARNING, "Canceled %d unsafe queries. Terminating any remaining PIDs.", PQntuples(res));

		/* pg_terminate_backend() only exists from 8.4 onward. */
		if (PQserverVersion(connection) >= 80400)
		{
			PQclear(res);
			printfStringInfo(&sql, KILL_COMPETING_LOCKS, table->target_oid);
			res = execute(sql.data, 0, NULL);
			if (PQresultStatus(res) != PGRES_TUPLES_OK)
			{
				printf("Error killing competing queries: %s", PQerrorMessage(connection));
				PQclear(res);
				exit(1);
			}
		}

	}
	else
	{
		elog(DEBUG2, "No competing DDL to cancel.");
	}
	PQclear(res);


	/* We're finished killing off any unsafe DDL. COMMIT in our main
	 * connection, so that conn2 may get its AccessShare lock.
	 */
	command("COMMIT", 0, NULL);

	/* Keep looping PQgetResult() calls until it returns NULL, indicating the
	 * command is done and we have obtained our lock.
	 */
	while ((res = PQgetResult(conn2)))
	{
		elog(DEBUG2, "Waiting on ACCESS SHARE lock...");
		if (PQresultStatus(res) != PGRES_COMMAND_OK)
		{
			printf("Error with LOCK TABLE: %s", PQerrorMessage(conn2));
			PQclear(res);
			exit(1);
		}
		PQclear(res);
	}


	/*
	 * Register the table to be dropped on error. We use pktype as
	 * an advisory lock. The registration should be done after
	 * the first command succeeds.
	 */
	pgut_atexit_push(&repack_cleanup, (void *) table);

	/*
	 * 2. Copy tuples into temp table.
	 */
	elog(DEBUG2, "---- copy tuples ----");

	command("BEGIN ISOLATION LEVEL SERIALIZABLE", 0, NULL);
	/* SET work_mem = maintenance_work_mem */
	command("SELECT set_config('work_mem', current_setting('maintenance_work_mem'), true)", 0, NULL);
	/* In VACUUM FULL mode, disable seqscan synchronization to keep the
	 * copy in physical order. */
	if (orderby && !orderby[0])
		command("SET LOCAL synchronize_seqscans = off", 0, NULL);

	/* Fetch an array of Virtual IDs of all transactions active right now,
	 * excluding ourselves and conn2 (identified by lock_conn_pid).
	 */
	params[0] = lock_conn_pid;
	res = execute(SQL_XID_SNAPSHOT, 1, params);
	/* NOTE(review): strdup() result is not checked for NULL here. */
	vxid = strdup(PQgetvalue(res, 0, 0));
	PQclear(res);

	command(table->delete_log, 0, NULL);
	command(table->create_table, 0, NULL);
	/* NOTE(review): sql is built before drop_columns runs, then executed
	 * after it -- the ordering looks odd but is harmless since sql is not
	 * touched in between. */
	printfStringInfo(&sql, "SELECT repack.disable_autovacuum('repack.table_%u')", table->target_oid);
	if (table->drop_columns)
		command(table->drop_columns, 0, NULL);
	command(sql.data, 0, NULL);
	command("COMMIT", 0, NULL);

	/*
	 * 3. Create indexes on temp table.
	 */
	elog(DEBUG2, "---- create indexes ----");

	params[0] = utoa(table->target_oid, buffer);
	res = execute("SELECT indexrelid,"
		" repack.repack_indexdef(indexrelid, indrelid),"
		" indisvalid,"
		" pg_get_indexdef(indexrelid)"
		" FROM pg_index WHERE indrelid = $1", 1, params);

	num = PQntuples(res);
	for (i = 0; i < num; i++)
	{
		repack_index index;
		int c = 0;
		const char *isvalid;
		const char *indexdef;

		index.target_oid = getoid(res, i, c++);
		index.create_index = getstr(res, i, c++);
		isvalid = getstr(res, i, c++);
		indexdef = getstr(res, i, c++);

		/* indisvalid arrives as 't'/'f' text; skip invalid indexes. */
		if (isvalid && isvalid[0] == 'f') {
			elog(WARNING, "skipping invalid index: %s", indexdef);
			continue;
		}

		elog(DEBUG2, "[%d]", i);
		elog(DEBUG2, "target_oid : %u", index.target_oid);
		elog(DEBUG2, "create_index : %s", index.create_index);

		/*
		 * NOTE: If we want to create multiple indexes in parallel,
		 * we need to call create_index in multiple connections.
		 */
		command(index.create_index, 0, NULL);
	}
	PQclear(res);

	/*
	 * 4. Apply log to temp table until no tuples are left in the log
	 * and all of the old transactions are finished.
	 */
	for (;;)
	{
		num = apply_log(connection, table, APPLY_COUNT);
		if (num > 0)
			continue; /* there might be still some tuples, repeat. */

		/* old transactions still alive ? */
		params[0] = vxid;
		res = execute(SQL_XID_ALIVE, 1, params);
		num = PQntuples(res);

		if (num > 0)
		{
			/* Wait for old transactions.
			 * Only display the message below when the number of
			 * transactions we are waiting on changes (presumably,
			 * num_waiting should only go down), so as not to
			 * be too noisy.
			 */
			if (num != num_waiting)
			{
				elog(NOTICE, "Waiting for %d transactions to finish. First PID: %s", num, PQgetvalue(res, 0, 0));
				num_waiting = num;
			}

			PQclear(res);
			sleep(1);
			continue;
		}
		else
		{
			/* All old transactions are finished;
			 * go to next step. */
			PQclear(res);
			break;
		}
	}

	/*
	 * 5. Swap: will be done with conn2, since it already holds an
	 * AccessShare lock.
	 */
	elog(DEBUG2, "---- swap ----");
	/* Bump our existing AccessShare lock to AccessExclusive */
	lock_exclusive(conn2, utoa(table->target_oid, buffer), table->lock_table,
		FALSE);
	/* Drain any remaining log rows, then swap under the exclusive lock. */
	apply_log(conn2, table, 0);
	params[0] = utoa(table->target_oid, buffer);
	pgut_command(conn2, "SELECT repack.repack_swap($1)", 1, params);
	pgut_command(conn2, "COMMIT", 0, NULL);

	/*
	 * 6. Drop.
	 */
	elog(DEBUG2, "---- drop ----");

	command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
	params[0] = utoa(table->target_oid, buffer);
	command("SELECT repack.repack_drop($1)", 1, params);
	command("COMMIT", 0, NULL);

	pgut_atexit_pop(&repack_cleanup, (void *) table);
	free(vxid);
	free(lock_conn_pid);

	/*
	 * 7. Analyze.
	 * Note that cleanup hook has been already uninstalled here because analyze
	 * is not an important operation; No clean up even if failed.
	 */
	if (analyze)
	{
		elog(DEBUG2, "---- analyze ----");

		command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
		printfStringInfo(&sql, "ANALYZE %s", table->target_name);
		command(sql.data, 0, NULL);
		command("COMMIT", 0, NULL);
	}

	termStringInfo(&sql);
}
|
|
|
|
/*
|
|
* Try acquire a table lock but avoid long time locks when conflict.
|
|
* Arguments:
|
|
*
|
|
* conn: connection to use
|
|
* relid: OID of relation
|
|
* lock_query: LOCK TABLE ... IN ACCESS EXCLUSIVE query to be executed
|
|
* start_xact: whether we need to issue a BEGIN;
|
|
*/
|
|
static void
|
|
lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool start_xact)
|
|
{
|
|
time_t start = time(NULL);
|
|
int i;
|
|
|
|
for (i = 1; ; i++)
|
|
{
|
|
time_t duration;
|
|
char sql[1024];
|
|
PGresult *res;
|
|
int wait_msec;
|
|
|
|
if (start_xact)
|
|
pgut_command(conn, "BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
|
|
|
|
duration = time(NULL) - start;
|
|
if (duration > wait_timeout)
|
|
{
|
|
const char *cancel_query;
|
|
if (PQserverVersion(conn) >= 80400 &&
|
|
duration > wait_timeout * 2)
|
|
{
|
|
elog(WARNING, "terminating conflicted backends");
|
|
cancel_query =
|
|
"SELECT pg_terminate_backend(pid) FROM pg_locks"
|
|
" WHERE locktype = 'relation'"
|
|
" AND relation = $1 AND pid <> pg_backend_pid()";
|
|
}
|
|
else
|
|
{
|
|
elog(WARNING, "canceling conflicted backends");
|
|
cancel_query =
|
|
"SELECT pg_cancel_backend(pid) FROM pg_locks"
|
|
" WHERE locktype = 'relation'"
|
|
" AND relation = $1 AND pid <> pg_backend_pid()";
|
|
}
|
|
|
|
pgut_command(conn, cancel_query, 1, &relid);
|
|
}
|
|
|
|
/* wait for a while to lock the table. */
|
|
wait_msec = Min(1000, i * 100);
|
|
snprintf(sql, lengthof(sql), "SET LOCAL statement_timeout = %d", wait_msec);
|
|
command(sql, 0, NULL);
|
|
|
|
res = pgut_execute_elevel(conn, lock_query, 0, NULL, DEBUG2);
|
|
if (PQresultStatus(res) == PGRES_COMMAND_OK)
|
|
{
|
|
PQclear(res);
|
|
break;
|
|
}
|
|
else if (sqlstate_equals(res, SQLSTATE_QUERY_CANCELED))
|
|
{
|
|
/* retry if lock conflicted */
|
|
PQclear(res);
|
|
command("ROLLBACK", 0, NULL);
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
/* exit otherwise */
|
|
printf("%s", PQerrorMessage(connection));
|
|
PQclear(res);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
command("RESET statement_timeout", 0, NULL);
|
|
}
|
|
|
|
/*
|
|
* The userdata pointing a table being re-organized. We need to cleanup temp
|
|
* objects before the program exits.
|
|
*/
|
|
static void
|
|
repack_cleanup(bool fatal, void *userdata)
|
|
{
|
|
const repack_table *table = (const repack_table *) userdata;
|
|
|
|
if (fatal)
|
|
{
|
|
fprintf(stderr, "!!!FATAL ERROR!!! Please refer to the manual.\n\n");
|
|
}
|
|
else
|
|
{
|
|
char buffer[12];
|
|
const char *params[1];
|
|
|
|
/* Rollback current transaction */
|
|
if (conn2)
|
|
pgut_command(conn2, "ROLLBACK", 0, NULL);
|
|
|
|
if (connection)
|
|
command("ROLLBACK", 0, NULL);
|
|
|
|
/* Try reconnection if not available. */
|
|
if (PQstatus(connection) != CONNECTION_OK)
|
|
reconnect(ERROR);
|
|
|
|
/* do cleanup */
|
|
params[0] = utoa(table->target_oid, buffer);
|
|
command("SELECT repack.repack_drop($1)", 1, params);
|
|
}
|
|
}
|
|
|
|
void
|
|
pgut_help(bool details)
|
|
{
|
|
printf("%s re-organizes a PostgreSQL database.\n\n", PROGRAM_NAME);
|
|
printf("Usage:\n");
|
|
printf(" %s [OPTION]... [DBNAME]\n", PROGRAM_NAME);
|
|
|
|
if (!details)
|
|
return;
|
|
|
|
printf("Options:\n");
|
|
printf(" -a, --all repack all databases\n");
|
|
printf(" -n, --no-order do vacuum full instead of cluster\n");
|
|
printf(" -o, --order-by=COLUMNS order by columns instead of cluster keys\n");
|
|
printf(" -t, --table=TABLE repack specific table only\n");
|
|
printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n");
|
|
printf(" -Z, --no-analyze don't analyze at end\n");
|
|
}
|