Introduce --dont-kill-backend option.

pg_repack needs to take an exclusive lock at the end of the
reorganization. If the lock cannot be taken within the duration
specified by the --wait-timeout option and this option is set,
pg_repack gives up repacking the target table instead of
cancelling the conflicting backends. False by default.
This commit is contained in:
Masahiko Sawada 2017-01-19 18:26:13 +09:00
parent 6cadd7d97d
commit 34c6506f54
4 changed files with 99 additions and 43 deletions

View File

@ -152,6 +152,11 @@ const char *PROGRAM_VERSION = "unknown";
" AND granted = false AND relation = %u"\ " AND granted = false AND relation = %u"\
" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
/*
 * Query template listing the PIDs of other backends that hold a not-yet-granted
 * AccessExclusiveLock request on the relation whose OID is substituted
 * for %u. The number of rows returned is the number of competing backends.
 *
 * Each continuation literal starts with a space: adjacent string literals
 * are concatenated verbatim, and without the separator the query would
 * contain "'relation'AND" and "<oid>AND", the latter being rejected by
 * newer PostgreSQL servers as trailing junk after a numeric literal.
 */
#define COUNT_COMPETING_LOCKS \
	"SELECT pid FROM pg_locks WHERE locktype = 'relation'" \
	" AND granted = false AND relation = %u" \
	" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
/* Will be used as a unique prefix for advisory locks. */ /* Will be used as a unique prefix for advisory locks. */
#define REPACK_LOCK_PREFIX_STR "16185446" #define REPACK_LOCK_PREFIX_STR "16185446"
@ -244,6 +249,7 @@ static int wait_timeout = 60; /* in seconds */
static int jobs = 0; /* number of concurrent worker conns. */ static int jobs = 0; /* number of concurrent worker conns. */
static bool dryrun = false; static bool dryrun = false;
static unsigned int temp_obj_num = 0; /* temporary objects counter */ static unsigned int temp_obj_num = 0; /* temporary objects counter */
static bool dont_kill_backend = false; /* abandon when timed-out */
/* buffer should have at least 11 bytes */ /* buffer should have at least 11 bytes */
static char * static char *
@ -269,6 +275,7 @@ static pgut_option options[] =
{ 'i', 'T', "wait-timeout", &wait_timeout }, { 'i', 'T', "wait-timeout", &wait_timeout },
{ 'B', 'Z', "no-analyze", &analyze }, { 'B', 'Z', "no-analyze", &analyze },
{ 'i', 'j', "jobs", &jobs }, { 'i', 'j', "jobs", &jobs },
{ 'b', 'D', "dont-kill-backend", &dont_kill_backend },
{ 0 }, { 0 },
}; };
@ -1453,9 +1460,9 @@ cleanup:
} }
/* Kill off any concurrent DDL (or any transaction attempting to take /* Kill off any concurrent DDL (or any transaction attempting to take
* an AccessExclusive lock) trying to run against our table. Note, we're * an AccessExclusive lock) trying to run against our table if we want to
* killing these queries off *before* they are granted an AccessExclusive * do. Note, we're killing these queries off *before* they are granted
* lock on our table. * an AccessExclusive lock on our table.
* *
* Returns true if no problems encountered, false otherwise. * Returns true if no problems encountered, false otherwise.
*/ */
@ -1465,35 +1472,57 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate)
bool ret = true; bool ret = true;
PGresult *res; PGresult *res;
StringInfoData sql; StringInfoData sql;
int n_tuples;
initStringInfo(&sql); initStringInfo(&sql);
printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); /* Check the number of backends competing AccessExclusiveLock */
printfStringInfo(&sql, COUNT_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL); res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK) n_tuples = PQntuples(res);
{
elog(WARNING, "Error canceling unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400)
{
elog(WARNING,
"Canceled %d unsafe queries. Terminating any remaining PIDs.",
PQntuples(res));
CLEARPGRES(res); if (n_tuples != 0)
printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); {
res = pgut_execute(conn, sql.data, 0, NULL); /* A competing backend exists, but if we do not want to cancel/terminate
if (PQresultStatus(res) != PGRES_TUPLES_OK) * any backend, do nothing.
*/
if (dont_kill_backend)
{ {
elog(WARNING, "Error killing unsafe queries: %s", elog(WARNING, "%d unsafe queries remain but do not cancel them",
PQerrorMessage(conn)); n_tuples);
ret = false; ret = false;
} }
else
{
resetStringInfo(&sql);
printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
elog(WARNING, "Error canceling unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400)
{
elog(WARNING,
"Canceled %d unsafe queries. Terminating any remaining PIDs.",
PQntuples(res));
CLEARPGRES(res);
printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
elog(WARNING, "Error killing unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
}
else if (PQntuples(res) > 0)
elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res));
}
} }
else if (PQntuples(res) > 0)
elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res));
else else
elog(DEBUG2, "No competing DDL to cancel."); elog(DEBUG2, "No competing DDL to cancel.");
@ -1652,26 +1681,35 @@ lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool sta
duration = time(NULL) - start; duration = time(NULL) - start;
if (duration > wait_timeout) if (duration > wait_timeout)
{ {
const char *cancel_query; if (dont_kill_backend)
if (PQserverVersion(conn) >= 80400 &&
duration > wait_timeout * 2)
{ {
elog(WARNING, "terminating conflicted backends"); elog(WARNING, "timed out, do not cancel conflicting backends");
cancel_query = ret = false;
"SELECT pg_terminate_backend(pid) FROM pg_locks" break;
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
} }
else else
{ {
elog(WARNING, "canceling conflicted backends"); const char *cancel_query;
cancel_query = if (PQserverVersion(conn) >= 80400 &&
"SELECT pg_cancel_backend(pid) FROM pg_locks" duration > wait_timeout * 2)
" WHERE locktype = 'relation'" {
" AND relation = $1 AND pid <> pg_backend_pid()"; elog(WARNING, "terminating conflicted backends");
} cancel_query =
"SELECT pg_terminate_backend(pid) FROM pg_locks"
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
}
else
{
elog(WARNING, "canceling conflicted backends");
cancel_query =
"SELECT pg_cancel_backend(pid) FROM pg_locks"
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
}
pgut_command(conn, cancel_query, 1, &relid); pgut_command(conn, cancel_query, 1, &relid);
}
} }
/* wait for a while to lock the table. */ /* wait for a while to lock the table. */
@ -2063,5 +2101,6 @@ pgut_help(bool details)
printf(" -i, --index=INDEX move only the specified index\n"); printf(" -i, --index=INDEX move only the specified index\n");
printf(" -x, --only-indexes move only indexes of the specified table\n"); printf(" -x, --only-indexes move only indexes of the specified table\n");
printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n"); printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n");
printf(" -D, --dont-kill-backend do not kill other backends when timed out\n");
printf(" -Z, --no-analyze don't analyze at end\n"); printf(" -Z, --no-analyze don't analyze at end\n");
} }

View File

@ -127,6 +127,7 @@ Options:
-i, --index=INDEX move only the specified index -i, --index=INDEX move only the specified index
-x, --only-indexes move only indexes of the specified table -x, --only-indexes move only indexes of the specified table
-T, --wait-timeout=SECS timeout to cancel other backends on conflict -T, --wait-timeout=SECS timeout to cancel other backends on conflict
-D, --dont-kill-backend do not kill other backends when timed out
-Z, --no-analyze don't analyze at end -Z, --no-analyze don't analyze at end
Connection options: Connection options:
@ -200,11 +201,17 @@ Reorg Options
``-T SECS``, ``--wait-timeout=SECS`` ``-T SECS``, ``--wait-timeout=SECS``
pg_repack needs to take an exclusive lock at the end of the pg_repack needs to take an exclusive lock at the end of the
reorganization. This setting controls how many seconds pg_repack will reorganization. This setting controls how many seconds pg_repack will
wait to acquire this lock. If the lock cannot be taken after this duration, wait to acquire this lock. If the lock cannot be taken after this duration
pg_repack will forcibly cancel the conflicting queries. If you are using and ``--dont-kill-backend`` option is not specified, pg_repack will forcibly
PostgreSQL version 8.4 or newer, pg_repack will fall back to using cancel the conflicting queries. If you are using PostgreSQL version 8.4
pg_terminate_backend() to disconnect any remaining backends after or newer, pg_repack will fall back to using pg_terminate_backend() to
twice this timeout has passed. The default is 60 seconds. disconnect any remaining backends after twice this timeout has passed.
The default is 60 seconds.
``-D``, ``--dont-kill-backend``
Skip repacking the table if the lock cannot be taken within the duration
specified by ``--wait-timeout``, instead of cancelling conflicting queries.
The default is false.
``-Z``, ``--no-analyze`` ``-Z``, ``--no-analyze``
Disable ANALYZE after a full-table reorganization. If not specified, run Disable ANALYZE after a full-table reorganization. If not specified, run

View File

@ -387,3 +387,8 @@ ERROR: cannot repack specific table(s) in schema, use schema.table notation inst
-- => ERROR -- => ERROR
\! pg_repack --dbname=contrib_regression --all --schema=test_schema1 \! pg_repack --dbname=contrib_regression --all --schema=test_schema1
ERROR: cannot repack specific schema(s) in all databases ERROR: cannot repack specific schema(s) in all databases
--
-- don't kill backend
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend
INFO: repacking table "tbl_cluster"

View File

@ -230,3 +230,8 @@ CREATE TABLE test_schema2.tbl2 (id INTEGER PRIMARY KEY);
\! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1 \! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1
-- => ERROR -- => ERROR
\! pg_repack --dbname=contrib_regression --all --schema=test_schema1 \! pg_repack --dbname=contrib_regression --all --schema=test_schema1
--
-- don't kill backend
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend