Merge pull request #108 from MasahikoSawada/dont_kill_backend

Introduce --no-kill-backend option.
This commit is contained in:
masahiko 2017-02-23 09:39:36 +09:00 committed by GitHub
commit b329f9e143
5 changed files with 122 additions and 51 deletions

View File

@ -152,6 +152,11 @@ const char *PROGRAM_VERSION = "unknown";
" AND granted = false AND relation = %u"\ " AND granted = false AND relation = %u"\
" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
/*
 * Query listing the PIDs of other backends (pid <> pg_backend_pid()) that are
 * still waiting (granted = false) for an AccessExclusiveLock on the relation
 * substituted for the %u placeholder.  Despite the name, the query returns
 * one row per waiting backend rather than a count.
 */
#define COUNT_COMPETING_LOCKS \
"SELECT pid FROM pg_locks WHERE locktype = 'relation'" \
" AND granted = false AND relation = %u" \
" AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()"
/* Will be used as a unique prefix for advisory locks. */ /* Will be used as a unique prefix for advisory locks. */
#define REPACK_LOCK_PREFIX_STR "16185446" #define REPACK_LOCK_PREFIX_STR "16185446"
@ -244,6 +249,7 @@ static int wait_timeout = 60; /* in seconds */
static int jobs = 0; /* number of concurrent worker conns. */ static int jobs = 0; /* number of concurrent worker conns. */
static bool dryrun = false; static bool dryrun = false;
static unsigned int temp_obj_num = 0; /* temporary objects counter */ static unsigned int temp_obj_num = 0; /* temporary objects counter */
static bool no_kill_backend = false; /* abandon when timed-out */
/* buffer should have at least 11 bytes */ /* buffer should have at least 11 bytes */
static char * static char *
@ -269,6 +275,7 @@ static pgut_option options[] =
{ 'i', 'T', "wait-timeout", &wait_timeout }, { 'i', 'T', "wait-timeout", &wait_timeout },
{ 'B', 'Z', "no-analyze", &analyze }, { 'B', 'Z', "no-analyze", &analyze },
{ 'i', 'j', "jobs", &jobs }, { 'i', 'j', "jobs", &jobs },
{ 'b', 'D', "no-kill-backend", &no_kill_backend },
{ 0 }, { 0 },
}; };
@ -1074,7 +1081,10 @@ repack_one_table(repack_table *table, const char *orderby)
if (!(lock_exclusive(connection, buffer, table->lock_table, TRUE))) if (!(lock_exclusive(connection, buffer, table->lock_table, TRUE)))
{ {
elog(WARNING, "lock_exclusive() failed for %s", table->target_name); if (no_kill_backend)
elog(INFO, "Skipping repack %s due to timeout", table->target_name);
else
elog(WARNING, "lock_exclusive() failed for %s", table->target_name);
goto cleanup; goto cleanup;
} }
@ -1208,7 +1218,10 @@ repack_one_table(repack_table *table, const char *orderby)
*/ */
if (!(kill_ddl(connection, table->target_oid, true))) if (!(kill_ddl(connection, table->target_oid, true)))
{ {
elog(WARNING, "kill_ddl() failed."); if (no_kill_backend)
elog(INFO, "Skipping repack %s due to timeout.", table->target_name);
else
elog(WARNING, "kill_ddl() failed.");
goto cleanup; goto cleanup;
} }
@ -1438,9 +1451,9 @@ cleanup:
} }
/* Kill off any concurrent DDL (or any transaction attempting to take /* Kill off any concurrent DDL (or any transaction attempting to take
* an AccessExclusive lock) trying to run against our table. Note, we're * an AccessExclusive lock) trying to run against our table, unless
* killing these queries off *before* they are granted an AccessExclusive * --no-kill-backend was given. Note, we're killing these queries off *before* they are granted
* lock on our table. * an AccessExclusive lock on our table.
* *
* Returns true if no problems encountered, false otherwise. * Returns true if no problems encountered, false otherwise.
*/ */
@ -1450,35 +1463,57 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate)
bool ret = true; bool ret = true;
PGresult *res; PGresult *res;
StringInfoData sql; StringInfoData sql;
int n_tuples;
initStringInfo(&sql); initStringInfo(&sql);
printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); /* Check the number of backends competing AccessExclusiveLock */
printfStringInfo(&sql, COUNT_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL); res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK) n_tuples = PQntuples(res);
{
elog(WARNING, "Error canceling unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400)
{
elog(WARNING,
"Canceled %d unsafe queries. Terminating any remaining PIDs.",
PQntuples(res));
CLEARPGRES(res); if (n_tuples != 0)
printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); {
res = pgut_execute(conn, sql.data, 0, NULL); /* A competing backend exists, but if we do not want to cancel/terminate
if (PQresultStatus(res) != PGRES_TUPLES_OK) * any backend, do nothing.
*/
if (no_kill_backend)
{ {
elog(WARNING, "Error killing unsafe queries: %s", elog(WARNING, "%d unsafe queries remain but do not cancel them and skip to repack it",
PQerrorMessage(conn)); n_tuples);
ret = false; ret = false;
} }
else
{
resetStringInfo(&sql);
printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
elog(WARNING, "Error canceling unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400)
{
elog(WARNING,
"Canceled %d unsafe queries. Terminating any remaining PIDs.",
PQntuples(res));
CLEARPGRES(res);
printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid);
res = pgut_execute(conn, sql.data, 0, NULL);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
elog(WARNING, "Error killing unsafe queries: %s",
PQerrorMessage(conn));
ret = false;
}
}
else if (PQntuples(res) > 0)
elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res));
}
} }
else if (PQntuples(res) > 0)
elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res));
else else
elog(DEBUG2, "No competing DDL to cancel."); elog(DEBUG2, "No competing DDL to cancel.");
@ -1637,26 +1672,35 @@ lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool sta
duration = time(NULL) - start; duration = time(NULL) - start;
if (duration > wait_timeout) if (duration > wait_timeout)
{ {
const char *cancel_query; if (no_kill_backend)
if (PQserverVersion(conn) >= 80400 &&
duration > wait_timeout * 2)
{ {
elog(WARNING, "terminating conflicted backends"); elog(WARNING, "timed out, do not cancel conflicting backends");
cancel_query = ret = false;
"SELECT pg_terminate_backend(pid) FROM pg_locks" break;
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
} }
else else
{ {
elog(WARNING, "canceling conflicted backends"); const char *cancel_query;
cancel_query = if (PQserverVersion(conn) >= 80400 &&
"SELECT pg_cancel_backend(pid) FROM pg_locks" duration > wait_timeout * 2)
" WHERE locktype = 'relation'" {
" AND relation = $1 AND pid <> pg_backend_pid()"; elog(WARNING, "terminating conflicted backends");
} cancel_query =
"SELECT pg_terminate_backend(pid) FROM pg_locks"
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
}
else
{
elog(WARNING, "canceling conflicted backends");
cancel_query =
"SELECT pg_cancel_backend(pid) FROM pg_locks"
" WHERE locktype = 'relation'"
" AND relation = $1 AND pid <> pg_backend_pid()";
}
pgut_command(conn, cancel_query, 1, &relid); pgut_command(conn, cancel_query, 1, &relid);
}
} }
/* wait for a while to lock the table. */ /* wait for a while to lock the table. */
@ -2048,5 +2092,6 @@ pgut_help(bool details)
printf(" -i, --index=INDEX move only the specified index\n"); printf(" -i, --index=INDEX move only the specified index\n");
printf(" -x, --only-indexes move only indexes of the specified table\n"); printf(" -x, --only-indexes move only indexes of the specified table\n");
printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n"); printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n");
printf(" -D, --no-kill-backend don't kill other backends when timed out\n");
printf(" -Z, --no-analyze don't analyze at end\n"); printf(" -Z, --no-analyze don't analyze at end\n");
} }

View File

@ -127,6 +127,7 @@ Options:
-i, --index=INDEX move only the specified index -i, --index=INDEX move only the specified index
-x, --only-indexes move only indexes of the specified table -x, --only-indexes move only indexes of the specified table
-T, --wait-timeout=SECS timeout to cancel other backends on conflict -T, --wait-timeout=SECS timeout to cancel other backends on conflict
-D, --no-kill-backend don't kill other backends when timed out
-Z, --no-analyze don't analyze at end -Z, --no-analyze don't analyze at end
Connection options: Connection options:
@ -200,11 +201,17 @@ Reorg Options
``-T SECS``, ``--wait-timeout=SECS`` ``-T SECS``, ``--wait-timeout=SECS``
pg_repack needs to take an exclusive lock at the end of the pg_repack needs to take an exclusive lock at the end of the
reorganization. This setting controls how many seconds pg_repack will reorganization. This setting controls how many seconds pg_repack will
wait to acquire this lock. If the lock cannot be taken after this duration, wait to acquire this lock. If the lock cannot be taken after this duration
pg_repack will forcibly cancel the conflicting queries. If you are using and the ``--no-kill-backend`` option is not specified, pg_repack will forcibly
PostgreSQL version 8.4 or newer, pg_repack will fall back to using cancel the conflicting queries. If you are using PostgreSQL version 8.4
pg_terminate_backend() to disconnect any remaining backends after or newer, pg_repack will fall back to using pg_terminate_backend() to
twice this timeout has passed. The default is 60 seconds. disconnect any remaining backends after twice this timeout has passed.
The default is 60 seconds.
``-D``, ``--no-kill-backend``
Skip repacking the table if the lock cannot be taken within the duration
specified by ``--wait-timeout``, instead of cancelling conflicting queries.
The default is false.
``-Z``, ``--no-analyze`` ``-Z``, ``--no-analyze``
Disable ANALYZE after a full-table reorganization. If not specified, run Disable ANALYZE after a full-table reorganization. If not specified, run

View File

@ -206,6 +206,7 @@ pg_repackもしくはpg_reorgの古いバージョンからのアップグレー
-i, --index=INDEX move only the specified index -i, --index=INDEX move only the specified index
-x, --only-indexes move only indexes of the specified table -x, --only-indexes move only indexes of the specified table
-T, --wait-timeout=SECS timeout to cancel other backends on conflict -T, --wait-timeout=SECS timeout to cancel other backends on conflict
-D, --no-kill-backend don't kill other backends when timed out
-Z, --no-analyze don't analyze at end -Z, --no-analyze don't analyze at end
Connection options: Connection options:
@ -244,6 +245,7 @@ OPTIONには以下のものが指定できます。
-i, --index=INDEX 指定したインデックスのみ再編成します -i, --index=INDEX 指定したインデックスのみ再編成します
-x, --only-indexes 指定したテーブルに付与されたインデックスだけを再編成します -x, --only-indexes 指定したテーブルに付与されたインデックスだけを再編成します
-T, --wait-timeout=SECS ロック競合している他のトランザクションをキャンセルするまで待機する時間を指定します -T, --wait-timeout=SECS ロック競合している他のトランザクションをキャンセルするまで待機する時間を指定します
-D, --no-kill-backend タイムアウト時に他のバックエンドをキャンセルしません
-Z, --no-analyze 再編成後にANALYZEを行いません -Z, --no-analyze 再編成後にANALYZEを行いません
接続オプション: 接続オプション:
@ -351,15 +353,22 @@ OPTIONには以下のものが指定できます。
.. ``-T SECS``, ``--wait-timeout=SECS`` .. ``-T SECS``, ``--wait-timeout=SECS``
pg_repack needs to take an exclusive lock at the end of the pg_repack needs to take an exclusive lock at the end of the
reorganization. This setting controls how many seconds pg_repack will reorganization. This setting controls how many seconds pg_repack will
wait to acquire this lock. If the lock cannot be taken after this duration, wait to acquire this lock. If the lock cannot be taken after this duration
pg_repack will forcibly cancel the conflicting queries. If you are using and the ``--no-kill-backend`` option is not specified, pg_repack will forcibly
PostgreSQL version 8.4 or newer, pg_repack will fall back to using cancel the conflicting queries. If you are using PostgreSQL version 8.4
pg_terminate_backend() to disconnect any remaining backends after or newer, pg_repack will fall back to using pg_terminate_backend() to
twice this timeout has passed. The default is 60 seconds. disconnect any remaining backends after twice this timeout has passed.
The default is 60 seconds.
``-T SECS``, ``--wait-timeout=SECS`` ``-T SECS``, ``--wait-timeout=SECS``
pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。 pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できないかつ、`no-kill-backend`オプションが指定されていない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。
.. ``-D``, ``--no-kill-backend``
Skip repacking the table if the lock cannot be taken within the duration
specified by ``--wait-timeout``, instead of cancelling conflicting queries.
The default is false.
``-D``, ``--no-kill-backend``
``--wait-timeout``オプションで指定された時間が経過してもロックが取得できない場合、競合するクエリをキャンセルする代わりに対象テーブルの再編成をスキップします。
.. ``-Z``, ``--no-analyze`` .. ``-Z``, ``--no-analyze``
Disable ANALYZE after a full-table reorganization. If not specified, run Disable ANALYZE after a full-table reorganization. If not specified, run

View File

@ -381,3 +381,8 @@ ERROR: cannot repack specific table(s) in schema, use schema.table notation inst
-- => ERROR -- => ERROR
\! pg_repack --dbname=contrib_regression --all --schema=test_schema1 \! pg_repack --dbname=contrib_regression --all --schema=test_schema1
ERROR: cannot repack specific schema(s) in all databases ERROR: cannot repack specific schema(s) in all databases
--
-- don't kill backend
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend
INFO: repacking table "tbl_cluster"

View File

@ -227,3 +227,8 @@ CREATE TABLE test_schema2.tbl2 (id INTEGER PRIMARY KEY);
\! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1 \! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1
-- => ERROR -- => ERROR
\! pg_repack --dbname=contrib_regression --all --schema=test_schema1 \! pg_repack --dbname=contrib_regression --all --schema=test_schema1
--
-- don't kill backend
--
\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend