From 34c6506f545fc072604c46c7719bb564f920d87d Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Thu, 19 Jan 2017 18:26:13 +0900 Subject: [PATCH 1/5] Introduce --dont-kill-backend option. pg_repack needs to take an exclusive lock at the end of the reorganization. If the lock cannot be taken after duration --wait-timeout option specified and this option is true, pg_repack gives up to repack a target table instead of cancelling conflicting backend. False by default. --- bin/pg_repack.c | 115 ++++++++++++++++++++++++------------ doc/pg_repack.rst | 17 ++++-- regress/expected/repack.out | 5 ++ regress/sql/repack.sql | 5 ++ 4 files changed, 99 insertions(+), 43 deletions(-) diff --git a/bin/pg_repack.c b/bin/pg_repack.c index 8ccb1ae..c121b14 100644 --- a/bin/pg_repack.c +++ b/bin/pg_repack.c @@ -152,6 +152,11 @@ const char *PROGRAM_VERSION = "unknown"; " AND granted = false AND relation = %u"\ " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" +#define COUNT_COMPETING_LOCKS \ + "SELECT pid FROM pg_locks WHERE locktype = 'relation'" \ + "AND granted = false AND relation = %u" \ + "AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" + /* Will be used as a unique prefix for advisory locks. */ #define REPACK_LOCK_PREFIX_STR "16185446" @@ -244,6 +249,7 @@ static int wait_timeout = 60; /* in seconds */ static int jobs = 0; /* number of concurrent worker conns. */ static bool dryrun = false; static unsigned int temp_obj_num = 0; /* temporary objects counter */ +static bool dont_kill_backend = false; /* abandon when timed-out */ /* buffer should have at least 11 bytes */ static char * @@ -269,6 +275,7 @@ static pgut_option options[] = { 'i', 'T', "wait-timeout", &wait_timeout }, { 'B', 'Z', "no-analyze", &analyze }, { 'i', 'j', "jobs", &jobs }, + { 'b', 'D', "dont-kill-backend", &dont_kill_backend }, { 0 }, }; @@ -1453,9 +1460,9 @@ cleanup: } /* Kill off any concurrent DDL (or any transaction attempting to take - * an AccessExclusive lock) trying to run against our table. Note, we're - * killing these queries off *before* they are granted an AccessExclusive - * lock on our table. + * an AccessExclusive lock) trying to run against our table if we want to + * do. Note, we're killing these queries off *before* they are granted + * an AccessExclusive lock on our table. * * Returns true if no problems encountered, false otherwise. */ @@ -1465,35 +1472,57 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate) bool ret = true; PGresult *res; StringInfoData sql; + int n_tuples; initStringInfo(&sql); - printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); + /* Check the number of backends competing AccessExclusiveLock */ + printfStringInfo(&sql, COUNT_COMPETING_LOCKS, relid); res = pgut_execute(conn, sql.data, 0, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - elog(WARNING, "Error canceling unsafe queries: %s", - PQerrorMessage(conn)); - ret = false; - } - else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400) - { - elog(WARNING, - "Canceled %d unsafe queries. Terminating any remaining PIDs.", - PQntuples(res)); + n_tuples = PQntuples(res); - CLEARPGRES(res); - printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); - res = pgut_execute(conn, sql.data, 0, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (n_tuples != 0) + { + /* Competing backend is exsits, but if we do not want to calcel/terminate + * any backend, do nothing. + */ + if (dont_kill_backend) { - elog(WARNING, "Error killing unsafe queries: %s", - PQerrorMessage(conn)); + elog(WARNING, "%d unsafe queries remain but do not cancel them", + n_tuples); ret = false; } + else + { + resetStringInfo(&sql); + printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); + res = pgut_execute(conn, sql.data, 0, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING, "Error canceling unsafe queries: %s", + PQerrorMessage(conn)); + ret = false; + } + else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400) + { + elog(WARNING, + "Canceled %d unsafe queries. Terminating any remaining PIDs.", + PQntuples(res)); + + CLEARPGRES(res); + printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); + res = pgut_execute(conn, sql.data, 0, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING, "Error killing unsafe queries: %s", + PQerrorMessage(conn)); + ret = false; + } + } + else if (PQntuples(res) > 0) + elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res)); + } } - else if (PQntuples(res) > 0) - elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res)); else elog(DEBUG2, "No competing DDL to cancel."); @@ -1652,26 +1681,35 @@ lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool sta duration = time(NULL) - start; if (duration > wait_timeout) { - const char *cancel_query; - if (PQserverVersion(conn) >= 80400 && - duration > wait_timeout * 2) + if (dont_kill_backend) { - elog(WARNING, "terminating conflicted backends"); - cancel_query = - "SELECT pg_terminate_backend(pid) FROM pg_locks" - " WHERE locktype = 'relation'" - " AND relation = $1 AND pid <> pg_backend_pid()"; + elog(WARNING, "timed out, do not cancel conflicting backends"); + ret = false; + break; } else { - elog(WARNING, "canceling conflicted backends"); - cancel_query = - "SELECT pg_cancel_backend(pid) FROM pg_locks" - " WHERE locktype = 'relation'" - " AND relation = $1 AND pid <> pg_backend_pid()"; - } + const char *cancel_query; + if (PQserverVersion(conn) >= 80400 && + duration > wait_timeout * 2) + { + elog(WARNING, "terminating conflicted backends"); + cancel_query = + "SELECT pg_terminate_backend(pid) FROM pg_locks" + " WHERE locktype = 'relation'" + " AND relation = $1 AND pid <> pg_backend_pid()"; + } + else + { + elog(WARNING, "canceling conflicted backends"); + cancel_query = + "SELECT pg_cancel_backend(pid) FROM pg_locks" + " WHERE locktype = 'relation'" + " AND relation = $1 AND pid <> pg_backend_pid()"; + } - pgut_command(conn, cancel_query, 1, &relid); + pgut_command(conn, cancel_query, 1, &relid); + } } /* wait for a while to lock the table. */ @@ -2063,5 +2101,6 @@ pgut_help(bool details) printf(" -i, --index=INDEX move only the specified index\n"); printf(" -x, --only-indexes move only indexes of the specified table\n"); printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n"); + printf(" -D, --dont-kill-backend do not kill other backends when timed out\n"); printf(" -Z, --no-analyze don't analyze at end\n"); } diff --git a/doc/pg_repack.rst b/doc/pg_repack.rst index c51e1d2..a607e86 100644 --- a/doc/pg_repack.rst +++ b/doc/pg_repack.rst @@ -127,6 +127,7 @@ Options: -i, --index=INDEX move only the specified index -x, --only-indexes move only indexes of the specified table -T, --wait-timeout=SECS timeout to cancel other backends on conflict + -D, --dont-kill-backend do not kill other backends when timed out -Z, --no-analyze don't analyze at end Connection options: @@ -200,11 +201,17 @@ Reorg Options ``-T SECS``, ``--wait-timeout=SECS`` pg_repack needs to take an exclusive lock at the end of the reorganization. This setting controls how many seconds pg_repack will - wait to acquire this lock. If the lock cannot be taken after this duration, - pg_repack will forcibly cancel the conflicting queries. If you are using - PostgreSQL version 8.4 or newer, pg_repack will fall back to using - pg_terminate_backend() to disconnect any remaining backends after - twice this timeout has passed. The default is 60 seconds. + wait to acquire this lock. If the lock cannot be taken after this duration + and ``--dont-kill-backend`` option is not specified, pg_repack will forcibly + cancel the conflicting queries. If you are using PostgreSQL version 8.4 + or newer, pg_repack will fall back to using pg_terminate_backend() to + disconnect any remaining backends after twice this timeout has passed. + The default is 60 seconds. + +``-D``, ``--dont-kill-backend`` + Skip to repack table if the lock cannot be taken for duration specified + ``--wait-timeout``, instead of cancelling conflicting queries. The default + is false. ``-Z``, ``--no-analyze`` Disable ANALYZE after a full-table reorganization. If not specified, run diff --git a/regress/expected/repack.out b/regress/expected/repack.out index 4c5635b..f77669b 100644 --- a/regress/expected/repack.out +++ b/regress/expected/repack.out @@ -387,3 +387,8 @@ ERROR: cannot repack specific table(s) in schema, use schema.table notation inst -- => ERROR \! pg_repack --dbname=contrib_regression --all --schema=test_schema1 ERROR: cannot repack specific schema(s) in all databases +-- +-- don't kill backend +-- +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend +INFO: repacking table "tbl_cluster" diff --git a/regress/sql/repack.sql b/regress/sql/repack.sql index 03e72cd..45e9296 100644 --- a/regress/sql/repack.sql +++ b/regress/sql/repack.sql @@ -230,3 +230,8 @@ CREATE TABLE test_schema2.tbl2 (id INTEGER PRIMARY KEY); \! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1 -- => ERROR \! pg_repack --dbname=contrib_regression --all --schema=test_schema1 + +-- +-- don't kill backend +-- +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend From ff8cb96c74502fe2823d79a315ce69644bff8dd4 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Thu, 2 Feb 2017 19:59:18 +0900 Subject: [PATCH 2/5] Add white space to COUNT_COMPETING_LOCKS sql. Pointed out by schmiddy. --- bin/pg_repack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/pg_repack.c b/bin/pg_repack.c index c121b14..075207f 100644 --- a/bin/pg_repack.c +++ b/bin/pg_repack.c @@ -154,8 +154,8 @@ const char *PROGRAM_VERSION = "unknown"; #define COUNT_COMPETING_LOCKS \ "SELECT pid FROM pg_locks WHERE locktype = 'relation'" \ - "AND granted = false AND relation = %u" \ - "AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" + " AND granted = false AND relation = %u" \ + " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" /* Will be used as a unique prefix for advisory locks. */ #define REPACK_LOCK_PREFIX_STR "16185446" From f6c1304c36d7335c545c0ce56f7efdfd19881029 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Wed, 22 Feb 2017 03:49:04 +0000 Subject: [PATCH 3/5] Change the option name to no-kill-backend. For consistency with other slimilar option such as no-order, no-analyze. --- bin/pg_repack.c | 10 +++++----- doc/pg_repack.rst | 6 +++--- regress/expected/repack.out | 2 +- regress/sql/repack.sql | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bin/pg_repack.c b/bin/pg_repack.c index 075207f..fa2e601 100644 --- a/bin/pg_repack.c +++ b/bin/pg_repack.c @@ -249,7 +249,7 @@ static int wait_timeout = 60; /* in seconds */ static int jobs = 0; /* number of concurrent worker conns. */ static bool dryrun = false; static unsigned int temp_obj_num = 0; /* temporary objects counter */ -static bool dont_kill_backend = false; /* abandon when timed-out */ +static bool no_kill_backend = false; /* abandon when timed-out */ /* buffer should have at least 11 bytes */ static char * @@ -275,7 +275,7 @@ static pgut_option options[] = { 'i', 'T', "wait-timeout", &wait_timeout }, { 'B', 'Z', "no-analyze", &analyze }, { 'i', 'j', "jobs", &jobs }, - { 'b', 'D', "dont-kill-backend", &dont_kill_backend }, + { 'b', 'D', "no-kill-backend", &no_kill_backend }, { 0 }, }; @@ -1486,7 +1486,7 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate) /* Competing backend is exsits, but if we do not want to calcel/terminate * any backend, do nothing. */ - if (dont_kill_backend) + if (no_kill_backend) { elog(WARNING, "%d unsafe queries remain but do not cancel them", n_tuples); @@ -1681,7 +1681,7 @@ lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool sta duration = time(NULL) - start; if (duration > wait_timeout) { - if (dont_kill_backend) + if (no_kill_backend) { elog(WARNING, "timed out, do not cancel conflicting backends"); ret = false; @@ -2101,6 +2101,6 @@ pgut_help(bool details) printf(" -i, --index=INDEX move only the specified index\n"); printf(" -x, --only-indexes move only indexes of the specified table\n"); printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n"); - printf(" -D, --dont-kill-backend do not kill other backends when timed out\n"); + printf(" -D, --no-kill-backend don't kill other backends when timed out\n"); printf(" -Z, --no-analyze don't analyze at end\n"); } diff --git a/doc/pg_repack.rst b/doc/pg_repack.rst index a607e86..d1e7952 100644 --- a/doc/pg_repack.rst +++ b/doc/pg_repack.rst @@ -127,7 +127,7 @@ Options: -i, --index=INDEX move only the specified index -x, --only-indexes move only indexes of the specified table -T, --wait-timeout=SECS timeout to cancel other backends on conflict - -D, --dont-kill-backend do not kill other backends when timed out + -D, --no-kill-backend don't kill other backends when timed out -Z, --no-analyze don't analyze at end Connection options: @@ -202,13 +202,13 @@ Reorg Options pg_repack needs to take an exclusive lock at the end of the reorganization. This setting controls how many seconds pg_repack will wait to acquire this lock. If the lock cannot be taken after this duration - and ``--dont-kill-backend`` option is not specified, pg_repack will forcibly + and ``--no-kill-backend`` option is not specified, pg_repack will forcibly cancel the conflicting queries. If you are using PostgreSQL version 8.4 or newer, pg_repack will fall back to using pg_terminate_backend() to disconnect any remaining backends after twice this timeout has passed. The default is 60 seconds. -``-D``, ``--dont-kill-backend`` +``-D``, ``--no-kill-backend`` Skip to repack table if the lock cannot be taken for duration specified ``--wait-timeout``, instead of cancelling conflicting queries. The default is false. diff --git a/regress/expected/repack.out b/regress/expected/repack.out index f77669b..816f41c 100644 --- a/regress/expected/repack.out +++ b/regress/expected/repack.out @@ -390,5 +390,5 @@ ERROR: cannot repack specific schema(s) in all databases -- -- don't kill backend -- -\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend INFO: repacking table "tbl_cluster" diff --git a/regress/sql/repack.sql b/regress/sql/repack.sql index 45e9296..2353262 100644 --- a/regress/sql/repack.sql +++ b/regress/sql/repack.sql @@ -234,4 +234,4 @@ CREATE TABLE test_schema2.tbl2 (id INTEGER PRIMARY KEY); -- -- don't kill backend -- -\! pg_repack --dbname=contrib_regression --table=tbl_cluster --dont-kill-backend +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend From d1d3d774d31c8b019042c35764fd874f231554f7 Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Wed, 22 Feb 2017 06:03:09 +0000 Subject: [PATCH 4/5] Update Japanese documentation for no-kill-backend option. --- doc/pg_repack_jp.rst | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/doc/pg_repack_jp.rst b/doc/pg_repack_jp.rst index 02b9bd2..d107a23 100644 --- a/doc/pg_repack_jp.rst +++ b/doc/pg_repack_jp.rst @@ -206,6 +206,7 @@ pg_repackもしくはpg_reorgの古いバージョンからのアップグレー -i, --index=INDEX move only the specified index -x, --only-indexes move only indexes of the specified table -T, --wait-timeout=SECS timeout to cancel other backends on conflict + -D, --no-kill-backend don't kill other backends when timed out -Z, --no-analyze don't analyze at end Connection options: @@ -244,6 +245,7 @@ OPTIONには以下のものが指定できます。 -i, --index=INDEX 指定したインデックスのみ再編成します -x, --only-indexes 指定したテーブルに付与されたインデックスだけを再編成します -T, --wait-timeout=SECS ロック競合している他のトランザクションをキャンセルするまで待機する時間を指定します + -D, --no-kill-backend タイムアウト時に他のバックエンドをキャンセルしません -Z, --no-analyze 再編成後にANALYZEを行いません 接続オプション: @@ -351,15 +353,22 @@ OPTIONには以下のものが指定できます。 .. ``-T SECS``, ``--wait-timeout=SECS`` pg_repack needs to take an exclusive lock at the end of the reorganization. This setting controls how many seconds pg_repack will - wait to acquire this lock. If the lock cannot be taken after this duration, - pg_repack will forcibly cancel the conflicting queries. If you are using - PostgreSQL version 8.4 or newer, pg_repack will fall back to using - pg_terminate_backend() to disconnect any remaining backends after - twice this timeout has passed. The default is 60 seconds. + wait to acquire this lock. If the lock cannot be taken after this duration + and ``--no-kill-backend`` option is not specified, pg_repack will forcibly + cancel the conflicting queries. If you are using PostgreSQL version 8.4 + or newer, pg_repack will fall back to using pg_terminate_backend() to + disconnect any remaining backends after twice this timeout has passed. + The default is 60 seconds. ``-T SECS``, ``--wait-timeout=SECS`` - pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。 + pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できないかつ、`no-kill-backend`オプションが指定されていない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。 +.. ``-D``, ``--no-kill-backend`` + Skip to repack table if the lock cannot be taken for duration specified + ``--wait-timeout``, instead of cancelling conflicting queries. The default + is false. +``-D``, ``--no-kill-backend`` + ``--wait-timeout``オプションで指定された時間が経過してもロックが取得できない場合、競合するクエリをキャンセルする代わりに対象テーブルの再編成をスキップします。 .. ``-Z``, ``--no-analyze`` Disable ANALYZE after a full-table reorganization. If not specified, run From 9ef8f9f80bb6eaad1154331272c7d419875f74ca Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Wed, 22 Feb 2017 06:03:54 +0000 Subject: [PATCH 5/5] Improve error message more explicitely when time out. This change distinguishes error message between failed to cancel query due to time out and abandoning to cancel query due to timeout. --- bin/pg_repack.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/bin/pg_repack.c b/bin/pg_repack.c index fa2e601..ee16d8b 100644 --- a/bin/pg_repack.c +++ b/bin/pg_repack.c @@ -1081,7 +1081,10 @@ repack_one_table(repack_table *table, const char *orderby) if (!(lock_exclusive(connection, buffer, table->lock_table, TRUE))) { - elog(WARNING, "lock_exclusive() failed for %s", table->target_name); + if (no_kill_backend) + elog(INFO, "Skipping repack %s due to timeout", table->target_name); + else + elog(WARNING, "lock_exclusive() failed for %s", table->target_name); goto cleanup; } @@ -1230,7 +1233,10 @@ repack_one_table(repack_table *table, const char *orderby) */ if (!(kill_ddl(connection, table->target_oid, true))) { - elog(WARNING, "kill_ddl() failed."); + if (no_kill_backend) + elog(INFO, "Skipping repack %s due to timeout.", table->target_name); + else + elog(WARNING, "kill_ddl() failed."); goto cleanup; } @@ -1488,7 +1494,7 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate) */ if (no_kill_backend) { - elog(WARNING, "%d unsafe queries remain but do not cancel them", + elog(WARNING, "%d unsafe queries remain but do not cancel them and skip to repack it", n_tuples); ret = false; }