diff --git a/bin/pg_repack.c b/bin/pg_repack.c index d4aa226..684badc 100644 --- a/bin/pg_repack.c +++ b/bin/pg_repack.c @@ -152,6 +152,11 @@ const char *PROGRAM_VERSION = "unknown"; " AND granted = false AND relation = %u"\ " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" +#define COUNT_COMPETING_LOCKS \ + "SELECT pid FROM pg_locks WHERE locktype = 'relation'" \ + " AND granted = false AND relation = %u" \ + " AND mode = 'AccessExclusiveLock' AND pid <> pg_backend_pid()" + /* Will be used as a unique prefix for advisory locks. */ #define REPACK_LOCK_PREFIX_STR "16185446" @@ -244,6 +249,7 @@ static int wait_timeout = 60; /* in seconds */ static int jobs = 0; /* number of concurrent worker conns. */ static bool dryrun = false; static unsigned int temp_obj_num = 0; /* temporary objects counter */ +static bool no_kill_backend = false; /* abandon when timed-out */ /* buffer should have at least 11 bytes */ static char * @@ -269,6 +275,7 @@ static pgut_option options[] = { 'i', 'T', "wait-timeout", &wait_timeout }, { 'B', 'Z', "no-analyze", &analyze }, { 'i', 'j', "jobs", &jobs }, + { 'b', 'D', "no-kill-backend", &no_kill_backend }, { 0 }, }; @@ -1074,7 +1081,10 @@ repack_one_table(repack_table *table, const char *orderby) if (!(lock_exclusive(connection, buffer, table->lock_table, TRUE))) { - elog(WARNING, "lock_exclusive() failed for %s", table->target_name); + if (no_kill_backend) + elog(INFO, "Skipping repack %s due to timeout", table->target_name); + else + elog(WARNING, "lock_exclusive() failed for %s", table->target_name); goto cleanup; } @@ -1208,7 +1218,10 @@ repack_one_table(repack_table *table, const char *orderby) */ if (!(kill_ddl(connection, table->target_oid, true))) { - elog(WARNING, "kill_ddl() failed."); + if (no_kill_backend) + elog(INFO, "Skipping repack %s due to timeout.", table->target_name); + else + elog(WARNING, "kill_ddl() failed."); goto cleanup; } @@ -1438,9 +1451,9 @@ cleanup: } /* Kill off any concurrent DDL (or any transaction attempting to take - * an AccessExclusive lock) trying to run against our table. Note, we're - * killing these queries off *before* they are granted an AccessExclusive - * lock on our table. + * an AccessExclusive lock) trying to run against our table if we want to + * do. Note, we're killing these queries off *before* they are granted + * an AccessExclusive lock on our table. * * Returns true if no problems encountered, false otherwise. */ @@ -1450,35 +1463,57 @@ kill_ddl(PGconn *conn, Oid relid, bool terminate) bool ret = true; PGresult *res; StringInfoData sql; + int n_tuples; initStringInfo(&sql); - printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); + /* Check the number of backends competing AccessExclusiveLock */ + printfStringInfo(&sql, COUNT_COMPETING_LOCKS, relid); res = pgut_execute(conn, sql.data, 0, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - elog(WARNING, "Error canceling unsafe queries: %s", - PQerrorMessage(conn)); - ret = false; - } - else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400) - { - elog(WARNING, - "Canceled %d unsafe queries. Terminating any remaining PIDs.", - PQntuples(res)); + n_tuples = PQntuples(res); - CLEARPGRES(res); - printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); - res = pgut_execute(conn, sql.data, 0, NULL); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (n_tuples != 0) + { + /* Competing backend is exsits, but if we do not want to calcel/terminate + * any backend, do nothing. + */ + if (no_kill_backend) { - elog(WARNING, "Error killing unsafe queries: %s", - PQerrorMessage(conn)); + elog(WARNING, "%d unsafe queries remain but do not cancel them and skip to repack it", + n_tuples); ret = false; } + else + { + resetStringInfo(&sql); + printfStringInfo(&sql, CANCEL_COMPETING_LOCKS, relid); + res = pgut_execute(conn, sql.data, 0, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING, "Error canceling unsafe queries: %s", + PQerrorMessage(conn)); + ret = false; + } + else if (PQntuples(res) > 0 && terminate && PQserverVersion(conn) >= 80400) + { + elog(WARNING, + "Canceled %d unsafe queries. Terminating any remaining PIDs.", + PQntuples(res)); + + CLEARPGRES(res); + printfStringInfo(&sql, KILL_COMPETING_LOCKS, relid); + res = pgut_execute(conn, sql.data, 0, NULL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + elog(WARNING, "Error killing unsafe queries: %s", + PQerrorMessage(conn)); + ret = false; + } + } + else if (PQntuples(res) > 0) + elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res)); + } } - else if (PQntuples(res) > 0) - elog(NOTICE, "Canceled %d unsafe queries", PQntuples(res)); else elog(DEBUG2, "No competing DDL to cancel."); @@ -1637,26 +1672,35 @@ lock_exclusive(PGconn *conn, const char *relid, const char *lock_query, bool sta duration = time(NULL) - start; if (duration > wait_timeout) { - const char *cancel_query; - if (PQserverVersion(conn) >= 80400 && - duration > wait_timeout * 2) + if (no_kill_backend) { - elog(WARNING, "terminating conflicted backends"); - cancel_query = - "SELECT pg_terminate_backend(pid) FROM pg_locks" - " WHERE locktype = 'relation'" - " AND relation = $1 AND pid <> pg_backend_pid()"; + elog(WARNING, "timed out, do not cancel conflicting backends"); + ret = false; + break; } else { - elog(WARNING, "canceling conflicted backends"); - cancel_query = - "SELECT pg_cancel_backend(pid) FROM pg_locks" - " WHERE locktype = 'relation'" - " AND relation = $1 AND pid <> pg_backend_pid()"; - } + const char *cancel_query; + if (PQserverVersion(conn) >= 80400 && + duration > wait_timeout * 2) + { + elog(WARNING, "terminating conflicted backends"); + cancel_query = + "SELECT pg_terminate_backend(pid) FROM pg_locks" + " WHERE locktype = 'relation'" + " AND relation = $1 AND pid <> pg_backend_pid()"; + } + else + { + elog(WARNING, "canceling conflicted backends"); + cancel_query = + "SELECT pg_cancel_backend(pid) FROM pg_locks" + " WHERE locktype = 'relation'" + " AND relation = $1 AND pid <> pg_backend_pid()"; + } - pgut_command(conn, cancel_query, 1, &relid); + pgut_command(conn, cancel_query, 1, &relid); + } } /* wait for a while to lock the table. */ @@ -2048,5 +2092,6 @@ pgut_help(bool details) printf(" -i, --index=INDEX move only the specified index\n"); printf(" -x, --only-indexes move only indexes of the specified table\n"); printf(" -T, --wait-timeout=SECS timeout to cancel other backends on conflict\n"); + printf(" -D, --no-kill-backend don't kill other backends when timed out\n"); printf(" -Z, --no-analyze don't analyze at end\n"); } diff --git a/doc/pg_repack.rst b/doc/pg_repack.rst index 1600ce0..32176d1 100644 --- a/doc/pg_repack.rst +++ b/doc/pg_repack.rst @@ -127,6 +127,7 @@ Options: -i, --index=INDEX move only the specified index -x, --only-indexes move only indexes of the specified table -T, --wait-timeout=SECS timeout to cancel other backends on conflict + -D, --no-kill-backend don't kill other backends when timed out -Z, --no-analyze don't analyze at end Connection options: @@ -200,11 +201,17 @@ Reorg Options ``-T SECS``, ``--wait-timeout=SECS`` pg_repack needs to take an exclusive lock at the end of the reorganization. This setting controls how many seconds pg_repack will - wait to acquire this lock. If the lock cannot be taken after this duration, - pg_repack will forcibly cancel the conflicting queries. If you are using - PostgreSQL version 8.4 or newer, pg_repack will fall back to using - pg_terminate_backend() to disconnect any remaining backends after - twice this timeout has passed. The default is 60 seconds. + wait to acquire this lock. If the lock cannot be taken after this duration + and ``--no-kill-backend`` option is not specified, pg_repack will forcibly + cancel the conflicting queries. If you are using PostgreSQL version 8.4 + or newer, pg_repack will fall back to using pg_terminate_backend() to + disconnect any remaining backends after twice this timeout has passed. + The default is 60 seconds. + +``-D``, ``--no-kill-backend`` + Skip to repack table if the lock cannot be taken for duration specified + ``--wait-timeout``, instead of cancelling conflicting queries. The default + is false. ``-Z``, ``--no-analyze`` Disable ANALYZE after a full-table reorganization. If not specified, run diff --git a/doc/pg_repack_jp.rst b/doc/pg_repack_jp.rst index 7332d9a..87b79e6 100644 --- a/doc/pg_repack_jp.rst +++ b/doc/pg_repack_jp.rst @@ -206,6 +206,7 @@ pg_repackもしくはpg_reorgの古いバージョンからのアップグレー -i, --index=INDEX move only the specified index -x, --only-indexes move only indexes of the specified table -T, --wait-timeout=SECS timeout to cancel other backends on conflict + -D, --no-kill-backend don't kill other backends when timed out -Z, --no-analyze don't analyze at end Connection options: @@ -244,6 +245,7 @@ OPTIONには以下のものが指定できます。 -i, --index=INDEX 指定したインデックスのみ再編成します -x, --only-indexes 指定したテーブルに付与されたインデックスだけを再編成します -T, --wait-timeout=SECS ロック競合している他のトランザクションをキャンセルするまで待機する時間を指定します + -D, --no-kill-backend タイムアウト時に他のバックエンドをキャンセルしません -Z, --no-analyze 再編成後にANALYZEを行いません 接続オプション: @@ -351,15 +353,22 @@ OPTIONには以下のものが指定できます。 .. ``-T SECS``, ``--wait-timeout=SECS`` pg_repack needs to take an exclusive lock at the end of the reorganization. This setting controls how many seconds pg_repack will - wait to acquire this lock. If the lock cannot be taken after this duration, - pg_repack will forcibly cancel the conflicting queries. If you are using - PostgreSQL version 8.4 or newer, pg_repack will fall back to using - pg_terminate_backend() to disconnect any remaining backends after - twice this timeout has passed. The default is 60 seconds. + wait to acquire this lock. If the lock cannot be taken after this duration + and ``--no-kill-backend`` option is not specified, pg_repack will forcibly + cancel the conflicting queries. If you are using PostgreSQL version 8.4 + or newer, pg_repack will fall back to using pg_terminate_backend() to + disconnect any remaining backends after twice this timeout has passed. + The default is 60 seconds. ``-T SECS``, ``--wait-timeout=SECS`` - pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。 + pg_repackは再編成の完了直前に排他ロックを利用します。このオプションは、このロック取得時に何秒間pg_repackが取得を待機するかを指定します。指定した時間経ってもロックが取得できないかつ、`no-kill-backend`オプションが指定されていない場合、pg_repackは競合するクエリを強制的にキャンセルさせます。PostgreSQL 8.4以上のバージョンを利用している場合、指定した時間の2倍以上経ってもロックが取得できない場合、pg_repackは競合するクエリを実行しているPostgreSQLバックエンドプロセスをpg_terminate_backend()関数により強制的に停止させます。このオプションのデフォルトは60秒です。 +.. ``-D``, ``--no-kill-backend`` + Skip to repack table if the lock cannot be taken for duration specified + ``--wait-timeout``, instead of cancelling conflicting queries. The default + is false. +``-D``, ``--no-kill-backend`` + ``--wait-timeout``オプションで指定された時間が経過してもロックが取得できない場合、競合するクエリをキャンセルする代わりに対象テーブルの再編成をスキップします。 .. ``-Z``, ``--no-analyze`` Disable ANALYZE after a full-table reorganization. If not specified, run diff --git a/regress/expected/repack.out b/regress/expected/repack.out index b1ba8c2..9359731 100644 --- a/regress/expected/repack.out +++ b/regress/expected/repack.out @@ -381,3 +381,8 @@ ERROR: cannot repack specific table(s) in schema, use schema.table notation inst -- => ERROR \! pg_repack --dbname=contrib_regression --all --schema=test_schema1 ERROR: cannot repack specific schema(s) in all databases +-- +-- don't kill backend +-- +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend +INFO: repacking table "tbl_cluster" diff --git a/regress/sql/repack.sql b/regress/sql/repack.sql index 7f9098d..3bcd38e 100644 --- a/regress/sql/repack.sql +++ b/regress/sql/repack.sql @@ -227,3 +227,8 @@ CREATE TABLE test_schema2.tbl2 (id INTEGER PRIMARY KEY); \! pg_repack --dbname=contrib_regression --schema=test_schema1 --table=tbl1 -- => ERROR \! pg_repack --dbname=contrib_regression --all --schema=test_schema1 + +-- +-- don't kill backend +-- +\! pg_repack --dbname=contrib_regression --table=tbl_cluster --no-kill-backend