Modify LOCK to LOCK-NOWAIT-and-loop at swap tables to avoid holding exclusive locks long time. Suggested by Kenny Gorman.

This commit is contained in:
Takahiro Itagaki 2009-04-16 06:01:01 +00:00
parent 942180c2d8
commit e1011e11fe
6 changed files with 849 additions and 680 deletions

View File

@ -93,6 +93,13 @@ static void PrintVersion(void);
static char *getstr(PGresult *res, int row, int col); static char *getstr(PGresult *res, int row, int col);
static Oid getoid(PGresult *res, int row, int col); static Oid getoid(PGresult *res, int row, int col);
#define SQLSTATE_INVALID_SCHEMA_NAME "3F000"
#define SQLSTATE_LOCK_NOT_AVAILABLE "55P03"
static bool sqlstate_equals(PGresult *res, const char *state)
{
return strcmp(PQresultErrorField(res, PG_DIAG_SQLSTATE), state) == 0;
}
static const char *progname = NULL; static const char *progname = NULL;
static bool echo = false; static bool echo = false;
@ -390,8 +397,7 @@ reorg_one_database(const char *orderby, const char *table)
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
const char *state = PQresultErrorField(res, PG_DIAG_SQLSTATE); if (sqlstate_equals(res, SQLSTATE_INVALID_SCHEMA_NAME))
if (state && strcmp(state, "3F000") == 0)
{ {
/* Schema reorg does not exist. Skip the database. */ /* Schema reorg does not exist. Skip the database. */
ret = false; ret = false;
@ -657,8 +663,32 @@ reorg_one_table(const reorg_table *table, const char *orderby)
if (verbose) if (verbose)
fprintf(stderr, "---- swap ----\n"); fprintf(stderr, "---- swap ----\n");
for (;;)
{
command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL); command("BEGIN ISOLATION LEVEL READ COMMITTED", 0, NULL);
command(table->lock_table, 0, NULL); res = execute_nothrow(table->lock_table, 0, NULL);
if (PQresultStatus(res) == PGRES_COMMAND_OK)
{
PQclear(res);
break;
}
else if (sqlstate_equals(res, SQLSTATE_LOCK_NOT_AVAILABLE))
{
/* retry if lock conflicted */
PQclear(res);
command("ROLLBACK", 0, NULL);
sleep(1);
continue;
}
else
{
/* exit otherwise */
printf("%s", PQerrorMessage(current_conn));
PQclear(res);
exit_with_cleanup(1);
}
}
apply_log(table, 0); apply_log(table, 0);
params[0] = utoa(table->target_oid, buffer); params[0] = utoa(table->target_oid, buffer);
command("SELECT reorg.reorg_swap($1)", 1, params); command("SELECT reorg.reorg_swap($1)", 1, params);
@ -761,7 +791,7 @@ execute_nothrow(const char *query, int nParams, const char **params)
} }
/* /*
* execute - Execute a SQL and discard the result, or exit() if failed. * execute - Execute a SQL and return the result, or exit() if failed.
*/ */
static PGresult * static PGresult *
execute(const char *query, int nParams, const char **params) execute(const char *query, int nParams, const char **params)
@ -834,7 +864,6 @@ static void
PrintVersion(void) PrintVersion(void)
{ {
fprintf(stderr, "pg_reorg " REORG_VERSION "\n"); fprintf(stderr, "pg_reorg " REORG_VERSION "\n");
return;
} }
/* /*

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html <!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@ -33,10 +33,80 @@ pg_reorg は PostgreSQL のテーブルを再編成するシェルコマンド
</div> </div>
<hr /> <hr />
<h2>ドキュメント</h2>
<p> <p>
<a href="pg_reorg-ja.html">ドキュメントはこちら</a> <a href="pg_reorg-ja.html">ドキュメントはこちら</a>
</p> </p>
<h2>実行時間</h2>
<p>
pg_reorg とclusterdb の比較に示します。
断片化のないソートされた状態 (not fragmented) では clusterdb のほうが高速ですが、完全に断片化した状態 (fully fragmented) では pg_reorg が大幅に高速です。
一般的に、再編成は断片化が進行した状態で実施されることを考えると、pg_reorg は clusterdb よりも実行時間が短いと言えます。
</p>
<center>
<div style="margin: 2em">
<img src="result.png" />
</div>
<table border="1">
<caption>測定環境</caption>
<tr>
<th>大項目</th>
<th>小項目</th>
<th>環境</th>
</tr>
<tr>
<td rowspan="3">ハードウェア</td>
<td>CPU</td>
<td>2 × Xeon 5160 3.00GHz (Dual core)</td>
</tr>
<tr>
<td>メモリ</td>
<td>2GB</td>
</tr>
<tr>
<td>ストレージ</td>
<td>Ultra320 SCSI, 15000rpm (220GB)</td>
</tr>
<tr>
<td rowspan="4">ソフトウェア</td>
<td>OS</td>
<td>RHEL 5.2 (64bit) 2.6.18-92.el5</td>
</tr>
<tr>
<td>DB</td>
<td>PostgreSQL 8.3.3</td>
</tr>
<tr>
<td>pg_reorg</td>
<td>1.0.0</td>
</tr>
<tr>
<td>clusterdb</td>
<td>clusterdb (PostgreSQL) 8.3.3</td>
</tr>
<tr>
<td rowspan="2">データ</td>
<td>スキーマ</td>
<td><code><pre>CREATE TABLE tbl (
id bigserial PRIMARY KEY,
seqkey timestamp NOT NULL,
rndkey timestamp NOT NULL,
filler char(75) NOT NULL
);
CREATE INDEX idx_seq ON tbl (seqkey);
CREATE INDEX idx_rnd ON tbl (rndkey);</pre></code></td>
</tr>
<tr>
<td>件数</td>
<td>1650万件 (約2GB)</td>
</tr>
</table>
</center>
<hr /> <hr />
<div align="right"> <div align="right">
Copyright (c) 2008-2009, NIPPON TELEGRAPH AND TELEPHONE CORPORATION Copyright (c) 2008-2009, NIPPON TELEGRAPH AND TELEPHONE CORPORATION

View File

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html <!DOCTYPE html
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@ -33,10 +33,80 @@ where you can find <a href="http://pgfoundry.org/frs/?group_id=1000411">download
</div> </div>
<hr /> <hr />
<h2>Documentation</h2>
<p> <p>
<a href="pg_reorg.html">Documentations here</a>. <a href="pg_reorg.html">Documentations here</a>.
</p> </p>
<h2>Execution time</h2>
<p>
Here is a comparison between pg_reorg and clusterdb.
Clusterdb is faster on not fragmented conditions, but pg_reorg is faster on fully fragmented conditions.
Since reorganization is needed only if tables are fragmented, pg_reorg should be faster than clusterdb.
</p>
<center>
<div style="margin: 2em">
<img src="result.png" />
</div>
<table border="1">
<caption>Configuration</caption>
<tr>
<th>Category</th>
<th>Item</th>
<th>Details</th>
</tr>
<tr>
<td rowspan="3">Hardware</td>
<td>CPU</td>
<td>2 * Xeon 5160 3.00GHz (Dual core)</td>
</tr>
<tr>
<td>Memory</td>
<td>2GB</td>
</tr>
<tr>
<td>Storage</td>
<td>Ultra320 SCSI, 15000rpm (220GB)</td>
</tr>
<tr>
<td rowspan="4">Software</td>
<td>OS</td>
<td>RHEL 5.2 (64bit) 2.6.18-92.el5</td>
</tr>
<tr>
<td>DB</td>
<td>PostgreSQL 8.3.3</td>
</tr>
<tr>
<td>pg_reorg</td>
<td>1.0.0</td>
</tr>
<tr>
<td>clusterdb</td>
<td>clusterdb (PostgreSQL) 8.3.3</td>
</tr>
<tr>
<td rowspan="2">Data</td>
<td>Scheme</td>
<td><code><pre>CREATE TABLE tbl (
id bigserial PRIMARY KEY,
seqkey timestamp NOT NULL,
rndkey timestamp NOT NULL,
filler char(75) NOT NULL
);
CREATE INDEX idx_seq ON tbl (seqkey);
CREATE INDEX idx_rnd ON tbl (rndkey);</pre></code></td>
</tr>
<tr>
<td>Rows</td>
<td>16.5 M rows (2GB)</td>
</tr>
</table>
</center>
<hr /> <hr />
<div align="right"> <div align="right">
Copyright (c) 2008-2009, NIPPON TELEGRAPH AND TELEPHONE CORPORATION Copyright (c) 2008-2009, NIPPON TELEGRAPH AND TELEPHONE CORPORATION

View File

@ -241,19 +241,19 @@ pg_reorg の実行中には、<tt>VACUUM</tt> と <tt>ANALYZE</tt> <STRONG>以
<dl> <dl>
<dt><tt>TRUNCATE</tt></dt> <dt><tt>TRUNCATE</tt></dt>
<dd><tt>TRUNCATE</tt>により削除した行が、pg_reorg 実行後には復元しています。操作結果が消失します。</dd> <dd>削除した行が pg_reorg 実行後には復元しています。操作結果が消失します。</dd>
<dt><tt>CREATE INDEX</tt></dt> <dt><tt>CREATE INDEX</tt></dt>
<dd><tt>CREATE INDEX</tt>は、スワップされない索引が残る可能性があります。データの不整合が生じます。</dd> <dd>スワップされない索引が残る可能性があります。データの不整合が生じます。</dd>
<dt><tt>ADD COLUMN</tt></dt> <dt><tt>ALTER TABLE ... ADD COLUMN</tt></dt>
<dd><tt>ADD COLUMN</tt>は、追加された値が全てNULLに置換されてしまう可能性があります。データが消失します。</dd> <dd>追加された値が全てNULLに置換されてしまう可能性があります。データが消失します。</dd>
<dt><tt>ALTER COLUMN TYPE</tt></dt> <dt><tt>ALTER TABLE ... ALTER COLUMN TYPE</tt></dt>
<dd><tt>ALTER COLUMN TYPE</tt>は、実行するとスキーマで定義された型と実際の格納状態に齟齬をきたします。データの不整合が生じます。</dd> <dd>実行するとスキーマで定義された型と実際の格納状態に齟齬をきたします。データの不整合が生じます。</dd>
<dt><tt>ALTER TABLE SET TABLESPACE</tt></dt> <dt><tt>ALTER TABLE ... SET TABLESPACE</tt></dt>
<dd><tt>ALTER TABLE SET TABLE SPACE</tt>は、pg_reorg 実行後にrelfilenodeとの不整合が起こるため、対象のテーブルに対する参照/更新操作時にエラーが発生します。</dd> <dd>pg_reorg 実行後にrelfilenodeとの不整合が起こるため、対象のテーブルに対する参照/更新操作時にエラーが発生します。</dd>
</dl> </dl>
</div> </div>

View File

@ -228,16 +228,16 @@ In many case pg_reorg would fail and rollback collectly, but there are some case
<dd><tt>TRUNCATE</tt> is lost. Deleted rows still exist after pg_reorg.</dd> <dd><tt>TRUNCATE</tt> is lost. Deleted rows still exist after pg_reorg.</dd>
<dt><tt>CREATE INDEX</tt></dt> <dt><tt>CREATE INDEX</tt></dt>
<dd><tt>CREATE INDEX</tt> causes index corruptions.</dd> <dd>It causes index corruptions.</dd>
<dt><tt>ADD COLUMN</tt></dt> <dt><tt>ALTER TABLE ... ADD COLUMN</tt></dt>
<dd><tt>ADD COLUMN</tt> causes lost of data. Newly added columns are initialized with NULLs.</dd> <dd>It causes lost of data. Newly added columns are initialized with NULLs.</dd>
<dt><tt>ALTER COLUMN TYPE</tt></dt> <dt><tt>ALTER TABLE ... ALTER COLUMN TYPE</tt></dt>
<dd><tt>ALTER COLUMN TYPE</tt> causes data corruptions.</dd> <dd>It causes data corruptions.</dd>
<dt><tt>ALTER TABLE SET TABLESPACE</tt></dt> <dt><tt>ALTER TABLE ... SET TABLESPACE</tt></dt>
<dd><tt>ALTER TABLE SET TABLE SPACE</tt> causes data corruptions by wrong relfilenode.</dd> <dd>It causes data corruptions by wrong relfilenode.</dd>
</dl> </dl>
</div> </div>

View File

@ -114,7 +114,7 @@ CREATE VIEW reorg.tables AS
reorg.get_create_trigger(R.oid, PK.indexrelid) AS create_trigger, reorg.get_create_trigger(R.oid, PK.indexrelid) AS create_trigger,
'CREATE TABLE reorg.table_' || R.oid || ' WITH (' || array_to_string(array_append(R.reloptions, 'oids=' || CASE WHEN R.relhasoids THEN 'true' ELSE 'false' END), ',') || ') TABLESPACE ' || coalesce(quote_ident(S.spcname), 'pg_default') || ' AS SELECT * FROM ONLY ' || reorg.oid2text(R.oid) AS create_table, 'CREATE TABLE reorg.table_' || R.oid || ' WITH (' || array_to_string(array_append(R.reloptions, 'oids=' || CASE WHEN R.relhasoids THEN 'true' ELSE 'false' END), ',') || ') TABLESPACE ' || coalesce(quote_ident(S.spcname), 'pg_default') || ' AS SELECT * FROM ONLY ' || reorg.oid2text(R.oid) AS create_table,
'DELETE FROM reorg.log_' || R.oid AS delete_log, 'DELETE FROM reorg.log_' || R.oid AS delete_log,
'LOCK TABLE ' || reorg.oid2text(R.oid) || ' IN ACCESS EXCLUSIVE MODE' AS lock_table, 'LOCK TABLE ' || reorg.oid2text(R.oid) || ' IN ACCESS EXCLUSIVE MODE NOWAIT' AS lock_table,
reorg.get_index_keys(CK.indexrelid, R.oid) AS ckey, reorg.get_index_keys(CK.indexrelid, R.oid) AS ckey,
'SELECT * FROM reorg.log_' || R.oid || ' ORDER BY id LIMIT $1' AS sql_peek, 'SELECT * FROM reorg.log_' || R.oid || ' ORDER BY id LIMIT $1' AS sql_peek,
'INSERT INTO reorg.table_' || R.oid || ' VALUES ($1.*)' AS sql_insert, 'INSERT INTO reorg.table_' || R.oid || ' VALUES ($1.*)' AS sql_insert,