From 24a03c01a5260baddee27d7de35a23eed279cca8 Mon Sep 17 00:00:00 2001 From: Takahiro Itagaki Date: Wed, 21 Jan 2009 08:09:22 +0000 Subject: [PATCH] workaround for toast tables. The bug is reported by Kenny Gorman. --- Makefile | 7 +- bin/Makefile | 1 + bin/expected/init.out | 3 + bin/expected/reorg.out | 122 +++++++++++ bin/pg_reorg.c | 2 +- bin/sql/init.sql | 5 + bin/sql/reorg.sql | 73 +++++++ lib/reorg.c | 465 ++++++++++++++++++++++++++++++----------- 8 files changed, 552 insertions(+), 126 deletions(-) create mode 100755 bin/expected/init.out create mode 100755 bin/expected/reorg.out create mode 100755 bin/sql/init.sql create mode 100755 bin/sql/reorg.sql diff --git a/Makefile b/Makefile index 224b6e7..f73e4e5 100755 --- a/Makefile +++ b/Makefile @@ -5,11 +5,11 @@ # .PHONY: all install clean -all: +all: make -C bin make -C lib -install: +install: make -C bin install make -C lib install @@ -24,3 +24,6 @@ debug: uninstall: make -C bin uninstall make -C lib uninstall + +installcheck: + make -C bin installcheck diff --git a/bin/Makefile b/bin/Makefile index db735ab..9a98687 100755 --- a/bin/Makefile +++ b/bin/Makefile @@ -7,6 +7,7 @@ SRCS = \ pg_reorg.c OBJS = $(SRCS:.c=.o) $(top_builddir)/src/bin/scripts/common.o PROGRAM = pg_reorg +REGRESS = init reorg ifdef DEBUG_REORG PG_CPPFLAGS = -I$(libpq_srcdir) -I$(top_builddir)/src/bin/scripts -DDEBUG_REORG diff --git a/bin/expected/init.out b/bin/expected/init.out new file mode 100755 index 0000000..c365538 --- /dev/null +++ b/bin/expected/init.out @@ -0,0 +1,3 @@ +SET client_min_messages = warning; +\set ECHO none +RESET client_min_messages; diff --git a/bin/expected/reorg.out b/bin/expected/reorg.out new file mode 100755 index 0000000..2f70acc --- /dev/null +++ b/bin/expected/reorg.out @@ -0,0 +1,122 @@ +SET client_min_messages = warning; +-- +-- create table. 
+-- +CREATE TABLE tbl_cluster ( + col1 int, + col2 timestamp, + ":-)" text, + primary key(":-)", col1) +) WITH (fillfactor = 70); +CREATE INDEX cidx_cluster ON tbl_cluster (col2, length(":-)")); +ALTER TABLE tbl_cluster CLUSTER ON cidx_cluster; +CREATE TABLE tbl_only_pkey ( + col1 int PRIMARY KEY, + ":-)" text +); +CREATE TABLE tbl_only_ckey ( + col1 int, + col2 timestamp, + ":-)" text +) WITH (fillfactor = 70); +CREATE INDEX cidx_only_ckey ON tbl_only_ckey (col2, ":-)"); +ALTER TABLE tbl_only_ckey CLUSTER ON cidx_only_ckey; +CREATE TABLE tbl_gistkey ( + id integer PRIMARY KEY, + c circle +); +CREATE INDEX cidx_circle ON tbl_gistkey USING gist (c); +ALTER TABLE tbl_gistkey CLUSTER ON cidx_circle; +INSERT INTO tbl_cluster VALUES(1, '2008-12-31 10:00:00', 'admin'); +INSERT INTO tbl_cluster VALUES(2, '2008-01-01 00:00:00', 'king'); +INSERT INTO tbl_cluster VALUES(3, '2008-03-04 12:00:00', 'joker'); +INSERT INTO tbl_cluster VALUES(4, '2008-03-05 15:00:00', 'queen'); +INSERT INTO tbl_cluster VALUES(5, '2008-01-01 00:30:00', sqrt(2::numeric(1000,999))::text || sqrt(3::numeric(1000,999))::text); +-- +-- do reorg +-- +\! 
pg_reorg --dbname=contrib_regression --table=tbl_cluster +-- +-- results +-- +\d+ tbl_cluster + Table "public.tbl_cluster" + Column | Type | Modifiers | Storage | Description +--------+-----------------------------+-----------+----------+------------- + col1 | integer | not null | plain | + col2 | timestamp without time zone | | plain | + :-) | text | not null | extended | +Indexes: + "tbl_cluster_pkey" PRIMARY KEY, btree (":-)", col1) + "cidx_cluster" btree (col2, length(":-)")) CLUSTER +Has OIDs: no +Options: fillfactor=70 + +\d+ tbl_gistkey + Table "public.tbl_gistkey" + Column | Type | Modifiers | Storage | Description +--------+---------+-----------+---------+------------- + id | integer | not null | plain | + c | circle | | plain | +Indexes: + "tbl_gistkey_pkey" PRIMARY KEY, btree (id) + "cidx_circle" gist (c) CLUSTER +Has OIDs: no + +\d+ tbl_only_ckey + Table "public.tbl_only_ckey" + Column | Type | Modifiers | Storage | Description +--------+-----------------------------+-----------+----------+------------- + col1 | integer | | plain | + col2 | timestamp without time zone | | plain | + :-) | text | | extended | +Indexes: + "cidx_only_ckey" btree (col2, ":-)") CLUSTER +Has OIDs: no +Options: fillfactor=70 + +\d+ tbl_only_pkey + Table "public.tbl_only_pkey" + Column | Type | Modifiers | Storage | Description +--------+---------+-----------+----------+------------- + col1 | integer | not null | plain | + :-) | text | | extended | +Indexes: + "tbl_only_pkey_pkey" PRIMARY KEY, btree (col1) +Has OIDs: no + +SET synchronize_seqscans = off; +SELECT col1, to_char(col2, 'YYYY-MM-DD HH24:MI:SS'), ":-)" FROM tbl_cluster; + col1 | to_char | :-) 
+------+---------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------- + 2 | 2008-01-01 00:00:00 | king + 5 | 2008-01-01 00:30:00 | 1.4142135623730950488016887242096980785696718753769480731766797379907324784621070388503875343276415727350138462309122970249248360558507372126441214970999358314132226659275055927557999505011527820605714701095599716059702745345968620147285174186408891986095523292304843087143214508397626036279952514079896872533965463318088296406206152583523950547457502877599617298355752203375318570113543746034084988471603868999706990048150305440277903164542478230684929369186215805784631115966687130130156185689872372352885092648612494977154218334204285686060146824720771435854874155657069677653720226485447015858801620758474922657226002085584466521458398893944370926591800311388246468157082630100594858704003186480342194897278290641045072636881313739855256117322040245091227700226941127573627280495738108967504018369868368450725799364729060762996941380475654823728997180326802474420629269124859052181004459842150591120249441341728531478105803603371077309182869314710171111683916581726889419758716582152128229518488471.7320508075688772935274463415058723669428052538103806280558069794519330169088000370811461867572485756756261414154067030299699450949989524788116555120943736485280932319023055820679748201010846749232650153123432669033228866506722546689218379712270471316603678615880190499865373798593894676503475065760507566183481296061009476021871903250831458295239598329977898245082887144638329173472241639845878553976679580638183536661108431737808943783161020883055249016700235207111442886959909563657970871684980728994932964842830207864086039887386975375823173178313959929830078387028770539133695633121037072640192491067682311992883756411414220167427521023729942708310598984594759876642888977961478379583902288548529035760338528080643819723446610596897228728652641538226646984200211954841552784411812865345070351916500166892944154808460712771439997629268346295774383618951101271486387469765459824517885509753790138806
64961911962222957110555242923723192197738262561631468842032853716682938649611917049738836395495938 + 3 | 2008-03-04 12:00:00 | joker + 4 | 2008-03-05 15:00:00 | queen + 1 | 2008-12-31 10:00:00 | admin +(5 rows) + +SELECT * FROM tbl_gistkey; + id | c +----+--- +(0 rows) + +SELECT * FROM tbl_only_ckey; + col1 | col2 | :-) +------+------+----- +(0 rows) + +SELECT * FROM tbl_only_pkey; + col1 | :-) +------+----- +(0 rows) + +RESET synchronize_seqscans; +-- +-- clean up +-- +DROP TABLE tbl_cluster; +DROP TABLE tbl_gistkey; +DROP TABLE tbl_only_pkey; +DROP TABLE tbl_only_ckey; +RESET client_min_messages; diff --git a/bin/pg_reorg.c b/bin/pg_reorg.c index 7005c61..d414807 100755 --- a/bin/pg_reorg.c +++ b/bin/pg_reorg.c @@ -368,7 +368,7 @@ reorg_one_database(const char *orderby, const char *table) reconnect(); /* Restrict search_path to system catalog. */ - command("SET search_path = pg_catalog, pg_temp", 0, NULL); + command("SET search_path = pg_catalog, pg_temp, public", 0, NULL); /* To avoid annoying "create implicit ..." messages. */ command("SET client_min_messages = warning", 0, NULL); diff --git a/bin/sql/init.sql b/bin/sql/init.sql new file mode 100755 index 0000000..a49b271 --- /dev/null +++ b/bin/sql/init.sql @@ -0,0 +1,5 @@ +SET client_min_messages = warning; +\set ECHO none +\i ../lib/pg_reorg.sql +\set ECHO all +RESET client_min_messages; diff --git a/bin/sql/reorg.sql b/bin/sql/reorg.sql new file mode 100755 index 0000000..6aa546f --- /dev/null +++ b/bin/sql/reorg.sql @@ -0,0 +1,73 @@ +SET client_min_messages = warning; +-- +-- create table. 
+-- +CREATE TABLE tbl_cluster ( + col1 int, + col2 timestamp, + ":-)" text, + primary key(":-)", col1) +) WITH (fillfactor = 70); + +CREATE INDEX cidx_cluster ON tbl_cluster (col2, length(":-)")); +ALTER TABLE tbl_cluster CLUSTER ON cidx_cluster; + +CREATE TABLE tbl_only_pkey ( + col1 int PRIMARY KEY, + ":-)" text +); + +CREATE TABLE tbl_only_ckey ( + col1 int, + col2 timestamp, + ":-)" text +) WITH (fillfactor = 70); + +CREATE INDEX cidx_only_ckey ON tbl_only_ckey (col2, ":-)"); +ALTER TABLE tbl_only_ckey CLUSTER ON cidx_only_ckey; + +CREATE TABLE tbl_gistkey ( + id integer PRIMARY KEY, + c circle +); + +CREATE INDEX cidx_circle ON tbl_gistkey USING gist (c); +ALTER TABLE tbl_gistkey CLUSTER ON cidx_circle; + +INSERT INTO tbl_cluster VALUES(1, '2008-12-31 10:00:00', 'admin'); +INSERT INTO tbl_cluster VALUES(2, '2008-01-01 00:00:00', 'king'); +INSERT INTO tbl_cluster VALUES(3, '2008-03-04 12:00:00', 'joker'); +INSERT INTO tbl_cluster VALUES(4, '2008-03-05 15:00:00', 'queen'); +INSERT INTO tbl_cluster VALUES(5, '2008-01-01 00:30:00', sqrt(2::numeric(1000,999))::text || sqrt(3::numeric(1000,999))::text); + +-- +-- do reorg +-- + +\! 
pg_reorg --dbname=contrib_regression --table=tbl_cluster + +-- +-- results +-- + +\d+ tbl_cluster +\d+ tbl_gistkey +\d+ tbl_only_ckey +\d+ tbl_only_pkey + +SET synchronize_seqscans = off; +SELECT col1, to_char(col2, 'YYYY-MM-DD HH24:MI:SS'), ":-)" FROM tbl_cluster; +SELECT * FROM tbl_gistkey; +SELECT * FROM tbl_only_ckey; +SELECT * FROM tbl_only_pkey; +RESET synchronize_seqscans; + +-- +-- clean up +-- + +DROP TABLE tbl_cluster; +DROP TABLE tbl_gistkey; +DROP TABLE tbl_only_pkey; +DROP TABLE tbl_only_ckey; +RESET client_min_messages; diff --git a/lib/reorg.c b/lib/reorg.c index 5c5fa19..bfd4cd1 100755 --- a/lib/reorg.c +++ b/lib/reorg.c @@ -9,13 +9,21 @@ */ #include "postgres.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/pg_namespace.h" #include "catalog/pg_type.h" +#include "commands/tablecmds.h" #include "commands/trigger.h" #include "executor/spi.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/relcache.h" +#include "utils/syscache.h" #ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC; @@ -48,11 +56,13 @@ static SPIPlanPtr reorg_prepare(const char *src, int nargs, Oid *argtypes); static void reorg_execp(SPIPlanPtr plan, Datum *values, const char *nulls, int expected); static void reorg_execf(int expexted, const char *format, ...) 
__attribute__((format(printf, 2, 3))); +static void reorg_execd(const char *src, int nargs, Oid *argtypes, Datum *values, const char *nulls, int expected); static const char *get_quoted_relname(Oid oid); static const char *get_quoted_nspname(Oid oid); +static void swap_heap_or_index_files(Oid r1, Oid r2); -#define copy_tuple(tuple, tupdesc) \ - PointerGetDatum(SPI_returntuple((tuple), (tupdesc))) +#define copy_tuple(tuple, desc) \ + PointerGetDatum(SPI_returntuple((tuple), (desc))) /* check access authority */ static void @@ -63,34 +73,11 @@ must_be_superuser(const char *func) } #if PG_VERSION_NUM < 80400 -static int -SPI_execute_with_args(const char *src, - int nargs, Oid *argtypes, +static int SPI_execute_with_args(const char *src, int nargs, Oid *argtypes, Datum *values, const char *nulls, - bool read_only, long tcount) -{ - SPIPlanPtr plan; - int ret; - - plan = SPI_prepare(src, nargs, argtypes); - if (plan == NULL) - return SPI_result; - ret = SPI_execute_plan(plan, values, nulls, read_only, tcount); - SPI_freeplan(plan); - return ret; -} - -static text * -cstring_to_text(const char * s) -{ - int len = strlen(s); - text *result = palloc(len + VARHDRSZ); - - SET_VARSIZE(result, len + VARHDRSZ); - memcpy(VARDATA(result), s, len); - - return result; -} + bool read_only, long tcount); +static text *cstring_to_text(const char * s); +static void RenameRelationInternal(Oid myrelid, const char *newrelname, Oid namespaceId); #endif /** @@ -105,13 +92,12 @@ Datum reorg_trigger(PG_FUNCTION_ARGS) { TriggerData *trigdata = (TriggerData *) fcinfo->context; - TupleDesc tupdesc; + TupleDesc desc; HeapTuple tuple; Datum values[2]; char nulls[2] = { ' ', ' ' }; Oid argtypes[2]; const char *sql; - int ret; /* authority check */ must_be_superuser("reorg_trigger"); @@ -125,7 +111,7 @@ reorg_trigger(PG_FUNCTION_ARGS) /* retrieve parameters */ sql = trigdata->tg_trigger->tgargs[0]; - tupdesc = RelationGetDescr(trigdata->tg_relation); + desc = RelationGetDescr(trigdata->tg_relation); 
argtypes[0] = argtypes[1] = trigdata->tg_relation->rd_rel->reltype; /* connect to SPI manager */ @@ -136,27 +122,25 @@ reorg_trigger(PG_FUNCTION_ARGS) /* INSERT: (NULL, newtup) */ tuple = trigdata->tg_trigtuple; nulls[0] = 'n'; - values[1] = copy_tuple(tuple, tupdesc); + values[1] = copy_tuple(tuple, desc); } else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event)) { /* DELETE: (oldtup, NULL) */ tuple = trigdata->tg_trigtuple; - values[0] = copy_tuple(tuple, tupdesc); + values[0] = copy_tuple(tuple, desc); nulls[1] = 'n'; } else { /* UPDATE: (oldtup, newtup) */ tuple = trigdata->tg_newtuple; - values[0] = copy_tuple(trigdata->tg_trigtuple, tupdesc); - values[1] = copy_tuple(tuple, tupdesc); + values[0] = copy_tuple(trigdata->tg_trigtuple, desc); + values[1] = copy_tuple(tuple, desc); } /* INSERT INTO reorg.log VALUES ($1, $2) */ - ret = SPI_execute_with_args(sql, 2, argtypes, values, nulls, false, 1); - if (ret < 0) - elog(ERROR, "reorg_trigger: SPI_execp returned %d", ret); + reorg_execd(sql, 2, argtypes, values, nulls, SPI_OK_INSERT); SPI_finish(); @@ -195,7 +179,7 @@ reorg_apply(PG_FUNCTION_ARGS) SPIPlanPtr plan_update = NULL; SPIPlanPtr plan_pop = NULL; uint32 n, i; - TupleDesc tupdesc; + TupleDesc desc; Oid argtypes[3]; /* id, pk, row */ Datum values[3]; /* id, pk, row */ char nulls[3]; /* id, pk, row */ @@ -230,10 +214,10 @@ reorg_apply(PG_FUNCTION_ARGS) /* copy tuptable because we will call other sqls. 
*/ ntuples = SPI_processed; tuptable = SPI_tuptable; - tupdesc = tuptable->tupdesc; - argtypes[0] = SPI_gettypeid(tupdesc, 1); /* id */ - argtypes[1] = SPI_gettypeid(tupdesc, 2); /* pk */ - argtypes[2] = SPI_gettypeid(tupdesc, 3); /* row */ + desc = tuptable->tupdesc; + argtypes[0] = SPI_gettypeid(desc, 1); /* id */ + argtypes[1] = SPI_gettypeid(desc, 2); /* pk */ + argtypes[2] = SPI_gettypeid(desc, 3); /* row */ for (i = 0; i < ntuples; i++, n++) { @@ -241,11 +225,11 @@ reorg_apply(PG_FUNCTION_ARGS) bool isnull; tuple = tuptable->vals[i]; - values[0] = SPI_getbinval(tuple, tupdesc, 1, &isnull); + values[0] = SPI_getbinval(tuple, desc, 1, &isnull); nulls[0] = ' '; - values[1] = SPI_getbinval(tuple, tupdesc, 2, &isnull); + values[1] = SPI_getbinval(tuple, desc, 2, &isnull); nulls[1] = (isnull ? 'n' : ' '); - values[2] = SPI_getbinval(tuple, tupdesc, 3, &isnull); + values[2] = SPI_getbinval(tuple, desc, 3, &isnull); nulls[2] = (isnull ? 'n' : ' '); if (nulls[1] == 'n') @@ -479,40 +463,18 @@ reorg_indexdef(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(str.data)); } -#define SQL_GET_SWAPINFO "\ -SELECT X.oid, X.relfilenode, X.relfrozenxid, Y.oid, Y.relfilenode, Y.relfrozenxid \ -FROM pg_class X, pg_class Y \ -WHERE X.oid = $1\ - AND Y.oid = ('reorg.table_' || X.oid)::regclass \ -UNION ALL \ -SELECT T.oid, T.relfilenode, T.relfrozenxid, U.oid, U.relfilenode, U.relfrozenxid \ -FROM pg_class X, pg_class T, pg_class Y, pg_class U \ -WHERE X.oid = $1\ - AND T.oid = X.reltoastrelid\ - AND Y.oid = ('reorg.table_' || X.oid)::regclass\ - AND U.oid = Y.reltoastrelid \ -UNION ALL \ -SELECT I.oid, I.relfilenode, I.relfrozenxid, J.oid, J.relfilenode, J.relfrozenxid \ -FROM pg_class X, pg_class T, pg_class I, pg_class Y, pg_class U, pg_class J \ -WHERE X.oid = $1\ - AND T.oid = X.reltoastrelid\ - AND I.oid = T.reltoastidxid\ - AND Y.oid = ('reorg.table_' || X.oid)::regclass\ - AND U.oid = Y.reltoastrelid\ - AND J.oid = U.reltoastidxid \ -UNION ALL \ -SELECT X.oid, 
X.relfilenode, X.relfrozenxid, Y.oid, Y.relfilenode, Y.relfrozenxid \ -FROM pg_index I, pg_class X, pg_class Y \ -WHERE I.indrelid = $1\ - AND I.indexrelid = X.oid\ - AND Y.oid = ('reorg.index_' || X.oid)::regclass\ - ORDER BY 1\ -" +static Oid +getoid(HeapTuple tuple, TupleDesc desc, int column) +{ + bool isnull; + Datum datum = SPI_getbinval(tuple, desc, column, &isnull); + return isnull ? InvalidOid : DatumGetObjectId(datum); +} /** * @fn Datum reorg_swap(PG_FUNCTION_ARGS) - * @brief Swapping relfilenode of table, toast, toast index - * and table indexes on target table and temp table mutually. + * @brief Swapping relfilenode of tables and relation ids of toast tables + * and toast indexes. * * reorg_swap(oid, relname) * @@ -522,16 +484,23 @@ WHERE I.indrelid = $1\ Datum reorg_swap(PG_FUNCTION_ARGS) { - Oid oid = PG_GETARG_OID(0); - const char *relname = get_quoted_relname(oid); - const char *nspname = get_quoted_nspname(oid); + Oid oid = PG_GETARG_OID(0); + const char *relname = get_quoted_relname(oid); + const char *nspname = get_quoted_nspname(oid); + Oid argtypes[1] = { OIDOID }; + char nulls[1] = { ' ' }; + Datum values[1]; + SPITupleTable *tuptable; + TupleDesc desc; + HeapTuple tuple; + uint32 records; + uint32 i; - SPIPlanPtr plan_swapinfo; - SPIPlanPtr plan_swap; - Oid argtypes[3] = { OIDOID, OIDOID, XIDOID }; - char nulls[3] = { ' ', ' ', ' ' }; - Datum values[3]; - int record; + Oid reltoastrelid1; + Oid reltoastidxid1; + Oid oid2; + Oid reltoastrelid2; + Oid reltoastidxid2; /* authority check */ must_be_superuser("reorg_swap"); @@ -539,46 +508,85 @@ reorg_swap(PG_FUNCTION_ARGS) /* connect to SPI manager */ reorg_init(); - /* parepare */ - plan_swapinfo = reorg_prepare( - SQL_GET_SWAPINFO, - 1, argtypes); - plan_swap = reorg_prepare( - "UPDATE pg_class SET relfilenode = $2, relfrozenxid = $3 WHERE oid = $1", - 3, argtypes); - - /* swap relfilenode */ + /* swap relfilenode and dependencies for tables. 
 */ values[0] = ObjectIdGetDatum(oid); - reorg_execp(plan_swapinfo, values, nulls, SPI_OK_SELECT); - - record = SPI_processed; + reorg_execd( + "SELECT X.oid, X.reltoastrelid, TX.reltoastidxid," + " Y.oid, Y.reltoastrelid, TY.reltoastidxid" + " FROM pg_class X, pg_class Y, pg_class TX, pg_class TY" + " WHERE X.oid = $1" + " AND X.reltoastrelid = TX.oid" + " AND Y.oid = ('reorg.table_' || X.oid)::regclass" + " AND Y.reltoastrelid = TY.oid", + 1, argtypes, values, nulls, SPI_OK_SELECT); - if (record > 0) + tuptable = SPI_tuptable; + desc = tuptable->tupdesc; + records = SPI_processed; + + if (records == 0) + elog(ERROR, "reorg_swap : unexpected"); + + tuple = tuptable->vals[0]; + + reltoastrelid1 = getoid(tuple, desc, 2); + reltoastidxid1 = getoid(tuple, desc, 3); + oid2 = getoid(tuple, desc, 4); + reltoastrelid2 = getoid(tuple, desc, 5); + reltoastidxid2 = getoid(tuple, desc, 6); + + swap_heap_or_index_files(oid, oid2); + CommandCounterIncrement(); + + /* swap relfilenode and dependencies for indexes. 
*/ + values[0] = ObjectIdGetDatum(oid); + reorg_execd( + "SELECT X.oid, Y.oid" + " FROM pg_index I, pg_class X, pg_class Y" + " WHERE I.indrelid = $1" + " AND I.indexrelid = X.oid" + " AND Y.oid = ('reorg.index_' || X.oid)::regclass", + 1, argtypes, values, nulls, SPI_OK_SELECT); + + tuptable = SPI_tuptable; + desc = tuptable->tupdesc; + records = SPI_processed; + + for (i = 0; i < records; i++) { - SPITupleTable *tuptable; - TupleDesc tupdesc; - HeapTuple tuple; - char isnull; - int i; - - tuptable = SPI_tuptable; - tupdesc = tuptable->tupdesc; - - for (i = 0; i < record; i++) - { - tuple = tuptable->vals[i]; + tuple = tuptable->vals[i]; + swap_heap_or_index_files( + getoid(tuple, desc, 1), + getoid(tuple, desc, 2)); + CommandCounterIncrement(); + } + + /* swap names for toast tables and toast indexes */ + if (reltoastrelid1 != InvalidOid) + { + char name[NAMEDATALEN]; + int pid = getpid(); - /* target -> temp */ - values[0] = SPI_getbinval(tuple, tupdesc, 4, &isnull); - values[1] = SPI_getbinval(tuple, tupdesc, 2, &isnull); - values[2] = SPI_getbinval(tuple, tupdesc, 3, &isnull); - reorg_execp(plan_swap, values, nulls, SPI_OK_UPDATE); - /* temp -> target */ - values[0] = SPI_getbinval(tuple, tupdesc, 1, &isnull); - values[1] = SPI_getbinval(tuple, tupdesc, 5, &isnull); - values[2] = SPI_getbinval(tuple, tupdesc, 6, &isnull); - reorg_execp(plan_swap, values, nulls, SPI_OK_UPDATE); - } + /* rename X to TEMP */ + snprintf(name, NAMEDATALEN, "pg_toast_pid%d", pid); + RenameRelationInternal(reltoastrelid1, name, PG_TOAST_NAMESPACE); + snprintf(name, NAMEDATALEN, "pg_toast_pid%d_index", pid); + RenameRelationInternal(reltoastidxid1, name, PG_TOAST_NAMESPACE); + CommandCounterIncrement(); + + /* rename Y to X */ + snprintf(name, NAMEDATALEN, "pg_toast_%u", oid); + RenameRelationInternal(reltoastrelid2, name, PG_TOAST_NAMESPACE); + snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid); + RenameRelationInternal(reltoastidxid2, name, PG_TOAST_NAMESPACE); + 
CommandCounterIncrement(); + + /* rename TEMP to Y */ + snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2); + RenameRelationInternal(reltoastrelid1, name, PG_TOAST_NAMESPACE); + snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2); + RenameRelationInternal(reltoastidxid1, name, PG_TOAST_NAMESPACE); + CommandCounterIncrement(); } /* drop reorg trigger */ @@ -691,6 +699,15 @@ reorg_execf(int expected, const char *format, ...) elog(ERROR, "pg_reorg: reorg_execf failed (sql=%s, code=%d, expected=%d)", sql.data, ret, expected); } +/* execute a query */ +static void +reorg_execd(const char *src, int nargs, Oid *argtypes, Datum *values, const char *nulls, int expected) +{ + int ret = SPI_execute_with_args(src, nargs, argtypes, values, nulls, expected == SPI_OK_SELECT, 0); + if (ret != expected) + elog(ERROR, "pg_reorg: reorg_execd failed (sql=%s, code=%d, expected=%d)", src, ret, expected); +} + static const char * get_quoted_relname(Oid oid) { @@ -702,3 +719,205 @@ get_quoted_nspname(Oid oid) { return quote_identifier(get_namespace_name(get_rel_namespace(oid))); } + +/* + * This is a copy of swap_relation_files in cluster.c, but it also swaps + * relfrozenxid. + */ +static void +swap_heap_or_index_files(Oid r1, Oid r2) +{ + Relation relRelation; + HeapTuple reltup1, + reltup2; + Form_pg_class relform1, + relform2; + Oid swaptemp; + CatalogIndexState indstate; + + /* We need writable copies of both pg_class tuples. 
 */ + relRelation = heap_open(RelationRelationId, RowExclusiveLock); + + reltup1 = SearchSysCacheCopy(RELOID, + ObjectIdGetDatum(r1), + 0, 0, 0); + if (!HeapTupleIsValid(reltup1)) + elog(ERROR, "cache lookup failed for relation %u", r1); + relform1 = (Form_pg_class) GETSTRUCT(reltup1); + + reltup2 = SearchSysCacheCopy(RELOID, + ObjectIdGetDatum(r2), + 0, 0, 0); + if (!HeapTupleIsValid(reltup2)) + elog(ERROR, "cache lookup failed for relation %u", r2); + relform2 = (Form_pg_class) GETSTRUCT(reltup2); + + Assert(relform1->relkind == relform2->relkind); + + /* + * Actually swap the fields in the two tuples + */ + swaptemp = relform1->relfilenode; + relform1->relfilenode = relform2->relfilenode; + relform2->relfilenode = swaptemp; + + swaptemp = relform1->reltablespace; + relform1->reltablespace = relform2->reltablespace; + relform2->reltablespace = swaptemp; + + swaptemp = relform1->reltoastrelid; + relform1->reltoastrelid = relform2->reltoastrelid; + relform2->reltoastrelid = swaptemp; + + /* set both rels' frozen Xid to the older (preceding) of the two */ + if (TransactionIdIsNormal(relform1->relfrozenxid)) + { + if (TransactionIdFollows(relform1->relfrozenxid, + relform2->relfrozenxid)) + relform1->relfrozenxid = relform2->relfrozenxid; + else + relform2->relfrozenxid = relform1->relfrozenxid; + } + + /* swap size statistics too, since new rel has freshly-updated stats */ + { + int4 swap_pages; + float4 swap_tuples; + + swap_pages = relform1->relpages; + relform1->relpages = relform2->relpages; + relform2->relpages = swap_pages; + + swap_tuples = relform1->reltuples; + relform1->reltuples = relform2->reltuples; + relform2->reltuples = swap_tuples; + } + + /* Update the tuples in pg_class */ + simple_heap_update(relRelation, &reltup1->t_self, reltup1); + simple_heap_update(relRelation, &reltup2->t_self, reltup2); + + /* Keep system catalogs current */ + indstate = CatalogOpenIndexes(relRelation); + CatalogIndexInsert(indstate, reltup1); + CatalogIndexInsert(indstate, reltup2); + 
CatalogCloseIndexes(indstate); + + /* + * If we have toast tables associated with the relations being swapped, + * change their dependency links to re-associate them with their new + * owning relations. Otherwise the wrong one will get dropped ... + * + * NOTE: it is possible that only one table has a toast table; this can + * happen in CLUSTER if there were dropped columns in the old table, and + * in ALTER TABLE when adding or changing type of columns. + * + * NOTE: at present, a TOAST table's only dependency is the one on its + * owning table. If more are ever created, we'd need to use something + * more selective than deleteDependencyRecordsFor() to get rid of only the + * link we want. + */ + if (relform1->reltoastrelid || relform2->reltoastrelid) + { + ObjectAddress baseobject, + toastobject; + long count; + + /* Delete old dependencies */ + if (relform1->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + relform1->reltoastrelid); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } + if (relform2->reltoastrelid) + { + count = deleteDependencyRecordsFor(RelationRelationId, + relform2->reltoastrelid); + if (count != 1) + elog(ERROR, "expected one dependency record for TOAST table, found %ld", + count); + } + + /* Register new dependencies */ + baseobject.classId = RelationRelationId; + baseobject.objectSubId = 0; + toastobject.classId = RelationRelationId; + toastobject.objectSubId = 0; + + if (relform1->reltoastrelid) + { + baseobject.objectId = r1; + toastobject.objectId = relform1->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); + } + + if (relform2->reltoastrelid) + { + baseobject.objectId = r2; + toastobject.objectId = relform2->reltoastrelid; + recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); + } + } + + /* + * Blow away the old relcache entries now. 
We need this kluge because + * relcache.c keeps a link to the smgr relation for the physical file, and + * that will be out of date as soon as we do CommandCounterIncrement. + * Whichever of the rels is the second to be cleared during cache + * invalidation will have a dangling reference to an already-deleted smgr + * relation. Rather than trying to avoid this by ordering operations just + * so, it's easiest to not have the relcache entries there at all. + * (Fortunately, since one of the entries is local in our transaction, + * it's sufficient to clear out our own relcache this way; the problem + * cannot arise for other backends when they see our update on the + * non-local relation.) + */ + RelationForgetRelation(r1); + RelationForgetRelation(r2); + + /* Clean up. */ + heap_freetuple(reltup1); + heap_freetuple(reltup2); + + heap_close(relRelation, RowExclusiveLock); +} + +#if PG_VERSION_NUM < 80400 +static int +SPI_execute_with_args(const char *src, + int nargs, Oid *argtypes, + Datum *values, const char *nulls, + bool read_only, long tcount) +{ + SPIPlanPtr plan; + int ret; + + plan = SPI_prepare(src, nargs, argtypes); + if (plan == NULL) + return SPI_result; + ret = SPI_execute_plan(plan, values, nulls, read_only, tcount); + SPI_freeplan(plan); + return ret; +} + +static text * +cstring_to_text(const char * s) +{ + int len = strlen(s); + text *result = palloc(len + VARHDRSZ); + + SET_VARSIZE(result, len + VARHDRSZ); + memcpy(VARDATA(result), s, len); + + return result; +} + +static void +RenameRelationInternal(Oid myrelid, const char *newrelname, Oid namespaceId) +{ + renamerel(myrelid, newrelname, OBJECT_TABLE); +} +#endif