Several fixes for concurrent index builds:

* Use poll() if it is available, or select() otherwise, to wait
  efficiently for the index builds running in worker queries to finish.
* Fix an off-by-one error when initially assigning workers.
* Move the PQsetnonblocking() calls into setup_workers().
commit 42357353a7 (parent 8c2dd16608)
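As background for the first item: waiting on several asynchronous libpq queries boils down to polling the connections' sockets and then using PQconsumeInput()/PQisBusy() to see which results have arrived. The following is a minimal, self-contained sketch of that pattern, not code from this commit; wait_for_workers, conns, nconns and the 64-connection cap are assumptions made for the example.

#include <errno.h>
#include <poll.h>
#include <stdbool.h>
#include <libpq-fe.h>

/* Sketch: wait until every connection in conns[] has finished the
 * query previously dispatched on it with PQsendQuery().
 */
static void
wait_for_workers(PGconn **conns, int nconns)
{
	bool	done[64] = {false};		/* sketch assumes nconns <= 64 */
	int		remaining = nconns;

	while (remaining > 0)
	{
		struct pollfd	fds[64];
		int				i;

		for (i = 0; i < nconns; i++)
		{
			/* poll() ignores negative descriptors, so finished
			 * connections are simply skipped. */
			fds[i].fd = done[i] ? -1 : PQsocket(conns[i]);
			fds[i].events = POLLIN | POLLERR;
			fds[i].revents = 0;
		}

		/* Sleep until at least one worker socket is readable,
		 * capped at 3 seconds. */
		if (poll(fds, nconns, 3 * 1000) < 0 && errno != EINTR)
			return;

		for (i = 0; i < nconns; i++)
		{
			if (done[i])
				continue;

			/* PQconsumeInput() must run before PQisBusy() can report
			 * that a complete result has arrived. */
			if (!PQconsumeInput(conns[i]))
				return;

			if (!PQisBusy(conns[i]))
			{
				PGresult   *res;

				/* Drain all results; a NULL result marks the end. */
				while ((res = PQgetResult(conns[i])) != NULL)
					PQclear(res);
				done[i] = true;
				remaining--;
			}
		}
	}
}

The real loop below additionally reassigns a freed worker to the next pending index build, which is why it tracks per-job status instead of a simple done flag.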
@@ -24,17 +24,32 @@ const char *PROGRAM_VERSION = "unknown";
 
 #include "pgut/pgut-fe.h"
 
+#include <errno.h>
 #include <string.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <time.h>
 
+
+#ifdef HAVE_POLL_H
+#include <poll.h>
+#endif
+#ifdef HAVE_SYS_POLL_H
+#include <sys/poll.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
 
 /*
  * APPLY_COUNT: Number of applied logs per transaction. Larger values
  * could be faster, but will be long transactions in the REDO phase.
  */
 #define APPLY_COUNT		1000
 
+/* poll() or select() timeout, in seconds */
+#define POLL_TIMEOUT	3
+
 /* Compile an array of existing transactions which are active during
  * pg_repack's setup. Some transactions we can safely ignore:
@@ -633,7 +648,7 @@ rebuild_indexes(const repack_table *table)
 			 */
 			index_jobs[i].status = FINISHED;
 		}
-		else if (i <= workers.num_workers) {
+		else if (i < workers.num_workers) {
 			/* Assign available worker to build an index. */
 			index_jobs[i].status = INPROGRESS;
 			index_jobs[i].worker_idx = i;
@@ -656,23 +671,67 @@ rebuild_indexes(const repack_table *table)
 	}
 	PQclear(res);
 
+	/* How many workers we kicked off earlier. */
+	num_active_workers = num_indexes > workers.num_workers ? workers.num_workers : num_indexes;
+
 	if (workers.num_workers > 1)
 	{
-		/* How many workers we kicked off earlier. */
-		num_active_workers = num_indexes > workers.num_workers ? workers.num_workers : num_indexes;
+		int			freed_worker = -1;
+		int			ret;
 
+		/* Prefer poll() over select(), following PostgreSQL custom. */
+#ifdef HAVE_POLL
+		struct pollfd *input_fds;
+
+		input_fds = pgut_malloc(sizeof(struct pollfd) * num_active_workers);
+		for (i = 0; i < num_active_workers; i++)
+		{
+			input_fds[i].fd = PQsocket(workers.conns[i]);
+			input_fds[i].events = POLLIN | POLLERR;
+			input_fds[i].revents = 0;
+		}
+#else
+		fd_set		input_mask;
+		struct timeval timeout;
+		/* select() needs the highest-numbered socket descriptor */
+		int			max_fd = 0;
+
+		FD_ZERO(&input_mask);
+		for (i = 0; i < num_active_workers; i++)
+		{
+			FD_SET(PQsocket(workers.conns[i]), &input_mask);
+			if (PQsocket(workers.conns[i]) > max_fd)
+				max_fd = PQsocket(workers.conns[i]);
+		}
+#endif
+
 		/* Now go through our index builds, and look for any which is
 		 * reported complete. Reassign that worker to the next index to
 		 * be built, if any.
 		 */
-		while (num_active_workers)
+		while (num_active_workers > 0)
 		{
-			int			freed_worker = -1;
+			elog(DEBUG2, "polling %d active workers", num_active_workers);
 
+#ifdef HAVE_POLL
+			ret = poll(input_fds, num_active_workers, POLL_TIMEOUT * 1000);
+#else
+			/* re-initialize timeout before each invocation of select()
+			 * just in case select() modifies timeout to indicate remaining
+			 * time.
+			 */
+			timeout.tv_sec = POLL_TIMEOUT;
+			timeout.tv_usec = 0;
+			ret = select(max_fd + 1, &input_mask, NULL, NULL, &timeout);
+#endif
+			if (ret < 0 && errno != EINTR)
+				elog(ERROR, "poll() failed: %d, %d", ret, errno);
+
 			for (i = 0; i < num_indexes; i++)
 			{
 				if (index_jobs[i].status == INPROGRESS)
 				{
+					Assert(index_jobs[i].worker_idx >= 0);
 					/* Must call PQconsumeInput before we can check PQisBusy */
 					if (PQconsumeInput(workers.conns[index_jobs[i].worker_idx]) != 1)
 					{
@@ -699,7 +758,13 @@ rebuild_indexes(const repack_table *table)
 					}
 					PQclear(res);
 				}
 
+				/* We are only going to re-queue one worker, even
+				 * though more than one index build might be finished.
+				 * Any other jobs which may be finished will
+				 * just have to wait for the next pass through the
+				 * poll()/select() loop.
+				 */
 				freed_worker = index_jobs[i].worker_idx;
 				index_jobs[i].status = FINISHED;
 				num_active_workers--;
@@ -733,7 +798,6 @@ rebuild_indexes(const repack_table *table)
 				}
 				freed_worker = -1;
 			}
-			sleep(1);
 		}
 
 	}
@@ -980,7 +1044,6 @@ repack_one_table(const repack_table *table, const char *orderby)
 		goto cleanup;
 	}
 
-
 	/*
 	 * 4. Apply log to temp table until no tuples are left in the log
 	 * and all of the old transactions are finished.
@@ -82,7 +82,11 @@ setup_workers(int num_workers)
 		 */
 		elog(DEBUG2, "Setting up worker conn %d", i);
 
-		/* Don't confuse pgut_connections by using pgut_connect() */
+		/* Don't confuse pgut_connections by using pgut_connect()
+		 *
+		 * XXX: could use PQconnectStart() and PQconnectPoll() to
+		 * open these connections in non-blocking manner.
+		 */
 		conn = PQconnectdb(buf.data);
 		if (PQstatus(conn) == CONNECTION_OK)
 		{
@@ -94,6 +98,15 @@ setup_workers(int num_workers)
 				 PQerrorMessage(conn));
 			break;
 		}
+
+		/* Make sure each worker connection can work in non-blocking
+		 * mode.
+		 */
+		if (PQsetnonblocking(workers.conns[i], 1))
+		{
+			elog(ERROR, "Unable to set worker connection %d "
+				 "non-blocking.", i);
+		}
 	}
 	/* In case we bailed out of setting up all workers, record
 	 * how many successful worker conns we actually have.
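One practical note on the PQsetnonblocking() call that setup_workers() now makes: with a connection in non-blocking mode, libpq may be unable to queue an entire query in a single call, so a sender is expected to keep calling PQflush() until the output buffer is empty. A rough sketch of such a dispatch helper follows; the name dispatch_query and the busy-wait loop are illustrative assumptions, not part of this change.

#include <libpq-fe.h>

/* Sketch: send a query on a connection already put in non-blocking
 * mode with PQsetnonblocking(conn, 1).  Returns 1 on success, 0 on
 * failure.
 */
static int
dispatch_query(PGconn *conn, const char *sql)
{
	if (!PQsendQuery(conn, sql))
		return 0;				/* could not even queue the query */

	/* In non-blocking mode PQsendQuery() may leave data buffered;
	 * PQflush() returns 1 while data remains, 0 once fully sent, and
	 * -1 on error.  A real caller would wait for the socket to become
	 * writable between attempts instead of spinning like this.
	 */
	for (;;)
	{
		int		rc = PQflush(conn);

		if (rc == 0)
			return 1;
		if (rc < 0)
			return 0;
	}
}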