diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index f22e3da..835061d 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -792,7 +792,7 @@ host=localhost port=5432 dbname=mydb connect_timeout=10 The general form for a connection URI is: -postgresql://[user[:password]@][netloc][:port][/dbname][?param1=value1&...] +postgresql://[user[:password]@][netloc][:port][,netloc[:port]...][/dbname][?param1=value1&...] @@ -809,6 +809,7 @@ postgresql://localhost/mydb postgresql://user@localhost postgresql://user:secret@localhost postgresql://other@localhost/otherdb?connect_timeout=10&application_name=myapp +postgresql://node1,node2:5433,node3:4432,node4/mydb?hostorder=random&readonly=1 Components of the hierarchical part of the URI can also be given as parameters. For example: @@ -831,7 +832,9 @@ postgresql:///mydb?host=localhost&port=5433 For improved compatibility with JDBC connection URIs, instances of parameter ssl=true are translated into - sslmode=require. + sslmode=require and + loadBalanceHosts=true into + hostorder=random. @@ -841,6 +844,10 @@ postgresql:///mydb?host=localhost&port=5433 postgresql://[2001:db8::1234]/database + + There can be serveral host specifications, optionally accompanied + with port, separated by comma. + The host component is interpreted as described for the parameter PostgreSQL was built). On machines without Unix-domain sockets, the default is to connect to localhost. + + There can be more than one host parameter in + the connect string. In this case these hosts would be considered + alternate entries into same database and if connect to first one + fails, library would try to connect second etc. This can be used + for high availability cluster or for load balancing. See + parameter. + + + Network host name can be accompanied with port number, separated by + colon. If so, this port number is used only when connected to + this host. If there is no port number, port specified in the + parameter would be used. + @@ -942,8 +963,44 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname - + + + hostorder + + + Specifies how to choose host from list of alternate hosts, + specified in the parameter. + + + If value of this argument is sequential (the + default) library connects to the hosts in order they specified, + and tries to connect second one only if connection to the first + fails. + + + If value is random host to connect is randomly + picked from the list. It allows to balance load between several + cluster nodes. However, currently PostgreSQL doesn't support + multimaster clusters. So, without use of third-party products, + only read-only connections can take advantage from the + load-balancing. See + + + + + readonly + + + If this parameter is 0 (the default), upon successful connection + library checks if host is in recovery state, and if it is so, + tries next host in the connect string. If this parameter is + non-zero, connection to warm standby nodes are considered + successful. + + + + port @@ -985,7 +1042,6 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname - connect_timeout @@ -996,7 +1052,27 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname - + + falover_timeout + + + Maximum time to cycilically retry all the hosts in commect string. + (as decimal integer number of seconds). If not specified, then + hosts are tried just once. + + + If we have replicating cluster, and master node fails, it might + take some time to promote one of standby nodes to the new master. + So clients which notice that connect to the master fails, can + already give up attempt to reestablish a connection when new master + became available. + + + Setting this parameter to reasonable time makes library try to + reconnect all the host in cyclically until new master appears. + + + client_encoding @@ -7212,6 +7288,18 @@ user=admin An example file is provided at share/pg_service.conf.sample. + + If more than one host option present in the section of service file, it + is interpeted as alternate servers for failover or load-balancing. See + option in the connect string. + + + For all other options first value takes precedence over later ones. + + + Options, specified in the connect string along with service option + have precedence over values from the service file. + diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 76b61bd..b8a9cf5 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -300,6 +300,18 @@ static const internalPQconninfoOption PQconninfoOptions[] = { "Replication", "D", 5, offsetof(struct pg_conn, replication)}, + {"hostorder", NULL, "sequential", NULL, + "Host order", "", 10, + offsetof(struct pg_conn, hostorder)}, + + {"readonly", NULL, "0", NULL, + "Read only", "", 1, /* should be just '0' or '1' */ + offsetof(struct pg_conn, read_only)}, + + {"failover_timeout", NULL, NULL, NULL, + "Failover Timeout", "", 10, /* strlen(INT32_MAX) == 10 */ + offsetof(struct pg_conn, failover_timeout)}, + /* Terminating entry --- MUST BE LAST */ {NULL, NULL, NULL, NULL, NULL, NULL, 0} @@ -336,6 +348,8 @@ static PGconn *makeEmptyPGconn(void); static bool fillPGconn(PGconn *conn, PQconninfoOption *connOptions); static void freePGconn(PGconn *conn); static void closePGconn(PGconn *conn); +static void pqTerminateConn(PGconn *conn); +static void freenodes(PGconn * conn); static PQconninfoOption *conninfo_init(PQExpBuffer errorMessage); static PQconninfoOption *parse_connection_string(const char *conninfo, PQExpBuffer errorMessage, bool use_defaults); @@ -380,6 +394,7 @@ static bool getPgPassFilename(char *pgpassfile); static void dot_pg_pass_warning(PGconn *conn); static void default_threadlock(int acquire); +static int try_next_address(PGconn *conn); /* global variable because fe-auth.c needs to access it */ pgthreadlock_t pg_g_threadlock = default_threadlock; @@ -1394,7 +1409,7 @@ connectDBStart(PGconn *conn) char portstr[MAXPGPATH]; struct addrinfo *addrs = NULL; struct addrinfo hint; - const char *node; + struct nodeinfo *node; int ret; if (!conn) @@ -1436,21 +1451,102 @@ connectDBStart(PGconn *conn) if (conn->pghostaddr != NULL && conn->pghostaddr[0] != '\0') { /* Using pghostaddr avoids a hostname lookup */ - node = conn->pghostaddr; + conn->nodes = calloc(sizeof(struct nodeinfo), 2); + conn->nodes->host = strdup(conn->pghostaddr); + conn->nodes->port = strdup(portstr); hint.ai_family = AF_UNSPEC; hint.ai_flags = AI_NUMERICHOST; } else if (conn->pghost != NULL && conn->pghost[0] != '\0') { /* Using pghost, so we have to look-up the hostname */ - node = conn->pghost; + char *p = conn->pghost; + char *q; + char *r; + int nodecount = 0, + nodesallocated = 4; + /* + * Parse comma-separated list of host-port pairs into function-local + * array of records. + */ + conn->nodes = malloc(sizeof(struct nodeinfo) * 4); + while (*p) + { + q = p; + r = NULL; + + /* Scan for the comma or end of string */ + while (*q != ',' && *q != 0) + { + if (*q == ':') + r = q; + /* + * If there is IPv6, colons before close bracket are part of + * address. + */ + if (*q == ']') + r = NULL; + q++; + } + + if (r) + { + /* Host has explicitely specified port */ + conn->nodes[nodecount].port = malloc(q - r ); + strncpy(conn->nodes[nodecount].port, r + 1 , q - r); + conn->nodes[nodecount].port[q - r - 1] = 0; + } + else + { + r = q; + conn->nodes[nodecount].port = NULL; + } + + /* + * If port is not given we use portstr. Save it now will be useful + * for PQport(). + */ + if (!conn->nodes[nodecount].port) + conn->nodes[nodecount].port = strdup(portstr); + else if (conn->nodes[nodecount].port[0] == '\0') + { + free(conn->nodes[nodecount].port); + conn->nodes[nodecount].port = strdup(portstr); + } + + if ((*p) == '[' && *(r - 1) == ']') + { + /* IPv6 address found. Strip brackets */ + p++; + r--; + } + + /* Fill node record */ + conn->nodes[nodecount].host = malloc(r - p + 1); + strncpy(conn->nodes[nodecount].host, p, r - p); + conn->nodes[nodecount].host[r - p] = 0; + + if (*q) /* skip a comma. */ + q++; + nodecount++; + if (nodecount == nodesallocated) + conn->nodes + = realloc(conn->nodes, + sizeof(struct nodeinfo) * (nodesallocated += 4)); + p = q; + } + + /* Fill end-of-host list marker */ + conn->nodes[nodecount].host = NULL; + conn->nodes[nodecount].port = NULL; + conn->nodes[nodecount].this_addr = NULL; hint.ai_family = AF_UNSPEC; } else { #ifdef HAVE_UNIX_SOCKETS /* pghostaddr and pghost are NULL, so use Unix domain socket */ - node = NULL; + conn->nodes = calloc(sizeof(struct nodeinfo), 2); hint.ai_family = AF_UNIX; UNIXSOCK_PATH(portstr, portnum, conn->pgunixsocket); if (strlen(portstr) >= UNIXSOCK_PATH_BUFLEN) @@ -1460,29 +1556,89 @@ connectDBStart(PGconn *conn) portstr, (int) (UNIXSOCK_PATH_BUFLEN - 1)); conn->options_valid = false; + free(conn->nodes); goto connect_errReturn; } #else /* Without Unix sockets, default to localhost instead */ - node = DefaultHost; + conn->nodes = calloc(sizeof(struct nodeinfo), 2); + conn->nodes->host = strdup(DefaultHost); hint.ai_family = AF_UNSPEC; #endif /* HAVE_UNIX_SOCKETS */ + conn->nodes->port = strdup(portstr); } - /* Use pg_getaddrinfo_all() to resolve the address */ - ret = pg_getaddrinfo_all(node, portstr, &hint, &addrs); - if (ret || !addrs) + /* + * Use pg_getaddrinfo_all() to resolve each of the the address in node + * list. + */ + for (node = conn->nodes; node->host != NULL || node->port != NULL; node++) { - if (node) - appendPQExpBuffer(&conn->errorMessage, - libpq_gettext("could not translate host name \"%s\" to address: %s\n"), - node, gai_strerror(ret)); + struct addrinfo *this_node_addrs; + struct addrinfo *curr_addr; + + ret = pg_getaddrinfo_all(node->host, node->port, &hint, + &this_node_addrs); + if (ret || !this_node_addrs) + { + if (node->host) + appendPQExpBuffer(&conn->errorMessage, + libpq_gettext("could not translate host name \"%s\" to address: %s\n"), + node->host, gai_strerror(ret)); + else + appendPQExpBuffer(&conn->errorMessage, + libpq_gettext("could not translate Unix-domain socket path \"%s\" to address: %s\n"), + node->port, gai_strerror(ret)); + if (this_node_addrs) + pg_freeaddrinfo_all(hint.ai_family, this_node_addrs); + + node->this_addr = NULL; + continue; + } + + /* + * Save all pointers to addrinfo's in an array, which will be used + * later to map conn->addr_cur to its corresponding node info. + */ + node->num_this_addr = 0; + curr_addr = this_node_addrs; + while (curr_addr) + { + node->num_this_addr++; + curr_addr = curr_addr->ai_next; + } + + if (node->num_this_addr) + { + int i; + node->this_addr + = malloc(sizeof(struct addrinfo *) * node->num_this_addr); + curr_addr = this_node_addrs; + for (i = 0; i < node->num_this_addr; i++) + { + node->this_addr[i] = curr_addr; + curr_addr = curr_addr->ai_next; + } + } + + /* Add this host addrs to addrs field of PGconn structure. */ + if (!addrs) + { + addrs = this_node_addrs; + } else - appendPQExpBuffer(&conn->errorMessage, - libpq_gettext("could not translate Unix-domain socket path \"%s\" to address: %s\n"), - portstr, gai_strerror(ret)); - if (addrs) - pg_freeaddrinfo_all(hint.ai_family, addrs); + { + struct addrinfo *p; + + for (p = addrs; p->ai_next != NULL; p = p->ai_next); + p->ai_next = this_node_addrs; + } + } + + /* Check if we've found at least one usable address. */ + if (!addrs) + { + freenodes(conn); conn->options_valid = false; goto connect_errReturn; } @@ -1499,11 +1655,26 @@ connectDBStart(PGconn *conn) * Set up to try to connect, with protocol 3.0 as the first attempt. */ conn->addrlist = addrs; - conn->addr_cur = addrs; + + /* + * We cannot just assign first addrs record to addr_cur, because host + * order may be random. So, use try_next_address + */ + conn->addr_cur = NULL; + try_next_address(conn); conn->addrlist_family = hint.ai_family; conn->pversion = PG_PROTOCOL(3, 0); conn->send_appname = true; conn->status = CONNECTION_NEEDED; + if (conn->failover_timeout) + { + conn->failover_finish_time = time(NULL) + + atoi(conn->failover_timeout); + } + else + { + conn->failover_finish_time = (time_t)0; /* it is in past, so its ok */ + } /* * The code for processing CONNECTION_NEEDED state is in PQconnectPoll(), @@ -1603,6 +1774,159 @@ connectDBComplete(PGconn *conn) } } +/* + * Gets address of pointer to the list of addrinfo sturctures. + * If order is random, rearranges the list by moving random element to the + * beginning (and putting its addres into given pointer. Returns address of + * first list element. + */ +static struct addrinfo *get_next_element(struct addrinfo **list, char *order) +{ + struct addrinfo *choice = NULL, *prev, *current, *prechoice = NULL; + int count = 0; + + if (*list == NULL) + return NULL; + if (strcmp(order,"random") == 0) + { + /* Peek random element from the list. */ + for (current = *list,prev = NULL; current != NULL; + prev=current, current=current->ai_next) + { + count++; + if ((rand()&0xffff) < 0x10000 / count) + { + choice = current; + prechoice = prev; + } + } + + /* + * If prechoice is not NULL, selected element is not first in the list. + * We have to move it to he head. + */ + if (prechoice != NULL) + { + prechoice->ai_next=choice->ai_next; + choice->ai_next=*list; + *list=choice; + } + } + + /* We always return first elemet of the list */ + return *list; +} + +/* ------------- + * set_host_and_port + * Set conn->pghost and conn->pgport which are needed for PQhost + * and PQport calls. + */ +static void +set_host_and_port(PGconn *conn) +{ + bool found_node = false; + struct nodeinfo *node; + + if (!conn->nodes) + return; + + for (node = conn->nodes; + (node->host != NULL || node->port != NULL) && !found_node; node++) + { + /* + * For each node check whether it corresponds to same addr_curr. + * And then set connected_node details appropriately. + */ + struct addrinfo **addrs = node->this_addr; + int i = 0; + while (i < node->num_this_addr) + { + if (addrs[i] == conn->addr_cur) + { + found_node = true; + + if (conn->connected_node.port) + free(conn->connected_node.port); + if (node->port) + conn->connected_node.port = strdup(node->port); + else + conn->connected_node.port = NULL; + + if (conn->connected_node.host) + free(conn->connected_node.host); + if (node->host) + conn->connected_node.host = strdup(node->host); + else + conn->connected_node.host = NULL; + break; + } + + i++; + } + } +} + + +/* ------------- + * try_next_address + * Attempts to set next address from the list of known ones. + * Returns 1 if address is choosen and 0 if there are no more addresses + * to try + * Takes into account hostorder parameter + * ------------ + */ +static int +try_next_address(PGconn *conn) +{ + if (strcmp(conn->hostorder,"random")==0) { + /* Initialize random number generator in case if nobody have + * done it before. Use value from rand along with time in case + * random number have been initialized by application. + * Use address of conn structure to load-balance different + * connections in the same app + */ + srand((unsigned int)((long int)conn ^ (long int) time(NULL) ^ + (long int)rand())); + } + if (conn->addr_cur == NULL) + { + + conn->addr_cur = get_next_element(&(conn->addrlist), + conn->hostorder); + + if (conn->addr_cur) + set_host_and_port(conn); + return 1; + } + else + { + conn->addr_cur = get_next_element(&(conn->addr_cur->ai_next), + conn->hostorder); + } + + if (conn->addr_cur == NULL && time(NULL) < conn->failover_finish_time) { + + /* + * If failover timeout is set, retry list of hosts from + * the beginning + */ + pg_usleep(1000000); + conn->addr_cur = get_next_element(&(conn->addrlist), + conn->hostorder); + } + + if (conn->addr_cur != NULL) + { + /* Clean up error message buffer. */ + set_host_and_port(conn); + resetPQExpBuffer(&conn->errorMessage); + return 1; + } + else + return 0; +} + /* ---------------- * PQconnectPoll * @@ -1680,7 +2004,8 @@ PQconnectPoll(PGconn *conn) case CONNECTION_SSL_STARTUP: case CONNECTION_NEEDED: break; - + case CONNECTION_CHECK_RW: + break; default: appendPQExpBufferStr(&conn->errorMessage, libpq_gettext( @@ -1718,9 +2043,8 @@ keep_going: /* We will come back to here until there is * ignore socket() failure if we have more addresses * to try */ - if (addr_cur->ai_next != NULL) + if (try_next_address(conn)) { - conn->addr_cur = addr_cur->ai_next; continue; } appendPQExpBuffer(&conn->errorMessage, @@ -1739,7 +2063,7 @@ keep_going: /* We will come back to here until there is if (!connectNoDelay(conn)) { pqDropConnection(conn, true); - conn->addr_cur = addr_cur->ai_next; + try_next_address(conn); continue; } } @@ -1749,7 +2073,7 @@ keep_going: /* We will come back to here until there is libpq_gettext("could not set socket to nonblocking mode: %s\n"), SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf))); pqDropConnection(conn, true); - conn->addr_cur = addr_cur->ai_next; + try_next_address(conn); continue; } @@ -1760,7 +2084,7 @@ keep_going: /* We will come back to here until there is libpq_gettext("could not set socket to close-on-exec mode: %s\n"), SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf))); pqDropConnection(conn, true); - conn->addr_cur = addr_cur->ai_next; + try_next_address(conn); continue; } #endif /* F_SETFD */ @@ -1807,7 +2131,7 @@ keep_going: /* We will come back to here until there is if (err) { pqDropConnection(conn, true); - conn->addr_cur = addr_cur->ai_next; + try_next_address(conn); continue; } } @@ -1898,7 +2222,7 @@ keep_going: /* We will come back to here until there is /* * Try the next address, if any. */ - conn->addr_cur = addr_cur->ai_next; + try_next_address(conn); } /* loop over addresses */ /* @@ -1944,9 +2268,8 @@ keep_going: /* We will come back to here until there is * If more addresses remain, keep trying, just as in the * case where connect() returned failure immediately. */ - if (conn->addr_cur->ai_next != NULL) + if (try_next_address(conn)) { - conn->addr_cur = conn->addr_cur->ai_next; conn->status = CONNECTION_NEEDED; goto keep_going; } @@ -2596,14 +2919,22 @@ keep_going: /* We will come back to here until there is conn->errorMessage.data[conn->errorMessage.len - 1] != '\n') appendPQExpBufferChar(&conn->errorMessage, '\n'); PQclear(res); + + /* + * If we have more than one host in the connect string, + * FATAL message from one of them is not really FATAL. + */ + if (try_next_address(conn)) + { + /* Must drop the old connection. */ + pqDropConnection(conn, true); + conn->status = CONNECTION_NEEDED; + goto keep_going; + } + goto error_return; } - /* We can release the address list now. */ - pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist); - conn->addrlist = NULL; - conn->addr_cur = NULL; - /* Fire up post-connection housekeeping if needed */ if (PG_PROTOCOL_MAJOR(conn->pversion) < 3) { @@ -2614,8 +2945,9 @@ keep_going: /* We will come back to here until there is } /* Otherwise, we are open for business! */ - conn->status = CONNECTION_OK; - return PGRES_POLLING_OK; + conn->status = CONNECTION_CHECK_RO; + goto keep_going; + } case CONNECTION_SETENV: @@ -2645,9 +2977,121 @@ keep_going: /* We will come back to here until there is goto error_return; } - /* We are open for business! */ + /* + * Move the state whether we need a read-only or read-write + * connection. And if that can be satisfied. + */ + conn->status = CONNECTION_CHECK_RO; + goto keep_going; + + case CONNECTION_CHECK_RO: + + /* + * consult connection options and check if RO connection is OK + * RO connection is OK if readonly connection is explicitely + * requested or if replication option is set. + */ + if ((conn->read_only && conn->read_only[0] > '0') + ||(conn->replication && conn->replication[0]) + ) + { + /* We can release the nodes and address list now. */ + freenodes(conn); + pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist); + conn->addrlist = NULL; + conn->addr_cur = NULL; + + conn->status = CONNECTION_OK; + return PGRES_POLLING_OK; + } + + /* + * Ok we need a read-write connection. We can find whether the + * server accept such a connection by its results for + * "SELECT pg_catalog.pg_is_in_recovery()" + */ conn->status = CONNECTION_OK; - return PGRES_POLLING_OK; + PQsendQuery(conn, "SELECT pg_catalog.pg_is_in_recovery()"); + conn->status = CONNECTION_CHECK_RW; + return PGRES_POLLING_READING; + + case CONNECTION_CHECK_RW: + { + char *value; + PGresult *res; + + conn->status = CONNECTION_OK; + if (!PQconsumeInput(conn)) + { + conn->status = CONNECTION_BAD; + return PGRES_POLLING_FAILED; + } + + if (PQisBusy(conn)) + { + /* Result is not ready yet */ + conn->status = CONNECTION_CHECK_RW; + return PGRES_POLLING_READING; + } + + res = PQgetResult(conn); + + /* + * Call PQgetResult second time to clear connection state. + * Should return NULL, so result is ignored. + */ + + PQgetResult(conn); + + if (!res || + (PQresultStatus(res) != PGRES_TUPLES_OK) || + PQntuples(res) != 1) + { + /* + * Something wrong happened with this host. skip to next + * one. + */ + conn->status = CONNECTION_NEEDED; + } + else + { + value = PQgetvalue(res, 0, 0); + if (value[0]=='t') + { + conn->status = CONNECTION_NEEDED; + } + } + + if (res) + PQclear(res); + if (conn->status != CONNECTION_OK) + { + ConnStatusType save_status = conn->status; + + conn->status = CONNECTION_OK; + pqTerminateConn(conn); + pqDropConnection(conn,true); + conn->sock = PGINVALID_SOCKET; + if (try_next_address(conn)) + { + conn->status = save_status; + goto keep_going; + } + else + { + conn->status = CONNECTION_BAD; + return PGRES_POLLING_FAILED; + } + + } + + /* We can release the nodes and address list now. */ + freenodes(conn); + pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist); + conn->addrlist = NULL; + conn->addr_cur = NULL; + return PGRES_POLLING_OK; + } default: appendPQExpBuffer(&conn->errorMessage, @@ -2937,20 +3381,43 @@ freePGconn(PGconn *conn) #endif } + /* - * closePGconn - * - properly close a connection to the backend - * - * This should reset or release all transient state, but NOT the connection - * parameters. On exit, the PGconn should be in condition to start a fresh - * connection with the same parameters (see PQreset()). + * freenodes + * - Free memory used to store all given hostname and service port. */ static void -closePGconn(PGconn *conn) +freenodes(PGconn *conn) { - PGnotify *notify; - pgParameterStatus *pstatus; + struct nodeinfo *node; + + if (!conn->nodes) + return; + + for (node = conn->nodes; + (node->host != NULL || node->port != NULL); node++) + { + if (node->host) + free(node->host); + if (node->port) + free(node->port); + if (node->this_addr) + free(node->this_addr); + } + free(conn->nodes); + conn->nodes = NULL; +} + + +/* + * pqTerminateConn + * - send terminate message to the backend, but do not free any transient + * state of PGconn object, which can be needed to reconnect. + */ +static void +pqTerminateConn(PGconn *conn) +{ /* * Note that the protocol doesn't allow us to send Terminate messages * during the startup phase. @@ -2958,13 +3425,30 @@ closePGconn(PGconn *conn) if (conn->sock != PGINVALID_SOCKET && conn->status == CONNECTION_OK) { /* - * Try to send "close connection" message to backend. Ignore any - * error. + * Try to send "close connection" message to backend. Ignore any error. */ pqPutMsgStart('X', false, conn); pqPutMsgEnd(conn); (void) pqFlush(conn); } +} + +/* + * closePGconn + * - properly close a connection to the backend + * + * This should reset or release all transient state, but NOT the connection + * parameters. On exit, the PGconn should be in condition to start a fresh + * connection with the same parameters (see PQreset()). + */ +static void +closePGconn(PGconn *conn) +{ + PGnotify *notify; + pgParameterStatus *pstatus; + + /* Send terminate request to backend */ + pqTerminateConn(conn); /* * Must reset the blocking status so a possible reconnect will work. @@ -2983,6 +3467,8 @@ closePGconn(PGconn *conn) conn->asyncStatus = PGASYNC_IDLE; pqClearAsyncResult(conn); /* deallocate result */ resetPQExpBuffer(&conn->errorMessage); + + freenodes(conn); pg_freeaddrinfo_all(conn->addrlist_family, conn->addrlist); conn->addrlist = NULL; conn->addr_cur = NULL; @@ -3969,6 +4455,9 @@ parseServiceFile(const char *serviceFile, { int linenr = 0, i; + + /* true if 'host' parameter is seen in service file. */ + bool hostflag = false; FILE *f; char buf[MAXBUFSIZE], *line; @@ -4088,7 +4577,33 @@ parseServiceFile(const char *serviceFile, if (strcmp(options[i].keyword, key) == 0) { if (options[i].val == NULL) + { options[i].val = strdup(val); + + /* + * Set flag that we get value of host option from + * this service file, so subsequent host lines + * should be appended to it, not ignored. + */ + if (!strcmp(key,"host")) + hostflag = true; + } + else if (!strcmp(key,"host") && hostflag) + { + /* + * Existing host value is from same service file, + * so append new one to it. + */ + char *old=options[i].val; + int oldlen=strlen(old); + + options[i].val=malloc(oldlen+1+strlen(val)+1); + strncpy(options[i].val,old,oldlen); + options[i].val[oldlen]=','; + strcpy(options[i].val+oldlen+1,val); + free(old); + } + if (!options[i].val) { printfPQExpBuffer(errorMessage, @@ -4809,86 +5324,129 @@ conninfo_uri_parse_options(PQconninfoOption *options, const char *uri, p = start; } - /* - * "p" has been incremented past optional URI credential information at - * this point and now points at the "netloc" part of the URI. - * - * Look for IPv6 address. - */ - if (*p == '[') + host = p; + if (*p == ':') { - host = ++p; - while (*p && *p != ']') - ++p; - if (!*p) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("end of string reached when looking for matching \"]\" in IPv6 host address in URI: \"%s\"\n"), - uri); - goto cleanup; - } - if (p == host) - { - printfPQExpBuffer(errorMessage, - libpq_gettext("IPv6 host address may not be empty in URI: \"%s\"\n"), - uri); - goto cleanup; - } + int portnum; + char *portstr; - /* Cut off the bracket and advance */ *(p++) = '\0'; + portstr = p; + portnum = 0; - /* - * The address may be followed by a port specifier or a slash or a - * query. - */ - if (*p && *p != ':' && *p != '/' && *p != '?') + while (*p >= '0' && *p <= '9') + { + portnum = portnum * 10 + (*(p++) - '0'); + } + + if (portnum > 65535 || portnum <1) { printfPQExpBuffer(errorMessage, - libpq_gettext("unexpected character \"%c\" at position %d in URI (expected \":\" or \"/\"): \"%s\"\n"), - *p, (int) (p - buf + 1), uri); + libpq_gettext("invalid port number: \"%d\"\n"), + portnum); goto cleanup; } + + prevchar = *p; + *p = '\0'; + if (*portstr && + !conninfo_storeval(options, "port", portstr, + errorMessage, false, true)); } else { - /* not an IPv6 address: DNS-named or IPv4 netloc */ - host = p; + do + { + if (*p == ',') + p++; - /* - * Look for port specifier (colon) or end of host specifier (slash), - * or query (question mark). - */ - while (*p && *p != ':' && *p != '/' && *p != '?') - ++p; - } + /* + * "p" has been incremented past optional URI credential + * information at this point and now points at the "netloc" part of + * the URI. + * + * Look for IPv6 address. + */ + if (*p == '[') + { + char *ipv6start = ++p; - /* Save the hostname terminator before we null it */ - prevchar = *p; - *p = '\0'; + while (*p && *p != ']') + ++p; + if (!*p) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("end of string reached when looking for matching \"]\" in IPv6 host address in URI: \"%s\"\n"), + uri); + goto cleanup; + } - if (*host && - !conninfo_storeval(options, "host", host, - errorMessage, false, true)) - goto cleanup; + if (p == ipv6start) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("IPv6 host address may not be empty in URI: \"%s\"\n"), + uri); + goto cleanup; + } + p++; - if (prevchar == ':') - { - const char *port = ++p; /* advance past host terminator */ + /* + * The address may be followed by a port specifier, a comma or + * a slash or a query. + */ + if (*p && *p != ',' && *p != ':' && *p != '/' && *p != '?') + { + printfPQExpBuffer(errorMessage, + libpq_gettext("unexpected character \"%c\" at position %d in URI (expected \":\" or \"/\"): \"%s\"\n"), + *p, (int) (p - buf + 1), uri); + goto cleanup; + } - while (*p && *p != '/' && *p != '?') - ++p; + } + else + { + /* not an IPv6 address: DNS-named or IPv4 netloc */ + + /* + * Look for port specifier (colon) or end of host specifier + * (slash), or query (question mark). + */ + while (*p && *p != ',' && *p != ':' && *p != '/' && *p != '?') + ++p; + } + /* Skip port specifier */ + if (*p == ':') + { + int portnum; + + p++; + portnum = 0; + while (*p >= '0' && *p <= '9') + { + portnum = portnum * 10 + (*(p++) - '0'); + } + + if (portnum > 65535 || portnum <1) + { + printfPQExpBuffer(errorMessage, + libpq_gettext("invalid port number: \"%d\"\n"), + portnum); + goto cleanup; + } + } + } while (*p == ','); + + /* Save the hostname terminator before we null it */ prevchar = *p; *p = '\0'; - if (*port && - !conninfo_storeval(options, "port", port, - errorMessage, false, true)) + if (*host && + !conninfo_storeval(options, "host", host, + errorMessage, false, true)) goto cleanup; } - if (prevchar && prevchar != '?') { const char *dbname = ++p; /* advance past host terminator */ @@ -5019,6 +5577,17 @@ conninfo_uri_parse_params(char *params, value = "require"; } + if ((strcmp(keyword, "loadBalanceHosts") == 0 || + strcmp(keyword, "load_balance_hosts") == 0) && + strcmp(value, "true") == 0) + { + free(keyword); + free(value); + malloced = false; + keyword = "hostorder"; + value = "random"; + } + /* * Store the value if the corresponding option exists; ignore * otherwise. At this point both keyword and value are not @@ -5232,7 +5801,22 @@ conninfo_storeval(PQconninfoOption *connOptions, } if (option->val) + { + if (strcmp(option->keyword, "host") == 0) + { + /* Accumulate multiple hosts in the single string */ + int val_len = strlen(option->val), + new_len = strlen(value); + + free(value_copy); + value_copy = malloc(val_len + 1 + new_len + 1); + strncpy(value_copy, option->val, val_len + 1); + value_copy[val_len] = ','; + strncpy(value_copy + val_len + 1, value, new_len + 1); + } free(option->val); + + } option->val = value_copy; return option; @@ -5353,7 +5937,7 @@ PQhost(const PGconn *conn) if (!conn) return NULL; if (conn->pghost != NULL && conn->pghost[0] != '\0') - return conn->pghost; + return conn->connected_node.host; else { #ifdef HAVE_UNIX_SOCKETS @@ -5372,7 +5956,12 @@ PQport(const PGconn *conn) { if (!conn) return NULL; - return conn->pgport; +#ifdef HAVE_UNIX_SOCKETS + if (IS_AF_UNIX(conn->addrlist_family)) + return conn->pgport; + else +#endif + return conn->connected_node.port; } char * diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index 9ca0756..23560f4 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -62,7 +62,11 @@ typedef enum * backend startup. */ CONNECTION_SETENV, /* Negotiating environment. */ CONNECTION_SSL_STARTUP, /* Negotiating SSL. */ - CONNECTION_NEEDED /* Internal state: connect() needed */ + CONNECTION_NEEDED, /* Internal state: connect() needed */ + CONNECTION_CHECK_RO, /* Internal state: need to check is RO + * connection acceptable */ + CONNECTION_CHECK_RW, /* Internal state: waiting that server replies + * if it is in recovery */ } ConnStatusType; typedef enum diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index 1183323..c68f976 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -292,6 +292,22 @@ typedef struct pgDataValue const char *value; /* data value, without zero-termination */ } PGdataValue; + +typedef struct nodeinfo +{ + char *host; + char *port; + + /* + * All of the addrinfos corresponding to this node are saved here. The + * function try_next_address rearranges the conn->addrlist at global level. + * And, after that reverse mapping of addrinfo to nodeinfo will not be + * possible if we store them as it is. + */ + struct addrinfo **this_addr; /* addresses of above host and port. */ + int num_this_addr; +} nodeinfo; + /* * PGconn stores all the state data associated with a single connection * to a backend. @@ -334,7 +350,11 @@ struct pg_conn #if defined(ENABLE_GSS) || defined(ENABLE_SSPI) char *krbsrvname; /* Kerberos service name */ #endif - + char *hostorder; /* How to handle multiple hosts */ + char *read_only; /* If true, we could work with readonly + * standby server */ + char *failover_timeout; /* If no usable server found, how long + * to wait before retry */ /* Optional file to write trace info to */ FILE *Pfdebug; @@ -378,10 +398,20 @@ struct pg_conn bool sigpipe_so; /* have we masked SIGPIPE via SO_NOSIGPIPE? */ bool sigpipe_flag; /* can we mask SIGPIPE via MSG_NOSIGNAL? */ + /* + * This variable stores information about host node to which client is + * connected. We cannot use pghost and pgport as we need them unmodified + * to do connection reset. + */ + nodeinfo connected_node; + /* Transient state needed while establishing connection */ struct addrinfo *addrlist; /* list of possible backend addresses */ struct addrinfo *addr_cur; /* the one currently being tried */ int addrlist_family; /* needed to know how to free addrlist */ + nodeinfo *nodes; /* needed for PQport, PQhost */ + time_t failover_finish_time; /* how long to retry host list + *waiting for new master to appear */ PGSetenvStatusType setenv_state; /* for 2.0 protocol only */ const PQEnvironmentOption *next_eo; bool send_appname; /* okay to send application_name? */ diff --git a/src/interfaces/libpq/test/expected.out b/src/interfaces/libpq/test/expected.out index d375e82..4832bdd 100644 --- a/src/interfaces/libpq/test/expected.out +++ b/src/interfaces/libpq/test/expected.out @@ -1,20 +1,20 @@ trying postgresql://uri-user:secret@host:12345/db -user='uri-user' password='secret' dbname='db' host='host' port='12345' (inet) +user='uri-user' password='secret' dbname='db' host='host:12345' (inet) trying postgresql://uri-user@host:12345/db -user='uri-user' dbname='db' host='host' port='12345' (inet) +user='uri-user' dbname='db' host='host:12345' (inet) trying postgresql://uri-user@host/db user='uri-user' dbname='db' host='host' (inet) trying postgresql://host:12345/db -dbname='db' host='host' port='12345' (inet) +dbname='db' host='host:12345' (inet) trying postgresql://host/db dbname='db' host='host' (inet) trying postgresql://uri-user@host:12345/ -user='uri-user' host='host' port='12345' (inet) +user='uri-user' host='host:12345' (inet) trying postgresql://uri-user@host/ user='uri-user' host='host' (inet) @@ -23,10 +23,10 @@ trying postgresql://uri-user@ user='uri-user' (local) trying postgresql://host:12345/ -host='host' port='12345' (inet) +host='host:12345' (inet) trying postgresql://host:12345 -host='host' port='12345' (inet) +host='host:12345' (inet) trying postgresql://host/db dbname='db' host='host' (inet) @@ -62,7 +62,7 @@ trying postgresql://host/db?u%7aer=someotheruser&port=12345 uri-regress: invalid URI query parameter: "uzer" trying postgresql://host:12345?user=uri-user -user='uri-user' host='host' port='12345' (inet) +user='uri-user' host='host:12345' (inet) trying postgresql://host?user=uri-user user='uri-user' host='host' (inet) @@ -71,19 +71,19 @@ trying postgresql://host? host='host' (inet) trying postgresql://[::1]:12345/db -dbname='db' host='::1' port='12345' (inet) +dbname='db' host='[::1]:12345' (inet) trying postgresql://[::1]/db -dbname='db' host='::1' (inet) +dbname='db' host='[::1]' (inet) trying postgresql://[2001:db8::1234]/ -host='2001:db8::1234' (inet) +host='[2001:db8::1234]' (inet) trying postgresql://[200z:db8::1234]/ -host='200z:db8::1234' (inet) +host='[200z:db8::1234]' (inet) trying postgresql://[::1] -host='::1' (inet) +host='[::1]' (inet) trying postgres:// (local) @@ -143,7 +143,7 @@ trying postgres://@host host='host' (inet) trying postgres://host:/ -host='host' (inet) +uri-regress: invalid port number: "0" trying postgres://:12345/ port='12345' (local)