diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 2f036015cc..e29ef152fd 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -10571,8 +10571,7 @@ SELECT xpath_exists('/my:a/text()', 'test The optional XMLNAMESPACES clause is a comma-separated list of namespaces. It specifies the XML namespaces used in - the document and their aliases. A default namespace specification - is not currently supported. + the document and their aliases. diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 1fb018416e..b60a3cfe0d 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -29,7 +29,7 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \ tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \ tsvector.o tsvector_op.o tsvector_parser.o \ txid.o uuid.o varbit.o varchar.o varlena.o version.o \ - windowfuncs.o xid.o xml.o + windowfuncs.o xid.o xml.o xpath_parser.o like.o: like.c like_match.c diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 24229c2dff..75f33cfc71 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -90,7 +90,7 @@ #include "utils/rel.h" #include "utils/syscache.h" #include "utils/xml.h" - +#include "utils/xpath_parser.h" /* GUC variables */ int xmlbinary; @@ -187,6 +187,7 @@ typedef struct XmlTableBuilderData xmlXPathCompExprPtr xpathcomp; xmlXPathObjectPtr xpathobj; xmlXPathCompExprPtr *xpathscomp; + bool with_default_ns; } XmlTableBuilderData; #endif @@ -4195,6 +4196,7 @@ XmlTableInitOpaque(TableFuncScanState *state, int natts) xtCxt->magic = XMLTABLE_CONTEXT_MAGIC; xtCxt->natts = natts; xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts); + xtCxt->with_default_ns = false; xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); @@ -4287,6 +4289,8 @@ XmlTableSetDocument(TableFuncScanState *state, Datum value) #endif /* not USE_LIBXML */ } +#define DEFAULT_NAMESPACE_NAME 
"pgdefnamespace.pgsqlxml.internal"
+
 /*
  * XmlTableSetNamespace
  *		Add a namespace declaration
@@ -4297,12 +4301,26 @@ XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
 #ifdef USE_LIBXML
 	XmlTableBuilderData *xtCxt;
 
-	if (name == NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("DEFAULT namespace is not supported")));
 	xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
 
+	if (name != NULL)
+	{
+		/* Don't allow same namespace as our internal default namespace name */
+		if (strcmp(name, DEFAULT_NAMESPACE_NAME) == 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_RESERVED_NAME),
+					 errmsg("cannot to use \"%s\" as namespace name",
+							DEFAULT_NAMESPACE_NAME),
+					 errdetail("\"%s\" is reserved for internal purpose",
+							   DEFAULT_NAMESPACE_NAME)));
+	}
+	else
+	{
+		/* DEFAULT namespace: register its URI under the reserved prefix */
+		xtCxt->with_default_ns = true;
+		name = DEFAULT_NAMESPACE_NAME;
+	}
+
 	if (xmlXPathRegisterNs(xtCxt->xpathcxt,
 						   pg_xmlCharStrndup(name, strlen(name)),
 						   pg_xmlCharStrndup(uri, strlen(uri))))
@@ -4331,6 +4349,15 @@ XmlTableSetRowFilter(TableFuncScanState *state, char *path)
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("row path filter must not be empty string")));
 
+	if (xtCxt->with_default_ns)
+	{
+		StringInfoData str;
+
+		/* qualify unprefixed element names with the reserved prefix */
+		transformXPath(&str, path, DEFAULT_NAMESPACE_NAME);
+		path = str.data;
+	}
+
 	xstr = pg_xmlCharStrndup(path, strlen(path));
 
 	xtCxt->xpathcomp = xmlXPathCompile(xstr);
@@ -4362,6 +4389,15 @@ XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
 				(errcode(ERRCODE_DATA_EXCEPTION),
 				 errmsg("column path filter must not be empty string")));
 
+	if (xtCxt->with_default_ns)
+	{
+		StringInfoData str;
+
+		/* qualify unprefixed element names with the reserved prefix */
+		transformXPath(&str, path, DEFAULT_NAMESPACE_NAME);
+		path = str.data;
+	}
+
 	xstr = pg_xmlCharStrndup(path, strlen(path));
 
 	xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
diff --git a/src/backend/utils/adt/xpath_parser.c b/src/backend/utils/adt/xpath_parser.c
new file mode 100644
index 0000000000..ed5a071a0a
--- /dev/null
+++ b/src/backend/utils/adt/xpath_parser.c
@@ -0,0 +1,357 @@
+/*-------------------------------------------------------------------------
+ *
+ * xpath_parser.c
+ *	  XML XPath parser.
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/utils/adt/xpath_parser.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/xpath_parser.h"
+
+/*
+ * All PostgreSQL XML related functionality is based on the libxml2 library,
+ * and XPath support is no exception.  However, libxml2 doesn't support a
+ * default namespace for XPath expressions, and it offers no API to transform
+ * or inspect a parsed XPath expression, so we have to parse XPath here.
+ *
+ * This is implemented as a simple XPath parser/preprocessor.  It transforms
+ * an XPath expression into another XPath expression that can be evaluated by
+ * libxml2.  It doesn't replace the libxml2 XPath parser or the libxml2 XPath
+ * expression evaluation.
+ */
+
+#ifdef USE_LIBXML
+
+/*
+ * We work on XPath expression tokens.  When a default namespace is defined,
+ * each unprefixed element name is qualified with the prefix registered for
+ * that namespace.  Attribute, function and operator names are copied as is.
+ */
+
+typedef enum
+{
+	XPATH_TOKEN_NONE,
+	XPATH_TOKEN_NAME,
+	XPATH_TOKEN_STRING,
+	XPATH_TOKEN_NUMBER,
+	XPATH_TOKEN_OTHER
+} XPathTokenType;
+
+typedef struct XPathTokenInfo
+{
+	XPathTokenType ttype;
+	char	   *start;
+	int			length;
+	bool		space_after;	/* was the token followed by whitespace? */
+} XPathTokenInfo;
+
+#define TOKEN_STACK_SIZE		10
+
+typedef struct ParserData
+{
+	char	   *str;
+	char	   *cur;
+	XPathTokenInfo stack[TOKEN_STACK_SIZE];
+	int			stack_length;
+} XPathParserData;
+
+/* Any high-bit-set character is OK (might be part of a multibyte char) */
+#define NODENAME_FIRSTCHAR(c)	 ((c) == '_' || \
+								 ((c) >= 'A' && (c) <= 'Z') || \
+								 ((c) >= 'a' && (c) <= 'z') || \
+								 (IS_HIGHBIT_SET(c)))
+
+/* '-', '.' and digits may continue a name but cannot start one */
+#define IS_NODENAME_CHAR(c)		(NODENAME_FIRSTCHAR(c) || (c) == '.' || \
+								 (c) == '-' || ((c) >= '0' && (c) <= '9'))
+
+/* Whitespace that may separate XPath tokens */
+#define IS_XPATH_SPACE(c)		((c) == ' ' || (c) == '\t' || \
+								 (c) == '\n' || (c) == '\r')
+
+/*
+ * XPath lexer.  Reads the token starting at str into *ti and returns a
+ * pointer just past the token and any whitespace that follows it.  At the
+ * end of the input, ti->ttype is set to XPATH_TOKEN_NONE.
+ */
+static char *
+getXPathToken(char *str, XPathTokenInfo * ti)
+{
+	/* skip initial whitespace */
+	while (IS_XPATH_SPACE(*str))
+		str++;
+
+	ti->space_after = false;
+
+	if (*str != '\0')
+	{
+		char		c = *str;
+
+		ti->start = str++;
+
+		if (c >= '0' && c <= '9')
+		{
+			while (*str >= '0' && *str <= '9')
+				str++;
+			if (*str == '.')
+			{
+				str++;
+				while (*str >= '0' && *str <= '9')
+					str++;
+			}
+			ti->ttype = XPATH_TOKEN_NUMBER;
+		}
+		else if (NODENAME_FIRSTCHAR(c))
+		{
+			while (IS_NODENAME_CHAR(*str))
+				str++;
+
+			ti->ttype = XPATH_TOKEN_NAME;
+		}
+		else if (c == '"' || c == '\'')
+		{
+			/* a literal ends at the next occurrence of its opening quote */
+			while (*str != '\0')
+				if (*str++ == c)
+					break;
+
+			ti->ttype = XPATH_TOKEN_STRING;
+		}
+		else
+			ti->ttype = XPATH_TOKEN_OTHER;
+
+		ti->length = str - ti->start;
+
+		/* swallow trailing whitespace; the writer re-emits it as one blank */
+		while (IS_XPATH_SPACE(*str))
+		{
+			ti->space_after = true;
+			str++;
+		}
+	}
+	else
+	{
+		ti->start = NULL;
+		ti->length = 0;
+		ti->ttype = XPATH_TOKEN_NONE;
+	}
+
+	return str;
+}
+
+/*
+ * Reset the parser: start reading at str with an empty pushback stack.
+ */
+static void
+initXPathParser(XPathParserData * parser, char *str)
+{
+	parser->str = str;
+	parser->cur = str;
+	parser->stack_length = 0;
+}
+
+/*
+ * Fetch the next token: a pushed-back one if there is any, else a fresh one
+ * read from the input.
+ */
+static void
+nextXPathToken(XPathParserData * parser, XPathTokenInfo * ti)
+{
+	if (parser->stack_length > 0)
+		*ti = parser->stack[--parser->stack_length];
+	else
+		parser->cur = getXPathToken(parser->cur, ti);
+}
+
+/*
+ * Push a token back so that the next nextXPathToken() call returns it.
+ */
+static void
+pushXPathToken(XPathParserData * parser, XPathTokenInfo * ti)
+{
+	if (parser->stack_length == TOKEN_STACK_SIZE)
+		elog(ERROR, "internal error");
+	parser->stack[parser->stack_length++] = *ti;
+}
+
+/*
+ * Append a token to the output string, plus a single blank when the token
+ * was followed by whitespace in the input.
+ */
+static void
+writeXPathToken(StringInfo str, XPathTokenInfo * ti)
+{
+	Assert(ti->ttype != XPATH_TOKEN_NONE);
+
+	if (ti->ttype != XPATH_TOKEN_OTHER)
+		appendBinaryStringInfo(str, ti->start, ti->length);
+	else
+		appendStringInfoChar(str, *ti->start);
+
+	if (ti->space_after)
+		appendStringInfoChar(str, ' ');
+}
+
+/*
+ * Write the ':' token of a qualified name and the local name after it.
+ * Returns false, pushing the token back instead of writing it, when no name
+ * follows the colon (as in "axis::node" or "prefix:*").
+ */
+static bool
+writeXPathQualifier(StringInfo str, XPathParserData * parser,
+					XPathTokenInfo * colon)
+{
+	XPathTokenInfo ti;
+
+	writeXPathToken(str, colon);
+	nextXPathToken(parser, &ti);
+	if (ti.ttype == XPATH_TOKEN_NAME)
+	{
+		writeXPathToken(str, &ti);
+		return true;
+	}
+	pushXPathToken(parser, &ti);
+	return false;
+}
+
+/*
+ * Main part of the XPath transformation.  Copies the tokens of the expression
+ * to str, qualifying every unprefixed element name with def_namespace_name.
+ * It calls itself recursively for each predicate; when inside_predicate is
+ * true it returns right after writing the closing ']'.
+ *
+ * XPath has no reserved words.  Per the XPath 1.0 lexical rules, a name that
+ * directly follows an operand is an operator name ("and", "or", "div", "mod")
+ * and a '*' that directly follows an operand is the multiply operator rather
+ * than a wildcard.  after_operand keeps track of that.
+ */
+static void
+_transformXPath(StringInfo str, XPathParserData * parser,
+				bool inside_predicate, char *def_namespace_name)
+{
+	XPathTokenInfo t1,
+				t2;
+	bool		after_operand = false;
+
+	nextXPathToken(parser, &t1);
+
+	while (t1.ttype != XPATH_TOKEN_NONE)
+	{
+		switch (t1.ttype)
+		{
+			case XPATH_TOKEN_NUMBER:
+			case XPATH_TOKEN_STRING:
+				after_operand = true;
+				writeXPathToken(str, &t1);
+				nextXPathToken(parser, &t1);
+				break;
+
+			case XPATH_TOKEN_NAME:
+				{
+					bool		is_function;
+					bool		is_qual_name;
+
+					/* a name right after an operand is an operator name */
+					if (after_operand)
+					{
+						after_operand = false;
+						writeXPathToken(str, &t1);
+						nextXPathToken(parser, &t1);
+						break;
+					}
+
+					nextXPathToken(parser, &t2);
+					is_function = (t2.ttype == XPATH_TOKEN_OTHER && *t2.start == '(');
+					is_qual_name = (t2.ttype == XPATH_TOKEN_OTHER && *t2.start == ':');
+
+					/* function names and already qualified names get no prefix */
+					if (!is_function && !is_qual_name && def_namespace_name != NULL)
+						appendStringInfo(str, "%s:", def_namespace_name);
+
+					writeXPathToken(str, &t1);
+
+					if (is_qual_name)
+						after_operand = writeXPathQualifier(str, parser, &t2);
+					else
+					{
+						after_operand = true;
+						pushXPathToken(parser, &t2);
+					}
+
+					nextXPathToken(parser, &t1);
+				}
+				break;
+
+			case XPATH_TOKEN_OTHER:
+				{
+					char		c = *t1.start;
+
+					writeXPathToken(str, &t1);
+
+					if (c == '[')
+					{
+						_transformXPath(str, parser, true, def_namespace_name);
+						after_operand = true;
+					}
+					else if (c == ']' && inside_predicate)
+						return;
+					else if (c == '*')
+						after_operand = !after_operand;
+					else if (c != '@')
+						after_operand = (c == ')' || c == ']' || c == '.');
+					else
+					{
+						/*
+						 * Default namespace declarations do not apply to
+						 * attribute names, so an attribute name is copied
+						 * as is, together with its prefix if it has one.
+						 */
+						nextXPathToken(parser, &t1);
+						after_operand = (t1.ttype == XPATH_TOKEN_NAME);
+						if (!after_operand)
+							pushXPathToken(parser, &t1);
+						else
+						{
+							nextXPathToken(parser, &t2);
+							writeXPathToken(str, &t1);
+							if (t2.ttype == XPATH_TOKEN_OTHER && *t2.start == ':')
+								after_operand = writeXPathQualifier(str, parser, &t2);
+							else
+								pushXPathToken(parser, &t2);
+						}
+					}
+					nextXPathToken(parser, &t1);
+				}
+				break;
+
+			case XPATH_TOKEN_NONE:
+				elog(ERROR, "should not be here");
+		}
+	}
+}
+
+/*
+ * transformXPath
+ *		Rewrite xpath so that every unprefixed element name is qualified
+ *		with the prefix def_namespace_name.
+ *
+ * str is initialized here and receives the transformed expression.  When
+ * def_namespace_name is NULL, no prefix is added.
+ */
+void
+transformXPath(StringInfo str, char *xpath,
+			   char *def_namespace_name)
+{
+	XPathParserData parser;
+
+	initStringInfo(str);
+	initXPathParser(&parser, xpath);
+	_transformXPath(str, &parser, false, def_namespace_name);
+}
+
+#endif
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index bcc585d427..b069286423 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -1085,7 +1085,11 @@ SELECT * FROM
XMLTABLE(XMLNAMESPACES(DEFAULT 'http://x.y'), '/rows/row' PASSING '10' COLUMNS a int PATH 'a'); -ERROR: DEFAULT namespace is not supported + a +---- + 10 +(1 row) + -- used in prepare statements PREPARE pp AS SELECT xmltable.* @@ -1452,3 +1456,22 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c 14 (4 rows) +-- default namespaces +CREATE TABLE t1 (id int, doc xml); +INSERT INTO t1 VALUES (5, '50'); +SELECT x.* FROM t1, xmltable(XMLNAMESPACES('http://x.y' AS x), '/x:rows/x:row' PASSING t1.doc COLUMNS data int PATH 'x:a[1][@hoge]') AS x; + data +------ + 50 +(1 row) + +SELECT x.* FROM t1, xmltable(XMLNAMESPACES(DEFAULT 'http://x.y'), '/rows/row' PASSING t1.doc COLUMNS data int PATH 'a[1][@hoge]') AS x; + data +------ + 50 +(1 row) + +-- should fail +SELECT x.* FROM t1, xmltable(XMLNAMESPACES('http://x.y' AS "pgdefnamespace.pgsqlxml.internal"), '/x:rows/x:row' PASSING t1.doc COLUMNS data int PATH 'x:a[1][@hoge]') AS x; +ERROR: cannot to use "pgdefnamespace.pgsqlxml.internal" as namespace name +DETAIL: "pgdefnamespace.pgsqlxml.internal" is reserved for internal purpose diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index eb4687fb09..b18d1b5eab 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -558,3 +558,13 @@ INSERT INTO xmltest2 VALUES('2', 'D'); SELECT xmltable.* FROM xmltest2, LATERAL xmltable('/d/r' PASSING x COLUMNS a int PATH '' || lower(_path) || 'c'); SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH '.'); SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH 'x' DEFAULT ascii(_path) - 54); + +-- default namespaces +CREATE TABLE t1 (id int, doc xml); +INSERT INTO t1 VALUES (5, '50'); + +SELECT x.* FROM t1, xmltable(XMLNAMESPACES('http://x.y' AS x), '/x:rows/x:row' PASSING t1.doc COLUMNS data int PATH 'x:a[1][@hoge]') AS x; +SELECT x.* FROM t1, 
xmltable(XMLNAMESPACES(DEFAULT 'http://x.y'), '/rows/row' PASSING t1.doc COLUMNS data int PATH 'a[1][@hoge]') AS x; + +-- should fail +SELECT x.* FROM t1, xmltable(XMLNAMESPACES('http://x.y' AS "pgdefnamespace.pgsqlxml.internal"), '/x:rows/x:row' PASSING t1.doc COLUMNS data int PATH 'x:a[1][@hoge]') AS x;