Re: [PATCH] Re: Adding xpath_exists function - Mailing list pgsql-hackers
From | Mike Fowler |
---|---|
Subject | Re: [PATCH] Re: Adding xpath_exists function |
Date | |
Msg-id | 4C29CCE8.7070405@mlfowler.com Whole thread Raw |
In response to | [PATCH] Re: Adding xpath_exists function (Mike Fowler <mike@mlfowler.com>) |
Responses |
Review: Re: [PATCH] Re: Adding xpath_exists function
Re: [PATCH] Re: Adding xpath_exists function |
List | pgsql-hackers |
Mike Fowler wrote: > Bruce Momjian wrote: >> I have added this to the next commit-fest: >> >> https://commitfest.postgresql.org/action/commitfest_view?id=6 > Thanks Bruce. Attached is a revised patch which changes the code > slightly such that it uses an older version of the libxml library. > I've added comments to the code so that we remember why we didn't use > the latest function. After seeing some other posts in the last couple of days, I realised I hadn't documented the function in the SGML. I have now done so, and added a couple of tests with XML literals. Please find the patch attached. Now time to go correct the xmlexists patch too... -- Mike Fowler Registered Linux user: 379787 *** a/doc/src/sgml/func.sgml --- b/doc/src/sgml/func.sgml *************** *** 8626,8631 **** SELECT xpath('/my:a/text()', '<my:a xmlns:my="http://example.com">test</my:a>', --- 8626,8664 ---- (1 row) ]]></screen> </para> + + <sect3> + <title>xpath_exists</title> + + <indexterm> + <primary>xpath_exists</primary> + </indexterm> + + <synopsis> + <function>xpath_exists</function>(<replaceable>xpath</replaceable>, <replaceable>xml</replaceable><optional>, <replaceable>nsarray</replaceable></optional>) + </synopsis> + + <para> + The function <function>xpath_exists</function> is a specialised form + of the <function>xpath</function> function. Though the functions are + syntactically the same the xpath expressions are evaluated in differing + contexts. Instead of returning the XML values that satisfy the xpath, this + function returns a boolean indicating whether the query was satisfied or not. + </para> + + <para> + Example: + <screen><![CDATA[ + SELECT xpath_exists('/my:a/text()', '<my:a xmlns:my="http://example.com">test</my:a>', + ARRAY[ARRAY['my', 'http://example.com']]); + + xpath_exists + ------------ + t + (1 row) + ]]></screen> + </para> + </sect3> </sect2> <sect2 id="functions-xml-mapping"> *** a/src/backend/utils/adt/xml.c --- b/src/backend/utils/adt/xml.c *************** *** 3495,3497 **** xpath(PG_FUNCTION_ARGS) --- 3495,3681 ---- return 0; #endif } + + /* + * Determines if the node specified by the supplied XPath exists + * in a given XML document, returning a boolean. + * + * It is up to the user to ensure that the XML passed is in fact + * an XML document - XPath doesn't work easily on fragments without + * a context node being known. + */ + Datum + xpath_exists(PG_FUNCTION_ARGS) + { + #ifdef USE_LIBXML + text *xpath_expr_text = PG_GETARG_TEXT_P(0); + xmltype *data = PG_GETARG_XML_P(1); + ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2); + ArrayBuildState *astate = NULL; + xmlParserCtxtPtr ctxt = NULL; + xmlDocPtr doc = NULL; + xmlXPathContextPtr xpathctx = NULL; + xmlXPathCompExprPtr xpathcomp = NULL; + xmlXPathObjectPtr xpathobj = NULL; + char *datastr; + int32 len; + int32 xpath_len; + xmlChar *string; + xmlChar *xpath_expr; + int i; + int res_nitems; + int ndim; + Datum *ns_names_uris; + bool *ns_names_uris_nulls; + int ns_count; + int result; + + /* + * Namespace mappings are passed as text[]. If an empty array is passed + * (ndim = 0, "0-dimensional"), then there are no namespace mappings. + * Else, a 2-dimensional array with length of the second axis being equal + * to 2 should be passed, i.e., every subarray contains 2 elements, the + * first element defining the name, the second one the URI. Example: + * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2', + * 'http://example2.com']]. + */ + ndim = ARR_NDIM(namespaces); + if (ndim != 0) + { + int *dims; + + dims = ARR_DIMS(namespaces); + + if (ndim != 2 || dims[1] != 2) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid array for XML namespace mapping"), + errdetail("The array must be two-dimensional with length of the second axis equal to 2."))); + + Assert(ARR_ELEMTYPE(namespaces) == TEXTOID); + + deconstruct_array(namespaces, TEXTOID, -1, false, 'i', + &ns_names_uris, &ns_names_uris_nulls, + &ns_count); + + Assert((ns_count % 2) == 0); /* checked above */ + ns_count /= 2; /* count pairs only */ + } + else + { + ns_names_uris = NULL; + ns_names_uris_nulls = NULL; + ns_count = 0; + } + + datastr = VARDATA(data); + len = VARSIZE(data) - VARHDRSZ; + xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ; + if (xpath_len == 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("empty XPath expression"))); + + string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar)); + memcpy(string, datastr, len); + string[len] = '\0'; + + xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar)); + memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); + xpath_expr[xpath_len] = '\0'; + + pg_xml_init(); + xmlInitParser(); + + PG_TRY(); + { + /* + * redundant XML parsing (two parsings for the same value during one + * command execution are possible) + */ + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL) + xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); + if (doc == NULL) + xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + "could not parse XML document"); + xpathctx = xmlXPathNewContext(doc); + if (xpathctx == NULL) + xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate XPath context"); + xpathctx->node = xmlDocGetRootElement(doc); + if (xpathctx->node == NULL) + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "could not find root XML element"); + + /* register namespaces, if any */ + if (ns_count > 0) + { + for (i = 0; i < ns_count; i++) + { + char *ns_name; + char *ns_uri; + + if (ns_names_uris_nulls[i * 2] || + ns_names_uris_nulls[i * 2 + 1]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("neither namespace name nor URI may be null"))); + ns_name = TextDatumGetCString(ns_names_uris[i * 2]); + ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]); + if (xmlXPathRegisterNs(xpathctx, + (xmlChar *) ns_name, + (xmlChar *) ns_uri) != 0) + ereport(ERROR, /* is this an internal error??? */ + (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"", + ns_name, ns_uri))); + } + } + + xpathcomp = xmlXPathCompile(xpath_expr); + if (xpathcomp == NULL) /* TODO: show proper XPath error details */ + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "invalid XPath expression"); + + /* Version 2.6.27 introduces a function named xmlXPathCompiledEvalToBoolean + * however we can derive the existence by whether any nodes are returned + * thereby preventing a library version upgrade */ + xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); + if (xpathobj == NULL) /* TODO: reason? */ + xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + "could not create XPath object"); + + if (xpathobj->nodesetval == NULL) + result = 0; + else + result = xpathobj->nodesetval->nodeNr; + } + PG_CATCH(); + { + if (xpathobj) + xmlXPathFreeObject(xpathobj); + if (xpathcomp) + xmlXPathFreeCompExpr(xpathcomp); + if (xpathctx) + xmlXPathFreeContext(xpathctx); + if (doc) + xmlFreeDoc(doc); + if (ctxt) + xmlFreeParserCtxt(ctxt); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlXPathFreeCompExpr(xpathcomp); + xmlXPathFreeContext(xpathctx); + xmlFreeDoc(doc); + xmlFreeParserCtxt(ctxt); + + PG_RETURN_BOOL(result); + #else + NO_XML_SUPPORT(); + return 0; + #endif + } *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** *** 4385,4390 **** DESCR("evaluate XPath expression, with namespaces support"); --- 4385,4395 ---- DATA(insert OID = 2932 ( xpath PGNSP PGUID 14 1 0 0 f f f t f i 2 0 143 "25 142" _null_ _null_ _null_ _null_ "selectpg_catalog.xpath($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); DESCR("evaluate XPath expression"); + DATA(insert OID = 3037 ( xpath_exists PGNSP PGUID 12 1 0 0 f f f t f i 3 0 16 "25 142 1009" _null_ _null_ _null_ _null_xpath_exists _null_ _null_ _null_ )); + DESCR("evaluate XPath expression in a boolean context, with namespaces support"); + DATA(insert OID = 3038 ( xpath_exists PGNSP PGUID 14 1 0 0 f f f t f i 2 0 16 "25 142" _null_ _null_ _null_ _null_"select pg_catalog.xpath_exists($1, $2, ''{}''::pg_catalog.text[])" _null_ _null_ _null_ )); + DESCR("evaluate XPath expression in a boolean context"); + /* uuid */ DATA(insert OID = 2952 ( uuid_in PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2950 "2275" _null_ _null_ _null_ _null_uuid_in _null_ _null_ _null_ )); DESCR("I/O"); *** a/src/include/utils/xml.h --- b/src/include/utils/xml.h *************** *** 37,42 **** extern Datum texttoxml(PG_FUNCTION_ARGS); --- 37,43 ---- extern Datum xmltotext(PG_FUNCTION_ARGS); extern Datum xmlvalidate(PG_FUNCTION_ARGS); extern Datum xpath(PG_FUNCTION_ARGS); + extern Datum xpath_exists(PG_FUNCTION_ARGS); extern Datum table_to_xml(PG_FUNCTION_ARGS); extern Datum query_to_xml(PG_FUNCTION_ARGS); *** a/src/test/regress/expected/xml.out --- b/src/test/regress/expected/xml.out *************** *** 502,504 **** SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); --- 502,557 ---- {<b>two</b>,<b>etc</b>} (1 row) + -- Test xpath_exists evaluation + SELECT xpath_exists('//town[text() = ''Toronto'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml); + xpath_exists + -------------- + f + (1 row) + + SELECT xpath_exists('//town[text() = ''Cwmbran'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml); + xpath_exists + -------------- + t + (1 row) + + INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data); + count + ------- + 0 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data); + count + ------- + 2 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data); + count + ------- + 1 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 0 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 2 + (1 row) + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]); + count + ------- + 1 + (1 row) + *** a/src/test/regress/sql/xml.sql --- b/src/test/regress/sql/xml.sql *************** *** 163,165 **** SELECT xpath('', '<!-- error -->'); --- 163,181 ---- SELECT xpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:pieceid="2" /></local:data>'); SELECT xpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:pieceid="2" /></local:data>', ARRAY[ARRAY['loc', 'http://127.0.0.1']]); SELECT xpath('//b', '<a>one <b>two</b> three <b>etc</b></a>'); + + -- Test xpath_exists evaluation + SELECT xpath_exists('//town[text() = ''Toronto'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml); + SELECT xpath_exists('//town[text() = ''Cwmbran'']','<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>'::xml); + + INSERT INTO xmltest VALUES (4, '<menu><beers><name>Budvar</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (5, '<menu><beers><name>Molson</name><cost>free</cost><name>Carling</name><cost>lots</cost></beers></menu>'::xml); + INSERT INTO xmltest VALUES (6, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Budvar</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + INSERT INTO xmltest VALUES (7, '<myns:menu xmlns:myns="http://myns.com"><myns:beers><myns:name>Molson</myns:name><myns:cost>free</myns:cost><myns:name>Carling</myns:name><myns:cost>lots</myns:cost></myns:beers></myns:menu>'::xml); + + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beer',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/menu/beers/name[text() = ''Molson'']',data); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beer',data,ARRAY[ARRAY['myns','http://myns.com']]); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers',data,ARRAY[ARRAY['myns','http://myns.com']]); + SELECT COUNT(id) FROM xmltest WHERE xpath_exists('/myns:menu/myns:beers/myns:name[text() = ''Molson'']',data,ARRAY[ARRAY['myns','http://myns.com']]);
pgsql-hackers by date: