From bd2e6c9d4197d3f98db509ec112b6f21db97f287 Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 30 Apr 2025 13:19:42 +0300 Subject: [PATCH v5 2/2] contrib/xml2: overloaded xslt_process() to provide variants for xmltype and specifying parameters in arrays * There are apparently no functions that accept XML as text, except for xmlparse(). xslt_process() should therefore also accept xmltype. * A version accepting text is still kept for backwards compatibility, but is considered deprecated. * The new xmltype-based version expects an array of stylesheet parameter-value pairs, which is less limited than the now deprecated way of encoding all stylesheet parameters into a single text argument. We can now accept an arbitrary number of parameters and you can include `=` and `,` signs in both the key and value strings. hstore was not used because it lives in a separate contrib module, and xml2 should not depend on any additional module. * The new implementation respects the database's encoding - text strings are always converted to UTF8 before passing them into libxml2. * On the downside, xml_parse() had to be made an external function. Since a declaration cannot be added to xml.h without drawing in libxml2 headers, the declaration is repeated in xslt_proc.c. Perhaps xml_parse() should be declared in a separate internal header? * xmlCtxtReadDoc() now sets a dummy "SQL" URL to preserve line numbers in XSLT stylesheet errors. This change at least does not break the test suite. 
--- contrib/xml2/expected/xml2.out | 13 +++ contrib/xml2/sql/xml2.sql | 8 ++ contrib/xml2/xml2--1.1.sql | 11 +++ contrib/xml2/xslt_proc.c | 148 +++++++++++++++++++++++++-------- doc/src/sgml/xml2.sgml | 19 +++-- src/backend/utils/adt/xml.c | 19 +++-- 6 files changed, 172 insertions(+), 46 deletions(-) diff --git a/contrib/xml2/expected/xml2.out b/contrib/xml2/expected/xml2.out index 157d584e63..0a8a628020 100644 --- a/contrib/xml2/expected/xml2.out +++ b/contrib/xml2/expected/xml2.out @@ -278,3 +278,16 @@ Variable 'n1' has not been declared. Undefined variable runtime error: file SQL line 3 element value-of XPath evaluation returned no result. +-- xmltype and Array-based signature +SELECT xslt_process(xmlelement(name xml), +$$ + +$$::xml, ARRAY['n1','"foo"']); + xslt_process +-------------- + foo + + +(1 row) + diff --git a/contrib/xml2/sql/xml2.sql b/contrib/xml2/sql/xml2.sql index 9d42ac8a0b..7555854d49 100644 --- a/contrib/xml2/sql/xml2.sql +++ b/contrib/xml2/sql/xml2.sql @@ -161,3 +161,11 @@ $$ $$)::xml; + +-- xmltype and Array-based signature +SELECT xslt_process(xmlelement(name xml), +$$ + +$$::xml, ARRAY['n1','"foo"']); diff --git a/contrib/xml2/xml2--1.1.sql b/contrib/xml2/xml2--1.1.sql index 671372cb27..a579a1e5e1 100644 --- a/contrib/xml2/xml2--1.1.sql +++ b/contrib/xml2/xml2--1.1.sql @@ -71,3 +71,14 @@ CREATE FUNCTION xslt_process(text,text) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION xslt_process(xml,xml,text[]) +RETURNS xml +AS 'MODULE_PATHNAME','xslt_process_xmltype' +LANGUAGE C STRICT VOLATILE PARALLEL SAFE; + +-- the function checks for the correct argument count +CREATE FUNCTION xslt_process(xml,xml) +RETURNS xml +AS 'MODULE_PATHNAME','xslt_process_xmltype' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 17776f78b5..074952cf8b 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -10,6 +10,9 @@ #include "fmgr.h" 
#include "utils/builtins.h" #include "utils/xml.h" +#include "utils/array.h" +#include "utils/memutils.h" +#include "mb/pg_wchar.h" #ifdef USE_LIBXSLT @@ -35,9 +38,18 @@ extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* local defs */ +static xmltype *xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params); static const char **parse_params(text *paramstr); #endif /* USE_LIBXSLT */ +/* + * FIXME: This cannot easily be exposed in xml.h. + * Perhaps there should be an xml-internal.h? + */ +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes, + Node *escontext); PG_FUNCTION_INFO_V1(xslt_process); @@ -48,9 +60,113 @@ xslt_process(PG_FUNCTION_ARGS) text *doct = PG_GETARG_TEXT_PP(0); text *ssheet = PG_GETARG_TEXT_PP(1); - text *volatile result = NULL; - text *paramstr; - const char **params; + const char **params = NULL; + text *result; + + if (fcinfo->nargs == 3) + { + text *paramstr = PG_GETARG_TEXT_PP(2); + + params = parse_params(paramstr); + } + + result = xslt_process_internal(doct, ssheet, params); + + PG_RETURN_TEXT_P(result); + +#else /* !USE_LIBXSLT */ + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xslt_process() is not available without libxslt"))); + PG_RETURN_NULL(); + +#endif /* USE_LIBXSLT */ +} + +/* + * xslt_process(xml, xml [, text[]]): xmltype-based variant. The optional + * array holds alternating parameter names and XPath value expressions; + * its length must be even and no element may be NULL. + */ +PG_FUNCTION_INFO_V1(xslt_process_xmltype); + +Datum +xslt_process_xmltype(PG_FUNCTION_ARGS) +{ +#ifdef USE_LIBXSLT + + xmltype *doct = PG_GETARG_XML_P(0); + xmltype *ssheet = PG_GETARG_XML_P(1); + const char **params = NULL; + xmltype *result; + + /* + * Parameters are key-value pairs. The values are XPath expressions, so + * strings will have to be escaped with single or double quotes. Even + * `xsltproc --stringparam` does nothing else than adding single or double + * quotes and fails if the value contains both. 
+ */ + if (fcinfo->nargs == 3) + { + ArrayType *paramarray = PG_GETARG_ARRAYTYPE_P(2); + Datum *arr_datums; + bool *arr_nulls; + int arr_count; + int i; + + deconstruct_array_builtin(paramarray, TEXTOID, &arr_datums, &arr_nulls, &arr_count); + + if ((arr_count % 2) != 0) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_ELEMENT_ERROR), + errmsg("number of stylesheet parameters (%d) must be a multiple of 2", + arr_count))); + + params = palloc_array(const char *, arr_count + 1); + + for (i = 0; i < arr_count; i++) + { + char *cstr; + + /* + * Skipping a NULL would shift all following entries and pair + * keys with the wrong values, so reject it instead. + */ + if (arr_nulls[i]) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("stylesheet parameters must not be null"))); + + cstr = TextDatumGetCString(arr_datums[i]); + params[i] = (char *) pg_do_encoding_conversion((unsigned char *) cstr, + strlen(cstr), + GetDatabaseEncoding(), + PG_UTF8); + } + params[arr_count] = NULL; + } + + result = xslt_process_internal(doct, ssheet, params); + + PG_RETURN_XML_P(result); + +#else /* !USE_LIBXSLT */ + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("xslt_process() is not available without libxslt"))); + PG_RETURN_NULL(); + +#endif /* USE_LIBXSLT */ +} + +#ifdef USE_LIBXSLT + +static xmltype * +xslt_process_internal(xmltype *doct, xmltype *ssheet, const char **params) +{ + text *volatile result; PgXmlErrorContext *xmlerrcxt; volatile xsltStylesheetPtr stylesheet = NULL; volatile xmlDocPtr doctree = NULL; @@ -64,18 +180,6 @@ xslt_process(PG_FUNCTION_ARGS) xmlGenericErrorFunc saved_errfunc; void *saved_errcxt; - if (fcinfo->nargs == 3) - { - paramstr = PG_GETARG_TEXT_PP(2); - params = parse_params(paramstr); - } - else - { - /* No parameters */ - params = (const char **) palloc(sizeof(char *)); - params[0] = NULL; - } - /* Setup parser */ xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_ALL); @@ -93,21 +197,18 @@ xslt_process(PG_FUNCTION_ARGS) int reslen = 0; /* - * Parse document. It's important to set an "URL", so libxslt includes - * line numbers in error messages (cf. xsltPrintErrorContext()). + * Parse document. 
*/ - doctree = xmlReadMemory((char *) VARDATA_ANY(doct), - VARSIZE_ANY_EXHDR(doct), "SQL", NULL, - XML_PARSE_NOENT); + doctree = xml_parse(doct, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), NULL, NULL, NULL); if (doctree == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "error parsing XML document"); /* Same for stylesheet */ - ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), - VARSIZE_ANY_EXHDR(ssheet), "SQL", NULL, - XML_PARSE_NOENT); + ssdoc = xml_parse(ssheet, XMLOPTION_DOCUMENT, true, + GetDatabaseEncoding(), NULL, NULL, NULL); if (ssdoc == NULL || pg_xml_error_occurred(xmlerrcxt)) xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, @@ -198,18 +289,9 @@ xslt_process(PG_FUNCTION_ARGS) xsltSetGenericErrorFunc(saved_errcxt, saved_errfunc); pg_xml_done(xmlerrcxt, false); - PG_RETURN_TEXT_P(result); -#else /* !USE_LIBXSLT */ - - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("xslt_process() is not available without libxslt"))); - PG_RETURN_NULL(); -#endif /* USE_LIBXSLT */ + return result; } -#ifdef USE_LIBXSLT - static const char ** parse_params(text *paramstr) { diff --git a/doc/src/sgml/xml2.sgml b/doc/src/sgml/xml2.sgml index 9fd613f967..dc6fb40121 100644 --- a/doc/src/sgml/xml2.sgml +++ b/doc/src/sgml/xml2.sgml @@ -408,22 +408,29 @@ ORDER BY doc_num, line_num; -xslt_process(text document, text stylesheet, text paramlist) returns text +xslt_process(xml document, xml stylesheet, text[] paramlist) returns xml This function applies the XSL stylesheet to the document and returns - the transformed result. The paramlist is a list of parameter - assignments to be used in the transformation, specified in the form - a=1,b=2. Note that the - parameter parsing is very simple-minded: parameter values cannot - contain commas! + the transformed result. 
The paramlist is an array of parameter + assignments to be used in the transformation, specified in pairs of + key and value strings (e.g. ARRAY['a','1', 'b','2']). + The length of the array must be even. + Note that the values are still interpreted as XPath expressions, so string values need to + be quoted in single or double quotes (e.g. ARRAY['a','"string"']). There is also a two-parameter version of xslt_process which does not pass any parameters to the transformation. + + + Deprecated variants of xslt_process accepting + text arguments and parameters encoded into single text strings + (e.g. a=1,b=2) are also still available. + diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index f54828fb99..3b8ab29555 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -155,11 +155,11 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp, static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone); static bool xml_doctype_in_content(const xmlChar *str); -static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, - bool preserve_whitespace, int encoding, - XmlOptionType *parsed_xmloptiontype, - xmlNodePtr *parsed_nodes, - Node *escontext); +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, + bool preserve_whitespace, int encoding, + XmlOptionType *parsed_xmloptiontype, + xmlNodePtr *parsed_nodes, + Node *escontext); static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt); static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj, ArrayBuildState *astate, @@ -1783,7 +1783,7 @@ xml_doctype_in_content(const xmlChar *str) * TODO maybe libxml2's xmlreader is better? 
(do not construct DOM, * yet do not use SAX - see xmlreader.c) */ -static xmlDocPtr +xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int encoding, XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes, @@ -1879,8 +1879,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); + /* + * Setting a dummy "SQL" URL is important for the + * xsltPrintErrorContext() when using the legacy text-based + * xslt_process() variant. + */ doc = xmlCtxtReadDoc(ctxt, utf8string, - NULL, /* no URL */ + "SQL", "UTF-8", options); -- 2.49.0