Re: Simplify formatting.c - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: Simplify formatting.c |
Date | |
Msg-id | 200806181843.m5IIhgm03448@momjian.us Whole thread Raw |
In response to | Re: Simplify formatting.c (Bruce Momjian <bruce@momjian.us>) |
Responses | Re: Simplify formatting.c |
List | pgsql-patches |
Bruce Momjian wrote: > Alvaro Herrera wrote: > > Bruce Momjian wrote: > > > > > I moved str_initcap() over into oracle_compat.c and then had initcap() > > > convert to/from TEXT to call it. The code is a little weird because > > > str_initcap() needs to convert to text to use texttowcs(), so in > > > multibyte encodings initcap converts the string to text, then to char, > > > then to text to call texttowcs(). I didn't see a cleaner way to do > > > this. > > > > Why not use wchar2char? It seems there's room for extra cleanup here. > > > > Also, the prototype of str_initcap in builtins.h looks out of place. > > I talked to Alvaro on IM, and there is certainly much more cleanup to do > in this area. I will work from the bottom up. First, is moving the > USE_WIDE_UPPER_LOWER define to c.h, and removing TS_USE_WIDE and using > USE_WIDE_UPPER_LOWER instead. Patch attached and applied. The second step is to move wchar2char() and char2wchar() from tsearch into /mb to be easier to use for other modules; also move pnstrdup(). Patch attached and applied. -- Bruce Momjian <bruce@momjian.us> http://momjian.us EnterpriseDB http://enterprisedb.com + If your life is a hard drive, Christ can be your backup. + Index: src/backend/tsearch/ts_locale.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_locale.c,v retrieving revision 1.8 diff -c -c -r1.8 ts_locale.c *** src/backend/tsearch/ts_locale.c 17 Jun 2008 16:09:06 -0000 1.8 --- src/backend/tsearch/ts_locale.c 18 Jun 2008 18:37:02 -0000 *************** *** 16,140 **** #include "tsearch/ts_locale.h" #include "tsearch/ts_public.h" - #ifdef USE_WIDE_UPPER_LOWER - /* - * wchar2char --- convert wide characters to multibyte format - * - * This has the same API as the standard wcstombs() function; in particular, - * tolen is the maximum number of bytes to store at *to, and *from must be - * zero-terminated. The output will be zero-terminated iff there is room. 
- */ - size_t - wchar2char(char *to, const wchar_t *from, size_t tolen) - { - if (tolen == 0) - return 0; - - #ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, - NULL, NULL); - - if (r <= 0) - return (size_t) -1; - - Assert(r <= tolen); - - /* Microsoft counts the zero terminator in the result */ - return r - 1; - } - #endif /* WIN32 */ - - return wcstombs(to, from, tolen); - } - - /* - * char2wchar --- convert multibyte characters to wide characters - * - * This has almost the API of mbstowcs(), except that *from need not be - * null-terminated; instead, the number of input bytes is specified as - * fromlen. Also, we ereport() rather than returning -1 for invalid - * input encoding. tolen is the maximum number of wchar_t's to store at *to. - * The output will be zero-terminated iff there is room. - */ - size_t - char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) - { - if (tolen == 0) - return 0; - - #ifdef WIN32 - if (GetDatabaseEncoding() == PG_UTF8) - { - int r; - - /* stupid Microsloth API does not work for zero-length input */ - if (fromlen == 0) - r = 0; - else - { - r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); - - if (r <= 0) - { - /* see notes in oracle_compat.c about error reporting */ - pg_verifymbstr(from, fromlen, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - } - - Assert(r < tolen); - to[r] = 0; - - return r; - } - #endif /* WIN32 */ - - if (lc_ctype_is_c()) - { - /* - * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be - * allocated with sufficient space - */ - return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); - } - else - { - /* - * mbstowcs requires ending '\0' - */ - char *str = pnstrdup(from, fromlen); - size_t result; - - 
result = mbstowcs(to, str, tolen); - - pfree(str); - - if (result == (size_t) -1) - { - pg_verifymbstr(from, fromlen, false); - ereport(ERROR, - (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), - errmsg("invalid multibyte character for locale"), - errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); - } - - if (result < tolen) - to[result] = 0; - - return result; - } - } - - int t_isdigit(const char *ptr) { --- 16,23 ---- Index: src/backend/tsearch/ts_utils.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/tsearch/ts_utils.c,v retrieving revision 1.9 diff -c -c -r1.9 ts_utils.c *** src/backend/tsearch/ts_utils.c 1 Jan 2008 19:45:52 -0000 1.9 --- src/backend/tsearch/ts_utils.c 18 Jun 2008 18:37:02 -0000 *************** *** 153,165 **** bsearch(&key, s->stop, s->len, sizeof(char *), comparestr)) ? true : false; } - - char * - pnstrdup(const char *in, int len) - { - char *out = palloc(len + 1); - - memcpy(out, in, len); - out[len] = '\0'; - return out; - } --- 153,155 ---- Index: src/backend/utils/mb/mbutils.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v retrieving revision 1.71 diff -c -c -r1.71 mbutils.c *** src/backend/utils/mb/mbutils.c 27 May 2008 12:24:42 -0000 1.71 --- src/backend/utils/mb/mbutils.c 18 Jun 2008 18:37:02 -0000 *************** *** 555,560 **** --- 555,688 ---- return result; } + + + #ifdef USE_WIDE_UPPER_LOWER + + /* + * wchar2char --- convert wide characters to multibyte format + * + * This has the same API as the standard wcstombs() function; in particular, + * tolen is the maximum number of bytes to store at *to, and *from must be + * zero-terminated. The output will be zero-terminated iff there is room. 
+ */ + size_t + wchar2char(char *to, const wchar_t *from, size_t tolen) + { + size_t result; + + if (tolen == 0) + return 0; + + #ifdef WIN32 + /* + * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, + * and for some reason mbstowcs and wcstombs won't do this for us, + * so we use MultiByteToWideChar(). + */ + if (GetDatabaseEncoding() == PG_UTF8) + { + result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen, + NULL, NULL); + /* A zero return is failure */ + if (result <= 0) + result = -1; + else + { + Assert(result <= tolen); + /* Microsoft counts the zero terminator in the result */ + result--; + } + } + else + #endif /* WIN32 */ + result = wcstombs(to, from, tolen); + return result; + } + + /* + * char2wchar --- convert multibyte characters to wide characters + * + * This has almost the API of mbstowcs(), except that *from need not be + * null-terminated; instead, the number of input bytes is specified as + * fromlen. Also, we ereport() rather than returning -1 for invalid + * input encoding. tolen is the maximum number of wchar_t's to store at *to. + * The output will be zero-terminated iff there is room. 
+ */ + size_t + char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen) + { + size_t result; + + if (tolen == 0) + return 0; + + #ifdef WIN32 + /* See WIN32 "Unicode" comment above */ + if (GetDatabaseEncoding() == PG_UTF8) + { + /* Win32 API does not work for zero-length input */ + if (fromlen == 0) + result = 0; + else + { + result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1); + /* A zero return is failure */ + if (result == 0) + result = -1; + } + + if (result != -1) + { + Assert(result < tolen); + /* Append trailing null wchar (MultiByteToWideChar() does not) */ + to[result] = 0; + } + } + else + #endif /* WIN32 */ + { + if (lc_ctype_is_c()) + { + /* + * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be + * allocated with sufficient space + */ + result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen); + } + else + { + /* mbstowcs requires ending '\0' */ + char *str = pnstrdup(from, fromlen); + + result = mbstowcs(to, str, tolen); + pfree(str); + } + } + + if (result == -1) + { + /* + * Invalid multibyte character encountered. We try to give a useful + * error message by letting pg_verifymbstr check the string. But it's + * possible that the string is OK to us, and not OK to mbstowcs --- + * this suggests that the LC_CTYPE locale is different from the + * database encoding. Give a generic error message if verifymbstr + * can't find anything wrong. + */ + pg_verifymbstr(from, fromlen, false); /* might not return */ + /* but if it does ... 
*/ + ereport(ERROR, + (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), + errmsg("invalid multibyte character for locale"), + errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding."))); + } + + return result; + } + + #endif + /* convert a multibyte string to a wchar */ int pg_mb2wchar(const char *from, pg_wchar *to) Index: src/backend/utils/mmgr/mcxt.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/mmgr/mcxt.c,v retrieving revision 1.63 diff -c -c -r1.63 mcxt.c *** src/backend/utils/mmgr/mcxt.c 1 Jan 2008 19:45:55 -0000 1.63 --- src/backend/utils/mmgr/mcxt.c 18 Jun 2008 18:37:05 -0000 *************** *** 624,629 **** --- 624,641 ---- pointer, size); } + /* Like pstrdup(), but append null byte */ + char * + pnstrdup(const char *in, int len) + { + char *out = palloc(len + 1); + + memcpy(out, in, len); + out[len] = '\0'; + return out; + } + + /* * MemoryContextSwitchTo * Returns the current context; installs the given context. 
Index: src/include/mb/pg_wchar.h =================================================================== RCS file: /cvsroot/pgsql/src/include/mb/pg_wchar.h,v retrieving revision 1.78 diff -c -c -r1.78 pg_wchar.h *** src/include/mb/pg_wchar.h 1 Jan 2008 19:45:58 -0000 1.78 --- src/include/mb/pg_wchar.h 18 Jun 2008 18:37:05 -0000 *************** *** 362,367 **** --- 362,372 ---- extern int pg_encoding_max_length(int encoding); extern int pg_database_encoding_max_length(void); + #ifdef USE_WIDE_UPPER_LOWER + extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen); + extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen); + #endif + extern void SetDefaultClientEncoding(void); extern int SetClientEncoding(int encoding, bool doit); extern void InitializeClientEncoding(void); Index: src/include/tsearch/ts_locale.h =================================================================== RCS file: /cvsroot/pgsql/src/include/tsearch/ts_locale.h,v retrieving revision 1.6 diff -c -c -r1.6 ts_locale.h *** src/include/tsearch/ts_locale.h 17 Jun 2008 16:09:06 -0000 1.6 --- src/include/tsearch/ts_locale.h 18 Jun 2008 18:37:05 -0000 *************** *** 33,41 **** #ifdef USE_WIDE_UPPER_LOWER - extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen); - extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen); - extern int t_isdigit(const char *ptr); extern int t_isspace(const char *ptr); extern int t_isalpha(const char *ptr); --- 33,38 ---- Index: src/include/tsearch/ts_public.h =================================================================== RCS file: /cvsroot/pgsql/src/include/tsearch/ts_public.h,v retrieving revision 1.9 diff -c -c -r1.9 ts_public.h *** src/include/tsearch/ts_public.h 16 May 2008 16:31:02 -0000 1.9 --- src/include/tsearch/ts_public.h 18 Jun 2008 18:37:05 -0000 *************** *** 62,69 **** extern char *get_tsearch_config_filename(const char *basename, const char *extension); - 
extern char *pnstrdup(const char *in, int len); - /* * Often useful stopword list management */ --- 62,67 ---- Index: src/include/utils/palloc.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/palloc.h,v retrieving revision 1.38 diff -c -c -r1.38 palloc.h *** src/include/utils/palloc.h 1 Jan 2008 19:45:59 -0000 1.38 --- src/include/utils/palloc.h 18 Jun 2008 18:37:05 -0000 *************** *** 70,75 **** --- 70,77 ---- extern void *repalloc(void *pointer, Size size); + extern char *pnstrdup(const char *in, int len); + /* * MemoryContextSwitchTo can't be a macro in standard C compilers. * But we can make it an inline function when using GCC.
pgsql-patches by date: