From 6e75d02598594e55930d4441724e136f4273d0bd Mon Sep 17 00:00:00 2001 From: J Smith Date: Sun, 6 Nov 2011 16:48:20 -0500 Subject: [PATCH] Fix weirdness when dealing with UTF-8 in buggy libc implementations. The OS X libc inherited a bug from an older FreeBSD libc that causes it to incorrectly treat certain characters as whitespace when using UTF-8, so sscanf() can split a line in the wrong place. Work around this by converting each line to wide characters, parsing it with swscanf(), and converting the two resulting fields back to multibyte strings. --- contrib/unaccent/unaccent.c | 10 +++++++++- 1 files changed, 9 insertions(+), 1 deletions(-) diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index d9c2eac..f5ab8b2 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -93,6 +93,9 @@ initSuffixTree(char *filename) { char src[4096]; char trg[4096]; + wchar_t wsrc[4096]; + wchar_t wtrg[4096]; + wchar_t wline[4096]; int srclen; int trglen; char *line = NULL; @@ -108,9 +111,14 @@ initSuffixTree(char *filename) */ while ((line = tsearch_readline(&trst)) != NULL) { - if (sscanf(line, "%s\t%s\n", src, trg) != 2) + char2wchar(wline, 4096, line, strlen(line) + 1, (pg_locale_t) 0); + + if (swscanf(wline, L"%ls\t%ls\n", wsrc, wtrg) != 2) continue; + wchar2char(src, wsrc, 4096, (pg_locale_t) 0); + wchar2char(trg, wtrg, 4096, (pg_locale_t) 0); + srclen = strlen(src); trglen = strlen(trg); -- 1.7.7.2