Re: Request for review: tsearch2 patch - Mailing list pgsql-hackers
From | Teodor Sigaev |
---|---|
Subject | Re: Request for review: tsearch2 patch |
Date | |
Msg-id | 45A4BF77.6020604@sigaev.ru Whole thread Raw |
In response to | Re: Request for review: tsearch2 patch (Tatsuo Ishii <ishii@postgresql.org>) |
Responses |
Re: Request for review: tsearch2 patch
|
List | pgsql-hackers |
Sorry for the delay, I was on holiday :) Did you test the patch on the Windows platform? Tatsuo Ishii wrote: > I have tested with locale-enabled environment and found a bug. Included > is the new version of patches. > > Teodor, Oleg, what do you think about these patches? > If ok, shall I commit to CVS head? > -- > Tatsuo Ishii > SRA OSS, Inc. Japan > >> Hi, >> >> Here are patches against tsearch2 with CVS head. Currently tsearch2 >> does not work with multibyte encoding which uses C locale. These >> patches are intended to solve the problem by using PostgreSQL in-house >> multibyte function instead of mbstowcs which does not work with C >> locale. Also iswalpha etc. will not be called in case of C locale >> since they are not working with it. Tested with the EUC_JP encoding >> (should be working with any multibyte encodings). Existing single byte >> encodings should not be broken by the patches, I did not test though. >> -- >> Tatsuo Ishii >> SRA OSS, Inc. Japan >> >> ------------------------------------------------------------------------ >> >> Index: ts_locale.c >> =================================================================== >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.c,v >> retrieving revision 1.7 >> diff -c -r1.7 ts_locale.c >> *** ts_locale.c 20 Nov 2006 14:03:30 -0000 1.7 >> --- ts_locale.c 4 Jan 2007 12:16:00 -0000 >> *************** >> *** 63,68 **** >> --- 63,101 ---- >> >> return mbstowcs(to, from, len); >> } >> + >> + #else /* WIN32 */ >> + >> + size_t >> + char2wchar(wchar_t *to, const char *from, size_t len) >> + { >> + wchar_t *result; >> + size_t n; >> + >> + if (to == NULL) >> + return 0; >> + >> + if (lc_ctype_is_c()) >> + { >> + /* allocate neccesary memory for "to" including NULL terminate */ >> + result = (wchar_t *)palloc((len+1)*sizeof(wchar_t)); >> + >> + /* do the conversion */ >> + n = (size_t)pg_mb2wchar_with_len(from, (pg_wchar *)result, len); >> + if (n > 0) >> + { >> + /* store the result */ >> + if (n > len) >> + n = len; >> + 
memcpy(to, result, n*sizeof(wchar_t)); >> + pfree(result); >> + *(to + n) = '\0'; >> + } >> + return n; >> + } >> + return mbstowcs(to, from, len); >> + } >> + >> #endif /* WIN32 */ >> >> int >> *************** >> *** 70,75 **** >> --- 103,113 ---- >> { >> wchar_t character; >> >> + if (lc_ctype_is_c()) >> + { >> + return isalpha(TOUCHAR(ptr)); >> + } >> + >> char2wchar(&character, ptr, 1); >> >> return iswalpha((wint_t) character); >> *************** >> *** 80,85 **** >> --- 118,128 ---- >> { >> wchar_t character; >> >> + if (lc_ctype_is_c()) >> + { >> + return isprint(TOUCHAR(ptr)); >> + } >> + >> char2wchar(&character, ptr, 1); >> >> return iswprint((wint_t) character); >> *************** >> *** 126,132 **** >> if ( wlen < 0 ) >> ereport(ERROR, >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), >> ! errmsg("transalation failed from server encoding to wchar_t"))); >> >> Assert(wlen<=len); >> wstr[wlen] = 0; >> --- 169,175 ---- >> if ( wlen < 0 ) >> ereport(ERROR, >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), >> ! errmsg("translation failed from server encoding to wchar_t"))); >> >> Assert(wlen<=len); >> wstr[wlen] = 0; >> *************** >> *** 152,158 **** >> if ( wlen < 0 ) >> ereport(ERROR, >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), >> ! errmsg("transalation failed from wchar_t to server encoding %d", errno))); >> Assert(wlen<=len); >> out[wlen]='\0'; >> } >> --- 195,201 ---- >> if ( wlen < 0 ) >> ereport(ERROR, >> (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), >> ! 
errmsg("translation failed from wchar_t to server encoding %d", errno))); >> Assert(wlen<=len); >> out[wlen]='\0'; >> } >> Index: ts_locale.h >> =================================================================== >> RCS file: /cvsroot/pgsql/contrib/tsearch2/ts_locale.h,v >> retrieving revision 1.7 >> diff -c -r1.7 ts_locale.h >> *** ts_locale.h 4 Oct 2006 00:29:47 -0000 1.7 >> --- ts_locale.h 4 Jan 2007 12:16:00 -0000 >> *************** >> *** 38,45 **** >> #else /* WIN32 */ >> >> /* correct mbstowcs */ >> - #define char2wchar mbstowcs >> #define wchar2char wcstombs >> #endif /* WIN32 */ >> >> #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) >> --- 38,46 ---- >> #else /* WIN32 */ >> >> /* correct mbstowcs */ >> #define wchar2char wcstombs >> + size_t char2wchar(wchar_t *to, const char *from, size_t len); >> + >> #endif /* WIN32 */ >> >> #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) >> *************** >> *** 54,59 **** >> --- 55,61 ---- >> * t_iseq() should be called only for ASCII symbols >> */ >> #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false ) >> + /*#define t_iseq(x,c) ( TOUCHAR(x) == ((unsigned char)(c)))*/ >> >> #define COPYCHAR(d,s) do { \ >> int lll = pg_mblen( s ); \ >> Index: wordparser/parser.c >> =================================================================== >> RCS file: /cvsroot/pgsql/contrib/tsearch2/wordparser/parser.c,v >> retrieving revision 1.11 >> diff -c -r1.11 parser.c >> *** wordparser/parser.c 4 Oct 2006 00:29:47 -0000 1.11 >> --- wordparser/parser.c 4 Jan 2007 12:16:01 -0000 >> *************** >> *** 44,52 **** >> * Some operating systems fail with multi-byte encodings and a C locale. >> * Also, for a C locale there is no need to process as multibyte. From >> * backend/utils/adt/oracle_compat.c Teodor >> */ >> >> ! 
if (prs->charmaxlen > 1 && !lc_ctype_is_c()) >> { >> prs->usewide = true; >> prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr); >> --- 44,54 ---- >> * Some operating systems fail with multi-byte encodings and a C locale. >> * Also, for a C locale there is no need to process as multibyte. From >> * backend/utils/adt/oracle_compat.c Teodor >> + * >> + * This is wrong assumption. even if locale is C, multibyte is necceary. >> */ >> >> ! if (prs->charmaxlen > 1) >> { >> prs->usewide = true; >> prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr); >> *************** >> *** 92,98 **** >> static int \ >> p_is##type(TParser *prs) { \ >> Assert( prs->state ); \ >> ! return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \ >> is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ >> } \ >> \ >> --- 94,102 ---- >> static int \ >> p_is##type(TParser *prs) { \ >> Assert( prs->state ); \ >> ! return ( ( prs->usewide ) ? \ >> ! (lc_ctype_is_c()? is##type( 0xff & *( prs->wstr + prs->state->poschar)): \ >> ! isw##type( (wint_t)*( prs->wstr + prs->state->poschar))): \ >> is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ >> } \ >> \ >> *************** >> *** 134,141 **** >> } >> #endif /* TS_USE_WIDE */ >> >> ! p_iswhat(alnum) >> ! p_iswhat(alpha) >> p_iswhat(digit) >> p_iswhat(lower) >> p_iswhat(print) >> --- 138,197 ---- >> } >> #endif /* TS_USE_WIDE */ >> >> ! static int p_isalnum(TParser *prs) { >> ! Assert( prs->state ); >> ! >> ! if (prs->usewide) >> ! { >> ! unsigned int c; >> ! >> ! c = *(prs->wstr + prs->state->poschar); >> ! >> ! if (lc_ctype_is_c()) >> ! { >> ! if (c > 0x7f) >> ! return 1; >> ! return isalnum(0xff & c); >> ! } >> ! else >> ! return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar)); >> ! } >> ! else >> ! return isalnum( (unsigned char)*( prs->str + prs->state->posbyte )); >> ! } >> ! >> ! static int p_isnotalnum(TParser *prs) >> ! { >> ! return !p_isalnum(prs); >> ! 
} >> ! >> ! static int p_isalpha(TParser *prs) { >> ! Assert( prs->state ); >> ! >> ! if (prs->usewide) >> ! { >> ! unsigned int c; >> ! >> ! c = *(prs->wstr + prs->state->poschar); >> ! >> ! if (lc_ctype_is_c()) >> ! { >> ! if (c > 0x7f) >> ! return 1; >> ! return isalpha(0xff & c); >> ! } >> ! else >> ! return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar)); >> ! } >> ! else >> ! return isalpha( (unsigned char)*( prs->str + prs->state->posbyte )); >> ! } >> ! >> ! static int p_isnotalpha(TParser *prs) >> ! { >> ! return !p_isalpha(prs); >> ! } >> ! >> p_iswhat(digit) >> p_iswhat(lower) >> p_iswhat(print) >> >> ------------------------------------------------------------------------ >> >> >> ---------------------------(end of broadcast)--------------------------- >> TIP 9: In versions below 8.0, the planner will ignore your desire to >> choose an index scan if your joining column's datatypes do not >> match -- Teodor Sigaev E-mail: teodor@sigaev.ru WWW: http://www.sigaev.ru/
pgsql-hackers by date: