diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile new file mode 100644 index 212a890..b406261 *** a/contrib/pg_trgm/Makefile --- b/contrib/pg_trgm/Makefile *************** MODULE_big = pg_trgm *** 4,10 **** OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES) EXTENSION = pg_trgm ! DATA = pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \ pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql PGFILEDESC = "pg_trgm - trigram matching" --- 4,11 ---- OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o $(WIN32RES) EXTENSION = pg_trgm ! DATA = pg_trgm--1.3--1.4.sql \ ! pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \ pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql PGFILEDESC = "pg_trgm - trigram matching" diff --git a/contrib/pg_trgm/expected/pg_word_trgm.out b/contrib/pg_trgm/expected/pg_word_trgm.out new file mode 100644 index bed61c4..e10f539 *** a/contrib/pg_trgm/expected/pg_word_trgm.out --- b/contrib/pg_trgm/expected/pg_word_trgm.out *************** select t,word_similarity('Baykal',t) as *** 14,19 **** --- 14,79 ---- Sanatoriy Baykal | 1 Stantsiya Baykal | 1 Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + ?column? | t + ----------+---------------------------------- + 0 | Kabankala + 0.25 | Kabankalan City Public Plaza + 0.416667 | Kabakala + 0.416667 | Abankala + 0.538462 | Kabikala + 0.625 | Ntombankala School + 0.642857 | Nehalla Bankalah Reserved Forest + (7 rows) + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 Baykalikha | 0.857143 Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 Baykalovo | 0.857143 *************** select t,word_similarity('Baykal',t) as *** 25,31 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 85,91 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 34,40 **** Ntombankala School | 0.6 (4 rows) ! select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 --- 94,100 ---- Ntombankala School | 0.6 (4 rows) ! select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 59,65 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 119,125 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 68,74 **** Ntombankala School | 0.6 (4 rows) ! select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; ?column? | t ----------+---------------------------------- 0 | Kabankala --- 128,134 ---- Ntombankala School | 0.6 (4 rows) ! select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; ?column? | t ----------+---------------------------------- 0 | Kabankala *************** select t,word_similarity('Baykal',t) as *** 96,101 **** --- 156,230 ---- Sanatoriy Baykal | 1 Stantsiya Baykal | 1 Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + explain (costs off) + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + QUERY PLAN + ------------------------------------------------ + Limit + -> Index Scan using trgm_idx2 on test_trgm2 + Order By: (t <->> 'Kabankala'::text) + (3 rows) + + select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + ?column? | t + ----------+---------------------------------- + 0 | Kabankala + 0.25 | Kabankalan City Public Plaza + 0.416667 | Kabakala + 0.416667 | Abankala + 0.538462 | Kabikala + 0.625 | Ntombankala School + 0.642857 | Nehalla Bankalah Reserved Forest + (7 rows) + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 Baykalikha | 0.857143 Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 Baykalovo | 0.857143 *************** select t,word_similarity('Baykal',t) as *** 107,113 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 236,242 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 116,122 **** Ntombankala School | 0.6 (4 rows) ! select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 --- 245,251 ---- Ntombankala School | 0.6 (4 rows) ! select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 141,147 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 270,276 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 151,165 **** (4 rows) explain (costs off) ! select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; QUERY PLAN ------------------------------------------------ Limit -> Index Scan using trgm_idx2 on test_trgm2 ! Order By: (t <->> 'Kabankala'::text) (3 rows) ! select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; ?column? | t ----------+---------------------------------- 0 | Kabankala --- 280,294 ---- (4 rows) explain (costs off) ! select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; QUERY PLAN ------------------------------------------------ Limit -> Index Scan using trgm_idx2 on test_trgm2 ! Order By: (t <->>> 'Kabankala'::text) (3 rows) ! select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; ?column? | t ----------+---------------------------------- 0 | Kabankala *************** select t,word_similarity('Baykal',t) as *** 188,193 **** --- 317,370 ---- Sanatoriy Baykal | 1 Stantsiya Baykal | 1 Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + (12 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+------ + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + (2 rows) + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 Baykalikha | 0.857143 Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 Baykalovo | 0.857143 *************** select t,word_similarity('Baykal',t) as *** 199,205 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 376,382 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 208,214 **** Ntombankala School | 0.6 (4 rows) ! select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 --- 385,391 ---- Ntombankala School | 0.6 (4 rows) ! select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 233,239 **** Zabaykal | 0.714286 (20 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 --- 410,416 ---- Zabaykal | 0.714286 (20 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; t | sml ------------------------------+----- Kabankala | 1 *************** select t,word_similarity('Baykal',t) as *** 257,262 **** --- 434,501 ---- Sanatoriy Baykal | 1 Stantsiya Baykal | 1 Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + Baykalovo | 0.545455 + Baykalsko | 0.545455 + Maloye Baykalovo | 0.545455 + Baykalikha | 0.5 + Baykalovsk | 0.5 + (17 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + Abankala | 0.583333 + Kabakala | 0.583333 + (4 rows) + + select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + Baykalovo | 0.545455 + Baykalsko | 0.545455 + Maloye Baykalovo | 0.545455 + Baykalikha | 0.5 + Baykalovsk | 0.5 + (17 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + Abankala | 0.583333 + Kabakala | 0.583333 + (4 rows) + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 Baykalikha | 0.857143 Baykalo-Amurskaya Zheleznaya Doroga | 0.857143 Baykalovo | 0.857143 *************** select t,word_similarity('Baykal',t) as *** 271,277 **** Zabaykalovskiy | 0.571429 (23 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 --- 510,516 ---- Zabaykalovskiy | 0.571429 (23 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 282,288 **** Nehalla Bankalah Reserved Forest | 0.5 (6 rows) ! select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 --- 521,527 ---- Nehalla Bankalah Reserved Forest | 0.5 (6 rows) ! select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; t | sml -------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 310,316 **** Zabaykalovskiy | 0.571429 (23 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 --- 549,555 ---- Zabaykalovskiy | 0.571429 (23 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 323,328 **** --- 562,741 ---- set "pg_trgm.word_similarity_threshold" to 0.3; select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + Baykalovo | 0.545455 + Baykalsko | 0.545455 + Maloye Baykalovo | 0.545455 + Baykalikha | 0.5 + Baykalovsk | 0.5 + Zabaykal | 0.454545 + Air Bakal-kecil | 0.444444 + Bakal | 0.444444 + Bakal Batu | 0.444444 + Bakal Dos | 0.444444 + Bakal Julu | 0.444444 + Bakal Khel | 0.444444 + Bakal Lama | 0.444444 + Bakal Tres | 0.444444 + Bakal Uno | 0.444444 + Daang Bakal | 0.444444 + Desa Bakal | 0.444444 + Eat Bakal | 0.444444 + Gunung Bakal | 0.444444 + Sidi Bakal | 0.444444 + Stantsiya Bakal | 0.444444 + Sungai Bakal | 0.444444 + Talang Bakal | 0.444444 + Uruk Bakal | 0.444444 + Zaouia Oulad Bakal | 0.444444 + Baykalovskiy | 0.428571 + Baykalovskiy Rayon | 0.428571 + Baikal | 0.4 + Baikal Airfield | 0.4 + Baikal Business Centre | 0.4 + Baikal Hotel Moscow | 0.4 + Baikal Listvyanka Hotel | 0.4 + Baikal Mountains | 0.4 + Baikal Plaza | 0.4 + Bajkal | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Barkal | 0.4 + Jabal Barkal | 0.4 + Lake Baikal | 0.4 + Oulad el Bakkal | 0.4 + Sidi Mohammed Bakkal | 0.4 + Bay of Backaland | 0.375 + Boikalakalawa Bay | 0.375 + Waikalabubu Bay | 0.375 + Bairkal | 0.363636 + Bairkal Dhora | 0.363636 + Bairkal Jabal | 0.363636 + Batikal | 0.363636 + Bakaleyka | 0.307692 + Bakkalmal | 0.307692 + Bikal | 0.3 + (64 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + Abankala | 0.583333 + Kabakala | 0.583333 + Kabikala | 0.461538 + Ntombankala School | 0.375 + Nehalla Bankalah Reserved Forest | 0.357143 + Jabba Kalai | 0.333333 + Kambakala | 0.333333 + Ker Samba Kalla | 0.333333 + Bankal | 0.307692 + Bankal School | 0.307692 + Kanampumba-Kalawa | 0.307692 + (13 rows) + + select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; + t | sml + -------------------------------------+---------- + Baykal | 1 + Boloto Baykal | 1 + Boloto Malyy Baykal | 1 + Kolkhoz Krasnyy Baykal | 1 + Ozero Baykal | 1 + Polevoy Stan Baykal | 1 + Port Baykal | 1 + Prud Novyy Baykal | 1 + Sanatoriy Baykal | 1 + Stantsiya Baykal | 1 + Zaliv Baykal | 1 + Baykalo-Amurskaya Zheleznaya Doroga | 0.666667 + Baykalovo | 0.545455 + Baykalsko | 0.545455 + Maloye Baykalovo | 0.545455 + Baykalikha | 0.5 + Baykalovsk | 0.5 + Zabaykal | 0.454545 + Air Bakal-kecil | 0.444444 + Bakal | 0.444444 + Bakal Batu | 0.444444 + Bakal Dos | 0.444444 + Bakal Julu | 0.444444 + Bakal Khel | 0.444444 + Bakal Lama | 0.444444 + Bakal Tres | 0.444444 + Bakal Uno | 0.444444 + Daang Bakal | 0.444444 + Desa Bakal | 0.444444 + Eat Bakal | 0.444444 + Gunung Bakal | 0.444444 + Sidi Bakal | 0.444444 + Stantsiya Bakal | 0.444444 + Sungai Bakal | 0.444444 + Talang Bakal | 0.444444 + Uruk Bakal | 0.444444 + Zaouia Oulad Bakal | 0.444444 + Baykalovskiy | 0.428571 + Baykalovskiy Rayon | 0.428571 + Baikal | 0.4 + Baikal Airfield | 0.4 + Baikal Business Centre | 0.4 + Baikal Hotel Moscow | 0.4 + Baikal Listvyanka Hotel | 0.4 + Baikal Mountains | 0.4 + Baikal Plaza | 0.4 + Bajkal | 0.4 + Bankal | 0.4 + Bankal School | 0.4 + Barkal | 0.4 + Jabal Barkal | 0.4 + Lake Baikal | 0.4 + Oulad el Bakkal | 0.4 + Sidi Mohammed Bakkal | 0.4 + Bay of Backaland | 0.375 + Boikalakalawa Bay | 0.375 + Waikalabubu Bay | 0.375 + Bairkal | 0.363636 + Bairkal Dhora | 0.363636 + Bairkal Jabal | 0.363636 + Batikal | 0.363636 + Bakaleyka | 0.307692 + Bakkalmal | 0.307692 + Bikal | 0.3 + (64 rows) + + select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + t | sml + ----------------------------------+---------- + Kabankala | 1 + Kabankalan City Public Plaza | 0.75 + Abankala | 0.583333 + Kabakala | 0.583333 + Kabikala | 0.461538 + Ntombankala School | 0.375 + Nehalla Bankalah Reserved Forest | 0.357143 + Jabba Kalai | 0.333333 + Kambakala | 0.333333 + Ker Samba Kalla | 0.333333 + Bankal | 0.307692 + Bankal School | 0.307692 + Kanampumba-Kalawa | 0.307692 + (13 rows) + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; t | sml -----------------------------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 588,594 **** Urochishche Batkali | 0.3 (261 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 --- 1001,1007 ---- Urochishche Batkali | 0.3 (261 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 *************** select t,word_similarity('Kabankala',t) *** 682,688 **** Waikala | 0.3 (89 rows) ! select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; t | sml -----------------------------------------------------------+---------- Baykal | 1 --- 1095,1101 ---- Waikala | 0.3 (89 rows) ! select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; t | sml -----------------------------------------------------------+---------- Baykal | 1 *************** select t,word_similarity('Baykal',t) as *** 948,954 **** Urochishche Batkali | 0.3 (261 rows) ! select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 --- 1361,1367 ---- Urochishche Batkali | 0.3 (261 rows) ! select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; t | sml ----------------------------------+---------- Kabankala | 1 diff --git a/contrib/pg_trgm/pg_trgm--1.3--1.4.sql b/contrib/pg_trgm/pg_trgm--1.3--1.4.sql new file mode 100644 index ...98c3c55 *** a/contrib/pg_trgm/pg_trgm--1.3--1.4.sql --- b/contrib/pg_trgm/pg_trgm--1.3--1.4.sql *************** *** 0 **** --- 1,68 ---- + /* contrib/pg_trgm/pg_trgm--1.3--1.4.sql */ + + -- complain if script is sourced in psql, rather than via ALTER EXTENSION + \echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit + + CREATE FUNCTION subset_similarity(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + + CREATE FUNCTION subset_similarity_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.word_similarity_threshold + + CREATE FUNCTION subset_similarity_commutator_op(text,text) + RETURNS bool + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because depends on pg_trgm.word_similarity_threshold + + CREATE OPERATOR <<% ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subset_similarity_op, + COMMUTATOR = '%>>', + RESTRICT = contsel, + JOIN = contjoinsel + ); + + CREATE OPERATOR %>> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subset_similarity_commutator_op, + COMMUTATOR = '<<%', + RESTRICT = contsel, + JOIN = contjoinsel + ); + + CREATE FUNCTION subset_similarity_dist_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + + CREATE FUNCTION subset_similarity_dist_commutator_op(text,text) + RETURNS float4 + AS 'MODULE_PATHNAME' + LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + + CREATE OPERATOR <<<-> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subset_similarity_dist_op, + COMMUTATOR = '<->>>' + ); + + CREATE OPERATOR <->>> ( + LEFTARG = text, + RIGHTARG = text, + PROCEDURE = subset_similarity_dist_commutator_op, + COMMUTATOR = '<<<->' + ); + + ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 9 %>> (text, text), + OPERATOR 10 <->>> (text, text) FOR ORDER BY pg_catalog.float_ops; + + ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD + OPERATOR 9 %>> (text, text); diff --git a/contrib/pg_trgm/pg_trgm.control b/contrib/pg_trgm/pg_trgm.control new file mode 100644 index 06f274f..3e325dd *** a/contrib/pg_trgm/pg_trgm.control --- b/contrib/pg_trgm/pg_trgm.control *************** *** 1,5 **** # pg_trgm extension comment = 'text similarity measurement and index searching based on trigrams' ! default_version = '1.3' module_pathname = '$libdir/pg_trgm' relocatable = true --- 1,5 ---- # pg_trgm extension comment = 'text similarity measurement and index searching based on trigrams' ! default_version = '1.4' module_pathname = '$libdir/pg_trgm' relocatable = true diff --git a/contrib/pg_trgm/sql/pg_word_trgm.sql b/contrib/pg_trgm/sql/pg_word_trgm.sql new file mode 100644 index 4b1db97..af9adda *** a/contrib/pg_trgm/sql/pg_word_trgm.sql --- b/contrib/pg_trgm/sql/pg_word_trgm.sql *************** select t,word_similarity('Baykal',t) as *** 8,13 **** --- 8,19 ---- select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; + select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; + create index trgm_idx2 on test_trgm2 using gist (t gist_trgm_ops); set enable_seqscan=off; *************** explain (costs off) *** 20,25 **** --- 26,40 ---- select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; select t <->> 'Kabankala', t from test_trgm2 order by t <->> 'Kabankala' limit 7; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; + + explain (costs off) + select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; + select t <->>> 'Kabankala', t from test_trgm2 order by t <->>> 'Kabankala' limit 7; + drop index trgm_idx2; create index trgm_idx2 on test_trgm2 using gin (t gin_trgm_ops); set enable_seqscan=off; *************** select t,word_similarity('Kabankala',t) *** 29,42 **** --- 44,74 ---- select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; + set "pg_trgm.word_similarity_threshold" to 0.5; + select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; + set "pg_trgm.word_similarity_threshold" to 0.3; + select t,word_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <% t order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <% t order by sml desc, t; select t,word_similarity('Baykal',t) as sml from test_trgm2 where t %> 'Baykal' order by sml desc, t; select t,word_similarity('Kabankala',t) as sml from test_trgm2 where t %> 'Kabankala' order by sml desc, t; + + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where 'Baykal' <<% t order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where 'Kabankala' <<% t order by sml desc, t; + select t,subset_similarity('Baykal',t) as sml from test_trgm2 where t %>> 'Baykal' order by sml desc, t; + select t,subset_similarity('Kabankala',t) as sml from test_trgm2 where t %>> 'Kabankala' order by sml desc, t; diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h new file mode 100644 index 45df918..88871e9 *** a/contrib/pg_trgm/trgm.h --- b/contrib/pg_trgm/trgm.h *************** *** 34,39 **** --- 34,41 ---- #define RegExpICaseStrategyNumber 6 #define WordSimilarityStrategyNumber 7 #define WordDistanceStrategyNumber 8 + #define SubsetSimilarityStrategyNumber 9 + #define SubsetDistanceStrategyNumber 10 typedef char trgm[3]; diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c new file mode 100644 index e4b3dae..dc914fd *** a/contrib/pg_trgm/trgm_gin.c --- b/contrib/pg_trgm/trgm_gin.c *************** gin_extract_query_trgm(PG_FUNCTION_ARGS) *** 90,95 **** --- 90,96 ---- { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case SubsetSimilarityStrategyNumber: trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val)); break; case ILikeStrategyNumber: *************** gin_trgm_consistent(PG_FUNCTION_ARGS) *** 187,192 **** --- 188,194 ---- { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case SubsetSimilarityStrategyNumber: nlimit = (strategy == SimilarityStrategyNumber) ? similarity_threshold : word_similarity_threshold; *************** gin_trgm_triconsistent(PG_FUNCTION_ARGS) *** 282,287 **** --- 284,290 ---- { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case SubsetSimilarityStrategyNumber: nlimit = (strategy == SimilarityStrategyNumber) ? similarity_threshold : word_similarity_threshold; diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c new file mode 100644 index e55dc19..0a7c854 *** a/contrib/pg_trgm/trgm_gist.c --- b/contrib/pg_trgm/trgm_gist.c *************** gtrgm_consistent(PG_FUNCTION_ARGS) *** 221,226 **** --- 221,227 ---- { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case SubsetSimilarityStrategyNumber: qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ); break; *************** gtrgm_consistent(PG_FUNCTION_ARGS) *** 290,297 **** { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: /* Similarity search is exact. Word similarity search is inexact */ ! *recheck = (strategy == WordSimilarityStrategyNumber); nlimit = (strategy == SimilarityStrategyNumber) ? similarity_threshold : word_similarity_threshold; --- 291,299 ---- { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case SubsetSimilarityStrategyNumber: /* Similarity search is exact. Word similarity search is inexact */ ! *recheck = (strategy != SimilarityStrategyNumber); nlimit = (strategy == SimilarityStrategyNumber) ? similarity_threshold : word_similarity_threshold; *************** gtrgm_distance(PG_FUNCTION_ARGS) *** 468,474 **** { case DistanceStrategyNumber: case WordDistanceStrategyNumber: ! *recheck = strategy == WordDistanceStrategyNumber; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ --- 470,477 ---- { case DistanceStrategyNumber: case WordDistanceStrategyNumber: ! case SubsetDistanceStrategyNumber: ! *recheck = (strategy != DistanceStrategyNumber); if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c new file mode 100644 index f7e96ac..2a0d8f1 *** a/contrib/pg_trgm/trgm_op.c --- b/contrib/pg_trgm/trgm_op.c *************** PG_FUNCTION_INFO_V1(show_limit); *** 26,37 **** --- 26,42 ---- PG_FUNCTION_INFO_V1(show_trgm); PG_FUNCTION_INFO_V1(similarity); PG_FUNCTION_INFO_V1(word_similarity); + PG_FUNCTION_INFO_V1(subset_similarity); PG_FUNCTION_INFO_V1(similarity_dist); PG_FUNCTION_INFO_V1(similarity_op); PG_FUNCTION_INFO_V1(word_similarity_op); PG_FUNCTION_INFO_V1(word_similarity_commutator_op); PG_FUNCTION_INFO_V1(word_similarity_dist_op); PG_FUNCTION_INFO_V1(word_similarity_dist_commutator_op); + PG_FUNCTION_INFO_V1(subset_similarity_op); + PG_FUNCTION_INFO_V1(subset_similarity_commutator_op); + PG_FUNCTION_INFO_V1(subset_similarity_dist_op); + PG_FUNCTION_INFO_V1(subset_similarity_dist_commutator_op); /* Trigram with position */ typedef struct *************** typedef struct *** 40,45 **** --- 45,54 ---- int index; } pos_trgm; + /* Trigram bound status */ + #define TRGM_BOUND_LOWER (0x01) + #define TRGM_BOUND_UPPER (0x02) + /* * Module load callback */ *************** make_trigrams(trgm *tptr, char *str, int *** 235,245 **** * * trg: where to return the array of trigrams. * str: source string, of length slen bytes. * * Returns length of the generated array. */ static int ! generate_trgm_only(trgm *trg, char *str, int slen) { trgm *tptr; char *buf; --- 244,255 ---- * * trg: where to return the array of trigrams. * str: source string, of length slen bytes. + * bounds: where to return bound status of trigrams (if needed). * * Returns length of the generated array. */ static int ! generate_trgm_only(trgm *trg, char *str, int slen, uint8 *bounds) { trgm *tptr; char *buf; *************** generate_trgm_only(trgm *trg, char *str, *** 282,292 **** buf[LPADDING + bytelen] = ' '; buf[LPADDING + bytelen + 1] = ' '; ! /* ! * count trigrams ! */ tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, charlen + LPADDING + RPADDING); } pfree(buf); --- 292,304 ---- buf[LPADDING + bytelen] = ' '; buf[LPADDING + bytelen + 1] = ' '; ! /* Calculate trigrams marking their bounds if needed */ ! if (bounds) ! bounds[tptr - trg] |= TRGM_BOUND_LOWER; tptr = make_trigrams(tptr, buf, bytelen + LPADDING + RPADDING, charlen + LPADDING + RPADDING); + if (bounds) + bounds[tptr - trg - 1] |= TRGM_BOUND_UPPER; } pfree(buf); *************** generate_trgm(char *str, int slen) *** 328,334 **** trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3); trg->flag = ARRKEY; ! len = generate_trgm_only(GETARR(trg), str, slen); SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); if (len == 0) --- 340,346 ---- trg = (TRGM *) palloc(TRGMHDRSIZE + sizeof(trgm) * (slen / 2 + 1) * 3); trg->flag = ARRKEY; ! len = generate_trgm_only(GETARR(trg), str, slen, NULL); SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len)); if (len == 0) *************** iterate_word_similarity(int *trg2indexes *** 424,437 **** int ulen1, int len2, int len, ! bool check_only) { int *lastpos, i, ulen2 = 0, count = 0, upper = -1, ! lower = -1; float4 smlr_cur, smlr_max = 0.0f; --- 436,450 ---- int ulen1, int len2, int len, ! bool check_only, ! uint8 *bounds) { int *lastpos, i, ulen2 = 0, count = 0, upper = -1, ! lower = bounds ? 0 : -1; float4 smlr_cur, smlr_max = 0.0f; *************** iterate_word_similarity(int *trg2indexes *** 457,463 **** } /* Adjust lower bound if this trigram is present in required substring */ ! if (found[trgindex]) { int prev_lower, tmp_ulen2, --- 470,476 ---- } /* Adjust lower bound if this trigram is present in required substring */ ! if (bounds ? (bounds[i] & TRGM_BOUND_UPPER) : found[trgindex]) { int prev_lower, tmp_ulen2, *************** iterate_word_similarity(int *trg2indexes *** 479,502 **** prev_lower = lower; for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++) { ! float smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2); int tmp_trgindex; ! if (smlr_tmp > smlr_cur) { ! smlr_cur = smlr_tmp; ! ulen2 = tmp_ulen2; ! lower = tmp_lower; ! count = tmp_count; ! } ! /* ! * if we only check that word similarity is greater than ! * pg_trgm.word_similarity_threshold we do not need to ! * calculate a maximum similarity. ! */ ! if (check_only && smlr_cur >= word_similarity_threshold) ! break; tmp_trgindex = trg2indexes[tmp_lower]; if (lastpos[tmp_trgindex] == tmp_lower) --- 492,519 ---- prev_lower = lower; for (tmp_lower = lower; tmp_lower <= upper; tmp_lower++) { ! float smlr_tmp; int tmp_trgindex; ! if (!bounds || (bounds[tmp_lower] & TRGM_BOUND_LOWER)) { ! smlr_tmp = CALCSML(tmp_count, ulen1, tmp_ulen2); ! if (smlr_tmp > smlr_cur) ! { ! smlr_cur = smlr_tmp; ! ulen2 = tmp_ulen2; ! lower = tmp_lower; ! count = tmp_count; ! } ! /* ! * if we only check that word similarity is greater than ! * pg_trgm.word_similarity_threshold we do not need to ! * calculate a maximum similarity. ! */ ! if (check_only && smlr_cur >= word_similarity_threshold) ! break; ! } tmp_trgindex = trg2indexes[tmp_lower]; if (lastpos[tmp_trgindex] == tmp_lower) *************** iterate_word_similarity(int *trg2indexes *** 549,560 **** * str2: text in which we are looking for a word, of length slen2 bytes. * check_only: if true then only check existence of similar search pattern in * text. * * Returns word similarity. */ static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, ! bool check_only) { bool *found; pos_trgm *ptrg; --- 566,578 ---- * str2: text in which we are looking for a word, of length slen2 bytes. * check_only: if true then only check existence of similar search pattern in * text. + * word_bounds: force bounds of extent to match word bounds. * * Returns word similarity. */ static float4 calc_word_similarity(char *str1, int slen1, char *str2, int slen2, ! bool check_only, bool word_bounds) { bool *found; pos_trgm *ptrg; *************** calc_word_similarity(char *str1, int sle *** 568,582 **** ulen1; int *trg2indexes; float4 result; protect_out_of_mem(slen1 + slen2); /* Make positional trigrams */ trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3); trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3); ! len1 = generate_trgm_only(trg1, str1, slen1); ! len2 = generate_trgm_only(trg2, str2, slen2); ptrg = make_positional_trgm(trg1, len1, trg2, len2); len = len1 + len2; --- 586,605 ---- ulen1; int *trg2indexes; float4 result; + uint8 *bounds; protect_out_of_mem(slen1 + slen2); /* Make positional trigrams */ trg1 = (trgm *) palloc(sizeof(trgm) * (slen1 / 2 + 1) * 3); trg2 = (trgm *) palloc(sizeof(trgm) * (slen2 / 2 + 1) * 3); + if (word_bounds) + bounds = (uint8 *) palloc0(sizeof(uint8) * (slen2 / 2 + 1) * 3); + else + bounds = NULL; ! len1 = generate_trgm_only(trg1, str1, slen1, NULL); ! len2 = generate_trgm_only(trg2, str2, slen2, bounds); ptrg = make_positional_trgm(trg1, len1, trg2, len2); len = len1 + len2; *************** calc_word_similarity(char *str1, int sle *** 622,628 **** /* Run iterative procedure to find maximum similarity with word */ result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len, ! check_only); pfree(trg2indexes); pfree(found); --- 645,651 ---- /* Run iterative procedure to find maximum similarity with word */ result = iterate_word_similarity(trg2indexes, found, ulen1, len2, len, ! check_only, bounds); pfree(trg2indexes); pfree(found); *************** word_similarity(PG_FUNCTION_ARGS) *** 1081,1087 **** res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); --- 1104,1126 ---- res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false, true); ! ! PG_FREE_IF_COPY(in1, 0); ! PG_FREE_IF_COPY(in2, 1); ! PG_RETURN_FLOAT4(res); ! } ! ! Datum ! subset_similarity(PG_FUNCTION_ARGS) ! { ! text *in1 = PG_GETARG_TEXT_PP(0); ! text *in2 = PG_GETARG_TEXT_PP(1); ! float4 res; ! ! res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false, false); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); *************** word_similarity_op(PG_FUNCTION_ARGS) *** 1117,1123 **** res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! true); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); --- 1156,1162 ---- res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! true, true); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); *************** word_similarity_commutator_op(PG_FUNCTIO *** 1133,1139 **** res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! true); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); --- 1172,1178 ---- res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! true, true); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); *************** word_similarity_dist_op(PG_FUNCTION_ARGS *** 1149,1155 **** res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); --- 1188,1194 ---- res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false, true); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); *************** word_similarity_dist_commutator_op(PG_FU *** 1165,1171 **** res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! false); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1); --- 1204,1274 ---- res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! false, true); ! ! PG_FREE_IF_COPY(in1, 0); ! PG_FREE_IF_COPY(in2, 1); ! PG_RETURN_FLOAT4(1.0 - res); ! } ! ! Datum ! subset_similarity_op(PG_FUNCTION_ARGS) ! { ! text *in1 = PG_GETARG_TEXT_PP(0); ! text *in2 = PG_GETARG_TEXT_PP(1); ! float4 res; ! ! res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! true, false); ! ! PG_FREE_IF_COPY(in1, 0); ! PG_FREE_IF_COPY(in2, 1); ! PG_RETURN_BOOL(res >= word_similarity_threshold); ! } ! ! Datum ! subset_similarity_commutator_op(PG_FUNCTION_ARGS) ! { ! text *in1 = PG_GETARG_TEXT_PP(0); ! text *in2 = PG_GETARG_TEXT_PP(1); ! float4 res; ! ! res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! true, false); ! ! PG_FREE_IF_COPY(in1, 0); ! PG_FREE_IF_COPY(in2, 1); ! PG_RETURN_BOOL(res >= word_similarity_threshold); ! } ! ! Datum ! subset_similarity_dist_op(PG_FUNCTION_ARGS) ! { ! text *in1 = PG_GETARG_TEXT_PP(0); ! text *in2 = PG_GETARG_TEXT_PP(1); ! float4 res; ! ! res = calc_word_similarity(VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! false, false); ! ! PG_FREE_IF_COPY(in1, 0); ! PG_FREE_IF_COPY(in2, 1); ! PG_RETURN_FLOAT4(1.0 - res); ! } ! ! Datum ! subset_similarity_dist_commutator_op(PG_FUNCTION_ARGS) ! { ! text *in1 = PG_GETARG_TEXT_PP(0); ! text *in2 = PG_GETARG_TEXT_PP(1); ! float4 res; ! ! res = calc_word_similarity(VARDATA_ANY(in2), VARSIZE_ANY_EXHDR(in2), ! VARDATA_ANY(in1), VARSIZE_ANY_EXHDR(in1), ! false, false); PG_FREE_IF_COPY(in1, 0); PG_FREE_IF_COPY(in2, 1);