diff --git a/VERSIONS_HOTLINE/209619 b/VERSIONS_HOTLINE/209619 new file mode 100644 index 0000000000000000000000000000000000000000..444f3823bb91d1694c22550fff255db9f406eafc --- /dev/null +++ b/VERSIONS_HOTLINE/209619 @@ -0,0 +1 @@ + - correctif #209619 : Recherche : Indexation des caractères spéciaux pour éviter de séparer les mots en deux. \ No newline at end of file diff --git a/library/Class/CharSet.php b/library/Class/CharSet.php index 83d2f46a89857499619abf592f9ce3d0647880ac..44099157ca1382b916351b0670ffe12a1b742eb2 100644 --- a/library/Class/CharSet.php +++ b/library/Class/CharSet.php @@ -45,4 +45,14 @@ class Class_CharSet { return iconv('UTF-8', 'ISO-8859-1', ($words ?? '')); } + + + public static function tryConvertToUtf8(string $words): string + { + $words = Normalizer::normalize($words); + + return preg_match('`&[A-Za-z]+;`', $words) + ? mb_convert_encoding($words, 'UTF-8', 'HTML-ENTITIES') + : $words; + } } diff --git a/library/Class/Indexation.php b/library/Class/Indexation.php index dffef5ace82e7d48eed4af1c75072173aa562c72..2bae9f684593ea3675e6bc807538378726c36962 100644 --- a/library/Class/Indexation.php +++ b/library/Class/Indexation.php @@ -129,21 +129,6 @@ class Class_Indexation { 'Ž' => [self::INDEXATION => 'Z', self::PHONETIX => 'Z'] ]; - protected static array $_html_conv = ['â' => 'a', - 'à' => 'a', - 'é' => 'e', - 'ê' => 'e', - 'è' => 'e', - 'ë' => 'e', - 'î' => 'i', - 'ï' => 'i', - 'ô' => 'o', - 'œ' => 'oe', - 'û' => 'u', - 'ù' => 'u', - 'ü' => 'u', - 'ç' => 'c']; - protected static $_instance; protected static array $_alpha_maj_cache = []; @@ -390,18 +375,14 @@ class Class_Indexation { if ('' === $expression) return ''; - $expression = str_replace(array_keys(static::$_html_conv), - array_values(static::$_html_conv), - $expression); - $expression = preg_replace('/&[A-Za-z]+;/i', ' ', $expression); + $expression = Class_CharSet::tryConvertToUtf8($expression); $expression = str_replace(array_keys(static::$_min_to_maj), array_map(fn($values) => 
$values[$key], static::$_min_to_maj), $expression); - $expression = Class_CharSet::fromISOtoUTF8(strtoupper($expression)); - $expression = preg_replace('/[^A-Z0-9]/', ' ', $expression); + $expression = preg_replace('/[^A-Z0-9]/', ' ', strtoupper($expression)); return trim(preg_replace('/\s+/', ' ', $expression)); } diff --git a/tests/library/Class/Indexation/SpecialFormatTest.php b/tests/library/Class/Indexation/SpecialFormatTest.php new file mode 100644 index 0000000000000000000000000000000000000000..e5f7debd302b76ac972218dfaeb7f8a2d83fba41 --- /dev/null +++ b/tests/library/Class/Indexation/SpecialFormatTest.php @@ -0,0 +1,32 @@ +<?php +/** + * Copyright (c) 2012-2025, Agence Française Informatique (AFI). All rights reserved. + * + * BOKEH is free software; you can redistribute it and/or modify + * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by + * the Free Software Foundation. + * + * There are special exceptions to the terms and conditions of the AGPL as it + * is applied to this software (see README file). + * + * BOKEH is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU AFFERO GENERAL PUBLIC LICENSE for more details. + * + * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE + * along with BOKEH; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +class Class_Indexation_SpecialFormatTest extends ModelTestCase +{ + + /** @test */ + public function withFormatUnicodeShouldConvertAccentWithoutEmptyChar() + { + $chaine = file_get_contents(realpath(__DIR__) . 
'/unicode_format.txt'); + $this->assertEquals('MATERIALITE', (new Class_Indexation)->alphaMaj($chaine)); + } +} diff --git a/tests/library/Class/Indexation/unicode_format.txt b/tests/library/Class/Indexation/unicode_format.txt new file mode 100644 index 0000000000000000000000000000000000000000..f731ea884adcea1b52f48fe36ab74734c3f0b655 --- /dev/null +++ b/tests/library/Class/Indexation/unicode_format.txt @@ -0,0 +1 @@ +mateÌrialiteÌ diff --git a/tests/library/Class/MoteurRechercheTest.php b/tests/library/Class/MoteurRechercheTest.php index 514937445fa9aaea97fabfae75f10fde55141de0..900995dba80880571825315d11936caeaf008728 100644 --- a/tests/library/Class/MoteurRechercheTest.php +++ b/tests/library/Class/MoteurRechercheTest.php @@ -481,7 +481,7 @@ class MoteurRechercheSimpleWithOtherIndexFieldsTest extends MoteurRechercheSimpl * * @test */ - public function lancerRechercheSimpleShouldBe(array $params, string $sql) + public function lancerRechercheSimpleBakounineShouldBe(array $params, string $sql) { $this->mockReqProfilAndZone($params); @@ -1082,7 +1082,7 @@ class MoteurRechercheCountWordsTest extends MoteurRechercheTestCase * * @test */ - public function lancerRechercheSimpleShouldBe(array $params, int $count_words) + public function lancerRechercheSimpleCountWordsShouldBe(array $params, int $count_words) { $this->mockReqProfilAndZone($params);