From 5df34b5165fee209b28cd9f268673f2a4fec06d9 Mon Sep 17 00:00:00 2001 From: llaffont <llaffont@afi-sa.fr> Date: Tue, 18 Oct 2016 17:16:03 +0200 Subject: [PATCH] hotline #43215 fix title normalization --- Linse.php | 17 +++-------------- Linse/Converter.php | 6 +++++- Linse/Iso5426Converter.php | 2 +- tests/LinseTest.php | 24 ++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/Linse.php b/Linse.php index de15a55..0d3fc00 100644 --- a/Linse.php +++ b/Linse.php @@ -23,7 +23,6 @@ class Linse { protected $_converter; - private $articles; // Articles rejetes private $inclu; // Mots inclus private $exclu; // Mots vides private $pluriel; // Règles des pluriels @@ -67,8 +66,6 @@ class Linse { public function __construct() { - $this->articles = ['L\'','LE ','LA ','LES ','UN ','UNE ']; - $this->inclu = ['AN','AS','OR','U2','AI','LU','XO','DO','RE','MI','FA','SI','AC','DC','XX','B','C','D','E','F','G','H','I','J','K','M','P','Q','R','S','T','V','W','X','Y','Z','L','YU','UT','LI','OC','PI','ZU','WU','TO','OZ','ZZ','XX', 'PC', 'DS']; $this->exclu = ['L','LE','LA','LES','UN','UNE','LES','DES','MES','TES','CES']; @@ -126,17 +123,9 @@ class Linse { public function codeAlphaTitre($titre) { - $titre = $this->alphaMaj($titre); - foreach($this->articles as $article) { - $lg = strlen($article); - if(strLeft($titre, $lg)==$article) { - $titre = strMid($titre,$lg,256); - break; - } - } - - $titre = $this->alphaMaj($titre); - return $titre; + return preg_replace('/^((L|LE|LA|LES|UN|UNE)\s)/', + '', + $this->alphaMaj($titre)); } diff --git a/Linse/Converter.php b/Linse/Converter.php index f71b6a3..26ae5a5 100644 --- a/Linse/Converter.php +++ b/Linse/Converter.php @@ -39,6 +39,10 @@ class Linse_Converter { public function toIndexable($data) { - return trim(str_replace($this->_ascii_map, $this->_ascii_to_uppercase, $data)); + return preg_replace('/\s+/', + ' ', + trim(str_replace($this->_ascii_map, + $this->_ascii_to_uppercase, + $data))); } } \ No newline at end of file diff --git a/Linse/Iso5426Converter.php b/Linse/Iso5426Converter.php index 8133961..c49e8f6 100644 --- a/Linse/Iso5426Converter.php +++ b/Linse/Iso5426Converter.php @@ -22,7 +22,7 @@ class Linse_Iso5426Converter extends Linse_Converter { public function toIndexable($data) { - $data = str_replace([chr(136), chr(137)], '', $data); // Les delimiteurs d'article bnf + $data = preg_replace('/\210.+\211/', '', $data); // Les delimiteurs d'article bnf NSB NSE $result = ''; $len = strlen($data); diff --git a/tests/LinseTest.php b/tests/LinseTest.php index a3ef518..939cb42 100644 --- a/tests/LinseTest.php +++ b/tests/LinseTest.php @@ -70,4 +70,28 @@ class LinseTest extends PHPUnit_Framework_TestCase { public function ethShouldBeHandled() { $this->assertEquals('D', $this->_model->alphaMaj('ð')); } + + + public function titlesAndAlpha() { + return [ + [ 'Norvege, Islande', 'NORVEGE ISLANDE' ], + [ 'L\'ame divisee' , 'AME DIVISEE' ], + [ 'Le velo de Jojo', 'VELO DE JOJO' ], + [ 'Les tontons flingueurs', 'TONTONS FLINGUEURS' ], + [ 'La voiture de la maison', 'VOITURE DE LA MAISON' ], + [ 'Un dimanche', 'DIMANCHE' ], + [ 'Un voyage a la mer', 'VOYAGE A LA MER' ], + [ chr(136) . 'Joseph Canteloube. ' . chr(137) . 'Anthologie des chants populaires franco-canadiens . [A 1 v.]', + 'ANTHOLOGIE DES CHANTS POPULAIRES FRANCO-CANADIENS'], + ]; + } + + /** + * @dataProvider titlesAndAlpha + * @test + */ + public function alphaTitleShouldAnswer($title, $alpha) { + $this->assertEquals($alpha, + $this->_model->beForIso5426()->codeAlphaTitre($title)); + } } \ No newline at end of file -- GitLab