diff --git a/library/Class/Indexation.php b/library/Class/Indexation.php index a2c8146c63b7d155c460ba7c63c8782150b9d346..2f4d46abb5110724c433d222a2bd8ec94995f690 100644 --- a/library/Class/Indexation.php +++ b/library/Class/Indexation.php @@ -49,67 +49,67 @@ class Class_Indexation { function __construct() { // Lire formes rejetées - $this->articles=array("L'","LE ","LA ","LES ","UN ","UNE "); - $this->inclu=array("AN","AS","OR","U2","AI","LU","XO","DO","RE","MI","FA","SI","AC","DC","XX","B","C","D","E","F","G","H","I","J","K","M","P","Q","R","S","T","V","W","X","Y","Z","L","YU","UT","LI","OC","PI","ZU","WU","TO","OZ","ZZ","XX"); - $this->exclu = array("L","LE","LA","LES","UN","UNE","LES","DES","MES","TES","CES"); + $this->articles=array('L\'','LE ','LA ','LES ','UN ','UNE '); + $this->inclu=array('AN','AS','OR','U2','AI','LU','XO','DO','RE','MI','FA','SI','AC','DC','XX','B','C','D','E','F','G','H','I','J','K','M','P','Q','R','S','T','V','W','X','Y','Z','L','YU','UT','LI','OC','PI','ZU','WU','TO','OZ','ZZ','XX'); + $this->exclu = array('L','LE','LA','LES','UN','UNE','LES','DES','MES','TES','CES'); // Pluriels $this->pluriel=array( - array("AIL","AULX"), - array("AVAL","AVALS"), - array("BAIL","BAUX"), - array("BAL","BALS"), - array("BANAL","BANALS"), - array("BANCAL","BANCALS"), - array("BIJOU","BIJOUX"), - array("BLEU","BLEUS"), - array("CAILLOU","CAILLOUX"), - array("CAL","CALS"), - array("CARNAVAL","CARNAVALS"), - array("CEREMONIAL","CEREMONIALS"), - array("CHACAL","CHACALS"), - array("CHORAL","CHORALS"), - array("CHOU","CHOUX"), - array("CORAIL","CORAUX"), - array("DETAIL","DETAILS"), - array("EMAIL","EMAUX"), - array("EMEU","EMEUS"), - array("ETAL","ETALS"), - array("FATAL","FATALS"), - array("FESTIVAL","FESTIVALS"), - array("GEMMAIL","GEMMAUX"), - array("GENOU","GENOUX"), - array("HIBOU","HIBOUX"), - array("JOUJOU","JOUJOUX"), - array("LANDAU","LANDAUX"), - array("NATAL","NATALS"), - array("OEIL","YEUX"), - array("PAL","PALS"), - array("PNEU","PNEUS"), - array("POU","POUX"), - array("RECITAL","RECITALS"), - array("REGAL","REGALS"), - array("SARRAU","SARRAUS"), - array("SOUPIRAIL","SOUPIRAUX"), - array("TONAL","TONALS"), - array("TRAVAIL","TRAVAUX"), - array("VAL","VALS"), - array("VENTAIL","VENTAUX"), - array("VIRGINAL","VIRGINALS"), - array("VITRAIL","VITRAUX"), - array("*EAU","*EAUX"), - array("*AL","*AUX"), - array("*EU","*EUX"), - array("*AU","*AUX") + array('AIL','AULX'), + array('AVAL','AVALS'), + array('BAIL','BAUX'), + array('BAL','BALS'), + array('BANAL','BANALS'), + array('BANCAL','BANCALS'), + array('BIJOU','BIJOUX'), + array('BLEU','BLEUS'), + array('CAILLOU','CAILLOUX'), + array('CAL','CALS'), + array('CARNAVAL','CARNAVALS'), + array('CEREMONIAL','CEREMONIALS'), + array('CHACAL','CHACALS'), + array('CHORAL','CHORALS'), + array('CHOU','CHOUX'), + array('CORAIL','CORAUX'), + array('DETAIL','DETAILS'), + array('EMAIL','EMAUX'), + array('EMEU','EMEUS'), + array('ETAL','ETALS'), + array('FATAL','FATALS'), + array('FESTIVAL','FESTIVALS'), + array('GEMMAIL','GEMMAUX'), + array('GENOU','GENOUX'), + array('HIBOU','HIBOUX'), + array('JOUJOU','JOUJOUX'), + array('LANDAU','LANDAUX'), + array('NATAL','NATALS'), + array('OEIL','YEUX'), + array('PAL','PALS'), + array('PNEU','PNEUS'), + array('POU','POUX'), + array('RECITAL','RECITALS'), + array('REGAL','REGALS'), + array('SARRAU','SARRAUS'), + array('SOUPIRAIL','SOUPIRAUX'), + array('TONAL','TONALS'), + array('TRAVAIL','TRAVAUX'), + array('VAL','VALS'), + array('VENTAIL','VENTAUX'), + array('VIRGINAL','VIRGINALS'), + array('VITRAIL','VITRAUX'), + array('*EAU','*EAUX'), + array('*AL','*AUX'), + array('*EU','*EUX'), + array('*AU','*AUX') ); // Init table ascii pour majuscules - $this->tableMajTo = str_split(str_repeat( " ", 42 ) - . "* 0123456789 " - . "ABCDEFGHIJKLMNOPQRSTUVWXYZ " - . "ABCDEFGHIJKLMNOPQRSTUVWXYZ " - . str_repeat( " ", 63) - ."AAAAAAACEEEEIIII NOOOOO UUUUY AAAAAAACEEEEIIII NOOOOO UUUUY Y"); + $this->tableMajTo = str_split(str_repeat( ' ', 42 ) + . '* 0123456789 ' + . 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' + . 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' + . str_repeat( ' ', 63) + .'AAAAAAACEEEEIIII NOOOOO UUUUY AAAAAAACEEEEIIII NOOOOO UUUUY Y'); for($i=0; $i<count($this->tableMajTo); $i++) $this->tableMajFrom[] = chr($i); @@ -140,14 +140,14 @@ class Class_Indexation { // Pluriel $etoile = ''; - if(strRight($mot,1) == "*") { - $etoile="*"; + if(strRight($mot,1) == '*') { + $etoile='*'; $mot=strLeft($mot, strlen($mot)-1); } $m = $this->getPluriel($mot); $m[2] = $this->phonetix($m[0]); - return trim("(".$m[0].$etoile." ".$m[1]." ".$m[2].")"); + return trim('('.$m[0].$etoile.' '.$m[1].' '.$m[2].')'); } @@ -160,22 +160,22 @@ class Class_Indexation { public function getClefAlpha($type_doc,$titre,$complement_titre,$auteur,$tome,$editeur,$annee) { - $clef=$this->getClefOeuvre($titre,$complement_titre,$auteur,$tome)."-"; - $clef.=substr($this->alphaMaj(str_replace(" ","",$editeur)),0,80)."-"; - $clef.=$annee."-"; + $clef=$this->getClefOeuvre($titre,$complement_titre,$auteur,$tome).'-'; + $clef.=substr($this->alphaMaj(str_replace(' ','',$editeur)),0,80).'-'; + $clef.=$annee.'-'; $clef.=$type_doc; - $clef=str_replace(" ","",$clef); + $clef=str_replace(' ','',$clef); return $clef; } // Rend une clef alpha pour les oeuvres public function getClefOeuvre($titre,$complement_titre,$auteur,$tome) { - $clef = substr($this->codeAlphaTitre(strtoupper(str_replace(" ","",$titre))),0,80)."-"; - $clef.=substr($this->codeAlphaTitre(strtoupper(str_replace(" ","",$complement_titre))),0,20)."-"; - $clef.=substr($this->alphaMaj(str_replace(" ","",$auteur)),0,80)."-"; + $clef = substr($this->codeAlphaTitre(strtoupper(str_replace(' ','',$titre))),0,80).'-'; + $clef.=substr($this->codeAlphaTitre(strtoupper(str_replace(' ','',$complement_titre))),0,20).'-'; + $clef.=substr($this->alphaMaj(str_replace(' ','',$auteur)),0,80).'-'; $clef.=$this->alphaMaj($tome); - $clef=str_replace(" ","",$clef); + $clef=str_replace(' ','',$clef); return $clef; } @@ -183,9 +183,9 @@ class Class_Indexation { public function getMots( $chaine ) { $new=array(); - $chaine = str_replace( ".", "", $chaine); + $chaine = str_replace( '.', '', $chaine); $chaine = trim($this->alphaMaj($chaine)); - $mot = explode( " ", $chaine ); + $mot = explode( ' ', $chaine ); $index = 0; for( $i = 0; $i < count($mot); $i++) { @@ -193,7 +193,7 @@ class Class_Indexation { if( strLen($mot[$i]) < 3 And intVal($mot[$i])==false) { if( in_array( $mot[$i], $this->inclu) == false) continue; - if(strlen($mot[$i])==1) $mot[$i].="00"; // mots d'1 lettre : on double la lettre + if(strlen($mot[$i])==1) $mot[$i].='00'; // mots d'1 lettre : on double la lettre } // Retirer mots vides if( in_array($mot[$i], $this->exclu ) == true ) continue; @@ -205,21 +205,21 @@ class Class_Indexation { // Rend une chaine de mots dedoublonnes et filtres public function getFulltext($data) { - if (gettype($data) != "array") + if (gettype($data) != 'array') $data=array($data); - $new=" "; + $new=' '; foreach($data as $chaine) { $mots=$this->getMots($chaine); foreach($mots as $mot) { - $mot=" ".$mot." "; + $mot=' '.$mot.' '; if(strpos($new,$mot) === false ) { - $new.=trim($mot)." "; - $phonem=" ".$this->phonetix(trim($mot))." "; - if($phonem and strpos($new,$phonem) === false ) $new.=trim($phonem)." "; + $new.=trim($mot).' '; + $phonem=' '.$this->phonetix(trim($mot)).' '; + if($phonem and strpos($new,$phonem) === false ) $new.=trim($phonem).' '; } } } @@ -233,15 +233,15 @@ class Class_Indexation { if(!trim($mot)) return false; foreach($this->pluriel as $regle) { - if(strLeft($regle[0],1) != "*") + if(strLeft($regle[0],1) != '*') { if($mot==$regle[0] or $mot==$regle[1]) return $regle; else continue; } - $regle[0]=str_replace("*","",$regle[0]); - $regle[1]=str_replace("*","",$regle[1]); - $pattern_singulier = "(".$regle[0]."$)"; - $pattern_pluriel="(".$regle[1]."$)"; + $regle[0]=str_replace('*','',$regle[0]); + $regle[1]=str_replace('*','',$regle[1]); + $pattern_singulier = '('.$regle[0].'$)'; + $pattern_pluriel='('.$regle[1].'$)'; $pluriel=ereg_replace($pattern_singulier, $regle[1], $mot); $singulier=ereg_replace($pattern_pluriel, $regle[0], $mot); @@ -250,8 +250,8 @@ class Class_Indexation { // Si inchangé on ajoute le S if($singulier == $pluriel) { - if(strRight($mot,1)=="S") {$pluriel=$singulier; $singulier=strLeft($singulier,strlen($singulier)-1);} - else $pluriel=$singulier."S"; + if(strRight($mot,1)=='S') {$pluriel=$singulier; $singulier=strLeft($singulier,strlen($singulier)-1);} + else $pluriel=$singulier.'S'; } return array($singulier,$pluriel); } @@ -273,9 +273,7 @@ class Class_Indexation { return $this->phonetixCompute($sIn); } - function phonetixCompute($sIn) - { - //$sIn = utf8_decode($sIn); // Selon votre implémentation, vous aurez besoin de décoder ce qui arrive pour les caractères spéciaux + function phonetixCompute($sIn) { $sIn = strtr( $sIn, $this->accents); // minuscules accentuées ou composées en majuscules simples $sIn = strtr( $sIn, $this->min2maj); // majuscules accentuées ou composées en majuscules simples $sIn = strtoupper( $sIn ); // on passe tout le reste en majuscules @@ -291,23 +289,23 @@ class Class_Indexation { $sIn = preg_replace( '`(.)\1`', '$1', $sIn ); // supression des répétitions // quelques cas particuliers - if ($sIn=="CD") return($sIn); - if ($sIn=="BD") return($sIn); - if ($sIn=="BV") return($sIn); - if ($sIn=="TABAC") return("TABA"); - if ($sIn=="FEU") return("FE"); - if ($sIn=="FE") return($sIn); - if ($sIn=="FER") return($sIn); - if ($sIn=="FIEF") return($sIn); - if ($sIn=="FJORD") return($sIn); - if ($sIn=="GOAL") return("GOL"); - if ($sIn=="FLEAU") return("FLEO"); - if ($sIn=="HIER") return("IER"); - if ($sIn=="HEU") return("E"); - if ($sIn=="HE") return("E"); - if ($sIn=="OS") return($sIn); - if ($sIn=="RIZ") return("RI"); - if ($sIn=="RAZ") return("RA"); + if ($sIn=='CD') return($sIn); + if ($sIn=='BD') return($sIn); + if ($sIn=='BV') return($sIn); + if ($sIn=='TABAC') return('TABA'); + if ($sIn=='FEU') return('FE'); + if ($sIn=='FE') return($sIn); + if ($sIn=='FER') return($sIn); + if ($sIn=='FIEF') return($sIn); + if ($sIn=='FJORD') return($sIn); + if ($sIn=='GOAL') return('GOL'); + if ($sIn=='FLEAU') return('FLEO'); + if ($sIn=='HIER') return('IER'); + if ($sIn=='HEU') return('E'); + if ($sIn=='HE') return('E'); + if ($sIn=='OS') return($sIn); + if ($sIn=='RIZ') return('RI'); + if ($sIn=='RAZ') return('RA'); // pré-traitements $sIn = preg_replace( '`OIN[GT]$`', 'OIN', $sIn ); // terminaisons OING -> OIN @@ -471,7 +469,7 @@ class Class_Indexation { $sIn = str_replace( 'PTIE', 'TIE', $sIn ); // retouche PTIE -> TIE $sIn = str_replace( 'GT', 'T', $sIn ); // retouche GT -> T $sIn = str_replace( 'ANKIEM', 'ANKILEM', $sIn ); // retouche tranquillement - $sIn = preg_replace( "`(LO|RE)KEMAN`", "$1KAMAN", $sIn ); // KEMAN -> KAMAN + $sIn = preg_replace( '`(LO|RE)KEMAN`', '$1KAMAN', $sIn ); // KEMAN -> KAMAN $sIn = preg_replace( '`NT(B|M)`', 'N$1', $sIn ); // retouche TB -> B TM -> M $sIn = str_replace( 'GSU', 'SU', $sIn ); // retouche GS -> SU $sIn = str_replace( 'ESD', 'ED', $sIn ); // retouche ESD -> ED @@ -563,10 +561,10 @@ class Class_Indexation { if (strlen($sIn)<2) { // Sigles ou abréviations - if (preg_match("`[BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ]*`",$sBack)) + if (preg_match('`[BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ][BCDFGHJKLMNPQRSTVWXYZ]*`',$sBack)) return($sBack); - if (preg_match("`[RFMLVSPJDF][AEIOU]`",$sBack)) + if (preg_match('`[RFMLVSPJDF][AEIOU]`',$sBack)) { if (strlen($sBack)==3) return(substr($sBack,0,2));// mots de trois lettres supposés simples