diff --git a/FEATURES/55588 b/FEATURES/55588 new file mode 100644 index 0000000000000000000000000000000000000000..87bd4b6a894244a320215022d3c830f062991360 --- /dev/null +++ b/FEATURES/55588 @@ -0,0 +1,10 @@ + '55588' => + ['Label' => $this->_('Indexation en texte intégral des documents liés (pdf, html)'), + 'Desc' => $this->_('Bokeh permet d\'indexer les fichiers décrits dans une zone des notices et présents dans sur le serveur d\'hébergement'), + 'Image' => '', + 'Video' => '', + 'Category' => '', + 'Right' => function($feature_description, $user) {return true;}, + 'Wiki' => 'http://wiki.bokeh-library-portal.org/index.php?title=Indexation_de_fichiers_li%C3%A9s_aux_notices', + 'Test' => '', + 'Date' => '2019-07-19'], \ No newline at end of file diff --git a/VERSIONS_WIP/55588 b/VERSIONS_WIP/55588 new file mode 100644 index 0000000000000000000000000000000000000000..9b1ab1393cbd92d7ebad4c9ff8b92baf5ed8b16f --- /dev/null +++ b/VERSIONS_WIP/55588 @@ -0,0 +1 @@ + - ticket #55588 : Intégration : Indexation en texte intégral des documents liés aux notices (pdf, html) \ No newline at end of file diff --git a/cosmogramme/cosmozend/tests/application/modules/cosmo/controllers/DataProfileControllerTest.php b/cosmogramme/cosmozend/tests/application/modules/cosmo/controllers/DataProfileControllerTest.php index bdcfdc9d61191bd3893c37c9ed287e954b00e074..07e6fed3bb9c2ed0d56cc176cafcef88612b90bf 100644 --- a/cosmogramme/cosmozend/tests/application/modules/cosmo/controllers/DataProfileControllerTest.php +++ b/cosmogramme/cosmozend/tests/application/modules/cosmo/controllers/DataProfileControllerTest.php @@ -358,6 +358,18 @@ class Cosmo_DataProfileControllerEditUnimarcKohaTest extends Cosmo_DataProfileCo } + /** @test */ + public function multiInputIndexFilesShouldBePresent() { + $this->assertXPath('//form//div[@id="multi_inputs_index_files"]'); + } + + + /** @test */ + public function inputIndexFileUriRegexDefaultValueShouldBeUserfilesFilesStarPDF() { + 
$this->assertXPathContentContains('//script', '"index_file_uri_regex":["\/userfiles\/files\/[a-zA-Z0-9_\\\-]+\\\.pdf"]'); + } + + /** @test */ public function selectRejetPeriodiquesShoudHaveNoSelected() { $this->assertXPathContentContains('//form//select[@id="rejet_periodiques"]/option[@value="0"][@selected]', 'Non'); @@ -431,6 +443,9 @@ class Cosmo_DataProfileControllerPostEditUnimarcKohaFileFormatTest extends Cosmo 'new'], 'interest_zone' => [0 => '932'], 'interest_champ' => [0 => '7'], + 'index_file_zone' => ['856'], + 'index_file_field' => ['u'], + 'index_file_uri_regex' => ['/userfiles/files/public/.+.pdf'], 'holds' => 'SUPPORT; SUPPORT', 'carts' => 'LIBELLE; ROLE', 'csv_item_fields' => 'ean;ean', @@ -549,10 +564,29 @@ class Cosmo_DataProfileControllerPostEditUnimarcKohaFileFormatTest extends Cosmo public function interestFieldShouldBe7() { $this->assertEquals('7', $this->_koha->getInterestField()); } + + + /** @test */ + public function indexFileZoneShouldBe856() { + $this->assertEquals('856', $this->_koha->getIndexFileZone()); + } + + + /** @test */ + public function indexFileFieldShouldBeU() { + $this->assertEquals('u', $this->_koha->getIndexFileField()); + } + + + /** @test */ + public function indexFildUriRegexShouldBeUserfilesPublic() { + $this->assertEquals('/userfiles/files/public/.+.pdf', $this->_koha->getIndexFileUriRegex()); + } } + class Cosmo_DataProfileControllerPostEditFieldsTest extends Cosmo_DataProfileControllerTestCase { protected $_nanook_profile; @@ -1110,7 +1144,10 @@ class Cosmo_DataProfileControllerMultiValuesTest extends Cosmo_DataProfileContro 4 => [], 5 => [], 6 => ['zone' => '995', - 'champ' => 'z'] + 'champ' => 'z'], + 7 => ['index_file_zone' => ['934'], + 'index_file_field' => ['u'], + 'index_file_uri_regex' => ['/.*']] ]]); $this->dispatch('cosmo/data-profile/edit/id/321', true); @@ -1146,6 +1183,15 @@ class Cosmo_DataProfileControllerMultiValuesTest extends Cosmo_DataProfileContro $this->assertXPathContentContains('//script', 
'values:{"1_label":["am","bm","","em","mm","","","","","","",""],"1_zone":["BDA","BDJ","LFA","LFJ","LDA","LDJ","LCDA","LCDJ","PATIMP","PATMS","PERIP","PATINC"]}', $this->_response->getBody()); } + + + /** @test */ + public function indexFileShouldContainsZone934FieldURegexStar() { + $this->assertXPathContentContains('//script', 'values:{"index_file_zone":["934"],"index_file_field":["u"],"index_file_uri_regex":["\/.*"]}', + $this->_response->getBody()); + } + + } diff --git a/cosmogramme/php/classes/classe_notice_integration.php b/cosmogramme/php/classes/classe_notice_integration.php index 180443e837fa3cdda08c18c7618badf4129c8153..f5ab6bbc33e5206816846855835e3171f8ddcee5 100644 --- a/cosmogramme/php/classes/classe_notice_integration.php +++ b/cosmogramme/php/classes/classe_notice_integration.php @@ -512,6 +512,7 @@ class notice_integration { 'collection' => $this->indexation->getfullText($this->notice['collection']), 'matieres' => $this->indexation->getfullText($this->_getFulltextSubjects()), 'dewey' => $this->indexation->getfullText($this->notice['full_dewey']), + 'file_content' => $this->notice['file_content'], 'facettes' => $this->notice['facettes'], 'isbn' => $this->notice['isbn'], 'ean' => $this->notice['ean'], diff --git a/cosmogramme/php/classes/classe_profil_donnees.php b/cosmogramme/php/classes/classe_profil_donnees.php index e5beeeb933cb2812b3c19bf9300fc98421272880..c6f51ca655322388bdeae9a4ac94a437da73ea2e 100644 --- a/cosmogramme/php/classes/classe_profil_donnees.php +++ b/cosmogramme/php/classes/classe_profil_donnees.php @@ -20,283 +20,274 @@ */ class profil_donnees { - const - HOMOGENIZATION_ISBN = -1, - HOMOGENIZATION_EAN = -2; - - protected static - $_profil_cache = [], - $_type_docs_cache = null; - - private $id_profil; // Id sgbd - private $libelle; // Libellé du profil - private $rejet_periodiques; // Rejet des periodiques dans les imports - private $id_article_periodique; // Mode de reconnaissance pour les articles de periodiques - private 
$accents=0; // Types de caractères accentués - private $type_fichier=0; // Type de fichier à parser (variable type_fichier) - private $format; // Format du fichier à parser (variable import_format) - private $attributs; // Bloc de donnees associe au format - - protected $_to_array=[]; //legacy refactoring @see getProfil - - - public static function clearCache() { - static::$_profil_cache = []; - static::$_type_docs_cache = null; - } - - public static function find($id_profil) { - if (isset(static::$_profil_cache[$id_profil])) - return static::$_profil_cache[$id_profil]; - - $profil = new static(); - $profil->lire($id_profil); - - return static::$_profil_cache[$profil->getId()] = $profil; - } - - - public function getId() { - return $this->id_profil; - } - - - public function lire($id_profil) { - if (!static::$_type_docs_cache) - static::$_type_docs_cache = Class_TypeDoc::findAll(); - - $all_type_docs = static::$_type_docs_cache; - - // Profils d'homogeneisation : -1= isbn -2=ean - if ($id_profil < 0) { - $this->getProfilStandard($id_profil); - return $id_profil; - } - - if (!$data = Class_IntProfilDonnees::find($id_profil)) { - if ($data = Class_IntProfilDonnees::find(1)) { - $this->lire(1); - $this->id_profil=0; - $this->libelle="** nouveau profil **"; - return 0; - } - - // Sinon on initialise a vide - $this->id_profil=0; - $this->libelle="** nouveau profil **"; - $this->accents=0; - $this->rejet_periodiques=0; - $this->id_article_periodique=0; - $this->type_fichier=0; - $this->format=0; - $this->attributs=array(); - - // Init structure unimarc - foreach($all_type_docs as $type_doc) { - $this->attributs[0]["type_doc"][] = ['label' => [], - 'zone_995' => []]; - } - return 0; - } - - - $this->id_profil = $id_profil; - $this->libelle = $data->getLibelle(); - $this->accents = $data->getAccents(); - $this->rejet_periodiques = $data->getRejetPeriodiques(); - $this->id_article_periodique = $data->getIdArticlePeriodique(); - $this->type_fichier = $data->getTypeFichier(); 
- $this->format = $data->getFormat(); - $this->attributs=unserialize($data->getAttributs()); - - // Decompacter et consolider les types de docs - $td = $this->attributs[0]["type_doc"]; - foreach($all_type_docs as $i => $type_doc) { + const + HOMOGENIZATION_ISBN = -1, + HOMOGENIZATION_EAN = -2; + + protected static + $_profil_cache = [], + $_type_docs_cache = null; + + private $id_profil; // Id sgbd + private $libelle; // Libellé du profil + private $rejet_periodiques; // Rejet des periodiques dans les imports + private $id_article_periodique; // Mode de reconnaissance pour les articles de periodiques + private $accents=0; // Types de caractères accentués + private $type_fichier=0; // Type de fichier à parser (variable type_fichier) + private $format; // Format du fichier à parser (variable import_format) + private $attributs; // Bloc de donnees associe au format + + protected $_to_array=[]; //legacy refactoring @see getProfil + + + public static function clearCache() { + static::$_profil_cache = []; + static::$_type_docs_cache = null; + } + + public static function find($id_profil) { + if (isset(static::$_profil_cache[$id_profil])) + return static::$_profil_cache[$id_profil]; + + $profil = new static(); + $profil->lire($id_profil); + + return static::$_profil_cache[$profil->getId()] = $profil; + } + + + public function __call($method, $args) { + if (method_exists($this->_data_profile, $method)) + return call_user_func_array(array($this->_data_profile, $method), $args); + + throw new RuntimeException('Call to undefined method profil_donnees::' . $method); + } + + + public function getId() { + return $this->id_profil; + } + + + protected static function _getTypeDocs() { + return static::$_type_docs_cache + ? 
static::$_type_docs_cache + : static::$_type_docs_cache = Class_TypeDoc::findAll(); + } + + + public function lire($id_profil) { + // Profils d'homogeneisation : -1= isbn -2=ean + if ($id_profil < 0) + return $this->_init($id_profil, + $id_profil == self::HOMOGENIZATION_ISBN + ? Class_IntProfilDonnees::forIsbnHomogenization() + : Class_IntProfilDonnees::forEanHomogenization()); + + if ($data_profile = Class_IntProfilDonnees::find($id_profil)) + return $this->_init($id_profil, $data_profile); + + if ($data_profile = Class_IntProfilDonnees::find(1)) { + $this->_init(0, $data_profile); + $this->libelle = '** nouveau profil **'; + return 0; + } + + $attribs = []; + foreach($this->_getTypeDocs() as $type_doc) { + $this->attributs[0]['type_doc'][] = ['label' => '', + 'zone_995' => '']; + } + + $this->_init(0, + Class_IntProfilDonnees::newInstance(['libelle' => '** nouveau profil **', + 'attributs' => serialize($attribs)])); + return 0; + } + + + protected function _init($id_profil, $data_profile) { + $this->id_profil = $id_profil; + $this->_data_profile = $data_profile; + + $this->libelle = $this->_data_profile->getLibelle(); + $this->accents = $this->_data_profile->getAccents(); + $this->rejet_periodiques = $this->_data_profile->getRejetPeriodiques(); + $this->id_article_periodique = $this->_data_profile->getIdArticlePeriodique(); + $this->type_fichier = $this->_data_profile->getTypeFichier(); + $this->format = $this->_data_profile->getFormat(); + $this->attributs = unserialize($this->_data_profile->getAttributs()); + + // Decompacter et consolider les types de docs + $td = $this->attributs[0]["type_doc"]; + foreach($this->_getTypeDocs() as $i => $type_doc) { $this->attributs[0]["type_doc"][$i]["code"] = $type_doc->getId(); - $this->attributs[0]["type_doc"][$i]["libelle"] = $type_doc->getLibelle(); - $this->attributs[0]["type_doc"][$i]["label"] = array(); - $this->attributs[0]["type_doc"][$i]["zone_995"] = array(); + $this->attributs[0]["type_doc"][$i]["libelle"] = 
$type_doc->getLibelle(); + $this->attributs[0]["type_doc"][$i]["label"] = []; + $this->attributs[0]["type_doc"][$i]["zone_995"] = []; for($j=0; $j < count($td); $j++) { if(!isset($td[$j]["label"])) continue; - if($td[$j]["code"] == $type_doc->getId()) { - $this->attributs[0]["type_doc"][$i]["label"]=explode(";",$td[$j]["label"]); - $this->attributs[0]["type_doc"][$i]["zone_995"]=explode(";",$td[$j]["zone_995"]); - break; - } - } - } - - // decompacter et consolider champs xml - $champs=getCodifsVariable("champs_abonne"); - foreach($champs as $champ) { - $code=$champ["code"]; - if($code !="NULL" and !isset($this->attributs[5]["xml_champs_abonne"][$code])) + if($td[$j]["code"] == $type_doc->getId()) { + $this->attributs[0]["type_doc"][$i]["label"]=explode(";",$td[$j]["label"]); + $this->attributs[0]["type_doc"][$i]["zone_995"]=explode(";",$td[$j]["zone_995"]); + break; + } + } + } + + // decompacter et consolider champs xml + $champs = getCodifsVariable("champs_abonne"); + foreach($champs as $champ) { + $code = $champ["code"]; + if ($code != "NULL" and !isset($this->attributs[5]["xml_champs_abonne"][$code])) $this->attributs[5]["xml_champs_abonne"][$code]=""; - } - return $this->id_profil; - } - - - public function toArray() { - $profil["id_profil"]=$this->id_profil; - $profil["libelle"]=$this->libelle; - $profil["accents"]=$this->accents; - $profil["rejet_periodiques"]=$this->rejet_periodiques; - $profil["id_article_periodique"]=$this->id_article_periodique; - $profil["type_fichier"]=$this->type_fichier; - $profil["format"]=$this->format; - $profil["attributs"]=$this->attributs; - - if(!isset($profil["attributs"][0]["champ_cote"]) + } + + return $this->id_profil; + } + + + public function toArray() { + $profil["id_profil"]=$this->id_profil; + $profil["libelle"]=$this->libelle; + $profil["accents"]=$this->accents; + $profil["rejet_periodiques"]=$this->rejet_periodiques; + $profil["id_article_periodique"]=$this->id_article_periodique; + 
$profil["type_fichier"]=$this->type_fichier; + $profil["format"]=$this->format; + $profil["attributs"]=$this->attributs; + + if(!isset($profil["attributs"][0]["champ_cote"]) || !$profil["attributs"][0]["champ_cote"]) $profil["attributs"][0]["champ_cote"] = "k"; - if(!isset($profil["attributs"][0]["champ_url"])) - $profil["attributs"][0]["champ_url"] = ['zone' => '', 'champ' => '']; - - return $profil; - } - - public function getProfil($id_profil) { - return static::find($id_profil)->toArray(); - } - - - /** - * Profils standard réhomogénéisations : -1: panier d'isbn -2:panier d'ean - */ - private function getProfilStandard($id_profil) { - $this->id_profil=$id_profil; - $this->accents=1; - $this->rejet_periodiques=0; - $this->id_article_periodique=0; - $this->type_fichier=0; - $this->format=1; - if($id_profil == self::HOMOGENIZATION_ISBN) { - $this->libelle="Homogénéisation d'isbn"; - $this->attributs[1]["champs"]="isbn"; - } else { - $this->libelle="Homogénéisation d'ean"; - $this->attributs[1]["champs"]="ean"; - } - } - - - public function getCombo($valeur) { - global $sql; - $data=$sql->fetchAll("Select id_profil,libelle from profil_donnees"); - $combo='<select name="profil">'; - for($i=0; $i<count($data); $i++) - { - $lig=$data[$i]; - if($valeur==$lig["id_profil"]) $selected=" selected"; else $selected=""; - $combo.='<option value="'.$lig["id_profil"].'"'.$selected.'>'.$lig["libelle"].'</option>'; - } - $combo.='</select>'; - return $combo; - } - - - public function ecrire($id_profil,$libelle,$accents,$rejet_periodiques,$type_fichier,$format,$attributs,$id_article_periodique) { - //tracedebug(1,$attributs,true); - if(!trim($libelle)) return false; - global $sql; - $attributs=serialize($attributs); - $data=compact("id_profil","libelle","accents","rejet_periodiques","type_fichier","format","attributs","id_article_periodique"); - if( $id_profil == 0 ) $sql->insert("profil_donnees", $data); - else $sql->update("Update profil_donnees set @SET@ Where id_profil 
='$id_profil'",$data); - } - - - public function getTypeDoc($label, $z995r, $z995p) { - if (isset($z995p[0]) && (strToUpper(substr(trim($z995p[0]), 0, 1)) == 'P')) - return ['code' => 2, 'libelle' => 'Périodiques']; - - if ($this->isArticlePeriodique($label)) - return ['code' => Class_TypeDoc::SERIAL_ARTICLE, 'libelle' => 'Article de périodique']; - - // First we check 995$r subfield. - foreach($this->attributs[0]["type_doc"] as $td) { - if ($this->isTypeDocMatchOneBiblioItem($z995r, $td)) - return ['code' => $td['code'], 'libelle' => $td['libelle']]; - } - - // Then, we check label. - foreach($this->attributs[0]["type_doc"] as $td) { - if ($this->isTypeDocMatchLabel($label, $td)) - return ['code' => $td['code'], 'libelle' => $td['libelle']]; - } - - return ['code' => 0, 'libelle' => 'non identifié']; - } - - - public function isTypeDocMatchOneBiblioItem($z995, $td) { - $z995 = array_map('strtolower', $z995); + if(!isset($profil["attributs"][0]["champ_url"])) + $profil["attributs"][0]["champ_url"] = ['zone' => '', 'champ' => '']; + + return $profil; + } + + + public function getProfil($id_profil) { + return static::find($id_profil)->toArray(); + } + + + public function getCombo($valeur) { + global $sql; + $data=$sql->fetchAll("Select id_profil,libelle from profil_donnees"); + $combo='<select name="profil">'; + for($i=0; $i<count($data); $i++) + { + $lig=$data[$i]; + if($valeur==$lig["id_profil"]) $selected=" selected"; else $selected=""; + $combo.='<option value="'.$lig["id_profil"].'"'.$selected.'>'.$lig["libelle"].'</option>'; + } + $combo.='</select>'; + return $combo; + } + + + public function ecrire($id_profil,$libelle,$accents,$rejet_periodiques,$type_fichier,$format,$attributs,$id_article_periodique) { + //tracedebug(1,$attributs,true); + if(!trim($libelle)) return false; + global $sql; + $attributs=serialize($attributs); + $data=compact("id_profil","libelle","accents","rejet_periodiques","type_fichier","format","attributs","id_article_periodique"); + if( 
$id_profil == 0 ) $sql->insert("profil_donnees", $data); + else $sql->update("Update profil_donnees set @SET@ Where id_profil ='$id_profil'",$data); + } + + + public function getTypeDoc($label, $z995r, $z995p) { + if (isset($z995p[0]) && (strToUpper(substr(trim($z995p[0]), 0, 1)) == 'P')) + return ['code' => 2, 'libelle' => 'Périodiques']; + + if ($this->isArticlePeriodique($label)) + return ['code' => Class_TypeDoc::SERIAL_ARTICLE, 'libelle' => 'Article de périodique']; + + // First we check 995$r subfield. + foreach($this->attributs[0]["type_doc"] as $td) { + if ($this->isTypeDocMatchOneBiblioItem($z995r, $td)) + return ['code' => $td['code'], 'libelle' => $td['libelle']]; + } + + // Then, we check label. + foreach($this->attributs[0]["type_doc"] as $td) { + if ($this->isTypeDocMatchLabel($label, $td)) + return ['code' => $td['code'], 'libelle' => $td['libelle']]; + } + + return ['code' => 0, 'libelle' => 'non identifié']; + } + + + public function isTypeDocMatchOneBiblioItem($z995, $td) { + $z995 = array_map('strtolower', $z995); $identification_codes = is_array($td["zone_995"]) - ? array_map('strtolower', array_filter($td["zone_995"])) - : []; + ? 
array_map('strtolower', array_filter($td["zone_995"])) + : []; - foreach($identification_codes as $identification_code) { - if ($z995[0] == $identification_code) - return true; - } - return false; - } + foreach($identification_codes as $identification_code) { + if ($z995[0] == $identification_code) + return true; + } + return false; + } - public function isTypeDocMatchLabel($label, $td) { - if (!trim($label) || !is_array($td["label"])) - return false; + public function isTypeDocMatchLabel($label, $td) { + if (!trim($label) || !is_array($td["label"])) + return false; - foreach(array_filter($td["label"]) as $item) { - if (0 === strpos($label, $item)) { - return true; - } - } - return false; - } + foreach(array_filter($td["label"]) as $item) { + if (0 === strpos($label, $item)) { + return true; + } + } + return false; + } //--------------------------------------------------------------------------------- // rend les infos d'un fichier a itegrer //--------------------------------------------------------------------------------- - public function getInfosFichierIntegration($id_integration) - { - // enregistrement integration - if(!$id_integration) return false; - $data=fetchEnreg("select * from integrations where id=$id_integration"); - if(!$data) return false; - - // infos integration - $path=getVariable("integration_path"); - $ret["fichier"]=$data["fichier"]; - if(file_exists($path.$ret["fichier"])) - { - $ret["taille"]=(int)filesize($path.$ret["fichier"])/1024; - $ret["taille"]=number_format($ret["taille"],0,","," ")." 
ko"; - $ret["taille"]=str_replace(" "," ",$ret["taille"]); - } - else $ret["taille"]="?"; - - // type du fichier - $type_fic=fetchOne("select type_fichier from profil_donnees where id_profil=".$data["profil"]); - $ret["type_fichier"]=getLibCodifVariable("type_fichier",$type_fic); - - // retour - return $ret; - } - - - public function isArticlePeriodique($label) { - switch($this->id_article_periodique) { - case 1: if($label=="aa") return true; // pergame - case 2: if($label=="aa") return true; // opsys indexpresse - default: return false; - } - } + public function getInfosFichierIntegration($id_integration) + { + // enregistrement integration + if(!$id_integration) return false; + $data=fetchEnreg("select * from integrations where id=$id_integration"); + if(!$data) return false; + + // infos integration + $path=getVariable("integration_path"); + $ret["fichier"]=$data["fichier"]; + if(file_exists($path.$ret["fichier"])) + { + $ret["taille"]=(int)filesize($path.$ret["fichier"])/1024; + $ret["taille"]=number_format($ret["taille"],0,","," ")." 
ko"; + $ret["taille"]=str_replace(" "," ",$ret["taille"]); + } + else $ret["taille"]="?"; + + // type du fichier + $type_fic=fetchOne("select type_fichier from profil_donnees where id_profil=".$data["profil"]); + $ret["type_fichier"]=getLibCodifVariable("type_fichier",$type_fic); + + // retour + return $ret; + } + + + public function isArticlePeriodique($label) { + switch($this->id_article_periodique) { + case 1: if($label=="aa") return true; // pergame + case 2: if($label=="aa") return true; // opsys indexpresse + default: return false; + } + } } ?> \ No newline at end of file diff --git a/cosmogramme/php/classes/classe_unimarc.php b/cosmogramme/php/classes/classe_unimarc.php index aee82556b88fdc97aab999103cb7a543326083b0..6226945fbe4d5e4f0f8bce19f0df0e6782644a6f 100644 --- a/cosmogramme/php/classes/classe_unimarc.php +++ b/cosmogramme/php/classes/classe_unimarc.php @@ -195,6 +195,7 @@ class notice_unimarc extends iso2709_record { $notice["langues"] = $this->getLangues(); $notice["champs_forces"] = $this->getChampsForces(); $notice["interet"] = $this->getCentreInteret(); + $notice["file_content"] = $this->getFileContent(); $notice["statut_exemplaires"] = $ex["statut_exemplaires"]; $notice["exemplaires"] = isset($ex["exemplaires"]) ? $ex["exemplaires"] : []; @@ -1351,6 +1352,12 @@ class notice_unimarc extends iso2709_record { } + public function getFileContent() { + return (new Class_Cosmogramme_Integration_Record_FileContent()) + ->getContent($this, $this->profil_unimarc); + } + + public function getCentreInteret() { $zone_interet = (!$this->profil['attributs'][6]['zone']) ? 
'932' diff --git a/cosmogramme/sql/patch/patch_377.php b/cosmogramme/sql/patch/patch_377.php new file mode 100644 index 0000000000000000000000000000000000000000..b3e0f41496964afc5da6c7c5061c7db2a5dea664 --- /dev/null +++ b/cosmogramme/sql/patch/patch_377.php @@ -0,0 +1,14 @@ +<?php +$adapter = Zend_Db_Table_Abstract::getDefaultAdapter(); +try { + $adapter->query("alter table notices + add column `file_content` longtext not null default '', + add fulltext key `file_content` (`file_content`)" ); +} catch (Exception $e) {} + + +try { + $adapter->query("alter table notices + add column `file_content` longtext character set latin1 not null default '', + add fulltext key `file_content` (`file_content`)"); +} catch (Exception $e) {} diff --git a/cosmogramme/tests/php/classes/CarthameIntegrationTest.php b/cosmogramme/tests/php/classes/CarthameIntegrationTest.php index cdf69fc11d9d1b81a65ec6aaabf0c63d634085c4..38240eff0c6940c3043df297fbc1836ab0819eab 100644 --- a/cosmogramme/tests/php/classes/CarthameIntegrationTest.php +++ b/cosmogramme/tests/php/classes/CarthameIntegrationTest.php @@ -132,6 +132,4 @@ class TangoMangoCarthameIntegrationTest extends CarthameIntegrationTestCase { $notice = Class_Notice::find(7939934); $this->assertEquals('Tangomango n° 2<br /> La gazette du pirate', $notice->getTitrePrincipal()); } - - } diff --git a/cosmogramme/tests/php/classes/KohaRecordIntegrationTest.php b/cosmogramme/tests/php/classes/KohaRecordIntegrationTest.php index 9af40c220138a19314f03e2d9e181c5c82f9a1f8..9a3ec802b4a3cd6b84700cccf85355c72d0aea23 100644 --- a/cosmogramme/tests/php/classes/KohaRecordIntegrationTest.php +++ b/cosmogramme/tests/php/classes/KohaRecordIntegrationTest.php @@ -469,6 +469,12 @@ class KohaRecordIntegrationVagabondWithTooMany610aTest extends KohaRecordIntegra public function shouldNotCreateThesaurusFor610_a() { $this->assertNull(Class_CodifThesaurus::findFirstBy(['libelle' => 'Manga'])); } + + + /** @test */ + public function fileContentShouldBeEmpty() { + 
$this->assertEmpty($this->_notice->getFileContent()); + } } @@ -500,6 +506,7 @@ class KohaRecordIntegrationEscapableAnnexeCodesTest extends KohaRecordIntegratio + class KohaRecordIntegrationBdMilleniumWithAuthorityTest extends KohaRecordIntegrationTestCase { public function setUp() { parent::setUp(); @@ -610,4 +617,123 @@ class KohaRecordIntegrationDeduplicateTest extends KohaRecordIntegrationTestCase $this->assertEquals(2, Class_Exemplaire::countBy(['id_int_bib' => 2, 'id_origine' => 397126])); } -} \ No newline at end of file +} + + + + +abstract class KohaRecordIntegrationPommeWithAttachedFileTestCase + extends KohaRecordIntegrationTestCase { + + public function setUp() { + parent::setUp(); + + Class_Indexation_File_Html::setFileInfoFactory($this->_getHtmlFileInfoFactory()); + Class_Indexation_File_PDF::setCommand($this->_getPdfCommand()); + + $this->loadNotice('unimarc_pomme'); + $this->_notice = Class_Notice::find(1); + } + + + protected function _getPdfCommand() { + return $this->mock() + ->whenCalled('exec') + ->with('pdftotext -nopgbrk -raw \'' . USERFILESPATH . 
'/files/public/pomme.pdf\' -') + ->answers(0) + + ->whenCalled('getOutput') + ->willDo(function() { return ['chlorprophame']; }) + ->beStrict(); + } + + + protected function _getHtmlFileInfoFactory() { + return new KohaRecordIntegrationFileInfoFactory(); + } + + + public function tearDown() { + Class_Indexation_File_Html::setFileInfoFactory(null); + Class_Indexation_File_PDF::setCommand(null); + parent::tearDown(); + } + + + /** @test */ + public function titleShouldBeLaPommeDeTerre() { + $this->assertEquals('La pomme de terre', $this->_notice->getTitrePrincipal()); + } +} + + + + +class KohaRecordIntegrationPommeWithAttachedPdfFileTest + extends KohaRecordIntegrationPommeWithAttachedFileTestCase { + + public function getProfilDonnees() { + return Class_IntProfilDonnees::forKoha() + ->setIdProfil(111) + ->setIndexFile('856', 'u', '/userfiles/files/public/[a-zA-Z0-9_\-]+\.pdf') + ->getRawAttributes(); + } + + + /** @test */ + public function fileContentShouldContainsChlorprophame() { + $this->assertContains('chlorprophame', $this->_notice->getFileContent()); + } +} + + + + +class KohaRecordIntegrationPommeWithAttachedPdfAndHtmlFileTest + extends KohaRecordIntegrationPommeWithAttachedFileTestCase { + + public function getProfilDonnees() { + return Class_IntProfilDonnees::forKoha() + ->setIdProfil(111) + ->setIndexFile('856', 'u', '/userfiles/files/public/[a-zA-Z0-9_\-]+\.(pdf|htm)') + ->getRawAttributes(); + } + + + protected function _getPdfCommand() { + return $this->mock() + ->whenCalled('exec') + ->with('pdftotext -nopgbrk -raw \'' . USERFILESPATH . 
'/files/public/pomme.pdf\' -') + ->answers(0) + + ->whenCalled('getOutput') + ->willDo(function() { return ['chlorprophame']; }) + ->beStrict(); + } + + + protected function _getHtmlFileInfoFactory() { + return new KohaRecordIntegrationFileInfoFactory(); + } + + + /** @test */ + public function fileContentShouldContainsHtmlContentWithoutTags() { + $this->assertEquals('super', $this->_notice->getFileContent()); + } +} + + + +class KohaRecordIntegrationFileInfoFactory { + public function __invoke($path) { + $wrapper = Storm_Test_ObjectWrapper::on(new SplFileInfo($path)) + ->whenCalled('isFile')->answers(true) + ->whenCalled('isReadable')->answers(true); + + if ('html' == substr($path, -4)) + $wrapper->whenCalled('getContents')->answers('<p>super</p>'); + + return $wrapper; + } +} diff --git a/cosmogramme/tests/php/classes/NoticeIntegrationTest.php b/cosmogramme/tests/php/classes/NoticeIntegrationTest.php index 236d240ea668e84f9fc0e01eb29e06fcad1b4c1b..5624e8b80a044b8e5ce1e5b1b8f1545e74cc857e 100644 --- a/cosmogramme/tests/php/classes/NoticeIntegrationTest.php +++ b/cosmogramme/tests/php/classes/NoticeIntegrationTest.php @@ -498,6 +498,7 @@ class NoticeIntegrationSupertrampWithElectreAndPcmd4GeneratedNoticeRecordTest ex class NoticeIntegrationKohaNeonWithPcmd4GeneratedNoticeRecordTest extends NoticeIntegrationTestCase { protected $_storm_default_to_volatile = true; + public function setUp() { parent::setUp(); @@ -506,9 +507,17 @@ class NoticeIntegrationKohaNeonWithPcmd4GeneratedNoticeRecordTest extends Notice $this->notice_integration = new notice_integration(); - $this->notice_integration->setParamsIntegration(1, 0, 1); + $this->notice_integration->setParamsIntegration(1, 0, 110); } + + public function getProfilDonnees() { + return Class_IntProfilDonnees::forKoha() + ->setIdProfil(110) + ->getRawAttributes(); + } + + public function processUnimarc($file){ $this->notice_integration->traiteNotice(file_get_contents(dirname(__FILE__)."/".$file)); 
$this->notice_integration->traiteFacettes(); diff --git a/cosmogramme/tests/php/classes/pomme.pdf b/cosmogramme/tests/php/classes/pomme.pdf new file mode 100644 index 0000000000000000000000000000000000000000..03540046580d04e896b9c62b8a29b37251e0661c Binary files /dev/null and b/cosmogramme/tests/php/classes/pomme.pdf differ diff --git a/cosmogramme/tests/php/classes/unimarc_pomme.txt b/cosmogramme/tests/php/classes/unimarc_pomme.txt new file mode 100644 index 0000000000000000000000000000000000000000..a340f44b575f49c1686ccc5255de4296a0a78cd2 --- /dev/null +++ b/cosmogramme/tests/php/classes/unimarc_pomme.txt @@ -0,0 +1 @@ +00917 2200241 45000010005000000100025000050900009000301000041000392000048000802100025001282150037001532250016001901010008002063300120002146150017003346760010003516060034003617000027003954100016004228560038004388560039004769950160005154414 a2740417659d9.00 EUR a4414 a20040402d2004 a |0frey50 ||||ba1 a[La ]pomme de terrefTexte Anne RoyerbLIVR 1cMango-Jeunessed2004 1a32 p.cill. en coul.d25 x 20 cm1 aQui es-tu ? afre aPour tout savoir sur la pomme de terre : son histoire, sa plantation, sa récolte, les différentes varietés, etc. aDocumentaire 1a635.2 196068aLégumexPomme de terre 196067aRoyerbAnne4070 1tQui es-tu ? u/userfiles/files/public/pomme.pdf u/userfiles/files/public/pomme.html 2095418bMAUREScMAURESeDocumentaires adultef00519000087170kE 635.2m2014-12-23o0rLIVRh8717jLibrairie jeunesse au pays bleqJeunesp9,00s17/07/2004 \ No newline at end of file diff --git a/library/Class/Cosmogramme/Integration/Record/FileContent.php b/library/Class/Cosmogramme/Integration/Record/FileContent.php new file mode 100644 index 0000000000000000000000000000000000000000..da5cf54de168dd2136c4a82b271a94c9f1f74108 --- /dev/null +++ b/library/Class/Cosmogramme/Integration/Record/FileContent.php @@ -0,0 +1,72 @@ +<?php +/** + * Copyright (c) 2012-2019, Agence Française Informatique (AFI). All rights reserved. 
/**
 * Extracts the indexable text of the local files referenced by a record.
 *
 * The data profile tells which zone / subfield holds the file URIs and which
 * regular expression an URI must match to be considered. Progress is reported
 * to an optional observer through notifyMatch, notifyEmpty and notifySource.
 */
class Class_Cosmogramme_Integration_Record_FileContent {
  /** @var Trait_Observer */
  protected $_observer;


  /**
   * @param object|null $observer receiver of the notifyXXX calls; when none
   *                              is given a do-nothing observer is used
   */
  public function __construct($observer=null) {
    $this->_observer = $observer;
  }


  /**
   * @param object $record  must answer get_subfield($zone, $field)
   * @param object $profile must answer getIndexFileZone(), getIndexFileField()
   *                        and getIndexFileUriRegex()
   * @return string extracted text, '' when the profile has no file indexation
   *                settings, when no URI matches or when nothing was extracted
   */
  public function getContent($record, $profile) {
    $zone = $profile->getIndexFileZone();
    $field = $profile->getIndexFileField();
    if ('' === $zone || '' === $field)
      return '';

    // '%' is used as the pattern delimiter, so it is escaped in the configured expression
    $uri_regex = '%' . str_replace('%', '\%', $profile->getIndexFileUriRegex()) . '%';

    $paths = [];
    foreach($record->get_subfield($zone, $field) as $path) {
      if (!preg_match($uri_regex, $path))
        continue;

      // end() takes its argument by reference: it must be given a real
      // variable, not the direct result of explode()
      $segments = explode('/' . USERFILES, $path);
      $paths[] = USERFILESPATH . end($segments);
    }

    if (!$paths)
      return '';

    $observer = $this->_getObserver();
    $observer->notifyMatch($record, $paths);

    $indexation_file = new Class_Indexation_File;
    if ('' == ($content = $indexation_file->getContent($paths))) {
      $observer->notifyEmpty($record);
      return '';
    }

    $observer->notifySource($record, $indexation_file->getContentSource());
    return $content;
  }


  /**
   * @return object the injected observer, or a null observer created once
   *                and kept for the following calls
   */
  protected function _getObserver() {
    if (!$this->_observer)
      $this->_observer = new Class_Cosmogramme_Integration_Record_FileContent_NullObserver();

    return $this->_observer;
  }
}



/** Observer accepting every notifyXXX call and doing nothing with it. */
class Class_Cosmogramme_Integration_Record_FileContent_NullObserver {
  use Trait_Observer;
}
/**
 * Returns the text of the first file, among a list of paths, from which one
 * of the known extractors manages to get some content.
 *
 * Extractors are tried in declaration order: every path accepted by the
 * first extractor is tried before moving on to the next extractor.
 */
class Class_Indexation_File {
  protected
    $_extractors,
    $_content_source;

  public function __construct() {
    $this->_extractors = ['Class_Indexation_File_Html',
                          'Class_Indexation_File_PDF'];
  }


  /**
   * @param array $paths candidate file paths
   * @return string extracted content, '' when nothing could be extracted
   */
  public function getContent($paths) {
    $this->_content_source = null;

    if (!$paths)
      return '';

    // first pass: sort out which paths each extractor accepts
    $accepted = [];
    foreach($this->_extractors as $class_name) {
      $accepted[$class_name] = array_filter($paths,
                                            function($candidate) use($class_name) {
                                              $handler = new $class_name();
                                              return $handler->shouldHandle($candidate);
                                            });
    }

    // second pass: stop at the first extractor producing something
    foreach($accepted as $class_name => $candidates) {
      $content = $this->_extractWith(new $class_name(), $candidates);
      if ($content)
        return $content;
    }

    return '';
  }


  /**
   * @return string|null path of the file the last content came from
   */
  public function getContentSource() {
    return $this->_content_source;
  }


  /**
   * @return string|null content of the first path giving a non empty
   *                     result, null when none does
   */
  protected function _extractWith($extractor, $paths) {
    foreach($paths as $candidate) {
      $content = $extractor->getContent($candidate);
      if (!$content)
        continue;

      $this->_content_source = $candidate;
      return $content;
    }

    return null;
  }
}
/**
 * Base class of the file content extractors.
 *
 * File system access goes through a file info object, which tests can
 * replace with setFileInfoFactory().
 */
abstract class Class_Indexation_File_Extractor {
  protected static $_file_info_factory;

  /** @category testing */
  public static function setFileInfoFactory($file_info_factory) {
    static::$_file_info_factory = $file_info_factory;
  }


  /**
   * @return object result of the injected factory when there is one, a
   *                Class_Indexation_File_FileInfo otherwise
   */
  protected static function _getFileInfo($path) {
    $factory = static::$_file_info_factory;
    if ($factory)
      return $factory($path);

    return new Class_Indexation_File_FileInfo($path);
  }


  /**
   * @return bool true when $path is an existing and readable regular file
   */
  public function shouldHandle($path) {
    $info = static::_getFileInfo($path);
    if (!$info->isFile())
      return false;

    return (bool)$info->isReadable();
  }


  /**
   * @return string textual content of the file at $path
   */
  abstract public function getContent($path);
}
/** SplFileInfo able to give back the whole content of the file it describes. */
class Class_Indexation_File_FileInfo extends SplFileInfo {
  /**
   * @return string|false what file_get_contents() answers for this path
   */
  public function getContents() {
    $pathname = $this->getPathname();
    return file_get_contents($pathname);
  }
}
/** Extracts the text of HTML files by removing their markup. */
class Class_Indexation_File_Html extends Class_Indexation_File_Extractor {
  /**
   * @return bool true for a readable file whose extension is htm or html,
   *              whatever its case
   */
  public function shouldHandle($path) {
    if (!parent::shouldHandle($path))
      return false;

    // the extension may be written in upper case (page.HTML)
    $extension = strtolower($this->_getFileInfo($path)->getExtension());
    return in_array($extension, ['htm', 'html'], true);
  }


  /**
   * @return string file content without tags, '' when the file could not be read
   */
  public function getContent($path) {
    // NOTE(review): the PDF extractor url-decodes $path before using it, this
    // one does not -- confirm which form callers are expected to give
    if (false === ($content = $this->_getFileInfo($path)->getContents()))
      return '';

    // line breaks become spaces so that the words around them stay apart;
    // covers <br>, <br/>, <br /> and upper case variants
    $spaced = preg_replace('%<br\s*/?>%i', ' ', $content);

    return strip_tags((string)$spaced);
  }
}
/** Extracts the text of PDF files with the pdftotext command line tool. */
class Class_Indexation_File_PDF extends Class_Indexation_File_Extractor {
  use Trait_StaticCommand;

  /**
   * @param string $path possibly url-encoded file path
   * @return bool true for a readable file whose extension is pdf, whatever its case
   */
  public function shouldHandle($path) {
    $path = urldecode($path);
    if (!parent::shouldHandle($path))
      return false;

    // the extension may be written in upper case (scan.PDF)
    return 'pdf' === strtolower($this->_getFileInfo($path)->getExtension());
  }


  /**
   * @param string $path possibly url-encoded file path
   * @return string output lines of pdftotext joined with spaces
   */
  public function getContent($path) {
    $command = $this->getCommand();
    // the trailing "-" makes pdftotext write on standard output
    $command->exec('pdftotext -nopgbrk -raw ' . escapeshellarg(urldecode($path)) . ' -');

    return implode(' ', $command->getOutput());
  }
}
/**
 * Stores the file indexation settings in slot 7 of the serialized attributes.
 *
 * @param string $zone  zone holding the file URIs
 * @param string $field subfield holding the file URIs
 * @param string $regex expression an URI must match to be indexed
 * @return mixed what setAttributs() answers
 */
public function setIndexFile($zone, $field, $regex) {
  $config = unserialize($this->getAttributs());
  // unserialize() answers false when the stored value cannot be decoded;
  // start from an empty configuration rather than writing an offset into false
  if (!is_array($config))
    $config = [];

  $config[7] = ['index_file_zone' => [$zone],
                'index_file_field' => [$field],
                'index_file_uri_regex' => [$regex]];
  return $this->setAttributs($config);
}
visitExpression($expression, $pertinence, $tri, $in_files) { // Analyse de l'expression $expression = trim($expression); if ($expression == '*') @@ -156,18 +155,27 @@ class Class_MoteurRecherche { if (trim(Class_CosmoVar::get('other_index_fields'))) $axes []= 'other_terms'; - $this->setCondition('MATCH(' . implode(', ', $axes) . ')' . $against); + $quoted_expression = Zend_Db_Table_Abstract::getDefaultAdapter()->quote($expression); + $match_condition = 'MATCH(' . implode(', ', $axes) . ')' . $against; + if ($in_files) + $match_condition = '(' . $match_condition . ' OR MATCH(file_content) AGAINST(' . $quoted_expression .' IN BOOLEAN MODE))'; + + $this->setCondition($match_condition); if (($tri and $tri !== '*') and !$pertinence) return; $against_titre = $terms->asSelectAgainst(); - $match_weights = ['titres' => '1.5', 'auteurs' => '1']; + $match_weights = ['titres' => '1.5', + 'auteurs' => '1']; $match_weights_string = []; foreach($match_weights as $axe => $weight) $match_weights_string []= 'MATCH(' . $axe . ') ' . $against_titre . ( $weight === '1' ? '' :' * ' . $weight); + if ($in_files) + $match_weights_string[] = 'MATCH(file_content) AGAINST('. $quoted_expression . ') * 0.5'; + $this->order_by = 'order by (' . implode(') + (', $match_weights_string) . 
/**
 * @return string the first 180 characters of the file content followed by
 *                an ellipsis
 */
public function getFileContentFirstWords() {
  // mb_substr counts characters: a byte-wise cut could stop in the middle
  // of a multibyte character and leave an invalid sequence.
  // NOTE(review): assumes the content is stored as UTF-8 -- confirm
  return mb_substr((string)$this->getFileContent(), 0, 180, 'UTF-8') . '…';
}
/**
 * @return array file indexation settings found in slot 7 of the preferences
 *               when present and not empty, otherwise defaults with an empty
 *               zone and field
 */
public function getFileIndexation() {
  $prefs = $this->getPrefs();
  if (isset($prefs[7]) && $prefs[7])
    return $prefs[7];

  return ['index_file_zone' => [''],
          'index_file_field' => [''],
          'index_file_uri_regex' => ['/userfiles/files/[a-zA-Z0-9_\-]+\.pdf']];
}
/**
 * Picks the file indexation settings out of the submitted data.
 *
 * @return array one entry per setting; a setting absent from the submitted
 *               data is given as [''] so that readers taking its first
 *               element get an empty string instead of null
 */
protected function _extractFileIndexation() {
  $extracted = [];
  foreach(['index_file_zone', 'index_file_field', 'index_file_uri_regex'] as $key)
    $extracted[$key] = isset($this->_datas[$key])
      ? $this->_datas[$key]
      : [''];

  return $extracted;
}
/**
 * Lets a class receive any notifyXXX message: messages it does not
 * implement itself are accepted and ignored.
 */
trait Trait_Observer {
  /**
   * @return $this for any method whose name starts with "notify"
   * @throws RuntimeException for any other unknown method
   */
  public function __call($name, $args) {
    $is_notification = (0 === strpos($name, 'notify'));
    if (!$is_notification)
      throw new RuntimeException('Call to unknown method ' . get_class($this) . '::' . $name);

    // allow call to any notifyXXXXX method without error
    return $this;
  }
}
/**
 * Fills the index_files element with the file indexation preferences.
 *
 * @return $this
 */
public function populateFileIndexation() {
  $element = $this->index_files;
  $element->setValues($this->_profile_prefs->getFileIndexation());

  return $this;
}
'facettes' => $this->_('Facettes'), 'clef_alpha' => $this->_('Clé alpha'), diff --git a/library/ZendAfi/View/Helper/Search/SearchMode.php b/library/ZendAfi/View/Helper/Search/SearchMode.php new file mode 100644 index 0000000000000000000000000000000000000000..b8b7387c47f478448e4f2fdb14a6c2f7945190b9 --- /dev/null +++ b/library/ZendAfi/View/Helper/Search/SearchMode.php @@ -0,0 +1,34 @@ +<?php +/** + * Copyright (c) 2012-2014, Agence Française Informatique (AFI). All rights reserved. + * + * BOKEH is free software; you can redistribute it and/or modify + * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by + * the Free Software Foundation. + * + * There are special exceptions to the terms and conditions of the AGPL as it + * is applied to this software (see README file). + * + * BOKEH is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU AFFERO GENERAL PUBLIC LICENSE for more details. + * + * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE + * along with BOKEH; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +class ZendAfi_View_Helper_Search_SearchMode extends ZendAfi_View_Helper_BaseHelper { + public function search_SearchMode($search_criteria) { + $instance = (new Class_Entity()) + ->setKey('in_files') + ->setLabel($this->_('Mode de recherche')) + ->setValue($search_criteria->getInFiles() ? 
// Interactive command line tool: asks for a bibliographic data profile, then
// recomputes the file content of every bibliographic record with it and
// writes a JSON report.
error_reporting(E_ERROR | E_PARSE);
require(__DIR__.'/../console.php');


echo "\n\nWelcome to the iReindex Files content 4K 3D tool by @patbator\n\n";

$data_profile = null;

$profiles = Class_IntProfilDonnees::findAllOfTypeBibliographic();
$message = "Tell me which data profile to use: \n\n";
$possibles = [];

foreach($profiles as $profile) {
  $message .= "\t" . sprintf("[%s] %s", $profile->getId(), $profile->getLibelle()) . "\n";
  $possibles[] = $profile->getId();
}
$message .= "\n";

// without any profile the question below could never be answered
if (!$possibles) {
  echo "No bibliographic data profile found, nothing to do \n\n";
  exit(1);
}

while(!$data_profile) {
  echo $message;

  // readline() answers false once standard input is closed: stop there
  // instead of asking again forever
  if (false === ($answer = readline("Please iReindex use : "))) {
    echo "\n\nNo more input, aborting \n\n";
    exit(1);
  }

  $id = (int)$answer;
  if (!in_array($id, $possibles)) {
    echo $id . " is not an existing profile, try again \n\n";
    continue;
  }

  if (!$data_profile = Class_IntProfilDonnees::find($id)) {
    echo "data profile not found try again \n\n";
    continue;
  }

  if ('' === $data_profile->getIndexFileZone()
      || '' === $data_profile->getIndexFileField()) {
    $data_profile = null;
    echo "data profile does not have file content configuration, try again \n\n";
    continue;
  }

  echo "File content will be detected with profile : " . $data_profile->getLibelle() . "\n\n";
}

echo sprintf("\n\n==== %s records to handle ====\n\n",
             Class_Notice::countBy(['type' => Class_Notice::TYPE_BIBLIOGRAPHIC]));


/**
 * Recomputes the file content of records and collects, as observer of the
 * extraction, what is needed for the final summary and report.
 */
class Scripts_Local_File_Content_Indexer {
  use Trait_Observer;

  protected
    $_profile,
    $_total_count = 0,
    $_matches = [],
    $_empties = [],
    $_sources = [],
    $_start_time,
    $_end_time;

  public function __construct($profile) {
    $this->_profile = $profile;
    $this->_start_time = new DateTime();
  }


  /** Extracts, stores and saves the file content of one record. */
  public function index($record) {
    $this->_total_count++;
    echo '.';
    $record->setFileContent((new Class_Cosmogramme_Integration_Record_FileContent($this))
                            ->getContent($record, $this->_profile));
    $record->save();
    $this->_end_time = new DateTime();
  }


  /** Remembers the paths matching the profile expression for a record. */
  public function notifyMatch($record, $paths) {
    $this->_matches[$record->getId()] = $paths;
    return $this;
  }


  /** Remembers a record whose matching files gave no content. */
  public function notifyEmpty($record) {
    $this->_empties[] = $record->getId();
    return $this;
  }


  /** Remembers the file the content of a record was taken from. */
  public function notifySource($record, $path) {
    $this->_sources[$record->getId()] = $path;
    return $this;
  }


  /** @return string summary, one tab-indented line per part */
  public function summarize() {
    return "\t" . implode("\n\t", $this->_summaryParts());
  }


  /**
   * Writes the JSON report in PATH_TEMP.
   *
   * @return string message telling where the report is, or that it could
   *                not be written
   */
  public function report() {
    $report_path = PATH_TEMP . 'reindex_local_file_content_report_' . $this->_start_time->format('Ymd_His') . '.json';
    $wrote = file_put_contents($report_path,
                               json_encode(['summary' => $this->_summaryParts(),
                                            'matches' => $this->_matches,
                                            'sources' => $this->_sources,
                                            'empties' => $this->_empties],
                                           JSON_PRETTY_PRINT));
    return (false !== $wrote)
      ? 'See full report in ' . $report_path
      : 'Could not write report in ' . $report_path;
  }


  protected function _summaryParts() {
    $elapsed_minutes = $this->_elapsedMinutes();

    $parts = [count($this->_matches) . ' records with matching URIs'];
    if ($this->_empties)
      $parts[] = 'In which ' . count($this->_empties) . ' produced empty content';

    if (0 < $elapsed_minutes && 0 < $this->_total_count)
      $parts[] = 'Took ' . $elapsed_minutes . ' minutes to handle ' . $this->_total_count . ' records (~' . floor($this->_total_count/$elapsed_minutes) . '/mn)';

    return $parts;
  }


  /**
   * @return int whole minutes between construction and the last indexed
   *             record, 0 when nothing was indexed
   */
  protected function _elapsedMinutes() {
    if (!$this->_end_time)
      return 0;

    // computed from timestamps: the "i" property of a DateInterval is only
    // the minutes part (0-59) of the difference, not its total in minutes
    $seconds = $this->_end_time->getTimestamp() - $this->_start_time->getTimestamp();
    return (int)floor($seconds / 60);
  }
}



$indexer = new Scripts_Local_File_Content_Indexer($data_profile);
$page = 1;
while ($records = Class_Notice::findAllBy(['type' => Class_Notice::TYPE_BIBLIOGRAPHIC,
                                           'limitPage' => [$page, 1000]])) {
  echo "\npage: $page\n";
  $page ++;
  array_map([$indexer, 'index'], $records);

  // release what was loaded for this page before fetching the next one
  Storm_Model_Abstract::unsetLoaders();
  Storm_Model_Loader::resetCache();
  gc_collect_cycles();
}

echo "\n\n" . $indexer->summarize();
echo "\n\n" . $indexer->report();
echo "\n\nDONE !!!!\n\n";
$this->assertXPath('//form[@class="rechSimpleForm"]//input[@name="in_files"][@value="1"][@type="hidden"]'); + } + + /** @test */ public function comboRechSimpleTypeDocShouldOnlyContainsTypesOneTwoAndFour() { foreach([1,2,4] as $id) diff --git a/tests/db/UpgradeDBTest.php b/tests/db/UpgradeDBTest.php index 6fe50e6c25d2d2df3378674852c57a904e90d833..8a6b634e934a2dfb26575fe9fedae1eb1cc07d82 100644 --- a/tests/db/UpgradeDBTest.php +++ b/tests/db/UpgradeDBTest.php @@ -2692,14 +2692,12 @@ class UpgradeDB_373_Test extends UpgradeDBTestCase { '1' => 'Bibliothèque + codes-barres'], Class_CosmoVar::getList('unicite_code_barres')); } - } class UpgradeDB_374_Test extends UpgradeDBTestCase { - public function prepare() { $this->silentQuery("ALTER TABLE `bib_c_site` DROP COLUMNS notify_on_new_resa, notify_on_new_user;"); } @@ -2756,3 +2754,26 @@ class UpgradeDB_376_Test extends UpgradeDBTestCase { $this->assertFieldType('codif_auteur', 'thumbnail_url', 'varchar(255)'); } } + + + + +class UpgradeDB_377_Test extends UpgradeDBTestCase { /* The two tests below expect notices.file_content to be a longtext column carrying a FULLTEXT index. NOTE(review): presumably exercises upgrade step 377, inferred from the class name - confirm. */ + public function prepare() { /* Issues two silent ALTER TABLE queries dropping the file_content column, then the file_content key, from notices. NOTE(review): presumably resets the schema so the upgrade has to recreate both, and silentQuery presumably ignores SQL errors - confirm in UpgradeDBTestCase. */ + $this + ->silentQuery('ALTER TABLE notices DROP COLUMN file_content') + ->silentQuery('ALTER TABLE notices DROP KEY file_content'); + } + + + /** @test */ + public function tableNoticesShouldContainsColumnFileContent() { + $this->assertFieldType('notices', 'file_content', 'longtext'); + } + + + /** @test */ + public function noticesColumnFileContentShouldBeIndexed() { + $this->assertIndex('notices', 'file_content', 'FULLTEXT'); + } +} \ No newline at end of file diff --git a/tests/library/Class/CriteresRechercheTest.php b/tests/library/Class/CriteresRechercheTest.php index 5fb75af5c018bc01a7f9dd1742c6e675a3c2b32d..74ff176628f37e20a7830b9d05e67674e09ac74e 100644 --- a/tests/library/Class/CriteresRechercheTest.php +++ b/tests/library/Class/CriteresRechercheTest.php @@ -69,12 +69,14 @@ class CriteresRechercheRetourTest extends ModelTestCase { 'facettes' => 'B1-T1']], [['expressionRecherche' => 'La nouvelle grille', + 
'in_files' => '1', 'facettes' => 'B1-zork', 'facette' => '1345'], ['controller' => 'recherche', 'action' => 'simple', 'expressionRecherche' => 'La nouvelle grille', + 'in_files' => '1', 'facettes' => 'B1']], [['expressionRecherche' => '', diff --git a/tests/library/Class/MoteurRechercheTest.php b/tests/library/Class/MoteurRechercheTest.php index 243f2e09136d6e109fe85db794a67320a4037050..357037f7e01f3dfcf0e3573e849a23f4ec6d7bc1 100644 --- a/tests/library/Class/MoteurRechercheTest.php +++ b/tests/library/Class/MoteurRechercheTest.php @@ -256,6 +256,13 @@ class MoteurRechercheSimpleTest extends MoteurRechercheTestCase { 'req_liste' => $this->listSqlWith($match_axes ." AGAINST('+(BAKOUNINE BAKOUNINES BAKOUNIN)' IN BOOLEAN MODE)", "(MATCH(titres) AGAINST(' BAKOUNINE') * 1.5) + (MATCH(auteurs) AGAINST(' BAKOUNINE')) desc")], + [['expressionRecherche' => 'Bakounine', + 'in_files' => 1], + 'nb_mots' => 1, + 'req_liste' => $this->listSqlWith('(' . $match_axes ." AGAINST('+(BAKOUNINE BAKOUNINES BAKOUNIN)' IN BOOLEAN MODE) OR MATCH(file_content) AGAINST('Bakounine' IN BOOLEAN MODE))", + "(MATCH(titres) AGAINST(' BAKOUNINE') * 1.5) + (MATCH(auteurs) AGAINST(' BAKOUNINE')) + (MATCH(file_content) AGAINST('Bakounine') * 0.5) desc")], + + [['expressionRecherche' => 'Slavoj Zizek', 'tri' => 'alpha_titre'] , 'nb_mots' => 2, diff --git a/tests/library/ZendAfi/View/Helper/TagRechercheSimpleTest.php b/tests/library/ZendAfi/View/Helper/TagRechercheSimpleTest.php index c5292c70bd17ad3e2c069c11c4099d0c243dd9cd..07055a6c466b740010455ee040b1823d86e475bd 100644 --- a/tests/library/ZendAfi/View/Helper/TagRechercheSimpleTest.php +++ b/tests/library/ZendAfi/View/Helper/TagRechercheSimpleTest.php @@ -32,9 +32,8 @@ abstract class ZendAfi_View_Helper_TagRechercheSimpleTestCase extends ViewHelper ->whenCalled('saveThesaurus') ->answers(null); - $view = new ZendAfi_Controller_Action_Helper_View(); $this->_helper = new ZendAfi_View_Helper_TagRechercheSimple(); - $this->_helper->setView($view); + 
$this->_helper->setView($this->view); $this->_preferences = (new Class_Systeme_ModulesAccueil_RechercheSimple())->getDefaultValues(); } @@ -358,7 +357,8 @@ class ZendAfi_View_Helper_TagRechercheSimpleAdvancedSearchTest -class ZendAfi_View_Helper_TagRechercheSimpleDocTypeTest extends ZendAfi_View_Helper_TagRechercheSimpleTestCase { +class ZendAfi_View_Helper_TagRechercheSimpleDocTypeTest + extends ZendAfi_View_Helper_TagRechercheSimpleTestCase { public function setUp() { @@ -380,23 +380,18 @@ class ZendAfi_View_Helper_TagRechercheSimpleDocTypeTest extends ZendAfi_View_Hel ->whenCalled('getTable') ->answers($table); - $this->preferences['select_doc'] = '1'; - $this->preferences['tri'] = '*'; - $this->preferences['message'] = ''; - $this->preferences['placeholder'] = ''; - $this->preferences['largeur'] = ''; - $this->preferences['exemple'] = ''; - $this->preferences['select_annexe'] = ''; - $this->preferences['domain_ids'] = ''; - $this->preferences['select_bib'] = ''; - $this->preferences['recherche_avancee'] = ''; + $this->_preferences['select_doc'] = '1'; + $this->_preferences['largeur'] = ''; + $this->_preferences['select_annexe'] = ''; + $this->_preferences['select_bib'] = ''; + $this->_preferences['recherche_avancee'] = ''; } /** @test */ public function tousShouldBeSelected() { - $this->preferences['type_doc'] = ''; - $this->_html = $this->_helper->tagRechercheSimple($this->preferences, 1); + $this->_preferences['type_doc'] = ''; + $this->_html = $this->_helper->tagRechercheSimple($this->_preferences, 1); $this->assertXPathContentContains($this->_html, '//option[@value=""][@selected]', 'tous'); $this->assertXPathContentContains($this->_html, '//option[@value="0"][not(@selected)]', 'Non identifi'); } @@ -404,16 +399,16 @@ class ZendAfi_View_Helper_TagRechercheSimpleDocTypeTest extends ZendAfi_View_Hel /** @test */ public function livresShouldBeSelected() { - $this->preferences['type_doc'] = '1'; - $this->_html = 
$this->_helper->tagRechercheSimple($this->preferences, 1); + $this->_preferences['type_doc'] = '1'; + $this->_html = $this->_helper->tagRechercheSimple($this->_preferences, 1); $this->assertXPathContentContains($this->_html, '//option[@value="1"][@selected]', 'Livres'); } /** @test */ public function nonIdentifieShouldBeSelected() { - $this->preferences['type_doc'] = '0'; - $this->_html = $this->_helper->tagRechercheSimple($this->preferences, 1); + $this->_preferences['type_doc'] = '0'; + $this->_html = $this->_helper->tagRechercheSimple($this->_preferences, 1); $this->assertXPathContentContains($this->_html, '//option[@value="0"][@selected]', 'Non identifi'); $this->assertXPathContentContains($this->_html, '//option[@value=""][not(@selected)]', 'tous'); } diff --git a/tests/scenarios/SearchResult/SearchResultTest.php b/tests/scenarios/SearchResult/SearchResultTest.php index a089ced7d727c63a046101b55253bde82a2762b3..2e4c525cbd2698f5c18f85073a222221d19db314 100644 --- a/tests/scenarios/SearchResult/SearchResultTest.php +++ b/tests/scenarios/SearchResult/SearchResultTest.php @@ -32,7 +32,7 @@ class SearchResultHeaderTest extends AbstractControllerTestCase { ->setController('recherche') ->setAction('resultat') ->setSubAction('simple'), - ['header_composition' => 'Advanced;History;Display;Order;PageSize;']); + ['header_composition' => 'Advanced;History;Display;Order;PageSize;SearchMode']); $this->dispatch('/opac/recherche/pomme', true); } @@ -84,6 +84,14 @@ class SearchResultHeaderTest extends AbstractControllerTestCase { public function domainSelectorShouldBeAnAutoCompleteSelector() { $this->assertXPath('//form//input[@name="rech_collection"]'); } + + + /** @test */ + public function searchModeWidgetShouldSelect() { /* Expects a form on the dispatched page to hold a select named in_files whose selected option has value 0 and contains the text Index seulement. NOTE(review): presumably rendered by the SearchMode entry appended to header_composition in the setup hunk above - confirm. */ + $this->assertXPathContentContains('//form//select[@name="in_files"]//option[@selected][@value="0"]', + 'Index seulement', + $this->_response->getBody()); + } } diff --git a/tests/scenarios/Security/SearchTest.php b/tests/scenarios/Security/SearchTest.php 
index 8e747c73728aaece7e12b84aac2d4f5d008efd42..871731f719c8c0006aee056b1bd0c693032ff99d 100644 --- a/tests/scenarios/Security/SearchTest.php +++ b/tests/scenarios/Security/SearchTest.php @@ -48,7 +48,8 @@ class Security_SearchTest extends AbstractControllerTestCase { 'type_doc ' => '1', 'annexe' => '1', 'section' => '1', - 'genre' => '']; + 'genre' => '', + 'in_files' => 1]; $this->onLoaderOfModel('Class_TypeDoc') ->whenCalled('findUsedTypeDocIds')