From ae162e40929f342b766cdd1e338ec794ff5a9f6b Mon Sep 17 00:00:00 2001 From: Patrick Barroca <pbarroca@afi-sa.fr> Date: Mon, 26 Apr 2021 12:01:58 +0200 Subject: [PATCH] hotline #124923 : fix cyberlibris harvesting --- VERSIONS_HOTLINE/124923 | 1 + .../WebService/BibNumerique/Cyberlibris.php | 9 ++-- .../Cyberlibris/LivreNumerique.php | 22 ++++------ .../BibNumerique/Cyberlibris/Parser.php | 33 ++++++++++++++ .../Class/WebService/OAI/DublinCoreParser.php | 2 - .../ForRessourceNumerique.php | 12 ++---- tests/fixtures/cyberlibris_oai.xml | 3 +- .../Class/WebService/CyberlibrisTest.php | 43 +++++++++++++------ 8 files changed, 82 insertions(+), 43 deletions(-) create mode 100644 VERSIONS_HOTLINE/124923 create mode 100644 library/Class/WebService/BibNumerique/Cyberlibris/Parser.php diff --git a/VERSIONS_HOTLINE/124923 b/VERSIONS_HOTLINE/124923 new file mode 100644 index 00000000000..f0a4d7214fc --- /dev/null +++ b/VERSIONS_HOTLINE/124923 @@ -0,0 +1 @@ + - ticket #124923 : Ressources numériques : Maintenance du moissonnage des documents Bibliovox \ No newline at end of file diff --git a/library/Class/WebService/BibNumerique/Cyberlibris.php b/library/Class/WebService/BibNumerique/Cyberlibris.php index 96b300f3a3d..59701322369 100644 --- a/library/Class/WebService/BibNumerique/Cyberlibris.php +++ b/library/Class/WebService/BibNumerique/Cyberlibris.php @@ -27,9 +27,11 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume public function __construct() { - $this->_oaiws = (new Class_WebService_OAI()) + $this->_oaiws = (new Class_WebService_OAI) ->setOAIHandler(Class_AdminVar::get('CYBERLIBRIS_URL')) - ->setNumericResourceClass('Class_WebService_BibNumerique_Cyberlibris_LivreNumerique'); + ->setNumericResourceClass(Class_WebService_BibNumerique_Cyberlibris_LivreNumerique::class) + ->setParser(new Class_WebService_BibNumerique_Cyberlibris_Parser) + ; } @@ -39,7 +41,7 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume protected function getUrlOrigine() { - return self::BASE_URL; + return static::BASE_URL; } @@ -52,4 +54,3 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume return $this->_oaiws; } } -?> diff --git a/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php b/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php index e5bc1b7b879..fd3f732cdb1 100644 --- a/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php +++ b/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php @@ -23,12 +23,17 @@ class Class_WebService_BibNumerique_Cyberlibris_LivreNumerique extends Class_WebService_BibNumerique_RessourceNumerique { - protected $_record; - protected $_titre; + public function getBaseUrl() { + return Class_WebService_BibNumerique_Cyberlibris::BASE_URL; + } - public function getBaseUrl(){ - return Class_WebService_BibNumerique_Cyberlibris::BASE_URL; + public function import($update_constraints=false) { + if (!$this->getId()) + $this->_debug(sprintf('record without matching dc:identifier (%s)', + $this->getTitle())); + + return parent::import($update_constraints); } @@ -40,13 +45,4 @@ class Class_WebService_BibNumerique_Cyberlibris_LivreNumerique public function getRessourceCategorieLibelle() { return Class_WebService_BibNumerique_Cyberlibris::CATEGORY_LABEL; } - - - public function setOaiId($data) { - preg_match('|/book/([^/?]+)|i', $data, $matches) - ? ($this->_id = $matches[1]) - : $this->_debug('cannot read id from : ' . $data); - - return $this->setExternalURI($data); - } } \ No newline at end of file diff --git a/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php b/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php new file mode 100644 index 00000000000..96060489ec5 --- /dev/null +++ b/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php @@ -0,0 +1,33 @@ +<?php +/** + * Copyright (c) 2012-2021, Agence Française Informatique (AFI). All rights reserved. + * + * BOKEH is free software; you can redistribute it and/or modify + * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by + * the Free Software Foundation. + * + * There are special exceptions to the terms and conditions of the AGPL as it + * is applied to this software (see README file). + * + * BOKEH is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU AFFERO GENERAL PUBLIC LICENSE for more details. + * + * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE + * along with BOKEH; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +class Class_WebService_BibNumerique_Cyberlibris_Parser + extends Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique { + + public function enddc_identifier($data) { + if (!preg_match('|/book/([^/?]+)|i', $data, $matches)) + return; + + $this->_record['id_oai'] = $matches[1]; + $this->_record['relation'][] = $data; + } +} diff --git a/library/Class/WebService/OAI/DublinCoreParser.php b/library/Class/WebService/OAI/DublinCoreParser.php index c4c48ee87e9..8ebf69a5030 100644 --- a/library/Class/WebService/OAI/DublinCoreParser.php +++ b/library/Class/WebService/OAI/DublinCoreParser.php @@ -32,8 +32,6 @@ class Class_WebService_OAI_DublinCoreParser extends Class_WebService_OAI_ParserA protected $_record; - - public function endns1_record($data) { $this->endRecord($data); } diff --git a/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php b/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php index 89ad01f534b..e3d564287b2 100644 --- a/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php +++ b/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php @@ -19,16 +19,10 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -/* Sait extraire informations du XML retourné par - * l'opération OAI ListRecords, format DublinCore - * Functions: - * - parse($xml): analyse le xml donné - * - getRecords: retourne tous les enregistrements sous forme de tableau associatif - * - getResumptionToken: retourne le token qui permet de rechercher - * les enregistrements suivants - */ -class Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique extends Class_WebService_OAI_DublinCoreParser { +class Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique + extends Class_WebService_OAI_DublinCoreParser { + protected $visitor, $_records_ressources, diff --git a/tests/fixtures/cyberlibris_oai.xml b/tests/fixtures/cyberlibris_oai.xml index fb64c8d64b4..65f6870b352 100644 --- a/tests/fixtures/cyberlibris_oai.xml +++ b/tests/fixtures/cyberlibris_oai.xml @@ -20,6 +20,7 @@ <dc:date>2004</dc:date> <dc:language>fr</dc:language> <dc:identifier>https://www.bibliovox.com/book/10104055</dc:identifier> + <dc:identifier>https://static2.cyberlibris.com/books_upload/136pix/9782847690279.jpg</dc:identifier> <dc:rights> copyrighted </dc:rights> </oai_dc:dc></metadata> </record> @@ -80,7 +81,7 @@ <p>Après un bilan sans concession, l'auteur donne des pistes d'actions concrètes pour revoir la façon de gérer la carrière des seniors. Cet ouvrage vous aidera à réaliser un état des lieux de la GRH des seniors dans votre propre entreprise. Il explique aussi quelles actions lancer à court terme pour sensibiliser le management à la gestion des âges... <br /></p>]]></dc:description> <dc:date>2005</dc:date> <dc:language>fr</dc:language> - <dc:identifier>https://www.bibliovox.com/book/10104077</dc:identifier> + <dc:identifier>https://www.bibliovox.com/</dc:identifier> <dc:rights> copyrighted </dc:rights> </oai_dc:dc></metadata> </record> diff --git a/tests/library/Class/WebService/CyberlibrisTest.php b/tests/library/Class/WebService/CyberlibrisTest.php index e03eacedce6..7ff02a3d252 100644 --- a/tests/library/Class/WebService/CyberlibrisTest.php +++ b/tests/library/Class/WebService/CyberlibrisTest.php @@ -21,15 +21,15 @@ include_once('tests/fixtures/RessourcesNumeriquesFixtures.php'); class CyberlibrisHarvestSaveTest extends ModelTestCase { - protected $_storm_default_to_volatile = true; + protected $_log = "\n"; public function setUp() { parent::setUp(); Class_AdminVar::set('CYBERLIBRIS_URL', 'http://oai-bibliovox.cyberlibris.fr/oai.aspx'); - $catalogue_xml = file_get_contents(realpath(dirname(__FILE__)). '/../../../fixtures/cyberlibris_oai.xml'); - $catalogue_xml_fin = file_get_contents(realpath(dirname(__FILE__)). '/../../../fixtures/cyberlibris_oai_fin.xml'); + $catalogue_xml = file_get_contents(__DIR__ . '/../../../fixtures/cyberlibris_oai.xml'); + $catalogue_xml_fin = file_get_contents(__DIR__ . '/../../../fixtures/cyberlibris_oai_fin.xml'); $this->_http_client = $this ->mock() @@ -43,8 +43,11 @@ class CyberlibrisHarvestSaveTest extends ModelTestCase { ->beStrict(); - $this->_service = new Class_WebService_BibNumerique_Cyberlibris(); Class_WebService_BibNumerique_Cyberlibris::setDefaultHttpClient($this->_http_client); + $logger = $this->mock()->whenCalled('log') + ->willDo(function($message) { $this->_log .= $message . "\n"; }); + Class_WebService_BibNumerique_RessourceNumerique::setLogger($logger); + $this->_service = (new Class_WebService_BibNumerique_Cyberlibris)->setLogger($logger); $this->_service->harvest(); } @@ -68,33 +71,45 @@ class CyberlibrisHarvestSaveTest extends ModelTestCase { /** @test */ - public function firstAlbumShouldNotHavePoster() { + public function logShouldContainsRecordWithoutMatchingIdentifier() { + $this->assertContains('record without matching dc:identifier (Manager la ', $this->_log); + } + + + /** @test */ + public function firstBookShouldHaveNoPoster() { $this->assertEquals('', Class_Album::find(1)->getPoster()); } /** @test */ - public function secondBookShouldBe() { - $this->assertContains('Stimuler la créativité', Class_Album::find(2)->getTitre()); + public function firstBookExternalURIShouldBeBookSlash10104055() { + $this->assertEquals('https://www.bibliovox.com/book/10104055', + Class_Album::find(1)->getExternalUri()); } /** @test */ - public function lastBookAuthorShouldBeVerneEtienne() { - $this->assertEquals('Verne, Etienne', Class_Album::find(4)->getAuthorsNames()[0]); + public function firstBookIdOrigineShouldBe10104055() { + $this->assertEquals('10104055', Class_Album::find(1)->getIdOrigine()); } /** @test */ - public function idShouldBeSet() { - $this->assertEquals('10104081', Class_Album::find(4)->getIdOrigine()); + public function secondBookTitleShouldBeStimulerLaCreativite() { + $this->assertContains('Stimuler la créativité', Class_Album::find(2)->getTitre()); } /** @test */ - public function firstAlbumExternalURIShouldBeSet() { - $this->assertEquals('https://www.bibliovox.com/book/10104055', - Class_Album::find(1)->getExternalUri()); + public function lastBookAuthorShouldBeVerneEtienne() { + $this->assertEquals('Verne, Etienne', Class_Album::find(4)->getAuthorsNames()[0]); + } + + + /** @test */ + public function lastBookIdOrigineShouldBe10104081() { + $this->assertEquals('10104081', Class_Album::find(4)->getIdOrigine()); } } -- GitLab