From ae162e40929f342b766cdd1e338ec794ff5a9f6b Mon Sep 17 00:00:00 2001
From: Patrick Barroca <pbarroca@afi-sa.fr>
Date: Mon, 26 Apr 2021 12:01:58 +0200
Subject: [PATCH] hotline #124923 : fix cyberlibris harvesting

---
 VERSIONS_HOTLINE/124923                       |  1 +
 .../WebService/BibNumerique/Cyberlibris.php   |  9 ++--
 .../Cyberlibris/LivreNumerique.php            | 22 ++++------
 .../BibNumerique/Cyberlibris/Parser.php       | 33 ++++++++++++++
 .../Class/WebService/OAI/DublinCoreParser.php |  2 -
 .../ForRessourceNumerique.php                 | 12 ++----
 tests/fixtures/cyberlibris_oai.xml            |  3 +-
 .../Class/WebService/CyberlibrisTest.php      | 43 +++++++++++++------
 8 files changed, 82 insertions(+), 43 deletions(-)
 create mode 100644 VERSIONS_HOTLINE/124923
 create mode 100644 library/Class/WebService/BibNumerique/Cyberlibris/Parser.php

diff --git a/VERSIONS_HOTLINE/124923 b/VERSIONS_HOTLINE/124923
new file mode 100644
index 00000000000..f0a4d7214fc
--- /dev/null
+++ b/VERSIONS_HOTLINE/124923
@@ -0,0 +1 @@
+ - ticket #124923 : Ressources numériques : Maintenance du moissonnage des documents Bibliovox
\ No newline at end of file
diff --git a/library/Class/WebService/BibNumerique/Cyberlibris.php b/library/Class/WebService/BibNumerique/Cyberlibris.php
index 96b300f3a3d..59701322369 100644
--- a/library/Class/WebService/BibNumerique/Cyberlibris.php
+++ b/library/Class/WebService/BibNumerique/Cyberlibris.php
@@ -27,9 +27,11 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume
 
 
   public function __construct() {
-    $this->_oaiws = (new Class_WebService_OAI())
+    $this->_oaiws = (new Class_WebService_OAI)
       ->setOAIHandler(Class_AdminVar::get('CYBERLIBRIS_URL'))
-      ->setNumericResourceClass('Class_WebService_BibNumerique_Cyberlibris_LivreNumerique');
+      ->setNumericResourceClass(Class_WebService_BibNumerique_Cyberlibris_LivreNumerique::class)
+      ->setParser(new Class_WebService_BibNumerique_Cyberlibris_Parser)
+      ;
   }
 
 
@@ -39,7 +41,7 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume
 
 
   protected function getUrlOrigine() {
-    return self::BASE_URL;
+    return static::BASE_URL;
   }
 
 
@@ -52,4 +54,3 @@ class Class_WebService_BibNumerique_Cyberlibris extends Class_WebService_BibNume
     return $this->_oaiws;
   }
 }
-?>
diff --git a/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php b/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php
index e5bc1b7b879..fd3f732cdb1 100644
--- a/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php
+++ b/library/Class/WebService/BibNumerique/Cyberlibris/LivreNumerique.php
@@ -23,12 +23,17 @@
 class Class_WebService_BibNumerique_Cyberlibris_LivreNumerique
   extends Class_WebService_BibNumerique_RessourceNumerique {
 
-  protected $_record;
-  protected $_titre;
+  public function getBaseUrl() {
+    return Class_WebService_BibNumerique_Cyberlibris::BASE_URL; // constant declared on the Cyberlibris service class, not on this resource class
+  }
 
 
-  public function getBaseUrl(){
-    return Class_WebService_BibNumerique_Cyberlibris::BASE_URL;
+  public function import($update_constraints=false) { // $update_constraints is handed unchanged to parent::import()
+    if (!$this->getId())
+      $this->_debug(sprintf('record without matching dc:identifier (%s)',
+                            $this->getTitle())); // trace only: a record without id is reported, then parent::import() is still called
+
+    return parent::import($update_constraints);
   }
 
 
@@ -40,13 +45,4 @@ class Class_WebService_BibNumerique_Cyberlibris_LivreNumerique
   public function getRessourceCategorieLibelle() {
     return Class_WebService_BibNumerique_Cyberlibris::CATEGORY_LABEL;
   }
-
-
-  public function setOaiId($data) {
-    preg_match('|/book/([^/?]+)|i', $data, $matches)
-      ? ($this->_id = $matches[1])
-      : $this->_debug('cannot read id from : ' . $data);
-
-    return $this->setExternalURI($data);
-  }
 }
\ No newline at end of file
diff --git a/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php b/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php
new file mode 100644
index 00000000000..96060489ec5
--- /dev/null
+++ b/library/Class/WebService/BibNumerique/Cyberlibris/Parser.php
@@ -0,0 +1,33 @@
+<?php
+/**
+ * Copyright (c) 2012-2021, Agence Française Informatique (AFI). All rights reserved.
+ *
+ * BOKEH is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
+ * the Free Software Foundation.
+ *
+ * There are special exceptions to the terms and conditions of the AGPL as it
+ * is applied to this software (see README file).
+ *
+ * BOKEH is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+ *
+ * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
+ * along with BOKEH; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
+ */
+
+
+class Class_WebService_BibNumerique_Cyberlibris_Parser
+  extends Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique {
+  // Handles one dc:identifier value; only values containing "/book/<id>" are recorded (presumably invoked by the parent parser on the closing tag -- TODO confirm).
+  public function enddc_identifier($data) {
+    if (!preg_match('|/book/([^/?]+)|i', $data, $matches))
+      return; // no "/book/<id>" segment in this value: the current record is left untouched
+
+    $this->_record['id_oai'] = $matches[1]; // the segment following "/book/", up to the next "/" or "?"
+    $this->_record['relation'][] = $data; // the full identifier value is appended to the record's relation list
+  }
+}
diff --git a/library/Class/WebService/OAI/DublinCoreParser.php b/library/Class/WebService/OAI/DublinCoreParser.php
index c4c48ee87e9..8ebf69a5030 100644
--- a/library/Class/WebService/OAI/DublinCoreParser.php
+++ b/library/Class/WebService/OAI/DublinCoreParser.php
@@ -32,8 +32,6 @@ class Class_WebService_OAI_DublinCoreParser extends Class_WebService_OAI_ParserA
   protected $_record;
 
 
-
-
   public function endns1_record($data) {
     $this->endRecord($data);
   }
diff --git a/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php b/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php
index 89ad01f534b..e3d564287b2 100644
--- a/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php
+++ b/library/Class/WebService/OAI/DublinCoreParser/ForRessourceNumerique.php
@@ -19,16 +19,10 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
  */
 
-/* Sait extraire informations du XML retourné par
- * l'opération OAI ListRecords, format DublinCore
- * Functions:
- * - parse($xml): analyse le xml donné
- * - getRecords: retourne tous les enregistrements sous forme de tableau associatif
- * - getResumptionToken: retourne le token qui permet de rechercher
- *   les enregistrements suivants
- */
 
-class Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique extends Class_WebService_OAI_DublinCoreParser {
+class Class_WebService_OAI_DublinCoreParser_ForRessourceNumerique
+  extends Class_WebService_OAI_DublinCoreParser {
+
   protected
     $visitor,
     $_records_ressources,
diff --git a/tests/fixtures/cyberlibris_oai.xml b/tests/fixtures/cyberlibris_oai.xml
index fb64c8d64b4..65f6870b352 100644
--- a/tests/fixtures/cyberlibris_oai.xml
+++ b/tests/fixtures/cyberlibris_oai.xml
@@ -20,6 +20,7 @@
 	  <dc:date>2004</dc:date>
 	  <dc:language>fr</dc:language>
 	  <dc:identifier>https://www.bibliovox.com/book/10104055</dc:identifier>
+	  <dc:identifier>https://static2.cyberlibris.com/books_upload/136pix/9782847690279.jpg</dc:identifier>
 	  <dc:rights> copyrighted </dc:rights>
       </oai_dc:dc></metadata>
     </record>
@@ -80,7 +81,7 @@
 	  <p>Apr&egrave;s un bilan sans concession, l'auteur donne des pistes d'actions concr&egrave;tes pour revoir la fa&ccedil;on de g&eacute;rer la carri&egrave;re des seniors. Cet ouvrage vous aidera &agrave; r&eacute;aliser un &eacute;tat des lieux de la GRH des seniors dans votre propre entreprise. Il explique aussi quelles actions lancer &agrave; court terme pour sensibiliser le management &agrave; la gestion des &acirc;ges... <br /></p>]]></dc:description>
 	  <dc:date>2005</dc:date>
 	  <dc:language>fr</dc:language>
-	  <dc:identifier>https://www.bibliovox.com/book/10104077</dc:identifier>
+	  <dc:identifier>https://www.bibliovox.com/</dc:identifier>
 	  <dc:rights> copyrighted </dc:rights>
       </oai_dc:dc></metadata>
     </record>
diff --git a/tests/library/Class/WebService/CyberlibrisTest.php b/tests/library/Class/WebService/CyberlibrisTest.php
index e03eacedce6..7ff02a3d252 100644
--- a/tests/library/Class/WebService/CyberlibrisTest.php
+++ b/tests/library/Class/WebService/CyberlibrisTest.php
@@ -21,15 +21,15 @@
 include_once('tests/fixtures/RessourcesNumeriquesFixtures.php');
 
 class CyberlibrisHarvestSaveTest extends ModelTestCase {
-  protected $_storm_default_to_volatile = true;
+  protected $_log = "\n";
 
   public function setUp() {
     parent::setUp();
 
     Class_AdminVar::set('CYBERLIBRIS_URL', 'http://oai-bibliovox.cyberlibris.fr/oai.aspx');
 
-    $catalogue_xml = file_get_contents(realpath(dirname(__FILE__)). '/../../../fixtures/cyberlibris_oai.xml');
-    $catalogue_xml_fin = file_get_contents(realpath(dirname(__FILE__)). '/../../../fixtures/cyberlibris_oai_fin.xml');
+    $catalogue_xml = file_get_contents(__DIR__ . '/../../../fixtures/cyberlibris_oai.xml');
+    $catalogue_xml_fin = file_get_contents(__DIR__ . '/../../../fixtures/cyberlibris_oai_fin.xml');
 
     $this->_http_client = $this
       ->mock()
@@ -43,8 +43,11 @@ class CyberlibrisHarvestSaveTest extends ModelTestCase {
 
       ->beStrict();
 
-    $this->_service = new Class_WebService_BibNumerique_Cyberlibris();
     Class_WebService_BibNumerique_Cyberlibris::setDefaultHttpClient($this->_http_client);
+    $logger = $this->mock()->whenCalled('log')
+                   ->willDo(function($message) { $this->_log .= $message . "\n"; });
+    Class_WebService_BibNumerique_RessourceNumerique::setLogger($logger);
+    $this->_service = (new Class_WebService_BibNumerique_Cyberlibris)->setLogger($logger);
     $this->_service->harvest();
   }
 
@@ -68,33 +71,45 @@ class CyberlibrisHarvestSaveTest extends ModelTestCase {
 
 
   /** @test */
-  public function firstAlbumShouldNotHavePoster() {
+  public function logShouldContainsRecordWithoutMatchingIdentifier() {
+    $this->assertContains('record without matching dc:identifier (Manager la ', $this->_log);
+  }
+
+
+  /** @test */
+  public function firstBookShouldHaveNoPoster() {
     $this->assertEquals('', Class_Album::find(1)->getPoster());
   }
 
 
   /** @test */
-  public function secondBookShouldBe() {
-    $this->assertContains('Stimuler la créativité', Class_Album::find(2)->getTitre());
+  public function firstBookExternalURIShouldBeBookSlash10104055() {
+    $this->assertEquals('https://www.bibliovox.com/book/10104055',
+                        Class_Album::find(1)->getExternalUri());
   }
 
 
   /** @test */
-  public function lastBookAuthorShouldBeVerneEtienne() {
-    $this->assertEquals('Verne, Etienne', Class_Album::find(4)->getAuthorsNames()[0]);
+  public function firstBookIdOrigineShouldBe10104055() {
+    $this->assertEquals('10104055', Class_Album::find(1)->getIdOrigine());
   }
 
 
   /** @test */
-  public function idShouldBeSet() {
-    $this->assertEquals('10104081', Class_Album::find(4)->getIdOrigine());
+  public function secondBookTitleShouldBeStimulerLaCreativite() {
+    $this->assertContains('Stimuler la créativité', Class_Album::find(2)->getTitre());
   }
 
 
   /** @test */
-  public function firstAlbumExternalURIShouldBeSet() {
-    $this->assertEquals('https://www.bibliovox.com/book/10104055',
-                        Class_Album::find(1)->getExternalUri());
+  public function lastBookAuthorShouldBeVerneEtienne() {
+    $this->assertEquals('Verne, Etienne', Class_Album::find(4)->getAuthorsNames()[0]);
+  }
+
+
+  /** @test */
+  public function lastBookIdOrigineShouldBe10104081() {
+    $this->assertEquals('10104081', Class_Album::find(4)->getIdOrigine());
   }
 }
 
-- 
GitLab