From 335f9c748b06c5339997b946147525ccd5eee928 Mon Sep 17 00:00:00 2001
From: efalcy <efalcy@afi-sa.fr>
Date: Tue, 21 Jun 2022 14:28:08 +0000
Subject: [PATCH] fix oai parsing even without notices

---
 VERSIONS_WIP/152581                           |  1 +
 library/Class/WebService/OAIHarvester.php     | 21 ++++-
 .../Bibliondemand/Config.php                  | 14 +++-
 .../Bibliondemand/tests/BibliondemandTest.php |  2 +-
 .../PhasePrepareIntegrationNumelTest.php      | 78 +++++++++++++++++--
 tests/scenarios/Numel/hcc_oai_1.xml           | 40 ++++++++++
 tests/scenarios/Numel/hcc_oai_2.xml           |  7 ++
 tests/scenarios/Numel/hcc_oai_3.xml           |  6 ++
 tests/scenarios/Numel/numel_loop.xml          | 26 +++++++
 tests/scenarios/Numel/numel_loop2.xml         | 26 +++++++
 tests/scenarios/Numel/numel_loop3.xml         | 26 +++++++
 11 files changed, 237 insertions(+), 10 deletions(-)
 create mode 100644 VERSIONS_WIP/152581
 create mode 100644 tests/scenarios/Numel/hcc_oai_1.xml
 create mode 100644 tests/scenarios/Numel/hcc_oai_2.xml
 create mode 100644 tests/scenarios/Numel/hcc_oai_3.xml
 create mode 100644 tests/scenarios/Numel/numel_loop.xml
 create mode 100644 tests/scenarios/Numel/numel_loop2.xml
 create mode 100644 tests/scenarios/Numel/numel_loop3.xml

diff --git a/VERSIONS_WIP/152581 b/VERSIONS_WIP/152581
new file mode 100644
index 00000000000..b75b8c6a2d5
--- /dev/null
+++ b/VERSIONS_WIP/152581
@@ -0,0 +1 @@
+ - fonctionnalité #152581 : Moissonnage Syracuse Biblio On Demand : adaptation permettant de ne pas interrompre la lecture OAI si la réponse ne contient pas de notice mais fournit un jeton pour une prochaine page
\ No newline at end of file
diff --git a/library/Class/WebService/OAIHarvester.php b/library/Class/WebService/OAIHarvester.php
index 02d46ac3de3..5419348cf96 100644
--- a/library/Class/WebService/OAIHarvester.php
+++ b/library/Class/WebService/OAIHarvester.php
@@ -31,6 +31,7 @@ class Class_WebService_OAIHarvester {
     IDENTIFIER_PATTERN = '|<identifier[^>]*>[^<]*</identifier>|',
     LIST_RECORDS = 'ListRecords';
 
+  public static int $MAX_REQUESTS = 50000;
   protected
     $_base_url,
     $_query = '',
@@ -76,10 +77,15 @@ class Class_WebService_OAIHarvester {
     $start_url = $this->_base_url . '?' . $this->_query;
     $this->_debug($start_url, 'is the computed start harvest url');
     $token = $this->_fetchUrl($start_url, $callback);
-
-    while ($token)
+    $count = 0;
+    while ($token && $count < static::$MAX_REQUESTS ) {
       $token = $this->_fetchUrl($this->_nextUrlWith($token), $callback);
-
+      $count++;
+    }
+    if ($token && $count >= static::$MAX_REQUESTS) { // a token is still pending: the cap cut the harvest short
+      $this->_debug($start_url, 'Error : loop on request');
+      throw new RuntimeException($this->_('Le nombre de requêtes a dépassé le nombre autorisé: %s', static::$MAX_REQUESTS));
+    }
     return $this;
   }
 
@@ -101,7 +107,11 @@ class Class_WebService_OAIHarvester {
       throw new RuntimeException($this->_('Erreur OAI-PMH (%s) (%s)',
                                           $matches[1], $url));
 
+
     preg_match_all(static::IDENTIFIER_PATTERN, $body, $matches);
+    if( empty($matches) || empty( reset($matches)))
+      return $this->_parseToken($body);
+
     $checksum = md5(json_encode($matches));
     if ($this->_previous_checksum === $checksum)
       throw new RuntimeException($this->_('Réponse identique à la précédente'));
@@ -109,6 +119,11 @@ class Class_WebService_OAIHarvester {
 
     $callback($body . static::PAGE_SEPARATOR);
 
+    return $this->_parseToken($body);
+  }
+
+
+  protected function _parseToken($body) {
     return preg_match(static::RESUMPTION_PATTERN, $body, $matches)
       ? $matches[1]
       : null;
diff --git a/library/digital_resources/Bibliondemand/Config.php b/library/digital_resources/Bibliondemand/Config.php
index 1c11554ed49..69571153ac5 100644
--- a/library/digital_resources/Bibliondemand/Config.php
+++ b/library/digital_resources/Bibliondemand/Config.php
@@ -26,10 +26,11 @@ class Bibliondemand_Config extends Class_DigitalResource_Config {
             'AdminVars' => ['SSO_URL' => Class_AdminVar_Meta::newDefault($this->_('URL SSO des ressources Bibliondemand'))
                             ->bePrivate()],
 
-            'MuteSsoDashboard' =>  true,
+            'MuteSsoDashboard' =>  false,
             'MuteHarvestDashboard' => true,
 
             'SsoAction' => true,
+            'SsoValidateUrl' => true,
             'Harvesting' => true,
             'PermissionLabel' => $this->_('Bibliothèque numérique: accéder à la ressource Biblio on demand'),
             'NotAllowedMessage' => $this->_('Votre compte n\'est pas autorisé à accéder à cette ressource.'),
@@ -78,6 +79,17 @@ class Bibliondemand_Config extends Class_DigitalResource_Config {
   }
 
 
+  public function validateUrlFor($user) { // validate URL plus a `ticket` query parameter obtained for $user
+    [$url, $ticket] = [$this->_validateUrl(), (new Class_CasTicketV3($this->getAdminVar('SSO_URL')))->getTicketForUser($user)];
+    return $url . '?' . http_build_query(['ticket' => $ticket]);
+  }
+
+
+  protected function _validateUrl() { // URL built by Class_Url::absolute for this module's auth/validate action
+    $route = ['module' => $this->getModuleName(), 'controller' => 'auth', 'action' => 'validate'];
+    return Class_Url::absolute($route, null, true);
+  }
+
   public function getIndexThesauriBatchInstance() {
     return Class_DigitalResource::getInstance()
       ->build($this->withNameSpace('IndexThesauriBatch'), $this);
diff --git a/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php b/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php
index 827ac10a779..9943a4d87bb 100644
--- a/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php
+++ b/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php
@@ -68,7 +68,7 @@ class BibliondemandModulesControllerTest extends AbstractControllerTestCase {
   /** @test */
   public function dashboardCustomIntegrationShouldBeDisplay() {
     $this->dispatch('/Bibliondemand_Plugin', true);
-    $this->assertNotXpathContentContains('//h3', 'Diagnostic SSO');
+    $this->assertXpathContentContains('//h3', 'Diagnostic SSO');
     $this->assertNotXpathContentContains('//h3', 'Diagnostic moissonnage');
     $this->assertXpathContentContains('//h3', 'Diagnostic complémentaire');
   }
diff --git a/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php b/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php
index 530681a1d16..e6fb50865fa 100644
--- a/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php
+++ b/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php
@@ -33,12 +33,12 @@ class PhasePrepareIntegrationNumelTest extends Class_Cosmogramme_Integration_Pha
     $this->_filesystem
       ->whenCalled('file_put_contents')
       ->willDo(function($path, $content)
-               {
-                 if (!array_key_exists($path, $this->_files_contents))
-                   $this->_files_contents[$path] = '';
+      {
+        if (!array_key_exists($path, $this->_files_contents))
+          $this->_files_contents[$path] = '';
 
-                 $this->_files_contents[$path] .= $content;
-               })
+        $this->_files_contents[$path] .= $content;
+      })
 
       ->whenCalled('file_exists')
       ->willDo(fn($path) => array_key_exists($path, $this->_files_contents))
@@ -156,4 +156,72 @@ class PhasePrepareIntegrationNumelTest extends Class_Cosmogramme_Integration_Pha
 
     $this->assertLogContains('Impossible de moissonner https://server.org/oai/index.php : Réponse identique à la précédente');
   }
+
+
+  /** @test */
+  public function withSameRecordsAndNoIdentifierShouldStop() {
+    // The fixtures have no <identifier> and their resumption tokens cycle, so only MAX_REQUESTS ends the harvest.
+    $response_1 = $this->mock()
+                       ->whenCalled('isSuccessful')->answers(true)
+                       ->whenCalled('getBody')->answers(file_get_contents(__DIR__.'/numel_loop.xml'));
+    $response_2 = $this->mock()
+                       ->whenCalled('isSuccessful')->answers(true)
+                       ->whenCalled('getBody')->answers(file_get_contents(__DIR__.'/numel_loop2.xml'));
+    $response_3 = $this->mock()
+                       ->whenCalled('isSuccessful')->answers(true)
+                       ->whenCalled('getBody')->answers(file_get_contents(__DIR__.'/numel_loop3.xml'));
+    $this->_http->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&metadataPrefix=oai_numel')
+      ->answers($response_1)
+      ->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=1646836602')
+      ->answers($response_2)
+      ->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=1646836603')
+      ->answers($response_3);
+    // MAX_REQUESTS is a public static: lower it for this run only and always restore it for the following tests.
+    $previous_max = Class_WebService_OAIHarvester::$MAX_REQUESTS;
+    Class_WebService_OAIHarvester::$MAX_REQUESTS = 5;
+    try {
+      $this->_phase = $this->_buildPhase('PrepareIntegrations')->run();
+    } finally {
+      Class_WebService_OAIHarvester::$MAX_REQUESTS = $previous_max;
+    }
+    $this->assertLogContains('Le nombre de requêtes a dépassé le nombre autorisé: 5');
+  }
+
+
+  /**
+   * A page without <identifier> that still carries a resumptionToken must not
+   * stop the harvest: the following page is requested with that token.
+   *
+   * @test
+   */
+  public function withNoIdentifierShouldReadNextPage(){
+    // hcc_oai_1.xml: records and a resumptionToken; hcc_oai_2.xml: a resumptionToken only;
+    // hcc_oai_3.xml: an empty ListRecords without token.
+    [$records_page, $token_only_page, $empty_page] = array_map(
+      fn($fixture) => $this->mock()
+                           ->whenCalled('isSuccessful')->answers(true)
+                           ->whenCalled('getBody')->answers(file_get_contents($fixture)),
+      [__DIR__.'/hcc_oai_1.xml', __DIR__.'/hcc_oai_2.xml', __DIR__.'/hcc_oai_3.xml']);
+
+    $this->_http
+      ->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&metadataPrefix=oai_numel')
+      ->answers($records_page)
+
+      ->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=%21%21%2171200%21338417%21oai_dc')
+      ->answers($token_only_page)
+
+      ->whenCalled('getResponse')
+      ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=%21%21%21338300%21338426%21oai_dc')
+      ->answers($empty_page);
+
+    $prepare_phase = $this->_buildPhase('PrepareIntegrations');
+    $this->_phase = $prepare_phase->run();
+
+    $this->assertLogContains('transfert de https://server.org/oai/index.php vers integre1179.pan');
+  }
 }
diff --git a/tests/scenarios/Numel/hcc_oai_1.xml b/tests/scenarios/Numel/hcc_oai_1.xml
new file mode 100644
index 00000000000..d0fc9bc76e6
--- /dev/null
+++ b/tests/scenarios/Numel/hcc_oai_1.xml
@@ -0,0 +1,40 @@
+<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-05-25T08:40:47Z</responseDate>
+<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request>
+<ListRecords>
+<record>
+<header>
+</header>
+<metadata>
+</metadata>
+</record>
+<record>
+<header>
+<identifier>CG19587856</identifier>
+<datestamp>2022-03-11T23:53:14Z</datestamp>
+</header>
+<metadata>
+<oai_dc:dc>
+<dc:audience>CG19</dc:audience>
+<dc:identifier>
+Url : https://numerique.bd.correze.fr/doc/YOUBOOX/587856
+</dc:identifier>
+<dc:identifier>
+Vignette : https://d2adx4xmvnkm9k.cloudfront.net/books/587/587856/thumbs/252x342.jpg?version=20220311101905723263
+</dc:identifier>
+<dc:title>Via Ferrata : Poèmes ou journal épars</dc:title>
+<dc:type>Livre numérique</dc:type>
+<dc:creator>Fred Pougeard</dc:creator>
+<dc:subject>Romans</dc:subject>
+<dc:source>YOUBOOX</dc:source>
+<dc:publisher>Éditions Thierry Marchaisse</dc:publisher>
+<dc:language>français</dc:language>
+<dc:description>
+Tombeau pour un père —cheminot— encore un peu vivant, ces poèmes racontent la traversée des deuils, ils tentent de phraser le chagrin, la peine à vivre et la maladie du proche, désormais plongé dans le brouillard de l'oubli. Ils disent aussi ce qui y échappe, les « épiphanies », ces moments si rares où s'éclaire fugitivement quelque chose de soi, des autres ou du monde. Avec un café, pourquoi pas, et une heure devant soi, il est bon de lire dans l'ordre ce journal poétique épars, de prendre le train en compagnie de l'auteur, au rythme de ses photos. On peut en éprouver un grand calme, pas du tout désespérant.FRED POUGEARD est né en 1974 à Guéret. Conteur de son métier, il est voué lui aussi aux TER, aux petites gares et aux lignes oubliées. Il dirige la compagnie l'Allégresse du Pourpre.
+</dc:description>
+</oai_dc:dc>
+</metadata>
+</record>
+<resumptionToken completeListSize="338417" cursor="71200">!!!71200!338417!oai_dc</resumptionToken>
+</ListRecords>
+</OAI-PMH>
diff --git a/tests/scenarios/Numel/hcc_oai_2.xml b/tests/scenarios/Numel/hcc_oai_2.xml
new file mode 100644
index 00000000000..c03b7674648
--- /dev/null
+++ b/tests/scenarios/Numel/hcc_oai_2.xml
@@ -0,0 +1,7 @@
+<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-05-25T09:36:52Z</responseDate>
+<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request>
+<ListRecords>
+<resumptionToken completeListSize="338426" cursor="338300">!!!338300!338426!oai_dc</resumptionToken>
+</ListRecords>
+</OAI-PMH>
diff --git a/tests/scenarios/Numel/hcc_oai_3.xml b/tests/scenarios/Numel/hcc_oai_3.xml
new file mode 100644
index 00000000000..aa58e9a91df
--- /dev/null
+++ b/tests/scenarios/Numel/hcc_oai_3.xml
@@ -0,0 +1,6 @@
+<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-05-25T08:41:05Z</responseDate>
+<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request>
+<ListRecords>
+</ListRecords>
+</OAI-PMH>
diff --git a/tests/scenarios/Numel/numel_loop.xml b/tests/scenarios/Numel/numel_loop.xml
new file mode 100644
index 00000000000..25fa2755c9f
--- /dev/null
+++ b/tests/scenarios/Numel/numel_loop.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:dcterms="http://purl.org/dc/terms/"
+         xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
+         http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-03-09T14:36:42Z</responseDate>
+ <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request>
+ <ListRecords>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2019-11-06</datestamp>
+  </header>
+  </record>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2020-04-10</datestamp>
+  </header>
+  </record>
+  <resumptionToken expirationDate="2022-03-10T14:36:42Z"
+     completeListSize="23443"
+     cursor="0">1646836602</resumptionToken>
+ </ListRecords>
+</OAI-PMH>
diff --git a/tests/scenarios/Numel/numel_loop2.xml b/tests/scenarios/Numel/numel_loop2.xml
new file mode 100644
index 00000000000..115f50d6c94
--- /dev/null
+++ b/tests/scenarios/Numel/numel_loop2.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:dcterms="http://purl.org/dc/terms/"
+         xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
+         http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-03-09T14:36:42Z</responseDate>
+ <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request>
+ <ListRecords>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2019-11-06</datestamp>
+  </header>
+  </record>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2020-04-10</datestamp>
+  </header>
+  </record>
+  <resumptionToken expirationDate="2022-03-10T14:36:42Z"
+     completeListSize="23443"
+     cursor="0">1646836603</resumptionToken>
+ </ListRecords>
+</OAI-PMH>
diff --git a/tests/scenarios/Numel/numel_loop3.xml b/tests/scenarios/Numel/numel_loop3.xml
new file mode 100644
index 00000000000..25fa2755c9f
--- /dev/null
+++ b/tests/scenarios/Numel/numel_loop3.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xmlns:dcterms="http://purl.org/dc/terms/"
+         xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/
+         http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
+<responseDate>2022-03-09T14:36:42Z</responseDate>
+ <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request>
+ <ListRecords>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2019-11-06</datestamp>
+  </header>
+  </record>
+  <record>
+  <header status="deleted">
+
+   <datestamp>2020-04-10</datestamp>
+  </header>
+  </record>
+  <resumptionToken expirationDate="2022-03-10T14:36:42Z"
+     completeListSize="23443"
+     cursor="0">1646836602</resumptionToken>
+ </ListRecords>
+</OAI-PMH>
-- 
GitLab