diff --git a/VERSIONS_WIP/152581 b/VERSIONS_WIP/152581 new file mode 100644 index 0000000000000000000000000000000000000000..b75b8c6a2d54f3039b6d25f309814faf939a1a04 --- /dev/null +++ b/VERSIONS_WIP/152581 @@ -0,0 +1 @@ + - fonctionnalité #152581 : Moissonnage Syracuse Biblio On Demand : adaptation permettant de ne pas interrompre la lecture OAI si la réponse ne contient pas de notice mais fournit un jeton pour une prochaine page \ No newline at end of file diff --git a/library/Class/WebService/OAIHarvester.php b/library/Class/WebService/OAIHarvester.php index 02d46ac3de3e9f248e14472ff2984672b91806d1..5419348cf96b5369601f4ecae6ef68e6134f96a2 100644 --- a/library/Class/WebService/OAIHarvester.php +++ b/library/Class/WebService/OAIHarvester.php @@ -31,6 +31,7 @@ class Class_WebService_OAIHarvester { IDENTIFIER_PATTERN = '|<identifier[^>]*>[^<]*</identifier>|', LIST_RECORDS = 'ListRecords'; + public static int $MAX_REQUESTS = 50000; protected $_base_url, $_query = '', @@ -76,10 +77,15 @@ class Class_WebService_OAIHarvester { $start_url = $this->_base_url . '?' . $this->_query; $this->_debug($start_url, 'is the computed start harvest url'); $token = $this->_fetchUrl($start_url, $callback); - - while ($token) + $count = 0; + while ($token && $count < static::$MAX_REQUESTS ) { $token = $this->_fetchUrl($this->_nextUrlWith($token), $callback); - + $count++; + } + if ($count >= static::$MAX_REQUESTS) { + $this->_debug($start_url, 'Error : loop on request'); + throw new RuntimeException($this->_('Le nombre de requêtes a dépassé le nombre autorisé: '. 
static::$MAX_REQUESTS)); + } return $this; } @@ -101,7 +107,11 @@ class Class_WebService_OAIHarvester { throw new RuntimeException($this->_('Erreur OAI-PMH (%s) (%s)', $matches[1], $url)); + preg_match_all(static::IDENTIFIER_PATTERN, $body, $matches); + if( empty($matches) || empty( reset($matches))) + return $this->_parseToken($body); + $checksum = md5(json_encode($matches)); if ($this->_previous_checksum === $checksum) throw new RuntimeException($this->_('Réponse identique à la précédente')); @@ -109,6 +119,11 @@ class Class_WebService_OAIHarvester { $callback($body . static::PAGE_SEPARATOR); + return $this->_parseToken($body); + } + + + protected function _parseToken($body) { return preg_match(static::RESUMPTION_PATTERN, $body, $matches) ? $matches[1] : null; diff --git a/library/digital_resources/Bibliondemand/Config.php b/library/digital_resources/Bibliondemand/Config.php index 1c11554ed49bae2e2677adb3691c83e9a384c863..69571153ac539f66a09f3206111d8d6998ceaf63 100644 --- a/library/digital_resources/Bibliondemand/Config.php +++ b/library/digital_resources/Bibliondemand/Config.php @@ -26,10 +26,11 @@ class Bibliondemand_Config extends Class_DigitalResource_Config { 'AdminVars' => ['SSO_URL' => Class_AdminVar_Meta::newDefault($this->_('URL SSO des ressources Bibliondemand')) ->bePrivate()], - 'MuteSsoDashboard' => true, + 'MuteSsoDashboard' => false, 'MuteHarvestDashboard' => true, 'SsoAction' => true, + 'SsoValidateUrl' => true, 'Harvesting' => true, 'PermissionLabel' => $this->_('Bibliothèque numérique: accéder à la ressource Biblio on demand'), 'NotAllowedMessage' => $this->_('Votre compte n\'est pas autorisé à accéder à cette ressource.'), @@ -78,6 +79,17 @@ class Bibliondemand_Config extends Class_DigitalResource_Config { } + public function validateUrlFor($user) { + return $this->_validateUrl() + . '?' . 
http_build_query(['ticket' => (new Class_CasTicketV3($this->getAdminVar('SSO_URL')))->getTicketForUser($user)]); + } + + + protected function _validateUrl() { + return Class_Url::absolute(['module' => $this->getModuleName(), 'controller' => 'auth', + 'action' => 'validate'], null, true); + } + public function getIndexThesauriBatchInstance() { return Class_DigitalResource::getInstance() ->build($this->withNameSpace('IndexThesauriBatch'), $this); diff --git a/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php b/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php index 827ac10a779177cbfecc1ceb9d9b3c4b32921831..9943a4d87bb365c5092bda7247bc86168584b9a1 100644 --- a/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php +++ b/library/digital_resources/Bibliondemand/tests/BibliondemandTest.php @@ -68,7 +68,7 @@ class BibliondemandModulesControllerTest extends AbstractControllerTestCase { /** @test */ public function dashboardCustomIntegrationShouldBeDisplay() { $this->dispatch('/Bibliondemand_Plugin', true); - $this->assertNotXpathContentContains('//h3', 'Diagnostic SSO'); + $this->assertXpathContentContains('//h3', 'Diagnostic SSO'); $this->assertNotXpathContentContains('//h3', 'Diagnostic moissonnage'); $this->assertXpathContentContains('//h3', 'Diagnostic complémentaire'); } diff --git a/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php b/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php index 530681a1d16414e133ef321d10d7fdcd5c4de2bb..e6fb50865fa1ecb32d5e238d508da13b431e49b2 100644 --- a/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php +++ b/tests/scenarios/Numel/PhasePrepareIntegrationNumelTest.php @@ -33,12 +33,12 @@ class PhasePrepareIntegrationNumelTest extends Class_Cosmogramme_Integration_Pha $this->_filesystem ->whenCalled('file_put_contents') ->willDo(function($path, $content) - { - if (!array_key_exists($path, $this->_files_contents)) - $this->_files_contents[$path] = ''; + { + if 
(!array_key_exists($path, $this->_files_contents)) + $this->_files_contents[$path] = ''; - $this->_files_contents[$path] .= $content; - }) + $this->_files_contents[$path] .= $content; + }) ->whenCalled('file_exists') ->willDo(fn($path) => array_key_exists($path, $this->_files_contents)) @@ -156,4 +156,72 @@ class PhasePrepareIntegrationNumelTest extends Class_Cosmogramme_Integration_Pha $this->assertLogContains('Impossible de moissonner https://server.org/oai/index.php : Réponse identique à la précédente'); } + + + /** @test */ + public function withSameRecordsAndNoIdentifierShouldStop() { + $response_1 = $this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/numel_loop.xml')); + + $response_2 = $this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/numel_loop2.xml')); + $response_3 = $this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/numel_loop3.xml')); + Class_WebService_OAIHarvester::$MAX_REQUESTS=5; + $this->_http + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&metadataPrefix=oai_numel') + ->answers($response_1) + + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=1646836602') + ->answers($response_2) + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=1646836603') + ->answers($response_3); + + $this->_phase = $this->_buildPhase('PrepareIntegrations')->run(); + $this->assertLogContains('Le nombre de requêtes a dépassé le nombre autorisé: 5'); + } + + + /** + * @test + */ + public function withNoIdentifierShouldReadNextPage(){ + $response_1 = $this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/hcc_oai_1.xml')); + $response_2 = 
$this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/hcc_oai_2.xml')); + + $response_3 = $this->mock() + ->whenCalled('isSuccessful')->answers(true) + ->whenCalled('getBody') + ->answers(file_get_contents(__DIR__.'/hcc_oai_3.xml')); + $this->_http + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&metadataPrefix=oai_numel') + ->answers($response_1) + + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=%21%21%2171200%21338417%21oai_dc') + ->answers($response_2) + ->whenCalled('getResponse') + ->with('https://server.org/oai/index.php?verb=ListRecords&resumptionToken=%21%21%21338300%21338426%21oai_dc') + ->answers($response_3); + + $this->_phase = $this->_buildPhase('PrepareIntegrations')->run(); + + $this->assertLogContains('transfert de https://server.org/oai/index.php vers integre1179.pan'); + } } diff --git a/tests/scenarios/Numel/hcc_oai_1.xml b/tests/scenarios/Numel/hcc_oai_1.xml new file mode 100644 index 0000000000000000000000000000000000000000..d0fc9bc76e66160d1118a8491a3fbc9b3ef0582d --- /dev/null +++ b/tests/scenarios/Numel/hcc_oai_1.xml @@ -0,0 +1,40 @@ +<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> +<responseDate>2022-05-25T08:40:47Z</responseDate> +<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request> +<ListRecords> +<record> +<header> +</header> +<metadata> +</metadata> +</record> +<record> +<header> +<identifier>CG19587856</identifier> +<datestamp>2022-03-11T23:53:14Z</datestamp> +</header> +<metadata> +<oai_dc:dc> +<dc:audience>CG19</dc:audience> +<dc:identifier> +Url : https://numerique.bd.correze.fr/doc/YOUBOOX/587856 +</dc:identifier> +<dc:identifier> +Vignette : https://d2adx4xmvnkm9k.cloudfront.net/books/587/587856/thumbs/252x342.jpg?version=20220311101905723263 +</dc:identifier> +<dc:title>Via Ferrata 
: Poèmes ou journal épars</dc:title> +<dc:type>Livre numérique</dc:type> +<dc:creator>Fred Pougeard</dc:creator> +<dc:subject>Romans</dc:subject> +<dc:source>YOUBOOX</dc:source> +<dc:publisher>Éditions Thierry Marchaisse</dc:publisher> +<dc:language>français</dc:language> +<dc:description> +Tombeau pour un père —cheminot— encore un peu vivant, ces poèmes racontent la traversée des deuils, ils tentent de phraser le chagrin, la peine à vivre et la maladie du proche, désormais plongé dans le brouillard de l'oubli. Ils disent aussi ce qui y échappe, les « épiphanies », ces moments si rares où s'éclaire fugitivement quelque chose de soi, des autres ou du monde. Avec un café, pourquoi pas, et une heure devant soi, il est bon de lire dans l'ordre ce journal poétique épars, de prendre le train en compagnie de l'auteur, au rythme de ses photos. On peut en éprouver un grand calme, pas du tout désespérant.FRED POUGEARD est né en 1974 à Guéret. Conteur de son métier, il est voué lui aussi aux TER, aux petites gares et aux lignes oubliées. Il dirige la compagnie l'Allégresse du Pourpre. 
+</dc:description> +</oai_dc:dc> +</metadata> +</record> +<resumptionToken completeListSize="338417" cursor="71200">!!!71200!338417!oai_dc</resumptionToken> +</ListRecords> +</OAI-PMH> diff --git a/tests/scenarios/Numel/hcc_oai_2.xml b/tests/scenarios/Numel/hcc_oai_2.xml new file mode 100644 index 0000000000000000000000000000000000000000..c03b76746485e7d9c15a19bc2d9abe8e7f067505 --- /dev/null +++ b/tests/scenarios/Numel/hcc_oai_2.xml @@ -0,0 +1,7 @@ +<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> +<responseDate>2022-05-25T09:36:52Z</responseDate> +<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request> +<ListRecords> +<resumptionToken completeListSize="338426" cursor="338300">!!!338300!338426!oai_dc</resumptionToken> +</ListRecords> +</OAI-PMH> diff --git a/tests/scenarios/Numel/hcc_oai_3.xml b/tests/scenarios/Numel/hcc_oai_3.xml new file mode 100644 index 0000000000000000000000000000000000000000..aa58e9a91df06425e21b7e92362171b1bb8fac2f --- /dev/null +++ b/tests/scenarios/Numel/hcc_oai_3.xml @@ -0,0 +1,6 @@ +<OAI-PMH xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> +<responseDate>2022-05-25T08:41:05Z</responseDate> +<request verb="ListRecords">bd.correze.fr/oaiserver.ashx</request> +<ListRecords> +</ListRecords> +</OAI-PMH> diff --git a/tests/scenarios/Numel/numel_loop.xml b/tests/scenarios/Numel/numel_loop.xml new file mode 100644 index 0000000000000000000000000000000000000000..25fa2755c9f551bdbafec3a53970ac76039847f9 --- /dev/null +++ b/tests/scenarios/Numel/numel_loop.xml @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns:dcterms="http://purl.org/dc/terms/" + xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ + http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> 
+<responseDate>2022-03-09T14:36:42Z</responseDate> + <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request> + <ListRecords> + <record> + <header status="deleted"> + + <datestamp>2019-11-06</datestamp> + </header> + </record> + <record> + <header status="deleted"> + + <datestamp>2020-04-10</datestamp> + </header> + </record> + <resumptionToken expirationDate="2022-03-10T14:36:42Z" + completeListSize="23443" + cursor="0">1646836602</resumptionToken> + </ListRecords> +</OAI-PMH> diff --git a/tests/scenarios/Numel/numel_loop2.xml b/tests/scenarios/Numel/numel_loop2.xml new file mode 100644 index 0000000000000000000000000000000000000000..115f50d6c9403ac86f76d3124ee2f379c5cac29f --- /dev/null +++ b/tests/scenarios/Numel/numel_loop2.xml @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns:dcterms="http://purl.org/dc/terms/" + xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ + http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> +<responseDate>2022-03-09T14:36:42Z</responseDate> + <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request> + <ListRecords> + <record> + <header status="deleted"> + + <datestamp>2019-11-06</datestamp> + </header> + </record> + <record> + <header status="deleted"> + + <datestamp>2020-04-10</datestamp> + </header> + </record> + <resumptionToken expirationDate="2022-03-10T14:36:42Z" + completeListSize="23443" + cursor="0">1646836603</resumptionToken> + </ListRecords> +</OAI-PMH> diff --git a/tests/scenarios/Numel/numel_loop3.xml b/tests/scenarios/Numel/numel_loop3.xml new file mode 100644 index 0000000000000000000000000000000000000000..25fa2755c9f551bdbafec3a53970ac76039847f9 --- /dev/null +++ b/tests/scenarios/Numel/numel_loop3.xml @@ -0,0 +1,26 @@ +<?xml version="1.0" encoding="UTF-8"?> +<OAI-PMH 
xmlns="http://www.openarchives.org/OAI/2.0/" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xmlns:dcterms="http://purl.org/dc/terms/" + xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ + http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"> +<responseDate>2022-03-09T14:36:42Z</responseDate> + <request verb="ListRecords" resumptionToken="1646836484">https://melun.prod-osiros.decalog.net/oai/index.php</request> + <ListRecords> + <record> + <header status="deleted"> + + <datestamp>2019-11-06</datestamp> + </header> + </record> + <record> + <header status="deleted"> + + <datestamp>2020-04-10</datestamp> + </header> + </record> + <resumptionToken expirationDate="2022-03-10T14:36:42Z" + completeListSize="23443" + cursor="0">1646836602</resumptionToken> + </ListRecords> +</OAI-PMH>