diff --git a/VERSIONS_HOTLINE/162061 b/VERSIONS_HOTLINE/162061 new file mode 100644 index 0000000000000000000000000000000000000000..4c615cf7a13d2265cb2368f66e76ce923dfc808b --- /dev/null +++ b/VERSIONS_HOTLINE/162061 @@ -0,0 +1 @@ + - correctif #162061 : Connecteur Bacon : Prise en charge du format TSV pour le moissonage \ No newline at end of file diff --git a/library/digital_resources/Bacon/Service.php b/library/digital_resources/Bacon/Service.php index eb7ef33724b03cd5acc19c8fc863fb2f5020ed41..81b18ea43b09ab2137d4d416646b38dbfa3fc10b 100644 --- a/library/digital_resources/Bacon/Service.php +++ b/library/digital_resources/Bacon/Service.php @@ -28,14 +28,14 @@ class Bacon_Service extends Class_DigitalResource_Service { $_album_ids = [], $_existing_key_in_bd = [], $_has_resources = false, - $_parser, + $_config, $_base_url, $_sets; public function __construct($config) { parent::__construct($config); - $this->_parser = new Bacon_Service_Parser($config); + $this->_config = $config; $this->_base_url = $config->getHarvestUrl(); $this->_sets = array_keys($config->getHarvestSets()); } @@ -51,12 +51,12 @@ class Bacon_Service extends Class_DigitalResource_Service { $set = $this->_sets[$page_number-1]; parent::_logPage($this->getPageCount(), $page_number); $this->getLogger()->info($this->_('Analyse du fichier %s', $set)); - - $this->_collection = $this->_parser->setHarvestSet($set) - ->setLogger($this->getLogger()) - ->parseXML($this->httpGet($this->_base_url . $set)) - ->getAlbums(); - + $parser_name = 'Bacon_Service_Parser' . strtoupper(substr($set, -3, 3)); + $parser = new $parser_name($this->_config); + $this->_collection = $parser->setHarvestSet($set) + ->setLogger($this->getLogger()) + ->parse($this->httpGet($this->_base_url . 
$set)) + ->getAlbums(); return $this; } diff --git a/library/digital_resources/Bacon/Service/Parser.php b/library/digital_resources/Bacon/Service/Parser.php index b64872d8cf04b465e963f89523cd9ea1747e7563..1a2f12b41e2cba200e7f87379599ca9eea0cfe5c 100644 --- a/library/digital_resources/Bacon/Service/Parser.php +++ b/library/digital_resources/Bacon/Service/Parser.php @@ -42,18 +42,12 @@ class Bacon_Service_Parser { } - public function parseXML($xml) { - $this->_albums = []; - $this->_parser = new Class_WebService_FasterXMLParser(); - $this->_parser->setElementHandler($this); - $this->_parser->parse($xml); - $this->_logKbarts(function($remainder) { return 0 !== $remainder; }); - + public function parse(string $xml) : self { return $this; } - protected function _logKbarts($condition) { + protected function _logKbarts($condition) : self { $remainder = ($count = count($this->_albums)) % 1000; if ($condition($remainder)) $this->getLogger()->info($this->_('%d kbarts', $count)); @@ -62,17 +56,19 @@ class Bacon_Service_Parser { } - public function startKbart($attributes) { + public function startKbart(array $attributes) : self { $this->_current_album = new Bacon_Service_Album($this->_config, $this->_harvest_set); $this->_current_album_key = new Bacon_AlbumKey(); + return $this; } - public function endKbart() { + public function endKbart() : self { $id_origin = $this->_current_album_key->getHash(); $this->_current_album->setId($id_origin); $this->_albums[$id_origin] = $this->_current_album; $this->_logKbarts(function($remainder) { return 0 === $remainder; }); + return $this; } @@ -207,4 +203,4 @@ class Bacon_Service_Parser { public function getAlbums() { return $this->_albums; } -} \ No newline at end of file +} diff --git a/library/digital_resources/Bacon/Service/ParserTXT.php b/library/digital_resources/Bacon/Service/ParserTXT.php new file mode 100644 index 0000000000000000000000000000000000000000..0e7005211609eb9cfcf6c497cce4f74ef32d4dd2 --- /dev/null +++ 
b/library/digital_resources/Bacon/Service/ParserTXT.php
@@ -0,0 +1,103 @@
+<?php
+/**
+ * Copyright (c) 2012-2022, Agence Française Informatique (AFI). All rights reserved.
+ *
+ * BOKEH is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
+ * the Free Software Foundation.
+ *
+ * There are special exceptions to the terms and conditions of the AGPL as it
+ * is applied to this software (see README file).
+ *
+ * BOKEH is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU AFFERO GENERAL PUBLIC LICENSE for more details.
+ *
+ * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
+ * along with BOKEH; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+/**
+ * Bacon harvest parser for tab-separated files: the first row names the
+ * columns, each following row is processed as one kbart entry.
+ */
+class Bacon_Service_ParserTXT
+  extends Bacon_Service_Parser {
+
+  /**
+   * Parses tab-separated content and rebuilds the album list from it.
+   *
+   * @param string $txt raw file content, first row being the column names
+   * @return self
+   */
+  public function parse(string $txt) : self {
+    $this->_albums = [];
+
+    $stream = fopen('php://memory', 'r+');
+    try {
+      fwrite($stream, $txt);
+      rewind($stream);
+
+      $header = fgetcsv($stream, null, "\t");
+      while (is_array($header)
+             && false !== ($data = fgetcsv($stream, null, "\t"))) {
+        /* fgetcsv() yields [null] for a blank line: nothing to import */
+        if ([null] === $data)
+          continue;
+
+        $this->startKbart([])
+             ->processTSVLine($header, $data)
+             ->endKbart();
+      }
+    } finally {
+      /* release the in-memory buffer even when a row handler throws */
+      fclose($stream);
+    }
+
+    $this->_logKbarts(function($remainder) { return 0 !== $remainder; });
+    return $this;
+  }
+
+
+  /**
+   * Feeds every non-empty cell of a row to the handler of its column.
+   *
+   * @param array $header column names read from the first row
+   * @param array $data cells of the current row
+   * @return self
+   */
+  protected function processTSVLine(array $header, array $data) : self {
+    /* array_combine() needs as many values as keys: pad short rows,
+       drop the surplus cells of long ones */
+    $column_count = count($header);
+    $cells = array_slice(array_pad($data, $column_count, ''), 0, $column_count);
+
+    /* array_filter() without callback would also discard the value '0' */
+    $album_datas = array_filter(array_combine($header, $cells),
+                                function($value) { return null !== $value && '' !== $value; });
+
+    foreach ($album_datas as $field => $value)
+      $this->setFieldFromTSV((string) $field, (string) $value);
+
+    return $this;
+  }
+
+
+  /**
+   * Calls the "end" handler matching a column name, when this parser has one;
+   * e.g. publication_title is routed to endPublication_Title(). Columns
+   * without handler are ignored.
+   *
+   * @param string $field column name
+   * @param string $value cell content
+   */
+  protected function setFieldFromTSV(string $field, string $value) : void {
+    $words = array_map('ucfirst', explode('_', $field));
+    $data_handler = 'end' . implode('_', $words);
+    if (method_exists($this, $data_handler))
+      $this->$data_handler($value);
+  }
+}
\ No newline at end of file
diff --git a/library/digital_resources/Bacon/Service/ParserXML.php b/library/digital_resources/Bacon/Service/ParserXML.php new file mode 100644 index 0000000000000000000000000000000000000000..e1bf6e97475a136367fdbdc727c4278c87da2009 --- /dev/null +++ b/library/digital_resources/Bacon/Service/ParserXML.php @@ -0,0 +1,34 @@ +<?php +/** + * Copyright (c) 2012-2022, Agence Française Informatique (AFI). All rights reserved. + * + * BOKEH is free software; you can redistribute it and/or modify + * it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by + * the Free Software Foundation. + * + * There are special exceptions to the terms and conditions of the AGPL as it + * is applied to this software (see README file). + * + * BOKEH is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU AFFERO GENERAL PUBLIC LICENSE for more details. 
+ * + * You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE + * along with BOKEH; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +class Bacon_Service_ParserXML + extends Bacon_Service_Parser { + + public function parse(string $xml) : self { + $this->_albums = []; + $this->_parser = new Class_WebService_FasterXMLParser(); + $this->_parser->setElementHandler($this); + $this->_parser->parse($xml); + $this->_logKbarts(function($remainder) { return 0 !== $remainder; }); + return $this; + } +} diff --git a/library/digital_resources/Bacon/tests/BaconTest.php b/library/digital_resources/Bacon/tests/BaconTest.php index 897af7459b2fe9e031aa15af1050913734eac0ad..a9daf34ab08a95fb5735bb14a3fd5948d011e46d 100644 --- a/library/digital_resources/Bacon/tests/BaconTest.php +++ b/library/digital_resources/Bacon/tests/BaconTest.php @@ -238,8 +238,9 @@ abstract class BaconActivatedTestCase extends AbstractControllerTestCase { Class_AdminVar::set('Bacon_HARVEST_SETS', json_encode(['files' => ['NPG_FRANCE_ISTEXJOURNALS.xml', - 'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml'], - 'types' => ['NPG', '']])); + 'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml', + 'PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt'], + 'types' => ['NPG', '', '']])); $group = $this->fixture(Class_UserGroup::class, ['id' => 1, @@ -392,8 +393,8 @@ abstract class BaconHarvestingTestCase extends BaconActivatedTestCase { protected function _expectSetNamed($file_name) { $this->_http_client ->whenCalled('open_url') - ->with('https://bacon.abes.fr/package2kbart/' . $file_name . '.xml') - ->willDo(fn() => file_get_contents(__DIR__ . '/' . $file_name . '.xml')) + ->with('https://bacon.abes.fr/package2kbart/' . $file_name ) + ->willDo(fn() => file_get_contents(__DIR__ . '/' . 
$file_name )) ->beStrict(); } } @@ -411,8 +412,9 @@ abstract class BaconHarvestingAndIndexingTestCase extends BaconHarvestingTestCas public function setUp() { parent::setUp(); - foreach(['NPG_FRANCE_ISTEXJOURNALS', - 'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02'] as $file) + foreach(['NPG_FRANCE_ISTEXJOURNALS.xml', + 'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml', + 'PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt'] as $file) $this->_expectSetNamed($file); $this->_service->harvest(); @@ -442,14 +444,14 @@ class BaconHarvestingSimpleTest extends BaconHarvestingAndIndexingTestCase { /** @test */ - public function shouldHaveSaved24Albums() { - $this->assertEquals(24, Class_Album::count()); + public function shouldHaveSaved33Albums() { + $this->assertEquals(33, Class_Album::count()); } /** @test */ - public function numberOfAlbumCategorieShouldBeThree() { - $this->assertCount(3, Class_AlbumCategorie::findAll()); + public function numberOfAlbumCategorieShouldBeFour() { + $this->assertCount(4, Class_AlbumCategorie::findAll()); } @@ -461,12 +463,19 @@ class BaconHarvestingSimpleTest extends BaconHarvestingAndIndexingTestCase { /** @test */ - public function lastAlbumPathShouldBeBouquetsBaconCLASSIQUESGARNIER_GLOBAL_ACADEMIE2() { + public function GarnierAlbumPathShouldBeBouquetsBaconCLASSIQUESGARNIER_GLOBAL_ACADEMIE2() { $this->assertEquals('/Bouquets BACON/CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml', Class_Album::find(23)->getCategorie()->getPath()); } + /** @test */ + public function lastAlbumPathShouldBeBouquetsBaconPROQUEST_FRANCE_LN_ISTEX() { + $this->assertEquals('/Bouquets BACON/PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt', + Class_Album::find(33)->getCategorie()->getPath()); + } + + public function albumProperties() { return [['url_origine', 'https://bacon.abes.fr/package2kbart/'], ['id_origine', 'e13284cb062b2a2db9ed23f3c663a1bd83bb09bb'], @@ -551,7 +560,7 @@ class BaconHarvestingUpdateTest extends BaconHarvestingTestCase { public function 
setUp() { parent::setUp(); - $this->_expectSetNamed('ONLY_ONE'); + $this->_expectSetNamed('ONLY_ONE.xml'); $this->fixture(Class_Album::class, ['id' => 1, @@ -615,7 +624,7 @@ class BaconHarvestingDedupTest extends BaconHarvestingTestCase { public function setUp() { parent::setUp(); - $this->_expectSetNamed('DUPLICATE'); + $this->_expectSetNamed('DUPLICATE.xml'); $this->_service->harvest(); } diff --git a/library/digital_resources/Bacon/tests/PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt b/library/digital_resources/Bacon/tests/PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5fc79f0c983a4007b5e423a8259f72c639ff43d --- /dev/null +++ b/library/digital_resources/Bacon/tests/PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt @@ -0,0 +1,10 @@ +publication_title print_identifier online_identifier date_first_issue_online num_first_vol_online num_first_issue_online date_last_issue_online num_last_vol_online num_last_issue_online title_url first_author title_id embargo_info coverage_depth notes publisher_name publication_type date_monograph_published_print date_monograph_published_online monograph_volume monograph_edition first_editor parent_publication_title_id preceding_publication_title_id access_type bestppn +A modest plea for private mens preaching. 
Or An answer to a booke intituled, Private men no pulpit men; composed by Master Giles Workman : Wherein the thing in controversie is briefly debated; the examination of private mens preaching examined; also certain accusations wip'd away and removed https://revue-sommaire.istex.fr/ark:/67375/8Q1-7D719SNQ-M Knowles ark:/67375/8Q1-7D719SNQ-M fulltext ProQuest, UMI monograph 1648 1999 P 173020267 +A modest plea for the Church of England https://revue-sommaire.istex.fr/ark:/67375/8Q1-FKD2L0PG-8 Hollingworth ark:/67375/8Q1-FKD2L0PG-8 fulltext ProQuest, UMI monograph 1676 1999 P 173824994 +A modest plea for the Church of England https://revue-sommaire.istex.fr/ark:/67375/8Q1-V4TPV797-6 Hollingworth ark:/67375/8Q1-V4TPV797-6 fulltext ProQuest, UMI monograph 1676 1999 P 17371644X +A modest plea for the clergy : wherein is briefly considered, the original, antiquity, necessity : together with the spurious and genuine occasions of their present contempt https://revue-sommaire.istex.fr/ark:/67375/8Q1-4V8QRPZC-G Addison ark:/67375/8Q1-4V8QRPZC-G fulltext ProQuest, UMI monograph 1677 1999 P 17358375X +A modest plea for the due regulation of the press : in answer to several reasons lately printed against it, humbly submitted to the judgment of authority https://revue-sommaire.istex.fr/ark:/67375/8Q1-BZ2GRB5Z-B Gregory ark:/67375/8Q1-BZ2GRB5Z-B fulltext ProQuest, UMI monograph 1698 1999 P 173819621 +A modest plea for the Lords Day : or rather the summe of the plea made by divines for the Lords Day as the Christian Sabbath, against those who contend for the old Sabbath of the seventh day, in order from the creation https://revue-sommaire.istex.fr/ark:/67375/8Q1-3V3J1SRS-Q ark:/67375/8Q1-3V3J1SRS-Q fulltext ProQuest, UMI monograph 1669 1999 P 173835821 +A modest plea, for an equal common-wealth, against monarchy : In which the genuine nature, and true interest of a free-state, is briefly stated; its consistency with a national clergie, hereditary nobility, and mercenary lawyers, 
is examined; together with the expediency of an agrarian and rotation of offices asserted. Also, an apology for younger brothers, the restitution of gavil-kinde, and relief of the poor. With a lift at tythes, and reformation of the laws and universities. All accommodated to publique honour and justice, without injury to any mans propriety; and humbly tendered to the Parliament https://revue-sommaire.istex.fr/ark:/67375/8Q1-J0312CLD-F ark:/67375/8Q1-J0312CLD-F fulltext ProQuest, UMI monograph 1659 1999 P 173054013 +A Modest proposal for the more certain and yet more easie provision for the poor : and likewise for the better suppression of thieves, diminishers and corrupters of the coyn, and other lewd livers : tending much to the advancement of trade, especially in the most profitable part of it, the manufactures of the kingdom https://revue-sommaire.istex.fr/ark:/67375/8Q1-VWN9N91L-F ark:/67375/8Q1-VWN9N91L-F fulltext ProQuest, UMI monograph 1696 1999 P 173816851 +A modest reply : humbly offer'd, as an answer to, and confutation of seven arguments collected and deliver'd by Mr. Samuel Lawrence, in a sermon preach'd at his meeting-house in Namptwich, Octob. 16th, 1691, whereby he would shew, that the infants of professing Christians ought to be baptized : with a seasonable word to my brethren of the baptized church https://revue-sommaire.istex.fr/ark:/67375/8Q1-B93Z6QCR-2 ark:/67375/8Q1-B93Z6QCR-2 fulltext ProQuest, UMI monograph 1692 1999 P 173860257