Skip to content
Snippets Groups Projects
Commit d3f88b39 authored by Arthur Suzuki's avatar Arthur Suzuki
Browse files

Merge branch 'hotline#162061_connecteur_bacon' into 'master'

hotline#162061 : digital_ressource BACON : handle TSV file format

See merge request !4587
parents e14e27d7 feac8036
Branches
Tags
1 merge request!4587hotline#162061 : digital_ressource BACON : handle TSV file format
Pipeline #19033 failed with stage
in 21 minutes and 29 seconds
- correctif #162061 : Connecteur Bacon : Prise en charge du format TSV pour le moissonnage
\ No newline at end of file
......@@ -28,14 +28,14 @@ class Bacon_Service extends Class_DigitalResource_Service {
$_album_ids = [],
$_existing_key_in_bd = [],
$_has_resources = false,
$_parser,
$_config,
$_base_url,
$_sets;
public function __construct($config) {
parent::__construct($config);
$this->_parser = new Bacon_Service_Parser($config);
$this->_config = $config;
$this->_base_url = $config->getHarvestUrl();
$this->_sets = array_keys($config->getHarvestSets());
}
......@@ -51,12 +51,12 @@ class Bacon_Service extends Class_DigitalResource_Service {
$set = $this->_sets[$page_number-1];
parent::_logPage($this->getPageCount(), $page_number);
$this->getLogger()->info($this->_('Analyse du fichier %s', $set));
$this->_collection = $this->_parser->setHarvestSet($set)
->setLogger($this->getLogger())
->parseXML($this->httpGet($this->_base_url . $set))
->getAlbums();
$parser_name = 'Bacon_Service_Parser' . strtoupper(substr($set, -3, 3));
$parser = new $parser_name($this->_config);
$this->_collection = $parser->setHarvestSet($set)
->setLogger($this->getLogger())
->parse($this->httpGet($this->_base_url . $set))
->getAlbums();
return $this;
}
......
......@@ -42,18 +42,12 @@ class Bacon_Service_Parser {
}
public function parseXML($xml) {
$this->_albums = [];
$this->_parser = new Class_WebService_FasterXMLParser();
$this->_parser->setElementHandler($this);
$this->_parser->parse($xml);
$this->_logKbarts(function($remainder) { return 0 !== $remainder; });
public function parse(string $xml) : self {
return $this;
}
protected function _logKbarts($condition) {
protected function _logKbarts($condition) : self {
$remainder = ($count = count($this->_albums)) % 1000;
if ($condition($remainder))
$this->getLogger()->info($this->_('%d kbarts', $count));
......@@ -62,17 +56,19 @@ class Bacon_Service_Parser {
}
public function startKbart($attributes) {
public function startKbart(array $attributes) : self {
$this->_current_album = new Bacon_Service_Album($this->_config, $this->_harvest_set);
$this->_current_album_key = new Bacon_AlbumKey();
return $this;
}
public function endKbart() {
public function endKbart() : self {
$id_origin = $this->_current_album_key->getHash();
$this->_current_album->setId($id_origin);
$this->_albums[$id_origin] = $this->_current_album;
$this->_logKbarts(function($remainder) { return 0 === $remainder; });
return $this;
}
......@@ -207,4 +203,4 @@ class Bacon_Service_Parser {
public function getAlbums() {
return $this->_albums;
}
}
\ No newline at end of file
}
<?php
/**
* Copyright (c) 2012-2022, Agence Française Informatique (AFI). All rights reserved.
*
* BOKEH is free software; you can redistribute it and/or modify
* it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
* the Free Software Foundation.
*
* There are special exceptions to the terms and conditions of the AGPL as it
* is applied to this software (see README file).
*
* BOKEH is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Parser for BACON packages delivered as tab-separated text (".txt" sets).
 *
 * The first line of the file names the columns; each following line
 * describes one kbart. Every non-empty cell is handed to the inherited
 * "end<Column>" handler when such a method exists on the parser.
 */
class Bacon_Service_ParserTXT
  extends Bacon_Service_Parser {

  /**
   * Builds one album per data line of a tab-separated file.
   *
   * @param string $txt whole file content, header line first
   * @return self
   */
  public function parse(string $txt) : self {
    $this->_albums = [];

    // A UTF-8 byte order mark would stick to the first column name and
    // prevent its handler from ever being found.
    if (0 === strncmp($txt, "\xEF\xBB\xBF", 3))
      $txt = substr($txt, 3);

    // fgetcsv only reads from streams: expose the content through an
    // in-memory one.
    $stream = fopen('php://memory', 'r+');
    fwrite($stream, $txt);
    rewind($stream);

    try {
      $header = fgetcsv($stream, null, "\t");
      // No header (empty content or blank first line) means there is
      // nothing to map the cells onto.
      $has_header = is_array($header) && [null] !== $header;

      while ($has_header && false !== ($data = fgetcsv($stream, null, "\t"))) {
        // fgetcsv reports a blank line as [null]: no kbart to build.
        if ([null] === $data)
          continue;

        $this->startKbart([])
             ->processTSVLine($header, $data)
             ->endKbart();
      }
    } finally {
      fclose($stream);
    }

    // endKbart already logs on every multiple of 1000 albums; only log the
    // final count here when it is not such a multiple.
    $this->_logKbarts(function($remainder) { return 0 !== $remainder; });
    return $this;
  }


  /**
   * Feeds the current album with the cells of one data line.
   *
   * @param array $header column names, in file order
   * @param array $data cells of the line, in file order
   * @return self
   */
  protected function processTSVLine(array $header, array $data) : self {
    // array_combine requires both arrays to have the same size: missing
    // trailing cells are treated as empty, surplus cells are ignored.
    $columns = count($header);
    $cells = array_slice(array_pad($data, $columns, ''), 0, $columns);

    foreach (array_combine($header, $cells) as $field => $value) {
      // Only skip cells that are really empty; a bare array_filter would
      // also discard a legitimate "0".
      if (null === $value || '' === $value)
        continue;

      $this->setFieldFromTSV((string)$field, $value);
    }

    return $this;
  }


  /**
   * Calls the "end<Column>" handler matching a column name, if any.
   * Columns without a matching method are silently ignored.
   *
   * @param string $field column name as found in the header line
   * @param string $value cell content
   */
  protected function setFieldFromTSV(string $field, string $value) : void {
    // "publication_title" becomes "endPublication_Title".
    $words = array_map('ucfirst', explode('_', $field));
    $data_handler = 'end' . implode('_', $words);

    if (method_exists($this, $data_handler))
      $this->$data_handler($value);
  }
}
\ No newline at end of file
<?php
/**
* Copyright (c) 2012-2022, Agence Française Informatique (AFI). All rights reserved.
*
* BOKEH is free software; you can redistribute it and/or modify
* it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
* the Free Software Foundation.
*
* There are special exceptions to the terms and conditions of the AGPL as it
* is applied to this software (see README file).
*
* BOKEH is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Parser for BACON packages delivered as XML (".xml" sets).
 *
 * Registers itself as element handler of a Class_WebService_FasterXMLParser
 * and lets it walk through the document.
 */
class Bacon_Service_ParserXML
  extends Bacon_Service_Parser {

  /**
   * Resets the album list, then parses the given document.
   *
   * @param string $xml whole XML document
   * @return self
   */
  public function parse(string $xml) : self {
    $this->_albums = [];

    $xml_parser = new Class_WebService_FasterXMLParser();
    $this->_parser = $xml_parser;
    $xml_parser->setElementHandler($this);
    $xml_parser->parse($xml);

    // Log the final album count only when it is not a multiple of 1000.
    $this->_logKbarts(fn($remainder) => $remainder !== 0);

    return $this;
  }
}
......@@ -238,8 +238,9 @@ abstract class BaconActivatedTestCase extends AbstractControllerTestCase {
Class_AdminVar::set('Bacon_HARVEST_SETS',
json_encode(['files' => ['NPG_FRANCE_ISTEXJOURNALS.xml',
'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml'],
'types' => ['NPG', '']]));
'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml',
'PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt'],
'types' => ['NPG', '', '']]));
$group = $this->fixture(Class_UserGroup::class,
['id' => 1,
......@@ -392,8 +393,8 @@ abstract class BaconHarvestingTestCase extends BaconActivatedTestCase {
protected function _expectSetNamed($file_name) {
$this->_http_client
->whenCalled('open_url')
->with('https://bacon.abes.fr/package2kbart/' . $file_name . '.xml')
->willDo(fn() => file_get_contents(__DIR__ . '/' . $file_name . '.xml'))
->with('https://bacon.abes.fr/package2kbart/' . $file_name )
->willDo(fn() => file_get_contents(__DIR__ . '/' . $file_name ))
->beStrict();
}
}
......@@ -411,8 +412,9 @@ abstract class BaconHarvestingAndIndexingTestCase extends BaconHarvestingTestCas
public function setUp() {
parent::setUp();
foreach(['NPG_FRANCE_ISTEXJOURNALS',
'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02'] as $file)
foreach(['NPG_FRANCE_ISTEXJOURNALS.xml',
'CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml',
'PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt'] as $file)
$this->_expectSetNamed($file);
$this->_service->harvest();
......@@ -442,14 +444,14 @@ class BaconHarvestingSimpleTest extends BaconHarvestingAndIndexingTestCase {
/** @test */
public function shouldHaveSaved24Albums() {
$this->assertEquals(24, Class_Album::count());
public function shouldHaveSaved33Albums() {
$this->assertEquals(33, Class_Album::count());
}
/** @test */
public function numberOfAlbumCategorieShouldBeThree() {
$this->assertCount(3, Class_AlbumCategorie::findAll());
public function numberOfAlbumCategorieShouldBeFour() {
$this->assertCount(4, Class_AlbumCategorie::findAll());
}
......@@ -461,12 +463,19 @@ class BaconHarvestingSimpleTest extends BaconHarvestingAndIndexingTestCase {
/** @test */
public function lastAlbumPathShouldBeBouquetsBaconCLASSIQUESGARNIER_GLOBAL_ACADEMIE2() {
public function GarnierAlbumPathShouldBeBouquetsBaconCLASSIQUESGARNIER_GLOBAL_ACADEMIE2() {
$this->assertEquals('/Bouquets BACON/CLASSIQUESGARNIER_GLOBAL_ACADEMIE2_2018-05-02.xml',
Class_Album::find(23)->getCategorie()->getPath());
}
/**
 * Album 33 must be filed under the category named after the ".txt" set.
 *
 * @test
 */
public function lastAlbumPathShouldBeBouquetsBaconPROQUEST_FRANCE_LN_ISTEX() {
  $expected_path = '/Bouquets BACON/PROQUEST_FRANCE_LN-ISTEX-EBOOKS-EEBO-PFISTEX.txt';
  $category = Class_Album::find(33)->getCategorie();

  $this->assertEquals($expected_path, $category->getPath());
}
public function albumProperties() {
return [['url_origine', 'https://bacon.abes.fr/package2kbart/'],
['id_origine', 'e13284cb062b2a2db9ed23f3c663a1bd83bb09bb'],
......@@ -551,7 +560,7 @@ class BaconHarvestingUpdateTest extends BaconHarvestingTestCase {
public function setUp() {
parent::setUp();
$this->_expectSetNamed('ONLY_ONE');
$this->_expectSetNamed('ONLY_ONE.xml');
$this->fixture(Class_Album::class,
['id' => 1,
......@@ -615,7 +624,7 @@ class BaconHarvestingDedupTest extends BaconHarvestingTestCase {
public function setUp() {
parent::setUp();
$this->_expectSetNamed('DUPLICATE');
$this->_expectSetNamed('DUPLICATE.xml');
$this->_service->harvest();
}
......
publication_title print_identifier online_identifier date_first_issue_online num_first_vol_online num_first_issue_online date_last_issue_online num_last_vol_online num_last_issue_online title_url first_author title_id embargo_info coverage_depth notes publisher_name publication_type date_monograph_published_print date_monograph_published_online monograph_volume monograph_edition first_editor parent_publication_title_id preceding_publication_title_id access_type bestppn
A modest plea for private mens preaching. Or An answer to a booke intituled, Private men no pulpit men; composed by Master Giles Workman : Wherein the thing in controversie is briefly debated; the examination of private mens preaching examined; also certain accusations wip'd away and removed https://revue-sommaire.istex.fr/ark:/67375/8Q1-7D719SNQ-M Knowles ark:/67375/8Q1-7D719SNQ-M fulltext ProQuest, UMI monograph 1648 1999 P 173020267
A modest plea for the Church of England https://revue-sommaire.istex.fr/ark:/67375/8Q1-FKD2L0PG-8 Hollingworth ark:/67375/8Q1-FKD2L0PG-8 fulltext ProQuest, UMI monograph 1676 1999 P 173824994
A modest plea for the Church of England https://revue-sommaire.istex.fr/ark:/67375/8Q1-V4TPV797-6 Hollingworth ark:/67375/8Q1-V4TPV797-6 fulltext ProQuest, UMI monograph 1676 1999 P 17371644X
A modest plea for the clergy : wherein is briefly considered, the original, antiquity, necessity : together with the spurious and genuine occasions of their present contempt https://revue-sommaire.istex.fr/ark:/67375/8Q1-4V8QRPZC-G Addison ark:/67375/8Q1-4V8QRPZC-G fulltext ProQuest, UMI monograph 1677 1999 P 17358375X
A modest plea for the due regulation of the press : in answer to several reasons lately printed against it, humbly submitted to the judgment of authority https://revue-sommaire.istex.fr/ark:/67375/8Q1-BZ2GRB5Z-B Gregory ark:/67375/8Q1-BZ2GRB5Z-B fulltext ProQuest, UMI monograph 1698 1999 P 173819621
A modest plea for the Lords Day : or rather the summe of the plea made by divines for the Lords Day as the Christian Sabbath, against those who contend for the old Sabbath of the seventh day, in order from the creation https://revue-sommaire.istex.fr/ark:/67375/8Q1-3V3J1SRS-Q ark:/67375/8Q1-3V3J1SRS-Q fulltext ProQuest, UMI monograph 1669 1999 P 173835821
A modest plea, for an equal common-wealth, against monarchy : In which the genuine nature, and true interest of a free-state, is briefly stated; its consistency with a national clergie, hereditary nobility, and mercenary lawyers, is examined; together with the expediency of an agrarian and rotation of offices asserted. Also, an apology for younger brothers, the restitution of gavil-kinde, and relief of the poor. With a lift at tythes, and reformation of the laws and universities. All accommodated to publique honour and justice, without injury to any mans propriety; and humbly tendered to the Parliament https://revue-sommaire.istex.fr/ark:/67375/8Q1-J0312CLD-F ark:/67375/8Q1-J0312CLD-F fulltext ProQuest, UMI monograph 1659 1999 P 173054013
A Modest proposal for the more certain and yet more easie provision for the poor : and likewise for the better suppression of thieves, diminishers and corrupters of the coyn, and other lewd livers : tending much to the advancement of trade, especially in the most profitable part of it, the manufactures of the kingdom https://revue-sommaire.istex.fr/ark:/67375/8Q1-VWN9N91L-F ark:/67375/8Q1-VWN9N91L-F fulltext ProQuest, UMI monograph 1696 1999 P 173816851
A modest reply : humbly offer'd, as an answer to, and confutation of seven arguments collected and deliver'd by Mr. Samuel Lawrence, in a sermon preach'd at his meeting-house in Namptwich, Octob. 16th, 1691, whereby he would shew, that the infants of professing Christians ought to be baptized : with a seasonable word to my brethren of the baptized church https://revue-sommaire.istex.fr/ark:/67375/8Q1-B93Z6QCR-2 ark:/67375/8Q1-B93Z6QCR-2 fulltext ProQuest, UMI monograph 1692 1999 P 173860257
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment