Skip to content
Snippets Groups Projects
Commit ffbbf648 authored by Ghislain Loas's avatar Ghislain Loas
Browse files

dev #55145 add xml parsing

parent a9ea8ccd
Branches
Tags
5 merge requests!2102#57574 : correction orthographe des jours de la semaine,!2080Sandbox detach zf from storm,!2061Master,!2034Dev#55145 contractuel 2455 le kiosk recherche federee,!2025Dev#55145 contractuel 2455 le kiosk recherche federee
Pipeline #459 failed with stage
in 8 minutes and 13 seconds
......@@ -61,6 +61,11 @@ abstract class Class_WebService_SIGB_AbstractILSDIPatronInfoReader {
* @return Class_WebService_SIGB_*_PatronInfoReader
*/
public function parseXML($xml) {
$xml = preg_replace_callback("/(&#[0-9]+;)/", function($m) {
return mb_convert_encoding($m[1], "UTF-8", "HTML-ENTITIES");
}, $xml);
$xml = $this->stripInvalidXml($xml);
$this->_xml_parser = Class_WebService_FasterXMLParser::newInstance()
->setElementHandler($this);
......@@ -70,6 +75,36 @@ abstract class Class_WebService_SIGB_AbstractILSDIPatronInfoReader {
}
/**
 * Replaces every byte that may not appear in an XML 1.0 document with a
 * space.
 *
 * The payload is processed byte by byte: only the ASCII control characters
 * other than tab (0x09), line feed (0x0A) and carriage return (0x0D) are
 * replaced. Bytes >= 0x20, including the bytes of multi-byte UTF-8
 * sequences, are left untouched, so the result has the same length as the
 * input.
 *
 * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char
 *
 * @param string $xml raw XML payload
 * @return string cleaned payload, '' when $xml is not a non-empty string
 */
protected function stripInvalidXml($xml) {
  // Strict test instead of empty(): the string "0" must not be discarded.
  if (!is_string($xml) || '' === $xml)
    return '';

  // No "u" modifier on purpose: the replacement is byte-wise.
  return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', $xml);
}
/**
* @param string $data
*/
......
......@@ -39,44 +39,12 @@ class Class_WebService_XMLParser {
$this->remove_namespace=false;
}
/**
 * Replaces every byte that may not appear in an XML 1.0 document with a
 * space.
 *
 * The payload is processed byte by byte: only the ASCII control characters
 * other than tab (0x09), line feed (0x0A) and carriage return (0x0D) are
 * replaced. Bytes >= 0x20, including the bytes of multi-byte UTF-8
 * sequences, are left untouched, so the result has the same length as the
 * input.
 *
 * @see http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char
 *
 * @param string $xml raw XML payload
 * @return string cleaned payload, '' when $xml is not a non-empty string
 */
protected function stripInvalidXml($xml) {
  // Strict test instead of empty(): the string "0" must not be discarded.
  if (!is_string($xml) || '' === $xml)
    return '';

  // No "u" modifier on purpose: the replacement is byte-wise.
  return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', ' ', $xml);
}
/**
* @param string $xml
* @return Class_WebService_XMLParser
*/
public function parse($xml) {
$xml = preg_replace_callback("/(&#[0-9]+;)/", function($m) {
return mb_convert_encoding($m[1], "UTF-8", "HTML-ENTITIES");
}, $xml);
$xml = $this->stripInvalidXml($xml);
$this->_parsed_xml = $xml;
$this->_parents = array() ;
$parser = $this->_createParser() ;
......
......@@ -23,7 +23,7 @@ class Lekiosk_Config extends Class_DigitalResource_Config {
public function getConfig() {
return [
'DocTypeLabel' => $this->_('LeKiosk'),
'DocTypeLabel' => $this->_('Magazine numérique LeKiosk'),
'PermissionLabel' => $this->_('Bibliothèque numérique: accéder à LeKiosk'),
'MenuLabel' => $this->_('Lien vers LeKiosk'),
'Introduction' => $this->_('Retrouvez vos magazines préférés en numérique parmi plus de 1600 titres. Un univers riche et varié, des dernières parutions aux anciens numéros.'),
......@@ -64,6 +64,6 @@ class Lekiosk_Config extends Class_DigitalResource_Config {
/**
 * Tells whether the LeKiosk resource is configured.
 *
 * @return bool true when the LEKIOSK_ID admin variable compares greater
 *              than 0
 */
public function isEnabled() {
  // A single boolean return: the raw admin variable must not leak to callers.
  return 0 < Class_AdminVar::get('LEKIOSK_ID');
}
}
<?php
/**
* Copyright (c) 2012, Agence Française Informatique (AFI). All rights reserved.
*
* BOKEH is free software; you can redistribute it and/or modify
* it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
* the Free Software Foundation.
*
* There are special exceptions to the terms and conditions of the AGPL as it
* is applied to this software (see README file).
*
* BOKEH is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * LeKiosk harvesting service: fetches the catalogue XML of the current day,
 * hands it to Lekiosk_Service_Parser and exposes the resulting albums.
 */
class Lekiosk_Service extends Class_DigitalResource_Service {
  use Trait_TimeSource;

  /** @var Storm_Collection albums built from the last loaded catalogue */
  protected $_albums;


  /**
   * @param mixed $config forwarded unchanged to the parent constructor
   */
  public function __construct($config) {
    parent::__construct($config);
    $this->_albums = new Storm_Collection();
  }


  /**
   * Fetches and parses the catalogue, then records its albums and counters.
   *
   * @param int $page_number not used: page number and page count are
   *                         always set to 1
   * @return Lekiosk_Service
   */
  protected function loadPage($page_number = 1) {
    $xml = $this->httpGet($this->_getCatalogueUrl());

    $this->_parser = new Lekiosk_Service_Parser();
    $this->_parser->parseXML($xml);

    $this->_albums = $this->_parser->getAlbums();
    $this->_total_count = count($this->_albums);
    $this->_page_number = 1;
    $this->_page_count = 1;
    return $this;
  }


  /**
   * Builds the FTP URL of the catalogue file for the current day
   * (file name suffix formatted as ddmmYYYY).
   *
   * @return string
   */
  protected function _getCatalogueUrl() {
    $date = date('dmY', $this->getCurrentTime());

    // Credentials are percent-encoded so that reserved characters such as
    // "@", ":" or "/" cannot break the URL structure.
    return sprintf('ftp://%s:%s@ftp.lekiosk.com/lekiosque_%s.xml',
                   rawurlencode((string) Class_AdminVar::get('LEKIOSK_FTP_LOGIN')),
                   rawurlencode((string) Class_AdminVar::get('LEKIOSK_FTP_PASSWORD')),
                   $date);
  }


  /**
   * @return int always 1
   */
  public function getPageCount() {
    return 1;
  }


  /**
   * @return Storm_Collection albums of the last loaded catalogue (empty
   *                          until loadPage() has run)
   */
  public function getRessourcesNumeriques() {
    return $this->_albums;
  }
}
\ No newline at end of file
<?php
/**
* Copyright (c) 2012-2017, Agence Française Informatique (AFI). All rights reserved.
*
* BOKEH is free software; you can redistribute it and/or modify
* it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
* the Free Software Foundation.
*
* There are special exceptions to the terms and conditions of the AGPL as it
* is applied to this software (see README file).
*
* BOKEH is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Digital resource describing one LeKiosk issue.
 */
class Lekiosk_Service_Album extends Class_WebService_BibNumerique_RessourceNumerique {
  /**
   * Sets the LeKiosk document type on $album, then adds this resource's
   * publisher to the object returned by that setter.
   *
   * @param object $album record to complete
   */
  public function fillAlbum($album) {
    $doc_type = Lekiosk_Config::getInstance()->getDocType();
    $typed_album = $album->setTypeDocId($doc_type);
    $typed_album->addEditor($this->getEditeur());
  }
}
?>
\ No newline at end of file
<?php
/**
* Copyright (c) 2012-2017, Agence Française Informatique (AFI). All rights reserved.
*
* BOKEH is free software; you can redistribute it and/or modify
* it under the terms of the GNU AFFERO GENERAL PUBLIC LICENSE as published by
* the Free Software Foundation.
*
* There are special exceptions to the terms and conditions of the AGPL as it
* is applied to this software (see README file).
*
* BOKEH is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU AFFERO GENERAL PUBLIC LICENSE for more details.
*
* You should have received a copy of the GNU AFFERO GENERAL PUBLIC LICENSE
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Builds Lekiosk_Service_Album objects from the LeKiosk catalogue XML.
 */
class Lekiosk_Service_Parser {
  /** @var Storm_Collection albums collected by the last parseXML() call */
  protected $_albums;

  /** @var Class_WebService_XMLParser parser created by the last parseXML() call */
  protected $_parser;


  /**
   * Parses the catalogue and collects one album per handled element.
   *
   * @param string $xml catalogue content
   * @return Lekiosk_Service_Parser
   */
  public function parseXML($xml) {
    $this->_albums = new Storm_Collection();
    $this->_parser = new Class_WebService_XMLParser();
    $this->_parser->setElementHandler($this);
    $this->_parser->parse($xml);
    return $this;
  }


  /**
   * Creates an album from a set of attributes and appends it to the
   * collection.
   * NOTE(review): presumably called back by Class_WebService_XMLParser for
   * each opening "data" element - confirm against the parser.
   *
   * @param array $attribs attributes indexed by upper-case name
   */
  public function startData($attribs) {
    $album = new Lekiosk_Service_Album();
    $album
      ->setId($attribs['PUBLICATIONID'])
      ->setTitle($this->_getTitle($attribs))
      ->setExternalUri($attribs['SSOURL'])
      ->setDescription($attribs['ISSUEDESCRIPTION'])
      ->addPoster($attribs['COVERURL'])
      ->setEditeur($attribs['EDITEUR'])
      ->addMatiere($attribs['CATEGORIES'])
      ;

    $this->_albums->append($album);
  }


  /**
   * Formats the album title as "<publication> n°<issue> : <release date>".
   *
   * NOTE(review): strftime() is locale dependent and deprecated as of
   * PHP 8.1; a replacement has to keep the localized month name.
   *
   * @param array $attribs attributes indexed by upper-case name
   * @return string
   */
  protected function _getTitle($attribs) {
    $date = strtotime($attribs['RELEASEDATE']);
    // %e pads single-digit days with a leading space, which would leave a
    // double space after the colon.
    $date = ltrim(strftime('%e %B %Y', $date));

    return sprintf('%s n°%s : %s',
                   $attribs['PUBLICATIONTITLE'],
                   $attribs['ISSUENUMBER'],
                   $date);
  }


  /**
   * @return Storm_Collection|null albums of the last parseXML() call, null
   *                               when nothing has been parsed yet
   */
  public function getAlbums() {
    return $this->_albums;
  }
}
?>
\ No newline at end of file
......@@ -18,7 +18,14 @@
* along with BOKEH; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
 * Test helper storing the admin variables used by the LeKiosk tests.
 */
class LekioskAdminVars {
  /**
   * Sets LEKIOSK_ID, LEKIOSK_FTP_LOGIN and LEKIOSK_FTP_PASSWORD, in that
   * order, to fixed test values.
   */
  public static function activate() {
    $fixtures = ['LEKIOSK_ID' => '29',
                 'LEKIOSK_FTP_LOGIN' => 'FOIX',
                 'LEKIOSK_FTP_PASSWORD' => 'PWD'];

    foreach ($fixtures as $name => $value)
      Class_AdminVar::set($name, $value);
  }
}
abstract class LeKioskLinkModeLinkTestCase extends ModelTestCase {
......@@ -33,7 +40,7 @@ abstract class LeKioskLinkModeLinkTestCase extends ModelTestCase {
'libelle' => 'portail',
'mail_site' => '']);
Class_AdminVar::set('LEKIOSK_ID', '29');
LekioskAdminVars::activate();
$this->_lekiosk_config =
new Assimil_Config(new Class_DigitalResource_Wrapper (Class_DigitalResource::getInstance(), 'LeKiosk'));
......@@ -243,4 +250,114 @@ class ModulesControllerLeKioskUserWithGroupWithRightTest extends AbstractControl
}
}
?>
\ No newline at end of file
/**
 * Shared fixture: harvests the catalogue.xml file located next to this test
 * through a mocked HTTP client, then loads the album with id 1.
 */
abstract class LeKioskServiceTestCase extends ModelTestCase {
  protected $_storm_default_to_volatile = true;


  public function setUp() {
    parent::setUp();

    LekioskAdminVars::activate();

    $catalogue_xml = file_get_contents(__DIR__. '/catalogue.xml');

    // The stubbed URL carries the date of the time source set below.
    // NOTE(review): the second open_url stub presumably answers every other
    // URL - confirm against the mock implementation.
    $this->_http_client = $http_client = $this->mock();
    $http_client
      ->whenCalled('open_url')
      ->with('ftp://FOIX:PWD@ftp.lekiosk.com/lekiosque_06022017.xml')
      ->answers($catalogue_xml)

      ->whenCalled('open_url')
      ->answers('une-image');

    $wrapper = new Class_DigitalResource_Wrapper(Class_DigitalResource::getInstance(),
                                                 'Lekiosk');
    $this->_service = $service = new Lekiosk_Service(new Lekiosk_Config($wrapper));
    Lekiosk_Service::setDefaultHttpClient($http_client);

    $service->setTimeSource(new TimeSourceForTest('2017-02-06 10:00:00'));
    $service->harvest();

    Class_Album::clearCache();
    $this->_10_national_sport = Class_Album::find(1);
  }
}
/**
 * Checks the service and the albums stored after the harvest performed in
 * LeKioskServiceTestCase::setUp().
 */
class LeKioskServiceHarvestTest extends LeKioskServiceTestCase {
  /** @test */
  public function nameShouldBeLekiosk() {
    $this->assertEquals('Lekiosk', $this->_service->getName());
  }


  /** @test */
  public function shouldBeEnabled() {
    $this->assertTrue($this->_service->isEnabled());
  }


  /** @test */
  public function shouldHaveSaved5Albums() {
    $this->assertCount(5, Class_Album::findAll());
  }


  /** @test */
  public function nationalSportShoulBeFound() {
    $this->assertNotNull($this->_10_national_sport);
  }


  /** @test */
  public function nationalSportIdShouldBe851749() {
    $this->assertEquals(851749, $this->_10_national_sport->getIdOrigine());
  }


  /** @test */
  public function nationalSportTitleShouldBe10NationalSport() {
    $this->assertEquals('Le 10 Sport National n°419 : 12 janvier 2017', $this->_10_national_sport->getTitre());
  }


  /** @test */
  public function nationalSportSSOUrlShouldBeLeKioskDotCom() {
    $this->assertEquals('https://pro.lekiosk.com/lekiosque.sso.aspx?ReturnUrl=Le-10-Sport-National-z1962566.aspx', $this->_10_national_sport->getExternalUri());
  }


  /** @test */
  public function nationalSportDescriptionShouldContainsSeriesDecatedToSport() {
    $this->assertEquals('Le 10 Sport National est le mensuel entièrement dédié au sport. Vous y trouverez les infos foot, tennis ou encore basket du moment...', $this->_10_national_sport->getDescription());
  }


  /** @test */
  public function nationalSportDoctypeShouldBeNumericSerialLeKiosk() {
    $this->assertEquals('Magazine numérique LeKiosk', $this->_10_national_sport->getTypeDoc()->getLabel());
  }


  /** @test */
  public function nationalSportPosterUriShouldBeLekioskDotComDetailDotJpg() {
    $this->assertEquals('http://cdn2.lekiosk.com/Public/Publications/851749/1962566/Images/Detail.jpg', $this->_10_national_sport->getPoster());
  }


  /** @test */
  public function nationalSportEditorShouldBe10Medias() {
    $this->assertEquals('10 Medias', $this->_10_national_sport->getEditors()[0]);
  }


  /** @test */
  public function nationalSportSubjectShouldBeSport() {
    $sport = Class_CodifMatiere::find($this->_10_national_sport->getMatiere());
    // Report a missing subject as an assertion failure rather than as a
    // call on null.
    $this->assertNotNull($sport);
    $this->assertEquals('Sport', $sport->getLibelle());
  }
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment