Revision 34341
Added by Sandro La Bruzzo over 9 years ago
webapps/dnet-openaire_dli-container/trunk/src/main/resources/eu/dnetlib/cnr-site.properties | ||
---|---|---|
40 | 40 |
msro.wf.mail.cc = michele.artini@isti.cnr.it |
41 | 41 |
|
42 | 42 |
service.index.solr.rank.enable=true |
43 |
|
|
44 |
#MDSTORE |
|
45 |
services.mdstore.dao= postgresMDStoreDao |
|
46 |
|
|
47 |
#INDEX |
|
48 |
service.solr.index.jsonConfiguration= \ |
|
49 |
{"id":"solr",\ |
|
50 |
"address":"localhost:9983",\ |
|
51 |
"port":"8983",\ |
|
52 |
"webContext":"solr",\ |
|
53 |
"numShards":"1",\ |
|
54 |
"replicationFactor":"1",\ |
|
55 |
"host":"localhost",\ |
|
56 |
"feedingShutdownTolerance":"30000",\ |
|
57 |
"feedingBufferFlushThreshold":"1000",\ |
|
58 |
"feedingSimulationMode":"false",\ |
|
59 |
"luceneMatchVersion":"4.9",\ |
|
60 |
"serverLibPath":"../../../../contrib/extraction/lib",\ |
|
61 |
"filterCacheSize":"512","filterCacheInitialSize":"512",\ |
|
62 |
"queryCacheSize":"512","queryCacheInitialSize":"512",\ |
|
63 |
"documentCacheSize":"512","documentCacheInitialSize":"512",\ |
|
64 |
"ramBufferSizeMB":"960","mergeFactor":"40",\ |
|
65 |
"autosoftcommit":"-1","autocommit":"15000",\ |
|
66 |
"termIndexInterval":"1024","maxIndexingThreads":"8",\ |
|
67 |
"queryResultWindowSize":"20","queryResultMaxDocCached":"200"} |
webapps/dnet-openaire_dli-container/trunk/src/main/webapp/index.html | ||
---|---|---|
1 |
|
|
2 |
|
|
3 |
|
|
4 | 1 |
<table style="width :100%"> |
5 | 2 |
<tr><td style="text-align:center; width:100%"><a href="http://www.d-net.research-infrastructures.eu/"><img src='images/d-netLogo.png'/></a></td></tr> |
6 | 3 |
</table> |
webapps/dnet-openaire_dli-container/trunk/pom.xml | ||
---|---|---|
26 | 26 |
</dependency> |
27 | 27 |
<dependency> |
28 | 28 |
<groupId>eu.dnetlib</groupId> |
29 |
<artifactId>cnr-mongo-mdstore</artifactId>
|
|
30 |
<version>[4.0.0,5.0.0)</version>
|
|
29 |
<artifactId>dnet-postgres-mdstore</artifactId>
|
|
30 |
<version>[1.0.0-SNAPSHOT,2.0.0)</version>
|
|
31 | 31 |
</dependency> |
32 | 32 |
<dependency> |
33 | 33 |
<groupId>eu.dnetlib</groupId> |
... | ... | |
97 | 97 |
<artifactId>dnet-modular-is-ui</artifactId> |
98 | 98 |
<version>[2.0.0,3.0.0)</version> |
99 | 99 |
</dependency> |
100 |
|
|
101 |
|
|
102 |
<!-- DATA LITERATURE INTERLINKING MODULES --> |
|
103 |
<dependency> |
|
104 |
<groupId>eu.dnetlib</groupId> |
|
105 |
<artifactId>dnet-openaire_dli-profiles</artifactId> |
|
106 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version> |
|
107 |
</dependency> |
|
108 |
<dependency> |
|
109 |
<groupId>eu.dnetlib</groupId> |
|
110 |
<artifactId>dnet-openaire_dli-workflows</artifactId> |
|
111 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version> |
|
112 |
</dependency> |
|
100 | 113 |
|
101 | 114 |
<!-- Log service --> |
102 | 115 |
<dependency> |
modules/dnet-openaire_dli-workflows/trunk/src/main/resources/eu/dnetlib/msro/dli_openaire/workflows/repo-hi/applicationContext-nodes.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring context for the DLI "repo-hi" workflow nodes.
  Declares one Ehcache manager, two DOI caches (DataCite and Crossref) that share it,
  and the prototype-scoped workflow node that obtains the datasource parameters.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">

    <!-- Cache manager shared by both DOI caches below. -->
    <bean id="dataciteDOICacheManager"
          class="org.springframework.cache.ehcache.EhCacheManagerFactoryBean">
        <property name="cacheManagerName" value="dataciteDOICacheManager"/>
    </bean>

    <!-- Cache injected by name into the DataCite DOI resolver. -->
    <!-- NOTE(review): time values are presumably seconds, as in Ehcache; confirm for the version in use. -->
    <bean id="dataciteDOICache"
          class="org.springframework.cache.ehcache.EhCacheFactoryBean">
        <property name="eternal" value="false"/>
        <property name="timeToLive" value="0"/>
        <property name="timeToIdle" value="36000"/>
        <property name="maxElementsInMemory" value="10000"/>
        <property name="maxElementsOnDisk" value="20000"/>
        <property name="cacheManager" ref="dataciteDOICacheManager"/>
        <property name="cacheName" value="dataciteDOICache"/>
    </bean>

    <!-- Cache injected by name into the Crossref DOI resolver; same sizing, same manager. -->
    <bean id="crossrefDOICache"
          class="org.springframework.cache.ehcache.EhCacheFactoryBean">
        <property name="eternal" value="false"/>
        <property name="timeToLive" value="0"/>
        <property name="timeToIdle" value="36000"/>
        <property name="maxElementsInMemory" value="10000"/>
        <property name="maxElementsOnDisk" value="20000"/>
        <property name="cacheManager" ref="dataciteDOICacheManager"/>
        <property name="cacheName" value="crossrefDOICache"/>
    </bean>

    <!-- Workflow node; prototype scope gives every lookup its own instance. -->
    <bean id="wfNodeObtainDataSourceParams"
          class="eu.dnetlib.msro.dli.workflows.nodes.ObtainDataSourceParamsJobNode"
          scope="prototype"/>

</beans>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/oai_datacite_transform.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="OAI_datacite_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2014-11-19T11:05:55+01:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <IMPORTED/>
            <SCRIPT>
                <TITLE>OAI_datacite ingestion transform</TITLE>
                <!-- The stylesheet is carried as text; it wraps an incoming record into an oai:record with a D-Net header. -->
                <CODE><![CDATA[
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns:datetime="http://exslt.org/dates-and-times"
    xmlns:exslt="http://exslt.org/common"
    xmlns:oai="http://www.openarchives.org/OAI/2.0/"
    xmlns:dri="http://www.driver-repository.eu/namespace/dri"
    exclude-result-prefixes="xsl datetime exslt ">

    <!-- Parameters supplied by the caller; repositoryId is declared but not read by this stylesheet. -->
    <xsl:param name="repositoryId"/>
    <xsl:param name="namespacePrefix"/>
    <xsl:param name="datasourceId"/>

    <!-- OAI deletion status of the incoming record, if any (not used below). -->
    <xsl:variable name="status"
        select="/*[local-name() = 'record']/*[local-name() = 'header']/@status"/>
    <!-- Identifier child of the resource element; used as record identifier. -->
    <xsl:variable name="doi"
        select="//*[local-name()='resource']/*[local-name()='identifier']"/>

    <xsl:template match="/">
        <oai:record xmlns="http://namespace.openaire.eu/"
            xmlns:dc="http://purl.org/dc/elements/1.1/"
            xmlns:dr="http://www.driver-repository.eu/namespace/dr"
            xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance"
            xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

            <!-- Build the D-Net header, then append the elements of the original OAI header. -->
            <oai:header>
                <dri:objIdentifier><xsl:value-of select="concat($namespacePrefix,'::', $doi)"/></dri:objIdentifier>
                <dri:recordIdentifier><xsl:value-of select="$doi"/></dri:recordIdentifier>
                <dri:dateOfCollection><xsl:value-of select="datetime:dateTime()"/></dri:dateOfCollection>
                <dri:repositoryId><xsl:value-of select="$datasourceId"/></dri:repositoryId>
                <dri:datasourceprefix><xsl:value-of select="$namespacePrefix"/></dri:datasourceprefix>
                <!-- Copy every OAI-namespace descendant of the old header, in document order. -->
                <xsl:copy-of
                    select="/*[local-name() = 'record']/*[local-name() = 'header']//*[namespace-uri()='http://www.openarchives.org/OAI/2.0/']"/>
            </oai:header>

            <!-- Carry the metadata section over unchanged when the record has one. -->
            <xsl:if test="/*[local-name() = 'record']/*[local-name() = 'metadata']">
                <xsl:copy-of select="/*[local-name() = 'record']/*[local-name() = 'metadata']"/>
            </xsl:if>

        </oai:record>
    </xsl:template>

</xsl:stylesheet>
]]></CODE>
            </SCRIPT>
        </CONFIGURATION>
        <STATUS/>
        <SECURITY_PARAMETERS/>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/MDFormatDSResources/oai_datacite.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="2-8b912349-8a86-4330-93ef-7e0cd9bc87c2_TURGb3JtYXREU1Jlc291cmNlcy9NREZvcm1hdERTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="MDFormatDSResourceType"/>
        <RESOURCE_KIND value="MDFormatDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2008-05-22T14:40:04+02:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <NAME>oai_datacite</NAME>
            <DESCRIPTION>Data Literature Interlinking Metadata Format</DESCRIPTION>
            <INTERPRETATION>cleaned</INTERPRETATION>
            <SCHEMA uri=""/>
        </CONFIGURATION>
        <STATUS>
            <LAYOUTS>
                <!-- Single layout: one non-indexed "doi" field taken from the DOI-typed identifier of the resource. -->
                <LAYOUT name="mdstore_native">
                    <FIELDS>
                        <FIELD name="doi"
                               indexable="false"
                               result="false"
                               stat="true"
                               xpath="//*[local-name()='resource']/*[local-name()='identifier' and ./@identifierType='DOI']"/>
                    </FIELDS>
                </LAYOUT>
            </LAYOUTS>
        </STATUS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/MDFormatDSResources/DLIF.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="2-8b9503d9-8a86-4330-93ef-7e0cd9bc87c2_TURGb3JtYXREU1Jlc291cmNlcy9NREZvcm1hdERTUmVzb3VyY2VUeXBl"/>
        <RESOURCE_TYPE value="MDFormatDSResourceType"/>
        <RESOURCE_KIND value="MDFormatDSResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2008-05-22T14:40:04+02:00"/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <NAME>DLIF</NAME>
            <DESCRIPTION>Data Literature Interlinking Metadata Format</DESCRIPTION>
            <INTERPRETATION>cleaned</INTERPRETATION>
            <SCHEMA uri=""/>
        </CONFIGURATION>
        <STATUS>
            <LAYOUTS>
                <LAYOUT name="index">
                    <FIELDS>
                        <!-- D-NET header indexed fields -->
                        <FIELD name="objidentifier" indexable="true" result="false" stat="false"
                               tokenizable="false" multivalued="false"
                               xpath="//*[local-name()='objIdentifier']"/>
                        <FIELD name="repositoryid" indexable="true" result="false" stat="false"
                               tokenizable="false" multivalued="false"
                               xpath="//*[local-name()='repositoryId']"/>

                        <!-- DLI object identifier -->
                        <FIELD name="dnetresourceidentifier" indexable="true" stored="true" result="false"
                               stat="false" tokenizable="false" multivalued="false"
                               xpath="//*[local-name()='dliObject']/*[local-name()='dnetResourceIdentifier']"/>

                        <!-- Free text indexed fields -->
                        <FIELD name="repositoryname" indexable="true" stored="true" result="false"
                               stat="false" tokenizable="true" multivalued="false"
                               xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']/@providerName"/>
                        <FIELD name="repositoryacronym" indexable="true" stored="false" result="false"
                               stat="false" tokenizable="true" multivalued="false"
                               xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']/@providerAcronym"/>
                        <FIELD name="localid" indexable="true" stored="true" result="false"
                               stat="false" tokenizable="true" multivalued="false"
                               xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']"/>

                        <FIELD name="entitytype" indexable="true" result="false" stat="false"
                               tokenizable="false" multivalued="false"
                               xpath="//*[local-name()='objectType']"/>

                        <FIELD name="title" indexable="true" stored="true" result="false"
                               stat="false" tokenizable="true"
                               xpath="//*[local-name()='title']"/>

                        <!-- NOTE(review): this is the only field using "value" instead of "xpath"; presumably deliberate, confirm. -->
                        <FIELD name="date" indexable="true" result="true" stat="false"
                               tokenizable="false"
                               value="//*[local-name()='date']"/>

                        <!-- Relation fields: type and persistent identifier of each related object. -->
                        <FIELD name="relationType" indexable="true" result="true" stat="false"
                               tokenizable="false"
                               xpath="//*[local-name()='relation']/*[local-name()='entitytype']"/>
                        <FIELD name="relationPID" indexable="true" result="true" stat="false"
                               tokenizable="false"
                               xpath="//*[local-name()='relation']/*[local-name()='pid']"/>

                        <!-- Browsing Fields -->
                        <FIELD name="entitytypeforbrowsing" browsingAliasFor="entitytype" indexable="true"
                               result="false" stat="false" tokenizable="false"
                               xpath="//*[local-name()='objectType']"/>
                    </FIELDS>
                </LAYOUT>
            </LAYOUTS>
        </STATUS>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/RepositoryServiceResources/datacite.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
    <HEADER>
        <RESOURCE_IDENTIFIER value="1ab23232-a569-447f-8e74-5d3238e2e3c8_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="/>
        <RESOURCE_TYPE value="RepositoryServiceResourceType"/>
        <RESOURCE_KIND value="RepositoryServiceResources"/>
        <RESOURCE_URI value=""/>
        <DATE_OF_CREATION value="2015-01-26T22:48:00+00:00"/>
        <PROTOCOL/>
    </HEADER>
    <BODY>
        <CONFIGURATION>
            <DATASOURCE_TYPE>eagle:contentprovider</DATASOURCE_TYPE>
            <!-- Text values are kept on one line: a wrapped closing tag would add a newline and indentation to the data. -->
            <DATASOURCE_ORIGINAL_ID provenance="DLI">Datacite</DATASOURCE_ORIGINAL_ID>
            <DATASOURCE_AGGREGATED>false</DATASOURCE_AGGREGATED>
            <ENVIRONMENTS>
                <ENVIRONMENT>DLI</ENVIRONMENT>
            </ENVIRONMENTS>
            <TYPOLOGY/>
            <MAX_SIZE_OF_DATASTRUCTURE>0</MAX_SIZE_OF_DATASTRUCTURE>
            <AVAILABLE_DISKSPACE>0</AVAILABLE_DISKSPACE>
            <MAX_NUMBER_OF_DATASTRUCTURE>0</MAX_NUMBER_OF_DATASTRUCTURE>
            <OFFICIAL_NAME>Datacite</OFFICIAL_NAME>
            <ENGLISH_NAME>Datacite</ENGLISH_NAME>
            <ICON_URI>http://datacite.org/sites/default/files/DataCite_header_final1_1.png</ICON_URI>
            <COUNTRY/>
            <LOCATION>
                <LONGITUDE>0.0</LONGITUDE>
                <LATITUDE>0.0</LATITUDE>
                <TIMEZONE>0.0</TIMEZONE>
            </LOCATION>
            <REPOSITORY_WEBPAGE>http://datacite.org/</REPOSITORY_WEBPAGE>
            <REPOSITORY_INSTITUTION/>
            <ADMIN_INFO>contact@datacite.org</ADMIN_INFO>
            <INTERFACES>
                <INTERFACE active="true"
                           compliance="openaire2.0"
                           contentDescription="metadata"
                           id="api_________::datacite::0"
                           label="aggregator::datarepository (openaire2.0)"
                           removable="false"
                           typology="aggregator::datarepository">
                    <ACCESS_PROTOCOL>mongoDump</ACCESS_PROTOCOL>
                    <!-- NOTE(review): BASE_URL looks like a developer-local dump path; confirm before deploying this profile. -->
                    <BASE_URL>/home/sandro/mongo_backup/datacite3000.json</BASE_URL>
                    <INTERFACE_EXTRA_FIELD name="last_collection_date"/>
                    <INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='header']/*[local-name()='identifier']</INTERFACE_EXTRA_FIELD>
                    <INTERFACE_EXTRA_FIELD name="last_aggregation_total"/>
                    <INTERFACE_EXTRA_FIELD name="last_collection_mdId"/>
                    <INTERFACE_EXTRA_FIELD name="last_collection_total"/>
                    <INTERFACE_EXTRA_FIELD name="last_aggregation_date"/>
                    <INTERFACE_EXTRA_FIELD name="last_aggregation_mdId"/>
                </INTERFACE>
            </INTERFACES>
            <EXTRA_FIELDS>
                <FIELD>
                    <key>DataSourceId</key>
                    <value>datacite</value>
                </FIELD>
                <FIELD>
                    <key>NamespacePrefix</key>
                    <value>datacite</value>
                </FIELD>
                <FIELD>
                    <key>VERIFIED</key>
                    <value>NO</value>
                </FIELD>
            </EXTRA_FIELDS>
            <REGISTERED_BY>andrea.mannocci@isti.cnr.it</REGISTERED_BY>
        </CONFIGURATION>
        <STATUS>
            <NUMBER_OF_OBJECTS>0</NUMBER_OF_OBJECTS>
        </STATUS>
        <QOS>
            <AVAILABILITY>0</AVAILABILITY>
            <CAPACITY/>
            <THROUGHPUT>0.0</THROUGHPUT>
        </QOS>
        <SECURITY_PARAMETERS/>
        <BLACKBOARD/>
    </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/schemas/DLIFMetadataFormat.xsd | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified"
           xmlns:dli="http://www.dli.eu" targetNamespace="http://www.dli.eu">

    <!-- Single global root element of the DLIF format. -->
    <xs:element name="dliObject" type="dli:dliEntity">
        <xs:annotation>
            <xs:documentation>
                The Data Literature Interlinking entity. A dliObject describes either a
                publication or a dataset (see objectType). It carries the resource identifier
                in the D-Net aggregation system, the local identifier of the record, titles,
                a date, the list of authors and the relations to other objects.
            </xs:documentation>
        </xs:annotation>
    </xs:element>

    <xs:complexType name="dliEntity">
        <xs:sequence>
            <xs:element maxOccurs="1" minOccurs="1" name="dnetResourceIdentifier" type="xs:string">
                <xs:annotation>
                    <xs:documentation>
                        Unique resource identifier internal to D-Net infrastructure.
                        This property is obtained by concatenating the content CP Acronym and the
                        MD5 of local record ID to the identifier. (CP_ACRONYM::MD5(LOCAL_IDENTIFIER))
                    </xs:documentation>
                </xs:annotation>
            </xs:element>
            <xs:element maxOccurs="1" minOccurs="1" name="localIdentifier" type="dli:identifier">
                <xs:annotation>
                    <xs:documentation>
                        The local identifier of the record
                    </xs:documentation>
                </xs:annotation>
            </xs:element>
            <xs:element name="title" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
            <xs:element name="date" type="xs:string"/>
            <xs:element name="objectType" maxOccurs="1" minOccurs="1" type="dli:objectType"/>

            <!-- Wrapper holding one or more author entries. -->
            <xs:element name="authors">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="author" type="dli:authorType" maxOccurs="unbounded"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>

            <!-- Wrapper holding one or more relations to other objects. -->
            <xs:element name="relations">
                <xs:complexType>
                    <xs:sequence>
                        <xs:element name="relation" type="dli:realtionType" maxOccurs="unbounded"/>
                    </xs:sequence>
                </xs:complexType>
            </xs:element>

        </xs:sequence>
    </xs:complexType>

    <!-- Kind of object described by a record or targeted by a relation. -->
    <xs:simpleType name="objectType">
        <xs:restriction base="xs:string">
            <xs:enumeration value="publication"/>
            <xs:enumeration value="dataset"/>
        </xs:restriction>
    </xs:simpleType>

    <!-- The type name is misspelled ("realtion"); it is kept unchanged because other documents may reference it. -->
    <xs:complexType name="realtionType">
        <xs:sequence>
            <xs:element name="entitytype" type="dli:objectType"></xs:element>
            <xs:element name="pid" type="dli:identifier" minOccurs="1" maxOccurs="1"/>
            <xs:element name="resolvedURL" type="xs:string" maxOccurs="1"/>
            <xs:element name="typeOfRelation" maxOccurs="1">
                <xs:simpleType>
                    <xs:restriction base="xs:string">
                        <xs:enumeration value="relatedTo"/>
                    </xs:restriction>
                </xs:simpleType>
            </xs:element>
        </xs:sequence>
    </xs:complexType>

    <!-- One author: full name plus an identifier and the kind of that identifier. -->
    <xs:complexType name="authorType">
        <xs:sequence>
            <xs:element name="fullname" type="xs:string" minOccurs="1" maxOccurs="1"/>
            <xs:element name="localIdentifier" type="xs:string" maxOccurs="1"/>
            <xs:element name="typeOfIdentifier" type="xs:string" maxOccurs="1"/>
        </xs:sequence>
    </xs:complexType>

    <!-- String identifier qualified by a mandatory "type" attribute. -->
    <xs:complexType name="identifier">
        <xs:simpleContent>
            <xs:extension base="xs:string">
                <xs:attribute name="type" type="xs:string" use="required"/>
            </xs:extension>
        </xs:simpleContent>
    </xs:complexType>

</xs:schema>
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/ConfigurationTestConfig.java | ||
---|---|---|
1 |
package eu.dnetlib.dli.openaire.transform; |
|
2 |
|
|
3 |
import org.springframework.cache.ehcache.EhCacheFactoryBean; |
|
4 |
import org.springframework.cache.ehcache.EhCacheManagerFactoryBean; |
|
5 |
import org.springframework.context.annotation.Bean; |
|
6 |
import org.springframework.context.annotation.Configuration; |
|
7 |
|
|
8 |
import eu.dnetlib.msro.dli.workflows.nodes.transform.CrossrefDOIResolver; |
|
9 |
import eu.dnetlib.msro.dli.workflows.nodes.transform.DataciteDOIResolver; |
|
10 |
|
|
11 |
@Configuration |
|
12 |
public class ConfigurationTestConfig { |
|
13 |
|
|
14 |
@Bean |
|
15 |
public EhCacheManagerFactoryBean cacheFactory() { |
|
16 |
EhCacheManagerFactoryBean cacheFactory = new EhCacheManagerFactoryBean(); |
|
17 |
cacheFactory.setCacheManagerName("testCacheFactory"); |
|
18 |
return cacheFactory; |
|
19 |
} |
|
20 |
|
|
21 |
/* |
|
22 |
* p:eternal="false" p:timeToLive="0" p:timeToIdle="36000" p:maxElementsInMemory="10000" p:maxElementsOnDisk="20000" |
|
23 |
* p:cacheManager-ref="dataciteDOICacheManager" p:cacheName="dataciteDOICache" |
|
24 |
*/ |
|
25 |
@Bean |
|
26 |
public EhCacheFactoryBean dataciteDOICache() { |
|
27 |
EhCacheFactoryBean doiCacheFactoryBean = new EhCacheFactoryBean(); |
|
28 |
doiCacheFactoryBean.setEternal(false); |
|
29 |
doiCacheFactoryBean.setTimeToLive(0);; |
|
30 |
doiCacheFactoryBean.setTimeToIdle(36000); |
|
31 |
doiCacheFactoryBean.setMaxEntriesLocalHeap(10000); |
|
32 |
doiCacheFactoryBean.setMaxEntriesLocalDisk(20000); |
|
33 |
doiCacheFactoryBean.setCacheManager(cacheFactory().getObject()); |
|
34 |
doiCacheFactoryBean.setCacheName("dataciteDOICache"); |
|
35 |
return doiCacheFactoryBean; |
|
36 |
} |
|
37 |
|
|
38 |
@Bean |
|
39 |
public EhCacheFactoryBean crossrefDOICache() { |
|
40 |
EhCacheFactoryBean doiCacheFactoryBean = new EhCacheFactoryBean(); |
|
41 |
doiCacheFactoryBean.setEternal(false); |
|
42 |
doiCacheFactoryBean.setTimeToLive(0);; |
|
43 |
doiCacheFactoryBean.setTimeToIdle(36000); |
|
44 |
doiCacheFactoryBean.setMaxEntriesLocalHeap(10000); |
|
45 |
doiCacheFactoryBean.setMaxEntriesLocalDisk(20000); |
|
46 |
doiCacheFactoryBean.setCacheManager(cacheFactory().getObject()); |
|
47 |
doiCacheFactoryBean.setCacheName("crossrefDOICache"); |
|
48 |
return doiCacheFactoryBean; |
|
49 |
} |
|
50 |
|
|
51 |
@Bean |
|
52 |
public DataciteDOIResolver dataciteResolver() { |
|
53 |
DataciteDOIResolver resolver = new DataciteDOIResolver(); |
|
54 |
return resolver; |
|
55 |
} |
|
56 |
|
|
57 |
@Bean |
|
58 |
public CrossrefDOIResolver crossrefResolver() { |
|
59 |
return new CrossrefDOIResolver(); |
|
60 |
} |
|
61 |
|
|
62 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/TestDOIResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.dli.openaire.transform; |
|
2 |
|
|
3 |
import org.junit.Assert; |
|
4 |
import org.junit.Test; |
|
5 |
import org.junit.runner.RunWith; |
|
6 |
import org.springframework.beans.factory.annotation.Autowired; |
|
7 |
import org.springframework.test.context.ContextConfiguration; |
|
8 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
9 |
|
|
10 |
import eu.dnetlib.msro.dli.workflows.nodes.transform.CrossrefDOIResolver; |
|
11 |
import eu.dnetlib.msro.dli.workflows.nodes.transform.DOIResponse; |
|
12 |
import eu.dnetlib.msro.dli.workflows.nodes.transform.DataciteDOIResolver; |
|
13 |
|
|
14 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
15 |
@ContextConfiguration(classes = ConfigurationTestConfig.class) |
|
16 |
public class TestDOIResolver { |
|
17 |
|
|
18 |
@Autowired |
|
19 |
DataciteDOIResolver dataciteDOIResolver; |
|
20 |
|
|
21 |
@Autowired |
|
22 |
CrossrefDOIResolver crossRefDOIResolver; |
|
23 |
|
|
24 |
@Test |
|
25 |
public void testDataciteNotNull() { |
|
26 |
Assert.assertNotNull(dataciteDOIResolver); |
|
27 |
} |
|
28 |
|
|
29 |
@Test |
|
30 |
public void testSearchDatacite() { |
|
31 |
final String[] dois = { "10.6085/AA/TPT006_061MXTI061R00_20020806.50.3", "10.6085/AA/TPT006_061MXTI061R00_20020806.50.1", |
|
32 |
"10.6085/AA/TPT006_061MXTI061R00_20020806.50.3" }; |
|
33 |
int k = 0; |
|
34 |
double average_sum = 0.0f; |
|
35 |
System.out.println(dataciteDOIResolver.retrieveDOI(dois[0]).getRecord()); |
|
36 |
while (k < 100) { |
|
37 |
for (int i = 0; i < dois.length; i++) { |
|
38 |
long start = System.currentTimeMillis(); |
|
39 |
Assert.assertNotNull(dataciteDOIResolver.retrieveDOI(dois[i])); |
|
40 |
long end = System.currentTimeMillis(); |
|
41 |
average_sum += end - start; |
|
42 |
} |
|
43 |
k++; |
|
44 |
} |
|
45 |
|
|
46 |
System.out.println("Average time on request 3 item 100 times " + average_sum / 100 + "ms"); |
|
47 |
} |
|
48 |
|
|
49 |
@Test |
|
50 |
public void testSearchCrossref() { |
|
51 |
final String[] dois = { "10.1002/etc.2522", "10.1007/s00300-011-1150-7", "10.1016/j.ancene.2014.01.001" }; |
|
52 |
int k = 0; |
|
53 |
double average_sum = 0.0f; |
|
54 |
System.out.println("\n\n\n\n\n" + crossRefDOIResolver.retrieveDOI(dois[0]).getRecord()); |
|
55 |
while (k < 100) { |
|
56 |
for (int i = 0; i < dois.length; i++) { |
|
57 |
long start = System.currentTimeMillis(); |
|
58 |
DOIResponse record = crossRefDOIResolver.retrieveDOI(dois[i]); |
|
59 |
Assert.assertNotNull(record); |
|
60 |
long end = System.currentTimeMillis(); |
|
61 |
average_sum += end - start; |
|
62 |
} |
|
63 |
k++; |
|
64 |
} |
|
65 |
System.out.println("Average time on request 3 item 100 times " + average_sum / 100 + "ms"); |
|
66 |
} |
|
67 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/AbstractDOIResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.io.ByteArrayInputStream; |
|
4 |
import java.io.ByteArrayOutputStream; |
|
5 |
import java.io.IOException; |
|
6 |
import java.io.InputStreamReader; |
|
7 |
import java.io.UnsupportedEncodingException; |
|
8 |
import java.util.zip.GZIPInputStream; |
|
9 |
import java.util.zip.GZIPOutputStream; |
|
10 |
|
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
import org.apache.commons.logging.Log; |
|
13 |
import org.apache.commons.logging.LogFactory; |
|
14 |
|
|
15 |
// TODO: Auto-generated Javadoc |
|
16 |
/** |
|
17 |
* The Class AbstractDOIResolver. |
|
18 |
*/ |
|
19 |
public abstract class AbstractDOIResolver { |
|
20 |
|
|
21 |
/** The Constant NOT_FOUND_ELEMENT. */ |
|
22 |
public static final String NOT_FOUND_ELEMENT = "NOT_FOUND"; |
|
23 |
|
|
24 |
/** The Constant log. */ |
|
25 |
private static final Log log = LogFactory.getLog(AbstractDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
26 |
|
|
27 |
/** |
|
28 |
* Retrieve doi. |
|
29 |
* |
|
30 |
* @param doi |
|
31 |
* the doi |
|
32 |
* @return the string |
|
33 |
*/ |
|
34 |
public abstract DOIResponse retrieveDOI(final String doi); |
|
35 |
|
|
36 |
/** |
|
37 |
* Compress record. |
|
38 |
* |
|
39 |
* @param str |
|
40 |
* the str |
|
41 |
* @return the byte[] |
|
42 |
*/ |
|
43 |
protected byte[] compressRecord(final String str) { |
|
44 |
if (str == null || str.length() == 0) { return null; } |
|
45 |
ByteArrayOutputStream obj = new ByteArrayOutputStream(); |
|
46 |
try { |
|
47 |
GZIPOutputStream gzip = new GZIPOutputStream(obj); |
|
48 |
gzip.write(str.getBytes("UTF-8")); |
|
49 |
gzip.close(); |
|
50 |
return obj.toByteArray(); |
|
51 |
|
|
52 |
} catch (UnsupportedEncodingException e) { |
|
53 |
log.error("Error on compress record before tu put in the cache", e); |
|
54 |
return null; |
|
55 |
} catch (IOException e) { |
|
56 |
log.error("Error on compress record before tu put in the cache", e); |
|
57 |
return null; |
|
58 |
} |
|
59 |
} |
|
60 |
|
|
61 |
/** |
|
62 |
* Decompress. |
|
63 |
* |
|
64 |
* @param input |
|
65 |
* the input |
|
66 |
* @return the string |
|
67 |
*/ |
|
68 |
protected String decompress(final byte[] input) { |
|
69 |
|
|
70 |
GZIPInputStream gis; |
|
71 |
try { |
|
72 |
gis = new GZIPInputStream(new ByteArrayInputStream(input)); |
|
73 |
return IOUtils.toString(new InputStreamReader(gis, "UTF-8")); |
|
74 |
} catch (IOException e) { |
|
75 |
log.error("Error on decompress record when getting it on cache", e); |
|
76 |
return null; |
|
77 |
} |
|
78 |
|
|
79 |
} |
|
80 |
|
|
81 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DataCiteSearchResponse.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.List; |
|
5 |
|
|
6 |
import org.apache.commons.codec.binary.Base64; |
|
7 |
|
|
8 |
import com.google.gson.JsonArray; |
|
9 |
import com.google.gson.JsonElement; |
|
10 |
import com.google.gson.JsonObject; |
|
11 |
import com.google.gson.JsonParser; |
|
12 |
|
|
13 |
/** |
|
14 |
* The Class DataCiteSearchResponse. |
|
15 |
*/ |
|
16 |
public class DataCiteSearchResponse { |
|
17 |
|
|
18 |
/** The total. */ |
|
19 |
private long total; |
|
20 |
|
|
21 |
/** The xml records. */ |
|
22 |
private List<String> xmlRecords; |
|
23 |
|
|
24 |
/** |
|
25 |
* Creates the new response. |
|
26 |
* |
|
27 |
* @param response |
|
28 |
* the response |
|
29 |
* @return the data cite search response |
|
30 |
*/ |
|
31 |
public static DataCiteSearchResponse createNewResponse(final String response) { |
|
32 |
DataCiteSearchResponse item = new DataCiteSearchResponse(); |
|
33 |
|
|
34 |
if (response == null) { |
|
35 |
|
|
36 |
return null; |
|
37 |
|
|
38 |
} |
|
39 |
JsonElement jElement = new JsonParser().parse(response); |
|
40 |
JsonObject jobject = jElement.getAsJsonObject(); |
|
41 |
if (jobject.has("response")) { |
|
42 |
|
|
43 |
item.setTotal(jobject.get("response").getAsJsonObject().get("numFound").getAsLong()); |
|
44 |
|
|
45 |
JsonElement hits = ((JsonObject) jobject.get("response")).get("docs"); |
|
46 |
|
|
47 |
JsonArray hitsObject = hits.getAsJsonArray(); |
|
48 |
|
|
49 |
List<String> records = new ArrayList<String>(); |
|
50 |
|
|
51 |
for (JsonElement elem : hitsObject) { |
|
52 |
String xml = ((JsonObject) elem).get("xml").getAsString(); |
|
53 |
byte[] data = Base64.decodeBase64(xml.getBytes()); |
|
54 |
String s = new String(data); |
|
55 |
records.add(s); |
|
56 |
|
|
57 |
} |
|
58 |
item.setXmlRecords(records); |
|
59 |
return item; |
|
60 |
} |
|
61 |
return null; |
|
62 |
} |
|
63 |
|
|
64 |
/** |
|
65 |
* Gets the xml records. |
|
66 |
* |
|
67 |
* @return the xmlRecords |
|
68 |
*/ |
|
69 |
public List<String> getXmlRecords() { |
|
70 |
return xmlRecords; |
|
71 |
} |
|
72 |
|
|
73 |
/** |
|
74 |
* Sets the xml records. |
|
75 |
* |
|
76 |
* @param xmlRecords |
|
77 |
* the xmlRecords to set |
|
78 |
*/ |
|
79 |
public void setXmlRecords(final List<String> xmlRecords) { |
|
80 |
this.xmlRecords = xmlRecords; |
|
81 |
} |
|
82 |
|
|
83 |
/** |
|
84 |
* Gets the total. |
|
85 |
* |
|
86 |
* @return the total |
|
87 |
*/ |
|
88 |
public long getTotal() { |
|
89 |
return total; |
|
90 |
} |
|
91 |
|
|
92 |
/** |
|
93 |
* Sets the total. |
|
94 |
* |
|
95 |
* @param total |
|
96 |
* the total to set |
|
97 |
*/ |
|
98 |
public void setTotal(final long total) { |
|
99 |
this.total = total; |
|
100 |
} |
|
101 |
|
|
102 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/CrossrefDOIResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.net.MalformedURLException; |
|
5 |
import java.net.URL; |
|
6 |
|
|
7 |
import javax.annotation.Resource; |
|
8 |
|
|
9 |
import net.sf.ehcache.Cache; |
|
10 |
import net.sf.ehcache.Element; |
|
11 |
|
|
12 |
import org.apache.commons.io.IOUtils; |
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
|
|
16 |
public class CrossrefDOIResolver extends AbstractDOIResolver { |
|
17 |
|
|
18 |
private static final Log log = LogFactory.getLog(CrossrefDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
19 |
|
|
20 |
private final static String baseUrlCrossref = "http://www.crossref.org/openurl?noredirect=true&pid=antleb%40di.uoa.gr&format=unixref&id="; |
|
21 |
|
|
22 |
/** The cache. */ |
|
23 |
@Resource(name = "crossrefDOICache") |
|
24 |
private Cache cache; |
|
25 |
|
|
26 |
/** The Constant NOT_FOUND_ELEMENT. */ |
|
27 |
|
|
28 |
@Override |
|
29 |
public DOIResponse retrieveDOI(final String doi) { |
|
30 |
final Element elem = cache.get(doi); |
|
31 |
String record = null; |
|
32 |
if (elem != null) { |
|
33 |
final byte[] compressed_data = (byte[]) elem.getObjectValue(); |
|
34 |
record = decompress(compressed_data); |
|
35 |
} else { |
|
36 |
log.info("Refreshing " + doi + " cache..."); |
|
37 |
record = requestDOI(doi); |
|
38 |
if (record != null) { |
|
39 |
final byte[] compressed_data = compressRecord(record); |
|
40 |
if (compressed_data != null) { |
|
41 |
cache.put(new Element(doi, compressed_data)); |
|
42 |
} |
|
43 |
} |
|
44 |
|
|
45 |
} |
|
46 |
DOIResponse doiResponse = new DOIResponse(DOIType.publication, doi, record); |
|
47 |
if (record == null || record.equals(NOT_FOUND_ELEMENT)) { |
|
48 |
doiResponse.setType(DOIType.none); |
|
49 |
} |
|
50 |
return doiResponse; |
|
51 |
} |
|
52 |
|
|
53 |
private String requestDOI(final String doi) { |
|
54 |
try { |
|
55 |
URL crossRefurl = new URL(baseUrlCrossref + doi); |
|
56 |
final String response = IOUtils.toString(crossRefurl.openStream()); |
|
57 |
if (response == null) { return response; } |
|
58 |
if (response.contains("<error>")) { return NOT_FOUND_ELEMENT; } |
|
59 |
return response; |
|
60 |
} catch (MalformedURLException e) { |
|
61 |
log.error("Error on request DOI to crossRef, request:" + baseUrlCrossref + doi, e); |
|
62 |
return null; |
|
63 |
} catch (IOException e) { |
|
64 |
log.error("Error on request DOI to crossRef, request:" + baseUrlCrossref + doi, e); |
|
65 |
return null; |
|
66 |
} |
|
67 |
} |
|
68 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIWorker.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
import java.util.concurrent.BlockingQueue; |
|
6 |
|
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
|
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import com.google.common.collect.Maps; |
|
12 |
|
|
13 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
14 |
|
|
15 |
// TODO: Auto-generated Javadoc |
|
16 |
/** |
|
17 |
* The Class DOIWorker. |
|
18 |
*/ |
|
19 |
public class DOIWorker implements Runnable { |
|
20 |
|
|
21 |
private static final String MAIN_RECORD_KEY = "record"; |
|
22 |
|
|
23 |
private static final String RELATED_DOI_KEY = "relatedDOI"; |
|
24 |
|
|
25 |
/** The Constant log. */ |
|
26 |
private static final Log log = LogFactory.getLog(DOIWorker.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
27 |
|
|
28 |
/** The queue. */ |
|
29 |
private final BlockingQueue<String> queue; |
|
30 |
|
|
31 |
/** The output queue. */ |
|
32 |
private final BlockingQueue<String> outputQueue; |
|
33 |
|
|
34 |
/** The terminator queue. */ |
|
35 |
private final String terminatorQueue; |
|
36 |
|
|
37 |
/** The doi resolvers. */ |
|
38 |
private List<AbstractDOIResolver> doiResolvers; |
|
39 |
|
|
40 |
/** The xslt. */ |
|
41 |
private ApplyXslt xslt; |
|
42 |
|
|
43 |
/** |
|
44 |
* Instantiates a new DOI worker. |
|
45 |
* |
|
46 |
* @param queue |
|
47 |
* the queue |
|
48 |
* @param outputQueue |
|
49 |
* the output queue |
|
50 |
* @param terminatorQueue |
|
51 |
* the terminator queue |
|
52 |
* @param doiResolvers |
|
53 |
* the doi resolvers |
|
54 |
* @param xslt |
|
55 |
* the xslt |
|
56 |
*/ |
|
57 |
public DOIWorker(final BlockingQueue<String> queue, final BlockingQueue<String> outputQueue, final String terminatorQueue, |
|
58 |
final List<AbstractDOIResolver> doiResolvers, final ApplyXslt xslt) { |
|
59 |
this.queue = queue; |
|
60 |
this.terminatorQueue = terminatorQueue; |
|
61 |
this.doiResolvers = doiResolvers; |
|
62 |
this.xslt = xslt; |
|
63 |
this.outputQueue = outputQueue; |
|
64 |
|
|
65 |
} |
|
66 |
|
|
67 |
/* |
|
68 |
* (non-Javadoc) |
|
69 |
* |
|
70 |
* @see java.lang.Runnable#run() |
|
71 |
*/ |
|
72 |
@Override |
|
73 |
public void run() { |
|
74 |
try { |
|
75 |
String nextRecord = this.queue.take(); |
|
76 |
if (nextRecord == this.terminatorQueue) { |
|
77 |
log.debug("Found terminator record"); |
|
78 |
this.queue.put(terminatorQueue); |
|
79 |
return; |
|
80 |
} |
|
81 |
while (nextRecord != null && nextRecord != terminatorQueue) { |
|
82 |
Map<String, Object> extractedFields = extractRelatedIdentifiers(nextRecord); |
|
83 |
List<String> relationsDOI = (List<String>) extractedFields.get(RELATED_DOI_KEY); |
|
84 |
if (relationsDOI != null && relationsDOI.size() > 1) { |
|
85 |
DOIResponse response = null; |
|
86 |
List<DOIResponse> matchedResponse = Lists.newArrayList(); |
|
87 |
for (String doi : relationsDOI) { |
|
88 |
DOIResponse responseDOI = getDOIResponse(doi); |
|
89 |
if (responseDOI != null && responseDOI.getType() != DOIType.none) { |
|
90 |
matchedResponse.add(responseDOI); |
|
91 |
} |
|
92 |
} |
|
93 |
if (matchedResponse.size() > 0) { |
|
94 |
DOIResponse mainRecord = (DOIResponse) extractedFields.get(MAIN_RECORD_KEY); |
|
95 |
String outputRecord = putResponsesOnRecord(nextRecord, matchedResponse); |
|
96 |
outputQueue.put(xslt.evaluate(outputRecord)); |
|
97 |
for (DOIResponse res : matchedResponse) { |
|
98 |
outputRecord = putResponsesOnRecord(res.getRecord(), Lists.newArrayList(mainRecord)); |
|
99 |
outputQueue.put(xslt.evaluate(outputRecord)); |
|
100 |
} |
|
101 |
} |
|
102 |
} |
|
103 |
nextRecord = this.queue.take(); |
|
104 |
if (nextRecord == this.terminatorQueue) { |
|
105 |
log.debug("Found terminator record"); |
|
106 |
this.queue.put(terminatorQueue); |
|
107 |
} |
|
108 |
} |
|
109 |
} catch (InterruptedException e) { |
|
110 |
log.error("Error on taking an element on queue", e); |
|
111 |
} |
|
112 |
|
|
113 |
} |
|
114 |
|
|
115 |
/** |
|
116 |
* Put responses on record. |
|
117 |
* |
|
118 |
* @param nextRecord |
|
119 |
* the next record |
|
120 |
* @param matchedResponse |
|
121 |
* the matched response |
|
122 |
* @return the string |
|
123 |
*/ |
|
124 |
private String putResponsesOnRecord(final String nextRecord, final List<DOIResponse> matchedResponse) { |
|
125 |
// TODO Auto-generated method stub |
|
126 |
return null; |
|
127 |
} |
|
128 |
|
|
129 |
/** |
|
130 |
* Search for each DOI |
|
131 |
* |
|
132 |
* @param doi |
|
133 |
* the doi |
|
134 |
* @return the DOI response |
|
135 |
*/ |
|
136 |
private DOIResponse getDOIResponse(final String doi) { |
|
137 |
DOIResponse currentResponse = null; |
|
138 |
for (AbstractDOIResolver resolver : this.doiResolvers) { |
|
139 |
currentResponse = resolver.retrieveDOI(doi); |
|
140 |
if (currentResponse != null && currentResponse.getType() != DOIType.none) { return currentResponse; } |
|
141 |
} |
|
142 |
return currentResponse; |
|
143 |
} |
|
144 |
|
|
145 |
/** |
|
146 |
* Extract related identifiers. |
|
147 |
* |
|
148 |
* @param nextRecord |
|
149 |
* the next record |
|
150 |
* @return the list |
|
151 |
*/ |
|
152 |
private Map<String, Object> extractRelatedIdentifiers(final String nextRecord) { |
|
153 |
Map<String, Object> result = Maps.newHashMap(); |
|
154 |
DOIResponse mainRecord = new DOIResponse(); |
|
155 |
List<String> relatedRecord = Lists.newArrayList(); |
|
156 |
result.put(MAIN_RECORD_KEY, mainRecord); |
|
157 |
result.put(RELATED_DOI_KEY, relatedRecord); |
|
158 |
|
|
159 |
return result; |
|
160 |
} |
|
161 |
|
|
162 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/StreamingRecordParser.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.io.ByteArrayInputStream; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.Stack; |
|
8 |
|
|
9 |
import javax.xml.stream.XMLInputFactory; |
|
10 |
import javax.xml.stream.XMLStreamConstants; |
|
11 |
import javax.xml.stream.XMLStreamException; |
|
12 |
import javax.xml.stream.XMLStreamReader; |
|
13 |
|
|
14 |
import org.apache.commons.logging.Log; |
|
15 |
import org.apache.commons.logging.LogFactory; |
|
16 |
|
|
17 |
import com.google.common.collect.Lists; |
|
18 |
|
|
19 |
/** |
|
20 |
* This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the header. |
|
21 |
* |
|
22 |
* @author marko |
|
23 |
* |
|
24 |
*/ |
|
25 |
public class StreamingRecordParser { |
|
26 |
|
|
27 |
private static final Log log = LogFactory.getLog(StreamingRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
28 |
|
|
29 |
public Map<String, Object> parseRecord(final String record) { |
|
30 |
|
|
31 |
try { |
|
32 |
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|
33 |
XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes())); |
|
34 |
|
|
35 |
HashMap<String, Object> res = new HashMap<String, Object>(); |
|
36 |
|
|
37 |
Stack<String> elementStack = new Stack<String>(); |
|
38 |
elementStack.push("/"); |
|
39 |
List<String> relatedId = Lists.newArrayList(); |
|
40 |
|
|
41 |
while (parser.hasNext()) { |
|
42 |
int event = parser.next(); |
|
43 |
if (event == XMLStreamConstants.END_ELEMENT) { |
|
44 |
elementStack.pop(); |
|
45 |
} else if (event == XMLStreamConstants.START_ELEMENT) { |
|
46 |
final String localName = parser.getLocalName(); |
|
47 |
elementStack.push(localName); |
|
48 |
|
|
49 |
if ("objIdentifier".equals(localName)) { |
|
50 |
parser.next(); |
|
51 |
// log.debug("found"+parser.getText().trim()); |
|
52 |
res.put("id", parser.getText().trim()); |
|
53 |
|
|
54 |
} else if ("relatedIdentifier".equals(localName)) { |
|
55 |
|
|
56 |
for (int i = 0; i < parser.getAttributeCount(); i++) { |
|
57 |
String attrituteName = parser.getAttributeLocalName(i); |
|
58 |
String attritute = parser.getAttributeValue(i); |
|
59 |
if ("relatedidentifiertype".equals(attrituteName.toLowerCase())) { |
|
60 |
if ("doi".equals(attritute.toLowerCase())) { |
|
61 |
parser.next(); |
|
62 |
try { |
|
63 |
relatedId.add(parser.getText().trim()); |
|
64 |
} catch (Exception e) { |
|
65 |
|
|
66 |
} |
|
67 |
break; |
|
68 |
} |
|
69 |
} |
|
70 |
} |
|
71 |
|
|
72 |
} |
|
73 |
} |
|
74 |
} |
|
75 |
res.put("relatedId", relatedId); |
|
76 |
return res; |
|
77 |
} catch (XMLStreamException e) { |
|
78 |
throw new IllegalStateException(e); |
|
79 |
} |
|
80 |
|
|
81 |
} |
|
82 |
|
|
83 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIResponse.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
// TODO: Auto-generated Javadoc |
|
4 |
/**
 * Classification attached to a {@link DOIResponse}.
 */
enum DOIType {
    publication, dataset, none
}

/**
 * Mutable holder for the outcome of a DOI lookup: the DOI, its type and the record.
 */
public class DOIResponse {

    /** Classification of the response. */
    private DOIType type;

    /** The DOI this response refers to. */
    private String doi;

    /** The record associated with the DOI. */
    private String record;

    /**
     * Creates a response whose type, DOI and record are all {@code null}.
     */
    public DOIResponse() {
        this(null, null, null);
    }

    /**
     * Creates a fully initialised response.
     *
     * @param type
     *            classification of the response
     * @param doi
     *            the DOI this response refers to
     * @param record
     *            the record associated with the DOI
     */
    public DOIResponse(final DOIType type, final String doi, final String record) {
        this.type = type;
        this.doi = doi;
        this.record = record;
    }

    /**
     * @return the classification of this response
     */
    public DOIType getType() {
        return this.type;
    }

    /**
     * @param type
     *            the new classification of this response
     */
    public void setType(final DOIType type) {
        this.type = type;
    }

    /**
     * @return the DOI this response refers to
     */
    public String getDoi() {
        return this.doi;
    }

    /**
     * @param doi
     *            the new DOI of this response
     */
    public void setDoi(final String doi) {
        this.doi = doi;
    }

    /**
     * @return the record associated with the DOI
     */
    public String getRecord() {
        return this.record;
    }

    /**
     * @param record
     *            the new record of this response
     */
    public void setRecord(final String record) {
        this.record = record;
    }

}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DataciteDOIResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.net.MalformedURLException; |
|
5 |
import java.net.URL; |
|
6 |
|
|
7 |
import javax.annotation.Resource; |
|
8 |
|
|
9 |
import net.sf.ehcache.Cache; |
|
10 |
import net.sf.ehcache.Element; |
|
11 |
|
|
12 |
import org.apache.commons.io.IOUtils; |
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
|
|
16 |
/** |
|
17 |
* The Class DataciteDOIResolver. |
|
18 |
*/ |
|
19 |
public class DataciteDOIResolver extends AbstractDOIResolver { |
|
20 |
|
|
21 |
/** The cache. */ |
|
22 |
@Resource(name = "dataciteDOICache") |
|
23 |
private Cache cache; |
|
24 |
|
|
25 |
/** The Constant log. */ |
|
26 |
private static final Log log = LogFactory.getLog(DataciteDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
27 |
|
|
28 |
/** The Constant baseURL. */ |
|
29 |
private final static String baseURL = "http://search.datacite.org/api?wt=json&fl=doi,xml&q=doi:"; |
|
30 |
|
|
31 |
@Override |
|
32 |
public DOIResponse retrieveDOI(final String doi) { |
|
33 |
final Element elem = cache.get(doi); |
|
34 |
String record = null; |
|
35 |
if (elem != null) { |
|
36 |
final byte[] compressed_data = (byte[]) elem.getObjectValue(); |
|
37 |
record = decompress(compressed_data); |
|
38 |
} else { |
|
39 |
log.info("Refreshing " + doi + " cache..."); |
|
40 |
record = requestDOI(doi); |
|
41 |
if (record != null) { |
|
42 |
final byte[] compressed_data = compressRecord(record); |
|
43 |
if (compressed_data != null) { |
|
44 |
cache.put(new Element(doi, compressed_data)); |
|
45 |
} |
|
46 |
} |
|
47 |
} |
|
48 |
DOIResponse doiResponse = new DOIResponse(DOIType.dataset, doi, record); |
|
49 |
if (record == null || record.equals(NOT_FOUND_ELEMENT)) { |
|
50 |
doiResponse.setType(DOIType.none); |
|
51 |
} |
|
52 |
return doiResponse; |
|
53 |
} |
|
54 |
|
|
55 |
/** |
|
56 |
* Request doi. |
|
57 |
* |
|
58 |
* @param doi |
|
59 |
* the doi |
|
60 |
* @return the string |
|
61 |
*/ |
|
62 |
private String requestDOI(final String doi) { |
|
63 |
final String currentURL = baseURL + doi; |
|
64 |
|
|
65 |
try { |
|
66 |
final URL myURl = new URL(currentURL); |
|
67 |
final String response = IOUtils.toString(myURl.openStream()); |
|
68 |
final DataCiteSearchResponse responseDCT = DataCiteSearchResponse.createNewResponse(response); |
|
69 |
if (responseDCT != null && responseDCT.getTotal() > 0) { return responseDCT.getXmlRecords().get(0); } |
|
70 |
return NOT_FOUND_ELEMENT; |
|
71 |
} catch (MalformedURLException e) { |
|
72 |
log.error("Error on request DOI, request :" + currentURL, e); |
|
73 |
return null; |
|
74 |
} catch (IOException e) { |
|
75 |
log.error("Error on request DOI, request :" + currentURL, e); |
|
76 |
return null; |
|
77 |
} |
|
78 |
|
|
79 |
} |
|
80 |
|
|
81 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIResolverJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
2 |
|
|
3 |
import javax.annotation.Resource; |
|
4 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
|
5 |
|
|
6 |
import org.springframework.beans.factory.annotation.Autowired; |
|
7 |
|
|
8 |
import com.googlecode.sarasvati.Arc; |
|
9 |
import com.googlecode.sarasvati.NodeToken; |
|
10 |
|
|
11 |
import eu.dnetlib.enabling.resultset.IterableResultSetFactory; |
|
12 |
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory; |
|
13 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
|
14 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
15 |
|
|
16 |
public class DOIResolverJobNode extends SimpleJobNode { |
|
17 |
|
|
18 |
private String inputEprParam; |
|
19 |
|
|
20 |
private String outputEprParam; |
|
21 |
|
|
22 |
/** The result set client factory. */ |
|
23 |
@Autowired |
|
24 |
private ResultSetClientFactory resultSetClientFactory; |
|
25 |
|
|
26 |
/** The result set factory. */ |
|
27 |
@Resource(name = "iterableResultSetFactory") |
|
28 |
private IterableResultSetFactory resultSetFactory; |
|
29 |
|
|
30 |
@Override |
|
31 |
protected String execute(final NodeToken token) throws Exception { |
|
32 |
final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam)); |
|
33 |
Iterable<String> input = resultSetClientFactory.getClient(inputEpr); |
|
34 |
|
|
35 |
return Arc.DEFAULT_ARC; |
|
36 |
} |
|
37 |
|
|
38 |
/** |
|
39 |
* @return the inputEprParam |
|
40 |
*/ |
|
41 |
public String getInputEprParam() { |
|
42 |
return inputEprParam; |
|
43 |
} |
|
44 |
|
|
45 |
/** |
|
46 |
* @param inputEprParam |
|
47 |
* the inputEprParam to set |
|
48 |
*/ |
|
49 |
public void setInputEprParam(final String inputEprParam) { |
|
50 |
this.inputEprParam = inputEprParam; |
|
51 |
} |
|
52 |
|
|
53 |
/** |
|
54 |
* @return the outputEprParam |
|
55 |
*/ |
|
56 |
public String getOutputEprParam() { |
|
57 |
return outputEprParam; |
|
58 |
} |
|
59 |
|
|
60 |
/** |
|
61 |
* @param outputEprParam |
|
62 |
* the outputEprParam to set |
|
63 |
*/ |
|
64 |
public void setOutputEprParam(final String outputEprParam) { |
|
65 |
this.outputEprParam = outputEprParam; |
|
66 |
} |
|
67 |
|
|
68 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/ObtainDataSourceParamsJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.dli.workflows.nodes; |
|
2 |
|
|
3 |
import javax.annotation.Resource; |
|
4 |
|
|
5 |
import com.googlecode.sarasvati.Arc; |
|
6 |
import com.googlecode.sarasvati.NodeToken; |
|
7 |
|
|
8 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
9 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
10 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
11 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
12 |
|
|
13 |
public class ObtainDataSourceParamsJobNode extends SimpleJobNode { |
|
14 |
|
|
15 |
private String providerId; |
|
16 |
|
|
17 |
@Resource |
|
18 |
private UniqueServiceLocator serviceLocator; |
|
19 |
|
|
20 |
@Override |
|
21 |
protected String execute(final NodeToken token) throws Exception { |
|
22 |
final String query = "let $x := /*[.//RESOURCE_IDENTIFIER/@value='" + providerId + "']//EXTRA_FIELDS\n" |
|
23 |
+ "return concat($x/FIELD[./key='DataSourceId']/value, ' @@@ ', $x/FIELD[./key='NamespacePrefix']/value)"; |
|
24 |
|
|
25 |
final String[] arr = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(query).split("@@@"); |
|
26 |
|
|
27 |
token.getEnv().setAttribute("parentDatasourceId", arr[0].trim()); |
|
28 |
token.getEnv().setAttribute("namespacePrefix", arr[1].trim()); |
|
29 |
token.getEnv().setAttribute("dateOfCollection", DateUtils.now_ISO8601()); |
|
30 |
|
|
31 |
return Arc.DEFAULT_ARC; |
|
32 |
} |
|
33 |
|
|
34 |
public String getProviderId() { |
|
35 |
return providerId; |
|
36 |
} |
|
37 |
|
|
38 |
public void setProviderId(final String providerId) { |
|
39 |
this.providerId = providerId; |
|
40 |
} |
|
41 |
|
|
42 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/dli_openaire/repo-hi/Aggregate_Metadata_from_AggregatorDataRepository_Inference.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="3c924d16-83c1-44b0-81cd-681df42bcec4_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" /> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType" /> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources" /> |
|
8 |
<RESOURCE_URI value="value3" /> |
|
9 |
<DATE_OF_CREATION value="2015-02-03T18:13:51.0Z" /> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Aggregate Metadata (datasets) from Aggregator::DataRepository</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_INFO> |
|
14 |
<FIELD name="Action">Aggregate Metadata</FIELD> |
|
15 |
<FIELD name="Datasource class">aggregator::datarepository</FIELD> |
|
16 |
<FIELD name="Content">datasets</FIELD> |
|
17 |
</WORKFLOW_INFO> |
|
18 |
<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE> |
|
19 |
<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY> |
|
20 |
<CONFIGURATION start="manual"> |
|
21 |
<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true"> |
|
22 |
<DESCRIPTION>Verify if DS is pending</DESCRIPTION> |
|
23 |
<PARAMETERS> |
Also available in: Unified diff
implemented harvesting and first beta of transformation wf