Project

General

Profile

« Previous | Next » 

Revision 34341

implemented harvesting and first beta of transformation wf

View differences:

webapps/dnet-openaire_dli-container/trunk/src/main/resources/eu/dnetlib/cnr-site.properties
40 40
msro.wf.mail.cc = michele.artini@isti.cnr.it
41 41

  
42 42
service.index.solr.rank.enable=true
43

  
44
#MDSTORE
45
services.mdstore.dao= postgresMDStoreDao
46

  
47
#INDEX
48
service.solr.index.jsonConfiguration= \
49
	{"id":"solr",\
50
	"address":"localhost:9983",\
51
	"port":"8983",\
52
	"webContext":"solr",\
53
	"numShards":"1",\
54
	"replicationFactor":"1",\
55
	"host":"localhost",\
56
	"feedingShutdownTolerance":"30000",\
57
	"feedingBufferFlushThreshold":"1000",\
58
	"feedingSimulationMode":"false",\
59
	"luceneMatchVersion":"4.9",\
60
	"serverLibPath":"../../../../contrib/extraction/lib",\
61
	"filterCacheSize":"512","filterCacheInitialSize":"512",\
62
	"queryCacheSize":"512","queryCacheInitialSize":"512",\
63
	"documentCacheSize":"512","documentCacheInitialSize":"512",\
64
	"ramBufferSizeMB":"960","mergeFactor":"40",\
65
	"autosoftcommit":"-1","autocommit":"15000",\
66
	"termIndexInterval":"1024","maxIndexingThreads":"8",\
67
	"queryResultWindowSize":"20","queryResultMaxDocCached":"200"} 
webapps/dnet-openaire_dli-container/trunk/src/main/webapp/index.html
1

  
2

  
3

  
4 1
<table style="width :100%">
5 2
	<tr><td style="text-align:center; width:100%"><a href="http://www.d-net.research-infrastructures.eu/"><img src='images/d-netLogo.png'/></a></td></tr>
6 3
</table>
webapps/dnet-openaire_dli-container/trunk/pom.xml
26 26
		</dependency>
27 27
		<dependency>
28 28
	        <groupId>eu.dnetlib</groupId>
29
	        <artifactId>cnr-mongo-mdstore</artifactId>
30
	        <version>[4.0.0,5.0.0)</version>
29
	        <artifactId>dnet-postgres-mdstore</artifactId>
30
	        <version>[1.0.0-SNAPSHOT,2.0.0)</version>
31 31
	    </dependency>
32 32
		<dependency>
33 33
			<groupId>eu.dnetlib</groupId>
......
97 97
			<artifactId>dnet-modular-is-ui</artifactId>
98 98
			<version>[2.0.0,3.0.0)</version>
99 99
		</dependency>
100
		
101
		
102
		<!-- DATA LITERATURE INTERLINKING MODULES -->
103
		<dependency>
104
			<groupId>eu.dnetlib</groupId>
105
			<artifactId>dnet-openaire_dli-profiles</artifactId>
106
			<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
107
		</dependency>
108
		<dependency>
109
			<groupId>eu.dnetlib</groupId>
110
			<artifactId>dnet-openaire_dli-workflows</artifactId>
111
			<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
112
		</dependency>
100 113

  
101 114
		<!-- Log service -->
102 115
		<dependency>
modules/dnet-openaire_dli-workflows/trunk/src/main/resources/eu/dnetlib/msro/dli_openaire/workflows/repo-hi/applicationContext-nodes.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2

  
3
<beans xmlns="http://www.springframework.org/schema/beans"
4
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
5
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">    
6
    
7
   <bean id="dataciteDOICacheManager" class="org.springframework.cache.ehcache.EhCacheManagerFactoryBean" 
8
		p:cacheManagerName="dataciteDOICacheManager"/>
9
	
10
	<bean id="dataciteDOICache" class="org.springframework.cache.ehcache.EhCacheFactoryBean"
11
		p:eternal="false" p:timeToLive="0" p:timeToIdle="36000"
12
		p:maxElementsInMemory="10000" p:maxElementsOnDisk="20000"
13
		p:cacheManager-ref="dataciteDOICacheManager" p:cacheName="dataciteDOICache" />
14
		
15
		
16
	<bean id="crossrefDOICache" class="org.springframework.cache.ehcache.EhCacheFactoryBean"
17
		p:eternal="false" p:timeToLive="0" p:timeToIdle="36000"
18
		p:maxElementsInMemory="10000" p:maxElementsOnDisk="20000"
19
		p:cacheManager-ref="dataciteDOICacheManager" p:cacheName="crossrefDOICache" />
20
    
21
    
22
   <bean id="wfNodeObtainDataSourceParams"
23
		class="eu.dnetlib.msro.dli.workflows.nodes.ObtainDataSourceParamsJobNode"
24
		scope="prototype" />
25
    
26
</beans>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/oai_datacite_transform.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
			value="OAI_datacite_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=" />
5
		<RESOURCE_TYPE value="TransformationRuleDSResourceType" />
6
		<RESOURCE_KIND value="TransformationRuleDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2014-11-19T11:05:55+01:00" />
9
	</HEADER>
10
	<BODY>
11
		<CONFIGURATION>
12
            <IMPORTED/>
13
            <SCRIPT>
14
                <TITLE>OAI_datacite ingestion transform</TITLE>
15
                <CODE><![CDATA[
16
					<xsl:stylesheet version="1.0"
17
						xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
18
						xmlns:datetime="http://exslt.org/dates-and-times" xmlns:exslt="http://exslt.org/common"
19
						xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
20
						exclude-result-prefixes="xsl datetime exslt ">
21

  
22
						<xsl:param name="repositoryId" />
23
						<xsl:param name="namespacePrefix"></xsl:param>
24
						<xsl:param name="datasourceId"></xsl:param>
25
						<xsl:variable name="status"
26
							select="/*[local-name() = 'record']/*[local-name() = 'header']/@status" />
27
						<xsl:variable name="doi"
28
							select="//*[local-name()='resource']/*[local-name()='identifier']"></xsl:variable>
29

  
30

  
31

  
32
						<xsl:template match="/">
33
							<oai:record xmlns="http://namespace.openaire.eu/"
34
								xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr"
35
								xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance"
36
								xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
37

  
38
								<!-- Patch OAI header or create ex-novo if not oai -->
39
								<oai:header>
40
									<dri:objIdentifier>
41
										<xsl:value-of select="concat($namespacePrefix,'::', $doi)" />
42
									</dri:objIdentifier>
43
									<dri:recordIdentifier>
44
										<xsl:value-of select="$doi" />
45
									</dri:recordIdentifier>
46
									<dri:dateOfCollection>
47
										<xsl:value-of select="datetime:dateTime()" />
48
									</dri:dateOfCollection>
49
									<dri:repositoryId>
50
										<xsl:value-of select="$datasourceId" />
51
									</dri:repositoryId>
52
									<dri:datasourceprefix>
53
										<xsl:value-of select="$namespacePrefix" />
54
									</dri:datasourceprefix>
55
									<!-- Bulk copy of old header -->
56
									<xsl:for-each
57
										select="/*[local-name() = 'record']/*[local-name() = 'header']//*[namespace-uri()='http://www.openarchives.org/OAI/2.0/']">
58
										<xsl:copy-of select="." />
59
									</xsl:for-each>
60
								</oai:header>
61
								<xsl:choose>
62
									<xsl:when
63
										test="count(/*[local-name() = 'record']/*[local-name() = 'metadata']) &gt; 0">
64
										<xsl:copy-of
65
											select="/*[local-name() = 'record']/*[local-name() = 'metadata']" />
66
									</xsl:when>
67
								</xsl:choose>
68

  
69
							</oai:record>
70
						</xsl:template>
71

  
72
					</xsl:stylesheet>
73
				]]></CODE>
74
            </SCRIPT>
75
        </CONFIGURATION>
76
        <STATUS/>
77
        <SECURITY_PARAMETERS/>
78
    </BODY>
79
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/MDFormatDSResources/oai_datacite.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER
4
            value="2-8b912349-8a86-4330-93ef-7e0cd9bc87c2_TURGb3JtYXREU1Jlc291cmNlcy9NREZvcm1hdERTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="MDFormatDSResourceType"/>
6
        <RESOURCE_KIND value="MDFormatDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2008-05-22T14:40:04+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <CONFIGURATION>
12
            <NAME>oai_datacite</NAME>
13
            <DESCRIPTION>Data Literature Interlinking Metadata Format</DESCRIPTION>
14
            <INTERPRETATION>cleaned</INTERPRETATION>
15
            <SCHEMA uri=""/>
16
        </CONFIGURATION>
17
        <STATUS>
18
            <LAYOUTS>
19
                <LAYOUT name="mdstore_native">
20
                    <FIELDS>
21
                        <FIELD indexable="false" name="doi" stat="true" result="false" xpath="//*[local-name()='resource']/*[local-name()='identifier' and ./@identifierType='DOI']"/>
22
                    </FIELDS>
23
                </LAYOUT>
24
           </LAYOUTS>
25
		</STATUS>
26
	</BODY>
27
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/MDFormatDSResources/DLIF.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER
4
            value="2-8b9503d9-8a86-4330-93ef-7e0cd9bc87c2_TURGb3JtYXREU1Jlc291cmNlcy9NREZvcm1hdERTUmVzb3VyY2VUeXBl"/>
5
        <RESOURCE_TYPE value="MDFormatDSResourceType"/>
6
        <RESOURCE_KIND value="MDFormatDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2008-05-22T14:40:04+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <CONFIGURATION>
12
            <NAME>DLIF</NAME>
13
            <DESCRIPTION>Data Literature Interlinking Metadata Format</DESCRIPTION>
14
            <INTERPRETATION>cleaned</INTERPRETATION>
15
            <SCHEMA uri=""/>
16
        </CONFIGURATION>
17
        <STATUS>
18
            <LAYOUTS>
19
                <LAYOUT name="index">
20
                    <FIELDS>
21
						<!-- D-NET header indexed fields -->
22
						<FIELD name="objidentifier" indexable="true"  result="false"
23
							stat="false" tokenizable="false" multivalued="false"
24
							xpath="//*[local-name()='objIdentifier']" />
25
						<FIELD name="repositoryid" indexable="true"  result="false"
26
							stat="false" tokenizable="false" multivalued="false"
27
							xpath="//*[local-name()='repositoryId']" />
28

  
29
						<!-- DLI object identifier -->
30
						<FIELD name="dnetresourceidentifier" indexable="true" 
31
							stored="true" result="false" stat="false" tokenizable="false"
32
							multivalued="false"
33
							xpath="//*[local-name()='dliObject']/*[local-name()='dnetResourceIdentifier']" />
34
						<!-- Free text indexed fields -->
35
						<FIELD name="repositoryname" indexable="true"  stored="true"
36
							result="false" stat="false" tokenizable="true" multivalued="false"
37
							xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']/@providerName" />
38
						<FIELD name="repositoryacronym" indexable="true"  stored="false"
39
							result="false" stat="false" tokenizable="true" multivalued="false"
40
							xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']/@providerAcronym" />
41
						<FIELD name="localid" indexable="true"  stored="true" result="false"
42
							stat="false" tokenizable="true" multivalued="false"
43
							xpath="//*[local-name()='dliObject']/*[local-name()='recordSourceInfo']" />
44

  
45
						<FIELD name="entitytype" indexable="true"  result="false"
46
							stat="false" tokenizable="false" multivalued="false"
47
							xpath="//*[local-name()='objectType']" />
48

  
49
						<FIELD name="title" indexable="true" stored="true" result="false"
50
							tokenizable="true" stat="false" xpath="//*[local-name()='title']" />
51

  
52
						<FIELD name="date" indexable="true"  result="true" stat="false"
53
							tokenizable="false" value="//*[local-name()='date']" />
54
						<FIELD name="relationType" indexable="true"  result="true"
55
							stat="false" tokenizable="false"
56
							xpath="//*[local-name()='relation']/*[local-name()='entitytype']" />
57
						<FIELD name="relationPID" indexable="true"  result="true"
58
							stat="false" tokenizable="false"
59
							xpath="//*[local-name()='relation']/*[local-name()='pid']" />
60
						
61

  
62
						<!-- Browsing Fields -->
63
						<FIELD result="false" name="entitytypeforbrowsing" stat="false"
64
							tokenizable="false" browsingAliasFor="entitytype" indexable="true"
65
							xpath="//*[local-name()='objectType']" />
66

  
67
					</FIELDS>
68
				</LAYOUT>				
69
			</LAYOUTS>
70
		</STATUS>
71
	</BODY>
72
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/RepositoryServiceResources/datacite.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
			value="1ab23232-a569-447f-8e74-5d3238e2e3c8_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=" />
5
		<RESOURCE_TYPE value="RepositoryServiceResourceType" />
6
		<RESOURCE_KIND value="RepositoryServiceResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2015-01-26T22:48:00+00:00" />
9
		<PROTOCOL />
10
	</HEADER>
11
	<BODY>
12
		<CONFIGURATION>
13
			<DATASOURCE_TYPE>eagle:contentprovider</DATASOURCE_TYPE>
14
			<DATASOURCE_ORIGINAL_ID provenance="DLI">Datacite
15
			</DATASOURCE_ORIGINAL_ID>
16
			<DATASOURCE_AGGREGATED>false</DATASOURCE_AGGREGATED>
17
			<ENVIRONMENTS>
18
				<ENVIRONMENT>DLI</ENVIRONMENT>
19
			</ENVIRONMENTS>
20
			<TYPOLOGY />
21
			<MAX_SIZE_OF_DATASTRUCTURE>0</MAX_SIZE_OF_DATASTRUCTURE>
22
			<AVAILABLE_DISKSPACE>0</AVAILABLE_DISKSPACE>
23
			<MAX_NUMBER_OF_DATASTRUCTURE>0</MAX_NUMBER_OF_DATASTRUCTURE>
24
			<OFFICIAL_NAME>Datacite</OFFICIAL_NAME>
25
			<ENGLISH_NAME>Datacite</ENGLISH_NAME>
26
			<ICON_URI>http://datacite.org/sites/default/files/DataCite_header_final1_1.png
27
			</ICON_URI>
28
			<COUNTRY />
29
			<LOCATION>
30
				<LONGITUDE>0.0</LONGITUDE>
31
				<LATITUDE>0.0</LATITUDE>
32
				<TIMEZONE>0.0</TIMEZONE>
33
			</LOCATION>
34
			<REPOSITORY_WEBPAGE>http://datacite.org/</REPOSITORY_WEBPAGE>
35
			<REPOSITORY_INSTITUTION />
36
			<ADMIN_INFO>contact@datacite.org</ADMIN_INFO>
37
			<INTERFACES>
38
				<INTERFACE active="true" compliance="openaire2.0"
39
					contentDescription="metadata" id="api_________::datacite::0"
40
					label="aggregator::datarepository (openaire2.0)" removable="false"
41
					typology="aggregator::datarepository">
42
					<ACCESS_PROTOCOL>mongoDump</ACCESS_PROTOCOL>
43
					<BASE_URL>/home/sandro/mongo_backup/datacite3000.json</BASE_URL>
44
					<INTERFACE_EXTRA_FIELD name="last_collection_date" />
45
					<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='header']/*[local-name()='identifier']
46
					</INTERFACE_EXTRA_FIELD>
47
					<INTERFACE_EXTRA_FIELD name="last_aggregation_total" />
48
					<INTERFACE_EXTRA_FIELD name="last_collection_mdId" />
49
					<INTERFACE_EXTRA_FIELD name="last_collection_total" />
50
					<INTERFACE_EXTRA_FIELD name="last_aggregation_date" />
51
					<INTERFACE_EXTRA_FIELD name="last_aggregation_mdId" />
52
				</INTERFACE>
53
			</INTERFACES>
54
			<EXTRA_FIELDS>
55
				<FIELD>
56
					<key>DataSourceId</key>
57
					<value>datacite</value>
58
				</FIELD>
59
				<FIELD>
60
					<key>NamespacePrefix</key>
61
					<value>datacite</value>
62
				</FIELD>
63
				<FIELD>
64
					<key>VERIFIED</key>
65
					<value>NO</value>
66
				</FIELD>
67
			</EXTRA_FIELDS>
68
			<REGISTERED_BY>andrea.mannocci@isti.cnr.it</REGISTERED_BY>
69
		</CONFIGURATION>
70
		<STATUS>
71
			<NUMBER_OF_OBJECTS>0</NUMBER_OF_OBJECTS>
72
		</STATUS>
73
		<QOS>
74
			<AVAILABILITY>0</AVAILABILITY>
75
			<CAPACITY />
76
			<THROUGHPUT>0.0</THROUGHPUT>
77
		</QOS>
78
		<SECURITY_PARAMETERS />
79
		<BLACKBOARD />
80
	</BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaire_dli-profiles/trunk/src/main/resources/eu/dnetlib/test/schemas/DLIFMetadataFormat.xsd
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified"  
3
    xmlns:dli="http://www.dli.eu" targetNamespace="http://www.dli.eu"> 
4
    <xs:element name="dliObject" type="dli:dliEntity">
5
        <xs:annotation>
6
            <xs:documentation>
7
                The Data Literature Interlinking entity. It contains the following main entities
8
                (sub-entities): artifact/inscription, visual representation, documental manifestation, trismegistos card.
9
                Such entities share some common properties like resource identifier in D-Net aggregation system, 
10
                provenance and original identifier information, Intellectual property right statement.
11
            </xs:documentation>
12
        </xs:annotation>
13
    </xs:element>
14
    
15
    <xs:complexType name="dliEntity">
16
        <xs:sequence>
17
            <xs:element maxOccurs="1" minOccurs="1" name="dnetResourceIdentifier" type="xs:string">
18
                <xs:annotation>
19
                    <xs:documentation>
20
                        Unique resource identifier internal to D-Net infrastructure.
21
                        This property is obtained by concatenating the CP acronym and the
22
                        MD5 of the local record identifier: CP_ACRONYM::MD5(LOCAL_IDENTIFIER).
23
                    </xs:documentation>
24
                </xs:annotation>
25
            </xs:element>
26
            <xs:element maxOccurs="1" minOccurs="1" name="localIdentifier" type="dli:identifier">
27
                <xs:annotation>
28
                    <xs:documentation>
29
                       The local identifier of the record
30
                    </xs:documentation>
31
                </xs:annotation>
32
            </xs:element>
33
            <xs:element name="title" type="xs:string" minOccurs="0" maxOccurs="unbounded" />
34
            <xs:element name="date" type="xs:string" />
35
            <xs:element name= "objectType" maxOccurs="1" minOccurs="1" type="dli:objectType" />             
36
            
37
            <xs:element name="authors">
38
                <xs:complexType>
39
                    <xs:sequence>
40
                        <xs:element name="author" type="dli:authorType" maxOccurs="unbounded" />
41
                    </xs:sequence>
42
                </xs:complexType>                
43
            </xs:element>         
44
            
45
            <xs:element name="relations">
46
                <xs:complexType>
47
                    <xs:sequence>
48
                        <xs:element name="relation" type="dli:realtionType" maxOccurs="unbounded" />
49
                    </xs:sequence>
50
                </xs:complexType>                
51
            </xs:element>
52

  
53
        </xs:sequence>                
54
    </xs:complexType>
55
    
56
    <xs:simpleType name="objectType">
57
        <xs:restriction base="xs:string">
58
            <xs:enumeration value="publication"/>
59
            <xs:enumeration value="dataset"/>
60
        </xs:restriction>
61
    </xs:simpleType>
62
    
63
    
64
    
65
    
66
    <xs:complexType name="realtionType">  
67
        <xs:sequence>
68
            <xs:element name="entitytype" type="dli:objectType"></xs:element>
69
            <xs:element name="pid" type="dli:identifier" minOccurs="1" maxOccurs="1"/>
70
            <xs:element name="resolvedURL" type="xs:string" maxOccurs="1"/>
71
            <xs:element name="typeOfRelation"  maxOccurs="1">
72
                <xs:simpleType>
73
                    <xs:restriction base="xs:string">
74
                        <xs:enumeration value="relatedTo" />
75
                    </xs:restriction>
76
                </xs:simpleType>                
77
            </xs:element>          
78
        </xs:sequence>        
79
    </xs:complexType>
80
    
81
    
82
    
83
    <xs:complexType name="authorType">  
84
        <xs:sequence>
85
            <xs:element name="fullname" type="xs:string" minOccurs="1" maxOccurs="1"/>
86
            <xs:element name="localIdentifier" type="xs:string" maxOccurs="1"/>
87
            <xs:element name="typeOfIdentifier" type="xs:string" maxOccurs="1"/>          
88
        </xs:sequence>
89
        
90
    </xs:complexType>
91
    
92
    
93
    <xs:complexType name="identifier">
94
        <xs:simpleContent>            
95
                <xs:extension base="xs:string">
96
                    <xs:attribute name="type" type="xs:string" use="required" />                    
97
                </xs:extension>            
98
        </xs:simpleContent>   
99
    </xs:complexType>
100
    
101
    
102

  
103
</xs:schema>
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/ConfigurationTestConfig.java
1
package eu.dnetlib.dli.openaire.transform;
2

  
3
import org.springframework.cache.ehcache.EhCacheFactoryBean;
4
import org.springframework.cache.ehcache.EhCacheManagerFactoryBean;
5
import org.springframework.context.annotation.Bean;
6
import org.springframework.context.annotation.Configuration;
7

  
8
import eu.dnetlib.msro.dli.workflows.nodes.transform.CrossrefDOIResolver;
9
import eu.dnetlib.msro.dli.workflows.nodes.transform.DataciteDOIResolver;
10

  
11
@Configuration
12
public class ConfigurationTestConfig {
13

  
14
	@Bean
15
	public EhCacheManagerFactoryBean cacheFactory() {
16
		EhCacheManagerFactoryBean cacheFactory = new EhCacheManagerFactoryBean();
17
		cacheFactory.setCacheManagerName("testCacheFactory");
18
		return cacheFactory;
19
	}
20

  
21
	/*
22
	 * p:eternal="false" p:timeToLive="0" p:timeToIdle="36000" p:maxElementsInMemory="10000" p:maxElementsOnDisk="20000"
23
	 * p:cacheManager-ref="dataciteDOICacheManager" p:cacheName="dataciteDOICache"
24
	 */
25
	@Bean
26
	public EhCacheFactoryBean dataciteDOICache() {
27
		EhCacheFactoryBean doiCacheFactoryBean = new EhCacheFactoryBean();
28
		doiCacheFactoryBean.setEternal(false);
29
		doiCacheFactoryBean.setTimeToLive(0);;
30
		doiCacheFactoryBean.setTimeToIdle(36000);
31
		doiCacheFactoryBean.setMaxEntriesLocalHeap(10000);
32
		doiCacheFactoryBean.setMaxEntriesLocalDisk(20000);
33
		doiCacheFactoryBean.setCacheManager(cacheFactory().getObject());
34
		doiCacheFactoryBean.setCacheName("dataciteDOICache");
35
		return doiCacheFactoryBean;
36
	}
37

  
38
	@Bean
39
	public EhCacheFactoryBean crossrefDOICache() {
40
		EhCacheFactoryBean doiCacheFactoryBean = new EhCacheFactoryBean();
41
		doiCacheFactoryBean.setEternal(false);
42
		doiCacheFactoryBean.setTimeToLive(0);;
43
		doiCacheFactoryBean.setTimeToIdle(36000);
44
		doiCacheFactoryBean.setMaxEntriesLocalHeap(10000);
45
		doiCacheFactoryBean.setMaxEntriesLocalDisk(20000);
46
		doiCacheFactoryBean.setCacheManager(cacheFactory().getObject());
47
		doiCacheFactoryBean.setCacheName("crossrefDOICache");
48
		return doiCacheFactoryBean;
49
	}
50

  
51
	@Bean
52
	public DataciteDOIResolver dataciteResolver() {
53
		DataciteDOIResolver resolver = new DataciteDOIResolver();
54
		return resolver;
55
	}
56

  
57
	@Bean
58
	public CrossrefDOIResolver crossrefResolver() {
59
		return new CrossrefDOIResolver();
60
	}
61

  
62
}
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/TestDOIResolver.java
1
package eu.dnetlib.dli.openaire.transform;
2

  
3
import org.junit.Assert;
4
import org.junit.Test;
5
import org.junit.runner.RunWith;
6
import org.springframework.beans.factory.annotation.Autowired;
7
import org.springframework.test.context.ContextConfiguration;
8
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
9

  
10
import eu.dnetlib.msro.dli.workflows.nodes.transform.CrossrefDOIResolver;
11
import eu.dnetlib.msro.dli.workflows.nodes.transform.DOIResponse;
12
import eu.dnetlib.msro.dli.workflows.nodes.transform.DataciteDOIResolver;
13

  
14
@RunWith(SpringJUnit4ClassRunner.class)
15
@ContextConfiguration(classes = ConfigurationTestConfig.class)
16
public class TestDOIResolver {
17

  
18
	@Autowired
19
	DataciteDOIResolver dataciteDOIResolver;
20

  
21
	@Autowired
22
	CrossrefDOIResolver crossRefDOIResolver;
23

  
24
	@Test
25
	public void testDataciteNotNull() {
26
		Assert.assertNotNull(dataciteDOIResolver);
27
	}
28

  
29
	@Test
30
	public void testSearchDatacite() {
31
		final String[] dois = { "10.6085/AA/TPT006_061MXTI061R00_20020806.50.3", "10.6085/AA/TPT006_061MXTI061R00_20020806.50.1",
32
				"10.6085/AA/TPT006_061MXTI061R00_20020806.50.3" };
33
		int k = 0;
34
		double average_sum = 0.0f;
35
		System.out.println(dataciteDOIResolver.retrieveDOI(dois[0]).getRecord());
36
		while (k < 100) {
37
			for (int i = 0; i < dois.length; i++) {
38
				long start = System.currentTimeMillis();
39
				Assert.assertNotNull(dataciteDOIResolver.retrieveDOI(dois[i]));
40
				long end = System.currentTimeMillis();
41
				average_sum += end - start;
42
			}
43
			k++;
44
		}
45

  
46
		System.out.println("Average time on request 3 item 100 times " + average_sum / 100 + "ms");
47
	}
48

  
49
	@Test
50
	public void testSearchCrossref() {
51
		final String[] dois = { "10.1002/etc.2522", "10.1007/s00300-011-1150-7", "10.1016/j.ancene.2014.01.001" };
52
		int k = 0;
53
		double average_sum = 0.0f;
54
		System.out.println("\n\n\n\n\n" + crossRefDOIResolver.retrieveDOI(dois[0]).getRecord());
55
		while (k < 100) {
56
			for (int i = 0; i < dois.length; i++) {
57
				long start = System.currentTimeMillis();
58
				DOIResponse record = crossRefDOIResolver.retrieveDOI(dois[i]);
59
				Assert.assertNotNull(record);
60
				long end = System.currentTimeMillis();
61
				average_sum += end - start;
62
			}
63
			k++;
64
		}
65
		System.out.println("Average time on request 3 item 100 times " + average_sum / 100 + "ms");
66
	}
67
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/AbstractDOIResolver.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.io.ByteArrayOutputStream;
5
import java.io.IOException;
6
import java.io.InputStreamReader;
7
import java.io.UnsupportedEncodingException;
8
import java.util.zip.GZIPInputStream;
9
import java.util.zip.GZIPOutputStream;
10

  
11
import org.apache.commons.io.IOUtils;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14

  
15
// TODO: Auto-generated Javadoc
16
/**
17
 * The Class AbstractDOIResolver.
18
 */
19
public abstract class AbstractDOIResolver {
20

  
21
	/** The Constant NOT_FOUND_ELEMENT. */
22
	public static final String NOT_FOUND_ELEMENT = "NOT_FOUND";
23

  
24
	/** The Constant log. */
25
	private static final Log log = LogFactory.getLog(AbstractDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM
26

  
27
	/**
28
	 * Retrieve doi.
29
	 *
30
	 * @param doi
31
	 *            the doi
32
	 * @return the DOI response
33
	 */
34
	public abstract DOIResponse retrieveDOI(final String doi);
35

  
36
	/**
37
	 * Compress record.
38
	 *
39
	 * @param str
40
	 *            the str
41
	 * @return the byte[]
42
	 */
43
	protected byte[] compressRecord(final String str) {
44
		if (str == null || str.length() == 0) { return null; }
45
		ByteArrayOutputStream obj = new ByteArrayOutputStream();
46
		try {
47
			GZIPOutputStream gzip = new GZIPOutputStream(obj);
48
			gzip.write(str.getBytes("UTF-8"));
49
			gzip.close();
50
			return obj.toByteArray();
51

  
52
		} catch (UnsupportedEncodingException e) {
53
			log.error("Error on compress record before tu put in the cache", e);
54
			return null;
55
		} catch (IOException e) {
56
			log.error("Error on compress record before tu put in the cache", e);
57
			return null;
58
		}
59
	}
60

  
61
	/**
62
	 * Decompress.
63
	 *
64
	 * @param input
65
	 *            the input
66
	 * @return the string
67
	 */
68
	protected String decompress(final byte[] input) {
69

  
70
		GZIPInputStream gis;
71
		try {
72
			gis = new GZIPInputStream(new ByteArrayInputStream(input));
73
			return IOUtils.toString(new InputStreamReader(gis, "UTF-8"));
74
		} catch (IOException e) {
75
			log.error("Error on decompress record when getting it on cache", e);
76
			return null;
77
		}
78

  
79
	}
80

  
81
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DataCiteSearchResponse.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import org.apache.commons.codec.binary.Base64;
7

  
8
import com.google.gson.JsonArray;
9
import com.google.gson.JsonElement;
10
import com.google.gson.JsonObject;
11
import com.google.gson.JsonParser;
12

  
13
/**
14
 * The Class DataCiteSearchResponse.
15
 */
16
public class DataCiteSearchResponse {
17

  
18
	/** The total. */
19
	private long total;
20

  
21
	/** The xml records. */
22
	private List<String> xmlRecords;
23

  
24
	/**
25
	 * Creates the new response.
26
	 *
27
	 * @param response
28
	 *            the response
29
	 * @return the data cite search response
30
	 */
31
	public static DataCiteSearchResponse createNewResponse(final String response) {
32
		DataCiteSearchResponse item = new DataCiteSearchResponse();
33

  
34
		if (response == null) {
35

  
36
			return null;
37

  
38
		}
39
		JsonElement jElement = new JsonParser().parse(response);
40
		JsonObject jobject = jElement.getAsJsonObject();
41
		if (jobject.has("response")) {
42

  
43
			item.setTotal(jobject.get("response").getAsJsonObject().get("numFound").getAsLong());
44

  
45
			JsonElement hits = ((JsonObject) jobject.get("response")).get("docs");
46

  
47
			JsonArray hitsObject = hits.getAsJsonArray();
48

  
49
			List<String> records = new ArrayList<String>();
50

  
51
			for (JsonElement elem : hitsObject) {
52
				String xml = ((JsonObject) elem).get("xml").getAsString();
53
				byte[] data = Base64.decodeBase64(xml.getBytes());
54
				String s = new String(data);
55
				records.add(s);
56

  
57
			}
58
			item.setXmlRecords(records);
59
			return item;
60
		}
61
		return null;
62
	}
63

  
64
	/**
65
	 * Gets the xml records.
66
	 *
67
	 * @return the xmlRecords
68
	 */
69
	public List<String> getXmlRecords() {
70
		return xmlRecords;
71
	}
72

  
73
	/**
74
	 * Sets the xml records.
75
	 *
76
	 * @param xmlRecords
77
	 *            the xmlRecords to set
78
	 */
79
	public void setXmlRecords(final List<String> xmlRecords) {
80
		this.xmlRecords = xmlRecords;
81
	}
82

  
83
	/**
84
	 * Gets the total.
85
	 *
86
	 * @return the total
87
	 */
88
	public long getTotal() {
89
		return total;
90
	}
91

  
92
	/**
93
	 * Sets the total.
94
	 *
95
	 * @param total
96
	 *            the total to set
97
	 */
98
	public void setTotal(final long total) {
99
		this.total = total;
100
	}
101

  
102
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/CrossrefDOIResolver.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.io.IOException;
4
import java.net.MalformedURLException;
5
import java.net.URL;
6

  
7
import javax.annotation.Resource;
8

  
9
import net.sf.ehcache.Cache;
10
import net.sf.ehcache.Element;
11

  
12
import org.apache.commons.io.IOUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15

  
16
public class CrossrefDOIResolver extends AbstractDOIResolver {
17

  
18
	private static final Log log = LogFactory.getLog(CrossrefDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM
19

  
20
	private final static String baseUrlCrossref = "http://www.crossref.org/openurl?noredirect=true&pid=antleb%40di.uoa.gr&format=unixref&id=";
21

  
22
	/** The cache. */
23
	@Resource(name = "crossrefDOICache")
24
	private Cache cache;
25

  
26
	/** The Constant NOT_FOUND_ELEMENT. */
27

  
28
	@Override
29
	public DOIResponse retrieveDOI(final String doi) {
30
		final Element elem = cache.get(doi);
31
		String record = null;
32
		if (elem != null) {
33
			final byte[] compressed_data = (byte[]) elem.getObjectValue();
34
			record = decompress(compressed_data);
35
		} else {
36
			log.info("Refreshing " + doi + " cache...");
37
			record = requestDOI(doi);
38
			if (record != null) {
39
				final byte[] compressed_data = compressRecord(record);
40
				if (compressed_data != null) {
41
					cache.put(new Element(doi, compressed_data));
42
				}
43
			}
44

  
45
		}
46
		DOIResponse doiResponse = new DOIResponse(DOIType.publication, doi, record);
47
		if (record == null || record.equals(NOT_FOUND_ELEMENT)) {
48
			doiResponse.setType(DOIType.none);
49
		}
50
		return doiResponse;
51
	}
52

  
53
	private String requestDOI(final String doi) {
54
		try {
55
			URL crossRefurl = new URL(baseUrlCrossref + doi);
56
			final String response = IOUtils.toString(crossRefurl.openStream());
57
			if (response == null) { return response; }
58
			if (response.contains("<error>")) { return NOT_FOUND_ELEMENT; }
59
			return response;
60
		} catch (MalformedURLException e) {
61
			log.error("Error on request DOI to crossRef, request:" + baseUrlCrossref + doi, e);
62
			return null;
63
		} catch (IOException e) {
64
			log.error("Error on request DOI to crossRef, request:" + baseUrlCrossref + doi, e);
65
			return null;
66
		}
67
	}
68
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIWorker.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.util.List;
4
import java.util.Map;
5
import java.util.concurrent.BlockingQueue;
6

  
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9

  
10
import com.google.common.collect.Lists;
11
import com.google.common.collect.Maps;
12

  
13
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
14

  
15
// TODO: Auto-generated Javadoc
16
/**
17
 * The Class DOIWorker.
18
 */
19
public class DOIWorker implements Runnable {
20

  
21
	private static final String MAIN_RECORD_KEY = "record";
22

  
23
	private static final String RELATED_DOI_KEY = "relatedDOI";
24

  
25
	/** The Constant log. */
26
	private static final Log log = LogFactory.getLog(DOIWorker.class); // NOPMD by marko on 11/24/08 5:02 PM
27

  
28
	/** The queue. */
29
	private final BlockingQueue<String> queue;
30

  
31
	/** The output queue. */
32
	private final BlockingQueue<String> outputQueue;
33

  
34
	/** The terminator queue. */
35
	private final String terminatorQueue;
36

  
37
	/** The doi resolvers. */
38
	private List<AbstractDOIResolver> doiResolvers;
39

  
40
	/** The xslt. */
41
	private ApplyXslt xslt;
42

  
43
	/**
44
	 * Instantiates a new DOI worker.
45
	 *
46
	 * @param queue
47
	 *            the queue
48
	 * @param outputQueue
49
	 *            the output queue
50
	 * @param terminatorQueue
51
	 *            the terminator queue
52
	 * @param doiResolvers
53
	 *            the doi resolvers
54
	 * @param xslt
55
	 *            the xslt
56
	 */
57
	public DOIWorker(final BlockingQueue<String> queue, final BlockingQueue<String> outputQueue, final String terminatorQueue,
58
			final List<AbstractDOIResolver> doiResolvers, final ApplyXslt xslt) {
59
		this.queue = queue;
60
		this.terminatorQueue = terminatorQueue;
61
		this.doiResolvers = doiResolvers;
62
		this.xslt = xslt;
63
		this.outputQueue = outputQueue;
64

  
65
	}
66

  
67
	/*
68
	 * (non-Javadoc)
69
	 *
70
	 * @see java.lang.Runnable#run()
71
	 */
72
	@Override
73
	public void run() {
74
		try {
75
			String nextRecord = this.queue.take();
76
			if (nextRecord == this.terminatorQueue) {
77
				log.debug("Found terminator record");
78
				this.queue.put(terminatorQueue);
79
				return;
80
			}
81
			while (nextRecord != null && nextRecord != terminatorQueue) {
82
				Map<String, Object> extractedFields = extractRelatedIdentifiers(nextRecord);
83
				List<String> relationsDOI = (List<String>) extractedFields.get(RELATED_DOI_KEY);
84
				if (relationsDOI != null && relationsDOI.size() > 1) {
85
					DOIResponse response = null;
86
					List<DOIResponse> matchedResponse = Lists.newArrayList();
87
					for (String doi : relationsDOI) {
88
						DOIResponse responseDOI = getDOIResponse(doi);
89
						if (responseDOI != null && responseDOI.getType() != DOIType.none) {
90
							matchedResponse.add(responseDOI);
91
						}
92
					}
93
					if (matchedResponse.size() > 0) {
94
						DOIResponse mainRecord = (DOIResponse) extractedFields.get(MAIN_RECORD_KEY);
95
						String outputRecord = putResponsesOnRecord(nextRecord, matchedResponse);
96
						outputQueue.put(xslt.evaluate(outputRecord));
97
						for (DOIResponse res : matchedResponse) {
98
							outputRecord = putResponsesOnRecord(res.getRecord(), Lists.newArrayList(mainRecord));
99
							outputQueue.put(xslt.evaluate(outputRecord));
100
						}
101
					}
102
				}
103
				nextRecord = this.queue.take();
104
				if (nextRecord == this.terminatorQueue) {
105
					log.debug("Found terminator record");
106
					this.queue.put(terminatorQueue);
107
				}
108
			}
109
		} catch (InterruptedException e) {
110
			log.error("Error on taking an element on queue", e);
111
		}
112

  
113
	}
114

  
115
	/**
116
	 * Put responses on record.
117
	 *
118
	 * @param nextRecord
119
	 *            the next record
120
	 * @param matchedResponse
121
	 *            the matched response
122
	 * @return the string
123
	 */
124
	private String putResponsesOnRecord(final String nextRecord, final List<DOIResponse> matchedResponse) {
125
		// TODO Auto-generated method stub
126
		return null;
127
	}
128

  
129
	/**
130
	 * Search for each DOI
131
	 *
132
	 * @param doi
133
	 *            the doi
134
	 * @return the DOI response
135
	 */
136
	private DOIResponse getDOIResponse(final String doi) {
137
		DOIResponse currentResponse = null;
138
		for (AbstractDOIResolver resolver : this.doiResolvers) {
139
			currentResponse = resolver.retrieveDOI(doi);
140
			if (currentResponse != null && currentResponse.getType() != DOIType.none) { return currentResponse; }
141
		}
142
		return currentResponse;
143
	}
144

  
145
	/**
146
	 * Extract related identifiers.
147
	 *
148
	 * @param nextRecord
149
	 *            the next record
150
	 * @return the list
151
	 */
152
	private Map<String, Object> extractRelatedIdentifiers(final String nextRecord) {
153
		Map<String, Object> result = Maps.newHashMap();
154
		DOIResponse mainRecord = new DOIResponse();
155
		List<String> relatedRecord = Lists.newArrayList();
156
		result.put(MAIN_RECORD_KEY, mainRecord);
157
		result.put(RELATED_DOI_KEY, relatedRecord);
158

  
159
		return result;
160
	}
161

  
162
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/StreamingRecordParser.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Stack;
8

  
9
import javax.xml.stream.XMLInputFactory;
10
import javax.xml.stream.XMLStreamConstants;
11
import javax.xml.stream.XMLStreamException;
12
import javax.xml.stream.XMLStreamReader;
13

  
14
import org.apache.commons.logging.Log;
15
import org.apache.commons.logging.LogFactory;
16

  
17
import com.google.common.collect.Lists;
18

  
19
/**
20
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the header.
21
 *
22
 * @author marko
23
 *
24
 */
25
public class StreamingRecordParser {
26

  
27
	private static final Log log = LogFactory.getLog(StreamingRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
28

  
29
	public Map<String, Object> parseRecord(final String record) {
30

  
31
		try {
32
			XMLInputFactory factory = XMLInputFactory.newInstance();
33
			XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes()));
34

  
35
			HashMap<String, Object> res = new HashMap<String, Object>();
36

  
37
			Stack<String> elementStack = new Stack<String>();
38
			elementStack.push("/");
39
			List<String> relatedId = Lists.newArrayList();
40

  
41
			while (parser.hasNext()) {
42
				int event = parser.next();
43
				if (event == XMLStreamConstants.END_ELEMENT) {
44
					elementStack.pop();
45
				} else if (event == XMLStreamConstants.START_ELEMENT) {
46
					final String localName = parser.getLocalName();
47
					elementStack.push(localName);
48

  
49
					if ("objIdentifier".equals(localName)) {
50
						parser.next();
51
						// log.debug("found"+parser.getText().trim());
52
						res.put("id", parser.getText().trim());
53

  
54
					} else if ("relatedIdentifier".equals(localName)) {
55

  
56
						for (int i = 0; i < parser.getAttributeCount(); i++) {
57
							String attrituteName = parser.getAttributeLocalName(i);
58
							String attritute = parser.getAttributeValue(i);
59
							if ("relatedidentifiertype".equals(attrituteName.toLowerCase())) {
60
								if ("doi".equals(attritute.toLowerCase())) {
61
									parser.next();
62
									try {
63
										relatedId.add(parser.getText().trim());
64
									} catch (Exception e) {
65

  
66
									}
67
									break;
68
								}
69
							}
70
						}
71

  
72
					}
73
				}
74
			}
75
			res.put("relatedId", relatedId);
76
			return res;
77
		} catch (XMLStreamException e) {
78
			throw new IllegalStateException(e);
79
		}
80

  
81
	}
82

  
83
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIResponse.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
/**
 * Kind of object a DOI was resolved to; {@code none} is set by the resolvers when the DOI could not be resolved.
 */
enum DOIType {
	publication,
	dataset,
	none
}

/**
 * Mutable value holder for the outcome of a DOI resolution: the DOI itself, the kind of object it identifies and the record retrieved for
 * it.
 */
public class DOIResponse {

	/** Kind of the resolved object. */
	private DOIType type;

	/** The DOI this response refers to. */
	private String doi;

	/** The record retrieved for the DOI. */
	private String record;

	/**
	 * Creates a response with every property left {@code null}.
	 */
	public DOIResponse() {
		this(null, null, null);
	}

	/**
	 * Creates a fully initialised response.
	 *
	 * @param type
	 *            kind of the resolved object
	 * @param doi
	 *            the DOI
	 * @param record
	 *            the record retrieved for the DOI
	 */
	public DOIResponse(final DOIType type, final String doi, final String record) {
		this.type = type;
		this.doi = doi;
		this.record = record;
	}

	/**
	 * @return the kind of the resolved object
	 */
	public DOIType getType() {
		return this.type;
	}

	/**
	 * @param type
	 *            the kind of the resolved object
	 */
	public void setType(final DOIType type) {
		this.type = type;
	}

	/**
	 * @return the DOI
	 */
	public String getDoi() {
		return this.doi;
	}

	/**
	 * @param doi
	 *            the DOI
	 */
	public void setDoi(final String doi) {
		this.doi = doi;
	}

	/**
	 * @return the record retrieved for the DOI
	 */
	public String getRecord() {
		return this.record;
	}

	/**
	 * @param record
	 *            the record retrieved for the DOI
	 */
	public void setRecord(final String record) {
		this.record = record;
	}

}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DataciteDOIResolver.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import java.io.IOException;
4
import java.net.MalformedURLException;
5
import java.net.URL;
6

  
7
import javax.annotation.Resource;
8

  
9
import net.sf.ehcache.Cache;
10
import net.sf.ehcache.Element;
11

  
12
import org.apache.commons.io.IOUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15

  
16
/**
17
 * The Class DataciteDOIResolver.
18
 */
19
public class DataciteDOIResolver extends AbstractDOIResolver {
20

  
21
	/** The cache. */
22
	@Resource(name = "dataciteDOICache")
23
	private Cache cache;
24

  
25
	/** The Constant log. */
26
	private static final Log log = LogFactory.getLog(DataciteDOIResolver.class); // NOPMD by marko on 11/24/08 5:02 PM
27

  
28
	/** The Constant baseURL. */
29
	private final static String baseURL = "http://search.datacite.org/api?wt=json&fl=doi,xml&q=doi:";
30

  
31
	@Override
32
	public DOIResponse retrieveDOI(final String doi) {
33
		final Element elem = cache.get(doi);
34
		String record = null;
35
		if (elem != null) {
36
			final byte[] compressed_data = (byte[]) elem.getObjectValue();
37
			record = decompress(compressed_data);
38
		} else {
39
			log.info("Refreshing " + doi + " cache...");
40
			record = requestDOI(doi);
41
			if (record != null) {
42
				final byte[] compressed_data = compressRecord(record);
43
				if (compressed_data != null) {
44
					cache.put(new Element(doi, compressed_data));
45
				}
46
			}
47
		}
48
		DOIResponse doiResponse = new DOIResponse(DOIType.dataset, doi, record);
49
		if (record == null || record.equals(NOT_FOUND_ELEMENT)) {
50
			doiResponse.setType(DOIType.none);
51
		}
52
		return doiResponse;
53
	}
54

  
55
	/**
56
	 * Request doi.
57
	 *
58
	 * @param doi
59
	 *            the doi
60
	 * @return the string
61
	 */
62
	private String requestDOI(final String doi) {
63
		final String currentURL = baseURL + doi;
64

  
65
		try {
66
			final URL myURl = new URL(currentURL);
67
			final String response = IOUtils.toString(myURl.openStream());
68
			final DataCiteSearchResponse responseDCT = DataCiteSearchResponse.createNewResponse(response);
69
			if (responseDCT != null && responseDCT.getTotal() > 0) { return responseDCT.getXmlRecords().get(0); }
70
			return NOT_FOUND_ELEMENT;
71
		} catch (MalformedURLException e) {
72
			log.error("Error on request DOI, request :" + currentURL, e);
73
			return null;
74
		} catch (IOException e) {
75
			log.error("Error on request DOI, request :" + currentURL, e);
76
			return null;
77
		}
78

  
79
	}
80

  
81
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIResolverJobNode.java
1
package eu.dnetlib.msro.dli.workflows.nodes.transform;
2

  
3
import javax.annotation.Resource;
4
import javax.xml.ws.wsaddressing.W3CEndpointReference;
5

  
6
import org.springframework.beans.factory.annotation.Autowired;
7

  
8
import com.googlecode.sarasvati.Arc;
9
import com.googlecode.sarasvati.NodeToken;
10

  
11
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
12
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
13
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
14
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
15

  
16
public class DOIResolverJobNode extends SimpleJobNode {
17

  
18
	private String inputEprParam;
19

  
20
	private String outputEprParam;
21

  
22
	/** The result set client factory. */
23
	@Autowired
24
	private ResultSetClientFactory resultSetClientFactory;
25

  
26
	/** The result set factory. */
27
	@Resource(name = "iterableResultSetFactory")
28
	private IterableResultSetFactory resultSetFactory;
29

  
30
	@Override
31
	protected String execute(final NodeToken token) throws Exception {
32
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam));
33
		Iterable<String> input = resultSetClientFactory.getClient(inputEpr);
34

  
35
		return Arc.DEFAULT_ARC;
36
	}
37

  
38
	/**
39
	 * @return the inputEprParam
40
	 */
41
	public String getInputEprParam() {
42
		return inputEprParam;
43
	}
44

  
45
	/**
46
	 * @param inputEprParam
47
	 *            the inputEprParam to set
48
	 */
49
	public void setInputEprParam(final String inputEprParam) {
50
		this.inputEprParam = inputEprParam;
51
	}
52

  
53
	/**
54
	 * @return the outputEprParam
55
	 */
56
	public String getOutputEprParam() {
57
		return outputEprParam;
58
	}
59

  
60
	/**
61
	 * @param outputEprParam
62
	 *            the outputEprParam to set
63
	 */
64
	public void setOutputEprParam(final String outputEprParam) {
65
		this.outputEprParam = outputEprParam;
66
	}
67

  
68
}
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/ObtainDataSourceParamsJobNode.java
1
package eu.dnetlib.msro.dli.workflows.nodes;
2

  
3
import javax.annotation.Resource;
4

  
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7

  
8
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
9
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
10
import eu.dnetlib.miscutils.datetime.DateUtils;
11
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
12

  
13
public class ObtainDataSourceParamsJobNode extends SimpleJobNode {
14

  
15
	private String providerId;
16

  
17
	@Resource
18
	private UniqueServiceLocator serviceLocator;
19

  
20
	@Override
21
	protected String execute(final NodeToken token) throws Exception {
22
		final String query = "let $x := /*[.//RESOURCE_IDENTIFIER/@value='" + providerId + "']//EXTRA_FIELDS\n"
23
				+ "return concat($x/FIELD[./key='DataSourceId']/value, ' @@@ ', $x/FIELD[./key='NamespacePrefix']/value)";
24

  
25
		final String[] arr = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(query).split("@@@");
26

  
27
		token.getEnv().setAttribute("parentDatasourceId", arr[0].trim());
28
		token.getEnv().setAttribute("namespacePrefix", arr[1].trim());
29
		token.getEnv().setAttribute("dateOfCollection", DateUtils.now_ISO8601());
30

  
31
		return Arc.DEFAULT_ARC;
32
	}
33

  
34
	public String getProviderId() {
35
		return providerId;
36
	}
37

  
38
	public void setProviderId(final String providerId) {
39
		this.providerId = providerId;
40
	}
41

  
42
}
modules/dnet-openaire_dli-workflows/trunk/src/main/resources/eu/dnetlib/test/profiles/dli_openaire/repo-hi/Aggregate_Metadata_from_AggregatorDataRepository_Inference.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="3c924d16-83c1-44b0-81cd-681df42bcec4_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="value3" />
9
		<DATE_OF_CREATION value="2015-02-03T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>Aggregate Metadata (datasets) from Aggregator::DataRepository</WORKFLOW_NAME>
13
		<WORKFLOW_INFO>
14
			<FIELD name="Action">Aggregate Metadata</FIELD>			
15
			<FIELD name="Datasource class">aggregator::datarepository</FIELD>
16
			<FIELD name="Content">datasets</FIELD>
17
		</WORKFLOW_INFO>
18
		<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE>
19
		<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY>
20
		<CONFIGURATION start="manual">
21
			<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
22
				<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
23
				<PARAMETERS>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff