Revision 48701
Added by Sandro La Bruzzo over 6 years ago
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/collect/FindDateRangeForIncrementalHarvestingJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.workflows.nodes.collect; |
2 | 2 |
|
3 | 3 |
import java.text.SimpleDateFormat; |
4 |
import java.util.Date; |
|
5 |
import java.util.HashMap; |
|
6 |
import java.util.Map; |
|
4 |
import java.time.Instant; |
|
5 |
import java.time.LocalDateTime; |
|
6 |
import java.time.ZoneId; |
|
7 |
import java.time.ZonedDateTime; |
|
8 |
import java.time.format.DateTimeFormatter; |
|
9 |
import java.util.*; |
|
7 | 10 |
|
8 | 11 |
import javax.annotation.Resource; |
9 | 12 |
|
13 |
import eu.dnetlib.enabling.tools.DnetStreamSupport; |
|
14 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
10 | 15 |
import org.apache.commons.logging.Log; |
11 | 16 |
import org.apache.commons.logging.LogFactory; |
12 | 17 |
|
... | ... | |
49 | 54 |
|
50 | 55 |
/**
 * Computes the "from" date used for incremental harvesting, starting from the
 * value returned by findLastSuccessStartDate().
 *
 * This hunk shows two revisions of the body. Judging by the gutter numbers, the
 * single-line ternary return is the removed body and the LocalDateTime /
 * DateTimeFormatter lines are the added body.
 *
 * Removed revision: returns (d - ONE_DAY) formatted as yyyy-MM-dd, or null when d is not positive.
 * Added revision: treats d as epoch milliseconds, converts it to the Etc/UTC zone and
 * formats it as yyyy-MM-dd'T'HH:mm:ss'Z'.
 *
 * NOTE(review): the added revision no longer checks d > 0, although
 * findLastSuccessStartDate() returns -1 when no entry is found. That value would be
 * formatted as a timestamp just before the epoch instead of yielding null - confirm
 * this is intended.
 * NOTE(review): the added revision also drops the ONE_DAY safety margin - confirm.
 *
 * @return the formatted date string (the removed revision could also return null)
 */
private String calculateFromDate() { |
51 | 56 |
final long d = findLastSuccessStartDate(); |
52 |
return (d > 0) ? (new SimpleDateFormat("yyyy-MM-dd")).format(new Date(d - ONE_DAY)) : null; |

53 |
} |

54 | 57 |
|
58 |
|

59 |
LocalDateTime zdt = LocalDateTime.ofInstant(Instant.ofEpochMilli(d), ZoneId.of("Etc/UTC")); |

60 |
|

61 |
// The 'Z' suffix is a quoted literal in the pattern; the UTC conversion happens on the line above.
return DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").format(zdt); |

62 |
|

63 |
// return (d > 0) ? (new SimpleDateFormat("yyyy-MM-dd")).format(new Date(d - ONE_DAY)) : null; |

64 |
} |
|
65 |
|
|
55 | 66 |
/**
 * Finds the most recent start date among the logged executions that match the query.
 *
 * This hunk shows two revisions of the body. Judging by the gutter numbers, the
 * removed revision built a placeholder query, kept the real lookup commented out and
 * always returned res (-1). The added revision queries dnetLogger for entries with the
 * current process profile id, parent profile id and processStatus "SUCCESS", parses
 * each "system:startDate" value as a long and returns the maximum.
 *
 * NOTE(review): Long::parseLong throws NumberFormatException when "system:startDate"
 * is missing (null) or not numeric, which would abort the whole lookup. The
 * commented-out code used NumberUtils.toLong(..., -1) instead - confirm that every
 * log entry carries the field.
 * NOTE(review): "final long res = -1" is shown as unchanged, but "return res" is
 * removed, so res looks unused in the added revision.
 *
 * @return the largest start date found, or -1 when no entry matches
 */
private long findLastSuccessStartDate() { |
56 | 67 |
final long res = -1; |
57 | 68 |
|
58 | 69 |
// TODO find the right condition to obtain the list of the previous executions |
59 | 70 |
|
60 |
final Map<String, Object> query = new HashMap<String, Object>(); |

61 |
query.put("", ""); |

71 |
//dnetLogger. |

62 | 72 |
|
63 |
/* |

64 |
* final Iterator<Map<String, String>> iter = this.dnetLogger.find(query); while (iter.hasNext()) { final Map<String, String> map = |

65 |
* iter.next(); if ("true".equalsIgnoreCase(map.get(WorkflowsConstants.SYSTEM_COMPLETED_SUCCESSFULLY))) { final long curr = |

66 |
* NumberUtils.toLong(map.get(WorkflowsConstants.SYSTEM_START_DATE), -1); if (curr > res) { res = curr; } } } |

67 |
*/ |

68 |
return res; |

69 |
} |

73 |
final Map<String, Object> query = new HashMap<>(); |

70 | 74 |
|
75 |
query.put("system:profileTemplateId", process.getProfileId()); |

76 |
query.put("system:parentProfileId", process.getParentProfileId()); |

77 |
query.put("system:processStatus", "SUCCESS"); |

78 |
|

79 |
Iterator<Map<String, String>> mapIterator = dnetLogger.find(query); |

80 |
// Keeps only the start date of each entry and takes the largest one.
Optional<Long> maxDate = DnetStreamSupport.generateStreamFromIterator(mapIterator).map(it -> it.get("system:startDate")).map(Long::parseLong).max(Long::compare); |

81 |
|

82 |
if (maxDate.isPresent()) |

83 |
return maxDate.get(); |

84 |
else |

85 |
// Sentinel for "no matching execution found".
return -1; |

86 |
} |
|
87 |
|
|
71 | 88 |
/**
 * @return the current value of the fromDateParam field
 */
public String getFromDateParam() { |
72 | 89 |
return this.fromDateParam; |
73 | 90 |
} |
modules/dnet-data-services/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiIterator.java | ||
---|---|---|
93 | 93 |
if ((set != null) && !set.isEmpty()) { |
94 | 94 |
url += "&set=" + URLEncoder.encode(set, "UTF-8"); |
95 | 95 |
} |
96 |
if ((fromDate != null) && fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
|
97 |
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
|
96 |
if ((fromDate != null) && fromDate.matches("\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?+")) {
|
|
97 |
url += "&from=" + URLEncoder.encode(fromDate, "UTF-8");
|
|
98 | 98 |
} |
99 |
if ((untilDate != null) && untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) {
|
|
100 |
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
|
99 |
if ((untilDate != null) && untilDate.matches("\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?+")) {
|
|
100 |
url += "&until=" + URLEncoder.encode(untilDate, "UTF-8");
|
|
101 | 101 |
} |
102 | 102 |
log.info("Start harvesting using url: " + url); |
103 | 103 |
|
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/index/RefreshSchemaJobNode.java | ||
---|---|---|
21 | 21 |
|
22 | 22 |
private String interpretation; |
23 | 23 |
|
24 |
private String backendId; |
|
25 |
|
|
24 | 26 |
@Override |
25 | 27 |
protected String obtainServiceId(final Env env) { |
26 | 28 |
return getServiceLocator().getServiceId(IndexService.class); |
... | ... | |
39 | 41 |
|
40 | 42 |
job.setAction("REFRESH_SCHEMA"); |
41 | 43 |
job.getParameters().put("profileId", results.get(0)); |
44 |
job.getParameters().put("backendId", backendId); |
|
42 | 45 |
|
46 |
|
|
43 | 47 |
} |
44 | 48 |
|
45 | 49 |
public String getFormat() { |
... | ... | |
66 | 70 |
/**
 * Stores the given value in the interpretation field.
 *
 * @param interpretation the new value
 */
public void setInterpretation(final String interpretation) { |
67 | 71 |
this.interpretation = interpretation; |
68 | 72 |
} |
73 |
|
|
74 |
/**
 * @return the backendId field, which this revision also puts into the
 *         REFRESH_SCHEMA job parameters under the key "backendId"
 */
public String getBackendId() { |

75 |
return backendId; |

76 |
} |
|
77 |
|
|
78 |
/**
 * Stores the given value in the backendId field.
 *
 * NOTE(review): no default or null check is visible; if the field is never set, the
 * job parameter "backendId" is populated with null - confirm the consumer accepts that.
 *
 * @param backendId the new value
 */
public void setBackendId(String backendId) { |

79 |
this.backendId = backendId; |

80 |
} |
|
69 | 81 |
} |
modules/dnet-data-services/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiCollectorPlugin.java | ||
---|---|---|
38 | 38 |
|
39 | 39 |
if (mdFormat == null || mdFormat.isEmpty()) { throw new CollectorServiceException("Param 'mdFormat' is null or empty"); } |
40 | 40 |
|
41 |
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); } |
|
41 |
if (fromDate != null && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?+")) { |
|
42 |
throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + fromDate); |
|
43 |
} |
|
42 | 44 |
|
43 |
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}")) { throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + untilDate); } |
|
45 |
if (untilDate != null && !untilDate.matches("\\d{4}-\\d{2}-\\d{2}(T\\d{2}:\\d{2}:\\d{2}Z)?+")) { |
|
46 |
throw new CollectorServiceException("Invalid date (YYYY-MM-DD): " + untilDate); |
|
47 |
} |
|
44 | 48 |
|
45 | 49 |
return () -> { |
46 | 50 |
final Iterator<Iterator<String>> iterator = |
modules/dnet-data-services/trunk/src/main/java/eu/dnetlib/data/mdstore/modular/MDStoreRetriever.java | ||
---|---|---|
35 | 35 |
log.debug("bulk deliver of mdId: " + mdId); |
36 | 36 |
try { |
37 | 37 |
return DnetStreamSupport.generateStreamFromIterator(dao.readMDStore(mdId).iterate().iterator()); |
38 |
} catch (MDStoreServiceException e) {
|
|
39 |
throw new RuntimeException(e);
|
|
38 |
} catch (Throwable e) {
|
|
39 |
throw new RuntimeException(e);
|
|
40 | 40 |
} |
41 | 41 |
}).reduce(Stream::concat).orElseGet(Stream::empty).iterator(); |
42 | 42 |
} |
modules/dnet-hadoop-services/trunk/src/main/java/eu/dnetlib/data/hadoop/blackboard/SequenceFileFeeder.java | ||
---|---|---|
55 | 55 |
if (count % bulkSize == 0) { |
56 | 56 |
writer.hflush(); |
57 | 57 |
writer.hsync(); |
58 |
log.info(String.format("%s records so far %s", writer.toString(), count));
|
|
59 |
}
|
|
58 |
log.debug(String.format("%s records so far %s", writer.toString(), count));
|
|
59 |
}
|
|
60 | 60 |
} |
61 | 61 |
} |
62 | 62 |
log.info("written " + count + " records in sequence file: " + path); |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/dedup/TitleOrderingTest.java | ||
---|---|---|
7 | 7 |
import java.util.Queue; |
8 | 8 |
import java.util.UUID; |
9 | 9 |
|
10 |
import eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKey; |
|
11 |
import eu.dnetlib.data.mapreduce.hbase.dli.kv.DliKey; |
|
10 | 12 |
import org.apache.commons.io.IOUtils; |
13 |
import org.junit.Assert; |
|
11 | 14 |
import org.junit.Before; |
12 | 15 |
import org.junit.Test; |
13 | 16 |
import org.springframework.core.io.ClassPathResource; |
... | ... | |
89 | 92 |
return q; |
90 | 93 |
} |
91 | 94 |
|
95 |
|
|
96 |
// Checks the ordering of two DNGFKey instances built for the same id "a": the key
// created with mergesRel is expected to sort before the one created with otherRel,
// and a key is expected to compare equal to itself.
// NOTE(review): the assertions require exactly -1 and 1, while the Comparable contract
// only fixes the sign of the result - confirm the underlying compareTo returns exactly
// these values, or assert on the sign instead.
// NOTE(review): the method name mentions DLIKey but the test exercises DNGFKey.
@Test |

97 |
public void compareDLIKey() { |

98 |
|

99 |
DNGFKey k1 = DNGFKey.mergesRel("a"); |

100 |
DNGFKey k2 = DNGFKey.otherRel("a"); |

101 |
|

102 |
Assert.assertEquals(-1, k1.compareTo(k2)); |

103 |
Assert.assertEquals(0, k1.compareTo(k1)); |

104 |
Assert.assertEquals(1, k2.compareTo(k1)); |

105 |
|

106 |
|

107 |
} |
|
108 |
|
|
92 | 109 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_dmf.xml | ||
---|---|---|
3 | 3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
4 | 4 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
5 | 5 |
<oai:header> |
6 |
<dri:objIdentifier>r3d100010197::0000044c9241750360f9635ffab21ebb</dri:objIdentifier> |
|
7 |
<dri:resolvedDate>2017-03-08T18:26:27.390</dri:resolvedDate> |
|
8 |
<dri:recordIdentifier>oai:oai.datacite.org:3361260</dri:recordIdentifier> |
|
9 |
<dri:dateOfCollection>2017-03-08T11:33:08.731+01:00</dri:dateOfCollection> |
|
10 |
<dri:repositoryId> |
|
11 |
af7be59e-78ec-4995-b7a1-00d98129bc3d_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= |
|
6 |
<dri:objIdentifier>openaire____::1bf84a62e19f820e8d3f6ea7c35eb60d</dri:objIdentifier> |
|
7 |
<dri:resolvedDate>2017-03-31T14:57:43.029</dri:resolvedDate> |
|
8 |
<dri:recordIdentifier>oai:dnet:datacite____::4b8de4f33af8b6e73c9f750e05ae801d</dri:recordIdentifier> |
|
9 |
<dri:dateOfCollection>2017-03-30T11:07:00.452+02:00</dri:dateOfCollection> |
|
10 |
<dri:repositoryId>openaire_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= |
|
12 | 11 |
</dri:repositoryId> |
13 |
<dri:datasourceprefix>r3d100010197</dri:datasourceprefix> |
|
14 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:oai.datacite.org:3361260</identifier> |
|
15 |
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-05-07T23:15:54Z</datestamp> |
|
16 |
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">BL</setSpec> |
|
17 |
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">BL.CCDC</setSpec> |
|
12 |
<dri:datasourceprefix>openaire____</dri:datasourceprefix> |
|
13 |
<oai:identifier>oai:dnet:datacite____::4b8de4f33af8b6e73c9f750e05ae801d</oai:identifier> |
|
14 |
<oai:datestamp>2017-03-16T10:09:22Z</oai:datestamp> |
|
15 |
<oai:setSpec>Datacite</oai:setSpec> |
|
16 |
<oai:setSpec>openaire_data</oai:setSpec> |
|
17 |
<oai:setSpec>Datacite_enriched</oai:setSpec> |
|
18 | 18 |
</oai:header> |
19 | 19 |
<metadata> |
20 | 20 |
<resource xmlns="http://datacite.org/schema/kernel-3" |
21 | 21 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
22 |
<identifier xmlns="" identifierType="DOI">10.5517/cctnbd5</identifier>
|
|
22 |
<identifier xmlns="" identifierType="URL">10.6084/m9.figshare.c.3640895</identifier>
|
|
23 | 23 |
<creators xmlns=""> |
24 | 24 |
<creator> |
25 |
<creatorName>Hashmi, A.S.K.</creatorName>
|
|
25 |
<creatorName>Rosalinda D’Amore</creatorName>
|
|
26 | 26 |
</creator> |
27 | 27 |
<creator> |
28 |
<creatorName>Riedel, D.</creatorName>
|
|
28 |
<creatorName>Neil Hall</creatorName>
|
|
29 | 29 |
</creator> |
30 | 30 |
<creator> |
31 |
<creatorName>Grundl, M.A.</creatorName>
|
|
31 |
<creatorName>Umer Ijaz</creatorName>
|
|
32 | 32 |
</creator> |
33 | 33 |
<creator> |
34 |
<creatorName>Wittel, B.C.</creatorName>
|
|
34 |
<creatorName>Melanie Schirmer</creatorName>
|
|
35 | 35 |
</creator> |
36 | 36 |
<creator> |
37 |
<creatorName>Foll, A.</creatorName>
|
|
37 |
<creatorName>John Kenny</creatorName>
|
|
38 | 38 |
</creator> |
39 | 39 |
<creator> |
40 |
<creatorName>Lubkoll, J.</creatorName>
|
|
40 |
<creatorName>Richard Gregory</creatorName>
|
|
41 | 41 |
</creator> |
42 | 42 |
<creator> |
43 |
<creatorName>Traut, T.</creatorName>
|
|
43 |
<creatorName>Alistair Darby</creatorName>
|
|
44 | 44 |
</creator> |
45 | 45 |
<creator> |
46 |
<creatorName>Hewer, R.</creatorName>
|
|
46 |
<creatorName>Migun Shakya</creatorName>
|
|
47 | 47 |
</creator> |
48 | 48 |
<creator> |
49 |
<creatorName>Rominger, F.</creatorName>
|
|
49 |
<creatorName>Mircea Podar</creatorName>
|
|
50 | 50 |
</creator> |
51 | 51 |
<creator> |
52 |
<creatorName>Frey, W.</creatorName>
|
|
52 |
<creatorName>Christopher Quince</creatorName>
|
|
53 | 53 |
</creator> |
54 |
<creator> |
|
55 |
<creatorName>Bats, J.W.</creatorName> |
|
56 |
</creator> |
|
57 | 54 |
</creators> |
58 | 55 |
<titles xmlns=""> |
59 |
<title>CCDC 764317: Experimental Crystal Structure Determination</title> |
|
56 |
<title>A comprehensive benchmarking study of protocols and sequencing platforms for 16S rRNA community |
|
57 |
profiling |
|
58 |
</title> |
|
60 | 59 |
</titles> |
61 | 60 |
|
62 |
<publisher xmlns="">Cambridge Crystallographic Data Centre</publisher> |
|
61 |
<publisher xmlns="">Figshare</publisher> |
|
62 |
<dates xmlns=""> |
|
63 |
<date dateType="Collected">2016-01-01</date> |
|
64 |
</dates> |
|
63 | 65 |
<subjects xmlns=""> |
64 | 66 |
|
65 |
<subject subjectScheme="">Crystal Structure</subject>
|
|
67 |
<subject subjectScheme="">Genetics</subject>
|
|
66 | 68 |
|
67 |
<subject subjectScheme="">Experimental 3D Coordinates</subject>
|
|
69 |
<subject subjectScheme="">Ecology</subject>
|
|
68 | 70 |
|
69 |
<subject subjectScheme="">Crystal System</subject>
|
|
71 |
<subject subjectScheme="">Immunology</subject>
|
|
70 | 72 |
|
71 |
<subject subjectScheme="">Space Group</subject> |
|
73 |
</subjects> |
|
74 |
<resourceType xmlns="" resourceTypeGeneral="dataset">dataset</resourceType> |
|
75 |
<relatedIdentifiers xmlns=""> |
|
72 | 76 |
|
73 |
<subject subjectScheme="">Cell Parameters</subject> |
|
77 |
<relatedIdentifier entityType="unknown" inverseRelationType="references" |
|
78 |
relatedIdentifierType="dnet" |
|
79 |
relationType="isReferencedBy">dli_resolver::2514d7d42a537f42a9678c5cd150f07d |
|
80 |
</relatedIdentifier> |
|
74 | 81 |
|
75 |
<subject subjectScheme="">Crystallography</subject> |
|
82 |
<relatedIdentifier entityType="unknown" inverseRelationType="references" |
|
83 |
relatedIdentifierType="dnet" |
|
84 |
relationType="isReferencedBy">dli_resolver::0f05d44dc9e97749ef7a4ac59534e75b |
|
85 |
</relatedIdentifier> |
|
76 | 86 |
|
77 |
<subject subjectScheme=""> |
|
78 |
rac-bis(acetonitrile)-(2,2'-bis((adamantan-1-ylmethoxy)carbonyl)-1,1'-bis(methoxycarbonyl)-3,3,3',3'-tetramethyl-1,1'-bi(cyclopropyl)-2,2'-diyl)-palladium |
|
79 |
</subject> |
|
87 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
88 |
relatedIdentifierType="dnet" |
|
89 |
relationType="isReferencedBy">dli_resolver::7ca10b8cd66746cdb8fc21e4208f8cc6 |
|
90 |
</relatedIdentifier> |
|
80 | 91 |
|
81 |
</subjects> |
|
82 |
<resourceType xmlns="" resourceTypeGeneral="dataset">dataset</resourceType> |
|
83 |
<relatedIdentifiers xmlns=""> |
|
92 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
93 |
relatedIdentifierType="dnet" |
|
94 |
relationType="isReferencedBy">dli_resolver::a93c5295e2cbc09f625e98d6f1c9d125 |
|
95 |
</relatedIdentifier> |
|
84 | 96 |
|
85 |
<relatedIdentifier entityType="publication" inverseRelationType="isSupplementedBy"
|
|
97 |
<relatedIdentifier entityType="publication" inverseRelationType="references"
|
|
86 | 98 |
relatedIdentifierType="dnet" |
87 |
relationType="isSupplementTo">dli_resolver::5e650db8759a8159c1569a1e126d50ae
|
|
99 |
relationType="isReferencedBy">dli_resolver::92d75939fbb63e29a7e15e8df558dd20
|
|
88 | 100 |
</relatedIdentifier> |
89 | 101 |
|
102 |
<relatedIdentifier entityType="unknown" inverseRelationType="references" |
|
103 |
relatedIdentifierType="openaire" |
|
104 |
relationType="isReferencedBy">oai:dnet:dedup_wf_001::14607676f9663aa9236522b8363b6812 |
|
105 |
</relatedIdentifier> |
|
106 |
|
|
107 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
108 |
relatedIdentifierType="dnet" |
|
109 |
relationType="isReferencedBy">dli_resolver::f91045f61f8ec34f4dcb6c62bc783157 |
|
110 |
</relatedIdentifier> |
|
111 |
|
|
112 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
113 |
relatedIdentifierType="dnet" |
|
114 |
relationType="isReferencedBy">dli_resolver::0597712d190399b55136c593ac1f8cd6 |
|
115 |
</relatedIdentifier> |
|
116 |
|
|
117 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
118 |
relatedIdentifierType="dnet" |
|
119 |
relationType="isReferencedBy">dli_resolver::0597712d190399b55136c593ac1f8cd6 |
|
120 |
</relatedIdentifier> |
|
121 |
|
|
122 |
<relatedIdentifier entityType="publication" inverseRelationType="references" |
|
123 |
relatedIdentifierType="dnet" |
|
124 |
relationType="isReferencedBy">dli_resolver::0597712d190399b55136c593ac1f8cd6 |
|
125 |
</relatedIdentifier> |
|
126 |
|
|
90 | 127 |
</relatedIdentifiers> |
91 | 128 |
</resource> |
92 | 129 |
</metadata> |
... | ... | |
94 | 131 |
<oaf:datainfo> |
95 | 132 |
<oaf:completionStatus>complete</oaf:completionStatus> |
96 | 133 |
|
97 |
<oaf:collectedFrom completionStatus="complete" id="dli_________::r3d100010197" |
|
98 |
name="Cambridge Crystallographic Data Centre"/> |
|
134 |
<oaf:collectedFrom completionStatus="complete" id="dli_________::openaire" name="OpenAIRE"/> |
|
99 | 135 |
|
100 | 136 |
</oaf:datainfo> |
101 | 137 |
</oaf:about> |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_pmf.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 |
<record xmlns:oaf="http://namespace.dnet.eu/oaf" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
4 |
xmlns="http://www.scholix.org"> |
|
5 |
<oai:header xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
6 |
xmlns=""> |
|
7 |
<dri:objIdentifier>elsevier____::000147253d2794c1857cde747c11e4b3</dri:objIdentifier> |
|
8 |
<dri:recordIdentifier>10.1016/j.mcp.2014.03.002::KC136842</dri:recordIdentifier> |
|
9 |
<dri:dateOfCollection>2017-02-09T16:57:25.533+01:00</dri:dateOfCollection> |
|
10 |
<dri:repositoryId> |
|
11 |
2d1245d4-c169-4247-9106-0f69f8d752eb_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= |
|
2 |
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/"> |
|
3 |
<oai:header> |
|
4 |
<dri:objIdentifier xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
5 |
r3d100010255::d5ad02b122901f0d9d59f8348768d0f9 |
|
6 |
</dri:objIdentifier> |
|
7 |
<dri:recordIdentifier xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
8 |
http://doi.org/10.1007/s10964-016-0587-y |
|
9 |
</dri:recordIdentifier> |
|
10 |
<dri:dateOfCollection xmlns:dri="http://www.driver-repository.eu/namespace/dri">2017-02-16T11:53:25.417+01:00 |
|
11 |
</dri:dateOfCollection> |
|
12 |
<dri:repositoryId xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
13 |
icpsr_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU= |
|
12 | 14 |
</dri:repositoryId> |
13 |
<dri:datasourceprefix>elsevier____</dri:datasourceprefix> |
|
15 |
<dri:datasourceprefix xmlns:dri="http://www.driver-repository.eu/namespace/dri">r3d100010255 |
|
16 |
</dri:datasourceprefix> |
|
17 |
<oai:identifier>http://doi.org/10.1007/s10964-016-0587-y</oai:identifier> |
|
14 | 18 |
</oai:header> |
15 | 19 |
<metadata> |
16 |
<oaf:pid type="doi">10.1016/j.mimet.2014.08.021</oaf:pid> |
|
17 |
<dc:identifier>http://dx.doi.org/10.1016/j.mimet.2014.08.021</dc:identifier> |
|
18 |
<dc:title>"MALDI-TOF MS based identification of food-borne yeast isolates"</dc:title> |
|
19 |
<dc:creator>Pavlovic Melanie</dc:creator> |
|
20 |
<dc:creator>Mewes Anne</dc:creator> |
|
21 |
<dc:creator>Maggipinto Marzena</dc:creator> |
|
22 |
<dc:creator>Schmidt Wolfgang</dc:creator> |
|
23 |
<dc:creator>Messelhäußer Ute</dc:creator> |
|
24 |
<dc:creator>Balsliemke Joachim</dc:creator> |
|
25 |
<dc:creator>Hörmansdorfer Stefan</dc:creator> |
|
26 |
<dc:creator>Busch Ulrich</dc:creator> |
|
27 |
<dc:creator>Huber Ingrid</dc:creator> |
|
28 |
<dc:date>2014-09-02T22:06:28Z</dc:date> |
|
29 |
<dc:description></dc:description> |
|
30 |
<dc:type>publication</dc:type> |
|
31 |
|
|
32 |
<oaf:relatedIdentifier relatedIdentifierType="pdb" relationType="unknown" inverseRelationType="unknown" |
|
33 |
entityType="dataset">its4 |
|
34 |
</oaf:relatedIdentifier> |
|
35 |
|
|
36 |
<oaf:relatedIdentifier relatedIdentifierType="dnet" relationType="cites" inverseRelationType="isCitedBy" |
|
37 |
entityType="dataset">dliresolver_::7201fbb088d569cb40483d25b4443bca |
|
38 |
</oaf:relatedIdentifier> |
|
39 |
|
|
40 |
<oaf:relatedIdentifier relatedIdentifierType="dnet" relationType="cites" inverseRelationType="isCitedBy" |
|
41 |
entityType="dataset">dliresolver_::7201fbb088d569cb40483d25b4443bca |
|
42 |
</oaf:relatedIdentifier> |
|
43 |
|
|
44 |
<dc:publisher>Elsevier BV</dc:publisher> |
|
20 |
<oaf:pid xmlns:oaf="http://namespace.dnet.eu/oaf" type="DOI">http://doi.org/10.1007/s10964-016-0587-y</oaf:pid> |
|
21 |
<dc:identifier xmlns:dc="http://purl.org/dc/elements/1.1/">http://doi.org/10.1007/s10964-016-0587-y |
|
22 |
</dc:identifier> |
|
23 |
<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">Consequences of violent victimization for Native American |
|
24 |
youth in early adulthood |
|
25 |
</dc:title> |
|
26 |
<dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Turanovic, J.J.</dc:creator> |
|
27 |
<dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Pratt, T.C.</dc:creator> |
|
28 |
<dc:type xmlns:dc="http://purl.org/dc/elements/1.1/">publication</dc:type> |
|
45 | 29 |
</metadata> |
46 |
<oaf:about> |
|
30 |
<oaf:about xmlns:oaf="http://namespace.dnet.eu/oaf">
|
|
47 | 31 |
<oaf:datainfo> |
32 |
<oaf:collectedFrom completionStatus="complete" id="dli_________::r3d100010255" name="ICPSR"/> |
|
48 | 33 |
<oaf:completionStatus>complete</oaf:completionStatus> |
49 |
|
|
50 |
<oaf:collectedFrom id="dli_________::elsevier" name="Elsevier" completionStatus="incomplete"/> |
|
51 |
|
|
52 |
<oaf:resolvedFrom id="dli_________::crossref" name="Crossref" completionStatus="complete"/> |
|
53 |
|
|
34 |
<oaf:provisionMode>collected</oaf:provisionMode> |
|
54 | 35 |
</oaf:datainfo> |
55 | 36 |
</oaf:about> |
56 |
|
|
57 |
</record> |
|
37 |
</oai:record> |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/kv/DNGFKey.java | ||
---|---|---|
7 | 7 |
import com.google.common.collect.ComparisonChain; |
8 | 8 |
import org.apache.hadoop.io.IntWritable; |
9 | 9 |
import org.apache.hadoop.io.Text; |
10 |
import org.apache.hadoop.io.Writable; |
|
10 | 11 |
import org.apache.hadoop.io.WritableComparable; |
11 | 12 |
|
12 | 13 |
/** |
13 | 14 |
* Created by claudio on 13/03/2017. |
14 | 15 |
*/ |
15 |
public class DNGFKey implements WritableComparable<DNGFKey> { |
|
16 |
public class DNGFKey implements Writable, WritableComparable<DNGFKey> {
|
|
16 | 17 |
|
17 | 18 |
public final static int MERGES_REL = 0; |
18 | 19 |
public final static int OTHER_REL = 1; |
... | ... | |
58 | 59 |
|
59 | 60 |
// Orders DNGFKey instances.
// This hunk shows two revisions of the body. Judging by the gutter numbers, the
// ComparisonChain version is the removed one and the compareValue version is the
// added one.
// Removed revision: compares keyType first, then id.
// Added revision: compares the string form of id first and falls back to keyType only
// when the ids are equal.
// NOTE(review): this swaps the sort precedence from (keyType, id) to (id, keyType) -
// confirm that any grouping/sorting comparator relying on the old order was updated.
// NOTE(review): the added revision reads this.id and this.keyType directly and calls
// toString() on them; no null handling is visible.
@Override |
60 | 61 |
public int compareTo(final DNGFKey o) { |
61 |
return ComparisonChain.start() |

62 |
.compare(getKeyType(), o.getKeyType()) |

63 |
.compare(getId(), o.getId()) |

64 |
.result(); |

62 |
int compareValue = this.id.toString().compareTo(o.getId().toString()); |

63 |
if (compareValue == 0) { |

64 |
compareValue = this.keyType.compareTo(o.getKeyType()); |

65 |
} |

66 |
return compareValue; // sort ascending |

65 | 67 |
} |
66 | 68 |
|
67 | 69 |
@Override |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dedup/kv/DNGFKeyPartitioner.java | ||
---|---|---|
10 | 10 |
|
11 | 11 |
// Chooses the partition from the hash code of the key id only (the key type is not
// used), so keys with the same id hash land in the same partition.
// Both revisions shown in this hunk compute the same value; the added one only stores
// it in a local variable before returning it.
// Math.abs is applied after the modulo, so its argument is already smaller in
// magnitude than numPartitions.
@Override |
12 | 12 |
public int getPartition(DNGFKey key, ImmutableBytesWritable val, int numPartitions) { |
13 |
return Math.abs(key.getId().hashCode() % numPartitions); |

13 |
final int res = Math.abs(key.getId().hashCode() % numPartitions); |

14 |
|

15 |
return res; |

16 |
|

17 |
|

14 | 18 |
} |
15 | 19 |
|
16 | 20 |
} |
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml | ||
---|---|---|
28 | 28 |
<!-- MISC --> |
29 | 29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
30 | 30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapred.task.timeout" value="2400000"/> |
|
33 | 34 |
|
34 | 35 |
<PROPERTY key="mapred.reduce.tasks" value="500" /> |
35 | 36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2HdfsActionsJob.xml | ||
---|---|---|
34 | 34 |
<PROPERTY key="mapred.output.compress" value="true"/> |
35 | 35 |
<PROPERTY key="mapred.output.compression.type" value="BLOCK"/> |
36 | 36 |
|
37 |
<PROPERTY key="mapred.reduce.tasks" value="10" />
|
|
37 |
<PROPERTY key="mapred.reduce.tasks" value="100"/>
|
|
38 | 38 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
39 | 39 |
|
40 | 40 |
<!-- Uncomment to override the default lib path --> |
modules/dnet-dli/trunk/src/test/java/eu/dnetlib/resolver/DateTimeTest.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
import java.time.Instant; |
|
6 |
import java.time.LocalDateTime; |
|
7 |
import java.time.ZoneId; |
|
8 |
import java.time.format.DateTimeFormatter; |
|
9 |
|
|
10 |
/**
 * Scratch test for the epoch-millis to UTC timestamp formatting used by the
 * incremental harvesting code (same pattern, same Etc/UTC zone).
 *
 * NOTE(review): the test only prints the formatted value - twice, with identical
 * arguments - and asserts nothing, so it cannot fail on a wrong result. For the input
 * 1500540547771L the expected output is "2017-07-20T08:49:07Z"; consider asserting it.
 */
public class DateTimeTest { |

11 |
|

12 |
@Test |

13 |
public void testDateTime() { |

14 |
long l = 1500540547771L; |

15 |
|

16 |
LocalDateTime zdt = LocalDateTime.ofInstant(Instant.ofEpochMilli(l), ZoneId.of("Etc/UTC")); |

17 |
|

18 |
System.out.println(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").format(zdt)); |

19 |
|

20 |
System.out.println(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'").format(zdt)); |

21 |
|

22 |
|

23 |
} |

24 |
|

25 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/ResolveAndIndexJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.nodes; |
|
2 |
|
|
3 |
import static eu.dnetlib.data.proto.dli.ScholixObjectProtos.*; |
|
4 |
|
|
5 |
import com.googlecode.protobuf.format.JsonFormat; |
|
6 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
|
7 |
import eu.dnetlib.dli.resolver.PIDResolver; |
|
8 |
import eu.dnetlib.dli.resolver.model.*; |
|
9 |
import eu.dnetlib.enabling.resultset.client.ResultSetClient; |
|
10 |
import eu.dnetlib.msro.workflows.graph.Arc; |
|
11 |
import eu.dnetlib.msro.workflows.procs.Env; |
|
12 |
import eu.dnetlib.resolver.parser.DMFResolverParser; |
|
13 |
import eu.dnetlib.rmi.common.ResultSet; |
|
14 |
import org.apache.commons.lang3.StringUtils; |
|
15 |
import org.apache.commons.logging.Log; |
|
16 |
import org.apache.commons.logging.LogFactory; |
|
17 |
import org.springframework.beans.factory.annotation.Autowired; |
|
18 |
import org.springframework.http.converter.StringHttpMessageConverter; |
|
19 |
import org.springframework.web.client.RestTemplate; |
|
20 |
|
|
21 |
import java.net.URI; |
|
22 |
import java.nio.charset.Charset; |
|
23 |
import java.time.LocalDateTime; |
|
24 |
import java.util.Arrays; |
|
25 |
import java.util.List; |
|
26 |
|
|
27 |
public class ResolveAndIndexJobNode extends SimpleJobNode { |
|
28 |
|
|
29 |
private static final Log log = LogFactory.getLog(ResolveAndIndexJobNode.class); |
|
30 |
|
|
31 |
|
|
32 |
private static final String BASE_CFG_URL = "http://%s:9200/%s/scholix/%s/?pretty"; |
|
33 |
|
|
34 |
|
|
35 |
private String inputEprParam; |
|
36 |
|
|
37 |
private String indexHost; |
|
38 |
|
|
39 |
private String indexName; |
|
40 |
|
|
41 |
@Autowired |
|
42 |
private List<PIDResolver> pluginResolver; |
|
43 |
|
|
44 |
@Autowired |
|
45 |
private ResultSetClient resultSetClient; |
|
46 |
|
|
47 |
/**
 * Reads the records of the input result set, parses each one, and for every relation
 * of a parsed record posts two Scholix JSON documents to the index: one for the
 * relation as stated and one for its inverse (source/target and name/inverse swapped).
 *
 * The target URL is built from BASE_CFG_URL with indexHost, indexName and the
 * identifier produced by generateIdentifier. Records that the parser cannot handle
 * (parseObject returns null) are logged and skipped.
 *
 * NOTE(review): result.getDatasourceProvenance().get(0) is dereferenced without a
 * null/empty check, whereas generateResource guards against a null provenance list -
 * confirm the parser always supplies at least one provenance entry.
 * NOTE(review): resolveRelation is defined outside this view; its result is passed to
 * generateResource and generateIdentifier without a null check.
 *
 * @param env the workflow environment holding the input result set under inputEprParam
 * @return Arc.DEFAULT_ARC once all records have been processed
 * @throws Exception declared by the signature; no exception is caught inside the method
 */
@Override |

48 |
protected String execute(Env env) throws Exception { |

49 |
|

50 |
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class); |

51 |
|

52 |
final Iterable<String> records = resultSetClient.iter(rsIn, String.class); |

53 |
|

54 |
final RestTemplate restTemplate = new RestTemplate(); |

55 |
// Registers a UTF-8 string converter at the front of the converter list.
restTemplate.getMessageConverters() |

56 |
.add(0, new StringHttpMessageConverter(Charset.forName("UTF-8"))); |

57 |
|

58 |
// NOTE(review): both setters receive the field they presumably assign (setters are
// outside this view), which looks like a no-op - confirm or remove.
setIndexHost(indexHost); |

59 |
setIndexName(indexName); |

60 |
|

61 |
DMFResolverParser parser = new DMFResolverParser(); |

62 |
|

63 |
|

64 |
for (String record : records) { |

65 |
final ResolvedObject result = parser.parseObject(record); |

66 |
if (result == null) { |

67 |
log.error("error on parsing " + record); |

68 |
continue; |

69 |
} |

70 |
for (final ObjectRelation rels : result.getRelations()) { |

71 |
final ResolvedObject resolvedRelation = resolveRelation(rels.getTargetPID(), result.getDatasourceProvenance().get(0)); |

72 |
|

73 |
|

74 |
final Scholix.Builder scholix = Scholix.newBuilder(); |

75 |
|

76 |
scholix.addLinkproviderBuilder() |

77 |
.setName(result.getDatasourceProvenance().get(0).getDatasource()) |

78 |
.addIdentifiersBuilder() |

79 |
.setIdentifier(result.getDatasourceProvenance().get(0).getDatasourceId()) |

80 |
.setSchema("dnetIdentifier"); |

81 |
|

82 |
scholix.setRelationship(ScholixRelationship.newBuilder() |

83 |
.setName(rels.getRelationSemantics()) |

84 |
.setInverse(rels.getInverseRelation()) |

85 |
.setSchema("datacite") |

86 |
.build()); |

87 |
|

88 |
final ScholixResource source = generateResource(result); |

89 |
final ScholixResource target = generateResource(resolvedRelation); |

90 |
scholix.setSource(source); |

91 |
scholix.setTarget(target); |

92 |
// The publication date of the link is the local time at which it is indexed.
scholix.setPublicationDate(LocalDateTime.now().toString()); |

93 |
|

94 |
|

95 |
// log.error(indexURL); |

96 |
// |

97 |
// log.info("json : "+JsonFormat.printToString(scholix.build())); |

98 |
|

99 |
|

100 |
restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(result, resolvedRelation)), JsonFormat.printToString(scholix.build())); |

101 |
|

102 |
// The same builder is reused for the inverse link, so the link provider and the
// publication date set above carry over unchanged.
scholix.setRelationship(ScholixRelationship.newBuilder() |

103 |
.setInverse(rels.getRelationSemantics()) |

104 |
.setName(rels.getInverseRelation()) |

105 |
.setSchema("datacite") |

106 |
.build()); |

107 |
scholix.setTarget(source); |

108 |
scholix.setSource(target); |

109 |
|

110 |
restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(resolvedRelation, result)), JsonFormat.printToString(scholix.build())); |

111 |
} |

112 |
} |

113 |
return Arc.DEFAULT_ARC; |

114 |
} |
|
115 |
|
|
116 |
/**
 * Builds the document identifier for a link: the md5 (as computed by
 * AbstractDNetXsltFunctions.md5) of "sourcePid::targetPid", with both PIDs lower-cased
 * and trimmed. The argument order matters, so a link and its inverse get different
 * identifiers.
 *
 * NOTE(review): getPid() is dereferenced on both arguments without a null check.
 * NOTE(review): toLowerCase() uses the default locale - confirm PIDs are plain ASCII.
 *
 * @param source the object on the source side of the link
 * @param target the object on the target side of the link
 * @return the md5-based identifier
 */
private String generateIdentifier(final ResolvedObject source, ResolvedObject target) { |

117 |
|

118 |
return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.getPid().toLowerCase().trim(), target.getPid().toLowerCase().trim())); |

119 |
|

120 |
} |
|
121 |
|
|
122 |
private ScholixResource generateResource(ResolvedObject result) { |
|
123 |
final ScholixResource.Builder builder = ScholixResource.newBuilder(); |
|
124 |
if (result.getDatasourceProvenance() != null) |
|
125 |
result.getDatasourceProvenance().forEach( |
|
126 |
objectProvenance -> { |
|
127 |
builder.addCollectedFrom(ScholixCollectedFrom.newBuilder() |
|
128 |
.setProvisionMode(objectProvenance.getProvisionMode()) |
|
129 |
.setCompletionStatus(objectProvenance.getCompletionStatus()) |
|
130 |
.setProvider(ScholixEntityId.newBuilder() |
|
131 |
.setName(objectProvenance.getDatasource()) |
|
132 |
.addIdentifiers(ScholixIdentifier.newBuilder().setIdentifier(objectProvenance.getDatasourceId()) |
|
133 |
.setSchema("dnetIdentifier").build()) |
|
134 |
.build())); |
|
135 |
if (StringUtils.isNotEmpty(objectProvenance.getPublisher())) { |
|
136 |
builder.addPublisher(ScholixEntityId.newBuilder() |
|
137 |
.setName(objectProvenance.getPublisher()) |
|
138 |
.build()); |
|
139 |
} |
|
140 |
|
|
141 |
}); |
|
142 |
builder.addIdentifier(ScholixIdentifier.newBuilder(). |
|
143 |
setIdentifier(result.getPid()) |
|
144 |
.setSchema(result.getPidType()) |
|
145 |
.build()); |
|
146 |
builder.setObjectType(result.getType().toString()); |
|
147 |
if (result.getTitles() != null && result.getTitles().size() > 0) |
|
148 |
builder.setTitle(result.getTitles().get(0)); |
|
149 |
if (result.getAuthors() != null) |
|
150 |
result.getAuthors().forEach(author -> builder.addCreator( |
|
151 |
ScholixEntityId.newBuilder() |
|
152 |
.setName(author) |
|
153 |
.build())); |
|
154 |
if (StringUtils.isNotBlank(result.getDate())) { |
|
155 |
builder.setPublicationDate(result.getDate()); |
|
156 |
} |
|
157 |
|
|
158 |
String tp = null; |
|
159 |
|
|
160 |
switch (result.getType()) { |
|
161 |
case dataset: |
|
162 |
tp = "60"; |
|
163 |
break; |
|
164 |
case unknown: |
|
165 |
tp = "70"; |
|
166 |
break; |
|
167 |
case publication: |
|
168 |
tp = "50"; |
|
169 |
break; |
|
170 |
} |
|
171 |
builder.setDnetIdentifier(tp + "|dnet________::" + result.getIdentifier()); |
|
172 |
return builder.build(); |
|
173 |
} |
|
174 |
|
|
175 |
|
|
176 |
private ResolvedObject resolveRelation(final PID currentPid, final ObjectProvenance provenance) { |
|
177 |
for (PIDResolver resolver : pluginResolver) { |
|
178 |
final ResolvedObject currentIdentifier = resolver.retrievePID(currentPid.getId(), currentPid.getType()); |
|
179 |
|
|
180 |
if (currentIdentifier != null && |
|
181 |
!StringUtils.isBlank(currentIdentifier.getPid()) && |
|
182 |
currentIdentifier.getPid().toLowerCase().equals(currentPid.getId().toLowerCase())) { |
|
183 |
return currentIdentifier; |
|
184 |
} |
|
185 |
} |
|
186 |
|
|
187 |
final ResolvedObject resolvedObject = new ResolvedObject(); |
|
188 |
resolvedObject.setPid(currentPid.getId()); |
|
189 |
resolvedObject.setPidType(currentPid.getType()); |
|
190 |
ObjectProvenance resultProvenance = new ObjectProvenance(); |
|
191 |
resultProvenance.setDatasource(provenance.getDatasource()); |
|
192 |
resultProvenance.setDatasourceId(provenance.getDatasourceId()); |
|
193 |
resultProvenance.setCompletionStatus(CompletionStatus.incomplete.toString()); |
|
194 |
resultProvenance.setProvisionMode(ObjectProvisionMode.collected.toString()); |
|
195 |
resolvedObject.setDatasourceProvenance(Arrays.asList(resultProvenance)); |
|
196 |
return resolvedObject; |
|
197 |
} |
|
198 |
|
|
199 |
public String getInputEprParam() { |
|
200 |
return inputEprParam; |
|
201 |
} |
|
202 |
|
|
203 |
public void setInputEprParam(String inputEprParam) { |
|
204 |
this.inputEprParam = inputEprParam; |
|
205 |
} |
|
206 |
|
|
207 |
public String getIndexHost() { |
|
208 |
return indexHost; |
|
209 |
} |
|
210 |
|
|
211 |
public void setIndexHost(String indexHost) { |
|
212 |
this.indexHost = indexHost; |
|
213 |
} |
|
214 |
|
|
215 |
public String getIndexName() { |
|
216 |
return indexName; |
|
217 |
} |
|
218 |
|
|
219 |
public void setIndexName(String indexName) { |
|
220 |
this.indexName = indexName; |
|
221 |
} |
|
222 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/mdstore/plugin/ResolverMDStorePlugin.java | ||
---|---|---|
99 | 99 |
|
100 | 100 |
final FindIterable<DBObject> mdstoreRecords = currentMdStoreCollection.find(); |
101 | 101 |
|
102 |
mdstoreRecords.noCursorTimeout(true); |
|
103 |
|
|
102 | 104 |
final BlockingQueue<DBObject> queue = new ArrayBlockingQueue<>(100); |
103 | 105 |
|
104 | 106 |
final List<Future<Boolean>> responses = new ArrayList<>(); |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/msro/workflows/nodes/applicationContext-msro-dli-nodes.xml | ||
---|---|---|
75 | 75 |
class="eu.dnetlib.msro.workflows.nodes.actionmanager.UpdateSetsJobNode" |
76 | 76 |
scope="prototype"/> |
77 | 77 |
|
78 |
<bean id="wfNodeQueryUserActionDbJob" |
|
78 |
<bean id="wfNodeResolveAndIndexJob" |
|
79 |
class="eu.dnetlib.msro.workflows.nodes.ResolveAndIndexJobNode" |
|
80 |
scope="prototype"/> |
|
81 |
|
|
82 |
<bean id="wfNodeQueryUserActionDbJob" |
|
79 | 83 |
class="eu.dnetlib.msro.workflows.nodes.dedup.QueryUserActionDbJobNode" |
80 | 84 |
scope="prototype"/> |
81 | 85 |
|
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_incremental_wf.xml.st | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value=""/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value=""/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>$name$</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_DESCRIPTION>$desc$</WORKFLOW_DESCRIPTION> |
|
13 |
<WORKFLOW_INFO /> |
|
14 |
<WORKFLOW_FAMILY>aggregator</WORKFLOW_FAMILY> |
|
15 |
<WORKFLOW_PRIORITY>$priority$</WORKFLOW_PRIORITY> |
|
16 |
<DATASOURCE id="$dsId$" interface="$interface$" /> |
|
17 |
|
|
18 |
<CONFIGURATION status="WAIT_SYS_SETTINGS" start="MANUAL"> |
|
19 |
<PARAMETERS> |
|
20 |
<PARAM name="collMdstoreId" description="Store for collected records" required="true" managedBy="system" category="MDSTORE_ID"/> |
|
21 |
<PARAM name="incrementalMdstoreId" description="Store for unknown records" required="true" managedBy="system" category="MDSTORE_ID"/> |
|
22 |
<PARAM description="Type of Transformation" function="validValues(['simpleTransform', 'transformAndUnpack'])" managedBy="user" name="typeOfTransform" required="true" type="string">simpleTransform</PARAM> |
|
23 |
<PARAM name="cleanTransformationRuleId" description="Transformation Rule Identifier" required="true" managedBy="user" category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI:')"/> |
|
24 |
<PARAM name="from_date" description="Start Date of Harvesting" required="false" managedBy="user" category="COLLECTION" type="string"/> |
|
25 |
<PARAM name="indexHost" description="ElasticSearch index host" managedBy="user" required="true" type="string"/> |
|
26 |
<PARAM name="indexName" description="ElasticSearch index Name" managedBy="user" required="true" type="string"/> |
|
27 |
</PARAMETERS> |
|
28 |
<WORKFLOW> |
|
29 |
<NODE isStart="true" name="collection" type="LaunchWorkflowTemplate"> |
|
30 |
<DESCRIPTION>Collect records</DESCRIPTION> |
|
31 |
<PARAMETERS> |
|
32 |
<PARAM name="wfTemplateId" value="be04ef26-12aa-46eb-9c67-60ca18e29539_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" /> |
|
33 |
<PARAM name="wfTemplateParams"> |
|
34 |
<MAP> |
|
35 |
<ENTRY key="dsId" value="$dsId$" /> |
|
36 |
<ENTRY key="interface" value="$interface$" /> |
|
37 |
<ENTRY key="collMdstoreId" ref="collMdstoreId" /> |
|
38 |
<ENTRY key="from_date" ref="from_date" /> |
|
39 |
</MAP> |
|
40 |
</PARAM> |
|
41 |
</PARAMETERS> |
|
42 |
<ARCS> |
|
43 |
<ARC to="transform"/> |
|
44 |
</ARCS> |
|
45 |
</NODE> |
|
46 |
<NODE name="transform" type="LaunchWorkflowTemplate"> |
|
47 |
<DESCRIPTION>Transform records</DESCRIPTION> |
|
48 |
<PARAMETERS> |
|
49 |
<PARAM name="wfTemplateId" value="da3531c6-2bf6-48ab-848b-bd4c6379fd65_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" /> |
|
50 |
<PARAM name="wfTemplateParams"> |
|
51 |
<MAP> |
|
52 |
<ENTRY key="dsId" value="$dsId$" /> |
|
53 |
<ENTRY key="interface" value="$interface$" /> |
|
54 |
<ENTRY key="collMdstoreId" ref="collMdstoreId" /> |
|
55 |
<ENTRY key="cleanMdstoreId" ref="incrementalMdstoreId" /> |
|
56 |
<ENTRY key="cleanRuleId" ref="cleanTransformationRuleId" /> |
|
57 |
<ENTRY key="typeOfTransform" ref="typeOfTransform" /> |
|
58 |
</MAP> |
|
59 |
</PARAM> |
|
60 |
</PARAMETERS> |
|
61 |
<ARCS> |
|
62 |
<ARC to="pidResolver"/> |
|
63 |
</ARCS> |
|
64 |
</NODE> |
|
65 |
<NODE name="pidResolver" type="LaunchWorkflowTemplate"> |
|
66 |
<DESCRIPTION>Resolve PIDs and index records</DESCRIPTION> |
|
67 |
<PARAMETERS> |
|
68 |
<PARAM name="wfTemplateId" value="ae5f14ac-54e1-4513-b41c-67b82240e5bd_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" /> |
|
69 |
<PARAM name="wfTemplateParams"> |
|
70 |
<MAP> |
|
71 |
<ENTRY key="cleanMdStoreId" ref="incrementalMdstoreId" /> |
|
72 |
<ENTRY key="indexHost" ref="indexHost" /> |
|
73 |
<ENTRY key="indexName" ref="indexName" /> |
|
74 |
</MAP> |
|
75 |
</PARAM> |
|
76 |
</PARAMETERS> |
|
77 |
<ARCS> |
|
78 |
<ARC to="success"/> |
|
79 |
</ARCS> |
|
80 |
</NODE> |
|
81 |
</WORKFLOW> |
|
82 |
<DESTROY_WORKFLOW_TEMPLATE id="8bf9879c-535a-4818-8de7-790a3eb90675_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="> |
|
83 |
<PARAMETERS> |
|
84 |
<PARAM name="dsId" value="$dsId$" /> |
|
85 |
<PARAM name="interface" value="$interface$" /> |
|
86 |
<PARAM name="DLINativeStore" ref="collMdstoreId" /> |
|
87 |
<PARAM name="DLICleanedStore" ref="cleanMdstoreId" /> |
|
88 |
<PARAM name="PMFResolvedStore" ref="publicationMdstoreId" /> |
|
89 |
<PARAM name="DMFResolvedStore" ref="dataSetMdstoreId" /> |
|
90 |
<PARAM name="ScholixResolvedStore" ref="unknownMdstoreId" /> |
|
91 |
</PARAMETERS> |
|
92 |
</DESTROY_WORKFLOW_TEMPLATE> |
|
93 |
</CONFIGURATION> |
|
94 |
<NOTIFICATIONS/> |
|
95 |
<SCHEDULING enabled="false"> |
|
96 |
<CRON>9 9 9 ? * *</CRON> |
|
97 |
<MININTERVAL>10080</MININTERVAL> |
|
98 |
</SCHEDULING> |
|
99 |
<STATUS/> |
|
100 |
</BODY> |
|
101 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/mnd_transform.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="58ed3d19-1113-4379-98e3-e8e1dfb2f1de_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
5 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2014-11-19T11:05:55+01:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<CONFIGURATION> |
|
12 |
<IMPORTED/> |
|
13 |
<SCRIPT> |
|
14 |
<TITLE>DLI: Mendeley to Scholix transform</TITLE> |
|
15 |
<CODE><![CDATA[<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:datetime="http://exslt.org/dates-and-times" |
|
16 |
xmlns:exslt="http://exslt.org/common" xmlns:oaf="http://namespace.dnet.eu/oaf" |
|
17 |
xmlns:dnet="eu.dnetlib.dli.DLIUtils" |
|
18 |
xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
19 |
version="2.0" |
|
20 |
exclude-result-prefixes="xsl datetime exslt dnet"> |
|
21 |
<xsl:template match="/"> |
|
22 |
<xsl:variable name="namespacePrefix"> |
|
23 |
<xsl:value-of select="//*[local-name()='header']/*[local-name()='datasourceprefix']"/> |
|
24 |
</xsl:variable> |
|
25 |
<xsl:variable name="datasourceID" select="dnet:getIdFromDataSourcePrefix($namespacePrefix)"/> |
|
26 |
<xsl:variable name="datasourceName" select="dnet:getNameFromDataSourcePrefix($namespacePrefix)"/> |
|
27 |
<oai:record xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
28 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
29 |
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" |
|
30 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
31 |
xmlns:scholix ="http://www.scholix.org" |
|
32 |
xmlns="http://namespace.openaire.eu/"> |
|
33 |
<xsl:copy-of select="//*[local-name()='header']"/> |
|
34 |
<metadata> |
|
35 |
<link xmlns="http://www.scholix.org"> |
|
36 |
<linkProvider> |
|
37 |
<name> |
|
38 |
<xsl:value-of select="$datasourceName"/> |
|
39 |
</name> |
|
40 |
<identifiers> |
|
41 |
<identifier> |
|
42 |
<xsl:value-of select="$datasourceID"/> |
|
43 |
</identifier> |
|
44 |
<schema>DNet-identifier</schema> |
|
45 |
</identifiers> |
|
46 |
</linkProvider> |
|
47 |
<relashionship> |
|
48 |
<name>unknown</name> |
|
49 |
<schema>datacite</schema> |
|
50 |
<inverseRelashionship>unknown</inverseRelashionship> |
|
51 |
</relashionship> |
|
52 |
<source> |
|
53 |
<identifier> |
|
54 |
<identifier> |
|
55 |
<xsl:value-of select="//*[local-name()='column' and ./@name='ArticleID']"/> |
|
56 |
</identifier> |
|
57 |
<schema>doi</schema> |
|
58 |
</identifier> |
|
59 |
</source> |
|
60 |
<target> |
|
61 |
<identifier> |
|
62 |
<identifier> |
|
63 |
<xsl:value-of select="//*[local-name()='column' and ./@name='datasetID']"/> |
|
64 |
</identifier> |
|
65 |
<schema>doi</schema> |
|
66 |
</identifier> |
|
67 |
</target> |
|
68 |
</link> |
|
69 |
</metadata> |
|
70 |
<oaf:about> |
|
71 |
<oaf:datainfo> |
|
72 |
<oaf:collectedFrom completionStatus="incomplete"> |
|
73 |
<xsl:attribute name="id"> |
|
74 |
<xsl:value-of select="dnet:getIdFromDataSourcePrefix($namespacePrefix)"/> |
|
75 |
</xsl:attribute> |
|
76 |
<xsl:attribute name="name"> |
|
77 |
<xsl:value-of select="dnet:getNameFromDataSourcePrefix($namespacePrefix)"/> |
|
78 |
</xsl:attribute> |
|
79 |
</oaf:collectedFrom> |
|
80 |
<oaf:completionStatus>incomplete</oaf:completionStatus> |
|
81 |
<oaf:provisionMode>collected</oaf:provisionMode> |
|
82 |
</oaf:datainfo> |
|
83 |
</oaf:about> |
|
84 |
</oai:record> |
|
85 |
</xsl:template> |
|
86 |
</xsl:stylesheet>]]></CODE> |
|
87 |
</SCRIPT> |
|
88 |
</CONFIGURATION> |
|
89 |
<STATUS/> |
|
90 |
<SECURITY_PARAMETERS/> |
|
91 |
</BODY> |
|
92 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/RepositoryServiceResources/mnd.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="f0a56408-65ad-42b4-89ae-a6b4c6b5f53b_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="/> |
|
5 |
<RESOURCE_TYPE value="RepositoryServiceResourceType"/> |
|
6 |
<RESOURCE_KIND value="RepositoryServiceResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2015-01-26T22:48:00+00:00"/> |
|
9 |
<PROTOCOL/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<CONFIGURATION> |
|
13 |
<DATASOURCE_TYPE>dli:contentprovider</DATASOURCE_TYPE> |
|
14 |
<DATASOURCE_ORIGINAL_ID provenance="DLI">dli_________::r3d100011868</DATASOURCE_ORIGINAL_ID> |
|
15 |
<DATASOURCE_AGGREGATED>false</DATASOURCE_AGGREGATED> |
|
16 |
<ENVIRONMENTS> |
|
17 |
<ENVIRONMENT>DLI</ENVIRONMENT> |
|
18 |
</ENVIRONMENTS> |
|
19 |
<TYPOLOGY>dataPublisher</TYPOLOGY> |
|
20 |
<MAX_SIZE_OF_DATASTRUCTURE>0</MAX_SIZE_OF_DATASTRUCTURE> |
|
21 |
<AVAILABLE_DISKSPACE>0</AVAILABLE_DISKSPACE> |
|
22 |
<MAX_NUMBER_OF_DATASTRUCTURE>0</MAX_NUMBER_OF_DATASTRUCTURE> |
|
23 |
<OFFICIAL_NAME>Mendeley Data</OFFICIAL_NAME> |
|
24 |
<ENGLISH_NAME>Mendeley Data</ENGLISH_NAME> |
|
25 |
<ICON_URI>https://data.mendeley.com/</ICON_URI> |
|
26 |
<COUNTRY/> |
|
27 |
<LOCATION> |
|
28 |
<LONGITUDE>8.64937973022461</LONGITUDE> |
|
29 |
<LATITUDE>53.167808532714844</LATITUDE> |
|
30 |
<TIMEZONE>0.0</TIMEZONE> |
|
31 |
</LOCATION> |
|
32 |
<REPOSITORY_WEBPAGE>https://data.mendeley.com/</REPOSITORY_WEBPAGE> |
|
33 |
<REPOSITORY_INSTITUTION/> |
|
34 |
<ADMIN_INFO></ADMIN_INFO> |
|
35 |
<INTERFACES> |
|
36 |
<INTERFACE active="true" compliance="dli" contentDescription="metadata" |
|
37 |
id="api_________::r3d100011868::0" |
|
38 |
label="Links provider" typology="links::provider" removable="false"> |
|
39 |
<ACCESS_PROTOCOL header="true" identifier="1" separator=",">fileCSV</ACCESS_PROTOCOL> |
|
40 |
<BASE_URL>file:///var/lib/dli/MDR-article-dataset-links.csv</BASE_URL> |
|
41 |
<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='column' and @isID='true'] |
|
42 |
</INTERFACE_EXTRA_FIELD> |
|
43 |
<INTERFACE_EXTRA_FIELD name="last_aggregation_total"/> |
|
44 |
<INTERFACE_EXTRA_FIELD name="last_collection_mdId"/> |
|
45 |
<INTERFACE_EXTRA_FIELD name="last_collection_total"/> |
|
46 |
<INTERFACE_EXTRA_FIELD name="last_aggregation_date"/> |
|
47 |
<INTERFACE_EXTRA_FIELD name="last_aggregation_mdId"/> |
|
48 |
</INTERFACE> |
|
49 |
</INTERFACES> |
|
50 |
<EXTRA_FIELDS> |
|
51 |
<FIELD> |
|
52 |
<key>DataSourceId</key> |
|
53 |
<value>dli_________::r3d100011868</value> |
|
54 |
</FIELD> |
|
55 |
<FIELD> |
|
56 |
<key>NamespacePrefix</key> |
|
57 |
<value>r3d100011868</value> |
|
58 |
</FIELD> |
|
59 |
<FIELD> |
|
60 |
<key>VERIFIED</key> |
|
61 |
<value>NO</value> |
|
62 |
</FIELD> |
|
63 |
</EXTRA_FIELDS> |
|
64 |
<REGISTERED_BY>sandro.labruzzo@isti.cnr.it</REGISTERED_BY> |
|
65 |
</CONFIGURATION> |
|
66 |
<STATUS> |
|
67 |
<NUMBER_OF_OBJECTS>0</NUMBER_OF_OBJECTS> |
|
68 |
</STATUS> |
|
69 |
<QOS> |
|
70 |
<AVAILABILITY>0</AVAILABILITY> |
|
71 |
<CAPACITY/> |
|
72 |
<THROUGHPUT>0.0</THROUGHPUT> |
|
73 |
</QOS> |
|
74 |
<SECURITY_PARAMETERS/> |
|
75 |
<BLACKBOARD/> |
|
76 |
</BODY> |
|
77 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_resolve_and_index_template.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="ae5f14ac-54e1-4513-b41c-67b82240e5bd_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
6 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<CONFIGURATION> |
|
13 |
<PARAMETERS> |
|
14 |
<PARAM name="cleanMdStoreId" description="Store of cleaned records" required="true" type="string"/> |
|
15 |
<PARAM name="indexHost" description="ElasticSearch index host" required="true" type="string"/> |
|
16 |
<PARAM name="indexName" description="ElasticSearch index Name" required="true" type="string"/> |
|
17 |
</PARAMETERS> |
|
18 |
<WORKFLOW> |
|
19 |
<NODE name="fetchCleaned" type="FetchMDStoreRecords" isStart="true"> |
|
20 |
<DESCRIPTION>Fetch records from MDStore</DESCRIPTION> |
|
21 |
<PARAMETERS> |
|
22 |
<PARAM name="mdId" ref="cleanMdStoreId"/> |
|
23 |
<PARAM name="eprParam" value="orig_epr"/> |
|
24 |
</PARAMETERS> |
|
25 |
<ARCS> |
|
26 |
<ARC to="ResolveRecord"/> |
|
27 |
</ARCS> |
|
28 |
</NODE> |
|
29 |
<NODE name="ResolveRecord" type="ResolveAndIndexJob"> |
|
30 |
<DESCRIPTION>Resolve PIDs and index records</DESCRIPTION> |
|
31 |
<PARAMETERS> |
|
32 |
<PARAM name="inputEprParam" value="orig_epr"/> |
|
33 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
34 |
<PARAM name="indexHost" ref="indexHost"/> |
|
35 |
<PARAM name="indexName" ref="indexName"/> |
|
36 |
</PARAMETERS> |
|
37 |
<ARCS> |
|
38 |
<ARC to="success"/> |
|
39 |
</ARCS> |
|
40 |
</NODE> |
|
41 |
|
|
42 |
</WORKFLOW> |
|
43 |
</CONFIGURATION> |
|
44 |
</BODY> |
|
45 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_collection_incremental_template.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="be04ef26-12aa-46eb-9c67-60ca18e29539_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
6 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<CONFIGURATION> |
|
13 |
<PARAMETERS> |
|
14 |
<PARAM name="dsId" description="Datasource Id" required="true" type="string"/> |
|
15 |
<PARAM name="interface" description="Datasource Interface" required="true" type="string"/> |
|
16 |
<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/> |
|
17 |
<PARAM name="from_date" description="From Date" required="false" type="string"/> |
|
18 |
</PARAMETERS> |
|
19 |
<WORKFLOW> |
|
20 |
|
|
21 |
|
|
22 |
<NODE name="FindDateRangeForIncrementalHarvesting" type="FindDateRangeForIncrementalHarvesting" |
|
23 |
isStart="true"> |
|
24 |
<DESCRIPTION>Find Last execution</DESCRIPTION> |
|
25 |
<PARAMETERS> |
|
26 |
<PARAM name="fromDateParam" ref="fromDateParam"/> |
|
27 |
</PARAMETERS> |
|
28 |
|
|
29 |
<ARCS> |
|
30 |
<ARC to="COLLECT_INCREMENTAL"/> |
|
31 |
</ARCS> |
|
32 |
</NODE> |
|
33 |
|
|
34 |
|
|
35 |
<NODE name="COLLECT_INCREMENTAL" type="DateRangeCollectRecords"> |
|
36 |
<DESCRIPTION>Start Harvesting</DESCRIPTION> |
|
37 |
<PARAMETERS> |
|
38 |
<PARAM name="datasourceId" ref="dsId"/> |
|
39 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
40 |
<PARAM name="eprParam" value="collected_epr"/> |
|
41 |
<PARAM name="from" ref="from_date"/> |
|
42 |
<PARAM name="fromDateParam" ref="fromDateParam"/> |
|
43 |
</PARAMETERS> |
|
44 |
<ARCS> |
|
45 |
<ARC to="MD_BUILDER"/> |
|
46 |
</ARCS> |
|
47 |
</NODE> |
|
48 |
<NODE name="MD_BUILDER" type="MdBuilder"> |
|
49 |
<DESCRIPTION>Prepare mdstore records</DESCRIPTION> |
|
50 |
<PARAMETERS> |
|
51 |
<PARAM name="inputEprParam" value="collected_epr"/> |
|
52 |
<PARAM name="outputEprParam" value="store_epr"/> |
|
53 |
<PARAM name="datasourceId" ref="dsId"/> |
|
54 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
55 |
</PARAMETERS> |
|
56 |
<ARCS> |
|
57 |
<ARC to="STORE_REFRESH"/> |
|
58 |
</ARCS> |
|
59 |
</NODE> |
|
60 |
<NODE name="STORE_REFRESH" type="StoreMDStoreRecords"> |
|
61 |
<DESCRIPTION>Store mdstore records</DESCRIPTION> |
|
62 |
<PARAMETERS> |
|
63 |
<PARAM name="mdId" ref="collMdstoreId"/> |
|
64 |
<PARAM name="storingType" value="REFRESH"/> |
|
65 |
<PARAM name="eprParam" value="store_epr"/> |
|
66 |
</PARAMETERS> |
|
67 |
<ARCS> |
|
68 |
<ARC to="UPDATE_INFO"/> |
|
69 |
</ARCS> |
|
70 |
</NODE> |
|
71 |
<NODE name="UPDATE_INFO" type="MDStoreToApiExtraField"> |
|
72 |
<DESCRIPTION>Update datasource API extra fields</DESCRIPTION> |
|
73 |
<PARAMETERS> |
|
74 |
<PARAM name="mdId" ref="collMdstoreId"/> |
|
75 |
<PARAM name="datasourceId" ref="dsId"/> |
|
76 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
77 |
<PARAM name="extraFieldForTotal" value="last_collection_total"/> |
|
78 |
<PARAM name="extraFieldForDate" value="last_collection_date"/> |
|
79 |
<PARAM name="extraFieldForMdId" value="last_collection_mdId"/> |
|
80 |
</PARAMETERS> |
|
81 |
<ARCS> |
|
82 |
<ARC to="success"/> |
|
83 |
</ARCS> |
|
84 |
</NODE> |
|
85 |
|
|
86 |
</WORKFLOW> |
|
87 |
</CONFIGURATION> |
|
88 |
</BODY> |
|
89 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_hi_link_provider_incremetal.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="153192a0-9d3f-4294-af44-0f8467e41ffd_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2015-12-15T15:43:51.0Z"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Aggregate and incremental Index Metadata from DLI Link Provider Datasource</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_DESCRIPTION>Aggregate and incremental Index Metadata from DLI Link Provider Datasource |
|
14 |
</WORKFLOW_DESCRIPTION> |
|
15 |
<WORKFLOW_INFO> |
|
16 |
<FIELD name="Action">Aggregate Metadata</FIELD> |
|
17 |
<FIELD name="Consequence IS">Ingestion</FIELD> |
|
18 |
<FIELD name="Datasource class">links::provider</FIELD> |
|
19 |
<FIELD name="Content">metadata</FIELD> |
|
20 |
</WORKFLOW_INFO> |
|
21 |
<WORKFLOW_FAMILY>REPO_HI</WORKFLOW_FAMILY> |
|
22 |
<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY> |
|
23 |
<CONFIGURATION status="EXECUTABLE" start="MANUAL"> |
|
24 |
<PARAMETERS/> |
|
25 |
<WORKFLOW> |
|
26 |
<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true"> |
|
27 |
<DESCRIPTION>Verify if DS is pending</DESCRIPTION> |
|
28 |
<PARAMETERS> |
|
29 |
<PARAM name="expectedInterfaceTypologyPrefixes" value="links::provider"/> |
|
30 |
</PARAMETERS> |
|
31 |
<ARCS> |
|
32 |
<ARC to="registerWf"/> |
|
33 |
<ARC to="validateDs" name="validateDs"/> |
|
34 |
</ARCS> |
|
35 |
</NODE> |
|
36 |
|
|
37 |
<NODE name="validateDs" type="ValidateDatasource"> |
|
38 |
<DESCRIPTION>Validate DS</DESCRIPTION> |
|
39 |
<PARAMETERS/> |
|
40 |
<ARCS> |
|
41 |
<ARC to="registerWf"/> |
|
42 |
</ARCS> |
|
43 |
</NODE> |
|
44 |
|
|
45 |
<NODE name="registerWf" type="RegisterWorkflowFromTemplate"> |
|
46 |
<DESCRIPTION>Create Workflow</DESCRIPTION> |
|
47 |
<PARAMETERS> |
|
48 |
<PARAM name="wfName" value="Aggregate Metadata from DLI Link Provider Datasource[Ingestion]"/> |
|
49 |
<PARAM name="wfTemplate" |
|
50 |
value="/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_incremental_wf.xml.st"/> |
|
51 |
<PARAM name="description" value="Aggregate Metadata from DLI Link Provider [Ingestion]"/> |
|
52 |
|
|
53 |
</PARAMETERS> |
|
54 |
<ARCS> |
|
55 |
<ARC to="createDLINative"/> |
|
56 |
<ARC to="createScholixResolved"/> |
|
57 |
</ARCS> |
|
58 |
</NODE> |
|
59 |
|
|
60 |
<NODE name="createDLINative" type="CreateMDStore"> |
|
61 |
<DESCRIPTION>Create oai_dc store</DESCRIPTION> |
|
62 |
<PARAMETERS> |
|
63 |
<PARAM name="format" value="GMF"/> |
|
64 |
<PARAM name="interpretation" value="native"/> |
|
65 |
<PARAM name="layout" value="store"/> |
|
66 |
<PARAM name="outputPrefix" value="harv_"/> |
|
67 |
</PARAMETERS> |
|
68 |
<ARCS> |
|
69 |
<ARC to="updateParameters"/> |
|
70 |
</ARCS> |
|
71 |
</NODE> |
|
72 |
|
|
73 |
<NODE name="createScholixResolved" type="CreateMDStore"> |
|
74 |
<DESCRIPTION>Create oai_dc store</DESCRIPTION> |
|
75 |
<PARAMETERS> |
|
76 |
<PARAM name="format" value="Scholix"/> |
|
77 |
<PARAM name="interpretation" value="cleaned"/> |
|
78 |
<PARAM name="layout" value="store"/> |
|
79 |
<PARAM name="outputPrefix" value="incremental_"/> |
|
80 |
</PARAMETERS> |
|
81 |
<ARCS> |
|
82 |
<ARC to="updateParameters"/> |
|
83 |
</ARCS> |
|
84 |
</NODE> |
|
85 |
<NODE name="updateParameters" type="UpdateWfParameters" isJoin="true"> |
|
86 |
<DESCRIPTION>Update Workflow Parameters</DESCRIPTION> |
|
87 |
<PARAMETERS> |
|
88 |
<PARAM name="envParams"> |
|
89 |
<MAP> |
|
90 |
<ENTRY key="collMdstoreId" value="harv_id"/> |
|
91 |
<ENTRY key="incrementalMdstoreId" value="incremental_id"/> |
|
92 |
</MAP> |
|
93 |
</PARAM> |
|
94 |
</PARAMETERS> |
|
95 |
<ARCS> |
|
96 |
<ARC to="success"/> |
|
97 |
</ARCS> |
|
98 |
</NODE> |
|
99 |
</WORKFLOW> |
|
100 |
</CONFIGURATION> |
|
101 |
<NOTIFICATIONS/> |
|
102 |
<SCHEDULING enabled="false"> |
|
103 |
<CRON>9 9 9 ? * *</CRON> |
|
104 |
<MININTERVAL>10080</MININTERVAL> |
|
105 |
</SCHEDULING> |
|
106 |
<STATUS/> |
|
107 |
</BODY> |
|
108 |
</RESOURCE_PROFILE> |
modules/dnet-core-services/trunk/src/main/java/eu/dnetlib/enabling/resultset/client/HttpResultSetClientIterator.java | ||
---|---|---|
35 | 35 |
@Override |
36 | 36 |
protected List<T> nextPage() throws ResultSetException { |
37 | 37 |
final HttpGet method = new HttpGet(getBaseUrl() + "/" + getId() + "/next/" + PAGE_SIZE); |
38 |
method.addHeader("Accept", "application/json"); |
|
38 | 39 |
|
39 | 40 |
try(CloseableHttpResponse httpResponse = client.execute(method)) { |
40 | 41 |
|
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/enabling/tools/DnetStreamSupport.java | ||
---|---|---|
11 | 11 |
*/ |
12 | 12 |
public class DnetStreamSupport { |
13 | 13 |
|
14 |
public static <T> Stream<T> generateStreamFromIterator(Iterator<T> iterator) {
|
|
15 |
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false);
|
|
14 |
public static <T> Stream<T> generateStreamFromIterator(Iterator<T> iterator) {
|
|
15 |
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false);
|
|
16 | 16 |
} |
17 | 17 |
|
18 |
public static <T> Stream<T> generateParallelStreamFromIterator(Iterator<T> iterator) { |
|
19 |
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), true); |
|
20 |
} |
|
21 |
|
|
18 | 22 |
} |
modules/dnet-core-components/trunk/src/main/java/eu/dnetlib/rmi/data/MDStoreService.java | ||
---|---|---|
17 | 17 |
* @return |
18 | 18 |
*/ |
19 | 19 |
@Override |
20 |
public String identify();
|
|
20 |
String identify();
|
|
21 | 21 |
|
22 | 22 |
/** |
23 | 23 |
* Returns ResultSet EPR for delivered mdstore records. |
... | ... | |
30 | 30 |
* @return ResultSet EPR |
31 | 31 |
* @throws MDStoreServiceException |
32 | 32 |
*/ |
33 |
public ResultSet<String> deliverMDRecords(@WebParam(name = "mdId") final String mdId,
|
|
34 |
@WebParam(name = "from") final String from,
|
|
35 |
@WebParam(name = "until") final String until,
|
|
36 |
@WebParam(name = "recordsFilter") final String recordFilter) throws MDStoreServiceException;
|
|
33 |
ResultSet<String> deliverMDRecords(@WebParam(name = "mdId") final String mdId,
|
|
34 |
@WebParam(name = "from") final String from,
|
|
35 |
@WebParam(name = "until") final String until,
|
|
36 |
@WebParam(name = "recordsFilter") final String recordFilter) throws MDStoreServiceException;
|
|
37 | 37 |
|
38 | 38 |
/** |
39 | 39 |
* Deliver single record from selected mdstore. |
... | ... | |
43 | 43 |
* @return record |
44 | 44 |
* @throws MDStoreServiceException |
45 | 45 |
*/ |
46 |
public String deliverRecord(@WebParam(name = "mdId") final String mdId, @WebParam(name = "recordId") final String recordId) throws MDStoreServiceException, |
|
47 |
DocumentNotFoundException; |
|
46 |
String deliverRecord(@WebParam(name = "mdId") final String mdId, @WebParam(name = "recordId") final String recordId) throws MDStoreServiceException; |
|
48 | 47 |
|
49 | 48 |
/** |
50 | 49 |
* Returns list of all stored indices. |
51 | 50 |
* |
52 | 51 |
* @return list of all stored indices |
53 | 52 |
*/ |
54 |
public List<String> getListOfMDStores() throws MDStoreServiceException;
|
|
53 |
List<String> getListOfMDStores() throws MDStoreServiceException;
|
|
55 | 54 |
|
56 |
public List<String> listMDStores(@WebParam(name = "format") final String format,
|
|
57 |
@WebParam(name = "layout") final String layout,
|
|
55 |
List<String> listMDStores(@WebParam(name = "format") final String format,
|
|
56 |
@WebParam(name = "layout") final String layout,
|
|
58 | 57 |
@WebParam(name = "interpretation") final String interpretation) throws MDStoreServiceException; |
59 | 58 |
|
60 |
public ResultSet<String> bulkDeliverMDRecords(@WebParam(name = "format") final String format,
|
|
61 |
@WebParam(name = "layout") final String layout,
|
|
62 |
@WebParam(name = "interpretation") final String interpretation) throws MDStoreServiceException;
|
|
59 |
ResultSet<String> bulkDeliverMDRecords(@WebParam(name = "format") final String format,
|
|
60 |
@WebParam(name = "layout") final String layout,
|
|
61 |
@WebParam(name = "interpretation") final String interpretation) throws MDStoreServiceException;
|
|
63 | 62 |
|
64 | 63 |
/** |
65 | 64 |
* Store md records from a result set |
... | ... | |
71 | 70 |
* @throws MDStoreServiceException |
72 | 71 |
*/ |
73 | 72 |
@Deprecated |
74 |
public boolean storeMDRecordsFromRS(@WebParam(name = "mdId") final String mdId,
|
|
75 |
@WebParam(name = "rsId") final String rsId,
|
|
76 |
@WebParam(name = "storingType") final String storingType) throws MDStoreServiceException;
|
|
73 |
boolean storeMDRecordsFromRS(@WebParam(name = "mdId") final String mdId,
|
|
74 |
@WebParam(name = "rsId") final String rsId,
|
|
75 |
@WebParam(name = "storingType") final String storingType) throws MDStoreServiceException;
|
|
77 | 76 |
|
78 | 77 |
/** |
Also available in: Unified diff
Upgraded incremental harvesting and other stuff