Revision 43902
Added by Sandro La Bruzzo over 7 years ago
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecord.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
3 |
xmlns="http://namespace.openaire.eu/"> |
|
4 |
<oai:header> |
|
5 |
<dri:objIdentifier>dli::r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier> |
|
6 |
<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier> |
|
7 |
<dri:dateOfCollection>2016-09-12T16:55:22.27+02:00</dri:dateOfCollection> |
|
8 |
<dri:repositoryId/> |
|
9 |
<dri:datasourceprefix>dli::r3d100010134</dri:datasourceprefix> |
|
10 |
<dri:datasourceID/> |
|
11 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/" |
|
12 |
>oai:pangaea.de:doi:10.1594/PANGAEA.432865 |
|
13 |
</identifier> |
|
14 |
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-06-25T12:35:00Z</datestamp> |
|
15 |
</oai:header> |
|
16 |
<metadata xmlns="http://www.openarchives.org/OAI/2.0/"> |
|
17 |
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
18 |
xmlns="http://datacite.org/schema/kernel-3" |
|
19 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
|
20 |
<identifier identifierType="DOI">10.1594/PANGAEA.432865</identifier> |
|
21 |
<creators> |
|
22 |
<creator> |
|
23 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
24 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
25 |
</creator> |
|
26 |
</creators> |
|
27 |
<titles> |
|
28 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research |
|
29 |
quality database) |
|
30 |
</title> |
|
31 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research |
|
32 |
quality database) |
|
33 |
</title> |
|
34 |
</titles> |
|
35 |
<publisher>PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher> |
|
36 |
<publicationYear>2006</publicationYear> |
|
37 |
<subjects> |
|
38 |
<subject subjectScheme="Parameter">DATE/TIME</subject> |
|
39 |
<subject subjectScheme="Parameter">Sea level</subject> |
|
40 |
<subject subjectScheme="Campaign">SeaLevel</subject> |
|
41 |
<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject> |
|
42 |
</subjects> |
|
43 |
<contributors> |
|
44 |
<contributor contributorType="HostingInstitution"> |
|
45 |
<contributorName>Sea Level Center, University of Hawaii</contributorName> |
|
46 |
</contributor> |
|
47 |
</contributors> |
|
48 |
<dates> |
|
49 |
<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date> |
|
50 |
</dates> |
|
51 |
<language>eng</language> |
|
52 |
<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType> |
|
53 |
<relatedIdentifiers> |
|
54 |
<relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy" |
|
55 |
>http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt |
|
56 |
</relatedIdentifier> |
|
57 |
</relatedIdentifiers> |
|
58 |
<sizes> |
|
59 |
<size>365 data points</size> |
|
60 |
</sizes> |
|
61 |
<formats> |
|
62 |
<format>text/tab-separated-values</format> |
|
63 |
</formats> |
|
64 |
<rightsList> |
|
65 |
<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons |
|
66 |
Attribution 3.0 Unported (CC-BY) |
|
67 |
</rights> |
|
68 |
</rightsList> |
|
69 |
<geoLocations> |
|
70 |
<geoLocation> |
|
71 |
<geoLocationPoint>41.5233 -70.6717</geoLocationPoint> |
|
72 |
</geoLocation> |
|
73 |
</geoLocations> |
|
74 |
</resource> |
|
75 |
</metadata> |
|
76 |
</oai:record> |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/ProteinResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver; |
|
2 |
|
|
3 |
import java.util.Arrays; |
|
4 |
|
|
5 |
import eu.dnetlib.resolver.model.CompletionStatus; |
|
6 |
import eu.dnetlib.resolver.model.ObjectProvenance; |
|
7 |
import eu.dnetlib.resolver.model.ObjectProvisionMode; |
|
8 |
import eu.dnetlib.resolver.model.ResolvedObject; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
|
|
11 |
/** |
|
12 |
* Created by sandro on 9/26/16. |
|
13 |
*/ |
|
14 |
public class ProteinResolver extends AbstractPIDResolver { |
|
15 |
|
|
16 |
private static String baseUrl = "http://www.uniprot.org/uniprot/%s.xml"; |
|
17 |
|
|
18 |
@Autowired |
|
19 |
private ProteinParser proteinParser; |
|
20 |
|
|
21 |
@Override |
|
22 |
protected boolean canResolvePid(final String pidType) { |
|
23 |
return pidType != null && (pidType.toLowerCase().equals("protein") || pidType.toLowerCase().equals("ncbi-n")); |
|
24 |
} |
|
25 |
|
|
26 |
@Override |
|
27 |
protected ResolvedObject resolve(final String pid, final String pidType) { |
|
28 |
|
|
29 |
if ((pid == null) || !pid.toLowerCase().startsWith("p")) |
|
30 |
return null; |
|
31 |
final ResolvedObject record = proteinParser.parseRecord(requestURL(String.format(baseUrl, pid))); |
|
32 |
if (record != null) { |
|
33 |
record.setPid(pid); |
|
34 |
record.setPidType(pidType); |
|
35 |
record.setCompletionStatus(CompletionStatus.complete.toString()); |
|
36 |
ObjectProvenance provenance = new ObjectProvenance(); |
|
37 |
provenance.setCompletionStatus(CompletionStatus.complete.toString()); |
|
38 |
provenance.setDatasourceId("dli::r3d100010357"); |
|
39 |
provenance.setDatasource("The Universal Protein Resource"); |
|
40 |
provenance.setProvisionMode(ObjectProvisionMode.resolved.toString()); |
|
41 |
record.setDatasourceProvenance(Arrays.asList(provenance)); |
|
42 |
} |
|
43 |
return record; |
|
44 |
} |
|
45 |
|
|
46 |
public ProteinParser getProteinParser() { |
|
47 |
return proteinParser; |
|
48 |
} |
|
49 |
|
|
50 |
public void setProteinParser(final ProteinParser proteinParser) { |
|
51 |
this.proteinParser = proteinParser; |
|
52 |
} |
|
53 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/ProteinParser.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import com.ximpleware.AutoPilot; |
|
6 |
import com.ximpleware.VTDGen; |
|
7 |
import com.ximpleware.VTDNav; |
|
8 |
import eu.dnetlib.resolver.model.ObjectType; |
|
9 |
import eu.dnetlib.resolver.model.ResolvedObject; |
|
10 |
import eu.dnetlib.resolver.parser.UtilityParser; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
|
|
14 |
/** |
|
15 |
* Created by sandro on 9/26/16. |
|
16 |
*/ |
|
17 |
public class ProteinParser { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(ProteinParser.class); |
|
20 |
|
|
21 |
public ResolvedObject parseRecord(final String record) { |
|
22 |
try { |
|
23 |
final ResolvedObject parsedObject = new ResolvedObject(); |
|
24 |
final VTDGen vg = new VTDGen(); |
|
25 |
vg.setDoc(record.getBytes()); |
|
26 |
vg.parse(true); |
|
27 |
final VTDNav vn = vg.getNav(); |
|
28 |
final AutoPilot ap = new AutoPilot(vn); |
|
29 |
|
|
30 |
final List<String> titles = |
|
31 |
UtilityParser.getTextValue(ap, vn, "//*[local-name()='protein']/*[local-name()='recommendedName']/*[local-name()='fullName']"); |
|
32 |
|
|
33 |
parsedObject.setTitles(titles); |
|
34 |
|
|
35 |
titles.forEach(it -> System.out.println("it = " + it)); |
|
36 |
|
|
37 |
final List<String> descriptions = |
|
38 |
UtilityParser.getTextValue(ap, vn, "//*[local-name()='comment' and ./@type='function']/*[local-name()='text']"); |
|
39 |
|
|
40 |
if (descriptions != null && descriptions.size() > 0) { |
|
41 |
parsedObject.setDescription(descriptions.get(0)); |
|
42 |
} |
|
43 |
|
|
44 |
parsedObject.setType(ObjectType.dataset); |
|
45 |
return parsedObject; |
|
46 |
} catch (Throwable e) { |
|
47 |
log.error(String.format("Error on parsing document %s", record), e); |
|
48 |
return null; |
|
49 |
} |
|
50 |
} |
|
51 |
|
|
52 |
} |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_by_link_proovider.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="8bf9879c-535a-4818-8de7-790a3eb90675_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<CONFIGURATION> |
|
12 |
<PARAMETERS> |
|
13 |
<PARAM name="dsId" description="Datasource Id" required="true" type="string"/> |
|
14 |
<PARAM name="interface" description="Datasource Interface" required="true" type="string"/> |
|
15 |
<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/> |
|
16 |
<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/> |
|
17 |
<PARAM name="patchMdstoreId" description="Store for patched records" required="true" type="string"/> |
|
18 |
<PARAM name="nativeObjectStoreId" description="Object Store for native images" required="true" type="string"/> |
|
19 |
<PARAM name="indexId" description="Index Identifier" required="true" type="string"/> |
|
20 |
</PARAMETERS> |
|
21 |
<WORKFLOW> |
|
22 |
<NODE name="deleteCollMdStore" type="DeleteMDStore" isStart="true"> |
|
23 |
<DESCRIPTION>Delete the mdstore of collected records</DESCRIPTION> |
|
24 |
<PARAMETERS> |
|
25 |
<PARAM name="mdstoreId" ref="collMdstoreId"/> |
|
26 |
</PARAMETERS> |
|
27 |
<ARCS> |
|
28 |
<ARC to="deleteCleanMdStore"/> |
|
29 |
</ARCS> |
|
30 |
</NODE> |
|
31 |
<NODE name="deleteCleanMdStore" type="DeleteMDStore"> |
|
32 |
<DESCRIPTION>Delete the mdstore of cleaned records</DESCRIPTION> |
|
33 |
<PARAMETERS> |
|
34 |
<PARAM name="mdstoreId" ref="cleanMdstoreId"/> |
|
35 |
</PARAMETERS> |
|
36 |
<ARCS> |
|
37 |
<ARC to="removeApiExtraFields"/> |
|
38 |
</ARCS> |
|
39 |
</NODE> |
|
40 |
|
|
41 |
<NODE name="removeApiExtraFields" type="RemoveApiExtraFields"> |
|
42 |
<DESCRIPTION>Reset the extrafields of the api</DESCRIPTION> |
|
43 |
<PARAMETERS> |
|
44 |
<PARAM name="datasourceId" ref="dsId"/> |
|
45 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
46 |
<PARAM name="fields"> |
|
47 |
<LIST> |
|
48 |
<ITEM value="last_collection_total"/> |
|
49 |
<ITEM value="last_collection_date"/> |
|
50 |
<ITEM value="last_collection_mdId"/> |
|
51 |
<ITEM value="last_aggregation_total"/> |
|
52 |
<ITEM value="last_aggregation_date"/> |
|
53 |
<ITEM value="last_aggregation_mdId"/> |
|
54 |
</LIST> |
|
55 |
</PARAM> |
|
56 |
</PARAMETERS> |
|
57 |
<ARCS> |
|
58 |
<ARC to="success"/> |
|
59 |
</ARCS> |
|
60 |
</NODE> |
|
61 |
</WORKFLOW> |
|
62 |
</CONFIGURATION> |
|
63 |
</BODY> |
|
64 |
</RESOURCE_PROFILE> |
webapps/dnet-dli-container/trunk/src/main/resources/log4j.properties | ||
---|---|---|
34 | 34 |
log4j.logger.eu.dnetlib.enabling.is.sn.ISSNServiceCore=WARN |
35 | 35 |
log4j.logger.eu.dnetlib.xml.database.exist.ExistDatabase=WARN |
36 | 36 |
log4j.logger.eu.dnetlib.enabling.is.store.AbstractContentInitializer=FATAL |
37 |
log4j.logger.eu.dnetlib.data.collector.plugins.oai.engine.HttpConnector=FATAL |
|
37 | 38 |
|
38 | 39 |
log4j.logger.org.apache.hadoop.hbase.mapreduce.TableInputFormatBase=FATAL |
39 | 40 |
|
modules/dnet-dli/trunk/src/test/java/eu/dnetlib/resolver/DLIParserTest.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.IOException; |
4 | 4 |
import java.io.InputStream; |
5 |
import java.nio.file.FileSystems; |
|
6 |
import java.nio.file.Files; |
|
7 |
import java.nio.file.Path; |
|
5 | 8 |
import java.util.Arrays; |
9 |
import java.util.List; |
|
10 |
import java.util.stream.Collectors; |
|
6 | 11 |
|
12 |
import eu.dnetlib.resolver.model.CompletionStatus; |
|
7 | 13 |
import eu.dnetlib.resolver.model.ResolvedObject; |
8 | 14 |
import eu.dnetlib.resolver.model.serializer.DMFSerializer; |
9 | 15 |
import eu.dnetlib.resolver.parser.DMFDom4jResolverParser; |
10 | 16 |
import eu.dnetlib.resolver.parser.DMFResolverParser; |
17 |
import eu.dnetlib.resolver.parser.ScholixResolverParser; |
|
11 | 18 |
import org.antlr.stringtemplate.StringTemplate; |
12 | 19 |
import org.apache.commons.io.IOUtils; |
20 |
import org.apache.commons.lang3.tuple.Pair; |
|
13 | 21 |
import org.apache.commons.logging.Log; |
14 | 22 |
import org.apache.commons.logging.LogFactory; |
23 |
import org.apache.http.HttpEntity; |
|
24 |
import org.apache.http.client.methods.HttpGet; |
|
25 |
import org.apache.http.impl.client.CloseableHttpClient; |
|
26 |
import org.apache.http.impl.client.HttpClients; |
|
27 |
import org.apache.http.util.EntityUtils; |
|
15 | 28 |
import org.junit.Assert; |
16 | 29 |
import org.junit.Test; |
17 | 30 |
|
... | ... | |
24 | 37 |
private static final Log log = LogFactory.getLog(DLIParserTest.class); |
25 | 38 |
|
26 | 39 |
@Test |
27 |
public void testParser() throws IOException { |
|
40 |
public void testDMFParser() throws IOException {
|
|
28 | 41 |
final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecord.xml"); |
29 | 42 |
|
30 | 43 |
String str = IOUtils.toString(resourceAsStream); |
... | ... | |
47 | 60 |
} |
48 | 61 |
|
49 | 62 |
@Test |
63 |
public void testScholixParser() throws IOException { |
|
64 |
final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecordScholix.xml"); |
|
65 |
|
|
66 |
String str = IOUtils.toString(resourceAsStream); |
|
67 |
ScholixResolverParser parser = new ScholixResolverParser(); |
|
68 |
ResolvedObject object = parser.parseObject(str); |
|
69 |
|
|
70 |
System.out.println("object = " + object); |
|
71 |
|
|
72 |
Assert.assertNotNull(object); |
|
73 |
System.out.println("object.getCompletionStatus() = " + object.getCompletionStatus()); |
|
74 |
|
|
75 |
Assert.assertTrue(object.getCompletionStatus().equals(CompletionStatus.incomplete.toString())); |
|
76 |
|
|
77 |
Assert.assertNotNull(object.getRelations()); |
|
78 |
|
|
79 |
Assert.assertTrue(object.getRelations().size() == 1); |
|
80 |
|
|
81 |
System.out.println("object.getRelations().get(0) = " + object.getRelations().get(0)); |
|
82 |
|
|
83 |
} |
|
84 |
|
|
85 |
@Test |
|
86 |
public void testProteinParser() throws IOException { |
|
87 |
final NCBINResolver resolver = new NCBINResolver(); |
|
88 |
resolver.setNCBINParser(new NCBINParser()); |
|
89 |
final ResolvedObject p02768 = resolver.resolve("U36380", "protein"); |
|
90 |
|
|
91 |
System.out.println(p02768.getDescription()); |
|
92 |
|
|
93 |
System.out.println("p02768 = " + p02768); |
|
94 |
|
|
95 |
} |
|
96 |
|
|
97 |
@Test |
|
98 |
public void testNCBIn() throws Exception { |
|
99 |
|
|
100 |
Path path = FileSystems.getDefault().getPath("/var/lib/dli/elsevier.csv"); |
|
101 |
|
|
102 |
//Files.lines(path).forEach(it -> System.out.println("it = " + it)); |
|
103 |
|
|
104 |
List<Pair<String, String>> elem = |
|
105 |
Files.lines(path).map(it -> it.split("@")).filter(it -> (it != null && it.length == 5)).map(it -> Pair.of(it[3], it[4])) |
|
106 |
.collect(Collectors.toList()); |
|
107 |
|
|
108 |
final NCBINResolver resolver = new NCBINResolver(); |
|
109 |
resolver.setNCBINParser(new NCBINParser()); |
|
110 |
|
|
111 |
elem.forEach(it -> { |
|
112 |
if (it.getLeft().equals("ncbi-n")) { |
|
113 |
final ResolvedObject obj = resolver.resolve(it.getRight(), "ncbi-n"); |
|
114 |
System.out.println("p02768 = " + obj); |
|
115 |
|
|
116 |
} |
|
117 |
}); |
|
118 |
|
|
119 |
} |
|
120 |
|
|
121 |
private String requestUrl(final String url) { |
|
122 |
final CloseableHttpClient httpclient = HttpClients.createDefault(); |
|
123 |
try { |
|
124 |
HttpGet httpGet = new HttpGet(url); |
|
125 |
final String out = httpclient.execute(httpGet, response -> { |
|
126 |
int status = response.getStatusLine().getStatusCode(); |
|
127 |
if (status >= 200 && status < 300) { |
|
128 |
HttpEntity entity = response.getEntity(); |
|
129 |
return entity != null ? EntityUtils.toString(entity) : null; |
|
130 |
} else { |
|
131 |
return null; |
|
132 |
} |
|
133 |
}); |
|
134 |
return out; |
|
135 |
} catch (Throwable e) { |
|
136 |
log.error(e); |
|
137 |
return null; |
|
138 |
} finally { |
|
139 |
try { |
|
140 |
httpclient.close(); |
|
141 |
} catch (IOException e) { |
|
142 |
log.error("Error on closing httpclient", e); |
|
143 |
} |
|
144 |
} |
|
145 |
} |
|
146 |
|
|
147 |
|
|
148 |
|
|
149 |
|
|
150 |
|
|
151 |
@Test |
|
50 | 152 |
public void testDom4jParser() throws Exception { |
51 | 153 |
|
52 | 154 |
final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecord.xml"); |
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecordDMF.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
3 |
xmlns="http://namespace.openaire.eu/"> |
|
4 |
<oai:header> |
|
5 |
<dri:objIdentifier>dli::r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier> |
|
6 |
<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier> |
|
7 |
<dri:dateOfCollection>2016-09-12T16:55:22.27+02:00</dri:dateOfCollection> |
|
8 |
<dri:repositoryId/> |
|
9 |
<dri:datasourceprefix>dli::r3d100010134</dri:datasourceprefix> |
|
10 |
<dri:datasourceID/> |
|
11 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/" |
|
12 |
>oai:pangaea.de:doi:10.1594/PANGAEA.432865 |
|
13 |
</identifier> |
|
14 |
<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-06-25T12:35:00Z</datestamp> |
|
15 |
</oai:header> |
|
16 |
<metadata xmlns="http://www.openarchives.org/OAI/2.0/"> |
|
17 |
<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
18 |
xmlns="http://datacite.org/schema/kernel-3" |
|
19 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
|
20 |
<identifier identifierType="DOI">10.1594/PANGAEA.432865</identifier> |
|
21 |
<creators> |
|
22 |
<creator> |
|
23 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
24 |
<creatorName>WOCE Sea Level, WSL</creatorName> |
|
25 |
</creator> |
|
26 |
</creators> |
|
27 |
<titles> |
|
28 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research |
|
29 |
quality database) |
|
30 |
</title> |
|
31 |
<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research |
|
32 |
quality database) |
|
33 |
</title> |
|
34 |
</titles> |
|
35 |
<publisher>PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher> |
|
36 |
<publicationYear>2006</publicationYear> |
|
37 |
<subjects> |
|
38 |
<subject subjectScheme="Parameter">DATE/TIME</subject> |
|
39 |
<subject subjectScheme="Parameter">Sea level</subject> |
|
40 |
<subject subjectScheme="Campaign">SeaLevel</subject> |
|
41 |
<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject> |
|
42 |
</subjects> |
|
43 |
<contributors> |
|
44 |
<contributor contributorType="HostingInstitution"> |
|
45 |
<contributorName>Sea Level Center, University of Hawaii</contributorName> |
|
46 |
</contributor> |
|
47 |
</contributors> |
|
48 |
<dates> |
|
49 |
<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date> |
|
50 |
</dates> |
|
51 |
<language>eng</language> |
|
52 |
<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType> |
|
53 |
<relatedIdentifiers> |
|
54 |
<relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy" |
|
55 |
>http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt |
|
56 |
</relatedIdentifier> |
|
57 |
</relatedIdentifiers> |
|
58 |
<sizes> |
|
59 |
<size>365 data points</size> |
|
60 |
</sizes> |
|
61 |
<formats> |
|
62 |
<format>text/tab-separated-values</format> |
|
63 |
</formats> |
|
64 |
<rightsList> |
|
65 |
<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons |
|
66 |
Attribution 3.0 Unported (CC-BY) |
|
67 |
</rights> |
|
68 |
</rightsList> |
|
69 |
<geoLocations> |
|
70 |
<geoLocation> |
|
71 |
<geoLocationPoint>41.5233 -70.6717</geoLocationPoint> |
|
72 |
</geoLocation> |
|
73 |
</geoLocations> |
|
74 |
</resource> |
|
75 |
</metadata> |
|
76 |
</oai:record> |
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecordScholix.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<record xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
3 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
4 |
<oai:header xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
5 |
> |
|
6 |
<dri:objIdentifier>dli::elsevier::0000d9de7b24b7cb315852926b88e473</dri:objIdentifier> |
|
7 |
<dri:recordIdentifier>10.1016/j.jmgm.2014.05.002::3CQZ</dri:recordIdentifier> |
|
8 |
<dri:dateOfCollection>2016-09-29T16:23:34.316+02:00</dri:dateOfCollection> |
|
9 |
<dri:repositoryId>2d1245d4-c169-4247-9106-0f69f8d752eb_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
10 |
<dri:datasourceprefix>dli::elsevier</dri:datasourceprefix> |
|
11 |
</oai:header> |
|
12 |
<metadata> |
|
13 |
<scholix:link xmlns:scholix="http://www.scholix.org"> |
|
14 |
<assertion_info> |
|
15 |
<source>dli::elsevier</source> |
|
16 |
<relationType scheme="datacite">unknown</relationType> |
|
17 |
</assertion_info> |
|
18 |
<source> |
|
19 |
<pid type="DOI">10.1016/j.jmgm.2014.05.002</pid> |
|
20 |
<type>publication</type> |
|
21 |
</source> |
|
22 |
<target> |
|
23 |
<pid type="pdb">3CQZ</pid> |
|
24 |
<type>dataset</type> |
|
25 |
</target> |
|
26 |
</scholix:link> |
|
27 |
</metadata> |
|
28 |
<oaf:datainfo> |
|
29 |
<oaf:completionStatus>incomplete</oaf:completionStatus> |
|
30 |
<oaf:provisionMode>collected</oaf:provisionMode> |
|
31 |
</oaf:datainfo> |
|
32 |
</record> |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/NCBINParser.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import com.ximpleware.AutoPilot; |
|
6 |
import com.ximpleware.VTDGen; |
|
7 |
import com.ximpleware.VTDNav; |
|
8 |
import eu.dnetlib.resolver.model.ObjectType; |
|
9 |
import eu.dnetlib.resolver.model.ResolvedObject; |
|
10 |
import eu.dnetlib.resolver.parser.UtilityParser; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
|
|
14 |
/** |
|
15 |
* Created by sandro on 9/26/16. |
|
16 |
*/ |
|
17 |
public class NCBINParser { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(NCBINParser.class); |
|
20 |
|
|
21 |
public ResolvedObject parseRecord(final String record) { |
|
22 |
try { |
|
23 |
final ResolvedObject parsedObject = new ResolvedObject(); |
|
24 |
final VTDGen vg = new VTDGen(); |
|
25 |
vg.setDoc(record.getBytes()); |
|
26 |
vg.parse(true); |
|
27 |
final VTDNav vn = vg.getNav(); |
|
28 |
final AutoPilot ap = new AutoPilot(vn); |
|
29 |
|
|
30 |
final List<String> titles = |
|
31 |
UtilityParser.getTextValue(ap, vn, "//GBSeq_definition"); |
|
32 |
|
|
33 |
parsedObject.setTitles(titles); |
|
34 |
|
|
35 |
titles.forEach(it -> System.out.println("it = " + it)); |
|
36 |
|
|
37 |
final List<String> descriptions = |
|
38 |
UtilityParser.getTextValue(ap, vn, "//GBSeq_comment"); |
|
39 |
|
|
40 |
if (descriptions != null && descriptions.size() > 0) { |
|
41 |
parsedObject.setDescription(descriptions.get(0)); |
|
42 |
} |
|
43 |
|
|
44 |
parsedObject.setType(ObjectType.dataset); |
|
45 |
return parsedObject; |
|
46 |
} catch (Throwable e) { |
|
47 |
log.error(String.format("Error on parsing document %s", record), e); |
|
48 |
return null; |
|
49 |
} |
|
50 |
} |
|
51 |
|
|
52 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/NCBINResolver.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver; |
|
2 |
|
|
3 |
import java.util.Arrays; |
|
4 |
|
|
5 |
import eu.dnetlib.resolver.model.CompletionStatus; |
|
6 |
import eu.dnetlib.resolver.model.ObjectProvenance; |
|
7 |
import eu.dnetlib.resolver.model.ObjectProvisionMode; |
|
8 |
import eu.dnetlib.resolver.model.ResolvedObject; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
|
|
11 |
/** |
|
12 |
* Created by sandro on 9/26/16. |
|
13 |
*/ |
|
14 |
public class NCBINResolver extends AbstractPIDResolver { |
|
15 |
|
|
16 |
private static String baseUrl = |
|
17 |
"https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&val=%s&page_size=5&fmt_mask=0&report=gbx&retmode=text&page=1&page_size=1"; |
|
18 |
|
|
19 |
@Autowired |
|
20 |
private NCBINParser NCBINParser; |
|
21 |
|
|
22 |
@Override |
|
23 |
protected boolean canResolvePid(final String pidType) { |
|
24 |
return pidType != null && (pidType.toLowerCase().equals("protein") || pidType.toLowerCase().equals("ncbi-n")); |
|
25 |
} |
|
26 |
|
|
27 |
@Override |
|
28 |
protected ResolvedObject resolve(final String pid, final String pidType) { |
|
29 |
|
|
30 |
if ((pid == null)) |
|
31 |
return null; |
|
32 |
final ResolvedObject record = NCBINParser.parseRecord(requestURL(String.format(baseUrl, pid))); |
|
33 |
if (record != null) { |
|
34 |
record.setPid(pid); |
|
35 |
record.setPidType(pidType); |
|
36 |
record.setCompletionStatus(CompletionStatus.complete.toString()); |
|
37 |
ObjectProvenance provenance = new ObjectProvenance(); |
|
38 |
provenance.setCompletionStatus(CompletionStatus.complete.toString()); |
|
39 |
provenance.setDatasourceId("dli::r3d100010778"); |
|
40 |
provenance.setDatasource("NCBI Nucleotide"); |
|
41 |
provenance.setProvisionMode(ObjectProvisionMode.resolved.toString()); |
|
42 |
record.setDatasourceProvenance(Arrays.asList(provenance)); |
|
43 |
} |
|
44 |
return record; |
|
45 |
} |
|
46 |
|
|
47 |
public NCBINParser getNCBINParser() { |
|
48 |
return NCBINParser; |
|
49 |
} |
|
50 |
|
|
51 |
public void setNCBINParser(final NCBINParser NCBINParser) { |
|
52 |
this.NCBINParser = NCBINParser; |
|
53 |
} |
|
54 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/UtilityParser.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver.parser; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
|
|
8 |
import com.ximpleware.AutoPilot; |
|
9 |
import com.ximpleware.VTDNav; |
|
10 |
|
|
11 |
/** |
|
12 |
* Created by sandro on 9/29/16. |
|
13 |
*/ |
|
14 |
public class UtilityParser { |
|
15 |
|
|
16 |
public static List<Node> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes) |
|
17 |
throws Exception { |
|
18 |
List<Node> results = new ArrayList<>(); |
|
19 |
ap.selectXPath(xpath); |
|
20 |
while (ap.evalXPath() != -1) { |
|
21 |
final Node currentNode = new Node(); |
|
22 |
final Map<String, String> currentAttributes = new HashMap<>(); |
|
23 |
int t = vn.getText(); |
|
24 |
currentNode.setTextValue(vn.toNormalizedString(t)); |
|
25 |
|
|
26 |
attributes.forEach(attributeKey -> { |
|
27 |
try { |
|
28 |
int attr = vn.getAttrVal(attributeKey); |
|
29 |
currentAttributes.put(attributeKey, vn.toNormalizedString(attr)); |
|
30 |
} catch (Throwable e) { |
|
31 |
|
|
32 |
} |
|
33 |
}); |
|
34 |
currentNode.setAttributes(currentAttributes); |
|
35 |
results.add(currentNode); |
|
36 |
} |
|
37 |
return results; |
|
38 |
} |
|
39 |
|
|
40 |
public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws Exception { |
|
41 |
List<String> results = new ArrayList<>(); |
|
42 |
ap.selectXPath(xpath); |
|
43 |
while (ap.evalXPath() != -1) { |
|
44 |
|
|
45 |
int t = vn.getText(); |
|
46 |
results.add(vn.toNormalizedString(t)); |
|
47 |
} |
|
48 |
return results; |
|
49 |
} |
|
50 |
|
|
51 |
public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws Exception { |
|
52 |
ap.selectXPath(xpath); |
|
53 |
while (ap.evalXPath() != -1) { |
|
54 |
return nav.toNormalizedString(nav.getText()); |
|
55 |
} |
|
56 |
return null; |
|
57 |
} |
|
58 |
|
|
59 |
public static class Node { |
|
60 |
|
|
61 |
private String textValue; |
|
62 |
|
|
63 |
private Map<String, String> attributes; |
|
64 |
|
|
65 |
public String getTextValue() { |
|
66 |
return textValue; |
|
67 |
} |
|
68 |
|
|
69 |
public void setTextValue(final String textValue) { |
|
70 |
this.textValue = textValue; |
|
71 |
} |
|
72 |
|
|
73 |
public Map<String, String> getAttributes() { |
|
74 |
return attributes; |
|
75 |
} |
|
76 |
|
|
77 |
public void setAttributes(final Map<String, String> attributes) { |
|
78 |
this.attributes = attributes; |
|
79 |
} |
|
80 |
} |
|
81 |
|
|
82 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/DMFResolverParser.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.resolver.parser; |
2 | 2 |
|
3 |
import java.util.*; |
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.Arrays; |
|
5 |
import java.util.List; |
|
4 | 6 |
|
5 | 7 |
import com.google.common.collect.Lists; |
6 | 8 |
import com.ximpleware.AutoPilot; |
7 | 9 |
import com.ximpleware.VTDGen; |
8 | 10 |
import com.ximpleware.VTDNav; |
9 | 11 |
import eu.dnetlib.resolver.model.*; |
12 |
import eu.dnetlib.resolver.parser.UtilityParser.Node; |
|
10 | 13 |
import org.apache.commons.logging.Log; |
11 | 14 |
import org.apache.commons.logging.LogFactory; |
12 | 15 |
|
... | ... | |
29 | 32 |
final AutoPilot ap = new AutoPilot(vn); |
30 | 33 |
ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri"); |
31 | 34 |
|
32 |
final String datasourcePrefix = getSingleValue(ap, vn, "//dri:datasourceprefix"); |
|
35 |
final String datasourcePrefix = UtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix");
|
|
33 | 36 |
ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf"); |
34 | 37 |
|
35 |
final String completionStatus = getSingleValue(ap, vn, "//oaf:completionStatus"); |
|
36 |
final String provisionMode = getSingleValue(ap, vn, "//oaf:provisionMode"); |
|
38 |
final String completionStatus = UtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus");
|
|
39 |
final String provisionMode = UtilityParser.getSingleValue(ap, vn, "//oaf:provisionMode");
|
|
37 | 40 |
|
38 | 41 |
final ObjectProvenance provenance = new ObjectProvenance(); |
39 | 42 |
provenance.setDatasourceId(datasourcePrefix); |
... | ... | |
43 | 46 |
parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance)); |
44 | 47 |
|
45 | 48 |
ap.declareXPathNameSpace("datacite", "http://datacite.org/schema/kernel-3"); |
46 |
final List<Map<String, String>> identifierType = getTextValuesWithAttributes(ap, vn, "//datacite:identifier", Lists.newArrayList("identifierType")); |
|
49 |
final List<Node> identifierType = |
|
50 |
UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:identifier", Lists.newArrayList("identifierType")); |
|
47 | 51 |
|
48 | 52 |
if (identifierType != null && identifierType.size() > 0) { |
49 | 53 |
|
50 |
final Map<String, String> result = identifierType.get(0);
|
|
51 |
parsedObject.setPid(result.get("resultText"));
|
|
52 |
parsedObject.setPidType(result.get("identifierType")); |
|
54 |
final Node result = identifierType.get(0);
|
|
55 |
parsedObject.setPid(result.getTextValue());
|
|
56 |
parsedObject.setPidType(result.getAttributes().get("identifierType"));
|
|
53 | 57 |
} else { |
54 | 58 |
log.error("Error on parsing record the identifire should not null "); |
55 | 59 |
return null; |
56 | 60 |
} |
57 | 61 |
|
58 |
final List<Map<String, String>> relations =
|
|
59 |
getTextValuesWithAttributes(ap, vn, "//datacite:relatedIdentifier", Arrays.asList("relatedIdentifierType", "relationType")); |
|
62 |
final List<Node> relations =
|
|
63 |
UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:relatedIdentifier", Arrays.asList("relatedIdentifierType", "relationType"));
|
|
60 | 64 |
|
61 | 65 |
if (relations != null && relations.size() > 0) { |
62 | 66 |
final List<ObjectRelation> relationsResult = new ArrayList<>(); |
63 | 67 |
relations.forEach(relationMap -> { |
64 |
final String relationType = relationMap.get("relationType"); |
|
65 |
final String relatedIdentifierType = relationMap.get("relatedIdentifierType"); |
|
66 |
final String relatedPid = relationMap.get("resultText");
|
|
68 |
final String relationType = relationMap.getAttributes().get("relationType");
|
|
69 |
final String relatedIdentifierType = relationMap.getAttributes().get("relatedIdentifierType");
|
|
70 |
final String relatedPid = relationMap.getTextValue();
|
|
67 | 71 |
final ObjectRelation currentRelation = new ObjectRelation(); |
68 | 72 |
currentRelation.setTargetPID(new PID(relatedPid, relatedIdentifierType)); |
69 | 73 |
currentRelation.setRelationSemantics(relationType); |
... | ... | |
73 | 77 |
parsedObject.setRelations(relationsResult); |
74 | 78 |
} |
75 | 79 |
|
76 |
final List<Map<String, String>> subjects = getTextValuesWithAttributes(ap, vn, "//datacite:subject", Arrays.asList("subjectScheme"));
|
|
80 |
final List<Node> subjects = UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:subject", Arrays.asList("subjectScheme"));
|
|
77 | 81 |
|
78 | 82 |
if (subjects != null && subjects.size() > 0) { |
79 | 83 |
final List<SubjectType> subjectResult = new ArrayList<>(); |
80 | 84 |
subjects.forEach(subjectMap -> { |
81 |
final SubjectType subject = new SubjectType(subjectMap.get("subjectScheme"), subjectMap.get("resultText"));
|
|
85 |
final SubjectType subject = new SubjectType(subjectMap.getAttributes().get("subjectScheme"), subjectMap.getTextValue());
|
|
82 | 86 |
subjectResult.add(subject); |
83 | 87 |
}); |
84 | 88 |
parsedObject.setSubjects(subjectResult); |
... | ... | |
86 | 90 |
|
87 | 91 |
parsedObject.setCompletionStatus(completionStatus); |
88 | 92 |
|
89 |
final List<String> creators = getTextValue(ap, vn, "//datacite:creator/datacite:creatorName"); |
|
93 |
final List<String> creators = UtilityParser.getTextValue(ap, vn, "//datacite:creator/datacite:creatorName");
|
|
90 | 94 |
if (creators != null && creators.size() > 0) { |
91 | 95 |
parsedObject.setAuthors(creators); |
92 | 96 |
} |
93 |
final List<String> titles = getTextValue(ap, vn, "//datacite:title"); |
|
97 |
final List<String> titles = UtilityParser.getTextValue(ap, vn, "//datacite:title");
|
|
94 | 98 |
if (titles != null && titles.size() > 0) { |
95 | 99 |
parsedObject.setTitles(titles); |
96 | 100 |
} |
97 |
final String type = getSingleValue(ap, vn, "//datacite:resourceType"); |
|
101 |
final String type = UtilityParser.getSingleValue(ap, vn, "//datacite:resourceType");
|
|
98 | 102 |
|
99 | 103 |
if (setDatasetType(parsedObject, type)) return null; |
100 | 104 |
|
101 |
final List<String> dates = getTextValue(ap, vn, "//datacite:dates/date"); |
|
105 |
final List<String> dates = UtilityParser.getTextValue(ap, vn, "//datacite:dates/date");
|
|
102 | 106 |
|
103 | 107 |
if (dates != null && dates.size() > 0) { |
104 | 108 |
parsedObject.setDate(dates.get(0)); |
... | ... | |
110 | 114 |
} |
111 | 115 |
} |
112 | 116 |
|
113 |
private List<Map<String, String>> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes) |
|
114 |
throws Exception { |
|
115 |
List<Map<String, String>> results = new ArrayList<>(); |
|
116 |
ap.selectXPath(xpath); |
|
117 |
while (ap.evalXPath() != -1) { |
|
118 |
Map<String, String> currentValue = new HashMap<>(); |
|
119 |
int t = vn.getText(); |
|
120 |
currentValue.put("resultText", vn.toNormalizedString(t)); |
|
121 |
attributes.forEach(attributeKey -> { |
|
122 |
try { |
|
123 |
int attr = vn.getAttrVal(attributeKey); |
|
124 |
currentValue.put(attributeKey, vn.toNormalizedString(attr)); |
|
125 |
} catch (Throwable e) { |
|
126 | 117 |
|
127 |
} |
|
128 |
}); |
|
129 | 118 |
|
130 |
results.add(currentValue); |
|
131 |
} |
|
132 |
return results; |
|
133 |
} |
|
134 |
|
|
135 |
private List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws Exception { |
|
136 |
List<String> results = new ArrayList<>(); |
|
137 |
ap.selectXPath(xpath); |
|
138 |
while (ap.evalXPath() != -1) { |
|
139 |
|
|
140 |
int t = vn.getText(); |
|
141 |
results.add(vn.toNormalizedString(t)); |
|
142 |
} |
|
143 |
return results; |
|
144 |
} |
|
145 |
|
|
146 |
private String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws Exception { |
|
147 |
ap.selectXPath(xpath); |
|
148 |
while (ap.evalXPath() != -1) { |
|
149 |
return nav.toNormalizedString(nav.getText()); |
|
150 |
} |
|
151 |
return null; |
|
152 |
} |
|
153 |
|
|
154 | 119 |
} |
155 | 120 |
|
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/ScholixResolverParser.java | ||
---|---|---|
1 |
package eu.dnetlib.resolver.parser; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.Arrays; |
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import com.google.common.collect.Lists; |
|
8 |
import com.ximpleware.AutoPilot; |
|
9 |
import com.ximpleware.VTDGen; |
|
10 |
import com.ximpleware.VTDNav; |
|
11 |
import eu.dnetlib.resolver.model.*; |
|
12 |
import eu.dnetlib.resolver.parser.UtilityParser.Node; |
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
|
|
16 |
/** |
|
17 |
* Created by sandro on 9/29/16. |
|
18 |
*/ |
|
19 |
public class ScholixResolverParser extends AbstractResolverParser { |
|
20 |
|
|
21 |
private static final Log log = LogFactory.getLog(ScholixResolverParser.class); |
|
22 |
|
|
23 |
@Override |
|
24 |
public ResolvedObject parseObject(final String record) { |
|
25 |
|
|
26 |
try { |
|
27 |
final ResolvedObject parsedObject = new ResolvedObject(); |
|
28 |
final VTDGen vg = new VTDGen(); |
|
29 |
vg.setDoc(record.getBytes()); |
|
30 |
vg.parse(true); |
|
31 |
|
|
32 |
final VTDNav vn = vg.getNav(); |
|
33 |
final AutoPilot ap = new AutoPilot(vn); |
|
34 |
ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri"); |
|
35 |
|
|
36 |
final String datasourcePrefix = UtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix"); |
|
37 |
ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf"); |
|
38 |
|
|
39 |
final String completionStatus = UtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus"); |
|
40 |
final String provisionMode = UtilityParser.getSingleValue(ap, vn, "//oaf:provisionMode"); |
|
41 |
|
|
42 |
final ObjectProvenance provenance = new ObjectProvenance(); |
|
43 |
provenance.setDatasourceId(datasourcePrefix); |
|
44 |
provenance.setDatasource(datasourcePrefix); |
|
45 |
provenance.setCompletionStatus(completionStatus); |
|
46 |
provenance.setProvisionMode(provisionMode); |
|
47 |
parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance)); |
|
48 |
|
|
49 |
parsedObject.setCompletionStatus(completionStatus); |
|
50 |
|
|
51 |
ap.declareXPathNameSpace("scholix", "http://www.scholix.org"); |
|
52 |
|
|
53 |
List<Node> sourcePid = UtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='source']/*[local-name()='pid']", Arrays.asList("type")); |
|
54 |
|
|
55 |
if (sourcePid == null || sourcePid.size() == 0) { |
|
56 |
return null; |
|
57 |
} |
|
58 |
List<Node> targetPid = UtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='target']/*[local-name()='pid']", Arrays.asList("type")); |
|
59 |
if (targetPid == null || targetPid.size() == 0) { |
|
60 |
return null; |
|
61 |
} |
|
62 |
parsedObject.setPid(sourcePid.get(0).getTextValue()); |
|
63 |
parsedObject.setPidType(sourcePid.get(0).getAttributes().get("type")); |
|
64 |
|
|
65 |
List<String> sourceTypes = UtilityParser.getTextValue(ap, vn, "//*[local-name()='source']/*[local-name()='type']"); |
|
66 |
|
|
67 |
List<String> targetTypes = UtilityParser.getTextValue(ap, vn, "//*[local-name()='target']/*[local-name()='type']"); |
|
68 |
|
|
69 |
if (sourceTypes != null && sourceTypes.size() > 0) { |
|
70 |
parsedObject.setType(ObjectType.valueOf(sourceTypes.get(0))); |
|
71 |
} |
|
72 |
|
|
73 |
List<String> relationType = UtilityParser.getTextValue(ap, vn, "//*[local-name()='relationType']"); |
|
74 |
String relationSemantic = null; |
|
75 |
|
|
76 |
if (relationType != null && relationType.size() > 0) { |
|
77 |
relationSemantic = relationType.get(0); |
|
78 |
} |
|
79 |
|
|
80 |
if (sourceTypes != null && sourceTypes.size() > 0) { |
|
81 |
parsedObject.setType(ObjectType.valueOf(sourceTypes.get(0))); |
|
82 |
} |
|
83 |
|
|
84 |
final List<ObjectRelation> relations = new ArrayList<>(); |
|
85 |
|
|
86 |
ObjectRelation relation = new ObjectRelation(); |
|
87 |
relation.setCompletionStatus(CompletionStatus.incomplete.toString()); |
|
88 |
relation.setRelationProvenance(parsedObject.getDatasourceProvenance()); |
|
89 |
relation.setRelationSemantics(relationSemantic); |
|
90 |
relation.setTargetPID(new PID(targetPid.get(0).getTextValue(), targetPid.get(0).getAttributes().get("type"))); |
|
91 |
if (targetTypes != null || targetTypes.size() > 0) |
|
92 |
relation.setTargetType(ObjectType.valueOf(targetTypes.get(0))); |
|
93 |
relations.add(relation); |
|
94 |
parsedObject.setRelations(relations); |
|
95 |
|
|
96 |
return parsedObject; |
|
97 |
} catch (Throwable e) { |
|
98 |
log.error("Error on parsing parser"); |
|
99 |
} |
|
100 |
return null; |
|
101 |
} |
|
102 |
} |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/model/ResolvedObject.java | ||
---|---|---|
42 | 42 |
*/ |
43 | 43 |
private ObjectType type; |
44 | 44 |
|
45 |
/** |
|
46 |
* Description of the resolved object |
|
47 |
*/ |
|
45 | 48 |
private String description; |
46 | 49 |
|
47 | 50 |
/** |
... | ... | |
59 | 62 |
*/ |
60 | 63 |
private String date; |
61 | 64 |
|
65 |
/** |
|
66 |
* The subjects of the object, each expressed as a (scheme, term) pair |
|
67 |
*/ |
|
62 | 68 |
private List<SubjectType> subjects; |
63 | 69 |
|
64 | 70 |
/** |
65 |
* The relations.
|
|
71 |
* The related objects.
|
|
66 | 72 |
*/ |
67 | 73 |
private List<ObjectRelation> relations; |
68 | 74 |
|
... | ... | |
402 | 408 |
@Override |
403 | 409 |
public String toString() { |
404 | 410 |
|
405 |
String s = "DLIObject \n\t Identifier: %s \n\t Pid:%s \n\t PIDType: %s \n\t Entity Type: %s \n\t Titles:%s \n\t Authors:%s, \n\tProvenance%s"; |
|
411 |
String s = |
|
412 |
"DLIObject \n\t Identifier: %s \n\t Pid:%s \n\t PIDType: %s \n\t Entity Type: %s \n\t Titles:%s \n\t Description:%s \n\t Authors:%s, \n\tProvenance%s"; |
|
406 | 413 |
|
407 |
return String.format(s, this.getIdentifier(), this.getPid(), this.getPidType(), this.getType(), this.getTitles(), |
|
414 |
return String.format(s, this.getIdentifier(), this.getPid(), this.getPidType(), this.getType(), this.getTitles(), this.description,
|
|
408 | 415 |
this.authors, Arrays.toString(this.getDatasourceProvenance().toArray())); |
409 | 416 |
|
410 | 417 |
} |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/resolver/applicationContext-dli-resolver.xml | ||
---|---|---|
40 | 40 |
<property name="cache" ref="dliResolverCache"/> |
41 | 41 |
</bean> |
42 | 42 |
|
43 |
<bean id="ncbinResolver" |
|
44 |
class="eu.dnetlib.resolver.NCBINResolver"> |
|
45 |
<property name="cache" ref="dliResolverCache"/> |
|
46 |
</bean> |
|
47 |
|
|
48 |
<bean id="ncbinParser" |
|
49 |
class="eu.dnetlib.resolver.NCBINParser"/> |
|
50 |
|
|
43 | 51 |
<bean id="crossrefRecordParserJSON" |
44 | 52 |
class="eu.dnetlib.resolver.CrossRefParserJSON"/> |
45 | 53 |
|
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_wf.xml.st | ||
---|---|---|
19 | 19 |
<PARAMETERS> |
20 | 20 |
<PARAM name="collMdstoreId" description="Store for collected records" required="true" managedBy="system" category="MDSTORE_ID"/> |
21 | 21 |
<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" managedBy="system" category="MDSTORE_ID"/> |
22 |
<PARAM name="cleanTransformationRuleId" description="Transformation Rule Identifier" required="true" managedBy="user" category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE')"/> |
|
22 |
<PARAM name="cleanTransformationRuleId" description="Transformation Rule Identifier" required="true" managedBy="user" category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI:')"/>
|
|
23 | 23 |
</PARAMETERS> |
24 | 24 |
<WORKFLOW> |
25 | 25 |
<NODE isStart="true" name="collection" type="LaunchWorkflowTemplate"> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/oai_datacite_transform.xml | ||
---|---|---|
11 | 11 |
<CONFIGURATION> |
12 | 12 |
<IMPORTED/> |
13 | 13 |
<SCRIPT> |
14 |
<TITLE>OAI_Datacite to DMF transform</TITLE> |
|
14 |
<TITLE>DLI: OAI_Datacite to DMF transform</TITLE>
|
|
15 | 15 |
<CODE><![CDATA[ |
16 | 16 |
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
17 | 17 |
xmlns:datetime="http://exslt.org/dates-and-times" xmlns:exslt="http://exslt.org/common" |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/elsevier_transform.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="d6aa3c16-b6e8-4953-a39e-cb2ea99d2ba8_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/> |
|
5 |
<RESOURCE_TYPE value="TransformationRuleDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="TransformationRuleDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2014-11-19T11:05:55+01:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<CONFIGURATION> |
|
12 |
<IMPORTED/> |
|
13 |
<SCRIPT> |
|
14 |
<TITLE>DLI: Elsevier to Scholix transform</TITLE> |
|
15 |
<CODE><![CDATA[ |
|
16 |
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
17 |
xmlns:datetime="http://exslt.org/dates-and-times" xmlns:exslt="http://exslt.org/common" |
|
18 |
xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
19 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
20 |
exclude-result-prefixes="xsl datetime exslt"> |
|
21 |
<xsl:template match="/"> |
|
22 |
<record> |
|
23 |
<xsl:copy-of select="//*[local-name()='header']"/> |
|
24 |
|
|
25 |
<xsl:variable name="source" ><xsl:value-of select="//dri:datasourceprefix"/></xsl:variable> |
|
26 |
<metadata> |
|
27 |
<scholix:link xmlns:scholix="http://www.scholix.org" > |
|
28 |
<assertion_info> |
|
29 |
<source><xsl:value-of select="$source"/></source> |
|
30 |
<relationType scheme="datacite">unknown</relationType> |
|
31 |
</assertion_info> |
|
32 |
<source> |
|
33 |
<pid type="DOI"><xsl:value-of select="//column[./@name='ArticleID']"/></pid> |
|
34 |
<type>publication</type> |
|
35 |
</source> |
|
36 |
|
|
37 |
<target> |
|
38 |
<pid> |
|
39 |
<xsl:attribute name="type"><xsl:value-of select="//column[./@name='db']"/></xsl:attribute> |
|
40 |
<xsl:value-of select="//column[./@name='datasetID']"/></pid> |
|
41 |
<type>dataset</type> |
|
42 |
</target> |
|
43 |
</scholix:link> |
|
44 |
</metadata> |
|
45 |
<oaf:datainfo> |
|
46 |
<oaf:completionStatus>incomplete</oaf:completionStatus> |
|
47 |
<oaf:provisionMode>collected</oaf:provisionMode> |
|
48 |
</oaf:datainfo> |
|
49 |
</record> |
|
50 |
</xsl:template> |
|
51 |
</xsl:stylesheet> |
|
52 |
]]></CODE> |
|
53 |
</SCRIPT> |
|
54 |
</CONFIGURATION> |
|
55 |
<STATUS/> |
|
56 |
<SECURITY_PARAMETERS/> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_by_link_provider.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="8bf9879c-535a-4818-8de7-790a3eb90675_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowTemplateDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<CONFIGURATION> |
|
12 |
<PARAMETERS> |
|
13 |
<PARAM name="dsId" description="Datasource Id" required="true" type="string"/> |
|
14 |
<PARAM name="interface" description="Datasource Interface" required="true" type="string"/> |
|
15 |
<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/> |
|
16 |
<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/> |
|
17 |
<PARAM name="patchMdstoreId" description="Store for patched records" required="true" type="string"/> |
|
18 |
<PARAM name="nativeObjectStoreId" description="Object Store for native images" required="true" type="string"/> |
|
19 |
<PARAM name="indexId" description="Index Identifier" required="true" type="string"/> |
|
20 |
</PARAMETERS> |
|
21 |
<WORKFLOW> |
|
22 |
<NODE name="deleteCollMdStore" type="DeleteMDStore" isStart="true"> |
|
23 |
<DESCRIPTION>Delete the mdstore of collected records</DESCRIPTION> |
|
24 |
<PARAMETERS> |
|
25 |
<PARAM name="mdstoreId" ref="collMdstoreId"/> |
|
26 |
</PARAMETERS> |
|
27 |
<ARCS> |
|
28 |
<ARC to="deleteCleanMdStore"/> |
|
29 |
</ARCS> |
|
30 |
</NODE> |
|
31 |
<NODE name="deleteCleanMdStore" type="DeleteMDStore"> |
|
32 |
<DESCRIPTION>Delete the mdstore of cleaned records</DESCRIPTION> |
|
33 |
<PARAMETERS> |
|
34 |
<PARAM name="mdstoreId" ref="cleanMdstoreId"/> |
|
35 |
</PARAMETERS> |
|
36 |
<ARCS> |
|
37 |
<ARC to="removeApiExtraFields"/> |
|
38 |
</ARCS> |
|
39 |
</NODE> |
|
40 |
|
|
41 |
<NODE name="removeApiExtraFields" type="RemoveApiExtraFields"> |
|
42 |
<DESCRIPTION>Reset the extrafields of the api</DESCRIPTION> |
|
43 |
<PARAMETERS> |
|
44 |
<PARAM name="datasourceId" ref="dsId"/> |
|
45 |
<PARAM name="datasourceInterface" ref="interface"/> |
|
46 |
<PARAM name="fields"> |
|
47 |
<LIST> |
|
48 |
<ITEM value="last_collection_total"/> |
|
49 |
<ITEM value="last_collection_date"/> |
|
50 |
<ITEM value="last_collection_mdId"/> |
|
51 |
<ITEM value="last_aggregation_total"/> |
|
52 |
<ITEM value="last_aggregation_date"/> |
|
53 |
<ITEM value="last_aggregation_mdId"/> |
|
54 |
</LIST> |
|
55 |
</PARAM> |
|
56 |
</PARAMETERS> |
|
57 |
<ARCS> |
|
58 |
<ARC to="success"/> |
|
59 |
</ARCS> |
|
60 |
</NODE> |
|
61 |
</WORKFLOW> |
|
62 |
</CONFIGURATION> |
|
63 |
</BODY> |
|
64 |
</RESOURCE_PROFILE> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_hi_link_provider.xml | ||
---|---|---|
45 | 45 |
<PARAMETERS> |
46 | 46 |
<PARAM name="wfName" value="Aggregate Metadata from DLI Link Provider Datasource[Ingestion]"/> |
47 | 47 |
<PARAM name="wfTemplate" value="/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_wf.xml.st"/> |
48 |
<PARAM name="description" value="Aggregate and Validate Metadata from DLI Link Provider [Ingestion]"/>
|
|
48 |
<PARAM name="description" value="Aggregate Metadata from DLI Link Provider [Ingestion]"/> |
|
49 | 49 |
</PARAMETERS> |
50 | 50 |
<ARCS> |
51 | 51 |
<ARC to="createDLINative"/> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_transform_template.xml | ||
---|---|---|
21 | 21 |
<DESCRIPTION>Fetch records from MDStore</DESCRIPTION> |
22 | 22 |
<PARAMETERS> |
23 | 23 |
<PARAM name="mdId" ref="collMdstoreId"/> |
24 |
<PARAM name="mdFormat" value="oai_efg"/> |
|
25 | 24 |
<PARAM name="eprParam" value="orig_epr"/> |
26 | 25 |
</PARAMETERS> |
27 | 26 |
<ARCS> |
modules/dnet-dli/trunk/pom.xml | ||
---|---|---|
44 | 44 |
<version>2.11</version> |
45 | 45 |
</dependency> |
46 | 46 |
|
47 |
<!-- https://mvnrepository.com/artifact/org.biojava/biojava3-core --> |
|
48 |
<dependency> |
|
49 |
<groupId>org.biojava</groupId> |
|
50 |
<artifactId>biojava3-core</artifactId> |
|
51 |
<version>3.0</version> |
|
52 |
</dependency> |
|
47 | 53 |
|
54 |
|
|
48 | 55 |
<dependency> |
49 | 56 |
<groupId>junit</groupId> |
50 | 57 |
<artifactId>junit</artifactId> |
modules/dnet-core-components/trunk/src/main/resources/eu/dnetlib/cnr-default.properties | ||
---|---|---|
71 | 71 |
services.aggregator.name=Aggregator TEST |
72 | 72 |
services.aggregator.host=driver33.isti.cnr.it |
73 | 73 |
services.aggregator.port=9000 |
74 |
services.aggregator.country=DE
|
|
74 |
services.aggregator.country=EU
|
|
75 | 75 |
services.aggregator.ui.address=http://${services.aggregator.host}:${services.aggregator.port}/cgi-bin/ASmanager.pl |
76 | 76 |
services.aggregator.ws.endpoint=http://${services.aggregator.host}:${services.aggregator.port}/cgi-bin/SoapDriver.cgi |
77 | 77 |
|
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/oai/SyncOAIStoreJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.workflows.nodes.oai; |
2 | 2 |
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
import org.springframework.beans.factory.annotation.Autowired; |
|
6 |
|
|
7 | 3 |
import eu.dnetlib.enabling.resultset.client.ResultSetClient; |
8 | 4 |
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob; |
9 | 5 |
import eu.dnetlib.msro.workflows.procs.Token; |
10 | 6 |
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider; |
11 | 7 |
import eu.dnetlib.rmi.common.ResultSet; |
12 | 8 |
import eu.dnetlib.rmi.common.ResultSetException; |
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
13 | 12 |
|
14 | 13 |
public class SyncOAIStoreJobNode extends AbstractOAIJobNode { |
15 | 14 |
|
... | ... | |
20 | 19 |
@Autowired |
21 | 20 |
private ResultSetClient resultSetClient; |
22 | 21 |
|
23 |
private String formatParam, layoutParam, interpretationParam, oai_dbName;
|
|
22 |
private String oai_dbName; |
|
24 | 23 |
|
25 | 24 |
private boolean alwaysNewRecord = false; |
26 | 25 |
|
... | ... | |
34 | 33 |
job.setAction("SYNC"); |
35 | 34 |
job.getParameters().put("oai_syncEpr", rs.toJson()); |
36 | 35 |
job.getParameters().put("oai_recordSource", token.getEnv().getAttribute("oai_recordSource", String.class)); |
37 |
job.getParameters().put("oai_formatName", token.getEnv().getAttribute(this.formatParam, String.class));
|
|
38 |
job.getParameters().put("oai_formatLayout", token.getEnv().getAttribute(this.layoutParam, String.class));
|
|
39 |
job.getParameters().put("oai_formatInterpretation", token.getEnv().getAttribute(this.interpretationParam, String.class));
|
|
36 |
job.getParameters().put("oai_formatName", format);
|
|
37 |
job.getParameters().put("oai_formatLayout", layout);
|
|
38 |
job.getParameters().put("oai_formatInterpretation", interpretation);
|
|
40 | 39 |
job.getParameters().put("oai_dbName", getOai_dbName()); |
41 | 40 |
job.getParameters().put("oai_alwaysNewRecord", String.valueOf(this.alwaysNewRecord)); |
42 | 41 |
token.getEnv().setAttribute("oai_dbName", getOai_dbName()); |
... | ... | |
52 | 51 |
this.eprParam = eprParam; |
53 | 52 |
} |
54 | 53 |
|
55 |
public String getFormatParam() { |
|
56 |
return this.formatParam; |
|
57 |
} |
|
58 |
|
|
59 |
public void setFormatParam(final String formatParam) { |
|
60 |
this.formatParam = formatParam; |
|
61 |
} |
|
62 |
|
|
63 |
public String getLayoutParam() { |
|
64 |
return this.layoutParam; |
|
65 |
} |
|
66 |
|
|
67 |
public void setLayoutParam(final String layoutParam) { |
|
68 |
this.layoutParam = layoutParam; |
|
69 |
} |
|
70 |
|
|
71 |
public String getInterpretationParam() { |
|
72 |
return this.interpretationParam; |
|
73 |
} |
|
74 |
|
|
75 |
public void setInterpretationParam(final String interpretationParam) { |
|
76 |
this.interpretationParam = interpretationParam; |
|
77 |
} |
|
78 |
|
|
79 | 54 |
public String getOai_dbName() { |
80 | 55 |
return this.oai_dbName; |
81 | 56 |
} |
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/oai/AbstractOAIJobNode.java | ||
---|---|---|
6 | 6 |
import eu.dnetlib.msro.workflows.procs.Token; |
7 | 7 |
import eu.dnetlib.rmi.common.ResultSetException; |
8 | 8 |
import eu.dnetlib.rmi.provision.OAIStoreService; |
9 |
import org.apache.commons.lang3.StringUtils; |
|
9 | 10 |
|
10 | 11 |
public abstract class AbstractOAIJobNode extends BlackboardJobNode { |
11 | 12 |
|
12 |
private String dbName;
|
|
13 |
protected String dbName;
|
|
13 | 14 |
|
14 |
private String collectionName;
|
|
15 |
protected String collectionName;
|
|
15 | 16 |
|
17 |
protected String format, layout, interpretation; |
|
18 |
|
|
16 | 19 |
@Override |
17 | 20 |
protected String obtainServiceId(final Env env) { |
18 | 21 |
return getServiceLocator().getServiceId(OAIStoreService.class); |
... | ... | |
36 | 39 |
} |
37 | 40 |
|
38 | 41 |
public String getCollectionName() { |
39 |
return collectionName; |
|
42 |
|
|
43 |
if (!StringUtils.isBlank(collectionName)) |
|
44 |
return collectionName; |
|
45 |
if (!StringUtils.isBlank(format) && !StringUtils.isBlank(layout) && !StringUtils.isBlank(interpretation)) { |
|
46 |
return String.format("%s-%s-%s", format, layout, interpretation); |
|
47 |
} |
|
48 |
return null; |
|
49 |
|
|
40 | 50 |
} |
41 | 51 |
|
42 | 52 |
public void setCollectionName(final String collectionName) { |
43 | 53 |
this.collectionName = collectionName; |
44 | 54 |
} |
55 |
|
|
56 |
public String getFormat() { |
|
57 |
return format; |
|
58 |
} |
|
59 |
|
|
60 |
public void setFormat(final String format) { |
|
61 |
this.format = format; |
|
62 |
} |
|
63 |
|
|
64 |
public String getLayout() { |
|
65 |
return layout; |
|
66 |
} |
|
67 |
|
|
68 |
public void setLayout(final String layout) { |
|
69 |
this.layout = layout; |
|
70 |
} |
|
71 |
|
|
72 |
public String getInterpretation() { |
|
73 |
return interpretation; |
|
74 |
} |
|
75 |
|
|
76 |
public void setInterpretation(final String interpretation) { |
|
77 |
this.interpretation = interpretation; |
|
78 |
} |
|
45 | 79 |
} |
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/MdBuilderJobNode.java | ||
---|---|---|
4 | 4 |
import java.net.URLEncoder; |
5 | 5 |
import java.util.List; |
6 | 6 |
|
7 |
import org.antlr.stringtemplate.StringTemplate; |
|
8 |
import org.apache.commons.io.IOUtils; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
12 |
import org.springframework.beans.factory.annotation.Required; |
|
13 |
import org.springframework.core.io.Resource; |
|
14 |
|
|
15 | 7 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
16 | 8 |
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory; |
17 | 9 |
import eu.dnetlib.msro.workflows.graph.Arc; |
18 | 10 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
19 | 11 |
import eu.dnetlib.msro.workflows.procs.Env; |
12 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
|
20 | 13 |
import eu.dnetlib.rmi.common.ResultSet; |
21 | 14 |
import eu.dnetlib.rmi.enabling.ISLookUpException; |
22 | 15 |
import eu.dnetlib.rmi.enabling.ISLookUpService; |
23 | 16 |
import eu.dnetlib.rmi.manager.MSROException; |
17 |
import org.antlr.stringtemplate.StringTemplate; |
|
18 |
import org.apache.commons.io.IOUtils; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.springframework.beans.factory.annotation.Autowired; |
|
22 |
import org.springframework.beans.factory.annotation.Required; |
|
23 |
import org.springframework.core.io.Resource; |
|
24 | 24 |
|
25 | 25 |
public class MdBuilderJobNode extends SimpleJobNode { |
26 | 26 |
|
... | ... | |
48 | 48 |
try { |
49 | 49 |
st = new StringTemplate(IOUtils.toString(getMdBuilderTemplateXslt().getInputStream())); |
50 | 50 |
st.setAttribute("datasourceId", this.datasourceId); |
51 |
st.setAttribute("xpath", getMetadataIdentifierPath()); |
|
51 |
st.setAttribute("xpath", getMetadataIdentifierPath().replace("\"", "'"));
|
|
52 | 52 |
st.setAttribute("baseurl", URLEncoder.encode(getBaseUrl(), "UTF-8")); |
53 | 53 |
st.setAttribute("metadatanamespace", getMetadataNamespace()); |
54 |
|
|
55 |
/* |
|
56 |
* If namespacePrefix has been already pushed to env by some custom JobNode e.g. ObtainOpenaireDataSourceParamsJobNode then push |
|
57 |
* it to ST. Else: a) try to get it from EXTRAFIELDS of the datasource b) try to get it from DATASOURCE_ORIGINAL_ID of the |
|
58 |
* datasource c) if none of them is present, then push the datasourceId to ST |
|
59 |
*/ |
|
60 |
if (env.hasAttribute("namespacePrefix")) { |
|
61 |
st.setAttribute("namespacePrefix", env.getAttribute("namespacePrefix")); |
|
62 |
} else { |
|
63 |
List<String> namespacePrefix; |
|
64 |
String xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix']/string()"; |
|
65 |
namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery); |
|
66 |
if (namespacePrefix.size() != 0) { |
|
67 |
st.setAttribute("namespacePrefix", namespacePrefix.get(0)); |
|
68 |
} else { |
|
69 |
xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//DATASOURCE_ORIGINAL_ID/string()"; |
|
70 |
namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery); |
|
71 |
if (namespacePrefix.size() != 0) { |
|
72 |
st.setAttribute("namespacePrefix", namespacePrefix.get(0)); |
|
73 |
} else { |
|
74 |
st.setAttribute("namespacePrefix", this.datasourceId); |
|
75 |
} |
|
76 |
} |
|
54 |
List<String> namespacePrefix; |
|
55 |
String xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix']/string()"; |
|
56 |
namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery); |
|
57 |
if (namespacePrefix != null && namespacePrefix.size() != 0) { |
|
58 |
st.setAttribute("namespacePrefix", namespacePrefix.get(0)); |
|
77 | 59 |
} |
78 |
|
|
79 | 60 |
final ResultSet<String> rsOut = this.resultSetFactory.xsltMap(rsIn, st.toString()); |
80 | 61 |
|
Also available in: Unified diff
removed useless mdformat param from FetchMdStore, and changed implementation of abstract OAI Node