Project

General

Profile

« Previous | Next » 

Revision 43902

Removed the useless mdformat param from FetchMdStore, and changed the implementation of the abstract OAI Node

View differences:

modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecord.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
3
            xmlns="http://namespace.openaire.eu/">
4
	<oai:header>
5
		<dri:objIdentifier>dli::r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier>
6
		<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier>
7
		<dri:dateOfCollection>2016-09-12T16:55:22.27+02:00</dri:dateOfCollection>
8
		<dri:repositoryId/>
9
		<dri:datasourceprefix>dli::r3d100010134</dri:datasourceprefix>
10
		<dri:datasourceID/>
11
		<identifier xmlns="http://www.openarchives.org/OAI/2.0/"
12
		>oai:pangaea.de:doi:10.1594/PANGAEA.432865
13
		</identifier>
14
		<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-06-25T12:35:00Z</datestamp>
15
	</oai:header>
16
	<metadata xmlns="http://www.openarchives.org/OAI/2.0/">
17
		<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
18
		          xmlns="http://datacite.org/schema/kernel-3"
19
		          xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
20
			<identifier identifierType="DOI">10.1594/PANGAEA.432865</identifier>
21
			<creators>
22
				<creator>
23
					<creatorName>WOCE Sea Level, WSL</creatorName>
24
					<creatorName>WOCE Sea Level, WSL</creatorName>
25
				</creator>
26
			</creators>
27
			<titles>
28
				<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research
29
					quality database)
30
				</title>
31
				<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research
32
					quality database)
33
				</title>
34
			</titles>
35
			<publisher>PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher>
36
			<publicationYear>2006</publicationYear>
37
			<subjects>
38
				<subject subjectScheme="Parameter">DATE/TIME</subject>
39
				<subject subjectScheme="Parameter">Sea level</subject>
40
				<subject subjectScheme="Campaign">SeaLevel</subject>
41
				<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject>
42
			</subjects>
43
			<contributors>
44
				<contributor contributorType="HostingInstitution">
45
					<contributorName>Sea Level Center, University of Hawaii</contributorName>
46
				</contributor>
47
			</contributors>
48
			<dates>
49
				<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date>
50
			</dates>
51
			<language>eng</language>
52
			<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType>
53
			<relatedIdentifiers>
54
				<relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy"
55
				>http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt
56
				</relatedIdentifier>
57
			</relatedIdentifiers>
58
			<sizes>
59
				<size>365 data points</size>
60
			</sizes>
61
			<formats>
62
				<format>text/tab-separated-values</format>
63
			</formats>
64
			<rightsList>
65
				<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons
66
					Attribution 3.0 Unported (CC-BY)
67
				</rights>
68
			</rightsList>
69
			<geoLocations>
70
				<geoLocation>
71
					<geoLocationPoint>41.5233 -70.6717</geoLocationPoint>
72
				</geoLocation>
73
			</geoLocations>
74
		</resource>
75
	</metadata>
76
</oai:record>
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/ProteinResolver.java
1
package eu.dnetlib.resolver;
2

  
3
import java.util.Arrays;
4

  
5
import eu.dnetlib.resolver.model.CompletionStatus;
6
import eu.dnetlib.resolver.model.ObjectProvenance;
7
import eu.dnetlib.resolver.model.ObjectProvisionMode;
8
import eu.dnetlib.resolver.model.ResolvedObject;
9
import org.springframework.beans.factory.annotation.Autowired;
10

  
11
/**
12
 * Created by sandro on 9/26/16.
13
 */
14
public class ProteinResolver extends AbstractPIDResolver {
15

  
16
	private static String baseUrl = "http://www.uniprot.org/uniprot/%s.xml";
17

  
18
	@Autowired
19
	private ProteinParser proteinParser;
20

  
21
	@Override
22
	protected boolean canResolvePid(final String pidType) {
23
		return pidType != null && (pidType.toLowerCase().equals("protein") || pidType.toLowerCase().equals("ncbi-n"));
24
	}
25

  
26
	@Override
27
	protected ResolvedObject resolve(final String pid, final String pidType) {
28

  
29
		if ((pid == null) || !pid.toLowerCase().startsWith("p"))
30
			return null;
31
		final ResolvedObject record = proteinParser.parseRecord(requestURL(String.format(baseUrl, pid)));
32
		if (record != null) {
33
			record.setPid(pid);
34
			record.setPidType(pidType);
35
			record.setCompletionStatus(CompletionStatus.complete.toString());
36
			ObjectProvenance provenance = new ObjectProvenance();
37
			provenance.setCompletionStatus(CompletionStatus.complete.toString());
38
			provenance.setDatasourceId("dli::r3d100010357");
39
			provenance.setDatasource("The Universal Protein Resource");
40
			provenance.setProvisionMode(ObjectProvisionMode.resolved.toString());
41
			record.setDatasourceProvenance(Arrays.asList(provenance));
42
		}
43
		return record;
44
	}
45

  
46
	public ProteinParser getProteinParser() {
47
		return proteinParser;
48
	}
49

  
50
	public void setProteinParser(final ProteinParser proteinParser) {
51
		this.proteinParser = proteinParser;
52
	}
53
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/ProteinParser.java
1
package eu.dnetlib.resolver;
2

  
3
import java.util.List;
4

  
5
import com.ximpleware.AutoPilot;
6
import com.ximpleware.VTDGen;
7
import com.ximpleware.VTDNav;
8
import eu.dnetlib.resolver.model.ObjectType;
9
import eu.dnetlib.resolver.model.ResolvedObject;
10
import eu.dnetlib.resolver.parser.UtilityParser;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13

  
14
/**
15
 * Created by sandro on 9/26/16.
16
 */
17
public class ProteinParser {
18

  
19
	private static final Log log = LogFactory.getLog(ProteinParser.class);
20

  
21
	public ResolvedObject parseRecord(final String record) {
22
		try {
23
			final ResolvedObject parsedObject = new ResolvedObject();
24
			final VTDGen vg = new VTDGen();
25
			vg.setDoc(record.getBytes());
26
			vg.parse(true);
27
			final VTDNav vn = vg.getNav();
28
			final AutoPilot ap = new AutoPilot(vn);
29

  
30
			final List<String> titles =
31
					UtilityParser.getTextValue(ap, vn, "//*[local-name()='protein']/*[local-name()='recommendedName']/*[local-name()='fullName']");
32

  
33
			parsedObject.setTitles(titles);
34

  
35
			titles.forEach(it -> System.out.println("it = " + it));
36

  
37
			final List<String> descriptions =
38
					UtilityParser.getTextValue(ap, vn, "//*[local-name()='comment' and ./@type='function']/*[local-name()='text']");
39

  
40
			if (descriptions != null && descriptions.size() > 0) {
41
				parsedObject.setDescription(descriptions.get(0));
42
			}
43

  
44
			parsedObject.setType(ObjectType.dataset);
45
			return parsedObject;
46
		} catch (Throwable e) {
47
			log.error(String.format("Error on parsing document %s", record), e);
48
			return null;
49
		}
50
	}
51

  
52
}
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_by_link_proovider.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="8bf9879c-535a-4818-8de7-790a3eb90675_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<CONFIGURATION>
12
			<PARAMETERS>
13
				<PARAM name="dsId" description="Datasource Id" required="true" type="string"/>
14
				<PARAM name="interface" description="Datasource Interface" required="true" type="string"/>
15
				<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/>
16
				<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/>
17
				<PARAM name="patchMdstoreId" description="Store for patched records" required="true" type="string"/>
18
				<PARAM name="nativeObjectStoreId" description="Object Store for native images" required="true" type="string"/>
19
				<PARAM name="indexId" description="Index Identifier" required="true" type="string"/>
20
			</PARAMETERS>
21
			<WORKFLOW>
22
				<NODE name="deleteCollMdStore" type="DeleteMDStore" isStart="true">
23
					<DESCRIPTION>Delete the mdstore of collected records</DESCRIPTION>
24
					<PARAMETERS>
25
						<PARAM name="mdstoreId" ref="collMdstoreId"/>
26
					</PARAMETERS>
27
					<ARCS>
28
						<ARC to="deleteCleanMdStore"/>
29
					</ARCS>
30
				</NODE>
31
				<NODE name="deleteCleanMdStore" type="DeleteMDStore">
32
					<DESCRIPTION>Delete the mdstore of cleaned records</DESCRIPTION>
33
					<PARAMETERS>
34
						<PARAM name="mdstoreId" ref="cleanMdstoreId"/>
35
					</PARAMETERS>
36
					<ARCS>
37
						<ARC to="removeApiExtraFields"/>
38
					</ARCS>
39
				</NODE>
40

  
41
				<NODE name="removeApiExtraFields" type="RemoveApiExtraFields">
42
					<DESCRIPTION>Reset the extrafields of the api</DESCRIPTION>
43
					<PARAMETERS>
44
						<PARAM name="datasourceId" ref="dsId"/>
45
						<PARAM name="datasourceInterface" ref="interface"/>
46
						<PARAM name="fields">
47
							<LIST>
48
								<ITEM value="last_collection_total"/>
49
								<ITEM value="last_collection_date"/>
50
								<ITEM value="last_collection_mdId"/>
51
								<ITEM value="last_aggregation_total"/>
52
								<ITEM value="last_aggregation_date"/>
53
								<ITEM value="last_aggregation_mdId"/>
54
							</LIST>
55
						</PARAM>
56
					</PARAMETERS>
57
					<ARCS>
58
						<ARC to="success"/>
59
					</ARCS>
60
				</NODE>
61
			</WORKFLOW>
62
		</CONFIGURATION>
63
	</BODY>
64
</RESOURCE_PROFILE>
webapps/dnet-dli-container/trunk/src/main/resources/log4j.properties
34 34
log4j.logger.eu.dnetlib.enabling.is.sn.ISSNServiceCore=WARN
35 35
log4j.logger.eu.dnetlib.xml.database.exist.ExistDatabase=WARN
36 36
log4j.logger.eu.dnetlib.enabling.is.store.AbstractContentInitializer=FATAL
37
log4j.logger.eu.dnetlib.data.collector.plugins.oai.engine.HttpConnector=FATAL
37 38

  
38 39
log4j.logger.org.apache.hadoop.hbase.mapreduce.TableInputFormatBase=FATAL
39 40

  
modules/dnet-dli/trunk/src/test/java/eu/dnetlib/resolver/DLIParserTest.java
2 2

  
3 3
import java.io.IOException;
4 4
import java.io.InputStream;
5
import java.nio.file.FileSystems;
6
import java.nio.file.Files;
7
import java.nio.file.Path;
5 8
import java.util.Arrays;
9
import java.util.List;
10
import java.util.stream.Collectors;
6 11

  
12
import eu.dnetlib.resolver.model.CompletionStatus;
7 13
import eu.dnetlib.resolver.model.ResolvedObject;
8 14
import eu.dnetlib.resolver.model.serializer.DMFSerializer;
9 15
import eu.dnetlib.resolver.parser.DMFDom4jResolverParser;
10 16
import eu.dnetlib.resolver.parser.DMFResolverParser;
17
import eu.dnetlib.resolver.parser.ScholixResolverParser;
11 18
import org.antlr.stringtemplate.StringTemplate;
12 19
import org.apache.commons.io.IOUtils;
20
import org.apache.commons.lang3.tuple.Pair;
13 21
import org.apache.commons.logging.Log;
14 22
import org.apache.commons.logging.LogFactory;
23
import org.apache.http.HttpEntity;
24
import org.apache.http.client.methods.HttpGet;
25
import org.apache.http.impl.client.CloseableHttpClient;
26
import org.apache.http.impl.client.HttpClients;
27
import org.apache.http.util.EntityUtils;
15 28
import org.junit.Assert;
16 29
import org.junit.Test;
17 30

  
......
24 37
	private static final Log log = LogFactory.getLog(DLIParserTest.class);
25 38

  
26 39
	@Test
27
	public void testParser() throws IOException {
40
	public void testDMFParser() throws IOException {
28 41
		final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecord.xml");
29 42

  
30 43
		String str = IOUtils.toString(resourceAsStream);
......
47 60
	}
48 61

  
49 62
	@Test
63
	public void testScholixParser() throws IOException {
64
		final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecordScholix.xml");
65

  
66
		String str = IOUtils.toString(resourceAsStream);
67
		ScholixResolverParser parser = new ScholixResolverParser();
68
		ResolvedObject object = parser.parseObject(str);
69

  
70
		System.out.println("object = " + object);
71

  
72
		Assert.assertNotNull(object);
73
		System.out.println("object.getCompletionStatus() = " + object.getCompletionStatus());
74

  
75
		Assert.assertTrue(object.getCompletionStatus().equals(CompletionStatus.incomplete.toString()));
76

  
77
		Assert.assertNotNull(object.getRelations());
78

  
79
		Assert.assertTrue(object.getRelations().size() == 1);
80

  
81
		System.out.println("object.getRelations().get(0) = " + object.getRelations().get(0));
82

  
83
	}
84

  
85
	@Test
86
	public void testProteinParser() throws IOException {
87
		final NCBINResolver resolver = new NCBINResolver();
88
		resolver.setNCBINParser(new NCBINParser());
89
		final ResolvedObject p02768 = resolver.resolve("U36380", "protein");
90

  
91
		System.out.println(p02768.getDescription());
92

  
93
		System.out.println("p02768 = " + p02768);
94

  
95
	}
96

  
97
	@Test
98
	public void testNCBIn() throws Exception {
99

  
100
		Path path = FileSystems.getDefault().getPath("/var/lib/dli/elsevier.csv");
101

  
102
		//Files.lines(path).forEach(it -> System.out.println("it = " + it));
103

  
104
		List<Pair<String, String>> elem =
105
				Files.lines(path).map(it -> it.split("@")).filter(it -> (it != null && it.length == 5)).map(it -> Pair.of(it[3], it[4]))
106
						.collect(Collectors.toList());
107

  
108
		final NCBINResolver resolver = new NCBINResolver();
109
		resolver.setNCBINParser(new NCBINParser());
110

  
111
		elem.forEach(it -> {
112
			if (it.getLeft().equals("ncbi-n")) {
113
				final ResolvedObject obj = resolver.resolve(it.getRight(), "ncbi-n");
114
				System.out.println("p02768 = " + obj);
115

  
116
			}
117
		});
118

  
119
	}
120

  
121
	private String requestUrl(final String url) {
122
		final CloseableHttpClient httpclient = HttpClients.createDefault();
123
		try {
124
			HttpGet httpGet = new HttpGet(url);
125
			final String out = httpclient.execute(httpGet, response -> {
126
				int status = response.getStatusLine().getStatusCode();
127
				if (status >= 200 && status < 300) {
128
					HttpEntity entity = response.getEntity();
129
					return entity != null ? EntityUtils.toString(entity) : null;
130
				} else {
131
					return null;
132
				}
133
			});
134
			return out;
135
		} catch (Throwable e) {
136
			log.error(e);
137
			return null;
138
		} finally {
139
			try {
140
				httpclient.close();
141
			} catch (IOException e) {
142
				log.error("Error on closing httpclient", e);
143
			}
144
		}
145
	}
146

  
147

  
148

  
149

  
150

  
151
	@Test
50 152
	public void testDom4jParser() throws Exception {
51 153

  
52 154
		final InputStream resourceAsStream = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputRecord.xml");
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecordDMF.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<oai:record xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
3
            xmlns="http://namespace.openaire.eu/">
4
	<oai:header>
5
		<dri:objIdentifier>dli::r3d100010134::00002f60593fd1f758fb838fafb46795</dri:objIdentifier>
6
		<dri:recordIdentifier>oai:pangaea.de:doi:10.1594/PANGAEA.432865</dri:recordIdentifier>
7
		<dri:dateOfCollection>2016-09-12T16:55:22.27+02:00</dri:dateOfCollection>
8
		<dri:repositoryId/>
9
		<dri:datasourceprefix>dli::r3d100010134</dri:datasourceprefix>
10
		<dri:datasourceID/>
11
		<identifier xmlns="http://www.openarchives.org/OAI/2.0/"
12
		>oai:pangaea.de:doi:10.1594/PANGAEA.432865
13
		</identifier>
14
		<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2016-06-25T12:35:00Z</datestamp>
15
	</oai:header>
16
	<metadata xmlns="http://www.openarchives.org/OAI/2.0/">
17
		<resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
18
		          xmlns="http://datacite.org/schema/kernel-3"
19
		          xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
20
			<identifier identifierType="DOI">10.1594/PANGAEA.432865</identifier>
21
			<creators>
22
				<creator>
23
					<creatorName>WOCE Sea Level, WSL</creatorName>
24
					<creatorName>WOCE Sea Level, WSL</creatorName>
25
				</creator>
26
			</creators>
27
			<titles>
28
				<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research
29
					quality database)
30
				</title>
31
				<title>Daily sea level from coastal tide gauge station Woods_Hole in 1978 (Research
32
					quality database)
33
				</title>
34
			</titles>
35
			<publisher>PANGAEA - Data Publisher for Earth &amp; Environmental Science</publisher>
36
			<publicationYear>2006</publicationYear>
37
			<subjects>
38
				<subject subjectScheme="Parameter">DATE/TIME</subject>
39
				<subject subjectScheme="Parameter">Sea level</subject>
40
				<subject subjectScheme="Campaign">SeaLevel</subject>
41
				<subject subjectScheme="Project">World Ocean Circulation Experiment (WOCE)</subject>
42
			</subjects>
43
			<contributors>
44
				<contributor contributorType="HostingInstitution">
45
					<contributorName>Sea Level Center, University of Hawaii</contributorName>
46
				</contributor>
47
			</contributors>
48
			<dates>
49
				<date dateType="Collected">1978-01-01T12:00:00/1978-12-31T12:00:00</date>
50
			</dates>
51
			<language>eng</language>
52
			<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType>
53
			<relatedIdentifiers>
54
				<relatedIdentifier relatedIdentifierType="URL" relationType="IsDocumentedBy"
55
				>http://store.pangaea.de/Projects/WOCE/SeaLevel_rqds/Woods_Hole.txt
56
				</relatedIdentifier>
57
			</relatedIdentifiers>
58
			<sizes>
59
				<size>365 data points</size>
60
			</sizes>
61
			<formats>
62
				<format>text/tab-separated-values</format>
63
			</formats>
64
			<rightsList>
65
				<rights rightsURI="http://creativecommons.org/licenses/by/3.0/">Creative Commons
66
					Attribution 3.0 Unported (CC-BY)
67
				</rights>
68
			</rightsList>
69
			<geoLocations>
70
				<geoLocation>
71
					<geoLocationPoint>41.5233 -70.6717</geoLocationPoint>
72
				</geoLocation>
73
			</geoLocations>
74
		</resource>
75
	</metadata>
76
</oai:record>
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/InputRecordScholix.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:oaf="http://namespace.openaire.eu/oaf"
3
        xmlns:dri="http://www.driver-repository.eu/namespace/dri">
4
	<oai:header xmlns:oai="http://www.openarchives.org/OAI/2.0/"
5
	>
6
		<dri:objIdentifier>dli::elsevier::0000d9de7b24b7cb315852926b88e473</dri:objIdentifier>
7
		<dri:recordIdentifier>10.1016/j.jmgm.2014.05.002::3CQZ</dri:recordIdentifier>
8
		<dri:dateOfCollection>2016-09-29T16:23:34.316+02:00</dri:dateOfCollection>
9
		<dri:repositoryId>2d1245d4-c169-4247-9106-0f69f8d752eb_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
10
		<dri:datasourceprefix>dli::elsevier</dri:datasourceprefix>
11
	</oai:header>
12
	<metadata>
13
		<scholix:link xmlns:scholix="http://www.scholix.org">
14
			<assertion_info>
15
				<source>dli::elsevier</source>
16
				<relationType scheme="datacite">unknown</relationType>
17
			</assertion_info>
18
			<source>
19
				<pid type="DOI">10.1016/j.jmgm.2014.05.002</pid>
20
				<type>publication</type>
21
			</source>
22
			<target>
23
				<pid type="pdb">3CQZ</pid>
24
				<type>dataset</type>
25
			</target>
26
		</scholix:link>
27
	</metadata>
28
	<oaf:datainfo>
29
		<oaf:completionStatus>incomplete</oaf:completionStatus>
30
		<oaf:provisionMode>collected</oaf:provisionMode>
31
	</oaf:datainfo>
32
</record>
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/NCBINParser.java
1
package eu.dnetlib.resolver;
2

  
3
import java.util.List;
4

  
5
import com.ximpleware.AutoPilot;
6
import com.ximpleware.VTDGen;
7
import com.ximpleware.VTDNav;
8
import eu.dnetlib.resolver.model.ObjectType;
9
import eu.dnetlib.resolver.model.ResolvedObject;
10
import eu.dnetlib.resolver.parser.UtilityParser;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13

  
14
/**
15
 * Created by sandro on 9/26/16.
16
 */
17
public class NCBINParser {
18

  
19
	private static final Log log = LogFactory.getLog(NCBINParser.class);
20

  
21
	public ResolvedObject parseRecord(final String record) {
22
		try {
23
			final ResolvedObject parsedObject = new ResolvedObject();
24
			final VTDGen vg = new VTDGen();
25
			vg.setDoc(record.getBytes());
26
			vg.parse(true);
27
			final VTDNav vn = vg.getNav();
28
			final AutoPilot ap = new AutoPilot(vn);
29

  
30
			final List<String> titles =
31
					UtilityParser.getTextValue(ap, vn, "//GBSeq_definition");
32

  
33
			parsedObject.setTitles(titles);
34

  
35
			titles.forEach(it -> System.out.println("it = " + it));
36

  
37
			final List<String> descriptions =
38
					UtilityParser.getTextValue(ap, vn, "//GBSeq_comment");
39

  
40
			if (descriptions != null && descriptions.size() > 0) {
41
				parsedObject.setDescription(descriptions.get(0));
42
			}
43

  
44
			parsedObject.setType(ObjectType.dataset);
45
			return parsedObject;
46
		} catch (Throwable e) {
47
			log.error(String.format("Error on parsing document %s", record), e);
48
			return null;
49
		}
50
	}
51

  
52
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/NCBINResolver.java
1
package eu.dnetlib.resolver;
2

  
3
import java.util.Arrays;
4

  
5
import eu.dnetlib.resolver.model.CompletionStatus;
6
import eu.dnetlib.resolver.model.ObjectProvenance;
7
import eu.dnetlib.resolver.model.ObjectProvisionMode;
8
import eu.dnetlib.resolver.model.ResolvedObject;
9
import org.springframework.beans.factory.annotation.Autowired;
10

  
11
/**
12
 * Created by sandro on 9/26/16.
13
 */
14
public class NCBINResolver extends AbstractPIDResolver {
15

  
16
	private static String baseUrl =
17
			"https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&val=%s&page_size=5&fmt_mask=0&report=gbx&retmode=text&page=1&page_size=1";
18

  
19
	@Autowired
20
	private NCBINParser NCBINParser;
21

  
22
	@Override
23
	protected boolean canResolvePid(final String pidType) {
24
		return pidType != null && (pidType.toLowerCase().equals("protein") || pidType.toLowerCase().equals("ncbi-n"));
25
	}
26

  
27
	@Override
28
	protected ResolvedObject resolve(final String pid, final String pidType) {
29

  
30
		if ((pid == null))
31
			return null;
32
		final ResolvedObject record = NCBINParser.parseRecord(requestURL(String.format(baseUrl, pid)));
33
		if (record != null) {
34
			record.setPid(pid);
35
			record.setPidType(pidType);
36
			record.setCompletionStatus(CompletionStatus.complete.toString());
37
			ObjectProvenance provenance = new ObjectProvenance();
38
			provenance.setCompletionStatus(CompletionStatus.complete.toString());
39
			provenance.setDatasourceId("dli::r3d100010778");
40
			provenance.setDatasource("NCBI Nucleotide");
41
			provenance.setProvisionMode(ObjectProvisionMode.resolved.toString());
42
			record.setDatasourceProvenance(Arrays.asList(provenance));
43
		}
44
		return record;
45
	}
46

  
47
	public NCBINParser getNCBINParser() {
48
		return NCBINParser;
49
	}
50

  
51
	public void setNCBINParser(final NCBINParser NCBINParser) {
52
		this.NCBINParser = NCBINParser;
53
	}
54
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/UtilityParser.java
1
package eu.dnetlib.resolver.parser;
2

  
3
import java.util.ArrayList;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7

  
8
import com.ximpleware.AutoPilot;
9
import com.ximpleware.VTDNav;
10

  
11
/**
12
 * Created by sandro on 9/29/16.
13
 */
14
public class UtilityParser {
15

  
16
	public static List<Node> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
17
			throws Exception {
18
		List<Node> results = new ArrayList<>();
19
		ap.selectXPath(xpath);
20
		while (ap.evalXPath() != -1) {
21
			final Node currentNode = new Node();
22
			final Map<String, String> currentAttributes = new HashMap<>();
23
			int t = vn.getText();
24
			currentNode.setTextValue(vn.toNormalizedString(t));
25

  
26
			attributes.forEach(attributeKey -> {
27
				try {
28
					int attr = vn.getAttrVal(attributeKey);
29
					currentAttributes.put(attributeKey, vn.toNormalizedString(attr));
30
				} catch (Throwable e) {
31

  
32
				}
33
			});
34
			currentNode.setAttributes(currentAttributes);
35
			results.add(currentNode);
36
		}
37
		return results;
38
	}
39

  
40
	public static List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws Exception {
41
		List<String> results = new ArrayList<>();
42
		ap.selectXPath(xpath);
43
		while (ap.evalXPath() != -1) {
44

  
45
			int t = vn.getText();
46
			results.add(vn.toNormalizedString(t));
47
		}
48
		return results;
49
	}
50

  
51
	public static String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws Exception {
52
		ap.selectXPath(xpath);
53
		while (ap.evalXPath() != -1) {
54
			return nav.toNormalizedString(nav.getText());
55
		}
56
		return null;
57
	}
58

  
59
	public static class Node {
60

  
61
		private String textValue;
62

  
63
		private Map<String, String> attributes;
64

  
65
		public String getTextValue() {
66
			return textValue;
67
		}
68

  
69
		public void setTextValue(final String textValue) {
70
			this.textValue = textValue;
71
		}
72

  
73
		public Map<String, String> getAttributes() {
74
			return attributes;
75
		}
76

  
77
		public void setAttributes(final Map<String, String> attributes) {
78
			this.attributes = attributes;
79
		}
80
	}
81

  
82
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/DMFResolverParser.java
1 1
package eu.dnetlib.resolver.parser;
2 2

  
3
import java.util.*;
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
4 6

  
5 7
import com.google.common.collect.Lists;
6 8
import com.ximpleware.AutoPilot;
7 9
import com.ximpleware.VTDGen;
8 10
import com.ximpleware.VTDNav;
9 11
import eu.dnetlib.resolver.model.*;
12
import eu.dnetlib.resolver.parser.UtilityParser.Node;
10 13
import org.apache.commons.logging.Log;
11 14
import org.apache.commons.logging.LogFactory;
12 15

  
......
29 32
			final AutoPilot ap = new AutoPilot(vn);
30 33
			ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
31 34

  
32
			final String datasourcePrefix = getSingleValue(ap, vn, "//dri:datasourceprefix");
35
			final String datasourcePrefix = UtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix");
33 36
			ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf");
34 37

  
35
			final String completionStatus = getSingleValue(ap, vn, "//oaf:completionStatus");
36
			final String provisionMode = getSingleValue(ap, vn, "//oaf:provisionMode");
38
			final String completionStatus = UtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus");
39
			final String provisionMode = UtilityParser.getSingleValue(ap, vn, "//oaf:provisionMode");
37 40

  
38 41
			final ObjectProvenance provenance = new ObjectProvenance();
39 42
			provenance.setDatasourceId(datasourcePrefix);
......
43 46
			parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance));
44 47

  
45 48
			ap.declareXPathNameSpace("datacite", "http://datacite.org/schema/kernel-3");
46
			final List<Map<String, String>> identifierType = getTextValuesWithAttributes(ap, vn, "//datacite:identifier", Lists.newArrayList("identifierType"));
49
			final List<Node> identifierType =
50
					UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:identifier", Lists.newArrayList("identifierType"));
47 51

  
48 52
			if (identifierType != null && identifierType.size() > 0) {
49 53

  
50
				final Map<String, String> result = identifierType.get(0);
51
				parsedObject.setPid(result.get("resultText"));
52
				parsedObject.setPidType(result.get("identifierType"));
54
				final Node result = identifierType.get(0);
55
				parsedObject.setPid(result.getTextValue());
56
				parsedObject.setPidType(result.getAttributes().get("identifierType"));
53 57
			} else {
54 58
				log.error("Error on parsing record the identifire should not null ");
55 59
				return null;
56 60
			}
57 61

  
58
			final List<Map<String, String>> relations =
59
					getTextValuesWithAttributes(ap, vn, "//datacite:relatedIdentifier", Arrays.asList("relatedIdentifierType", "relationType"));
62
			final List<Node> relations =
63
					UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:relatedIdentifier", Arrays.asList("relatedIdentifierType", "relationType"));
60 64

  
61 65
			if (relations != null && relations.size() > 0) {
62 66
				final List<ObjectRelation> relationsResult = new ArrayList<>();
63 67
				relations.forEach(relationMap -> {
64
					final String relationType = relationMap.get("relationType");
65
					final String relatedIdentifierType = relationMap.get("relatedIdentifierType");
66
					final String relatedPid = relationMap.get("resultText");
68
					final String relationType = relationMap.getAttributes().get("relationType");
69
					final String relatedIdentifierType = relationMap.getAttributes().get("relatedIdentifierType");
70
					final String relatedPid = relationMap.getTextValue();
67 71
					final ObjectRelation currentRelation = new ObjectRelation();
68 72
					currentRelation.setTargetPID(new PID(relatedPid, relatedIdentifierType));
69 73
					currentRelation.setRelationSemantics(relationType);
......
73 77
				parsedObject.setRelations(relationsResult);
74 78
			}
75 79

  
76
			final List<Map<String, String>> subjects = getTextValuesWithAttributes(ap, vn, "//datacite:subject", Arrays.asList("subjectScheme"));
80
			final List<Node> subjects = UtilityParser.getTextValuesWithAttributes(ap, vn, "//datacite:subject", Arrays.asList("subjectScheme"));
77 81

  
78 82
			if (subjects != null && subjects.size() > 0) {
79 83
				final List<SubjectType> subjectResult = new ArrayList<>();
80 84
				subjects.forEach(subjectMap -> {
81
					final SubjectType subject = new SubjectType(subjectMap.get("subjectScheme"), subjectMap.get("resultText"));
85
					final SubjectType subject = new SubjectType(subjectMap.getAttributes().get("subjectScheme"), subjectMap.getTextValue());
82 86
					subjectResult.add(subject);
83 87
				});
84 88
				parsedObject.setSubjects(subjectResult);
......
86 90

  
87 91
			parsedObject.setCompletionStatus(completionStatus);
88 92

  
89
			final List<String> creators = getTextValue(ap, vn, "//datacite:creator/datacite:creatorName");
93
			final List<String> creators = UtilityParser.getTextValue(ap, vn, "//datacite:creator/datacite:creatorName");
90 94
			if (creators != null && creators.size() > 0) {
91 95
				parsedObject.setAuthors(creators);
92 96
			}
93
			final List<String> titles = getTextValue(ap, vn, "//datacite:title");
97
			final List<String> titles = UtilityParser.getTextValue(ap, vn, "//datacite:title");
94 98
			if (titles != null && titles.size() > 0) {
95 99
				parsedObject.setTitles(titles);
96 100
			}
97
			final String type = getSingleValue(ap, vn, "//datacite:resourceType");
101
			final String type = UtilityParser.getSingleValue(ap, vn, "//datacite:resourceType");
98 102

  
99 103
			if (setDatasetType(parsedObject, type)) return null;
100 104

  
101
			final List<String> dates = getTextValue(ap, vn, "//datacite:dates/date");
105
			final List<String> dates = UtilityParser.getTextValue(ap, vn, "//datacite:dates/date");
102 106

  
103 107
			if (dates != null && dates.size() > 0) {
104 108
				parsedObject.setDate(dates.get(0));
......
110 114
		}
111 115
	}
112 116

  
113
	private List<Map<String, String>> getTextValuesWithAttributes(final AutoPilot ap, final VTDNav vn, final String xpath, final List<String> attributes)
114
			throws Exception {
115
		List<Map<String, String>> results = new ArrayList<>();
116
		ap.selectXPath(xpath);
117
		while (ap.evalXPath() != -1) {
118
			Map<String, String> currentValue = new HashMap<>();
119
			int t = vn.getText();
120
			currentValue.put("resultText", vn.toNormalizedString(t));
121
			attributes.forEach(attributeKey -> {
122
				try {
123
					int attr = vn.getAttrVal(attributeKey);
124
					currentValue.put(attributeKey, vn.toNormalizedString(attr));
125
				} catch (Throwable e) {
126 117

  
127
				}
128
			});
129 118

  
130
			results.add(currentValue);
131
		}
132
		return results;
133
	}
134

  
135
	private List<String> getTextValue(final AutoPilot ap, final VTDNav vn, final String xpath) throws Exception {
136
		List<String> results = new ArrayList<>();
137
		ap.selectXPath(xpath);
138
		while (ap.evalXPath() != -1) {
139

  
140
			int t = vn.getText();
141
			results.add(vn.toNormalizedString(t));
142
		}
143
		return results;
144
	}
145

  
146
	private String getSingleValue(final AutoPilot ap, final VTDNav nav, final String xpath) throws Exception {
147
		ap.selectXPath(xpath);
148
		while (ap.evalXPath() != -1) {
149
			return nav.toNormalizedString(nav.getText());
150
		}
151
		return null;
152
	}
153

  
154 119
}
155 120

  
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/parser/ScholixResolverParser.java
1
package eu.dnetlib.resolver.parser;
2

  
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
6

  
7
import com.google.common.collect.Lists;
8
import com.ximpleware.AutoPilot;
9
import com.ximpleware.VTDGen;
10
import com.ximpleware.VTDNav;
11
import eu.dnetlib.resolver.model.*;
12
import eu.dnetlib.resolver.parser.UtilityParser.Node;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15

  
16
/**
17
 * Created by sandro on 9/29/16.
18
 */
19
public class ScholixResolverParser extends AbstractResolverParser {
20

  
21
	private static final Log log = LogFactory.getLog(ScholixResolverParser.class);
22

  
23
	@Override
24
	public ResolvedObject parseObject(final String record) {
25

  
26
		try {
27
			final ResolvedObject parsedObject = new ResolvedObject();
28
			final VTDGen vg = new VTDGen();
29
			vg.setDoc(record.getBytes());
30
			vg.parse(true);
31

  
32
			final VTDNav vn = vg.getNav();
33
			final AutoPilot ap = new AutoPilot(vn);
34
			ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
35

  
36
			final String datasourcePrefix = UtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix");
37
			ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf");
38

  
39
			final String completionStatus = UtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus");
40
			final String provisionMode = UtilityParser.getSingleValue(ap, vn, "//oaf:provisionMode");
41

  
42
			final ObjectProvenance provenance = new ObjectProvenance();
43
			provenance.setDatasourceId(datasourcePrefix);
44
			provenance.setDatasource(datasourcePrefix);
45
			provenance.setCompletionStatus(completionStatus);
46
			provenance.setProvisionMode(provisionMode);
47
			parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance));
48

  
49
			parsedObject.setCompletionStatus(completionStatus);
50

  
51
			ap.declareXPathNameSpace("scholix", "http://www.scholix.org");
52

  
53
			List<Node> sourcePid = UtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='source']/*[local-name()='pid']", Arrays.asList("type"));
54

  
55
			if (sourcePid == null || sourcePid.size() == 0) {
56
				return null;
57
			}
58
			List<Node> targetPid = UtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='target']/*[local-name()='pid']", Arrays.asList("type"));
59
			if (targetPid == null || targetPid.size() == 0) {
60
				return null;
61
			}
62
			parsedObject.setPid(sourcePid.get(0).getTextValue());
63
			parsedObject.setPidType(sourcePid.get(0).getAttributes().get("type"));
64

  
65
			List<String> sourceTypes = UtilityParser.getTextValue(ap, vn, "//*[local-name()='source']/*[local-name()='type']");
66

  
67
			List<String> targetTypes = UtilityParser.getTextValue(ap, vn, "//*[local-name()='target']/*[local-name()='type']");
68

  
69
			if (sourceTypes != null && sourceTypes.size() > 0) {
70
				parsedObject.setType(ObjectType.valueOf(sourceTypes.get(0)));
71
			}
72

  
73
			List<String> relationType = UtilityParser.getTextValue(ap, vn, "//*[local-name()='relationType']");
74
			String relationSemantic = null;
75

  
76
			if (relationType != null && relationType.size() > 0) {
77
				relationSemantic = relationType.get(0);
78
			}
79

  
80
			if (sourceTypes != null && sourceTypes.size() > 0) {
81
				parsedObject.setType(ObjectType.valueOf(sourceTypes.get(0)));
82
			}
83

  
84
			final List<ObjectRelation> relations = new ArrayList<>();
85

  
86
			ObjectRelation relation = new ObjectRelation();
87
			relation.setCompletionStatus(CompletionStatus.incomplete.toString());
88
			relation.setRelationProvenance(parsedObject.getDatasourceProvenance());
89
			relation.setRelationSemantics(relationSemantic);
90
			relation.setTargetPID(new PID(targetPid.get(0).getTextValue(), targetPid.get(0).getAttributes().get("type")));
91
			if (targetTypes != null || targetTypes.size() > 0)
92
				relation.setTargetType(ObjectType.valueOf(targetTypes.get(0)));
93
			relations.add(relation);
94
			parsedObject.setRelations(relations);
95

  
96
			return parsedObject;
97
		} catch (Throwable e) {
98
			log.error("Error on parsing parser");
99
		}
100
		return null;
101
	}
102
}
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/model/ResolvedObject.java
42 42
	 */
43 43
	private ObjectType type;
44 44

  
45
	/**
46
	 * Description of the resolved object
47
	 */
45 48
	private String description;
46 49

  
47 50
	/**
......
59 62
	 */
60 63
	private String date;
61 64

  
65
	/**
66
	 * A list of subjects of type scheme term
67
	 */
62 68
	private List<SubjectType> subjects;
63 69

  
64 70
	/**
65
	 * The relations.
71
	 * The related objects.
66 72
	 */
67 73
	private List<ObjectRelation> relations;
68 74

  
......
402 408
	@Override
403 409
	public String toString() {
404 410

  
405
		String s = "DLIObject \n\t Identifier: %s \n\t Pid:%s \n\t PIDType: %s \n\t Entity Type: %s \n\t Titles:%s \n\t Authors:%s, \n\tProvenance%s";
411
		String s =
412
				"DLIObject \n\t Identifier: %s \n\t Pid:%s \n\t PIDType: %s \n\t Entity Type: %s \n\t Titles:%s \n\t Description:%s \n\t Authors:%s, \n\tProvenance%s";
406 413

  
407
		return String.format(s, this.getIdentifier(), this.getPid(), this.getPidType(), this.getType(), this.getTitles(),
414
		return String.format(s, this.getIdentifier(), this.getPid(), this.getPidType(), this.getType(), this.getTitles(), this.description,
408 415
				this.authors, Arrays.toString(this.getDatasourceProvenance().toArray()));
409 416

  
410 417
	}
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/resolver/applicationContext-dli-resolver.xml
40 40
		<property name="cache" ref="dliResolverCache"/>
41 41
	</bean>
42 42

  
43
	<bean id="ncbinResolver"
44
	      class="eu.dnetlib.resolver.NCBINResolver">
45
		<property name="cache" ref="dliResolverCache"/>
46
	</bean>
47

  
48
	<bean id="ncbinParser"
49
	      class="eu.dnetlib.resolver.NCBINParser"/>
50

  
43 51
	<bean id="crossrefRecordParserJSON"
44 52
	      class="eu.dnetlib.resolver.CrossRefParserJSON"/>
45 53

  
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_wf.xml.st
19 19
            <PARAMETERS>
20 20
            	<PARAM name="collMdstoreId"                     description="Store for collected records"           required="true" managedBy="system" category="MDSTORE_ID"/>
21 21
            	<PARAM name="cleanMdstoreId"                    description="Store for cleaned records"             required="true" managedBy="system" category="MDSTORE_ID"/>
22
            	<PARAM name="cleanTransformationRuleId"         description="Transformation Rule Identifier"        required="true" managedBy="user"   category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE')"/>
22
            	<PARAM name="cleanTransformationRuleId"         description="Transformation Rule Identifier"        required="true" managedBy="user"   category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE', 'DLI:')"/>
23 23
            </PARAMETERS>
24 24
            <WORKFLOW>
25 25
                <NODE isStart="true" name="collection" type="LaunchWorkflowTemplate">
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/oai_datacite_transform.xml
11 11
		<CONFIGURATION>
12 12
			<IMPORTED/>
13 13
			<SCRIPT>
14
				<TITLE>OAI_Datacite to DMF transform</TITLE>
14
				<TITLE>DLI: OAI_Datacite to DMF transform</TITLE>
15 15
				<CODE><![CDATA[
16 16
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
17 17
                xmlns:datetime="http://exslt.org/dates-and-times" xmlns:exslt="http://exslt.org/common"
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/TransformationRuleDSResources/elsevier_transform.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="d6aa3c16-b6e8-4953-a39e-cb2ea99d2ba8_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
5
		<RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
6
		<RESOURCE_KIND value="TransformationRuleDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2014-11-19T11:05:55+01:00"/>
9
	</HEADER>
10
	<BODY>
11
		<CONFIGURATION>
12
			<IMPORTED/>
13
			<SCRIPT>
14
				<TITLE>DLI: Elsevier to Scholix transform</TITLE>
15
				<CODE><![CDATA[
16
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
17
    xmlns:datetime="http://exslt.org/dates-and-times" xmlns:exslt="http://exslt.org/common"
18
    xmlns:oaf="http://namespace.openaire.eu/oaf"
19
    xmlns:dri="http://www.driver-repository.eu/namespace/dri"
20
    exclude-result-prefixes="xsl datetime exslt">
21
    <xsl:template match="/">
22
        <record>
23
            <xsl:copy-of select="//*[local-name()='header']"/>
24

  
25
            <xsl:variable name="source" ><xsl:value-of select="//dri:datasourceprefix"/></xsl:variable>
26
            <metadata>
27
                <scholix:link xmlns:scholix="http://www.scholix.org" >
28
                    <assertion_info>
29
                        <source><xsl:value-of select="$source"/></source>
30
                        <relationType scheme="datacite">unknown</relationType>
31
                    </assertion_info>
32
                    <source>
33
                        <pid type="DOI"><xsl:value-of select="//column[./@name='ArticleID']"/></pid>
34
                        <type>publication</type>
35
                    </source>
36

  
37
                    <target>
38
                        <pid>
39
                            <xsl:attribute name="type"><xsl:value-of select="//column[./@name='db']"/></xsl:attribute>
40
                            <xsl:value-of select="//column[./@name='datasetID']"/></pid>
41
                        <type>dataset</type>
42
                    </target>
43
                </scholix:link>
44
            </metadata>
45
            <oaf:datainfo>
46
                <oaf:completionStatus>incomplete</oaf:completionStatus>
47
                <oaf:provisionMode>collected</oaf:provisionMode>
48
            </oaf:datainfo>
49
        </record>
50
    </xsl:template>
51
</xsl:stylesheet>
52
]]></CODE>
53
			</SCRIPT>
54
		</CONFIGURATION>
55
		<STATUS/>
56
		<SECURITY_PARAMETERS/>
57
	</BODY>
58
</RESOURCE_PROFILE>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_by_link_provider.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="8bf9879c-535a-4818-8de7-790a3eb90675_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowTemplateDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<CONFIGURATION>
12
			<PARAMETERS>
13
				<PARAM name="dsId" description="Datasource Id" required="true" type="string"/>
14
				<PARAM name="interface" description="Datasource Interface" required="true" type="string"/>
15
				<PARAM name="collMdstoreId" description="Store for collected records" required="true" type="string"/>
16
				<PARAM name="cleanMdstoreId" description="Store for cleaned records" required="true" type="string"/>
17
				<PARAM name="patchMdstoreId" description="Store for patched records" required="true" type="string"/>
18
				<PARAM name="nativeObjectStoreId" description="Object Store for native images" required="true" type="string"/>
19
				<PARAM name="indexId" description="Index Identifier" required="true" type="string"/>
20
			</PARAMETERS>
21
			<WORKFLOW>
22
				<NODE name="deleteCollMdStore" type="DeleteMDStore" isStart="true">
23
					<DESCRIPTION>Delete the mdstore of collected records</DESCRIPTION>
24
					<PARAMETERS>
25
						<PARAM name="mdstoreId" ref="collMdstoreId"/>
26
					</PARAMETERS>
27
					<ARCS>
28
						<ARC to="deleteCleanMdStore"/>
29
					</ARCS>
30
				</NODE>
31
				<NODE name="deleteCleanMdStore" type="DeleteMDStore">
32
					<DESCRIPTION>Delete the mdstore of cleaned records</DESCRIPTION>
33
					<PARAMETERS>
34
						<PARAM name="mdstoreId" ref="cleanMdstoreId"/>
35
					</PARAMETERS>
36
					<ARCS>
37
						<ARC to="removeApiExtraFields"/>
38
					</ARCS>
39
				</NODE>
40

  
41
				<NODE name="removeApiExtraFields" type="RemoveApiExtraFields">
42
					<DESCRIPTION>Reset the extrafields of the api</DESCRIPTION>
43
					<PARAMETERS>
44
						<PARAM name="datasourceId" ref="dsId"/>
45
						<PARAM name="datasourceInterface" ref="interface"/>
46
						<PARAM name="fields">
47
							<LIST>
48
								<ITEM value="last_collection_total"/>
49
								<ITEM value="last_collection_date"/>
50
								<ITEM value="last_collection_mdId"/>
51
								<ITEM value="last_aggregation_total"/>
52
								<ITEM value="last_aggregation_date"/>
53
								<ITEM value="last_aggregation_mdId"/>
54
							</LIST>
55
						</PARAM>
56
					</PARAMETERS>
57
					<ARCS>
58
						<ARC to="success"/>
59
					</ARCS>
60
				</NODE>
61
			</WORKFLOW>
62
		</CONFIGURATION>
63
	</BODY>
64
</RESOURCE_PROFILE>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/repo_hi_link_provider.xml
45 45
					<PARAMETERS>
46 46
						<PARAM name="wfName" value="Aggregate Metadata from DLI Link Provider Datasource[Ingestion]"/>
47 47
						<PARAM name="wfTemplate" value="/eu/dnetlib/dli/workflows/repo-hi/dli_aggregation_wf.xml.st"/>
48
						<PARAM name="description" value="Aggregate and Validate Metadata from DLI Link Provider [Ingestion]"/>
48
						<PARAM name="description" value="Aggregate  Metadata from DLI Link Provider [Ingestion]"/>
49 49
					</PARAMETERS>
50 50
					<ARCS>
51 51
						<ARC to="createDLINative"/>
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/linkprovider/link_provider_transform_template.xml
21 21
					<DESCRIPTION>Fetch records from MDStore</DESCRIPTION>
22 22
					<PARAMETERS>
23 23
						<PARAM name="mdId" ref="collMdstoreId"/>
24
						<PARAM name="mdFormat" value="oai_efg"/>
25 24
						<PARAM name="eprParam" value="orig_epr"/>
26 25
					</PARAMETERS>
27 26
					<ARCS>
modules/dnet-dli/trunk/pom.xml
44 44
			<version>2.11</version>
45 45
		</dependency>
46 46

  
47
		<!-- https://mvnrepository.com/artifact/org.biojava/biojava3-core -->
48
		<dependency>
49
			<groupId>org.biojava</groupId>
50
			<artifactId>biojava3-core</artifactId>
51
			<version>3.0</version>
52
		</dependency>
47 53

  
54

  
48 55
		<dependency>
49 56
			<groupId>junit</groupId>
50 57
			<artifactId>junit</artifactId>
modules/dnet-core-components/trunk/src/main/resources/eu/dnetlib/cnr-default.properties
71 71
services.aggregator.name=Aggregator TEST
72 72
services.aggregator.host=driver33.isti.cnr.it
73 73
services.aggregator.port=9000
74
services.aggregator.country=DE
74
services.aggregator.country=EU
75 75
services.aggregator.ui.address=http://${services.aggregator.host}:${services.aggregator.port}/cgi-bin/ASmanager.pl
76 76
services.aggregator.ws.endpoint=http://${services.aggregator.host}:${services.aggregator.port}/cgi-bin/SoapDriver.cgi
77 77

  
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/oai/SyncOAIStoreJobNode.java
1 1
package eu.dnetlib.msro.workflows.nodes.oai;
2 2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5
import org.springframework.beans.factory.annotation.Autowired;
6

  
7 3
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
8 4
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
9 5
import eu.dnetlib.msro.workflows.procs.Token;
10 6
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
11 7
import eu.dnetlib.rmi.common.ResultSet;
12 8
import eu.dnetlib.rmi.common.ResultSetException;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.springframework.beans.factory.annotation.Autowired;
13 12

  
14 13
public class SyncOAIStoreJobNode extends AbstractOAIJobNode {
15 14

  
......
20 19
	@Autowired
21 20
	private ResultSetClient resultSetClient;
22 21

  
23
	private String formatParam, layoutParam, interpretationParam, oai_dbName;
22
	private String oai_dbName;
24 23

  
25 24
	private boolean alwaysNewRecord = false;
26 25

  
......
34 33
		job.setAction("SYNC");
35 34
		job.getParameters().put("oai_syncEpr", rs.toJson());
36 35
		job.getParameters().put("oai_recordSource", token.getEnv().getAttribute("oai_recordSource", String.class));
37
		job.getParameters().put("oai_formatName", token.getEnv().getAttribute(this.formatParam, String.class));
38
		job.getParameters().put("oai_formatLayout", token.getEnv().getAttribute(this.layoutParam, String.class));
39
		job.getParameters().put("oai_formatInterpretation", token.getEnv().getAttribute(this.interpretationParam, String.class));
36
		job.getParameters().put("oai_formatName", format);
37
		job.getParameters().put("oai_formatLayout", layout);
38
		job.getParameters().put("oai_formatInterpretation", interpretation);
40 39
		job.getParameters().put("oai_dbName", getOai_dbName());
41 40
		job.getParameters().put("oai_alwaysNewRecord", String.valueOf(this.alwaysNewRecord));
42 41
		token.getEnv().setAttribute("oai_dbName", getOai_dbName());
......
52 51
		this.eprParam = eprParam;
53 52
	}
54 53

  
55
	public String getFormatParam() {
56
		return this.formatParam;
57
	}
58

  
59
	public void setFormatParam(final String formatParam) {
60
		this.formatParam = formatParam;
61
	}
62

  
63
	public String getLayoutParam() {
64
		return this.layoutParam;
65
	}
66

  
67
	public void setLayoutParam(final String layoutParam) {
68
		this.layoutParam = layoutParam;
69
	}
70

  
71
	public String getInterpretationParam() {
72
		return this.interpretationParam;
73
	}
74

  
75
	public void setInterpretationParam(final String interpretationParam) {
76
		this.interpretationParam = interpretationParam;
77
	}
78

  
79 54
	public String getOai_dbName() {
80 55
		return this.oai_dbName;
81 56
	}
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/oai/AbstractOAIJobNode.java
6 6
import eu.dnetlib.msro.workflows.procs.Token;
7 7
import eu.dnetlib.rmi.common.ResultSetException;
8 8
import eu.dnetlib.rmi.provision.OAIStoreService;
9
import org.apache.commons.lang3.StringUtils;
9 10

  
10 11
public abstract class AbstractOAIJobNode extends BlackboardJobNode {
11 12

  
12
	private String dbName;
13
	protected String dbName;
13 14

  
14
	private String collectionName;
15
	protected String collectionName;
15 16

  
17
	protected String format, layout, interpretation;
18

  
16 19
	@Override
17 20
	protected String obtainServiceId(final Env env) {
18 21
		return getServiceLocator().getServiceId(OAIStoreService.class);
......
36 39
	}
37 40

  
38 41
	public String getCollectionName() {
39
		return collectionName;
42

  
43
		if (!StringUtils.isBlank(collectionName))
44
			return collectionName;
45
		if (!StringUtils.isBlank(format) && !StringUtils.isBlank(layout) && !StringUtils.isBlank(interpretation)) {
46
			return String.format("%s-%s-%s", format, layout, interpretation);
47
		}
48
		return null;
49

  
40 50
	}
41 51

  
42 52
	public void setCollectionName(final String collectionName) {
43 53
		this.collectionName = collectionName;
44 54
	}
55

  
56
	public String getFormat() {
57
		return format;
58
	}
59

  
60
	public void setFormat(final String format) {
61
		this.format = format;
62
	}
63

  
64
	public String getLayout() {
65
		return layout;
66
	}
67

  
68
	public void setLayout(final String layout) {
69
		this.layout = layout;
70
	}
71

  
72
	public String getInterpretation() {
73
		return interpretation;
74
	}
75

  
76
	public void setInterpretation(final String interpretation) {
77
		this.interpretation = interpretation;
78
	}
45 79
}
modules/dnet-msro-service/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/transform/MdBuilderJobNode.java
4 4
import java.net.URLEncoder;
5 5
import java.util.List;
6 6

  
7
import org.antlr.stringtemplate.StringTemplate;
8
import org.apache.commons.io.IOUtils;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.springframework.beans.factory.annotation.Autowired;
12
import org.springframework.beans.factory.annotation.Required;
13
import org.springframework.core.io.Resource;
14

  
15 7
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
16 8
import eu.dnetlib.enabling.resultset.factory.ResultSetFactory;
17 9
import eu.dnetlib.msro.workflows.graph.Arc;
18 10
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
19 11
import eu.dnetlib.msro.workflows.procs.Env;
12
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
20 13
import eu.dnetlib.rmi.common.ResultSet;
21 14
import eu.dnetlib.rmi.enabling.ISLookUpException;
22 15
import eu.dnetlib.rmi.enabling.ISLookUpService;
23 16
import eu.dnetlib.rmi.manager.MSROException;
17
import org.antlr.stringtemplate.StringTemplate;
18
import org.apache.commons.io.IOUtils;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
import org.springframework.beans.factory.annotation.Autowired;
22
import org.springframework.beans.factory.annotation.Required;
23
import org.springframework.core.io.Resource;
24 24

  
25 25
public class MdBuilderJobNode extends SimpleJobNode {
26 26

  
......
48 48
		try {
49 49
			st = new StringTemplate(IOUtils.toString(getMdBuilderTemplateXslt().getInputStream()));
50 50
			st.setAttribute("datasourceId", this.datasourceId);
51
			st.setAttribute("xpath", getMetadataIdentifierPath());
51
			st.setAttribute("xpath", getMetadataIdentifierPath().replace("\"", "'"));
52 52
			st.setAttribute("baseurl", URLEncoder.encode(getBaseUrl(), "UTF-8"));
53 53
			st.setAttribute("metadatanamespace", getMetadataNamespace());
54

  
55
			/*
56
			 * If namespacePrefix has been already pushed to env by some custom JobNode e.g. ObtainOpenaireDataSourceParamsJobNode then push
57
			 * it to ST. Else: a) try to get it from EXTRAFIELDS of the datasource b) try to get it from DATASOURCE_ORIGINAL_ID of the
58
			 * datasource c) if any of the is present, then push to ST the datasourceId
59
			 */
60
			if (env.hasAttribute("namespacePrefix")) {
61
				st.setAttribute("namespacePrefix", env.getAttribute("namespacePrefix"));
62
			} else {
63
				List<String> namespacePrefix;
64
				String xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix']/string()";
65
				namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery);
66
				if (namespacePrefix.size() != 0) {
67
					st.setAttribute("namespacePrefix", namespacePrefix.get(0));
68
				} else {
69
					xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//DATASOURCE_ORIGINAL_ID/string()";
70
					namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery);
71
					if (namespacePrefix.size() != 0) {
72
						st.setAttribute("namespacePrefix", namespacePrefix.get(0));
73
					} else {
74
						st.setAttribute("namespacePrefix", this.datasourceId);
75
					}
76
				}
54
			List<String> namespacePrefix;
55
			String xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='" + this.datasourceId + "']//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix']/string()";
56
			namespacePrefix = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery);
57
			if (namespacePrefix != null && namespacePrefix.size() != 0) {
58
				st.setAttribute("namespacePrefix", namespacePrefix.get(0));
77 59
			}
78

  
79 60
			final ResultSet<String> rsOut = this.resultSetFactory.xsltMap(rsIn, st.toString());
80 61

  
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff