Revision 54884
Added by Sandro La Bruzzo over 5 years ago
modules/dnet-dli/trunk/src/test/java/eu/dnetlib/resolver/DLIResolverTest.java | ||
---|---|---|
6 | 6 |
import eu.dnetlib.data.proto.dli.ScholixObjectProtos; |
7 | 7 |
import eu.dnetlib.dli.resolver.*; |
8 | 8 |
|
9 |
import java.io.BufferedReader; |
|
10 |
import java.io.InputStreamReader; |
|
9 | 11 |
import java.net.URI; |
10 | 12 |
|
11 | 13 |
import eu.dnetlib.dli.resolver.model.DLIResolvedObject; |
... | ... | |
14 | 16 |
import org.apache.commons.logging.Log; |
15 | 17 |
import org.apache.commons.logging.LogFactory; |
16 | 18 |
import org.bson.Document; |
19 |
import org.junit.Assert; |
|
17 | 20 |
import org.junit.Test; |
18 | 21 |
|
19 | 22 |
import java.io.IOException; |
20 | 23 |
import java.io.InputStream; |
21 | 24 |
import java.net.URISyntaxException; |
22 | 25 |
import java.time.LocalDateTime; |
26 |
import java.util.ArrayList; |
|
23 | 27 |
import java.util.Arrays; |
24 | 28 |
import java.util.List; |
29 |
import java.util.stream.Collectors; |
|
25 | 30 |
|
26 | 31 |
/** |
27 | 32 |
* Created by sandro on 9/8/16. |
... | ... | |
37 | 42 |
NCBINResolver resolver = new NCBINResolver(); |
38 | 43 |
NCBINParser parser = new NCBINParser(); |
39 | 44 |
resolver.setNCBINParser(parser); |
45 |
|
|
46 |
|
|
40 | 47 |
DLIResolvedObject resolve = resolver.resolve("mg456816", "ncbi"); |
48 |
|
|
49 |
System.out.println(resolve); |
|
50 |
|
|
41 | 51 |
CrossRefParserJSON parserJSON = new CrossRefParserJSON(); |
42 | 52 |
final String json = IOUtils.toString(this.getClass().getResourceAsStream("/eu/dnetlib/dli/parser/crossRefItem.json")); |
43 | 53 |
|
... | ... | |
73 | 83 |
|
74 | 84 |
|
75 | 85 |
@Test |
86 |
public void TestResolveLotOfPid() throws IOException { |
|
87 |
final NCBINResolver resolver = new NCBINResolver(); |
|
88 |
NCBINParser parser = new NCBINParser(); |
|
89 |
resolver.setNCBINParser(parser); |
|
90 |
|
|
91 |
|
|
92 |
|
|
93 |
|
|
94 |
InputStream resourceAsStream = this.getClass().getResourceAsStream("/eu/dnetlib/dli/parser/pid_ncbi"); |
|
95 |
BufferedReader reader = new BufferedReader(new InputStreamReader(resourceAsStream)); |
|
96 |
String line = null; |
|
97 |
|
|
98 |
List<String> resolvePid = new ArrayList<>(); |
|
99 |
|
|
100 |
while ((line= reader.readLine())!=null) { |
|
101 |
resolvePid.add(line.trim().toLowerCase()); |
|
102 |
} |
|
103 |
long resolvedCNT = resolvePid.parallelStream() |
|
104 |
.map(it -> { |
|
105 |
DLIResolvedObject ncbi = resolver.resolve(it.trim().toLowerCase(), "ncbi"); |
|
106 |
if (ncbi == null) { |
|
107 |
log.error("Unable to Resolve " + it); |
|
108 |
return null; |
|
109 |
} |
|
110 |
return ncbi.getPid(); |
|
111 |
}).filter(it -> it != null).count(); |
|
112 |
|
|
113 |
System.out.printf("Total item resolved %d/%d",resolvedCNT, resolvePid.size()); |
|
114 |
|
|
115 |
|
|
116 |
} |
|
117 |
|
|
118 |
|
|
119 |
@Test |
|
76 | 120 |
public void testDataciteOfflineResolver() { |
77 |
MongoClient client = new MongoClient(); |
|
78 |
MongoCollection<Document> collection = client.getDatabase("datacite_resolver").getCollection("datacite"); |
|
79 | 121 |
|
80 | 122 |
|
81 | 123 |
List<String> dois = Arrays.asList( |
82 |
"10.1594/pangaea.230638");
|
|
124 |
"10.5072%2Fexample-full");
|
|
83 | 125 |
|
84 | 126 |
DataciteOfflineResolver resolver = new DataciteOfflineResolver(); |
85 |
resolver.setMongoClient(client); |
|
86 |
resolver.setDatabaseName("datacite_resolver"); |
|
87 |
resolver.setCollectionName("datacite"); |
|
88 | 127 |
dois.forEach(it -> System.out.println(resolver.resolve(it, "doi"))); |
89 | 128 |
|
90 | 129 |
|
modules/dnet-dli/trunk/src/test/resources/eu/dnetlib/dli/parser/datacite.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<oai:record xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
3 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
4 |
xmlns="http://namespace.openaire.eu/" |
|
5 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
6 |
xmlns:oai="http://www.openarchives.org/OAI/2.0/" |
|
7 |
xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" |
|
8 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
9 |
<oai:header> |
|
10 |
<dri:objIdentifier>r3d100010327::00173b5006857d68ecd326756dbdf33d</dri:objIdentifier> |
|
11 |
<dri:recordIdentifier>2NXF</dri:recordIdentifier> |
|
12 |
<dri:dateOfCollection>2017-10-31T10:42:55.142+01:00</dri:dateOfCollection> |
|
13 |
<dri:repositoryId>e6be5d88-4120-47f8-8d7b-dd14946e02ac_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
14 |
<dri:datasourceprefix>r3d100010327</dri:datasourceprefix> |
|
1 |
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
2 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
3 |
<oai:header xmlns="http://namespace.openaire.eu/" |
|
4 |
xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
5 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
6 |
xmlns:oai="http://www.openarchives.org/OAI/2.0/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance"> |
|
7 |
<dri:objIdentifier>datacite____::1e2d8fca9db41dd1c7dbfbfacf3b8436</dri:objIdentifier> |
|
8 |
<dri:recordIdentifier>10.17035/d.2017.0033099548</dri:recordIdentifier> |
|
9 |
<dri:dateOfCollection>2018-10-28T00:39:04.337Z</dri:dateOfCollection> |
|
10 |
<oaf:datasourceprefix>datacite____</oaf:datasourceprefix> |
|
11 |
<identifier xmlns="http://www.openarchives.org/OAI/2.0/">10.17035/d.2017.0033099548</identifier> |
|
12 |
<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">BL.CARDIFF</setSpec> |
|
15 | 13 |
</oai:header> |
16 | 14 |
<metadata> |
17 |
<resource xmlns="http://datacite.org/schema/kernel-3" |
|
18 |
xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"> |
|
19 |
<identifier identifierType="pdb">2NXF</identifier> |
|
15 |
<resource xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd" xmlns="http://datacite.org/schema/kernel-3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
16 |
<identifier identifierType="DOI">10.17035/d.2017.0033099548</identifier> |
|
20 | 17 |
<creators> |
21 | 18 |
<creator> |
22 |
<creatorName>Bitto, E.</creatorName>
|
|
19 |
<creatorName>Hroch L</creatorName>
|
|
23 | 20 |
</creator> |
24 | 21 |
<creator> |
25 |
<creatorName>Wesenberg, G.E.</creatorName>
|
|
22 |
<creatorName>Hughes RL</creatorName>
|
|
26 | 23 |
</creator> |
27 | 24 |
<creator> |
28 |
<creatorName>Phillips Jr., G.N.</creatorName>
|
|
25 |
<creatorName>Williams T</creatorName>
|
|
29 | 26 |
</creator> |
30 | 27 |
<creator> |
31 |
<creatorName>McCoy, J.G.</creatorName>
|
|
28 |
<creatorName>Angelastro A</creatorName>
|
|
32 | 29 |
</creator> |
33 | 30 |
<creator> |
34 |
<creatorName>Bingman, C.A.</creatorName>
|
|
31 |
<creatorName>Loveridge EJ</creatorName>
|
|
35 | 32 |
</creator> |
36 | 33 |
</creators> |
37 | 34 |
<titles> |
38 |
<title>Crystal Structure of a dimetal phosphatase from Danio rerio LOC 393393</title>
|
|
35 |
<title >Data underpinning "Reduction of Folate by Dihydrofolate Reductase from Thermotoga maritima"</title>
|
|
39 | 36 |
</titles> |
37 |
<publisher>Cardiff University</publisher> |
|
38 |
<publicationYear>2017</publicationYear> |
|
39 |
|
|
40 |
<subjects> |
|
41 |
<subject schemeURI="http://www.rcuk.ac.uk/research/efficiency/researchadmin/harmonisation/" subjectScheme="">Biomolecules and biochemistry</subject> |
|
42 |
</subjects> |
|
43 |
|
|
40 | 44 |
<dates> |
41 |
<date dateType="Collected">2006-12-12</date>
|
|
45 |
<date dateType="Issued">2017</date>
|
|
42 | 46 |
</dates> |
43 |
<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType> |
|
47 |
<language>English</language> |
|
48 |
<resourceType resourceTypeGeneral="Dataset">chromatography & mass spectromoetry data</resourceType> |
|
49 |
<alternateIdentifiers> |
|
50 |
<alternateIdentifier alternateIdentifierType="DOI">https://doi.org/10.17035/d.2017.0033099548</alternateIdentifier> |
|
51 |
</alternateIdentifiers> |
|
44 | 52 |
<relatedIdentifiers> |
45 |
<relatedIdentifier xmlns:oaf="http://namespace.dnet.eu/oaf" inverseRelationType="unknown" |
|
46 |
relationType="unknown" |
|
47 |
relatedIdentifierType=""/> |
|
53 |
<relatedIdentifier relatedIdentifierType="DOI" relationType="IsReferencedBy" >10.1021/acs.biochem.6b01268</relatedIdentifier> |
|
48 | 54 |
</relatedIdentifiers> |
55 |
|
|
56 |
<formats> |
|
57 |
<format>.inf, .dat, .idx, .sts, .txt</format> |
|
58 |
</formats> |
|
59 |
<version>None</version> |
|
60 |
|
|
61 |
<descriptions> |
|
62 |
<description descriptionType="Abstract">We have shown that DHFR from the hyperthermophilic bacterium Thermotoga maritima is able to catalyse reduction of folate to tetrahydrofolate with a similar efficiency to reduction of dihydrofolate under saturating conditions. NMR and mass spectrometry experiments showed no evidence for production of free dihydrofolate during either the EcDHFR- or TmDHFR-catalysed reductions of folate, suggesting that both enzymes perform the two reduction steps without release of the partially reduced substrate. Herein we include underpinning liquid chromatography - mass spectrometry data.</description> |
|
63 |
</descriptions> |
|
64 |
|
|
65 |
<oaf:hostedBy id="openaire____::1256f046-bf1f-4afc-8b47-d0b147148b18" name="Unknown Repository"/> |
|
49 | 66 |
</resource> |
50 | 67 |
</metadata> |
51 |
<oaf:about xmlns:oaf="http://namespace.dnet.eu/oaf">
|
|
68 |
<about >
|
|
52 | 69 |
<oaf:datainfo> |
53 |
<oaf:collectedFrom completionStatus="complete" id="dli_________::r3d100010327" name="RCSB"/> |
|
54 |
<oaf:completionStatus>complete</oaf:completionStatus> |
|
55 |
<oaf:provisionMode>collected</oaf:provisionMode> |
|
70 |
<oaf:inferred>false</oaf:inferred> |
|
71 |
<oaf:deletedbyinference>false</oaf:deletedbyinference> |
|
72 |
<oaf:trust>0.9</oaf:trust> |
|
73 |
<oaf:inferenceprovenance/> |
|
74 |
<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive" |
|
75 |
classname="sysimport:crosswalk:datasetarchive" |
|
76 |
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/> |
|
56 | 77 |
</oaf:datainfo> |
57 |
</oaf:about> |
|
58 |
</oai:record> |
|
78 |
</about> |
|
79 |
</record> |
modules/dnet-dli/trunk/src/test/resources/log4j.properties | ||
---|---|---|
6 | 6 |
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n |
7 | 7 |
org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger |
8 | 8 |
### Application Level ### |
9 |
log4j.logger.eu.dnetlib=DEBUG
|
|
9 |
log4j.logger.eu.dnetlib=INFO
|
|
10 | 10 |
|
11 | 11 |
log4j.logger.eu.dnetlib.pid.resolver.parser.AbstractResolverParser=FATAL |
12 |
log4j.logger.org.apache.http.client.protocol.ResponseProcessCookies =FATAL |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/mdstore/plugin/DLIRecordResolverFactory.java | ||
---|---|---|
12 | 12 |
|
13 | 13 |
public class DLIRecordResolverFactory implements RecordResolverFactory { |
14 | 14 |
@Override |
15 |
public RecordResolver createResolver(BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, boolean offline) { |
|
15 |
public RecordResolver createResolver(BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, boolean offline, final boolean forceResolver) {
|
|
16 | 16 |
|
17 |
return createResolver(System.currentTimeMillis(), queue, outputCollection, serializer, pluginResolver, offline); |
|
17 |
return createResolver(System.currentTimeMillis(), queue, outputCollection, serializer, pluginResolver, offline, forceResolver);
|
|
18 | 18 |
} |
19 | 19 |
|
20 | 20 |
@Override |
21 |
public RecordResolver createResolver(long identifier, BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, boolean offline) { |
|
22 |
final DLIRecordResolver resolver = new DLIRecordResolver(System.currentTimeMillis()); |
|
21 |
public RecordResolver createResolver(long identifier, BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, boolean offline, final boolean forceResolver) {
|
|
22 |
final DLIRecordResolver resolver = new DLIRecordResolver(System.currentTimeMillis(), forceResolver);
|
|
23 | 23 |
resolver.setInputQueue(queue); |
24 | 24 |
resolver.setOutputCollection(outputCollection); |
25 | 25 |
resolver.setSerializer(serializer); |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/resolver/mdstore/plugin/DLIRecordResolver.java | ||
---|---|---|
37 | 37 |
|
38 | 38 |
private final DLIParser parser = new DLIParser(); |
39 | 39 |
|
40 |
public DLIRecordResolver(long ts) { |
|
40 |
public DLIRecordResolver(long ts, boolean forceResolver) {
|
|
41 | 41 |
super(ts); |
42 |
this.forceResolver = forceResolver; |
|
42 | 43 |
} |
43 | 44 |
|
44 | 45 |
|
... | ... | |
47 | 48 |
final DLIResolvedObject inputObject = (DLIResolvedObject) parser.parse(inputRecord); |
48 | 49 |
if (inputObject==null) |
49 | 50 |
return null; |
50 |
if (StringUtils.isNoneBlank(inputObject.getResolvedDate())) |
|
51 |
if (!forceResolver && StringUtils.isNoneBlank(inputObject.getResolvedDate()))
|
|
51 | 52 |
return null; |
52 | 53 |
|
53 | 54 |
if (!StringUtils.isBlank(inputObject.getPid())) { |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/resolver/DataciteOfflineResolver.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.dli.resolver; |
2 | 2 |
|
3 |
import com.google.gson.JsonArray; |
|
3 | 4 |
import com.google.gson.JsonElement; |
4 | 5 |
import com.google.gson.JsonObject; |
5 | 6 |
import com.google.gson.JsonParser; |
... | ... | |
10 | 11 |
import eu.dnetlib.dli.resolver.model.CompletionStatus; |
11 | 12 |
import eu.dnetlib.dli.resolver.model.DLIObjectProvenance; |
12 | 13 |
import eu.dnetlib.dli.resolver.model.DLIResolvedObject; |
14 |
import eu.dnetlib.dli.resolver.model.ObjectProvisionMode; |
|
13 | 15 |
import eu.dnetlib.enabling.tools.DnetStreamSupport; |
14 | 16 |
import eu.dnetlib.pid.resolver.AbstractPIDResolver; |
15 | 17 |
import eu.dnetlib.pid.resolver.model.ObjectType; |
16 | 18 |
import eu.dnetlib.pid.resolver.model.SubjectType; |
19 |
import org.apache.commons.lang3.StringUtils; |
|
17 | 20 |
import org.apache.commons.logging.Log; |
18 | 21 |
import org.apache.commons.logging.LogFactory; |
19 | 22 |
import org.bson.Document; |
... | ... | |
21 | 24 |
import org.springframework.beans.factory.annotation.Value; |
22 | 25 |
|
23 | 26 |
|
24 |
import java.util.List; |
|
25 |
import java.util.Objects; |
|
27 |
import java.util.*; |
|
26 | 28 |
import java.util.stream.Collectors; |
27 | 29 |
|
28 | 30 |
|
... | ... | |
33 | 35 |
*/ |
34 | 36 |
private static final Log log = LogFactory.getLog(DataciteOfflineResolver.class); |
35 | 37 |
|
36 |
private MongoClient client;
|
|
38 |
private static String esUrl = "http://ip-90-147-167-25.ct1.garrservices.it:9200/datacite/dump/";
|
|
37 | 39 |
|
38 |
private MongoCollection<Document> dataciteCollection; |
|
39 |
|
|
40 |
@Value("${dli.dataciteOfflineResolver.databaseName}") |
|
41 |
private String databaseName; |
|
42 |
|
|
43 |
@Value("${dli.dataciteOfflineResolver.collectionName}") |
|
44 |
private String collectionName; |
|
45 |
|
|
46 |
|
|
47 | 40 |
@Override |
48 | 41 |
protected boolean canResolvePid(final String pidType) { |
49 | 42 |
return (pidType != null) && ("doi".equals(pidType.toLowerCase().trim()) || "handle".equals(pidType.toLowerCase().trim())); |
... | ... | |
52 | 45 |
|
53 | 46 |
@Override |
54 | 47 |
public DLIResolvedObject resolve(final String pid, final String pidType) { |
55 |
try { |
|
56 |
final Document result = retrieveItem(pid); |
|
57 |
if (result!= null) { |
|
58 |
return parseResponse(result); |
|
59 |
} |
|
60 |
} catch (Throwable e) { |
|
61 |
log.error("Error on getting item with Pid: " + pid, e); |
|
62 |
} |
|
63 |
return null; |
|
48 |
final String result = requestURL(esUrl + pid.replaceAll("/","%2F")); |
|
49 |
return parseResponse(result); |
|
64 | 50 |
} |
65 | 51 |
|
66 |
private DLIResolvedObject parseResponse(Document result) { |
|
67 |
final String json = result.toJson(); |
|
52 |
private DLIResolvedObject parseResponse(String result) { |
|
53 |
if (result == null) { |
|
54 |
return null; |
|
55 |
} |
|
68 | 56 |
final JsonParser p = new JsonParser(); |
69 |
JsonObject root = p.parse(json).getAsJsonObject(); |
|
57 |
JsonObject root = p.parse(result).getAsJsonObject(); |
|
58 |
if (!(root.has("found") && root.get("found").getAsBoolean())) |
|
59 |
return null; |
|
60 |
|
|
70 | 61 |
final DLIResolvedObject dli = new DLIResolvedObject(); |
71 | 62 |
|
72 |
final String description = getStringValue(root, "description"); |
|
73 |
dli.setDescription(description); |
|
74 |
dli.setPid(getStringValue(root, "pid")); |
|
75 |
dli.setPidType(getStringValue(root, "pidType")); |
|
76 |
dli.setType(ObjectType.dataset); |
|
77 |
dli.setDate(getStringValue(root, "date")); |
|
78 |
dli.setTitles(getStringValues(root, "titles")); |
|
79 |
dli.setAuthors(getStringValues(root, "authors")); |
|
63 |
root = root.getAsJsonObject("_source").getAsJsonObject("attributes"); |
|
64 |
|
|
65 |
if (root.has("descriptions") && root.get("descriptions").isJsonArray()) { |
|
66 |
final JsonArray descriptions = root.get("descriptions").getAsJsonArray(); |
|
67 |
for (JsonElement description: descriptions) { |
|
68 |
if (!description.isJsonNull() && description.isJsonObject()) { |
|
69 |
dli.setDescription(getStringValue(description.getAsJsonObject(),"description")); |
|
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
String current_date= null; |
|
74 |
if (root.has("dates") && root.get("dates").isJsonArray()) { |
|
75 |
final JsonArray dates = root.get("dates").getAsJsonArray(); |
|
76 |
for (JsonElement date: dates) { |
|
77 |
if (!date.isJsonNull() && date.isJsonObject()) { |
|
78 |
final String tmp = getStringValue(date.getAsJsonObject(),"date"); |
|
79 |
if (current_date == null && StringUtils.isNotBlank(tmp)) { |
|
80 |
current_date = tmp; |
|
81 |
} else if (current_date != null && StringUtils.isNotBlank(tmp) && tmp.length() > current_date.length()) { |
|
82 |
current_date = tmp; |
|
83 |
} |
|
84 |
} |
|
85 |
} |
|
86 |
} |
|
87 |
|
|
88 |
final List<String> currentTitles = new ArrayList<>(); |
|
89 |
if (root.has("titles") && root.get("titles").isJsonArray()) { |
|
90 |
final JsonArray titles = root.get("titles").getAsJsonArray(); |
|
91 |
for (JsonElement title: titles) { |
|
92 |
if (title!=null && !title.isJsonNull() && title.isJsonObject()) { |
|
93 |
currentTitles.add(getStringValue(title.getAsJsonObject(),"title")); |
|
94 |
} |
|
95 |
|
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
final List<String> authors = new ArrayList<>(); |
|
100 |
if (root.has("creators") && root.get("creators").isJsonArray()) { |
|
101 |
final JsonArray creators = root.get("creators").getAsJsonArray(); |
|
102 |
for (JsonElement creator: creators) { |
|
103 |
if (creator!=null && !creator.isJsonNull() && creator.isJsonObject()) { |
|
104 |
authors.add(getStringValue(creator.getAsJsonObject(),"name")); |
|
105 |
} |
|
106 |
|
|
107 |
} |
|
108 |
} |
|
109 |
dli.setAuthors(authors); |
|
110 |
dli.setTitles(currentTitles); |
|
111 |
dli.setDate(current_date); |
|
112 |
final List<SubjectType> current_subjects = new ArrayList<>(); |
|
80 | 113 |
if (root.has("subjects") && root.get("subjects").isJsonArray()) { |
81 |
dli.setSubjects( |
|
82 |
DnetStreamSupport.generateStreamFromIterator(root.getAsJsonArray("subjects").iterator()) |
|
83 |
.map(JsonElement::getAsJsonObject) |
|
84 |
.map(this::createSubjectType) |
|
85 |
.filter(Objects::nonNull) |
|
86 |
.collect(Collectors.toList()) |
|
87 |
); |
|
114 |
final JsonArray subjects = root.get("subjects").getAsJsonArray(); |
|
115 |
for (JsonElement subject: subjects) { |
|
116 |
if (subject!=null && !subject.isJsonNull() && subject.isJsonObject()) { |
|
117 |
final String scheme = getStringValue(subject.getAsJsonObject(),"subjectScheme"); |
|
118 |
final String term = getStringValue(subject.getAsJsonObject(),"subject"); |
|
119 |
current_subjects.add(new SubjectType(scheme, term)); |
|
120 |
} |
|
121 |
} |
|
88 | 122 |
} |
89 |
dli.setDatasourceProvenance(
|
|
90 |
DnetStreamSupport.generateStreamFromIterator(root.getAsJsonArray("datasourceProvenance").iterator())
|
|
91 |
.map(JsonElement::getAsJsonObject)
|
|
92 |
.map(it -> {
|
|
93 |
final DLIObjectProvenance provenance =new DLIObjectProvenance();
|
|
94 |
provenance.setDatasourceId(it.get("datasourceId").getAsString());
|
|
95 |
provenance.setCompletionStatus(it.get("completionStatus").getAsString());
|
|
96 |
provenance.setProvisionMode(it.get("provisionMode").getAsString());
|
|
97 |
provenance.setDatasource(it.get("datasource").getAsString());
|
|
98 |
provenance.setPublisher(getStringValue(it, "publisher"));
|
|
99 |
return provenance; |
|
100 |
})
|
|
101 |
.collect(Collectors.toList()) |
|
102 |
); |
|
123 |
dli.setSubjects(current_subjects);
|
|
124 |
dli.setPid(getStringValue(root, "doi"));
|
|
125 |
dli.setPidType("doi");
|
|
126 |
dli.setType(ObjectType.dataset);
|
|
127 |
final DLIObjectProvenance provenance =new DLIObjectProvenance(); |
|
128 |
provenance.setDatasourceId("dli_________::datacite");
|
|
129 |
provenance.setCompletionStatus(CompletionStatus.complete.toString());
|
|
130 |
provenance.setProvisionMode(ObjectProvisionMode.resolved.toString());
|
|
131 |
provenance.setDatasource("Datasets in Datacite");
|
|
132 |
String publisher = getStringValue(root, "publisher");
|
|
133 |
|
|
134 |
provenance.setPublisher(publisher);
|
|
135 |
|
|
136 |
dli.setDatasourceProvenance(Collections.singletonList(provenance));
|
|
103 | 137 |
dli.setCompletionStatus(CompletionStatus.complete.toString()); |
104 | 138 |
return dli; |
105 | 139 |
} |
... | ... | |
130 | 164 |
return null; |
131 | 165 |
} |
132 | 166 |
|
133 |
|
|
134 |
private Document retrieveItem(final String pid){ |
|
135 |
FindIterable<Document> pids = getDataciteCollection().find(Filters.eq("pid", pid)); |
|
136 |
if (pids!= null) { |
|
137 |
return pids.first(); |
|
138 |
} |
|
139 |
return null; |
|
140 |
|
|
141 |
} |
|
142 |
|
|
143 |
@Required |
|
144 |
public void setMongoClient(final MongoClient client) { |
|
145 |
this.client = client; |
|
146 |
} |
|
147 |
|
|
148 |
private MongoCollection<Document> getDataciteCollection() { |
|
149 |
|
|
150 |
if (dataciteCollection == null) |
|
151 |
{ |
|
152 |
dataciteCollection = client.getDatabase(databaseName).getCollection(collectionName); |
|
153 |
} |
|
154 |
return dataciteCollection; |
|
155 |
} |
|
156 |
|
|
157 |
|
|
158 |
public String getDatabaseName() { |
|
159 |
return databaseName; |
|
160 |
} |
|
161 |
|
|
162 |
public void setDatabaseName(String databaseName) { |
|
163 |
this.databaseName = databaseName; |
|
164 |
} |
|
165 |
|
|
166 |
public String getCollectionName() { |
|
167 |
return collectionName; |
|
168 |
} |
|
169 |
|
|
170 |
public void setCollectionName(String collectionName) { |
|
171 |
this.collectionName = collectionName; |
|
172 |
} |
|
173 |
|
|
174 | 167 |
@Override |
175 | 168 |
public boolean isAvailableOffline() { |
176 | 169 |
return true; |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/resolver/NCBINParser.java | ||
---|---|---|
21 | 21 |
|
22 | 22 |
public DLIResolvedObject parseRecord(final String record) { |
23 | 23 |
try { |
24 |
if (record == null) |
|
24 |
if (record == null || record.contains("ERROR"))
|
|
25 | 25 |
return null; |
26 |
|
|
26 | 27 |
final DLIResolvedObject parsedObject = new DLIResolvedObject(); |
27 | 28 |
final VTDGen vg = new VTDGen(); |
28 | 29 |
vg.setDoc(record.getBytes()); |
29 | 30 |
vg.parse(true); |
30 | 31 |
final VTDNav vn = vg.getNav(); |
31 | 32 |
final AutoPilot ap = new AutoPilot(vn); |
33 |
parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//Item[./@Name='Title']")); |
|
34 |
List<String> dates = VtdUtilityParser.getTextValue(ap, vn, "//Item[./@Name='CreateDate']"); |
|
35 |
if(dates!=null && dates.size()>0) |
|
36 |
parsedObject.setDate(dates.get(0)); |
|
32 | 37 |
|
33 |
parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//GBSeq_definition")); |
|
34 |
|
|
35 |
final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//GBSeq_comment"); |
|
36 |
if (descriptions != null && descriptions.size() > 0) { |
|
37 |
parsedObject.setDescription(descriptions.get(0)); |
|
38 |
} |
|
39 |
|
|
40 | 38 |
parsedObject.setType(ObjectType.dataset); |
41 | 39 |
return parsedObject; |
42 | 40 |
} catch (Throwable e) { |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/resolver/NCBINResolver.java | ||
---|---|---|
4 | 4 |
import eu.dnetlib.dli.resolver.model.DLIObjectProvenance; |
5 | 5 |
import eu.dnetlib.dli.resolver.model.DLIResolvedObject; |
6 | 6 |
import eu.dnetlib.pid.resolver.AbstractPIDResolver; |
7 |
|
|
7 | 8 |
import org.springframework.beans.factory.annotation.Autowired; |
8 | 9 |
|
9 | 10 |
import java.util.Arrays; |
... | ... | |
14 | 15 |
public class NCBINResolver extends AbstractPIDResolver { |
15 | 16 |
|
16 | 17 |
private final static String baseUrl = |
17 |
"https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&val=%s&page_size=5&fmt_mask=0&report=gbx&retmode=text&page=1&page_size=1";
|
|
18 |
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=nucleotide&id=%s";
|
|
18 | 19 |
|
20 |
// "https://www.ncbi.nlm.nih.gov/sviewer/viewer.fcgi?db=nuccore&val=%s&page_size=5&fmt_mask=0&report=gbx&retmode=text&page=1&page_size=1"; |
|
21 |
|
|
19 | 22 |
private final static String NCBI_NSPREFIX = "r3d100010778"; |
20 | 23 |
|
21 | 24 |
@Autowired |
... | ... | |
27 | 30 |
} |
28 | 31 |
|
29 | 32 |
@Override |
33 |
@Deprecated |
|
30 | 34 |
public DLIResolvedObject resolve(final String pid, final String pidType) { |
31 |
|
|
32 |
if ((pid == null)) |
|
33 |
return null; |
|
34 |
final DLIResolvedObject record = NCBINParser.parseRecord(requestURL(String.format(baseUrl, pid))); |
|
35 |
if (record != null) { |
|
36 |
record.setPid(pid); |
|
37 |
record.setPidType(pidType); |
|
38 |
record.setCompletionStatus(CompletionStatus.complete.toString()); |
|
39 |
DLIObjectProvenance provenance = new DLIObjectProvenance(); |
|
40 |
DLIPIDResolver.setDatasourceProvenance(provenance, NCBI_NSPREFIX); |
|
41 |
record.setDatasourceProvenance(Arrays.asList(provenance)); |
|
42 |
} |
|
43 |
return record; |
|
35 |
return null; |
|
36 |
// if ((pid == null))
|
|
37 |
// return null;
|
|
38 |
// final DLIResolvedObject record = NCBINParser.parseRecord(requestURL(String.format(baseUrl, pid)));
|
|
39 |
// if (record != null) {
|
|
40 |
// record.setPid(pid);
|
|
41 |
// record.setPidType(pidType);
|
|
42 |
// record.setCompletionStatus(CompletionStatus.complete.toString());
|
|
43 |
// DLIObjectProvenance provenance = new DLIObjectProvenance();
|
|
44 |
// DLIPIDResolver.setDatasourceProvenance(provenance, NCBI_NSPREFIX);
|
|
45 |
// record.setDatasourceProvenance(Arrays.asList(provenance));
|
|
46 |
// }
|
|
47 |
// return record;
|
|
44 | 48 |
} |
45 | 49 |
|
46 | 50 |
public NCBINParser getNCBINParser() { |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/collector/plugin/CrossRefIterator.java | ||
---|---|---|
70 | 70 |
final String id_type = item.get("Source").getAsJsonObject().get("Identifier").getAsJsonObject().get("IDScheme").getAsString(); |
71 | 71 |
object.setPid(id); |
72 | 72 |
object.setPidType(id_type); |
73 |
final JsonElement licenseURL = item.get("LicenseURL"); |
|
74 |
|
|
75 |
|
|
76 |
|
|
73 | 77 |
final String type = item.get("Source").getAsJsonObject().get("Type").getAsJsonObject().get("Name").isJsonNull()?"unknownw":item.get("Source").getAsJsonObject().get("Type").getAsJsonObject().get("Name").getAsString(); |
74 | 78 |
object.setType(type.equals("literature") ? ObjectType.publication : ObjectType.dataset); |
75 | 79 |
final String relType = item.get("RelationshipType").getAsJsonObject().get("Name").getAsString(); |
... | ... | |
81 | 85 |
|
82 | 86 |
final String t_type = item.get("Target").getAsJsonObject().get("Type").getAsJsonObject().get("Name").isJsonNull()?"unknown":item.get("Target").getAsJsonObject().get("Type").getAsJsonObject().get("Name").getAsString(); |
83 | 87 |
|
88 |
if (licenseURL!= null && !licenseURL.isJsonNull()) |
|
89 |
relation.setLicense(licenseURL.getAsString()); |
|
84 | 90 |
relation.setTargetPID(new PID(t_id, t_id_type)); |
85 | 91 |
relation.setTargetType(t_type.equals("literature") ? ObjectType.publication : ObjectType.dataset); |
86 | 92 |
object.setRelations(Collections.singletonList(relation)); |
modules/dnet-dli/trunk/src/main/java/eu/dnetlib/dli/collector/plugin/ScholixJSONParser.java | ||
---|---|---|
47 | 47 |
log.error("Error SOURCE PID cannot be null"); |
48 | 48 |
return null; |
49 | 49 |
} |
50 |
|
|
51 |
|
|
52 |
|
|
50 | 53 |
final PID targetPID = getPid(target); |
51 | 54 |
if (targetPID == null) { |
52 | 55 |
log.error("Error Target PID cannot be null"); |
... | ... | |
57 | 60 |
|
58 | 61 |
final String title = getStringFromObject(source, "Title"); |
59 | 62 |
final String date = getStringFromObject(source, "PublicationDate"); |
60 |
result.setTitles(Arrays.asList(title));
|
|
63 |
result.setTitles(Collections.singletonList(title));
|
|
61 | 64 |
result.setDate(date); |
62 | 65 |
result.setType(sourceType); |
63 | 66 |
final DLIObjectRelation relation = new DLIObjectRelation(); |
64 | 67 |
relation.setCompletionStatus(CompletionStatus.incomplete.toString()); |
65 | 68 |
relation.setTargetPID(targetPID); |
69 |
relation.setTargetType(getType(target)); |
|
66 | 70 |
relation.setRelationSemantics(relationSemantic); |
67 | 71 |
try { |
68 | 72 |
relation.setInverseRelation(DLIUtils.getInverse(relationSemantic)); |
... | ... | |
88 | 92 |
return null; |
89 | 93 |
final String pid = getStringFromObject(identifier, "ID"); |
90 | 94 |
final String pidType = getStringFromObject(identifier, "IDScheme"); |
95 |
final String resolvedURL= getStringFromObject(identifier, "IDURL"); |
|
91 | 96 |
|
92 | 97 |
if(pid == null || pidType ==null || StringUtils.isEmpty(pid) ||StringUtils.isEmpty(pidType)) { |
93 | 98 |
return null; |
94 | 99 |
} |
95 |
return DLIUtils.createCorrectPID(pid, pidType); |
|
100 |
return DLIUtils.createCorrectPID(pid, pidType, resolvedURL);
|
|
96 | 101 |
} |
97 | 102 |
|
98 | 103 |
public static ObjectType getType(final JsonObject root) { |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/resolver/applicationContext-dli-resolver.xml | ||
---|---|---|
48 | 48 |
|
49 | 49 |
<bean id="dataciteOfflineResolver" |
50 | 50 |
class="eu.dnetlib.dli.resolver.DataciteOfflineResolver" |
51 |
p:order="0" p:availableOffline="true" |
|
52 |
p:mongoClient-ref="mongoClient"> |
|
51 |
p:order="0" p:availableOffline="true"> |
|
53 | 52 |
<property name="cache" ref="dliResolverCache"/> |
54 | 53 |
|
55 | 54 |
</bean> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/templates/Scholixv1.st | ||
---|---|---|
17 | 17 |
$endif$ |
18 | 18 |
</relashionship> |
19 | 19 |
}$ |
20 |
|
|
21 |
<license> |
|
22 |
$object.relations:{ |
|
23 |
<url>$it.licenseEscapedXML$</url> |
|
24 |
}$ |
|
25 |
</license> |
|
20 | 26 |
<source> |
21 | 27 |
<identifier> |
22 | 28 |
<identifier>$object.escapedXMLPid$</identifier> |
modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/dli/templates/PMFXML.st | ||
---|---|---|
9 | 9 |
}$ |
10 | 10 |
<dc:type>$object.type$</dc:type> |
11 | 11 |
$object.relations:{ |
12 |
<oaf:relatedIdentifier relatedIdentifierType="$it.targetPID.type$" relationType="$it.relationSemantics$" inverseRelationType="$it.inverseRelation$" entityType="$it.targetType$">$it.targetPID.escapeXMLId$</oaf:relatedIdentifier> }$ |
|
12 |
<oaf:relatedIdentifier relatedIdentifierType="$it.targetPID.type$" relationType="$it.relationSemantics$" inverseRelationType="$it.inverseRelation$" entityType="$it.targetType$" resolvedUrl="$it.targetPID.resolvedUrlEscaped$">$it.targetPID.escapeXMLId$</oaf:relatedIdentifier> }$
|
|
13 | 13 |
$object.datasourceProvenance:{$if(it.showPublisher)$<dc:publisher>$it.escapedPublisher$</dc:publisher>$endif$}$ |
14 | 14 |
</metadata> |
15 | 15 |
<oaf:about> |
modules/dnet-dli/trunk/pom.xml | ||
---|---|---|
1 | 1 |
<?xml version="1.0" encoding="UTF-8"?> |
2 | 2 |
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0" |
3 | 3 |
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
4 |
<parent>
|
|
5 |
<groupId>eu.dnetlib</groupId>
|
|
6 |
<artifactId>dnet-parent</artifactId>
|
|
7 |
<version>2.0.0-SNAPSHOT</version>
|
|
8 |
<relativePath/>
|
|
9 |
</parent>
|
|
10 |
<modelVersion>4.0.0</modelVersion>
|
|
11 |
<groupId>eu.dnetlib</groupId>
|
|
12 |
<artifactId>dnet-dli</artifactId>
|
|
13 |
<packaging>jar</packaging>
|
|
14 |
<version>1.0.0-SNAPSHOT</version>
|
|
4 |
<parent>
|
|
5 |
<groupId>eu.dnetlib</groupId>
|
|
6 |
<artifactId>dnet-parent</artifactId>
|
|
7 |
<version>2.0.0-SNAPSHOT</version>
|
|
8 |
<relativePath/>
|
|
9 |
</parent>
|
|
10 |
<modelVersion>4.0.0</modelVersion>
|
|
11 |
<groupId>eu.dnetlib</groupId>
|
|
12 |
<artifactId>dnet-dli</artifactId>
|
|
13 |
<packaging>jar</packaging>
|
|
14 |
<version>1.0.0-SNAPSHOT</version>
|
|
15 | 15 |
|
16 | 16 |
|
17 |
<scm> |
|
18 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-dli/trunk</developerConnection> |
|
19 |
</scm> |
|
17 |
<scm> |
|
18 |
<developerConnection> |
|
19 |
scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-dli/trunk |
|
20 |
</developerConnection> |
|
21 |
</scm> |
|
20 | 22 |
<dependencies> |
21 |
<dependency>
|
|
22 |
<groupId>eu.dnetlib</groupId>
|
|
23 |
<artifactId>dnet-msro-service</artifactId>
|
|
24 |
<version>[5.0.0-SNAPSHOT, 6.0.0-SNAPSHOT)</version>
|
|
25 |
</dependency>
|
|
23 |
<dependency>
|
|
24 |
<groupId>eu.dnetlib</groupId>
|
|
25 |
<artifactId>dnet-msro-service</artifactId>
|
|
26 |
<version>[5.0.0-SNAPSHOT, 6.0.0-SNAPSHOT)</version>
|
|
27 |
</dependency>
|
|
26 | 28 |
|
27 |
<dependency>
|
|
28 |
<groupId>eu.dnetlib</groupId>
|
|
29 |
<artifactId>dnet-data-services</artifactId>
|
|
30 |
<version>[1.0.0-SNAPSHOT, 2.0.0-SNAPSHOT)</version>
|
|
31 |
</dependency>
|
|
29 |
<dependency>
|
|
30 |
<groupId>eu.dnetlib</groupId>
|
|
31 |
<artifactId>dnet-data-services</artifactId>
|
|
32 |
<version>[1.0.0-SNAPSHOT, 2.0.0-SNAPSHOT)</version>
|
|
33 |
</dependency>
|
|
32 | 34 |
|
33 |
<dependency>
|
|
34 |
<groupId>eu.dnetlib</groupId>
|
|
35 |
<artifactId>dnet-dli-domain</artifactId>
|
|
36 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
|
|
37 |
</dependency>
|
|
35 |
<dependency>
|
|
36 |
<groupId>eu.dnetlib</groupId>
|
|
37 |
<artifactId>dnet-dli-domain</artifactId>
|
|
38 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
|
|
39 |
</dependency>
|
|
38 | 40 |
|
39 | 41 |
<dependency> |
40 | 42 |
<groupId>eu.dnetlib</groupId> |
... | ... | |
43 | 45 |
</dependency> |
44 | 46 |
|
45 | 47 |
<dependency> |
46 |
<groupId>eu.dnetlib</groupId>
|
|
47 |
<artifactId>dnet-mapreduce-jobs</artifactId>
|
|
48 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
|
|
49 |
</dependency>
|
|
48 |
<groupId>eu.dnetlib</groupId>
|
|
49 |
<artifactId>dnet-mapreduce-jobs</artifactId>
|
|
50 |
<version>[1.0.0-SNAPSHOT,2.0.0-SNAPSHOT)</version>
|
|
51 |
</dependency>
|
|
50 | 52 |
|
51 |
<dependency>
|
|
52 |
<groupId>org.mongodb</groupId>
|
|
53 |
<artifactId>mongo-java-driver</artifactId>
|
|
54 |
<version>${mongodb.driver.version}</version>
|
|
55 |
</dependency>
|
|
53 |
<dependency>
|
|
54 |
<groupId>org.mongodb</groupId>
|
|
55 |
<artifactId>mongo-java-driver</artifactId>
|
|
56 |
<version>${mongodb.driver.version}</version>
|
|
57 |
</dependency>
|
|
56 | 58 |
|
57 |
<dependency>
|
|
58 |
<groupId>junit</groupId>
|
|
59 |
<artifactId>junit</artifactId>
|
|
60 |
<version>${junit.version}</version>
|
|
61 |
<scope>test</scope>
|
|
62 |
</dependency>
|
|
59 |
<dependency>
|
|
60 |
<groupId>junit</groupId>
|
|
61 |
<artifactId>junit</artifactId>
|
|
62 |
<version>${junit.version}</version>
|
|
63 |
<scope>test</scope>
|
|
64 |
</dependency>
|
|
63 | 65 |
|
64 | 66 |
<dependency> |
65 | 67 |
<groupId>org.mockito</groupId> |
... | ... | |
72 | 74 |
<artifactId>dnet-pid-resolver</artifactId> |
73 | 75 |
<version>1.0.0-SNAPSHOT</version> |
74 | 76 |
</dependency> |
77 |
|
|
75 | 78 |
<dependency> |
76 | 79 |
<groupId>de.flapdoodle.embed</groupId> |
77 | 80 |
<artifactId>de.flapdoodle.embed.mongo</artifactId> |
78 | 81 |
<version>2.0.0</version> |
79 |
<scope>test</scope>
|
|
82 |
<scope>test</scope>
|
|
80 | 83 |
</dependency> |
81 | 84 |
|
82 | 85 |
|
modules/dnet-dli-domain/trunk/src/main/java/eu/dnetlib/dli/resolver/model/DLIResolvedObjectDeserializer.java | ||
---|---|---|
40 | 40 |
resolvedObject.setDatasourceProvenance(extractedDatasourceProvenance); |
41 | 41 |
resolvedObject.setTitles(extractListValues(root, "titles")); |
42 | 42 |
resolvedObject.setAuthors(extractListValues(root, "authors")); |
43 |
resolvedObject.setDate(getValue(root,"date")); |
|
43 | 44 |
resolvedObject.setCompletionStatus(getValue(root, "completionStatus")); |
44 | 45 |
} |
45 | 46 |
|
modules/dnet-dli-domain/trunk/src/main/java/eu/dnetlib/dli/resolver/model/DLIObjectRelation.java | ||
---|---|---|
310 | 310 |
return null; |
311 | 311 |
} |
312 | 312 |
|
313 |
public DLIResolvedObject getExtraInfo() { |
|
313 |
public String getLicenseEscapedXML() { |
|
314 |
if (license != null) |
|
315 |
return StringEscapeUtils.escapeXml11(license); |
|
316 |
return null; |
|
317 |
} |
|
318 |
|
|
319 |
public String getLicense() { |
|
320 |
return license; |
|
321 |
} |
|
322 |
|
|
323 |
public void setLicense(String license) { |
|
324 |
this.license = license; |
|
325 |
} |
|
326 |
|
|
327 |
public DLIResolvedObject getExtraInfo() { |
|
314 | 328 |
return extraInfo; |
315 | 329 |
} |
316 | 330 |
|
modules/dnet-dli-domain/trunk/src/main/java/eu/dnetlib/dli/DLIUtils.java | ||
---|---|---|
150 | 150 |
} |
151 | 151 |
|
152 | 152 |
|
153 |
public static PID createCorrectPID(final String pid, final String pidType, final String resolvedUrl) { |
|
154 |
PID correctPID = createCorrectPID(pid, pidType); |
|
155 |
correctPID.setResolvedUrl(resolvedUrl); |
|
156 |
return correctPID; |
|
157 |
} |
|
153 | 158 |
|
154 | 159 |
public static PID createCorrectPID(final String pid, final String pidType) { |
155 | 160 |
final String validDoi = isValidDoi(pid); |
... | ... | |
162 | 167 |
public static String isValidDoi(final String url) { |
163 | 168 |
|
164 | 169 |
|
165 |
final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\\\"&\\'])\\S)+)";
|
|
170 |
final String regex = "(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&'])\\S)+)";
|
|
166 | 171 |
|
167 | 172 |
|
168 | 173 |
final Pattern pattern = Pattern.compile(regex); |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_dmf.xml | ||
---|---|---|
39 | 39 |
<resourceType resourceTypeGeneral="dataset">dataset</resourceType> |
40 | 40 |
<relatedIdentifiers> |
41 | 41 |
|
42 |
<relatedIdentifier relatedIdentifierType="doi" relationType="IsSupplementTo" inverseRelationType="" entityType="publication">10.1002/(sici)1521-3749(1998120)624:12%3c2007::aid-zaac2007%3e3.3.co;2-x</relatedIdentifier>
|
|
42 |
<relatedIdentifier relatedIdentifierType="doi" relationType="isSupplementTo" inverseRelationType="" entityType="publication">10.1002/(sici)1521-3749(1998120)624:12%3c2007::aid-zaac2007%3e3.3.co;2-x</relatedIdentifier>
|
|
43 | 43 |
|
44 | 44 |
</relatedIdentifiers> |
45 | 45 |
<descriptions> |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/record_dli_pmf.xml | ||
---|---|---|
22 | 22 |
<dc:description/> |
23 | 23 |
<dc:type>publication</dc:type> |
24 | 24 |
|
25 |
<oaf:relatedIdentifier entityType="publication" inverseRelationType="isReferencedBy"
|
|
25 |
<oaf:relatedIdentifier entityType="publication" inverseRelationType="isSupplementTo"
|
|
26 | 26 |
relatedIdentifierType="dnet" |
27 |
relationType="References">dli_resolver::7c8d9ec3c66a6f8cef950d3df730bf3d</oaf:relatedIdentifier>
|
|
28 |
<oaf:relatedIdentifier entityType="publication" inverseRelationType="isReferencedBy"
|
|
27 |
relationType="isSupplementTo">dli_resolver::7c8d9ec3c66a6f8cef950d3df730bf3d</oaf:relatedIdentifier>
|
|
28 |
<oaf:relatedIdentifier entityType="publication" inverseRelationType="isSupplementTo"
|
|
29 | 29 |
relatedIdentifierType="dnet" |
30 |
relationType="References">dli_resolver::a030485d494cec37b4d0823854e3fe37</oaf:relatedIdentifier>
|
|
31 |
<oaf:relatedIdentifier relatedIdentifierType="doi" relationType="IsSupplementTo" inverseRelationType="" entityType="dataset">10.1002/(sici)1521-3749(1998120)624:12%3c2007::aid-zaac2007%3e3.3.co;2-x</oaf:relatedIdentifier>
|
|
30 |
relationType="isSupplementTo">dli_resolver::a030485d494cec37b4d0823854e3fe37</oaf:relatedIdentifier>
|
|
31 |
<oaf:relatedIdentifier relatedIdentifierType="doi" relationType="isSupplementTo" inverseRelationType="" entityType="dataset">10.1002/(sici)1521-3749(1998120)624:12%3c2007::aid-zaac2007%3e3.3.co;2-x</oaf:relatedIdentifier>
|
|
32 | 32 |
</metadata> |
33 | 33 |
<oaf:about xmlns=""> |
34 | 34 |
<oaf:datainfo> |
modules/dnet-pid-resolver/trunk/src/main/java/eu/dnetlib/pid/resolver/model/PID.java | ||
---|---|---|
21 | 21 |
*/ |
22 | 22 |
private String type; |
23 | 23 |
|
24 |
|
|
25 |
|
|
26 |
private String resolvedUrl; |
|
27 |
|
|
24 | 28 |
/** |
25 | 29 |
* Instantiates a new pid. |
26 | 30 |
*/ |
... | ... | |
92 | 96 |
} |
93 | 97 |
} |
94 | 98 |
|
99 |
public String getResolvedUrl() { |
|
100 |
return resolvedUrl; |
|
101 |
} |
|
102 |
|
|
103 |
public String getResolvedUrlEscaped() { |
|
104 |
return StringEscapeUtils.escapeXml11(resolvedUrl); |
|
105 |
} |
|
106 |
|
|
107 |
public void setResolvedUrl(String resolvedUrl) { |
|
108 |
this.resolvedUrl = resolvedUrl; |
|
109 |
} |
|
110 |
|
|
111 |
|
|
112 |
|
|
95 | 113 |
@Override |
96 | 114 |
public String toString() { |
97 | 115 |
return g.toJson(this); |
modules/dnet-pid-resolver/trunk/src/main/java/eu/dnetlib/pid/resolver/AbstractPIDResolver.java | ||
---|---|---|
48 | 48 |
// public abstract void setDatasourceProvenance(final ObjectProvenance provenance, final String namespacePrefix); |
49 | 49 |
|
50 | 50 |
public ResolvedObject retrievePID(final String pid, final String pidType, boolean offline) { |
51 |
final String dnetIdentifier = storeManager.generateDNetIdentifier(pid, pidType); |
|
52 |
final Element item = cache.get(dnetIdentifier); |
|
53 |
if (item != null) return (ResolvedObject) item.getObjectValue(); |
|
51 | 54 |
if (offline && !isAvailableOffline()) |
52 | 55 |
return null; |
53 |
if (!canResolvePid(pidType)) return null; |
|
54 |
final String dnetIdentifier = storeManager.generateDNetIdentifier(pid, pidType); |
|
55 |
final Element item = cache.get(dnetIdentifier); |
|
56 |
ResolvedObject resolvedObject = storeManager.getRecord(dnetIdentifier); |
|
57 |
log.debug("Cache miss for pid:" + pid + " and type :" + pidType); |
|
56 | 58 |
|
57 |
if (item != null) return (ResolvedObject) item.getObjectValue(); |
|
58 |
|
|
59 |
ResolvedObject resolvedObject = storeManager.getRecord(dnetIdentifier); |
|
60 |
|
|
61 | 59 |
if (resolvedObject != null) { |
62 | 60 |
cache.put(new Element(storeManager.generateDNetIdentifier(pid, pidType), resolvedObject)); |
63 | 61 |
return resolvedObject; |
64 | 62 |
} |
63 |
if (!canResolvePid(pidType)) return null; |
|
65 | 64 |
|
66 |
log.debug("Cache miss for pid:" + pid + " and type :" + pidType); |
|
67 | 65 |
resolvedObject = resolve(pid, pidType); |
68 | 66 |
if (resolvedObject != null) { |
69 | 67 |
storeManager.insertRecord(getName(), resolvedObject); |
modules/dnet-pid-resolver/trunk/src/main/java/eu/dnetlib/pid/resolver/mdstore/plugin/AbstractRecordResolver.java | ||
---|---|---|
24 | 24 |
protected MongoCollection<DBObject> outputCollection; |
25 | 25 |
protected ResolverSerializer serializer; |
26 | 26 |
protected boolean offline; |
27 |
protected boolean forceResolver = false; |
|
27 | 28 |
|
28 | 29 |
public AbstractRecordResolver(final long ts) { |
29 | 30 |
this.timestamp = ts; |
... | ... | |
64 | 65 |
while (currentObject != ResolverMDStorePlugin.DONE) { |
65 | 66 |
try { |
66 | 67 |
currentRecord = (String) currentObject.get("body"); |
67 |
if (currentObject.get("resolved_ts") == null) { |
|
68 |
if (forceResolver || currentObject.get("resolved_ts") == null) {
|
|
68 | 69 |
final double start = System.currentTimeMillis(); |
69 | 70 |
final String resolvedRecord = resolve(currentRecord); |
70 | 71 |
if (resolvedRecord != null) { |
modules/dnet-pid-resolver/trunk/src/main/java/eu/dnetlib/pid/resolver/mdstore/plugin/RecordResolverFactory.java | ||
---|---|---|
9 | 9 |
|
10 | 10 |
public interface RecordResolverFactory { |
11 | 11 |
|
12 |
RecordResolver createResolver(BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, final boolean offline); |
|
12 |
RecordResolver createResolver(BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, final boolean offline, final boolean forceResolver);
|
|
13 | 13 |
|
14 |
RecordResolver createResolver(long identifier, BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, final boolean offline); |
|
14 |
RecordResolver createResolver(long identifier, BlockingQueue<DBObject> queue, MongoCollection<DBObject> outputCollection, ResolverSerializer serializer, List<PIDResolver> pluginResolver, final boolean offline, final boolean forceResolver);
|
|
15 | 15 |
|
16 | 16 |
|
17 | 17 |
} |
modules/dnet-pid-resolver/trunk/src/main/java/eu/dnetlib/pid/resolver/mdstore/plugin/ResolverMDStorePlugin.java | ||
---|---|---|
31 | 31 |
/** |
32 | 32 |
* Created by sandro on 9/9/16. |
33 | 33 |
*/ |
34 |
public class ResolverMDStorePlugin implements MDStorePlugin { |
|
34 |
public class ResolverMDStorePlugin implements MDStorePlugin {
|
|
35 | 35 |
|
36 | 36 |
private static final Log log = LogFactory.getLog(ResolverMDStorePlugin.class); |
37 | 37 |
public static DBObject DONE = new BasicDBObject(); |
... | ... | |
84 | 84 |
log.error("Number of threads Param is not an int value it will apply by default 4"); |
85 | 85 |
} |
86 | 86 |
|
87 |
final boolean refresh = params.get("refresh") != null && Boolean.parseBoolean(params.get("refresh"));
|
|
87 |
final boolean refresh = "refresh".equalsIgnoreCase(resolvingMode);
|
|
88 | 88 |
|
89 | 89 |
final String internalId = transactionManager.readMdStore(id); |
90 | 90 |
|
... | ... | |
105 | 105 |
if (refresh) { |
106 | 106 |
resolvedRecord.drop(); |
107 | 107 |
} |
108 |
final FindIterable<DBObject> mdstoreRecords = currentMdStoreCollection.find(dateQuery(lastResolveDate==null?0:Long.parseLong(lastResolveDate),null)); |
|
108 |
final FindIterable<DBObject> mdstoreRecords = "refresh".equalsIgnoreCase(resolvingMode)?currentMdStoreCollection.find(): currentMdStoreCollection.find(dateQuery(lastResolveDate==null?0:Long.parseLong(lastResolveDate),null));
|
|
109 | 109 |
|
110 | 110 |
mdstoreRecords.noCursorTimeout(true); |
111 | 111 |
|
... | ... | |
115 | 115 |
|
116 | 116 |
final ExecutorService executor = Executors.newFixedThreadPool(100); |
117 | 117 |
|
118 |
final long total = currentMdStoreCollection.count(dateQuery(lastResolveDate==null?0:Long.parseLong(lastResolveDate),null)); |
|
118 |
final long total = "refresh".equalsIgnoreCase(resolvingMode)?currentMdStoreCollection.count():currentMdStoreCollection.count(dateQuery(lastResolveDate==null?0:Long.parseLong(lastResolveDate),null));
|
|
119 | 119 |
|
120 | 120 |
int previousPrintValue = -1; |
121 | 121 |
int currentPerc; |
... | ... | |
126 | 126 |
Collections.sort(pluginResolver); |
127 | 127 |
|
128 | 128 |
for (int i = 0; i < numberOfThreads; i++) { |
129 |
final RecordResolver resolver = recordResolverFactory.createResolver(ts, queue, resolvedRecord, resolverSerializer, pluginResolver, offline); |
|
129 |
final RecordResolver resolver = recordResolverFactory.createResolver(ts, queue, resolvedRecord, resolverSerializer, pluginResolver, offline, true);
|
|
130 | 130 |
responses.add(executor.submit(resolver)); |
131 | 131 |
} |
132 | 132 |
|
Also available in: Unified diff
Ignored Test