Revision 48525
Added by Sandro La Bruzzo over 7 years ago
modules/dnet-dli/branches/new-mdstore/src/test/java/eu/dnetlib/resolver/DLISerializerTest.java | ||
---|---|---|
31 | 31 |
/** |
32 | 32 |
* Created by sandro on 10/4/16. |
33 | 33 |
*/ |
34 |
//@RunWith(SpringJUnit4ClassRunner.class) |
|
35 |
//@ContextConfiguration(classes = { ConfigurationTestConfig.class, ConfigurationResolverStoreTestConfig.class }) |
|
36 |
@Ignore |
|
34 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
35 |
@ContextConfiguration(classes = {ConfigurationTestConfig.class, ConfigurationResolverStoreTestConfig.class}) |
|
37 | 36 |
public class DLISerializerTest { |
38 | 37 |
|
39 | 38 |
private static final Log log = LogFactory.getLog(DLISerializerTest.class); |
... | ... | |
87 | 86 |
final InputStream recordStream = this.getClass().getResourceAsStream("/eu/dnetlib/dli/transform/resolvedObject.json"); |
88 | 87 |
final Gson gson = new Gson(); |
89 | 88 |
final ResolvedObject resolvedObject = gson.fromJson(IOUtils.toString(recordStream), ResolvedObject.class); |
90 |
Assert.assertEquals(resolvedObject.getType(), ObjectType.dataset);
|
|
89 |
Assert.assertEquals(resolvedObject.getType(), ObjectType.publication);
|
|
91 | 90 |
final String recordTemplate = |
92 | 91 |
"<root xmlns:oaf=\"http://namespace.dnet.eu/oaf\" xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\">%s</root>"; |
93 | 92 |
final String xml = serializer.serializeToXML(resolvedObject); |
94 | 93 |
|
94 |
|
|
95 |
System.out.println("xml = " + xml); |
|
96 |
|
|
95 | 97 |
final String inputRecord = String.format(recordTemplate, xml); |
96 | 98 |
|
97 | 99 |
final InputStream xsltAsStream = getClass().getResourceAsStream("/eu/dnetlib/resolver/xslt/transformResolvedRecord.xsl"); |
... | ... | |
110 | 112 |
System.out.println("record = " + record); |
111 | 113 |
} |
112 | 114 |
|
115 |
|
|
116 |
@Test |
|
117 |
public void testEscapingXML() { |
|
118 |
|
|
119 |
|
|
120 |
} |
|
121 |
|
|
113 | 122 |
private ResolvedObject createPMFMock() throws IOException { |
114 | 123 |
final InputStream resource = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputPMF.xml"); |
115 | 124 |
String xml = IOUtils.toString(resource); |
modules/dnet-dli/branches/new-mdstore/src/test/java/eu/dnetlib/resolver/DLIResolverTest.java | ||
---|---|---|
14 | 14 |
import org.apache.commons.io.IOUtils; |
15 | 15 |
import org.apache.commons.logging.Log; |
16 | 16 |
import org.apache.commons.logging.LogFactory; |
17 |
|
|
18 | 17 |
import org.junit.Assert; |
19 | 18 |
import org.junit.Before; |
20 | 19 |
import org.junit.Ignore; |
... | ... | |
158 | 157 |
|
159 | 158 |
@Test |
160 | 159 |
public void rcsbResolverTest() throws Exception { |
161 |
final ResolvedObject pubmedObject = rcsbResolver.retrievePID("2Y9C", "pdb");
|
|
160 |
final ResolvedObject pubmedObject = rcsbResolver.retrievePID("2C2V", "pdb");
|
|
162 | 161 |
Assert.assertNotNull(pubmedObject); |
163 | 162 |
Assert.assertNotNull(pubmedObject.getAuthors()); |
164 | 163 |
Assert.assertNotNull(pubmedObject.getTitles()); |
modules/dnet-dli/branches/new-mdstore/src/test/resources/eu/dnetlib/dli/transform/resolvedObject.json | ||
---|---|---|
1 | 1 |
{ |
2 |
"pid": "10.1594/PANGAEA.695501", |
|
3 |
"pidType": "DOI", |
|
4 |
"titles": [ |
|
5 |
"Stable isotopes measured on Ammonia beccarii from sediment core GIK12523-1" |
|
6 |
], |
|
7 |
"authors": [ |
|
8 |
"Winn, Kyaw", |
|
9 |
"Erlenkeuser, Helmut", |
|
10 |
"Nordberg, Kjell", |
|
11 |
"Gustafsson, Mikael" |
|
12 |
], |
|
13 |
"type": "dataset", |
|
14 | 2 |
"datasourceProvenance": [ |
15 | 3 |
{ |
16 |
"datasource": "Datasets in Datacite",
|
|
17 |
"datasourceId": "dli_________::datacite",
|
|
18 |
"publisher": "PANGAEA - Data Publisher for Earth \u0026 Environmental Science",
|
|
4 |
"completionStatus": "complete",
|
|
5 |
"visible": true,
|
|
6 |
"datasourceId": "dli_________::pubmed",
|
|
19 | 7 |
"provisionMode": "resolved", |
20 |
"completionStatus": "complete", |
|
21 |
"visible": false, |
|
22 |
"datasourceContribution": "[\"pid\",\"pidType\"]" |
|
8 |
"datasource": "PubMed" |
|
23 | 9 |
} |
24 | 10 |
], |
25 |
"completionStatus": "complete", |
|
26 |
"subjects": [ |
|
27 |
{ |
|
28 |
"scheme": "unknown", |
|
29 |
"term": "DEPTH, sediment/rock" |
|
30 |
}, |
|
31 |
{ |
|
32 |
"scheme": "unknown", |
|
33 |
"term": "Size fraction \u003e 0.063 mm, sand" |
|
34 |
}, |
|
35 |
{ |
|
36 |
"scheme": "unknown", |
|
37 |
"term": "Coiling ratio" |
|
38 |
}, |
|
39 |
{ |
|
40 |
"scheme": "unknown", |
|
41 |
"term": "Ammonia beccarii dextral" |
|
42 |
}, |
|
43 |
{ |
|
44 |
"scheme": "unknown", |
|
45 |
"term": "Ammonia beccarii dextral, d18O" |
|
46 |
}, |
|
47 |
{ |
|
48 |
"scheme": "unknown", |
|
49 |
"term": "Ammonia beccarii dextral, d13C" |
|
50 |
}, |
|
51 |
{ |
|
52 |
"scheme": "unknown", |
|
53 |
"term": "Ammonia beccarii sinistral" |
|
54 |
}, |
|
55 |
{ |
|
56 |
"scheme": "unknown", |
|
57 |
"term": "Ammonia beccarii sinistral, d18O" |
|
58 |
}, |
|
59 |
{ |
|
60 |
"scheme": "unknown", |
|
61 |
"term": "Ammonia beccarii sinistral, d13C" |
|
62 |
}, |
|
63 |
{ |
|
64 |
"scheme": "unknown", |
|
65 |
"term": "GIK-cruise" |
|
66 |
} |
|
67 |
] |
|
11 |
"pid": "PMC23771", |
|
12 |
"description": "prova & suca", |
|
13 |
"pidType": "pmcid", |
|
14 |
"titles": [ |
|
15 |
"The first step of aminoacylation at the atomic level in histidyl-tRNA synthetase." |
|
16 |
], |
|
17 |
"authors": [ |
|
18 |
"Arnez JG", |
|
19 |
" Augustine JG", |
|
20 |
" Moras D", |
|
21 |
" Francklyn CS." |
|
22 |
], |
|
23 |
"date": "1997", |
|
24 |
"type": "publication" |
|
68 | 25 |
} |
modules/dnet-dli/branches/new-mdstore/src/main/java/eu/dnetlib/resolver/mdstore/plugin/RecordResolver.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.resolver.mdstore.plugin; |
2 | 2 |
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.concurrent.BlockingQueue; |
|
7 |
import java.util.concurrent.Callable; |
|
8 |
|
|
9 |
import com.mongodb.DBObject; |
|
3 |
import com.google.gson.Gson; |
|
10 | 4 |
import com.mongodb.client.MongoCollection; |
5 |
import eu.dnetlib.data.mdstore.modular.newmongodb.model.MDStoreEncoder; |
|
11 | 6 |
import eu.dnetlib.dli.DLIUtils; |
12 | 7 |
import eu.dnetlib.dli.resolver.PIDResolver; |
13 | 8 |
import eu.dnetlib.dli.resolver.model.*; |
14 | 9 |
import eu.dnetlib.dli.resolver.model.serializer.ResolverSerializer; |
15 |
import eu.dnetlib.miscutils.collections.Pair; |
|
16 | 10 |
import eu.dnetlib.resolver.parser.DLIParser; |
17 | 11 |
import org.antlr.stringtemplate.StringTemplate; |
18 | 12 |
import org.apache.commons.lang3.StringUtils; |
19 | 13 |
import org.apache.commons.logging.Log; |
20 | 14 |
import org.apache.commons.logging.LogFactory; |
15 |
import org.bson.Document; |
|
21 | 16 |
|
22 |
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put; |
|
17 |
import java.util.HashMap; |
|
18 |
import java.util.List; |
|
19 |
import java.util.Map; |
|
20 |
import java.util.concurrent.BlockingQueue; |
|
21 |
import java.util.concurrent.Callable; |
|
23 | 22 |
|
24 | 23 |
/** |
25 | 24 |
* Created by sandro on 9/22/16. |
... | ... | |
31 | 30 |
private final DLIParser parser = new DLIParser(); |
32 | 31 |
private final Long timestamp; |
33 | 32 |
private List<PIDResolver> pluginResolver; |
34 |
private BlockingQueue<DBObject> inputQueue;
|
|
35 |
private MongoCollection<DBObject> outputCollection;
|
|
36 |
private ResolverSerializer serializer;
|
|
33 |
private BlockingQueue<Document> inputQueue;
|
|
34 |
private MongoCollection<Document> outputCollection;
|
|
35 |
private ResolverSerializer serializer;
|
|
37 | 36 |
|
38 | 37 |
public RecordResolver(final long ts) { |
39 | 38 |
this.timestamp = Long.valueOf(ts); |
... | ... | |
44 | 43 |
final ResolvedObject inputObject = parser.parse(inputRecord); |
45 | 44 |
if (inputObject != null && StringUtils.isNoneBlank(inputObject.getResolvedDate())) |
46 | 45 |
return null; |
47 |
|
|
48 | 46 |
if (inputObject != null && !StringUtils.isBlank(inputObject.getPid())) { |
49 | 47 |
log.debug("trying to resolve " + inputObject.getPid()); |
50 | 48 |
} |
51 |
|
|
52 | 49 |
boolean shouldUpdate = false; |
53 | 50 |
if (inputObject.getCompletionStatus() == null || !inputObject.getCompletionStatus().equals(CompletionStatus.complete.toString())) { |
54 | 51 |
shouldUpdate = shouldUpdate || tryToResolveRecord(inputObject); |
... | ... | |
57 | 54 |
for (ObjectRelation rel : inputObject.getRelations()) { |
58 | 55 |
final Map<String, ObjectType> resolvedRelation = tryToResolveRelation(rel.getTargetPID()); |
59 | 56 |
if (resolvedRelation != null && !resolvedRelation.isEmpty()) { |
60 |
resolvedRelation.entrySet() |
|
61 |
.forEach(e -> { |
|
62 |
rel.setTargetPID(new PID(e.getKey(), "dnet")); |
|
63 |
rel.setTargetType(e.getValue()); |
|
64 |
}); |
|
57 |
resolvedRelation.forEach((key, value) -> { |
|
58 |
rel.setTargetPID(new PID(key, "dnet")); |
|
59 |
rel.setTargetType(value); |
|
60 |
}); |
|
65 | 61 |
shouldUpdate = true; |
66 | 62 |
} |
67 | 63 |
} |
68 | 64 |
if (shouldUpdate) { |
69 |
final String newXML = serializer.serializeReplacingXML(inputRecord, inputObject); |
|
70 |
return newXML; |
|
71 |
} |
|
65 |
if (!inputObject.getCompletionStatus().equals(CompletionStatus.complete.toString())) { |
|
66 |
log.error("Write uncomplete record" + new Gson().toJson(inputRecord)); |
|
67 |
} |
|
68 |
|
|
69 |
return serializer.serializeReplacingXML(inputRecord, inputObject); |
|
70 |
} |
|
72 | 71 |
} |
73 | 72 |
return null; |
74 | 73 |
} |
... | ... | |
131 | 130 |
|
132 | 131 |
log.info("START HERE!"); |
133 | 132 |
|
134 |
DBObject currentObject = inputQueue.take();
|
|
135 |
int i = 0;
|
|
133 |
Document currentObject = inputQueue.take();
|
|
134 |
int i = 0;
|
|
136 | 135 |
String currentRecord = null; |
137 | 136 |
double sumTotal = 0; |
138 | 137 |
while (currentObject != ResolverMDStorePlugin.DONE) { |
139 | 138 |
try { |
140 |
currentRecord = (String) currentObject.get("body");
|
|
139 |
currentRecord = MDStoreEncoder.decodeMDStoreRecord(currentObject).getBody();
|
|
141 | 140 |
if (currentObject.get("resolved_ts") == null) { |
142 | 141 |
final double start = System.currentTimeMillis(); |
143 | 142 |
final String resolvedRecord = resolve(currentRecord); |
144 | 143 |
if (resolvedRecord != null) { |
145 |
currentObject.put("body", resolvedRecord);
|
|
146 |
currentObject.removeField("_id");
|
|
144 |
currentObject.put("body", MDStoreEncoder.compress(resolvedRecord));
|
|
145 |
currentObject.remove("_id"); |
|
147 | 146 |
currentObject.put("resolved_ts", timestamp); |
148 | 147 |
outputCollection.insertOne(currentObject); |
149 | 148 |
} |
... | ... | |
169 | 168 |
|
170 | 169 |
} |
171 | 170 |
|
172 |
public BlockingQueue<DBObject> getInputQueue() {
|
|
173 |
return inputQueue;
|
|
171 |
public BlockingQueue<Document> getInputQueue() {
|
|
172 |
return inputQueue;
|
|
174 | 173 |
} |
175 | 174 |
|
176 |
public void setInputQueue(final BlockingQueue<DBObject> inputQueue) {
|
|
177 |
this.inputQueue = inputQueue;
|
|
175 |
public void setInputQueue(final BlockingQueue<Document> inputQueue) {
|
|
176 |
this.inputQueue = inputQueue;
|
|
178 | 177 |
} |
179 | 178 |
|
180 |
public MongoCollection<DBObject> getOutputCollection() {
|
|
181 |
return outputCollection;
|
|
179 |
public MongoCollection<Document> getOutputCollection() {
|
|
180 |
return outputCollection;
|
|
182 | 181 |
} |
183 | 182 |
|
184 |
public void setOutputCollection(final MongoCollection<DBObject> outputCollection) {
|
|
185 |
this.outputCollection = outputCollection;
|
|
183 |
public void setOutputCollection(final MongoCollection<Document> outputCollection) {
|
|
184 |
this.outputCollection = outputCollection;
|
|
186 | 185 |
} |
187 | 186 |
|
188 | 187 |
public void setSerializer(final ResolverSerializer serializer) { |
modules/dnet-dli/branches/new-mdstore/src/main/java/eu/dnetlib/resolver/mdstore/plugin/ResolverMDStorePlugin.java | ||
---|---|---|
13 | 13 |
import eu.dnetlib.data.mdstore.modular.action.MDStorePlugin; |
14 | 14 |
import eu.dnetlib.data.mdstore.modular.connector.MDStoreDao; |
15 | 15 |
import eu.dnetlib.data.mdstore.modular.mongodb.MDStoreTransactionManagerImpl; |
16 |
import eu.dnetlib.data.mdstore.modular.newmongodb.MongoMDStoreDAO; |
|
17 |
import eu.dnetlib.data.mdstore.modular.newmongodb.repository.MetadataRepository; |
|
16 | 18 |
import eu.dnetlib.dli.resolver.PIDResolver; |
17 | 19 |
import eu.dnetlib.dli.resolver.model.serializer.ResolverSerializer; |
18 | 20 |
import eu.dnetlib.rmi.data.MDStoreServiceException; |
... | ... | |
36 | 38 |
public class ResolverMDStorePlugin implements MDStorePlugin { |
37 | 39 |
|
38 | 40 |
private static final Log log = LogFactory.getLog(ResolverMDStorePlugin.class); |
39 |
public static DBObject DONE = new BasicDBObject();
|
|
41 |
public static Document DONE = new Document();
|
|
40 | 42 |
|
41 | 43 |
@Autowired |
42 | 44 |
private List<PIDResolver> pluginResolver; |
... | ... | |
46 | 48 |
private ResolverSerializer resolverSerializer; |
47 | 49 |
|
48 | 50 |
@Autowired |
49 |
private MDStoreTransactionManagerImpl transactionManager;
|
|
51 |
private MetadataRepository metadata;
|
|
50 | 52 |
|
51 | 53 |
public static void save(MongoCollection<DBObject> collection, DBObject document) { |
52 | 54 |
Object id = document.get("_id"); |
... | ... | |
79 | 81 |
|
80 | 82 |
final boolean refresh = params.get("refresh") != null && Boolean.parseBoolean(params.get("refresh")); |
81 | 83 |
|
82 |
final String internalId = transactionManager.readMdStore(id); |
|
83 | 84 |
|
84 |
final MongoDatabase db = transactionManager.getDb();
|
|
85 |
final String internalId = metadata.findByMdId(id).getCurrentId();
|
|
85 | 86 |
|
86 |
final MongoCollection<DBObject> currentMdStoreCollection = db.getCollection(internalId, DBObject.class);
|
|
87 |
final MongoCollection<Document> currentMdStoreCollection = ((MongoMDStoreDAO) dao).getRecordCollection(internalId);
|
|
87 | 88 |
|
88 |
final MongoCollection<DBObject> resolvedRecord = db.getCollection("resolved_" + StringUtils.substringBefore(id, "_"), DBObject.class);
|
|
89 |
final MongoCollection<Document> resolvedRecord = ((MongoMDStoreDAO) dao).getRecordCollection("resolved_" + StringUtils.substringBefore(id, "_"));
|
|
89 | 90 |
|
90 | 91 |
final BasicDBObject idx = new BasicDBObject(); |
91 | 92 |
idx.put("resolved_ts", 1); |
... | ... | |
97 | 98 |
resolvedRecord.drop(); |
98 | 99 |
} |
99 | 100 |
|
100 |
final FindIterable<DBObject> mdstoreRecords = currentMdStoreCollection.find();
|
|
101 |
final FindIterable<Document> mdstoreRecords = currentMdStoreCollection.find();
|
|
101 | 102 |
|
102 |
final BlockingQueue<DBObject> queue = new ArrayBlockingQueue<>(100);
|
|
103 |
final BlockingQueue<Document> queue = new ArrayBlockingQueue<>(100);
|
|
103 | 104 |
|
104 | 105 |
final List<Future<Boolean>> responses = new ArrayList<>(); |
105 | 106 |
|
... | ... | |
123 | 124 |
|
124 | 125 |
int parsed = 0; |
125 | 126 |
|
126 |
for (DBObject currentMdStoreRecord : mdstoreRecords) {
|
|
127 |
queue.put(currentMdStoreRecord);
|
|
127 |
for (Document currentMdStoreRecord : mdstoreRecords) {
|
|
128 |
queue.put(currentMdStoreRecord);
|
|
128 | 129 |
|
129 | 130 |
currentPerc = Math.round(((float) ++parsed / (float) total) * 100.0F); |
130 | 131 |
|
... | ... | |
146 | 147 |
} |
147 | 148 |
} |
148 | 149 |
|
149 |
private void upsertResolved(MongoCollection<DBObject> currentMdStoreCollection, MongoCollection<DBObject> resolvedRecord, final long timestamp) {
|
|
150 |
private void upsertResolved(MongoCollection<Document> currentMdStoreCollection, MongoCollection<Document> resolvedRecord, final long timestamp) {
|
|
150 | 151 |
log.info("Updating resolved objects"); |
151 |
|
|
152 |
final Bson queryByTs = Filters.gte("resolved_ts", Long.valueOf(timestamp)); |
|
152 |
final Bson queryByTs = Filters.gte("resolved_ts", timestamp); |
|
153 | 153 |
int i = 0; |
154 |
final FindIterable<DBObject> dbObjects = timestamp == 0 ? resolvedRecord.find() : resolvedRecord.find(queryByTs);
|
|
155 |
for (DBObject object : dbObjects) {
|
|
156 |
Bson query = Filters.eq("id", object.get("id").toString());
|
|
157 |
final DBObject replacedObj = BasicDBObjectBuilder.start()
|
|
158 |
.add("body", object.get("body").toString())
|
|
154 |
final FindIterable<Document> dbObjects = timestamp == 0 ? resolvedRecord.find() : resolvedRecord.find(queryByTs);
|
|
155 |
for (Document object : dbObjects) {
|
|
156 |
Bson query = Filters.eq("recordIdentifier", object.get("recordIdentifier").toString());
|
|
157 |
final DBObject replacedObj = BasicDBObjectBuilder.start()
|
|
158 |
.add("body", object.get("body"))
|
|
159 | 159 |
.add("resolved_ts", object.get("resolved_ts")) |
160 | 160 |
.get(); |
161 |
Bson newDocument = new Document("$set", replacedObj);
|
|
162 |
currentMdStoreCollection.findOneAndUpdate(query, newDocument);
|
|
161 |
final Document newDocument = new Document("$set", replacedObj);
|
|
162 |
currentMdStoreCollection.updateMany(query, newDocument);
|
|
163 | 163 |
i++; |
164 |
|
|
165 | 164 |
} |
166 | 165 |
|
167 | 166 |
log.info("Updated " + i); |
Also available in: Unified diff
adapted some modules to the new mdstore