Project

General

Profile

« Previous | Next » 

Revision 48525

adapted some modules to the new mdstore

View differences:

modules/dnet-dli/branches/new-mdstore/src/test/java/eu/dnetlib/resolver/DLISerializerTest.java
31 31
/**
32 32
 * Created by sandro on 10/4/16.
33 33
 */
34
//@RunWith(SpringJUnit4ClassRunner.class)
35
//@ContextConfiguration(classes = { ConfigurationTestConfig.class, ConfigurationResolverStoreTestConfig.class })
36
@Ignore
34
@RunWith(SpringJUnit4ClassRunner.class)
35
@ContextConfiguration(classes = {ConfigurationTestConfig.class, ConfigurationResolverStoreTestConfig.class})
37 36
public class DLISerializerTest {
38 37

  
39 38
    private static final Log log = LogFactory.getLog(DLISerializerTest.class);
......
87 86
		final InputStream recordStream = this.getClass().getResourceAsStream("/eu/dnetlib/dli/transform/resolvedObject.json");
88 87
		final Gson gson = new Gson();
89 88
		final ResolvedObject resolvedObject = gson.fromJson(IOUtils.toString(recordStream), ResolvedObject.class);
90
		Assert.assertEquals(resolvedObject.getType(), ObjectType.dataset);
89
		Assert.assertEquals(resolvedObject.getType(), ObjectType.publication);
91 90
		final String recordTemplate =
92 91
				"<root xmlns:oaf=\"http://namespace.dnet.eu/oaf\" xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\">%s</root>";
93 92
		final String xml = serializer.serializeToXML(resolvedObject);
94 93

  
94

  
95
		System.out.println("xml = " + xml);
96

  
95 97
		final String inputRecord = String.format(recordTemplate, xml);
96 98

  
97 99
		final InputStream xsltAsStream = getClass().getResourceAsStream("/eu/dnetlib/resolver/xslt/transformResolvedRecord.xsl");
......
110 112
		System.out.println("record = " + record);
111 113
	}
112 114

  
115

  
116
	@Test
117
	public void testEscapingXML() {
118

  
119

  
120
	}
121

  
113 122
	private ResolvedObject createPMFMock() throws IOException {
114 123
		final InputStream resource = getClass().getResourceAsStream("/eu/dnetlib/dli/parser/InputPMF.xml");
115 124
		String xml = IOUtils.toString(resource);
modules/dnet-dli/branches/new-mdstore/src/test/java/eu/dnetlib/resolver/DLIResolverTest.java
14 14
import org.apache.commons.io.IOUtils;
15 15
import org.apache.commons.logging.Log;
16 16
import org.apache.commons.logging.LogFactory;
17

  
18 17
import org.junit.Assert;
19 18
import org.junit.Before;
20 19
import org.junit.Ignore;
......
158 157

  
159 158
	@Test
160 159
	public void rcsbResolverTest() throws Exception {
161
		final ResolvedObject pubmedObject = rcsbResolver.retrievePID("2Y9C", "pdb");
160
		final ResolvedObject pubmedObject = rcsbResolver.retrievePID("2C2V", "pdb");
162 161
		Assert.assertNotNull(pubmedObject);
163 162
		Assert.assertNotNull(pubmedObject.getAuthors());
164 163
		Assert.assertNotNull(pubmedObject.getTitles());
modules/dnet-dli/branches/new-mdstore/src/test/resources/eu/dnetlib/dli/transform/resolvedObject.json
1 1
{
2
  "pid": "10.1594/PANGAEA.695501",
3
  "pidType": "DOI",
4
  "titles": [
5
    "Stable isotopes measured on Ammonia beccarii from sediment core GIK12523-1"
6
  ],
7
  "authors": [
8
    "Winn, Kyaw",
9
    "Erlenkeuser, Helmut",
10
    "Nordberg, Kjell",
11
    "Gustafsson, Mikael"
12
  ],
13
  "type": "dataset",
14 2
  "datasourceProvenance": [
15 3
    {
16
      "datasource": "Datasets in Datacite",
17
      "datasourceId": "dli_________::datacite",
18
      "publisher": "PANGAEA - Data Publisher for Earth \u0026 Environmental Science",
4
      "completionStatus": "complete",
5
      "visible": true,
6
      "datasourceId": "dli_________::pubmed",
19 7
      "provisionMode": "resolved",
20
      "completionStatus": "complete",
21
      "visible": false,
22
      "datasourceContribution": "[\"pid\",\"pidType\"]"
8
      "datasource": "PubMed"
23 9
    }
24 10
  ],
25
  "completionStatus": "complete",
26
  "subjects": [
27
    {
28
      "scheme": "unknown",
29
      "term": "DEPTH, sediment/rock"
30
    },
31
    {
32
      "scheme": "unknown",
33
      "term": "Size fraction \u003e 0.063 mm, sand"
34
    },
35
    {
36
      "scheme": "unknown",
37
      "term": "Coiling ratio"
38
    },
39
    {
40
      "scheme": "unknown",
41
      "term": "Ammonia beccarii dextral"
42
    },
43
    {
44
      "scheme": "unknown",
45
      "term": "Ammonia beccarii dextral, d18O"
46
    },
47
    {
48
      "scheme": "unknown",
49
      "term": "Ammonia beccarii dextral, d13C"
50
    },
51
    {
52
      "scheme": "unknown",
53
      "term": "Ammonia beccarii sinistral"
54
    },
55
    {
56
      "scheme": "unknown",
57
      "term": "Ammonia beccarii sinistral, d18O"
58
    },
59
    {
60
      "scheme": "unknown",
61
      "term": "Ammonia beccarii sinistral, d13C"
62
    },
63
    {
64
      "scheme": "unknown",
65
      "term": "GIK-cruise"
66
    }
67
  ]
11
  "pid": "PMC23771",
12
  "description": "prova & suca",
13
  "pidType": "pmcid",
14
  "titles": [
15
    "The first step of aminoacylation at the atomic level in histidyl-tRNA synthetase."
16
  ],
17
  "authors": [
18
    "Arnez JG",
19
    " Augustine JG",
20
    " Moras D",
21
    " Francklyn CS."
22
  ],
23
  "date": "1997",
24
  "type": "publication"
68 25
}
modules/dnet-dli/branches/new-mdstore/src/main/java/eu/dnetlib/resolver/mdstore/plugin/RecordResolver.java
1 1
package eu.dnetlib.resolver.mdstore.plugin;
2 2

  
3
import java.util.HashMap;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.concurrent.BlockingQueue;
7
import java.util.concurrent.Callable;
8

  
9
import com.mongodb.DBObject;
3
import com.google.gson.Gson;
10 4
import com.mongodb.client.MongoCollection;
5
import eu.dnetlib.data.mdstore.modular.newmongodb.model.MDStoreEncoder;
11 6
import eu.dnetlib.dli.DLIUtils;
12 7
import eu.dnetlib.dli.resolver.PIDResolver;
13 8
import eu.dnetlib.dli.resolver.model.*;
14 9
import eu.dnetlib.dli.resolver.model.serializer.ResolverSerializer;
15
import eu.dnetlib.miscutils.collections.Pair;
16 10
import eu.dnetlib.resolver.parser.DLIParser;
17 11
import org.antlr.stringtemplate.StringTemplate;
18 12
import org.apache.commons.lang3.StringUtils;
19 13
import org.apache.commons.logging.Log;
20 14
import org.apache.commons.logging.LogFactory;
15
import org.bson.Document;
21 16

  
22
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.put;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.concurrent.BlockingQueue;
21
import java.util.concurrent.Callable;
23 22

  
24 23
/**
25 24
 * Created by sandro on 9/22/16.
......
31 30
	private final DLIParser parser = new DLIParser();
32 31
    private final Long timestamp;
33 32
    private List<PIDResolver> pluginResolver;
34
	private BlockingQueue<DBObject> inputQueue;
35
	private MongoCollection<DBObject> outputCollection;
36
	private ResolverSerializer serializer;
33
    private BlockingQueue<Document> inputQueue;
34
    private MongoCollection<Document> outputCollection;
35
    private ResolverSerializer serializer;
37 36

  
38 37
    public RecordResolver(final long ts) {
39 38
        this.timestamp = Long.valueOf(ts);
......
44 43
		final ResolvedObject inputObject = parser.parse(inputRecord);
45 44
		if (inputObject != null && StringUtils.isNoneBlank(inputObject.getResolvedDate()))
46 45
			return null;
47

  
48 46
		if (inputObject != null && !StringUtils.isBlank(inputObject.getPid())) {
49 47
			log.debug("trying to resolve " + inputObject.getPid());
50 48
		}
51

  
52 49
		boolean shouldUpdate = false;
53 50
		if (inputObject.getCompletionStatus() == null || !inputObject.getCompletionStatus().equals(CompletionStatus.complete.toString())) {
54 51
			shouldUpdate = shouldUpdate || tryToResolveRecord(inputObject);
......
57 54
			for (ObjectRelation rel : inputObject.getRelations()) {
58 55
                final Map<String, ObjectType> resolvedRelation = tryToResolveRelation(rel.getTargetPID());
59 56
                if (resolvedRelation != null && !resolvedRelation.isEmpty()) {
60
                    resolvedRelation.entrySet()
61
                            .forEach(e -> {
62
                                rel.setTargetPID(new PID(e.getKey(), "dnet"));
63
                                rel.setTargetType(e.getValue());
64
                            });
57
                    resolvedRelation.forEach((key, value) -> {
58
                        rel.setTargetPID(new PID(key, "dnet"));
59
                        rel.setTargetType(value);
60
                    });
65 61
                    shouldUpdate = true;
66 62
				}
67 63
			}
68 64
			if (shouldUpdate) {
69
				final String newXML = serializer.serializeReplacingXML(inputRecord, inputObject);
70
				return newXML;
71
			}
65
                if (!inputObject.getCompletionStatus().equals(CompletionStatus.complete.toString())) {
66
                    log.error("Write uncomplete record" + new Gson().toJson(inputRecord));
67
                }
68

  
69
                return serializer.serializeReplacingXML(inputRecord, inputObject);
70
            }
72 71
		}
73 72
		return null;
74 73
	}
......
131 130

  
132 131
        log.info("START HERE!");
133 132

  
134
		DBObject currentObject = inputQueue.take();
135
		int i = 0;
133
        Document currentObject = inputQueue.take();
134
        int i = 0;
136 135
        String currentRecord = null;
137 136
        double sumTotal = 0;
138 137
        while (currentObject != ResolverMDStorePlugin.DONE) {
139 138
			try {
140
                currentRecord = (String) currentObject.get("body");
139
                currentRecord = MDStoreEncoder.decodeMDStoreRecord(currentObject).getBody();
141 140
                if (currentObject.get("resolved_ts") == null) {
142 141
                    final double start = System.currentTimeMillis();
143 142
                    final String resolvedRecord = resolve(currentRecord);
144 143
                    if (resolvedRecord != null) {
145
                        currentObject.put("body", resolvedRecord);
146
                        currentObject.removeField("_id");
144
                        currentObject.put("body", MDStoreEncoder.compress(resolvedRecord));
145
                        currentObject.remove("_id");
147 146
                        currentObject.put("resolved_ts", timestamp);
148 147
                        outputCollection.insertOne(currentObject);
149 148
                    }
......
169 168

  
170 169
	}
171 170

  
172
	public BlockingQueue<DBObject> getInputQueue() {
173
		return inputQueue;
171
    public BlockingQueue<Document> getInputQueue() {
172
        return inputQueue;
174 173
	}
175 174

  
176
	public void setInputQueue(final BlockingQueue<DBObject> inputQueue) {
177
		this.inputQueue = inputQueue;
175
    public void setInputQueue(final BlockingQueue<Document> inputQueue) {
176
        this.inputQueue = inputQueue;
178 177
	}
179 178

  
180
	public MongoCollection<DBObject> getOutputCollection() {
181
		return outputCollection;
179
    public MongoCollection<Document> getOutputCollection() {
180
        return outputCollection;
182 181
	}
183 182

  
184
	public void setOutputCollection(final MongoCollection<DBObject> outputCollection) {
185
		this.outputCollection = outputCollection;
183
    public void setOutputCollection(final MongoCollection<Document> outputCollection) {
184
        this.outputCollection = outputCollection;
186 185
	}
187 186

  
188 187
	public void setSerializer(final ResolverSerializer serializer) {
modules/dnet-dli/branches/new-mdstore/src/main/java/eu/dnetlib/resolver/mdstore/plugin/ResolverMDStorePlugin.java
13 13
import eu.dnetlib.data.mdstore.modular.action.MDStorePlugin;
14 14
import eu.dnetlib.data.mdstore.modular.connector.MDStoreDao;
15 15
import eu.dnetlib.data.mdstore.modular.mongodb.MDStoreTransactionManagerImpl;
16
import eu.dnetlib.data.mdstore.modular.newmongodb.MongoMDStoreDAO;
17
import eu.dnetlib.data.mdstore.modular.newmongodb.repository.MetadataRepository;
16 18
import eu.dnetlib.dli.resolver.PIDResolver;
17 19
import eu.dnetlib.dli.resolver.model.serializer.ResolverSerializer;
18 20
import eu.dnetlib.rmi.data.MDStoreServiceException;
......
36 38
public class ResolverMDStorePlugin implements MDStorePlugin {
37 39

  
38 40
	private static final Log log = LogFactory.getLog(ResolverMDStorePlugin.class);
39
	public static DBObject DONE = new BasicDBObject();
41
    public static Document DONE = new Document();
40 42

  
41 43
	@Autowired
42 44
	private List<PIDResolver> pluginResolver;
......
46 48
	private ResolverSerializer resolverSerializer;
47 49

  
48 50
	@Autowired
49
	private MDStoreTransactionManagerImpl transactionManager;
51
    private MetadataRepository metadata;
50 52

  
51 53
	public static void save(MongoCollection<DBObject> collection, DBObject document) {
52 54
		Object id = document.get("_id");
......
79 81

  
80 82
			final boolean refresh = params.get("refresh") != null && Boolean.parseBoolean(params.get("refresh"));
81 83

  
82
			final String internalId = transactionManager.readMdStore(id);
83 84

  
84
			final MongoDatabase db = transactionManager.getDb();
85
            final String internalId = metadata.findByMdId(id).getCurrentId();
85 86

  
86
			final MongoCollection<DBObject> currentMdStoreCollection = db.getCollection(internalId, DBObject.class);
87
            final MongoCollection<Document> currentMdStoreCollection = ((MongoMDStoreDAO) dao).getRecordCollection(internalId);
87 88

  
88
			final MongoCollection<DBObject> resolvedRecord = db.getCollection("resolved_" + StringUtils.substringBefore(id, "_"), DBObject.class);
89
            final MongoCollection<Document> resolvedRecord = ((MongoMDStoreDAO) dao).getRecordCollection("resolved_" + StringUtils.substringBefore(id, "_"));
89 90

  
90 91
            final BasicDBObject idx = new BasicDBObject();
91 92
            idx.put("resolved_ts", 1);
......
97 98
				resolvedRecord.drop();
98 99
			}
99 100

  
100
			final FindIterable<DBObject> mdstoreRecords = currentMdStoreCollection.find();
101
            final FindIterable<Document> mdstoreRecords = currentMdStoreCollection.find();
101 102

  
102
			final BlockingQueue<DBObject> queue = new ArrayBlockingQueue<>(100);
103
            final BlockingQueue<Document> queue = new ArrayBlockingQueue<>(100);
103 104

  
104 105
			final List<Future<Boolean>> responses = new ArrayList<>();
105 106

  
......
123 124

  
124 125
			int parsed = 0;
125 126

  
126
			for (DBObject currentMdStoreRecord : mdstoreRecords) {
127
				queue.put(currentMdStoreRecord);
127
            for (Document currentMdStoreRecord : mdstoreRecords) {
128
                queue.put(currentMdStoreRecord);
128 129

  
129 130
				currentPerc = Math.round(((float) ++parsed / (float) total) * 100.0F);
130 131

  
......
146 147
		}
147 148
	}
148 149

  
149
    private void upsertResolved(MongoCollection<DBObject> currentMdStoreCollection, MongoCollection<DBObject> resolvedRecord, final long timestamp) {
150
    private void upsertResolved(MongoCollection<Document> currentMdStoreCollection, MongoCollection<Document> resolvedRecord, final long timestamp) {
150 151
        log.info("Updating resolved objects");
151

  
152
        final Bson queryByTs = Filters.gte("resolved_ts", Long.valueOf(timestamp));
152
        final Bson queryByTs = Filters.gte("resolved_ts", timestamp);
153 153
        int i = 0;
154
        final FindIterable<DBObject> dbObjects = timestamp == 0 ? resolvedRecord.find() : resolvedRecord.find(queryByTs);
155
        for (DBObject object : dbObjects) {
156
			Bson query = Filters.eq("id", object.get("id").toString());
157
			final DBObject replacedObj = BasicDBObjectBuilder.start()
158
					.add("body", object.get("body").toString())
154
        final FindIterable<Document> dbObjects = timestamp == 0 ? resolvedRecord.find() : resolvedRecord.find(queryByTs);
155
        for (Document object : dbObjects) {
156
            Bson query = Filters.eq("recordIdentifier", object.get("recordIdentifier").toString());
157
            final DBObject replacedObj = BasicDBObjectBuilder.start()
158
                    .add("body", object.get("body"))
159 159
                    .add("resolved_ts", object.get("resolved_ts"))
160 160
                    .get();
161
			Bson newDocument = new Document("$set", replacedObj);
162
			currentMdStoreCollection.findOneAndUpdate(query, newDocument);
161
            final Document newDocument = new Document("$set", replacedObj);
162
            currentMdStoreCollection.updateMany(query, newDocument);
163 163
            i++;
164

  
165 164
		}
166 165

  
167 166
        log.info("Updated " + i);

Also available in: Unified diff