Revision 56011
Added by Alessia Bardi almost 5 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/OrcidToActionsTest.java | ||
---|---|---|
6 | 6 |
import java.io.InputStreamReader; |
7 | 7 |
import java.util.List; |
8 | 8 |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.junit.Before; |
|
11 |
import org.junit.Test; |
|
12 |
|
|
13 | 9 |
import com.google.gson.JsonObject; |
14 | 10 |
import com.google.gson.JsonParser; |
15 |
|
|
11 |
import com.googlecode.protobuf.format.JsonFormat; |
|
12 |
import com.googlecode.protobuf.format.JsonFormat.ParseException; |
|
16 | 13 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
17 | 14 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
18 | 15 |
import eu.dnetlib.actionmanager.common.Agent; |
19 | 16 |
import eu.dnetlib.data.mapreduce.hbase.Reporter; |
20 | 17 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidToActions; |
18 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
19 |
import org.apache.commons.lang3.StringUtils; |
|
20 |
import org.junit.Before; |
|
21 |
import org.junit.Test; |
|
21 | 22 |
|
22 | 23 |
public class OrcidToActionsTest { |
23 | 24 |
|
... | ... | |
25 | 26 |
private Agent agent; |
26 | 27 |
private Reporter reporter; |
27 | 28 |
|
29 |
private String generatedJson = "{\"kind\": \"entity\",\"entity\": {\"type\": \"result\",\"result\": {\"metadata\": {\"title\": [{\"value\": \"Compulsory Licences and Trade Marks\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2012-01-01\",\"qualifier\": {\"classid\": \"issued\",\"classname\": \"issued\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"resulttype\": {\"classid\": \"publication\",\"classname\": \"publication\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"resourcetype\": {\"classid\": \"journal-article\",\"classname\": \"journal-article\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"source\": [{\"value\": \"University of Copenhagen\"}],\"author\": [{\"fullname\": \"Thomas Riis\",\"rank\": 1},{\"fullname\": \"Jens Hemmingsen Schovsbo\",\"name\": \"Jens Hemmingsen\",\"surname\": \"Schovsbo\",\"rank\": 2,\"pid\": [{\"key\": \"ORCID\",\"value\": \"0000-0003-4872-879X\"}]}]},\"instance\": [{\"accessright\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0001\",\"classname\": \"Article\",\"schemeid\": \"dnet:publication_resource\",\"schemename\": \"dnet:publication_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"https://curis.ku.dk/portal/en/publications/compulsory-licences-and-trade-marks(93dc5709-2675-4463-bb27-eb54844c2381).html\"],\"collectedfrom\": {\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"},\"dateofacceptance\": {\"value\": \"2012-01-01\"}}]},\"pid\": [{\"value\": \"93dc5709-2675-4463-bb27-eb54844c2381\",\"qualifier\": {\"classid\": 
\"orcidworkid\",\"classname\": \"orcidworkid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2018-10-22\",\"id\": \"50|orcid____::1523b5ccb7304c768c5c540b747983a3\"}}"; |
|
28 | 30 |
@Before |
29 | 31 |
public void setup() { |
30 | 32 |
setName = "ORCID"; |
... | ... | |
70 | 72 |
doTestAllOrcidAction("/eu/dnetlib/data/mapreduce/actions/part-100"); |
71 | 73 |
} |
72 | 74 |
|
75 |
@Test |
|
76 |
public void testJsonToProto() throws ParseException { |
|
77 |
Oaf.Builder builder = Oaf.newBuilder(); |
|
78 |
JsonFormat.merge(generatedJson, builder); |
|
79 |
System.out.println(builder.build()); |
|
80 |
} |
|
81 |
|
|
73 | 82 |
private void doTestSingleAction(final String filePath) throws IOException { |
74 | 83 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
75 | 84 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
... | ... | |
79 | 88 |
final JsonParser parser = new JsonParser(); |
80 | 89 |
final JsonObject root = parser.parse(line).getAsJsonObject(); |
81 | 90 |
final List<AtomicAction> lista = OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter); |
82 |
// System.out.println(new Gson().toJson(lista));
|
|
91 |
//System.out.println(new Gson().toJson(lista)); |
|
83 | 92 |
} |
84 | 93 |
|
85 | 94 |
private void doTestAllOrcidAction(final String filePath) throws IOException { |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/OrcidToActions.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.dataimport; |
2 | 2 |
|
3 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getArrayValues; |
|
4 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getDefaultResulttype; |
|
5 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getQualifier; |
|
6 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue; |
|
7 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.isValidDate; |
|
8 |
|
|
9 | 3 |
import java.io.IOException; |
10 | 4 |
import java.io.InputStream; |
11 | 5 |
import java.util.ArrayList; |
... | ... | |
13 | 7 |
import java.util.List; |
14 | 8 |
import java.util.Map; |
15 | 9 |
|
16 |
import org.apache.commons.io.IOUtils; |
|
17 |
import org.apache.commons.lang3.StringUtils; |
|
18 |
|
|
19 | 10 |
import com.google.gson.Gson; |
20 | 11 |
import com.google.gson.JsonArray; |
21 | 12 |
import com.google.gson.JsonElement; |
22 | 13 |
import com.google.gson.JsonObject; |
23 |
|
|
24 | 14 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
25 | 15 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
26 | 16 |
import eu.dnetlib.actionmanager.common.Agent; |
27 | 17 |
import eu.dnetlib.data.mapreduce.hbase.Reporter; |
28 | 18 |
import eu.dnetlib.data.mapreduce.util.StreamUtils; |
29 |
import eu.dnetlib.data.proto.FieldTypeProtos; |
|
30 |
import eu.dnetlib.data.proto.FieldTypeProtos.Author; |
|
31 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
32 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
33 |
import eu.dnetlib.data.proto.FieldTypeProtos.StringField; |
|
34 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
35 |
import eu.dnetlib.data.proto.KindProtos; |
|
36 |
import eu.dnetlib.data.proto.OafProtos; |
|
37 |
import eu.dnetlib.data.proto.ResultProtos; |
|
38 |
import eu.dnetlib.data.proto.TypeProtos; |
|
19 |
import eu.dnetlib.data.proto.*; |
|
20 |
import eu.dnetlib.data.proto.FieldTypeProtos.*; |
|
39 | 21 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
40 | 22 |
import eu.dnetlib.miscutils.collections.Pair; |
23 |
import org.apache.commons.io.IOUtils; |
|
24 |
import org.apache.commons.lang3.StringUtils; |
|
41 | 25 |
|
26 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*; |
|
27 |
|
|
42 | 28 |
public class OrcidToActions { |
43 | 29 |
|
44 | 30 |
public static final String ORCID = "ORCID"; |
... | ... | |
222 | 208 |
|
223 | 209 |
actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray())); |
224 | 210 |
|
211 |
//System.out.println(JsonFormat.printToString(oaf.build())); |
|
225 | 212 |
return actionList; |
226 | 213 |
|
227 | 214 |
} |
... | ... | |
338 | 325 |
|
339 | 326 |
final String pubDate = getPublicationDate(rootElement, "publication_date"); |
340 | 327 |
if (StringUtils.isNotBlank(pubDate)) { |
341 |
// if (addToDateOfAcceptance) {
|
|
342 |
// metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build());
|
|
343 |
// }
|
|
328 |
if (addToDateOfAcceptance) { |
|
329 |
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(pubDate).build()); |
|
330 |
} |
|
344 | 331 |
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder() |
345 | 332 |
.setValue(pubDate) |
346 | 333 |
.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date")) |
Also available in: Unified diff
Added a statement that prints the generated Oaf as JSON, left commented out.
Added a test that builds an Oaf proto from a JSON string.