1
|
package eu.dnetlib.data.mapreduce.actions;
|
2
|
|
3
|
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.googlecode.protobuf.format.JsonFormat;
import com.googlecode.protobuf.format.JsonFormat.ParseException;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.Reporter;
import eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidToActions;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Test;
|
22
|
|
23
|
public class OrcidToActionsTest {
|
24
|
|
25
|
private String setName;
|
26
|
private Agent agent;
|
27
|
private Reporter reporter;
|
28
|
|
29
|
private String generatedJson = "{\"kind\": \"entity\",\"entity\": {\"type\": \"result\",\"result\": {\"metadata\": {\"title\": [{\"value\": \"Compulsory Licences and Trade Marks\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2012-01-01\",\"qualifier\": {\"classid\": \"issued\",\"classname\": \"issued\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"resulttype\": {\"classid\": \"publication\",\"classname\": \"publication\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"resourcetype\": {\"classid\": \"journal-article\",\"classname\": \"journal-article\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"source\": [{\"value\": \"University of Copenhagen\"}],\"author\": [{\"fullname\": \"Thomas Riis\",\"rank\": 1},{\"fullname\": \"Jens Hemmingsen Schovsbo\",\"name\": \"Jens Hemmingsen\",\"surname\": \"Schovsbo\",\"rank\": 2,\"pid\": [{\"key\": \"ORCID\",\"value\": \"0000-0003-4872-879X\"}]}]},\"instance\": [{\"accessright\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0001\",\"classname\": \"Article\",\"schemeid\": \"dnet:publication_resource\",\"schemename\": \"dnet:publication_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"https://curis.ku.dk/portal/en/publications/compulsory-licences-and-trade-marks(93dc5709-2675-4463-bb27-eb54844c2381).html\"],\"collectedfrom\": {\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"},\"dateofacceptance\": {\"value\": \"2012-01-01\"}}]},\"pid\": [{\"value\": \"93dc5709-2675-4463-bb27-eb54844c2381\",\"qualifier\": {\"classid\": 
\"orcidworkid\",\"classname\": \"orcidworkid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2018-10-22\",\"id\": \"50|orcid____::1523b5ccb7304c768c5c540b747983a3\"}}";
|
30
|
@Before
|
31
|
public void setup() {
|
32
|
setName = "ORCID";
|
33
|
agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.service);
|
34
|
reporter =
|
35
|
(Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
|
36
|
}
|
37
|
|
38
|
@Test
|
39
|
public void testUnicodeAction() throws IOException {
|
40
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_1.json");
|
41
|
|
42
|
}
|
43
|
|
44
|
@Test
|
45
|
public void testSourceWorkIdAction() throws IOException {
|
46
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_2.json");
|
47
|
|
48
|
}
|
49
|
|
50
|
@Test
|
51
|
public void testAuthorsRank() throws IOException {
|
52
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_3.json");
|
53
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_4.json");
|
54
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_5.json");
|
55
|
|
56
|
}
|
57
|
|
58
|
@Test
|
59
|
public void testUrl() throws IOException {
|
60
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_6.json");
|
61
|
|
62
|
}
|
63
|
|
64
|
@Test
|
65
|
public void testNullUrlAction() throws IOException {
|
66
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_7.json");
|
67
|
|
68
|
}
|
69
|
|
70
|
@Test
|
71
|
public void testMassiveOrcidAction() throws IOException {
|
72
|
doTestAllOrcidAction("/eu/dnetlib/data/mapreduce/actions/part-100");
|
73
|
}
|
74
|
|
75
|
@Test
|
76
|
public void testJsonToProto() throws ParseException {
|
77
|
Oaf.Builder builder = Oaf.newBuilder();
|
78
|
JsonFormat.merge(generatedJson, builder);
|
79
|
System.out.println(builder.build());
|
80
|
}
|
81
|
|
82
|
private void doTestSingleAction(final String filePath) throws IOException {
|
83
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
84
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
85
|
|
86
|
final String line = in.readLine();
|
87
|
|
88
|
final JsonParser parser = new JsonParser();
|
89
|
final JsonObject root = parser.parse(line).getAsJsonObject();
|
90
|
final List<AtomicAction> lista = OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
|
91
|
//System.out.println(new Gson().toJson(lista));
|
92
|
}
|
93
|
|
94
|
private void doTestAllOrcidAction(final String filePath) throws IOException {
|
95
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
96
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
97
|
String line = in.readLine();
|
98
|
int i = 0;
|
99
|
while (StringUtils.isNotBlank(line)) {
|
100
|
final JsonParser parser = new JsonParser();
|
101
|
final JsonObject root = parser.parse(line).getAsJsonObject();
|
102
|
try {
|
103
|
OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
|
104
|
} catch (final Throwable e) {
|
105
|
System.out.println("Exception parsing: " + line);
|
106
|
throw new RuntimeException(e);
|
107
|
}
|
108
|
line = in.readLine();
|
109
|
i += 1;
|
110
|
}
|
111
|
System.out.println("parsed: " + i);
|
112
|
}
|
113
|
}
|