1
|
package eu.dnetlib.data.mapreduce.actions;
|
2
|
|
3
|
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;

import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Test;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.googlecode.protobuf.format.JsonFormat;
import com.googlecode.protobuf.format.JsonFormat.ParseException;

import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.Reporter;
import eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility;
import eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidToActions;
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
32
|
|
33
|
public class OrcidToActionsTest {
|
34
|
|
35
|
private String setName;
|
36
|
private Agent agent;
|
37
|
private Reporter reporter;
|
38
|
|
39
|
private final String generatedJson =
|
40
|
"{\"kind\": \"entity\",\"entity\": {\"type\": \"result\",\"result\": {\"metadata\": {\"title\": [{\"value\": \"Cryptogenic fibrosing alveolitis in Bergen Hospital district\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"1999-01-01\",\"qualifier\": {\"classid\": \"issued\",\"classname\": \"issued\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"1999-01-01\"},\"resulttype\": {\"classid\": \"publication\",\"classname\": \"publication\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"resourcetype\": {\"classid\": \"conference-abstract\",\"classname\": \"conference-abstract\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"source\": [{\"value\": \"University of Southern Denmark\"}],\"author\": [{\"fullname\": \"Christian von Plessen\",\"name\": \"Christian\",\"surname\": \"von Plessen\",\"rank\": 1,\"pid\": [{\"key\": \"ORCID\",\"value\": \"0000-0002-6134-6780\"}]},{\"fullname\": \"Grinde, Ø.\",\"name\": \"Ø.\",\"surname\": \"Grinde\",\"rank\": 2},{\"fullname\": \"Gulsvik, A.\",\"name\": \"A.\",\"surname\": \"Gulsvik\",\"rank\": 3}]},\"instance\": [{\"accessright\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0004\",\"classname\": \"Conference object\",\"schemeid\": \"dnet:publication_resource\",\"schemename\": \"dnet:publication_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"http://findresearcher.sdu.dk/portal/en/publications/cryptogenic-fibrosing-alveolitis-in-bergen-hospital-district(14a8eea6-ddf6-4055-9e7e-a82525ae36c2).html\"],\"collectedfrom\": {\"key\": 
\"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"},\"dateofacceptance\": {\"value\": \"1999-01-01\"}}]},\"originalId\": [\"29721246\"],\"collectedfrom\": [{\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"}],\"pid\": [{\"value\": \"14a8eea6-ddf6-4055-9e7e-a82525ae36c2\",\"qualifier\": {\"classid\": \"orcidworkid\",\"classname\": \"orcidworkid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2018-10-22\",\"id\": \"50|orcid_______::01d661c1f7824215e5150d59b30f7cf7\",\"dateoftransformation\": \"2019-10-04T12:14:24+02:00\"},\"dataInfo\": {\"inferred\": false,\"deletedbyinference\": false,\"trust\": \"0.9\",\"provenanceaction\": {\"classid\": \"sysimport:actionset:orcidworks-no-doi\",\"classname\": \"sysimport:actionset:orcidworks-no-doi\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}},\"lastupdatetimestamp\": 1570184064208}\n" +
|
41
|
"";
|
42
|
|
43
|
private boolean printProto = false;
|
44
|
|
45
|
@Before
|
46
|
public void setup() {
|
47
|
setName = "ORCID";
|
48
|
agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.service);
|
49
|
reporter =
|
50
|
(Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
|
51
|
}
|
52
|
|
53
|
@Test
|
54
|
public void testUnicodeAction() throws IOException {
|
55
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_1.json", printProto);
|
56
|
|
57
|
}
|
58
|
|
59
|
@Test
|
60
|
public void testSourceWorkIdAction() throws IOException {
|
61
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_2.json", printProto);
|
62
|
|
63
|
}
|
64
|
|
65
|
@Test
|
66
|
public void testAuthorsRank() throws IOException {
|
67
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_3.json", printProto);
|
68
|
|
69
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_4.json", printProto);
|
70
|
|
71
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_5.json", printProto);
|
72
|
|
73
|
|
74
|
}
|
75
|
|
76
|
@Test
|
77
|
public void testUrl() throws IOException {
|
78
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_6.json", printProto);
|
79
|
}
|
80
|
|
81
|
@Test
|
82
|
public void testNullUrlAction() throws IOException {
|
83
|
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_7.json", printProto);
|
84
|
|
85
|
}
|
86
|
|
87
|
@Test
|
88
|
public void testMassiveOrcidAction() throws IOException {
|
89
|
doTestAllOrcidAction("/eu/dnetlib/data/mapreduce/actions/part-100");
|
90
|
}
|
91
|
|
92
|
@Test
|
93
|
public void testJsonToProto() throws ParseException {
|
94
|
final Oaf.Builder builder = Oaf.newBuilder();
|
95
|
JsonFormat.merge(generatedJson, builder);
|
96
|
System.out.println(builder.build());
|
97
|
}
|
98
|
|
99
|
private void doTestSingleAction(final String filePath, boolean print) throws IOException {
|
100
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
101
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
102
|
|
103
|
final String line = in.readLine();
|
104
|
|
105
|
final JsonParser parser = new JsonParser();
|
106
|
final JsonObject root = parser.parse(line).getAsJsonObject();
|
107
|
final List<AtomicAction> actions = OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
|
108
|
if(print) {
|
109
|
if (actions!= null) {
|
110
|
actions.forEach(it -> {
|
111
|
try {
|
112
|
System.out.println(
|
113
|
String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s\n value:\n%s", it.getTargetRowKey(), it.getTargetColumnFamily(),
|
114
|
it.getTargetColumn(),
|
115
|
Oaf.parseFrom(it.getTargetValue())));
|
116
|
} catch (InvalidProtocolBufferException e) {
|
117
|
e.printStackTrace();
|
118
|
}
|
119
|
});
|
120
|
}
|
121
|
}
|
122
|
}
|
123
|
|
124
|
|
125
|
|
126
|
private void doTestAllOrcidAction(final String filePath) throws IOException {
|
127
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
128
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
129
|
String line = in.readLine();
|
130
|
int i = 0;
|
131
|
while (StringUtils.isNotBlank(line)) {
|
132
|
final JsonParser parser = new JsonParser();
|
133
|
final JsonObject root = parser.parse(line).getAsJsonObject();
|
134
|
try {
|
135
|
OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
|
136
|
} catch (final Throwable e) {
|
137
|
System.out.println("Exception parsing: " + line);
|
138
|
throw new RuntimeException(e);
|
139
|
}
|
140
|
line = in.readLine();
|
141
|
i += 1;
|
142
|
}
|
143
|
}
|
144
|
|
145
|
@Test
|
146
|
public void testGeneratedJson() throws ParseException {
|
147
|
final JsonParser parser = new JsonParser();
|
148
|
final JsonObject root = parser.parse(generatedJson).getAsJsonObject();
|
149
|
assertEquals(getStringValue(root, "kind"), "entity");
|
150
|
final JsonObject entity = root.getAsJsonObject("entity");
|
151
|
final List<JsonObject> pids = DumpToActionsUtility.getArrayObjects(entity, "pid");
|
152
|
pids.forEach(pid -> {
|
153
|
final JsonObject qualifier = pid.getAsJsonObject("qualifier");
|
154
|
assertTrue(qualifier.get("classid").getAsString().matches("(ark|arxiv|pmc|pmid|orcidworkid|urn)"));
|
155
|
});
|
156
|
assertEquals(getStringValue(entity, "type"), "result");
|
157
|
assertNotNull(getStringValue(entity, "id"));
|
158
|
assertNotNull(getStringValue(entity, "dateofcollection"));
|
159
|
assertTrue(DumpToActionsUtility.isValidDate(getStringValue(entity, "dateofcollection")));
|
160
|
final JsonObject metadata = entity.getAsJsonObject("result").getAsJsonObject("metadata");
|
161
|
assertNotNull(metadata.getAsJsonArray("title"));
|
162
|
assertNotNull(metadata.getAsJsonArray("relevantdate"));
|
163
|
assertNotNull(metadata.getAsJsonObject("dateofacceptance"));
|
164
|
assertTrue(DumpToActionsUtility.isValidDate(metadata.getAsJsonObject("dateofacceptance").get("value").getAsString()));
|
165
|
assertNotNull(metadata.getAsJsonObject("resulttype"));
|
166
|
assertNotNull(metadata.getAsJsonArray("author"));
|
167
|
final JsonArray instance = entity.getAsJsonObject("result").getAsJsonArray("instance");
|
168
|
instance.forEach(i -> {
|
169
|
assertNotNull(i.getAsJsonObject().getAsJsonObject("accessright"));
|
170
|
assertNotNull(i.getAsJsonObject().getAsJsonObject("hostedby"));
|
171
|
assertNotNull(i.getAsJsonObject().getAsJsonObject("collectedfrom"));
|
172
|
assertNotNull(i.getAsJsonObject().getAsJsonObject("instancetype"));
|
173
|
});
|
174
|
|
175
|
}
|
176
|
|
177
|
}
|