1
|
package eu.dnetlib.data.mapreduce.hbase.dataimport;
|
2
|
|
3
|
import com.google.gson.JsonArray;
|
4
|
import com.google.gson.JsonElement;
|
5
|
import com.google.gson.JsonObject;
|
6
|
import eu.dnetlib.actionmanager.actions.ActionFactory;
|
7
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
8
|
import eu.dnetlib.actionmanager.common.Agent;
|
9
|
import eu.dnetlib.data.mapreduce.util.StreamUtils;
|
10
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
11
|
import org.apache.commons.lang3.StringUtils;
|
12
|
|
13
|
import java.util.ArrayList;
|
14
|
import java.util.List;
|
15
|
import java.util.Objects;
|
16
|
|
17
|
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
|
18
|
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
|
19
|
import static eu.dnetlib.data.proto.KindProtos.Kind;
|
20
|
import static eu.dnetlib.data.proto.OafProtos.Oaf;
|
21
|
import static eu.dnetlib.data.proto.OafProtos.OafEntity;
|
22
|
import static eu.dnetlib.data.proto.ResultProtos.Result;
|
23
|
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
|
24
|
import static eu.dnetlib.data.proto.TypeProtos.Type;
|
25
|
|
26
|
public class CrossRefToActions {
|
27
|
|
28
|
public static AtomicAction generateActionsFromDump(final JsonObject rootElement, ActionFactory factory, final String setName, final Agent agent, boolean invisible) {
|
29
|
|
30
|
|
31
|
|
32
|
//Create OAF Proto
|
33
|
final Oaf.Builder oaf = Oaf.newBuilder();
|
34
|
//Add Data Info
|
35
|
oaf.setDataInfo(DataInfo.newBuilder()
|
36
|
.setInvisible(invisible)
|
37
|
.setDeletedbyinference(false)
|
38
|
.setInferred(false)
|
39
|
.setTrust("0.9")
|
40
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
41
|
.build());
|
42
|
|
43
|
//Adding Kind
|
44
|
oaf.setKind(Kind.entity);
|
45
|
|
46
|
//creating Result Proto
|
47
|
final OafEntity.Builder entity = OafEntity.newBuilder().setType(Type.result);
|
48
|
|
49
|
//Adding Collected From
|
50
|
entity.setDateofcollection(Objects.requireNonNull(getStringValue(rootElement, "dateOfCollection")));
|
51
|
if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
|
52
|
StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
|
53
|
.map(JsonElement::getAsJsonObject)
|
54
|
.forEach(cf ->
|
55
|
{
|
56
|
final String id =getStringValue(cf,"id");
|
57
|
final String name =getStringValue(cf,"name");
|
58
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
59
|
final KeyValue collectedFrom = KeyValue.newBuilder()
|
60
|
.setValue(name)
|
61
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
62
|
.build();
|
63
|
entity.addCollectedfrom(collectedFrom);
|
64
|
}
|
65
|
}
|
66
|
);
|
67
|
}
|
68
|
|
69
|
|
70
|
//Adding identifier
|
71
|
final String objIdentifier = getStringValue(rootElement, "objIdentifier");
|
72
|
final String nsPrefix = getStringValue(rootElement,"datasourcePrefix");
|
73
|
if (StringUtils.isBlank(objIdentifier)) return null;
|
74
|
final String sourceId = String.format("50|%s::%s", nsPrefix, objIdentifier);
|
75
|
entity.setId(sourceId);
|
76
|
final String doi = getStringValue(rootElement, "doi");
|
77
|
//ADDING PID
|
78
|
if (doi == null)
|
79
|
return null;
|
80
|
entity.addPid(StructuredProperty.newBuilder()
|
81
|
.setValue(doi)
|
82
|
.setQualifier(getQualifier("doi", "dnet:pid_types"))
|
83
|
.build());
|
84
|
|
85
|
|
86
|
//Create Result Field
|
87
|
Result.Builder result = Result.newBuilder();
|
88
|
|
89
|
|
90
|
//Adding Instance
|
91
|
final String typeValue = getStringValue(rootElement.getAsJsonObject("type"),"value");
|
92
|
final String cobjValue = getStringValue(rootElement.getAsJsonObject("type"),"cobj");
|
93
|
|
94
|
//Add UnpayWall instance
|
95
|
final String best_oa_location_url = getStringValue(rootElement, "best_oa_location_url");
|
96
|
Instance.Builder instance= Instance.newBuilder();
|
97
|
instance.setInstancetype(Qualifier.newBuilder()
|
98
|
.setClassid(cobjValue)
|
99
|
.setClassname(typeValue)
|
100
|
.setSchemeid("dnet:publication_resource")
|
101
|
.setSchemename("dnet:publication_resource")
|
102
|
.build());
|
103
|
instance.setHostedby(KeyValue.newBuilder()
|
104
|
.setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
|
105
|
.setValue("Unknown Repository")
|
106
|
.build());
|
107
|
if (StringUtils.isNotBlank(best_oa_location_url)){
|
108
|
|
109
|
instance.addUrl(best_oa_location_url);
|
110
|
instance.setAccessright(Qualifier.newBuilder()
|
111
|
.setClassid("OPEN")
|
112
|
.setClassname("open access")
|
113
|
.setSchemeid("dnet:access_modes")
|
114
|
.setSchemename("dnet:access_modes")
|
115
|
.build());
|
116
|
instance.setCollectedfrom(KeyValue.newBuilder()
|
117
|
.setValue("UnpayWall")
|
118
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("unpaywall"))
|
119
|
.build());
|
120
|
} else {
|
121
|
instance = Instance.newBuilder();
|
122
|
instance.addUrl(String.format("http://dx.doi.org/%s", doi));
|
123
|
instance.setAccessright(Qualifier.newBuilder()
|
124
|
.setClassid("CLOSED")
|
125
|
.setClassname("Closed Access")
|
126
|
.setSchemeid("dnet:access_modes")
|
127
|
.setSchemename("dnet:access_modes")
|
128
|
.build());
|
129
|
instance.setCollectedfrom(KeyValue.newBuilder()
|
130
|
.setValue("CrossRef")
|
131
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("crossref"))
|
132
|
.build());
|
133
|
}
|
134
|
result.addInstance(instance.build());
|
135
|
|
136
|
|
137
|
|
138
|
|
139
|
//Create Metadata Proto
|
140
|
Metadata.Builder metadata = Metadata.newBuilder();
|
141
|
|
142
|
//Adding Authors
|
143
|
final List<Author> authors = createAuthors(rootElement);
|
144
|
if (authors!= null)
|
145
|
metadata.addAllAuthor(authors);
|
146
|
//adding Language
|
147
|
metadata.setLanguage(Qualifier.newBuilder()
|
148
|
.setClassid("und")
|
149
|
.setClassname("Undetermined")
|
150
|
.setSchemeid("dent:languages")
|
151
|
.setSchemename("dent:languages")
|
152
|
.build());
|
153
|
|
154
|
//Adding subjects
|
155
|
List<String> subjects =getArrayValues(rootElement, "subject");
|
156
|
|
157
|
subjects.forEach(s-> metadata.addSubject(StructuredProperty.newBuilder()
|
158
|
.setValue(s)
|
159
|
.setQualifier(getQualifier("keyword", "dnet:subject"))
|
160
|
.build()));
|
161
|
|
162
|
//Adding titles
|
163
|
List<String>titles =getArrayValues(rootElement, "title");
|
164
|
titles.forEach(t->
|
165
|
metadata.addTitle(StructuredProperty.newBuilder()
|
166
|
.setValue(t)
|
167
|
.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
|
168
|
.build()));
|
169
|
|
170
|
//Adding date
|
171
|
String date = getStringValue(rootElement,"issued");
|
172
|
if (date.length()==4)
|
173
|
date +="-01-01";
|
174
|
|
175
|
metadata.setDateofacceptance(StringField.newBuilder().setValue(date).build());
|
176
|
|
177
|
//Adding description
|
178
|
String description=null;
|
179
|
if (rootElement.has("abstract") && rootElement.get("abstract").isJsonArray())
|
180
|
description =String.join(" ",getArrayValues(rootElement,"abstract"));
|
181
|
else if (rootElement.has("abstract") )
|
182
|
description = rootElement.get("abstract").getAsString();
|
183
|
|
184
|
if(StringUtils.isNotBlank(description))
|
185
|
metadata.addDescription(StringField.newBuilder().setValue(description).build());
|
186
|
|
187
|
//Adding Journal
|
188
|
final String publisher = getStringValue(rootElement,"publisher");
|
189
|
if (StringUtils.isNotBlank(publisher)){
|
190
|
|
191
|
final Journal.Builder journal = Journal.newBuilder().setName(publisher);
|
192
|
|
193
|
if (hasJSONArrayField(rootElement,"issn" )){
|
194
|
StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
|
195
|
.map(JsonElement::getAsJsonObject)
|
196
|
.forEach(it -> {
|
197
|
final String type = getStringValue(it, "type");
|
198
|
final String value = getStringValue(it, "value");
|
199
|
if("electronic".equals(type)){
|
200
|
journal.setIssnOnline(value);
|
201
|
}
|
202
|
if ("print".equals(type))
|
203
|
journal.setIssnPrinted(value);
|
204
|
});
|
205
|
}
|
206
|
metadata.setJournal(journal.build());
|
207
|
}
|
208
|
|
209
|
metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
|
210
|
result.setMetadata(metadata.build());
|
211
|
entity.setResult(result.build());
|
212
|
oaf.setEntity(entity.build());
|
213
|
|
214
|
|
215
|
|
216
|
|
217
|
return factory.createAtomicAction(setName, agent,oaf.getEntity().getId(), "result", "body",oaf.build().toByteArray());
|
218
|
}
|
219
|
|
220
|
|
221
|
private static boolean hasJSONArrayField(final JsonObject root, final String key) {
|
222
|
return root.has(key) && root.get(key).isJsonArray();
|
223
|
}
|
224
|
|
225
|
|
226
|
public static List<Author> createAuthors(final JsonObject root) {
|
227
|
|
228
|
if (root.has("author") && root.get("author").isJsonArray()) {
|
229
|
|
230
|
final List<Author> authors = new ArrayList<>();
|
231
|
final JsonArray jsonAuthors = root.getAsJsonArray("author");
|
232
|
int i = 0;
|
233
|
for (JsonElement item: jsonAuthors) {
|
234
|
final JsonObject author = item.getAsJsonObject();
|
235
|
final Author.Builder result =Author.newBuilder();
|
236
|
final String given = getStringValue(author, "given");
|
237
|
final String family = getStringValue(author, "family");
|
238
|
final String orchid = getStringValue(author, "ORCID");
|
239
|
if (StringUtils.isBlank(given) && StringUtils.isBlank(family))
|
240
|
continue;
|
241
|
result.setFullname(given+" "+ family);
|
242
|
if (StringUtils.isNotBlank(given))
|
243
|
result.setName(given);
|
244
|
if (StringUtils.isNotBlank(family))
|
245
|
result.setSurname(family);
|
246
|
if (StringUtils.isNotBlank(orchid))
|
247
|
{
|
248
|
result.addPid(KeyValue.newBuilder()
|
249
|
.setValue(orchid)
|
250
|
.setKey("ORCID")
|
251
|
.build());
|
252
|
}
|
253
|
result.setRank(i++);
|
254
|
authors.add(result.build());
|
255
|
}
|
256
|
return authors;
|
257
|
|
258
|
}
|
259
|
return null;
|
260
|
|
261
|
|
262
|
}
|
263
|
|
264
|
|
265
|
|
266
|
|
267
|
}
|