1
|
package eu.dnetlib.data.mapreduce.hbase.dataimport;
|
2
|
|
3
|
import com.google.gson.Gson;
|
4
|
import com.google.gson.JsonElement;
|
5
|
import com.google.gson.JsonObject;
|
6
|
import com.googlecode.protobuf.format.JsonFormat;
|
7
|
import eu.dnetlib.actionmanager.actions.ActionFactory;
|
8
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
9
|
import eu.dnetlib.actionmanager.common.Agent;
|
10
|
import eu.dnetlib.data.mapreduce.util.StreamUtils;
|
11
|
import eu.dnetlib.data.proto.*;
|
12
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
13
|
import eu.dnetlib.miscutils.collections.Pair;
|
14
|
import org.apache.commons.io.IOUtils;
|
15
|
import org.apache.commons.lang3.StringUtils;
|
16
|
import java.io.IOException;
|
17
|
import java.io.InputStream;
|
18
|
import java.util.*;
|
19
|
import java.util.concurrent.atomic.AtomicInteger;
|
20
|
import java.util.function.Function;
|
21
|
import java.util.stream.Collectors;
|
22
|
|
23
|
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
|
24
|
import static eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
|
25
|
|
26
|
|
27
|
public class DOIBoostToActions {
|
28
|
|
29
|
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
|
30
|
put("MAG", new Pair<>("Microsoft Academic Graph", "openaire____::microsoft"));
|
31
|
put("ORCID", new Pair<>("ORCID", "openaire____::orcid"));
|
32
|
put("CrossRef", new Pair<>("Crossref", "openaire____::crossref"));
|
33
|
put("UnpayWall", new Pair<>("UnpayWall", "openaire____::unpaywall"));
|
34
|
|
35
|
}};
|
36
|
|
37
|
private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType = new HashMap<String, FieldTypeProtos.Qualifier>() {{
|
38
|
put("MAG", FieldTypeProtos.Qualifier.newBuilder().setClassid("mag_id" ).setClassname("Microsoft Academic Graph Identifier").setSchemename("dnet:pid_types").setSchemeid("dnet:pid_types").build());
|
39
|
put("grid.ac", getQualifier("grid", "dnet:pid_types"));
|
40
|
put("wikpedia", getQualifier("urn", "dnet:pid_types"));
|
41
|
}};
|
42
|
|
43
|
static Map<String, Map<String, String>> typologiesMapping;
|
44
|
|
45
|
static {
|
46
|
try {
|
47
|
final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
|
48
|
final String tt =IOUtils.toString(is);
|
49
|
typologiesMapping = new Gson().fromJson(tt, Map.class);
|
50
|
} catch (IOException e) {
|
51
|
e.printStackTrace();
|
52
|
}
|
53
|
}
|
54
|
|
55
|
// Namespace prefix placed between "50|" and the md5 of the DOI when building result identifiers.
final static String doiBoostNSPREFIX ="doiboost____";
|
56
|
|
57
|
|
58
|
public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement, final ActionFactory factory, final String setName, final Agent agent, boolean invisible,
|
59
|
final boolean onlyOrganization) {
|
60
|
|
61
|
//Create OAF Proto
|
62
|
|
63
|
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
64
|
//Add Data Info
|
65
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
66
|
.setInvisible(invisible)
|
67
|
.setDeletedbyinference(false)
|
68
|
.setInferred(false)
|
69
|
.setTrust("0.9")
|
70
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
71
|
.build());
|
72
|
|
73
|
//Adding Kind
|
74
|
oaf.setKind(KindProtos.Kind.entity);
|
75
|
|
76
|
//creating Result Proto
|
77
|
final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
|
78
|
|
79
|
entity.setDateofcollection("2018-10-10");
|
80
|
|
81
|
if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
|
82
|
StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
|
83
|
.map(JsonElement::getAsString)
|
84
|
.forEach(cf ->
|
85
|
{
|
86
|
final String id =datasources.get(cf).getValue();
|
87
|
final String name =datasources.get(cf).getKey();
|
88
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
89
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
90
|
.setValue(name)
|
91
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
92
|
.build();
|
93
|
entity.addCollectedfrom(collectedFrom);
|
94
|
}
|
95
|
}
|
96
|
);
|
97
|
}
|
98
|
//Adding identifier
|
99
|
final String doi = getStringValue(rootElement, "doi");
|
100
|
if (doi == null)
|
101
|
return null;
|
102
|
final String sourceId = String.format("50|%s::%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
|
103
|
entity.setId(sourceId);
|
104
|
|
105
|
entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
|
106
|
.setValue(doi)
|
107
|
.setQualifier(getQualifier("doi", "dnet:pid_types"))
|
108
|
.build());
|
109
|
|
110
|
|
111
|
//Create Result Field
|
112
|
ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
|
113
|
|
114
|
final String type = getStringValue(rootElement,"type");
|
115
|
|
116
|
if (!typologiesMapping.containsKey(type))
|
117
|
return null;
|
118
|
|
119
|
//Adding Instances
|
120
|
final String typeValue = typologiesMapping.get(type).get("value");
|
121
|
final String cobjValue = typologiesMapping.get(type).get("cobj");
|
122
|
|
123
|
|
124
|
getArrayObjects(rootElement, "instances").stream().map(it ->
|
125
|
{
|
126
|
ResultProtos.Result.Instance.Builder instance= ResultProtos.Result.Instance.newBuilder();
|
127
|
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
128
|
.setClassid(cobjValue)
|
129
|
.setClassname(typeValue)
|
130
|
.setSchemeid("dnet:publication_resource")
|
131
|
.setSchemename("dnet:publication_resource")
|
132
|
.build());
|
133
|
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
134
|
.setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
|
135
|
.setValue("Unknown Repository")
|
136
|
.build());
|
137
|
|
138
|
final String acc_class_id =it.get("access-rights").getAsString();
|
139
|
String acc_class_value;
|
140
|
switch (acc_class_id){
|
141
|
case "OPEN": {
|
142
|
acc_class_value = "open access";
|
143
|
break;
|
144
|
}
|
145
|
case "CLOSED": {
|
146
|
acc_class_value = "closed access";
|
147
|
break;
|
148
|
}
|
149
|
|
150
|
default: {
|
151
|
acc_class_value = "not available";
|
152
|
}
|
153
|
|
154
|
}
|
155
|
|
156
|
instance.addUrl(it.get("url").getAsString());
|
157
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
158
|
.setClassid(acc_class_id)
|
159
|
.setClassname(acc_class_value)
|
160
|
.setSchemeid("dnet:access_modes")
|
161
|
.setSchemename("dnet:access_modes")
|
162
|
.build());
|
163
|
|
164
|
final String id =datasources.get(it.get("provenance").getAsString()).getValue();
|
165
|
final String name =datasources.get(it.get("provenance").getAsString()).getKey();
|
166
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
167
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
168
|
.setValue(name)
|
169
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
170
|
.build();
|
171
|
|
172
|
instance.setCollectedfrom(collectedFrom);
|
173
|
}
|
174
|
|
175
|
return instance.build();
|
176
|
}).forEach(result::addInstance);
|
177
|
|
178
|
//Adding DOI URL as Instance
|
179
|
final String doiURL = getStringValue(rootElement, "doi-url");
|
180
|
if (StringUtils.isNotBlank(doiURL)) {
|
181
|
|
182
|
|
183
|
final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
|
184
|
instance.addUrl(doiURL);
|
185
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
186
|
.setClassid("CLOSED")
|
187
|
.setClassname("Closed Access")
|
188
|
.setSchemeid("dnet:access_modes")
|
189
|
.setSchemename("dnet:access_modes")
|
190
|
.build());
|
191
|
instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
192
|
.setValue("CrossRef")
|
193
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("crossref"))
|
194
|
.build());
|
195
|
result.addInstance(instance);
|
196
|
}
|
197
|
|
198
|
//Create Metadata Proto
|
199
|
final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
|
200
|
|
201
|
|
202
|
Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
|
203
|
|
204
|
if (authorsOrganizations.getKey().size() > 0) {
|
205
|
metadata.addAllAuthor(authorsOrganizations.getKey());
|
206
|
}
|
207
|
else {
|
208
|
return null;
|
209
|
}
|
210
|
//adding Language
|
211
|
metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
|
212
|
.setClassid("und")
|
213
|
.setClassname("Undetermined")
|
214
|
.setSchemeid("dent:languages")
|
215
|
.setSchemename("dent:languages")
|
216
|
.build());
|
217
|
|
218
|
//Adding subjects
|
219
|
List<String> subjects =getArrayValues(rootElement, "subject");
|
220
|
|
221
|
subjects.forEach(s-> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
|
222
|
.setValue(s)
|
223
|
.setQualifier(getQualifier("keyword", "dnet:subject"))
|
224
|
.build()));
|
225
|
|
226
|
List<String>titles =getArrayValues(rootElement, "title");
|
227
|
titles.forEach(t->
|
228
|
metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
|
229
|
.setValue(t)
|
230
|
.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
|
231
|
.build()));
|
232
|
settingRelevantDate(rootElement, metadata, "issued", "issued", true);
|
233
|
settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
|
234
|
settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
|
235
|
settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
|
236
|
|
237
|
|
238
|
getArrayObjects(rootElement, "abstract").forEach(d -> metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build()));
|
239
|
|
240
|
|
241
|
|
242
|
//Adding Journal
|
243
|
final String publisher = getStringValue(rootElement,"publisher");
|
244
|
if (StringUtils.isNotBlank(publisher)){
|
245
|
|
246
|
final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
|
247
|
|
248
|
if (hasJSONArrayField(rootElement,"issn" )){
|
249
|
StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
|
250
|
.map(JsonElement::getAsJsonObject)
|
251
|
.forEach(it -> {
|
252
|
final String issntype = getStringValue(it, "type");
|
253
|
final String value = getStringValue(it, "value");
|
254
|
if("electronic".equals(issntype)){
|
255
|
journal.setIssnOnline(value);
|
256
|
}
|
257
|
if ("print".equals(issntype))
|
258
|
journal.setIssnPrinted(value);
|
259
|
});
|
260
|
}
|
261
|
metadata.setJournal(journal.build());
|
262
|
}
|
263
|
metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
|
264
|
result.setMetadata(metadata.build());
|
265
|
entity.setResult(result.build());
|
266
|
oaf.setEntity(entity.build());
|
267
|
System.out.println(JsonFormat.printToString(oaf.build()));
|
268
|
final List<AtomicAction> actionList = new ArrayList<>();
|
269
|
if (!onlyOrganization)
|
270
|
actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
|
271
|
|
272
|
if (!authorsOrganizations.getValue().isEmpty()) {
|
273
|
|
274
|
authorsOrganizations.getValue().forEach(o ->
|
275
|
{
|
276
|
|
277
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
|
278
|
if (!onlyOrganization)
|
279
|
actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
|
280
|
final String gridOrganization = getSimilarGridOrganization(o.getEntity());
|
281
|
if (gridOrganization!= null) {
|
282
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization, "".getBytes()));
|
283
|
actionList.add(factory.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(), "".getBytes()));
|
284
|
}
|
285
|
});
|
286
|
}
|
287
|
return actionList;
|
288
|
|
289
|
}
|
290
|
|
291
|
|
292
|
private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
|
293
|
|
294
|
final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
|
295
|
if (pidList!= null ) {
|
296
|
for (FieldTypeProtos.StructuredProperty p: pidList) {
|
297
|
if (p.getQualifier().getClassname().equals("grid")){
|
298
|
return "20|grid________::"+AbstractDNetXsltFunctions.md5(p.getValue());
|
299
|
}
|
300
|
}
|
301
|
}
|
302
|
return null;
|
303
|
|
304
|
}
|
305
|
|
306
|
private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication, final OafProtos.Oaf organization, final ActionFactory factory, final String setName, final Agent agent) {
|
307
|
|
308
|
List<AtomicAction> result = new ArrayList<>();
|
309
|
|
310
|
final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
|
311
|
roaf.setKind(KindProtos.Kind.relation);
|
312
|
|
313
|
roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
314
|
.setInvisible(false)
|
315
|
.setDeletedbyinference(false)
|
316
|
.setInferred(false)
|
317
|
.setTrust("0.9")
|
318
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
319
|
.build());
|
320
|
|
321
|
|
322
|
final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
|
323
|
|
324
|
rel.setRelType(RelTypeProtos.RelType.resultOrganization);
|
325
|
rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
|
326
|
|
327
|
//Create a relation Result --> Organization
|
328
|
rel.setSource(publication.getEntity().getId());
|
329
|
rel.setTarget(organization.getEntity().getId());
|
330
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
|
331
|
|
332
|
final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
|
333
|
|
334
|
final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
|
335
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
336
|
.setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
|
337
|
.build());
|
338
|
rel_instance.setAffiliation(affiliationRel.build());
|
339
|
rel.setResultOrganization(rel_instance.build());
|
340
|
|
341
|
rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
342
|
.setValue(datasources.get("MAG").getKey())
|
343
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get("MAG").getValue(), "::")))
|
344
|
.build());
|
345
|
|
346
|
|
347
|
|
348
|
rel.setChild(false);
|
349
|
roaf.setRel(rel.build());
|
350
|
|
351
|
result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution", organization.getEntity().getId(), roaf.build().toByteArray() ));
|
352
|
|
353
|
|
354
|
//Create a relation Organization --> Result
|
355
|
rel.setTarget(publication.getEntity().getId());
|
356
|
rel.setSource(organization.getEntity().getId());
|
357
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
|
358
|
|
359
|
|
360
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
361
|
.setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
|
362
|
.build());
|
363
|
rel_instance.setAffiliation(affiliationRel.build());
|
364
|
rel.setResultOrganization(rel_instance.build());
|
365
|
roaf.setRel(rel.build());
|
366
|
result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf", publication.getEntity().getId(), roaf.build().toByteArray()));
|
367
|
|
368
|
return result;
|
369
|
|
370
|
}
|
371
|
|
372
|
private static boolean hasJSONArrayField(final JsonObject root, final String key) {
|
373
|
return root.has(key) && root.get(key).isJsonArray();
|
374
|
}
|
375
|
|
376
|
private static void settingRelevantDate(JsonObject rootElement, ResultProtos.Result.Metadata.Builder metadata , final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) {
|
377
|
//Adding date
|
378
|
String date = getStringValue(rootElement,jsonKey);
|
379
|
if (date == null)
|
380
|
return;
|
381
|
if (date.length() == 4) {
|
382
|
date += "-01-01";
|
383
|
}
|
384
|
if (isValidDate(date)) {
|
385
|
if (addToDateOfAcceptance)
|
386
|
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
|
387
|
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
|
388
|
.setValue(date)
|
389
|
.setQualifier(getQualifier(dictionaryKey,"dnet:dataCite_date"))
|
390
|
.build());
|
391
|
}
|
392
|
}
|
393
|
|
394
|
|
395
|
public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
|
396
|
FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
|
397
|
if (StringUtils.contains(value, "orcid.org")){
|
398
|
return pid.setValue(value)
|
399
|
.setKey("ORCID").build();
|
400
|
}
|
401
|
if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
|
402
|
return pid.setValue(value)
|
403
|
.setKey("MAG Identifier").build();
|
404
|
}
|
405
|
return pid.setValue(value)
|
406
|
.setKey("URL").build();
|
407
|
}
|
408
|
|
409
|
|
410
|
public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
|
411
|
final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
|
412
|
final List<String> magId = new ArrayList<>();
|
413
|
getArrayObjects(affiliation, "identifiers").forEach(it -> {
|
414
|
if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
|
415
|
affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get("MAG"));
|
416
|
magId.add(it.get("value").getAsString());
|
417
|
}
|
418
|
else
|
419
|
affiliationIdentifiers.put( it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
|
420
|
});
|
421
|
if (magId.size() > 0) {
|
422
|
final String microsoftID = magId.get(0);
|
423
|
OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
424
|
oaf.setKind(KindProtos.Kind.entity);
|
425
|
OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
|
426
|
entity.setType(TypeProtos.Type.organization);
|
427
|
entity.setId("20|microsoft___::"+AbstractDNetXsltFunctions.md5(microsoftID));
|
428
|
final String id =datasources.get(affiliation.get("provenance").getAsString()).getValue();
|
429
|
final String name =datasources.get(affiliation.get("provenance").getAsString()).getKey();
|
430
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
431
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
432
|
.setValue(name)
|
433
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
434
|
.build();
|
435
|
entity.addCollectedfrom(collectedFrom);
|
436
|
} else {
|
437
|
return null;
|
438
|
}
|
439
|
entity.addOriginalId(microsoftID);
|
440
|
|
441
|
affiliationIdentifiers.forEach((key, value) -> entity.addPid(
|
442
|
FieldTypeProtos.StructuredProperty.newBuilder()
|
443
|
.setQualifier(value)
|
444
|
.setValue(key)
|
445
|
.build()));
|
446
|
|
447
|
final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
|
448
|
organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
|
449
|
.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
|
450
|
.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
|
451
|
.build());
|
452
|
|
453
|
entity.setOrganization(organization);
|
454
|
oaf.setEntity(entity);
|
455
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
456
|
.setInvisible(false)
|
457
|
.setDeletedbyinference(false)
|
458
|
.setInferred(false)
|
459
|
.setTrust("0.9")
|
460
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
461
|
.build());
|
462
|
return oaf.build();
|
463
|
}
|
464
|
return null;
|
465
|
}
|
466
|
|
467
|
public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {
|
468
|
|
469
|
final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
|
470
|
|
471
|
List<JsonObject> authors = getArrayObjects(root, "authors");
|
472
|
|
473
|
final AtomicInteger counter = new AtomicInteger();
|
474
|
|
475
|
List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
|
476
|
final String given = getStringValue(author, "given");
|
477
|
final String family = getStringValue(author, "family");
|
478
|
String fullname = getStringValue(author, "fullname");
|
479
|
|
480
|
if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
|
481
|
fullname = String.format("%s %s", given, family);
|
482
|
}
|
483
|
|
484
|
if (StringUtils.isBlank(fullname)){
|
485
|
return null;
|
486
|
|
487
|
}
|
488
|
final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
|
489
|
|
490
|
if (StringUtils.isNotBlank(given))
|
491
|
abuilder.setName(given);
|
492
|
if (StringUtils.isNotBlank(family))
|
493
|
abuilder.setSurname(family);
|
494
|
if (StringUtils.isNotBlank(fullname))
|
495
|
abuilder.setFullname(fullname);
|
496
|
|
497
|
final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
|
498
|
final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
|
499
|
|
500
|
authorAffiliation.forEach(it ->
|
501
|
{
|
502
|
OafProtos.Oaf org = createOrganizationFromJSON(it);
|
503
|
if (org != null) {
|
504
|
affiliations.put(org.getEntity().getId(), org);
|
505
|
abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
|
506
|
}
|
507
|
});
|
508
|
identifiers.stream().map(id -> {
|
509
|
final String value = id.get("value").getAsString();
|
510
|
return extractIdentifier(value);
|
511
|
}).collect(
|
512
|
Collectors.toMap(
|
513
|
FieldTypeProtos.KeyValue::getKey,
|
514
|
Function.identity(),
|
515
|
(a,b) -> a
|
516
|
)).values().forEach(abuilder::addPid);
|
517
|
abuilder.setRank(counter.getAndIncrement());
|
518
|
|
519
|
return abuilder.build();
|
520
|
|
521
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
522
|
|
523
|
return new Pair<> ( collect,affiliations.values() );
|
524
|
}
|
525
|
|
526
|
|
527
|
|
528
|
|
529
|
|
530
|
|
531
|
}
|