Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.Gson;
4
import com.google.gson.JsonElement;
5
import com.google.gson.JsonObject;
6
import com.googlecode.protobuf.format.JsonFormat;
7
import eu.dnetlib.actionmanager.actions.ActionFactory;
8
import eu.dnetlib.actionmanager.actions.AtomicAction;
9
import eu.dnetlib.actionmanager.common.Agent;
10
import eu.dnetlib.data.mapreduce.util.StreamUtils;
11
import eu.dnetlib.data.proto.*;
12
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
13
import eu.dnetlib.miscutils.collections.Pair;
14
import org.apache.commons.codec.binary.Base64;
15
import org.apache.commons.io.IOUtils;
16
import org.apache.commons.lang3.StringUtils;
17

    
18
import java.io.ByteArrayOutputStream;
19
import java.io.IOException;
20
import java.io.InputStream;
21
import java.util.*;
22
import java.util.concurrent.atomic.AtomicInteger;
23
import java.util.function.Function;
24
import java.util.stream.Collectors;
25
import java.util.zip.Inflater;
26

    
27
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
28
import static eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
29

    
30
public class DOIBoostToActions {
31

    
32
    // Source labels; their lower-cased form is the key used to look a source up in the datasources map.
    public static final String MAG = "MAG";
    public static final String ORCID = "ORCID";
    public static final String CROSSREF = "Crossref";
    public static final String UNPAYWALL = "UnpayWall";

    // Identifier schema names used as keys of the affiliation pid-type lookup.
    public static final String GRID_AC = "grid.ac";
    // NOTE(review): spelling looks like a typo of "wikipedia"; kept because the constant is public and its
    // value is compared with the "schema" strings found in the input records.
    public static final String WIKPEDIA = "wikpedia";

    // Namespace prefixes used when composing entity identifiers.
    public final static String doiBoostNSPREFIX ="doiboost____";
    public static final String OPENAIRE_PREFIX = "openaire____";

    // Separator placed between a namespace prefix and the local part of an identifier.
    public static final String SEPARATOR = "::";

    /**
     * Known sources, keyed by lower-cased source label. Each pair holds the display name (key) and the
     * prefixed datasource identifier (value). The map is read-only after class initialisation.
     */
    private static final Map<String, Pair<String, String>> datasources;

    static {
        // A plain map filled in a static initialiser avoids the anonymous HashMap subclass that
        // double-brace initialisation would create.
        final Map<String, Pair<String, String>> known = new HashMap<>();
        known.put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
        known.put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
        known.put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
        known.put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
        datasources = Collections.unmodifiableMap(known);
    }
52

    
53
    private static String decompressAbstract(final String abstractCompressed)  {
54
        try {
55
            byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
56
            final Inflater decompresser = new Inflater();
57
            decompresser.setInput(byteArray);
58
            final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
59
            byte[] buffer = new byte[8192];
60
            while (!decompresser.finished()) {
61
                int size = decompresser.inflate(buffer);
62
                bos.write(buffer, 0, size);
63
            }
64
            byte[] unzippeddata = bos.toByteArray();
65
            decompresser.end();
66
            return new String(unzippeddata);
67
        } catch (Throwable e) {
68
            System.out.println("Wrong abstract:"+ abstractCompressed);
69
            throw  new RuntimeException(e);
70
        }
71
    }
72

    
73
    /** Scheme id/name used for every pid-type qualifier built in this class. */
    public static final String PID_TYPES = "dnet:pid_types";

    /**
     * Pid-type qualifier for each supported affiliation identifier, keyed by {@link #MAG}, {@link #GRID_AC}
     * and {@link #WIKPEDIA}. The map is read-only after class initialisation.
     */
    private static final Map<String, FieldTypeProtos.Qualifier> affiliationPIDType;

    static {
        // Filled in a static initialiser rather than with double-brace initialisation, which would create
        // an anonymous HashMap subclass.
        final Map<String, FieldTypeProtos.Qualifier> pidTypes = new HashMap<>();
        pidTypes.put(MAG, FieldTypeProtos.Qualifier.newBuilder().setClassid("mag_id" ).setClassname("Microsoft Academic Graph Identifier").setSchemename(PID_TYPES).setSchemeid(PID_TYPES).build());
        pidTypes.put(GRID_AC, getQualifier("grid", PID_TYPES));
        pidTypes.put(WIKPEDIA, getQualifier("urn", PID_TYPES));
        affiliationPIDType = Collections.unmodifiableMap(pidTypes);
    }
79

    
80
    static Map<String, Map<String, String>> typologiesMapping;
81

    
82
    static {
83
        try {
84
            final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
85
            final String tt =IOUtils.toString(is);
86
            typologiesMapping = new Gson().fromJson(tt, Map.class);
87
        } catch (IOException e) {
88
            e.printStackTrace();
89
        }
90
    }
91

    
92

    
93

    
94

    
95
    public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement, final ActionFactory factory, final String setName, final Agent agent, boolean invisible,
96
                                                                        final boolean onlyOrganization) {
97

    
98
        //Create OAF Proto
99

    
100
        final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
101
        //Add Data Info
102
        oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
103
                .setInvisible(invisible)
104
                .setDeletedbyinference(false)
105
                .setInferred(false)
106
                .setTrust("0.9")
107
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
108
                .build());
109

    
110
        //Adding Kind
111
        oaf.setKind(KindProtos.Kind.entity);
112

    
113
        //creating Result Proto
114
        final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
115

    
116
        entity.setDateofcollection("2019-02-15");
117

    
118

    
119

    
120
        if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
121
            StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
122
                    .map(JsonElement::getAsString)
123
                    .forEach(cf -> {
124
                                final String id = datasources.get(cf.toLowerCase()).getValue();
125
                                final String name = datasources.get(cf.toLowerCase()).getKey();
126
                                if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
127
                                    final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
128
                                            .setValue(name)
129
                                            .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
130
                                            .build();
131
                                    entity.addCollectedfrom(collectedFrom);
132
                                }
133
                            }
134
                    );
135
        }
136
        //Adding identifier
137
        final String doi = getStringValue(rootElement, "doi");
138
        if (doi == null)
139
            return null;
140
        entity.addOriginalId(doi);
141
        final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
142
        entity.setId(sourceId);
143

    
144
        entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
145
                .setValue(doi)
146
                .setQualifier(getQualifier("doi", PID_TYPES))
147
                .build());
148

    
149

    
150
        //Create Result Field
151
        ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
152

    
153
        final String type = getStringValue(rootElement,"type");
154

    
155
        if (!typologiesMapping.containsKey(type))
156
            return null;
157

    
158
        //Adding Instances
159
        final String typeValue = typologiesMapping.get(type).get("value");
160
        final String cobjValue = typologiesMapping.get(type).get("cobj");
161

    
162

    
163
        getArrayObjects(rootElement, "instances").stream().map(it ->
164
        {
165
            ResultProtos.Result.Instance.Builder instance= ResultProtos.Result.Instance.newBuilder();
166
            instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
167
                    .setClassid(cobjValue)
168
                    .setClassname(typeValue)
169
                    .setSchemeid("dnet:publication_resource")
170
                    .setSchemename("dnet:publication_resource")
171
                    .build());
172
            instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
173
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
174
                    .setValue("Unknown Repository")
175
                    .build());
176

    
177
            final String acc_class_id =it.get("access-rights").getAsString();
178
            String acc_class_value;
179
            switch (acc_class_id){
180
                case "OPEN": {
181
                    acc_class_value = "open access";
182
                    break;
183
                }
184
                case "CLOSED": {
185
                    acc_class_value = "closed access";
186
                    break;
187
                }
188

    
189
                default: {
190
                    acc_class_value = "not available";
191
                }
192

    
193
            }
194

    
195
            instance.addUrl(it.get("url").getAsString());
196
            instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
197
                    .setClassid(acc_class_id)
198
                    .setClassname(acc_class_value)
199
                    .setSchemeid("dnet:access_modes")
200
                    .setSchemename("dnet:access_modes")
201
                    .build());
202

    
203
            final String id =datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
204
            final String name =datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
205
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
206
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
207
                        .setValue(name)
208
                        .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
209
                        .build();
210

    
211
                instance.setCollectedfrom(collectedFrom);
212
            }
213

    
214
            return  instance.build();
215
        }).forEach(result::addInstance);
216

    
217
        //Adding DOI URL as  Instance
218
        final String doiURL = getStringValue(rootElement, "doi-url");
219
        JsonObject hostedByOpenAire = null;
220
        if (rootElement.has("hostedByOpenAire")) {
221
            hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
222
        }
223

    
224
        if (StringUtils.isNotBlank(doiURL)) {
225

    
226

    
227

    
228

    
229
            final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
230
            instance.addUrl(doiURL);
231
            instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
232
                    .setClassid(cobjValue)
233
                    .setClassname(typeValue)
234
                    .setSchemeid("dnet:publication_resource")
235
                    .setSchemename("dnet:publication_resource")
236
                    .build());
237
            instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
238
                    .setClassid("CLOSED")
239
                    .setClassname("Closed Access")
240
                    .setSchemeid("dnet:access_modes")
241
                    .setSchemename("dnet:access_modes")
242
                    .build());
243
            instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
244
                    .setValue(CROSSREF)
245
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
246
                    .build());
247

    
248
            if (hostedByOpenAire == null)
249
            instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
250
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
251
                    .setValue("Unknown Repository")
252
                    .build());
253
            else{
254
                instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
255
                        .setKey(AbstractDNetXsltFunctions.oafSplitId("datasource",hostedByOpenAire.get("id").getAsString()))
256
                        .setValue(hostedByOpenAire.get("name").getAsString())
257
                        .build());
258
            }
259

    
260
            result.addInstance(instance);
261
        }
262

    
263
        //Create Metadata Proto
264
        final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
265

    
266

    
267
        Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
268

    
269
        if (authorsOrganizations.getKey().size() > 0) {
270
            metadata.addAllAuthor(authorsOrganizations.getKey());
271
        }
272
        else {
273
            return null;
274
        }
275
        //adding Language
276
        metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
277
                .setClassid("und")
278
                .setClassname("Undetermined")
279
                .setSchemeid("dent:languages")
280
                .setSchemename("dent:languages")
281
                .build());
282

    
283
        //Adding subjects
284
        List<String> subjects =getArrayValues(rootElement, "subject");
285

    
286
        subjects.forEach(s-> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
287
                .setValue(s)
288
                .setQualifier(getQualifier("keyword", "dnet:subject"))
289
                .build()));
290

    
291
        List<String>titles =getArrayValues(rootElement, "title");
292
        titles.forEach(t->
293
                metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
294
                        .setValue(t)
295
                        .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
296
                        .build()));
297
        settingRelevantDate(rootElement, metadata, "issued", "issued", true);
298
        settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
299
        settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
300
        settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
301

    
302

    
303
        getArrayObjects(rootElement, "abstract").forEach(d ->
304
                {
305
                    if (MAG.equals(d.get("provenance").getAsString()))
306
                        metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
307
                    else
308
                        metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
309
                }
310
                );
311

    
312

    
313

    
314
        //Adding Journal
315
        final String publisher = getStringValue(rootElement,"publisher");
316
        if (StringUtils.isNotBlank(publisher)){
317

    
318
            final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
319

    
320
            if (hasJSONArrayField(rootElement,"issn" )){
321
                StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
322
                        .map(JsonElement::getAsJsonObject)
323
                        .forEach(it -> {
324
                            final String issntype = getStringValue(it, "type");
325
                            final String value = getStringValue(it, "value");
326
                            if("electronic".equals(issntype)){
327
                                journal.setIssnOnline(value);
328
                            }
329
                            if ("print".equals(issntype))
330
                                journal.setIssnPrinted(value);
331
                        });
332
            }
333
            metadata.setJournal(journal.build());
334
        }
335
        metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
336
        result.setMetadata(metadata.build());
337
        entity.setResult(result.build());
338
        oaf.setEntity(entity.build());
339

    
340
        //System.out.println(JsonFormat.printToString(oaf.build()));
341

    
342
        final List<AtomicAction> actionList = new ArrayList<>();
343

    
344
        if (!onlyOrganization)
345
            actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
346

    
347
        if (!authorsOrganizations.getValue().isEmpty()) {
348

    
349
            authorsOrganizations.getValue().forEach(o ->
350
            {
351

    
352
                actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
353
                if (!onlyOrganization)
354
                    actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
355
                final String gridOrganization = getSimilarGridOrganization(o.getEntity());
356
                if (gridOrganization!= null) {
357
                    actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization, "".getBytes()));
358
                    actionList.add(factory.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(), "".getBytes()));
359
                }
360
            });
361
        }
362
        return actionList;
363

    
364
    }
365

    
366

    
367
    private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
368

    
369
        final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
370
        if (pidList!= null ) {
371
            for (FieldTypeProtos.StructuredProperty p: pidList) {
372
                if (p.getQualifier().getClassname().equals("grid")){
373
                    return "20|grid________" + SEPARATOR +AbstractDNetXsltFunctions.md5(p.getValue());
374
                }
375
            }
376
        }
377
        return null;
378

    
379
    }
380

    
381
    private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication, final OafProtos.Oaf organization, final ActionFactory factory, final String setName, final Agent agent) {
382

    
383
        List<AtomicAction> result = new ArrayList<>();
384

    
385
        final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
386
        roaf.setKind(KindProtos.Kind.relation);
387

    
388
        roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
389
                .setInvisible(false)
390
                .setDeletedbyinference(false)
391
                .setInferred(false)
392
                .setTrust("0.9")
393
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
394
                .build());
395

    
396

    
397
        final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
398

    
399
        rel.setRelType(RelTypeProtos.RelType.resultOrganization);
400
        rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
401

    
402
        //Create a relation Result --> Organization
403
        rel.setSource(publication.getEntity().getId());
404
        rel.setTarget(organization.getEntity().getId());
405
        rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
406

    
407
        final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
408

    
409
        final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
410
        affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
411
                .setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
412
                .build());
413
        rel_instance.setAffiliation(affiliationRel.build());
414
        rel.setResultOrganization(rel_instance.build());
415

    
416
        rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
417
                .setValue(datasources.get(MAG.toLowerCase()).getKey())
418
                .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
419
                .build());
420

    
421

    
422

    
423
        rel.setChild(false);
424
        roaf.setRel(rel.build());
425

    
426
        result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution", organization.getEntity().getId(), roaf.build().toByteArray() ));
427

    
428

    
429
        //Create a relation Organization --> Result
430
        rel.setTarget(publication.getEntity().getId());
431
        rel.setSource(organization.getEntity().getId());
432
        rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
433

    
434

    
435
        affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
436
                .setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
437
                .build());
438
        rel_instance.setAffiliation(affiliationRel.build());
439
        rel.setResultOrganization(rel_instance.build());
440
        roaf.setRel(rel.build());
441
        result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf", publication.getEntity().getId(), roaf.build().toByteArray()));
442

    
443
        return result;
444

    
445
    }
446

    
447
    private static boolean hasJSONArrayField(final JsonObject root, final String key) {
448
        return root.has(key) && root.get(key).isJsonArray();
449
    }
450

    
451
    private static void settingRelevantDate(JsonObject rootElement, ResultProtos.Result.Metadata.Builder metadata , final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) {
452
        //Adding date
453
        String date = getStringValue(rootElement,jsonKey);
454
        if (date == null)
455
            return;
456
        if (date.length() == 4) {
457
            date += "-01-01";
458
        }
459
        if (isValidDate(date)) {
460
            if (addToDateOfAcceptance)
461
                metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
462
            metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
463
                    .setValue(date)
464
                    .setQualifier(getQualifier(dictionaryKey,"dnet:dataCite_date"))
465
                    .build());
466
        }
467
    }
468

    
469

    
470
    public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
471
        FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
472
        if (StringUtils.contains(value, "orcid.org")){
473
            return pid.setValue(value.replaceAll("https://orcid.org/",""))
474
                    .setKey(ORCID).build();
475
        }
476
        if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
477
            return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/",""))
478
                    .setKey("MAG Identifier").build();
479
        }
480
        return pid.setValue(value)
481
                .setKey("URL").build();
482
    }
483

    
484

    
485
    public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
486
        final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
487
        final List<String> magId = new ArrayList<>();
488
        getArrayObjects(affiliation, "identifiers").forEach(it -> {
489
            if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
490
                affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(MAG));
491
                magId.add(it.get("value").getAsString());
492
            }
493
            else
494
                affiliationIdentifiers.put( it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
495
        });
496
        if (magId.size() > 0) {
497
            final String microsoftID = magId.get(0);
498
            OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
499
            oaf.setKind(KindProtos.Kind.entity);
500
            OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
501
            entity.setType(TypeProtos.Type.organization);
502
            entity.setId("20|microsoft___" + SEPARATOR +AbstractDNetXsltFunctions.md5(microsoftID));
503
            final String id =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
504
            final String name =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
505
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
506
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
507
                        .setValue(name)
508
                        .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
509
                        .build();
510
                entity.addCollectedfrom(collectedFrom);
511
            } else {
512
                return null;
513
            }
514
            entity.addOriginalId(microsoftID);
515

    
516
            affiliationIdentifiers.forEach((key, value) -> entity.addPid(
517
                    FieldTypeProtos.StructuredProperty.newBuilder()
518
                            .setQualifier(value)
519
                            .setValue(key)
520
                            .build()));
521

    
522
            final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
523
            organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
524
                    .setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
525
                    .setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
526
                    .build());
527

    
528
            entity.setOrganization(organization);
529
            oaf.setEntity(entity);
530
            oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
531
                    .setInvisible(false)
532
                    .setDeletedbyinference(false)
533
                    .setInferred(false)
534
                    .setTrust("0.9")
535
                    .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
536
                    .build());
537
            return oaf.build();
538
        }
539
        return  null;
540
    }
541

    
542
    public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>>  createAuthorsOrganization(final JsonObject root) {
543

    
544
        final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
545

    
546
        List<JsonObject> authors = getArrayObjects(root, "authors");
547

    
548
        final AtomicInteger counter = new AtomicInteger(1);
549

    
550

    
551
        List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
552
            final String given = getStringValue(author, "given");
553
            final String family = getStringValue(author, "family");
554
            String fullname = getStringValue(author, "fullname");
555

    
556
            if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
557
                fullname = String.format("%s %s", given, family);
558
            }
559

    
560
            if (StringUtils.isBlank(fullname)){
561
                return null;
562

    
563
            }
564
            final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
565

    
566
            if (StringUtils.isNotBlank(given))
567
                abuilder.setName(given);
568
            if (StringUtils.isNotBlank(family))
569
                abuilder.setSurname(family);
570
            if (StringUtils.isNotBlank(fullname))
571
                abuilder.setFullname(fullname);
572

    
573
            final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
574
            final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
575

    
576
            authorAffiliation.forEach(it ->
577
            {
578
                OafProtos.Oaf org = createOrganizationFromJSON(it);
579
                if (org != null) {
580
                    affiliations.put(org.getEntity().getId(), org);
581
                    abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
582
                }
583
            });
584
            identifiers.stream().map(id -> {
585
                final String value = id.get("value").getAsString();
586
                return extractIdentifier(value);
587
            }).collect(
588
                    Collectors.toMap(
589
                            FieldTypeProtos.KeyValue::getKey,
590
                            Function.identity(),
591
                            (a,b) -> a
592
                    )).values().forEach(abuilder::addPid);
593
            abuilder.setRank(counter.getAndIncrement());
594

    
595
            return abuilder.build();
596

    
597
        }).filter(Objects::nonNull).collect(Collectors.toList());
598

    
599
        return new Pair<> ( collect,affiliations.values() );
600
    }
601

    
602

    
603

    
604

    
605

    
606

    
607
}
(4-4/16)