Project

General

Profile

1 52912 sandro.lab
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2
3 52931 claudio.at
import java.util.ArrayList;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Objects;
7
8 52912 sandro.lab
import com.google.gson.JsonArray;
9
import com.google.gson.JsonObject;
10
import eu.dnetlib.actionmanager.actions.ActionFactory;
11
import eu.dnetlib.actionmanager.actions.AtomicAction;
12
import eu.dnetlib.actionmanager.common.Agent;
13
import eu.dnetlib.data.proto.RelMetadataProtos;
14
import eu.dnetlib.data.proto.RelTypeProtos;
15
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
16
import org.apache.commons.lang3.StringUtils;
17
18
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
19
import static eu.dnetlib.data.proto.KindProtos.Kind;
20
import static eu.dnetlib.data.proto.OafProtos.*;
21
import static eu.dnetlib.data.proto.ResultProtos.Result;
22
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
23
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
24
import static eu.dnetlib.data.proto.TypeProtos.Type;
25
26
public class ScholixToActions {
27
28 52931 claudio.at
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
29 52912 sandro.lab
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
30
                                                                final String dsId, String dateOfCollection) {
31
32
        final List<AtomicAction> actions = new ArrayList<>();
33
34
        final String typology = getStringValue(rootElement, "typology");
35
        final List<String> publisher = getArrayValues(rootElement, "publisher");
36
        final String abstractValue = getStringValue(rootElement, "abstract");
37
        final List<String> authors = getArrayValues(rootElement, "author");
38
        final List<String> dates = getArrayValues(rootElement, "date");
39
        final JsonObject localIdentifier = rootElement.getAsJsonArray("localIdentifier").get(0).getAsJsonObject();
40
        final String dnetId = getStringValue(rootElement, "id").substring(17);
41
42 52931 claudio.at
        String title = "";
43 52912 sandro.lab
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
44
            StringBuilder ttl = new StringBuilder();
45
            getArrayValues(rootElement, "title").forEach(ttl::append);
46
            title = ttl.toString();
47
        } else {
48
            title = getStringValue(rootElement, "title");
49
        }
50
51 52931 claudio.at
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"') {
52 52912 sandro.lab
            title = title.substring(1, title.length() - 1);
53 52931 claudio.at
        }
54 52912 sandro.lab
55
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
56 52931 claudio.at
        final boolean isVisible = StringUtils.isNotBlank(title) && conf.get(getStringValue(localIdentifier, "type")).isVisible();
57 52912 sandro.lab
        oafBuilder.setDataInfo(
58
                DataInfo.newBuilder()
59
                        .setInvisible(!isVisible)
60
                        .setDeletedbyinference(false)
61
                        .setInferred(false)
62
                        .setTrust("0.9")
63
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
64
                        .build());
65
        oafBuilder.setKind(Kind.entity);
66
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
67 52931 claudio.at
        final KeyValue collectedFrom = KeyValue.newBuilder()
68
                .setValue(dsName)
69
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
70
                .build();
71
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
72
                .setType(Type.result)
73
                .setDateofcollection(dateOfCollection)
74
                .addCollectedfrom(collectedFrom)
75
                .setId(sourceId);
76
77
        final StructuredProperty pid = getPid(localIdentifier, conf);
78
        if (pid != null) {
79 52912 sandro.lab
            oafEntityBuilder.addPid(pid);
80 52931 claudio.at
        }
81 52912 sandro.lab
        final Result.Builder result = Result.newBuilder();
82 52931 claudio.at
83
        final Metadata.Builder metadata = Metadata.newBuilder()
84
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
85
            .setLanguage(Qualifier.newBuilder()
86 52912 sandro.lab
                .setClassid("und")
87
                .setClassname("Undetermined")
88
                .setSchemeid("dent:languages")
89
                .setSchemename("dent:languages")
90
                .build());
91 52931 claudio.at
        if (title != null) {
92
            metadata.addTitle(StructuredProperty.newBuilder()
93
                    .setValue(title)
94
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
95
                    .build());
96
        }
97 52912 sandro.lab
        if (publisher.size() > 0)
98
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
99
        if (StringUtils.isNotEmpty(abstractValue)) {
100
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
101
        }
102
        dates.stream().map(it -> {
103
           if (it.length() == 4) {
104
               return it+"-01-01";
105
           }
106
           else if (it.length() > 10) {
107
               return it.substring(0,10);
108
           }
109
           else
110
               return it;
111
        }).forEach(it -> metadata.addRelevantdate(StructuredProperty.newBuilder()
112
                .setValue(it)
113
                .setQualifier(getQualifier("dnet:date", "dnet:date"))
114
                .build()));
115
116
        if (rootElement.has("subject")) {
117
            JsonArray subject = rootElement.getAsJsonArray("subject");
118
            subject.forEach(it ->
119
                    {
120
                        final JsonObject item = it.getAsJsonObject();
121
                        final String scheme = getStringValue(item, "scheme");
122
                        metadata.addSubject(StructuredProperty.newBuilder()
123
                                .setValue(Objects.requireNonNull(getStringValue(item, "value")))
124
                                .setQualifier(getQualifier(scheme, "dnet:subject"))
125
                                .build());
126
                    }
127
            );
128
        }
129
        int i = 1;
130
        for (String it : authors) {
131
            metadata.addAuthor(Author.newBuilder()
132
                    .setFullname(it)
133
                    .setRank(i++)
134
                    .build());
135
        }
136
        result.setMetadata(metadata.build());
137
138
        final String pidType = getStringValue(localIdentifier, "type");
139 52931 claudio.at
        final ScholExplorerConfiguration currentConf = conf.get(pidType);
140
        if (currentConf.getGeneratedUrl() != null) {
141 52912 sandro.lab
            final Instance.Builder instance = Instance.newBuilder();
142
            final String pidValue = getStringValue(localIdentifier, "id");
143 52931 claudio.at
            instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
144 52912 sandro.lab
            instance.setAccessright(Qualifier.newBuilder()
145
                    .setClassid("UNKNOWN")
146
                    .setClassname("not available")
147
                    .setSchemeid("dnet:access_modes")
148
                    .setSchemename("dnet:access_modes")
149
                    .build());
150
151
            instance.setInstancetype(Qualifier.newBuilder()
152
                    .setClassid("0000")
153
                    .setClassname("Unknown")
154
                    .setSchemeid("dnet:publication_resource")
155
                    .setSchemename("dnet:publication_resource")
156
                    .build());
157
            instance.setHostedby(KeyValue.newBuilder()
158
                    .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
159
                    .setValue("Unknown Repository")
160
                    .build());
161
162
            instance.setCollectedfrom(collectedFrom);
163
            result.addInstance(instance);
164
        }
165
        generateExternalReference(extractRelations(rootElement, "externalRels"))
166
                .forEach(result::addExternalReference);
167
        oafEntityBuilder.setResult(result.build());
168
        oafBuilder.setEntity(oafEntityBuilder.build());
169
170
//        System.out.println(JsonFormat.printToString(oafBuilder.build()));
171
172
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
173
174
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
175
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
176
        return actions;
177
    }
178
179
180
    private static AtomicAction createResultResultRelation(final String source, final String target,
181
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
182
        final Oaf.Builder oaf = Oaf.newBuilder();
183
        oaf.setDataInfo(
184
                DataInfo.newBuilder()
185
                        .setDeletedbyinference(false)
186
                        .setInferred(false)
187
                        .setTrust("0.9")
188
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
189
                        .build());
190
        oaf.setKind(Kind.relation);
191
        final OafRel.Builder relation = OafRel.newBuilder();
192
        relation.setSource(source);
193
        relation.setTarget(target);
194
        relation.setRelType(RelTypeProtos.RelType.resultResult);
195
        relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
196
        relation.setChild(false);
197
        relation.setResultResult(resultResultRel);
198
        relation.setRelClass(relClass);
199
        relation.addCollectedfrom(collectedFrom);
200
        oaf.setRel(relation.build());
201
202
//        System.out.println(JsonFormat.printToString(oaf.build()));
203
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
204
    }
205
206
207
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
208
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
209
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
210
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
211
        String cfRelation;
212
        String cfInverseRelation;
213
        ResultResult.Builder resultRel = ResultResult.newBuilder();
214
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
215
        String relClass;
216
        String inverseRelClass;
217 52931 claudio.at
218 52912 sandro.lab
        switch (relationSemantic) {
219
            case "isSupplementedBy": {
220
                cfRelation = "resultResult_supplement_isSupplementedBy";
221
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
222
223
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
224
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
225
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
226
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
227
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
228
                                .build())
229
                        .build());
230
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
231
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
232
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
233
                                .build())
234
                        .build());
235
                break;
236
            }
237
            case "isSupplementTo": {
238
                cfRelation = "resultResult_supplement_isSupplementTo";
239
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
240
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
241
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
242
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
243
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
244
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
245
                                .build())
246
                        .build());
247
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
248
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
249
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
250
                                .build())
251
                        .build());
252
                break;
253
            }
254
            default: {
255
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
256
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
257
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
258
                inverseRelClass = relClass;
259
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
260
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
261
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
262
                                .build())
263
                        .build());
264
                resultRel = resultInverseRel;
265
            }
266
        }
267
268 52931 claudio.at
        final List<AtomicAction> actions = new ArrayList<>();
269 52912 sandro.lab
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
270
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
271
272
        return actions;
273
    }
274
275
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
276
        final List<ExternalReference> result = new ArrayList<>();
277
278
        jsonRels.forEach(it -> {
279
            ExternalReference.Builder builder = ExternalReference.newBuilder();
280
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
281
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
282
            }
283
            result.add(builder
284
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
285
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
286
                    .setQualifier(Qualifier.newBuilder()
287
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
288
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
289
                            .setSchemename("dnet:externalReference_typologies")
290
                            .setSchemeid("dnet:externalReference_typologies")
291
                            .build())
292
                    .build());
293
        });
294
        return result;
295
    }
296
297
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
298
        final List<JsonObject> result = new ArrayList<>();
299
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
300
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
301
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
302
        }
303
        return result;
304
    }
305
306
    private static Qualifier getQualifier(final String classValue, final String schemeValue) {
307
308
        return Qualifier.newBuilder()
309
                .setSchemeid(schemeValue)
310
                .setSchemename(schemeValue)
311
                .setClassname(classValue)
312
                .setClassid(classValue)
313
                .build();
314
    }
315
316
    private static StructuredProperty getPid(final JsonObject localIdentifier, final Map<String, ScholExplorerConfiguration> configurationMap) {
317
        final String pidType = getStringValue(localIdentifier, "type");
318
        final ScholExplorerConfiguration configuration = configurationMap.get(pidType);
319
        if (configuration.getCleandPidType() == null)
320
            return null;
321
        final String pid = getStringValue(localIdentifier, "id");
322
        return StructuredProperty.newBuilder()
323
                .setValue(pid)
324
                .setQualifier(getQualifier(configuration.getCleandPidType(), "dnet:pid_types"))
325
                .build();
326
    }
327
328
    private static String getStringValue(final JsonObject root, final String key) {
329
        if (root.has(key))
330
            return root.get(key).getAsString();
331
        return null;
332
    }
333
334
    private static List<String> getArrayValues(final JsonObject root, final String key) {
335
        if (root.has(key) && root.get(key).isJsonArray()) {
336
            final JsonArray asJsonArray = root.get(key).getAsJsonArray();
337
            final List<String> result = new ArrayList<>();
338
339
340
            asJsonArray.forEach(it -> {
341
                if (StringUtils.isNotBlank(it.getAsString())) {
342
                    result.add(it.getAsString());
343
                }
344
            });
345
            return result;
346
        }
347
        return new ArrayList<>();
348
    }
349
350
}