Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.JsonArray;
4
import com.google.gson.JsonObject;
5
import com.googlecode.protobuf.format.JsonFormat;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.data.proto.RelMetadataProtos;
10
import eu.dnetlib.data.proto.RelTypeProtos;
11
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
12
import org.apache.commons.lang3.StringUtils;
13

    
14
import java.util.ArrayList;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Objects;
18

    
19
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
20
import static eu.dnetlib.data.proto.KindProtos.Kind;
21
import static eu.dnetlib.data.proto.OafProtos.*;
22
import static eu.dnetlib.data.proto.ResultProtos.Result;
23
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
24
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
25
import static eu.dnetlib.data.proto.TypeProtos.Type;
26

    
27
public class ScholixToActions {
28

    
29
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> configurationMap,
30
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
31
                                                                final String dsId, String dateOfCollection) {
32

    
33
        final List<AtomicAction> actions = new ArrayList<>();
34

    
35
        final String typology = getStringValue(rootElement, "typology");
36
        final List<String> publisher = getArrayValues(rootElement, "publisher");
37
        final String abstractValue = getStringValue(rootElement, "abstract");
38
        final List<String> authors = getArrayValues(rootElement, "author");
39
        final List<String> dates = getArrayValues(rootElement, "date");
40
        final JsonObject localIdentifier = rootElement.getAsJsonArray("localIdentifier").get(0).getAsJsonObject();
41
        final String dnetId = getStringValue(rootElement, "id").substring(17);
42

    
43

    
44
        String title;
45
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
46
            StringBuilder ttl = new StringBuilder();
47
            getArrayValues(rootElement, "title").forEach(ttl::append);
48
            title = ttl.toString();
49
        } else {
50
            title = getStringValue(rootElement, "title");
51
        }
52

    
53
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"')
54
            title = title.substring(1, title.length() - 1);
55

    
56

    
57
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
58
        final boolean isVisible = title != null && configurationMap.get(getStringValue(localIdentifier, "type")).isVisible();
59
        oafBuilder.setDataInfo(
60
                DataInfo.newBuilder()
61
                        .setInvisible(!isVisible)
62
                        .setDeletedbyinference(false)
63
                        .setInferred(false)
64
                        .setTrust("0.9")
65
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
66
                        .build());
67
        oafBuilder.setKind(Kind.entity);
68
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder();
69
        oafEntityBuilder.setType(Type.result);
70
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
71
        oafEntityBuilder.setId(sourceId);
72
        final StructuredProperty pid = getPid(localIdentifier, configurationMap);
73
        if (pid != null)
74
            oafEntityBuilder.addPid(pid);
75
        final Result.Builder result = Result.newBuilder();
76
        final Metadata.Builder metadata = Metadata.newBuilder();
77
        metadata.setResulttype(getQualifier(typology, "dnet:result_typologies"));
78
        metadata.setLanguage(Qualifier.newBuilder()
79
                .setClassid("und")
80
                .setClassname("Undetermined")
81
                .setSchemeid("dent:languages")
82
                .setSchemename("dent:languages")
83
                .build());
84
        if (title!= null)
85
        metadata.addTitle(StructuredProperty.newBuilder()
86
                .setValue(title)
87
                .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
88
                .build());
89
        if (publisher.size() > 0)
90
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
91
        if (StringUtils.isNotEmpty(abstractValue)) {
92
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
93
        }
94
        dates.stream().map(it -> {
95
           if (it.length() == 4) {
96
               return it+"-01-01";
97
           }
98
           else if (it.length() > 10) {
99
               return it.substring(0,10);
100
           }
101
           else
102
               return it;
103
        }).forEach(it -> metadata.addRelevantdate(StructuredProperty.newBuilder()
104
                .setValue(it)
105
                .setQualifier(getQualifier("dnet:date", "dnet:date"))
106
                .build()));
107

    
108
        if (rootElement.has("subject")) {
109
            JsonArray subject = rootElement.getAsJsonArray("subject");
110
            subject.forEach(it ->
111
                    {
112
                        final JsonObject item = it.getAsJsonObject();
113
                        final String scheme = getStringValue(item, "scheme");
114
                        metadata.addSubject(StructuredProperty.newBuilder()
115
                                .setValue(Objects.requireNonNull(getStringValue(item, "value")))
116
                                .setQualifier(getQualifier(scheme, "dnet:subject"))
117
                                .build());
118
                    }
119
            );
120
        }
121
        int i = 1;
122
        for (String it : authors) {
123
            metadata.addAuthor(Author.newBuilder()
124
                    .setFullname(it)
125
                    .setRank(i++)
126
                    .build());
127
        }
128
        result.setMetadata(metadata.build());
129

    
130
        KeyValue collectedFrom = KeyValue.newBuilder()
131
                .setValue(dsName)
132
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
133
                .build();
134
        oafEntityBuilder.addCollectedfrom(collectedFrom);
135

    
136
        oafEntityBuilder.setDateofcollection(dateOfCollection);
137

    
138
        final String pidType = getStringValue(localIdentifier, "type");
139
        final ScholExplorerConfiguration currentConfiguration = configurationMap.get(pidType);
140
        if (currentConfiguration.getGeneratedUrl() != null) {
141
            final Instance.Builder instance = Instance.newBuilder();
142
            final String pidValue = getStringValue(localIdentifier, "id");
143
            instance.addUrl(String.format(currentConfiguration.getGeneratedUrl(), pidValue));
144
            instance.setAccessright(Qualifier.newBuilder()
145
                    .setClassid("UNKNOWN")
146
                    .setClassname("not available")
147
                    .setSchemeid("dnet:access_modes")
148
                    .setSchemename("dnet:access_modes")
149
                    .build());
150

    
151
            instance.setInstancetype(Qualifier.newBuilder()
152
                    .setClassid("0000")
153
                    .setClassname("Unknown")
154
                    .setSchemeid("dnet:publication_resource")
155
                    .setSchemename("dnet:publication_resource")
156
                    .build());
157
            instance.setHostedby(KeyValue.newBuilder()
158
                    .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
159
                    .setValue("Unknown Repository")
160
                    .build());
161

    
162
            instance.setCollectedfrom(collectedFrom);
163
            result.addInstance(instance);
164
        }
165
        generateExternalReference(extractRelations(rootElement, "externalRels"))
166
                .forEach(result::addExternalReference);
167
        oafEntityBuilder.setResult(result.build());
168
        oafBuilder.setEntity(oafEntityBuilder.build());
169

    
170
//        System.out.println(JsonFormat.printToString(oafBuilder.build()));
171

    
172
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
173

    
174
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
175
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
176
        return actions;
177
    }
178

    
179

    
180
    private static AtomicAction createResultResultRelation(final String source, final String target,
181
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
182
        final Oaf.Builder oaf = Oaf.newBuilder();
183
        oaf.setDataInfo(
184
                DataInfo.newBuilder()
185
                        .setDeletedbyinference(false)
186
                        .setInferred(false)
187
                        .setTrust("0.9")
188
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
189
                        .build());
190
        oaf.setKind(Kind.relation);
191
        final OafRel.Builder relation = OafRel.newBuilder();
192
        relation.setSource(source);
193
        relation.setTarget(target);
194
        relation.setRelType(RelTypeProtos.RelType.resultResult);
195
        relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
196
        relation.setChild(false);
197
        relation.setResultResult(resultResultRel);
198
        relation.setRelClass(relClass);
199
        relation.addCollectedfrom(collectedFrom);
200
        oaf.setRel(relation.build());
201

    
202
//        System.out.println(JsonFormat.printToString(oaf.build()));
203
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
204
    }
205

    
206

    
207
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
208
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
209
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
210
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
211
        String cfRelation;
212
        String cfInverseRelation;
213
        ResultResult.Builder resultRel = ResultResult.newBuilder();
214
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
215
        String relClass;
216
        String inverseRelClass;
217
        switch (relationSemantic) {
218
            case "isSupplementedBy": {
219
                cfRelation = "resultResult_supplement_isSupplementedBy";
220
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
221

    
222
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
223
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
224
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
225
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
226
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
227
                                .build())
228
                        .build());
229
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
230
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
231
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
232
                                .build())
233
                        .build());
234
                break;
235
            }
236
            case "isSupplementTo": {
237
                cfRelation = "resultResult_supplement_isSupplementTo";
238
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
239
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
240
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
241
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
242
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
243
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
244
                                .build())
245
                        .build());
246
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
247
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
248
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
249
                                .build())
250
                        .build());
251
                break;
252
            }
253
            default: {
254
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
255
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
256
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
257
                inverseRelClass = relClass;
258
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
259
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
260
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
261
                                .build())
262
                        .build());
263
                resultRel = resultInverseRel;
264
            }
265
        }
266

    
267
        List<AtomicAction> actions = new ArrayList<>();
268
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
269
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
270

    
271
        return actions;
272
    }
273

    
274
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
275
        final List<ExternalReference> result = new ArrayList<>();
276

    
277
        jsonRels.forEach(it -> {
278
            ExternalReference.Builder builder = ExternalReference.newBuilder();
279
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
280
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
281
            }
282
            result.add(builder
283
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
284
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
285
                    .setQualifier(Qualifier.newBuilder()
286
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
287
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
288
                            .setSchemename("dnet:externalReference_typologies")
289
                            .setSchemeid("dnet:externalReference_typologies")
290
                            .build())
291
                    .build());
292
        });
293
        return result;
294
    }
295

    
296

    
297
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
298
        final List<JsonObject> result = new ArrayList<>();
299
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
300
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
301
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
302
        }
303
        return result;
304
    }
305

    
306

    
307
    private static Qualifier getQualifier(final String classValue, final String schemeValue) {
308

    
309
        return Qualifier.newBuilder()
310
                .setSchemeid(schemeValue)
311
                .setSchemename(schemeValue)
312
                .setClassname(classValue)
313
                .setClassid(classValue)
314
                .build();
315
    }
316

    
317
    private static StructuredProperty getPid(final JsonObject localIdentifier, final Map<String, ScholExplorerConfiguration> configurationMap) {
318
        final String pidType = getStringValue(localIdentifier, "type");
319
        final ScholExplorerConfiguration configuration = configurationMap.get(pidType);
320
        if (configuration.getCleandPidType() == null)
321
            return null;
322
        final String pid = getStringValue(localIdentifier, "id");
323
        return StructuredProperty.newBuilder()
324
                .setValue(pid)
325
                .setQualifier(getQualifier(configuration.getCleandPidType(), "dnet:pid_types"))
326
                .build();
327
    }
328

    
329
    private static String getStringValue(final JsonObject root, final String key) {
330
        if (root.has(key))
331
            return root.get(key).getAsString();
332
        return null;
333
    }
334

    
335
    private static List<String> getArrayValues(final JsonObject root, final String key) {
336
        if (root.has(key) && root.get(key).isJsonArray()) {
337
            final JsonArray asJsonArray = root.get(key).getAsJsonArray();
338
            final List<String> result = new ArrayList<>();
339

    
340

    
341
            asJsonArray.forEach(it -> {
342
                if (StringUtils.isNotBlank(it.getAsString())) {
343
                    result.add(it.getAsString());
344
                }
345
            });
346
            return result;
347
        }
348
        return new ArrayList<>();
349
    }
350

    
351
}
(7-7/8)