Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.JsonArray;
4
import com.google.gson.JsonElement;
5
import com.google.gson.JsonObject;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.data.mapreduce.util.StreamUtils;
10
import eu.dnetlib.data.proto.RelMetadataProtos;
11
import eu.dnetlib.data.proto.RelTypeProtos;
12
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
13
import org.apache.commons.lang3.StringUtils;
14

    
15
import java.util.ArrayList;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Objects;
19

    
20
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
21
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
22
import static eu.dnetlib.data.proto.KindProtos.Kind;
23
import static eu.dnetlib.data.proto.OafProtos.*;
24
import static eu.dnetlib.data.proto.ResultProtos.Result;
25
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
26
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
27
import static eu.dnetlib.data.proto.TypeProtos.Type;
28

    
29
public class ScholixToActions {
30

    
31
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
32
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
33
                                                                final String dsId, String dateOfCollection) {
34

    
35
        final List<AtomicAction> actions = new ArrayList<>();
36

    
37
        final String typology = getStringValue(rootElement, "typology");
38
        final List<String> publisher = getArrayValues(rootElement, "publisher");
39
        final String abstractValue = getStringValue(rootElement, "abstract");
40
        final List<String> authors = getArrayValues(rootElement, "author");
41
        final List<String> dates = getArrayValues(rootElement, "date");
42

    
43
        final JsonArray localIdentifiers = rootElement.getAsJsonArray("localIdentifier");
44
        final String dnetId = getStringValue(rootElement, "id").substring(17);
45

    
46
        String title = "";
47
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
48
            StringBuilder ttl = new StringBuilder();
49
            getArrayValues(rootElement, "title").forEach(ttl::append);
50
            title = ttl.toString();
51
        } else {
52
            title = getStringValue(rootElement, "title");
53
        }
54

    
55
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"') {
56
            title = title.substring(1, title.length() - 1);
57
        }
58

    
59
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
60

    
61
        final boolean isVisible = StringUtils.isNotBlank(title) && StreamUtils.toStream(localIdentifiers.iterator())
62
                .map(JsonElement::getAsJsonObject)
63
                .anyMatch(o -> {
64
                    final String type = getStringValue(o, "type");
65
                    return StringUtils.isNotBlank(type) && conf.containsKey(type) && conf.get(type).isVisible();
66
                });
67
        oafBuilder.setDataInfo(
68
                DataInfo.newBuilder()
69
                        .setInvisible(!isVisible)
70
                        .setDeletedbyinference(false)
71
                        .setInferred(false)
72
                        .setTrust("0.9")
73
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
74
                        .build());
75
        oafBuilder.setKind(Kind.entity);
76
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
77
        final KeyValue collectedFrom = KeyValue.newBuilder()
78
                .setValue(dsName)
79
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
80
                .build();
81
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
82
                .setType(Type.result)
83
                .setDateofcollection(dateOfCollection)
84
                .addCollectedfrom(collectedFrom)
85
                .setId(sourceId);
86

    
87
        StreamUtils.toStream(localIdentifiers.iterator())
88
                .map(JsonElement::getAsJsonObject)
89
                .map(localIdentifier -> getPid(localIdentifier, conf))
90
                .filter(Objects::nonNull)
91
                .forEach(oafEntityBuilder::addPid);
92

    
93
        final Result.Builder result = Result.newBuilder();
94

    
95
        final Metadata.Builder metadata = Metadata.newBuilder()
96
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
97
            .setLanguage(Qualifier.newBuilder()
98
                .setClassid("und")
99
                .setClassname("Undetermined")
100
                .setSchemeid("dent:languages")
101
                .setSchemename("dent:languages")
102
                .build());
103
        if (StringUtils.isNotBlank(title)) {
104
            metadata.addTitle(StructuredProperty.newBuilder()
105
                    .setValue(title)
106
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
107
                    .build());
108
        }
109
        if (publisher.size() > 0)
110
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
111
        if (StringUtils.isNotEmpty(abstractValue)) {
112
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
113
        }
114

    
115
        dates.stream()
116
                .map(d -> {
117
                    if (d.length() == 4 && StringUtils.isNumeric(d)) {
118
                        return d + "-01-01";
119
                    } else {
120
                        return d;
121
                    }
122
                }).filter(d -> isValidDate(d))
123
                .forEach(d -> metadata.addRelevantdate(StructuredProperty.newBuilder()
124
                        .setValue(d)
125
                        .setQualifier(getQualifier("dnet:date", "dnet:date"))
126
                        .build()));
127

    
128
        if (rootElement.has("subject")) {
129
            JsonArray subject = rootElement.getAsJsonArray("subject");
130
            subject.forEach(it -> {
131
                    final JsonObject item = it.getAsJsonObject();
132
                    final String scheme = getStringValue(item, "scheme");
133
                    metadata.addSubject(StructuredProperty.newBuilder()
134
                            .setValue(Objects.requireNonNull(getStringValue(item, "value")))
135
                            .setQualifier(getQualifier(scheme, "dnet:subject"))
136
                            .build());
137
                }
138
            );
139
        }
140
        int i = 1;
141
        for (String it : authors) {
142
            metadata.addAuthor(Author.newBuilder()
143
                    .setFullname(it)
144
                    .setRank(i++)
145
                    .build());
146
        }
147
        result.setMetadata(metadata.build());
148

    
149
        localIdentifiers.forEach(it -> {
150

    
151
            final JsonObject localIdentifier = it.getAsJsonObject();
152
            final String pidType = getStringValue(localIdentifier, "type");
153
            final ScholExplorerConfiguration currentConf = conf.get(pidType);
154
            if (currentConf.getGeneratedUrl() != null) {
155
                final Instance.Builder instance = Instance.newBuilder();
156
                final String pidValue = getStringValue(localIdentifier, "id");
157
                instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
158
                instance.setAccessright(Qualifier.newBuilder()
159
                        .setClassid("UNKNOWN")
160
                        .setClassname("not available")
161
                        .setSchemeid("dnet:access_modes")
162
                        .setSchemename("dnet:access_modes")
163
                        .build());
164

    
165
                instance.setInstancetype(Qualifier.newBuilder()
166
                        .setClassid("0000")
167
                        .setClassname("Unknown")
168
                        .setSchemeid("dnet:publication_resource")
169
                        .setSchemename("dnet:publication_resource")
170
                        .build());
171
                instance.setHostedby(KeyValue.newBuilder()
172
                        .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
173
                        .setValue("Unknown Repository")
174
                        .build());
175

    
176
                instance.setCollectedfrom(collectedFrom);
177
                result.addInstance(instance);
178
            }
179
        });
180

    
181
        generateExternalReference(extractRelations(rootElement, "externalRels"))
182
                .forEach(result::addExternalReference);
183
        oafEntityBuilder.setResult(result.build());
184
        oafBuilder.setEntity(oafEntityBuilder.build());
185

    
186
        //System.out.println(JsonFormat.printToString(oafBuilder.build()));
187

    
188
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
189

    
190
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
191
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
192
        return actions;
193
    }
194

    
195

    
196
    private static AtomicAction createResultResultRelation(final String source, final String target,
197
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
198
        final Oaf.Builder oaf = Oaf.newBuilder();
199
        oaf.setDataInfo(
200
                DataInfo.newBuilder()
201
                        .setDeletedbyinference(false)
202
                        .setInferred(false)
203
                        .setTrust("0.9")
204
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
205
                        .build());
206
        oaf.setKind(Kind.relation);
207
        final OafRel.Builder relation = OafRel.newBuilder();
208
        relation.setSource(source);
209
        relation.setTarget(target);
210
        relation.setRelType(RelTypeProtos.RelType.resultResult);
211

    
212
        if (StringUtils.contains(relClass.toLowerCase(), "supplement")) {
213
            relation.setSubRelType(RelTypeProtos.SubRelType.supplement);
214
        } else {
215
            relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
216
        }
217

    
218
        relation.setChild(false);
219
        relation.setResultResult(resultResultRel);
220
        relation.setRelClass(relClass);
221
        relation.addCollectedfrom(collectedFrom);
222
        oaf.setRel(relation.build());
223

    
224
        //System.out.println(JsonFormat.printToString(oaf.build()));
225
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
226
    }
227

    
228

    
229
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
230
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
231
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
232
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
233
        String cfRelation;
234
        String cfInverseRelation;
235
        ResultResult.Builder resultRel = ResultResult.newBuilder();
236
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
237
        String relClass;
238
        String inverseRelClass;
239

    
240
        switch (relationSemantic) {
241
            case "isSupplementedBy": {
242
                cfRelation = "resultResult_supplement_isSupplementedBy";
243
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
244

    
245
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
246
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
247
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
248
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
249
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
250
                                .build())
251
                        .build());
252
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
253
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
254
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
255
                                .build())
256
                        .build());
257
                break;
258
            }
259
            case "isSupplementTo": {
260
                cfRelation = "resultResult_supplement_isSupplementTo";
261
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
262
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
263
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
264
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
265
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
266
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
267
                                .build())
268
                        .build());
269
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
270
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
271
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
272
                                .build())
273
                        .build());
274
                break;
275
            }
276
            default: {
277
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
278
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
279
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
280
                inverseRelClass = relClass;
281
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
282
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
283
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
284
                                .build())
285
                        .build());
286
                resultRel = resultInverseRel;
287
            }
288
        }
289

    
290
        final List<AtomicAction> actions = new ArrayList<>();
291
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
292
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
293

    
294
        return actions;
295
    }
296

    
297
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
298
        final List<ExternalReference> result = new ArrayList<>();
299

    
300
        jsonRels.forEach(it -> {
301
            ExternalReference.Builder builder = ExternalReference.newBuilder();
302
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
303
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
304
            }
305
            result.add(builder
306
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
307
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
308
                    .setQualifier(Qualifier.newBuilder()
309
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
310
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
311
                            .setSchemename("dnet:externalReference_typologies")
312
                            .setSchemeid("dnet:externalReference_typologies")
313
                            .build())
314
                    .build());
315
        });
316
        return result;
317
    }
318

    
319
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
320
        final List<JsonObject> result = new ArrayList<>();
321
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
322
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
323
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
324
        }
325
        return result;
326
    }
327

    
328
}
(10-10/11)