Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.JsonArray;
4
import com.google.gson.JsonElement;
5
import com.google.gson.JsonObject;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.data.mapreduce.util.StreamUtils;
10
import eu.dnetlib.data.proto.RelMetadataProtos;
11
import eu.dnetlib.data.proto.RelTypeProtos;
12
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
13
import org.apache.commons.lang3.StringUtils;
14

    
15
import java.util.ArrayList;
16
import java.util.List;
17
import java.util.Map;
18
import java.util.Objects;
19

    
20
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
21
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
22
import static eu.dnetlib.data.proto.KindProtos.Kind;
23
import static eu.dnetlib.data.proto.OafProtos.*;
24
import static eu.dnetlib.data.proto.ResultProtos.Result;
25
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
26
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
27
import static eu.dnetlib.data.proto.TypeProtos.Type;
28

    
29
public class ScholixToActions {
30

    
31
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
32
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
33
                                                                final String dsId, String dateOfCollection) {
34

    
35
        final List<AtomicAction> actions = new ArrayList<>();
36

    
37
        final String typology = getStringValue(rootElement, "typology");
38
        final List<String> publisher = getArrayValues(rootElement, "publisher");
39
        final String abstractValue = getStringValue(rootElement, "abstract");
40
        final List<String> authors = getArrayValues(rootElement, "author");
41
        final List<String> dates = getArrayValues(rootElement, "date");
42

    
43
        final JsonArray localIdentifiers = rootElement.getAsJsonArray("localIdentifier");
44
        final String dnetId = getStringValue(rootElement, "id").substring(17);
45

    
46
        String title = "";
47
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
48
            StringBuilder ttl = new StringBuilder();
49
            getArrayValues(rootElement, "title").forEach(ttl::append);
50
            title = ttl.toString();
51
        } else {
52
            title = getStringValue(rootElement, "title");
53
        }
54
        if ("\"".equals(title) || title == null || StringUtils.isEmpty(title))
55
            return actions;
56
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"' ) {
57
            title = title.substring(1, title.length() - 1);
58
        }
59

    
60
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
61

    
62
        final boolean isVisible = StringUtils.isNotBlank(title) && StreamUtils.toStream(localIdentifiers.iterator())
63
                .map(JsonElement::getAsJsonObject)
64
                .anyMatch(o -> {
65
                    final String type = getStringValue(o, "type");
66
                    return StringUtils.isNotBlank(type) && conf.containsKey(type) && conf.get(type).isVisible();
67
                });
68
        oafBuilder.setDataInfo(
69
                DataInfo.newBuilder()
70
                        .setInvisible(!isVisible)
71
                        .setDeletedbyinference(false)
72
                        .setInferred(false)
73
                        .setTrust("0.9")
74
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
75
                        .build());
76
        oafBuilder.setKind(Kind.entity);
77
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
78
        final KeyValue collectedFrom = KeyValue.newBuilder()
79
                .setValue(dsName)
80
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
81
                .build();
82
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
83
                .setType(Type.result)
84
                .setDateofcollection(dateOfCollection)
85
                .addCollectedfrom(collectedFrom)
86
                .setId(sourceId);
87

    
88
        StreamUtils.toStream(localIdentifiers.iterator())
89
                .map(JsonElement::getAsJsonObject)
90
                .map(localIdentifier -> getPid(localIdentifier, conf))
91
                .filter(Objects::nonNull)
92
                .forEach(oafEntityBuilder::addPid);
93

    
94
        final Result.Builder result = Result.newBuilder();
95

    
96
        final Metadata.Builder metadata = Metadata.newBuilder()
97
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
98
            .setLanguage(Qualifier.newBuilder()
99
                .setClassid("und")
100
                .setClassname("Undetermined")
101
                .setSchemeid("dent:languages")
102
                .setSchemename("dent:languages")
103
                .build());
104
        if (StringUtils.isNotBlank(title)) {
105
            metadata.addTitle(StructuredProperty.newBuilder()
106
                    .setValue(title)
107
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
108
                    .build());
109
        }
110
        if (publisher.size() > 0)
111
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
112
        if (StringUtils.isNotEmpty(abstractValue)) {
113
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
114
        }
115

    
116
        dates.stream()
117
                .map(d -> {
118
                    if (d.length() == 4 && StringUtils.isNumeric(d)) {
119
                        return d + "-01-01";
120
                    } else {
121
                        return d;
122
                    }
123
                }).filter(d -> isValidDate(d))
124
                .forEach(d -> metadata.addRelevantdate(StructuredProperty.newBuilder()
125
                        .setValue(d)
126
                        .setQualifier(getQualifier("dnet:date", "dnet:date"))
127
                        .build()));
128

    
129
        if (rootElement.has("subject") && !rootElement.get("subject").isJsonNull()) {
130
            JsonArray subject = rootElement.getAsJsonArray("subject");
131
            subject.forEach(it -> {
132
                    final JsonObject item = it.getAsJsonObject();
133
                    final String scheme = getStringValue(item, "scheme");
134
                    metadata.addSubject(StructuredProperty.newBuilder()
135
                            .setValue(Objects.requireNonNull(getStringValue(item, "value")))
136
                            .setQualifier(getQualifier(scheme, "dnet:subject"))
137
                            .build());
138
                }
139
            );
140
        }
141
        int i = 1;
142
        for (String it : authors) {
143
            metadata.addAuthor(Author.newBuilder()
144
                    .setFullname(it)
145
                    .setRank(i++)
146
                    .build());
147
        }
148
        result.setMetadata(metadata.build());
149

    
150
        localIdentifiers.forEach(it -> {
151

    
152
            final JsonObject localIdentifier = it.getAsJsonObject();
153
            final String pidType = getStringValue(localIdentifier, "type");
154
            final ScholExplorerConfiguration currentConf = conf.get(pidType);
155
            if (currentConf.getGeneratedUrl() != null) {
156
                final Instance.Builder instance = Instance.newBuilder();
157
                final String pidValue = getStringValue(localIdentifier, "id");
158
                instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
159
                instance.setAccessright(Qualifier.newBuilder()
160
                        .setClassid("UNKNOWN")
161
                        .setClassname("not available")
162
                        .setSchemeid("dnet:access_modes")
163
                        .setSchemename("dnet:access_modes")
164
                        .build());
165

    
166
                instance.setInstancetype(Qualifier.newBuilder()
167
                        .setClassid("0000")
168
                        .setClassname("Unknown")
169
                        .setSchemeid("dnet:publication_resource")
170
                        .setSchemename("dnet:publication_resource")
171
                        .build());
172
                instance.setHostedby(KeyValue.newBuilder()
173
                        .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
174
                        .setValue("Unknown Repository")
175
                        .build());
176

    
177
                instance.setCollectedfrom(collectedFrom);
178
                result.addInstance(instance);
179
            }
180
        });
181

    
182
        generateExternalReference(extractRelations(rootElement, "externalRels"))
183
                .forEach(result::addExternalReference);
184
        oafEntityBuilder.setResult(result.build());
185
        oafBuilder.setEntity(oafEntityBuilder.build());
186

    
187
        //System.out.println(JsonFormat.printToString(oafBuilder.build()));
188

    
189
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
190

    
191
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
192
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
193
        return actions;
194
    }
195

    
196

    
197
    private static AtomicAction createResultResultRelation(final String source, final String target,
198
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
199
        final Oaf.Builder oaf = Oaf.newBuilder();
200
        oaf.setDataInfo(
201
                DataInfo.newBuilder()
202
                        .setDeletedbyinference(false)
203
                        .setInferred(false)
204
                        .setTrust("0.9")
205
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
206
                        .build());
207
        oaf.setKind(Kind.relation);
208
        final OafRel.Builder relation = OafRel.newBuilder();
209
        relation.setSource(source);
210
        relation.setTarget(target);
211
        relation.setRelType(RelTypeProtos.RelType.resultResult);
212

    
213
        if (StringUtils.contains(relClass.toLowerCase(), "supplement")) {
214
            relation.setSubRelType(RelTypeProtos.SubRelType.supplement);
215
        } else {
216
            relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
217
        }
218

    
219
        relation.setChild(false);
220
        relation.setResultResult(resultResultRel);
221
        relation.setRelClass(relClass);
222
        relation.addCollectedfrom(collectedFrom);
223
        oaf.setRel(relation.build());
224

    
225
        //System.out.println(JsonFormat.printToString(oaf.build()));
226
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
227
    }
228

    
229

    
230
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
231
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
232
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
233
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
234
        String cfRelation;
235
        String cfInverseRelation;
236
        ResultResult.Builder resultRel = ResultResult.newBuilder();
237
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
238
        String relClass;
239
        String inverseRelClass;
240

    
241
        switch (relationSemantic) {
242
            case "isSupplementedBy": {
243
                cfRelation = "resultResult_supplement_isSupplementedBy";
244
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
245

    
246
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
247
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
248
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
249
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
250
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
251
                                .build())
252
                        .build());
253
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
254
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
255
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
256
                                .build())
257
                        .build());
258
                break;
259
            }
260
            case "isSupplementTo": {
261
                cfRelation = "resultResult_supplement_isSupplementTo";
262
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
263
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
264
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
265
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
266
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
267
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
268
                                .build())
269
                        .build());
270
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
271
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
272
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
273
                                .build())
274
                        .build());
275
                break;
276
            }
277
            default: {
278
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
279
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
280
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
281
                inverseRelClass = relClass;
282
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
283
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
284
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
285
                                .build())
286
                        .build());
287
                resultRel = resultInverseRel;
288
            }
289
        }
290

    
291
        final List<AtomicAction> actions = new ArrayList<>();
292
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
293
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
294

    
295
        return actions;
296
    }
297

    
298
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
299
        final List<ExternalReference> result = new ArrayList<>();
300

    
301
        jsonRels.forEach(it -> {
302
            ExternalReference.Builder builder = ExternalReference.newBuilder();
303
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
304
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
305
            }
306
            result.add(builder
307
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
308
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
309
                    .setQualifier(Qualifier.newBuilder()
310
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
311
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
312
                            .setSchemename("dnet:externalReference_typologies")
313
                            .setSchemeid("dnet:externalReference_typologies")
314
                            .build())
315
                    .build());
316
        });
317
        return result;
318
    }
319

    
320
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
321
        final List<JsonObject> result = new ArrayList<>();
322
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
323
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
324
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
325
        }
326
        return result;
327
    }
328

    
329
}
(17-17/18)