Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import java.util.ArrayList;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Objects;
7

    
8
import com.google.gson.JsonArray;
9
import com.google.gson.JsonElement;
10
import com.google.gson.JsonObject;
11
import com.googlecode.protobuf.format.JsonFormat;
12
import eu.dnetlib.actionmanager.actions.ActionFactory;
13
import eu.dnetlib.actionmanager.actions.AtomicAction;
14
import eu.dnetlib.actionmanager.common.Agent;
15
import eu.dnetlib.data.mapreduce.util.StreamUtils;
16
import eu.dnetlib.data.proto.RelMetadataProtos;
17
import eu.dnetlib.data.proto.RelTypeProtos;
18
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
19
import org.apache.commons.lang3.StringUtils;
20

    
21
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
22
import static eu.dnetlib.data.proto.KindProtos.Kind;
23
import static eu.dnetlib.data.proto.OafProtos.*;
24
import static eu.dnetlib.data.proto.ResultProtos.Result;
25
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
26
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
27
import static eu.dnetlib.data.proto.TypeProtos.Type;
28
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
29

    
30
public class ScholixToActions {
31

    
32
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
33
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
34
                                                                final String dsId, String dateOfCollection) {
35

    
36
        final List<AtomicAction> actions = new ArrayList<>();
37

    
38
        final String typology = getStringValue(rootElement, "typology");
39
        final List<String> publisher = getArrayValues(rootElement, "publisher");
40
        final String abstractValue = getStringValue(rootElement, "abstract");
41
        final List<String> authors = getArrayValues(rootElement, "author");
42
        final List<String> dates = getArrayValues(rootElement, "date");
43

    
44
        final JsonArray localIdentifiers = rootElement.getAsJsonArray("localIdentifier");
45
        final String dnetId = getStringValue(rootElement, "id").substring(17);
46

    
47
        String title = "";
48
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
49
            StringBuilder ttl = new StringBuilder();
50
            getArrayValues(rootElement, "title").forEach(ttl::append);
51
            title = ttl.toString();
52
        } else {
53
            title = getStringValue(rootElement, "title");
54
        }
55

    
56
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"') {
57
            title = title.substring(1, title.length() - 1);
58
        }
59

    
60
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
61

    
62
        final boolean isVisible = StringUtils.isNotBlank(title) && StreamUtils.toStream(localIdentifiers.iterator())
63
                .map(JsonElement::getAsJsonObject)
64
                .anyMatch(o -> {
65
                    final String type = getStringValue(o, "type");
66
                    return StringUtils.isNotBlank(type) && conf.containsKey(type) && conf.get(type).isVisible();
67
                });
68
        oafBuilder.setDataInfo(
69
                DataInfo.newBuilder()
70
                        .setInvisible(!isVisible)
71
                        .setDeletedbyinference(false)
72
                        .setInferred(false)
73
                        .setTrust("0.9")
74
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
75
                        .build());
76
        oafBuilder.setKind(Kind.entity);
77
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
78
        final KeyValue collectedFrom = KeyValue.newBuilder()
79
                .setValue(dsName)
80
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
81
                .build();
82
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
83
                .setType(Type.result)
84
                .setDateofcollection(dateOfCollection)
85
                .addCollectedfrom(collectedFrom)
86
                .setId(sourceId);
87

    
88
        StreamUtils.toStream(localIdentifiers.iterator())
89
                .map(JsonElement::getAsJsonObject)
90
                .map(localIdentifier -> getPid(localIdentifier, conf))
91
                .filter(Objects::nonNull)
92
                .forEach(oafEntityBuilder::addPid);
93

    
94
        final Result.Builder result = Result.newBuilder();
95

    
96
        final Metadata.Builder metadata = Metadata.newBuilder()
97
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
98
            .setLanguage(Qualifier.newBuilder()
99
                .setClassid("und")
100
                .setClassname("Undetermined")
101
                .setSchemeid("dent:languages")
102
                .setSchemename("dent:languages")
103
                .build());
104
        if (StringUtils.isNotBlank(title)) {
105
            metadata.addTitle(StructuredProperty.newBuilder()
106
                    .setValue(title)
107
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
108
                    .build());
109
        }
110
        if (publisher.size() > 0)
111
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
112
        if (StringUtils.isNotEmpty(abstractValue)) {
113
            metadata.addDescription(StringField.newBuilder().setValue(abstractValue).build());
114
        }
115
        dates.stream().map(it -> {
116
           if (it.length() == 4) {
117
               return it+"-01-01";
118
           }
119
           else if (it.length() > 10) {
120
               return it.substring(0,10);
121
           }
122
           else
123
               return it;
124
        }).forEach(it -> metadata.addRelevantdate(StructuredProperty.newBuilder()
125
                .setValue(it)
126
                .setQualifier(getQualifier("dnet:date", "dnet:date"))
127
                .build()));
128

    
129
        if (rootElement.has("subject")) {
130
            JsonArray subject = rootElement.getAsJsonArray("subject");
131
            subject.forEach(it -> {
132
                    final JsonObject item = it.getAsJsonObject();
133
                    final String scheme = getStringValue(item, "scheme");
134
                    metadata.addSubject(StructuredProperty.newBuilder()
135
                            .setValue(Objects.requireNonNull(getStringValue(item, "value")))
136
                            .setQualifier(getQualifier(scheme, "dnet:subject"))
137
                            .build());
138
                }
139
            );
140
        }
141
        int i = 1;
142
        for (String it : authors) {
143
            metadata.addAuthor(Author.newBuilder()
144
                    .setFullname(it)
145
                    .setRank(i++)
146
                    .build());
147
        }
148
        result.setMetadata(metadata.build());
149

    
150
        localIdentifiers.forEach(it -> {
151

    
152
            final JsonObject localIdentifier = it.getAsJsonObject();
153
            final String pidType = getStringValue(localIdentifier, "type");
154
            final ScholExplorerConfiguration currentConf = conf.get(pidType);
155
            if (currentConf.getGeneratedUrl() != null) {
156
                final Instance.Builder instance = Instance.newBuilder();
157
                final String pidValue = getStringValue(localIdentifier, "id");
158
                instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
159
                instance.setAccessright(Qualifier.newBuilder()
160
                        .setClassid("UNKNOWN")
161
                        .setClassname("not available")
162
                        .setSchemeid("dnet:access_modes")
163
                        .setSchemename("dnet:access_modes")
164
                        .build());
165

    
166
                instance.setInstancetype(Qualifier.newBuilder()
167
                        .setClassid("0000")
168
                        .setClassname("Unknown")
169
                        .setSchemeid("dnet:publication_resource")
170
                        .setSchemename("dnet:publication_resource")
171
                        .build());
172
                instance.setHostedby(KeyValue.newBuilder()
173
                        .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
174
                        .setValue("Unknown Repository")
175
                        .build());
176

    
177
                instance.setCollectedfrom(collectedFrom);
178
                result.addInstance(instance);
179
            }
180
        });
181

    
182
        generateExternalReference(extractRelations(rootElement, "externalRels"))
183
                .forEach(result::addExternalReference);
184
        oafEntityBuilder.setResult(result.build());
185
        oafBuilder.setEntity(oafEntityBuilder.build());
186

    
187
        System.out.println(JsonFormat.printToString(oafBuilder.build()));
188

    
189
        actions.add(factory.createAtomicAction(setName, agent, oafEntityBuilder.getId(), "result", "body", oafBuilder.build().toByteArray()));
190

    
191
        final List<JsonObject> doiRels = extractRelations(rootElement, "doiRels");
192
        doiRels.stream().map(it -> convertDoiRelations(it, factory, sourceId, nsPrefix, collectedFrom, setName, agent)).forEach(actions::addAll);
193
        return actions;
194
    }
195

    
196

    
197
    private static AtomicAction createResultResultRelation(final String source, final String target,
198
                                                           final KeyValue collectedFrom, final ResultResult resultResultRel, final String relClass, final String cfRelation, final ActionFactory factory, final String setName, final Agent agent) {
199
        final Oaf.Builder oaf = Oaf.newBuilder();
200
        oaf.setDataInfo(
201
                DataInfo.newBuilder()
202
                        .setDeletedbyinference(false)
203
                        .setInferred(false)
204
                        .setTrust("0.9")
205
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
206
                        .build());
207
        oaf.setKind(Kind.relation);
208
        final OafRel.Builder relation = OafRel.newBuilder();
209
        relation.setSource(source);
210
        relation.setTarget(target);
211
        relation.setRelType(RelTypeProtos.RelType.resultResult);
212
        relation.setSubRelType(RelTypeProtos.SubRelType.publicationDataset);
213
        relation.setChild(false);
214
        relation.setResultResult(resultResultRel);
215
        relation.setRelClass(relClass);
216
        relation.addCollectedfrom(collectedFrom);
217
        oaf.setRel(relation.build());
218

    
219
        //System.out.println(JsonFormat.printToString(oaf.build()));
220
        return factory.createAtomicAction(setName, agent, source, cfRelation, target, oaf.build().toByteArray());
221
    }
222

    
223

    
224
    private static List<AtomicAction> convertDoiRelations(final JsonObject doiRel, final ActionFactory factory, final String sourceId, final String nsPrefix, final KeyValue collectedFrom, final String setName, final Agent agent) {
225
        final String target = Objects.requireNonNull(getStringValue(doiRel, "dnetId")).substring(17);
226
        final String targetId = String.format("50|%s::%s", nsPrefix, target);
227
        final String relationSemantic = getStringValue(doiRel, "relationSemantic");
228
        String cfRelation;
229
        String cfInverseRelation;
230
        ResultResult.Builder resultRel = ResultResult.newBuilder();
231
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
232
        String relClass;
233
        String inverseRelClass;
234

    
235
        switch (relationSemantic) {
236
            case "isSupplementedBy": {
237
                cfRelation = "resultResult_supplement_isSupplementedBy";
238
                cfInverseRelation = "resultResult_supplement_isSupplementTo";
239

    
240
                relClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
241
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
242
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
243
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
244
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
245
                                .build())
246
                        .build());
247
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
248
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
249
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
250
                                .build())
251
                        .build());
252
                break;
253
            }
254
            case "isSupplementTo": {
255
                cfRelation = "resultResult_supplement_isSupplementTo";
256
                cfInverseRelation = "resultResult_supplement_isSupplementedBy";
257
                inverseRelClass = ResultResult.Supplement.RelName.isSupplementedBy.toString();
258
                relClass = ResultResult.Supplement.RelName.isSupplementTo.toString();
259
                resultInverseRel.setSupplement(ResultResult.Supplement.newBuilder()
260
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
261
                                .setSemantics(getQualifier(inverseRelClass, "dnet:result_result_relations"))
262
                                .build())
263
                        .build());
264
                resultRel.setSupplement(ResultResult.Supplement.newBuilder()
265
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
266
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
267
                                .build())
268
                        .build());
269
                break;
270
            }
271
            default: {
272
                cfRelation = "resultResult_publicationDataset_isRelatedTo";
273
                cfInverseRelation = "resultResult_publicationDataset_isRelatedTo";
274
                relClass = ResultResult.PublicationDataset.RelName.isRelatedTo.toString();
275
                inverseRelClass = relClass;
276
                resultInverseRel.setPublicationDataset(ResultResult.PublicationDataset.newBuilder()
277
                        .setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
278
                                .setSemantics(getQualifier(relClass, "dnet:result_result_relations"))
279
                                .build())
280
                        .build());
281
                resultRel = resultInverseRel;
282
            }
283
        }
284

    
285
        final List<AtomicAction> actions = new ArrayList<>();
286
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
287
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
288

    
289
        return actions;
290
    }
291

    
292
    private static List<ExternalReference> generateExternalReference(final List<JsonObject> jsonRels) {
293
        final List<ExternalReference> result = new ArrayList<>();
294

    
295
        jsonRels.forEach(it -> {
296
            ExternalReference.Builder builder = ExternalReference.newBuilder();
297
            if("url".equals(getStringValue(it.getAsJsonObject("id"), "schema"))) {
298
                builder.setUrl(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")));
299
            }
300
            result.add(builder
301
                    .setRefidentifier(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "identifier")))
302
                    .setSitename(Objects.requireNonNull(getStringValue(it, "collectedFrom")))
303
                    .setQualifier(Qualifier.newBuilder()
304
                            .setClassid(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
305
                            .setClassname(Objects.requireNonNull(getStringValue(it.getAsJsonObject("id"), "schema")))
306
                            .setSchemename("dnet:externalReference_typologies")
307
                            .setSchemeid("dnet:externalReference_typologies")
308
                            .build())
309
                    .build());
310
        });
311
        return result;
312
    }
313

    
314
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
315
        final List<JsonObject> result = new ArrayList<>();
316
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
317
            final JsonArray asJsonArray = rootElement.getAsJsonArray(fieldType);
318
            asJsonArray.forEach(it -> result.add(it.getAsJsonObject()));
319
        }
320
        return result;
321
    }
322

    
323

    
324

    
325

    
326

    
327

    
328

    
329
}
(10-10/11)