Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.JsonArray;
4
import com.google.gson.JsonElement;
5
import com.google.gson.JsonObject;
6
import com.googlecode.protobuf.format.JsonFormat;
7
import eu.dnetlib.actionmanager.actions.ActionFactory;
8
import eu.dnetlib.actionmanager.actions.AtomicAction;
9
import eu.dnetlib.data.mapreduce.util.StreamUtils;
10
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
11
import org.apache.avro.data.Json;
12
import org.apache.commons.lang3.StringUtils;
13
import org.apache.solr.common.util.StrUtils;
14

    
15
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
16
import static eu.dnetlib.data.proto.KindProtos.Kind;
17
import static eu.dnetlib.data.proto.OafProtos.*;
18
import static eu.dnetlib.data.proto.ResultProtos.Result;
19
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
20
import static eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
21
import static eu.dnetlib.data.proto.TypeProtos.Type;
22
import static eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions.getQualifier;
23
import static eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions.getStringValue;
24
import static eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions.getArrayValues;
25

    
26
import java.util.ArrayList;
27
import java.util.List;
28
import java.util.Objects;
29
import java.util.stream.Collectors;
30

    
31
public class CrossRefToActions {
32

    
33
    public static List<AtomicAction> generateActionsFromDump(final JsonObject rootElement,ActionFactory factory) {
34
        final List<AtomicAction> actions = new ArrayList<>();
35

    
36

    
37
        //Create OAF Proto
38
        final Oaf.Builder oaf = Oaf.newBuilder();
39
        //Add Data Info
40
        oaf.setDataInfo(DataInfo.newBuilder()
41
                .setInvisible(false)
42
                .setDeletedbyinference(false)
43
                .setInferred(false)
44
                .setTrust("0.9")
45
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
46
                .build());
47

    
48
        //Adding Kind
49
        oaf.setKind(Kind.entity);
50

    
51
        //creating Result Proto
52
        final OafEntity.Builder entity = OafEntity.newBuilder().setType(Type.result);
53

    
54
        //Adding Collected From
55
        entity.setDateofcollection(Objects.requireNonNull(getStringValue(rootElement, "dateOfCollection")));
56
        if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
57
            StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
58
                    .map(JsonElement::getAsJsonObject)
59
                    .forEach(cf ->
60
                            {
61
                             final String id =getStringValue(cf,"id");
62
                             final String name =getStringValue(cf,"name");
63
                             if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
64
                                 final KeyValue collectedFrom = KeyValue.newBuilder()
65
                                         .setValue(name)
66
                                         .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
67
                                         .build();
68
                                 entity.addCollectedfrom(collectedFrom);
69
                             }
70
                            }
71
                    );
72
        }
73

    
74

    
75
        //Adding identifier
76
        final String objIdentifier = getStringValue(rootElement, "objIdentifier");
77
        final String nsPrefix = getStringValue(rootElement,"datasourcePrefix");
78
        if (StringUtils.isBlank(objIdentifier)) return null;
79
        final String sourceId = String.format("50|%s::%s", nsPrefix, objIdentifier);
80
        entity.setId(sourceId);
81
        final String doi = getStringValue(rootElement, "doi");
82
        //ADDING PID
83
        if (doi == null)
84
            return null;
85
        entity.addPid(StructuredProperty.newBuilder()
86
                .setValue(doi)
87
                .setQualifier(getQualifier("doi", "dnet:pid_types"))
88
                .build());
89

    
90

    
91
        //Create Result Field
92
        Result.Builder result = Result.newBuilder();
93

    
94

    
95
        //Adding Instance
96
        final String typeValue = getStringValue(rootElement.getAsJsonObject("type"),"value");
97
        final String cobjValue = getStringValue(rootElement.getAsJsonObject("type"),"cobj");
98

    
99
        //Add UnpayWall instance
100
        final String best_oa_location_url = getStringValue(rootElement, "best_oa_location_url");
101
        Instance.Builder instance= Instance.newBuilder();
102
        instance.setInstancetype(Qualifier.newBuilder()
103
                .setClassid(cobjValue)
104
                .setClassname(typeValue)
105
                .setSchemeid("dnet:publication_resource")
106
                .setSchemename("dnet:publication_resource")
107
                .build());
108
        instance.setHostedby(KeyValue.newBuilder()
109
                .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
110
                .setValue("Unknown Repository")
111
                .build());
112
        if (StringUtils.isNotBlank(best_oa_location_url)){
113

    
114
            instance.addUrl(best_oa_location_url);
115
            instance.setAccessright(Qualifier.newBuilder()
116
                    .setClassid("OPEN")
117
                    .setClassname("open access")
118
                    .setSchemeid("dnet:access_modes")
119
                    .setSchemename("dnet:access_modes")
120
                    .build());
121
            instance.setCollectedfrom(KeyValue.newBuilder()
122
                    .setValue("UnpayWall")
123
                    .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("unpaywall"))
124
                    .build());
125
        } else {
126
            instance = Instance.newBuilder();
127
            instance.addUrl(String.format("http://dx.doi.org/%s", doi));
128
            instance.setAccessright(Qualifier.newBuilder()
129
                    .setClassid("CLOSED")
130
                    .setClassname("Closed Access")
131
                    .setSchemeid("dnet:access_modes")
132
                    .setSchemename("dnet:access_modes")
133
                    .build());
134
            instance.setCollectedfrom(KeyValue.newBuilder()
135
                    .setValue("CrossRef")
136
                    .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("crossref"))
137
                    .build());
138
        }
139
        result.addInstance(instance.build());
140

    
141

    
142

    
143

    
144
        //Create Metadata Proto
145
        Metadata.Builder metadata = Metadata.newBuilder();
146

    
147
        //Adding Authors
148
        final List<Author> authors = createAuthors(rootElement);
149
        if (authors!= null)
150
            metadata.addAllAuthor(authors);
151
        //adding Language
152
        metadata.setLanguage(Qualifier.newBuilder()
153
                .setClassid("und")
154
                .setClassname("Undetermined")
155
                .setSchemeid("dent:languages")
156
                .setSchemename("dent:languages")
157
                .build());
158

    
159
        //Adding subjects
160
        List<String> subjects =getArrayValues(rootElement, "subject");
161

    
162
        subjects.forEach(s-> metadata.addSubject(StructuredProperty.newBuilder()
163
                .setValue(s)
164
                .setQualifier(getQualifier("keyword", "dnet:subject"))
165
                .build()));
166

    
167
        //Adding titles
168
        List<String>titles =getArrayValues(rootElement, "title");
169
        titles.forEach(t->
170
            metadata.addTitle(StructuredProperty.newBuilder()
171
                    .setValue(t)
172
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
173
                    .build()));
174

    
175
        //Adding date
176
        String date = getStringValue(rootElement,"issued");
177
        if (date.length()==4)
178
            date +="-01-01";
179

    
180
        metadata.setDateofacceptance(StringField.newBuilder().setValue(date).build());
181

    
182
        //Adding description
183
        String description=null;
184
        if (rootElement.has("abstract") && rootElement.get("abstract").isJsonArray())
185
            description =String.join(" ",getArrayValues(rootElement,"abstract"));
186
        else if (rootElement.has("abstract") )
187
            description = rootElement.get("abstract").getAsString();
188

    
189
        if(StringUtils.isNotBlank(description))
190
            metadata.addDescription(StringField.newBuilder().setValue(description).build());
191

    
192
        //Adding Journal
193
        final String publisher = getStringValue(rootElement,"publisher");
194
        if (StringUtils.isNotBlank(publisher)){
195

    
196
            final Journal.Builder journal = Journal.newBuilder().setName(publisher);
197

    
198
            if (hasJSONArrayField(rootElement,"issn" )){
199
                StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
200
                        .map(JsonElement::getAsJsonObject)
201
                        .forEach(it -> {
202
                            final String type = getStringValue(it, "type");
203
                            final String value = getStringValue(it, "value");
204
                            if("electronic".equals(type)){
205
                                journal.setIssnOnline(value);
206
                            }
207
                            if ("print".equals(type))
208
                                journal.setIssnPrinted(value);
209
                        });
210
            }
211
            metadata.setJournal(journal.build());
212
        }
213

    
214
        metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
215
        result.setMetadata(metadata.build());
216
        entity.setResult(result.build());
217
        oaf.setEntity(entity.build());
218

    
219

    
220
        System.out.println(JsonFormat.printToString(oaf.build()));
221

    
222
        return actions;
223
    }
224

    
225

    
226
    private static boolean hasJSONArrayField(final JsonObject root, final String key) {
227
        return root.has(key) && root.get(key).isJsonArray();
228
    }
229

    
230

    
231
    public static List<Author> createAuthors(final JsonObject root) {
232

    
233
        if (root.has("author") &&  root.get("author").isJsonArray()) {
234

    
235
            final List<Author> authors = new ArrayList<>();
236
            final JsonArray jsonAuthors = root.getAsJsonArray("author");
237
            int i = 0;
238
            for (JsonElement item: jsonAuthors) {
239
                final JsonObject author = item.getAsJsonObject();
240
                final Author.Builder result =Author.newBuilder();
241
                final String given  = getStringValue(author, "given");
242
                final String family = getStringValue(author, "family");
243
                final String orchid = getStringValue(author, "ORCID");
244
                if (StringUtils.isBlank(given) && StringUtils.isBlank(family))
245
                    continue;
246
                result.setFullname(given+" "+ family);
247
                if (StringUtils.isNotBlank(given))
248
                    result.setName(given);
249
                if (StringUtils.isNotBlank(family))
250
                result.setSurname(family);
251
                if (StringUtils.isNotBlank(orchid))
252
                {
253
                    result.addPid(KeyValue.newBuilder()
254
                            .setValue(orchid)
255
                            .setKey("ORCID")
256
                            .build());
257
                }
258
                result.setRank(i++);
259
                authors.add(result.build());
260
            }
261
            return authors;
262

    
263
        }
264
        return null;
265

    
266

    
267
    }
268

    
269
    private static String getDefaultResulttype(final String cobjcategory) {
270
        switch (cobjcategory) {
271
            case "0029":
272
                return "software";
273
            case "0021":
274
            case "0024":
275
            case "0025":
276
            case "0030":
277
                return "dataset";
278
            case "0000":
279
            case "0010":
280
            case "0018":
281
            case "0020":
282
            case "0022":
283
            case "0023":
284
            case "0026":
285
            case "0027":
286
            case "0028":
287
            case "0037":
288
                return "other";
289
            case "0001":
290
            case "0002":
291
            case "0004":
292
            case "0005":
293
            case "0006":
294
            case "0007":
295
            case "0008":
296
            case "0009":
297
            case "0011":
298
            case "0012":
299
            case "0013":
300
            case "0014":
301
            case "0015":
302
            case "0016":
303
            case "0017":
304
            case "0019":
305
            case "0031":
306
            case "0032":
307
                return "publication";
308
            default:
309
                return "publication";
310
        }
311
    }
312

    
313

    
314

    
315
}
(1-1/9)