Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3
import com.google.gson.JsonArray;
4
import com.google.gson.JsonElement;
5
import com.google.gson.JsonObject;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.data.mapreduce.util.StreamUtils;
10
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
11
import org.apache.commons.lang3.StringUtils;
12

    
13
import java.util.ArrayList;
14
import java.util.List;
15
import java.util.Objects;
16

    
17
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
18
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
19
import static eu.dnetlib.data.proto.KindProtos.Kind;
20
import static eu.dnetlib.data.proto.OafProtos.Oaf;
21
import static eu.dnetlib.data.proto.OafProtos.OafEntity;
22
import static eu.dnetlib.data.proto.ResultProtos.Result;
23
import static eu.dnetlib.data.proto.ResultProtos.Result.*;
24
import static eu.dnetlib.data.proto.TypeProtos.Type;
25

    
26
public class CrossRefToActions {
27

    
28
    public static AtomicAction generateActionsFromDump(final JsonObject rootElement, ActionFactory factory, final String setName, final Agent agent) {
29

    
30

    
31

    
32
        //Create OAF Proto
33
        final Oaf.Builder oaf = Oaf.newBuilder();
34
        //Add Data Info
35
        oaf.setDataInfo(DataInfo.newBuilder()
36
                .setInvisible(false)
37
                .setDeletedbyinference(false)
38
                .setInferred(false)
39
                .setTrust("0.9")
40
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
41
                .build());
42

    
43
        //Adding Kind
44
        oaf.setKind(Kind.entity);
45

    
46
        //creating Result Proto
47
        final OafEntity.Builder entity = OafEntity.newBuilder().setType(Type.result);
48

    
49
        //Adding Collected From
50
        entity.setDateofcollection(Objects.requireNonNull(getStringValue(rootElement, "dateOfCollection")));
51
        if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
52
            StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
53
                    .map(JsonElement::getAsJsonObject)
54
                    .forEach(cf ->
55
                            {
56
                             final String id =getStringValue(cf,"id");
57
                             final String name =getStringValue(cf,"name");
58
                             if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
59
                                 final KeyValue collectedFrom = KeyValue.newBuilder()
60
                                         .setValue(name)
61
                                         .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
62
                                         .build();
63
                                 entity.addCollectedfrom(collectedFrom);
64
                             }
65
                            }
66
                    );
67
        }
68

    
69

    
70
        //Adding identifier
71
        final String objIdentifier = getStringValue(rootElement, "objIdentifier");
72
        final String nsPrefix = getStringValue(rootElement,"datasourcePrefix");
73
        if (StringUtils.isBlank(objIdentifier)) return null;
74
        final String sourceId = String.format("50|%s::%s", nsPrefix, objIdentifier);
75
        entity.setId(sourceId);
76
        final String doi = getStringValue(rootElement, "doi");
77
        //ADDING PID
78
        if (doi == null)
79
            return null;
80
        entity.addPid(StructuredProperty.newBuilder()
81
                .setValue(doi)
82
                .setQualifier(getQualifier("doi", "dnet:pid_types"))
83
                .build());
84

    
85

    
86
        //Create Result Field
87
        Result.Builder result = Result.newBuilder();
88

    
89

    
90
        //Adding Instance
91
        final String typeValue = getStringValue(rootElement.getAsJsonObject("type"),"value");
92
        final String cobjValue = getStringValue(rootElement.getAsJsonObject("type"),"cobj");
93

    
94
        //Add UnpayWall instance
95
        final String best_oa_location_url = getStringValue(rootElement, "best_oa_location_url");
96
        Instance.Builder instance= Instance.newBuilder();
97
        instance.setInstancetype(Qualifier.newBuilder()
98
                .setClassid(cobjValue)
99
                .setClassname(typeValue)
100
                .setSchemeid("dnet:publication_resource")
101
                .setSchemename("dnet:publication_resource")
102
                .build());
103
        instance.setHostedby(KeyValue.newBuilder()
104
                .setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
105
                .setValue("Unknown Repository")
106
                .build());
107
        if (StringUtils.isNotBlank(best_oa_location_url)){
108

    
109
            instance.addUrl(best_oa_location_url);
110
            instance.setAccessright(Qualifier.newBuilder()
111
                    .setClassid("OPEN")
112
                    .setClassname("open access")
113
                    .setSchemeid("dnet:access_modes")
114
                    .setSchemename("dnet:access_modes")
115
                    .build());
116
            instance.setCollectedfrom(KeyValue.newBuilder()
117
                    .setValue("UnpayWall")
118
                    .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("unpaywall"))
119
                    .build());
120
        } else {
121
            instance = Instance.newBuilder();
122
            instance.addUrl(String.format("http://dx.doi.org/%s", doi));
123
            instance.setAccessright(Qualifier.newBuilder()
124
                    .setClassid("CLOSED")
125
                    .setClassname("Closed Access")
126
                    .setSchemeid("dnet:access_modes")
127
                    .setSchemename("dnet:access_modes")
128
                    .build());
129
            instance.setCollectedfrom(KeyValue.newBuilder()
130
                    .setValue("CrossRef")
131
                    .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("crossref"))
132
                    .build());
133
        }
134
        result.addInstance(instance.build());
135

    
136

    
137

    
138

    
139
        //Create Metadata Proto
140
        Metadata.Builder metadata = Metadata.newBuilder();
141

    
142
        //Adding Authors
143
        final List<Author> authors = createAuthors(rootElement);
144
        if (authors!= null)
145
            metadata.addAllAuthor(authors);
146
        //adding Language
147
        metadata.setLanguage(Qualifier.newBuilder()
148
                .setClassid("und")
149
                .setClassname("Undetermined")
150
                .setSchemeid("dent:languages")
151
                .setSchemename("dent:languages")
152
                .build());
153

    
154
        //Adding subjects
155
        List<String> subjects =getArrayValues(rootElement, "subject");
156

    
157
        subjects.forEach(s-> metadata.addSubject(StructuredProperty.newBuilder()
158
                .setValue(s)
159
                .setQualifier(getQualifier("keyword", "dnet:subject"))
160
                .build()));
161

    
162
        //Adding titles
163
        List<String>titles =getArrayValues(rootElement, "title");
164
        titles.forEach(t->
165
            metadata.addTitle(StructuredProperty.newBuilder()
166
                    .setValue(t)
167
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
168
                    .build()));
169

    
170
        //Adding date
171
        String date = getStringValue(rootElement,"issued");
172
        if (date.length()==4)
173
            date +="-01-01";
174

    
175
        metadata.setDateofacceptance(StringField.newBuilder().setValue(date).build());
176

    
177
        //Adding description
178
        String description=null;
179
        if (rootElement.has("abstract") && rootElement.get("abstract").isJsonArray())
180
            description =String.join(" ",getArrayValues(rootElement,"abstract"));
181
        else if (rootElement.has("abstract") )
182
            description = rootElement.get("abstract").getAsString();
183

    
184
        if(StringUtils.isNotBlank(description))
185
            metadata.addDescription(StringField.newBuilder().setValue(description).build());
186

    
187
        //Adding Journal
188
        final String publisher = getStringValue(rootElement,"publisher");
189
        if (StringUtils.isNotBlank(publisher)){
190

    
191
            final Journal.Builder journal = Journal.newBuilder().setName(publisher);
192

    
193
            if (hasJSONArrayField(rootElement,"issn" )){
194
                StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
195
                        .map(JsonElement::getAsJsonObject)
196
                        .forEach(it -> {
197
                            final String type = getStringValue(it, "type");
198
                            final String value = getStringValue(it, "value");
199
                            if("electronic".equals(type)){
200
                                journal.setIssnOnline(value);
201
                            }
202
                            if ("print".equals(type))
203
                                journal.setIssnPrinted(value);
204
                        });
205
            }
206
            metadata.setJournal(journal.build());
207
        }
208

    
209
        metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
210
        result.setMetadata(metadata.build());
211
        entity.setResult(result.build());
212
        oaf.setEntity(entity.build());
213

    
214

    
215

    
216

    
217
        return factory.createAtomicAction(setName, agent,oaf.getEntity().getId(), "result", "body",oaf.build().toByteArray());
218
    }
219

    
220

    
221
    private static boolean hasJSONArrayField(final JsonObject root, final String key) {
222
        return root.has(key) && root.get(key).isJsonArray();
223
    }
224

    
225

    
226
    public static List<Author> createAuthors(final JsonObject root) {
227

    
228
        if (root.has("author") &&  root.get("author").isJsonArray()) {
229

    
230
            final List<Author> authors = new ArrayList<>();
231
            final JsonArray jsonAuthors = root.getAsJsonArray("author");
232
            int i = 0;
233
            for (JsonElement item: jsonAuthors) {
234
                final JsonObject author = item.getAsJsonObject();
235
                final Author.Builder result =Author.newBuilder();
236
                final String given  = getStringValue(author, "given");
237
                final String family = getStringValue(author, "family");
238
                final String orchid = getStringValue(author, "ORCID");
239
                if (StringUtils.isBlank(given) && StringUtils.isBlank(family))
240
                    continue;
241
                result.setFullname(given+" "+ family);
242
                if (StringUtils.isNotBlank(given))
243
                    result.setName(given);
244
                if (StringUtils.isNotBlank(family))
245
                result.setSurname(family);
246
                if (StringUtils.isNotBlank(orchid))
247
                {
248
                    result.addPid(KeyValue.newBuilder()
249
                            .setValue(orchid)
250
                            .setKey("ORCID")
251
                            .build());
252
                }
253
                result.setRank(i++);
254
                authors.add(result.build());
255
            }
256
            return authors;
257

    
258
        }
259
        return null;
260

    
261

    
262
    }
263

    
264

    
265

    
266

    
267
}
(2-2/11)