1 |
53554
|
sandro.lab
|
package eu.dnetlib.data.mapreduce.hbase.dataimport;
|
2 |
|
|
|
3 |
|
|
import com.google.gson.Gson;
|
4 |
|
|
import com.google.gson.JsonElement;
|
5 |
|
|
import com.google.gson.JsonObject;
|
6 |
|
|
import eu.dnetlib.actionmanager.actions.ActionFactory;
|
7 |
|
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
8 |
|
|
import eu.dnetlib.actionmanager.common.Agent;
|
9 |
|
|
import eu.dnetlib.data.mapreduce.util.StreamUtils;
|
10 |
|
|
import eu.dnetlib.data.proto.*;
|
11 |
|
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
12 |
|
|
import eu.dnetlib.miscutils.collections.Pair;
|
13 |
53736
|
sandro.lab
|
import org.apache.commons.codec.binary.Base64;
|
14 |
53554
|
sandro.lab
|
import org.apache.commons.io.IOUtils;
|
15 |
|
|
import org.apache.commons.lang3.StringUtils;
|
16 |
53736
|
sandro.lab
|
|
17 |
|
|
import java.io.ByteArrayOutputStream;
|
18 |
53554
|
sandro.lab
|
import java.io.IOException;
|
19 |
|
|
import java.io.InputStream;
|
20 |
|
|
import java.util.*;
|
21 |
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
22 |
53588
|
sandro.lab
|
import java.util.function.Function;
|
23 |
53554
|
sandro.lab
|
import java.util.stream.Collectors;
|
24 |
53736
|
sandro.lab
|
import java.util.zip.Inflater;
|
25 |
53554
|
sandro.lab
|
|
26 |
|
|
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
|
27 |
|
|
import static eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
public class DOIBoostToActions {
|
31 |
|
|
|
32 |
|
|
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
|
33 |
|
|
put("MAG", new Pair<>("Microsoft Academic Graph", "openaire____::microsoft"));
|
34 |
|
|
put("ORCID", new Pair<>("ORCID", "openaire____::orcid"));
|
35 |
|
|
put("CrossRef", new Pair<>("Crossref", "openaire____::crossref"));
|
36 |
|
|
put("UnpayWall", new Pair<>("UnpayWall", "openaire____::unpaywall"));
|
37 |
|
|
|
38 |
|
|
}};
|
39 |
|
|
|
40 |
53736
|
sandro.lab
|
private static String decompressAbstract(final String abstractCompressed) {
|
41 |
|
|
try {
|
42 |
|
|
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
|
43 |
|
|
final Inflater decompresser = new Inflater();
|
44 |
|
|
decompresser.setInput(byteArray);
|
45 |
|
|
final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
|
46 |
|
|
byte[] buffer = new byte[8192];
|
47 |
|
|
while (!decompresser.finished()) {
|
48 |
|
|
int size = decompresser.inflate(buffer);
|
49 |
|
|
bos.write(buffer, 0, size);
|
50 |
|
|
}
|
51 |
|
|
byte[] unzippeddata = bos.toByteArray();
|
52 |
|
|
decompresser.end();
|
53 |
|
|
return new String(unzippeddata);
|
54 |
|
|
} catch (Throwable e) {
|
55 |
53737
|
sandro.lab
|
System.out.println("Wrong abstract:"+ abstractCompressed);
|
56 |
53736
|
sandro.lab
|
throw new RuntimeException(e);
|
57 |
|
|
}
|
58 |
|
|
}
|
59 |
|
|
|
60 |
53554
|
sandro.lab
|
private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType = new HashMap<String, FieldTypeProtos.Qualifier>() {{
|
61 |
|
|
put("MAG", FieldTypeProtos.Qualifier.newBuilder().setClassid("mag_id" ).setClassname("Microsoft Academic Graph Identifier").setSchemename("dnet:pid_types").setSchemeid("dnet:pid_types").build());
|
62 |
|
|
put("grid.ac", getQualifier("grid", "dnet:pid_types"));
|
63 |
|
|
put("wikpedia", getQualifier("urn", "dnet:pid_types"));
|
64 |
|
|
}};
|
65 |
|
|
|
66 |
|
|
static Map<String, Map<String, String>> typologiesMapping;
|
67 |
|
|
|
68 |
|
|
static {
|
69 |
|
|
try {
|
70 |
|
|
final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
|
71 |
|
|
final String tt =IOUtils.toString(is);
|
72 |
|
|
typologiesMapping = new Gson().fromJson(tt, Map.class);
|
73 |
|
|
} catch (IOException e) {
|
74 |
|
|
e.printStackTrace();
|
75 |
|
|
}
|
76 |
|
|
}
|
77 |
|
|
|
78 |
|
|
/** Namespace prefix placed in the identifiers of the results generated by this class. */
static final String doiBoostNSPREFIX = "doiboost____";
|
79 |
|
|
|
80 |
|
|
|
81 |
53565
|
sandro.lab
|
public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement, final ActionFactory factory, final String setName, final Agent agent, boolean invisible,
|
82 |
|
|
final boolean onlyOrganization) {
|
83 |
53554
|
sandro.lab
|
|
84 |
|
|
//Create OAF Proto
|
85 |
|
|
|
86 |
|
|
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
87 |
|
|
//Add Data Info
|
88 |
|
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
89 |
|
|
.setInvisible(invisible)
|
90 |
|
|
.setDeletedbyinference(false)
|
91 |
|
|
.setInferred(false)
|
92 |
|
|
.setTrust("0.9")
|
93 |
|
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
94 |
|
|
.build());
|
95 |
|
|
|
96 |
|
|
//Adding Kind
|
97 |
|
|
oaf.setKind(KindProtos.Kind.entity);
|
98 |
|
|
|
99 |
|
|
//creating Result Proto
|
100 |
|
|
final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
|
101 |
|
|
|
102 |
|
|
entity.setDateofcollection("2018-10-10");
|
103 |
|
|
|
104 |
|
|
if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
|
105 |
|
|
StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
|
106 |
|
|
.map(JsonElement::getAsString)
|
107 |
|
|
.forEach(cf ->
|
108 |
|
|
{
|
109 |
|
|
final String id =datasources.get(cf).getValue();
|
110 |
|
|
final String name =datasources.get(cf).getKey();
|
111 |
|
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
112 |
|
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
113 |
|
|
.setValue(name)
|
114 |
|
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
115 |
|
|
.build();
|
116 |
|
|
entity.addCollectedfrom(collectedFrom);
|
117 |
|
|
}
|
118 |
|
|
}
|
119 |
|
|
);
|
120 |
|
|
}
|
121 |
|
|
//Adding identifier
|
122 |
|
|
final String doi = getStringValue(rootElement, "doi");
|
123 |
|
|
if (doi == null)
|
124 |
|
|
return null;
|
125 |
|
|
final String sourceId = String.format("50|%s::%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
|
126 |
|
|
entity.setId(sourceId);
|
127 |
|
|
|
128 |
|
|
entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
|
129 |
|
|
.setValue(doi)
|
130 |
|
|
.setQualifier(getQualifier("doi", "dnet:pid_types"))
|
131 |
|
|
.build());
|
132 |
|
|
|
133 |
|
|
|
134 |
|
|
//Create Result Field
|
135 |
|
|
ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
|
136 |
|
|
|
137 |
|
|
final String type = getStringValue(rootElement,"type");
|
138 |
|
|
|
139 |
|
|
if (!typologiesMapping.containsKey(type))
|
140 |
|
|
return null;
|
141 |
|
|
|
142 |
|
|
//Adding Instances
|
143 |
|
|
final String typeValue = typologiesMapping.get(type).get("value");
|
144 |
|
|
final String cobjValue = typologiesMapping.get(type).get("cobj");
|
145 |
|
|
|
146 |
|
|
|
147 |
|
|
getArrayObjects(rootElement, "instances").stream().map(it ->
|
148 |
|
|
{
|
149 |
|
|
ResultProtos.Result.Instance.Builder instance= ResultProtos.Result.Instance.newBuilder();
|
150 |
|
|
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
151 |
|
|
.setClassid(cobjValue)
|
152 |
|
|
.setClassname(typeValue)
|
153 |
|
|
.setSchemeid("dnet:publication_resource")
|
154 |
|
|
.setSchemename("dnet:publication_resource")
|
155 |
|
|
.build());
|
156 |
|
|
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
157 |
|
|
.setKey("10|openaire____::55045bd2a65019fd8e6741a755395c8c")
|
158 |
|
|
.setValue("Unknown Repository")
|
159 |
|
|
.build());
|
160 |
|
|
|
161 |
|
|
final String acc_class_id =it.get("access-rights").getAsString();
|
162 |
|
|
String acc_class_value;
|
163 |
|
|
switch (acc_class_id){
|
164 |
|
|
case "OPEN": {
|
165 |
|
|
acc_class_value = "open access";
|
166 |
|
|
break;
|
167 |
|
|
}
|
168 |
|
|
case "CLOSED": {
|
169 |
|
|
acc_class_value = "closed access";
|
170 |
|
|
break;
|
171 |
|
|
}
|
172 |
|
|
|
173 |
|
|
default: {
|
174 |
|
|
acc_class_value = "not available";
|
175 |
|
|
}
|
176 |
|
|
|
177 |
|
|
}
|
178 |
|
|
|
179 |
|
|
instance.addUrl(it.get("url").getAsString());
|
180 |
|
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
181 |
|
|
.setClassid(acc_class_id)
|
182 |
|
|
.setClassname(acc_class_value)
|
183 |
|
|
.setSchemeid("dnet:access_modes")
|
184 |
|
|
.setSchemename("dnet:access_modes")
|
185 |
|
|
.build());
|
186 |
|
|
|
187 |
|
|
final String id =datasources.get(it.get("provenance").getAsString()).getValue();
|
188 |
|
|
final String name =datasources.get(it.get("provenance").getAsString()).getKey();
|
189 |
|
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
190 |
|
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
191 |
|
|
.setValue(name)
|
192 |
|
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
193 |
|
|
.build();
|
194 |
|
|
|
195 |
|
|
instance.setCollectedfrom(collectedFrom);
|
196 |
|
|
}
|
197 |
|
|
|
198 |
|
|
return instance.build();
|
199 |
|
|
}).forEach(result::addInstance);
|
200 |
|
|
|
201 |
|
|
//Adding DOI URL as Instance
|
202 |
|
|
final String doiURL = getStringValue(rootElement, "doi-url");
|
203 |
|
|
if (StringUtils.isNotBlank(doiURL)) {
|
204 |
|
|
|
205 |
|
|
|
206 |
|
|
final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
|
207 |
|
|
instance.addUrl(doiURL);
|
208 |
|
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
209 |
|
|
.setClassid("CLOSED")
|
210 |
|
|
.setClassname("Closed Access")
|
211 |
|
|
.setSchemeid("dnet:access_modes")
|
212 |
|
|
.setSchemename("dnet:access_modes")
|
213 |
|
|
.build());
|
214 |
|
|
instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
215 |
|
|
.setValue("CrossRef")
|
216 |
|
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5("crossref"))
|
217 |
|
|
.build());
|
218 |
|
|
result.addInstance(instance);
|
219 |
|
|
}
|
220 |
|
|
|
221 |
|
|
//Create Metadata Proto
|
222 |
|
|
final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
|
223 |
|
|
|
224 |
|
|
|
225 |
|
|
Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
|
226 |
|
|
|
227 |
|
|
if (authorsOrganizations.getKey().size() > 0) {
|
228 |
|
|
metadata.addAllAuthor(authorsOrganizations.getKey());
|
229 |
|
|
}
|
230 |
53556
|
sandro.lab
|
else {
|
231 |
|
|
return null;
|
232 |
|
|
}
|
233 |
53554
|
sandro.lab
|
//adding Language
|
234 |
|
|
metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
|
235 |
|
|
.setClassid("und")
|
236 |
|
|
.setClassname("Undetermined")
|
237 |
|
|
.setSchemeid("dent:languages")
|
238 |
|
|
.setSchemename("dent:languages")
|
239 |
|
|
.build());
|
240 |
|
|
|
241 |
|
|
//Adding subjects
|
242 |
|
|
List<String> subjects =getArrayValues(rootElement, "subject");
|
243 |
|
|
|
244 |
|
|
subjects.forEach(s-> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
|
245 |
|
|
.setValue(s)
|
246 |
|
|
.setQualifier(getQualifier("keyword", "dnet:subject"))
|
247 |
|
|
.build()));
|
248 |
|
|
|
249 |
|
|
List<String>titles =getArrayValues(rootElement, "title");
|
250 |
|
|
titles.forEach(t->
|
251 |
|
|
metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
|
252 |
|
|
.setValue(t)
|
253 |
|
|
.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
|
254 |
|
|
.build()));
|
255 |
|
|
settingRelevantDate(rootElement, metadata, "issued", "issued", true);
|
256 |
|
|
settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
|
257 |
|
|
settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
|
258 |
|
|
settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
|
259 |
|
|
|
260 |
|
|
|
261 |
53737
|
sandro.lab
|
getArrayObjects(rootElement, "abstract").forEach(d ->
|
262 |
|
|
{
|
263 |
|
|
if ("MAG".equals(d.get("provenance")))
|
264 |
|
|
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
|
265 |
|
|
else
|
266 |
|
|
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
|
267 |
|
|
}
|
268 |
|
|
);
|
269 |
53554
|
sandro.lab
|
|
270 |
|
|
|
271 |
|
|
|
272 |
|
|
//Adding Journal
|
273 |
|
|
final String publisher = getStringValue(rootElement,"publisher");
|
274 |
|
|
if (StringUtils.isNotBlank(publisher)){
|
275 |
|
|
|
276 |
|
|
final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
|
277 |
|
|
|
278 |
|
|
if (hasJSONArrayField(rootElement,"issn" )){
|
279 |
|
|
StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
|
280 |
|
|
.map(JsonElement::getAsJsonObject)
|
281 |
|
|
.forEach(it -> {
|
282 |
|
|
final String issntype = getStringValue(it, "type");
|
283 |
|
|
final String value = getStringValue(it, "value");
|
284 |
|
|
if("electronic".equals(issntype)){
|
285 |
|
|
journal.setIssnOnline(value);
|
286 |
|
|
}
|
287 |
|
|
if ("print".equals(issntype))
|
288 |
|
|
journal.setIssnPrinted(value);
|
289 |
|
|
});
|
290 |
|
|
}
|
291 |
|
|
metadata.setJournal(journal.build());
|
292 |
|
|
}
|
293 |
|
|
metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
|
294 |
|
|
result.setMetadata(metadata.build());
|
295 |
|
|
entity.setResult(result.build());
|
296 |
|
|
oaf.setEntity(entity.build());
|
297 |
|
|
final List<AtomicAction> actionList = new ArrayList<>();
|
298 |
53736
|
sandro.lab
|
|
299 |
53565
|
sandro.lab
|
if (!onlyOrganization)
|
300 |
|
|
actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
|
301 |
53554
|
sandro.lab
|
|
302 |
|
|
if (!authorsOrganizations.getValue().isEmpty()) {
|
303 |
|
|
|
304 |
|
|
authorsOrganizations.getValue().forEach(o ->
|
305 |
|
|
{
|
306 |
53565
|
sandro.lab
|
|
307 |
53554
|
sandro.lab
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
|
308 |
53565
|
sandro.lab
|
if (!onlyOrganization)
|
309 |
|
|
actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
|
310 |
53554
|
sandro.lab
|
final String gridOrganization = getSimilarGridOrganization(o.getEntity());
|
311 |
|
|
if (gridOrganization!= null) {
|
312 |
|
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization, "".getBytes()));
|
313 |
|
|
actionList.add(factory.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(), "".getBytes()));
|
314 |
|
|
}
|
315 |
|
|
});
|
316 |
|
|
}
|
317 |
|
|
return actionList;
|
318 |
|
|
|
319 |
|
|
}
|
320 |
|
|
|
321 |
|
|
|
322 |
|
|
private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
|
323 |
|
|
|
324 |
|
|
final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
|
325 |
|
|
if (pidList!= null ) {
|
326 |
|
|
for (FieldTypeProtos.StructuredProperty p: pidList) {
|
327 |
|
|
if (p.getQualifier().getClassname().equals("grid")){
|
328 |
|
|
return "20|grid________::"+AbstractDNetXsltFunctions.md5(p.getValue());
|
329 |
|
|
}
|
330 |
|
|
}
|
331 |
|
|
}
|
332 |
|
|
return null;
|
333 |
|
|
|
334 |
|
|
}
|
335 |
|
|
|
336 |
|
|
private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication, final OafProtos.Oaf organization, final ActionFactory factory, final String setName, final Agent agent) {
|
337 |
|
|
|
338 |
|
|
List<AtomicAction> result = new ArrayList<>();
|
339 |
|
|
|
340 |
|
|
final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
|
341 |
|
|
roaf.setKind(KindProtos.Kind.relation);
|
342 |
|
|
|
343 |
|
|
roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
344 |
|
|
.setInvisible(false)
|
345 |
|
|
.setDeletedbyinference(false)
|
346 |
|
|
.setInferred(false)
|
347 |
|
|
.setTrust("0.9")
|
348 |
|
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
349 |
|
|
.build());
|
350 |
|
|
|
351 |
|
|
|
352 |
|
|
final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
|
353 |
|
|
|
354 |
|
|
rel.setRelType(RelTypeProtos.RelType.resultOrganization);
|
355 |
|
|
rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
|
356 |
|
|
|
357 |
|
|
//Create a relation Result --> Organization
|
358 |
|
|
rel.setSource(publication.getEntity().getId());
|
359 |
|
|
rel.setTarget(organization.getEntity().getId());
|
360 |
|
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
|
361 |
|
|
|
362 |
|
|
final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
|
363 |
|
|
|
364 |
|
|
final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
|
365 |
|
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
366 |
|
|
.setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
|
367 |
|
|
.build());
|
368 |
|
|
rel_instance.setAffiliation(affiliationRel.build());
|
369 |
|
|
rel.setResultOrganization(rel_instance.build());
|
370 |
|
|
|
371 |
|
|
rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
372 |
|
|
.setValue(datasources.get("MAG").getKey())
|
373 |
|
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get("MAG").getValue(), "::")))
|
374 |
|
|
.build());
|
375 |
|
|
|
376 |
|
|
|
377 |
|
|
|
378 |
|
|
rel.setChild(false);
|
379 |
|
|
roaf.setRel(rel.build());
|
380 |
|
|
|
381 |
|
|
result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution", organization.getEntity().getId(), roaf.build().toByteArray() ));
|
382 |
|
|
|
383 |
|
|
|
384 |
|
|
//Create a relation Organization --> Result
|
385 |
|
|
rel.setTarget(publication.getEntity().getId());
|
386 |
|
|
rel.setSource(organization.getEntity().getId());
|
387 |
|
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
|
388 |
|
|
|
389 |
|
|
|
390 |
|
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
391 |
|
|
.setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
|
392 |
|
|
.build());
|
393 |
|
|
rel_instance.setAffiliation(affiliationRel.build());
|
394 |
|
|
rel.setResultOrganization(rel_instance.build());
|
395 |
|
|
roaf.setRel(rel.build());
|
396 |
|
|
result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf", publication.getEntity().getId(), roaf.build().toByteArray()));
|
397 |
|
|
|
398 |
|
|
return result;
|
399 |
|
|
|
400 |
|
|
}
|
401 |
|
|
|
402 |
|
|
private static boolean hasJSONArrayField(final JsonObject root, final String key) {
|
403 |
|
|
return root.has(key) && root.get(key).isJsonArray();
|
404 |
|
|
}
|
405 |
|
|
|
406 |
|
|
private static void settingRelevantDate(JsonObject rootElement, ResultProtos.Result.Metadata.Builder metadata , final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) {
|
407 |
|
|
//Adding date
|
408 |
|
|
String date = getStringValue(rootElement,jsonKey);
|
409 |
|
|
if (date == null)
|
410 |
|
|
return;
|
411 |
|
|
if (date.length() == 4) {
|
412 |
|
|
date += "-01-01";
|
413 |
|
|
}
|
414 |
|
|
if (isValidDate(date)) {
|
415 |
|
|
if (addToDateOfAcceptance)
|
416 |
|
|
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
|
417 |
|
|
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
|
418 |
|
|
.setValue(date)
|
419 |
|
|
.setQualifier(getQualifier(dictionaryKey,"dnet:dataCite_date"))
|
420 |
|
|
.build());
|
421 |
|
|
}
|
422 |
|
|
}
|
423 |
|
|
|
424 |
|
|
|
425 |
|
|
public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
|
426 |
|
|
FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
|
427 |
|
|
if (StringUtils.contains(value, "orcid.org")){
|
428 |
|
|
return pid.setValue(value)
|
429 |
|
|
.setKey("ORCID").build();
|
430 |
|
|
}
|
431 |
|
|
if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
|
432 |
|
|
return pid.setValue(value)
|
433 |
|
|
.setKey("MAG Identifier").build();
|
434 |
|
|
}
|
435 |
|
|
return pid.setValue(value)
|
436 |
|
|
.setKey("URL").build();
|
437 |
|
|
}
|
438 |
|
|
|
439 |
|
|
|
440 |
|
|
public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
|
441 |
|
|
final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
|
442 |
|
|
final List<String> magId = new ArrayList<>();
|
443 |
|
|
getArrayObjects(affiliation, "identifiers").forEach(it -> {
|
444 |
|
|
if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
|
445 |
|
|
affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get("MAG"));
|
446 |
|
|
magId.add(it.get("value").getAsString());
|
447 |
|
|
}
|
448 |
|
|
else
|
449 |
|
|
affiliationIdentifiers.put( it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
|
450 |
|
|
});
|
451 |
|
|
if (magId.size() > 0) {
|
452 |
|
|
final String microsoftID = magId.get(0);
|
453 |
|
|
OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
454 |
|
|
oaf.setKind(KindProtos.Kind.entity);
|
455 |
|
|
OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
|
456 |
|
|
entity.setType(TypeProtos.Type.organization);
|
457 |
|
|
entity.setId("20|microsoft___::"+AbstractDNetXsltFunctions.md5(microsoftID));
|
458 |
|
|
final String id =datasources.get(affiliation.get("provenance").getAsString()).getValue();
|
459 |
|
|
final String name =datasources.get(affiliation.get("provenance").getAsString()).getKey();
|
460 |
|
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
461 |
|
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
462 |
|
|
.setValue(name)
|
463 |
|
|
.setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, "::")))
|
464 |
|
|
.build();
|
465 |
|
|
entity.addCollectedfrom(collectedFrom);
|
466 |
|
|
} else {
|
467 |
|
|
return null;
|
468 |
|
|
}
|
469 |
|
|
entity.addOriginalId(microsoftID);
|
470 |
|
|
|
471 |
|
|
affiliationIdentifiers.forEach((key, value) -> entity.addPid(
|
472 |
|
|
FieldTypeProtos.StructuredProperty.newBuilder()
|
473 |
|
|
.setQualifier(value)
|
474 |
|
|
.setValue(key)
|
475 |
|
|
.build()));
|
476 |
|
|
|
477 |
|
|
final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
|
478 |
|
|
organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
|
479 |
|
|
.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
|
480 |
|
|
.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
|
481 |
|
|
.build());
|
482 |
|
|
|
483 |
|
|
entity.setOrganization(organization);
|
484 |
|
|
oaf.setEntity(entity);
|
485 |
|
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
486 |
|
|
.setInvisible(false)
|
487 |
|
|
.setDeletedbyinference(false)
|
488 |
|
|
.setInferred(false)
|
489 |
|
|
.setTrust("0.9")
|
490 |
|
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
491 |
|
|
.build());
|
492 |
|
|
return oaf.build();
|
493 |
|
|
}
|
494 |
|
|
return null;
|
495 |
|
|
}
|
496 |
|
|
|
497 |
|
|
public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {
|
498 |
|
|
|
499 |
|
|
final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
|
500 |
|
|
|
501 |
|
|
List<JsonObject> authors = getArrayObjects(root, "authors");
|
502 |
|
|
|
503 |
|
|
final AtomicInteger counter = new AtomicInteger();
|
504 |
|
|
|
505 |
|
|
List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
|
506 |
|
|
final String given = getStringValue(author, "given");
|
507 |
|
|
final String family = getStringValue(author, "family");
|
508 |
|
|
String fullname = getStringValue(author, "fullname");
|
509 |
|
|
|
510 |
|
|
if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
|
511 |
|
|
fullname = String.format("%s %s", given, family);
|
512 |
|
|
}
|
513 |
53556
|
sandro.lab
|
|
514 |
|
|
if (StringUtils.isBlank(fullname)){
|
515 |
|
|
return null;
|
516 |
|
|
|
517 |
|
|
}
|
518 |
53554
|
sandro.lab
|
final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
|
519 |
|
|
|
520 |
|
|
if (StringUtils.isNotBlank(given))
|
521 |
|
|
abuilder.setName(given);
|
522 |
|
|
if (StringUtils.isNotBlank(family))
|
523 |
|
|
abuilder.setSurname(family);
|
524 |
|
|
if (StringUtils.isNotBlank(fullname))
|
525 |
|
|
abuilder.setFullname(fullname);
|
526 |
|
|
|
527 |
|
|
final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
|
528 |
|
|
final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
|
529 |
|
|
|
530 |
|
|
authorAffiliation.forEach(it ->
|
531 |
|
|
{
|
532 |
|
|
OafProtos.Oaf org = createOrganizationFromJSON(it);
|
533 |
|
|
if (org != null) {
|
534 |
|
|
affiliations.put(org.getEntity().getId(), org);
|
535 |
|
|
abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
|
536 |
|
|
}
|
537 |
|
|
});
|
538 |
|
|
identifiers.stream().map(id -> {
|
539 |
|
|
final String value = id.get("value").getAsString();
|
540 |
|
|
return extractIdentifier(value);
|
541 |
53588
|
sandro.lab
|
}).collect(
|
542 |
|
|
Collectors.toMap(
|
543 |
|
|
FieldTypeProtos.KeyValue::getKey,
|
544 |
|
|
Function.identity(),
|
545 |
|
|
(a,b) -> a
|
546 |
|
|
)).values().forEach(abuilder::addPid);
|
547 |
53554
|
sandro.lab
|
abuilder.setRank(counter.getAndIncrement());
|
548 |
|
|
|
549 |
|
|
return abuilder.build();
|
550 |
|
|
|
551 |
53556
|
sandro.lab
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
552 |
53554
|
sandro.lab
|
|
553 |
|
|
return new Pair<> ( collect,affiliations.values() );
|
554 |
|
|
}
|
555 |
|
|
|
556 |
|
|
|
557 |
|
|
|
558 |
|
|
|
559 |
|
|
|
560 |
|
|
|
561 |
|
|
}
|