1
|
/**
|
2
|
*
|
3
|
*/
|
4
|
package eu.dnetlib.dli.resolver;
|
5
|
|
6
|
import java.util.ArrayList;
|
7
|
import java.util.List;
|
8
|
|
9
|
import com.google.common.collect.Lists;
|
10
|
import com.google.gson.JsonArray;
|
11
|
import com.google.gson.JsonElement;
|
12
|
import com.google.gson.JsonObject;
|
13
|
import com.google.gson.JsonParser;
|
14
|
import eu.dnetlib.dli.resolver.model.CompletionStatus;
|
15
|
import eu.dnetlib.dli.resolver.model.DLIObjectProvenance;
|
16
|
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
|
17
|
import eu.dnetlib.pid.resolver.model.ObjectType;
|
18
|
import org.apache.commons.lang3.ArrayUtils;
|
19
|
import org.apache.commons.lang3.StringUtils;
|
20
|
import org.apache.commons.logging.Log;
|
21
|
import org.apache.commons.logging.LogFactory;
|
22
|
|
23
|
/**
|
24
|
* @author sandro
|
25
|
*/
|
26
|
public class CrossRefParserJSON {
|
27
|
|
28
|
/**
|
29
|
* The Constant CROSSREF_NS_PREFIX.
|
30
|
*/
|
31
|
private static final String CROSSREF_NS_PREFIX = "crossref____";
|
32
|
private static final Log log = LogFactory.getLog(CrossRefParserJSON.class); // NOPMD by marko on 11/24/08 5:02 PM
|
33
|
|
34
|
public DLIResolvedObject parseRecord(final String record) {
|
35
|
|
36
|
log.debug("Start to parsing " + record);
|
37
|
|
38
|
if (record == null) return null;
|
39
|
JsonElement jElement = new JsonParser().parse(record);
|
40
|
|
41
|
JsonElement source = jElement.getAsJsonObject().get("_source");
|
42
|
if (source== null || !source.isJsonObject())
|
43
|
return null;
|
44
|
|
45
|
final JsonObject message = source.getAsJsonObject();
|
46
|
DLIResolvedObject currentObject = new DLIResolvedObject();
|
47
|
|
48
|
if (message.get("DOI") != null) {
|
49
|
final String doi = message.get("DOI").getAsString();
|
50
|
log.debug("found doi" + doi);
|
51
|
currentObject.setPid(doi);
|
52
|
currentObject.setPidType("doi");
|
53
|
}
|
54
|
|
55
|
if ((!message.get("created").isJsonNull()) && (message.getAsJsonObject("created").get("date-time") != null)) {
|
56
|
currentObject.setDate(message.getAsJsonObject("created").get("date-time").getAsString());
|
57
|
}
|
58
|
|
59
|
if (message.get("title")!= null && !message.get("title").isJsonNull() && message.get("title").isJsonArray() ) {
|
60
|
final List<String> titles = new ArrayList<>();
|
61
|
message.get("title").getAsJsonArray().forEach(
|
62
|
it ->titles.add(it.getAsString())
|
63
|
);
|
64
|
currentObject.setTitles(titles);
|
65
|
}
|
66
|
|
67
|
if (message.get("author") != null && !message.get("author").isJsonNull()) {
|
68
|
JsonArray author = message.getAsJsonArray("author");
|
69
|
List<String> authorList = Lists.newArrayList();
|
70
|
for (JsonElement anAuthor : author) {
|
71
|
JsonObject currentAuth = anAuthor.getAsJsonObject();
|
72
|
|
73
|
String family = "";
|
74
|
String given = "";
|
75
|
if (currentAuth != null && currentAuth.get("family") != null && !currentAuth.get("family").isJsonNull()) {
|
76
|
family = currentAuth.get("family").getAsString();
|
77
|
}
|
78
|
if (currentAuth != null && currentAuth.get("given") != null && !currentAuth.get("given").isJsonNull()) {
|
79
|
given = currentAuth.get("given").getAsString();
|
80
|
}
|
81
|
authorList.add(String.format("%s %s", family, given));
|
82
|
}
|
83
|
currentObject.setAuthors(authorList);
|
84
|
}
|
85
|
final DLIObjectProvenance provenance = new DLIObjectProvenance();
|
86
|
DLIPIDResolver.setDatasourceProvenance(provenance, CROSSREF_NS_PREFIX);
|
87
|
if (message.get("publisher") != null && !message.get("publisher").isJsonNull()) {
|
88
|
provenance.setPublisher(message.get("publisher").getAsString());
|
89
|
}
|
90
|
currentObject.fixContribution(provenance);
|
91
|
currentObject.setDatasourceProvenance(Lists.newArrayList(provenance));
|
92
|
currentObject.setCompletionStatus(CompletionStatus.complete.toString());
|
93
|
|
94
|
if (!message.get("type").isJsonNull()) {
|
95
|
final String type = message.get("type").getAsString();
|
96
|
if ("component".equals(type) || "dataset".equals(type)) {
|
97
|
currentObject.setType(ObjectType.dataset);
|
98
|
} else
|
99
|
currentObject.setType(ObjectType.publication);
|
100
|
}
|
101
|
return currentObject;
|
102
|
|
103
|
}
|
104
|
|
105
|
/**
|
106
|
* @param fullCitations
|
107
|
* @param result
|
108
|
*/
|
109
|
private void parseFullCitation(final String fullCitations, final DLIResolvedObject result) {
|
110
|
if (StringUtils.isEmpty(fullCitations))
|
111
|
return;
|
112
|
|
113
|
String beforeTitles[] = fullCitations.split("'");
|
114
|
|
115
|
if (ArrayUtils.isEmpty(beforeTitles))
|
116
|
return;
|
117
|
|
118
|
String[] data = beforeTitles[0].split(",");
|
119
|
if (ArrayUtils.isEmpty(data)) return;
|
120
|
if (data.length > 1) {
|
121
|
String date = data[data.length - 2];
|
122
|
result.setDate(date.trim());
|
123
|
List<String> authors = Lists.newArrayList();
|
124
|
for (int i = 0; i < data.length - 2; i++) {
|
125
|
authors.add(data[i]);
|
126
|
}
|
127
|
result.setAuthors(authors);
|
128
|
}
|
129
|
}
|
130
|
}
|