30 |
30 |
|
31 |
31 |
public class DOIBoostToActions {
|
32 |
32 |
|
33 |
|
public static final String MAG = "MAG";
|
34 |
|
public static final String ORCID = "ORCID";
|
35 |
|
public static final String CROSSREF = "Crossref";
|
36 |
|
public static final String UNPAYWALL = "UnpayWall";
|
|
33 |
public static final String MAG = "MAG";
|
|
34 |
public static final String ORCID = "ORCID";
|
|
35 |
public static final String CROSSREF = "Crossref";
|
|
36 |
public static final String UNPAYWALL = "UnpayWall";
|
37 |
37 |
|
38 |
|
public static final String GRID_AC = "grid.ac";
|
39 |
|
public static final String WIKPEDIA = "wikpedia";
|
|
38 |
public static final String GRID_AC = "grid.ac";
|
|
39 |
public static final String WIKPEDIA = "wikpedia";
|
40 |
40 |
|
41 |
|
public final static String doiBoostNSPREFIX ="doiboost____";
|
42 |
|
public static final String OPENAIRE_PREFIX = "openaire____";
|
|
41 |
public final static String doiBoostNSPREFIX = "doiboost____";
|
|
42 |
public static final String OPENAIRE_PREFIX = "openaire____";
|
43 |
43 |
|
44 |
|
public static final String SEPARATOR = "::";
|
|
44 |
public static final String SEPARATOR = "::";
|
45 |
45 |
|
46 |
|
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
|
47 |
|
put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
|
48 |
|
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
49 |
|
put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
|
50 |
|
put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
|
|
46 |
private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
|
|
47 |
put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
|
|
48 |
put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
|
|
49 |
put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
|
|
50 |
put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
|
51 |
51 |
|
52 |
|
}};
|
|
52 |
}};
|
53 |
53 |
|
54 |
|
private static String decompressAbstract(final String abstractCompressed) {
|
55 |
|
try {
|
56 |
|
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
|
57 |
|
final Inflater decompresser = new Inflater();
|
58 |
|
decompresser.setInput(byteArray);
|
59 |
|
final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
|
60 |
|
byte[] buffer = new byte[8192];
|
61 |
|
while (!decompresser.finished()) {
|
62 |
|
int size = decompresser.inflate(buffer);
|
63 |
|
bos.write(buffer, 0, size);
|
64 |
|
}
|
65 |
|
byte[] unzippeddata = bos.toByteArray();
|
66 |
|
decompresser.end();
|
67 |
|
return new String(unzippeddata);
|
68 |
|
} catch (Throwable e) {
|
69 |
|
System.out.println("Wrong abstract:"+ abstractCompressed);
|
70 |
|
throw new RuntimeException(e);
|
71 |
|
}
|
72 |
|
}
|
|
54 |
private static String decompressAbstract(final String abstractCompressed) {
|
|
55 |
try {
|
|
56 |
byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
|
|
57 |
final Inflater decompresser = new Inflater();
|
|
58 |
decompresser.setInput(byteArray);
|
|
59 |
final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
|
|
60 |
byte[] buffer = new byte[8192];
|
|
61 |
while (!decompresser.finished()) {
|
|
62 |
int size = decompresser.inflate(buffer);
|
|
63 |
bos.write(buffer, 0, size);
|
|
64 |
}
|
|
65 |
byte[] unzippeddata = bos.toByteArray();
|
|
66 |
decompresser.end();
|
|
67 |
return new String(unzippeddata);
|
|
68 |
} catch (Throwable e) {
|
|
69 |
System.out.println("Wrong abstract:" + abstractCompressed);
|
|
70 |
throw new RuntimeException(e);
|
|
71 |
}
|
|
72 |
}
|
73 |
73 |
|
74 |
|
/** Scheme id and scheme name shared by all persistent-identifier qualifiers built in this class. */
public static final String PID_TYPES = "dnet:pid_types";

/** Maps an affiliation identifier scheme ({@code MAG}, {@code GRID_AC}, {@code WIKPEDIA}) to its PID qualifier. */
private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType = new HashMap<>();

static {
    // Filled in a static block instead of double-brace initialization, which would create
    // an anonymous HashMap subclass only to run these put() calls.
    affiliationPIDType.put(MAG, FieldTypeProtos.Qualifier.newBuilder()
            .setClassid("mag_id")
            .setClassname("Microsoft Academic Graph Identifier")
            .setSchemename(PID_TYPES)
            .setSchemeid(PID_TYPES)
            .build());
    affiliationPIDType.put(GRID_AC, getQualifier("grid", PID_TYPES));
    affiliationPIDType.put(WIKPEDIA, getQualifier("urn", PID_TYPES));
}
|
80 |
81 |
|
81 |
|
/**
 * Mapping loaded from {@code mapping_typologies.json}: publication type to a map of attributes.
 * The entries "value" and "cobj" are the ones read when building instances.
 * Stays {@code null} if the resource could not be read (see the static initializer below).
 */
static Map<String, Map<String, String>> typologiesMapping;

static {
    // try-with-resources closes the classpath stream, which was previously left open.
    try (InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json")) {
        final String tt = IOUtils.toString(is);
        // Gson is given the raw Map class, so this assignment is an unchecked conversion.
        typologiesMapping = new Gson().fromJson(tt, Map.class);
    } catch (IOException e) {
        // Best effort, as before: the failure is reported and typologiesMapping is left unset.
        e.printStackTrace();
    }
}
|
92 |
93 |
|
93 |
|
protected static boolean isValid(final JsonObject rootElement, final Reporter context){
|
|
94 |
protected static boolean isValid(final JsonObject rootElement, final Reporter context) {
|
94 |
95 |
|
95 |
|
final String doi = getStringValue(rootElement, "doi");
|
96 |
|
if (doi == null) {
|
97 |
|
context.incrementCounter("filtered","no_doi", 1);
|
98 |
|
return false;
|
99 |
|
}
|
100 |
|
final String type = getStringValue(rootElement,"type");
|
101 |
|
if (!typologiesMapping.containsKey(type)){
|
102 |
|
context.incrementCounter("filtered","unknowntype_"+type, 1);
|
103 |
|
return false;
|
104 |
|
}
|
105 |
|
// fixes #4360 (test publisher)
|
106 |
|
final String publisher = getStringValue(rootElement, "publisher");
|
107 |
|
if(publisher.equalsIgnoreCase("Test accounts")){
|
108 |
|
context.incrementCounter("filtered","test_publisher", 1);
|
109 |
|
return false;
|
110 |
|
}
|
|
96 |
final String doi = getStringValue(rootElement, "doi");
|
|
97 |
if (doi == null) {
|
|
98 |
context.incrementCounter("filtered", "no_doi", 1);
|
|
99 |
return false;
|
|
100 |
}
|
|
101 |
final String type = getStringValue(rootElement, "type");
|
|
102 |
if (!typologiesMapping.containsKey(type)) {
|
|
103 |
context.incrementCounter("filtered", "unknowntype_" + type, 1);
|
|
104 |
return false;
|
|
105 |
}
|
|
106 |
// fixes #4360 (test publisher)
|
|
107 |
final String publisher = getStringValue(rootElement, "publisher");
|
|
108 |
if (publisher.equalsIgnoreCase("Test accounts")) {
|
|
109 |
context.incrementCounter("filtered", "test_publisher", 1);
|
|
110 |
return false;
|
|
111 |
}
|
111 |
112 |
|
112 |
|
List<JsonObject> authors = getArrayObjects(rootElement, "authors");
|
113 |
|
boolean hasAuthor = false;
|
114 |
|
for(JsonObject author : authors){
|
115 |
|
final String given = getStringValue(author, "given");
|
116 |
|
final String family = getStringValue(author, "family");
|
117 |
|
final String fullname = getStringValue(author, "fullname");
|
118 |
|
if (StringUtils.isNotBlank(fullname) || (StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family))) {
|
119 |
|
hasAuthor = true;
|
120 |
|
}
|
121 |
|
// fixes #4368
|
122 |
|
if(StringUtils.isNotBlank(given) && given.equalsIgnoreCase("Addie") && StringUtils.isNotBlank(family) && family.equalsIgnoreCase("Jackson") && publisher.equalsIgnoreCase("Elsevier BV")){
|
123 |
|
context.incrementCounter("filtered","addieJackson", 1);
|
124 |
|
return false;
|
125 |
|
}
|
126 |
|
}
|
127 |
|
if(!hasAuthor){
|
128 |
|
context.incrementCounter("filtered","no_authors", 1);
|
129 |
|
return false;
|
130 |
|
}
|
131 |
|
// fixes #4360
|
132 |
|
if(getCleanedTitles(rootElement).isEmpty()){
|
133 |
|
context.incrementCounter("filtered","invalid_title", 1);
|
134 |
|
return false;
|
135 |
|
}
|
|
113 |
List<JsonObject> authors = getArrayObjects(rootElement, "authors");
|
|
114 |
boolean hasAuthors = false;
|
|
115 |
for (JsonObject author : authors) {
|
|
116 |
final String given = getStringValue(author, "given");
|
|
117 |
final String family = getStringValue(author, "family");
|
|
118 |
String fullname = getStringValue(author, "fullname");
|
|
119 |
if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
|
|
120 |
fullname = String.format("%s %s", given, family);
|
|
121 |
}
|
|
122 |
// fixes #4368
|
|
123 |
if (fullname.equalsIgnoreCase("Addie Jackson") && publisher.equalsIgnoreCase("Elsevier BV")) {
|
|
124 |
context.incrementCounter("invalid_author", "addiejackson", 1);
|
|
125 |
context.incrementCounter("filtered", "invalid_authors", 1);
|
|
126 |
return false;
|
|
127 |
}
|
|
128 |
if (isValidAuthorName(fullname, context)) hasAuthors = true;
|
|
129 |
}
|
136 |
130 |
|
137 |
|
return true;
|
138 |
|
}
|
|
131 |
if (!hasAuthors) {
|
|
132 |
context.incrementCounter("filtered", "invalid_authors", 1);
|
|
133 |
return false;
|
|
134 |
}
|
|
135 |
// fixes #4360
|
|
136 |
if (getCleanedTitles(rootElement).isEmpty()) {
|
|
137 |
context.incrementCounter("filtered", "invalid_title", 1);
|
|
138 |
return false;
|
|
139 |
}
|
139 |
140 |
|
140 |
|
private static List<String> getCleanedTitles(final JsonObject rootElement){
|
141 |
|
List<String> titles = getArrayValues(rootElement, "title");
|
142 |
|
return titles.stream().filter( t -> StringUtils.isNotBlank(t) && !t.equalsIgnoreCase("[NO TITLE AVAILABLE]")).collect(Collectors.toList());
|
143 |
|
}
|
|
141 |
return true;
|
|
142 |
}
|
144 |
143 |
|
|
144 |
private static List<String> getCleanedTitles(final JsonObject rootElement) {
|
|
145 |
List<String> titles = getArrayValues(rootElement, "title");
|
|
146 |
return titles.stream().filter(t -> StringUtils.isNotBlank(t) && !t.equalsIgnoreCase("[NO TITLE AVAILABLE]")).collect(Collectors.toList());
|
|
147 |
}
|
145 |
148 |
|
146 |
|
public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement, final ActionFactory factory, final String setName, final Agent agent, boolean invisible,
|
147 |
|
final boolean onlyOrganization, final Reporter context) {
|
|
149 |
private static boolean isValidAuthorName(final String fullName, final Reporter context) {
|
|
150 |
if (StringUtils.isBlank(fullName)) {
|
|
151 |
if(context != null) context.incrementCounter("invalid_author", "blank", 1);
|
|
152 |
return false;
|
|
153 |
}
|
|
154 |
// fixes #4391 and subtasks related to DOIBoost
|
|
155 |
switch (fullName) {
|
|
156 |
case ",":
|
|
157 |
case "none none":
|
|
158 |
case "none &na;":
|
|
159 |
case "(:null)":
|
|
160 |
case "&na; &na;": {
|
|
161 |
if(context != null) context.incrementCounter("invalid_author", "value_" + fullName, 1);
|
|
162 |
return false;
|
|
163 |
}
|
|
164 |
}
|
|
165 |
return true;
|
|
166 |
}
|
148 |
167 |
|
149 |
|
if(!isValid(rootElement, context)) return null;
|
|
168 |
public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement,
|
|
169 |
final ActionFactory factory,
|
|
170 |
final String setName,
|
|
171 |
final Agent agent,
|
|
172 |
boolean invisible,
|
|
173 |
final boolean onlyOrganization,
|
|
174 |
final Reporter context) {
|
150 |
175 |
|
151 |
|
//Create OAF Proto
|
|
176 |
if (!isValid(rootElement, context)) return null;
|
152 |
177 |
|
153 |
|
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
154 |
|
//Add Data Info
|
155 |
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
156 |
|
.setInvisible(invisible)
|
157 |
|
.setDeletedbyinference(false)
|
158 |
|
.setInferred(false)
|
159 |
|
.setTrust("0.9")
|
160 |
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
161 |
|
.build());
|
|
178 |
//Create OAF Proto
|
162 |
179 |
|
163 |
|
//Adding Kind
|
164 |
|
oaf.setKind(KindProtos.Kind.entity);
|
|
180 |
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
|
181 |
//Add Data Info
|
|
182 |
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
|
183 |
.setInvisible(invisible)
|
|
184 |
.setDeletedbyinference(false)
|
|
185 |
.setInferred(false)
|
|
186 |
.setTrust("0.9")
|
|
187 |
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
|
188 |
.build());
|
165 |
189 |
|
166 |
|
//creating Result Proto
|
167 |
|
final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
|
|
190 |
//Adding Kind
|
|
191 |
oaf.setKind(KindProtos.Kind.entity);
|
168 |
192 |
|
169 |
|
entity.setDateofcollection("2019-02-15");
|
|
193 |
//creating Result Proto
|
|
194 |
final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
|
170 |
195 |
|
|
196 |
entity.setDateofcollection("2019-02-15");
|
171 |
197 |
|
|
198 |
if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()) {
|
|
199 |
StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
|
|
200 |
.map(JsonElement::getAsString)
|
|
201 |
.forEach(cf -> {
|
|
202 |
final String id = datasources.get(cf.toLowerCase()).getValue();
|
|
203 |
final String name = datasources.get(cf.toLowerCase()).getKey();
|
|
204 |
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
|
205 |
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
|
206 |
.setValue(name)
|
|
207 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
|
208 |
.build();
|
|
209 |
entity.addCollectedfrom(collectedFrom);
|
|
210 |
}
|
|
211 |
}
|
|
212 |
);
|
|
213 |
}
|
|
214 |
//Adding identifier
|
|
215 |
final String doi = getStringValue(rootElement, "doi");
|
|
216 |
entity.addOriginalId(doi);
|
172 |
217 |
|
173 |
|
if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
|
174 |
|
StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
|
175 |
|
.map(JsonElement::getAsString)
|
176 |
|
.forEach(cf -> {
|
177 |
|
final String id = datasources.get(cf.toLowerCase()).getValue();
|
178 |
|
final String name = datasources.get(cf.toLowerCase()).getKey();
|
179 |
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
180 |
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
181 |
|
.setValue(name)
|
182 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
183 |
|
.build();
|
184 |
|
entity.addCollectedfrom(collectedFrom);
|
185 |
|
}
|
186 |
|
}
|
187 |
|
);
|
188 |
|
}
|
189 |
|
//Adding identifier
|
190 |
|
final String doi = getStringValue(rootElement, "doi");
|
191 |
|
entity.addOriginalId(doi);
|
|
218 |
final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
|
|
219 |
entity.setId(sourceId);
|
192 |
220 |
|
193 |
|
final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
|
194 |
|
entity.setId(sourceId);
|
|
221 |
entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
|
|
222 |
.setValue(doi)
|
|
223 |
.setQualifier(getQualifier("doi", PID_TYPES))
|
|
224 |
.build());
|
195 |
225 |
|
196 |
|
entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
|
197 |
|
.setValue(doi)
|
198 |
|
.setQualifier(getQualifier("doi", PID_TYPES))
|
199 |
|
.build());
|
|
226 |
//Create Result Field
|
|
227 |
ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
|
200 |
228 |
|
|
229 |
final String type = getStringValue(rootElement, "type");
|
201 |
230 |
|
202 |
|
//Create Result Field
|
203 |
|
ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
|
|
231 |
//Adding Instances
|
|
232 |
final String typeValue = typologiesMapping.get(type).get("value");
|
|
233 |
final String cobjValue = typologiesMapping.get(type).get("cobj");
|
204 |
234 |
|
205 |
|
final String type = getStringValue(rootElement,"type");
|
|
235 |
// TODO: workaround for #4362: remove it when UnpayWall is correctly mapped
|
|
236 |
List<JsonObject> unpaywallLicenses = getArrayObjects(rootElement, "license").stream().filter(prov -> {
|
|
237 |
String provS = getStringValue(prov, "provenance");
|
|
238 |
if (StringUtils.isNotBlank(provS) && provS.equalsIgnoreCase(UNPAYWALL)) return true;
|
|
239 |
else return false;
|
|
240 |
}).collect(Collectors.toList());
|
206 |
241 |
|
207 |
|
//Adding Instances
|
208 |
|
final String typeValue = typologiesMapping.get(type).get("value");
|
209 |
|
final String cobjValue = typologiesMapping.get(type).get("cobj");
|
|
242 |
Stream.concat(unpaywallLicenses.stream(), getArrayObjects(rootElement, "instances").stream()).map(it ->
|
|
243 |
{
|
|
244 |
ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
|
|
245 |
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
|
246 |
.setClassid(cobjValue)
|
|
247 |
.setClassname(typeValue)
|
|
248 |
.setSchemeid("dnet:publication_resource")
|
|
249 |
.setSchemename("dnet:publication_resource")
|
|
250 |
.build());
|
|
251 |
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
|
252 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
|
|
253 |
.setValue("Unknown Repository")
|
|
254 |
.build());
|
210 |
255 |
|
211 |
|
// TODO: workaround for #4362: remove it when UnpayWall is correctly mapped
|
212 |
|
List<JsonObject> unpaywallLicenses = getArrayObjects(rootElement, "license").stream().filter(prov -> {
|
213 |
|
String provS = getStringValue(prov,"provenance");
|
214 |
|
if(StringUtils.isNotBlank(provS) && provS.equalsIgnoreCase(UNPAYWALL)) return true;
|
215 |
|
else return false;
|
216 |
|
}).collect(Collectors.toList());
|
|
256 |
final String acc_class_id = it.get("access-rights").getAsString();
|
|
257 |
String acc_class_value;
|
|
258 |
switch (acc_class_id) {
|
|
259 |
case "OPEN": {
|
|
260 |
acc_class_value = "open access";
|
|
261 |
break;
|
|
262 |
}
|
|
263 |
case "CLOSED": {
|
|
264 |
acc_class_value = "closed access";
|
|
265 |
break;
|
|
266 |
}
|
|
267 |
default: {
|
|
268 |
acc_class_value = "not available";
|
|
269 |
}
|
217 |
270 |
|
218 |
|
Stream.concat(unpaywallLicenses.stream(), getArrayObjects(rootElement, "instances").stream()).map(it ->
|
219 |
|
{
|
220 |
|
ResultProtos.Result.Instance.Builder instance= ResultProtos.Result.Instance.newBuilder();
|
221 |
|
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
222 |
|
.setClassid(cobjValue)
|
223 |
|
.setClassname(typeValue)
|
224 |
|
.setSchemeid("dnet:publication_resource")
|
225 |
|
.setSchemename("dnet:publication_resource")
|
226 |
|
.build());
|
227 |
|
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
228 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
|
229 |
|
.setValue("Unknown Repository")
|
230 |
|
.build());
|
|
271 |
}
|
231 |
272 |
|
232 |
|
final String acc_class_id =it.get("access-rights").getAsString();
|
233 |
|
String acc_class_value;
|
234 |
|
switch (acc_class_id){
|
235 |
|
case "OPEN": {
|
236 |
|
acc_class_value = "open access";
|
237 |
|
break;
|
238 |
|
}
|
239 |
|
case "CLOSED": {
|
240 |
|
acc_class_value = "closed access";
|
241 |
|
break;
|
242 |
|
}
|
243 |
|
default: {
|
244 |
|
acc_class_value = "not available";
|
245 |
|
}
|
|
273 |
instance.addUrl(it.get("url").getAsString());
|
|
274 |
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
|
275 |
.setClassid(acc_class_id)
|
|
276 |
.setClassname(acc_class_value)
|
|
277 |
.setSchemeid("dnet:access_modes")
|
|
278 |
.setSchemename("dnet:access_modes")
|
|
279 |
.build());
|
246 |
280 |
|
247 |
|
}
|
|
281 |
final String id = datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
|
|
282 |
final String name = datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
|
|
283 |
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
|
284 |
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
|
285 |
.setValue(name)
|
|
286 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
|
287 |
.build();
|
248 |
288 |
|
249 |
|
instance.addUrl(it.get("url").getAsString());
|
250 |
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
251 |
|
.setClassid(acc_class_id)
|
252 |
|
.setClassname(acc_class_value)
|
253 |
|
.setSchemeid("dnet:access_modes")
|
254 |
|
.setSchemename("dnet:access_modes")
|
255 |
|
.build());
|
|
289 |
instance.setCollectedfrom(collectedFrom);
|
|
290 |
}
|
256 |
291 |
|
257 |
|
final String id =datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
|
258 |
|
final String name =datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
|
259 |
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
260 |
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
261 |
|
.setValue(name)
|
262 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
263 |
|
.build();
|
|
292 |
return instance.build();
|
|
293 |
}).forEach(result::addInstance);
|
264 |
294 |
|
265 |
|
instance.setCollectedfrom(collectedFrom);
|
266 |
|
}
|
|
295 |
//Adding DOI URL as Instance
|
|
296 |
final String doiURL = getStringValue(rootElement, "doi-url");
|
|
297 |
JsonObject hostedByOpenAire = null;
|
|
298 |
if (rootElement.has("hostedByOpenAire")) {
|
|
299 |
hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
|
|
300 |
}
|
267 |
301 |
|
268 |
|
return instance.build();
|
269 |
|
}).forEach(result::addInstance);
|
|
302 |
if (StringUtils.isNotBlank(doiURL)) {
|
|
303 |
final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
|
|
304 |
instance.addUrl(doiURL);
|
|
305 |
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
|
306 |
.setClassid(cobjValue)
|
|
307 |
.setClassname(typeValue)
|
|
308 |
.setSchemeid("dnet:publication_resource")
|
|
309 |
.setSchemename("dnet:publication_resource")
|
|
310 |
.build());
|
|
311 |
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
|
312 |
.setClassid("CLOSED")
|
|
313 |
.setClassname("Closed Access")
|
|
314 |
.setSchemeid("dnet:access_modes")
|
|
315 |
.setSchemename("dnet:access_modes")
|
|
316 |
.build());
|
|
317 |
instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
|
318 |
.setValue(CROSSREF)
|
|
319 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
|
|
320 |
.build());
|
270 |
321 |
|
271 |
|
//Adding DOI URL as Instance
|
272 |
|
final String doiURL = getStringValue(rootElement, "doi-url");
|
273 |
|
JsonObject hostedByOpenAire = null;
|
274 |
|
if (rootElement.has("hostedByOpenAire")) {
|
275 |
|
hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
|
276 |
|
}
|
|
322 |
if (hostedByOpenAire == null)
|
|
323 |
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
|
324 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
|
|
325 |
.setValue("Unknown Repository")
|
|
326 |
.build());
|
|
327 |
else {
|
|
328 |
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
|
329 |
.setKey(AbstractDNetXsltFunctions.oafSplitId("datasource", hostedByOpenAire.get("id").getAsString()))
|
|
330 |
.setValue(hostedByOpenAire.get("name").getAsString())
|
|
331 |
.build());
|
|
332 |
}
|
277 |
333 |
|
278 |
|
if (StringUtils.isNotBlank(doiURL)) {
|
279 |
|
final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
|
280 |
|
instance.addUrl(doiURL);
|
281 |
|
instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
|
282 |
|
.setClassid(cobjValue)
|
283 |
|
.setClassname(typeValue)
|
284 |
|
.setSchemeid("dnet:publication_resource")
|
285 |
|
.setSchemename("dnet:publication_resource")
|
286 |
|
.build());
|
287 |
|
instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
|
288 |
|
.setClassid("CLOSED")
|
289 |
|
.setClassname("Closed Access")
|
290 |
|
.setSchemeid("dnet:access_modes")
|
291 |
|
.setSchemename("dnet:access_modes")
|
292 |
|
.build());
|
293 |
|
instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
294 |
|
.setValue(CROSSREF)
|
295 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
|
296 |
|
.build());
|
|
334 |
result.addInstance(instance);
|
|
335 |
}
|
297 |
336 |
|
298 |
|
if (hostedByOpenAire == null)
|
299 |
|
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
300 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
|
301 |
|
.setValue("Unknown Repository")
|
302 |
|
.build());
|
303 |
|
else{
|
304 |
|
instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
|
305 |
|
.setKey(AbstractDNetXsltFunctions.oafSplitId("datasource",hostedByOpenAire.get("id").getAsString()))
|
306 |
|
.setValue(hostedByOpenAire.get("name").getAsString())
|
307 |
|
.build());
|
308 |
|
}
|
|
337 |
//Create Metadata Proto
|
|
338 |
final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
|
309 |
339 |
|
310 |
|
result.addInstance(instance);
|
311 |
|
}
|
|
340 |
Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
|
312 |
341 |
|
313 |
|
//Create Metadata Proto
|
314 |
|
final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
|
|
342 |
if (authorsOrganizations.getKey().size() > 0) {
|
|
343 |
metadata.addAllAuthor(authorsOrganizations.getKey());
|
|
344 |
} else {
|
|
345 |
//Should never enter here because of the isValid method at the beginning.
|
|
346 |
context.incrementCounter("filtered", "unexpected_no_authors", 1);
|
|
347 |
return null;
|
|
348 |
}
|
|
349 |
//adding Language
|
|
350 |
metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
|
|
351 |
.setClassid("und")
|
|
352 |
.setClassname("Undetermined")
|
|
353 |
.setSchemeid("dent:languages")
|
|
354 |
.setSchemename("dent:languages")
|
|
355 |
.build());
|
315 |
356 |
|
|
357 |
//Adding subjects
|
|
358 |
List<String> subjects = getArrayValues(rootElement, "subject");
|
316 |
359 |
|
317 |
|
Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
|
|
360 |
subjects.forEach(s -> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
|
|
361 |
.setValue(s)
|
|
362 |
.setQualifier(getQualifier("keyword", "dnet:subject"))
|
|
363 |
.build()));
|
318 |
364 |
|
|
365 |
List<String> titles = getCleanedTitles(rootElement);
|
|
366 |
titles.forEach(t ->
|
|
367 |
metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
|
|
368 |
.setValue(t)
|
|
369 |
.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
|
|
370 |
.build()));
|
319 |
371 |
|
|
372 |
settingRelevantDate(rootElement, metadata, "issued", "issued", true);
|
|
373 |
settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
|
|
374 |
settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
|
|
375 |
settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
|
320 |
376 |
|
321 |
|
if (authorsOrganizations.getKey().size() > 0) {
|
322 |
|
metadata.addAllAuthor(authorsOrganizations.getKey());
|
323 |
|
}
|
324 |
|
else {
|
325 |
|
//Should never enter here because of the isValid method at the beginning.
|
326 |
|
context.incrementCounter("filtered","unexpected_no_authors", 1);
|
327 |
|
return null;
|
328 |
|
}
|
329 |
|
//adding Language
|
330 |
|
metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
|
331 |
|
.setClassid("und")
|
332 |
|
.setClassname("Undetermined")
|
333 |
|
.setSchemeid("dent:languages")
|
334 |
|
.setSchemename("dent:languages")
|
335 |
|
.build());
|
|
377 |
getArrayObjects(rootElement, "abstract").forEach(d ->
|
|
378 |
{
|
|
379 |
if (MAG.equals(d.get("provenance").getAsString()))
|
|
380 |
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
|
|
381 |
else
|
|
382 |
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
|
|
383 |
}
|
|
384 |
);
|
336 |
385 |
|
337 |
|
//Adding subjects
|
338 |
|
List<String> subjects =getArrayValues(rootElement, "subject");
|
|
386 |
//Adding Journal
|
|
387 |
final String publisher = getStringValue(rootElement, "publisher");
|
|
388 |
if (StringUtils.isNotBlank(publisher)) {
|
339 |
389 |
|
340 |
|
subjects.forEach(s-> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
|
341 |
|
.setValue(s)
|
342 |
|
.setQualifier(getQualifier("keyword", "dnet:subject"))
|
343 |
|
.build()));
|
|
390 |
final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
|
344 |
391 |
|
345 |
|
List<String> titles = getCleanedTitles(rootElement);
|
346 |
|
titles.forEach(t->
|
347 |
|
metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
|
348 |
|
.setValue(t)
|
349 |
|
.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
|
350 |
|
.build()));
|
|
392 |
if (hasJSONArrayField(rootElement, "issn")) {
|
|
393 |
StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
|
|
394 |
.map(JsonElement::getAsJsonObject)
|
|
395 |
.forEach(it -> {
|
|
396 |
final String issntype = getStringValue(it, "type");
|
|
397 |
final String value = getStringValue(it, "value");
|
|
398 |
if ("electronic".equals(issntype)) {
|
|
399 |
journal.setIssnOnline(value);
|
|
400 |
}
|
|
401 |
if ("print".equals(issntype))
|
|
402 |
journal.setIssnPrinted(value);
|
|
403 |
});
|
|
404 |
}
|
|
405 |
metadata.setJournal(journal.build());
|
|
406 |
}
|
|
407 |
metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
|
|
408 |
result.setMetadata(metadata.build());
|
|
409 |
entity.setResult(result.build());
|
|
410 |
oaf.setEntity(entity.build());
|
351 |
411 |
|
352 |
|
settingRelevantDate(rootElement, metadata, "issued", "issued", true);
|
353 |
|
settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
|
354 |
|
settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
|
355 |
|
settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
|
|
412 |
//System.out.println(JsonFormat.printToString(oaf.build()));
|
356 |
413 |
|
|
414 |
final List<AtomicAction> actionList = new ArrayList<>();
|
357 |
415 |
|
358 |
|
getArrayObjects(rootElement, "abstract").forEach(d ->
|
359 |
|
{
|
360 |
|
if (MAG.equals(d.get("provenance").getAsString()))
|
361 |
|
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
|
362 |
|
else
|
363 |
|
metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
|
364 |
|
}
|
365 |
|
);
|
|
416 |
if (!onlyOrganization)
|
|
417 |
actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
|
366 |
418 |
|
|
419 |
if (!authorsOrganizations.getValue().isEmpty()) {
|
367 |
420 |
|
|
421 |
authorsOrganizations.getValue().forEach(o ->
|
|
422 |
{
|
368 |
423 |
|
369 |
|
//Adding Journal
|
370 |
|
final String publisher = getStringValue(rootElement,"publisher");
|
371 |
|
if (StringUtils.isNotBlank(publisher)){
|
|
424 |
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
|
|
425 |
if (!onlyOrganization)
|
|
426 |
actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
|
|
427 |
final String gridOrganization = getSimilarGridOrganization(o.getEntity());
|
|
428 |
if (gridOrganization != null) {
|
|
429 |
actionList.add(factory
|
|
430 |
.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization,
|
|
431 |
"".getBytes()));
|
|
432 |
actionList.add(factory
|
|
433 |
.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(),
|
|
434 |
"".getBytes()));
|
|
435 |
}
|
|
436 |
});
|
|
437 |
}
|
|
438 |
return actionList;
|
372 |
439 |
|
373 |
|
final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
|
|
440 |
}
|
374 |
441 |
|
375 |
|
if (hasJSONArrayField(rootElement,"issn" )){
|
376 |
|
StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
|
377 |
|
.map(JsonElement::getAsJsonObject)
|
378 |
|
.forEach(it -> {
|
379 |
|
final String issntype = getStringValue(it, "type");
|
380 |
|
final String value = getStringValue(it, "value");
|
381 |
|
if("electronic".equals(issntype)){
|
382 |
|
journal.setIssnOnline(value);
|
383 |
|
}
|
384 |
|
if ("print".equals(issntype))
|
385 |
|
journal.setIssnPrinted(value);
|
386 |
|
});
|
387 |
|
}
|
388 |
|
metadata.setJournal(journal.build());
|
389 |
|
}
|
390 |
|
metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
|
391 |
|
result.setMetadata(metadata.build());
|
392 |
|
entity.setResult(result.build());
|
393 |
|
oaf.setEntity(entity.build());
|
|
442 |
private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
|
394 |
443 |
|
395 |
|
//System.out.println(JsonFormat.printToString(oaf.build()));
|
|
444 |
final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
|
|
445 |
if (pidList != null) {
|
|
446 |
for (FieldTypeProtos.StructuredProperty p : pidList) {
|
|
447 |
if (p.getQualifier().getClassname().equals("grid")) {
|
|
448 |
return "20|grid________" + SEPARATOR + AbstractDNetXsltFunctions.md5(p.getValue());
|
|
449 |
}
|
|
450 |
}
|
|
451 |
}
|
|
452 |
return null;
|
396 |
453 |
|
397 |
|
final List<AtomicAction> actionList = new ArrayList<>();
|
|
454 |
}
|
398 |
455 |
|
399 |
|
if (!onlyOrganization)
|
400 |
|
actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
|
|
456 |
private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication,
|
|
457 |
final OafProtos.Oaf organization,
|
|
458 |
final ActionFactory factory,
|
|
459 |
final String setName,
|
|
460 |
final Agent agent) {
|
401 |
461 |
|
402 |
|
if (!authorsOrganizations.getValue().isEmpty()) {
|
|
462 |
List<AtomicAction> result = new ArrayList<>();
|
403 |
463 |
|
404 |
|
authorsOrganizations.getValue().forEach(o ->
|
405 |
|
{
|
|
464 |
final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
|
|
465 |
roaf.setKind(KindProtos.Kind.relation);
|
406 |
466 |
|
407 |
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
|
408 |
|
if (!onlyOrganization)
|
409 |
|
actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
|
410 |
|
final String gridOrganization = getSimilarGridOrganization(o.getEntity());
|
411 |
|
if (gridOrganization!= null) {
|
412 |
|
actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization, "".getBytes()));
|
413 |
|
actionList.add(factory.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(), "".getBytes()));
|
414 |
|
}
|
415 |
|
});
|
416 |
|
}
|
417 |
|
return actionList;
|
|
467 |
roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
|
468 |
.setInvisible(false)
|
|
469 |
.setDeletedbyinference(false)
|
|
470 |
.setInferred(false)
|
|
471 |
.setTrust("0.9")
|
|
472 |
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
|
473 |
.build());
|
418 |
474 |
|
419 |
|
}
|
|
475 |
final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
|
420 |
476 |
|
|
477 |
rel.setRelType(RelTypeProtos.RelType.resultOrganization);
|
|
478 |
rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
|
421 |
479 |
|
422 |
|
private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
|
|
480 |
//Create a relation Result --> Organization
|
|
481 |
rel.setSource(publication.getEntity().getId());
|
|
482 |
rel.setTarget(organization.getEntity().getId());
|
|
483 |
rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
|
423 |
484 |
|
424 |
|
final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
|
425 |
|
if (pidList!= null ) {
|
426 |
|
for (FieldTypeProtos.StructuredProperty p: pidList) {
|
427 |
|
if (p.getQualifier().getClassname().equals("grid")){
|
428 |
|
return "20|grid________" + SEPARATOR +AbstractDNetXsltFunctions.md5(p.getValue());
|
429 |
|
}
|
430 |
|
}
|
431 |
|
}
|
432 |
|
return null;
|
|
485 |
final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
|
433 |
486 |
|
434 |
|
}
|
|
487 |
final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
|
|
488 |
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
|
489 |
.setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
|
|
490 |
.build());
|
|
491 |
rel_instance.setAffiliation(affiliationRel.build());
|
|
492 |
rel.setResultOrganization(rel_instance.build());
|
435 |
493 |
|
436 |
|
private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication, final OafProtos.Oaf organization, final ActionFactory factory, final String setName, final Agent agent) {
|
|
494 |
rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
|
495 |
.setValue(datasources.get(MAG.toLowerCase()).getKey())
|
|
496 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions
|
|
497 |
.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
|
|
498 |
.build());
|
437 |
499 |
|
438 |
|
List<AtomicAction> result = new ArrayList<>();
|
|
500 |
rel.setChild(false);
|
|
501 |
roaf.setRel(rel.build());
|
439 |
502 |
|
440 |
|
final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
|
441 |
|
roaf.setKind(KindProtos.Kind.relation);
|
|
503 |
result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution",
|
|
504 |
organization.getEntity().getId(), roaf.build().toByteArray()));
|
442 |
505 |
|
443 |
|
roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
444 |
|
.setInvisible(false)
|
445 |
|
.setDeletedbyinference(false)
|
446 |
|
.setInferred(false)
|
447 |
|
.setTrust("0.9")
|
448 |
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
449 |
|
.build());
|
|
506 |
//Create a relation Organization --> Result
|
|
507 |
rel.setTarget(publication.getEntity().getId());
|
|
508 |
rel.setSource(organization.getEntity().getId());
|
|
509 |
rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
|
450 |
510 |
|
|
511 |
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
|
512 |
.setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
|
|
513 |
.build());
|
|
514 |
rel_instance.setAffiliation(affiliationRel.build());
|
|
515 |
rel.setResultOrganization(rel_instance.build());
|
|
516 |
roaf.setRel(rel.build());
|
|
517 |
result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf",
|
|
518 |
publication.getEntity().getId(), roaf.build().toByteArray()));
|
451 |
519 |
|
452 |
|
final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
|
|
520 |
return result;
|
453 |
521 |
|
454 |
|
rel.setRelType(RelTypeProtos.RelType.resultOrganization);
|
455 |
|
rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
|
|
522 |
}
|
456 |
523 |
|
457 |
|
//Create a relation Result --> Organization
|
458 |
|
rel.setSource(publication.getEntity().getId());
|
459 |
|
rel.setTarget(organization.getEntity().getId());
|
460 |
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
|
|
524 |
private static boolean hasJSONArrayField(final JsonObject root, final String key) {
|
|
525 |
return root.has(key) && root.get(key).isJsonArray();
|
|
526 |
}
|
461 |
527 |
|
462 |
|
final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
|
|
528 |
private static void settingRelevantDate(JsonObject rootElement,
|
|
529 |
ResultProtos.Result.Metadata.Builder metadata,
|
|
530 |
final String jsonKey,
|
|
531 |
final String dictionaryKey,
|
|
532 |
final boolean addToDateOfAcceptance) {
|
|
533 |
//Adding date
|
|
534 |
String date = getStringValue(rootElement, jsonKey);
|
|
535 |
if (date == null)
|
|
536 |
return;
|
|
537 |
if (date.length() == 4) {
|
|
538 |
date += "-01-01";
|
|
539 |
}
|
|
540 |
if (isValidDate(date)) {
|
|
541 |
if (addToDateOfAcceptance)
|
|
542 |
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
|
|
543 |
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
|
|
544 |
.setValue(date)
|
|
545 |
.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date"))
|
|
546 |
.build());
|
|
547 |
}
|
|
548 |
}
|
463 |
549 |
|
464 |
|
final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
|
465 |
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
466 |
|
.setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
|
467 |
|
.build());
|
468 |
|
rel_instance.setAffiliation(affiliationRel.build());
|
469 |
|
rel.setResultOrganization(rel_instance.build());
|
|
550 |
public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
|
|
551 |
FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
|
|
552 |
if (StringUtils.contains(value, "orcid.org")) {
|
|
553 |
return pid.setValue(value.replaceAll("https://orcid.org/", ""))
|
|
554 |
.setKey(ORCID).build();
|
|
555 |
}
|
|
556 |
if (StringUtils.contains(value, "academic.microsoft.com/#/detail")) {
|
|
557 |
return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/", ""))
|
|
558 |
.setKey("MAG Identifier").build();
|
|
559 |
}
|
|
560 |
return pid.setValue(value)
|
|
561 |
.setKey("URL").build();
|
|
562 |
}
|
470 |
563 |
|
471 |
|
rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
|
472 |
|
.setValue(datasources.get(MAG.toLowerCase()).getKey())
|
473 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
|
474 |
|
.build());
|
|
564 |
public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
|
|
565 |
final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
|
|
566 |
final List<String> magId = new ArrayList<>();
|
|
567 |
getArrayObjects(affiliation, "identifiers").forEach(it -> {
|
|
568 |
if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
|
|
569 |
affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(MAG));
|
|
570 |
magId.add(it.get("value").getAsString());
|
|
571 |
} else
|
|
572 |
affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
|
|
573 |
});
|
|
574 |
if (magId.size() > 0) {
|
|
575 |
final String microsoftID = magId.get(0);
|
|
576 |
OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
|
577 |
oaf.setKind(KindProtos.Kind.entity);
|
|
578 |
OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
|
|
579 |
entity.setType(TypeProtos.Type.organization);
|
|
580 |
entity.setId("20|microsoft___" + SEPARATOR + AbstractDNetXsltFunctions.md5(microsoftID));
|
|
581 |
final String id = datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
|
|
582 |
final String name = datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
|
|
583 |
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
|
584 |
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
|
585 |
.setValue(name)
|
|
586 |
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
|
587 |
.build();
|
|
588 |
entity.addCollectedfrom(collectedFrom);
|
|
589 |
} else {
|
|
590 |
return null;
|
|
591 |
}
|
|
592 |
entity.addOriginalId(microsoftID);
|
475 |
593 |
|
|
594 |
affiliationIdentifiers.forEach((key, value) -> entity.addPid(
|
|
595 |
FieldTypeProtos.StructuredProperty.newBuilder()
|
|
596 |
.setQualifier(value)
|
|
597 |
.setValue(key)
|
|
598 |
.build()));
|
476 |
599 |
|
|
600 |
final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
|
|
601 |
organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
|
|
602 |
.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
|
|
603 |
.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
|
|
604 |
.build());
|
477 |
605 |
|
478 |
|
rel.setChild(false);
|
479 |
|
roaf.setRel(rel.build());
|
|
606 |
entity.setOrganization(organization);
|
|
607 |
oaf.setEntity(entity);
|
|
608 |
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
|
609 |
.setInvisible(false)
|
|
610 |
.setDeletedbyinference(false)
|
|
611 |
.setInferred(false)
|
|
612 |
.setTrust("0.9")
|
|
613 |
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
|
614 |
.build());
|
|
615 |
return oaf.build();
|
|
616 |
}
|
|
617 |
return null;
|
|
618 |
}
|
480 |
619 |
|
481 |
|
result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution", organization.getEntity().getId(), roaf.build().toByteArray() ));
|
|
620 |
public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {
|
482 |
621 |
|
|
622 |
final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
|
483 |
623 |
|
484 |
|
//Create a relation Organization --> Result
|
485 |
|
rel.setTarget(publication.getEntity().getId());
|
486 |
|
rel.setSource(organization.getEntity().getId());
|
487 |
|
rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
|
|
624 |
List<JsonObject> authors = getArrayObjects(root, "authors");
|
488 |
625 |
|
|
626 |
final AtomicInteger counter = new AtomicInteger(1);
|
489 |
627 |
|
490 |
|
affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
|
491 |
|
.setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
|
492 |
|
.build());
|
493 |
|
rel_instance.setAffiliation(affiliationRel.build());
|
494 |
|
rel.setResultOrganization(rel_instance.build());
|
495 |
|
roaf.setRel(rel.build());
|
496 |
|
result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf", publication.getEntity().getId(), roaf.build().toByteArray()));
|
|
628 |
List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
|
|
629 |
final String given = getStringValue(author, "given");
|
|
630 |
final String family = getStringValue(author, "family");
|
|
631 |
String fullname = getStringValue(author, "fullname");
|
497 |
632 |
|
498 |
|
return result;
|
|
633 |
if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
|
|
634 |
fullname = String.format("%s %s", given, family);
|
|
635 |
}
|
499 |
636 |
|
500 |
|
}
|
|
637 |
if (!isValidAuthorName(fullname, null)) {
|
|
638 |
return null;
|
|
639 |
}
|
|
640 |
final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
|
501 |
641 |
|
502 |
|
private static boolean hasJSONArrayField(final JsonObject root, final String key) {
|
503 |
|
return root.has(key) && root.get(key).isJsonArray();
|
504 |
|
}
|
|
642 |
if (StringUtils.isNotBlank(given))
|
|
643 |
abuilder.setName(given);
|
|
644 |
if (StringUtils.isNotBlank(family))
|
|
645 |
abuilder.setSurname(family);
|
|
646 |
if (StringUtils.isNotBlank(fullname))
|
|
647 |
abuilder.setFullname(fullname);
|
505 |
648 |
|
506 |
|
private static void settingRelevantDate(JsonObject rootElement, ResultProtos.Result.Metadata.Builder metadata , final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) {
|
507 |
|
//Adding date
|
508 |
|
String date = getStringValue(rootElement,jsonKey);
|
509 |
|
if (date == null)
|
510 |
|
return;
|
511 |
|
if (date.length() == 4) {
|
512 |
|
date += "-01-01";
|
513 |
|
}
|
514 |
|
if (isValidDate(date)) {
|
515 |
|
if (addToDateOfAcceptance)
|
516 |
|
metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
|
517 |
|
metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
|
518 |
|
.setValue(date)
|
519 |
|
.setQualifier(getQualifier(dictionaryKey,"dnet:dataCite_date"))
|
520 |
|
.build());
|
521 |
|
}
|
522 |
|
}
|
|
649 |
final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
|
|
650 |
final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
|
523 |
651 |
|
|
652 |
authorAffiliation.forEach(it ->
|
|
653 |
{
|
|
654 |
OafProtos.Oaf org = createOrganizationFromJSON(it);
|
|
655 |
if (org != null) {
|
|
656 |
affiliations.put(org.getEntity().getId(), org);
|
|
657 |
abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
|
|
658 |
}
|
|
659 |
});
|
|
660 |
identifiers.stream().map(id -> {
|
|
661 |
final String value = id.get("value").getAsString();
|
|
662 |
return extractIdentifier(value);
|
|
663 |
}).collect(
|
|
664 |
Collectors.toMap(
|
|
665 |
FieldTypeProtos.KeyValue::getKey,
|
|
666 |
Function.identity(),
|
|
667 |
(a, b) -> a
|
|
668 |
)).values().forEach(abuilder::addPid);
|
|
669 |
abuilder.setRank(counter.getAndIncrement());
|
524 |
670 |
|
525 |
|
public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
|
526 |
|
FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
|
527 |
|
if (StringUtils.contains(value, "orcid.org")){
|
528 |
|
return pid.setValue(value.replaceAll("https://orcid.org/",""))
|
529 |
|
.setKey(ORCID).build();
|
530 |
|
}
|
531 |
|
if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
|
532 |
|
return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/",""))
|
533 |
|
.setKey("MAG Identifier").build();
|
534 |
|
}
|
535 |
|
return pid.setValue(value)
|
536 |
|
.setKey("URL").build();
|
537 |
|
}
|
|
671 |
return abuilder.build();
|
538 |
672 |
|
|
673 |
}).filter(Objects::nonNull).collect(Collectors.toList());
|
539 |
674 |
|
540 |
|
public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
|
541 |
|
final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
|
542 |
|
final List<String> magId = new ArrayList<>();
|
543 |
|
getArrayObjects(affiliation, "identifiers").forEach(it -> {
|
544 |
|
if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
|
545 |
|
affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(MAG));
|
546 |
|
magId.add(it.get("value").getAsString());
|
547 |
|
}
|
548 |
|
else
|
549 |
|
affiliationIdentifiers.put( it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
|
550 |
|
});
|
551 |
|
if (magId.size() > 0) {
|
552 |
|
final String microsoftID = magId.get(0);
|
553 |
|
OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
|
554 |
|
oaf.setKind(KindProtos.Kind.entity);
|
555 |
|
OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
|
556 |
|
entity.setType(TypeProtos.Type.organization);
|
557 |
|
entity.setId("20|microsoft___" + SEPARATOR +AbstractDNetXsltFunctions.md5(microsoftID));
|
558 |
|
final String id =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
|
559 |
|
final String name =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
|
560 |
|
if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
|
561 |
|
final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
|
562 |
|
.setValue(name)
|
563 |
|
.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
|
564 |
|
.build();
|
565 |
|
entity.addCollectedfrom(collectedFrom);
|
566 |
|
} else {
|
567 |
|
return null;
|
568 |
|
}
|
569 |
|
entity.addOriginalId(microsoftID);
|
|
675 |
return new Pair<>(collect, affiliations.values());
|
|
676 |
}
|
570 |
677 |
|
571 |
|
affiliationIdentifiers.forEach((key, value) -> entity.addPid(
|
572 |
|
FieldTypeProtos.StructuredProperty.newBuilder()
|
573 |
|
.setQualifier(value)
|
574 |
|
.setValue(key)
|
575 |
|
.build()));
|
576 |
|
|
577 |
|
final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
|
578 |
|
organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
|
579 |
|
.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
|
580 |
|
.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
|
581 |
|
.build());
|
582 |
|
|
583 |
|
entity.setOrganization(organization);
|
584 |
|
oaf.setEntity(entity);
|
585 |
|
oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
|
586 |
|
.setInvisible(false)
|
587 |
|
.setDeletedbyinference(false)
|
588 |
|
.setInferred(false)
|
589 |
|
.setTrust("0.9")
|
590 |
|
.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
|
591 |
|
.build());
|
592 |
|
return oaf.build();
|
593 |
|
}
|
594 |
|
return null;
|
595 |
|
}
|
596 |
|
|
597 |
|
public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {
|
598 |
|
|
599 |
|
final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
|
600 |
|
|
601 |
|
List<JsonObject> authors = getArrayObjects(root, "authors");
|
602 |
|
|
603 |
|
final AtomicInteger counter = new AtomicInteger(1);
|
604 |
|
|
605 |
|
|
606 |
|
List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
|
607 |
|
final String given = getStringValue(author, "given");
|
608 |
|
final String family = getStringValue(author, "family");
|
609 |
|
String fullname = getStringValue(author, "fullname");
|
610 |
|
|
611 |
|
if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
|
612 |
|
fullname = String.format("%s %s", given, family);
|
613 |
|
}
|
614 |
|
|
615 |
|
if (StringUtils.isBlank(fullname)){
|
616 |
|
return null;
|
617 |
|
|
618 |
|
}
|
619 |
|
final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
|
620 |
|
|
621 |
|
if (StringUtils.isNotBlank(given))
|
622 |
|
abuilder.setName(given);
|
623 |
|
if (StringUtils.isNotBlank(family))
|
624 |
|
abuilder.setSurname(family);
|
625 |
|
if (StringUtils.isNotBlank(fullname))
|
626 |
|
abuilder.setFullname(fullname);
|
627 |
|
|
628 |
|
final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
|
629 |
|
final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
|
630 |
|
|
631 |
|
authorAffiliation.forEach(it ->
|
632 |
|
{
|
633 |
|
OafProtos.Oaf org = createOrganizationFromJSON(it);
|
634 |
|
if (org != null) {
|
635 |
|
affiliations.put(org.getEntity().getId(), org);
|
636 |
|
abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
|
637 |
|
}
|
638 |
|
});
|
639 |
|
identifiers.stream().map(id -> {
|
640 |
|
final String value = id.get("value").getAsString();
|
641 |
|
return extractIdentifier(value);
|
642 |
|
}).collect(
|
643 |
|
Collectors.toMap(
|
644 |
|
FieldTypeProtos.KeyValue::getKey,
|
645 |
|
Function.identity(),
|
646 |
|
(a,b) -> a
|
647 |
|
)).values().forEach(abuilder::addPid);
|
648 |
|
abuilder.setRank(counter.getAndIncrement());
|
649 |
|
|
650 |
|
return abuilder.build();
|
651 |
|
|
652 |
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
653 |
|
|
654 |
|
return new Pair<> ( collect,affiliations.values() );
|
655 |
|
}
|
656 |
|
|
657 |
|
|
658 |
|
|
659 |
|
|
660 |
|
|
661 |
|
|
662 |
678 |
}
|
Discard records without a valid author as requested in #4392, #4393, #4395, #4396.
If a record also has at least one valid author, the record is kept but its invalid authors are removed.