1
|
package eu.dnetlib.data.collector.plugins.schemaorg;
|
2
|
|
3
|
import org.json.JSONArray;
|
4
|
import org.json.JSONObject;
|
5
|
|
6
|
import java.util.ArrayList;
|
7
|
import java.util.List;
|
8
|
|
9
|
public class JSONLDUtils {
|
10
|
|
11
|
/**
 * Common view over the principals (persons or organizations) handled by this utility class.
 */
public interface PrincipalInfo {

    /**
     * @return the name of this principal
     */
    String name();

    /**
     * @return the names of the organizations this principal is affiliated with;
     *         the implementations in this file may return {@code null}
     */
    List<String> affiliationNames();
}
|
16
|
|
17
|
public static class OrganizationInfo implements PrincipalInfo{
|
18
|
public String name;
|
19
|
|
20
|
public String name(){return this.name;}
|
21
|
|
22
|
public List<String> affiliationNames(){
|
23
|
return null;
|
24
|
}
|
25
|
|
26
|
public OrganizationInfo(){}
|
27
|
|
28
|
public OrganizationInfo(String name){
|
29
|
this.name = name;
|
30
|
}
|
31
|
}
|
32
|
|
33
|
public static class PersonInfo implements PrincipalInfo{
|
34
|
public String name;
|
35
|
public List<OrganizationInfo> affiliations;
|
36
|
|
37
|
public String name(){return this.name;}
|
38
|
|
39
|
public List<String> affiliationNames(){
|
40
|
if(this.affiliations == null) return null;
|
41
|
List<String> curated = new ArrayList<>();
|
42
|
for(OrganizationInfo item : this.affiliations){
|
43
|
if(item == null || item.name == null || item.name.trim().length() == 0) continue;;
|
44
|
curated.add(item.name.trim());
|
45
|
}
|
46
|
return curated;
|
47
|
}
|
48
|
|
49
|
public PersonInfo(){}
|
50
|
|
51
|
public PersonInfo(String name){
|
52
|
this.name = name;
|
53
|
}
|
54
|
|
55
|
public PersonInfo(String name, List<OrganizationInfo> affiliations){
|
56
|
this.name = name;
|
57
|
this.affiliations = affiliations;
|
58
|
}
|
59
|
}
|
60
|
|
61
|
public static class LicenseInfo{
|
62
|
public String name;
|
63
|
public String url;
|
64
|
|
65
|
public LicenseInfo(){}
|
66
|
|
67
|
public LicenseInfo(String url){
|
68
|
this.url = url;
|
69
|
}
|
70
|
|
71
|
public LicenseInfo(String url, String name){
|
72
|
this.name = name;
|
73
|
this.url = url;
|
74
|
}
|
75
|
}
|
76
|
|
77
|
public static class CitationInfo{
|
78
|
public String url;
|
79
|
|
80
|
public CitationInfo(){}
|
81
|
|
82
|
public CitationInfo(String url){
|
83
|
this.url = url;
|
84
|
}
|
85
|
}
|
86
|
|
87
|
public static class IdentifierInfo{
|
88
|
public String value;
|
89
|
public String type;
|
90
|
|
91
|
public IdentifierInfo(){}
|
92
|
|
93
|
public IdentifierInfo(String value){
|
94
|
this.value = value;
|
95
|
}
|
96
|
|
97
|
public IdentifierInfo(String value, String type){
|
98
|
this.value = value;
|
99
|
this.type = type;
|
100
|
}
|
101
|
}
|
102
|
|
103
|
public static class GeoCoordinatesInfo{
|
104
|
public String latitude;
|
105
|
public String longitude;
|
106
|
|
107
|
public GeoCoordinatesInfo(){}
|
108
|
|
109
|
public GeoCoordinatesInfo(String latitude, String longitude){
|
110
|
this.latitude = latitude;
|
111
|
this.longitude = longitude;
|
112
|
}
|
113
|
}
|
114
|
|
115
|
public static class GeoShapeInfo{
|
116
|
public String box;
|
117
|
|
118
|
public GeoShapeInfo(){}
|
119
|
|
120
|
public GeoShapeInfo(String box){
|
121
|
this.box = box;
|
122
|
}
|
123
|
}
|
124
|
|
125
|
public static class PlaceInfo{
|
126
|
public String name;
|
127
|
public List<GeoCoordinatesInfo> geoCoordinates;
|
128
|
public List<GeoShapeInfo> geoShapes;
|
129
|
|
130
|
public PlaceInfo(){}
|
131
|
|
132
|
public PlaceInfo(String name, List<GeoCoordinatesInfo> geoCoordinates, List<GeoShapeInfo> geoShapes){
|
133
|
this.name = name;
|
134
|
this.geoCoordinates = geoCoordinates;
|
135
|
this.geoShapes = geoShapes;
|
136
|
}
|
137
|
}
|
138
|
|
139
|
private static PlaceInfo extractPlaceSingle(JSONObject document){
|
140
|
if(document == null || !"Place".equals(document.optString("@type"))) return null;
|
141
|
String name = document.optString("name");
|
142
|
List<GeoCoordinatesInfo> geoCoordinates = JSONLDUtils.extractGeoCoordinates(document, "geo");
|
143
|
List<GeoShapeInfo> geoShapes = JSONLDUtils.extractGeoShapes(document, "geo");
|
144
|
if((name==null || name.trim().length() == 0) &&
|
145
|
(geoCoordinates == null || geoCoordinates.size() == 0) &&
|
146
|
(geoShapes == null || geoShapes.size() == 0)) return null;
|
147
|
return new PlaceInfo(name, geoCoordinates, geoShapes);
|
148
|
}
|
149
|
|
150
|
public static List<PlaceInfo> extractPlaces(JSONObject document, String key) {
|
151
|
List<PlaceInfo> items = new ArrayList<>();
|
152
|
|
153
|
JSONArray array = document.optJSONArray(key);
|
154
|
JSONObject obj = document.optJSONObject(key);
|
155
|
|
156
|
if (array != null) {
|
157
|
for (int i = 0; i < array.length(); i += 1) {
|
158
|
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(array.optJSONObject(i));
|
159
|
if(nfo!=null) items.add(nfo);
|
160
|
}
|
161
|
}else if (obj!=null) {
|
162
|
PlaceInfo nfo = JSONLDUtils.extractPlaceSingle(obj);
|
163
|
if(nfo!=null) items.add(nfo);
|
164
|
}
|
165
|
|
166
|
return items;
|
167
|
}
|
168
|
|
169
|
private static GeoCoordinatesInfo extractGeoCoordinatesSingle(JSONObject document){
|
170
|
if(document == null || !"GeoCoordinates".equals(document.optString("@type"))) return null;
|
171
|
String latitude = document.optString("latitude");
|
172
|
String longitude = document.optString("longitude");
|
173
|
if(latitude==null || latitude.trim().length()==0 || longitude==null || longitude.trim().length()==0) return null;
|
174
|
return new GeoCoordinatesInfo(latitude, longitude);
|
175
|
}
|
176
|
|
177
|
private static List<GeoCoordinatesInfo> extractGeoCoordinates(JSONObject document, String key) {
|
178
|
List<GeoCoordinatesInfo> items = new ArrayList<>();
|
179
|
|
180
|
JSONArray array = document.optJSONArray(key);
|
181
|
JSONObject obj = document.optJSONObject(key);
|
182
|
|
183
|
if (array != null) {
|
184
|
for (int i = 0; i < array.length(); i += 1) {
|
185
|
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(array.optJSONObject(i));
|
186
|
if(nfo!=null) items.add(nfo);
|
187
|
}
|
188
|
}else if (obj!=null) {
|
189
|
GeoCoordinatesInfo nfo = JSONLDUtils.extractGeoCoordinatesSingle(obj);
|
190
|
if(nfo!=null) items.add(nfo);
|
191
|
}
|
192
|
|
193
|
return items;
|
194
|
}
|
195
|
|
196
|
private static GeoShapeInfo extractGeoShapeSingle(JSONObject document){
|
197
|
if(document == null || !"GeoShape".equals(document.optString("@type"))) return null;
|
198
|
String box = document.optString("box");
|
199
|
if(box==null || box.trim().length()==0 ) return null;
|
200
|
return new GeoShapeInfo(box);
|
201
|
}
|
202
|
|
203
|
private static List<GeoShapeInfo> extractGeoShapes(JSONObject document, String key) {
|
204
|
List<GeoShapeInfo> items = new ArrayList<>();
|
205
|
|
206
|
JSONArray array = document.optJSONArray(key);
|
207
|
JSONObject obj = document.optJSONObject(key);
|
208
|
|
209
|
if (array != null) {
|
210
|
for (int i = 0; i < array.length(); i += 1) {
|
211
|
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(array.optJSONObject(i));
|
212
|
if(nfo!=null) items.add(nfo);
|
213
|
}
|
214
|
}else if (obj!=null) {
|
215
|
GeoShapeInfo nfo = JSONLDUtils.extractGeoShapeSingle(obj);
|
216
|
if(nfo!=null) items.add(nfo);
|
217
|
}
|
218
|
|
219
|
return items;
|
220
|
}
|
221
|
|
222
|
private static OrganizationInfo extractOrganizationSingle(JSONObject document){
|
223
|
if(document == null || !"Organization".equals(document.optString("@type"))) return null;
|
224
|
String name = document.optString("name");
|
225
|
if(name==null || name.trim().length()==0) return null;
|
226
|
return new OrganizationInfo(name);
|
227
|
}
|
228
|
|
229
|
private static List<OrganizationInfo> extractOrganization(JSONObject document, String key) {
|
230
|
List<OrganizationInfo> items = new ArrayList<>();
|
231
|
|
232
|
JSONArray array = document.optJSONArray(key);
|
233
|
JSONObject obj = document.optJSONObject(key);
|
234
|
|
235
|
if (array != null) {
|
236
|
for (int i = 0; i < array.length(); i += 1) {
|
237
|
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(array.optJSONObject(i));
|
238
|
if(nfo!=null) items.add(nfo);
|
239
|
}
|
240
|
}else if (obj!=null) {
|
241
|
OrganizationInfo nfo = JSONLDUtils.extractOrganizationSingle(obj);
|
242
|
if(nfo!=null) items.add(nfo);
|
243
|
}
|
244
|
|
245
|
return items;
|
246
|
}
|
247
|
|
248
|
private static PersonInfo extractPersonSingle(JSONObject document) {
|
249
|
if(document == null || !"Person".equals(document.optString("@type"))) return null;
|
250
|
String name = document.optString("name");
|
251
|
String givenName = document.optString("givenName");
|
252
|
String familyName = document.optString("familyName");
|
253
|
if ((name == null || name.trim().length() == 0) && (givenName!=null || familyName !=null)) {
|
254
|
if(givenName !=null && familyName!=null) name = String.join(" ", familyName, givenName).trim();
|
255
|
else if (givenName == null) name = familyName;
|
256
|
else if (familyName == null) name = givenName;
|
257
|
}
|
258
|
if(name==null || name.trim().length()==0) return null;
|
259
|
List<OrganizationInfo> affiliations = JSONLDUtils.extractOrganization(document, "affiliation");
|
260
|
return new PersonInfo(name, affiliations);
|
261
|
}
|
262
|
|
263
|
private static List<PersonInfo> extractPerson(JSONObject document, String key) {
|
264
|
List<PersonInfo> items = new ArrayList<>();
|
265
|
|
266
|
JSONArray array = document.optJSONArray(key);
|
267
|
JSONObject obj = document.optJSONObject(key);
|
268
|
|
269
|
if (array != null) {
|
270
|
for (int i = 0; i < array.length(); i += 1) {
|
271
|
PersonInfo nfo = JSONLDUtils.extractPersonSingle(array.optJSONObject(i));
|
272
|
if(nfo!=null) items.add(nfo);
|
273
|
}
|
274
|
}else if (obj!=null) {
|
275
|
PersonInfo nfo = JSONLDUtils.extractPersonSingle(obj);
|
276
|
if(nfo!=null) items.add(nfo);
|
277
|
} else {
|
278
|
String value = document.optString(key);
|
279
|
if (value != null) items.add(new PersonInfo(value));
|
280
|
}
|
281
|
|
282
|
return items;
|
283
|
}
|
284
|
|
285
|
public static PrincipalInfo extractPrincipalSingle(JSONObject document) {
|
286
|
PrincipalInfo principal = JSONLDUtils.extractPersonSingle(document);
|
287
|
if(principal == null) principal = JSONLDUtils.extractOrganizationSingle(document);
|
288
|
return principal;
|
289
|
}
|
290
|
|
291
|
public static List<PrincipalInfo> extractPrincipal(JSONObject document, String key) {
|
292
|
List<PrincipalInfo> items = new ArrayList<>();
|
293
|
|
294
|
JSONArray array = document.optJSONArray(key);
|
295
|
JSONObject obj = document.optJSONObject(key);
|
296
|
|
297
|
if (array != null) {
|
298
|
for (int i = 0; i < array.length(); i += 1) {
|
299
|
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(array.optJSONObject(i));
|
300
|
if(nfo!=null) items.add(nfo);
|
301
|
}
|
302
|
}else if (obj!=null) {
|
303
|
PrincipalInfo nfo = JSONLDUtils.extractPrincipalSingle(obj);
|
304
|
if(nfo!=null) items.add(nfo);
|
305
|
} else {
|
306
|
String value = document.optString(key);
|
307
|
if (value != null) items.add(new PersonInfo(value));
|
308
|
}
|
309
|
|
310
|
return items;
|
311
|
}
|
312
|
|
313
|
public static List<String> extractString(JSONObject document, String key){
|
314
|
List<String> items = new ArrayList<>();
|
315
|
|
316
|
if (!document.has(key)) return items;
|
317
|
|
318
|
JSONArray array = document.optJSONArray(key);
|
319
|
JSONObject obj = document.optJSONObject(key);
|
320
|
if (array != null) {
|
321
|
for (int i = 0; i < array.length(); i += 1) {
|
322
|
JSONObject item = array.optJSONObject(i);
|
323
|
if(item != null) continue;
|
324
|
String value = array.optString(i);
|
325
|
if(value == null) continue;
|
326
|
items.add(value);
|
327
|
}
|
328
|
} else if (obj == null) {
|
329
|
String value = document.optString(key);
|
330
|
if(value != null) items.add(value);
|
331
|
}
|
332
|
|
333
|
return items;
|
334
|
|
335
|
}
|
336
|
|
337
|
public static List<String> extractSize(JSONObject document, String key){
|
338
|
List<String> items = new ArrayList<>();
|
339
|
|
340
|
JSONArray array = document.optJSONArray(key);
|
341
|
JSONObject obj = document.optJSONObject(key);
|
342
|
if (array != null) {
|
343
|
for (int i = 0; i < array.length(); i += 1) {
|
344
|
JSONObject item = array.optJSONObject(i);
|
345
|
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
346
|
String size = item.optString("contentSize");
|
347
|
if (size != null) items.add(size);
|
348
|
}
|
349
|
} else if (obj != null) {
|
350
|
String size = obj.optString("contentSize");
|
351
|
if ("DataDownload".equals((obj.optString("@type"))) && size != null) {
|
352
|
items.add(size);
|
353
|
}
|
354
|
}
|
355
|
|
356
|
return items;
|
357
|
}
|
358
|
|
359
|
public static List<String> extractEncodingFormat(JSONObject document, String key){
|
360
|
List<String> items = new ArrayList<>();
|
361
|
|
362
|
JSONArray array = document.optJSONArray(key);
|
363
|
JSONObject obj = document.optJSONObject(key);
|
364
|
if (array != null) {
|
365
|
for (int i = 0; i < array.length(); i += 1) {
|
366
|
JSONObject item = array.optJSONObject(i);
|
367
|
if (item == null || !"DataDownload".equals((item.optString("@type")))) continue;
|
368
|
String encodingFormat = item.optString("encodingFormat");
|
369
|
if (encodingFormat != null) items.add(encodingFormat);
|
370
|
String fileFormat = item.optString("fileFormat");
|
371
|
if (fileFormat != null) items.add(fileFormat);
|
372
|
}
|
373
|
} else if (obj != null) {
|
374
|
if ("DataDownload".equals((obj.optString("@type")))) {
|
375
|
String encodingFormat = obj.optString("encodingFormat");
|
376
|
if (encodingFormat != null) items.add(encodingFormat);
|
377
|
String fileFormat = obj.optString("fileFormat");
|
378
|
if (fileFormat != null) items.add(fileFormat);
|
379
|
}
|
380
|
}
|
381
|
|
382
|
return items;
|
383
|
}
|
384
|
|
385
|
public static List<String> extractLanguage(JSONObject document, String key){
|
386
|
List<String> items = new ArrayList<>();
|
387
|
|
388
|
JSONArray array = document.optJSONArray(key);
|
389
|
JSONObject obj = document.optJSONObject(key);
|
390
|
if (array != null) {
|
391
|
for (int i = 0; i < array.length(); i += 1) {
|
392
|
JSONObject item = array.optJSONObject(i);
|
393
|
if (item == null) {
|
394
|
String value = array.optString(i);
|
395
|
if (value != null) items.add(value);
|
396
|
} else {
|
397
|
if (!"Language".equals((item.optString("@type")))) continue;
|
398
|
String name = item.optString("name");
|
399
|
if (name != null) items.add(name);
|
400
|
String alternateName = item.optString("alternateName");
|
401
|
if (alternateName != null) items.add(alternateName);
|
402
|
}
|
403
|
}
|
404
|
} else if (obj != null) {
|
405
|
if ("Language".equals((obj.optString("@type")))){
|
406
|
String name = obj.optString("name");
|
407
|
if (name != null) items.add(name);
|
408
|
String alternateName = obj.optString("alternateName");
|
409
|
if (alternateName != null) items.add(alternateName);
|
410
|
}
|
411
|
} else {
|
412
|
String value = document.optString(key);
|
413
|
if (value != null) items.add(value);
|
414
|
}
|
415
|
|
416
|
return items;
|
417
|
}
|
418
|
|
419
|
public static List<LicenseInfo> extractLicenses(JSONObject document, String key){
|
420
|
List<LicenseInfo> items = new ArrayList<>();
|
421
|
|
422
|
JSONArray array = document.optJSONArray(key);
|
423
|
JSONObject obj = document.optJSONObject(key);
|
424
|
if (array != null) {
|
425
|
for (int i = 0; i < array.length(); i += 1) {
|
426
|
JSONObject item = array.optJSONObject(i);
|
427
|
if (item == null) {
|
428
|
String value = array.optString(i);
|
429
|
if(value != null) items.add(new LicenseInfo(value));
|
430
|
} else {
|
431
|
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
432
|
String url = item.optString("url");
|
433
|
String name = item.optString("name");
|
434
|
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
435
|
}
|
436
|
}
|
437
|
} else if (obj != null) {
|
438
|
if("CreativeWork".equals((obj.optString("@type")))) {
|
439
|
String url = obj.optString("url");
|
440
|
String name = obj.optString("name");
|
441
|
if (url != null || name != null) items.add(new LicenseInfo(url, name));
|
442
|
}
|
443
|
} else {
|
444
|
String value = document.optString(key);
|
445
|
if (value != null) items.add(new LicenseInfo(value));
|
446
|
}
|
447
|
|
448
|
return items;
|
449
|
}
|
450
|
|
451
|
public static List<CitationInfo> extractCitations(JSONObject document, String key){
|
452
|
List<CitationInfo> items = new ArrayList<>();
|
453
|
|
454
|
JSONArray array = document.optJSONArray(key);
|
455
|
JSONObject obj = document.optJSONObject(key);
|
456
|
if (array != null) {
|
457
|
for (int i = 0; i < array.length(); i += 1) {
|
458
|
JSONObject item = array.optJSONObject(i);
|
459
|
if (item == null) {
|
460
|
String value = array.optString(i);
|
461
|
if(value != null) items.add(new CitationInfo(value));
|
462
|
} else {
|
463
|
if (!"CreativeWork".equals((item.optString("@type")))) continue;
|
464
|
String url = item.optString("url");
|
465
|
if (url != null) items.add(new CitationInfo(url));
|
466
|
}
|
467
|
}
|
468
|
} else if (obj != null) {
|
469
|
if("CreativeWork".equals((obj.optString("@type")))) {
|
470
|
String url = obj.optString("url");
|
471
|
if (url != null) items.add(new CitationInfo(url));
|
472
|
}
|
473
|
} else {
|
474
|
String value = document.optString(key);
|
475
|
if (value != null) items.add(new CitationInfo(value));
|
476
|
}
|
477
|
|
478
|
return items;
|
479
|
}
|
480
|
|
481
|
private static IdentifierInfo extractIdentifierSingle(JSONObject document){
|
482
|
if(document == null || !"PropertyValue".equals(document.optString("@type"))) return null;
|
483
|
String name = document.optString("name");
|
484
|
String value = document.optString("value");
|
485
|
if(value==null || value.trim().length()==0) return null;
|
486
|
return new IdentifierInfo(value, name);
|
487
|
}
|
488
|
|
489
|
public static List<IdentifierInfo> extractIdentifier(JSONObject document, String key) {
|
490
|
List<IdentifierInfo> items = new ArrayList<>();
|
491
|
|
492
|
JSONArray array = document.optJSONArray(key);
|
493
|
JSONObject obj = document.optJSONObject(key);
|
494
|
|
495
|
if (array != null) {
|
496
|
for (int i = 0; i < array.length(); i += 1) {
|
497
|
IdentifierInfo nfo = null;
|
498
|
if (array.optJSONObject(i) == null) {
|
499
|
String value = array.optString(i);
|
500
|
if (value != null) nfo = new IdentifierInfo(value);
|
501
|
}
|
502
|
if (nfo == null) nfo = JSONLDUtils.extractIdentifierSingle(array.optJSONObject(i));
|
503
|
if (nfo != null) items.add(nfo);
|
504
|
}
|
505
|
}else if (obj!=null) {
|
506
|
IdentifierInfo nfo = JSONLDUtils.extractIdentifierSingle(obj);
|
507
|
if (nfo != null) items.add(nfo);
|
508
|
} else {
|
509
|
String value = document.optString(key);
|
510
|
if (value != null) items.add(new IdentifierInfo(value));
|
511
|
}
|
512
|
|
513
|
return items;
|
514
|
}
|
515
|
}
|