1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
import java.nio.charset.Charset;
|
4
|
import java.security.MessageDigest;
|
5
|
import java.util.*;
|
6
|
import java.util.function.Function;
|
7
|
import java.util.stream.Collectors;
|
8
|
|
9
|
import com.google.common.base.Predicate;
|
10
|
import com.google.common.base.Splitter;
|
11
|
import com.google.common.collect.Lists;
|
12
|
import com.google.common.collect.Maps;
|
13
|
import com.google.common.collect.Sets;
|
14
|
import com.google.protobuf.Descriptors.Descriptor;
|
15
|
import com.google.protobuf.Descriptors.FieldDescriptor;
|
16
|
import com.google.protobuf.InvalidProtocolBufferException;
|
17
|
import com.google.protobuf.Message;
|
18
|
import com.google.protobuf.Message.Builder;
|
19
|
import com.google.protobuf.ProtocolMessageEnum;
|
20
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
|
21
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
|
22
|
import eu.dnetlib.data.proto.DedupProtos.Dedup;
|
23
|
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
|
24
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
25
|
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
|
26
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
27
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
28
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
29
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
30
|
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization;
|
31
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
|
32
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
|
33
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
34
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
35
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
36
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
|
37
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
|
38
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
|
39
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
|
40
|
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
|
41
|
import eu.dnetlib.data.proto.ResultProtos.Result.Metadata;
|
42
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
|
43
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part;
|
44
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
|
45
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
|
46
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement;
|
47
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
48
|
import eu.dnetlib.miscutils.collections.Pair;
|
49
|
import eu.dnetlib.miscutils.iterators.IterablePair;
|
50
|
import org.apache.commons.codec.binary.Base64;
|
51
|
import org.apache.commons.codec.binary.Hex;
|
52
|
import org.apache.commons.lang.math.NumberUtils;
|
53
|
import org.apache.commons.lang3.StringUtils;
|
54
|
import org.w3c.dom.NamedNodeMap;
|
55
|
import org.w3c.dom.Node;
|
56
|
import org.w3c.dom.NodeList;
|
57
|
|
58
|
public abstract class AbstractDNetXsltFunctions {
|
59
|
|
60
|
/**
 * Regular expression matching a string that starts with the {@code http},
 * {@code https} or {@code ftp} scheme followed by {@code ://} and then any
 * remaining characters. Used by {@link #urlFilter}.
 */
public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
|
61
|
// NOTE(review): presumably the maximum length allowed for a namespace prefix;
// the code that reads this constant is not visible in this part of the file - confirm.
private static final int MAX_NSPREFIX_LEN = 12;
|
62
|
/**
 * Guava predicate that trims its argument and tests whether the whole trimmed
 * string matches {@link #URL_REGEX}.
 * A {@code null} argument results in a {@link NullPointerException}, because
 * {@code trim()} is invoked on it directly.
 */
public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
|
63
|
/**
 * Lookup table from a term code to its display name (for example
 * {@code "TD"} maps to {@code "Chad"} and {@code "ara"} to {@code "Arabic"}).
 * It is created as a plain mutable {@code HashMap} and filled by the static
 * initializer of this class.
 */
public static Map<String, String> code2name = Maps.newHashMap();
|
64
|
|
65
|
/*
|
66
|
* Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the
|
67
|
* relational db
|
68
|
*/
|
69
|
//code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)");
|
70
|
static {
|
71
|
code2name.put("MH","Marshall Islands");
|
72
|
code2name.put("CF","Central African Republic");
|
73
|
code2name.put("TD","Chad");
|
74
|
code2name.put("CN","China (People's Republic of)");
|
75
|
code2name.put("NG","Nigeria");
|
76
|
code2name.put("NF","Norfolk Island");
|
77
|
code2name.put("MP","Northern Mariana Islands");
|
78
|
code2name.put("PS","Palestinian-administered areas");
|
79
|
code2name.put("SZ","Swaziland");
|
80
|
code2name.put("max","Manx");
|
81
|
code2name.put("TW","Taiwan");
|
82
|
code2name.put("TJ","Tajikistan");
|
83
|
code2name.put("BSG","Research for the benefit of specific groups");
|
84
|
code2name.put("CP","Collaborative project");
|
85
|
code2name.put("12MONTHS","12 Months Embargo");
|
86
|
code2name.put("ace","Achinese");
|
87
|
code2name.put("egy","Ancient Egyptian");
|
88
|
code2name.put("ara","Arabic");
|
89
|
code2name.put("arc","Aramaic");
|
90
|
code2name.put("arp","Arapaho");
|
91
|
code2name.put("gon","Gondi");
|
92
|
code2name.put("ine","Indo-European");
|
93
|
code2name.put("ipk","Inupiaq");
|
94
|
code2name.put("ira","Iranian");
|
95
|
code2name.put("lim","Limburgan; Limburger; Limburgish");
|
96
|
code2name.put("mni","Manipuri");
|
97
|
code2name.put("mno","Manobo");
|
98
|
code2name.put("men","Mende");
|
99
|
code2name.put("CX","Christmas Island");
|
100
|
code2name.put("CC","Cocos (Keeling) Islands");
|
101
|
code2name.put("KM","Comoros");
|
102
|
code2name.put("CG","Congo");
|
103
|
code2name.put("CK","Cook Islands");
|
104
|
code2name.put("HR","Croatia");
|
105
|
code2name.put("arn","Araucanian");
|
106
|
code2name.put("art","Artificial");
|
107
|
code2name.put("nah","Aztec");
|
108
|
code2name.put("bug","Buginese");
|
109
|
code2name.put("chn","Chinook jargon");
|
110
|
code2name.put("chv","Chuvash");
|
111
|
code2name.put("mus","Creek");
|
112
|
code2name.put("mic","Micmac");
|
113
|
code2name.put("min","Minangkabau");
|
114
|
code2name.put("fro","Old French");
|
115
|
code2name.put("cpp","Portuguese-based Creoles and Pidgins");
|
116
|
code2name.put("som","Somali");
|
117
|
code2name.put("wen","Sorbian");
|
118
|
code2name.put("hrv","Croatian");
|
119
|
code2name.put("cus","Cushitic");
|
120
|
code2name.put("sot","Sotho, Southern");
|
121
|
code2name.put("sai","South American Indian");
|
122
|
code2name.put("esl/spa","Spanish");
|
123
|
code2name.put("CU","Cuba");
|
124
|
code2name.put("CW","Curaçao");
|
125
|
code2name.put("CZ","Czech Republic");
|
126
|
code2name.put("DK","Denmark");
|
127
|
code2name.put("ER","Eritrea");
|
128
|
code2name.put("TF","French Southern Territories");
|
129
|
code2name.put("GW","Guinea-Bissau");
|
130
|
code2name.put("VA","Holy See (Vatican City State)");
|
131
|
code2name.put("BO","Bolivia");
|
132
|
code2name.put("KY","Cayman Islands");
|
133
|
code2name.put("dra","Dravidian");
|
134
|
code2name.put("cpe","English-based Creoles and Pidgins");
|
135
|
code2name.put("oji","Ojibwa");
|
136
|
code2name.put("CIP-EIP-TN","CIP-Eco-Innovation - CIP-Thematic Network");
|
137
|
code2name.put("jav/jaw","Javanese");
|
138
|
code2name.put("ach","Acoli");
|
139
|
code2name.put("ada","Adangme");
|
140
|
code2name.put("afh","Afrihili");
|
141
|
code2name.put("afr","Afrikaans");
|
142
|
code2name.put("afa","Afro-Asiatic");
|
143
|
code2name.put("ale","Aleut");
|
144
|
code2name.put("alg","Algonquian languages");
|
145
|
code2name.put("arw","Arawak");
|
146
|
code2name.put("asm","Assamese");
|
147
|
code2name.put("ava","Avaric");
|
148
|
code2name.put("ave","Avestan");
|
149
|
code2name.put("bra","Braj");
|
150
|
code2name.put("bua","Buriat");
|
151
|
code2name.put("chr","Cherokee");
|
152
|
code2name.put("chy","Cheyenne");
|
153
|
code2name.put("jrb","Judeo-Arabic");
|
154
|
code2name.put("jpr","Judeo-Persian");
|
155
|
code2name.put("kab","Kabyle");
|
156
|
code2name.put("kac","Kachin");
|
157
|
code2name.put("kaa","Kara-Kalpak");
|
158
|
code2name.put("loz","Lozi");
|
159
|
code2name.put("mwr","Marwari");
|
160
|
code2name.put("DJ","Djibouti");
|
161
|
code2name.put("JM","Jamaica");
|
162
|
code2name.put("JP","Japan");
|
163
|
code2name.put("JE","Jersey");
|
164
|
code2name.put("JO","Jordan");
|
165
|
code2name.put("KZ","Kazakhstan");
|
166
|
code2name.put("KE","Kenya");
|
167
|
code2name.put("KI","Kiribati");
|
168
|
code2name.put("KR","Korea (Republic of)");
|
169
|
code2name.put("KP","Korea, Democatric People's Republic of");
|
170
|
code2name.put("XK","Kosovo * UN resolution");
|
171
|
code2name.put("KW","Kuwait");
|
172
|
code2name.put("NL","Netherlands");
|
173
|
code2name.put("PE","Peru");
|
174
|
code2name.put("PH","Philippines");
|
175
|
code2name.put("fre/fra","French");
|
176
|
code2name.put("PL","Poland");
|
177
|
code2name.put("PT","Portugal");
|
178
|
code2name.put("PR","Puerto Rico");
|
179
|
code2name.put("QA","Qatar");
|
180
|
code2name.put("RO","Romania");
|
181
|
code2name.put("RU","Russian Federation");
|
182
|
code2name.put("RW","Rwanda");
|
183
|
code2name.put("RE","Réunion");
|
184
|
code2name.put("sve/swe","Swedish");
|
185
|
code2name.put("myn","Mayan");
|
186
|
code2name.put("dum","Middle Dutch");
|
187
|
code2name.put("mun","Munda");
|
188
|
code2name.put("nde","Ndebele, North");
|
189
|
code2name.put("ndo","Ndonga");
|
190
|
code2name.put("nyn","Nyankole");
|
191
|
code2name.put("nzi","Nzima");
|
192
|
code2name.put("oci","Occitan (post 1500); Provençal");
|
193
|
code2name.put("GU","Guam");
|
194
|
code2name.put("tut","Altaic");
|
195
|
code2name.put("awa","Awadhi");
|
196
|
code2name.put("ban","Balinese");
|
197
|
code2name.put("bal","Baluchi");
|
198
|
code2name.put("bai","Bamileke");
|
199
|
code2name.put("bad","Banda");
|
200
|
code2name.put("UK","United Kingdom");
|
201
|
code2name.put("bas","Basa");
|
202
|
code2name.put("tib/bod","Tibetan");
|
203
|
code2name.put("ben","Bengali");
|
204
|
code2name.put("ber","Berber");
|
205
|
code2name.put("cho","Choctaw");
|
206
|
code2name.put("cop","Coptic");
|
207
|
code2name.put("crp","Creoles and Pidgins");
|
208
|
code2name.put("dak","Dakota");
|
209
|
code2name.put("del","Delaware");
|
210
|
code2name.put("div","Divehi");
|
211
|
code2name.put("kha","Khasi");
|
212
|
code2name.put("khi","Khoisan");
|
213
|
code2name.put("kho","Khotanese");
|
214
|
code2name.put("osa","Osage");
|
215
|
code2name.put("oss","Ossetian; Ossetic");
|
216
|
code2name.put("oto","Otomian");
|
217
|
code2name.put("GT","Guatemala");
|
218
|
code2name.put("ota","Ottoman");
|
219
|
code2name.put("GG","Guernsey");
|
220
|
code2name.put("GY","Guyana");
|
221
|
code2name.put("LA","Lao (People's Democratic Republic)");
|
222
|
code2name.put("LB","Lebanon");
|
223
|
code2name.put("LY","Libyan Arab Jamahiriya");
|
224
|
code2name.put("LI","Liechtenstein");
|
225
|
code2name.put("LT","Lithuania");
|
226
|
code2name.put("LU","Luxembourg");
|
227
|
code2name.put("PW","Palau");
|
228
|
code2name.put("BL","Saint-Barthélemy");
|
229
|
code2name.put("SM","San Marino");
|
230
|
code2name.put("SX","Sint Maarten (Dutch Part)");
|
231
|
code2name.put("TL","Timor-Leste");
|
232
|
code2name.put("TK","Tokelau");
|
233
|
code2name.put("TO","Tonga");
|
234
|
code2name.put("TN","Tunisia");
|
235
|
code2name.put("TC","Turks and Caicos Islands");
|
236
|
code2name.put("TV","Tuvalu");
|
237
|
code2name.put("GB","United Kingdom");
|
238
|
code2name.put("VU","Vanuatu");
|
239
|
code2name.put("pal","Pahlavi");
|
240
|
code2name.put("pau","Palauan");
|
241
|
code2name.put("pam","Pampanga");
|
242
|
code2name.put("pag","Pangasinan");
|
243
|
code2name.put("pap","Papiamento");
|
244
|
code2name.put("fas/per","Persian");
|
245
|
code2name.put("phn","Phoenician");
|
246
|
code2name.put("sid","Sidamo");
|
247
|
code2name.put("GA","Gabon");
|
248
|
code2name.put("GL","Greenland");
|
249
|
code2name.put("GD","Grenada");
|
250
|
code2name.put("GP","Guadeloupe");
|
251
|
code2name.put("IE","Ireland");
|
252
|
code2name.put("spa","Spanish; Castilian");
|
253
|
code2name.put("IM","Isle of Man");
|
254
|
code2name.put("IT","Italy");
|
255
|
code2name.put("ES","Spain");
|
256
|
code2name.put("SR","Suriname");
|
257
|
code2name.put("TZ","Tanzania (United Republic of)");
|
258
|
code2name.put("TH","Thailand");
|
259
|
code2name.put("TG","Togo");
|
260
|
code2name.put("UG","Uganda");
|
261
|
code2name.put("UZ","Uzbekistan");
|
262
|
code2name.put("VE","Venezuela");
|
263
|
code2name.put("VI","Virgin Islands, U.S.");
|
264
|
code2name.put("WF","Wallis and Futuna");
|
265
|
code2name.put("COFUND-PCP","COFUND (PCP)");
|
266
|
code2name.put("amh","Amharic");
|
267
|
code2name.put("map","Austronesian");
|
268
|
code2name.put("aym","Aymara");
|
269
|
code2name.put("bnt","Bantu");
|
270
|
code2name.put("bak","Bashkir");
|
271
|
code2name.put("bho","Bhojpuri");
|
272
|
code2name.put("bik","Bikol");
|
273
|
code2name.put("bul","Bulgarian");
|
274
|
code2name.put("cor","Cornish");
|
275
|
code2name.put("dua","Duala");
|
276
|
code2name.put("dut/nld","Dutch; Flemish");
|
277
|
code2name.put("isRelatedTo","isRelatedTo");
|
278
|
code2name.put("coauthor","coauthor");
|
279
|
code2name.put("dyu","Dyula");
|
280
|
code2name.put("eka","Ekajuk");
|
281
|
code2name.put("gil","Gilbertese");
|
282
|
code2name.put("suk","Sukuma");
|
283
|
code2name.put("sux","Sumerian");
|
284
|
code2name.put("sun","Sundanese");
|
285
|
code2name.put("sus","Susu");
|
286
|
code2name.put("swa","Swahili");
|
287
|
code2name.put("0010","Lecture");
|
288
|
code2name.put("0007","Master thesis");
|
289
|
code2name.put("0027","Model");
|
290
|
code2name.put("0012","Newsletter");
|
291
|
code2name.put("0020","Other ORP type");
|
292
|
code2name.put("0038","Other literature type");
|
293
|
code2name.put("0039","Other dataset type");
|
294
|
code2name.put("0040","Other software type");
|
295
|
code2name.put("0013","Part of book or chapter of book");
|
296
|
code2name.put("0019","Patent");
|
297
|
code2name.put("0028","PhysicalObject");
|
298
|
code2name.put("0016","Preprint");
|
299
|
code2name.put("DM","Dominica");
|
300
|
code2name.put("DO","Dominican Republic");
|
301
|
code2name.put("EC","Ecuador");
|
302
|
code2name.put("EG","Egypt");
|
303
|
code2name.put("GQ","Equatorial Guinea");
|
304
|
code2name.put("EE","Estonia");
|
305
|
code2name.put("ET","Ethiopia");
|
306
|
code2name.put("GR","Greece");
|
307
|
code2name.put("HM","Heard Island and McDonald Islands");
|
308
|
code2name.put("got","Gothic");
|
309
|
code2name.put("grb","Grebo");
|
310
|
code2name.put("ell/gre","Greek");
|
311
|
code2name.put("hat","Haitian; Haitian Creole");
|
312
|
code2name.put("hau","Hausa");
|
313
|
code2name.put("haw","Hawaiian");
|
314
|
code2name.put("heb","Hebrew");
|
315
|
code2name.put("gai/iri","Irish");
|
316
|
code2name.put("kar","Karen");
|
317
|
code2name.put("lui","Luiseno");
|
318
|
code2name.put("goh","Old High German");
|
319
|
code2name.put("abk","Abkhazian");
|
320
|
code2name.put("aar","Afar");
|
321
|
code2name.put("aggregator::pubsrepository::journals","Journal Aggregator/Publisher");
|
322
|
code2name.put("pubsrepository::mock","Other");
|
323
|
code2name.put("pubscatalogue::unknown","Publication Catalogue");
|
324
|
code2name.put("BI","Burundi");
|
325
|
code2name.put("CM","Cameroon");
|
326
|
code2name.put("CD","Congo (Democratic Republic of)");
|
327
|
code2name.put("CR","Costa Rica");
|
328
|
code2name.put("CI","Cote d'Ivoire");
|
329
|
code2name.put("arg","Aragonese");
|
330
|
code2name.put("aze","Azerbaijani");
|
331
|
code2name.put("EU","European Union");
|
332
|
code2name.put("FK","Falkland Islands (Malvinas)");
|
333
|
code2name.put("scr/hrv","Croatian");
|
334
|
code2name.put("bam","Bambara");
|
335
|
code2name.put("baq/eus","Basque");
|
336
|
code2name.put("bih","Bihari");
|
337
|
code2name.put("FO","Faroe Islands");
|
338
|
code2name.put("FJ","Fiji");
|
339
|
code2name.put("FI","Finland");
|
340
|
code2name.put("ger/deu","German");
|
341
|
code2name.put("MK","Former Yugoslav Republic of Macedonia");
|
342
|
code2name.put("FR","France");
|
343
|
code2name.put("bis","Bislama");
|
344
|
code2name.put("cat","Catalan; Valencian");
|
345
|
code2name.put("cha","Chamorro");
|
346
|
code2name.put("che","Chechen");
|
347
|
code2name.put("cos","Corsican");
|
348
|
code2name.put("elx","Elamite");
|
349
|
code2name.put("eng","English");
|
350
|
code2name.put("est","Estonian");
|
351
|
code2name.put("deu/ger","German");
|
352
|
code2name.put("gle","Irish");
|
353
|
code2name.put("gem","Germanic");
|
354
|
code2name.put("GF","French Guiana");
|
355
|
code2name.put("PF","French Polynesia");
|
356
|
code2name.put("GM","Gambia");
|
357
|
code2name.put("kik","Gikuyu; Kikuyu");
|
358
|
code2name.put("gre/ell","Greek, Modern (1453-)");
|
359
|
code2name.put("DE","Germany");
|
360
|
code2name.put("mac/mkd","Macedonian");
|
361
|
code2name.put("scc/srp","Serbian");
|
362
|
code2name.put("grn","Guarani");
|
363
|
code2name.put("ssw","Swati");
|
364
|
code2name.put("swe","Swedish");
|
365
|
code2name.put("syr","Syriac");
|
366
|
code2name.put("tgl","Tagalog");
|
367
|
code2name.put("tah","Tahitian");
|
368
|
code2name.put("tgk","Tajik");
|
369
|
code2name.put("tmh","Tamashek");
|
370
|
code2name.put("tam","Tamil");
|
371
|
code2name.put("tat","Tatar");
|
372
|
code2name.put("aggregator::pubsrepository::institutional","Institutional Repository Aggregator");
|
373
|
code2name.put("per/fas","Persian");
|
374
|
code2name.put("FCT","Fundação para a Ciência e Tecnologia");
|
375
|
code2name.put("user:claim:pid","user:claim:pid");
|
376
|
code2name.put("entityregistry","Registry");
|
377
|
code2name.put("hin","Hindi");
|
378
|
code2name.put("NA","Namibia");
|
379
|
code2name.put("ido","Ido");
|
380
|
code2name.put("ibo","Igbo");
|
381
|
code2name.put("orcid","Open Researcher and Contributor ID");
|
382
|
code2name.put("TT","Trinidad and Tobago");
|
383
|
code2name.put("TR","Turkey");
|
384
|
code2name.put("TM","Turkmenistan");
|
385
|
code2name.put("arXiv","arXiv");
|
386
|
code2name.put("providedBy","provided by");
|
387
|
code2name.put("EMBARGO","Embargo");
|
388
|
code2name.put("dataset_dataset","dataset_dataset");
|
389
|
code2name.put("publication_dataset","publication_dataset");
|
390
|
code2name.put("publication_publication","publication_publication");
|
391
|
code2name.put("coordinator","coordinator");
|
392
|
code2name.put("participant","participant");
|
393
|
code2name.put("subcontractor","subcontractor");
|
394
|
code2name.put("principal investigating","principal investigating");
|
395
|
code2name.put("exploitation","exploitation");
|
396
|
code2name.put("OPEN","Open Access");
|
397
|
code2name.put("OPEN SOURCE","Open Source");
|
398
|
code2name.put("doi","doi");
|
399
|
code2name.put("orcidworkid","orcid workid");
|
400
|
code2name.put("MQ","Martinique");
|
401
|
code2name.put("MR","Mauritania");
|
402
|
code2name.put("jpn","Japanese");
|
403
|
code2name.put("pubsrepository::unknown","Publication Repository");
|
404
|
code2name.put("aggregator::pubsrepository::unknown","Publication Repository Aggregator");
|
405
|
code2name.put("UA","Ukraine");
|
406
|
code2name.put("YT","Mayotte");
|
407
|
code2name.put("OTHER","Other");
|
408
|
code2name.put("RESTRICTED","Restricted");
|
409
|
code2name.put("AE","United Arab Emirates");
|
410
|
code2name.put("aka","Akan");
|
411
|
code2name.put("US","United States");
|
412
|
code2name.put("author","author");
|
413
|
code2name.put("isResultOf","isResultOf");
|
414
|
code2name.put("kin","Kinyarwanda");
|
415
|
code2name.put("kom","Komi");
|
416
|
code2name.put("new","Newari");
|
417
|
code2name.put("NR","Nauru");
|
418
|
code2name.put("FM","Micronesia, Federated States of");
|
419
|
code2name.put("NP","Nepal");
|
420
|
code2name.put("MN","Mongolia");
|
421
|
code2name.put("rum/ron","Romanian");
|
422
|
code2name.put("submitted","submitted");
|
423
|
code2name.put("driver-openaire2.0","OpenAIRE 2.0+ (DRIVER OA, EC funding)");
|
424
|
code2name.put("result","result");
|
425
|
code2name.put("roh","Raeto-Romance");
|
426
|
code2name.put("run","Rundi");
|
427
|
code2name.put("bin","Bini");
|
428
|
code2name.put("bos","Bosnian");
|
429
|
code2name.put("din","Dinka");
|
430
|
code2name.put("tel","Telugu");
|
431
|
code2name.put("MA","Morocco");
|
432
|
code2name.put("MZ","Mozambique");
|
433
|
code2name.put("ewo","Ewondo");
|
434
|
code2name.put("ter","Tereno");
|
435
|
code2name.put("fat","Fanti");
|
436
|
code2name.put("fao","Faroese");
|
437
|
code2name.put("hai","Haida");
|
438
|
code2name.put("MM","Myanmar");
|
439
|
code2name.put("NU","Niue");
|
440
|
code2name.put("PK","Pakistan");
|
441
|
code2name.put("PG","Papua New Guinea");
|
442
|
code2name.put("file::WoS","file::WoS");
|
443
|
code2name.put("metadata","metadata");
|
444
|
code2name.put("file::hybrid","file::hybrid");
|
445
|
code2name.put("nbl","Ndebele, South");
|
446
|
code2name.put("akk","Akkadian");
|
447
|
code2name.put("alb/sqi","Albanian");
|
448
|
code2name.put("arm/hye","Armenian");
|
449
|
code2name.put("ath","Athapascan");
|
450
|
code2name.put("CA","Canada");
|
451
|
code2name.put("CV","Cape Verde");
|
452
|
code2name.put("CL","Chile");
|
453
|
code2name.put("bat","Baltic");
|
454
|
code2name.put("CO","Colombia");
|
455
|
code2name.put("CY","Cyprus");
|
456
|
code2name.put("SV","El Salvador");
|
457
|
code2name.put("HT","Haiti");
|
458
|
code2name.put("bej","Beja");
|
459
|
code2name.put("HN","Honduras");
|
460
|
code2name.put("HK","Hong Kong");
|
461
|
code2name.put("HU","Hungary");
|
462
|
code2name.put("bel","Belarusian");
|
463
|
code2name.put("bem","Bemba");
|
464
|
code2name.put("slo/slk","Slovak");
|
465
|
code2name.put("bre","Breton");
|
466
|
code2name.put("car","Carib");
|
467
|
code2name.put("cau","Caucasian");
|
468
|
code2name.put("ewe","Ewe");
|
469
|
code2name.put("tha","Thai");
|
470
|
code2name.put("fan","Fang");
|
471
|
code2name.put("fij","Fijian");
|
472
|
code2name.put("fin","Finnish");
|
473
|
code2name.put("her","Herero");
|
474
|
code2name.put("hil","Hiligaynon");
|
475
|
code2name.put("bod/tib","Tibetan");
|
476
|
code2name.put("tig","Tigre");
|
477
|
code2name.put("tir","Tigrinya");
|
478
|
code2name.put("tem","Timne");
|
479
|
code2name.put("wel/cym","Welsh");
|
480
|
code2name.put("KO","Kosovo * UN resolution");
|
481
|
code2name.put("tiv","Tivi");
|
482
|
code2name.put("tli","Tlingit");
|
483
|
code2name.put("ton","Tonga (Tonga Islands)");
|
484
|
code2name.put("tog","Tonga(Nyasa)");
|
485
|
code2name.put("tru","Truk");
|
486
|
code2name.put("tsi","Tsimshian");
|
487
|
code2name.put("tso","Tsonga");
|
488
|
code2name.put("tsn","Tswana");
|
489
|
code2name.put("IsPreviousVersionOf","IsPreviousVersionOf");
|
490
|
code2name.put("IsReferencedBy","IsReferencedBy");
|
491
|
code2name.put("References","References");
|
492
|
code2name.put("IS","Iceland");
|
493
|
code2name.put("IN","India");
|
494
|
code2name.put("ID","Indonesia");
|
495
|
code2name.put("IL","Israel");
|
496
|
code2name.put("NZ","New Zealand");
|
497
|
code2name.put("NI","Nicaragua");
|
498
|
code2name.put("NE","Niger");
|
499
|
code2name.put("ARK","ARK");
|
500
|
code2name.put("BW","Botswana");
|
501
|
code2name.put("BR","Brazil");
|
502
|
code2name.put("BF","Burkina Faso");
|
503
|
code2name.put("KH","Cambodia");
|
504
|
code2name.put("hmo","Hiri Motu");
|
505
|
code2name.put("hun","Hungarian");
|
506
|
code2name.put("ice/isl","Icelandic");
|
507
|
code2name.put("ind","Indonesian");
|
508
|
code2name.put("ile","Interlingue");
|
509
|
code2name.put("kam","Kamba");
|
510
|
code2name.put("lub","Luba-Katanga");
|
511
|
code2name.put("nav","Navajo; Navaho");
|
512
|
code2name.put("datasetsbyproject","datasetsbyproject");
|
513
|
code2name.put("ISSN","ISSN");
|
514
|
code2name.put("MC","Support for training and career development of researchers (Marie Curie)");
|
515
|
code2name.put("nor","Norwegian");
|
516
|
code2name.put("file","file");
|
517
|
code2name.put("ISTC","ISTC");
|
518
|
code2name.put("CSA-LS","CSA Lump sum");
|
519
|
code2name.put("MX","Mexico");
|
520
|
code2name.put("ME","Montenegro");
|
521
|
code2name.put("ceb","Cebuano");
|
522
|
code2name.put("nub","Nubian");
|
523
|
code2name.put("nym","Nyamwezi");
|
524
|
code2name.put("nyo","Nyoro");
|
525
|
code2name.put("tum","Tumbuka");
|
526
|
code2name.put("tur","Turkish");
|
527
|
code2name.put("tuk","Turkmen");
|
528
|
code2name.put("dnet:od_subjects","OpenDOAR subjects");
|
529
|
code2name.put("wos","Web of Science Subject Areas");
|
530
|
code2name.put("arxiv","arXiv");
|
531
|
code2name.put("nsf:fieldOfApplication","Field of Application (NSF)");
|
532
|
code2name.put("NetCDF","NetCDF");
|
533
|
code2name.put("OpenDAP","OpenDAP");
|
534
|
code2name.put("api","api");
|
535
|
code2name.put("datasetsbyjournal","datasetsbyjournal");
|
536
|
code2name.put("DOI","DOI");
|
537
|
code2name.put("EAN13","EAN13");
|
538
|
code2name.put("EISSN","EISSN");
|
539
|
code2name.put("Handle","Handle");
|
540
|
code2name.put("ISBN","ISBN");
|
541
|
code2name.put("LISSN","LISSN");
|
542
|
code2name.put("LSID","LSID");
|
543
|
code2name.put("PURL","PURL");
|
544
|
code2name.put("UPC","UPC");
|
545
|
code2name.put("URL","URL");
|
546
|
code2name.put("URN","URN");
|
547
|
code2name.put("cel","Celtic");
|
548
|
code2name.put("chg","Chagatai");
|
549
|
code2name.put("chb","Chibcha");
|
550
|
code2name.put("AF","Afghanistan");
|
551
|
code2name.put("AL","Albania");
|
552
|
code2name.put("PY","Paraguay");
|
553
|
code2name.put("PN","Pitcairn");
|
554
|
code2name.put("KN","Saint Kitts and Nevis");
|
555
|
code2name.put("UY","Uruguay");
|
556
|
code2name.put("VN","Viet Nam");
|
557
|
code2name.put("VG","Virgin Islands (British)");
|
558
|
code2name.put("EH","Western Sahara");
|
559
|
code2name.put("YE","Yemen");
|
560
|
code2name.put("YU","Yugoslavia");
|
561
|
code2name.put("ZW","Zimbabwe");
|
562
|
code2name.put("ec:hasprogram","hasprogram");
|
563
|
code2name.put("ec:hasspecificprogram","hasspecificprogram");
|
564
|
code2name.put("available","available");
|
565
|
code2name.put("chi/zho","Chinese");
|
566
|
code2name.put("ces/cze","Czech");
|
567
|
code2name.put("guj","Gujarati");
|
568
|
code2name.put("him","Himachali");
|
569
|
code2name.put("hup","Hupa");
|
570
|
code2name.put("iba","Iban");
|
571
|
code2name.put("ijo","Ijo");
|
572
|
code2name.put("ilo","Iloko");
|
573
|
code2name.put("inc","Indic");
|
574
|
code2name.put("kan","Kannada");
|
575
|
code2name.put("DZ","Algeria");
|
576
|
code2name.put("BT","Bhutan");
|
577
|
code2name.put("kau","Kanuri");
|
578
|
code2name.put("mul","Multiple languages");
|
579
|
code2name.put("BA","Bosnia and Herzegovina");
|
580
|
code2name.put("MU","Mauritius");
|
581
|
code2name.put("CSA","Coordination and support action");
|
582
|
code2name.put("fileCSV","fileCSV");
|
583
|
code2name.put("AS","American Samoa");
|
584
|
code2name.put("ERC","Support for frontier research (ERC)");
|
585
|
code2name.put("IA","Innovation action");
|
586
|
code2name.put("AD","Andorra");
|
587
|
code2name.put("AO","Angola");
|
588
|
code2name.put("AI","Anguilla");
|
589
|
code2name.put("AQ","Antarctica");
|
590
|
code2name.put("AG","Antigua and Barbuda");
|
591
|
code2name.put("AR","Argentina");
|
592
|
code2name.put("AM","Armenia");
|
593
|
code2name.put("AW","Aruba");
|
594
|
code2name.put("AU","Australia");
|
595
|
code2name.put("AT","Austria");
|
596
|
code2name.put("AZ","Azerbaijan");
|
597
|
code2name.put("BS","Bahamas");
|
598
|
code2name.put("BH","Bahrain");
|
599
|
code2name.put("BE","Belgium");
|
600
|
code2name.put("BZ","Belize");
|
601
|
code2name.put("BJ","Benin");
|
602
|
code2name.put("BM","Bermuda");
|
603
|
code2name.put("GE","Georgia");
|
604
|
code2name.put("GH","Ghana");
|
605
|
code2name.put("GI","Gibraltar");
|
606
|
code2name.put("GN","Guinea");
|
607
|
code2name.put("IR","Iran (Islamic Republic of)");
|
608
|
code2name.put("IQ","Iraq");
|
609
|
code2name.put("6MONTHS","6 Months Embargo");
|
610
|
code2name.put("CLOSED","Closed Access");
|
611
|
code2name.put("ina","Auxiliary Language Association)");
|
612
|
code2name.put("bur/mya","Burmese");
|
613
|
code2name.put("cad","Caddo");
|
614
|
code2name.put("cai","Central American Indian");
|
615
|
code2name.put("chu","Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
|
616
|
code2name.put("kal","Greenlandic; Kalaallisut");
|
617
|
code2name.put("iku","Inuktitut");
|
618
|
code2name.put("iro","Iroquoian");
|
619
|
code2name.put("ita","Italian");
|
620
|
code2name.put("jav","Javanese");
|
621
|
code2name.put("kua","Kuanyama; Kwanyama");
|
622
|
code2name.put("kum","Kumyk");
|
623
|
code2name.put("kru","Kurukh");
|
624
|
code2name.put("kus","Kusaie");
|
625
|
code2name.put("vie","Vietnamese");
|
626
|
code2name.put("vol","Volapük");
|
627
|
code2name.put("vot","Votic");
|
628
|
code2name.put("wak","Wakashan");
|
629
|
code2name.put("wal","Walamo");
|
630
|
code2name.put("wln","Walloon");
|
631
|
code2name.put("war","Waray");
|
632
|
code2name.put("ST","São Tomé and Príncipe");
|
633
|
code2name.put("endDate","endDate");
|
634
|
code2name.put("issued","issued");
|
635
|
code2name.put("startDate","startDate");
|
636
|
code2name.put("FCH2-CSA","Coordination & support action");
|
637
|
code2name.put("nic","Niger-Kordofanian");
|
638
|
code2name.put("ssa","Nilo-Saharan");
|
639
|
code2name.put("MSCA-RISE","RISE");
|
640
|
code2name.put("RIA","Research and Innovation action");
|
641
|
code2name.put("MSCA-IF-EF-ST","Standard EF");
|
642
|
code2name.put("PendingRepositoryResources","Pending datasource");
|
643
|
code2name.put("RepositoryServiceResources","Valid datasource");
|
644
|
code2name.put("publication","publication");
|
645
|
code2name.put("niu","Niuean");
|
646
|
code2name.put("sysimport:crosswalk:aggregator","sysimport:crosswalk:aggregator");
|
647
|
code2name.put("sysimport:crosswalk:cris","sysimport:crosswalk:cris");
|
648
|
code2name.put("sysimport:crosswalk:datasetarchive","sysimport:crosswalk:datasetarchive");
|
649
|
code2name.put("sysimport:crosswalk:entityregistry","sysimport:crosswalk:entityregistry");
|
650
|
code2name.put("non","Norse");
|
651
|
code2name.put("nai","North American Indian");
|
652
|
code2name.put("sme","Northern Sami");
|
653
|
code2name.put("nno","Norwegian Nynorsk; Nynorsk, Norwegian");
|
654
|
code2name.put("yor","Yoruba");
|
655
|
code2name.put("nob","Bokmål, Norwegian; Norwegian Bokmål");
|
656
|
code2name.put("kaz","Kazakh");
|
657
|
code2name.put("khm","Khmer");
|
658
|
code2name.put("kor","Korean");
|
659
|
code2name.put("ltz","Letzeburgesch; Luxembourgish");
|
660
|
code2name.put("mar","Marathi");
|
661
|
code2name.put("mas","Masai");
|
662
|
code2name.put("enm","Middle English");
|
663
|
code2name.put("frm","Middle French");
|
664
|
code2name.put("mis","Miscellaneous");
|
665
|
code2name.put("zap","Zapotec");
|
666
|
code2name.put("zul","Zulu");
|
667
|
code2name.put("KG","Kyrgyzstan");
|
668
|
code2name.put("LV","Latvia");
|
669
|
code2name.put("LS","Lesotho");
|
670
|
code2name.put("LR","Liberia");
|
671
|
code2name.put("MO","Macao");
|
672
|
code2name.put("MG","Madagascar");
|
673
|
code2name.put("MW","Malawi");
|
674
|
code2name.put("MY","Malaysia");
|
675
|
code2name.put("MD","Moldova (Republic of)");
|
676
|
code2name.put("MS","Montserrat");
|
677
|
code2name.put("AX","Åland Islands");
|
678
|
code2name.put("moh","Mohawk");
|
679
|
code2name.put("mol","Moldavian");
|
680
|
code2name.put("mkh","Mon-Kmer");
|
681
|
code2name.put("lol","Mongo");
|
682
|
code2name.put("copyrighted","copyrighted");
|
683
|
code2name.put("created","created");
|
684
|
code2name.put("updated","updated");
|
685
|
code2name.put("valid","valid");
|
686
|
code2name.put("BBI-IA-DEMO","Bio-based Industries Innovation action - Demonstration");
|
687
|
code2name.put("MSCA-IF-EF-CAR","CAR – Career Restart panel");
|
688
|
code2name.put("MSCA-ITN-ETN","European Training Networks");
|
689
|
code2name.put("interactiveResource","interactiveResource");
|
690
|
code2name.put("model","model");
|
691
|
code2name.put("ML","Mali");
|
692
|
code2name.put("FCH2-RIA","FCH2 Research and Innovation action");
|
693
|
code2name.put("MSCA-COFUND-FP","Fellowship programmes");
|
694
|
code2name.put("physicalObject","physicalObject");
|
695
|
code2name.put("MSCA-IF-GF","Global Fellowships");
|
696
|
code2name.put("sysimport:crosswalk:infospace","sysimport:crosswalk:infospace");
|
697
|
code2name.put("sysimport:crosswalk:repository","sysimport:crosswalk:repository");
|
698
|
code2name.put("sysimport:mining:aggregator","sysimport:mining:aggregator");
|
699
|
code2name.put("fry","Frisian");
|
700
|
code2name.put("gaa","Ga");
|
701
|
code2name.put("gae/gdh","Gaelic");
|
702
|
code2name.put("service","service");
|
703
|
code2name.put("software","software");
|
704
|
code2name.put("sound","sound");
|
705
|
code2name.put("glg","Galician");
|
706
|
code2name.put("lug","Ganda");
|
707
|
code2name.put("gay","Gayo");
|
708
|
code2name.put("gez","Geez");
|
709
|
code2name.put("MT","Malta");
|
710
|
code2name.put("text","text");
|
711
|
code2name.put("AN","Netherlands Antilles");
|
712
|
code2name.put("NC","New Caledonia");
|
713
|
code2name.put("NO","Norway");
|
714
|
code2name.put("OC","Oceania");
|
715
|
code2name.put("user:claim:search","user:claim:search");
|
716
|
code2name.put("OM","Oman");
|
717
|
code2name.put("PA","Panama");
|
718
|
code2name.put("user:insert","user:insert");
|
719
|
code2name.put("171","Article 171 of the Treaty");
|
720
|
code2name.put("nya","Chewa; Chichewa; Nyanja");
|
721
|
code2name.put("cre","Cree");
|
722
|
code2name.put("geo/kat","Georgian");
|
723
|
code2name.put("dan","Danish");
|
724
|
code2name.put("MV","Maldives");
|
725
|
code2name.put("dzo","Dzongkha");
|
726
|
code2name.put("efi","Efik");
|
727
|
code2name.put("LC","Saint Lucia");
|
728
|
code2name.put("zun","Zuni");
|
729
|
code2name.put("sga","old Irish");
|
730
|
code2name.put("file::EuropePMC","file::EuropePMC");
|
731
|
code2name.put("MF","Saint Martin (French Part)");
|
732
|
code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)");
|
733
|
code2name.put("file::PDF","file::PDF");
|
734
|
code2name.put("esk","Eskimo");
|
735
|
code2name.put("ec:program","program");
|
736
|
code2name.put("epo","Esperanto");
|
737
|
code2name.put("fct:program","fct:program");
|
738
|
code2name.put("ec:specificprogram","specificprogram");
|
739
|
code2name.put("collection","collection");
|
740
|
code2name.put("ERC-ADG","Advanced Grant");
|
741
|
code2name.put("ERA-NET-Cofund","ERA-NET Cofund");
|
742
|
code2name.put("dataset","dataset");
|
743
|
code2name.put("event","event");
|
744
|
code2name.put("ERC-LVG","ERC low value grant");
|
745
|
code2name.put("film","film");
|
746
|
code2name.put("image","image");
|
747
|
code2name.put("SL","Sierra Leone");
|
748
|
code2name.put("ec:hasframeworkprogram","hasframeworkprogram");
|
749
|
code2name.put("ERC-POC","Proof of Concept Grant");
|
750
|
code2name.put("sysimport:mining:cris","sysimport:mining:cris");
|
751
|
code2name.put("sysimport:mining:datasetarchive","sysimport:mining:datasetarchive");
|
752
|
code2name.put("CP-CSA","Combination of CP & CSA");
|
753
|
code2name.put("NoE","Network of Excellence");
|
754
|
code2name.put("grc","Ancient Greek");
|
755
|
code2name.put("lat","Latin");
|
756
|
code2name.put("ori","Oriya");
|
757
|
code2name.put("orm","Oromo");
|
758
|
code2name.put("nso","Sotho");
|
759
|
code2name.put("ddc","Dewey Decimal Classification");
|
760
|
code2name.put("zen","Zenaga");
|
761
|
code2name.put("ec:h2020topics","Horizon 2020 Topics");
|
762
|
code2name.put("alternative title","alternative title");
|
763
|
code2name.put("mesheuropmc","Medical Subject Headings");
|
764
|
code2name.put("apa","Apache");
|
765
|
code2name.put("SH","Saint Helena, Ascension and Tristan da Cunha");
|
766
|
code2name.put("PM","Saint Pierre and Miquelon");
|
767
|
code2name.put("MSCA-COFUND-DP","Doctoral programmes");
|
768
|
code2name.put("VC","Saint Vincent and the Grenadines");
|
769
|
code2name.put("ECSEL-IA","ECSEL Innovation Action");
|
770
|
code2name.put("kpe","Kpelle");
|
771
|
code2name.put("ECSEL-RIA","ECSEL Research and Innovation Actions");
|
772
|
code2name.put("MSCA-ITN-EID","European Industrial Doctorates");
|
773
|
code2name.put("sysimport:mining:entityregistry","sysimport:mining:entityregistry");
|
774
|
code2name.put("sysimport:mining:infospace","sysimport:mining:infospace");
|
775
|
code2name.put("sysimport:mining:repository","sysimport:mining:repository");
|
776
|
code2name.put("main title","main title");
|
777
|
code2name.put("subtitle","subtitle");
|
778
|
code2name.put("translated title","translated title");
|
779
|
code2name.put("lav","Latvian");
|
780
|
code2name.put("kro","Kru");
|
781
|
code2name.put("kur","Kurdish");
|
782
|
code2name.put("kut","Kutenai");
|
783
|
code2name.put("pli","Pali");
|
784
|
code2name.put("pan","Panjabi; Punjabi");
|
785
|
code2name.put("paa","Papuan-Australian");
|
786
|
code2name.put("peo","Persian, Old (ca 600 - 400 B.C.)");
|
787
|
code2name.put("zha","Zhuang; Chuang");
|
788
|
code2name.put("pmc","pmc");
|
789
|
code2name.put("pmid","pmid");
|
790
|
code2name.put("urn","urn");
|
791
|
code2name.put("IO","British Indian Ocean Territory");
|
792
|
code2name.put("WS","Samoa");
|
793
|
code2name.put("SA","Saudi Arabia");
|
794
|
code2name.put("SN","Senegal");
|
795
|
code2name.put("RS","Serbia");
|
796
|
code2name.put("MSCA-ITN-EJD","European Joint Doctorates");
|
797
|
code2name.put("wt:hasParentFunding","wt:hasParentFunding");
|
798
|
code2name.put("lad","Ladino");
|
799
|
code2name.put("bla","Siksika");
|
800
|
code2name.put("lah","Lahnda");
|
801
|
code2name.put("lam","Lamba");
|
802
|
code2name.put("lao","Lao");
|
803
|
code2name.put("snd","Sindhi");
|
804
|
code2name.put("son","Songhai");
|
805
|
code2name.put("DFG","DFG Classification");
|
806
|
code2name.put("SC","Seychelles");
|
807
|
code2name.put("SG","Singapore");
|
808
|
code2name.put("SK","Slovakia");
|
809
|
code2name.put("SI","Slovenia");
|
810
|
code2name.put("lez","Lezghian");
|
811
|
code2name.put("SB","Solomon Islands");
|
812
|
code2name.put("SO","Somalia");
|
813
|
code2name.put("ZA","South Africa");
|
814
|
code2name.put("GS","South Georgia and the South Sandwich Islands");
|
815
|
code2name.put("fiu","Finno-Ugrian");
|
816
|
code2name.put("fon","Fon");
|
817
|
code2name.put("fra/fre","French");
|
818
|
code2name.put("cpf","French-based Creoles and Pidgins");
|
819
|
code2name.put("SS","South Sudan");
|
820
|
code2name.put("ful","Fulah");
|
821
|
code2name.put("gla","Gaelic; Scottish Gaelic");
|
822
|
code2name.put("kas","Kashmiri");
|
823
|
code2name.put("LK","Sri Lanka");
|
824
|
code2name.put("SD","Sudan");
|
825
|
code2name.put("SJ","Svalbard and Jan Mayen");
|
826
|
code2name.put("SE","Sweden");
|
827
|
code2name.put("CH","Switzerland");
|
828
|
code2name.put("SY","Syrian Arab Republic");
|
829
|
code2name.put("fct:hasParentFunding","fct:hasParentFunding");
|
830
|
code2name.put("FCH2-IA","FCH2 Innovation action");
|
831
|
code2name.put("MSCA-IF-EF-RI","RI – Reintegration panel");
|
832
|
code2name.put("kaw","Kawi");
|
833
|
code2name.put("kir","Kirghiz");
|
834
|
code2name.put("kon","Kongo");
|
835
|
code2name.put("kok","Konkani");
|
836
|
code2name.put("lin","Lingala");
|
837
|
code2name.put("lit","Lithuanian");
|
838
|
code2name.put("lun","Lunda");
|
839
|
code2name.put("luo","Luo");
|
840
|
code2name.put("mac/mak","Macedonian");
|
841
|
code2name.put("mak","Makasar");
|
842
|
code2name.put("mlt","Maltese");
|
843
|
code2name.put("pol","Polish");
|
844
|
code2name.put("pon","Ponape");
|
845
|
code2name.put("por","Portuguese");
|
846
|
code2name.put("pra","Prakrit");
|
847
|
code2name.put("pro","Provencal");
|
848
|
code2name.put("pus","Pushto");
|
849
|
code2name.put("que","Quechua");
|
850
|
code2name.put("raj","Rajasthani");
|
851
|
code2name.put("rar","Rarotongan");
|
852
|
code2name.put("roa","Romance");
|
853
|
code2name.put("ron/rum","Romanian");
|
854
|
code2name.put("rom","Romany");
|
855
|
code2name.put("rus","Russian");
|
856
|
code2name.put("sit","Sino-Tibetan");
|
857
|
code2name.put("sio","Siouan");
|
858
|
code2name.put("fileGzip","fileGzip");
|
859
|
code2name.put("files_by_rpc","files_by_rpc");
|
860
|
code2name.put("files_from_mdstore","files_from_mdstore");
|
861
|
code2name.put("files_from_metadata","files_from_metadata");
|
862
|
code2name.put("scr","Serbo-Croatian");
|
863
|
code2name.put("mad","Madurese");
|
864
|
code2name.put("mag","Magahi");
|
865
|
code2name.put("mai","Maithili");
|
866
|
code2name.put("mlg","Malagasy");
|
867
|
code2name.put("may/msa","Malay");
|
868
|
code2name.put("mal","Malayalam");
|
869
|
code2name.put("man","Mandingo");
|
870
|
code2name.put("glv","Manx");
|
871
|
code2name.put("mao/mri","Maori");
|
872
|
code2name.put("chm","Mari");
|
873
|
code2name.put("srr","Serer");
|
874
|
code2name.put("shn","Shan");
|
875
|
code2name.put("sna","Shona");
|
876
|
code2name.put("iii","Sichuan Yi");
|
877
|
code2name.put("sin","Sinhala; Sinhalese");
|
878
|
code2name.put("sla","Slavic");
|
879
|
code2name.put("slk/slo","Slovak");
|
880
|
code2name.put("slv","Slovenian");
|
881
|
code2name.put("sog","Sogdian");
|
882
|
code2name.put("Contract","Contract");
|
883
|
code2name.put("Contract Interagency Agreement","Contract Interagency Agreement");
|
884
|
code2name.put("Cooperative Agreement","Cooperative Agreement");
|
885
|
code2name.put("Fellowship","Fellowship");
|
886
|
code2name.put("Fixed Price Award","Fixed Price Award");
|
887
|
code2name.put("Interagency Agreement","Interagency Agreement");
|
888
|
code2name.put("Intergovernmental Personnel Award","Intergovernmental Personnel Award");
|
889
|
code2name.put("Personnel Agreement","Personnel Agreement");
|
890
|
code2name.put("Standard Grant","Standard Grant");
|
891
|
code2name.put("GAA","GAA");
|
892
|
code2name.put("mah","Marshallese");
|
893
|
code2name.put("gmh","Middle High German");
|
894
|
code2name.put("mga","Middle Irish");
|
895
|
code2name.put("filesystem","filesystem");
|
896
|
code2name.put("ftp","ftp");
|
897
|
code2name.put("http","http");
|
898
|
code2name.put("SME-1","SME instrument phase 1");
|
899
|
code2name.put("SME-2","SME instrument phase 2");
|
900
|
code2name.put("SGA-CSA","Specific Grant agreement and Coordination and Support Action");
|
901
|
code2name.put("mon","Mongolian");
|
902
|
code2name.put("mos","Mossi");
|
903
|
code2name.put("nau","Nauru");
|
904
|
code2name.put("nep","Nepali");
|
905
|
code2name.put("ang","Old English");
|
906
|
code2name.put("sal","Salishan");
|
907
|
code2name.put("sam","Samaritan");
|
908
|
code2name.put("smi","Sami");
|
909
|
code2name.put("smo","Samoan");
|
910
|
code2name.put("sad","Sandawe");
|
911
|
code2name.put("sag","Sango");
|
912
|
code2name.put("san","Sanskrit");
|
913
|
code2name.put("srd","Sardinian");
|
914
|
code2name.put("sco","Scots");
|
915
|
code2name.put("sel","Selkup");
|
916
|
code2name.put("sem","Semitic");
|
917
|
code2name.put("srp","Serbian");
|
918
|
code2name.put("tyv","Tuvinian");
|
919
|
code2name.put("twi","Twi");
|
920
|
code2name.put("uga","Ugaritic");
|
921
|
code2name.put("uig","Uighur; Uyghur");
|
922
|
code2name.put("ukr","Ukrainian");
|
923
|
code2name.put("umb","Umbundu");
|
924
|
code2name.put("und","Undetermined");
|
925
|
code2name.put("urd","Urdu");
|
926
|
code2name.put("uzb","Uzbek");
|
927
|
code2name.put("vai","Vai");
|
928
|
code2name.put("ven","Venda");
|
929
|
code2name.put("was","Washo");
|
930
|
code2name.put("cym/wel","Welsh");
|
931
|
code2name.put("wol","Wolof");
|
932
|
code2name.put("xho","Xhosa");
|
933
|
code2name.put("sah","Yakut");
|
934
|
code2name.put("yao","Yao");
|
935
|
code2name.put("yap","Yap");
|
936
|
code2name.put("yid","Yiddish");
|
937
|
code2name.put("httpCSV","httpCSV");
|
938
|
code2name.put("httpList","httpList");
|
939
|
code2name.put("jdbc","jdbc");
|
940
|
code2name.put("oai","oai");
|
941
|
code2name.put("oai_sets","oai_sets");
|
942
|
code2name.put("other","other");
|
943
|
code2name.put("re3data","re3data");
|
944
|
code2name.put("rest","rest");
|
945
|
code2name.put("sftp","sftp");
|
946
|
code2name.put("soap","soap");
|
947
|
code2name.put("sparql","sparql");
|
948
|
code2name.put("sword","sword");
|
949
|
code2name.put("targz","targz");
|
950
|
code2name.put("ec:frameworkprogram","frameworkprogram");
|
951
|
code2name.put("UNKNOWN","UNKNOWN");
|
952
|
code2name.put("0021","Dataset");
|
953
|
code2name.put("0006","Doctoral thesis");
|
954
|
code2name.put("0023","Event");
|
955
|
code2name.put("0009","External research report");
|
956
|
code2name.put("0024","Film");
|
957
|
code2name.put("0025","Image");
|
958
|
code2name.put("0026","InteractiveResource");
|
959
|
code2name.put("0011","Internal report");
|
960
|
code2name.put("0017","Report");
|
961
|
code2name.put("0014","Research");
|
962
|
code2name.put("0015","Review");
|
963
|
code2name.put("0029","Software");
|
964
|
code2name.put("0032","Software Paper");
|
965
|
code2name.put("0030","Sound");
|
966
|
code2name.put("0000","Unknown");
|
967
|
code2name.put("0034","Project deliverable");
|
968
|
code2name.put("0035","Project proposal");
|
969
|
code2name.put("0036","Project milestone");
|
970
|
code2name.put("0037","Clinical Trial");
|
971
|
code2name.put("crissystem","CRIS System");
|
972
|
code2name.put("datarepository::unknown","Data Repository");
|
973
|
code2name.put("aggregator::datarepository","Data Repository Aggregator");
|
974
|
code2name.put("infospace","Information Space");
|
975
|
code2name.put("pubsrepository::institutional","Institutional Repository");
|
976
|
code2name.put("pubsrepository::journal","Journal");
|
977
|
code2name.put("scholarcomminfra","Scholarly Comm. Infrastructure");
|
978
|
code2name.put("pubsrepository::thematic","Thematic Repository");
|
979
|
code2name.put("websource","Web Source");
|
980
|
code2name.put("entityregistry::projects","Funder database");
|
981
|
code2name.put("entityregistry::repositories","Registry of repositories");
|
982
|
code2name.put("wt:fundingStream","Wellcome Trust: Funding Stream");
|
983
|
code2name.put("IsCitedBy","IsCitedBy");
|
984
|
code2name.put("IsNewVersionOf","IsNewVersionOf");
|
985
|
code2name.put("IsPartOf","IsPartOf");
|
986
|
code2name.put("COFUND-EJP","COFUND (European Joint Programme)");
|
987
|
code2name.put("COFUND-PPI","COFUND (PPI)");
|
988
|
code2name.put("CS2-IA","CS2 Innovation Action");
|
989
|
code2name.put("CS2-RIA","CS2 Research and Innovation action");
|
990
|
code2name.put("files","files");
|
991
|
code2name.put("ERC-COG","Consolidator Grant");
|
992
|
code2name.put("SESAR-RIA","SESAR: Research and Innovation action");
|
993
|
code2name.put("SGA-RIA","SGA Research and Innovation action");
|
994
|
code2name.put("ERC-STG","Starting Grant");
|
995
|
code2name.put("BOA/Task Order","BOA/Task Order");
|
996
|
code2name.put("0018","Annotation");
|
997
|
code2name.put("0001","Article");
|
998
|
code2name.put("0033","Audiovisual");
|
999
|
code2name.put("0008","Bachelor thesis");
|
1000
|
code2name.put("Continuing grant","Continuing grant");
|
1001
|
code2name.put("0002","Book");
|
1002
|
code2name.put("0022","Collection");
|
1003
|
code2name.put("0004","Conference object");
|
1004
|
code2name.put("0005","Contribution for newspaper or weekly magazine");
|
1005
|
code2name.put("0031","Data Paper");
|
1006
|
code2name.put("BD","Bangladesh");
|
1007
|
code2name.put("BB","Barbados");
|
1008
|
code2name.put("BY","Belarus");
|
1009
|
code2name.put("BQ","Bonaire, Sint Eustatius and Saba");
|
1010
|
code2name.put("BV","Bouvet Island");
|
1011
|
code2name.put("BN","Brunei Darussalam");
|
1012
|
code2name.put("BG","Bulgaria");
|
1013
|
code2name.put("UM","United States Minor Outlying Islands");
|
1014
|
code2name.put("ZM","Zambia");
|
1015
|
code2name.put("openaire2.0","OpenAIRE 2.0 (EC funding)");
|
1016
|
code2name.put("openaire3.0","OpenAIRE 3.0 (OA, funding)");
|
1017
|
code2name.put("driver","OpenAIRE Basic (DRIVER OA)");
|
1018
|
code2name.put("native","proprietary");
|
1019
|
code2name.put("hostedBy","collected from a compatible aggregator");
|
1020
|
code2name.put("notCompatible","under validation");
|
1021
|
code2name.put("BBI-IA-FLAG","Bio-based Industries Innovation action - Flagship");
|
1022
|
code2name.put("BBI-RIA","Bio-based Industries Research and Innovation action");
|
1023
|
}
|
1024
|
|
1025
|
protected static String getDefaultResulttype(final Element cobjcategory) {
|
1026
|
switch (cobjcategory.getText()) {
|
1027
|
case "0029":
|
1028
|
case "0040":
|
1029
|
return "software";
|
1030
|
case "0021":
|
1031
|
case "0024":
|
1032
|
case "0025":
|
1033
|
case "0030":
|
1034
|
case "0039":
|
1035
|
return "dataset";
|
1036
|
case "0000":
|
1037
|
case "0010":
|
1038
|
case "0018":
|
1039
|
case "0020":
|
1040
|
case "0022":
|
1041
|
case "0023":
|
1042
|
case "0026":
|
1043
|
case "0027":
|
1044
|
case "0028":
|
1045
|
case "0037":
|
1046
|
return "other";
|
1047
|
case "0001":
|
1048
|
case "0002":
|
1049
|
case "0004":
|
1050
|
case "0005":
|
1051
|
case "0006":
|
1052
|
case "0007":
|
1053
|
case "0008":
|
1054
|
case "0009":
|
1055
|
case "0011":
|
1056
|
case "0012":
|
1057
|
case "0013":
|
1058
|
case "0014":
|
1059
|
case "0015":
|
1060
|
case "0016":
|
1061
|
case "0017":
|
1062
|
case "0019":
|
1063
|
case "0031":
|
1064
|
case "0032":
|
1065
|
case "0034":
|
1066
|
case "0035":
|
1067
|
case "0036":
|
1068
|
case "0038":
|
1069
|
return "publication";
|
1070
|
default:
|
1071
|
return "publication";
|
1072
|
}
|
1073
|
}
|
1074
|
|
1075
|
protected static OafRel.Builder getRelBuilder(final RelType rType, final SubRelType subRelType, OafRel.Builder rel, final Builder subRel) {
|
1076
|
|
1077
|
switch(rType) {
|
1078
|
|
1079
|
case datasourceOrganization:
|
1080
|
return rel.setDatasourceOrganization(DatasourceOrganization.newBuilder().setProvision((Provision.Builder) subRel));
|
1081
|
case projectOrganization:
|
1082
|
return rel.setProjectOrganization(ProjectOrganization.newBuilder().setParticipation((Participation.Builder) subRel));
|
1083
|
case resultOrganization:
|
1084
|
return rel.setResultOrganization(ResultOrganization.newBuilder().setAffiliation((Affiliation.Builder) subRel));
|
1085
|
case resultProject:
|
1086
|
return rel.setResultProject(ResultProject.newBuilder().setOutcome((Outcome.Builder) subRel));
|
1087
|
case resultResult:
|
1088
|
final ResultResult.Builder rr = ResultResult.newBuilder();
|
1089
|
switch (subRelType) {
|
1090
|
|
1091
|
case similarity:
|
1092
|
return rel.setResultResult(rr.setSimilarity((Similarity.Builder) subRel));
|
1093
|
case publicationDataset:
|
1094
|
return rel.setResultResult(rr.setPublicationDataset((PublicationDataset.Builder) subRel));
|
1095
|
case dedup:
|
1096
|
return rel.setResultResult(rr.setDedup((Dedup.Builder) subRel));
|
1097
|
case dedupSimilarity:
|
1098
|
return rel.setResultResult(rr.setDedupSimilarity((DedupSimilarity.Builder) subRel));
|
1099
|
case supplement:
|
1100
|
return rel.setResultResult(rr.setSupplement((Supplement.Builder) subRel));
|
1101
|
case part:
|
1102
|
return rel.setResultResult(rr.setPart((Part.Builder) subRel));
|
1103
|
default:
|
1104
|
throw new IllegalArgumentException("invalid subRelType for result_result relations: " + subRelType.toString());
|
1105
|
}
|
1106
|
case organizationOrganization:
|
1107
|
final OrganizationOrganization.Builder oo = OrganizationOrganization.newBuilder();
|
1108
|
switch (subRelType) {
|
1109
|
case dedup:
|
1110
|
return rel.setOrganizationOrganization(oo.setDedup((Dedup.Builder) subRel));
|
1111
|
case dedupSimilarity:
|
1112
|
return rel.setOrganizationOrganization(oo.setDedupSimilarity((DedupSimilarity.Builder) subRel));
|
1113
|
default:
|
1114
|
throw new IllegalArgumentException("invalid subRelType for organization_organization relations: " + subRelType.toString());
|
1115
|
}
|
1116
|
}
|
1117
|
throw new IllegalArgumentException("invalid relation type " + rType.toString());
|
1118
|
}
|
1119
|
|
1120
|
protected static Builder getSubRelBuilder(final RelMetadata.Builder metadata, final SubRelType subRelType, final Map<String, String> params) {
|
1121
|
|
1122
|
switch (subRelType) {
|
1123
|
|
1124
|
case provision:
|
1125
|
return Provision.newBuilder().setRelMetadata(metadata);
|
1126
|
case outcome:
|
1127
|
return Outcome.newBuilder().setRelMetadata(metadata);
|
1128
|
case similarity:
|
1129
|
return Similarity.newBuilder().setRelMetadata(metadata);
|
1130
|
case publicationDataset:
|
1131
|
return PublicationDataset.newBuilder().setRelMetadata(metadata);
|
1132
|
case affiliation:
|
1133
|
return Affiliation.newBuilder().setRelMetadata(metadata);
|
1134
|
case dedup:
|
1135
|
return Dedup.newBuilder().setRelMetadata(metadata);
|
1136
|
case dedupSimilarity:
|
1137
|
return DedupSimilarity.newBuilder().setRelMetadata(metadata);
|
1138
|
case supplement:
|
1139
|
return Supplement.newBuilder().setRelMetadata(metadata);
|
1140
|
case part:
|
1141
|
return Part.newBuilder().setRelMetadata(metadata);
|
1142
|
}
|
1143
|
throw new IllegalArgumentException("invalid relation type " + subRelType.toString());
|
1144
|
}
|
1145
|
|
1146
|
protected static String getVocabularyName(final RelType relType) {
|
1147
|
switch (relType) {
|
1148
|
|
1149
|
case datasourceOrganization:
|
1150
|
return "dnet:datasource_organization_relations";
|
1151
|
case projectOrganization:
|
1152
|
return "dnet:project_organization_relations";
|
1153
|
case resultOrganization:
|
1154
|
return "dnet:result_organization_relations";
|
1155
|
case resultProject:
|
1156
|
return "dnet:result_project_relations";
|
1157
|
case resultResult:
|
1158
|
return "dnet:result_result_relations";
|
1159
|
case organizationOrganization:
|
1160
|
return "dnet:organization_organization_relations";
|
1161
|
}
|
1162
|
throw new IllegalArgumentException("invalid relation type " + relType.toString());
|
1163
|
}
|
1164
|
|
1165
|
|
1166
|
// Builder for Entities
|
1167
|
protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
|
1168
|
return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
|
1169
|
}
|
1170
|
|
1171
|
// Builder for Rels
|
1172
|
protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
|
1173
|
return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
|
1174
|
}
|
1175
|
|
1176
|
private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
|
1177
|
if (info != null) {
|
1178
|
return oaf.setDataInfo(ensureDataInfo(info));
|
1179
|
} else return oaf;
|
1180
|
}
|
1181
|
|
1182
|
protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
|
1183
|
if (info.isInitialized()) return info;
|
1184
|
return getDataInfo(false, null, "UNKNOWN", "0.9", false, false);
|
1185
|
}
|
1186
|
|
1187
|
protected static List<KeyValue> getKeyValues(final ValueMap values, final String fieldName, final Type type) {
|
1188
|
final ElementList collectedFroms = values.get(fieldName);
|
1189
|
if (collectedFroms == null) {
|
1190
|
throw new IllegalArgumentException("missing field " + fieldName);
|
1191
|
}
|
1192
|
return collectedFroms.stream()
|
1193
|
.filter(e -> StringUtils.isNotBlank(e.getAttributeValue("id")))
|
1194
|
.filter(e -> StringUtils.isNotBlank(e.getAttributeValue("name")))
|
1195
|
.map(e -> getKV(oafSplitId(type.name(), e.getAttributeValue("id")), e.getAttributeValue("name")))
|
1196
|
.collect(Collectors.toList());
|
1197
|
}
|
1198
|
|
1199
|
protected static KeyValue getKV(final String id, final String name) {
|
1200
|
return KeyValue.newBuilder().setKey(id).setValue(name).build();
|
1201
|
}
|
1202
|
|
1203
|
protected static OafRel.Builder getRel(final String sourceId,
|
1204
|
final String targetId,
|
1205
|
final RelType relType,
|
1206
|
final SubRelType subRelType,
|
1207
|
final String relClass,
|
1208
|
final List<KeyValue> collectedFrom,
|
1209
|
final boolean isChild) {
|
1210
|
final OafRel.Builder oafRel = OafRel.newBuilder().setSource(sourceId)
|
1211
|
.setTarget(targetId)
|
1212
|
.setRelType(relType)
|
1213
|
.setSubRelType(subRelType)
|
1214
|
.setRelClass(relClass)
|
1215
|
.setChild(isChild);
|
1216
|
|
1217
|
if (collectedFrom != null) {
|
1218
|
oafRel.addAllCollectedfrom(collectedFrom);
|
1219
|
}
|
1220
|
return oafRel;
|
1221
|
}
|
1222
|
|
1223
|
protected static OafEntity.Builder getEntity(final Type type,
|
1224
|
final String id,
|
1225
|
final List<KeyValue> collectedFrom,
|
1226
|
final Collection<String> originalIds,
|
1227
|
final String dateOfCollection,
|
1228
|
final String dateOfTransformation,
|
1229
|
final List<StructuredProperty> pids) {
|
1230
|
final OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id);
|
1231
|
if (collectedFrom != null) builder.addAllCollectedfrom(collectedFrom);
|
1232
|
builder.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation);
|
1233
|
builder.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection);
|
1234
|
|
1235
|
if ((originalIds != null) && !originalIds.isEmpty()) {
|
1236
|
builder.addAllOriginalId(originalIds.stream()
|
1237
|
.filter(StringUtils::isNotBlank)
|
1238
|
.collect(Collectors.toList()));
|
1239
|
}
|
1240
|
|
1241
|
if ((pids != null) && !pids.isEmpty()) {
|
1242
|
builder.addAllPid(
|
1243
|
pids.stream().filter(Objects::nonNull)
|
1244
|
.collect(Collectors.toList()));
|
1245
|
}
|
1246
|
|
1247
|
return builder;
|
1248
|
}
|
1249
|
|
1250
|
public static DataInfo.Builder getDataInfo(
|
1251
|
final NodeList about,
|
1252
|
final String provenanceaction,
|
1253
|
final String trust,
|
1254
|
final boolean deletedbyinference,
|
1255
|
final boolean inferred) {
|
1256
|
return getDataInfo(false, about, provenanceaction, trust, deletedbyinference, inferred);
|
1257
|
}
|
1258
|
|
1259
|
public static DataInfo.Builder getDataInfo(
|
1260
|
final boolean invisible,
|
1261
|
final NodeList about,
|
1262
|
final String provenanceaction,
|
1263
|
final String trust,
|
1264
|
final boolean deletedbyinference,
|
1265
|
final boolean inferred) {
|
1266
|
|
1267
|
final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
|
1268
|
dataInfoBuilder.setInvisible(invisible);
|
1269
|
dataInfoBuilder.setInferred(inferred);
|
1270
|
dataInfoBuilder.setDeletedbyinference(deletedbyinference);
|
1271
|
dataInfoBuilder.setTrust(trust);
|
1272
|
dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
|
1273
|
|
1274
|
// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
|
1275
|
if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {
|
1276
|
|
1277
|
final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
|
1278
|
if (dataInfoElement != null) {
|
1279
|
org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred");
|
1280
|
dataInfoBuilder.setInferred(Boolean.valueOf(getStringValue(elem, String.valueOf(inferred))));
|
1281
|
|
1282
|
elem = getDirectChild(dataInfoElement, "deletedbyinference");
|
1283
|
dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(getStringValue(elem, String.valueOf(deletedbyinference))));
|
1284
|
|
1285
|
elem = getDirectChild(dataInfoElement, "trust");
|
1286
|
dataInfoBuilder.setTrust(getStringValue(elem, trust));
|
1287
|
|
1288
|
elem = getDirectChild(dataInfoElement, "invisible");
|
1289
|
dataInfoBuilder.setInvisible(getBooleanValue(elem, invisible));
|
1290
|
|
1291
|
elem = getDirectChild(dataInfoElement, "inferenceprovenance");
|
1292
|
dataInfoBuilder.setInferenceprovenance(getStringValue(elem));
|
1293
|
|
1294
|
elem = getDirectChild(dataInfoElement, "provenanceaction");
|
1295
|
final Qualifier.Builder pBuilder = Qualifier.newBuilder();
|
1296
|
if (elem.hasAttributes()) {
|
1297
|
final NamedNodeMap attributes = elem.getAttributes();
|
1298
|
pBuilder.setClassid(getAttributeValue(attributes, "classid"));
|
1299
|
pBuilder.setClassname(getAttributeValue(attributes, "classname"));
|
1300
|
pBuilder.setSchemeid(getAttributeValue(attributes, "schemeid"));
|
1301
|
pBuilder.setSchemename(getAttributeValue(attributes, "schemename"));
|
1302
|
} else {
|
1303
|
pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
|
1304
|
}
|
1305
|
dataInfoBuilder.setProvenanceaction(pBuilder);
|
1306
|
}
|
1307
|
}
|
1308
|
|
1309
|
return dataInfoBuilder;
|
1310
|
}
|
1311
|
|
1312
|
protected static OAIProvenance getOAIProvenance(final NodeList about) {
|
1313
|
|
1314
|
OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder();
|
1315
|
|
1316
|
if (((about != null) && (about.getLength() > 0))) {
|
1317
|
|
1318
|
final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance");
|
1319
|
|
1320
|
if (provenance != null) {
|
1321
|
final org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription");
|
1322
|
oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder()));
|
1323
|
}
|
1324
|
}
|
1325
|
|
1326
|
return oaiProv.build();
|
1327
|
}
|
1328
|
|
1329
|
private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) {
|
1330
|
od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered")));
|
1331
|
|
1332
|
org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL");
|
1333
|
od.setBaseURL(getStringValue(elem));
|
1334
|
|
1335
|
elem = getDirectChild(origDesc, "identifier");
|
1336
|
od.setIdentifier(getStringValue(elem));
|
1337
|
|
1338
|
elem = getDirectChild(origDesc, "datestamp");
|
1339
|
od.setDatestamp(getStringValue(elem));
|
1340
|
|
1341
|
elem = getDirectChild(origDesc, "metadataNamespace");
|
1342
|
od.setMetadataNamespace(getStringValue(elem));
|
1343
|
|
1344
|
elem = getDirectChild(origDesc, "originDescription");
|
1345
|
|
1346
|
if (elem != null) {
|
1347
|
|
1348
|
od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder()));
|
1349
|
}
|
1350
|
|
1351
|
return od.build();
|
1352
|
}
|
1353
|
|
1354
|
private static boolean getBooleanValue(final org.w3c.dom.Element elem, final boolean defaultValue) {
|
1355
|
return (elem != null && elem.getTextContent() != null) ? Boolean.valueOf(elem.getTextContent()) : defaultValue;
|
1356
|
}
|
1357
|
|
1358
|
private static String getStringValue(final org.w3c.dom.Element elem, final String defaultValue) {
|
1359
|
return (elem != null && elem.getTextContent() != null) ? elem.getTextContent() : defaultValue;
|
1360
|
}
|
1361
|
|
1362
|
private static String getStringValue(final org.w3c.dom.Element elem) {
|
1363
|
return getStringValue(elem, "");
|
1364
|
}
|
1365
|
|
1366
|
protected static String getAttributeValue(final NamedNodeMap attributes, final String name) {
|
1367
|
final Node attr = attributes.getNamedItem(name);
|
1368
|
if (attr == null) return "";
|
1369
|
final String value = attr.getNodeValue();
|
1370
|
return value != null ? value : "";
|
1371
|
}
|
1372
|
|
1373
|
protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) {
|
1374
|
for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
|
1375
|
if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child;
|
1376
|
}
|
1377
|
return null;
|
1378
|
}
|
1379
|
|
1380
|
protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
|
1381
|
return getQualifier(classname, classname, schemename, schemename);
|
1382
|
}
|
1383
|
|
1384
|
protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
|
1385
|
return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
|
1386
|
}
|
1387
|
|
1388
|
protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
|
1389
|
return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
|
1390
|
}
|
1391
|
|
1392
|
protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
|
1393
|
if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null;
|
1394
|
|
1395
|
if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
|
1396
|
qualifier.setClassid(fields.get(0));
|
1397
|
qualifier.setClassname(getClassName(fields.get(0)));
|
1398
|
}
|
1399
|
return qualifier;
|
1400
|
}
|
1401
|
|
1402
|
protected static void addStructuredProps(final Builder builder,
|
1403
|
final FieldDescriptor fd,
|
1404
|
final ElementList values,
|
1405
|
final String defaultClass,
|
1406
|
final String defaultScheme) {
|
1407
|
if (values != null) {
|
1408
|
for (final Element s : values) {
|
1409
|
final String classId = s.getAttributeValue("classid") != null ? s.getAttributeValue("classid") : defaultClass;
|
1410
|
final String className = s.getAttributeValue("classname") != null ? s.getAttributeValue("classname") : defaultClass;
|
1411
|
final String schemeId = s.getAttributeValue("schemeid") != null ? s.getAttributeValue("schemeid") : defaultScheme;
|
1412
|
final String schemeName = s.getAttributeValue("schemename") != null ? s.getAttributeValue("schemename") : defaultScheme;
|
1413
|
addField(builder, fd, getStructuredProperty(s.getText(), classId, className, schemeId, schemeName));
|
1414
|
}
|
1415
|
}
|
1416
|
}
|
1417
|
|
1418
|
protected static void addJournal(final Metadata.Builder metadataProto, Element journalElement){
|
1419
|
final Journal.Builder journal = Journal.newBuilder();
|
1420
|
if (journalElement.getText() != null) {
|
1421
|
journal.setName(journalElement.getText());
|
1422
|
}
|
1423
|
|
1424
|
final Map<String, String> attr = journalElement.getAttributes();
|
1425
|
if (attr != null) {
|
1426
|
if (attr.get("issn") != null) {
|
1427
|
journal.setIssnPrinted(attr.get("issn"));
|
1428
|
}
|
1429
|
if (attr.get("eissn") != null) {
|
1430
|
journal.setIssnOnline(attr.get("eissn"));
|
1431
|
}
|
1432
|
if (attr.get("lissn") != null) {
|
1433
|
journal.setIssnLinking(attr.get("lissn"));
|
1434
|
}
|
1435
|
|
1436
|
if (attr.get("ep") != null) {
|
1437
|
journal.setEp(attr.get("ep"));
|
1438
|
}
|
1439
|
if (attr.get("iss") != null) {
|
1440
|
journal.setIss(attr.get("iss"));
|
1441
|
}
|
1442
|
if (attr.get("sp") != null) {
|
1443
|
journal.setSp(attr.get("sp"));
|
1444
|
}
|
1445
|
if (attr.get("vol") != null) {
|
1446
|
journal.setVol(attr.get("vol"));
|
1447
|
}
|
1448
|
//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
|
1449
|
if (attr.get("ed") != null) {
|
1450
|
journal.setEdition(attr.get("ed"));
|
1451
|
}
|
1452
|
//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
|
1453
|
if (attr.get("conferenceplace") != null) {
|
1454
|
journal.setConferenceplace(attr.get("conferenceplace"));
|
1455
|
}
|
1456
|
//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
|
1457
|
if (attr.get("conferencedate") != null) {
|
1458
|
journal.setConferencedate(attr.get("conferencedate"));
|
1459
|
}
|
1460
|
}
|
1461
|
metadataProto.setJournal(journal.build());
|
1462
|
}
|
1463
|
|
1464
|
|
1465
|
|
1466
|
private static final Set<String> invalidPidTypes =
|
1467
|
Sets.newHashSet("distributionlocation", "url", " ", "local accession id", "local", "landingpage", "publisherid", "report number", "uri", "contract", "doc",
|
1468
|
"issn", "issn (online)", "issn (print)", "eissn", "citation", "unknown", "other", "oai", "case number", "section", "series", "report",
|
1469
|
"other numbers", "site id", "fulltext", "internal", "report numbers", "product number", "depositor id", "isbn13", "doe contract number", "revision",
|
1470
|
"issue", "pages", "volume", "another identifier for this resource");
|
1471
|
protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
|
1472
|
|
1473
|
final List<StructuredProperty> pids = Lists.newArrayList();
|
1474
|
|
1475
|
for (int i = 0; i < nodelist.getLength(); i++) {
|
1476
|
final Node node = nodelist.item(i);
|
1477
|
Node pidType = null;
|
1478
|
if (node.getNodeType() == Node.ELEMENT_NODE) {
|
1479
|
if (node.getLocalName().equalsIgnoreCase("identifier")) {
|
1480
|
pidType = node.getAttributes().getNamedItem("identifierType");
|
1481
|
}
|
1482
|
//this is to handle dataset pids
|
1483
|
if (node.getLocalName().equalsIgnoreCase("alternateIdentifier")) {
|
1484
|
pidType = node.getAttributes().getNamedItem("alternateIdentifierType");
|
1485
|
}
|
1486
|
|
1487
|
for (int j = 0; j < node.getChildNodes().getLength(); j++) {
|
1488
|
final Node child = node.getChildNodes().item(j);
|
1489
|
|
1490
|
if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()) {
|
1491
|
|
1492
|
final String type = pidType.getNodeValue().toLowerCase();
|
1493
|
|
1494
|
if (invalidPidTypes.contains(type)) {
|
1495
|
break;
|
1496
|
}
|
1497
|
|
1498
|
final String value = child.getTextContent();
|
1499
|
|
1500
|
pids.add(getStructuredProperty(value, type, getClassName(type), "dnet:pid_types", "dnet:pid_types"));
|
1501
|
break;
|
1502
|
}
|
1503
|
}
|
1504
|
}
|
1505
|
}
|
1506
|
|
1507
|
final Map<String, StructuredProperty> pidMap = pids.stream()
|
1508
|
.collect(Collectors.toMap(
|
1509
|
p -> getStructuredPropertyKey(p),
|
1510
|
Function.identity(),
|
1511
|
(oldValue, newValue) -> newValue));
|
1512
|
|
1513
|
return Lists.newArrayList(pidMap.values());
|
1514
|
}
|
1515
|
|
1516
|
private static String getStructuredPropertyKey(final StructuredProperty p) {
|
1517
|
return StringUtils.lowerCase(p.getQualifier().getClassid()) + StringUtils.lowerCase(p.getValue());
|
1518
|
}
|
1519
|
|
1520
|
@SuppressWarnings("unchecked")
|
1521
|
protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) {
|
1522
|
|
1523
|
if (value == null) return;
|
1524
|
|
1525
|
if (value instanceof List<?>) {
|
1526
|
for (final Object o : (List<Object>) value) {
|
1527
|
addField(builder, descriptor, o);
|
1528
|
}
|
1529
|
} else {
|
1530
|
Object fieldValue = value;
|
1531
|
switch (descriptor.getType()) {
|
1532
|
case BOOL:
|
1533
|
fieldValue = Boolean.valueOf(value.toString());
|
1534
|
break;
|
1535
|
case BYTES:
|
1536
|
fieldValue = value.toString().getBytes(Charset.forName("UTF-8"));
|
1537
|
break;
|
1538
|
case DOUBLE:
|
1539
|
fieldValue = Double.valueOf(value.toString());
|
1540
|
break;
|
1541
|
case FLOAT:
|
1542
|
fieldValue = Float.valueOf(value.toString());
|
1543
|
break;
|
1544
|
case INT32:
|
1545
|
case INT64:
|
1546
|
case SINT32:
|
1547
|
case SINT64:
|
1548
|
fieldValue = Integer.valueOf(value.toString());
|
1549
|
break;
|
1550
|
case MESSAGE:
|
1551
|
final Builder q = builder.newBuilderForField(descriptor);
|
1552
|
|
1553
|
if (value instanceof Builder) {
|
1554
|
value = ((Builder) value).build();
|
1555
|
final byte[] b = ((Message) value).toByteArray();
|
1556
|
try {
|
1557
|
q.mergeFrom(b);
|
1558
|
} catch (final InvalidProtocolBufferException e) {
|
1559
|
throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName());
|
1560
|
}
|
1561
|
} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1562
|
if (value instanceof Qualifier) {
|
1563
|
q.mergeFrom((Qualifier) value);
|
1564
|
} else {
|
1565
|
|
1566
|
List<String> split = Lists.newArrayList(Splitter
|
1567
|
.on("@@@").trimResults().split(value.toString()));
|
1568
|
if (split.size() == 4) {
|
1569
|
parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
|
1570
|
} else {
|
1571
|
final String classid = split.get(0);
|
1572
|
final String schemeid = split.get(1);
|
1573
|
final Qualifier qualifier = Qualifier.newBuilder()
|
1574
|
.setClassid(classid)
|
1575
|
.setClassname(getClassName(classid))
|
1576
|
.setSchemeid(schemeid)
|
1577
|
.setSchemename(schemeid).build();
|
1578
|
q.mergeFrom(qualifier);
|
1579
|
}
|
1580
|
}
|
1581
|
} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1582
|
if (value instanceof StructuredProperty) {
|
1583
|
q.mergeFrom((StructuredProperty) value);
|
1584
|
} else {
|
1585
|
parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
|
1586
|
}
|
1587
|
} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1588
|
if (value instanceof KeyValue) {
|
1589
|
q.mergeFrom((KeyValue) value);
|
1590
|
} else {
|
1591
|
parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
|
1592
|
}
|
1593
|
} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1594
|
if (value instanceof StringField) {
|
1595
|
q.mergeFrom((StringField) value);
|
1596
|
} else {
|
1597
|
q.setField(StringField.getDescriptor().findFieldByName("value"), value);
|
1598
|
}
|
1599
|
} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1600
|
if (value instanceof BoolField) {
|
1601
|
q.mergeFrom((BoolField) value);
|
1602
|
} else if (value instanceof String) {
|
1603
|
q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value));
|
1604
|
} else {
|
1605
|
q.setField(BoolField.getDescriptor().findFieldByName("value"), value);
|
1606
|
}
|
1607
|
} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1608
|
if (value instanceof IntField) {
|
1609
|
q.mergeFrom((IntField) value);
|
1610
|
} else if (value instanceof String) {
|
1611
|
q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value));
|
1612
|
} else {
|
1613
|
q.setField(IntField.getDescriptor().findFieldByName("value"), value);
|
1614
|
}
|
1615
|
}
|
1616
|
|
1617
|
fieldValue = q.buildPartial();
|
1618
|
break;
|
1619
|
default:
|
1620
|
break;
|
1621
|
}
|
1622
|
|
1623
|
doAddField(builder, descriptor, fieldValue);
|
1624
|
}
|
1625
|
|
1626
|
}
|
1627
|
|
1628
|
protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
|
1629
|
if (value != null) {
|
1630
|
if (fd.isRepeated()) {
|
1631
|
builder.addRepeatedField(fd, value);
|
1632
|
} else if (fd.isOptional() || fd.isRequired()) {
|
1633
|
builder.setField(fd, value);
|
1634
|
}
|
1635
|
}
|
1636
|
}
|
1637
|
|
1638
|
protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
|
1639
|
final IterablePair<FieldDescriptor, String> iterablePair =
|
1640
|
new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
|
1641
|
.on(split).trimResults().split(value)));
|
1642
|
|
1643
|
for (final Pair<FieldDescriptor, String> p : iterablePair) {
|
1644
|
addField(builder, p.getKey(), p.getValue());
|
1645
|
}
|
1646
|
}
|
1647
|
|
1648
|
protected static String base64(final byte[] data) {
|
1649
|
return new String(Base64.encodeBase64(data));
|
1650
|
}
|
1651
|
|
1652
|
public static String replace(final String s, final String regex, final String replacement) {
|
1653
|
return s.replaceAll(regex, replacement);
|
1654
|
}
|
1655
|
|
1656
|
public static String trim(final String s) {
|
1657
|
return s.trim();
|
1658
|
}
|
1659
|
|
1660
|
protected static String removePrefix(final Type type, final String s) {
|
1661
|
return removePrefix(type.toString(), s);
|
1662
|
}
|
1663
|
|
1664
|
private static String removePrefix(final String prefix, final String s) {
|
1665
|
return StringUtils.removeStart("" + s, prefix + "|");
|
1666
|
}
|
1667
|
|
1668
|
protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
|
1669
|
final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme);
|
1670
|
return qualifier;
|
1671
|
}
|
1672
|
|
1673
|
protected static StructuredProperty getStructuredProperty(final String value,
|
1674
|
final String classid,
|
1675
|
final String classname,
|
1676
|
final String schemeid,
|
1677
|
final String schemename) {
|
1678
|
if ((value == null) || value.isEmpty()) return null;
|
1679
|
return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
|
1680
|
}
|
1681
|
|
1682
|
protected static StringField.Builder sf(final String s) {
|
1683
|
return StringField.newBuilder().setValue(s);
|
1684
|
}
|
1685
|
|
1686
|
public static String generateNsPrefix(final String prefix, final String externalId) {
|
1687
|
return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN);
|
1688
|
}
|
1689
|
|
1690
|
public static String md5(final String s) {
|
1691
|
try {
|
1692
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
1693
|
md.update(s.getBytes("UTF-8"));
|
1694
|
return new String(Hex.encodeHex(md.digest()));
|
1695
|
} catch (final Exception e) {
|
1696
|
System.err.println("Error creating id");
|
1697
|
return null;
|
1698
|
}
|
1699
|
}
|
1700
|
|
1701
|
public static String oafId(final String entityType, final String prefix, final String id) {
|
1702
|
if (id.isEmpty() || prefix.isEmpty()) return "";
|
1703
|
return oafSimpleId(entityType, prefix + "::" + md5(id));
|
1704
|
}
|
1705
|
|
1706
|
public static String oafSimpleId(final String entityType, final String id) {
|
1707
|
return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", "");
|
1708
|
}
|
1709
|
|
1710
|
public static String oafSplitId(final String entityType, final String fullId) {
|
1711
|
return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::"));
|
1712
|
}
|
1713
|
|
1714
|
/**
|
1715
|
* Gets the classname of the given class code
|
1716
|
*
|
1717
|
* @param code class code.
|
1718
|
* @return the class name, if the code is a key of the map. The code itself otherwise.
|
1719
|
*/
|
1720
|
public static String getClassName(final String code) {
|
1721
|
final String classname = code2name.get(code);
|
1722
|
if (StringUtils.isBlank(classname)) return code;
|
1723
|
return classname;
|
1724
|
}
|
1725
|
|
1726
|
/**
|
1727
|
* Utility method, allows to perform param based map lookups in xsl
|
1728
|
*
|
1729
|
* @param map
|
1730
|
* @param key
|
1731
|
* @return value associated to the key.
|
1732
|
*/
|
1733
|
public static Object lookupValue(final Map<String, Object> map, final String key) {
|
1734
|
return map.get(key);
|
1735
|
}
|
1736
|
|
1737
|
/**
|
1738
|
* Utility method, allows to perform param based map lookups in xsl
|
1739
|
*
|
1740
|
* @param map
|
1741
|
* @param key
|
1742
|
* @return value associated to the key.
|
1743
|
*/
|
1744
|
public static int mustMerge(final Map<String, Object> map, final String key) {
|
1745
|
final Object val = lookupValue(map, key);
|
1746
|
return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0;
|
1747
|
}
|
1748
|
|
1749
|
public static String[] split(String name, String token){
|
1750
|
return name.split(token);
|
1751
|
}
|
1752
|
|
1753
|
}
|