Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.nio.charset.Charset;
4
import java.security.MessageDigest;
5
import java.util.*;
6
import java.util.function.Function;
7
import java.util.stream.Collectors;
8

    
9
import com.google.common.base.Predicate;
10
import com.google.common.base.Splitter;
11
import com.google.common.collect.Lists;
12
import com.google.common.collect.Maps;
13
import com.google.common.collect.Sets;
14
import com.google.protobuf.Descriptors.Descriptor;
15
import com.google.protobuf.Descriptors.FieldDescriptor;
16
import com.google.protobuf.InvalidProtocolBufferException;
17
import com.google.protobuf.Message;
18
import com.google.protobuf.Message.Builder;
19
import com.google.protobuf.ProtocolMessageEnum;
20
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
21
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
22
import eu.dnetlib.data.proto.DedupProtos.Dedup;
23
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
24
import eu.dnetlib.data.proto.FieldTypeProtos.*;
25
import eu.dnetlib.data.proto.FieldTypeProtos.Journal;
26
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
27
import eu.dnetlib.data.proto.KindProtos.Kind;
28
import eu.dnetlib.data.proto.OafProtos.Oaf;
29
import eu.dnetlib.data.proto.OafProtos.OafEntity;
30
import eu.dnetlib.data.proto.OafProtos.OafRel;
31
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization;
32
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
33
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
34
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
35
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
36
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
37
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
38
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
39
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
40
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
41
import eu.dnetlib.data.proto.ResultProtos.Result.Metadata;
42
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
43
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part;
44
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
45
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
46
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement;
47
import eu.dnetlib.data.proto.TypeProtos.Type;
48
import eu.dnetlib.miscutils.collections.Pair;
49
import eu.dnetlib.miscutils.iterators.IterablePair;
50
import org.apache.commons.codec.binary.Base64;
51
import org.apache.commons.codec.binary.Hex;
52
import org.apache.commons.lang.math.NumberUtils;
53
import org.apache.commons.lang3.StringUtils;
54
import org.w3c.dom.NamedNodeMap;
55
import org.w3c.dom.Node;
56
import org.w3c.dom.NodeList;
57

    
58
public abstract class AbstractDNetXsltFunctions {
59

    
60
	/**
	 * Regular expression for strings that begin with lowercase {@code http://}, {@code https://} or
	 * {@code ftp://}, followed by any characters other than line terminators. Used by {@link #urlFilter}.
	 */
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
61
	/** Presumably the maximum length of a namespace prefix; its use is not visible in this part of the file -- TODO confirm. */
	private static final int MAX_NSPREFIX_LEN = 12;
62
	/**
	 * Predicate that accepts a string when, after {@code trim()}, it matches {@link #URL_REGEX} in full.
	 * A {@code null} input makes the {@code s.trim()} call throw a {@code NullPointerException}.
	 * NOTE(review): the field is public, static and not final, so any code can reassign it.
	 */
	public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
63
	/**
	 * Lookup table from a code (map key) to its display name (map value), filled by the static initializer
	 * below with country, language and other vocabulary entries.
	 * NOTE(review): the map is public, mutable and not final.
	 */
	public static Map<String, String> code2name = Maps.newHashMap();
64

    
65
	/*
66
	 * The code2name entries below were exported from the relational db with:
67
	 * COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *);
68
	 */
69
	//code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)");
70
	static {
71
		code2name.put("MH","Marshall Islands");
72
		code2name.put("CF","Central African Republic");
73
		code2name.put("TD","Chad");
74
		code2name.put("CN","China (People's Republic of)");
75
		code2name.put("NG","Nigeria");
76
		code2name.put("NF","Norfolk Island");
77
		code2name.put("MP","Northern Mariana Islands");
78
		code2name.put("PS","Palestinian-administered areas");
79
		code2name.put("SZ","Swaziland");
80
		code2name.put("max","Manx");
81
		code2name.put("TW","Taiwan");
82
		code2name.put("TJ","Tajikistan");
83
		code2name.put("BSG","Research for the benefit of specific groups");
84
		code2name.put("CP","Collaborative project");
85
		code2name.put("12MONTHS","12 Months Embargo");
86
		code2name.put("ace","Achinese");
87
		code2name.put("egy","Ancient Egyptian");
88
		code2name.put("ara","Arabic");
89
		code2name.put("arc","Aramaic");
90
		code2name.put("arp","Arapaho");
91
		code2name.put("gon","Gondi");
92
		code2name.put("ine","Indo-European");
93
		code2name.put("ipk","Inupiaq");
94
		code2name.put("ira","Iranian");
95
		code2name.put("lim","Limburgan; Limburger; Limburgish");
96
		code2name.put("mni","Manipuri");
97
		code2name.put("mno","Manobo");
98
		code2name.put("men","Mende");
99
		code2name.put("CX","Christmas Island");
100
		code2name.put("CC","Cocos (Keeling) Islands");
101
		code2name.put("KM","Comoros");
102
		code2name.put("CG","Congo");
103
		code2name.put("CK","Cook Islands");
104
		code2name.put("HR","Croatia");
105
		code2name.put("arn","Araucanian");
106
		code2name.put("art","Artificial");
107
		code2name.put("nah","Aztec");
108
		code2name.put("bug","Buginese");
109
		code2name.put("chn","Chinook jargon");
110
		code2name.put("chv","Chuvash");
111
		code2name.put("mus","Creek");
112
		code2name.put("mic","Micmac");
113
		code2name.put("min","Minangkabau");
114
		code2name.put("fro","Old French");
115
		code2name.put("cpp","Portuguese-based Creoles and Pidgins");
116
		code2name.put("som","Somali");
117
		code2name.put("wen","Sorbian");
118
		code2name.put("hrv","Croatian");
119
		code2name.put("cus","Cushitic");
120
		code2name.put("sot","Sotho, Southern");
121
		code2name.put("sai","South American Indian");
122
		code2name.put("esl/spa","Spanish");
123
		code2name.put("CU","Cuba");
124
		code2name.put("CW","Curaçao");
125
		code2name.put("CZ","Czech Republic");
126
		code2name.put("DK","Denmark");
127
		code2name.put("ER","Eritrea");
128
		code2name.put("TF","French Southern Territories");
129
		code2name.put("GW","Guinea-Bissau");
130
		code2name.put("VA","Holy See (Vatican City State)");
131
		code2name.put("BO","Bolivia");
132
		code2name.put("KY","Cayman Islands");
133
		code2name.put("dra","Dravidian");
134
		code2name.put("cpe","English-based Creoles and Pidgins");
135
		code2name.put("oji","Ojibwa");
136
		code2name.put("CIP-EIP-TN","CIP-Eco-Innovation - CIP-Thematic Network");
137
		code2name.put("jav/jaw","Javanese");
138
		code2name.put("ach","Acoli");
139
		code2name.put("ada","Adangme");
140
		code2name.put("afh","Afrihili");
141
		code2name.put("afr","Afrikaans");
142
		code2name.put("afa","Afro-Asiatic");
143
		code2name.put("ale","Aleut");
144
		code2name.put("alg","Algonquian languages");
145
		code2name.put("arw","Arawak");
146
		code2name.put("asm","Assamese");
147
		code2name.put("ava","Avaric");
148
		code2name.put("ave","Avestan");
149
		code2name.put("bra","Braj");
150
		code2name.put("bua","Buriat");
151
		code2name.put("chr","Cherokee");
152
		code2name.put("chy","Cheyenne");
153
		code2name.put("jrb","Judeo-Arabic");
154
		code2name.put("jpr","Judeo-Persian");
155
		code2name.put("kab","Kabyle");
156
		code2name.put("kac","Kachin");
157
		code2name.put("kaa","Kara-Kalpak");
158
		code2name.put("loz","Lozi");
159
		code2name.put("mwr","Marwari");
160
		code2name.put("DJ","Djibouti");
161
		code2name.put("JM","Jamaica");
162
		code2name.put("JP","Japan");
163
		code2name.put("JE","Jersey");
164
		code2name.put("JO","Jordan");
165
		code2name.put("KZ","Kazakhstan");
166
		code2name.put("KE","Kenya");
167
		code2name.put("KI","Kiribati");
168
		code2name.put("KR","Korea (Republic of)");
169
		code2name.put("KP","Korea, Democatric People's Republic of");
170
		code2name.put("XK","Kosovo * UN resolution");
171
		code2name.put("KW","Kuwait");
172
		code2name.put("NL","Netherlands");
173
		code2name.put("PE","Peru");
174
		code2name.put("PH","Philippines");
175
		code2name.put("fre/fra","French");
176
		code2name.put("PL","Poland");
177
		code2name.put("PT","Portugal");
178
		code2name.put("PR","Puerto Rico");
179
		code2name.put("QA","Qatar");
180
		code2name.put("RO","Romania");
181
		code2name.put("RU","Russian Federation");
182
		code2name.put("RW","Rwanda");
183
		code2name.put("RE","Réunion");
184
		code2name.put("sve/swe","Swedish");
185
		code2name.put("myn","Mayan");
186
		code2name.put("dum","Middle Dutch");
187
		code2name.put("mun","Munda");
188
		code2name.put("nde","Ndebele, North");
189
		code2name.put("ndo","Ndonga");
190
		code2name.put("nyn","Nyankole");
191
		code2name.put("nzi","Nzima");
192
		code2name.put("oci","Occitan (post 1500); Provençal");
193
		code2name.put("GU","Guam");
194
		code2name.put("tut","Altaic");
195
		code2name.put("awa","Awadhi");
196
		code2name.put("ban","Balinese");
197
		code2name.put("bal","Baluchi");
198
		code2name.put("bai","Bamileke");
199
		code2name.put("bad","Banda");
200
		code2name.put("UK","United Kingdom");
201
		code2name.put("bas","Basa");
202
		code2name.put("tib/bod","Tibetan");
203
		code2name.put("ben","Bengali");
204
		code2name.put("ber","Berber");
205
		code2name.put("cho","Choctaw");
206
		code2name.put("cop","Coptic");
207
		code2name.put("crp","Creoles and Pidgins");
208
		code2name.put("dak","Dakota");
209
		code2name.put("del","Delaware");
210
		code2name.put("div","Divehi");
211
		code2name.put("kha","Khasi");
212
		code2name.put("khi","Khoisan");
213
		code2name.put("kho","Khotanese");
214
		code2name.put("osa","Osage");
215
		code2name.put("oss","Ossetian; Ossetic");
216
		code2name.put("oto","Otomian");
217
		code2name.put("GT","Guatemala");
218
		code2name.put("ota","Ottoman");
219
		code2name.put("GG","Guernsey");
220
		code2name.put("GY","Guyana");
221
		code2name.put("LA","Lao (People's Democratic Republic)");
222
		code2name.put("LB","Lebanon");
223
		code2name.put("LY","Libyan Arab Jamahiriya");
224
		code2name.put("LI","Liechtenstein");
225
		code2name.put("LT","Lithuania");
226
		code2name.put("LU","Luxembourg");
227
		code2name.put("PW","Palau");
228
		code2name.put("BL","Saint-Barthélemy");
229
		code2name.put("SM","San Marino");
230
		code2name.put("SX","Sint Maarten (Dutch Part)");
231
		code2name.put("TL","Timor-Leste");
232
		code2name.put("TK","Tokelau");
233
		code2name.put("TO","Tonga");
234
		code2name.put("TN","Tunisia");
235
		code2name.put("TC","Turks and Caicos Islands");
236
		code2name.put("TV","Tuvalu");
237
		code2name.put("GB","United Kingdom");
238
		code2name.put("VU","Vanuatu");
239
		code2name.put("pal","Pahlavi");
240
		code2name.put("pau","Palauan");
241
		code2name.put("pam","Pampanga");
242
		code2name.put("pag","Pangasinan");
243
		code2name.put("pap","Papiamento");
244
		code2name.put("fas/per","Persian");
245
		code2name.put("phn","Phoenician");
246
		code2name.put("sid","Sidamo");
247
		code2name.put("GA","Gabon");
248
		code2name.put("GL","Greenland");
249
		code2name.put("GD","Grenada");
250
		code2name.put("GP","Guadeloupe");
251
		code2name.put("IE","Ireland");
252
		code2name.put("spa","Spanish; Castilian");
253
		code2name.put("IM","Isle of Man");
254
		code2name.put("IT","Italy");
255
		code2name.put("ES","Spain");
256
		code2name.put("SR","Suriname");
257
		code2name.put("TZ","Tanzania (United Republic of)");
258
		code2name.put("TH","Thailand");
259
		code2name.put("TG","Togo");
260
		code2name.put("UG","Uganda");
261
		code2name.put("UZ","Uzbekistan");
262
		code2name.put("VE","Venezuela");
263
		code2name.put("VI","Virgin Islands, U.S.");
264
		code2name.put("WF","Wallis and Futuna");
265
		code2name.put("COFUND-PCP","COFUND (PCP)");
266
		code2name.put("amh","Amharic");
267
		code2name.put("map","Austronesian");
268
		code2name.put("aym","Aymara");
269
		code2name.put("bnt","Bantu");
270
		code2name.put("bak","Bashkir");
271
		code2name.put("bho","Bhojpuri");
272
		code2name.put("bik","Bikol");
273
		code2name.put("bul","Bulgarian");
274
		code2name.put("cor","Cornish");
275
		code2name.put("dua","Duala");
276
		code2name.put("dut/nld","Dutch; Flemish");
277
		code2name.put("isRelatedTo","isRelatedTo");
278
		code2name.put("coauthor","coauthor");
279
		code2name.put("dyu","Dyula");
280
		code2name.put("eka","Ekajuk");
281
		code2name.put("gil","Gilbertese");
282
		code2name.put("suk","Sukuma");
283
		code2name.put("sux","Sumerian");
284
		code2name.put("sun","Sundanese");
285
		code2name.put("sus","Susu");
286
		code2name.put("swa","Swahili");
287
		code2name.put("0010","Lecture");
288
		code2name.put("0007","Master thesis");
289
		code2name.put("0027","Model");
290
		code2name.put("0012","Newsletter");
291
		code2name.put("0020","Other ORP type");
292
		code2name.put("0038","Other literature type");
293
		code2name.put("0039","Other dataset type");
294
		code2name.put("0040","Other software type");
295
		code2name.put("0013","Part of book or chapter of book");
296
		code2name.put("0019","Patent");
297
		code2name.put("0028","PhysicalObject");
298
		code2name.put("0016","Preprint");
299
		code2name.put("DM","Dominica");
300
		code2name.put("DO","Dominican Republic");
301
		code2name.put("EC","Ecuador");
302
		code2name.put("EG","Egypt");
303
		code2name.put("GQ","Equatorial Guinea");
304
		code2name.put("EE","Estonia");
305
		code2name.put("ET","Ethiopia");
306
		code2name.put("GR","Greece");
307
		code2name.put("HM","Heard Island and McDonald Islands");
308
		code2name.put("got","Gothic");
309
		code2name.put("grb","Grebo");
310
		code2name.put("ell/gre","Greek");
311
		code2name.put("hat","Haitian; Haitian Creole");
312
		code2name.put("hau","Hausa");
313
		code2name.put("haw","Hawaiian");
314
		code2name.put("heb","Hebrew");
315
		code2name.put("gai/iri","Irish");
316
		code2name.put("kar","Karen");
317
		code2name.put("lui","Luiseno");
318
		code2name.put("goh","Old High German");
319
		code2name.put("abk","Abkhazian");
320
		code2name.put("aar","Afar");
321
		code2name.put("aggregator::pubsrepository::journals","Journal Aggregator/Publisher");
322
		code2name.put("pubsrepository::mock","Other");
323
		code2name.put("pubscatalogue::unknown","Publication Catalogue");
324
		code2name.put("BI","Burundi");
325
		code2name.put("CM","Cameroon");
326
		code2name.put("CD","Congo (Democratic Republic of)");
327
		code2name.put("CR","Costa Rica");
328
		code2name.put("CI","Cote d'Ivoire");
329
		code2name.put("arg","Aragonese");
330
		code2name.put("aze","Azerbaijani");
331
		code2name.put("EU","European Union");
332
		code2name.put("FK","Falkland Islands (Malvinas)");
333
		code2name.put("scr/hrv","Croatian");
334
		code2name.put("bam","Bambara");
335
		code2name.put("baq/eus","Basque");
336
		code2name.put("bih","Bihari");
337
		code2name.put("FO","Faroe Islands");
338
		code2name.put("FJ","Fiji");
339
		code2name.put("FI","Finland");
340
		code2name.put("ger/deu","German");
341
		code2name.put("MK","Former Yugoslav Republic of Macedonia");
342
		code2name.put("FR","France");
343
		code2name.put("bis","Bislama");
344
		code2name.put("cat","Catalan; Valencian");
345
		code2name.put("cha","Chamorro");
346
		code2name.put("che","Chechen");
347
		code2name.put("cos","Corsican");
348
		code2name.put("elx","Elamite");
349
		code2name.put("eng","English");
350
		code2name.put("est","Estonian");
351
		code2name.put("deu/ger","German");
352
		code2name.put("gle","Irish");
353
		code2name.put("gem","Germanic");
354
		code2name.put("GF","French Guiana");
355
		code2name.put("PF","French Polynesia");
356
		code2name.put("GM","Gambia");
357
		code2name.put("kik","Gikuyu; Kikuyu");
358
		code2name.put("gre/ell","Greek, Modern (1453-)");
359
		code2name.put("DE","Germany");
360
		code2name.put("mac/mkd","Macedonian");
361
		code2name.put("scc/srp","Serbian");
362
		code2name.put("grn","Guarani");
363
		code2name.put("ssw","Swati");
364
		code2name.put("swe","Swedish");
365
		code2name.put("syr","Syriac");
366
		code2name.put("tgl","Tagalog");
367
		code2name.put("tah","Tahitian");
368
		code2name.put("tgk","Tajik");
369
		code2name.put("tmh","Tamashek");
370
		code2name.put("tam","Tamil");
371
		code2name.put("tat","Tatar");
372
		code2name.put("aggregator::pubsrepository::institutional","Institutional Repository Aggregator");
373
		code2name.put("per/fas","Persian");
374
		code2name.put("FCT","Fundação para a Ciência e Tecnologia");
375
		code2name.put("user:claim:pid","user:claim:pid");
376
		code2name.put("entityregistry","Registry");
377
		code2name.put("hin","Hindi");
378
		code2name.put("NA","Namibia");
379
		code2name.put("ido","Ido");
380
		code2name.put("ibo","Igbo");
381
		code2name.put("orcid","Open Researcher and Contributor ID");
382
		code2name.put("TT","Trinidad and Tobago");
383
		code2name.put("TR","Turkey");
384
		code2name.put("TM","Turkmenistan");
385
		code2name.put("arXiv","arXiv");
386
		code2name.put("providedBy","provided by");
387
		code2name.put("EMBARGO","Embargo");
388
		code2name.put("dataset_dataset","dataset_dataset");
389
		code2name.put("publication_dataset","publication_dataset");
390
		code2name.put("publication_publication","publication_publication");
391
		code2name.put("coordinator","coordinator");
392
		code2name.put("participant","participant");
393
		code2name.put("subcontractor","subcontractor");
394
		code2name.put("principal investigating","principal investigating");
395
		code2name.put("exploitation","exploitation");
396
		code2name.put("OPEN","Open Access");
397
		code2name.put("OPEN SOURCE","Open Source");
398
		code2name.put("doi","doi");
399
		code2name.put("orcidworkid","orcid workid");
400
		code2name.put("MQ","Martinique");
401
		code2name.put("MR","Mauritania");
402
		code2name.put("jpn","Japanese");
403
		code2name.put("pubsrepository::unknown","Publication Repository");
404
		code2name.put("aggregator::pubsrepository::unknown","Publication Repository Aggregator");
405
		code2name.put("UA","Ukraine");
406
		code2name.put("YT","Mayotte");
407
		code2name.put("OTHER","Other");
408
		code2name.put("RESTRICTED","Restricted");
409
		code2name.put("AE","United Arab Emirates");
410
		code2name.put("aka","Akan");
411
		code2name.put("US","United States");
412
		code2name.put("author","author");
413
		code2name.put("isResultOf","isResultOf");
414
		code2name.put("kin","Kinyarwanda");
415
		code2name.put("kom","Komi");
416
		code2name.put("new","Newari");
417
		code2name.put("NR","Nauru");
418
		code2name.put("FM","Micronesia, Federated States of");
419
		code2name.put("NP","Nepal");
420
		code2name.put("MN","Mongolia");
421
		code2name.put("rum/ron","Romanian");
422
		code2name.put("submitted","submitted");
423
		code2name.put("driver-openaire2.0","OpenAIRE 2.0+ (DRIVER OA, EC funding)");
424
		code2name.put("result","result");
425
		code2name.put("roh","Raeto-Romance");
426
		code2name.put("run","Rundi");
427
		code2name.put("bin","Bini");
428
		code2name.put("bos","Bosnian");
429
		code2name.put("din","Dinka");
430
		code2name.put("tel","Telugu");
431
		code2name.put("MA","Morocco");
432
		code2name.put("MZ","Mozambique");
433
		code2name.put("ewo","Ewondo");
434
		code2name.put("ter","Tereno");
435
		code2name.put("fat","Fanti");
436
		code2name.put("fao","Faroese");
437
		code2name.put("hai","Haida");
438
		code2name.put("MM","Myanmar");
439
		code2name.put("NU","Niue");
440
		code2name.put("PK","Pakistan");
441
		code2name.put("PG","Papua New Guinea");
442
		code2name.put("file::WoS","file::WoS");
443
		code2name.put("metadata","metadata");
444
		code2name.put("file::hybrid","file::hybrid");
445
		code2name.put("nbl","Ndebele, South");
446
		code2name.put("akk","Akkadian");
447
		code2name.put("alb/sqi","Albanian");
448
		code2name.put("arm/hye","Armenian");
449
		code2name.put("ath","Athapascan");
450
		code2name.put("CA","Canada");
451
		code2name.put("CV","Cape Verde");
452
		code2name.put("CL","Chile");
453
		code2name.put("bat","Baltic");
454
		code2name.put("CO","Colombia");
455
		code2name.put("CY","Cyprus");
456
		code2name.put("SV","El Salvador");
457
		code2name.put("HT","Haiti");
458
		code2name.put("bej","Beja");
459
		code2name.put("HN","Honduras");
460
		code2name.put("HK","Hong Kong");
461
		code2name.put("HU","Hungary");
462
		code2name.put("bel","Belarusian");
463
		code2name.put("bem","Bemba");
464
		code2name.put("slo/slk","Slovak");
465
		code2name.put("bre","Breton");
466
		code2name.put("car","Carib");
467
		code2name.put("cau","Caucasian");
468
		code2name.put("ewe","Ewe");
469
		code2name.put("tha","Thai");
470
		code2name.put("fan","Fang");
471
		code2name.put("fij","Fijian");
472
		code2name.put("fin","Finnish");
473
		code2name.put("her","Herero");
474
		code2name.put("hil","Hiligaynon");
475
		code2name.put("bod/tib","Tibetan");
476
		code2name.put("tig","Tigre");
477
		code2name.put("tir","Tigrinya");
478
		code2name.put("tem","Timne");
479
		code2name.put("wel/cym","Welsh");
480
		code2name.put("KO","Kosovo * UN resolution");
481
		code2name.put("tiv","Tivi");
482
		code2name.put("tli","Tlingit");
483
		code2name.put("ton","Tonga (Tonga Islands)");
484
		code2name.put("tog","Tonga(Nyasa)");
485
		code2name.put("tru","Truk");
486
		code2name.put("tsi","Tsimshian");
487
		code2name.put("tso","Tsonga");
488
		code2name.put("tsn","Tswana");
489
		code2name.put("IsPreviousVersionOf","IsPreviousVersionOf");
490
		code2name.put("IsReferencedBy","IsReferencedBy");
491
		code2name.put("References","References");
492
		code2name.put("IS","Iceland");
493
		code2name.put("IN","India");
494
		code2name.put("ID","Indonesia");
495
		code2name.put("IL","Israel");
496
		code2name.put("NZ","New Zealand");
497
		code2name.put("NI","Nicaragua");
498
		code2name.put("NE","Niger");
499
		code2name.put("ARK","ARK");
500
		code2name.put("BW","Botswana");
501
		code2name.put("BR","Brazil");
502
		code2name.put("BF","Burkina Faso");
503
		code2name.put("KH","Cambodia");
504
		code2name.put("hmo","Hiri Motu");
505
		code2name.put("hun","Hungarian");
506
		code2name.put("ice/isl","Icelandic");
507
		code2name.put("ind","Indonesian");
508
		code2name.put("ile","Interlingue");
509
		code2name.put("kam","Kamba");
510
		code2name.put("lub","Luba-Katanga");
511
		code2name.put("nav","Navajo; Navaho");
512
		code2name.put("datasetsbyproject","datasetsbyproject");
513
		code2name.put("ISSN","ISSN");
514
		code2name.put("MC","Support for training and career development of researchers (Marie Curie)");
515
		code2name.put("nor","Norwegian");
516
		code2name.put("file","file");
517
		code2name.put("ISTC","ISTC");
518
		code2name.put("CSA-LS","CSA Lump sum");
519
		code2name.put("MX","Mexico");
520
		code2name.put("ME","Montenegro");
521
		code2name.put("ceb","Cebuano");
522
		code2name.put("nub","Nubian");
523
		code2name.put("nym","Nyamwezi");
524
		code2name.put("nyo","Nyoro");
525
		code2name.put("tum","Tumbuka");
526
		code2name.put("tur","Turkish");
527
		code2name.put("tuk","Turkmen");
528
		code2name.put("dnet:od_subjects","OpenDOAR subjects");
529
		code2name.put("wos","Web of Science Subject Areas");
530
		code2name.put("arxiv","arXiv");
531
		code2name.put("nsf:fieldOfApplication","Field of Application (NSF)");
532
		code2name.put("NetCDF","NetCDF");
533
		code2name.put("OpenDAP","OpenDAP");
534
		code2name.put("api","api");
535
		code2name.put("datasetsbyjournal","datasetsbyjournal");
536
		code2name.put("DOI","DOI");
537
		code2name.put("EAN13","EAN13");
538
		code2name.put("EISSN","EISSN");
539
		code2name.put("Handle","Handle");
540
		code2name.put("ISBN","ISBN");
541
		code2name.put("LISSN","LISSN");
542
		code2name.put("LSID","LSID");
543
		code2name.put("PURL","PURL");
544
		code2name.put("UPC","UPC");
545
		code2name.put("URL","URL");
546
		code2name.put("URN","URN");
547
		code2name.put("cel","Celtic");
548
		code2name.put("chg","Chagatai");
549
		code2name.put("chb","Chibcha");
550
		code2name.put("AF","Afghanistan");
551
		code2name.put("AL","Albania");
552
		code2name.put("PY","Paraguay");
553
		code2name.put("PN","Pitcairn");
554
		code2name.put("KN","Saint Kitts and Nevis");
555
		code2name.put("UY","Uruguay");
556
		code2name.put("VN","Viet Nam");
557
		code2name.put("VG","Virgin Islands (British)");
558
		code2name.put("EH","Western Sahara");
559
		code2name.put("YE","Yemen");
560
		code2name.put("YU","Yugoslavia");
561
		code2name.put("ZW","Zimbabwe");
562
		code2name.put("ec:hasprogram","hasprogram");
563
		code2name.put("ec:hasspecificprogram","hasspecificprogram");
564
		code2name.put("available","available");
565
		code2name.put("chi/zho","Chinese");
566
		code2name.put("ces/cze","Czech");
567
		code2name.put("guj","Gujarati");
568
		code2name.put("him","Himachali");
569
		code2name.put("hup","Hupa");
570
		code2name.put("iba","Iban");
571
		code2name.put("ijo","Ijo");
572
		code2name.put("ilo","Iloko");
573
		code2name.put("inc","Indic");
574
		code2name.put("kan","Kannada");
575
		code2name.put("DZ","Algeria");
576
		code2name.put("BT","Bhutan");
577
		code2name.put("kau","Kanuri");
578
		code2name.put("mul","Multiple languages");
579
		code2name.put("BA","Bosnia and Herzegovina");
580
		code2name.put("MU","Mauritius");
581
		code2name.put("CSA","Coordination and support action");
582
		code2name.put("fileCSV","fileCSV");
583
		code2name.put("AS","American Samoa");
584
		code2name.put("ERC","Support for frontier research (ERC)");
585
		code2name.put("IA","Innovation action");
586
		code2name.put("AD","Andorra");
587
		code2name.put("AO","Angola");
588
		code2name.put("AI","Anguilla");
589
		code2name.put("AQ","Antarctica");
590
		code2name.put("AG","Antigua and Barbuda");
591
		code2name.put("AR","Argentina");
592
		code2name.put("AM","Armenia");
593
		code2name.put("AW","Aruba");
594
		code2name.put("AU","Australia");
595
		code2name.put("AT","Austria");
596
		code2name.put("AZ","Azerbaijan");
597
		code2name.put("BS","Bahamas");
598
		code2name.put("BH","Bahrain");
599
		code2name.put("BE","Belgium");
600
		code2name.put("BZ","Belize");
601
		code2name.put("BJ","Benin");
602
		code2name.put("BM","Bermuda");
603
		code2name.put("GE","Georgia");
604
		code2name.put("GH","Ghana");
605
		code2name.put("GI","Gibraltar");
606
		code2name.put("GN","Guinea");
607
		code2name.put("IR","Iran (Islamic Republic of)");
608
		code2name.put("IQ","Iraq");
609
		code2name.put("6MONTHS","6 Months Embargo");
610
		code2name.put("CLOSED","Closed Access");
611
		code2name.put("ina","Auxiliary Language Association)");
612
		code2name.put("bur/mya","Burmese");
613
		code2name.put("cad","Caddo");
614
		code2name.put("cai","Central American Indian");
615
		code2name.put("chu","Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
616
		code2name.put("kal","Greenlandic; Kalaallisut");
617
		code2name.put("iku","Inuktitut");
618
		code2name.put("iro","Iroquoian");
619
		code2name.put("ita","Italian");
620
		code2name.put("jav","Javanese");
621
		code2name.put("kua","Kuanyama; Kwanyama");
622
		code2name.put("kum","Kumyk");
623
		code2name.put("kru","Kurukh");
624
		code2name.put("kus","Kusaie");
625
		code2name.put("vie","Vietnamese");
626
		code2name.put("vol","Volapük");
627
		code2name.put("vot","Votic");
628
		code2name.put("wak","Wakashan");
629
		code2name.put("wal","Walamo");
630
		code2name.put("wln","Walloon");
631
		code2name.put("war","Waray");
632
		code2name.put("ST","São Tomé and Príncipe");
633
		code2name.put("endDate","endDate");
634
		code2name.put("issued","issued");
635
		code2name.put("startDate","startDate");
636
		code2name.put("FCH2-CSA","Coordination & support action");
637
		code2name.put("nic","Niger-Kordofanian");
638
		code2name.put("ssa","Nilo-Saharan");
639
		code2name.put("MSCA-RISE","RISE");
640
		code2name.put("RIA","Research and Innovation action");
641
		code2name.put("MSCA-IF-EF-ST","Standard EF");
642
		code2name.put("PendingRepositoryResources","Pending datasource");
643
		code2name.put("RepositoryServiceResources","Valid datasource");
644
		code2name.put("publication","publication");
645
		code2name.put("niu","Niuean");
646
		code2name.put("sysimport:crosswalk:aggregator","sysimport:crosswalk:aggregator");
647
		code2name.put("sysimport:crosswalk:cris","sysimport:crosswalk:cris");
648
		code2name.put("sysimport:crosswalk:datasetarchive","sysimport:crosswalk:datasetarchive");
649
		code2name.put("sysimport:crosswalk:entityregistry","sysimport:crosswalk:entityregistry");
650
		code2name.put("non","Norse");
651
		code2name.put("nai","North American Indian");
652
		code2name.put("sme","Northern Sami");
653
		code2name.put("nno","Norwegian Nynorsk; Nynorsk, Norwegian");
654
		code2name.put("yor","Yoruba");
655
		code2name.put("nob","Bokmål, Norwegian; Norwegian Bokmål");
656
		code2name.put("kaz","Kazakh");
657
		code2name.put("khm","Khmer");
658
		code2name.put("kor","Korean");
659
		code2name.put("ltz","Letzeburgesch; Luxembourgish");
660
		code2name.put("mar","Marathi");
661
		code2name.put("mas","Masai");
662
		code2name.put("enm","Middle English");
663
		code2name.put("frm","Middle French");
664
		code2name.put("mis","Miscellaneous");
665
		code2name.put("zap","Zapotec");
666
		code2name.put("zul","Zulu");
667
		code2name.put("KG","Kyrgyzstan");
668
		code2name.put("LV","Latvia");
669
		code2name.put("LS","Lesotho");
670
		code2name.put("LR","Liberia");
671
		code2name.put("MO","Macao");
672
		code2name.put("MG","Madagascar");
673
		code2name.put("MW","Malawi");
674
		code2name.put("MY","Malaysia");
675
		code2name.put("MD","Moldova (Republic of)");
676
		code2name.put("MS","Montserrat");
677
		code2name.put("AX","Åland Islands");
678
		code2name.put("moh","Mohawk");
679
		code2name.put("mol","Moldavian");
680
		code2name.put("mkh","Mon-Kmer");
681
		code2name.put("lol","Mongo");
682
		code2name.put("copyrighted","copyrighted");
683
		code2name.put("created","created");
684
		code2name.put("updated","updated");
685
		code2name.put("valid","valid");
686
		code2name.put("BBI-IA-DEMO","Bio-based Industries Innovation action - Demonstration");
687
		code2name.put("MSCA-IF-EF-CAR","CAR – Career Restart panel");
688
		code2name.put("MSCA-ITN-ETN","European Training Networks");
689
		code2name.put("interactiveResource","interactiveResource");
690
		code2name.put("model","model");
691
		code2name.put("ML","Mali");
692
		code2name.put("FCH2-RIA","FCH2 Research and Innovation action");
693
		code2name.put("MSCA-COFUND-FP","Fellowship programmes");
694
		code2name.put("physicalObject","physicalObject");
695
		code2name.put("MSCA-IF-GF","Global Fellowships");
696
		code2name.put("sysimport:crosswalk:infospace","sysimport:crosswalk:infospace");
697
		code2name.put("sysimport:crosswalk:repository","sysimport:crosswalk:repository");
698
		code2name.put("sysimport:mining:aggregator","sysimport:mining:aggregator");
699
		code2name.put("fry","Frisian");
700
		code2name.put("gaa","Ga");
701
		code2name.put("gae/gdh","Gaelic");
702
		code2name.put("service","service");
703
		code2name.put("software","software");
704
		code2name.put("sound","sound");
705
		code2name.put("glg","Galician");
706
		code2name.put("lug","Ganda");
707
		code2name.put("gay","Gayo");
708
		code2name.put("gez","Geez");
709
		code2name.put("MT","Malta");
710
		code2name.put("text","text");
711
		code2name.put("AN","Netherlands Antilles");
712
		code2name.put("NC","New Caledonia");
713
		code2name.put("NO","Norway");
714
		code2name.put("OC","Oceania");
715
		code2name.put("user:claim:search","user:claim:search");
716
		code2name.put("OM","Oman");
717
		code2name.put("PA","Panama");
718
		code2name.put("user:insert","user:insert");
719
		code2name.put("171","Article 171 of the Treaty");
720
		code2name.put("nya","Chewa; Chichewa; Nyanja");
721
		code2name.put("cre","Cree");
722
		code2name.put("geo/kat","Georgian");
723
		code2name.put("dan","Danish");
724
		code2name.put("MV","Maldives");
725
		code2name.put("dzo","Dzongkha");
726
		code2name.put("efi","Efik");
727
		code2name.put("LC","Saint Lucia");
728
		code2name.put("zun","Zuni");
729
		code2name.put("sga","old Irish");
730
		code2name.put("file::EuropePMC","file::EuropePMC");
731
		code2name.put("MF","Saint Martin (French Part)");
732
		code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)");
733
		code2name.put("file::PDF","file::PDF");
734
		code2name.put("esk","Eskimo");
735
		code2name.put("ec:program","program");
736
		code2name.put("epo","Esperanto");
737
		code2name.put("fct:program","fct:program");
738
		code2name.put("ec:specificprogram","specificprogram");
739
		code2name.put("collection","collection");
740
		code2name.put("ERC-ADG","Advanced Grant");
741
		code2name.put("ERA-NET-Cofund","ERA-NET Cofund");
742
		code2name.put("dataset","dataset");
743
		code2name.put("event","event");
744
		code2name.put("ERC-LVG","ERC low value grant");
745
		code2name.put("film","film");
746
		code2name.put("image","image");
747
		code2name.put("SL","Sierra Leone");
748
		code2name.put("ec:hasframeworkprogram","hasframeworkprogram");
749
		code2name.put("ERC-POC","Proof of Concept Grant");
750
		code2name.put("sysimport:mining:cris","sysimport:mining:cris");
751
		code2name.put("sysimport:mining:datasetarchive","sysimport:mining:datasetarchive");
752
		code2name.put("CP-CSA","Combination of CP & CSA");
753
		code2name.put("NoE","Network of Excellence");
754
		code2name.put("grc","Ancient Greek");
755
		code2name.put("lat","Latin");
756
		code2name.put("ori","Oriya");
757
		code2name.put("orm","Oromo");
758
		code2name.put("nso","Sotho");
759
		code2name.put("ddc","Dewey Decimal Classification");
760
		code2name.put("zen","Zenaga");
761
		code2name.put("ec:h2020topics","Horizon 2020 Topics");
762
		code2name.put("alternative title","alternative title");
763
		code2name.put("mesheuropmc","Medical Subject Headings");
764
		code2name.put("apa","Apache");
765
		code2name.put("SH","Saint Helena, Ascension and Tristan da Cunha");
766
		code2name.put("PM","Saint Pierre and Miquelon");
767
		code2name.put("MSCA-COFUND-DP","Doctoral programmes");
768
		code2name.put("VC","Saint Vincent and the Grenadines");
769
		code2name.put("ECSEL-IA","ECSEL Innovation Action");
770
		code2name.put("kpe","Kpelle");
771
		code2name.put("ECSEL-RIA","ECSEL Research and Innovation Actions");
772
		code2name.put("MSCA-ITN-EID","European Industrial Doctorates");
773
		code2name.put("sysimport:mining:entityregistry","sysimport:mining:entityregistry");
774
		code2name.put("sysimport:mining:infospace","sysimport:mining:infospace");
775
		code2name.put("sysimport:mining:repository","sysimport:mining:repository");
776
		code2name.put("main title","main title");
777
		code2name.put("subtitle","subtitle");
778
		code2name.put("translated title","translated title");
779
		code2name.put("lav","Latvian");
780
		code2name.put("kro","Kru");
781
		code2name.put("kur","Kurdish");
782
		code2name.put("kut","Kutenai");
783
		code2name.put("pli","Pali");
784
		code2name.put("pan","Panjabi; Punjabi");
785
		code2name.put("paa","Papuan-Australian");
786
		code2name.put("peo","Persian, Old (ca 600 - 400 B.C.)");
787
		code2name.put("zha","Zhuang; Chuang");
788
		code2name.put("pmc","pmc");
789
		code2name.put("pmid","pmid");
790
		code2name.put("urn","urn");
791
		code2name.put("IO","British Indian Ocean Territory");
792
		code2name.put("WS","Samoa");
793
		code2name.put("SA","Saudi Arabia");
794
		code2name.put("SN","Senegal");
795
		code2name.put("RS","Serbia");
796
		code2name.put("MSCA-ITN-EJD","European Joint Doctorates");
797
		code2name.put("wt:hasParentFunding","wt:hasParentFunding");
798
		code2name.put("lad","Ladino");
799
		code2name.put("bla","Siksika");
800
		code2name.put("lah","Lahnda");
801
		code2name.put("lam","Lamba");
802
		code2name.put("lao","Lao");
803
		code2name.put("snd","Sindhi");
804
		code2name.put("son","Songhai");
805
		code2name.put("DFG","DFG Classification");
806
		code2name.put("SC","Seychelles");
807
		code2name.put("SG","Singapore");
808
		code2name.put("SK","Slovakia");
809
		code2name.put("SI","Slovenia");
810
		code2name.put("lez","Lezghian");
811
		code2name.put("SB","Solomon Islands");
812
		code2name.put("SO","Somalia");
813
		code2name.put("ZA","South Africa");
814
		code2name.put("GS","South Georgia and the South Sandwich Islands");
815
		code2name.put("fiu","Finno-Ugrian");
816
		code2name.put("fon","Fon");
817
		code2name.put("fra/fre","French");
818
		code2name.put("cpf","French-based Creoles and Pidgins");
819
		code2name.put("SS","South Sudan");
820
		code2name.put("ful","Fulah");
821
		code2name.put("gla","Gaelic; Scottish Gaelic");
822
		code2name.put("kas","Kashmiri");
823
		code2name.put("LK","Sri Lanka");
824
		code2name.put("SD","Sudan");
825
		code2name.put("SJ","Svalbard and Jan Mayen");
826
		code2name.put("SE","Sweden");
827
		code2name.put("CH","Switzerland");
828
		code2name.put("SY","Syrian Arab Republic");
829
		code2name.put("fct:hasParentFunding","fct:hasParentFunding");
830
		code2name.put("FCH2-IA","FCH2 Innovation action");
831
		code2name.put("MSCA-IF-EF-RI","RI – Reintegration panel");
832
		code2name.put("kaw","Kawi");
833
		code2name.put("kir","Kirghiz");
834
		code2name.put("kon","Kongo");
835
		code2name.put("kok","Konkani");
836
		code2name.put("lin","Lingala");
837
		code2name.put("lit","Lithuanian");
838
		code2name.put("lun","Lunda");
839
		code2name.put("luo","Luo");
840
		code2name.put("mac/mak","Macedonian");
841
		code2name.put("mak","Makasar");
842
		code2name.put("mlt","Maltese");
843
		code2name.put("pol","Polish");
844
		code2name.put("pon","Ponape");
845
		code2name.put("por","Portuguese");
846
		code2name.put("pra","Prakrit");
847
		code2name.put("pro","Provencal");
848
		code2name.put("pus","Pushto");
849
		code2name.put("que","Quechua");
850
		code2name.put("raj","Rajasthani");
851
		code2name.put("rar","Rarotongan");
852
		code2name.put("roa","Romance");
853
		code2name.put("ron/rum","Romanian");
854
		code2name.put("rom","Romany");
855
		code2name.put("rus","Russian");
856
		code2name.put("sit","Sino-Tibetan");
857
		code2name.put("sio","Siouan");
858
		code2name.put("fileGzip","fileGzip");
859
		code2name.put("files_by_rpc","files_by_rpc");
860
		code2name.put("files_from_mdstore","files_from_mdstore");
861
		code2name.put("files_from_metadata","files_from_metadata");
862
		code2name.put("scr","Serbo-Croatian");
863
		code2name.put("mad","Madurese");
864
		code2name.put("mag","Magahi");
865
		code2name.put("mai","Maithili");
866
		code2name.put("mlg","Malagasy");
867
		code2name.put("may/msa","Malay");
868
		code2name.put("mal","Malayalam");
869
		code2name.put("man","Mandingo");
870
		code2name.put("glv","Manx");
871
		code2name.put("mao/mri","Maori");
872
		code2name.put("chm","Mari");
873
		code2name.put("srr","Serer");
874
		code2name.put("shn","Shan");
875
		code2name.put("sna","Shona");
876
		code2name.put("iii","Sichuan Yi");
877
		code2name.put("sin","Sinhala; Sinhalese");
878
		code2name.put("sla","Slavic");
879
		code2name.put("slk/slo","Slovak");
880
		code2name.put("slv","Slovenian");
881
		code2name.put("sog","Sogdian");
882
		code2name.put("Contract","Contract");
883
		code2name.put("Contract Interagency Agreement","Contract Interagency Agreement");
884
		code2name.put("Cooperative Agreement","Cooperative Agreement");
885
		code2name.put("Fellowship","Fellowship");
886
		code2name.put("Fixed Price Award","Fixed Price Award");
887
		code2name.put("Interagency Agreement","Interagency Agreement");
888
		code2name.put("Intergovernmental Personnel Award","Intergovernmental Personnel Award");
889
		code2name.put("Personnel Agreement","Personnel Agreement");
890
		code2name.put("Standard Grant","Standard Grant");
891
		code2name.put("GAA","GAA");
892
		code2name.put("mah","Marshallese");
893
		code2name.put("gmh","Middle High German");
894
		code2name.put("mga","Middle Irish");
895
		code2name.put("filesystem","filesystem");
896
		code2name.put("ftp","ftp");
897
		code2name.put("http","http");
898
		code2name.put("SME-1","SME instrument phase 1");
899
		code2name.put("SME-2","SME instrument phase 2");
900
		code2name.put("SGA-CSA","Specific Grant agreement and Coordination and Support Action");
901
		code2name.put("mon","Mongolian");
902
		code2name.put("mos","Mossi");
903
		code2name.put("nau","Nauru");
904
		code2name.put("nep","Nepali");
905
		code2name.put("ang","Old English");
906
		code2name.put("sal","Salishan");
907
		code2name.put("sam","Samaritan");
908
		code2name.put("smi","Sami");
909
		code2name.put("smo","Samoan");
910
		code2name.put("sad","Sandawe");
911
		code2name.put("sag","Sango");
912
		code2name.put("san","Sanskrit");
913
		code2name.put("srd","Sardinian");
914
		code2name.put("sco","Scots");
915
		code2name.put("sel","Selkup");
916
		code2name.put("sem","Semitic");
917
		code2name.put("srp","Serbian");
918
		code2name.put("tyv","Tuvinian");
919
		code2name.put("twi","Twi");
920
		code2name.put("uga","Ugaritic");
921
		code2name.put("uig","Uighur; Uyghur");
922
		code2name.put("ukr","Ukrainian");
923
		code2name.put("umb","Umbundu");
924
		code2name.put("und","Undetermined");
925
		code2name.put("urd","Urdu");
926
		code2name.put("uzb","Uzbek");
927
		code2name.put("vai","Vai");
928
		code2name.put("ven","Venda");
929
		code2name.put("was","Washo");
930
		code2name.put("cym/wel","Welsh");
931
		code2name.put("wol","Wolof");
932
		code2name.put("xho","Xhosa");
933
		code2name.put("sah","Yakut");
934
		code2name.put("yao","Yao");
935
		code2name.put("yap","Yap");
936
		code2name.put("yid","Yiddish");
937
		code2name.put("httpCSV","httpCSV");
938
		code2name.put("httpList","httpList");
939
		code2name.put("jdbc","jdbc");
940
		code2name.put("oai","oai");
941
		code2name.put("oai_sets","oai_sets");
942
		code2name.put("other","other");
943
		code2name.put("re3data","re3data");
944
		code2name.put("rest","rest");
945
		code2name.put("sftp","sftp");
946
		code2name.put("soap","soap");
947
		code2name.put("sparql","sparql");
948
		code2name.put("sword","sword");
949
		code2name.put("targz","targz");
950
		code2name.put("ec:frameworkprogram","frameworkprogram");
951
		code2name.put("UNKNOWN","UNKNOWN");
952
		code2name.put("0021","Dataset");
953
		code2name.put("0006","Doctoral thesis");
954
		code2name.put("0023","Event");
955
		code2name.put("0009","External research report");
956
		code2name.put("0024","Film");
957
		code2name.put("0025","Image");
958
		code2name.put("0026","InteractiveResource");
959
		code2name.put("0011","Internal report");
960
		code2name.put("0017","Report");
961
		code2name.put("0014","Research");
962
		code2name.put("0015","Review");
963
		code2name.put("0029","Software");
964
		code2name.put("0032","Software Paper");
965
		code2name.put("0030","Sound");
966
		code2name.put("0000","Unknown");
967
		code2name.put("0034","Project deliverable");
968
		code2name.put("0035","Project proposal");
969
		code2name.put("0036","Project milestone");
970
		code2name.put("0037","Clinical Trial");
971
		code2name.put("crissystem","CRIS System");
972
		code2name.put("datarepository::unknown","Data Repository");
973
		code2name.put("aggregator::datarepository","Data Repository Aggregator");
974
		code2name.put("infospace","Information Space");
975
		code2name.put("pubsrepository::institutional","Institutional Repository");
976
		code2name.put("pubsrepository::journal","Journal");
977
		code2name.put("scholarcomminfra","Scholarly Comm. Infrastructure");
978
		code2name.put("pubsrepository::thematic","Thematic Repository");
979
		code2name.put("websource","Web Source");
980
		code2name.put("entityregistry::projects","Funder database");
981
		code2name.put("entityregistry::repositories","Registry of repositories");
982
		code2name.put("wt:fundingStream","Wellcome Trust: Funding Stream");
983
		code2name.put("IsCitedBy","IsCitedBy");
984
		code2name.put("IsNewVersionOf","IsNewVersionOf");
985
		code2name.put("IsPartOf","IsPartOf");
986
		code2name.put("COFUND-EJP","COFUND (European Joint Programme)");
987
		code2name.put("COFUND-PPI","COFUND (PPI)");
988
		code2name.put("CS2-IA","CS2 Innovation Action");
989
		code2name.put("CS2-RIA","CS2 Research and Innovation action");
990
		code2name.put("files","files");
991
		code2name.put("ERC-COG","Consolidator Grant");
992
		code2name.put("SESAR-RIA","SESAR: Research and Innovation action");
993
		code2name.put("SGA-RIA","SGA Research and Innovation action");
994
		code2name.put("ERC-STG","Starting Grant");
995
		code2name.put("BOA/Task Order","BOA/Task Order");
996
		code2name.put("0018","Annotation");
997
		code2name.put("0001","Article");
998
		code2name.put("0033","Audiovisual");
999
		code2name.put("0008","Bachelor thesis");
1000
		code2name.put("Continuing grant","Continuing grant");
1001
		code2name.put("0002","Book");
1002
		code2name.put("0022","Collection");
1003
		code2name.put("0004","Conference object");
1004
		code2name.put("0005","Contribution for newspaper or weekly magazine");
1005
		code2name.put("0031","Data Paper");
1006
		code2name.put("BD","Bangladesh");
1007
		code2name.put("BB","Barbados");
1008
		code2name.put("BY","Belarus");
1009
		code2name.put("BQ","Bonaire, Sint Eustatius and Saba");
1010
		code2name.put("BV","Bouvet Island");
1011
		code2name.put("BN","Brunei Darussalam");
1012
		code2name.put("BG","Bulgaria");
1013
		code2name.put("UM","United States Minor Outlying Islands");
1014
		code2name.put("ZM","Zambia");
1015
		code2name.put("openaire2.0","OpenAIRE 2.0 (EC funding)");
1016
		code2name.put("openaire3.0","OpenAIRE 3.0 (OA, funding)");
1017
		code2name.put("driver","OpenAIRE Basic (DRIVER OA)");
1018
		code2name.put("native","proprietary");
1019
		code2name.put("hostedBy","collected from a compatible aggregator");
1020
		code2name.put("notCompatible","under validation");
1021
		code2name.put("BBI-IA-FLAG","Bio-based Industries Innovation action - Flagship");
1022
		code2name.put("BBI-RIA","Bio-based Industries Research and Innovation action");
1023
	}
1024

    
1025
	protected static String getDefaultResulttype(final Element cobjcategory) {
1026
		switch (cobjcategory.getText()) {
1027
		case "0029":
1028
		case "0040":
1029
			return "software";
1030
		case "0021":
1031
		case "0024":
1032
		case "0025":
1033
		case "0030":
1034
		case "0039":
1035
			return "dataset";
1036
		case "0000":
1037
		case "0010":
1038
		case "0018":
1039
		case "0020":
1040
		case "0022":
1041
		case "0023":
1042
		case "0026":
1043
		case "0027":
1044
		case "0028":
1045
		case "0037":
1046
			return "other";
1047
		case "0001":
1048
		case "0002":
1049
		case "0004":
1050
		case "0005":
1051
		case "0006":
1052
		case "0007":
1053
		case "0008":
1054
		case "0009":
1055
		case "0011":
1056
		case "0012":
1057
		case "0013":
1058
		case "0014":
1059
		case "0015":
1060
		case "0016":
1061
		case "0017":
1062
		case "0019":
1063
		case "0031":
1064
		case "0032":
1065
        case "0034":
1066
        case "0035":
1067
        case "0036":
1068
		case "0038":
1069
			return "publication";
1070
		default:
1071
			return "publication";
1072
		}
1073
	}
1074

    
1075
	protected static OafRel.Builder getRelBuilder(final RelType rType, final SubRelType subRelType, OafRel.Builder rel, final Builder subRel) {
1076

    
1077
		switch(rType) {
1078

    
1079
		case datasourceOrganization:
1080
			return rel.setDatasourceOrganization(DatasourceOrganization.newBuilder().setProvision((Provision.Builder) subRel));
1081
		case projectOrganization:
1082
			return rel.setProjectOrganization(ProjectOrganization.newBuilder().setParticipation((Participation.Builder) subRel));
1083
		case resultOrganization:
1084
			return rel.setResultOrganization(ResultOrganization.newBuilder().setAffiliation((Affiliation.Builder) subRel));
1085
		case resultProject:
1086
			return rel.setResultProject(ResultProject.newBuilder().setOutcome((Outcome.Builder) subRel));
1087
		case resultResult:
1088
			final ResultResult.Builder rr = ResultResult.newBuilder();
1089
			switch (subRelType) {
1090

    
1091
			case similarity:
1092
				return rel.setResultResult(rr.setSimilarity((Similarity.Builder) subRel));
1093
			case publicationDataset:
1094
				return rel.setResultResult(rr.setPublicationDataset((PublicationDataset.Builder) subRel));
1095
			case dedup:
1096
				return rel.setResultResult(rr.setDedup((Dedup.Builder) subRel));
1097
			case dedupSimilarity:
1098
				return rel.setResultResult(rr.setDedupSimilarity((DedupSimilarity.Builder) subRel));
1099
			case supplement:
1100
				return rel.setResultResult(rr.setSupplement((Supplement.Builder) subRel));
1101
			case part:
1102
				return rel.setResultResult(rr.setPart((Part.Builder) subRel));
1103
			default:
1104
				throw new IllegalArgumentException("invalid subRelType for result_result relations: " + subRelType.toString());
1105
			}
1106
		case organizationOrganization:
1107
			final OrganizationOrganization.Builder oo = OrganizationOrganization.newBuilder();
1108
			switch (subRelType) {
1109
			case dedup:
1110
				return rel.setOrganizationOrganization(oo.setDedup((Dedup.Builder) subRel));
1111
			case dedupSimilarity:
1112
				return rel.setOrganizationOrganization(oo.setDedupSimilarity((DedupSimilarity.Builder) subRel));
1113
			default:
1114
				throw new IllegalArgumentException("invalid subRelType for organization_organization relations: " + subRelType.toString());
1115
			}
1116
		}
1117
		throw new IllegalArgumentException("invalid relation type " + rType.toString());
1118
	}
1119

    
1120
	protected static Builder getSubRelBuilder(final RelMetadata.Builder metadata, final SubRelType subRelType, final Map<String, String> params) {
1121

    
1122
		switch (subRelType) {
1123

    
1124
		case provision:
1125
			return Provision.newBuilder().setRelMetadata(metadata);
1126
		case outcome:
1127
			return Outcome.newBuilder().setRelMetadata(metadata);
1128
		case similarity:
1129
			return Similarity.newBuilder().setRelMetadata(metadata);
1130
		case publicationDataset:
1131
			return PublicationDataset.newBuilder().setRelMetadata(metadata);
1132
		case affiliation:
1133
			return Affiliation.newBuilder().setRelMetadata(metadata);
1134
		case dedup:
1135
			return Dedup.newBuilder().setRelMetadata(metadata);
1136
		case dedupSimilarity:
1137
			return DedupSimilarity.newBuilder().setRelMetadata(metadata);
1138
		case supplement:
1139
			return Supplement.newBuilder().setRelMetadata(metadata);
1140
		case part:
1141
			return Part.newBuilder().setRelMetadata(metadata);
1142
		}
1143
		throw new IllegalArgumentException("invalid relation type " + subRelType.toString());
1144
	}
1145

    
1146
	protected static String getVocabularyName(final RelType relType) {
1147
		switch (relType) {
1148

    
1149
		case datasourceOrganization:
1150
			return "dnet:datasource_organization_relations";
1151
		case projectOrganization:
1152
			return "dnet:project_organization_relations";
1153
		case resultOrganization:
1154
			return "dnet:result_organization_relations";
1155
		case resultProject:
1156
			return "dnet:result_project_relations";
1157
		case resultResult:
1158
			return "dnet:result_result_relations";
1159
		case organizationOrganization:
1160
			return "dnet:organization_organization_relations";
1161
		}
1162
		throw new IllegalArgumentException("invalid relation type " + relType.toString());
1163
	}
1164

    
1165

    
1166
	// Builder for Entities
1167
	protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
1168
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
1169
	}
1170

    
1171
	// Builder for Rels
1172
	protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
1173
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
1174
	}
1175

    
1176
	private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
1177
		if (info != null) {
1178
			return oaf.setDataInfo(ensureDataInfo(info));
1179
		} else return oaf;
1180
	}
1181

    
1182
	protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
1183
		if (info.isInitialized()) return info;
1184
		return getDataInfo(false, null, "UNKNOWN", "0.9", false, false);
1185
	}
1186

    
1187
	protected static List<KeyValue> getKeyValues(final ValueMap values, final String fieldName, final Type type) {
1188
		final ElementList collectedFroms = values.get(fieldName);
1189
		if (collectedFroms == null) {
1190
			throw new IllegalArgumentException("missing field " + fieldName);
1191
		}
1192
		return collectedFroms.stream()
1193
				.filter(e -> StringUtils.isNotBlank(e.getAttributeValue("id")))
1194
				.filter(e -> StringUtils.isNotBlank(e.getAttributeValue("name")))
1195
				.map(e -> getKV(oafSplitId(type.name(), e.getAttributeValue("id")), e.getAttributeValue("name")))
1196
				.collect(Collectors.toList());
1197
	}
1198

    
1199
	protected static KeyValue getKV(final String id, final String name) {
1200
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
1201
	}
1202

    
1203
	protected static OafRel.Builder getRel(final String sourceId,
1204
			final String targetId,
1205
			final RelType relType,
1206
			final SubRelType subRelType,
1207
			final String relClass,
1208
			final List<KeyValue> collectedFrom,
1209
			final boolean isChild) {
1210
		final OafRel.Builder oafRel = OafRel.newBuilder().setSource(sourceId)
1211
				.setTarget(targetId)
1212
				.setRelType(relType)
1213
				.setSubRelType(subRelType)
1214
				.setRelClass(relClass)
1215
				.setChild(isChild);
1216

    
1217
		if (collectedFrom != null) {
1218
			oafRel.addAllCollectedfrom(collectedFrom);
1219
		}
1220
		return oafRel;
1221
	}
1222

    
1223
	protected static OafEntity.Builder getEntity(final Type type,
1224
			final String id,
1225
			final List<KeyValue> collectedFrom,
1226
			final Collection<String> originalIds,
1227
			final String dateOfCollection,
1228
			final String dateOfTransformation,
1229
			final List<StructuredProperty> pids) {
1230
		final OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id);
1231
		if (collectedFrom != null) builder.addAllCollectedfrom(collectedFrom);
1232
		builder.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation);
1233
		builder.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection);
1234

    
1235
		if ((originalIds != null) && !originalIds.isEmpty()) {
1236
			builder.addAllOriginalId(originalIds.stream()
1237
					.filter(StringUtils::isNotBlank)
1238
					.collect(Collectors.toList()));
1239
		}
1240

    
1241
		if ((pids != null) && !pids.isEmpty()) {
1242
			builder.addAllPid(
1243
			        pids.stream().filter(Objects::nonNull)
1244
                            .collect(Collectors.toList()));
1245
		}
1246

    
1247
		return builder;
1248
	}
1249

    
1250
	public static DataInfo.Builder getDataInfo(
1251
			final NodeList about,
1252
			final String provenanceaction,
1253
			final String trust,
1254
			final boolean deletedbyinference,
1255
			final boolean inferred) {
1256
		return getDataInfo(false, about, provenanceaction, trust, deletedbyinference, inferred);
1257
	}
1258

    
1259
	public static DataInfo.Builder getDataInfo(
1260
			final boolean invisible,
1261
			final NodeList about,
1262
			final String provenanceaction,
1263
			final String trust,
1264
			final boolean deletedbyinference,
1265
			final boolean inferred) {
1266

    
1267
		final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
1268
		dataInfoBuilder.setInvisible(invisible);
1269
		dataInfoBuilder.setInferred(inferred);
1270
		dataInfoBuilder.setDeletedbyinference(deletedbyinference);
1271
		dataInfoBuilder.setTrust(trust);
1272
		dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
1273

    
1274
		// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
1275
		if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {
1276

    
1277
			final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
1278
			if (dataInfoElement != null) {
1279
				org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred");
1280
				dataInfoBuilder.setInferred(Boolean.valueOf(getStringValue(elem, String.valueOf(inferred))));
1281

    
1282
				elem = getDirectChild(dataInfoElement, "deletedbyinference");
1283
				dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(getStringValue(elem, String.valueOf(deletedbyinference))));
1284

    
1285
				elem = getDirectChild(dataInfoElement, "trust");
1286
				dataInfoBuilder.setTrust(getStringValue(elem, trust));
1287

    
1288
				elem = getDirectChild(dataInfoElement, "invisible");
1289
				dataInfoBuilder.setInvisible(getBooleanValue(elem, invisible));
1290

    
1291
				elem = getDirectChild(dataInfoElement, "inferenceprovenance");
1292
				dataInfoBuilder.setInferenceprovenance(getStringValue(elem));
1293

    
1294
				elem = getDirectChild(dataInfoElement, "provenanceaction");
1295
				final Qualifier.Builder pBuilder = Qualifier.newBuilder();
1296
				if (elem.hasAttributes()) {
1297
					final NamedNodeMap attributes = elem.getAttributes();
1298
					pBuilder.setClassid(getAttributeValue(attributes, "classid"));
1299
					pBuilder.setClassname(getAttributeValue(attributes, "classname"));
1300
					pBuilder.setSchemeid(getAttributeValue(attributes, "schemeid"));
1301
					pBuilder.setSchemename(getAttributeValue(attributes, "schemename"));
1302
				} else {
1303
					pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
1304
				}
1305
				dataInfoBuilder.setProvenanceaction(pBuilder);
1306
			}
1307
		}
1308

    
1309
		return dataInfoBuilder;
1310
	}
1311

    
1312
	protected static OAIProvenance getOAIProvenance(final NodeList about) {
1313

    
1314
		OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder();
1315

    
1316
		if (((about != null) && (about.getLength() > 0))) {
1317

    
1318
			final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance");
1319

    
1320
			if (provenance != null) {
1321
				final org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription");
1322
				oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder()));
1323
			}
1324
		}
1325

    
1326
		return oaiProv.build();
1327
	}
1328

    
1329
	private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) {
1330
		od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered")));
1331

    
1332
		org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL");
1333
		od.setBaseURL(getStringValue(elem));
1334

    
1335
		elem = getDirectChild(origDesc, "identifier");
1336
		od.setIdentifier(getStringValue(elem));
1337

    
1338
		elem = getDirectChild(origDesc, "datestamp");
1339
		od.setDatestamp(getStringValue(elem));
1340

    
1341
		elem = getDirectChild(origDesc, "metadataNamespace");
1342
		od.setMetadataNamespace(getStringValue(elem));
1343

    
1344
		elem = getDirectChild(origDesc, "originDescription");
1345

    
1346
		if (elem != null) {
1347

    
1348
			od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder()));
1349
		}
1350

    
1351
		return od.build();
1352
	}
1353

    
1354
	private static boolean getBooleanValue(final org.w3c.dom.Element elem, final boolean defaultValue) {
1355
		return (elem != null && elem.getTextContent() != null) ? Boolean.valueOf(elem.getTextContent()) : defaultValue;
1356
	}
1357

    
1358
	private static String getStringValue(final org.w3c.dom.Element elem, final String defaultValue) {
1359
		return (elem != null && elem.getTextContent() != null) ? elem.getTextContent() : defaultValue;
1360
	}
1361

    
1362
	private static String getStringValue(final org.w3c.dom.Element elem) {
1363
		return getStringValue(elem, "");
1364
	}
1365

    
1366
	protected static String getAttributeValue(final NamedNodeMap attributes, final String name) {
1367
		final Node attr = attributes.getNamedItem(name);
1368
		if (attr == null) return "";
1369
		final String value = attr.getNodeValue();
1370
		return value != null ? value : "";
1371
	}
1372

    
1373
	protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) {
1374
		for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
1375
			if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child;
1376
		}
1377
		return null;
1378
	}
1379

    
1380
	protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
1381
		return getQualifier(classname, classname, schemename, schemename);
1382
	}
1383

    
1384
	protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
1385
		return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
1386
	}
1387

    
1388
	protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
1389
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
1390
	}
1391

    
1392
	protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
1393
		if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null;
1394

    
1395
		if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
1396
			qualifier.setClassid(fields.get(0));
1397
			qualifier.setClassname(getClassName(fields.get(0)));
1398
		}
1399
		return qualifier;
1400
	}
1401

    
1402
	protected static void addStructuredProps(final Builder builder,
1403
			final FieldDescriptor fd,
1404
			final ElementList values,
1405
			final String defaultClass,
1406
			final String defaultScheme) {
1407
		if (values != null) {
1408
			for (final Element s : values) {
1409
				final String classId = s.getAttributeValue("classid") != null ? s.getAttributeValue("classid") : defaultClass;
1410
				final String className = s.getAttributeValue("classname") != null ? s.getAttributeValue("classname") : defaultClass;
1411
				final String schemeId = s.getAttributeValue("schemeid") != null ? s.getAttributeValue("schemeid") : defaultScheme;
1412
				final String schemeName = s.getAttributeValue("schemename") != null ? s.getAttributeValue("schemename") : defaultScheme;
1413
				addField(builder, fd, getStructuredProperty(s.getText(), classId, className, schemeId, schemeName));
1414
			}
1415
		}
1416
	}
1417

    
1418
	protected static void addJournal(final Metadata.Builder metadataProto, Element journalElement){
1419
		final Journal.Builder journal = Journal.newBuilder();
1420
		if (journalElement.getText() != null) {
1421
			journal.setName(journalElement.getText());
1422
		}
1423

    
1424
		final Map<String, String> attr = journalElement.getAttributes();
1425
		if (attr != null) {
1426
			if (attr.get("issn") != null) {
1427
				journal.setIssnPrinted(attr.get("issn"));
1428
			}
1429
			if (attr.get("eissn") != null) {
1430
				journal.setIssnOnline(attr.get("eissn"));
1431
			}
1432
			if (attr.get("lissn") != null) {
1433
				journal.setIssnLinking(attr.get("lissn"));
1434
			}
1435

    
1436
			if (attr.get("ep") != null) {
1437
				journal.setEp(attr.get("ep"));
1438
			}
1439
			if (attr.get("iss") != null) {
1440
				journal.setIss(attr.get("iss"));
1441
			}
1442
			if (attr.get("sp") != null) {
1443
				journal.setSp(attr.get("sp"));
1444
			}
1445
			if (attr.get("vol") != null) {
1446
				journal.setVol(attr.get("vol"));
1447
			}
1448
			//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
1449
			if (attr.get("ed") != null) {
1450
				journal.setEdition(attr.get("ed"));
1451
			}
1452
			//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
1453
			if (attr.get("conferenceplace") != null) {
1454
				journal.setConferenceplace(attr.get("conferenceplace"));
1455
			}
1456
			//TODO: CHECK ACTUAL ATTRIBUTE NAME #371#note-28
1457
			if (attr.get("conferencedate") != null) {
1458
				journal.setConferencedate(attr.get("conferencedate"));
1459
			}
1460
		}
1461
		metadataProto.setJournal(journal.build());
1462
	}
1463

    
1464

    
1465

    
1466
	private static final Set<String> invalidPidTypes =
1467
			Sets.newHashSet("distributionlocation", "url", " ", "local accession id", "local", "local id", "a local accession number", "landingpage", "publisherid", "report number", "uri", "contract", "doc",
1468
					"issn", "issn (online)", "issn (print)", "eissn", "citation", "unknown", "other", "oai", "case number", "section", "series", "report",
1469
					"other numbers", "site id", "fulltext", "internal", "report numbers", "product number", "depositor id", "isbn13", "doe contract number", "revision",
1470
					"issue", "pages", "volume", "another identifier for this resource", "csvdownload", "hepdatarecord", "hepdatarecordalt", "rootdownload", "yamldownload", "yodadownload",
1471
					"md5", "firstid", "uuid", "poster number", "compactidentifiers", "sample_id", "source identifier", "lod-catalog", "internal id", "funder", "department",
1472
					"odin doi viewer", "odin matdb viewer", "bitstream", "dipartimento", "technical note (national research council of canada. division of building research) series",
1473
					"internal report (national research council canada. division of building research) series", "dk.dda.ddieditor.version", "extended kim id", "kim id", "ccin",
1474
					"dk.dda.study.annonymizeddata", "e-issn", "call number", "sequenza");
1475
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
1476

    
1477
		final List<StructuredProperty> pids = Lists.newArrayList();
1478

    
1479
		for (int i = 0; i < nodelist.getLength(); i++) {
1480
			final Node node = nodelist.item(i);
1481
			Node pidType = null;
1482
			if (node.getNodeType() == Node.ELEMENT_NODE) {
1483
				if (node.getLocalName().equalsIgnoreCase("identifier")) {
1484
					pidType = node.getAttributes().getNamedItem("identifierType");
1485
				}
1486
				//this is to handle dataset pids
1487
				if (node.getLocalName().equalsIgnoreCase("alternateIdentifier")) {
1488
					pidType = node.getAttributes().getNamedItem("alternateIdentifierType");
1489
				}
1490

    
1491
				for (int j = 0; j < node.getChildNodes().getLength(); j++) {
1492
					final Node child = node.getChildNodes().item(j);
1493

    
1494
					if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()) {
1495

    
1496
						final String type = pidType.getNodeValue().toLowerCase();
1497

    
1498
						if (invalidPidTypes.contains(type)) {
1499
							break;
1500
						}
1501

    
1502
						final String value = child.getTextContent();
1503

    
1504
						pids.add(getStructuredProperty(value, type, getClassName(type), "dnet:pid_types", "dnet:pid_types"));
1505
						break;
1506
					}
1507
				}
1508
			}
1509
		}
1510

    
1511
		final Map<String, StructuredProperty> pidMap = pids.stream()
1512
				.collect(Collectors.toMap(
1513
						p -> getStructuredPropertyKey(p),
1514
						Function.identity(),
1515
						(oldValue, newValue) -> newValue));
1516

    
1517
		return Lists.newArrayList(pidMap.values());
1518
	}
1519

    
1520
	private static String getStructuredPropertyKey(final StructuredProperty p) {
1521
		return StringUtils.lowerCase(p.getQualifier().getClassid()) + StringUtils.lowerCase(p.getValue());
1522
	}
1523

    
1524
	@SuppressWarnings("unchecked")
1525
	protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) {
1526

    
1527
		if (value == null) return;
1528

    
1529
		if (value instanceof List<?>) {
1530
			for (final Object o : (List<Object>) value) {
1531
				addField(builder, descriptor, o);
1532
			}
1533
		} else {
1534
			Object fieldValue = value;
1535
			switch (descriptor.getType()) {
1536
			case BOOL:
1537
				fieldValue = Boolean.valueOf(value.toString());
1538
				break;
1539
			case BYTES:
1540
				fieldValue = value.toString().getBytes(Charset.forName("UTF-8"));
1541
				break;
1542
			case DOUBLE:
1543
				fieldValue = Double.valueOf(value.toString());
1544
				break;
1545
			case FLOAT:
1546
				fieldValue = Float.valueOf(value.toString());
1547
				break;
1548
			case INT32:
1549
			case INT64:
1550
			case SINT32:
1551
			case SINT64:
1552
				fieldValue = Integer.valueOf(value.toString());
1553
				break;
1554
			case MESSAGE:
1555
				final Builder q = builder.newBuilderForField(descriptor);
1556

    
1557
				if (value instanceof Builder) {
1558
					value = ((Builder) value).build();
1559
					final byte[] b = ((Message) value).toByteArray();
1560
					try {
1561
						q.mergeFrom(b);
1562
					} catch (final InvalidProtocolBufferException e) {
1563
						throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName());
1564
					}
1565
				} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1566
					if (value instanceof Qualifier) {
1567
						q.mergeFrom((Qualifier) value);
1568
					} else {
1569

    
1570
						List<String> split = Lists.newArrayList(Splitter
1571
								.on("@@@").trimResults().split(value.toString()));
1572
						if (split.size() == 4) {
1573
							parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
1574
						} else {
1575
							final String classid = split.get(0);
1576
							final String schemeid = split.get(1);
1577
							final Qualifier qualifier = Qualifier.newBuilder()
1578
									.setClassid(classid)
1579
									.setClassname(getClassName(classid))
1580
									.setSchemeid(schemeid)
1581
									.setSchemename(schemeid).build();
1582
							q.mergeFrom(qualifier);
1583
						}
1584
					}
1585
				} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1586
					if (value instanceof StructuredProperty) {
1587
						q.mergeFrom((StructuredProperty) value);
1588
					} else {
1589
						parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
1590
					}
1591
				} else if(Journal.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1592
					final Journal.Builder journal = (Journal.Builder) q;
1593
					List<String> ssns = Splitter.on("@@@").splitToList(value.toString());
1594
					//in order: issn, eissn, lissn
1595
					journal.setIssnPrinted(ssns.get(0)).setIssnOnline(ssns.get(1)).setIssnLinking(ssns.get(2));
1596
				} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1597
					if (value instanceof KeyValue) {
1598
						q.mergeFrom((KeyValue) value);
1599
					} else {
1600
						parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
1601
					}
1602
				} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1603
					if (value instanceof StringField) {
1604
						q.mergeFrom((StringField) value);
1605
					} else {
1606
						q.setField(StringField.getDescriptor().findFieldByName("value"), value);
1607
					}
1608
				} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1609
					if (value instanceof BoolField) {
1610
						q.mergeFrom((BoolField) value);
1611
					} else if (value instanceof String) {
1612
						q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value));
1613
					} else {
1614
						q.setField(BoolField.getDescriptor().findFieldByName("value"), value);
1615
					}
1616
				} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1617
					if (value instanceof IntField) {
1618
						q.mergeFrom((IntField) value);
1619
					} else if (value instanceof String) {
1620
						q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value));
1621
					} else {
1622
						q.setField(IntField.getDescriptor().findFieldByName("value"), value);
1623
					}
1624
				}
1625

    
1626
				fieldValue = q.buildPartial();
1627
				break;
1628
			default:
1629
				break;
1630
			}
1631

    
1632
			doAddField(builder, descriptor, fieldValue);
1633
		}
1634

    
1635
	}
1636

    
1637
	protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
1638
		if (value != null) {
1639
			if (fd.isRepeated()) {
1640
				builder.addRepeatedField(fd, value);
1641
			} else if (fd.isOptional() || fd.isRequired()) {
1642
				builder.setField(fd, value);
1643
			}
1644
		}
1645
	}
1646

    
1647
	protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
1648
		final IterablePair<FieldDescriptor, String> iterablePair =
1649
				new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
1650
						.on(split).trimResults().split(value)));
1651

    
1652
		for (final Pair<FieldDescriptor, String> p : iterablePair) {
1653
			addField(builder, p.getKey(), p.getValue());
1654
		}
1655
	}
1656

    
1657
	protected static String base64(final byte[] data) {
1658
		return new String(Base64.encodeBase64(data));
1659
	}
1660

    
1661
	public static String replace(final String s, final String regex, final String replacement) {
1662
		return s.replaceAll(regex, replacement);
1663
	}
1664

    
1665
	public static String trim(final String s) {
1666
		return s.trim();
1667
	}
1668

    
1669
	protected static String removePrefix(final Type type, final String s) {
1670
		return removePrefix(type.toString(), s);
1671
	}
1672

    
1673
	private static String removePrefix(final String prefix, final String s) {
1674
		return StringUtils.removeStart("" + s, prefix + "|");
1675
	}
1676

    
1677
	protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
1678
		final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme);
1679
		return qualifier;
1680
	}
1681

    
1682
	protected static StructuredProperty getStructuredProperty(final String value,
1683
			final String classid,
1684
			final String classname,
1685
			final String schemeid,
1686
			final String schemename) {
1687
		if ((value == null) || value.isEmpty()) return null;
1688
		return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
1689
	}
1690

    
1691
	protected static StringField.Builder sf(final String s) {
1692
		return StringField.newBuilder().setValue(s);
1693
	}
1694

    
1695
	public static String generateNsPrefix(final String prefix, final String externalId) {
1696
		return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN);
1697
	}
1698

    
1699
	public static String md5(final String s) {
1700
		try {
1701
			final MessageDigest md = MessageDigest.getInstance("MD5");
1702
			md.update(s.getBytes("UTF-8"));
1703
			return new String(Hex.encodeHex(md.digest()));
1704
		} catch (final Exception e) {
1705
			System.err.println("Error creating id");
1706
			return null;
1707
		}
1708
	}
1709

    
1710
	public static String oafId(final String entityType, final String prefix, final String id) {
1711
		if (id.isEmpty() || prefix.isEmpty()) return "";
1712
		return oafSimpleId(entityType, prefix + "::" + md5(id));
1713
	}
1714

    
1715
	public static String oafSimpleId(final String entityType, final String id) {
1716
		return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", "");
1717
	}
1718

    
1719
	public static String oafSplitId(final String entityType, final String fullId) {
1720
		return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::"));
1721
	}
1722

    
1723
	/**
1724
	 * Gets the classname of the given class code
1725
	 *
1726
	 * @param code class code.
1727
	 * @return the class name, if the code is a key of the map. The code itself otherwise.
1728
	 */
1729
	public static String getClassName(final String code) {
1730
		final String classname = code2name.get(code);
1731
		if (StringUtils.isBlank(classname)) return code;
1732
		return classname;
1733
	}
1734

    
1735
	/**
1736
	 * Utility method, allows to perform param based map lookups in xsl
1737
	 *
1738
	 * @param map
1739
	 * @param key
1740
	 * @return value associated to the key.
1741
	 */
1742
	public static Object lookupValue(final Map<String, Object> map, final String key) {
1743
		return map.get(key);
1744
	}
1745

    
1746
	/**
1747
	 * Utility method, allows to perform param based map lookups in xsl
1748
	 *
1749
	 * @param map
1750
	 * @param key
1751
	 * @return value associated to the key.
1752
	 */
1753
	public static int mustMerge(final Map<String, Object> map, final String key) {
1754
		final Object val = lookupValue(map, key);
1755
		return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0;
1756
	}
1757

    
1758
	public static String[] split(String name, String token){
1759
		return name.split(token);
1760
	}
1761

    
1762
}
(1-1/10)