Project

General

Profile

1
package eu.dnetlib.data.transform.xml;
2

    
3
import java.nio.charset.Charset;
4
import java.security.MessageDigest;
5
import java.util.List;
6
import java.util.Map;
7

    
8
import org.apache.commons.codec.binary.Base64;
9
import org.apache.commons.codec.binary.Hex;
10
import org.apache.commons.collections.MapUtils;
11
import org.apache.commons.lang.StringUtils;
12
import org.apache.commons.lang.math.NumberUtils;
13
import org.w3c.dom.NamedNodeMap;
14
import org.w3c.dom.Node;
15
import org.w3c.dom.NodeList;
16

    
17
import com.google.common.base.Predicate;
18
import com.google.common.base.Predicates;
19
import com.google.common.base.Splitter;
20
import com.google.common.collect.Iterables;
21
import com.google.common.collect.Lists;
22
import com.google.common.collect.Maps;
23
import com.google.protobuf.Descriptors.Descriptor;
24
import com.google.protobuf.Descriptors.FieldDescriptor;
25
import com.google.protobuf.InvalidProtocolBufferException;
26
import com.google.protobuf.Message;
27
import com.google.protobuf.Message.Builder;
28
import com.google.protobuf.ProtocolMessageEnum;
29

    
30
import eu.dnetlib.data.proto.FieldTypeProtos.BoolField;
31
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
32
import eu.dnetlib.data.proto.FieldTypeProtos.IntField;
33
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
34
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
35
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
36
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
37
import eu.dnetlib.data.proto.KindProtos.Kind;
38
import eu.dnetlib.data.proto.OafProtos.Oaf;
39
import eu.dnetlib.data.proto.OafProtos.OafEntity;
40
import eu.dnetlib.data.proto.OafProtos.OafRel;
41
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
42
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
43
import eu.dnetlib.data.proto.TypeProtos.Type;
44
import eu.dnetlib.miscutils.collections.Pair;
45
import eu.dnetlib.miscutils.iterators.IterablePair;
46

    
47
public abstract class AbstractDNetOafXsltFunctions {
48

    
49
	// Length limit of 12; presumably the maximum number of characters of a namespace prefix.
	// NOTE(review): no use of this constant is visible in this part of the file — confirm against the callers.
	private static final int MAX_NSPREFIX_LEN = 12;
50
	/**
	 * Regular expression accepting a string that starts with {@code http://}, {@code https://} or {@code ftp://},
	 * followed by any characters. Applied to the trimmed input by {@code urlFilter}.
	 */
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
51
	/**
	 * Lookup table from a code (e.g. "MO", "srp", "0008") to its display name (e.g. "Macao", "Serbian",
	 * "Bachelor thesis"). Populated once by the static initializer of this class.
	 */
	protected static Map<String, String> code2name = Maps.newHashMap();
52

    
53
	public static Predicate<String> urlFilter = new Predicate<String>() {
54

    
55
		@Override
56
		public boolean apply(final String s) {
57
			return s.trim().matches(URL_REGEX);
58
		}
59
	};
60

    
61
	/*
62
	 * Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the
63
	 * relational db
64
	 */
65
	static {
66
		code2name.put("MO", "Macao");
67
		code2name.put("UK", "United Kingdom");
68
		code2name.put("WF", "WALLIS AND FUTUNA");
69
		code2name.put("srp", "Serbian");
70
		code2name.put("FM", "MICRONESIA, FEDERATED STATES OF");
71
		code2name.put("PendingRepositoryResources", "Pending datasource");
72
		code2name.put("12MONTHS", "12 Months Embargo");
73
		code2name.put("chy", "Cheyenne");
74
		code2name.put("yid", "Yiddish");
75
		code2name.put("sword", "sword");
76
		code2name.put("aggregator::pubsrepository::institutional", "Aggregator of Institutional Publication Repositories");
77
		code2name.put("LSID", "LSID");
78
		code2name.put("BO", "Bolivia");
79
		code2name.put("0008", "Bachelor thesis");
80
		code2name.put("CW", "CURAÇAO");
81
		code2name.put("PURL", "PURL");
82
		code2name.put("HR", "Croatia");
83
		code2name.put("KM", "COMOROS");
84
		code2name.put("VU", "VANUATU");
85
		code2name.put("CU", "Cuba");
86
		code2name.put("MN", "MONGOLIA");
87
		code2name.put("RepositoryServiceResources", "Valid datasource");
88
		code2name.put("cop", "Coptic");
89
		code2name.put("cpe", "English-based Creoles and Pidgins");
90
		code2name.put("ine", "Indo-European");
91
		code2name.put("ipk", "Inupiaq");
92
		code2name.put("ira", "Iranian");
93
		code2name.put("max", "Manx");
94
		code2name.put("men", "Mende");
95
		code2name.put("mga", "Middle Irish");
96
		code2name.put("sco", "Scots");
97
		code2name.put("scr", "Serbo-Croatian");
98
		code2name.put("yao", "Yao");
99
		code2name.put("yap", "Yap");
100
		code2name.put("yor", "Yoruba");
101
		code2name.put("api", "api");
102
		code2name.put("file", "file");
103
		code2name.put("files_from_metadata", "files_from_metadata");
104
		code2name.put("ftp", "ftp");
105
		code2name.put("0009", "External research report");
106
		code2name.put("UPC", "UPC");
107
		code2name.put("CK", "COOK ISLANDS");
108
		code2name.put("CZ", "Czech Republic");
109
		code2name.put("MS", "MONTSERRAT");
110
		code2name.put("MW", "Malawi");
111
		code2name.put("MV", "Maldives");
112
		code2name.put("MH", "Marshall Islands");
113
		code2name.put("URL", "URL");
114
		code2name.put("CG", "Congo");
115
		code2name.put("DK", "Denmark");
116
		code2name.put("VI", "VIRGIN ISLANDS, U.S.");
117
		code2name.put("cpf", "French-based Creoles and Pidgins");
118
		code2name.put("mic", "Micmac");
119
		code2name.put("min", "Minangkabau");
120
		code2name.put("sel", "Selkup");
121
		code2name.put("sem", "Semitic");
122
		code2name.put("sga", "old Irish");
123
		code2name.put("zap", "Zapotec");
124
		code2name.put("zen", "Zenaga");
125
		code2name.put("http", "http");
126
		code2name.put("jdbc", "jdbc");
127
		code2name.put("cpp", "Portuguese-based Creoles and Pidgins");
128
		code2name.put("crp", "Creoles and Pidgins");
129
		code2name.put("oai", "oai");
130
		code2name.put("mis", "Miscellaneous");
131
		code2name.put("mkh", "Mon-Kmer");
132
		code2name.put("mni", "Manipuri");
133
		code2name.put("mno", "Manobo");
134
		code2name.put("moh", "Mohawk");
135
		code2name.put("shn", "Shan");
136
		code2name.put("sid", "Sidamo");
137
		code2name.put("sio", "Siouan");
138
		code2name.put("sit", "Sino-Tibetan");
139
		code2name.put("zha", "Zhuang; Chuang");
140
		code2name.put("other", "other");
141
		code2name.put("rest", "rest");
142
		code2name.put("soap", "soap");
143
		code2name.put("sparql", "sparql");
144
		code2name.put("zul", "Zulu");
145
		code2name.put("zun", "Zuni");
146
		code2name.put("file::EuropePMC", "file::EuropePMC");
147
		code2name.put("mos", "Mossi");
148
		code2name.put("sla", "Slavic");
149
		code2name.put("iro", "Iroquoian");
150
		code2name.put("slk/slo", "Slovak");
151
		code2name.put("httpCSV", "httpCSV");
152
		code2name.put("IS", "Iceland");
153
		code2name.put("NA", "Namibia");
154
		code2name.put("QA", "Qatar");
155
		code2name.put("endDate", "endDate");
156
		code2name.put("issued", "issued");
157
		code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network");
158
		code2name.put("ace", "Achinese");
159
		code2name.put("akk", "Akkadian");
160
		code2name.put("alb/sqi", "Albanian");
161
		code2name.put("bra", "Braj");
162
		code2name.put("bua", "Buriat");
163
		code2name.put("0019", "Patent");
164
		code2name.put("NL", "Netherlands");
165
		code2name.put("RO", "Romania");
166
		code2name.put("CP", "Collaborative project");
167
		code2name.put("ach", "Acoli");
168
		code2name.put("bug", "Buginese");
169
		code2name.put("dra", "Dravidian");
170
		code2name.put("dua", "Duala");
171
		code2name.put("dum", "Middle Dutch");
172
		code2name.put("epo", "Esperanto");
173
		code2name.put("esk", "Eskimo");
174
		code2name.put("jav/jaw", "Javanese");
175
		code2name.put("jpr", "Judeo-Persian");
176
		code2name.put("0018", "Annotation");
177
		code2name.put("mun", "Munda");
178
		code2name.put("mus", "Creek");
179
		code2name.put("mwr", "Marwari");
180
		code2name.put("myn", "Mayan");
181
		code2name.put("smi", "Sami");
182
		code2name.put("sog", "Sogdian");
183
		code2name.put("son", "Songhai");
184
		code2name.put("sot", "Sotho, Southern");
185
		code2name.put("tgk", "Tajik");
186
		code2name.put("arxiv", "arXiv");
187
		code2name.put("datasetsbyproject", "datasetsbyproject");
188
		code2name.put("oai_sets", "oai_sets");
189
		code2name.put("0021", "Dataset");
190
		code2name.put("VA", "HOLY SEE (VATICAN CITY STATE)");
191
		code2name.put("HT", "Haiti");
192
		code2name.put("IN", "India");
193
		code2name.put("ID", "Indonesia");
194
		code2name.put("RU", "Russian Federation");
195
		code2name.put("CP-CSA", "Combination of CP & CSA");
196
		code2name.put("CSA", "Coordination and support action");
197
		code2name.put("ada", "Adangme");
198
		code2name.put("HN", "Honduras");
199
		code2name.put("HK", "Hong Kong");
200
		code2name.put("IR", "Iran (Islamic Republic of)");
201
		code2name.put("IL", "Israel");
202
		code2name.put("RW", "Rwanda");
203
		code2name.put("RE", "RÉUNION");
204
		code2name.put("BL", "SAINT BARTHÉLEMY");
205
		code2name.put("afa", "Afro-Asiatic");
206
		code2name.put("afh", "Afrihili");
207
		code2name.put("dyu", "Dyula");
208
		code2name.put("efi", "Efik");
209
		code2name.put("egy", "Ancient Egyptian");
210
		code2name.put("jrb", "Judeo-Arabic");
211
		code2name.put("kaa", "Kara-Kalpak");
212
		code2name.put("kab", "Kabyle");
213
		code2name.put("nah", "Aztec");
214
		code2name.put("nai", "North American Indian");
215
		code2name.put("spa", "Spanish; Castilian");
216
		code2name.put("srd", "Sardinian");
217
		code2name.put("srr", "Serer");
218
		code2name.put("HU", "Hungary");
219
		code2name.put("IQ", "IRAQ");
220
		code2name.put("SH", "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA");
221
		code2name.put("afr", "Afrikaans");
222
		code2name.put("kac", "Kachin");
223
		code2name.put("nde", "Ndebele, North");
224
		code2name.put("ssa", "Nilo-Saharan");
225
		code2name.put("ssw", "Swati");
226
		code2name.put("suk", "Sukuma");
227
		code2name.put("kam", "Kamba");
228
		code2name.put("kan", "Kannada");
229
		code2name.put("kar", "Karen");
230
		code2name.put("ndo", "Ndonga");
231
		code2name.put("nep", "Nepali");
232
		code2name.put("new", "Newari");
233
		code2name.put("sun", "Sundanese");
234
		code2name.put("niu", "Niuean");
235
		code2name.put("non", "Norse");
236
		code2name.put("sux", "Sumerian");
237
		code2name.put("wos", "Web of Science Subject Areas");
238
		code2name.put("ISTC", "ISTC");
239
		code2name.put("KZ", "Kazakhstan");
240
		code2name.put("KG", "Kyrgyzstan");
241
		code2name.put("sysimport:crosswalk:repository", "sysimport:crosswalk:repository");
242
		code2name.put("alg", "Algonquian languages");
243
		code2name.put("ava", "Avaric");
244
		code2name.put("fon", "Fon");
245
		code2name.put("fra/fre", "French");
246
		code2name.put("fry", "Frisian");
247
		code2name.put("kha", "Khasi");
248
		code2name.put("kro", "Kru");
249
		code2name.put("nso", "Sotho");
250
		code2name.put("fileGzip", "fileGzip");
251
		code2name.put("FR", "France");
252
		code2name.put("KE", "Kenya");
253
		code2name.put("syr", "Syriac");
254
		code2name.put("tem", "Timne");
255
		code2name.put("aggregator::datarepository", "Aggregator of Data Repositories");
256
		code2name.put("aggregator::pubsrepository::journals", "Aggregator/Publisher of Journals");
257
		code2name.put("ISSN", "ISSN");
258
		code2name.put("JP", "Japan");
259
		code2name.put("JO", "Jordan");
260
		code2name.put("fre/fra", "French");
261
		code2name.put("KR", "Korea (Republic of)");
262
		code2name.put("PT", "Portugal");
263
		code2name.put("driver-openaire2.0", "OpenAIRE 2.0+ (DRIVER OA, EC funding)");
264
		code2name.put("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry");
265
		code2name.put("sysimport:crosswalk:infospace", "sysimport:crosswalk:infospace");
266
		code2name.put("ale", "Aleut");
267
		code2name.put("ang", "Old English");
268
		code2name.put("apa", "Apache");
269
		code2name.put("arc", "Aramaic");
270
		code2name.put("ave", "Avestan");
271
		code2name.put("sve/swe", "Swedish");
272
		code2name.put("ewo", "Ewondo");
273
		code2name.put("fan", "Fang");
274
		code2name.put("fas/per", "Persian");
275
		code2name.put("khi", "Khoisan");
276
		code2name.put("kho", "Khotanese");
277
		code2name.put("kok", "Konkani");
278
		code2name.put("nub", "Nubian");
279
		code2name.put("nym", "Nyamwezi");
280
		code2name.put("nyn", "Nyankole");
281
		code2name.put("nyo", "Nyoro");
282
		code2name.put("ter", "Tereno");
283
		code2name.put("tgl", "Tagalog");
284
		code2name.put("KI", "KIRIBATI");
285
		code2name.put("arn", "Araucanian");
286
		code2name.put("arp", "Arapaho");
287
		code2name.put("fat", "Fanti");
288
		code2name.put("fin", "Finnish");
289
		code2name.put("fiu", "Finno-Ugrian");
290
		code2name.put("kon", "Kongo");
291
		code2name.put("kor", "Korean");
292
		code2name.put("kpe", "Kpelle");
293
		code2name.put("nzi", "Nzima");
294
		code2name.put("osa", "Osage");
295
		code2name.put("oss", "Ossetian; Ossetic");
296
		code2name.put("tha", "Thai");
297
		code2name.put("tib/bod", "Tibetan");
298
		code2name.put("tig", "Tigre");
299
		code2name.put("tir", "Tigrinya");
300
		code2name.put("KP", "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF");
301
		code2name.put("XK", "Kosovo * UN resolution");
302
		code2name.put("art", "Artificial");
303
		code2name.put("arw", "Arawak");
304
		code2name.put("asm", "Assamese");
305
		code2name.put("frm", "Middle French");
306
		code2name.put("fro", "Old French");
307
		code2name.put("kru", "Kurukh");
308
		code2name.put("kua", "Kuanyama; Kwanyama");
309
		code2name.put("ota", "Ottoman");
310
		code2name.put("oto", "Otomian");
311
		code2name.put("kum", "Kumyk");
312
		code2name.put("ath", "Athapascan");
313
		code2name.put("user:insert", "user:insert");
314
		code2name.put("awa", "Awadhi");
315
		code2name.put("bad", "Banda");
316
		code2name.put("ben", "Bengali");
317
		code2name.put("ber", "Berber");
318
		code2name.put("eka", "Ekajuk");
319
		code2name.put("gaa", "Ga");
320
		code2name.put("gae/gdh", "Gaelic");
321
		code2name.put("gai/iri", "Irish");
322
		code2name.put("gay", "Gayo");
323
		code2name.put("kus", "Kusaie");
324
		code2name.put("kut", "Kutenai");
325
		code2name.put("lad", "Ladino");
326
		code2name.put("lah", "Lahnda");
327
		code2name.put("paa", "Papuan-Australian");
328
		code2name.put("pag", "Pangasinan");
329
		code2name.put("pal", "Pahlavi");
330
		code2name.put("pam", "Pampanga");
331
		code2name.put("pap", "Papiamento");
332
		code2name.put("pau", "Palauan");
333
		code2name.put("tiv", "Tivi");
334
		code2name.put("tli", "Tlingit");
335
		code2name.put("tmh", "Tamashek");
336
		code2name.put("tum", "Tumbuka");
337
		code2name.put("ec:frameworkprogram", "frameworkprogram");
338
		code2name.put("tog", "Tonga(Nyasa)");
339
		code2name.put("tru", "Truk");
340
		code2name.put("tsi", "Tsimshian");
341
		code2name.put("ec:program", "program");
342
		code2name.put("ec:specificprogram", "specificprogram");
343
		code2name.put("hostedBy", "not available");
344
		code2name.put("GP", "GUADELOUPE");
345
		code2name.put("GT", "Guatemala");
346
		code2name.put("collection", "collection");
347
		code2name.put("0022", "Collection");
348
		code2name.put("0027", "Model");
349
		code2name.put("0023", "Event");
350
		code2name.put("0024", "Film");
351
		code2name.put("0025", "Image");
352
		code2name.put("0026", "InteractiveResource");
353
		code2name.put("GD", "GRENADA");
354
		code2name.put("GU", "GUAM");
355
		code2name.put("GG", "GUERNSEY");
356
		code2name.put("GA", "Gabon");
357
		code2name.put("GL", "Greenland");
358
		code2name.put("GY", "Guyana");
359
		code2name.put("LR", "LIBERIA");
360
		code2name.put("LV", "Latvia");
361
		code2name.put("LB", "Lebanon");
362
		code2name.put("LS", "Lesotho");
363
		code2name.put("PW", "PALAU");
364
		code2name.put("subtitle", "subtitle");
365
		code2name.put("translated title", "translated title");
366
		code2name.put("bai", "Bamileke");
367
		code2name.put("bal", "Baluchi");
368
		code2name.put("ell/gre", "Greek");
369
		code2name.put("elx", "Elamite");
370
		code2name.put("gem", "Germanic");
371
		code2name.put("lam", "Lamba");
372
		code2name.put("user:claim:search", "user:claim:search");
373
		code2name.put("ban", "Balinese");
374
		code2name.put("bas", "Basa");
375
		code2name.put("enm", "Middle English");
376
		code2name.put("gez", "Geez");
377
		code2name.put("lez", "Lezghian");
378
		code2name.put("lin", "Lingala");
379
		code2name.put("lit", "Lithuanian");
380
		code2name.put("lol", "Mongo");
381
		code2name.put("peo", "Persian, Old (ca 600 - 400 B.C.)");
382
		code2name.put("phn", "Phoenician");
383
		code2name.put("pus", "Pushto");
384
		code2name.put("tut", "Altaic");
385
		code2name.put("tyv", "Tuvinian");
386
		code2name.put("uga", "Ugaritic");
387
		code2name.put("bat", "Baltic");
388
		code2name.put("bej", "Beja");
389
		code2name.put("bel", "Belarusian");
390
		code2name.put("gil", "Gilbertese");
391
		code2name.put("glg", "Galician");
392
		code2name.put("gmh", "Middle High German");
393
		code2name.put("goh", "Old High German");
394
		code2name.put("loz", "Lozi");
395
		code2name.put("ltz", "Letzeburgesch; Luxembourgish");
396
		code2name.put("gon", "Gondi");
397
		code2name.put("DOI", "DOI");
398
		code2name.put("IsCitedBy", "IsCitedBy");
399
		code2name.put("dataset", "dataset");
400
		code2name.put("bho", "Bhojpuri");
401
		code2name.put("got", "Gothic");
402
		code2name.put("AG", "ANTIGUA AND BARBUDA");
403
		code2name.put("IsPartOf", "IsPartOf");
404
		code2name.put("IE", "Ireland");
405
		code2name.put("bik", "Bikol");
406
		code2name.put("lui", "Luiseno");
407
		code2name.put("grb", "Grebo");
408
		code2name.put("AW", "ARUBA");
409
		code2name.put("lun", "Lunda");
410
		code2name.put("luo", "Luo");
411
		code2name.put("raj", "Rajasthani");
412
		code2name.put("IM", "ISLE OF MAN");
413
		code2name.put("umb", "Umbundu");
414
		code2name.put("rar", "Rarotongan");
415
		code2name.put("roa", "Romance");
416
		code2name.put("rom", "Romany");
417
		code2name.put("LY", "Libyan Arab Jamahiriya");
418
		code2name.put("dnet:od_subjects", "OpenDOAR subjects");
419
		code2name.put("EAN13", "EAN13");
420
		code2name.put("grc", "Ancient Greek");
421
		code2name.put("vai", "Vai");
422
		code2name.put("vie", "Vietnamese");
423
		code2name.put("vol", "Volapük");
424
		code2name.put("0028", "Service");
425
		code2name.put("EISSN", "EISSN");
426
		code2name.put("IT", "Italy");
427
		code2name.put("JE", "JERSEY");
428
		code2name.put("LI", "Liechtenstein");
429
		code2name.put("ddc", "Dewey Decimal Classification");
430
		code2name.put("event", "event");
431
		code2name.put("IsReferencedBy", "IsReferencedBy");
432
		code2name.put("JM", "Jamaica");
433
		code2name.put("LT", "Lithuania");
434
		code2name.put("0029", "Software");
435
		code2name.put("bin", "Bini");
436
		code2name.put("hai", "Haida");
437
		code2name.put("mac/mak", "Macedonian");
438
		code2name.put("vot", "Votic");
439
		code2name.put("wak", "Wakashan");
440
		code2name.put("mesheuropmc", "Medical Subject Headings");
441
		code2name.put("image", "image");
442
		code2name.put("interactiveResource", "interactiveResource");
443
		code2name.put("model", "model");
444
		code2name.put("physicalObject", "physicalObject");
445
		code2name.put("0030", "Sound");
446
		code2name.put("References", "References");
447
		code2name.put("ARK", "ARK");
448
		code2name.put("isRelatedTo", "isRelatedTo");
449
		code2name.put("coauthor", "coauthor");
450
		code2name.put("Handle", "Handle");
451
		code2name.put("ISBN", "ISBN");
452
		code2name.put("LISSN", "LISSN");
453
		code2name.put("LU", "Luxembourg");
454
		code2name.put("MQ", "MARTINIQUE");
455
		code2name.put("MR", "MAURITANIA");
456
		code2name.put("bla", "Siksika");
457
		code2name.put("bnt", "Bantu");
458
		code2name.put("bod/tib", "Tibetan");
459
		code2name.put("hat", "Haitian; Haitian Creole");
460
		code2name.put("hau", "Hausa");
461
		code2name.put("haw", "Hawaiian");
462
		code2name.put("mad", "Madurese");
463
		code2name.put("ron/rum", "Romanian");
464
		code2name.put("mag", "Magahi");
465
		code2name.put("mai", "Maithili");
466
		code2name.put("rus", "Russian");
467
		code2name.put("sad", "Sandawe");
468
		code2name.put("sah", "Yakut");
469
		code2name.put("wal", "Walamo");
470
		code2name.put("war", "Waray");
471
		code2name.put("service", "service");
472
		code2name.put("software", "software");
473
		code2name.put("URN", "URN");
474
		code2name.put("YT", "MAYOTTE");
475
		code2name.put("heb", "Hebrew");
476
		code2name.put("mak", "Makasar");
477
		code2name.put("sai", "South American Indian");
478
		code2name.put("was", "Washo");
479
		code2name.put("sound", "sound");
480
		code2name.put("man", "Mandingo");
481
		code2name.put("wen", "Sorbian");
482
		code2name.put("map", "Austronesian");
483
		code2name.put("sal", "Salishan");
484
		code2name.put("mas", "Masai");
485
		code2name.put("sam", "Samaritan");
486
		code2name.put("hil", "Hiligaynon");
487
		code2name.put("hmo", "Hiri Motu");
488
		code2name.put("dan", "Danish");
489
		code2name.put("div", "Divehi");
490
		code2name.put("hun", "Hungarian");
491
		code2name.put("ibo", "Igbo");
492
		code2name.put("kik", "Gikuyu; Kikuyu");
493
		code2name.put("lug", "Ganda");
494
		code2name.put("nic", "Niger-Kordofanian");
495
		code2name.put("nob", "Bokmål, Norwegian; Norwegian Bokmål");
496
		code2name.put("AF", "AFGHANISTAN");
497
		code2name.put("AZ", "Azerbaijan");
498
		code2name.put("CM", "Cameroon");
499
		code2name.put("CR", "Costa Rica");
500
		code2name.put("CI", "Cote d'Ivoire");
501
		code2name.put("VE", "Venezuela");
502
		code2name.put("VG", "Virgin Islands (British)");
503
		code2name.put("EH", "WESTERN SAHARA");
504
		code2name.put("BSG", "Research for the benefit of specific groups");
505
		code2name.put("NoE", "Network of Excellence");
506
		code2name.put("aar", "Afar");
507
		code2name.put("arm/hye", "Armenian");
508
		code2name.put("aym", "Aymara");
509
		code2name.put("aze", "Azerbaijani");
510
		code2name.put("bak", "Bashkir");
511
		code2name.put("bam", "Bambara");
512
		code2name.put("baq/eus", "Basque");
513
		code2name.put("bih", "Bihari");
514
		code2name.put("bre", "Breton");
515
		code2name.put("bul", "Bulgarian");
516
		code2name.put("cat", "Catalan; Valencian");
517
		code2name.put("cha", "Chamorro");
518
		code2name.put("che", "Chechen");
519
		code2name.put("cor", "Cornish");
520
		code2name.put("dut/nld", "Dutch; Flemish");
521
		code2name.put("dzo", "Dzongkha");
522
		code2name.put("eng", "English");
523
		code2name.put("est", "Estonian");
524
		code2name.put("fij", "Fijian");
525
		code2name.put("ful", "Fulah");
526
		code2name.put("geo/kat", "Georgian");
527
		code2name.put("gla", "Gaelic; Scottish Gaelic");
528
		code2name.put("gre/ell", "Greek, Modern (1453-)");
529
		code2name.put("grn", "Guarani");
530
		code2name.put("guj", "Gujarati");
531
		code2name.put("ice/isl", "Icelandic");
532
		code2name.put("nya", "Chewa; Chichewa; Nyanja");
533
		code2name.put("DFG", "DFG Classification");
534
		code2name.put("pubsrepository::mock", "Mock Publication Repository");
535
		code2name.put("scholarcomminfra", "Scholarly Communication Infrastructure");
536
		code2name.put("EU", "European Union");
537
		code2name.put("scr/hrv", "Croatian");
538
		code2name.put("GF", "French Guiana");
539
		code2name.put("GR", "GREECE");
540
		code2name.put("abk", "Abkhazian");
541
		code2name.put("amh", "Amharic");
542
		code2name.put("ara", "Arabic");
543
		code2name.put("arg", "Aragonese");
544
		code2name.put("cos", "Corsican");
545
		code2name.put("cre", "Cree");
546
		code2name.put("cze/ces", "Czech");
547
		code2name.put("her", "Herero");
548
		code2name.put("ind", "Indonesian");
549
		code2name.put("kal", "Greenlandic; Kalaallisut");
550
		code2name.put("HM", "HEARD ISLAND AND MCDONALD ISLANDS");
551
		code2name.put("AE", "United Arab Emirates");
552
		code2name.put("bis", "Bislama");
553
		code2name.put("ewe", "Ewe");
554
		code2name.put("bos", "Bosnian");
555
		code2name.put("fao", "Faroese");
556
		code2name.put("ger/deu", "German");
557
		code2name.put("ton", "Tonga (Tonga Islands)");
558
		code2name.put("und", "Undetermined");
559
		code2name.put("pubsrepository::thematic", "Thematic Publication Repository");
560
		code2name.put("urd", "Urdu");
561
		code2name.put("uzb", "Uzbek");
562
		code2name.put("doi", "doi");
563
		code2name.put("IsNewVersionOf", "IsNewVersionOf");
564
		code2name.put("smo", "Samoan");
565
		code2name.put("nau", "Nauru");
566
		code2name.put("nor", "Norwegian");
567
		code2name.put("esl/spa", "Spanish");
568
		code2name.put("iii", "Sichuan Yi");
569
		code2name.put("que", "Quechua");
570
		code2name.put("AS", "AMERICAN SAMOA");
571
		code2name.put("ita", "Italian");
572
		code2name.put("BI", "Burundi");
573
		code2name.put("sin", "Sinhala; Sinhalese");
574
		code2name.put("may/msa", "Malay");
575
		code2name.put("tsn", "Tswana");
576
		code2name.put("tso", "Tsonga");
577
		code2name.put("CD", "Congo (Democratic Republic of)");
578
		code2name.put("FJ", "Fiji");
579
		code2name.put("GM", "Gambia");
580
		code2name.put("kaz", "Kazakh");
581
		code2name.put("GW", "Guinea-Bissau");
582
		code2name.put("KW", "Kuwait");
583
		code2name.put("mac/mkd", "Macedonian");
584
		code2name.put("scc/srp", "Serbian");
585
		code2name.put("kur", "Kurdish");
586
		code2name.put("LA", "Lao (People's Democratic Republic)");
587
		code2name.put("UNKNOWN", "UNKNOWN");
588
		code2name.put("mah", "Marshallese");
589
		code2name.put("IsPreviousVersionOf", "IsPreviousVersionOf");
590
		code2name.put("gle", "Irish");
591
		code2name.put("glv", "Manx");
592
		code2name.put("iku", "Inuktitut");
593
		code2name.put("mlg", "Malagasy");
594
		code2name.put("ile", "Interlingue");
595
		code2name.put("jav", "Javanese");
596
		code2name.put("jpn", "Japanese");
597
		code2name.put("nav", "Navajo; Navaho");
598
		code2name.put("khm", "Khmer");
599
		code2name.put("kin", "Kinyarwanda");
600
		code2name.put("lao", "Lao");
601
		code2name.put("lat", "Latin");
602
		code2name.put("oci", "Occitan (post 1500); Provençal");
603
		code2name.put("lav", "Latvian");
604
		code2name.put("mal", "Malayalam");
605
		code2name.put("mao/mri", "Maori");
606
		code2name.put("mlt", "Maltese");
607
		code2name.put("mol", "Moldavian");
608
		code2name.put("nbl", "Ndebele, South");
609
		code2name.put("oji", "Ojibwa");
610
		code2name.put("ori", "Oriya");
611
		code2name.put("pan", "Panjabi; Punjabi");
612
		code2name.put("per/fas", "Persian");
613
		code2name.put("pli", "Pali");
614
		code2name.put("roh", "Raeto-Romance");
615
		code2name.put("pol", "Polish");
616
		code2name.put("rum/ron", "Romanian");
617
		code2name.put("slo/slk", "Slovak");
618
		code2name.put("run", "Rundi");
619
		code2name.put("slv", "Slovenian");
620
		code2name.put("sme", "Northern Sami");
621
		code2name.put("sna", "Shona");
622
		code2name.put("snd", "Sindhi");
623
		code2name.put("som", "Somali");
624
		code2name.put("sus", "Susu");
625
		code2name.put("swe", "Swedish");
626
		code2name.put("swa", "Swahili");
627
		code2name.put("tah", "Tahitian");
628
		code2name.put("tam", "Tamil");
629
		code2name.put("tat", "Tatar");
630
		code2name.put("tuk", "Turkmen");
631
		code2name.put("tur", "Turkish");
632
		code2name.put("ven", "Venda");
633
		code2name.put("kir", "Kirghiz");
634
		code2name.put("mon", "Mongolian");
635
		code2name.put("orm", "Oromo");
636
		code2name.put("sag", "Sango");
637
		code2name.put("kom", "Komi");
638
		code2name.put("mar", "Marathi");
639
		code2name.put("tel", "Telugu");
640
		code2name.put("twi", "Twi");
641
		code2name.put("ukr", "Ukrainian");
642
		code2name.put("OPEN", "Open Access");
643
		code2name.put("171", "Article 171 of the Treaty");
644
		code2name.put("film", "film");
645
		code2name.put("providedBy", "provided by");
646
		code2name.put("dataset_dataset", "dataset_dataset");
647
		code2name.put("publication_dataset", "publication_dataset");
648
		code2name.put("publication_publication", "publication_publication");
649
		code2name.put("coordinator", "coordinator");
650
		code2name.put("participant", "participant");
651
		code2name.put("subcontractor", "subcontractor");
652
		code2name.put("principal investigating", "principal investigating");
653
		code2name.put("exploitation", "exploitation");
654
		code2name.put("DM", "DOMINICA");
655
		code2name.put("BS", "BAHAMAS");
656
		code2name.put("KY", "CAYMAN ISLANDS");
657
		code2name.put("wt:fundingStream", "Wellcome Trust: Funding Stream");
658
		code2name.put("MY", "Malaysia");
659
		code2name.put("0012", "Newsletter");
660
		code2name.put("alternative title", "alternative title");
661
		code2name.put("0014", "Research");
662
		code2name.put("AX", "ÃLAND ISLANDS");
663
		code2name.put("main title", "main title");
664
		code2name.put("GQ", "EQUATORIAL GUINEA");
665
		code2name.put("ML", "Mali");
666
		code2name.put("driver", "OpenAIRE Basic (DRIVER OA)");
667
		code2name.put("wt:hasParentFunding", "wt:hasParentFunding");
668
		code2name.put("OTHER", "Other");
669
		code2name.put("openaire2.0", "OpenAIRE 2.0 (EC funding)");
670
		code2name.put("RESTRICTED", "Restricted");
671
		code2name.put("0020", "Other");
672
		code2name.put("0013", "Part of book or chapter of book");
673
		code2name.put("0016", "Preprint");
674
		code2name.put("aka", "Akan");
675
		code2name.put("TD", "CHAD");
676
		code2name.put("author", "author");
677
		code2name.put("isResultOf", "isResultOf");
678
		code2name.put("0015", "Review");
679
		code2name.put("AD", "ANDORRA");
680
		code2name.put("AI", "ANGUILLA");
681
		code2name.put("BZ", "BELIZE");
682
		code2name.put("CX", "CHRISTMAS ISLAND");
683
		code2name.put("ER", "ERITREA");
684
		code2name.put("MT", "Malta");
685
		code2name.put("NR", "NAURU");
686
		code2name.put("openaire3.0", "OpenAIRE 3.0 (OA, funding)");
687
		code2name.put("0000", "Unknown");
688
		code2name.put("FK", "FALKLAND ISLANDS (MALVINAS)");
689
		code2name.put("NU", "NIUE");
690
		code2name.put("AQ", "ANTARCTICA");
691
		code2name.put("CC", "COCOS (KEELING) ISLANDS");
692
		code2name.put("publication", "publication");
693
		code2name.put("user:claim:pid", "user:claim:pid");
694
		code2name.put("EMBARGO", "Embargo");
695
		code2name.put("0011", "Internal report");
696
		code2name.put("hin", "Hindi");
697
		code2name.put("0010", "Lecture");
698
		code2name.put("BM", "BERMUDA");
699
		code2name.put("pubsrepository::unknown", "Publication Repository");
700
		code2name.put("result", "result");
701
		code2name.put("websource", "Other Source");
702
		code2name.put("BQ", "BONAIRE, SINT EUSTATIUS AND SABA");
703
		code2name.put("DJ", "DJIBOUTI");
704
		code2name.put("GB", "UNITED KINGDOM");
705
		code2name.put("TF", "FRENCH SOUTHERN TERRITORIES");
706
		code2name.put("ido", "Ido");
707
		code2name.put("0017", "Report");
708
		code2name.put("BV", "BOUVET ISLAND");
709
		code2name.put("MG", "Madagascar");
710
		code2name.put("UM", "UNITED STATES MINOR OUTLYING ISLANDS");
711
		code2name.put("datasetsbyjournal", "datasetsbyjournal");
712
		code2name.put("IO", "BRITISH INDIAN OCEAN TERRITORY");
713
		code2name.put("copyrighted", "copyrighted");
714
		code2name.put("created", "created");
715
		code2name.put("notCompatible", "under validation");
716
		code2name.put("native", "native");
717
		code2name.put("fct:program", "fct:program");
718
		code2name.put("fct:hasParentFunding", "fct:hasParentFunding");
719
		code2name.put("0032", "Software Paper");
720
		code2name.put("AO", "ANGOLA");
721
		code2name.put("AR", "Argentina");
722
		code2name.put("AU", "Australia");
723
		code2name.put("sysimport:crosswalk:aggregator", "sysimport:crosswalk:aggregator");
724
		code2name.put("sysimport:crosswalk:cris", "sysimport:crosswalk:cris");
725
		code2name.put("sysimport:crosswalk:datasetarchive", "sysimport:crosswalk:datasetarchive");
726
		code2name.put("bem", "Bemba");
727
		code2name.put("deu/ger", "German");
728
		code2name.put("lub", "Luba-Katanga");
729
		code2name.put("wel/cym", "Welsh");
730
		code2name.put("wln", "Walloon");
731
		code2name.put("AT", "Austria");
732
		code2name.put("BH", "Bahrain");
733
		code2name.put("wol", "Wolof");
734
		code2name.put("xho", "Xhosa");
735
		code2name.put("file::hybrid", "file::hybrid");
736
		code2name.put("orcid", "Open Researcher and Contributor ID");
737
		code2name.put("BW", "Botswana");
738
		code2name.put("BR", "Brazil");
739
		code2name.put("BF", "Burkina Faso");
740
		code2name.put("KH", "Cambodia");
741
		code2name.put("CA", "Canada");
742
		code2name.put("CV", "Cape Verde");
743
		code2name.put("CL", "Chile");
744
		code2name.put("CO", "Colombia");
745
		code2name.put("SV", "El Salvador");
746
		code2name.put("PF", "FRENCH POLYNESIA");
747
		code2name.put("FI", "Finland");
748
		code2name.put("MK", "Former Yugoslav Republic of Macedonia");
749
		code2name.put("DE", "Germany");
750
		code2name.put("KO", "Kosovo * UN resolution");
751
		code2name.put("TR", "Turkey");
752
		code2name.put("TM", "Turkmenistan");
753
		code2name.put("UY", "Uruguay");
754
		code2name.put("ZM", "Zambia");
755
		code2name.put("pmc", "pmc");
756
		code2name.put("arXiv", "arXiv");
757
		code2name.put("httpList", "httpList");
758
		code2name.put("0033", "Audiovisual");
759
		code2name.put("0031", "Data Paper");
760
		code2name.put("ZW", "Zimbabwe");
761
		code2name.put("ec:hasframeworkprogram", "hasframeworkprogram");
762
		code2name.put("ec:hasprogram", "hasprogram");
763
		code2name.put("ec:hasspecificprogram", "hasspecificprogram");
764
		code2name.put("available", "available");
765
		code2name.put("filesystem", "filesystem");
766
		code2name.put("text", "text");
767
		code2name.put("files", "files");
768
		code2name.put("AL", "Albania");
769
		code2name.put("DZ", "Algeria");
770
		code2name.put("AM", "Armenia");
771
		code2name.put("BT", "BHUTAN");
772
		code2name.put("car", "Carib");
773
		code2name.put("cau", "Caucasian");
774
		code2name.put("BD", "Bangladesh");
775
		code2name.put("BB", "Barbados");
776
		code2name.put("BY", "Belarus");
777
		code2name.put("BE", "Belgium");
778
		code2name.put("ceb", "Cebuano");
779
		code2name.put("cel", "Celtic");
780
		code2name.put("BJ", "Benin");
781
		code2name.put("BA", "Bosnia and Herzegovina");
782
		code2name.put("BN", "Brunei Darussalam");
783
		code2name.put("BG", "Bulgaria");
784
		code2name.put("CF", "Central African Republic");
785
		code2name.put("CN", "China (People's Republic of)");
786
		code2name.put("CY", "Cyprus");
787
		code2name.put("DO", "Dominican Republic");
788
		code2name.put("EC", "Ecuador");
789
		code2name.put("ces/cze", "Czech");
790
		code2name.put("chb", "Chibcha");
791
		code2name.put("chg", "Chagatai");
792
		code2name.put("chi/zho", "Chinese");
793
		code2name.put("chm", "Mari");
794
		code2name.put("him", "Himachali");
795
		code2name.put("hup", "Hupa");
796
		code2name.put("iba", "Iban");
797
		code2name.put("ijo", "Ijo");
798
		code2name.put("ilo", "Iloko");
799
		code2name.put("inc", "Indic");
800
		code2name.put("mul", "Multiple languages");
801
		code2name.put("file::PDF", "file::PDF");
802
		code2name.put("file::WoS", "file::WoS");
803
		code2name.put("metadata", "metadata");
804
		code2name.put("FCT", "Fundação para a Ciência e Tecnologia");
805
		code2name.put("EG", "Egypt");
806
		code2name.put("EE", "Estonia");
807
		code2name.put("ET", "Ethiopia");
808
		code2name.put("UG", "Uganda");
809
		code2name.put("UA", "Ukraine");
810
		code2name.put("US", "United States");
811
		code2name.put("UZ", "Uzbekistan");
812
		code2name.put("VN", "Viet Nam");
813
		code2name.put("YE", "Yemen");
814
		code2name.put("orcidworkid", "orcid workid");
815
		code2name.put("fileCSV", "fileCSV");
816
		code2name.put("files_by_rpc", "files_by_rpc");
817
		code2name.put("files_from_mdstore", "files_from_mdstore");
818
		code2name.put("MP", "NORTHERN MARIANA ISLANDS");
819
		code2name.put("datarepository::unknown", "Data Repository");
820
		code2name.put("entityregistry", "Entity Registry");
821
		code2name.put("infospace", "Information Space");
822
		code2name.put("pubsrepository::institutional", "Institutional Publication Repository");
823
		code2name.put("sysimport:mining:cris", "sysimport:mining:cris");
824
		code2name.put("sysimport:mining:datasetarchive", "sysimport:mining:datasetarchive");
825
		code2name.put("sysimport:mining:entityregistry", "sysimport:mining:entityregistry");
826
		code2name.put("pubsrepository::journal", "Journal Platform");
827
		code2name.put("NP", "Nepal");
828
		code2name.put("AN", "Netherlands Antilles");
829
		code2name.put("NC", "New Caledonia");
830
		code2name.put("FO", "Faroe Islands");
831
		code2name.put("MU", "Mauritius");
832
		code2name.put("MX", "Mexico");
833
		code2name.put("sysimport:mining:infospace", "sysimport:mining:infospace");
834
		code2name.put("KN", "SAINT KITTS AND NEVIS");
835
		code2name.put("LC", "SAINT LUCIA");
836
		code2name.put("MF", "SAINT MARTIN (FRENCH PART)");
837
		code2name.put("CH", "Switzerland");
838
		code2name.put("SY", "Syrian Arab Republic");
839
		code2name.put("TL", "TIMOR-LESTE");
840
		code2name.put("TK", "TOKELAU");
841
		code2name.put("PM", "SAINT PIERRE AND MIQUELON");
842
		code2name.put("TO", "TONGA");
843
		code2name.put("TC", "TURKS AND CAICOS ISLANDS");
844
		code2name.put("TV", "TUVALU");
845
		code2name.put("TW", "Taiwan");
846
		code2name.put("TJ", "Tajikistan");
847
		code2name.put("ina", "Auxiliary Language Association)");
848
		code2name.put("kas", "Kashmiri");
849
		code2name.put("kau", "Kanuri");
850
		code2name.put("kaw", "Kawi");
851
		code2name.put("aggregator::pubsrepository::unknown", "Aggregator of Publication Repositories");
852
		code2name.put("crissystem", "CRIS System");
853
		code2name.put("MD", "Moldova (Republic of)");
854
		code2name.put("ME", "Montenegro");
855
		code2name.put("VC", "SAINT VINCENT AND THE GRENADINES");
856
		code2name.put("WS", "SAMOA");
857
		code2name.put("ST", "SAO TOME AND PRINCIPE");
858
		code2name.put("SL", "SIERRA LEONE");
859
		code2name.put("MC", "Support for training and career development of researchers (Marie Curie)");
860
		code2name.put("TZ", "Tanzania (United Republic of)");
861
		code2name.put("TH", "Thailand");
862
		code2name.put("TG", "Togo");
863
		code2name.put("MA", "Morocco");
864
		code2name.put("MZ", "Mozambique");
865
		code2name.put("MM", "Myanmar");
866
		code2name.put("SR", "Suriname");
867
		code2name.put("SZ", "Swaziland");
868
		code2name.put("startDate", "startDate");
869
		code2name.put("NF", "NORFOLK ISLAND");
870
		code2name.put("sysimport:mining:aggregator", "sysimport:mining:aggregator");
871
		code2name.put("SE", "Sweden");
872
		code2name.put("submitted", "submitted");
873
		code2name.put("valid", "valid");
874
		code2name.put("cad", "Caddo");
875
		code2name.put("GH", "Ghana");
876
		code2name.put("GI", "Gibraltar");
877
		code2name.put("EL", "Greece");
878
		code2name.put("PY", "PARAGUAY");
879
		code2name.put("PN", "PITCAIRN");
880
		code2name.put("cai", "Central American Indian");
881
		code2name.put("cus", "Cushitic");
882
		code2name.put("PR", "PUERTO RICO");
883
		code2name.put("PS", "Palestinian-administered areas");
884
		code2name.put("lim", "Limburgan; Limburger; Limburgish");
885
		code2name.put("PA", "Panama");
886
		code2name.put("PG", "Papua New Guinea");
887
		code2name.put("GS", "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS");
888
		code2name.put("SS", "SOUTH SUDAN");
889
		code2name.put("SJ", "SVALBARD AND JAN MAYEN");
890
		code2name.put("SM", "San Marino");
891
		code2name.put("SK", "Slovakia");
892
		code2name.put("SI", "Slovenia");
893
		code2name.put("SO", "Somalia");
894
		code2name.put("ZA", "South Africa");
895
		code2name.put("PK", "Pakistan");
896
		code2name.put("ES", "Spain");
897
		code2name.put("LK", "Sri Lanka");
898
		code2name.put("uig", "Uighur; Uyghur");
899
		code2name.put("sysimport:mining:repository", "sysimport:mining:repository");
900
		code2name.put("ERC", "Support for frontier research (ERC)");
901
		code2name.put("6MONTHS", "6 Months Embargo");
902
		code2name.put("CLOSED", "Closed Access");
903
		code2name.put("bur/mya", "Burmese");
904
		code2name.put("0001", "Article");
905
		code2name.put("chn", "Chinook jargon");
906
		code2name.put("0002", "Book");
907
		code2name.put("SA", "Saudi Arabia");
908
		code2name.put("SN", "Senegal");
909
		code2name.put("cho", "Choctaw");
910
		code2name.put("chr", "Cherokee");
911
		code2name.put("chu", "Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
912
		code2name.put("chv", "Chuvash");
913
		code2name.put("cym/wel", "Welsh");
914
		code2name.put("dak", "Dakota");
915
		code2name.put("del", "Delaware");
916
		code2name.put("din", "Dinka");
917
		code2name.put("nno", "Norwegian Nynorsk; Nynorsk, Norwegian");
918
		code2name.put("pon", "Ponape");
919
		code2name.put("por", "Portuguese");
920
		code2name.put("pra", "Prakrit");
921
		code2name.put("pro", "Provencal");
922
		code2name.put("0004", "Conference object");
923
		code2name.put("0005", "Contribution for newspaper or weekly magazine");
924
		code2name.put("0006", "Doctoral thesis");
925
		code2name.put("GN", "Guinea");
926
		code2name.put("0007", "Master thesis");
927
		code2name.put("NZ", "New Zealand");
928
		code2name.put("NI", "Nicaragua");
929
		code2name.put("NE", "Niger");
930
		code2name.put("PE", "Peru");
931
		code2name.put("PH", "Philippines");
932
		code2name.put("SD", "Sudan");
933
		code2name.put("PL", "Poland");
934
		code2name.put("RS", "Serbia");
935
		code2name.put("san", "Sanskrit");
936
		code2name.put("GE", "Georgia");
937
		code2name.put("hrv", "Croatian");
938
		code2name.put("SX", "SINT MAARTEN (DUTCH PART)");
939
		code2name.put("TT", "Trinidad and Tobago");
940
		code2name.put("NG", "Nigeria");
941
		code2name.put("NO", "Norway");
942
		code2name.put("OM", "Oman");
943
		code2name.put("SB", "SOLOMON ISLANDS");
944
		code2name.put("TN", "Tunisia");
945
		code2name.put("updated", "updated");
946
		code2name.put("SC", "Seychelles");
947
		code2name.put("SG", "Singapore");
948

    
949
	}
950

    
951
	// Builder for Entities
952
	protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
953
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
954
	}
955

    
956
	// Builder for Rels
957
	protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
958
		return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
959
	}
960

    
961
	private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
962
		return oaf.setDataInfo(ensureDataInfo(info)).setTimestamp(System.currentTimeMillis());
963
	}
964

    
965
	protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
966
		if (info.isInitialized()) return info;
967
		return getDataInfo(null, "UNKNOWN", "0.9", false, false);
968
	}
969

    
970
	protected static KeyValue getKV(final String id, final String name) {
971
		return KeyValue.newBuilder().setKey(id).setValue(name).build();
972
	}
973

    
974
	protected static OafRel.Builder getRel(final String sourceId,
975
			final String targetId,
976
			final RelType relType,
977
			final SubRelType subRelType,
978
			final String relClass,
979
			final boolean isChild) {
980
		return OafRel.newBuilder().setSource(sourceId).setTarget(targetId).setRelType(relType).setSubRelType(subRelType).setRelClass(relClass)
981
				.setChild(isChild);
982
	}
983

    
984
	protected static OafEntity.Builder getEntity(final Type type,
985
			final String id,
986
			final KeyValue collectedFrom,
987
			final String originalId,
988
			final String dateOfCollection,
989
			final List<StructuredProperty> pids) {
990
		final OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id).addCollectedfrom(collectedFrom).addOriginalId(originalId)
991
				.setDateofcollection(dateOfCollection);
992

    
993
		if ((pids != null) && !pids.isEmpty()) {
994
			builder.addAllPid(Iterables.filter(pids, Predicates.notNull()));
995
		}
996

    
997
		return builder;
998
	}
999

    
1000
	public static DataInfo.Builder getDataInfo(final NodeList about,
1001
			final String provenanceaction,
1002
			final String trust,
1003
			final boolean deletedbyinference,
1004
			final boolean inferred) {
1005

    
1006
		final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
1007
		dataInfoBuilder.setInferred(Boolean.valueOf(inferred));
1008
		dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(deletedbyinference));
1009
		dataInfoBuilder.setTrust(trust);
1010
		dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
1011

    
1012
		// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
1013
		if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {
1014

    
1015
			final org.w3c.dom.Element e = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
1016

    
1017
			org.w3c.dom.Element elem = getDirectChild(e, "inferred");
1018
			dataInfoBuilder.setInferred(Boolean.valueOf(elem != null ? elem.getTextContent() : String.valueOf(inferred)));
1019

    
1020
			elem = getDirectChild(e, "deletedbyinference");
1021
			dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(elem != null ? elem.getTextContent() : String.valueOf(deletedbyinference)));
1022

    
1023
			elem = getDirectChild(e, "trust");
1024
			dataInfoBuilder.setTrust(elem != null ? elem.getTextContent() : trust);
1025

    
1026
			elem = getDirectChild(e, "inferenceprovenance");
1027
			dataInfoBuilder.setInferenceprovenance(elem != null ? elem.getTextContent() : "");
1028

    
1029
			elem = getDirectChild(e, "provenanceaction");
1030
			final Qualifier.Builder pBuilder = Qualifier.newBuilder();
1031
			if (elem.hasAttributes()) {
1032
				final NamedNodeMap attributes = elem.getAttributes();
1033
				pBuilder.setClassid(attributes.getNamedItem("classid").getNodeValue());
1034
				pBuilder.setClassname(attributes.getNamedItem("classname").getNodeValue());
1035
				pBuilder.setSchemeid(attributes.getNamedItem("schemeid").getNodeValue());
1036
				pBuilder.setSchemename(attributes.getNamedItem("schemename").getNodeValue());
1037
			} else {
1038
				pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
1039
			}
1040
			dataInfoBuilder.setProvenanceaction(pBuilder);
1041
		}
1042

    
1043
		return dataInfoBuilder;
1044
	}
1045

    
1046
	protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) {
1047
		for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
1048
			if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child;
1049
		}
1050
		return null;
1051
	}
1052

    
1053
	protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
1054
		return getQualifier(classname, classname, schemename, schemename);
1055
	}
1056

    
1057
	protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
1058
		return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
1059
	}
1060

    
1061
	protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
1062
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
1063
	}
1064

    
1065
	protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
1066
		if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null;
1067

    
1068
		if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
1069
			qualifier.setClassid(fields.get(0));
1070
			qualifier.setClassname(getClassName(fields.get(0)));
1071
		}
1072
		return qualifier;
1073
	}
1074

    
1075
	protected static void addStructuredProps(final Message.Builder builder,
1076
			final FieldDescriptor fd,
1077
			final List<String> values,
1078
			final String classid,
1079
			final String schemeid) {
1080
		if (values != null) {
1081
			for (final String s : values) {
1082
				addField(builder, fd, getStructuredProperty(s, classid, classid, schemeid, schemeid));
1083
			}
1084
		}
1085
	}
1086

    
1087
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
1088

    
1089
		final List<StructuredProperty> pids = Lists.newArrayList();
1090

    
1091
		for (int i = 0; i < nodelist.getLength(); i++) {
1092
			final Node node = nodelist.item(i);
1093
			if ((node.getNodeType() == Node.ELEMENT_NODE) && node.getLocalName().toLowerCase().equals("identifier")) {
1094

    
1095
				final Node pidType = node.getAttributes().getNamedItem("identifierType");
1096

    
1097
				for (int j = 0; j < node.getChildNodes().getLength(); j++) {
1098
					final Node child = node.getChildNodes().item(j);
1099

    
1100
					if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()
1101
							&& !pidType.getNodeValue().equalsIgnoreCase("url")) {
1102

    
1103
						final String type = pidType.getNodeValue().toLowerCase();
1104

    
1105
						final String value = child.getTextContent();
1106

    
1107
						pids.add(getStructuredProperty(value, type, getClassName(type), "dnet:pid_types", "dnet:pid_types"));
1108
						break;
1109
					}
1110
				}
1111
			}
1112
		}
1113
		return pids;
1114
	}
1115

    
1116
	@SuppressWarnings("unchecked")
1117
	protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) {
1118

    
1119
		if (value == null) return;
1120

    
1121
		if (value instanceof List<?>) {
1122
			for (final Object o : (List<Object>) value) {
1123
				addField(builder, descriptor, o);
1124
			}
1125
		} else {
1126
			Object fieldValue = value;
1127
			switch (descriptor.getType()) {
1128
			case BOOL:
1129
				fieldValue = Boolean.valueOf(value.toString());
1130
				break;
1131
			case BYTES:
1132
				fieldValue = value.toString().getBytes(Charset.forName("UTF-8"));
1133
				break;
1134
			case DOUBLE:
1135
				fieldValue = Double.valueOf(value.toString());
1136
				break;
1137
			case FLOAT:
1138
				fieldValue = Float.valueOf(value.toString());
1139
				break;
1140
			case INT32:
1141
			case INT64:
1142
			case SINT32:
1143
			case SINT64:
1144
				fieldValue = Integer.valueOf(value.toString());
1145
				break;
1146
			case MESSAGE:
1147
				final Builder q = builder.newBuilderForField(descriptor);
1148

    
1149
				if (value instanceof Builder) {
1150
					value = ((Builder) value).build();
1151
					final byte[] b = ((Message) value).toByteArray();
1152
					try {
1153
						q.mergeFrom(b);
1154
					} catch (final InvalidProtocolBufferException e) {
1155
						throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName());
1156
					}
1157
				} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1158
					if (value instanceof Qualifier) {
1159
						q.mergeFrom((Qualifier) value);
1160
					} else {
1161
						parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
1162
					}
1163
				} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1164
					if (value instanceof StructuredProperty) {
1165
						q.mergeFrom((StructuredProperty) value);
1166
					} else {
1167
						parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
1168
					}
1169
				} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1170
					if (value instanceof KeyValue) {
1171
						q.mergeFrom((KeyValue) value);
1172
					} else {
1173
						parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
1174
					}
1175
				} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1176
					if (value instanceof StringField) {
1177
						q.mergeFrom((StringField) value);
1178
					} else {
1179
						q.setField(StringField.getDescriptor().findFieldByName("value"), value);
1180
					}
1181
				} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1182
					if (value instanceof BoolField) {
1183
						q.mergeFrom((BoolField) value);
1184
					} else if (value instanceof String) {
1185
						q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value));
1186
					} else {
1187
						q.setField(BoolField.getDescriptor().findFieldByName("value"), value);
1188
					}
1189
				} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
1190
					if (value instanceof IntField) {
1191
						q.mergeFrom((IntField) value);
1192
					} else if (value instanceof String) {
1193
						q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value));
1194
					} else {
1195
						q.setField(IntField.getDescriptor().findFieldByName("value"), value);
1196
					}
1197
				}
1198

    
1199
				fieldValue = q.buildPartial();
1200
				break;
1201
			default:
1202
				break;
1203
			}
1204

    
1205
			doAddField(builder, descriptor, fieldValue);
1206
		}
1207

    
1208
	}
1209

    
1210
	protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
1211
		if (value != null) {
1212
			if (fd.isRepeated()) {
1213
				builder.addRepeatedField(fd, value);
1214
			} else if (fd.isOptional() || fd.isRequired()) {
1215
				builder.setField(fd, value);
1216
			}
1217
		}
1218
	}
1219

    
1220
	protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
1221
		final IterablePair<FieldDescriptor, String> iterablePair =
1222
				new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
1223
						.on(split).trimResults().split(value)));
1224

    
1225
		for (final Pair<FieldDescriptor, String> p : iterablePair) {
1226
			addField(builder, p.getKey(), p.getValue());
1227
		}
1228
	}
1229

    
1230
	protected static String base64(final byte[] data) {
1231
		return new String(Base64.encodeBase64(data));
1232
	}
1233

    
1234
	public static String replace(final String s, final String regex, final String replacement) {
1235
		return s.replaceAll(regex, replacement);
1236
	}
1237

    
1238
	public static String trim(final String s) {
1239
		return s.trim();
1240
	}
1241

    
1242
	protected static String removePrefix(final Type type, final String s) {
1243
		return removePrefix(type.toString(), s);
1244
	}
1245

    
1246
	private static String removePrefix(final String prefix, final String s) {
1247
		return StringUtils.removeStart("" + s, prefix + "|");
1248
	}
1249

    
1250
	protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
1251
		final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme);
1252
		return qualifier;
1253
	}
1254

    
1255
	protected static StructuredProperty getStructuredProperty(final String value,
1256
			final String classid,
1257
			final String classname,
1258
			final String schemeid,
1259
			final String schemename) {
1260
		if ((value == null) || value.isEmpty()) return null;
1261
		return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
1262
	}
1263

    
1264
	protected static StringField.Builder sf(final String s) {
1265
		return StringField.newBuilder().setValue(s);
1266
	}
1267

    
1268
	public static String generateNsPrefix(final String prefix, final String externalId) {
1269
		return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN);
1270
	}
1271

    
1272
	public static String md5(final String s) {
1273
		try {
1274
			final MessageDigest md = MessageDigest.getInstance("MD5");
1275
			md.update(s.getBytes("UTF-8"));
1276
			return new String(Hex.encodeHex(md.digest()));
1277
		} catch (final Exception e) {
1278
			System.err.println("Error creating id");
1279
			return null;
1280
		}
1281
	}
1282

    
1283
	public static String oafId(final String entityType, final String prefix, final String id) {
1284
		if (id.isEmpty() || prefix.isEmpty()) return "";
1285
		return oafSimpleId(entityType, prefix + "::" + md5(id));
1286
	}
1287

    
1288
	public static String oafPersonId(final String entityType, final String prefix, final String parentId, final String localId, final Map<String, Object> map) {
1289
		if (MapUtils.isNotEmpty(map)) {
1290
			final Object val = map.get(prefix);
1291
			if ((val != null) && val.equals("true")) {
1292
				final String oafId = oafId(entityType, prefix, localId);
1293
				return oafId;
1294
			}
1295
		}
1296

    
1297
		final String oafId = oafId(entityType, prefix, parentId + "::" + localId);
1298
		return oafId;
1299
	}
1300

    
1301
	public static String oafSimpleId(final String entityType, final String id) {
1302
		return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", "");
1303
	}
1304

    
1305
	public static String oafSplitId(final String entityType, final String fullId) {
1306
		return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::"));
1307
	}
1308

    
1309
	/**
1310
	 * Gets the classname of the given class code
1311
	 *
1312
	 * @param code
1313
	 *            class code.
1314
	 * @return the class name, if the code is a key of the map. The code itself otherwise.
1315
	 */
1316
	public static String getClassName(final String code) {
1317
		final String classname = code2name.get(code);
1318
		if (StringUtils.isBlank(classname)) return code;
1319
		return classname;
1320
	}
1321

    
1322
	/**
1323
	 * Utility method, allows to perform param based map lookups in xsl
1324
	 *
1325
	 * @param map
1326
	 * @param key
1327
	 * @return value associated to the key.
1328
	 */
1329
	public static Object lookupValue(final Map<String, Object> map, final String key) {
1330
		return map.get(key);
1331
	}
1332

    
1333
	/**
1334
	 * Utility method, allows to perform param based map lookups in xsl
1335
	 *
1336
	 * @param map
1337
	 * @param key
1338
	 * @return value associated to the key.
1339
	 */
1340
	public static int mustMerge(final Map<String, Object> map, final String key) {
1341
		final Object val = lookupValue(map, key);
1342
		return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0;
1343
	}
1344

    
1345
}
(1-1/9)