1
|
package eu.dnetlib.data.transform.xml;
|
2
|
|
3
|
import java.nio.charset.Charset;
|
4
|
import java.security.MessageDigest;
|
5
|
import java.util.List;
|
6
|
import java.util.Map;
|
7
|
|
8
|
import com.google.common.base.Predicate;
|
9
|
import com.google.common.base.Predicates;
|
10
|
import com.google.common.base.Splitter;
|
11
|
import com.google.common.collect.Iterables;
|
12
|
import com.google.common.collect.Lists;
|
13
|
import com.google.common.collect.Maps;
|
14
|
import com.google.protobuf.Descriptors.Descriptor;
|
15
|
import com.google.protobuf.Descriptors.FieldDescriptor;
|
16
|
import com.google.protobuf.InvalidProtocolBufferException;
|
17
|
import com.google.protobuf.Message;
|
18
|
import com.google.protobuf.Message.Builder;
|
19
|
import com.google.protobuf.ProtocolMessageEnum;
|
20
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
|
21
|
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
|
22
|
import eu.dnetlib.data.proto.DedupProtos.Dedup;
|
23
|
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
|
24
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
25
|
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
|
26
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
27
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
28
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
29
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
30
|
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization;
|
31
|
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson;
|
32
|
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship;
|
33
|
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult;
|
34
|
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
|
35
|
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Contribution;
|
36
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
|
37
|
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
|
38
|
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson;
|
39
|
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson;
|
40
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
41
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
42
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
43
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
|
44
|
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
|
45
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
|
46
|
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
|
47
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
|
48
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part;
|
49
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
|
50
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
|
51
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement;
|
52
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
53
|
import eu.dnetlib.miscutils.collections.Pair;
|
54
|
import eu.dnetlib.miscutils.iterators.IterablePair;
|
55
|
import org.apache.commons.codec.binary.Base64;
|
56
|
import org.apache.commons.codec.binary.Hex;
|
57
|
import org.apache.commons.lang.StringUtils;
|
58
|
import org.apache.commons.lang.math.NumberUtils;
|
59
|
import org.w3c.dom.NamedNodeMap;
|
60
|
import org.w3c.dom.Node;
|
61
|
import org.w3c.dom.NodeList;
|
62
|
|
63
|
public abstract class AbstractDNetXsltFunctions {
|
64
|
|
65
|
/**
 * Regular expression for strings that begin with {@code http}, {@code https} or {@code ftp}
 * followed by {@code ://}. Used by {@link #urlFilter} to test trimmed input strings.
 */
public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
|
66
|
/**
 * Length limit of 12. NOTE(review): judging by the name this caps a namespace-prefix length;
 * the code that reads it is outside this excerpt — confirm against its usage.
 */
private static final int MAX_NSPREFIX_LEN = 12;
|
67
|
public static Predicate<String> urlFilter = new Predicate<String>() {
|
68
|
|
69
|
@Override
|
70
|
public boolean apply(final String s) {
|
71
|
return s.trim().matches(URL_REGEX);
|
72
|
}
|
73
|
};
|
74
|
/**
 * Lookup table from a code to its name, filled by the static initializer of this class.
 * The field is a shared, mutable map that is visible to subclasses.
 */
protected static Map<String, String> code2name = Maps.newHashMap();
|
75
|
|
76
|
/*
 * The code/name pairs below were obtained by running the following statement on the relational db:
 *
 *   COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *);
 */
|
80
|
static {
|
81
|
code2name.put("cpe", "English-based Creoles and Pidgins");
|
82
|
code2name.put("max", "Manx");
|
83
|
code2name.put("gon", "Gondi");
|
84
|
code2name.put("ine", "Indo - European");
|
85
|
code2name.put("ira", "Iranian");
|
86
|
code2name.put("mni", "Manipuri");
|
87
|
code2name.put("mic", "Micmac");
|
88
|
code2name.put("min", "Minangkabau");
|
89
|
code2name.put("cpp", "Portuguese-based Creoles and Pidgins");
|
90
|
code2name.put("smi", "Sami");
|
91
|
code2name.put("srd", "Sardinian");
|
92
|
code2name.put("sco", "Scots");
|
93
|
code2name.put("sel", "Selkup");
|
94
|
code2name.put("sem", "Semitic");
|
95
|
code2name.put("sid", "Sidamo");
|
96
|
code2name.put("sit", "Sino-Tibetan");
|
97
|
code2name.put("sio", "Siouan");
|
98
|
code2name.put("ssw", "Swati");
|
99
|
code2name.put("tgk", "Tajik");
|
100
|
code2name.put("yao", "Yao");
|
101
|
code2name.put("yap", "Yap");
|
102
|
code2name.put("yid", "Yiddish");
|
103
|
code2name.put("arxiv", "arXiv");
|
104
|
code2name.put("api", "api");
|
105
|
code2name.put("datasetsbyproject", "datasetsbyproject");
|
106
|
code2name.put("file", "file");
|
107
|
code2name.put("files_from_metadata", "files_from_metadata");
|
108
|
code2name.put("ftp", "ftp");
|
109
|
code2name.put("http", "http");
|
110
|
code2name.put("jdbc", "jdbc");
|
111
|
code2name.put("other", "other");
|
112
|
code2name.put("rest", "rest");
|
113
|
code2name.put("soap", "soap");
|
114
|
code2name.put("0009", "External research report");
|
115
|
code2name.put("aggregator::pubsrepository::institutional", "Aggregator of Institutional Publication Repositories");
|
116
|
code2name.put("LSID", "LSID");
|
117
|
code2name.put("PURL", "PURL");
|
118
|
code2name.put("UPC", "UPC");
|
119
|
code2name.put("URL", "URL");
|
120
|
code2name.put("CK", "COOK ISLANDS");
|
121
|
code2name.put("CW", "CURAÇAO");
|
122
|
code2name.put("CG", "Congo");
|
123
|
code2name.put("HR", "Croatia");
|
124
|
code2name.put("CU", "Cuba");
|
125
|
code2name.put("CZ", "Czech Republic");
|
126
|
code2name.put("DK", "Denmark");
|
127
|
code2name.put("ER", "ERITREA");
|
128
|
code2name.put("VA", "HOLY SEE (VATICAN CITY STATE)");
|
129
|
code2name.put("FM", "MICRONESIA, FEDERATED STATES OF");
|
130
|
code2name.put("MN", "MONGOLIA");
|
131
|
code2name.put("MS", "MONTSERRAT");
|
132
|
code2name.put("MW", "Malawi");
|
133
|
code2name.put("MV", "Maldives");
|
134
|
code2name.put("MH", "Marshall Islands");
|
135
|
code2name.put("NA", "Namibia");
|
136
|
code2name.put("OC", "Oceania");
|
137
|
code2name.put("TK", "TOKELAU");
|
138
|
code2name.put("TC", "TURKS AND CAICOS ISLANDS");
|
139
|
code2name.put("TV", "TUVALU");
|
140
|
code2name.put("TW", "Taiwan");
|
141
|
code2name.put("TJ", "Tajikistan");
|
142
|
code2name.put("ECSEL-RIA", "ECSEL Research and Innovation Actions");
|
143
|
code2name.put("12MONTHS", "12 Months Embargo");
|
144
|
code2name.put("ace", "Achinese");
|
145
|
code2name.put("egy", "Ancient Egyptian");
|
146
|
code2name.put("nah", "Aztec");
|
147
|
code2name.put("bug", "Buginese");
|
148
|
code2name.put("chy", "Cheyenne");
|
149
|
code2name.put("cop", "Coptic");
|
150
|
code2name.put("crp", "Creoles and Pidgins");
|
151
|
code2name.put("dra", "Dravidian");
|
152
|
code2name.put("ipk", "Inupiaq");
|
153
|
code2name.put("mno", "Manobo");
|
154
|
code2name.put("men", "Mende");
|
155
|
code2name.put("BO", "Bolivia");
|
156
|
code2name.put("KM", "COMOROS");
|
157
|
code2name.put("FK", "FALKLAND ISLANDS (MALVINAS)");
|
158
|
code2name.put("TF", "FRENCH SOUTHERN TERRITORIES");
|
159
|
code2name.put("MO", "Macao");
|
160
|
code2name.put("TO", "TONGA");
|
161
|
code2name.put("jav/jaw", "Javanese");
|
162
|
code2name.put("ale", "Aleut");
|
163
|
code2name.put("asm", "Assamese");
|
164
|
code2name.put("ava", "Avaric");
|
165
|
code2name.put("ave", "Avestan");
|
166
|
code2name.put("bra", "Braj");
|
167
|
code2name.put("bua", "Buriat");
|
168
|
code2name.put("chr", "Cherokee");
|
169
|
code2name.put("chn", "Chinook jargon");
|
170
|
code2name.put("cho", "Choctaw");
|
171
|
code2name.put("mus", "Creek");
|
172
|
code2name.put("div", "Divehi");
|
173
|
code2name.put("dua", "Duala");
|
174
|
code2name.put("fre/fra", "French");
|
175
|
code2name.put("sve/swe", "Swedish");
|
176
|
code2name.put("dut/nld", "Dutch; Flemish");
|
177
|
code2name.put("ewo", "Ewondo");
|
178
|
code2name.put("jrb", "Judeo-Arabic");
|
179
|
code2name.put("jpr", "Judeo-Persian");
|
180
|
code2name.put("kab", "Kabyle");
|
181
|
code2name.put("mwr", "Marwari");
|
182
|
code2name.put("mun", "Munda");
|
183
|
code2name.put("fro", "Old French");
|
184
|
code2name.put("osa", "Osage");
|
185
|
code2name.put("oss", "Ossetian; Ossetic");
|
186
|
code2name.put("oto", "Otomian");
|
187
|
code2name.put("syr", "Syriac");
|
188
|
code2name.put("tgl", "Tagalog");
|
189
|
code2name.put("ter", "Tereno");
|
190
|
code2name.put("file::EuropePMC", "file::EuropePMC");
|
191
|
code2name.put("nsf:fieldOfApplication", "Field of Application (NSF)");
|
192
|
code2name.put("fileGzip", "fileGzip");
|
193
|
code2name.put("httpCSV", "httpCSV");
|
194
|
code2name.put("oai_sets", "oai_sets");
|
195
|
code2name.put("0018", "Annotation");
|
196
|
code2name.put("0021", "Dataset");
|
197
|
code2name.put("0019", "Patent");
|
198
|
code2name.put("aggregator::datarepository", "Aggregator of Data Repositories");
|
199
|
code2name.put("aggregator::pubsrepository::journals", "Aggregator/Publisher of Journals");
|
200
|
code2name.put("ISSN", "ISSN");
|
201
|
code2name.put("ISTC", "ISTC");
|
202
|
code2name.put("FR", "France");
|
203
|
code2name.put("HT", "Haiti");
|
204
|
code2name.put("HN", "Honduras");
|
205
|
code2name.put("HK", "Hong Kong");
|
206
|
code2name.put("KP", "KOREA, DEMOCRATIC PEOPLE'S REPUBLIC OF");
|
207
|
code2name.put("KZ", "Kazakhstan");
|
208
|
code2name.put("KE", "Kenya");
|
209
|
code2name.put("KR", "Korea (Republic of)");
|
210
|
code2name.put("NL", "Netherlands");
|
211
|
code2name.put("PT", "Portugal");
|
212
|
code2name.put("QA", "Qatar");
|
213
|
code2name.put("RO", "Romania");
|
214
|
code2name.put("RU", "Russian Federation");
|
215
|
code2name.put("TZ", "Tanzania (United Republic of)");
|
216
|
code2name.put("TH", "Thailand");
|
217
|
code2name.put("TG", "Togo");
|
218
|
code2name.put("MSCA-COFUND-DP", "Doctoral programmes");
|
219
|
code2name.put("IA", "Innovation action");
|
220
|
code2name.put("ERC-STG", "Starting Grant");
|
221
|
code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network");
|
222
|
code2name.put("ach", "Acoli");
|
223
|
code2name.put("ada", "Adangme");
|
224
|
code2name.put("afh", "Afrihili");
|
225
|
code2name.put("afr", "Afrikaans");
|
226
|
code2name.put("afa", "Afro-Asiatic");
|
227
|
code2name.put("alg", "Algonquian languages");
|
228
|
code2name.put("arw", "Arawak");
|
229
|
code2name.put("dyu", "Dyula");
|
230
|
code2name.put("kac", "Kachin");
|
231
|
code2name.put("kaa", "Kara-Kalpak");
|
232
|
code2name.put("loz", "Lozi");
|
233
|
code2name.put("myn", "Mayan");
|
234
|
code2name.put("dum", "Middle Dutch");
|
235
|
code2name.put("nde", "Ndebele, North");
|
236
|
code2name.put("ndo", "Ndonga");
|
237
|
code2name.put("ota", "Ottoman");
|
238
|
code2name.put("fas/per", "Persian");
|
239
|
code2name.put("HU", "Hungary");
|
240
|
code2name.put("IQ", "IRAQ");
|
241
|
code2name.put("IS", "Iceland");
|
242
|
code2name.put("RW", "Rwanda");
|
243
|
code2name.put("RE", "RÉUNION");
|
244
|
code2name.put("BL", "SAINT BARTHÉLEMY");
|
245
|
code2name.put("bas", "Basa");
|
246
|
code2name.put("eka", "Ekajuk");
|
247
|
code2name.put("elx", "Elamite");
|
248
|
code2name.put("eng", "English");
|
249
|
code2name.put("fat", "Fanti");
|
250
|
code2name.put("kha", "Khasi");
|
251
|
code2name.put("khi", "Khoisan");
|
252
|
code2name.put("tib/bod", "Tibetan");
|
253
|
code2name.put("kho", "Khotanese");
|
254
|
code2name.put("pal", "Pahlavi");
|
255
|
code2name.put("pau", "Palauan");
|
256
|
code2name.put("pam", "Pampanga");
|
257
|
code2name.put("pag", "Pangasinan");
|
258
|
code2name.put("pap", "Papiamento");
|
259
|
code2name.put("phn", "Phoenician");
|
260
|
code2name.put("pus", "Pushto");
|
261
|
code2name.put("sux", "Sumerian");
|
262
|
code2name.put("tmh", "Tamashek");
|
263
|
code2name.put("tha", "Thai");
|
264
|
code2name.put("tig", "Tigre");
|
265
|
code2name.put("tir", "Tigrinya");
|
266
|
code2name.put("tem", "Timne");
|
267
|
code2name.put("tiv", "Tivi");
|
268
|
code2name.put("uga", "Ugaritic");
|
269
|
code2name.put("wos", "Web of Science Subject Areas");
|
270
|
code2name.put("ec:frameworkprogram", "frameworkprogram");
|
271
|
code2name.put("ec:program", "program");
|
272
|
code2name.put("ec:specificprogram", "specificprogram");
|
273
|
code2name.put("collection", "collection");
|
274
|
code2name.put("hostedBy", "not available");
|
275
|
code2name.put("0022", "Collection");
|
276
|
code2name.put("0023", "Event");
|
277
|
code2name.put("0024", "Film");
|
278
|
code2name.put("0025", "Image");
|
279
|
code2name.put("0026", "InteractiveResource");
|
280
|
code2name.put("0027", "Model");
|
281
|
code2name.put("pubscatalogue::unknown", "Publication Catalogue");
|
282
|
code2name.put("GD", "GRENADA");
|
283
|
code2name.put("GP", "GUADELOUPE");
|
284
|
code2name.put("GU", "GUAM");
|
285
|
code2name.put("GG", "GUERNSEY");
|
286
|
code2name.put("GA", "Gabon");
|
287
|
code2name.put("GL", "Greenland");
|
288
|
code2name.put("GT", "Guatemala");
|
289
|
code2name.put("GY", "Guyana");
|
290
|
code2name.put("JP", "Japan");
|
291
|
code2name.put("JO", "Jordan");
|
292
|
code2name.put("KI", "KIRIBATI");
|
293
|
code2name.put("XK", "Kosovo * UN resolution");
|
294
|
code2name.put("KG", "Kyrgyzstan");
|
295
|
code2name.put("LR", "LIBERIA");
|
296
|
code2name.put("LV", "Latvia");
|
297
|
code2name.put("LS", "Lesotho");
|
298
|
code2name.put("PW", "PALAU");
|
299
|
code2name.put("TN", "Tunisia");
|
300
|
code2name.put("UK", "United Kingdom");
|
301
|
code2name.put("VU", "VANUATU");
|
302
|
code2name.put("VI", "VIRGIN ISLANDS, U.S.");
|
303
|
code2name.put("WF", "WALLIS AND FUTUNA");
|
304
|
code2name.put("driver-openaire2.0", "OpenAIRE 2.0+ (DRIVER OA, EC funding)");
|
305
|
code2name.put("CSA-LS", "CSA Lump sum");
|
306
|
code2name.put("MSCA-ITN-EID", "European Industrial Doctorates");
|
307
|
code2name.put("MSCA-ITN-EJD", "European Joint Doctorates");
|
308
|
code2name.put("FCH2-RIA", "FCH2 Research and Innovation action");
|
309
|
code2name.put("MSCA-IF-GF", "Global Fellowships");
|
310
|
code2name.put("MSCA-RISE", "RISE");
|
311
|
code2name.put("MSCA-IF-EF-ST", "Standard EF");
|
312
|
code2name.put("tut", "Altaic");
|
313
|
code2name.put("awa", "Awadhi");
|
314
|
code2name.put("ban", "Balinese");
|
315
|
code2name.put("bal", "Baluchi");
|
316
|
code2name.put("bai", "Bamileke");
|
317
|
code2name.put("bad", "Banda");
|
318
|
code2name.put("ben", "Bengali");
|
319
|
code2name.put("ber", "Berber");
|
320
|
code2name.put("dak", "Dakota");
|
321
|
code2name.put("del", "Delaware");
|
322
|
code2name.put("paa", "Papuan-Australian");
|
323
|
code2name.put("tli", "Tlingit");
|
324
|
code2name.put("tog", "Tonga(Nyasa)");
|
325
|
code2name.put("tru", "Truk");
|
326
|
code2name.put("bak", "Bashkir");
|
327
|
code2name.put("bul", "Bulgarian");
|
328
|
code2name.put("cor", "Cornish");
|
329
|
code2name.put("est", "Estonian");
|
330
|
code2name.put("fao", "Faroese");
|
331
|
code2name.put("gil", "Gilbertese");
|
332
|
code2name.put("got", "Gothic");
|
333
|
code2name.put("grb", "Grebo");
|
334
|
code2name.put("hat", "Haitian; Haitian Creole");
|
335
|
code2name.put("hau", "Hausa");
|
336
|
code2name.put("isRelatedTo", "isRelatedTo");
|
337
|
code2name.put("coauthor", "coauthor");
|
338
|
code2name.put("haw", "Hawaiian");
|
339
|
code2name.put("gai/iri", "Irish");
|
340
|
code2name.put("kar", "Karen");
|
341
|
code2name.put("lui", "Luiseno");
|
342
|
code2name.put("goh", "Old High German");
|
343
|
code2name.put("raj", "Rajasthani");
|
344
|
code2name.put("rar", "Rarotongan");
|
345
|
code2name.put("roa", "Romance");
|
346
|
code2name.put("ron/rum", "Romanian");
|
347
|
code2name.put("sal", "Salishan");
|
348
|
code2name.put("sad", "Sandawe");
|
349
|
code2name.put("bla", "Siksika");
|
350
|
code2name.put("bod/tib", "Tibetan");
|
351
|
code2name.put("umb", "Umbundu");
|
352
|
code2name.put("vai", "Vai");
|
353
|
code2name.put("vie", "Vietnamese");
|
354
|
code2name.put("vol", "Volapük");
|
355
|
code2name.put("DFG", "DFG Classification");
|
356
|
code2name.put("ddc", "Dewey Decimal Classification");
|
357
|
code2name.put("mesheuropmc", "Medical Subject Headings");
|
358
|
code2name.put("dnet:od_subjects", "OpenDOAR subjects");
|
359
|
code2name.put("event", "event");
|
360
|
code2name.put("image", "image");
|
361
|
code2name.put("interactiveResource", "interactiveResource");
|
362
|
code2name.put("IsCitedBy", "IsCitedBy");
|
363
|
code2name.put("0028", "Service");
|
364
|
code2name.put("0029", "Software");
|
365
|
code2name.put("0030", "Sound");
|
366
|
code2name.put("AG", "ANTIGUA AND BARBUDA");
|
367
|
code2name.put("IsPartOf", "IsPartOf");
|
368
|
code2name.put("IsReferencedBy", "IsReferencedBy");
|
369
|
code2name.put("References", "References");
|
370
|
code2name.put("AW", "ARUBA");
|
371
|
code2name.put("GR", "Greece");
|
372
|
code2name.put("HM", "HEARD ISLAND AND MCDONALD ISLANDS");
|
373
|
code2name.put("IM", "ISLE OF MAN");
|
374
|
code2name.put("IE", "Ireland");
|
375
|
code2name.put("IT", "Italy");
|
376
|
code2name.put("LY", "Libyan Arab Jamahiriya");
|
377
|
code2name.put("JE", "JERSEY");
|
378
|
code2name.put("JM", "Jamaica");
|
379
|
code2name.put("LI", "Liechtenstein");
|
380
|
code2name.put("LT", "Lithuania");
|
381
|
code2name.put("LU", "Luxembourg");
|
382
|
code2name.put("MQ", "MARTINIQUE");
|
383
|
code2name.put("AE", "United Arab Emirates");
|
384
|
code2name.put("VE", "Venezuela");
|
385
|
code2name.put("BOA/Task Order", "BOA/Task Order");
|
386
|
code2name.put("Continuing grant", "Continuing grant");
|
387
|
code2name.put("Contract", "Contract");
|
388
|
code2name.put("Contract Interagency Agreement", "Contract Interagency Agreement");
|
389
|
code2name.put("Cooperative Agreement", "Cooperative Agreement");
|
390
|
code2name.put("Fellowship", "Fellowship");
|
391
|
code2name.put("amh", "Amharic");
|
392
|
code2name.put("map", "Austronesian");
|
393
|
code2name.put("aym", "Aymara");
|
394
|
code2name.put("bnt", "Bantu");
|
395
|
code2name.put("bho", "Bhojpuri");
|
396
|
code2name.put("bik", "Bikol");
|
397
|
code2name.put("ell/gre", "Greek");
|
398
|
code2name.put("heb", "Hebrew");
|
399
|
code2name.put("rom", "Romany");
|
400
|
code2name.put("vot", "Votic");
|
401
|
code2name.put("model", "model");
|
402
|
code2name.put("ARK", "ARK");
|
403
|
code2name.put("DOI", "DOI");
|
404
|
code2name.put("MR", "MAURITANIA");
|
405
|
code2name.put("Fixed Price Award", "Fixed Price Award");
|
406
|
code2name.put("rus", "Russian");
|
407
|
code2name.put("YT", "MAYOTTE");
|
408
|
code2name.put("bis", "Bislama");
|
409
|
code2name.put("cha", "Chamorro");
|
410
|
code2name.put("che", "Chechen");
|
411
|
code2name.put("scr/hrv", "Croatian");
|
412
|
code2name.put("cos", "Corsican");
|
413
|
code2name.put("kik", "Gikuyu; Kikuyu");
|
414
|
code2name.put("gre/ell", "Greek, Modern (1453-)");
|
415
|
code2name.put("her", "Herero");
|
416
|
code2name.put("hil", "Hiligaynon");
|
417
|
code2name.put("hmo", "Hiri Motu");
|
418
|
code2name.put("ger/deu", "German");
|
419
|
code2name.put("hun", "Hungarian");
|
420
|
code2name.put("iku", "Inuktitut");
|
421
|
code2name.put("ita", "Italian");
|
422
|
code2name.put("jav", "Javanese");
|
423
|
code2name.put("gle", "Irish");
|
424
|
code2name.put("kaz", "Kazakh");
|
425
|
code2name.put("mac/mkd", "Macedonian");
|
426
|
code2name.put("scc/srp", "Serbian");
|
427
|
code2name.put("khm", "Khmer");
|
428
|
code2name.put("ori", "Oriya");
|
429
|
code2name.put("orm", "Oromo");
|
430
|
code2name.put("pan", "Panjabi; Punjabi");
|
431
|
code2name.put("pol", "Polish");
|
432
|
code2name.put("que", "Quechua");
|
433
|
code2name.put("smo", "Samoan");
|
434
|
code2name.put("sna", "Shona");
|
435
|
code2name.put("iii", "Sichuan Yi");
|
436
|
code2name.put("snd", "Sindhi");
|
437
|
code2name.put("som", "Somali");
|
438
|
code2name.put("esl/spa", "Spanish");
|
439
|
code2name.put("sun", "Sundanese");
|
440
|
code2name.put("sus", "Susu");
|
441
|
code2name.put("swa", "Swahili");
|
442
|
code2name.put("swe", "Swedish");
|
443
|
code2name.put("ton", "Tonga (Tonga Islands)");
|
444
|
code2name.put("tso", "Tsonga");
|
445
|
code2name.put("tsn", "Tswana");
|
446
|
code2name.put("tur", "Turkish");
|
447
|
code2name.put("tuk", "Turkmen");
|
448
|
code2name.put("und", "Undetermined");
|
449
|
code2name.put("pubsrepository::mock", "Mock Publication Repository");
|
450
|
code2name.put("scholarcomminfra", "Scholarly Communication Infrastructure");
|
451
|
code2name.put("pubsrepository::thematic", "Thematic Publication Repository");
|
452
|
code2name.put("IsNewVersionOf", "IsNewVersionOf");
|
453
|
code2name.put("AF", "AFGHANISTAN");
|
454
|
code2name.put("AS", "AMERICAN SAMOA");
|
455
|
code2name.put("AZ", "Azerbaijan");
|
456
|
code2name.put("BI", "Burundi");
|
457
|
code2name.put("CM", "Cameroon");
|
458
|
code2name.put("CD", "Congo (Democratic Republic of)");
|
459
|
code2name.put("CR", "Costa Rica");
|
460
|
code2name.put("CI", "Cote d'Ivoire");
|
461
|
code2name.put("EU", "European Union");
|
462
|
code2name.put("FJ", "Fiji");
|
463
|
code2name.put("GF", "French Guiana");
|
464
|
code2name.put("GM", "Gambia");
|
465
|
code2name.put("GW", "Guinea-Bissau");
|
466
|
code2name.put("KW", "Kuwait");
|
467
|
code2name.put("LA", "Lao (People's Democratic Republic)");
|
468
|
code2name.put("VG", "Virgin Islands (British)");
|
469
|
code2name.put("EH", "WESTERN SAHARA");
|
470
|
code2name.put("endDate", "endDate");
|
471
|
code2name.put("issued", "issued");
|
472
|
code2name.put("startDate", "startDate");
|
473
|
code2name.put("submitted", "submitted");
|
474
|
code2name.put("fct:hasParentFunding", "fct:hasParentFunding");
|
475
|
code2name.put("FCH2-CSA", "Coordination & support action");
|
476
|
code2name.put("Standard Grant", "Standard Grant");
|
477
|
code2name.put("abk", "Abkhazian");
|
478
|
code2name.put("aar", "Afar");
|
479
|
code2name.put("arg", "Aragonese");
|
480
|
code2name.put("aze", "Azerbaijani");
|
481
|
code2name.put("bam", "Bambara");
|
482
|
code2name.put("baq/eus", "Basque");
|
483
|
code2name.put("bih", "Bihari");
|
484
|
code2name.put("cat", "Catalan; Valencian");
|
485
|
code2name.put("ice/isl", "Icelandic");
|
486
|
code2name.put("pli", "Pali");
|
487
|
code2name.put("per/fas", "Persian");
|
488
|
code2name.put("bos", "Bosnian");
|
489
|
code2name.put("jpn", "Japanese");
|
490
|
code2name.put("kin", "Kinyarwanda");
|
491
|
code2name.put("websource", "Other Source");
|
492
|
code2name.put("kom", "Komi");
|
493
|
code2name.put("new", "Newari");
|
494
|
code2name.put("twi", "Twi");
|
495
|
code2name.put("ukr", "Ukrainian");
|
496
|
code2name.put("urd", "Urdu");
|
497
|
code2name.put("BS", "BAHAMAS");
|
498
|
code2name.put("providedBy", "provided by");
|
499
|
code2name.put("dataset_dataset", "dataset_dataset");
|
500
|
code2name.put("publication_dataset", "publication_dataset");
|
501
|
code2name.put("publication_publication", "publication_publication");
|
502
|
code2name.put("coordinator", "coordinator");
|
503
|
code2name.put("participant", "participant");
|
504
|
code2name.put("subcontractor", "subcontractor");
|
505
|
code2name.put("principal investigating", "principal investigating");
|
506
|
code2name.put("exploitation", "exploitation");
|
507
|
code2name.put("uzb", "Uzbek");
|
508
|
code2name.put("author", "author");
|
509
|
code2name.put("isResultOf", "isResultOf");
|
510
|
code2name.put("rum/ron", "Romanian");
|
511
|
code2name.put("doi", "doi");
|
512
|
code2name.put("datasetsbyjournal", "datasetsbyjournal");
|
513
|
code2name.put("film", "film");
|
514
|
code2name.put("result", "result");
|
515
|
code2name.put("0011", "Internal report");
|
516
|
code2name.put("0010", "Lecture");
|
517
|
code2name.put("0012", "Newsletter");
|
518
|
code2name.put("roh", "Raeto-Romance");
|
519
|
code2name.put("0020", "Other");
|
520
|
code2name.put("pubsrepository::unknown", "Publication Repository");
|
521
|
code2name.put("AD", "ANDORRA");
|
522
|
code2name.put("AI", "ANGUILLA");
|
523
|
code2name.put("AQ", "ANTARCTICA");
|
524
|
code2name.put("BZ", "BELIZE");
|
525
|
code2name.put("0013", "Part of book or chapter of book");
|
526
|
code2name.put("BM", "BERMUDA");
|
527
|
code2name.put("BQ", "BONAIRE, SINT EUSTATIUS AND SABA");
|
528
|
code2name.put("BV", "BOUVET ISLAND");
|
529
|
code2name.put("KY", "CAYMAN ISLANDS");
|
530
|
code2name.put("TD", "CHAD");
|
531
|
code2name.put("CX", "CHRISTMAS ISLAND");
|
532
|
code2name.put("CC", "COCOS (KEELING) ISLANDS");
|
533
|
code2name.put("DJ", "DJIBOUTI");
|
534
|
code2name.put("DM", "DOMINICA");
|
535
|
code2name.put("GQ", "EQUATORIAL GUINEA");
|
536
|
code2name.put("MG", "Madagascar");
|
537
|
code2name.put("MY", "Malaysia");
|
538
|
code2name.put("ML", "Mali");
|
539
|
code2name.put("MT", "Malta");
|
540
|
code2name.put("NR", "NAURU");
|
541
|
code2name.put("NU", "NIUE");
|
542
|
code2name.put("PR", "PUERTO RICO");
|
543
|
code2name.put("PK", "Pakistan");
|
544
|
code2name.put("PA", "Panama");
|
545
|
code2name.put("PG", "Papua New Guinea");
|
546
|
code2name.put("PE", "Peru");
|
547
|
code2name.put("PH", "Philippines");
|
548
|
code2name.put("PL", "Poland");
|
549
|
code2name.put("GB", "UNITED KINGDOM");
|
550
|
code2name.put("ZM", "Zambia");
|
551
|
code2name.put("ECSEL-IA", "ECSEL Innovation Action");
|
552
|
code2name.put("user:claim:pid", "user:claim:pid");
|
553
|
code2name.put("EMBARGO", "Embargo");
|
554
|
code2name.put("OPEN", "Open Access");
|
555
|
code2name.put("OTHER", "Other");
|
556
|
code2name.put("RESTRICTED", "Restricted");
|
557
|
code2name.put("aka", "Akan");
|
558
|
code2name.put("bin", "Bini");
|
559
|
code2name.put("din", "Dinka");
|
560
|
code2name.put("PS", "Palestinian-administered areas");
|
561
|
code2name.put("hin", "Hindi");
|
562
|
code2name.put("ido", "Ido");
|
563
|
code2name.put("run", "Rundi");
|
564
|
code2name.put("sag", "Sango");
|
565
|
code2name.put("UM", "UNITED STATES MINOR OUTLYING ISLANDS");
|
566
|
code2name.put("ibo", "Igbo");
|
567
|
code2name.put("fij", "Fijian");
|
568
|
code2name.put("fin", "Finnish");
|
569
|
code2name.put("ind", "Indonesian");
|
570
|
code2name.put("ile", "Interlingue");
|
571
|
code2name.put("kam", "Kamba");
|
572
|
code2name.put("nic", "Niger-Kordofanian");
|
573
|
code2name.put("ssa", "Nilo-Saharan");
|
574
|
code2name.put("niu", "Niuean");
|
575
|
code2name.put("slo/slk", "Slovak");
|
576
|
code2name.put("non", "Norse");
|
577
|
code2name.put("nai", "North American Indian");
|
578
|
code2name.put("sme", "Northern Sami");
|
579
|
code2name.put("wel/cym", "Welsh");
|
580
|
code2name.put("KO", "Kosovo * UN resolution");
|
581
|
code2name.put("zun", "Zuni");
|
582
|
code2name.put("YU", "Yemen");
|
583
|
code2name.put("file::WoS", "file::WoS");
|
584
|
code2name.put("metadata", "metadata");
|
585
|
code2name.put("FCT", "Fundação para a Ciência e Tecnologia");
|
586
|
code2name.put("pmc", "pmc");
|
587
|
code2name.put("pmid", "pmid");
|
588
|
code2name.put("urn", "urn");
|
589
|
code2name.put("httpList", "httpList");
|
590
|
code2name.put("ftp2", "ftp2");
|
591
|
code2name.put("0033", "Audiovisual");
|
592
|
code2name.put("0031", "Data Paper");
|
593
|
code2name.put("0032", "Software Paper");
|
594
|
code2name.put("AO", "ANGOLA");
|
595
|
code2name.put("AL", "Albania");
|
596
|
code2name.put("DZ", "Algeria");
|
597
|
code2name.put("AR", "Argentina");
|
598
|
code2name.put("AM", "Armenia");
|
599
|
code2name.put("AU", "Australia");
|
600
|
code2name.put("AT", "Austria");
|
601
|
code2name.put("BR", "Brazil");
|
602
|
code2name.put("BF", "Burkina Faso");
|
603
|
code2name.put("KH", "Cambodia");
|
604
|
code2name.put("CA", "Canada");
|
605
|
code2name.put("CV", "Cape Verde");
|
606
|
code2name.put("CL", "Chile");
|
607
|
code2name.put("CO", "Colombia");
|
608
|
code2name.put("CY", "Cyprus");
|
609
|
code2name.put("SV", "El Salvador");
|
610
|
code2name.put("IL", "Israel");
|
611
|
code2name.put("SX", "SINT MAARTEN (DUTCH PART)");
|
612
|
code2name.put("SB", "SOLOMON ISLANDS");
|
613
|
code2name.put("SS", "SOUTH SUDAN");
|
614
|
code2name.put("SJ", "SVALBARD AND JAN MAYEN");
|
615
|
code2name.put("SD", "Sudan");
|
616
|
code2name.put("CH", "Switzerland");
|
617
|
code2name.put("SY", "Syrian Arab Republic");
|
618
|
code2name.put("ZW", "Zimbabwe");
|
619
|
code2name.put("ec:hasframeworkprogram", "hasframeworkprogram");
|
620
|
code2name.put("ec:hasprogram", "hasprogram");
|
621
|
code2name.put("ec:hasspecificprogram", "hasspecificprogram");
|
622
|
code2name.put("available", "available");
|
623
|
code2name.put("FCH2-IA", "FCH2 Innovation action");
|
624
|
code2name.put("akk", "Akkadian");
|
625
|
code2name.put("alb/sqi", "Albanian");
|
626
|
code2name.put("arm/hye", "Armenian");
|
627
|
code2name.put("ath", "Athapascan");
|
628
|
code2name.put("bat", "Baltic");
|
629
|
code2name.put("bel", "Belarusian");
|
630
|
code2name.put("bem", "Bemba");
|
631
|
code2name.put("bre", "Breton");
|
632
|
code2name.put("car", "Carib");
|
633
|
code2name.put("cau", "Caucasian");
|
634
|
code2name.put("ewe", "Ewe");
|
635
|
code2name.put("lub", "Luba-Katanga");
|
636
|
code2name.put("zul", "Zulu");
|
637
|
code2name.put("GS", "SOUTH GEORGIA AND THE SOUTH SANDWICH ISLANDS");
|
638
|
code2name.put("SM", "San Marino");
|
639
|
code2name.put("TT", "Trinidad and Tobago");
|
640
|
code2name.put("TR", "Turkey");
|
641
|
code2name.put("TM", "Turkmenistan");
|
642
|
code2name.put("UY", "Uruguay");
|
643
|
code2name.put("bej", "Beja");
|
644
|
code2name.put("fan", "Fang");
|
645
|
code2name.put("BH", "Bahrain");
|
646
|
code2name.put("BW", "Botswana");
|
647
|
code2name.put("UG", "Uganda");
|
648
|
code2name.put("hup", "Hupa");
|
649
|
code2name.put("iba", "Iban");
|
650
|
code2name.put("ijo", "Ijo");
|
651
|
code2name.put("kan", "Kannada");
|
652
|
code2name.put("kau", "Kanuri");
|
653
|
code2name.put("mul", "Multiple languages");
|
654
|
code2name.put("ven", "Venda");
|
655
|
code2name.put("wal", "Walamo");
|
656
|
code2name.put("wln", "Walloon");
|
657
|
code2name.put("war", "Waray");
|
658
|
code2name.put("orcidworkid", "orcid workid");
|
659
|
code2name.put("NetCDF", "NetCDF");
|
660
|
code2name.put("fileCSV", "fileCSV");
|
661
|
code2name.put("files_by_rpc", "files_by_rpc");
|
662
|
code2name.put("files_from_mdstore", "files_from_mdstore");
|
663
|
code2name.put("filesystem", "filesystem");
|
664
|
code2name.put("text", "text");
|
665
|
code2name.put("infospace", "Information Space");
|
666
|
code2name.put("pubsrepository::institutional", "Institutional Publication Repository");
|
667
|
code2name.put("BT", "BHUTAN");
|
668
|
code2name.put("BD", "Bangladesh");
|
669
|
code2name.put("BB", "Barbados");
|
670
|
code2name.put("BY", "Belarus");
|
671
|
code2name.put("BA", "Bosnia and Herzegovina");
|
672
|
code2name.put("BE", "Belgium");
|
673
|
code2name.put("CF", "Central African Republic");
|
674
|
code2name.put("CN", "China (People's Republic of)");
|
675
|
code2name.put("DO", "Dominican Republic");
|
676
|
code2name.put("EC", "Ecuador");
|
677
|
code2name.put("EG", "Egypt");
|
678
|
code2name.put("EE", "Estonia");
|
679
|
code2name.put("ET", "Ethiopia");
|
680
|
code2name.put("MU", "Mauritius");
|
681
|
code2name.put("MX", "Mexico");
|
682
|
code2name.put("MD", "Moldova (Republic of)");
|
683
|
code2name.put("ME", "Montenegro");
|
684
|
code2name.put("AN", "Netherlands Antilles");
|
685
|
code2name.put("NC", "New Caledonia");
|
686
|
code2name.put("KN", "SAINT KITTS AND NEVIS");
|
687
|
code2name.put("LC", "SAINT LUCIA");
|
688
|
code2name.put("RS", "Serbia");
|
689
|
code2name.put("SC", "Seychelles");
|
690
|
code2name.put("SG", "Singapore");
|
691
|
code2name.put("SK", "Slovakia");
|
692
|
code2name.put("SI", "Slovenia");
|
693
|
code2name.put("ZA", "South Africa");
|
694
|
code2name.put("LK", "Sri Lanka");
|
695
|
code2name.put("MC", "Support for training and career development of researchers (Marie Curie)");
|
696
|
code2name.put("SR", "Suriname");
|
697
|
code2name.put("SZ", "Swaziland");
|
698
|
code2name.put("SE", "Sweden");
|
699
|
code2name.put("UA", "Ukraine");
|
700
|
code2name.put("chb", "Chibcha");
|
701
|
code2name.put("US", "United States");
|
702
|
code2name.put("UZ", "Uzbekistan");
|
703
|
code2name.put("VN", "Viet Nam");
|
704
|
code2name.put("YE", "Yemen");
|
705
|
code2name.put("ilo", "Iloko");
|
706
|
code2name.put("ceb", "Cebuano");
|
707
|
code2name.put("cel", "Celtic");
|
708
|
code2name.put("files", "files");
|
709
|
code2name.put("chg", "Chagatai");
|
710
|
code2name.put("chi/zho", "Chinese");
|
711
|
code2name.put("ces/cze", "Czech");
|
712
|
code2name.put("guj", "Gujarati");
|
713
|
code2name.put("him", "Himachali");
|
714
|
code2name.put("inc", "Indic");
|
715
|
code2name.put("wak", "Wakashan");
|
716
|
code2name.put("file::PDF", "file::PDF");
|
717
|
code2name.put("datarepository::unknown", "Data Repository");
|
718
|
code2name.put("entityregistry", "Entity Registry");
|
719
|
code2name.put("BJ", "Benin");
|
720
|
code2name.put("FO", "Faroe Islands");
|
721
|
code2name.put("MA", "Morocco");
|
722
|
code2name.put("SO", "Somalia");
|
723
|
code2name.put("TL", "TIMOR-LESTE");
|
724
|
code2name.put("BN", "Brunei Darussalam");
|
725
|
code2name.put("BG", "Bulgaria");
|
726
|
code2name.put("bur/mya", "Burmese");
|
727
|
code2name.put("chu", "Church Slavic; Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic");
|
728
|
code2name.put("chv", "Chuvash");
|
729
|
code2name.put("hrv", "Croatian");
|
730
|
code2name.put("kal", "Greenlandic; Kalaallisut");
|
731
|
code2name.put("kua", "Kuanyama; Kwanyama");
|
732
|
code2name.put("kum", "Kumyk");
|
733
|
code2name.put("kru", "Kurukh");
|
734
|
code2name.put("lin", "Lingala");
|
735
|
code2name.put("lit", "Lithuanian");
|
736
|
code2name.put("mak", "Makasar");
|
737
|
code2name.put("mlt", "Maltese");
|
738
|
code2name.put("nno", "Norwegian Nynorsk; Nynorsk, Norwegian");
|
739
|
code2name.put("peo", "Persian, Old (ca 600 - 400 B.C.)");
|
740
|
code2name.put("pon", "Ponape");
|
741
|
code2name.put("por", "Portuguese");
|
742
|
code2name.put("pra", "Prakrit");
|
743
|
code2name.put("pro", "Provencal");
|
744
|
code2name.put("san", "Sanskrit");
|
745
|
code2name.put("uig", "Uighur; Uyghur");
|
746
|
code2name.put("was", "Washo");
|
747
|
code2name.put("GE", "Georgia");
|
748
|
code2name.put("cym/wel", "Welsh");
|
749
|
code2name.put("wol", "Wolof");
|
750
|
code2name.put("NZ", "New Zealand");
|
751
|
code2name.put("xho", "Xhosa");
|
752
|
code2name.put("file::hybrid", "file::hybrid");
|
753
|
code2name.put("orcid", "Open Researcher and Contributor ID");
|
754
|
code2name.put("MF", "SAINT MARTIN (FRENCH PART)");
|
755
|
code2name.put("0008", "Bachelor thesis");
|
756
|
code2name.put("0002", "Book");
|
757
|
code2name.put("0007", "Master thesis");
|
758
|
code2name.put("0000", "Unknown");
|
759
|
code2name.put("crissystem", "CRIS System");
|
760
|
code2name.put("GH", "Ghana");
|
761
|
code2name.put("GI", "Gibraltar");
|
762
|
code2name.put("LB", "Lebanon");
|
763
|
code2name.put("MZ", "Mozambique");
|
764
|
code2name.put("MM", "Myanmar");
|
765
|
code2name.put("NF", "NORFOLK ISLAND");
|
766
|
code2name.put("MP", "NORTHERN MARIANA ISLANDS");
|
767
|
code2name.put("NI", "Nicaragua");
|
768
|
code2name.put("NE", "Niger");
|
769
|
code2name.put("NG", "Nigeria");
|
770
|
code2name.put("PM", "SAINT PIERRE AND MIQUELON");
|
771
|
code2name.put("VC", "SAINT VINCENT AND THE GRENADINES");
|
772
|
code2name.put("0001", "Article");
|
773
|
code2name.put("WS", "SAMOA");
|
774
|
code2name.put("ST", "SAO TOME AND PRINCIPE");
|
775
|
code2name.put("SL", "SIERRA LEONE");
|
776
|
code2name.put("SN", "Senegal");
|
777
|
code2name.put("ES", "Spain");
|
778
|
code2name.put("aggregator::pubsrepository::unknown", "Aggregator of Publication Repositories");
|
779
|
code2name.put("ERC", "Support for frontier research (ERC)");
|
780
|
code2name.put("6MONTHS", "6 Months Embargo");
|
781
|
code2name.put("CLOSED", "Closed Access");
|
782
|
code2name.put("ina", "Auxiliary Language Association)");
|
783
|
code2name.put("cad", "Caddo");
|
784
|
code2name.put("cai", "Central American Indian");
|
785
|
code2name.put("cus", "Cushitic");
|
786
|
code2name.put("kus", "Kusaie");
|
787
|
code2name.put("GN", "Guinea");
|
788
|
code2name.put("kut", "Kutenai");
|
789
|
code2name.put("lad", "Ladino");
|
790
|
code2name.put("sah", "Yakut");
|
791
|
code2name.put("yor", "Yoruba");
|
792
|
code2name.put("zap", "Zapotec");
|
793
|
code2name.put("0004", "Conference object");
|
794
|
code2name.put("0005", "Contribution for newspaper or weekly magazine");
|
795
|
code2name.put("0006", "Doctoral thesis");
|
796
|
code2name.put("NP", "Nepal");
|
797
|
code2name.put("NO", "Norway");
|
798
|
code2name.put("OM", "Oman");
|
799
|
code2name.put("PY", "PARAGUAY");
|
800
|
code2name.put("PN", "PITCAIRN");
|
801
|
code2name.put("lah", "Lahnda");
|
802
|
code2name.put("zen", "Zenaga");
|
803
|
code2name.put("zha", "Zhuang; Chuang");
|
804
|
code2name.put("MSCA-IF-EF-CAR", "CAR – Career Restart panel");
|
805
|
code2name.put("COFUND-EJP", "COFUND (European Joint Programme)");
|
806
|
code2name.put("RIA", "Research and Innovation action");
|
807
|
code2name.put("ERC-COG", "Consolidator Grant");
|
808
|
code2name.put("wt:fundingStream", "Wellcome Trust: Funding Stream");
|
809
|
code2name.put("EAN13", "EAN13");
|
810
|
code2name.put("EISSN", "EISSN");
|
811
|
code2name.put("Handle", "Handle");
|
812
|
code2name.put("SME-1", "SME instrument phase 1");
|
813
|
code2name.put("SME-2", "SME instrument phase 2");
|
814
|
code2name.put("SGA-CSA", "Specific Grant agreement and Coordination and Support Action");
|
815
|
code2name.put("updated", "updated");
|
816
|
code2name.put("valid", "valid");
|
817
|
code2name.put("driver", "OpenAIRE Basic (DRIVER OA)");
|
818
|
code2name.put("notCompatible", "under validation");
|
819
|
code2name.put("openaire2.0", "OpenAIRE 2.0 (EC funding)");
|
820
|
code2name.put("openaire3.0", "OpenAIRE 3.0 (OA, funding)");
|
821
|
code2name.put("native", "proprietary");
|
822
|
code2name.put("BBI-IA-DEMO", "Bio-based Industries Innovation action - Demonstration");
|
823
|
code2name.put("ISBN", "ISBN");
|
824
|
code2name.put("nob", "Bokmål, Norwegian; Norwegian Bokmål");
|
825
|
code2name.put("Personnel Agreement", "Personnel Agreement");
|
826
|
code2name.put("PendingRepositoryResources", "Pending datasource");
|
827
|
code2name.put("RepositoryServiceResources", "Valid datasource");
|
828
|
code2name.put("LISSN", "LISSN");
|
829
|
code2name.put("sysimport:crosswalk:datasetarchive", "sysimport:crosswalk:datasetarchive");
|
830
|
code2name.put("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry");
|
831
|
code2name.put("ltz", "Letzeburgesch; Luxembourgish");
|
832
|
code2name.put("publication", "publication");
|
833
|
code2name.put("sysimport:crosswalk:aggregator", "sysimport:crosswalk:aggregator");
|
834
|
code2name.put("sysimport:crosswalk:cris", "sysimport:crosswalk:cris");
|
835
|
code2name.put("BBI-IA-FLAG", "Bio-based Industries Innovation action - Flagship");
|
836
|
code2name.put("mar", "Marathi");
|
837
|
code2name.put("BBI-RIA", "Bio-based Industries Research and Innovation action");
|
838
|
code2name.put("mas", "Masai");
|
839
|
code2name.put("MSCA-COFUND-FP", "Fellowship programmes");
|
840
|
code2name.put("enm", "Middle English");
|
841
|
code2name.put("frm", "Middle French");
|
842
|
code2name.put("Interagency Agreement", "Interagency Agreement");
|
843
|
code2name.put("ERC-POC", "Proof of Concept Grant");
|
844
|
code2name.put("URN", "URN");
|
845
|
code2name.put("AX", "ÅLAND ISLANDS");
|
846
|
code2name.put("MSCA-IF-EF-RI", "RI – Reintegration panel");
|
847
|
code2name.put("Intergovernmental Personnel Award", "Intergovernmental Personnel Award");
|
848
|
code2name.put("copyrighted", "copyrighted");
|
849
|
code2name.put("mis", "Miscellaneous");
|
850
|
code2name.put("moh", "Mohawk");
|
851
|
code2name.put("mol", "Moldavian");
|
852
|
code2name.put("mkh", "Mon-Kmer");
|
853
|
code2name.put("lol", "Mongo");
|
854
|
code2name.put("created", "created");
|
855
|
code2name.put("dataset", "dataset");
|
856
|
code2name.put("nya", "Chewa; Chichewa; Nyanja");
|
857
|
code2name.put("cre", "Cree");
|
858
|
code2name.put("dan", "Danish");
|
859
|
code2name.put("efi", "Efik");
|
860
|
code2name.put("esk", "Eskimo");
|
861
|
code2name.put("epo", "Esperanto");
|
862
|
code2name.put("fry", "Frisian");
|
863
|
code2name.put("gaa", "Ga");
|
864
|
code2name.put("gae/gdh", "Gaelic");
|
865
|
code2name.put("glg", "Galician");
|
866
|
code2name.put("lug", "Ganda");
|
867
|
code2name.put("gay", "Gayo");
|
868
|
code2name.put("lun", "Lunda");
|
869
|
code2name.put("gez", "Geez");
|
870
|
code2name.put("geo/kat", "Georgian");
|
871
|
code2name.put("iro", "Iroquoian");
|
872
|
code2name.put("kor", "Korean");
|
873
|
code2name.put("kpe", "Kpelle");
|
874
|
code2name.put("kro", "Kru");
|
875
|
code2name.put("kur", "Kurdish");
|
876
|
code2name.put("lam", "Lamba");
|
877
|
code2name.put("lao", "Lao");
|
878
|
code2name.put("luo", "Luo");
|
879
|
code2name.put("mac/mak", "Macedonian");
|
880
|
code2name.put("mon", "Mongolian");
|
881
|
code2name.put("mos", "Mossi");
|
882
|
code2name.put("nau", "Nauru");
|
883
|
code2name.put("nav", "Navajo; Navaho");
|
884
|
code2name.put("nbl", "Ndebele, South");
|
885
|
code2name.put("nep", "Nepali");
|
886
|
code2name.put("nor", "Norwegian");
|
887
|
code2name.put("nub", "Nubian");
|
888
|
code2name.put("nym", "Nyamwezi");
|
889
|
code2name.put("nyn", "Nyankole");
|
890
|
code2name.put("nyo", "Nyoro");
|
891
|
code2name.put("nzi", "Nzima");
|
892
|
code2name.put("oci", "Occitan (post 1500); Provençal");
|
893
|
code2name.put("oji", "Ojibwa");
|
894
|
code2name.put("tah", "Tahitian");
|
895
|
code2name.put("tum", "Tumbuka");
|
896
|
code2name.put("tyv", "Tuvinian");
|
897
|
code2name.put("sga", "old Irish");
|
898
|
code2name.put("physicalObject", "physicalObject");
|
899
|
code2name.put("service", "service");
|
900
|
code2name.put("software", "software");
|
901
|
code2name.put("sound", "sound");
|
902
|
code2name.put("0016", "Preprint");
|
903
|
code2name.put("0017", "Report");
|
904
|
code2name.put("0014", "Research");
|
905
|
code2name.put("0015", "Review");
|
906
|
code2name.put("pubsrepository::journal", "Journal Platform");
|
907
|
code2name.put("IsPreviousVersionOf", "IsPreviousVersionOf");
|
908
|
code2name.put("ERA-NET-Cofund", "ERA-NET Cofund");
|
909
|
code2name.put("fct:program", "fct:program");
|
910
|
code2name.put("ERC-ADG", "Advanced Grant");
|
911
|
code2name.put("ERC-LVG", "ERC low value grant");
|
912
|
code2name.put("MSCA-ITN-ETN", "European Training Networks");
|
913
|
code2name.put("sysimport:crosswalk:infospace", "sysimport:crosswalk:infospace");
|
914
|
code2name.put("sysimport:crosswalk:repository", "sysimport:crosswalk:repository");
|
915
|
code2name.put("sysimport:mining:aggregator", "sysimport:mining:aggregator");
|
916
|
code2name.put("user:claim:search", "user:claim:search");
|
917
|
code2name.put("user:insert", "user:insert");
|
918
|
code2name.put("171", "Article 171 of the Treaty");
|
919
|
code2name.put("BSG", "Research for the benefit of specific groups");
|
920
|
code2name.put("CP", "Collaborative project");
|
921
|
code2name.put("dzo", "Dzongkha");
|
922
|
code2name.put("deu/ger", "German");
|
923
|
code2name.put("gem", "Germanic");
|
924
|
code2name.put("lat", "Latin");
|
925
|
code2name.put("lav", "Latvian");
|
926
|
code2name.put("lez", "Lezghian");
|
927
|
code2name.put("tam", "Tamil");
|
928
|
code2name.put("tat", "Tatar");
|
929
|
code2name.put("tel", "Telugu");
|
930
|
code2name.put("tsi", "Tsimshian");
|
931
|
code2name.put("grn", "Guarani");
|
932
|
code2name.put("hai", "Haida");
|
933
|
code2name.put("lim", "Limburgan; Limburger; Limburgish");
|
934
|
code2name.put("sysimport:mining:entityregistry", "sysimport:mining:entityregistry");
|
935
|
code2name.put("sysimport:mining:infospace", "sysimport:mining:infospace");
|
936
|
code2name.put("sysimport:mining:repository", "sysimport:mining:repository");
|
937
|
code2name.put("CP-CSA", "Combination of CP & CSA");
|
938
|
code2name.put("NoE", "Network of Excellence");
|
939
|
code2name.put("alternative title", "alternative title");
|
940
|
code2name.put("main title", "main title");
|
941
|
code2name.put("subtitle", "subtitle");
|
942
|
code2name.put("translated title", "translated title");
|
943
|
code2name.put("wt:hasParentFunding", "wt:hasParentFunding");
|
944
|
code2name.put("grc", "Ancient Greek");
|
945
|
code2name.put("apa", "Apache");
|
946
|
code2name.put("ara", "Arabic");
|
947
|
code2name.put("arc", "Aramaic");
|
948
|
code2name.put("arp", "Arapaho");
|
949
|
code2name.put("arn", "Araucanian");
|
950
|
code2name.put("art", "Artificial");
|
951
|
code2name.put("son", "Songhai");
|
952
|
code2name.put("nso", "Sotho");
|
953
|
code2name.put("ec:h2020topics", "Horizon 2020 Topics");
|
954
|
code2name.put("IO", "BRITISH INDIAN OCEAN TERRITORY");
|
955
|
code2name.put("SH", "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA");
|
956
|
code2name.put("SA", "Saudi Arabia");
|
957
|
code2name.put("UNKNOWN", "UNKNOWN");
|
958
|
code2name.put("CSA", "Coordination and support action");
|
959
|
code2name.put("sysimport:mining:cris", "sysimport:mining:cris");
|
960
|
code2name.put("sysimport:mining:datasetarchive", "sysimport:mining:datasetarchive");
|
961
|
code2name.put("fiu", "Finno-Ugrian");
|
962
|
code2name.put("fon", "Fon");
|
963
|
code2name.put("fra/fre", "French");
|
964
|
code2name.put("cpf", "French-based Creoles and Pidgins");
|
965
|
code2name.put("ful", "Fulah");
|
966
|
code2name.put("gla", "Gaelic; Scottish Gaelic");
|
967
|
code2name.put("kas", "Kashmiri");
|
968
|
code2name.put("kaw", "Kawi");
|
969
|
code2name.put("kir", "Kirghiz");
|
970
|
code2name.put("kon", "Kongo");
|
971
|
code2name.put("kok", "Konkani");
|
972
|
code2name.put("ang", "Old English");
|
973
|
code2name.put("sam", "Samaritan");
|
974
|
code2name.put("srp", "Serbian");
|
975
|
code2name.put("scr", "Serbo-Croatian");
|
976
|
code2name.put("srr", "Serer");
|
977
|
code2name.put("shn", "Shan");
|
978
|
code2name.put("sin", "Sinhala; Sinhalese");
|
979
|
code2name.put("sla", "Slavic");
|
980
|
code2name.put("slk/slo", "Slovak");
|
981
|
code2name.put("slv", "Slovenian");
|
982
|
code2name.put("sog", "Sogdian");
|
983
|
code2name.put("wen", "Sorbian");
|
984
|
code2name.put("sot", "Sotho, Southern");
|
985
|
code2name.put("sai", "South American Indian");
|
986
|
code2name.put("spa", "Spanish; Castilian");
|
987
|
code2name.put("suk", "Sukuma");
|
988
|
code2name.put("arXiv", "arXiv");
|
989
|
code2name.put("OpenDAP", "OpenDAP");
|
990
|
code2name.put("oai", "oai");
|
991
|
code2name.put("re3data", "re3data");
|
992
|
code2name.put("sparql", "sparql");
|
993
|
code2name.put("sword", "sword");
|
994
|
code2name.put("PF", "FRENCH POLYNESIA");
|
995
|
code2name.put("FI", "Finland");
|
996
|
code2name.put("MK", "Former Yugoslav Republic of Macedonia");
|
997
|
code2name.put("DE", "Germany");
|
998
|
code2name.put("IN", "India");
|
999
|
code2name.put("ID", "Indonesia");
|
1000
|
code2name.put("IR", "Iran (Islamic Republic of)");
|
1001
|
code2name.put("mad", "Madurese");
|
1002
|
code2name.put("mag", "Magahi");
|
1003
|
code2name.put("mai", "Maithili");
|
1004
|
code2name.put("mlg", "Malagasy");
|
1005
|
code2name.put("may/msa", "Malay");
|
1006
|
code2name.put("mal", "Malayalam");
|
1007
|
code2name.put("man", "Mandingo");
|
1008
|
code2name.put("glv", "Manx");
|
1009
|
code2name.put("mao/mri", "Maori");
|
1010
|
code2name.put("chm", "Mari");
|
1011
|
code2name.put("mah", "Marshallese");
|
1012
|
code2name.put("gmh", "Middle High German");
|
1013
|
code2name.put("mga", "Middle Irish");
|
1014
|
|
1015
|
|
1016
|
}
|
1017
|
|
1018
|
protected static OafRel.Builder getRelBuilder(final RelType rType, final SubRelType subRelType, OafRel.Builder rel, final Builder subRel) {
|
1019
|
|
1020
|
switch(rType) {
|
1021
|
|
1022
|
case datasourceOrganization:
|
1023
|
return rel.setDatasourceOrganization(DatasourceOrganization.newBuilder().setProvision((Provision.Builder) subRel));
|
1024
|
case personResult:
|
1025
|
final PersonResult.Builder pr = PersonResult.newBuilder();
|
1026
|
switch (subRelType) {
|
1027
|
case authorship:
|
1028
|
return rel.setPersonResult(pr.setAuthorship((Authorship.Builder) subRel));
|
1029
|
case contribution:
|
1030
|
return rel.setPersonResult(pr.setContribution((Contribution.Builder) subRel));
|
1031
|
default:
|
1032
|
throw new IllegalArgumentException("invalid subRelType for result_person relations: " + subRelType.toString());
|
1033
|
}
|
1034
|
case projectOrganization:
|
1035
|
return rel.setProjectOrganization(ProjectOrganization.newBuilder().setParticipation((Participation.Builder) subRel));
|
1036
|
case projectPerson:
|
1037
|
return rel.setProjectPerson(ProjectPerson.newBuilder().setContactPerson(((ContactPerson.Builder) subRel)));
|
1038
|
case resultOrganization:
|
1039
|
return rel.setResultOrganization(ResultOrganization.newBuilder().setAffiliation((Affiliation.Builder) subRel));
|
1040
|
case resultProject:
|
1041
|
return rel.setResultProject(ResultProject.newBuilder().setOutcome((Outcome.Builder) subRel));
|
1042
|
case resultResult:
|
1043
|
final ResultResult.Builder rr = ResultResult.newBuilder();
|
1044
|
switch (subRelType) {
|
1045
|
|
1046
|
case similarity:
|
1047
|
return rel.setResultResult(rr.setSimilarity((Similarity.Builder) subRel));
|
1048
|
case publicationDataset:
|
1049
|
return rel.setResultResult(rr.setPublicationDataset((PublicationDataset.Builder) subRel));
|
1050
|
case dedup:
|
1051
|
return rel.setResultResult(rr.setDedup((Dedup.Builder) subRel));
|
1052
|
case dedupSimilarity:
|
1053
|
return rel.setResultResult(rr.setDedupSimilarity((DedupSimilarity.Builder) subRel));
|
1054
|
case supplement:
|
1055
|
return rel.setResultResult(rr.setSupplement((Supplement.Builder) subRel));
|
1056
|
case part:
|
1057
|
return rel.setResultResult(rr.setPart((Part.Builder) subRel));
|
1058
|
default:
|
1059
|
throw new IllegalArgumentException("invalid subRelType for result_result relations: " + subRelType.toString());
|
1060
|
}
|
1061
|
case personPerson:
|
1062
|
final PersonPerson.Builder pp = PersonPerson.newBuilder();
|
1063
|
switch (subRelType) {
|
1064
|
case dedup:
|
1065
|
return rel.setPersonPerson(pp.setDedup((Dedup.Builder) subRel));
|
1066
|
case dedupSimilarity:
|
1067
|
return rel.setPersonPerson(pp.setDedupSimilarity((DedupSimilarity.Builder) subRel));
|
1068
|
default:
|
1069
|
throw new IllegalArgumentException("invalid subRelType for person_person relations: " + subRelType.toString());
|
1070
|
}
|
1071
|
case organizationOrganization:
|
1072
|
final OrganizationOrganization.Builder oo = OrganizationOrganization.newBuilder();
|
1073
|
switch (subRelType) {
|
1074
|
case dedup:
|
1075
|
return rel.setOrganizationOrganization(oo.setDedup((Dedup.Builder) subRel));
|
1076
|
case dedupSimilarity:
|
1077
|
return rel.setOrganizationOrganization(oo.setDedupSimilarity((DedupSimilarity.Builder) subRel));
|
1078
|
default:
|
1079
|
throw new IllegalArgumentException("invalid subRelType for organization_organization relations: " + subRelType.toString());
|
1080
|
}
|
1081
|
}
|
1082
|
throw new IllegalArgumentException("invalid relation type " + rType.toString());
|
1083
|
}
|
1084
|
|
1085
|
protected static Builder getSubRelBuilder(final RelMetadata.Builder metadata, final SubRelType subRelType, final Map<String, String> params) {
|
1086
|
|
1087
|
switch (subRelType) {
|
1088
|
|
1089
|
case provision:
|
1090
|
return Provision.newBuilder().setRelMetadata(metadata);
|
1091
|
case coauthorship:
|
1092
|
return CoAuthorship.newBuilder().setRelMetadata(metadata);
|
1093
|
case authorship:
|
1094
|
return Authorship.newBuilder().setRelMetadata(metadata).setRanking("" + params.get("rank"));
|
1095
|
case contribution:
|
1096
|
return Contribution.newBuilder().setRelMetadata(metadata);
|
1097
|
case participation:
|
1098
|
return Participation.newBuilder().setRelMetadata(metadata);
|
1099
|
case contactPerson:
|
1100
|
return ContactPerson.newBuilder().setRelMetadata(metadata);
|
1101
|
case outcome:
|
1102
|
return Outcome.newBuilder().setRelMetadata(metadata);
|
1103
|
case similarity:
|
1104
|
return Similarity.newBuilder().setRelMetadata(metadata);
|
1105
|
case publicationDataset:
|
1106
|
return PublicationDataset.newBuilder().setRelMetadata(metadata);
|
1107
|
case affiliation:
|
1108
|
return Affiliation.newBuilder().setRelMetadata(metadata);
|
1109
|
case dedup:
|
1110
|
return Dedup.newBuilder().setRelMetadata(metadata);
|
1111
|
case dedupSimilarity:
|
1112
|
return DedupSimilarity.newBuilder().setRelMetadata(metadata);
|
1113
|
case supplement:
|
1114
|
return Supplement.newBuilder().setRelMetadata(metadata);
|
1115
|
case part:
|
1116
|
return Part.newBuilder().setRelMetadata(metadata);
|
1117
|
}
|
1118
|
throw new IllegalArgumentException("invalid relation type " + subRelType.toString());
|
1119
|
}
|
1120
|
|
1121
|
protected static String getVocabularyName(final RelType relType) {
|
1122
|
switch (relType) {
|
1123
|
|
1124
|
case datasourceOrganization:
|
1125
|
return "dnet:datasource_organization_relations";
|
1126
|
case personResult:
|
1127
|
return "dnet:person_result_relations";
|
1128
|
case projectOrganization:
|
1129
|
return "dnet:project_organization_relations";
|
1130
|
case projectPerson:
|
1131
|
return "dnet:project_person_relations";
|
1132
|
case resultOrganization:
|
1133
|
return "dnet:result_organization_relations";
|
1134
|
case resultProject:
|
1135
|
return "dnet:result_project_relations";
|
1136
|
case resultResult:
|
1137
|
return "dnet:result_result_relations";
|
1138
|
case personPerson:
|
1139
|
return "dnet:person_person_relations";
|
1140
|
case organizationOrganization:
|
1141
|
return "dnet:organization_organization_relations";
|
1142
|
}
|
1143
|
throw new IllegalArgumentException("invalid relation type " + relType.toString());
|
1144
|
}
|
1145
|
|
1146
|
|
1147
|
// Builder for Entities
|
1148
|
protected static Oaf getOaf(final OafEntity.Builder entity, final DataInfo.Builder info) {
|
1149
|
return _getOaf(Oaf.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build();
|
1150
|
}
|
1151
|
|
1152
|
// Builder for Rels
|
1153
|
protected static Oaf getOaf(final OafRel.Builder rel, final DataInfo.Builder info) {
|
1154
|
return _getOaf(Oaf.newBuilder(), info).setKind(Kind.relation).setRel(rel).build();
|
1155
|
}
|
1156
|
|
1157
|
private static Oaf.Builder _getOaf(final Oaf.Builder oaf, final DataInfo.Builder info) {
|
1158
|
return oaf.setDataInfo(ensureDataInfo(info)).setTimestamp(System.currentTimeMillis());
|
1159
|
}
|
1160
|
|
1161
|
protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) {
|
1162
|
if (info.isInitialized()) return info;
|
1163
|
return getDataInfo(null, "UNKNOWN", "0.9", false, false);
|
1164
|
}
|
1165
|
|
1166
|
protected static KeyValue getKV(final String id, final String name) {
|
1167
|
return KeyValue.newBuilder().setKey(id).setValue(name).build();
|
1168
|
}
|
1169
|
|
1170
|
protected static OafRel.Builder getRel(final String sourceId,
|
1171
|
final String targetId,
|
1172
|
final RelType relType,
|
1173
|
final SubRelType subRelType,
|
1174
|
final String relClass,
|
1175
|
final boolean isChild) {
|
1176
|
return OafRel.newBuilder().setSource(sourceId).setTarget(targetId).setRelType(relType).setSubRelType(subRelType).setRelClass(relClass)
|
1177
|
.setChild(isChild);
|
1178
|
}
|
1179
|
|
1180
|
protected static OafEntity.Builder getEntity(final Type type,
|
1181
|
final String id,
|
1182
|
final KeyValue collectedFrom,
|
1183
|
final List<String> originalIds,
|
1184
|
final String dateOfCollection,
|
1185
|
final List<StructuredProperty> pids) {
|
1186
|
final OafEntity.Builder builder = OafEntity.newBuilder().setType(type).setId(id).addCollectedfrom(collectedFrom).addAllOriginalId(originalIds)
|
1187
|
.setDateofcollection(dateOfCollection);
|
1188
|
|
1189
|
if ((pids != null) && !pids.isEmpty()) {
|
1190
|
builder.addAllPid(Iterables.filter(pids, Predicates.notNull()));
|
1191
|
}
|
1192
|
|
1193
|
return builder;
|
1194
|
}
|
1195
|
|
1196
|
public static DataInfo.Builder getDataInfo(final NodeList about,
|
1197
|
final String provenanceaction,
|
1198
|
final String trust,
|
1199
|
final boolean deletedbyinference,
|
1200
|
final boolean inferred) {
|
1201
|
|
1202
|
final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
|
1203
|
dataInfoBuilder.setInferred(Boolean.valueOf(inferred));
|
1204
|
dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(deletedbyinference));
|
1205
|
dataInfoBuilder.setTrust(trust);
|
1206
|
dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
|
1207
|
|
1208
|
// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
|
1209
|
if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {
|
1210
|
|
1211
|
final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
|
1212
|
if (dataInfoElement != null) {
|
1213
|
org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred");
|
1214
|
dataInfoBuilder.setInferred(Boolean.valueOf(elem != null ? elem.getTextContent() : String.valueOf(inferred)));
|
1215
|
|
1216
|
elem = getDirectChild(dataInfoElement, "deletedbyinference");
|
1217
|
dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(elem != null ? elem.getTextContent() : String.valueOf(deletedbyinference)));
|
1218
|
|
1219
|
elem = getDirectChild(dataInfoElement, "trust");
|
1220
|
dataInfoBuilder.setTrust(elem != null ? elem.getTextContent() : trust);
|
1221
|
|
1222
|
elem = getDirectChild(dataInfoElement, "inferenceprovenance");
|
1223
|
dataInfoBuilder.setInferenceprovenance(elem != null ? elem.getTextContent() : "");
|
1224
|
|
1225
|
elem = getDirectChild(dataInfoElement, "provenanceaction");
|
1226
|
final Qualifier.Builder pBuilder = Qualifier.newBuilder();
|
1227
|
if (elem.hasAttributes()) {
|
1228
|
final NamedNodeMap attributes = elem.getAttributes();
|
1229
|
pBuilder.setClassid(attributes.getNamedItem("classid").getNodeValue());
|
1230
|
pBuilder.setClassname(attributes.getNamedItem("classname").getNodeValue());
|
1231
|
pBuilder.setSchemeid(attributes.getNamedItem("schemeid").getNodeValue());
|
1232
|
pBuilder.setSchemename(attributes.getNamedItem("schemename").getNodeValue());
|
1233
|
} else {
|
1234
|
pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
|
1235
|
}
|
1236
|
dataInfoBuilder.setProvenanceaction(pBuilder);
|
1237
|
}
|
1238
|
}
|
1239
|
|
1240
|
return dataInfoBuilder;
|
1241
|
}
|
1242
|
|
1243
|
protected static OAIProvenance getOAIProvenance(final NodeList about) {
|
1244
|
|
1245
|
OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder();
|
1246
|
|
1247
|
if (((about != null) && (about.getLength() > 0))) {
|
1248
|
|
1249
|
final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance");
|
1250
|
|
1251
|
if (provenance != null) {
|
1252
|
final org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription");
|
1253
|
oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder()));
|
1254
|
}
|
1255
|
}
|
1256
|
|
1257
|
return oaiProv.build();
|
1258
|
}
|
1259
|
|
1260
|
private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) {
|
1261
|
od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered")));
|
1262
|
|
1263
|
org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL");
|
1264
|
od.setBaseURL(elem != null ? elem.getTextContent() : "");
|
1265
|
|
1266
|
elem = getDirectChild(origDesc, "identifier");
|
1267
|
od.setIdentifier(elem != null ? elem.getTextContent() : "");
|
1268
|
|
1269
|
elem = getDirectChild(origDesc, "datestamp");
|
1270
|
od.setDatestamp(elem != null ? elem.getTextContent() : "");
|
1271
|
|
1272
|
elem = getDirectChild(origDesc, "metadataNamespace");
|
1273
|
od.setMetadataNamespace(elem != null ? elem.getTextContent() : "");
|
1274
|
|
1275
|
elem = getDirectChild(origDesc, "originDescription");
|
1276
|
|
1277
|
if (elem != null) {
|
1278
|
|
1279
|
od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder()));
|
1280
|
}
|
1281
|
|
1282
|
return od.build();
|
1283
|
}
|
1284
|
|
1285
|
protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) {
|
1286
|
for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
|
1287
|
if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child;
|
1288
|
}
|
1289
|
return null;
|
1290
|
}
|
1291
|
|
1292
|
protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
|
1293
|
return getQualifier(classname, classname, schemename, schemename);
|
1294
|
}
|
1295
|
|
1296
|
protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) {
|
1297
|
return getQualifier(classname.toString(), classname.toString(), schemename, schemename);
|
1298
|
}
|
1299
|
|
1300
|
protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) {
|
1301
|
return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename);
|
1302
|
}
|
1303
|
|
1304
|
protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) {
|
1305
|
if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null;
|
1306
|
|
1307
|
if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) {
|
1308
|
qualifier.setClassid(fields.get(0));
|
1309
|
qualifier.setClassname(getClassName(fields.get(0)));
|
1310
|
}
|
1311
|
return qualifier;
|
1312
|
}
|
1313
|
|
1314
|
protected static void addStructuredProps(final Message.Builder builder,
|
1315
|
final FieldDescriptor fd,
|
1316
|
final List<String> values,
|
1317
|
final String classid,
|
1318
|
final String schemeid) {
|
1319
|
if (values != null) {
|
1320
|
for (final String s : values) {
|
1321
|
addField(builder, fd, getStructuredProperty(s, classid, classid, schemeid, schemeid));
|
1322
|
}
|
1323
|
}
|
1324
|
}
|
1325
|
|
1326
|
protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
|
1327
|
|
1328
|
final List<StructuredProperty> pids = Lists.newArrayList();
|
1329
|
|
1330
|
for (int i = 0; i < nodelist.getLength(); i++) {
|
1331
|
final Node node = nodelist.item(i);
|
1332
|
if ((node.getNodeType() == Node.ELEMENT_NODE) && node.getLocalName().toLowerCase().equals("identifier")) {
|
1333
|
|
1334
|
final Node pidType = node.getAttributes().getNamedItem("identifierType");
|
1335
|
|
1336
|
for (int j = 0; j < node.getChildNodes().getLength(); j++) {
|
1337
|
final Node child = node.getChildNodes().item(j);
|
1338
|
|
1339
|
if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()
|
1340
|
&& !pidType.getNodeValue().equalsIgnoreCase("url")) {
|
1341
|
|
1342
|
final String type = pidType.getNodeValue().toLowerCase();
|
1343
|
|
1344
|
final String value = child.getTextContent();
|
1345
|
|
1346
|
pids.add(getStructuredProperty(value, type, getClassName(type), "dnet:pid_types", "dnet:pid_types"));
|
1347
|
break;
|
1348
|
}
|
1349
|
}
|
1350
|
}
|
1351
|
}
|
1352
|
return pids;
|
1353
|
}
|
1354
|
|
1355
|
@SuppressWarnings("unchecked")
|
1356
|
protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) {
|
1357
|
|
1358
|
if (value == null) return;
|
1359
|
|
1360
|
if (value instanceof List<?>) {
|
1361
|
for (final Object o : (List<Object>) value) {
|
1362
|
addField(builder, descriptor, o);
|
1363
|
}
|
1364
|
} else {
|
1365
|
Object fieldValue = value;
|
1366
|
switch (descriptor.getType()) {
|
1367
|
case BOOL:
|
1368
|
fieldValue = Boolean.valueOf(value.toString());
|
1369
|
break;
|
1370
|
case BYTES:
|
1371
|
fieldValue = value.toString().getBytes(Charset.forName("UTF-8"));
|
1372
|
break;
|
1373
|
case DOUBLE:
|
1374
|
fieldValue = Double.valueOf(value.toString());
|
1375
|
break;
|
1376
|
case FLOAT:
|
1377
|
fieldValue = Float.valueOf(value.toString());
|
1378
|
break;
|
1379
|
case INT32:
|
1380
|
case INT64:
|
1381
|
case SINT32:
|
1382
|
case SINT64:
|
1383
|
fieldValue = Integer.valueOf(value.toString());
|
1384
|
break;
|
1385
|
case MESSAGE:
|
1386
|
final Builder q = builder.newBuilderForField(descriptor);
|
1387
|
|
1388
|
if (value instanceof Builder) {
|
1389
|
value = ((Builder) value).build();
|
1390
|
final byte[] b = ((Message) value).toByteArray();
|
1391
|
try {
|
1392
|
q.mergeFrom(b);
|
1393
|
} catch (final InvalidProtocolBufferException e) {
|
1394
|
throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName());
|
1395
|
}
|
1396
|
} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1397
|
if (value instanceof Qualifier) {
|
1398
|
q.mergeFrom((Qualifier) value);
|
1399
|
} else {
|
1400
|
parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@");
|
1401
|
}
|
1402
|
} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1403
|
if (value instanceof StructuredProperty) {
|
1404
|
q.mergeFrom((StructuredProperty) value);
|
1405
|
} else {
|
1406
|
parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###");
|
1407
|
}
|
1408
|
} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1409
|
if (value instanceof KeyValue) {
|
1410
|
q.mergeFrom((KeyValue) value);
|
1411
|
} else {
|
1412
|
parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&");
|
1413
|
}
|
1414
|
} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1415
|
if (value instanceof StringField) {
|
1416
|
q.mergeFrom((StringField) value);
|
1417
|
} else {
|
1418
|
q.setField(StringField.getDescriptor().findFieldByName("value"), value);
|
1419
|
}
|
1420
|
} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1421
|
if (value instanceof BoolField) {
|
1422
|
q.mergeFrom((BoolField) value);
|
1423
|
} else if (value instanceof String) {
|
1424
|
q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value));
|
1425
|
} else {
|
1426
|
q.setField(BoolField.getDescriptor().findFieldByName("value"), value);
|
1427
|
}
|
1428
|
} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) {
|
1429
|
if (value instanceof IntField) {
|
1430
|
q.mergeFrom((IntField) value);
|
1431
|
} else if (value instanceof String) {
|
1432
|
q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value));
|
1433
|
} else {
|
1434
|
q.setField(IntField.getDescriptor().findFieldByName("value"), value);
|
1435
|
}
|
1436
|
}
|
1437
|
|
1438
|
fieldValue = q.buildPartial();
|
1439
|
break;
|
1440
|
default:
|
1441
|
break;
|
1442
|
}
|
1443
|
|
1444
|
doAddField(builder, descriptor, fieldValue);
|
1445
|
}
|
1446
|
|
1447
|
}
|
1448
|
|
1449
|
protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) {
|
1450
|
if (value != null) {
|
1451
|
if (fd.isRepeated()) {
|
1452
|
builder.addRepeatedField(fd, value);
|
1453
|
} else if (fd.isOptional() || fd.isRequired()) {
|
1454
|
builder.setField(fd, value);
|
1455
|
}
|
1456
|
}
|
1457
|
}
|
1458
|
|
1459
|
protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {
|
1460
|
final IterablePair<FieldDescriptor, String> iterablePair =
|
1461
|
new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
|
1462
|
.on(split).trimResults().split(value)));
|
1463
|
|
1464
|
for (final Pair<FieldDescriptor, String> p : iterablePair) {
|
1465
|
addField(builder, p.getKey(), p.getValue());
|
1466
|
}
|
1467
|
}
|
1468
|
|
1469
|
protected static String base64(final byte[] data) {
|
1470
|
return new String(Base64.encodeBase64(data));
|
1471
|
}
|
1472
|
|
1473
|
public static String replace(final String s, final String regex, final String replacement) {
|
1474
|
return s.replaceAll(regex, replacement);
|
1475
|
}
|
1476
|
|
1477
|
public static String trim(final String s) {
|
1478
|
return s.trim();
|
1479
|
}
|
1480
|
|
1481
|
protected static String removePrefix(final Type type, final String s) {
|
1482
|
return removePrefix(type.toString(), s);
|
1483
|
}
|
1484
|
|
1485
|
private static String removePrefix(final String prefix, final String s) {
|
1486
|
return StringUtils.removeStart("" + s, prefix + "|");
|
1487
|
}
|
1488
|
|
1489
|
protected static Qualifier.Builder getDefaultQualifier(final String scheme) {
|
1490
|
final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme);
|
1491
|
return qualifier;
|
1492
|
}
|
1493
|
|
1494
|
protected static StructuredProperty getStructuredProperty(final String value,
|
1495
|
final String classid,
|
1496
|
final String classname,
|
1497
|
final String schemeid,
|
1498
|
final String schemename) {
|
1499
|
if ((value == null) || value.isEmpty()) return null;
|
1500
|
return StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classid, classname, schemeid, schemename)).build();
|
1501
|
}
|
1502
|
|
1503
|
protected static StringField.Builder sf(final String s) {
|
1504
|
return StringField.newBuilder().setValue(s);
|
1505
|
}
|
1506
|
|
1507
|
public static String generateNsPrefix(final String prefix, final String externalId) {
|
1508
|
return StringUtils.substring(prefix + StringUtils.leftPad(externalId, MAX_NSPREFIX_LEN - prefix.length(), "_"), 0, MAX_NSPREFIX_LEN);
|
1509
|
}
|
1510
|
|
1511
|
public static String md5(final String s) {
|
1512
|
try {
|
1513
|
final MessageDigest md = MessageDigest.getInstance("MD5");
|
1514
|
md.update(s.getBytes("UTF-8"));
|
1515
|
return new String(Hex.encodeHex(md.digest()));
|
1516
|
} catch (final Exception e) {
|
1517
|
System.err.println("Error creating id");
|
1518
|
return null;
|
1519
|
}
|
1520
|
}
|
1521
|
|
1522
|
public static String oafId(final String entityType, final String prefix, final String id) {
|
1523
|
if (id.isEmpty() || prefix.isEmpty()) return "";
|
1524
|
return oafSimpleId(entityType, prefix + "::" + md5(id));
|
1525
|
}
|
1526
|
|
1527
|
public static String oafSimpleId(final String entityType, final String id) {
|
1528
|
return (Type.valueOf(entityType).getNumber() + "|" + id).replaceAll("\\s|\\n", "");
|
1529
|
}
|
1530
|
|
1531
|
public static String oafSplitId(final String entityType, final String fullId) {
|
1532
|
return oafId(entityType, StringUtils.substringBefore(fullId, "::"), StringUtils.substringAfter(fullId, "::"));
|
1533
|
}
|
1534
|
|
1535
|
/**
|
1536
|
* Gets the classname of the given class code
|
1537
|
*
|
1538
|
* @param code class code.
|
1539
|
* @return the class name, if the code is a key of the map. The code itself otherwise.
|
1540
|
*/
|
1541
|
public static String getClassName(final String code) {
|
1542
|
final String classname = code2name.get(code);
|
1543
|
if (StringUtils.isBlank(classname)) return code;
|
1544
|
return classname;
|
1545
|
}
|
1546
|
|
1547
|
/**
|
1548
|
* Utility method, allows to perform param based map lookups in xsl
|
1549
|
*
|
1550
|
* @param map
|
1551
|
* @param key
|
1552
|
* @return value associated to the key.
|
1553
|
*/
|
1554
|
public static Object lookupValue(final Map<String, Object> map, final String key) {
|
1555
|
return map.get(key);
|
1556
|
}
|
1557
|
|
1558
|
/**
|
1559
|
* Utility method, allows to perform param based map lookups in xsl
|
1560
|
*
|
1561
|
* @param map
|
1562
|
* @param key
|
1563
|
* @return value associated to the key.
|
1564
|
*/
|
1565
|
public static int mustMerge(final Map<String, Object> map, final String key) {
|
1566
|
final Object val = lookupValue(map, key);
|
1567
|
return (val != null) && (val instanceof String) && val.equals("true") ? 1 : 0;
|
1568
|
}
|
1569
|
|
1570
|
}
|