1
|
package eu.dnetlib.data.collective.transformation.engine;
|
2
|
|
3
|
import static org.junit.Assert.*;
|
4
|
import static org.mockito.Mockito.*;
|
5
|
|
6
|
import java.io.StringReader;
|
7
|
import java.util.Arrays;
|
8
|
import java.util.HashMap;
|
9
|
import java.util.LinkedList;
|
10
|
import java.util.List;
|
11
|
import java.util.Map;
|
12
|
|
13
|
import javax.xml.transform.TransformerConfigurationException;
|
14
|
|
15
|
import org.apache.commons.logging.Log;
|
16
|
import org.apache.commons.logging.LogFactory;
|
17
|
import org.dom4j.Document;
|
18
|
import org.dom4j.DocumentException;
|
19
|
import org.dom4j.io.SAXReader;
|
20
|
import org.junit.Before;
|
21
|
import org.junit.Test;
|
22
|
import org.junit.runner.RunWith;
|
23
|
import org.mockito.Mock;
|
24
|
import org.mockito.junit.MockitoJUnitRunner;
|
25
|
import org.springframework.core.io.ClassPathResource;
|
26
|
import org.springframework.core.io.Resource;
|
27
|
|
28
|
import eu.dnetlib.common.profile.ResourceDao;
|
29
|
import eu.dnetlib.data.collective.transformation.VocabularyMap;
|
30
|
import eu.dnetlib.data.collective.transformation.VocabularyRegistry;
|
31
|
import eu.dnetlib.data.collective.transformation.engine.core.TransformationImpl;
|
32
|
import eu.dnetlib.data.collective.transformation.engine.functions.DateVocabulary;
|
33
|
import eu.dnetlib.data.collective.transformation.engine.functions.PersonVocabulary;
|
34
|
import eu.dnetlib.data.collective.transformation.engine.functions.ProcessingException;
|
35
|
import eu.dnetlib.data.collective.transformation.engine.functions.Vocabulary;
|
36
|
import eu.dnetlib.data.collective.transformation.rulelanguage.RuleLanguageParser;
|
37
|
|
38
|
@RunWith(MockitoJUnitRunner.class)
|
39
|
public class SimpleTransformationEngineTest {
|
40
|
|
41
|
private static final Log log = LogFactory.getLog(SimpleTransformationEngineTest.class);
|
42
|
|
43
|
private static final String xslTemplatePath = "eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl";
|
44
|
private transient Resource xslTemplateResource = new ClassPathResource(xslTemplatePath);
|
45
|
private static final String schemaPath = "eu/dnetlib/data/collective/transformation/schema/DMFSchema_vTransformator.xsd";
|
46
|
private transient Resource schemaResource = new ClassPathResource(schemaPath);
|
47
|
|
48
|
private static final String xslTemplatePath_oaf = "eu/dnetlib/data/collective/transformation/engine/oaftemplate.xsl"; // OpenAIRE specific
|
49
|
private transient Resource xslTemplateResource_oaf = new ClassPathResource(xslTemplatePath_oaf); // OpenAIRE specific
|
50
|
private static final String schemaPath_oaf = "eu/dnetlib/data/collective/transformation/schema/OAFSchema_vTransformator.xsd"; // OpenAIRE specific
|
51
|
private transient Resource schemaResource_oaf = new ClassPathResource(schemaPath_oaf); // OpenAIRE specific
|
52
|
|
53
|
// class under test
|
54
|
private transient SimpleTransformationEngine transformationEngine;
|
55
|
|
56
|
private transient TransformationImpl transformation;
|
57
|
|
58
|
private transient TransformationImpl transformationOAF; // OpenAIRE specific
|
59
|
|
60
|
private transient TransformationImpl transformationProvenance; // OpenAIRE specific + provenance
|
61
|
|
62
|
private transient TransformationImpl transformationAnyFunderProject; // OpenAIREplus specific
|
63
|
|
64
|
private transient TransformationImpl transformationWoS;
|
65
|
|
66
|
@Mock
|
67
|
private transient eu.dnetlib.common.profile.Resource resource;
|
68
|
@Mock
|
69
|
private transient ResourceDao resourceDao;
|
70
|
@Mock
|
71
|
private transient VocabularyRegistry vocabularyRegistry;
|
72
|
private transient VocabularyMap vocabularyMapWrapper = new VocabularyMap();
|
73
|
private transient Map<String, Vocabulary> vocabularies = new HashMap<String, Vocabulary>();
|
74
|
@Mock
|
75
|
private transient Vocabulary vocabularyLang;
|
76
|
|
77
|
private transient DateVocabulary vocabularyDate = new DateVocabulary();
|
78
|
|
79
|
private transient PersonVocabulary vocabularyPerson = new PersonVocabulary();
|
80
|
|
81
|
private transient Vocabulary vocabularyTypes;
|
82
|
|
83
|
private transient Vocabulary vocabularyRights;
|
84
|
private transient String[] rights = {"info:eu-repo/semantics/openAccess",
|
85
|
"info:eu-repo/semantics/closedAccess",
|
86
|
"info:eu-repo/semantics/embargoedAccess",
|
87
|
"info:eu-repo/semantics/restrictedAccess"};
|
88
|
|
89
|
private transient String repositoryId = "profile-123";
|
90
|
|
91
|
private transient String dataSinkId = "dnet://MDStoreDS/4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==?type=REFRESH";
|
92
|
|
93
|
|
94
|
@SuppressWarnings("unchecked")
|
95
|
@Before
|
96
|
public void setUp() throws TransformerConfigurationException, ProcessingException{
|
97
|
System.setProperty("javax.xml.transform.TransformerFactory", "net.sf.saxon.TransformerFactoryImpl");
|
98
|
List<String> rightsOA = new LinkedList<String>();
|
99
|
rightsOA.add(rights[0]);
|
100
|
|
101
|
vocabularyTypes = new Vocabulary();
|
102
|
@SuppressWarnings("rawtypes")
|
103
|
Map encodingMap = new HashMap();
|
104
|
encodingMap.put("name", "Article");
|
105
|
encodingMap.put("encoding", "0001");
|
106
|
encodingMap.put("code", "0001");
|
107
|
encodingMap.put("synonyms", Arrays.asList(new String[]{"info:eu-repo/semantics/article"}));
|
108
|
List<Map<String, ?>> termList = new LinkedList<Map<String,?>>();
|
109
|
termList.add(encodingMap);
|
110
|
encodingMap = new HashMap();
|
111
|
encodingMap.put("name", "Unknown");
|
112
|
encodingMap.put("encoding", "0000");
|
113
|
encodingMap.put("code", "0000");
|
114
|
encodingMap.put("synonyms", Arrays.asList(new String[]{}));
|
115
|
termList.add(encodingMap);
|
116
|
|
117
|
vocabularyTypes.setResource(termList);
|
118
|
|
119
|
vocabularyRights = new Vocabulary();
|
120
|
encodingMap = new HashMap();
|
121
|
encodingMap.put("name", "");
|
122
|
encodingMap.put("encoding", "OPEN");
|
123
|
encodingMap.put("code", "OPEN");
|
124
|
encodingMap.put("synonyms", Arrays.asList(new String[]{"info:eu-repo/semantics/openAccess"}));
|
125
|
termList = new LinkedList<Map<String,?>>();
|
126
|
termList.add(encodingMap);
|
127
|
vocabularyRights.setResource(termList);
|
128
|
when(vocabularyRegistry.getVocabularies()).thenReturn(vocabularyMapWrapper);
|
129
|
when(vocabularyRegistry.getVocabulary("LangVocab")).thenReturn(vocabularyLang);
|
130
|
when(vocabularyRegistry.getVocabulary("RightsVocab")).thenReturn(vocabularyRights);
|
131
|
//when(vocabularyRegistry.getVocabulary("DateISO8601")).thenReturn(vocabularyDate);
|
132
|
//when(vocabularyRegistry.getVocabulary("Person")).thenReturn(vocabularyPerson);
|
133
|
//when(vocabularyRegistry.getVocabulary("TypesVocab")).thenReturn(vocabularyTypes);
|
134
|
when(vocabularyLang.encoding(anyList())).thenReturn("Unknown Language");
|
135
|
//when(vocabularyLang.getName()).thenReturn("someQuery");
|
136
|
vocabularies.put("LangVocab", vocabularyLang);
|
137
|
vocabularies.put("RightsVocab", vocabularyRights); // OpenAIRE specific
|
138
|
vocabularies.put("DateISO8601", vocabularyDate);
|
139
|
vocabularies.put("Person", vocabularyPerson);
|
140
|
vocabularies.put("TypesVocab", vocabularyTypes);
|
141
|
|
142
|
vocabularyMapWrapper.setMap(vocabularies);
|
143
|
transformationEngine = new SimpleTransformationEngine();
|
144
|
transformationEngine.setVocabularyRegistry(vocabularyRegistry);
|
145
|
transformationEngine.setResourceDao(resourceDao);
|
146
|
transformation = new TransformationImpl();
|
147
|
transformation.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId);
|
148
|
transformation.setTemplate(xslTemplateResource);
|
149
|
transformation.setSchema(schemaResource);
|
150
|
transformation.init();
|
151
|
transformation.setRuleLanguageParser(getRuleLanguageParser(getTransformationScript()));
|
152
|
//transformation.setRootElement("record");
|
153
|
transformation.configureTransformation();
|
154
|
// OpenAIRE specific
|
155
|
transformationOAF = new TransformationImpl();
|
156
|
transformationOAF.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId);
|
157
|
transformationOAF.setTemplate(xslTemplateResource_oaf);
|
158
|
transformationOAF.setSchema(schemaResource_oaf);
|
159
|
transformationOAF.init();
|
160
|
transformationOAF.setRuleLanguageParser(getRuleLanguageParser(getOAFTransformationScript()));
|
161
|
System.out.println("OAF CONFIGURE TRANSFORMATIOn");
|
162
|
transformationOAF.configureTransformation();
|
163
|
|
164
|
transformationProvenance = new TransformationImpl();
|
165
|
transformationProvenance.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId);
|
166
|
transformationProvenance.setTemplate(xslTemplateResource_oaf);
|
167
|
transformationProvenance.setSchema(schemaResource_oaf);
|
168
|
transformationProvenance.init();
|
169
|
transformationProvenance.setRuleLanguageParser(getRuleLanguageParser(getProvenanceTransformationScript() ));
|
170
|
transformationProvenance.configureTransformation();
|
171
|
|
172
|
transformationAnyFunderProject = new TransformationImpl();
|
173
|
transformationAnyFunderProject.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId);
|
174
|
transformationAnyFunderProject.setTemplate(xslTemplateResource_oaf);
|
175
|
transformationAnyFunderProject.setSchema(schemaResource_oaf);
|
176
|
transformationAnyFunderProject.init();
|
177
|
// transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getFunderTransformationScript() ));
|
178
|
transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getOpenaireplusCompatibleFunderTransformationScript() ));
|
179
|
transformationAnyFunderProject.configureTransformation();
|
180
|
|
181
|
transformationWoS = new TransformationImpl();
|
182
|
transformationWoS.addJobConstant(TransformationImpl.JOBCONST_DATASINKID, dataSinkId);
|
183
|
transformationWoS.setTemplate(xslTemplateResource_oaf);
|
184
|
transformationWoS.setSchema(schemaResource_oaf);
|
185
|
transformationWoS.init();
|
186
|
// transformationAnyFunderProject.setRuleLanguageParser(getRuleLanguageParser(getFunderTransformationScript() ));
|
187
|
transformationWoS.setRuleLanguageParser(getRuleLanguageParser(getWosTransformationScript() ));
|
188
|
transformationWoS.configureTransformation();
|
189
|
|
190
|
|
191
|
String xpathExprOnProfile = "//CONFIGURATION/OFFICIAL_NAME";
|
192
|
String valueOnProfile = "repositoryOfficialName";
|
193
|
try{
|
194
|
when(resourceDao.getResourceByQuery("collection('/db/DRIVER/RepositoryServiceResources')//RESOURCE_PROFILE[.//EXTRA_FIELDS/FIELD[key=\"NamespacePrefix\"][value=\"\"]]")).thenReturn(resource);
|
195
|
when(resourceDao.getResource(repositoryId)).thenReturn(resource);
|
196
|
//when(resourceDao.getResourceByQuery("concat('collection()', '')")).thenReturn(resource);
|
197
|
when(resourceDao.getResourceByQuery("collection()")).thenReturn(resource);
|
198
|
}catch(Exception e){
|
199
|
e.printStackTrace();
|
200
|
}
|
201
|
when(resource.getValue(xpathExprOnProfile)).thenReturn(valueOnProfile);
|
202
|
String xpathExprDataSourceId = "//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"; // OpenAIRE specific
|
203
|
String valueDataSourceId = "opendoar::166"; // OpenAIRE specific
|
204
|
when(resource.getValue(xpathExprDataSourceId)).thenReturn(valueDataSourceId); // OpenAIRE specific
|
205
|
String xpathExprDataSourceType = "//EXTRA_FIELDS/FIELD[key='DataSourceType']/value"; // prototype
|
206
|
String valueDataSourceType = "Aggregator"; // prototype
|
207
|
when(resource.getValue(xpathExprDataSourceType)).thenReturn(valueDataSourceType); // prototype
|
208
|
|
209
|
}
|
210
|
|
211
|
@Test
|
212
|
public void testTransformationWithObjectRecords() throws DocumentException{
|
213
|
transformationEngine.setTransformation(transformation);
|
214
|
List<String> mdRecords = new LinkedList<String>();
|
215
|
mdRecords.add(getMdRecord("obj-132", "md-1", getDC()));
|
216
|
// mdRecords.add(getMdRecord("obj-132", "md-2", getDidl()));
|
217
|
// List<String> objRecords = new LinkedList<String>();
|
218
|
// objRecords.add(getObjectRecord(mdRecords));
|
219
|
@SuppressWarnings("unused")
|
220
|
String dump;
|
221
|
assertNotNull(dump = transformation.dumpStylesheet());
|
222
|
List<String> transformedMdRecordsResult = new LinkedList<String>();
|
223
|
for (String srcRecord: mdRecords){
|
224
|
transformedMdRecordsResult.add(transformationEngine.transform(srcRecord));
|
225
|
}
|
226
|
// assertEquals(objRecords.size(), transformedMdRecordsResult.size());
|
227
|
//System.out.println(dump);
|
228
|
Document record = (new SAXReader()).read(new StringReader(transformedMdRecordsResult.get(0)));
|
229
|
assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
|
230
|
assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
231
|
assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
232
|
assertEquals("http://somehost", record.valueOf("//dc:identifier"));
|
233
|
}
|
234
|
|
235
|
@Test
|
236
|
public void testTransformationWithMdRecords() throws DocumentException{
|
237
|
transformationEngine.setTransformation(transformation);
|
238
|
// List<String> mdRecords = new LinkedList<String>();
|
239
|
// mdRecords.add(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC()));
|
240
|
@SuppressWarnings("unused")
|
241
|
String dump;
|
242
|
assertNotNull(dump = transformation.dumpStylesheet());
|
243
|
System.out.println("DUMP: " + dump);
|
244
|
|
245
|
String transformedRecordResult = transformationEngine.transform(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC()));
|
246
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
247
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
248
|
System.out.println(record.asXML());
|
249
|
// assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
|
250
|
// assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
251
|
// assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
252
|
// assertEquals("http://somehost", record.valueOf("//dc:identifier"));
|
253
|
}
|
254
|
|
255
|
@Test
|
256
|
public void testTransformationWithMdRecords_oaf_failed() throws DocumentException{
|
257
|
transformationEngine.setTransformation(transformationOAF);
|
258
|
// List<String> mdRecords = new LinkedList<String>();
|
259
|
// mdRecords.add(getMdRecord("obj-132", "md-1", getOAFDC()));
|
260
|
String dump;
|
261
|
assertNotNull(dump = transformationOAF.dumpStylesheet());
|
262
|
//System.out.println(dump);
|
263
|
String transformedRecordResult = transformationEngine.transform(getMdRecord("obj-132", "md-1", getOAFDC()));
|
264
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
265
|
System.out.println("*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*\r\n" + dump);
|
266
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
267
|
System.out.println(record.asXML());
|
268
|
assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
|
269
|
assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
270
|
assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
271
|
assertEquals("failed", record.valueOf("//*[local-name()='header']/@syntaxcheck"));
|
272
|
//assertEquals("http://somehost", record.valueOf("//*[local-name()='metadata']//*[local-name()='identifier']"));
|
273
|
//assertEquals("OPEN", record.valueOf("//oaf:accessrights")); // test convert function with rights vocabulary for many dc:rights elements
|
274
|
//assertEquals("0001", record.valueOf("//dr:CobjCategory"));
|
275
|
}
|
276
|
|
277
|
@Test
|
278
|
public void testTransformationOfProjectInformation_oaf() throws DocumentException{
|
279
|
transformationEngine.setTransformation(transformationAnyFunderProject);
|
280
|
// List<String> mdRecords = new LinkedList<String>();
|
281
|
// mdRecords.add(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance()));
|
282
|
String dump;
|
283
|
System.err.println("BEFORE DUMP\r\n");
|
284
|
assertNotNull(dump = transformationAnyFunderProject.dumpStylesheet());
|
285
|
String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance()));
|
286
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
287
|
System.err.println("DUMP\r\n" + dump);
|
288
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
289
|
System.out.println(record.asXML());
|
290
|
assertEquals("", record.valueOf("oaf:projectid"));
|
291
|
}
|
292
|
|
293
|
@Test
|
294
|
public void testTransformationOfWos_oaf() throws DocumentException{
|
295
|
transformationEngine.setTransformation(transformationWoS);
|
296
|
// List<String> mdRecords = new LinkedList<String>();
|
297
|
// mdRecords.add(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance()));
|
298
|
String dump;
|
299
|
assertNotNull(dump = transformationWoS.dumpStylesheet());
|
300
|
String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getWOS(), getProvenance()));
|
301
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
302
|
System.out.println("___WOS___");
|
303
|
System.out.println(dump);
|
304
|
System.out.println("___WOS___");
|
305
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
306
|
System.out.println(record.asXML());
|
307
|
assertEquals("", record.valueOf("oaf:projectid"));
|
308
|
}
|
309
|
|
310
|
|
311
|
@Test
|
312
|
public void testTransformationWithMdRecords_provenance() throws DocumentException{
|
313
|
transformationEngine.setTransformation(transformationProvenance);
|
314
|
// List<String> mdRecords = new LinkedList<String>();
|
315
|
// mdRecords.add(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance() ));
|
316
|
String dump;
|
317
|
assertNotNull(dump = transformationProvenance.dumpStylesheet());
|
318
|
String transformedRecordResult = transformationEngine.transform(getMdRecordWithProvenance("obj-132", "md-1", getOAFDC(), getProvenance() ));
|
319
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
320
|
System.out.println(dump);
|
321
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
322
|
System.out.println("_______________________________________");
|
323
|
System.out.println(record.asXML());
|
324
|
System.out.println("_______________________________________");
|
325
|
assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
|
326
|
assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
327
|
assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
328
|
assertEquals("http://somehost", record.valueOf("//dc:identifier"));
|
329
|
assertEquals("OPEN", record.valueOf("//oaf:accessrights")); // test convert function with rights vocabulary for many dc:rights elements
|
330
|
|
331
|
}
|
332
|
|
333
|
// @Test
|
334
|
// public void testTransformationWithSkippedRecord() throws DocumentException{
|
335
|
// transformationEngine.setTransformation(transformation);
|
336
|
// List<String> mdRecords = new LinkedList<String>();
|
337
|
// mdRecords.add(getMdRecord("obj-132", "md-1==::oai:bla-1", getDC()));
|
338
|
// mdRecords.add(getMdRecord("obj-133", "md-1==::oai:bla-2", getDC2()));
|
339
|
// List<String> transformedMdRecordsResult = transformationEngine.transform(mdRecords);
|
340
|
// assertEquals(mdRecords.size() - 1, transformedMdRecordsResult.size());
|
341
|
// //System.out.println(dump);
|
342
|
// Document record = (new SAXReader()).read(new StringReader(transformedMdRecordsResult.get(0)));
|
343
|
// System.out.println(record.asXML());
|
344
|
// assertEquals("", record.valueOf("//*[local-name()='header']/@status"));
|
345
|
// assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
346
|
// assertNotNull("record contains no metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
347
|
// assertEquals("http://somehost", record.valueOf("//dc:identifier"));
|
348
|
// }
|
349
|
|
350
|
@Test
|
351
|
public void testTransformationWithDeletedRecord() throws DocumentException{
|
352
|
transformationEngine.setTransformation(transformation);
|
353
|
// List<String> mdRecords = new LinkedList<String>();
|
354
|
// mdRecords.add(getDeletedMdRecord("obj-132", "md-1"));
|
355
|
@SuppressWarnings("unused")
|
356
|
String dump;
|
357
|
assertNotNull(dump = transformation.dumpStylesheet());
|
358
|
String transformedRecordResult = transformationEngine.transform(getDeletedMdRecord("obj-132", "md-1"));
|
359
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
360
|
//System.out.println(dump);
|
361
|
Document record = (new SAXReader()).read(new StringReader(transformedRecordResult));
|
362
|
//System.out.println(record.asXML());
|
363
|
assertEquals("deleted", record.valueOf("//*[local-name()='header']/@status"));
|
364
|
assertNotNull("record contains no header node", record.selectSingleNode("//*[local-name()='header']"));
|
365
|
assertNull("record contains a metadata node", record.selectSingleNode("//*[local-name()='metadata']"));
|
366
|
}
|
367
|
|
368
|
@Test
|
369
|
public void testTransformationBehaviourWithEmptyMetadata() throws DocumentException{
|
370
|
log.debug("applying OAF transformation");
|
371
|
transformationEngine.setTransformation(transformationOAF);
|
372
|
// List<String> mdRecords = new LinkedList<String>();
|
373
|
// mdRecords.add(getEmptyMetadataMdRecord("obj-132", "md-1"));
|
374
|
@SuppressWarnings("unused")
|
375
|
String dump;
|
376
|
assertNotNull(dump = transformationOAF.dumpStylesheet());
|
377
|
String transformedRecordResult = transformationEngine.transform(getEmptyMetadataMdRecord("obj-132", "md-1"));
|
378
|
// assertEquals(mdRecords.size(), transformedMdRecordsResult.size());
|
379
|
log.debug("record output: " + transformedRecordResult);
|
380
|
}
|
381
|
|
382
|
private RuleLanguageParser getRuleLanguageParser(String aTransformationScript){
|
383
|
RuleLanguageParser parser = new RuleLanguageParser();
|
384
|
System.out.println(aTransformationScript);
|
385
|
StringReader reader = new StringReader(aTransformationScript);
|
386
|
parser.parse(reader);
|
387
|
return parser;
|
388
|
}
|
389
|
|
390
|
private String getTransformationScript(){
|
391
|
StringBuilder scriptBuilder = new StringBuilder();
|
392
|
scriptBuilder.append("declare_script \"MainSample\";\r\n");
|
393
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
394
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
395
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
396
|
scriptBuilder.append("declare_ns didl = \"urn:mpeg:mpeg21:2002:02-DIDL-NS\";\r\n");
|
397
|
//scriptBuilder.append("$a1 = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
398
|
//scriptBuilder.append("dri:mdFormat = $a1;\r\n");
|
399
|
scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
|
400
|
scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
|
401
|
scriptBuilder.append("dc:relation = xpath:\"//didl:Resource/@ref\";\r\n");
|
402
|
scriptBuilder.append("dc:title = copy(\"dc:title\", \"//dc:title\", \"@*|node()\");\r\n");
|
403
|
//scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
|
404
|
scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
|
405
|
scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
|
406
|
//scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
407
|
scriptBuilder.append("dc:language = Convert(xpath:\"normalize-space(//dc:language[2])\",LangVocab);\r\n");
|
408
|
scriptBuilder.append("dri:recordIdentifier = RegExpr(xpath:\"//dri:recordIdentifier\", $var1, \"s/^(.*)(::)/$2/\");\r\n");
|
409
|
scriptBuilder.append("$var0 = \"''\";\r\n");
|
410
|
scriptBuilder.append("static $var1 = RegExpr($job.datasinkid, $var0, \"s/^(dnet:\\/\\/MDStoreDS\\/)|(\\?.*)//g\");\r\n");
|
411
|
scriptBuilder.append("if xpath:\"//dc:format[text()='digital']\" dc:publisher = xpath:\"//dc:publisher\"; else dc:publisher = skipRecord();\r\n");
|
412
|
scriptBuilder.append("end\r\n");
|
413
|
|
414
|
return scriptBuilder.toString();
|
415
|
}
|
416
|
|
417
|
private String getOAFTransformationScript(){
|
418
|
StringBuilder scriptBuilder = new StringBuilder();
|
419
|
scriptBuilder.append("declare_script \"MainSample_OAF\";\r\n");
|
420
|
scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
|
421
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
422
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
423
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
424
|
scriptBuilder.append("oaf:dateAccepted = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
|
425
|
// scriptBuilder.append("oaf:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n");
|
426
|
//scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
|
427
|
scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix
|
428
|
scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"string-length(translate(normalize-space(.),'info:eu-repo/grantAgreement/EC/FP7','')) = 5\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
429
|
// scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
430
|
// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary
|
431
|
scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n");
|
432
|
scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile
|
433
|
scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
|
434
|
scriptBuilder.append("dc:rights = skipRecord();\r\n");
|
435
|
scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n");
|
436
|
scriptBuilder.append("$varFulltext = xpath:\"//dc:relation[starts-with(., 'http')]\";\r\n");
|
437
|
scriptBuilder.append("if xpath:\"//dc:relation[starts-with(., 'http')]\" oaf:fulltext = $varFulltext; else $var0 = \"''\";\r\n");
|
438
|
scriptBuilder.append("oaf:person = set(xpath:\"//dc:creator\", @normalized = Convert(xpath:\".\", Person););\r\n");
|
439
|
// scriptBuilder.append("apply xpath:\"//dc:creator\" if xpath:\"string-length(.) > 0\" oaf:person = set(xpath:\".\", @normalized = \"test\";); else dc:creator = xpath:\"normalize-space(.)\";\r\n");
|
440
|
// scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n");
|
441
|
// scriptBuilder.append("apply xpath:\"//dc:creator\" if xpath:\"string-length(.) > 0\" dc:creator = Convert(xpath:\".\", Person); else $var0 = \"''\";\r\n");
|
442
|
scriptBuilder.append("$varjournaltitle = \"'some title'\";\r\n");
|
443
|
scriptBuilder.append("oaf:journal = set($varjournaltitle, @issn=\"1234-5678\"; , @eissn=\"1234-5679\";);\r\n");
|
444
|
scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
|
445
|
//scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
|
446
|
//scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
|
447
|
scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
|
448
|
//scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
449
|
scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string
|
450
|
scriptBuilder.append("oaf:hostedBy = set(\"''\", @name = \"hostedName\"; , @id = \"hostedId\";);\r\n");
|
451
|
scriptBuilder.append("$varId = identifierExtract('[\"//dc:identifier\"]' , xpath:\"./record\" , '(10[.][0-9]{4,}[^\\s\"/<>]*/[^\\s\"<>]+)');\r\n");
|
452
|
scriptBuilder.append("oaf:identifier = set(xpath:\"$varId//value\", @identifierType = \"doi\";);\r\n");
|
453
|
scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//dc:type\",TypesVocab);\r\n");
|
454
|
|
455
|
scriptBuilder.append("end\r\n");
|
456
|
|
457
|
return scriptBuilder.toString();
|
458
|
}
|
459
|
|
460
|
private String getProvenanceTransformationScript(){
|
461
|
StringBuilder scriptBuilder = new StringBuilder();
|
462
|
|
463
|
scriptBuilder.append("declare_script \"MainSample_Provenance\";\r\n");
|
464
|
scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
|
465
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
466
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
467
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
468
|
scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n");
|
469
|
// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n");
|
470
|
//scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
|
471
|
//scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n");
|
472
|
scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix
|
473
|
scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/)//gm\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
474
|
// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary
|
475
|
scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n");
|
476
|
scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile
|
477
|
scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n");
|
478
|
scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
479
|
scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
480
|
scriptBuilder.append("oaf:concept = \"'CONCEPT'\";\r\n");
|
481
|
scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
|
482
|
scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n");
|
483
|
scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n");
|
484
|
scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
|
485
|
//scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
|
486
|
//scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
|
487
|
scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
|
488
|
//scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
489
|
scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string
|
490
|
scriptBuilder.append("end\r\n");
|
491
|
|
492
|
return scriptBuilder.toString();
|
493
|
}
|
494
|
|
495
|
private String getWosTransformationScript(){
|
496
|
StringBuilder scriptBuilder = new StringBuilder();
|
497
|
|
498
|
scriptBuilder.append("declare_script \"MainSample\";\r\n");
|
499
|
scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
|
500
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
501
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
502
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
503
|
scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n");
|
504
|
// header
|
505
|
scriptBuilder.append("dri:objIdentifier = xpath:\"//dri:objIdentifier\";\r\n");
|
506
|
scriptBuilder.append("dri:recordIdentifier = xpath:\"//csvRecord/row[@name='UT']\";\r\n");
|
507
|
scriptBuilder.append("dri:dateOfCollection = xpath:\"//dri:dateOfCollection\";\r\n");
|
508
|
// scriptBuilder.append("dri:repositoryId;\r\n");
|
509
|
// scriptBuilder.append("dri:datasourceprefix;\r\n");
|
510
|
// metadata
|
511
|
scriptBuilder.append("dc:language = Convert(xpath:\"//csvRecord/row[@name='LA']\", LangVocab);\r\n");
|
512
|
scriptBuilder.append("dc:title = xpath:\"//csvRecord/row[@name='TI']\";\r\n");
|
513
|
scriptBuilder.append("%myTemplate = split(xpath:\"//csvRecord/row[@name='AF']/text()\", \"dc:creator\", \";\");\r\n");
|
514
|
scriptBuilder.append("$varIssn = xpath:\"//csvRecord/row[@name='SN']\";");
|
515
|
scriptBuilder.append("oaf:journal = set(xpath:\"//csvRecord/row[@name='SO']\", @issn = $varIssn;);\r\n");
|
516
|
// scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//csvRecord/row[@name='DT']\", TextTypologies);\r\n");
|
517
|
scriptBuilder.append("dr:CobjCategory = Convert(xpath:\"//csvRecord/row[@name='DT']\", LangVocab);\r\n");
|
518
|
scriptBuilder.append("dc:subject = xpath:\"//csvRecord/row[@name='ID']\";\r\n");
|
519
|
scriptBuilder.append("dc:description = xpath:\"//csvRecord/row[@name='AB']\";\r\n");
|
520
|
scriptBuilder.append("dc:publisher = xpath:\"//csvRecord/row[@name='PU']\";\r\n");
|
521
|
scriptBuilder.append("dc:dateAccepted = xpath:\"//csvRecord/row[@name='PY']\";\r\n");
|
522
|
scriptBuilder.append("$varDoi = xpath:\"concat('http://dx.doi.org/', normalize-space(//csvRecord/row[@name='DI']))\";\r\n");
|
523
|
scriptBuilder.append("dc:identifier = $varDoi;\r\n");
|
524
|
scriptBuilder.append("$varPart1 = xpath:\"concat('Test', 'No.2')\";\r\n");
|
525
|
scriptBuilder.append("dc:subject = $varPart1;\r\n");
|
526
|
scriptBuilder.append("$varIfTest = xpath:\"//dc:creator\";\r\n");
|
527
|
scriptBuilder.append("if xpath:\"count($varIfTest) > 0\" dc:subject = \"'yes'\"; else dc:subject = \"'no'\";\r\n");
|
528
|
scriptBuilder.append("oaf:identifier = set(xpath:\"//csvRecord/row[@name='DI']\", @identifierType = \"doi\";);\r\n");
|
529
|
scriptBuilder.append("oaf:fundingunit = xpath:\"//csvRecord/row[@name='FU']\";\r\n");
|
530
|
scriptBuilder.append("oaf:fundingtext = xpath:\"//csvRecord/row[@name='FX']\";\r\n");
|
531
|
// adapt the attribute values for name and id
|
532
|
scriptBuilder.append("oaf:hostedBy = set(\"''\", @name=\"Unknown Repository\";, @id=\"openaire____::55045bd2a65019fd8e6741a755395c8c\";);\r\n");
|
533
|
scriptBuilder.append("oaf:collectedFrom = set(\"''\", @name=\"Unknown Repository\";, @id=\"openaire____::55045bd2a65019fd8e6741a755395c8c\";);\r\n");
|
534
|
|
535
|
scriptBuilder.append("end\r\n");
|
536
|
return scriptBuilder.toString();
|
537
|
}
|
538
|
|
539
|
private String getFunderTransformationScript(){
|
540
|
StringBuilder scriptBuilder = new StringBuilder();
|
541
|
|
542
|
scriptBuilder.append("declare_script \"MainSample\";\r\n");
|
543
|
scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
|
544
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
545
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
546
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
547
|
scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n");
|
548
|
// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n");
|
549
|
//scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
|
550
|
//scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n");
|
551
|
scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix
|
552
|
scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
553
|
// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary
|
554
|
scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n");
|
555
|
scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile
|
556
|
scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n");
|
557
|
scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"//dri:repositoryId\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
558
|
scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
559
|
scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
|
560
|
scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n");
|
561
|
scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n");
|
562
|
scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection(')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
|
563
|
//scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
|
564
|
//scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
|
565
|
scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
|
566
|
//scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
567
|
scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string
|
568
|
scriptBuilder.append("$varPmc = \"'PMC:123456'\";\r\n");
|
569
|
scriptBuilder.append("oaf:identifier = set($varPmc, @identifierType = \"pmc\";);");
|
570
|
scriptBuilder.append("end\r\n");
|
571
|
|
572
|
return scriptBuilder.toString();
|
573
|
}
|
574
|
|
575
|
private String getOpenaireplusCompatibleFunderTransformationScript(){
|
576
|
StringBuilder scriptBuilder = new StringBuilder();
|
577
|
|
578
|
scriptBuilder.append("declare_script \"MainSample\";\r\n");
|
579
|
scriptBuilder.append("declare_ns oaf = \"http://namespace.openaire.eu/oaf\";\r\n");
|
580
|
scriptBuilder.append("declare_ns dr = \"http://www.driver-repository.eu/namespace/dr\";\r\n");
|
581
|
scriptBuilder.append("declare_ns dri = \"http://www.driver-repository.eu/namespace/dri\";\r\n");
|
582
|
scriptBuilder.append("declare_ns dc = \"http://purl.org/dc/elements/1.1/\";\r\n");
|
583
|
scriptBuilder.append("declare_ns prov = \"http://www.openarchives.org/OAI/2.0/provenance\";\r\n");
|
584
|
// scriptBuilder.append("dc:dateAccepted = Convert(xpath:\"//dc:date\", DateISO8601);\r\n");
|
585
|
//scriptBuilder.append("dc:creator = Convert(xpath:\"descendant-or-self::dc:date\", DateISO8601, \"yyyy-MM-dd\", \"min()\");\r\n");
|
586
|
//scriptBuilder.append("dc:creator = xpath:\"//dc:creator\";\r\n");
|
587
|
scriptBuilder.append("apply xpath:\"//dc:date\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/date')\" oaf:embargoenddate = RegExpr(xpath:\"normalize-space(.)\", $var0, \"s/^(.*info:eu-repo\\/date\\/embargoEnd\\/)//g\"); else $var0 = \"''\";\r\n"); // retrieve from dc:date , test the info prefix
|
588
|
// String regExpr = "s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/([0-9]+).*/$1/gm";
|
589
|
String arg = "$1"; // TODO
|
590
|
// scriptBuilder.append("$varPrj0 = RegExpr(xpath:\"//dc:relation[0][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", $var0, \"s/^.*info:eu-repo\\/grantAgreement\\/EU\\/FP7\\/([0-9]+)//gm\");");
|
591
|
scriptBuilder.append("$varCorda = \"'corda_______::$1'\";\r\n");
|
592
|
|
593
|
String regExpr = "s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*/$1/gm";
|
594
|
scriptBuilder.append("$varArg = \"'$1'\";\r\n");
|
595
|
scriptBuilder.append("$varPrj1 = " +
|
596
|
"RegExpr(xpath:\"//dc:relation[1][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
597
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
598
|
scriptBuilder.append("$varPrj2 = " +
|
599
|
"RegExpr(xpath:\"//dc:relation[2][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
600
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
601
|
scriptBuilder.append("$varPrj3 = " +
|
602
|
"RegExpr(xpath:\"//dc:relation[3][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
603
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
604
|
scriptBuilder.append("$varPrj4 = " +
|
605
|
"RegExpr(xpath:\"//dc:relation[4][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
606
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
607
|
scriptBuilder.append("$varPrj5 = " +
|
608
|
"RegExpr(xpath:\"//dc:relation[5][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
609
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
610
|
scriptBuilder.append("$varPrj6 = " +
|
611
|
"RegExpr(xpath:\"//dc:relation[6][starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')]\", " +
|
612
|
"$varCorda, \"s/^.*info:eu-repo\\/grantAgreement\\/EC\\/FP7\\/(\\d\\d\\d\\d\\d\\d).*//gm\");\r\n");
|
613
|
scriptBuilder.append("if xpath:\"string-length($varPrj1) = 20\" oaf:projectid = $varPrj1; else $var0 = \"''\";\r\n");
|
614
|
scriptBuilder.append("if xpath:\"string-length($varPrj2) = 20\" oaf:projectid = $varPrj2; else $var0 = \"''\";\r\n");
|
615
|
scriptBuilder.append("if xpath:\"string-length($varPrj3) = 20\" oaf:projectid = $varPrj3; else $var0 = \"''\";\r\n");
|
616
|
scriptBuilder.append("if xpath:\"string-length($varPrj4) = 20\" oaf:projectid = $varPrj4; else $var0 = \"''\";\r\n");
|
617
|
scriptBuilder.append("if xpath:\"string-length($varPrj5) = 20\" oaf:projectid = $varPrj5; else $var0 = \"''\";\r\n");
|
618
|
scriptBuilder.append("if xpath:\"string-length($varPrj6) = 20\" oaf:projectid = $varPrj6; else $var0 = \"''\";\r\n");
|
619
|
// scriptBuilder.append("apply xpath:\"//dc:relation[starts-with(., 'info:eu-repo/grantAgreement')]\" if xpath:\"string-length() = 6\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
620
|
// scriptBuilder.append("apply xpath:\"//dc:relation\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')\" oaf:projectid = RegExpr(xpath:\"normalize-space(.)\", $var0, \"/info:eu-repo/grantAgreement/([A-Za-z]+)/(.*)/([0-9]+)/\"); else dc:relation = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:relation , test the info prefix
|
621
|
// scriptBuilder.append("apply xpath:\"//dc:rights\" if xpath:\"starts-with(normalize-space(.), 'info:eu-repo/semantics')\" dc:rights = empty; else dc:rights = xpath:\"normalize-space(.)\";\r\n"); // retrieve from dc:rights, better retrieve from vocabulary
|
622
|
scriptBuilder.append("oaf:accessrights = Convert(xpath:\"//dc:rights\", RightsVocab);\r\n");
|
623
|
scriptBuilder.append("oaf:datasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n"); // retrieve from profile
|
624
|
scriptBuilder.append("static $varDsType = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='DataSourceType']/value\"]);\r\n");
|
625
|
scriptBuilder.append("if xpath:\"$varDsType='Aggregator'\" oaf:hostingDatasourceid = xpath:\"//prov:baseURL\"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
626
|
scriptBuilder.append("oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value\"]);\r\n");
|
627
|
scriptBuilder.append("dr:CobjMDFormats = xpath:\"//dc:title\";\r\n");
|
628
|
scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language\", LangVocab);\r\n");
|
629
|
scriptBuilder.append("%myTemplate = split(xpath:\"//dc:creator/text()\", \"dc:creator\", \";\");\r\n");
|
630
|
scriptBuilder.append("dr:repositoryName = getValue(PROFILEFIELD, [xpath:\"concat('collection()', '')\", xpath:\"//CONFIGURATION/OFFICIAL_NAME\"]);\r\n");
|
631
|
//scriptBuilder.append("if xpath:\"//dc:title\" dr:aggregatorName = \"abc\"; else dr:repositoryName = \"def\";\r\n");
|
632
|
//scriptBuilder.append("if xpath:\"//dc:language[4]\" dr:CobjTypology = xpath:\"//dc:language[2]\"; else dr:CobjTypology = \"test\";\r\n");
|
633
|
scriptBuilder.append("apply xpath:\"//dc:identifier\" if xpath:\"starts-with(normalize-space(.), 'http')\" dc:identifier = xpath:\"normalize-space(.)\"; else dr:CobjIdentifier = xpath:\"normalize-space(.)\";\r\n");
|
634
|
//scriptBuilder.append("dc:language = Convert(xpath:\"//dc:language[2]\",LangVocab);\r\n");
|
635
|
scriptBuilder.append("$var0 = \"''\";\r\n"); // workaround - placeholder for an empty string
|
636
|
scriptBuilder.append("$varPmc = \"'PMC:123456'\";\r\n");
|
637
|
scriptBuilder.append("oaf:identifier = set($varPmc, @identifierType = \"pmc\";);");
|
638
|
scriptBuilder.append("end\r\n");
|
639
|
|
640
|
return scriptBuilder.toString();
|
641
|
}
|
642
|
|
643
|
|
644
|
private String getObjectRecord(List<String> mdRecords){
|
645
|
StringBuilder builder = new StringBuilder();
|
646
|
builder.append("<objectRecord>");
|
647
|
for (String record: mdRecords) builder.append(record);
|
648
|
builder.append("</objectRecord>");
|
649
|
return builder.toString();
|
650
|
}
|
651
|
|
652
|
private String getMdRecord(String objIdentifier, String recordIdentifier, String metadata){
|
653
|
StringBuilder builder = new StringBuilder();
|
654
|
builder.append("<record xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">");
|
655
|
builder.append("<header>");
|
656
|
builder.append("<dri:objIdentifier>");
|
657
|
builder.append(objIdentifier);
|
658
|
builder.append("</dri:objIdentifier>");
|
659
|
builder.append("<dri:repositoryId>profile-123</dri:repositoryId>");
|
660
|
builder.append("<dri:recordIdentifier>");
|
661
|
builder.append(recordIdentifier);
|
662
|
builder.append("</dri:recordIdentifier>");
|
663
|
builder.append("<dri:dateOfCollection>2009-09-30T13:08:57Z</dri:dateOfCollection>");
|
664
|
builder.append("<dri:mdFormat/>");
|
665
|
builder.append("<dri:mdFormatInterpretation/>");
|
666
|
builder.append("<dri:repositoryId>71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>");
|
667
|
builder.append("</header><metadata>");
|
668
|
builder.append(metadata);
|
669
|
builder.append("</metadata>");
|
670
|
builder.append("</record>");
|
671
|
return builder.toString();
|
672
|
}
|
673
|
|
674
|
private String getMdRecordWithProvenance(String objIdentifier, String recordIdentifier, String metadata, String provenance){
|
675
|
StringBuilder builder = new StringBuilder();
|
676
|
builder.append("<record xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">");
|
677
|
builder.append("<header>");
|
678
|
builder.append("<dri:objIdentifier>");
|
679
|
builder.append(objIdentifier);
|
680
|
builder.append("</dri:objIdentifier>");
|
681
|
builder.append("<dri:repositoryId>profile-123</dri:repositoryId>");
|
682
|
builder.append("<dri:recordIdentifier>");
|
683
|
builder.append(recordIdentifier);
|
684
|
builder.append("</dri:recordIdentifier>");
|
685
|
builder.append("<dri:dateOfCollection>2009-09-30T13:08:57Z</dri:dateOfCollection>");
|
686
|
builder.append("<dri:mdFormat/>");
|
687
|
builder.append("<dri:mdFormatInterpretation/>");
|
688
|
builder.append("<dri:repositoryId>71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>");
|
689
|
builder.append("</header><metadata>");
|
690
|
builder.append(metadata);
|
691
|
builder.append("</metadata>");
|
692
|
builder.append("<about>");
|
693
|
builder.append(provenance);
|
694
|
builder.append("</about>");
|
695
|
builder.append("</record>");
|
696
|
return builder.toString();
|
697
|
}
|
698
|
|
699
|
private String getDeletedMdRecord(String objIdentifier, String recordIdentifier){
|
700
|
StringBuilder builder = new StringBuilder();
|
701
|
builder.append("<record xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\">");
|
702
|
builder.append("<header status=\"deleted\">");
|
703
|
builder.append("<dri:objIdentifier>");
|
704
|
builder.append(objIdentifier);
|
705
|
builder.append("</dri:objIdentifier>");
|
706
|
builder.append("<dri:repositoryId>profile-123</dri:repositoryId>");
|
707
|
builder.append("<dri:recordIdentifier>");
|
708
|
builder.append(recordIdentifier);
|
709
|
builder.append("</dri:recordIdentifier>");
|
710
|
builder.append("<dri:dateOfCollection>2009-09-30T13:08:57Z</dri:dateOfCollection>");
|
711
|
builder.append("<dri:mdFormat/>");
|
712
|
builder.append("<dri:mdFormatInterpretation/>");
|
713
|
builder.append("<dri:repositoryId>71f5069a-9ea2-41fa-968a-4f69a5722ad0_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>");
|
714
|
builder.append("</header>");
|
715
|
builder.append("</record>");
|
716
|
return builder.toString();
|
717
|
}
|
718
|
|
719
|
private String getEmptyMetadataMdRecord(String objIdentifier, String recordIdentifier){
|
720
|
// this is an exception case
|
721
|
StringBuilder builder = new StringBuilder();
|
722
|
builder.append("<record>");
|
723
|
builder.append("<header xmlns:oai=\"http://www.openarchives.org/OAI/2.0/\" xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">");
|
724
|
builder.append("<dri:objIdentifier>");
|
725
|
builder.append(objIdentifier);
|
726
|
builder.append("</dri:objIdentifier>");
|
727
|
builder.append("<dri:recordIdentifier>");
|
728
|
builder.append(recordIdentifier);
|
729
|
builder.append("</dri:recordIdentifier>");
|
730
|
builder.append("<dri:dateOfCollection>2011-03-29T08:41:48Z</dri:dateOfCollection>");
|
731
|
builder.append("<dri:repositoryId>profile-123</dri:repositoryId>");
|
732
|
builder.append("<identifier>oai:openaire.cern.ch:8</identifier>");
|
733
|
builder.append("<datestamp>2010-12-11T19:14:26Z</datestamp>");
|
734
|
builder.append("<setSpec>EC_fundedresources</setSpec>");
|
735
|
builder.append("</header>");
|
736
|
builder.append("<metadata>");
|
737
|
builder.append("</metadata>");
|
738
|
builder.append("</record>");
|
739
|
return builder.toString();
|
740
|
}
|
741
|
|
742
|
private String getDC(){
|
743
|
StringBuilder builder = new StringBuilder();
|
744
|
builder.append("<oai_dc:dc xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd\">");
|
745
|
builder.append("<dc:title xml:lang=\"en\">SomeTitle</dc:title>");
|
746
|
builder.append("<dc:title xml:lang=\"de\">Irgendein Titel</dc:title>");
|
747
|
//builder.append("<title>SomeTitle</title>");
|
748
|
builder.append("<dc:language>firstLang</dc:language>");
|
749
|
builder.append("<dc:language> \r\n" + "middleLang </dc:language>");
|
750
|
builder.append("<dc:language>lastLang</dc:language>");
|
751
|
builder.append("<dc:creator>Any Author</dc:creator>");
|
752
|
builder.append("<dc:contributor>First Contributor</dc:contributor>");
|
753
|
builder.append("<dc:contributor>Second Contributor</dc:contributor>");
|
754
|
builder.append("<dc:format>9</dc:format>");
|
755
|
builder.append("<dc:format>application/pdf</dc:format>");
|
756
|
builder.append("<dc:format>digital</dc:format>");
|
757
|
builder.append("<dc:format>dc</dc:format>");
|
758
|
builder.append("<dc:identifier> http://somehost </dc:identifier>");
|
759
|
builder.append("<dc:identifier>urn:nbn:123-456</dc:identifier>");
|
760
|
builder.append("<dc:source>4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==</dc:source>");
|
761
|
builder.append("<dc:type>someType</dc:type>");
|
762
|
builder.append("<dc:type>info:eu-repo/semantics/article</dc:type>");
|
763
|
builder.append("</oai_dc:dc>");
|
764
|
return builder.toString();
|
765
|
}
|
766
|
|
767
|
private String getDC2(){
|
768
|
StringBuilder builder = new StringBuilder();
|
769
|
builder.append("<oai_dc:dc xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd\">");
|
770
|
builder.append("<dc:title>SomeTitle</dc:title>");
|
771
|
//builder.append("<title>SomeTitle</title>");
|
772
|
builder.append("<dc:language>firstLang</dc:language>");
|
773
|
builder.append("<dc:language>middleLang</dc:language>");
|
774
|
builder.append("<dc:language>lastLang</dc:language>");
|
775
|
builder.append("<dc:creator>Any Author</dc:creator>");
|
776
|
builder.append("<dc:contributor>First Contributor</dc:contributor>");
|
777
|
builder.append("<dc:contributor>Second Contributor</dc:contributor>");
|
778
|
builder.append("<dc:identifier> http://somehost </dc:identifier>");
|
779
|
builder.append("<dc:identifier>urn:nbn:123-456</dc:identifier>");
|
780
|
builder.append("<dc:source>4-9c7cf682-849b-48bd-92cf-e65367f38e14_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==</dc:source>");
|
781
|
builder.append("<dc:type>someType</dc:type>");
|
782
|
builder.append("<dc:type>info:eu-repo/semantics/article</dc:type>");
|
783
|
builder.append("</oai_dc:dc>");
|
784
|
return builder.toString();
|
785
|
}
|
786
|
|
787
|
private String getDidl(){
|
788
|
StringBuilder builder = new StringBuilder();
|
789
|
builder.append("<didl:DIDL xmlns:didl=\"urn:mpeg:mpeg21:2002:02-DIDL-NS\" xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns:diext=\"http://library.lanl.gov/2004-04/STB-RL/DIEXT\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:dii=\"urn:mpeg:mpeg21:2002:01-DII-NS\" xmlns:dip=\"urn:mpeg:mpeg21:2002:01-DIP-NS\" DIDLDocumentId=\"DIDL:URN:NBN:NL:UI:10-1874-1678\" xsi:schemaLocation=\" urn:mpeg:mpeg21:2002:02-DIDL-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/did/didl.xsd urn:mpeg:mpeg21:2002:01-DII-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/dii/dii.xsd urn:mpeg:mpeg21:2005:01-DIP-NS http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-21_schema_files/dip/dip.xsd http://library.lanl.gov/2004-04/STB-RL/DIEXT http://purl.lanl.gov/STB-RL/schemas/2004-04/DIEXT.xsd\">");
|
790
|
builder.append("<didl:Item>");
|
791
|
builder.append("<didl:Descriptor>");
|
792
|
builder.append("<didl:Statement mimeType=\"application/xml\">");
|
793
|
builder.append("<dip:ObjectType>info:eu-repo/semantics/humanStartPage</dip:ObjectType>");
|
794
|
builder.append("</didl:Statement></didl:Descriptor><didl:Component>");
|
795
|
builder.append("<didl:Resource ref=\"http://igitur-archive.library.uu.nl/bio/2001-0803-123812/UUindex.html\" mimeType=\"text/html\"/>");
|
796
|
builder.append("</didl:Component></didl:Item>");
|
797
|
builder.append("</didl:DIDL>");
|
798
|
|
799
|
return builder.toString();
|
800
|
}
|
801
|
|
802
|
private String getWOS(){
|
803
|
StringBuilder builder = new StringBuilder();
|
804
|
builder.append("<oai:record xmlns:oai=\"http://www.openarchives.org/OAI/2.0/\"" +
|
805
|
" xmlns:dnet=\"eu.dnetlib.data.transform.xml.DataciteToHbaseXsltFunctions\"" +
|
806
|
" xmlns:oaf=\"http://namespace.openaire.eu/oaf\" xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\">" +
|
807
|
" <header>" +
|
808
|
" <dri:objIdentifier>::00a3e38eff10c4f2f35ffde55ee22e63</dri:objIdentifier>" +
|
809
|
" <dri:recordIdentifier>WOS:000298601300043</dri:recordIdentifier>" +
|
810
|
" <dri:dateOfCollection>2013-10-29T10:25:51+01:00</dri:dateOfCollection>" +
|
811
|
" <dri:repositoryId/>" +
|
812
|
" <oaf:datasourceprefix/>" +
|
813
|
" </header>" +
|
814
|
" <metadata>" +
|
815
|
" <csvRecord>" +
|
816
|
" <row name=\"OA\">true</row>" +
|
817
|
" <row name=\"PT\">J</row>" +
|
818
|
" <row name=\"AU\">Punta, M; Coggill, PC; Eberhardt, RY; Mistry, J; Tate, J; Boursnell, C; Pang, N;Forslund, K; Ceric, G; Clements, J; Heger, A; Holm, L; Sonnhammer, ELL; Eddy, SR; Bateman, A; Finn, RD</row>" +
|
819
|
" <row name=\"AF\">Punta, Marco; Coggill, Penny C.; Eberhardt, Ruth Y.; Mistry, Jaina; Tate, John; Boursnell, Chris; Pang, Ningze; Forslund, Kristoffer; Ceric, Goran; Clements, Jody; Heger, Andreas; Holm, Liisa; Sonnhammer, Erik L. L.; Eddy, Sean R.; Bateman, Alex; Finn, Robert D.</row>" +
|
820
|
" <row name=\"TI\">The Pfam protein families database</row>" +
|
821
|
" <row name=\"SO\">NUCLEIC ACIDS RESEARCH</row>" +
|
822
|
" <row name=\"LA\">English</row>" +
|
823
|
" <row name=\"DT\">Article</row>" +
|
824
|
" <row name=\"ID\">CRYSTAL-STRUCTURE; DOMAIN; IDENTIFICATION; ANNOTATION; HOMOLOGY; CAPSULE; REVEALS; SEARCH</row>" +
|
825
|
" <row name=\"AB\">Pfam is a widely used database of protein families, currently containing more than 13 000 manually curated protein families as of release 26.0. Pfam is available via servers in the UK (http://pfam.sanger.ac.uk/), the USA (http://pfam.janelia.org/) and Sweden" +
|
826
|
" (http://pfam.sbc.su.se/). Here, we report on changes that have occurred since our 2010 NAR paper (release 24.0). Over the last 2 years, we have generated 1840 new families and" +
|
827
|
" increased coverage of the UniProt Knowledgebase (UniProtKB) to nearly 80%. Notably, we have" +
|
828
|
" taken the step of opening up the annotation of our families to the Wikipedia community, by" +
|
829
|
" linking Pfam families to relevant Wikipedia pages and encouraging the Pfam and Wikipedia" +
|
830
|
" communities to improve and expand those pages. We continue to improve the Pfam website and" +
|
831
|
" add new visualizations, such as the 'sunburst' representation of taxonomic distribution of" +
|
832
|
" families. In this work we additionally address two topics that will be of particular" +
|
833
|
" interest to the Pfam community. First, we explain the definition and use of family-specific," +
|
834
|
" manually curated gathering thresholds. Second, we discuss some of the features of domains of" +
|
835
|
" unknown function (also known as DUFs), which constitute a rapidly growing class of families" +
|
836
|
" within Pfam.</row>" +
|
837
|
" <row name=\"C1\">[Punta, Marco; Coggill, Penny C.; Eberhardt, Ruth Y.; Mistry, Jaina; Tate, John;" +
|
838
|
" Boursnell, Chris; Pang, Ningze; Bateman, Alex] Wellcome Trust Sanger Inst, Hinxton CB10 1SA," +
|
839
|
" England; [Forslund, Kristoffer; Sonnhammer, Erik L. L.] Stockholm Univ, Dept Biochem &" +
|
840
|
" Biophys, Sci Life Lab, Swedish eSci Res Ctr,Stockholm Bioinformat Ctr, SE-17121 Solna," +
|
841
|
" Sweden; [Ceric, Goran; Clements, Jody; Eddy, Sean R.; Finn, Robert D.] HHMI Janelia Farm Res" +
|
842
|
" Campus, Ashburn, VA 20147 USA; [Heger, Andreas] Univ Oxford, MRC Funct Genom Unit, Dept" +
|
843
|
" Physiol Anat & Genet, Oxford OX1 3QX, England; [Holm, Liisa] Univ Helsinki, Inst" +
|
844
|
" Biotechnol, Helsinki 00014, Finland; [Holm, Liisa] Univ Helsinki, Dept Biol & Environm" +
|
845
|
" Sci, FIN-00014 Helsinki, Finland</row>" +
|
846
|
" <row name=\"RP\">Punta, M (reprint author), Wellcome Trust Sanger Inst, Wellcome Trust Genome" +
|
847
|
" Campus, Hinxton CB10 1SA, England.</row>" +
|
848
|
" <row name=\"EM\">mp13@sanger.ac.uk</row>" +
|
849
|
" <row name=\"FU\">Wellcome Trust [WT077044/Z/05/Z]; BBSRC [BB/F010435/1]; Howard Hughes Medical" +
|
850
|
" Institute; Stockholm University; Royal Institute of Technology; Swedish Natural Sciences" +
|
851
|
" Research Council</row>" +
|
852
|
" <row name=\"FX\">Wellcome Trust (grant numbers WT077044/Z/05/Z); BBSRC Bioinformatics and" +
|
853
|
" Biological Resources Fund (grant numbers BB/F010435/1); Howard Hughes Medical Institute (to" +
|
854
|
" G. C., J.C., S. R. E and R. D. F.); Stockholm University, Royal Institute of Technology and" +
|
855
|
" the Swedish Natural Sciences Research Council (to K. F. and E. L. L. S.) and Systems, Web" +
|
856
|
" and Database administration teams at Wellcome Trust Sanger Institute (WTSI) (infrastructure" +
|
857
|
" support). Funding for open access charge: Wellcome Trust (grant numbers WT077044/Z/05/Z);" +
|
858
|
" BBSRC Bioinformatics and Biological Resources Fund (grant numbers BB/F010435/1).</row>" +
|
859
|
" <row name=\"NR\">29</row>" +
|
860
|
" <row name=\"TC\">92</row>" +
|
861
|
" <row name=\"Z9\">94</row>" +
|
862
|
" <row name=\"PU\">OXFORD UNIV PRESS</row>" +
|
863
|
" <row name=\"PI\">OXFORD</row> " +
|
864
|
" <row name=\"PA\">GREAT CLARENDON ST, OXFORD OX2 6DP, ENGLAND</row>" +
|
865
|
" <row name=\"SN\">0305-1048</row>" +
|
866
|
" <row name=\"J9\">NUCLEIC ACIDS RES</row>" +
|
867
|
" <row name=\"JI\">Nucleic Acids Res.</row>" +
|
868
|
" <row name=\"PD\">JAN</row>" +
|
869
|
" <row name=\"PY\">2012</row>" +
|
870
|
" <row name=\"VL\">40</row>" +
|
871
|
" <row name=\"IS\">D1</row>" +
|
872
|
" <row name=\"BP\">D290</row>" +
|
873
|
" <row name=\"EP\">D301</row>" +
|
874
|
" <row name=\"DI\">10.1093/nar/gkr1065</row> " +
|
875
|
"<row name=\"PG\">12</row>" +
|
876
|
" <row name=\"WC\">Biochemistry & Molecular Biology</row>" +
|
877
|
" <row name=\"SC\">Biochemistry & Molecular Biology</row>" +
|
878
|
" <row name=\"GA\">869MD</row>" +
|
879
|
" <row isID=\"true\" name=\"UT\">WOS:000298601300043</row>" +
|
880
|
" </csvRecord>" +
|
881
|
" </metadata>" +
|
882
|
"</oai:record>");
|
883
|
return builder.toString();
|
884
|
}
|
885
|
|
886
|
private String getOAFDC(){
|
887
|
StringBuilder builder = new StringBuilder();
|
888
|
builder.append(
|
889
|
"<oai_dc:dc xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd\">" +
|
890
|
"<dc:title>Grass roots lobbying: marketing politics and policy 'beyond the Beltway'</dc:title>" +
|
891
|
"<dc:creator>McGrath, Conor; Xyz, Opq</dc:creator>" +
|
892
|
"<dc:creator>Abc, Def</dc:creator>" +
|
893
|
"<dc:creator>Muñoz-Castellanos, L</dc:creator>" +
|
894
|
"<dc:subject>JA Political science (General)</dc:subject>" +
|
895
|
"<dc:description/>" +
|
896
|
"<dc:publisher/>" +
|
897
|
"<dc:date>2011</dc:date>" +
|
898
|
"<dc:date>info:eu-repo/date/embargoEnd/2011-05-12</dc:date>" +
|
899
|
"<dc:date>2004-03-15</dc:date>" +
|
900
|
"<dc:date>2009-02-24T13:27:42Z</dc:date>" +
|
901
|
"<dc:date>2009-02</dc:date>" +
|
902
|
"<dc:date>2009</dc:date>" +
|
903
|
"<dc:type>Conference or Workshop Item</dc:type>" +
|
904
|
"<dc:type>NonPeerReviewed</dc:type>" +
|
905
|
"<dc:type>info:eu-repo/semantics/article</dc:type>" +
|
906
|
"<dc:identifier>http://somehost</dc:identifier>" +
|
907
|
"<dc:format>application/pdf</dc:format>" +
|
908
|
"<dc:relation>info:eu-repo/grantAgreement/EC/FP7/241479</dc:relation>" +
|
909
|
"<dc:relation>http://sherpa.bl.uk/1/01/PMMcgrath.pdf</dc:relation>" +
|
910
|
"<dc:relation>info:eu-repo/grantAgreement/EC/FP7/246682/EU/Towards a 10-Year Vision for Global Research Data Infrastructures/GRDI2020</dc:relation>" +
|
911
|
"<dc:relation>info:eu-repo/grantAgreement/EC/FP7/PITN-GA-2009-237252</dc:relation>" +
|
912
|
"<dc:relation>info:eu-repo/grantAgreement/EC/FP7/PITN-GA-2009-235114</dc:relation>" +
|
913
|
"<dc:relation>info:eu-repo/grantAgreement/EC/FP7/237252</dc:relation>" +
|
914
|
"<dc:identifier>http://dx.doi.org/10.1103/PhysRevLett.104.126402</dc:identifier>" +
|
915
|
"<dc:rights>Tots els drets reservats</dc:rights>" +
|
916
|
"<dc:rights>Used by permission of the publisher</dc:rights>" +
|
917
|
"<dc:rights>info:eu-repo/semantics/openAccess </dc:rights>" +
|
918
|
"</oai_dc:dc>");
|
919
|
return builder.toString();
|
920
|
}
|
921
|
|
922
|
private String getProvenance(){
|
923
|
StringBuilder builder = new StringBuilder();
|
924
|
builder.append(
|
925
|
"<provenance xmlns=\"http://www.openarchives.org/OAI/2.0/provenance\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd\">" +
|
926
|
"<originDescription altered=\"true\" harvestDate=\"2012-01-20T00:04:10Z\">" +
|
927
|
"<baseURL>http://dspace.library.uu.nl:8080/dspace-oai/request</baseURL>" +
|
928
|
"<identifier>oai:dspace.library.uu.nl:1874/218065</identifier>" +
|
929
|
"<datestamp>2012-01-19T12:38:56Z</datestamp>" +
|
930
|
"<metadataNamespace>http://www.loc.gov/mods/v3</metadataNamespace>" +
|
931
|
"</originDescription>" +
|
932
|
"</provenance>");
|
933
|
return builder.toString();
|
934
|
|
935
|
}
|
936
|
|
937
|
}
|