Project

General

Profile

« Previous | Next » 

Revision 48842

Tests moved from msro to dnet-parthenos, where they should have belonged in the first place

View differences:

modules/dnet-msro-service/branches/saxonHE/src/test/java/eu/dnetlib/x3m/ApplyX3MappingTest.java
1
package eu.dnetlib.x3m;
2

  
3
import java.io.IOException;
4

  
5
import eu.dnetlib.msro.workflows.nodes.transform.ApplyX3Mapping;
6
import org.apache.commons.io.IOUtils;
7
import org.junit.Ignore;
8
import org.junit.Test;
9
import org.springframework.core.io.ClassPathResource;
10

  
11
/**
12
 * Created by alessia on 13/03/17.
13
 */
14
public class ApplyX3MappingTest {
15

  
16
	//generic mappings
17
	final String mappingPath = "/eu/dnetlib/x3m/mappings.x3ml";
18
	final String policyPath = "/eu/dnetlib/x3m/maria-policy.xml";
19
	final String forthMappingPath = "/eu/dnetlib/x3m/mappingsWithoutGenerator.x3ml";
20
	//generic files
21
	final String forthInputPath ="/eu/dnetlib/x3m/input.xml";
22

  
23
	//Parthenos mappings
24
	final String parthenosPolicyPath = "/eu/dnetlib/x3m/parthenos_policy.xml";
25
	final String mappingAriadnePath = "/eu/dnetlib/x3m/ariadne_dataset_mapping377.x3ml";
26
	final String mappingEhriPath = "/eu/dnetlib/x3m/ehri_mapping.x3ml";
27
	final String mappingCulturaItaliaPath = "/eu/dnetlib/x3m/cultura_italia_musei.x3ml";
28

  
29
	//Parthenos records
30
	final String ariadnePath = "/eu/dnetlib/x3m/new-10304741.xml";
31
	final String ehriRecord = "/eu/dnetlib/x3m/ehri_test.xml";
32
	final String ehriRecord2 = "/eu/dnetlib/x3m/EHRI_sample_record_328.xml";
33
	final String[] ariadneFiles =
34
			new String[] { ariadnePath, "/eu/dnetlib/x3m/new-10304737.xml", "/eu/dnetlib/x3m/new-10304738.xml", "/eu/dnetlib/x3m/new-10304739.xml",
35
					"/eu/dnetlib/x3m/new-10304740.xml", "/eu/dnetlib/x3m/new-10304741.xml", "/eu/dnetlib/x3m/new-10304742.xml" };
36
	final String[] clarinFiles =
37
			new String[] { "/eu/dnetlib/x3m/clarin_dataset.xml", "/eu/dnetlib/x3m/clarin_dataset2.xml",
38
					"/eu/dnetlib/x3m/clarin_service.xml", "/eu/dnetlib/x3m/clarin_service2.xml" };
39

  
40
	final String culturaItaliaPath = "/eu/dnetlib/x3m/culturaitalia_record.xml";
41

  
42
	@Test
43
	public void testForth() throws Exception{
44
		String m = getString(forthMappingPath);
45
		String r = getString(forthInputPath);
46

  
47
		ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, null, true);
48
		String res = x3m.apply(r);
49
		System.out.println(res);
50
	}
51

  
52
	@Test
53
	public void test() throws IOException {
54
		doBasicTest(this.mappingPath, this.ariadnePath);
55
	}
56

  
57
	@Test
58
	public void testAriadne() throws IOException {
59
		doBasicTest(this.mappingAriadnePath, this.ariadnePath);
60
	}
61

  
62
	@Test
63
	public void testAriadneAll() throws IOException {
64
		String m = getString(mappingAriadnePath);
65
		String g = getString(parthenosPolicyPath);
66

  
67
		ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, g, false);
68
		for(String path : ariadneFiles){
69
			String res = x3m.apply(getString(path));
70
			System.out.println(res);
71
		}
72
	}
73

  
74
	@Test
75
	public void testEhri() throws IOException {
76
		doBasicTest(this.mappingEhriPath, this.ehriRecord);
77
	}
78
	@Test
79
	public void testEhri2(){
80
		doBasicTest(this.mappingEhriPath, this.ehriRecord2);
81
	}
82

  
83
	@Ignore
84
	@Test
85
	public void testCulturaItalia() throws IOException {
86
		doBasicTest(this.mappingCulturaItaliaPath, this.culturaItaliaPath);
87
	}
88

  
89

  
90
	@Test
91
	public void testClarinGysseling365() throws IOException {
92
		doBasicTest("/eu/dnetlib/x3m/clarin_mapping_365.x3ml", "/eu/dnetlib/x3m/clarin_gysseling_corpus.xml");
93
	}
94

  
95
	@Ignore
96
	@Test
97
	public void testAllClarin(){
98
		String mapService = getString("/eu/dnetlib/x3m/clarin_service_mapping.x3ml");
99
		String mapData = getString("/eu/dnetlib/x3m/clarin_dataset_mapping.x3ml");
100
		String policy = getString(parthenosPolicyPath);
101
		ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{mapData,mapService}, policy, true);
102
		for(String path : clarinFiles){
103
			String res = x3m.apply(getString(path));
104
			System.out.println(res);
105
		}
106
	}
107

  
108

  
109
	public void doBasicTest(String mappingPath, String recordPath){
110
		String m = getString(mappingPath);
111
		String g = getString(parthenosPolicyPath);
112
		String r = getString(recordPath);
113
		ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, g, true);
114
		String res = x3m.apply(r);
115
		System.out.println(res);
116

  
117
	}
118

  
119
	private String getString(final String classpath) {
120
		try {
121
			final ClassPathResource resource = new ClassPathResource(classpath);
122
			return IOUtils.toString(resource.getInputStream(), "UTF-8");
123
		}catch(IOException e){
124
			return null;
125
		}
126
	}
127
}
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/culturaItalia_record.xml
1
<pico:record xmlns:pico="http://purl.org/pico/1.0/"
2
             xmlns=""
3
             xmlns:premis="info:lc/xmlns/premis-v2"
4
             xmlns:mets="http://www.loc.gov/METS/"
5
             xmlns:vra="http://www.vraweb.org/vracore4.htm"
6
             xmlns:f="http://purl.org/pico/iccd/2.00/f/"
7
             xmlns:nu="http://purl.org/pico/iccd/3.00/nu/"
8
             xmlns:bdm="http://purl.org/pico/iccd/2.00/bdm/"
9
             xmlns:iccd="http://purl.org/pico/iccd/2.00/"
10
             xmlns:iccd3="http://purl.org/pico/iccd/3.00/"
11
             xmlns:smi="http://purl.org/pico/iccd/2.00/s-mi/"
12
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
13
             xmlns:oad="http://purl.org/pico/iccd/2.00/oa-d-n/"
14
             xmlns:dcterms="http://purl.org/dc/terms/"
15
             xmlns:mix="http://www.loc.gov/mix/v20"
16
             xmlns:dc="http://purl.org/dc/elements/1.1/"
17
             xmlns:xlink="http://www.w3.org/1999/xlink"
18
             xsi:schemaLocation="http://purl.org/pico/1.0/               http://www.culturaitalia.it/pico/schemas/1.0/pico.xsd                     http://purl.org/pico/iccd/2.00/         http://www.culturaitalia.it/pico/schemas/iccd/2.00/iccd.xsd                     http://purl.org/pico/iccd/2.00/oa-d-n/  http://www.culturaitalia.it/pico/schemas/iccd/2.00/oa-d-n.xsd                     http://purl.org/pico/iccd/2.00/s-mi/    http://www.culturaitalia.it/pico/schemas/iccd/2.00/s-mi.xsd                     http://purl.org/pico/iccd/2.00/bdm/     http://www.culturaitalia.it/pico/schemas/iccd/2.00/bdm.xsd                     http://purl.org/pico/iccd/2.00/f/       http://www.culturaitalia.it/pico/schemas/iccd/2.00/f.xsd                     http://purl.org/pico/iccd/3.00/         http://www.culturaitalia.it/pico/schemas/iccd/3.00/iccd.xsd                     http://purl.org/pico/iccd/3.00/nu/      http://www.culturaitalia.it/pico/schemas/iccd/3.00/nu.xsd">
19
	<dc:identifier>coll_91</dc:identifier>
20
	<dc:title>Collezione di telecomunicazioni del Museo Nazionale della Scienza e della Tecnologia "Leonardo da Vinci"</dc:title>
21
	<dc:description xml:lang="it">La raccolta si compone di oltre 1300 beni, dal 1850 ad oggi, ed è costituita da strumenti ed apparati di tipo storico e didattico per la comunicazione a distanza, provenienti da enti pubblici, università, aziende e privati cittadini. Fanno parte di questa collezione cimeli rari come quelli marconiani e importanti riproduzioni storiche (come i telefoni di Meucci, il pantelegrafo di Caselli), impianti o parti di apparati pubblici e privati, dispositivi utilizzati in laboratori specialistici e prodotti industriali di largo consumo.</dc:description>
22
	<dc:description xml:lang="it">Nell'ambito del Sistema Informativo Regionale Beni Culturali sono state realizzate 148 schede di catalogo.</dc:description>
23
	<dc:description xml:lang="it">La raccolta di telecomunicazioni nei primi anni di vita del Museo, 1953-1958, contava già più di cento importanti beni provenienti da istituti pubblici e privati e da privati cittadini (CNR, l'allora Ministero delle Poste e Telecomunicazioni, la Società Radiomattima, la Compagnia Marconi, la Magneti Marelli, o il cavalier Donner Flori). Nel 1971, in occasione dell'inaugurazione di una nuova sala dedicata alle telecomunicazioni, la raccolta si arricchì di beni aventi una più ampia apertura verso le tecnologie della rete, grazie anche alle collaborazioni con società come la SIT-Siemens, la SIP, la Sirti, la Telettra. La raccolta è costantemente incrementata e aggiornata sulle nuove tecnologie anche in relazione alle nuove sezione espositive di Telegrafo e telefono, Radio (2005) e Televisione (2008).</dc:description>
24
	<dcterms:spatial>Museo Nazionale della Scienza e della Tecnologia "Leonardo da Vinci", Via San Vittore, 21 - Milano (MI), Italia - proprietà privata</dcterms:spatial>
25
	<dcterms:spatial xsi:type="pico:ISTAT">name=Milano; year=2001; code=015146</dcterms:spatial>
26
	<dcterms:created>1953 post; XX/ XXI</dcterms:created>
27
	<dcterms:created xsi:type="dcterms:Period">start=1953; end=1953</dcterms:created>
28
	<dc:type xsi:type="mdi:Type">Collezioni</dc:type>
29
	<dc:type xsi:type="dcterms:DCMIType">Collection</dc:type>
30
	<dcterms:isPartOf xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-mus_4467</dcterms:isPartOf>
31
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7244</dcterms:hasPart>
32
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7245</dcterms:hasPart>
33
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7246</dcterms:hasPart>
34
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7247</dcterms:hasPart>
35
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7248</dcterms:hasPart>
36
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7249</dcterms:hasPart>
37
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7250</dcterms:hasPart>
38
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7251</dcterms:hasPart>
39
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7252</dcterms:hasPart>
40
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7253</dcterms:hasPart>
41
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7254</dcterms:hasPart>
42
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7255</dcterms:hasPart>
43
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7256</dcterms:hasPart>
44
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7257</dcterms:hasPart>
45
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7258</dcterms:hasPart>
46
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7259</dcterms:hasPart>
47
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7260</dcterms:hasPart>
48
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7261</dcterms:hasPart>
49
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7262</dcterms:hasPart>
50
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7263</dcterms:hasPart>
51
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7264</dcterms:hasPart>
52
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7265</dcterms:hasPart>
53
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7266</dcterms:hasPart>
54
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7267</dcterms:hasPart>
55
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7268</dcterms:hasPart>
56
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7269</dcterms:hasPart>
57
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7270</dcterms:hasPart>
58
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7271</dcterms:hasPart>
59
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7272</dcterms:hasPart>
60
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7273</dcterms:hasPart>
61
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7274</dcterms:hasPart>
62
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7275</dcterms:hasPart>
63
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7276</dcterms:hasPart>
64
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7277</dcterms:hasPart>
65
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7278</dcterms:hasPart>
66
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7279</dcterms:hasPart>
67
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7280</dcterms:hasPart>
68
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7281</dcterms:hasPart>
69
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7282</dcterms:hasPart>
70
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7283</dcterms:hasPart>
71
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7284</dcterms:hasPart>
72
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7285</dcterms:hasPart>
73
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7286</dcterms:hasPart>
74
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7287</dcterms:hasPart>
75
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7288</dcterms:hasPart>
76
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7289</dcterms:hasPart>
77
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7290</dcterms:hasPart>
78
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7291</dcterms:hasPart>
79
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7292</dcterms:hasPart>
80
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7293</dcterms:hasPart>
81
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7294</dcterms:hasPart>
82
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7295</dcterms:hasPart>
83
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7296</dcterms:hasPart>
84
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7297</dcterms:hasPart>
85
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7298</dcterms:hasPart>
86
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7299</dcterms:hasPart>
87
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7300</dcterms:hasPart>
88
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7301</dcterms:hasPart>
89
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7302</dcterms:hasPart>
90
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7303</dcterms:hasPart>
91
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7304</dcterms:hasPart>
92
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7305</dcterms:hasPart>
93
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7306</dcterms:hasPart>
94
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7307</dcterms:hasPart>
95
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7308</dcterms:hasPart>
96
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7309</dcterms:hasPart>
97
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7310</dcterms:hasPart>
98
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7311</dcterms:hasPart>
99
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7312</dcterms:hasPart>
100
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7313</dcterms:hasPart>
101
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7314</dcterms:hasPart>
102
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7315</dcterms:hasPart>
103
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7316</dcterms:hasPart>
104
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7317</dcterms:hasPart>
105
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7318</dcterms:hasPart>
106
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7319</dcterms:hasPart>
107
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7320</dcterms:hasPart>
108
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7321</dcterms:hasPart>
109
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7322</dcterms:hasPart>
110
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7323</dcterms:hasPart>
111
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7324</dcterms:hasPart>
112
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7325</dcterms:hasPart>
113
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7326</dcterms:hasPart>
114
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7327</dcterms:hasPart>
115
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7328</dcterms:hasPart>
116
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7329</dcterms:hasPart>
117
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7330</dcterms:hasPart>
118
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7331</dcterms:hasPart>
119
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7332</dcterms:hasPart>
120
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7333</dcterms:hasPart>
121
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7334</dcterms:hasPart>
122
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7335</dcterms:hasPart>
123
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7336</dcterms:hasPart>
124
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7337</dcterms:hasPart>
125
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7338</dcterms:hasPart>
126
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7339</dcterms:hasPart>
127
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7340</dcterms:hasPart>
128
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7341</dcterms:hasPart>
129
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7342</dcterms:hasPart>
130
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7343</dcterms:hasPart>
131
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7344</dcterms:hasPart>
132
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7345</dcterms:hasPart>
133
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7346</dcterms:hasPart>
134
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7347</dcterms:hasPart>
135
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7348</dcterms:hasPart>
136
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7349</dcterms:hasPart>
137
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7350</dcterms:hasPart>
138
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7351</dcterms:hasPart>
139
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7352</dcterms:hasPart>
140
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7353</dcterms:hasPart>
141
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7354</dcterms:hasPart>
142
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7355</dcterms:hasPart>
143
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7356</dcterms:hasPart>
144
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7357</dcterms:hasPart>
145
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7358</dcterms:hasPart>
146
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7359</dcterms:hasPart>
147
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7360</dcterms:hasPart>
148
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7361</dcterms:hasPart>
149
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7362</dcterms:hasPart>
150
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7363</dcterms:hasPart>
151
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7364</dcterms:hasPart>
152
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7365</dcterms:hasPart>
153
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7366</dcterms:hasPart>
154
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7367</dcterms:hasPart>
155
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7368</dcterms:hasPart>
156
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7369</dcterms:hasPart>
157
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7370</dcterms:hasPart>
158
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7371</dcterms:hasPart>
159
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7372</dcterms:hasPart>
160
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7373</dcterms:hasPart>
161
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7374</dcterms:hasPart>
162
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7375</dcterms:hasPart>
163
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7376</dcterms:hasPart>
164
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7377</dcterms:hasPart>
165
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7378</dcterms:hasPart>
166
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7379</dcterms:hasPart>
167
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7380</dcterms:hasPart>
168
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7381</dcterms:hasPart>
169
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7382</dcterms:hasPart>
170
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7383</dcterms:hasPart>
171
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7384</dcterms:hasPart>
172
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7385</dcterms:hasPart>
173
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7386</dcterms:hasPart>
174
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7387</dcterms:hasPart>
175
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7388</dcterms:hasPart>
176
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7389</dcterms:hasPart>
177
	<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7390</dcterms:hasPart>
178
	<dcterms:isReferencedBy xml:lang="it">scheda SIRBeC COL: COL-ST010-0000002</dcterms:isReferencedBy>
179
	<pico:preview xsi:type="dcterms:URI">http://194.242.241.163/fedora/objects/coll:91/datastreams/MM258501/content</pico:preview>
180
	<dcterms:isReferencedBy xsi:type="pico:Anchor">title=visualizza il file Mets; URL=fedora/objects/coll:91/datastreams/export/content</dcterms:isReferencedBy>
181
</pico:record>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/EHRI_sample_record_328.xml
1
<?xml version="1.0" ?>
2
<ead xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd">
3
    <eadheader countryencoding="iso3166-1" dateencoding="iso8601" scriptencoding="iso15924" repositoryencoding="iso15511" relatedencoding="DC">
4
        <eadid>us-005578-irn516886</eadid>
5
        <filedesc>
6
            <titlestmt>
7
                <titleproper>Romana Primus photograph collection</titleproper>
8
            </titlestmt>
9
            <publicationstmt>
10
                <publisher>United States Holocaust Memorial Museum</publisher>
11
                <address>
12
                    <addressline>100 Raoul Wallenberg Place, S.W.</addressline>
13
                    <addressline>DC 20024-2126</addressline>
14
                    <addressline>Washington</addressline>
15
                    <addressline>District of Columbia</addressline>
16
                    <addressline>US</addressline>
17
                    <addressline>202 488 0400</addressline>
18
                    <addressline>202-479-9726</addressline>
19
                    <addressline>http://www.ushmm.org/</addressline>
20
                    <addressline>archives@ushmm.org</addressline>
21
                    <addressline>United States</addressline>
22
                </address>
23
            </publicationstmt>
24
            <notestmt>
25
                <note>
26
                    <p>This encoded description is derived from structured data provided to EHRI by a partner institution but may differ in structure and/or content from its source. The collection holding institution considers this description as an accurate reflection of the archival holdings to which it refers at the moment of data transfer.</p>
27
                </note>
28
            </notestmt>
29
        </filedesc>
30
        <profiledesc>
31
            <creation>This file was exported automatically from the EHRI database administration tool and represents a work-in-progress.
32
                <date normal="20170601">2017-06-01T16:15:08.688+01:00</date>
33
            </creation>
34
            <langusage>
35
                <language langcode="eng">English</language>
36
            </langusage>
37
        </profiledesc>
38
        <revisiondesc>
39
            <change>
40
                <date>2014-12-19T16:12:54.402Z</date>
41
                <item>These files were provided by the United States Holocaust Memorial Museum to EHRI on 2014-11-21.
42

  
43
                    [ingest]</item>
44
            </change>
45
        </revisiondesc>
46
    </eadheader>
47
    <archdesc level="collection">
48
        <did>
49
            <unitid>irn516886</unitid>
50
            <unittitle encodinganalog="3.1.2">Romana Primus photograph collection</unittitle>
51
            <unitdate encodinganalog="3.1.3">1946-1947</unitdate>
52
            <repository>
53
                <corpname>United States Holocaust Memorial Museum</corpname>
54
            </repository>
55
        </did>
56
        <scopecontent encodinganalog="3.3.1">
57
            <p><![CDATA[The collection consists of four photographs of Romana Strochlitz Primus as a baby, her parents, Sigmund and Ruzka (Rose) Grinburg Strochlitz, and other refugees at the Bergen-Belsen displaced persons camp in Germany after World War II.]]></p>
58
        </scopecontent>
59
        <accessrestrict encodinganalog="3.4.1">
60
            <p><![CDATA[No restrictions on access]]></p>
61
        </accessrestrict>
62
        <userestrict encodinganalog="3.4.2">
63
            <p><![CDATA[No restrictions on use]]></p>
64
        </userestrict>
65
        <acqinfo encodinganalog="3.2.4">
66
            <p><![CDATA[Accession number: 1999.18]]></p>
67
        </acqinfo>
68
        <custodhist encodinganalog="3.2.3">
69
            <p><![CDATA[The collection was donated to the United States Holocaust Memorial Museum by Romana Strochlitz Primus in 1999.]]></p>
70
        </custodhist>
71
        <odd encodinganalog="3.6.1">
72
            <p><![CDATA[Record type: Document]]></p>
73
        </odd>
74
        <controlaccess>
75
            <subject>Refugees--Germany--1940-1950.</subject>
76
            <subject>Refugee camps--Germany--1940-1950.</subject>
77
        </controlaccess>
78
        <controlaccess>
79
            <genreform>Photographs.</genreform>
80
        </controlaccess>
81
        <controlaccess>
82
            <persname>Kirszenbaum, Halina Grauman.</persname>
83
            <persname>Brechner, Dosia Grinburg.</persname>
84
            <persname>Weinreich, Hela.</persname>
85
            <persname>Strochlitz, Rose Grinburg.</persname>
86
            <persname>Strochlitz, Sigmund.</persname>
87
            <persname>Primus, Romana Strochlitz.</persname>
88
        </controlaccess>
89
    </archdesc>
90
</ead>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/ehri_test.xml
1
<ead xmlns="urn:isbn:1-931666-22-9"
2
     xmlns:xlink="http://www.w3.org/1999/xlink"
3
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4
     xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd">
5
	<eadheader xmlns=""
6
	           countryencoding="iso3166-1"
7
	           dateencoding="iso8601"
8
	           relatedencoding="DC"
9
	           repositoryencoding="iso15511"
10
	           scriptencoding="iso15924">
11
		<eadid>us-005578-irn516886</eadid>
12
		<filedesc>
13
			<titlestmt>
14
				<titleproper>Romana Primus photograph collection</titleproper>
15
			</titlestmt>
16
			<publicationstmt>
17
				<publisher>United States Holocaust Memorial Museum</publisher>
18
				<address>
19
					<addressline>100 Raoul Wallenberg Place, S.W.</addressline>
20
					<addressline>DC 20024-2126</addressline>
21
					<addressline>Washington</addressline>
22
					<addressline>District of Columbia</addressline>
23
					<addressline>US</addressline>
24
					<addressline>202 488 0400</addressline>
25
					<addressline>202-479-9726</addressline>
26
					<addressline>http://www.ushmm.org/</addressline>
27
					<addressline>archives@ushmm.org</addressline>
28
					<addressline>United States</addressline>
29
				</address>
30
			</publicationstmt>
31
			<notestmt>
32
				<note>
33
					<p>This encoded description is derived from structured data provided to EHRI by a partner institution but may differ in structure and/or content from its source. The collection holding institution considers this description as an accurate reflection of the archival holdings to which it refers at the moment of data transfer.</p>
34
				</note>
35
			</notestmt>
36
		</filedesc>
37
		<profiledesc>
38
			<creation>This file was exported automatically from the EHRI database administration tool and represents a work-in-progress.
39
				<date normal="20170513">2017-05-13T09:25:31.407+01:00</date>
40
			</creation>
41
			<langusage>
42
				<language langcode="eng">English</language>
43
			</langusage>
44
		</profiledesc>
45
		<revisiondesc>
46
			<change>
47
				<date>2014-12-19T16:12:54.402Z</date>
48
				<item>These files were provided by the United States Holocaust Memorial Museum to EHRI on 2014-11-21.
49

  
50
					[ingest]
51
				</item>
52
			</change>
53
		</revisiondesc>
54
	</eadheader>
55
	<archdesc xmlns="" level="collection">
56
		<did>
57
			<unitid>irn516886</unitid>
58
			<unittitle encodinganalog="3.1.2">Romana Primus photograph collection</unittitle>
59
			<unitdate encodinganalog="3.1.3">1946-1947</unitdate>
60
			<repository>
61
				<corpname>United States Holocaust Memorial Museum</corpname>
62
			</repository>
63
		</did>
64
		<scopecontent encodinganalog="3.3.1">
65
			<p>The collection consists of four photographs of Romana Strochlitz Primus as a baby, her parents, Sigmund and Ruzka (Rose) Grinburg Strochlitz, and other refugees at the Bergen-Belsen displaced persons camp in Germany after World War II.</p>
66
		</scopecontent>
67
		<accessrestrict encodinganalog="3.4.1">
68
			<p>No restrictions on access</p>
69
		</accessrestrict>
70
		<userestrict encodinganalog="3.4.2">
71
			<p>No restrictions on use</p>
72
		</userestrict>
73
		<acqinfo encodinganalog="3.2.4">
74
			<p>Accession number: 1999.18</p>
75
		</acqinfo>
76
		<custodhist encodinganalog="3.2.3">
77
			<p>The collection was donated to the United States Holocaust Memorial Museum by Romana Strochlitz Primus in 1999.</p>
78
		</custodhist>
79
		<odd encodinganalog="3.6.1">
80
			<p>Record type: Document</p>
81
		</odd>
82
		<controlaccess>
83
			<subject>Refugees--Germany--1940-1950.</subject>
84
			<subject>Refugee camps--Germany--1940-1950.</subject>
85
		</controlaccess>
86
		<controlaccess>
87
			<persname>Kirszenbaum, Halina Grauman.</persname>
88
			<persname>Brechner, Dosia Grinburg.</persname>
89
			<persname>Weinreich, Hela.</persname>
90
			<persname>Strochlitz, Rose Grinburg.</persname>
91
			<persname>Strochlitz, Sigmund.</persname>
92
			<persname>Primus, Romana Strochlitz.</persname>
93
		</controlaccess>
94
		<controlaccess>
95
			<genreform>Photographs.</genreform>
96
		</controlaccess>
97
	</archdesc>
98
</ead>
99

  
100

  
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_dataset2.xml
1
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1407745711925/xsd">
2
	<cmd:Header>
3
		<cmd:MdCreator>Gunn Inger Lyse Samdal</cmd:MdCreator>
4
		<cmd:MdCreationDate>2015-10-12</cmd:MdCreationDate>
5
		<cmd:MdSelfLink>http://hdl.handle.net/11509/80</cmd:MdSelfLink>
6
		<cmd:MdProfile>clarin.eu:cr1:p_1407745711925</cmd:MdProfile>
7
		<cmd:MdCollectionDisplayName>Clarino UiB</cmd:MdCollectionDisplayName>
8
	</cmd:Header>
9
	<cmd:Resources>
10
		<cmd:ResourceProxyList>
11
			<cmd:ResourceProxy id="landing-page-ubb">
12
				<cmd:ResourceType mimetype="">LandingPage</cmd:ResourceType>
13
				<cmd:ResourceRef>http://hdl.handle.net/11509/80</cmd:ResourceRef>
14
			</cmd:ResourceProxy>
15
			<cmd:ResourceProxy id="search-page-corpuscle">
16
				<cmd:ResourceType mimetype="">SearchPage</cmd:ResourceType>
17
				<cmd:ResourceRef>http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no&amp;view=short</cmd:ResourceRef>
18
			</cmd:ResourceProxy>
19
			<cmd:ResourceProxy id="resource-fn">
20
				<cmd:ResourceType mimetype="">Resource</cmd:ResourceType>
21
				<cmd:ResourceRef>http://hdl.handle.net/11509/80</cmd:ResourceRef>
22
			</cmd:ResourceProxy>
23
		</cmd:ResourceProxyList>
24
		<cmd:JournalFileProxyList/>
25
		<cmd:ResourceRelationList/>
26
	</cmd:Resources>
27
	<cmd:IsPartOfList/>
28
	<cmd:Components>
29
		<cmdp:corpusProfile>
30
			<cmdp:resourceCommonInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485126">
31
				<cmdp:resourceType>corpus</cmdp:resourceType>
32
				<cmdp:identificationInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485125">
33
					<cmdp:resourceName xml:lang="en">Text material from Forskning.no (1998 - 2012)</cmdp:resourceName>
34
					<cmdp:description xml:lang="en">Data set containing texts from the popular science website forskning.no. The text material is constituted by articles published by Forskning.no belonging to the following three categories:
35
						1) Articles written by journalists employed at forskning.no
36
						2) Articles written by member institutions of forskning.no (76 universities, colleges, research centers, research departments in government agencies and more). These articles are written by staff journalists, information officers and other non-academic staff. Each article has been edited by forskning.no.
37
						3) Articles from the newsdesk NRK Viten, with whom forskning.no cooperates. These articles are written by NRK journalists.
38
						Forskning.no kindly makes this material available in CLARINO as downloadable XML to promote language research. CLARINO's agreement also includes the permission to use future articles to be published by forskning.no; as of October 2015, however, the newest downloadable text is from October 2012.
39

  
40
						ACCESS: the material is available in downloadable form at the CLARINO Bergen Centre and in searchable form at Corpuscle (see links in metadata). Corpuscle allows you to pass queries to the corpus, and you may ask for concordances, collocations and distribution.</cmdp:description>
41
					<cmdp:resourceShortName xml:lang="no">Forskning.no</cmdp:resourceShortName>
42
					<cmdp:url description="downloadable - UBB" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:url>
43
					<cmdp:url description="searchable - Corpuscle" ref="search-page-corpuscle">http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no&amp;view=short</cmdp:url>
44
					<cmdp:PID description="handle @ UBB" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:PID>
45
				</cmdp:identificationInfo>
46
				<cmdp:distributionInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485124">
47
					<cmdp:licenceInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485158">
48
						<cmdp:userCategory>Restricted</cmdp:userCategory>
49
						<cmdp:distributionAccessMedium ref="landing-page-ubb">downloadable</cmdp:distributionAccessMedium>
50
						<cmdp:distributionAccessMedium ref="search-page-corpuscle">accessibleThroughInterface</cmdp:distributionAccessMedium>
51
						<cmdp:downloadLocation description="landing page @ UBB portal" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:downloadLocation>
52
						<cmdp:downloadLocation description="landing page @ Corpuscle" ref="search-page-corpuscle">http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no</cmdp:downloadLocation>
53
						<cmdp:licence cmd:ComponentId="clarin.eu:cr1:c_1447674760330">
54
							<cmdp:licenceFamily>CLARIN</cmdp:licenceFamily>
55
							<cmdp:licenceName>CLARIN_RES-DEP</cmdp:licenceName>
56
							<cmdp:licenceURL>https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEula?RES=1&amp;ID=1&amp;PERM=1&amp;PLAN=1&amp;BY=1&amp;NORED=1&amp;DEP=1</cmdp:licenceURL>
57
							<cmdp:conditionsOfUse>BY</cmdp:conditionsOfUse>
58
							<cmdp:conditionsOfUse>DEP</cmdp:conditionsOfUse>
59
							<cmdp:conditionsOfUse>ID</cmdp:conditionsOfUse>
60
							<cmdp:conditionsOfUse>NORED</cmdp:conditionsOfUse>
61
							<cmdp:conditionsOfUse>PLAN</cmdp:conditionsOfUse>
62
							<cmdp:nonStandardConditionsOfUse>It is not allowed to distribute/publish complete articles, presented as a coherent text, from the Resource.</cmdp:nonStandardConditionsOfUse>
63
						</cmdp:licence>
64
						<cmdp:licensor>
65
							<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194">
66
								<cmdp:actorType>organization</cmdp:actorType>
67
								<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192">
68
									<cmdp:surname>Kristiansen</cmdp:surname>
69
									<cmdp:givenName>Nina</cmdp:givenName>
70
									<cmdp:position>Editor in chief</cmdp:position>
71
									<cmdp:affiliation>
72
										<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
73
											<cmdp:organizationName>forskning.no</cmdp:organizationName>
74
										</cmdp:organizationInfo>
75
									</cmdp:affiliation>
76
								</cmdp:personInfo>
77
								<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
78
									<cmdp:organizationName>forskning.no</cmdp:organizationName>
79
								</cmdp:organizationInfo>
80
								<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
81
									<cmdp:email>Nina@forskning.no</cmdp:email>
82
									<cmdp:city>Oslo</cmdp:city>
83
									<cmdp:country>Norway</cmdp:country>
84
								</cmdp:communicationInfo>
85
							</cmdp:actorInfo>
86
						</cmdp:licensor>
87
					</cmdp:licenceInfo>
88
				</cmdp:distributionInfo>
89
				<cmdp:contact>
90
					<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194">
91
						<cmdp:actorType>organization</cmdp:actorType>
92
						<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
93
							<cmdp:organizationName xml:lang="en">CLARINO Bergen Centre</cmdp:organizationName>
94
						</cmdp:organizationInfo>
95
						<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
96
							<cmdp:email>clarin@uib.no</cmdp:email>
97
							<cmdp:url>https://repo.clarino.uib.no/xmlui/</cmdp:url>
98
						</cmdp:communicationInfo>
99
					</cmdp:actorInfo>
100
				</cmdp:contact>
101
				<cmdp:metadataInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711922">
102
					<cmdp:metadataCreationDate>2015-07-29</cmdp:metadataCreationDate>
103
					<cmdp:metadataLanguageName>English</cmdp:metadataLanguageName>
104
					<cmdp:metadataLanguageId>en</cmdp:metadataLanguageId>
105
					<cmdp:metadataLastDateUpdated>2016-02-12</cmdp:metadataLastDateUpdated>
106
					<cmdp:metadataCreator>
107
						<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194">
108
							<cmdp:actorType>person</cmdp:actorType>
109
							<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192">
110
								<cmdp:surname xml:lang="no">Lyse</cmdp:surname>
111
								<cmdp:givenName xml:lang="no">Gunn Inger</cmdp:givenName>
112
								<cmdp:sex>female</cmdp:sex>
113
								<cmdp:position>Researcher (Ph.D)</cmdp:position>
114
								<cmdp:affiliation>
115
									<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
116
										<cmdp:organizationName xml:lang="en">University of Bergen</cmdp:organizationName>
117
										<cmdp:organizationName xml:lang="no">Universitetet i Bergen</cmdp:organizationName>
118
										<cmdp:organizationShortName xml:lang="no">UiB</cmdp:organizationShortName>
119
										<cmdp:organizationShortName xml:lang="en">UoB</cmdp:organizationShortName>
120
										<cmdp:departmentName xml:lang="en">Department of Linguistic, Literary and Aesthetic Studies</cmdp:departmentName>
121
									</cmdp:organizationInfo>
122
								</cmdp:affiliation>
123
							</cmdp:personInfo>
124
							<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
125
								<cmdp:email>clarin@uib.no</cmdp:email>
126
							</cmdp:communicationInfo>
127
						</cmdp:actorInfo>
128
					</cmdp:metadataCreator>
129
				</cmdp:metadataInfo>
130
				<cmdp:resourceCreationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711921">
131
					<cmdp:resourceCreator>
132
						<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194" cmd:ref="resource-fn">
133
							<cmdp:actorType>person</cmdp:actorType>
134
							<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192">
135
								<cmdp:surname xml:lang="en">Hofland</cmdp:surname>
136
								<cmdp:givenName xml:lang="en">Knut</cmdp:givenName>
137
								<cmdp:sex>male</cmdp:sex>
138
								<cmdp:position>Fagkonsulent / Specialist Consultant</cmdp:position>
139
								<cmdp:affiliation>
140
									<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
141
										<cmdp:organizationName xml:lang="en">Uni Research AS</cmdp:organizationName>
142
										<cmdp:departmentName xml:lang="en">Uni Research Computing</cmdp:departmentName>
143
									</cmdp:organizationInfo>
144
								</cmdp:affiliation>
145
							</cmdp:personInfo>
146
							<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
147
								<cmdp:email>knut.hofland@uni.no</cmdp:email>
148
								<cmdp:url>http://uni.no/nb/staff/directory/knut-hofland/</cmdp:url>
149
								<cmdp:city>Bergen</cmdp:city>
150
								<cmdp:country>Norway</cmdp:country>
151
								<cmdp:telephoneNumber>+47 5558 9463</cmdp:telephoneNumber>
152
							</cmdp:communicationInfo>
153
						</cmdp:actorInfo>
154
						<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194" cmd:ref="search-page-corpuscle">
155
							<cmdp:actorType>person</cmdp:actorType>
156
							<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192">
157
								<cmdp:surname xml:lang="en">Meurer</cmdp:surname>
158
								<cmdp:givenName xml:lang="en">Paul</cmdp:givenName>
159
								<cmdp:sex>male</cmdp:sex>
160
								<cmdp:position>Senior researcher</cmdp:position>
161
								<cmdp:affiliation>
162
									<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
163
										<cmdp:organizationName xml:lang="en">Uni Research AS</cmdp:organizationName>
164
										<cmdp:departmentName xml:lang="en">Uni Research Computing</cmdp:departmentName>
165
									</cmdp:organizationInfo>
166
								</cmdp:affiliation>
167
							</cmdp:personInfo>
168
							<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
169
								<cmdp:email>paul.meurer@uni.no</cmdp:email>
170
							</cmdp:communicationInfo>
171
						</cmdp:actorInfo>
172
					</cmdp:resourceCreator>
173
				</cmdp:resourceCreationInfo>
174
			</cmdp:resourceCommonInfo>
175
			<cmdp:corpusInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711878">
176
				<cmdp:corpusType>Written Corpus</cmdp:corpusType>
177
				<cmdp:corpusPartInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711885">
178
					<cmdp:mediaType>text</cmdp:mediaType>
179
					<cmdp:corpusTextInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485188"/>
180
				</cmdp:corpusPartInfo>
181
				<cmdp:corpusPartGeneralInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711882">
182
					<cmdp:sourceWorkInfo cmd:ComponentId="clarin.eu:cr1:c_1407745712071">
183
						<cmdp:workDescription>The text material is constituted by articles published by Forskning.no (CLARINO's agreement also includes the permission to use future articles to be published by Forskning.no) belonging to the following three categories:
184
							1) Articles written by journalists employed at Forskning.no
185
							2) Articles written by member institutions of Forskning.no (76 universities, colleges, research
186
							centers, research departments in government agencies and more). These articles are written by staff journalists, information officers and other non-academic staff. Each article has been edited by Forskning.no.
187
							3) Articles from the newsdesk NRK Viten, with whom Forskning.no cooperates. These articles are written by NRK journalists. A full list of partner/cooperation institutions may be presented on demand.</cmdp:workDescription>
188
						<cmdp:publisher>
189
							<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194">
190
								<cmdp:actorType>organization</cmdp:actorType>
191
								<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883">
192
									<cmdp:organizationName>forskning.no</cmdp:organizationName>
193
								</cmdp:organizationInfo>
194
								<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460">
195
									<cmdp:email>Nina@forskning.no</cmdp:email>
196
									<cmdp:city>Oslo</cmdp:city>
197
									<cmdp:country>Norway</cmdp:country>
198
								</cmdp:communicationInfo>
199
							</cmdp:actorInfo>
200
						</cmdp:publisher>
201
					</cmdp:sourceWorkInfo>
202
					<cmdp:lingualityInfo cmd:ComponentId="clarin.eu:cr1:c_1355150532313">
203
						<cmdp:lingualityType>monolingual</cmdp:lingualityType>
204
					</cmdp:lingualityInfo>
205
					<cmdp:languageInfo cmd:ComponentId="clarin.eu:cr1:c_1428388179423">
206
						<cmdp:languageId>no</cmdp:languageId>
207
						<cmdp:languageName>Norwegian</cmdp:languageName>
208
					</cmdp:languageInfo>
209
					<cmdp:languageInfo cmd:ComponentId="clarin.eu:cr1:c_1428388179423">
210
						<cmdp:languageId>nb</cmdp:languageId>
211
						<cmdp:languageName>Norwegian Bokmål</cmdp:languageName>
212
					</cmdp:languageInfo>
213
					<cmdp:modalityInfo cmd:ComponentId="clarin.eu:cr1:c_1447674760356">
214
						<cmdp:modalityType>writtenLanguage</cmdp:modalityType>
215
					</cmdp:modalityInfo>
216
					<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785">
217
						<cmdp:size>ca. 489 000</cmdp:size>
218
						<cmdp:sizeUnit>sentences</cmdp:sizeUnit>
219
					</cmdp:sizeInfo>
220
					<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785">
221
						<cmdp:size>ca. 8 300 000</cmdp:size>
222
						<cmdp:sizeUnit>words</cmdp:sizeUnit>
223
					</cmdp:sizeInfo>
224
					<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785">
225
						<cmdp:size>ca. 13 200</cmdp:size>
226
						<cmdp:sizeUnit>articles</cmdp:sizeUnit>
227
					</cmdp:sizeInfo>
228
					<cmdp:classificationInfo cmd:ComponentId="clarin.eu:cr1:c_1403588862809">
229
						<cmdp:genreInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711877">
230
							<cmdp:genreType>textGenre</cmdp:genreType>
231
							<cmdp:genre>newspaper and magazines</cmdp:genre>
232
						</cmdp:genreInfo>
233
					</cmdp:classificationInfo>
234
					<cmdp:timeCoverageInfo cmd:ComponentId="clarin.eu:cr1:c_1447674760358">
235
						<cmdp:timeCoverage>1998-05-01 - 2012-10-20</cmdp:timeCoverage>
236
					</cmdp:timeCoverageInfo>
237
				</cmdp:corpusPartGeneralInfo>
238
			</cmdp:corpusInfo>
239
		</cmdp:corpusProfile>
240
	</cmd:Components>
241
</cmd:CMD>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_gysseling_corpus.xml
1
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1"
2
         xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1271859438164"
3
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4
         CMDVersion="1.2"
5
         xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1271859438164 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1271859438164/xsd">
6
	<cmd:Header>
7
		<cmd:MdCreator>servicedesk@inl.nl</cmd:MdCreator>
8
		<cmd:MdCreationDate>2012-06-11</cmd:MdCreationDate>
9
		<cmd:MdSelfLink>hdl:10032/cd747deda6f459853a24906eaa20b3e1</cmd:MdSelfLink>
10
		<cmd:MdProfile>clarin.eu:cr1:p_1271859438164</cmd:MdProfile>
11
		<cmd:MdCollectionDisplayName>INL Taalbank Nederlands</cmd:MdCollectionDisplayName>
12
	</cmd:Header>
13
	<cmd:Resources>
14
		<cmd:ResourceProxyList>
15
			<cmd:ResourceProxy id="resource">
16
				<cmd:ResourceType>Resource</cmd:ResourceType>
17
				<cmd:ResourceRef>hdl:10032/9dd8605956a31dbd8c2b63ffef998bd2</cmd:ResourceRef>
18
			</cmd:ResourceProxy>
19
			<cmd:ResourceProxy id="SearchPage">
20
				<cmd:ResourceType>SearchPage</cmd:ResourceType>
21
				<cmd:ResourceRef>hdl:10032/9dd8605956a31dbd8c2b63ffef998bd2</cmd:ResourceRef>
22
			</cmd:ResourceProxy>
23
			<cmd:ResourceProxy id="sruCQL">
24
				<cmd:ResourceType mimetype="application/sru+xml">SearchService</cmd:ResourceType>
25
				<!-- http://gysseling.corpus.taalbanknederlands.inl.nl/cqlwebapp/cql -->
26
				<cmd:ResourceRef>hdl:10032/56a70d6a67e396bdd13b0a67e735d791</cmd:ResourceRef>
27
			</cmd:ResourceProxy>
28
			<cmd:ResourceProxy id="LandingPage">
29
				<cmd:ResourceType>LandingPage</cmd:ResourceType>
30
				<cmd:ResourceRef>hdl:10032/99fb7c459b7848118ec1a7cbb14c47ea</cmd:ResourceRef>
31
			</cmd:ResourceProxy>
32
		</cmd:ResourceProxyList>
33
		<cmd:JournalFileProxyList/>
34
		<cmd:ResourceRelationList/>
35
	</cmd:Resources>
36
	<cmd:Components>
37
		<cmdp:TextCorpusProfile>
38
			<cmdp:Collection>
39
				<cmdp:GeneralInfo>
40
					<cmdp:Name>Corpus Gysseling</cmdp:Name>
41
					<cmdp:TimeCoverage>
42
						<cmdp:minDate>1200-01-01</cmdp:minDate>
43
						<cmdp:maxDate>1300-01-01</cmdp:maxDate>
44
					</cmdp:TimeCoverage>
45
					<cmdp:Description>
46
						<cmdp:Description>
47
							Corpus Gysseling van 13de eeuwse tekstem
48
						</cmdp:Description>
49
					</cmdp:Description>
50
				</cmdp:GeneralInfo>
51
				<cmdp:OriginLocation>
52
					<cmdp:Location>
53
						<cmdp:Country>
54
							<cmdp:Code>NL</cmdp:Code>
55
						</cmdp:Country>
56
					</cmdp:Location>
57
				</cmdp:OriginLocation>
58
				<cmdp:Creators>
59
					<cmdp:Creator>
60
						<cmdp:Contact>
61
							<cmdp:Email>servicedesk@inl.nl</cmdp:Email>
62
							<cmdp:Organisation>INL</cmdp:Organisation>
63
						</cmdp:Contact>
64
					</cmdp:Creator>
65
				</cmdp:Creators>
66

  
67
				<cmdp:DocumentationLanguages>
68
					<cmdp:Language>
69
						<cmdp:LanguageName>Dutch</cmdp:LanguageName>
70
						<cmdp:ISO639>
71
							<cmdp:iso-639-3-code>nld</cmdp:iso-639-3-code>
72
						</cmdp:ISO639>
73
					</cmdp:Language>
74
				</cmdp:DocumentationLanguages>
75

  
76
				<cmdp:Access cmd:ref="resource">
77
					<cmdp:Availability>free for academic use; non appliccable for commercial parties</cmdp:Availability>
78
					<cmdp:DistributionMedium>online application</cmdp:DistributionMedium>
79
					<cmdp:CatalogueLink>http://gysseling.corpus.taalbanknederlands.inl.nl/gysseling/page/search</cmdp:CatalogueLink>
80
					<cmdp:Contact>
81
						<cmdp:Email>servidesk@inl.nl</cmdp:Email>
82
						<cmdp:Organisation>INL</cmdp:Organisation>
83
						<cmdp:Website>www.inl.nl</cmdp:Website>
84
					</cmdp:Contact>
85
					<cmdp:Price>
86
						<cmdp:Price>free</cmdp:Price>
87
					</cmdp:Price>
88
				</cmdp:Access>
89

  
90
			</cmdp:Collection>
91
			<cmdp:Corpus>
92
				<cmdp:SubjectLanguages>
93
					<!-- Middle Dutch (ca. 1050-1350) -->
94
					<cmdp:SubjectLanguage>
95
						<cmdp:Language>
96
							<cmdp:LanguageName>Middle Dutch</cmdp:LanguageName>
97
							<cmdp:ISO639>
98
								<cmdp:iso-639-3-code>dum</cmdp:iso-639-3-code>
99
							</cmdp:ISO639>
100
						</cmdp:Language>
101
					</cmdp:SubjectLanguage>
102
				</cmdp:SubjectLanguages>
103
			</cmdp:Corpus>
104
			<cmdp:TextCorpus> </cmdp:TextCorpus>
105
		</cmdp:TextCorpusProfile>
106
	</cmd:Components>
107
</cmd:CMD>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_dataset.xml
1
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1403526079380/xsd">
2
	<cmd:Header>
3
		<cmd:MdCreationDate>2017-01-20</cmd:MdCreationDate>
4
		<cmd:MdSelfLink>http://hdl.handle.net/11356/1052@format=cmdi</cmd:MdSelfLink>
5
		<cmd:MdProfile>clarin.eu:cr1:p_1403526079380</cmd:MdProfile>
6
		<cmd:MdCollectionDisplayName>CLARIN.SI data &amp; tools</cmd:MdCollectionDisplayName>
7
	</cmd:Header>
8
	<cmd:Resources>
9
		<cmd:ResourceProxyList>
10
			<cmd:ResourceProxy id="lp_1590">
11
				<cmd:ResourceType>LandingPage</cmd:ResourceType>
12
				<cmd:ResourceRef>http://hdl.handle.net/11356/1052</cmd:ResourceRef>
13
			</cmd:ResourceProxy>
14
			<cmd:ResourceProxy id="uri_1">
15
				<cmd:ResourceType mimetype="text/html">Resource</cmd:ResourceType>
16
				<cmd:ResourceRef>http://eng.slovenscina.eu/tehnologije/ucni-korpus</cmd:ResourceRef>
17
			</cmd:ResourceProxy>
18
		</cmd:ResourceProxyList>
19
		<cmd:JournalFileProxyList/>
20
		<cmd:ResourceRelationList/>
21
	</cmd:Resources>
22
	<cmd:Components>
23
		<cmdp:LINDAT_CLARIN>
24
			<cmdp:bibliographicInfo>
25
				<cmdp:projectUrl>http://eng.slovenscina.eu/tehnologije/ucni-korpus</cmdp:projectUrl>
26
				<cmdp:titles>
27
					<cmdp:title xml:lang="en">Training corpus ssj500k 1.4</cmdp:title>
28
				</cmdp:titles>
29
				<cmdp:authors>
30
					<cmdp:author>
31
						<cmdp:lastName>Krek</cmdp:lastName>
32
						<cmdp:firstName> Simon</cmdp:firstName>
33
					</cmdp:author>
34
					<cmdp:author>
35
						<cmdp:lastName>Dobrovoljc</cmdp:lastName>
36
						<cmdp:firstName> Kaja</cmdp:firstName>
37
					</cmdp:author>
38
					<cmdp:author>
39
						<cmdp:lastName>Erjavec</cmdp:lastName>
40
						<cmdp:firstName> Tomaž</cmdp:firstName>
41
					</cmdp:author>
42
					<cmdp:author>
43
						<cmdp:lastName>Može</cmdp:lastName>
44
						<cmdp:firstName> Sara</cmdp:firstName>
45
					</cmdp:author>
46
					<cmdp:author>
47
						<cmdp:lastName>Ledinek</cmdp:lastName>
48
						<cmdp:firstName> Nina</cmdp:firstName>
49
					</cmdp:author>
50
					<cmdp:author>
51
						<cmdp:lastName>Holz</cmdp:lastName>
52
						<cmdp:firstName> Nanika</cmdp:firstName>
53
					</cmdp:author>
54
				</cmdp:authors>
55
				<cmdp:dates>
56
					<cmdp:dateIssued>2015-10-26</cmdp:dateIssued>
57
				</cmdp:dates>
58
				<cmdp:identifiers>
59
					<cmdp:identifier type="Handle">http://hdl.handle.net/11356/1052</cmdp:identifier>
60
				</cmdp:identifiers>
61
				<cmdp:funds>
62
					<cmdp:funding>
63
						<cmdp:organization>Ministry of Education, Science and Sport</cmdp:organization>
64
						<cmdp:code>3311-08-986003</cmdp:code>
65
						<cmdp:projectName>Communication in Slovene</cmdp:projectName>
66
						<cmdp:fundsType>euFunds</cmdp:fundsType>
67
					</cmdp:funding>
68
				</cmdp:funds>
69
				<cmdp:contactPerson>
70
					<cmdp:firstName>Simon</cmdp:firstName>
71
					<cmdp:lastName>Krek</cmdp:lastName>
72
					<cmdp:email>simon.krek@guest.arnes.si</cmdp:email>
73
					<cmdp:affiliation>Jožef Stefan Institute</cmdp:affiliation>
74
				</cmdp:contactPerson>
75
				<cmdp:publishers>
76
					<cmdp:publisher>Centre for Language Resources and Technologies, University of Ljubljana</cmdp:publisher>
77
				</cmdp:publishers>
78
			</cmdp:bibliographicInfo>
79
			<cmdp:dataInfo>
80
				<cmdp:type>corpus</cmdp:type>
81
				<cmdp:description>The ssj500k training corpus contains 500,000 words, manually annotated on the levels of tokenization, sentence segmentation, morphosyntactic tagging, lemmatisation, named entities, and, partially, syntactic dependencies. The ssj500k corpus uses the MULTEXT-East / JOS morphosyntactic tagset and the JOS dependency schema and is based on the jos100k and jos1M corpora. Note that this entry updates ssj500k 1.3 by fixing many annotation errors.</cmdp:description>
82
				<cmdp:languages>
83
					<cmdp:language>
84
						<cmdp:code>slv</cmdp:code>
85
						<cmdp:name>Slovenian</cmdp:name>
86
					</cmdp:language>
87
				</cmdp:languages>
88
				<cmdp:keywords>
89
					<cmdp:keyword>tagging</cmdp:keyword>
90
					<cmdp:keyword>dependency treebank</cmdp:keyword>
91
					<cmdp:keyword>parsing</cmdp:keyword>
92
					<cmdp:keyword>named entities</cmdp:keyword>
93
					<cmdp:keyword>tokenisation</cmdp:keyword>
94
					<cmdp:keyword>manual annotation</cmdp:keyword>
95
					<cmdp:keyword>TEI</cmdp:keyword>
96
				</cmdp:keywords>
97
				<cmdp:links>
98
					<cmdp:link>http://nl.ijs.si/noske/sl-ref.cgi/corp_info?corpname=ssj500k</cmdp:link>
99
				</cmdp:links>
100
				<cmdp:sizeInfo>
101
					<cmdp:size>
102
						<cmdp:size>500295</cmdp:size>
103
						<cmdp:unit>words</cmdp:unit>
104
					</cmdp:size>
105
					<cmdp:size>
106
						<cmdp:size>586248</cmdp:size>
107
						<cmdp:unit>tokens</cmdp:unit>
108
					</cmdp:size>
109
					<cmdp:size>
110
						<cmdp:size>27829</cmdp:size>
111
						<cmdp:unit>sentences</cmdp:unit>
112
					</cmdp:size>
113
				</cmdp:sizeInfo>
114
			</cmdp:dataInfo>
115
			<cmdp:licenseInfo>
116
				<cmdp:license>
117
					<cmdp:uri>https://creativecommons.org/licenses/by-nc-sa/4.0/</cmdp:uri>
118
				</cmdp:license>
119
			</cmdp:licenseInfo>
120
		</cmdp:LINDAT_CLARIN>
121
	</cmd:Components>
122
</cmd:CMD>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/parthenos_policy.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<generator_policy>
3

  
4
	<!-- The prefix names a namespace that must be declared in the X3ML mapping definition.
5
	In PARTHENOS it was decided that the namespace is constructed from the PARTHENOS namespace followed by the provider
6
	and the database that is being mapped.
7
	E.g. use prefix="parthenos" here, and in the X3ML declare <namespace prefix="parthenos" uri="http://parthenos-project.eu/FORTH/myDB/"/> -->
8

  
9
	<generator name="ServiceURI" prefix="parthenos">
10
		<pattern>Service/{term}</pattern>
11
	</generator>
12
	<generator name="DatasetURI" prefix="parthenos">
13
		<pattern>Dataset/{term}</pattern>
14
	</generator>
15
	<generator name="SoftwareURI" prefix="parthenos">
16
		<pattern>Software/{term}</pattern>
17
	</generator>
18
	<generator name="ActorURI" prefix="parthenos">
19
		<pattern>Actor/{term}</pattern>
20
	</generator>
21
	<generator name="ProjectURI" prefix="parthenos">
22
		<pattern>Project/{term}</pattern>
23
	</generator>
24
	<generator name="ServiceAppellationURI" prefix="parthenos">
25
		<pattern>Service/Appellation/{term}</pattern>
26
	</generator>
27
	<generator name="DatasetAppellationURI" prefix="parthenos">
28
		<pattern>Dataset/Appellation/{term}</pattern>
29
	</generator>
30
	<generator name="SoftwareAppellationURI" prefix="parthenos">
31
		<pattern>Software/Appellation/{term}</pattern>
32
	</generator>
33
	<generator name="ActorAppellationURI" prefix="parthenos">
34
		<pattern>Actor/Appellation/{term}</pattern>
35
	</generator>
36
	<generator name="ProjectAppellationURI" prefix="parthenos">
37
		<pattern>Project/Appellation/{term}</pattern>
38
	</generator>
39
	<generator name="ThingAppellationURI" prefix="parthenos">
40
		<pattern>Thing/Appellation/{term}</pattern>
41
	</generator>
42
	<generator name="EventAppellationURI" prefix="parthenos">
43
		<pattern>Event/Appellation/{term}</pattern>
44
	</generator>
45
	<generator name="PlaceAppellationURI" prefix="parthenos">
46
		<pattern>Place/Appellation/{term}</pattern>
47
	</generator>
48
	<generator name="ThingURI" prefix="parthenos">
49
		<pattern>Thing/{term}</pattern>
50
	</generator>
51
	<generator name="EventURI" prefix="parthenos">
52
		<pattern>Event/{term}</pattern>
53
	</generator>
54
	<generator name="Time-SpanURI" prefix="parthenos">
55
		<pattern>Time-Span/{term}</pattern>
56
	</generator>
57
	<generator name="PlaceURI" prefix="parthenos">
58
		<pattern>Place/{term}</pattern>
59
	</generator>
60
	<generator name="DimensionURI" prefix="parthenos">
61
		<pattern>Dimension/{term}</pattern>
62
	</generator>
63
	<generator name="ConceptURI" prefix="parthenos">
64
		<pattern>Concept/{term}</pattern>
65
	</generator>
66
	<generator name="OneLevelCustomURI" prefix="parthenos">
67
		<pattern>{level1}/{term}</pattern>
68
	</generator>
69
	<generator name="TwoLevelCustomURI" prefix="parthenos">
70
		<pattern>{level1}/{level2}/{term}</pattern>
71
	</generator>
72
	<generator name="OneLevelCustomURIwCount" prefix="parthenos">
73
		<pattern>{level1}/{count}/{term}</pattern>
74
	</generator>
75
	<generator name="TwoLevelCustomURIwCount" prefix="parthenos">
76
		<pattern>{level1}/{level2}/{count}/{term}</pattern>
77
	</generator>
78
	<generator name="SimpleLabel">
79
		<pattern>{label}</pattern>
80
	</generator>
81
	<generator name="CompositeLabel">
82
		<pattern>{label} {text}</pattern>
83
	</generator>
84
	<generator name="GermanDateTime">
85
		<custom generatorClass="gr.forth.GermanDate">
86
			<set-arg name="bound" type="constant"/>
87
			<set-arg name="text"/>
88
		</custom>
89
	</generator>
90
	<generator name="URIorUUID">
91
		<custom generatorClass="gr.forth.URIorUUID">
92
			<set-arg name="text"/>
93
		</custom>
94
	</generator>
95
</generator_policy>
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_service2.xml
1
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1423750293168" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1423750293168 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1423750293168/xsd">
2
	<cmd:Header>
3
		<cmd:MdCreator>Thomas Kisler</cmd:MdCreator>
4
		<cmd:MdCreationDate>2013-12-05</cmd:MdCreationDate>
5
		<cmd:MdSelfLink>https://clarin.phonetik.uni-muenchen.de/BASRepository/WebServices/BAS_Webservices.cmdi.xml</cmd:MdSelfLink>
6
		<cmd:MdProfile>clarin.eu:cr1:p_1423750293168</cmd:MdProfile>
7
		<cmd:MdCollectionDisplayName>Bavarian Archive for Speech Signals (BAS)</cmd:MdCollectionDisplayName>
8
	</cmd:Header>
9
	<cmd:Resources>
10
		<cmd:ResourceProxyList>
11
			<cmd:ResourceProxy id="locid1">
12
				<cmd:ResourceType mimetype="application/vnd.sun.wadl+xml">Resource</cmd:ResourceType>
13
				<cmd:ResourceRef>https://clarin.phonetik.uni-muenchen.de/BASWebServices/application-hand.wadl</cmd:ResourceRef>
14
			</cmd:ResourceProxy>
15
			<cmd:ResourceProxy id="lp_0000000001">
16
				<cmd:ResourceType mimetype="text/html">LandingPage</cmd:ResourceType>
17
				<cmd:ResourceRef>http://clarin.phonetik.uni-muenchen.de/BASWebServices/</cmd:ResourceRef>
18
			</cmd:ResourceProxy>
19
		</cmd:ResourceProxyList>
20
		<cmd:JournalFileProxyList/>
21
		<cmd:ResourceRelationList/>
22
	</cmd:Resources>
23
	<cmd:Components>
24
		<cmdp:BASWebService>
25
			<cmdp:Description>
26
				<cmdp:Description>This is the description of the BAS Web Services being hosted in the Bavarian Archive for Speech
27
					Signals (BAS) in Munich. Parameters possessing a "mimetype" tag are being processed as files and need to be
28
					provided to ensure the services to run (other options have a default option, so it is optional if those are
29
					passed).</cmdp:Description>
30
			</cmdp:Description>
31
			<cmdp:Service>
32
				<cmdp:Name>BAS Webservices</cmdp:Name>
33
				<cmdp:Description>Several services processing phonetic data (signals and annotations/segmentations) provided by
34
					BAS</cmdp:Description>
35
				<cmdp:ServiceDescriptionLocation cmd:ref="locid1"/>
36
				<cmdp:CollectionType>
37
					<cmdp:CollectionType>tool</cmdp:CollectionType>
38
				</cmdp:CollectionType>
39
				<cmdp:Operations>
40
					<cmdp:Operation>
41
						<cmdp:Name>runMAUSBasic</cmdp:Name>
42
						<cmdp:Description>segments an audio file into SAM-PA phonetic segments given an orthographic transcription;
43
							result is stored in a three-layer (word segmentation with orthographic labels, word segmentation with
44
							canonical pronunciation labels in SAM-PA, phonemic segmentation with SAM-PA labels) praat textgrid file;
45
							this is a basic MAUS service which uses only default options, for a more controllable service see operation
46
							'runMAUS'.</cmdp:Description>
47
						<cmdp:Input>
48
							<cmdp:Parameter>
49
								<cmdp:Name>SIGNAL</cmdp:Name>
50
								<cmdp:Description>mono sound file containing the speech signal to be segmented; PCM 16 bit resolution; any
51
									sampling rate; optimal results if leading and trailing silence intervals are truncated before
52
									processing; max. file size is 20MBytes. Although the mimetype of this input file is restricted to
53
									audio/x-wav (wav|WAV), the service will also process NIST/SPHERE (nis|NIS) and ALAW
54
									(al|AL|dea|DEA).</cmdp:Description>
55
								<cmdp:MIMEType>audio/x-wav</cmdp:MIMEType>
56
								<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter>
57
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2653</cmdp:DataCategory>
58
								<cmdp:displayName>Signal file</cmdp:displayName>
59
								<cmdp:MaxFileSize>
60
									<cmdp:TotalSize>
61
										<cmdp:Number>200</cmdp:Number>
62
										<cmdp:SizeUnit>MB</cmdp:SizeUnit>
63
									</cmdp:TotalSize>
64
								</cmdp:MaxFileSize>
65
							</cmdp:Parameter>
66
							<cmdp:Parameter>
67
								<cmdp:Name>TEXT</cmdp:Name>
68
								<cmdp:Description>orthographic text of the utterance to be segmented; words are white space separated;
69
									encoding is utf-8; punctuations are ignored</cmdp:Description>
70
								<cmdp:MIMEType>text/plain; charset=UTF-8</cmdp:MIMEType>
71
								<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter>
72
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2462</cmdp:DataCategory>
73
								<cmdp:displayName>Text file</cmdp:displayName>
74
								<cmdp:MaxFileSize>
75
									<cmdp:TotalSize>
76
										<cmdp:Number>20</cmdp:Number>
77
										<cmdp:SizeUnit>MB</cmdp:SizeUnit>
78
									</cmdp:TotalSize>
79
								</cmdp:MaxFileSize>
80
							</cmdp:Parameter>
81
							<cmdp:Parameter>
82
								<cmdp:Name>LANGUAGE</cmdp:Name>
83
								<cmdp:Description>Language of the speech to be processed; we use the RFC5646 sub-structure 'iso639-3 -
84
									iso3166-1 [ - iso3166-2]', e.g. 'eng-US' for American English, 'deu-AT-1' for Austrian German spoken in
85
									'Oberoesterreich'; defines the possible orthographic text language in the input, the text-to-phoneme
86
									transformation and some language specific transformations within the MAUS process. The code 'gsw-CH' (=
87
									Swiss German) denotes orthographic text input in Swiss German 'Dieth' encoding.</cmdp:Description>
88
								<cmdp:DataType>xsd:string</cmdp:DataType>
89
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
90
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2482</cmdp:DataCategory>
91
								<cmdp:Default>deu-DE</cmdp:Default>
92
								<cmdp:displayName>Language</cmdp:displayName>
93
								<cmdp:Values>
94
									<cmdp:ParameterValue>
95
										<cmdp:Value>cat-ES</cmdp:Value>
96
										<cmdp:Description>Catalan (ES)</cmdp:Description>
97
									</cmdp:ParameterValue>
98
									<cmdp:ParameterValue>
99
										<cmdp:Value>nld-NL</cmdp:Value>
100
										<cmdp:Description>Dutch (NL)</cmdp:Description>
101
									</cmdp:ParameterValue>
102
									<cmdp:ParameterValue>
103
										<cmdp:Value>eng-AU</cmdp:Value>
104
										<cmdp:Description>English (AU)</cmdp:Description>
105
									</cmdp:ParameterValue>
106
									<cmdp:ParameterValue>
107
										<cmdp:Value>eng-US</cmdp:Value>
108
										<cmdp:Description>English (US) </cmdp:Description>
109
									</cmdp:ParameterValue>
110
									<cmdp:ParameterValue>
111
										<cmdp:Value>eng-GB</cmdp:Value>
112
										<cmdp:Description>English (GB) </cmdp:Description>
113
									</cmdp:ParameterValue>
114
									<cmdp:ParameterValue>
115
										<cmdp:Value>eng-NZ</cmdp:Value>
116
										<cmdp:Description>English (NZ) </cmdp:Description>
117
									</cmdp:ParameterValue>
118
									<cmdp:ParameterValue>
119
										<cmdp:Value>ekk-EE</cmdp:Value>
120
										<cmdp:Description>Estonian (EE)</cmdp:Description>
121
									</cmdp:ParameterValue>
122
									<cmdp:ParameterValue>
123
										<cmdp:Value>fin-FI</cmdp:Value>
124
										<cmdp:Description>Finnish (FI)</cmdp:Description>
125
									</cmdp:ParameterValue>
126
									<cmdp:ParameterValue>
127
										<cmdp:Value>fra-FR</cmdp:Value>
128
										<cmdp:Description>French (FR)</cmdp:Description>
129
									</cmdp:ParameterValue>
130
									<cmdp:ParameterValue>
131
										<cmdp:Value>kat-GE</cmdp:Value>
132
										<cmdp:Description>Georgian (GE)</cmdp:Description>
133
									</cmdp:ParameterValue>
134
									<cmdp:ParameterValue>
135
										<cmdp:Value>deu-DE</cmdp:Value>
136
										<cmdp:Description>German (DE)</cmdp:Description>
137
									</cmdp:ParameterValue>
138
									<cmdp:ParameterValue>
139
										<cmdp:Value>gsw-CH</cmdp:Value>
140
										<cmdp:Description>German Dieth (CH)</cmdp:Description>
141
									</cmdp:ParameterValue>
142
									<cmdp:ParameterValue>
143
										<cmdp:Value>gsw-CH-BE</cmdp:Value>
144
										<cmdp:Description>German Dieth (CH), Bern dialect</cmdp:Description>
145
									</cmdp:ParameterValue>
146
									<cmdp:ParameterValue>
147
										<cmdp:Value>gsw-CH-BS</cmdp:Value>
148
										<cmdp:Description>German Dieth (CH), Basel dialect</cmdp:Description>
149
									</cmdp:ParameterValue>
150
									<cmdp:ParameterValue>
151
										<cmdp:Value>gsw-CH-GR</cmdp:Value>
152
										<cmdp:Description>German Dieth (CH), Graubunden dialect</cmdp:Description>
153
									</cmdp:ParameterValue>
154
									<cmdp:ParameterValue>
155
										<cmdp:Value>gsw-CH-SG</cmdp:Value>
156
										<cmdp:Description>German Dieth (CH), St. Gallen dialect</cmdp:Description>
157
									</cmdp:ParameterValue>
158
									<cmdp:ParameterValue>
159
										<cmdp:Value>gsw-CH-ZH</cmdp:Value>
160
										<cmdp:Description>German Dieth (CH), Zurich dialect</cmdp:Description>
161
									</cmdp:ParameterValue>
162
									<cmdp:ParameterValue>
163
										<cmdp:Value>hun-HU</cmdp:Value>
164
										<cmdp:Description>Hungarian (HU)</cmdp:Description>
165
									</cmdp:ParameterValue>
166
									<cmdp:ParameterValue>
167
										<cmdp:Value>ita-IT</cmdp:Value>
168
										<cmdp:Description>Italian (IT)</cmdp:Description>
169
									</cmdp:ParameterValue>
170
									<cmdp:ParameterValue>
171
										<cmdp:Value>mlt-MT</cmdp:Value>
172
										<cmdp:Description>Maltese (MT)</cmdp:Description>
173
									</cmdp:ParameterValue>
174
									<cmdp:ParameterValue>
175
										<cmdp:Value>pol-PL</cmdp:Value>
176
										<cmdp:Description>Polish (PL)</cmdp:Description>
177
									</cmdp:ParameterValue>
178
									<cmdp:ParameterValue>
179
										<cmdp:Value>rus-RU</cmdp:Value>
180
										<cmdp:Description>Russian (RU)</cmdp:Description>
181
									</cmdp:ParameterValue>
182
									<cmdp:ParameterValue>
183
										<cmdp:Value>spa-ES</cmdp:Value>
184
										<cmdp:Description>Spanish (ES)</cmdp:Description>
185
									</cmdp:ParameterValue>
186
									<cmdp:ParameterValue>
187
										<cmdp:Value>cat</cmdp:Value>
188
										<cmdp:Description/>
189
									</cmdp:ParameterValue>
190
									<cmdp:ParameterValue>
191
										<cmdp:Value>deu</cmdp:Value>
192
										<cmdp:Description/>
193
									</cmdp:ParameterValue>
194
									<cmdp:ParameterValue>
195
										<cmdp:Value>eng</cmdp:Value>
196
										<cmdp:Description/>
197
									</cmdp:ParameterValue>
198
									<cmdp:ParameterValue>
199
										<cmdp:Value>fra</cmdp:Value>
200
										<cmdp:Description/>
201
									</cmdp:ParameterValue>
202
									<cmdp:ParameterValue>
203
										<cmdp:Value>hun</cmdp:Value>
204
										<cmdp:Description/>
205
									</cmdp:ParameterValue>
206
									<cmdp:ParameterValue>
207
										<cmdp:Value>ita</cmdp:Value>
208
										<cmdp:Description/>
209
									</cmdp:ParameterValue>
210
									<cmdp:ParameterValue>
211
										<cmdp:Value>mlt</cmdp:Value>
212
										<cmdp:Description/>
213
									</cmdp:ParameterValue>
214
									<cmdp:ParameterValue>
215
										<cmdp:Value>nld</cmdp:Value>
216
										<cmdp:Description/>
217
									</cmdp:ParameterValue>
218
									<cmdp:ParameterValue>
219
										<cmdp:Value>aus</cmdp:Value>
220
										<cmdp:Description/>
221
									</cmdp:ParameterValue>
222
									<cmdp:ParameterValue>
223
										<cmdp:Value>pol</cmdp:Value>
224
										<cmdp:Description/>
225
									</cmdp:ParameterValue>
226
									<cmdp:ParameterValue>
227
										<cmdp:Value>nze</cmdp:Value>
228
										<cmdp:Description/>
229
									</cmdp:ParameterValue>
230
									<cmdp:ParameterValue>
231
										<cmdp:Value>fin</cmdp:Value>
232
										<cmdp:Description/>
233
									</cmdp:ParameterValue>
234
									<cmdp:ParameterValue>
235
										<cmdp:Value>spa</cmdp:Value>
236
										<cmdp:Description/>
237
									</cmdp:ParameterValue>
238
								</cmdp:Values>
239
							</cmdp:Parameter>
240
							<cmdp:Parameter>
241
								<cmdp:Name>INSKANTEXTGRID</cmdp:Name>
242
								<cmdp:Description>Switch to create an additional tier in the TextGrid output file with a word segmentation
243
									labelled with the canonic phonemic transcript (taken from the input KAN tier). This option can not be
244
									set in this service.</cmdp:Description>
245
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
246
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory>
247
								<cmdp:Default>true</cmdp:Default>
248
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
249
								<cmdp:displayName>KAN tier in TextGrid</cmdp:displayName>
250
							</cmdp:Parameter>
251
							<cmdp:Parameter>
252
								<cmdp:Name>INSORTTEXTGRID</cmdp:Name>
253
								<cmdp:Description>Switch to create an additional tier ORT in the TextGrid output file with a word
254
									segmentation labelled with the orthographic transcript (taken from the input ORT tier); this option is
255
									only effective, if the input BPF contains an additional ORT tier. This option can not be set in this
256
									service.</cmdp:Description>
257
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
258
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory>
259
								<cmdp:Default>true</cmdp:Default>
260
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
261
								<cmdp:displayName>ORT tier in TextGrid</cmdp:displayName>
262
							</cmdp:Parameter>
263
							<cmdp:Parameter>
264
								<cmdp:Name>OUTFORMAT</cmdp:Name>
265
								<cmdp:Description>Defines the possible output formats: TextGrid - a praat compatible TextGrid file with two
266
									tiers; par|mau-append - the input BPF file with a new (or replaced) tier MAU; csv|mau - only the BPF
267
									MAU tier (CSV table); legacyEMU - a file with extension *.EMU that contains in the first part the Emu
268
									hlb file (*.hlb) and in the second part the Emu phonetic segmentation (*.phonetic)(parts are separated
269
									by a line '--- cut here ---'); for a description of BPF see
270
									http://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html This option can not be set in this
271
									service.</cmdp:Description>
272
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
273
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory>
274
								<cmdp:Default>TextGrid</cmdp:Default>
275
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
276
								<cmdp:displayName>Output format</cmdp:displayName>
277
							</cmdp:Parameter>
278
							<cmdp:Parameter>
279
								<cmdp:Name>USETRN</cmdp:Name>
280
								<cmdp:Description>If set to true, the service searches the input BPF for a TRN tier (turn/chunk
281
									segmentation, see http://www.bas.uni-muenchen.de/forschung/Bas/BasFormatsdeu.html#TRN). The synopsis
282
									for a TRN entry is: 'TRN: (start-sample) (duration-sample) (word-link-list) (label)', e.g. 'TRN: 23654
283
									56432 0,1,2,3,4,5,6 sentence1' (the speech within the recording 'sentence1' starts with sample 23654,
284
									lasts for 56432 samples and covers the words 0-6). If only one TRN entry is found, the segmentation is
285
									restricted within a time range given by this TRN tier entry; this is useful, if there exists a reliable
286
									pre-segmentation of the recorded utterance, i.e. the start and end of speech within the recording is
287
									known. If more than one TRN entry is found, the webservice performs a segmentation for each 'chunk'
288
									defined by a TRN entry and aggregates all individual results into a single results file; this is useful
289
									if the input consists of long recordings, for which a manual chunk segmentation is available. If USETRN
290
									is set to 'force', a pre-segmentation using the wav2trn tool is done by the webservice on-the-fly; this
291
									is useful, if the input BPF does not contain a TRN entry and the input signal has leading and/or
292
									trailing silence. This option can not be set in this service.</cmdp:Description>
293
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
294
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory>
295
								<cmdp:Default>force</cmdp:Default>
296
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
297
								<cmdp:displayName>Chunk Segmentation</cmdp:displayName>
298
							</cmdp:Parameter>
299
							<cmdp:Parameter>
300
								<cmdp:Name>NOINITIALFINALSILENCE</cmdp:Name>
301
								<cmdp:Description>Switch to suppress the automatic modeling on a leading/trailing silence interval. This is
302
									useful if the signal is for instance cut from a larger utterance and is known to have no
303
									leading/trailing silence. This option can not be set in this service.</cmdp:Description>
304
								<cmdp:DataType>xsd:boolean { pattern='true|false' }</cmdp:DataType>
305
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
306
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory>
307
								<cmdp:Default>false</cmdp:Default>
308
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
309
								<cmdp:displayName>No silence model</cmdp:displayName>
310
							</cmdp:Parameter>
311
							<cmdp:Parameter>
312
								<cmdp:Name>RELAXMINDUR</cmdp:Name>
313
								<cmdp:Description>Option RELAXMINDUR changes the default minimum duration of 30msec for consonants
314
									and short/lax vowels and of 40msec for tense/long vowels and diphthongs to 10 and 20msec respectively.
315
									This is not optimal for general segmentation because MAUS will start to insert many very short
316
									vowels/glottal stops where they are not appropriate. But for some special investigations
317
									(e.g. the duration of /t/) it alleviates the ceiling problem at 30msec duration.
318
								</cmdp:Description>
319
								<cmdp:DataType>xsd:boolean { pattern='true|false' }</cmdp:DataType>
320
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
321
								<cmdp:Default>false</cmdp:Default>
322
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
323
								<cmdp:displayName>Relax Min Duration</cmdp:displayName>
324
							</cmdp:Parameter>
325
							<cmdp:Parameter>
326
								<cmdp:Name>BPFTHRESHOLD</cmdp:Name>
327
								<cmdp:Description>Option BPFTHRESHOLD changes the threshold of the MAUS pre-validation: if the
328
									input BPF file contains more KAN tier lines than this value, maus exits with an ERROR
329
									message and exit code 2. The motivation is that the computational effort of MAUS increases
330
									quadratically with input length. Using this option you can set the threshold to another
331
									value.
332
								</cmdp:Description>
333
								<cmdp:DataType>xsd:string</cmdp:DataType>
334
								<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter>
335
								<cmdp:Default>3000</cmdp:Default>
336
								<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter>
337
								<cmdp:displayName>Max number of input words</cmdp:displayName>
338
							</cmdp:Parameter>
339
						</cmdp:Input>
340
						<cmdp:Output>
341
							<cmdp:Parameter>
342
								<cmdp:Name>processfiles-out</cmdp:Name>
343
								<cmdp:Description>An XML response containing the tags "success", "downloadLink", "output" and "warning".
344
									success states if the processing was successful or not, downloadLink specifies the location where the
345
									Praat TextGrid file can be found, output contains the output that is mostly useful during debugging
346
									errors and warnings if any warnings occurred during the processing. The Praat TextGrid file contains
347
									three tiers: orthographic transcription (segmented in words), canonical phonemic transcription in
348
									SAM-PA (segmented in words), phonemic segmentation by MAUS in SAM-PA</cmdp:Description>
349
								<cmdp:MIMEType>application/xml; charset=UTF-8</cmdp:MIMEType>
350
								<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter>
351
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2462</cmdp:DataCategory>
352
							</cmdp:Parameter>
353
						</cmdp:Output>
354
						<cmdp:Developer>
355
							<cmdp:developerName>Florian Schiel</cmdp:developerName>
356
							<cmdp:developerName>Andreas Kipp</cmdp:developerName>
357
							<cmdp:developerName>Thomas Kisler</cmdp:developerName>
358
							<cmdp:developerName>Ines Wendler</cmdp:developerName>
359
							<cmdp:developerFundingOrg>Bavarian Archive for Speech Signals, Munich, Germany</cmdp:developerFundingOrg>
360
							<cmdp:developerFundingOrg>Bundesminister für Bildung und Forschung, Germany</cmdp:developerFundingOrg>
361
							<cmdp:developerCitation>Schiel, F. (1999). Automatic Phonetic Transcription of Non-Prompted Speech. In Proc.
362
								of the ICPhS (pp. 607-610).</cmdp:developerCitation>
363
							<cmdp:developerCitation>Kisler, T. and Reichel U. D. and Schiel, F. and Draxler, Ch. and Jackl, B. and Pörner, N. (2016): BAS Speech Science Web Services - an Update of Current Developments, Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016), Portorož, Slovenia, paper id 668. </cmdp:developerCitation>
364
						</cmdp:Developer>
365
					</cmdp:Operation>
366
					<cmdp:Operation>
367
						<cmdp:Name>runMAUSBasicGerman</cmdp:Name>
368
						<cmdp:Description>segments a German audio file into SAM-PA phonetic segments given a German orthographic
369
							transcription; result is stored in a three-layer (word segmentation with orthographic labels, word
370
							segmentation with canonical pronunciation labels in SAM-PA, phonemic segmentation with SAM-PA labels) praat
371
							textgrid file; this is a basic MAUS service which uses only default options, for a more controllable
372
							service see operation 'runMAUS'.</cmdp:Description>
373
						<cmdp:Input>
374
							<cmdp:Parameter>
375
								<cmdp:Name>SIGNAL</cmdp:Name>
376
								<cmdp:Description>mono sound file containing the speech signal to be segmented; PCM 16 bit resolution; any
377
									sampling rate; optimal results if leading and trailing silence intervals are truncated before
378
									processing; max. file size is 20MBytes. Although the mimetype of this input file is restricted to
379
									audio/x-wav (wav|WAV), the service will also process NIST/SPHERE (nis|NIS) and ALAW
380
									(al|AL|dea|DEA).</cmdp:Description>
381
								<cmdp:MIMEType>audio/x-wav</cmdp:MIMEType>
382
								<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter>
383
								<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2653</cmdp:DataCategory>
384
								<cmdp:displayName>Signal file</cmdp:displayName>
385
								<cmdp:MaxFileSize>
386
									<cmdp:TotalSize>
387
										<cmdp:Number>200</cmdp:Number>
388
										<cmdp:SizeUnit>MB</cmdp:SizeUnit>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff