Project

General

Profile

« Previous | Next » 

Revision 42829

moved in dnet-openaire-domain

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-profiles/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-openaireplus-profiles"}
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisPreprocessingJob" type="oozie">
11
 			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet.beta" />
16

  
17
				<!-- Runtime -->
18
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"  />
19
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"  />
20
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"  />
21
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
22
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
24
        	</STATIC_CONFIGURATION>
25
        	<JOB_INTERFACE>
26
 		       	<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
27
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
28
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" />
29
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
30
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
31
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" />
32
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" />
33
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
35
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
36
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
37
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
38
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
39
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
40
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
41
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
42
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
43
        	</JOB_INTERFACE>
44
        </HADOOP_JOB>
45
        <STATUS>
46
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
47
            <RUNNING_INSTANCES value="0"/>
48
            <CUMULATIVE_RUN value="0" />
49
        </STATUS>
50
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
51
    </BODY>
52
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupPersonJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="29638605-235b-4cc1-9bf5-a5dd2fc84915_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupPersonJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT  -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN>
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES>
51
        			<FAMILY param="entityType" />
52
        		</FAMILIES>
53
        	</SCAN>
54
        </HADOOP_JOB>
55
        <STATUS>
56
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
57
            <RUNNING_INSTANCES value="0"/>
58
            <CUMULATIVE_RUN value="0" />
59
        </STATUS>
60
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisMainJob" type="oozie">
11
            <DESCRIPTION>IIS main workflow</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
        	
14
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet.beta"/>
17

  
18
				<!-- Runtime -->
19
				<PROPERTY key="active_existence_filter" value="true"/>
20
                <PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
21
                <PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
22
   				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
                <PROPERTY key="export_action_hbase_table_initialize" value="true"/>
24
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
25
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
26
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
27
            </STATIC_CONFIGURATION>
28
        	<JOB_INTERFACE>
29
        		<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
30
        		<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing" />
31
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
32
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records" />
33
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
35
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
36
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
37
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
38
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
39
                <PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
40
                <PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
41
                <PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/>
42
                <PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
43
                <PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
44
                <PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
45
                <PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
46
                <PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
47
       		
48
        		<!-- flags to enable/disable IIS modules -->
49
        		<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module" />
50
        		<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module" />
51
        		<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module" />
52
        		<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module" />
53
        		<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module" />
54
        		<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extraction module" />
55
        		<PARAM name="active_referenceextraction_researchinitiative" required="true" description="enable/disable the research initiative extraction module" />
56
        		<PARAM name="active_statistics" required="true" description="enable/disable the statistics module" />
57
        	</JOB_INTERFACE>
58
        </HADOOP_JOB>
59
        <STATUS>
60
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
61
            <RUNNING_INSTANCES value="0"/>
62
            <CUMULATIVE_RUN value="0"/>
63
        </STATUS>
64
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
65
    </BODY>
66
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="61f9270e-ffc1-4095-9f76-3852e4d227fb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupFindRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that finds the root of a similarity group</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
48
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
49
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
50
        		</FAMILIES>
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/indexFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="1c34963b-75b3-4440-9f42-72445a26c077_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="indexFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.IndexFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- JOB GLOBAL -->		
24
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
32
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
33
				
34
		<!--  	Uncomment to override the default lib path -->			
35
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
36
        	</STATIC_CONFIGURATION>
37
        	<JOB_INTERFACE>
38
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
39
        		<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" />
40
        		
41
        		<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> 
42
       			<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" />
43

  
44
       			<PARAM name="id" required="true" description="index DS id" />
45
				<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shutting down the solr client pool" />
46
       			<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" />
47
       			<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" />
48
				<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" />
49
				<PARAM name="index.xslt" required="true" description="record transformer created by the MSRO service" />
50
        	</JOB_INTERFACE>
51
        	<SCAN>
52
        		<FILTERS />
53
        		<FAMILIES />
54
        	</SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="0" />
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/oaiFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="03d7af20-63bb-4790-a052-6cdbc1e05fce_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2015-02-09T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="oaiFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that feeds the OAI store</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.oai.OaiFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />				
22
				
23
				<!-- JOB GLOBAL -->		
24
<!--                 <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> -->
25
<!--                 <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> -->
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
				<!--  	Overrides the default lib path (comment out the property below to use the default) -->			
34
				<PROPERTY key="job.lib" value="/user/dnet/lib/dnet-mapreduce-jobs-assembly-0.0.6.3-SNAPSHOT.jar"/> 
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
38
				<PARAM name="services.publisher.oai.collection" required="true" description="target mongodb collection" />
39
				<PARAM name="oaiConfiguration" required="true" description="configuration bean used to guide the OAI feeding" />
40
				<PARAM name="oai.feed.date" required="true" description="timestamp" />				
41
				<PARAM name="services.publisher.oai.host" required="true" description="mongodb host" />
42
				<PARAM name="services.publisher.oai.port" required="true" description="mongodb port" />
43
				<PARAM name="services.publisher.oai.db" required="true" description="mongodb database name" />	
44
				<PARAM name="services.publisher.oai.skipDuplicates" required="true" description="skip duplicated records." />	
45
				<PARAM name="services.publisher.oai.duplicateXPath" required="true" description="records with this xpath are identified as duplicates" />																
46
        	</JOB_INTERFACE>
47
        	<SCAN>
48
        		<FILTERS />
49
        		<FAMILIES />
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupCandidateScanJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="675f1436-205a-4b19-8b6b-35e1c17fb125_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupCandidateScanJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.compress.map.output" value="true" />	
30
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
33
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
34
			
35
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
36
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
37
				
38
		<!--  	Uncomment to override the default lib path -->			
39
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
40
        	</STATIC_CONFIGURATION>
41
        	<JOB_INTERFACE>
42
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
43
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
44
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
45
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
46
        		<PARAM name="dedup.conf" required="true" description="dedup configuration" />
47
        	</JOB_INTERFACE>
48
        	<SCAN>
49
        		<FILTERS operator="MUST_PASS_ALL">
50
        			<FILTER type="prefix" param="entityTypeId" />
51
        		</FILTERS>
52
        		<FAMILIES>
53
        			<FAMILY param="entityType" />
54
           			<FAMILY value="resultResult_dedup_merges" />
55
        			<FAMILY value="personPerson_dedup_merges" />
56
     				<FAMILY value="organizationOrganization_dedup_merges" />        			
57
        		</FAMILIES>
58
        	</SCAN>
59
        </HADOOP_JOB>
60
        <STATUS>
61
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
62
            <RUNNING_INSTANCES value="0"/>
63
            <CUMULATIVE_RUN value="0" />
64
        </STATUS>
65
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
66
    </BODY>
67
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/resetDedupJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bc4f377a-af07-403d-a019-af60aa557652_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="resetDedupJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that resets the dedup jobs</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"  />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />		
17

  
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.reset.HBaseResetMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"  />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Mutation"  />
22

  
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"  />
25
				<PROPERTY key="mapreduce.map.speculative" value="false"  />
26
				<PROPERTY key="mapred.reduce.tasks" value="0"  />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
				
32
        	</STATIC_CONFIGURATION>
33
        	<JOB_INTERFACE>
34
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
35
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
36
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />        		
38
        	</JOB_INTERFACE>
39
        </HADOOP_JOB>
40
        <STATUS>
41
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
42
            <RUNNING_INSTANCES value="0"/>
43
            <CUMULATIVE_RUN value="0" />
44
        </STATUS>
45
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
46
    </BODY>
47
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGrouperJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6b2d8db3-346f-4ddc-8591-39fd488c1191_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupGrouperJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />		        	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupGrouperMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
47
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
48
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
49
        		</FAMILIES>
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsToCSVJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="027554bd-3d5c-4c50-9170-90d8c4402bc3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupRootsToCSVJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that exports the representatives as CSV files</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<!-- The concrete format wrapped by LazyOutputFormat is set via mapreduce.output.lazyoutputformat.outputformat -->
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" />
17
				<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
23
			
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvReducer" />
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
28
				
29
				<!-- MULTIPLE OUTPUTS -->
30
				<PROPERTY key="mapreduce.multipleoutputs" value="NativeGroups Groups NativeEntities" />
31

  
32
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
33
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.key" value="org.apache.hadoop.io.Text" />
34
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.value" value="org.apache.hadoop.io.Text" />
35
				
36
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
37
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.key" value="org.apache.hadoop.io.Text" />
38
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.value" value="org.apache.hadoop.io.Text" />				
39
				
40
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
41
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.key" value="org.apache.hadoop.io.Text" />
42
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.value" value="org.apache.hadoop.io.Text" />				
43
				
44
				<!-- MISC -->
45
				
46
                <PROPERTY key="mapred.textoutputformat.wrapper" value="#"/>
47
                <PROPERTY key="mapred.textoutputformat.separator" value="!"/>
48

  
49
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
50
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
51
				<PROPERTY key="mapreduce.map.speculative" value="false" />
52
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
53
			
54
				<PROPERTY key="mapred.reduce.tasks" value="3" />
55
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
56
				
57
		<!--  	Uncomment to override the default lib path -->			
58
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
59
        	</STATIC_CONFIGURATION>
60
        	<JOB_INTERFACE>
61
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
62
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
63
        		
64
        		<PARAM name="mapred.output.dir" required="true" description="target directory on hdfs for the CSV files" />
65
        	</JOB_INTERFACE>
66
        	<SCAN>
67
        		<FILTERS operator="MUST_PASS_ALL">
68
        			<FILTER type="prefix" param="entityTypeId" />
69
        		</FILTERS>
70
        		<FAMILIES>
71
	       			<FAMILY param="entityType" />
72
        			<FAMILY value="resultResult_dedup_merges" />
73
        			<FAMILY value="personPerson_dedup_merges" />
74
     				<FAMILY value="organizationOrganization_dedup_merges" />
75
        		</FAMILIES>
76
        	</SCAN>
77
        </HADOOP_JOB>
78
        <STATUS>
79
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
80
            <RUNNING_INSTANCES value="0"/>
81
            <CUMULATIVE_RUN value="0" />
82
        </STATUS>
83
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
84
    </BODY>
85
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
	
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="500" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
	
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN caching="10">
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES/>        		
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupSimilarity2ActionsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5c4b4dbf-8198-4f7a-9a35-367c7b0a7391_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupSimilarity2ActionsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupSimilarityToActionsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.compress.map.output" value="true" />	
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
39
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
41
        		<PARAM name="dedup.conf" required="true" description="dedup configuration" />
42
        		<PARAM name="rawSetId" required="true" description="raw set identifier" />
43
        		<PARAM name="similarityCF" required="true" description="similarity column family name" />      			
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
        		<FILTERS operator="MUST_PASS_ALL">
47
        			<FILTER type="prefix" param="entityTypeId" />
48
        		</FILTERS>
49
        		<FAMILIES>
50
        			<FAMILY param="entityType" />
51
           			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
52
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
53
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />        			
54
        		</FAMILIES>
55
        	</SCAN>
56
        </HADOOP_JOB>
57
        <STATUS>
58
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
59
            <RUNNING_INSTANCES value="0"/>
60
            <CUMULATIVE_RUN value="0" />
61
        </STATUS>
62
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
63
    </BODY>
64
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/promoteActions.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
			value="8bb6c559-edf3-4da1-87d7-cdee4fba21dd_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ==" />
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType" />
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources" />
7
		<RESOURCE_URI value="" />
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00" />
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="promoteActionsJob" type="mapreduce">
12
			<DESCRIPTION>map only job that promotes actions from a set to the data table</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14
				<PROPERTY key="mapreduce.map.class"	value="eu.dnetlib.data.mapreduce.hbase.actions2.PromoteActionsMapper" />
15
				<PROPERTY key="mapreduce.inputformat.class"	value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
18
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
19
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
20
				<PROPERTY key="mapreduce.map.speculative" value="false" />
21
				<PROPERTY key="mapred.reduce.tasks" value="0" />
22
				<!-- <PROPERTY key="job.lib" value="/tmp/dnet-mapreduce-jobs-0.0.3-SNAPSHOT-jar-with-dependencies.jar" />  -->
23
			</STATIC_CONFIGURATION>
24

  
25
			<JOB_INTERFACE>
26
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
27
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
28
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
29
				<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
30
				<PARAM name="latestRawSets" required="true" description="the latest action raw sets" />
31
			</JOB_INTERFACE>
32

  
33
			<SCAN>
34
				<FILTERS operator="MUST_PASS_ALL">
35
					<FILTER type="prefix" value="aac|" />
36
				</FILTERS>
37
				<FAMILIES />
38
			</SCAN>
39
			
40
		</HADOOP_JOB>
41
		
42
		<STATUS>
43
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00" />
44
			<RUNNING_INSTANCES value="0" />
45
			<CUMULATIVE_RUN value="0" />
46
		</STATUS>
47
		
48
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
49
	</BODY>
50
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupIndexFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="fa0e188c-aec4-4877-93b4-43e3c5acae1d_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupIndexFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that updates the dedup index</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.DedupIndexFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- JOB GLOBAL -->		
24
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
32
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
33
				
34
		<!--  	Uncomment to override the default lib path -->			
35
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
36
        	</STATIC_CONFIGURATION>
37
        	<JOB_INTERFACE>
38
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
39
	       		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" />
41
        		
42
        		<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> 
43
       			<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" />
44

  
45
       			<PARAM name="id" required="true" description="index DS id" />
46
				<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shut down the solr client pool" />
47
       			<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" />
48
       			<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" />
49
				<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" />
50
				<PARAM name="index.fields" required="true" description="fields from a given MDFormatDSResourceType" />
51
				<PARAM name="entityType" required="true" description="entity Type name" />
52
        	</JOB_INTERFACE>
53
        	<SCAN>
54
        		<FILTERS operator="MUST_PASS_ALL">
55
        			<FILTER type="prefix" param="entityTypeId" />
56
        		</FILTERS>
57
        		<FAMILIES>
58
        			<FAMILY param="entityType" />
59
        		</FAMILIES>
60
        	</SCAN>
61
        </HADOOP_JOB>
62
        <STATUS>
63
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
64
            <RUNNING_INSTANCES value="0"/>
65
            <CUMULATIVE_RUN value="0" />
66
        </STATUS>
67
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
68
    </BODY>
69
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/mdStoreHdfsImportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="5cd115e7-9650-4263-9c0d-a3fbf6d9549d_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="mdStoreHdfsImportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that maps xml metadata records from a sequence file into an hbase table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportRecordsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
			
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
25
				<PROPERTY key="mapreduce.map.speculative" value="false" />
26
				<PROPERTY key="mapred.reduce.tasks" value="0" />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
				
32
        	</STATIC_CONFIGURATION>
33
        	<JOB_INTERFACE>
34
        		<PARAM name="mapred.input.dir" required="true" description="input sequence file" />
35
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
36
        		<PARAM name="hbase.import.xslt" required="true" description="mapping" />
37
        	</JOB_INTERFACE>
38
        </HADOOP_JOB>
39
        <STATUS>
40
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
41
            <RUNNING_INSTANCES value="0"/>
42
            <CUMULATIVE_RUN value="0" />
43
        </STATUS>
44
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
45
    </BODY>
46
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/prepareIndexDataJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="ed8c0a4e-7cf2-49df-bfed-fcfab0699ade_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="prepareIndexDataJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.index.PrepareFeedReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
				
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />						
30
				<PROPERTY key="mapred.compress.map.output" value="true" />	
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35
							
36
				<PROPERTY key="mapred.reduce.tasks" value="500" />
37
				<PROPERTY key="dfs.blocksize" value="32M" />
38
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
39
				
40
		<!--  	Uncomment to override the default lib path -->			
41
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
42
        	</STATIC_CONFIGURATION>
43
        	<JOB_INTERFACE>
44
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
45
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
46
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
47
        		
48
        		<PARAM name="index.entity.links" required="true" description="entity joiner configuration" /> 
49
        		<PARAM name="contextmap" required="true" description="context map (ContextDSResources)" />        		
50
        	</JOB_INTERFACE>
51
        	<SCAN>
52
        		<FILTERS />
53
        		<FAMILIES />
54
        	</SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="0" />
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/copyTableJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="cd176186-58ba-44b7-ae7e-25994c0c95bb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="copytable" type="admin">
11
 			<DESCRIPTION>copies a table on the same cluster or on a remote one, mimics http://hbase.apache.org/book/ops_mgt.html#copytable</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- MISC -->				
15
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
16
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
17
			
18
				<PROPERTY key="mapred.reduce.tasks" value="0" />
19
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
20
				
21
		<!--  	Uncomment to override the default lib path -->			
22
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
23
        	</STATIC_CONFIGURATION>
24
        	<JOB_INTERFACE>
25
        		<PARAM name="new.name" required="true" description="target hbase table" />
26
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
27
        		<PARAM name="peer.adr" required="false" description="target hbase quorum" />
28
        	</JOB_INTERFACE>
29
        	<SCAN caching="100">
30
          		<FILTERS />
31
        		<FAMILIES />      		
32
        	</SCAN>
33
        </HADOOP_JOB>
34
        <STATUS>
35
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
36
            <RUNNING_INSTANCES value="0"/>
37
            <CUMULATIVE_RUN value="0" />
38
        </STATUS>
39
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
40
    </BODY>
41
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/sqoopStatsUpdateJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bf0ab07b-36bf-4164-ab73-342bfb11e51a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="StatsExportJob" type="oozie">
11
            <DESCRIPTION>Job for importing data from HBASE to the relational Stats Database</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
			
14
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet"/> <!-- TODO: confirm whether the user name should be "sqoop" instead -->
17
                <PROPERTY key="workingDir" value="/user/dnet/lib/stats/working_dir"/>
18
                <PROPERTY key="numReducers" value="1"/>
19
                
20
				<PROPERTY key="oozie.wf.application.path" value="hdfs://nmis-hadoop-cluster/user/eri.katsari/stats/oozie_app"/><!-- edit this property! -->
21
				<PROPERTY key="Stats_db_Url" value="jdbc:postgresql://node1.t.openaire.research-infrastructures.eu:5432/stats"/><!-- complete the jdbc url with the actual value! -->
22
				<PROPERTY key="Stats_db_User" value="sqoop"/>
23
				<PROPERTY key="Stats_db_Pass" value="sqoop"/>
24
				<PROPERTY key="Stats_db_Driver" value="org.postgresql.Driver"/>
25
				<PROPERTY key="Stats_db_table_map" value="datasourceLanguage=datasource_languages,datasource=datasource,project=project,result=result,organization=organization,datasourceOrganization=datasource_organizations,datasourceTopic=datasource_topics,projectOrganization=project_organizations,resultClaim=result_claims,resultClassification=result_classifications,resultConcept=result_concepts,resultLanguage=result_languages,resultOrganization=result_organizations,resultResult=result_results,resultProject=project_results,resultTopic=result_topics,category=category,context=context,claim=claim,concept=concept,resultDatasource=result_datasources"/><!-- comma-separated name=table pairs, each name listed once; presumably maps every exported output (see out1..out20) to its Stats DB table; confirm against the workflow -->
26
				<PROPERTY key="Stats_sqoop_RecsPerStatement" value="10000"/>
27
				<PROPERTY key="Stats_sqoop_StatementPerTrans" value="1000000"/>
28
				<PROPERTY key="Stats_sqoop_ReducersCount" value="4"/>
29
				<PROPERTY key="Stats_output_Path" value="/tmp/stats/"/>
30
				<PROPERTY key="Stats_null_String_Field" value="null"/>
31
				<PROPERTY key="Stats_null_Numeric_Field" value="null"/>
32
				<PROPERTY key="Stats_enclosing_Character" value="#"/>
33
				<PROPERTY key="Stats_delim_Character" value="!"/>
34
				<PROPERTY key="out1" value="datasource"/>
35
				<PROPERTY key="out2" value="project"/>
36
				<PROPERTY key="out3" value="organization"/>
37
				<PROPERTY key="out4" value="datasourceOrganization"/>
38
				<PROPERTY key="out5" value="datasourceTopic"/>
39
				<PROPERTY key="out6" value="datasourceLanguage"/>
40
				<PROPERTY key="out7" value="projectOrganization"/>
41
				<PROPERTY key="out8" value="resultClaim"/>
42
				<PROPERTY key="out9" value="resultClassification"/>
43
				<PROPERTY key="out10" value="resultConcept"/>
44
				<PROPERTY key="out11" value="resultLanguage"/>
45
				<PROPERTY key="out12" value="resultOrganization"/>
46
				<PROPERTY key="out13" value="resultResult"/>
47
				<PROPERTY key="out14" value="resultProject"/>
48
				<PROPERTY key="out15" value="category"/>
49
				<PROPERTY key="out16" value="resultTopic"/>
50
				<PROPERTY key="out17" value="resultDatasource"/>
51
				<PROPERTY key="out18" value="result"/>
52
				<PROPERTY key="out19" value="claim"/>
53
				<PROPERTY key="out20" value="concept"/>
54
            </STATIC_CONFIGURATION>
55
            <JOB_INTERFACE>
56
                <PARAM name="nameNode" required="true" description="hdfs name node"/>
57
                <PARAM name="jobTracker" required="true" description="job tracker name"/>
58
                <PARAM name="Stats_Hbase_Source_Table" required="true" description="HBase table with protobufs."/>
59
                <PARAM name="Stats_indexConf" required="true" description="Index Entity Links configuration."/>
60
                <PARAM name="isLookupEndpoint" required="true" description="IS lookup service endpoint"/>
61
            </JOB_INTERFACE>
62
        </HADOOP_JOB>
63
        <STATUS>
64
            <LAST_SUBMISSION_DATE value="2014-11-14T19:57:25+00:00"/>
65
            <RUNNING_INSTANCES value="0"/>
66
            <CUMULATIVE_RUN value="75"/>
67
        </STATUS>
68
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
69
    </BODY>
70
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingQuickJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="13beed98-81bf-4fbd-ab4f-de071177997c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
    	<HADOOP_JOB name="iisPreprocessingQuickJob" type="oozie">
11
        	<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet" />
16

  
17
				<!-- Runtime -->
18
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing_quick_test"/>
19
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
20
                <PROPERTY key="input_referenceextraction_project" value="/user/marek.horst/share/referenceextraction/document_projects/2014-04-11"/>
21
                <PROPERTY key="input_referenceextraction_dataset" value="/user/marek.horst/share/referenceextraction/document_datasets/all/2014-04-11"/>
22
                <PROPERTY key="export_action_hbase_table_initialize" value="false"/>
23
            </STATIC_CONFIGURATION>
24
        	<JOB_INTERFACE>
25
 		       	<PARAM name="import_content_object_store_location" required="true" description="object store service location" />
26
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
27
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" />
28
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
29
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
30
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objectstore ids subject to dataset reference extraction" />
31
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objectstore ids for WoS plaintext" />
32
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
33
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
35
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
36
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
37
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
38
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
39
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
40
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
41
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
42
        	</JOB_INTERFACE>
43
        </HADOOP_JOB>
44
        <STATUS>
45
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
46
            <RUNNING_INSTANCES value="0"/>
47
            <CUMULATIVE_RUN value="0" />
48
        </STATUS>
49
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
50
    </BODY>
51
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/offlineHbaseLoadJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="073e55eb-c6f4-49a9-80b3-1a927612ba5b_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="offlineHbaseLoad" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that loads a given entity type in the offline dedup table</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.OfflineHbaseLoadMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.compress.map.output" value="true" />	
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
39
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
40
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
41
        	</JOB_INTERFACE>
42
        	<SCAN>
43
        		<FILTERS operator="MUST_PASS_ALL">
44
        			<FILTER type="prefix" param="entityTypeId" />
45
        		</FILTERS>
46
        		<FAMILIES>
47
        			<FAMILY param="entityType" />
48
        		</FAMILIES>
49
        	</SCAN>
50
        </HADOOP_JOB>
51
        <STATUS>
52
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
            <RUNNING_INSTANCES value="0"/>
54
            <CUMULATIVE_RUN value="0" />
55
        </STATUS>
56
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
    </BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/OAIPublisherConfigurationDSResources/OAIPublisherConfigurationDSResourceType/OAIPublisherConfiguration-OpenAire.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="53b2865b-53a3-444c-a3a5-d09bb7be9374_T0FJUHVibGlzaGVyQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL09BSVB1Ymxpc2hlckNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="OAIPublisherConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="OAIPublisherConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2014-10-23T12:33:34+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IDSCHEME>oai</IDSCHEME>
12
            <IDNAMESPACE>dnet</IDNAMESPACE>
13
            <CURRENTDB>oaistore</CURRENTDB>
14
            <OAISETS>
15
                <OAISET enabled="true">
16
                    <spec>OpenAccess</spec>
17
                    <name>Set of Open Access results</name>
18
                    <description>OPEN ACCESS publications and datasets</description>
19
                    <query>(license = "OPEN")</query>
20
                </OAISET>
21
                <OAISET enabled="true">
22
                    <spec>publications</spec>
23
                    <name>Publications</name>
24
                    <description>Set of all publications aggregated in OpenAIREPlus</description>
25
                    <query>resulttypeid="publication"</query>
26
                </OAISET>
27
                <OAISET enabled="true">
28
                    <spec>organizations</spec>
29
                    <name>Organizations</name>
30
                    <description>Set of all organizations aggregated in OpenAIREPlus</description>
31
                    <query>oaftype="organization"</query>
32
                </OAISET>
33
                <OAISET enabled="true">
34
                    <spec>datasets</spec>
35
                    <name>Datasets</name>
36
                    <description>Set of all datasets aggregated in OpenAIREPlus</description>
37
                    <query>resulttypeid="dataset"</query>
38
                </OAISET>
39
                <OAISET enabled="true">
40
                    <spec>projects</spec>
41
                    <name>Projects</name>
42
                    <description>Set of all projects: includes FP7, WellcomeTrust and, soon, H2020 funded projects</description>
43
                    <query>oaftype exact "project"</query>
44
                </OAISET>
45
                <OAISET enabled="true">
46
                    <spec>FP7Projects</spec>
47
                    <name>FP7 projects</name>
48
                    <description>Projects funded by EC FP7</description>
49
                    <query>(oaftype exact "project" and funder exact "corda_______::FP7")</query>
50
                </OAISET>
51
                <OAISET enabled="true">
52
                    <spec>WTProjects</spec>
53
                    <name>Wellcome Trust projects</name>
54
                    <description>Projects funded by Wellcome Trust</description>
55
                    <query>(oaftype exact "project" and funder exact "wt::WT")</query>
56
                </OAISET>
57
            </OAISETS>
58
            <METADATAFORMATS>
59
                <METADATAFORMAT exportable="true" metadataPrefix="oaf">
60
                    <NAMESPACE>http://namespace.openaire.eu/oaf</NAMESPACE>
61
                    <SCHEMA>http://www.openaire.eu/schema/0.2/oaf-0.2.xsd</SCHEMA>
62
                    <SOURCE_METADATA_FORMAT interpretation="openaire" layout="index" name="oaf"/>
63
                    <TRANSFORMATION_RULE/>
64
                    <BASE_QUERY>(oaftype &lt;&gt; "person")</BASE_QUERY>
65
                </METADATAFORMAT>
66
                <METADATAFORMAT exportable="true" metadataPrefix="oai_dc">
67
                    <NAMESPACE>http://www.openarchives.org/OAI/2.0/oai_dc/</NAMESPACE>
68
                    <SCHEMA>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</SCHEMA>
69
                    <SOURCE_METADATA_FORMAT interpretation="openaire" layout="index" name="oaf"/>
70
                    <TRANSFORMATION_RULE>oaf2dc_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=</TRANSFORMATION_RULE>
71
                    <BASE_QUERY>oaftype="result"</BASE_QUERY>
72
                </METADATAFORMAT>
73
            </METADATAFORMATS>
74
            <INDICES>
75
                <INDEX name="objIdentifier" repeatable="false">
76
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="//*[local-name() ='objIdentifier']"/>
77
                </INDEX>
78
                <INDEX name="set" repeatable="true">
79
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="//collectedfrom/@name"/>
80
                </INDEX>
81
                <INDEX name="license" repeatable="false">
82
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="//bestlicense/@classid"/>
83
                </INDEX>
84
                <INDEX name="oaftype" repeatable="false">
85
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="local-name(//*[local-name()='entity']/*)"/>
86
                </INDEX>
87
                <INDEX name="resulttypeid" repeatable="false">
88
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/>
89
                </INDEX>
90
                <INDEX name="funder" repeatable="false">
91
                    <SOURCE interpretation="openaire" layout="index" name="oaf" path="//*[local-name()='entity']/*[local-name()='project']/fundingtree//funding_level_0/id"/>
92
                </INDEX>
93
            </INDICES>
94
        </CONFIGURATION>
95
        <STATUS>
96
            <LAST_UPDATE value="2001-12-31T12:00:00"/>
97
        </STATUS>
98
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
99
    </BODY>
100
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_dc_source.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="0c46760d-4787-4b56-87a7-ab91983b7494_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
5
        <RESOURCE_KIND value="TransformationRuleDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-02-28T12:14:22+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <IMPORTED/>
12
            <SCRIPT>
13
                <TITLE>dc_cleaning_OPENAIREplus_dc_source</TITLE>
14
                <CODE><![CDATA[
15
declare_script "dc_cleaning_OpenAIREplus_dc_source";
16
declare_ns oaf = "http://namespace.openaire.eu/oaf";
17
declare_ns dri = "http://www.driver-repository.eu/namespace/dri";
18
declare_ns dr = "http://www.driver-repository.eu/namespace/dr";
19
declare_ns dc = "http://purl.org/dc/elements/1.1/";
20
declare_ns prov = "http://www.openarchives.org/OAI/2.0/provenance";
21
$var0 = "''";
22
$var1 = "'corda_______::'";
23
$varDummy = "''";
24
static $varDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
25
static $varRepoid = xpath:"//dri:repositoryId";
26
static $varOfficialname = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//OFFICIAL_NAME"]);
27
dri:objIdentifier = xpath:"//dri:objIdentifier";
28
dri:repositoryId = $varRepoid;
29
//dri:repositoryId = xpath:"//dri:repositoryId";
30
dri:recordIdentifier = RegExpr(xpath:"//dri:recordIdentifier", $var0, "s/^(.*)(::)/$2/");
31
apply xpath:"//dc:creator" if xpath:"string-length(.) > 0" dc:creator = xpath:"normalize-space(.)"; else $varDummy = "''";
32
apply xpath:"//dc:title" if xpath:"string-length(.) > 0" dc:title = xpath:"normalize-space(.)"; else $varDummy = "''";
33
apply xpath:"//dc:subject" if xpath:"string-length(.) > 0" dc:subject = xpath:"normalize-space(.)"; else $varDummy = "''";
34
apply xpath:"//dc:publisher" if xpath:"string-length(.) > 0" dc:publisher = xpath:"normalize-space(.)"; else $varDummy = "''";
35
apply xpath:"//dc:source" if xpath:"not(starts-with(normalize-space(.), 'http'))" dc:source = xpath:"normalize-space(.)"; else $varDummy = "''";
36
dc:contributor = xpath:"//dc:contributor";
37
dc:description = xpath:"//dc:description";
38
$varHttpTest = "''";
39
// Skip the record unless at least one dc:source is an http URL. The test uses normalize-space so it matches the other dc:source and dc:identifier rules in this script.
if xpath:"//dc:source[starts-with(normalize-space(.), 'http')]" $varHttpTest = "true"; else dc:source = skipRecord();
40
dc:identifier = xpath:"//dc:source[starts-with(normalize-space(.), 'http')]";
41
apply xpath:"//dc:identifier" if xpath:"starts-with(normalize-space(.), 'http')" dc:identifier = xpath:"normalize-space(.)"; else dr:CobjIdentifier = xpath:"normalize-space(.)";
42
static dr:dateOfCollection = getValue(CURRENTDATE, []);
43
// dc:type = xpath:"//dc:type";
44
dc:language = Convert(xpath:"//dc:language", Languages);
45
//dc:language = "eng";
46
//if xpath:"//dc:rights[text()='info:eu-repo/semantics/openAccess']" dc:publisher = xpath:"//dc:publisher"; else dc:publisher = skipRecord();
47
dc:date = xpath:"//dc:date";
48
oaf:dateAccepted = Convert(xpath:"descendant-or-self::dc:date", DateISO8601, "yyyy-MM-dd", "min()");
49
apply xpath:"//dc:date" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/date')" oaf:embargoenddate = RegExpr(xpath:"normalize-space(.)", $var0, "s/^(.*info:eu-repo\/date\/embargoEnd\/)//gmi"); else $var0 = "''";
50
apply xpath:"//dc:relation" if xpath:"string-length(substring-after(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7/')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
51
//apply xpath:"//dc:relation" if xpath:"string-length(translate(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7','')) = 6" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
52
//apply xpath:"//dc:relation" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/grantAgreement/EC/FP7')" oaf:projectid = RegExpr(xpath:"normalize-space(.)", $var1, "s/^(.*info:eu-repo\/grantAgreement\/EC\/FP7\/)//gmi"); else dc:relation = xpath:"normalize-space(.)";
53
//comment-js-09-10-2012 apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" dc:rights = empty; else dc:rights = xpath:"normalize-space(.)";
54
// static oaf:datasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
55
//
56
//
57
//
58
static $varDsType = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//CONFIGURATION/DATASOURCE_TYPE"]);
59
//if xpath:"$varDsType='aggregator'" oaf:hostingDatasourceid = xpath:"//prov:baseURL"; else oaf:hostingDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
60
oaf:collectedDatasourceid = getValue(PROFILEFIELD, [xpath:"//dri:repositoryId", xpath:"//EXTRA_FIELDS/FIELD[key='OpenAireDataSourceId']/value"]);
61
//
62
apply xpath:"//dc:type" if xpath:"." dr:CobjCategory = Convert(xpath:"normalize-space(.)", TextTypologies); else dc:type = xpath:".";
63
apply xpath:"//dc:rights" if xpath:"starts-with(normalize-space(.), 'info:eu-repo/semantics')" oaf:accessrights = Convert(xpath:"normalize-space(.)", AccessRights); else dc:rights = xpath:".";
64
if xpath:"//dc:rights[starts-with(normalize-space(.), 'info:eu-repo/semantics')]" $var0 = "''"; else oaf:accessrights = "OPEN";
65
//if xpath:"count(//dc:rights) = 0" oaf:accessrights = "OPEN"; else $var0 = "''";
66
// oaf:accessrights = Convert(xpath:"normalize-space(//dc:rights)", AccessRights);
67
//
68
static oaf:collectedFrom = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
69
static oaf:hostedBy = set("''", @name = $varOfficialname; , @id = $varDatasourceid;);
70
//
71
$varId = identifierExtract('["//dc:identifier", "//dc:relation"]' , xpath:"./*[local-name()='record']" , '(10[.][0-9]{4,}[^\s"/<>]*/[^\s"<>]+)');
72
oaf:identifier = set(xpath:"$varId//value", @identifierType = "doi";);
73
oaf:datasourceprefix = xpath:"//oaf:datasourceprefix";
74
end
75
]]></CODE>
76
            </SCRIPT>
77
        </CONFIGURATION>
78
        <STATUS/>
79
        <SECURITY_PARAMETERS/>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.6/src/main/resources/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/dc2oaf_cleaning_OPENAIREplus_od______1581.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="36517292-69af-4793-9769-6985cddccfff_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU="/>
4
        <RESOURCE_TYPE value="TransformationRuleDSResourceType"/>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff