Project

General

Profile

« Previous | Next » 

Revision 47984

[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.16

View differences:

modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/pom.xml
1
<?xml version="1.0" ?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaireplus-profiles</artifactId>
12
	<packaging>jar</packaging>
13
	<version>1.0.16</version>
14
	<scm>
15
   		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16</developerConnection>
16
	</scm>
17

  
18
	<dependencies>
19

  
20
	</dependencies>
21
</project>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_organization_relations.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="2e4251fe-66a8-4155-a902-72161601b8cd_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2016-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:result_organization_relations">dnet:result_organization_relations</ONTOLOGY_NAME>
12
            <!-- Describes the dnet:result_organization_relations vocabulary declared in ONTOLOGY_NAME. -->
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Organization entities</ONTOLOGY_DESCRIPTION>
13
	        <TERMS>
14
		        <TERM code="isAffiliatedWith" encoding="resultOrganization_affiliation_hasAuthorInstitutionOf" english_name="has author institution of" native_name="has author institution of">
15
			        <SYNONYMS/>
16
			        <RELATIONS>
17
				        <!-- inverseOf must name a TERM code declared in this ontology; the sibling term is "affiliates". NOTE(review): confirm against the relation classes used by consumers. -->
				        <RELATION type="inverseOf" code="affiliates"/>
18
			        </RELATIONS>
19
		        </TERM>
20
		        <TERM code="affiliates" encoding="resultOrganization_affiliation_isAuthorInstitutionOf" english_name="is author institution of" native_name="is author institution of">
21
			        <SYNONYMS/>
22
			        <RELATIONS>
23
				        <!-- inverseOf must name a TERM code declared in this ontology; the sibling term is "isAffiliatedWith". NOTE(review): confirm against the relation classes used by consumers. -->
				        <RELATION type="inverseOf" code="isAffiliatedWith"/>
24
			        </RELATIONS>
25
		        </TERM>
26
	        </TERMS>
27
        </CONFIGURATION>
28
        <STATUS>
29
            <LAST_UPDATE value="2016-11-18T10:46:36Z"/>
30
        </STATUS>
31
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
32
    </BODY>
33
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/>
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
				<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.compress.map.output" value="false"/>
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38

  
39
				<!--  	Uncomment to override the default lib path -->
40
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/>
44
				<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/>
45
			</JOB_INTERFACE>
46
			<SCAN>
47
				<FILTERS/>
48
				<FAMILIES/>
49
			</SCAN>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
59

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/person_result_relations.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="12b8832a-0e97-43a9-b5ec-c33ff3c29778_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:person_result_relations">dnet:person_result_relations</ONTOLOGY_NAME>
12
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Person entities</ONTOLOGY_DESCRIPTION>
13
            <TERMS>
14
                <TERM code="isAuthorOf" encoding="personResult_authorship_isAuthorOf" english_name="author of" native_name="author of">
15
                    <SYNONYMS/>
16
         			<RELATIONS>
17
						<RELATION type="inverseOf" code="hasAuthor"/>
18
					</RELATIONS>
19
                </TERM>
20
                <TERM code="hasAuthor" encoding="personResult_authorship_hasAuthor" english_name="has author" native_name="has author">
21
                    <SYNONYMS/>
22
         			<RELATIONS>
23
						<RELATION type="inverseOf" code="isAuthorOf"/>
24
					</RELATIONS>
25
                </TERM>
26
                <TERM code="isContributorOf" encoding="personResult_contribution_isContributorOf" english_name="contributor of" native_name="contributor of">
27
                    <SYNONYMS/>
28
                    <RELATIONS>
29
                        <RELATION type="inverseOf" code="hasContributor"/>
30
                    </RELATIONS>
31
                </TERM>
32
                <TERM code="hasContributor" encoding="personResult_contribution_hasContributor" english_name="has contributor" native_name="has contributor">
33
                    <SYNONYMS/>
34
                    <RELATIONS>
35
                        <RELATION type="inverseOf" code="isContributorOf"/>
36
                    </RELATIONS>
37
                </TERM>
38
            </TERMS>
39
        </CONFIGURATION>
40
        <STATUS>
41
            <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
42
        </STATUS>
43
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
44
    </BODY>
45
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/connectedComponentsJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="28e8d911-87e5-4f39-9ce9-0d445126c75f_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="connectedComponentsJob" type="mapreduce">
12
			<DESCRIPTION>map reduce job joins all the vertex ids to build the connected components in the graph</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/>
23

  
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsReducer"/>
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.client.Put"/>
28

  
29
				<!-- MISC -->
30
				<PROPERTY key="mapred.compress.map.output" value="false"/>
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
33
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
35

  
36
				<PROPERTY key="mapred.reduce.tasks" value="1"/>
37
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
38

  
39
				<!--  	Uncomment to override the default lib path -->
40
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
41
			</STATIC_CONFIGURATION>
42
			<JOB_INTERFACE>
43
				<PARAM name="mapred.input.dir" required="true" description="source path on hdfs"/>
44
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
45
			</JOB_INTERFACE>
46
			<SCAN>
47
				<FILTERS/>
48
				<FAMILIES/>
49
			</SCAN>
50
		</HADOOP_JOB>
51
		<STATUS>
52
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
			<RUNNING_INSTANCES value="0"/>
54
			<CUMULATIVE_RUN value="0"/>
55
		</STATUS>
56
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
	</BODY>
58
</RESOURCE_PROFILE>
59

  
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceImportJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="b7d51a07-6996-4841-9a4a-685a044638e3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="informationSpaceImportJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that imports the whole information space table from a sequence file holding a json dump</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
				
18
				<!-- MAPPER -->        	
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportInformationSpaceDumpMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
			
23
				<!-- MISC -->
24
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
25
				<PROPERTY key="mapreduce.map.speculative" value="false" />
26
				<PROPERTY key="mapred.reduce.tasks" value="0" />
27
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
28
				
29
		<!--  	Uncomment to override the default lib path -->			
30
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
31
        	</STATIC_CONFIGURATION>
32
        	<JOB_INTERFACE>
33
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
34
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />
35
        		
36
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />         		
37
        	</JOB_INTERFACE>
38
        	<SCAN>
39
        		<FILTERS />
40
        		<FAMILIES />
41
        	</SCAN>
42
        </HADOOP_JOB>
43
        <STATUS>
44
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
45
            <RUNNING_INSTANCES value="0"/>
46
            <CUMULATIVE_RUN value="0" />
47
        </STATUS>
48
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
49
    </BODY>
50
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteSimRelsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6363b833-ac88-421c-8596-440a3dc735db_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="deleteSimRelJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that deletes the similarity rels used in the deduplication process (person)</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteSimRelMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<!-- Writable lives in org.apache.hadoop.io (not org.apache.hadoop.hbase.io); same value as used by dedupFindPersonRootsJob. -->
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
48
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
49
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
50
        		</FAMILIES>
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_result_relations.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="c2cd2bab-6a9e-4a34-9318-3b11a349ad4c_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:result_result_relations">dnet:result_result_relations</ONTOLOGY_NAME>
12
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Results entities</ONTOLOGY_DESCRIPTION>
13
            <TERMS>
14
                <TERM code="hasAmongTopNSimilarDocuments" encoding="resultResult_similarity_hasAmongTopNSimilarDocuments" english_name="has Among Top N Similar Documents" native_name="has Among Top N Similar Documents">
15
                    <SYNONYMS/>
16
                    <RELATIONS>
17
						<RELATION type="inverseOf" code="isAmongTopNSimilarDocuments"/>
18
					</RELATIONS>                    
19
                </TERM>
20
                <TERM code="isAmongTopNSimilarDocuments" encoding="resultResult_similarity_isAmongTopNSimilarDocuments" english_name="is Among Top N Similar Documents" native_name="is Among Top N Similar Documents">
21
                    <SYNONYMS/>
22
                    <RELATIONS>
23
						<RELATION type="inverseOf" code="hasAmongTopNSimilarDocuments"/>
24
					</RELATIONS>                    
25
                </TERM>
26
                <TERM code="isRelatedTo" encoding="resultResult_publicationDataset_isRelatedTo" english_name="is related to" native_name="is related to">
27
                    <SYNONYMS/>
28
                    <RELATIONS>
29
						<RELATION type="inverseOf" code="isRelatedTo"/>
30
					</RELATIONS>                    
31
                </TERM>
32
	            <TERM code="isSupplementTo" encoding="resultResult_supplement_isSupplementTo" english_name="is supplement to" native_name="is supplement to">
33
		            <SYNONYMS/>
34
		            <RELATIONS>
35
			            <RELATION type="inverseOf" code="isSupplementedBy"/>
36
		            </RELATIONS>
37
	            </TERM>
38
	            <TERM code="isSupplementedBy" encoding="resultResult_supplement_isSupplementedBy" english_name="is supplemented by" native_name="is supplemented by">
39
		            <SYNONYMS/>
40
		            <RELATIONS>
41
			            <RELATION type="inverseOf" code="isSupplementTo"/>
42
		            </RELATIONS>
43
	            </TERM>
44
	            <TERM code="isPartOf" encoding="resultResult_part_isPartOf" english_name="is part of" native_name="is part of">
45
		            <SYNONYMS/>
46
		            <RELATIONS>
47
			            <RELATION type="inverseOf" code="hasPart"/>
48
		            </RELATIONS>
49
	            </TERM>
50
	            <TERM code="hasPart" encoding="resultResult_part_hasPart" english_name="has part" native_name="has part">
51
		            <SYNONYMS/>
52
		            <RELATIONS>
53
			            <RELATION type="inverseOf" code="isPartOf"/>
54
		            </RELATIONS>
55
	            </TERM>
56
                <TERM code="merges" encoding="resultResult_dedup_merges" english_name="merges" native_name="merges">
57
                    <SYNONYMS/>
58
                    <RELATIONS>
59
						<RELATION type="inverseOf" code="isMergedIn"/>
60
					</RELATIONS>                    
61
                </TERM>
62
                <TERM code="isMergedIn" encoding="resultResult_dedup_isMergedIn" english_name="is Merged In" native_name="is Merged In">
63
                    <SYNONYMS/>
64
                    <RELATIONS>
65
						<RELATION type="inverseOf" code="merges"/>
66
					</RELATIONS>                    
67
                </TERM>
68
                <TERM code="isSimilarTo" encoding="resultResult_dedupSimilarity_isSimilarTo" english_name="is similar to" native_name="is similar to">
69
                    <SYNONYMS/>
70
                    <RELATIONS>
71
						<RELATION type="inverseOf" code="isSimilarTo"/>
72
					</RELATIONS>
73
                </TERM>
74
            </TERMS>
75
        </CONFIGURATION>
76
        <STATUS>
77
            <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
78
        </STATUS>
79
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
80
    </BODY>
81
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that finds the root of a similarity group (person)</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="100" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN>
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES>
51
        			<FAMILY param="entityType" />
52
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
53
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
54
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
55
        		</FAMILIES>
56
        	</SCAN>
57
        </HADOOP_JOB>
58
        <STATUS>
59
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
60
            <RUNNING_INSTANCES value="0"/>
61
            <CUMULATIVE_RUN value="0" />
62
        </STATUS>
63
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
64
    </BODY>
65
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGTCleanerJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="888ef72f-701a-4d59-8b8a-2ad01986f975_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="gtCleanerJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that deletes the non-GT rows</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.GTCleanerMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Delete" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />		
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY param="entityType" />
47
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
48
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
49
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
50
        		</FAMILIES>
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsToCSVJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="027554bd-3d5c-4c50-9170-90d8c4402bc3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupRootsToCSVJob" type="mapreduce">
11
 			<!-- This profile configures a reducer class and a non-zero mapred.reduce.tasks, so the job is not map-only. -->
 			<DESCRIPTION>map reduce job that exports the representatives as CSV files</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" />
17
				<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
23
			
24
				<!-- REDUCER -->
25
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvReducer" />
26
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
27
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
28
				
29
				<!-- MULTIPLE OUTPUTS -->
30
				<PROPERTY key="mapreduce.multipleoutputs" value="NativeGroups Groups NativeEntities" />
31

  
32
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
33
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.key" value="org.apache.hadoop.io.Text" />
34
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.value" value="org.apache.hadoop.io.Text" />
35
				
36
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
37
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.key" value="org.apache.hadoop.io.Text" />
38
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.value" value="org.apache.hadoop.io.Text" />				
39
				
40
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />
41
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.key" value="org.apache.hadoop.io.Text" />
42
				<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.value" value="org.apache.hadoop.io.Text" />				
43
				
44
				<!-- MISC -->
45
				
46
                <PROPERTY key="mapred.textoutputformat.wrapper" value="#"/>
47
                <PROPERTY key="mapred.textoutputformat.separator" value="!"/>
48

  
49
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
50
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
51
				<PROPERTY key="mapreduce.map.speculative" value="false" />
52
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
53
			
54
				<PROPERTY key="mapred.reduce.tasks" value="3" />
55
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
56
				
57
		<!--  	Uncomment to override the default lib path -->			
58
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
59
        	</STATIC_CONFIGURATION>
60
        	<JOB_INTERFACE>
61
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
62
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
63
        		
64
        		<PARAM name="mapred.output.dir" required="true" description="target directory on hdfs for the text output files" />
65
        	</JOB_INTERFACE>
66
        	<SCAN>
67
        		<FILTERS operator="MUST_PASS_ALL">
68
        			<FILTER type="prefix" param="entityTypeId" />
69
        		</FILTERS>
70
        		<FAMILIES>
71
	       			<FAMILY param="entityType" />
72
        			<FAMILY value="resultResult_dedup_merges" />
73
        			<FAMILY value="personPerson_dedup_merges" />
74
     				<FAMILY value="organizationOrganization_dedup_merges" />
75
        		</FAMILIES>
76
        	</SCAN>
77
        </HADOOP_JOB>
78
        <STATUS>
79
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
80
            <RUNNING_INSTANCES value="0"/>
81
            <CUMULATIVE_RUN value="0" />
82
        </STATUS>
83
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
84
    </BODY>
85
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/indexFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="1c34963b-75b3-4440-9f42-72445a26c077_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="indexFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.IndexFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- JOB GLOBAL -->		
24
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />
25
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
26
				
27
				<!-- MISC -->		
28
				<PROPERTY key="mapred.task.timeout" value="1800000"/>			
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
31
				<PROPERTY key="mapred.reduce.tasks" value="0" />
32
				<PROPERTY key="mapred.fairscheduler.pool" value="solr"/>
33
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
34
				
35
		<!--  	Uncomment to override the default lib path -->			
36
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
37
        	</STATIC_CONFIGURATION>
38
        	<JOB_INTERFACE>
39
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
40
        		<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" />
41
        		
42
        		<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> 
43
       			<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" />
44

  
45
       			<PARAM name="id" required="true" description="index DS id" />
46
				<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shutting down the solr client pool" />
47
       			<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" />
48
       			<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" />
49
				<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" />
50
				<PARAM name="index.xslt" required="true" description="record transformer created by the MSRO service" />
51
        	</JOB_INTERFACE>
52
        	<SCAN>
53
        		<FILTERS />
54
        		<FAMILIES />
55
        	</SCAN>
56
        </HADOOP_JOB>
57
        <STATUS>
58
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
59
            <RUNNING_INSTANCES value="0"/>
60
            <CUMULATIVE_RUN value="0" />
61
        </STATUS>
62
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
63
    </BODY>
64
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/datasources_organizations_typologies.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="bf12ea02-8d59-49f0-bba4-cdbb5b741981_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:datasources_organizations_typologies">dnet:datasources_organizations_typologies</ONTOLOGY_NAME>
12
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Datasource and Organization entities</ONTOLOGY_DESCRIPTION>
13
            <TERMS>
14
                <TERM code="isProvidedBy" encoding="datasourceOrganization_provision_isProvidedBy" english_name="is provided by" native_name="is provided by">
15
                    <SYNONYMS/>
16
                    <RELATIONS>
17
						<RELATION type="inverseOf" code="provides"/>
18
					</RELATIONS>
19
                </TERM>
20
                <TERM code="provides" encoding="datasourceOrganization_provision_provides" english_name="provides" native_name="provides">
21
                    <SYNONYMS/>
22
                    <RELATIONS>
23
						<RELATION type="inverseOf" code="isProvidedBy"/>
24
					</RELATIONS>
25
                </TERM>
26
            </TERMS>
27
        </CONFIGURATION>
28
        <STATUS>
29
            <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
30
        </STATUS>
31
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
32
    </BODY>
33
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="publicationAnalysisJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that analyses publication features</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />
22
			
23
				<!-- MISC -->
24
				
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
27
				<PROPERTY key="mapreduce.map.speculative" value="false" />
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
29
			
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
		<!--  	Uncomment to override the default lib path -->			
34
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        	</JOB_INTERFACE>
40
        	<SCAN>
41
        		<FILTERS operator="MUST_PASS_ALL">
42
        			<FILTER type="prefix" value="50" />
43
        		</FILTERS>
44
        		<FAMILIES>
45
	       			<FAMILY value="result" />
46
        		</FAMILIES>
47
        	</SCAN>
48
        </HADOOP_JOB>
49
        <STATUS>
50
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
51
            <RUNNING_INSTANCES value="0"/>
52
            <CUMULATIVE_RUN value="0" />
53
        </STATUS>
54
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
55
    </BODY>
56
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_project_relations.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="4f06cc59-6eca-4492-a2d1-ac2a80682ec3_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:result_project_relations">dnet:result_project_relations</ONTOLOGY_NAME>
12
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Project entities</ONTOLOGY_DESCRIPTION>
13
            <TERMS>
14
                <TERM code="isProducedBy" encoding="resultProject_outcome_isProducedBy" english_name="is produced by" native_name="is produced by">
15
                    <SYNONYMS/>
16
                    <RELATIONS>
17
						<RELATION type="inverseOf" code="produces"/>
18
					</RELATIONS>
19
                </TERM>
20
                <TERM code="produces" encoding="resultProject_outcome_produces" english_name="produces" native_name="produces">
21
                    <SYNONYMS/>
22
                    <RELATIONS>
23
						<RELATION type="inverseOf" code="isProducedBy"/>
24
					</RELATIONS>
25
                </TERM>
26
            </TERMS>
27
        </CONFIGURATION>
28
        <STATUS>
29
            <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
30
        </STATUS>
31
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
32
    </BODY>
33
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupAnchorStatsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="2af384c6-9118-426d-9394-d7bbc42d707c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupAnchorStatsJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that calculates statistics on the person anchors</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />
17

  
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.AnchorStatsMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
23
				
24
				<!-- JOB GLOBAL -->		
25
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" />
26
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/>
27
				
28
				<!-- MISC -->					
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
31
				<PROPERTY key="mapred.reduce.tasks" value="0" />
32
				<!--<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> -->
33
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
34
				
35
		<!--  	Uncomment to override the default lib path -->			
36
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
37
        	</STATIC_CONFIGURATION>
38
        	<JOB_INTERFACE>
39
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
40
	       		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
41
        	</JOB_INTERFACE>
42
        	<SCAN>
43
        		<FILTERS operator="MUST_PASS_ALL">
44
        			<FILTER type="prefix" param="entityTypeId" />
45
        		</FILTERS>
46
        		<FAMILIES>
47
        			<FAMILY param="entityType" />
48
        		</FAMILIES>
49
        	</SCAN>
50
        </HADOOP_JOB>
51
        <STATUS>
52
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
53
            <RUNNING_INSTANCES value="0"/>
54
            <CUMULATIVE_RUN value="0" />
55
        </STATUS>
56
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
57
    </BODY>
58
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce">
12
			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
15
				<!-- I/O FORMAT -->
16
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>
18

  
19
				<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/>
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/>
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>
23

  
24
				<!-- MISC -->
25
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
26
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
27
				<PROPERTY key="mapreduce.map.speculative" value="false"/>
28
				<PROPERTY key="mapreduce.reduce.speculative" value="false"/>
29

  
30
				<PROPERTY key="mapred.reduce.tasks" value="0"/>
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32

  
33
				<!--  	Uncomment to override the default lib path -->
34
				<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
35
			</STATIC_CONFIGURATION>
36
			<JOB_INTERFACE>
37
				<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
38
				<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
39
				<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
40
				<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
41
			</JOB_INTERFACE>
42
			<SCAN>
43
				<FILTERS operator="MUST_PASS_ALL">
44
					<FILTER type="prefix" param="entityTypeId"/>
45
				</FILTERS>
46
				<FAMILIES>
47
					<FAMILY param="entityType"/>
48
					<FAMILY value="resultResult_dedup_isMergedIn"/>
49
					<FAMILY value="personPerson_dedup_isMergedIn"/>
50
					<FAMILY value="organizationOrganization_dedup_isMergedIn"/>
51
				</FAMILIES>
52
			</SCAN>
53
		</HADOOP_JOB>
54
		<STATUS>
55
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
			<RUNNING_INSTANCES value="0"/>
57
			<CUMULATIVE_RUN value="0"/>
58
		</STATUS>
59
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
	</BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17

  
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22

  
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
	
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="500" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
	
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
43
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
44
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
45
        	</JOB_INTERFACE>
46
        	<SCAN caching="10">
47
        		<FILTERS operator="MUST_PASS_ALL">
48
        			<FILTER type="prefix" param="entityTypeId" />
49
        		</FILTERS>
50
        		<FAMILIES/>        		
51
        	</SCAN>
52
        </HADOOP_JOB>
53
        <STATUS>
54
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
55
            <RUNNING_INSTANCES value="0"/>
56
            <CUMULATIVE_RUN value="0" />
57
        </STATUS>
58
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
59
    </BODY>
60
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/buildMergedToAnchorMapJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="4e16c8dd-8944-4266-8b5c-62e4b26e3090_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="buildMergedToAnchorMapJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that builds a map {merged author id --> anchorId}</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" />				
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" />				
27
				
28
				<!-- MISC -->
29
				<PROPERTY key="mapred.output.compress" value="false" />
30
				<PROPERTY key="mapred.compress.map.output" value="true" />	
31
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
32
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
33
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
34
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
35
							
36
				<PROPERTY key="mapred.reduce.tasks" value="1" />
37
				<PROPERTY key="dfs.blocksize" value="256M" />
38
				<PROPERTY key="mapred.textoutputformat.separator" value="=" />
39
				
40
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
41
				
42
		<!--  	Uncomment to override the default lib path -->			
43
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
44
        	</STATIC_CONFIGURATION>
45
        	<JOB_INTERFACE>
46
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
47
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
48
        		<PARAM name="mapred.output.dir" required="true" description="target directory on hdfs for the text output files" />
49
        	</JOB_INTERFACE>
50
        	<SCAN>
51
                <FILTERS operator="MUST_PASS_ALL">
52
                    <FILTER type="prefix" value="30"/>
53
                </FILTERS>
54
                <FAMILIES>
55
                    <FAMILY value="person"/>
56
	                <FAMILY value="personPerson_dedup_isMergedIn" />
57
                </FAMILIES>
58
        	</SCAN>
59
        </HADOOP_JOB>
60
        <STATUS>
61
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
62
            <RUNNING_INSTANCES value="0"/>
63
            <CUMULATIVE_RUN value="0" />
64
        </STATUS>
65
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
66
    </BODY>
67
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/oaiFeedJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="03d7af20-63bb-4790-a052-6cdbc1e05fce_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2015-02-09T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="oaiFeedJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that feeds the OAI store</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" />	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.oai.OaiFeedMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" />				
22
				
23
				<!-- JOB GLOBAL -->		
24
<!--                 <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> -->
25
<!--                 <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> -->
26
				
27
				<!-- MISC -->					
28
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
30
				<PROPERTY key="mapred.reduce.tasks" value="0" />
31
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
32
				
33
				<!--  	Overrides the default lib path; comment out the property below to use the default -->
34
				<PROPERTY key="job.lib" value="/user/dnet/lib/dnet-mapreduce-jobs-assembly-0.0.6.3-SNAPSHOT.jar"/> 
35
        	</STATIC_CONFIGURATION>
36
        	<JOB_INTERFACE>
37
        		<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" />
38
				<PARAM name="services.publisher.oai.collection" required="true" description="target mongodb collection" />
39
				<PARAM name="oaiConfiguration" required="true" description="configuration bean used to guide the OAI feeding" />
40
				<PARAM name="oai.feed.date" required="true" description="timestamp" />				
41
				<PARAM name="services.publisher.oai.host" required="true" description="mongodb host" />
42
				<PARAM name="services.publisher.oai.port" required="true" description="mongodb port" />
43
				<PARAM name="services.publisher.oai.db" required="true" description="mongodb database name" />	
44
				<PARAM name="services.publisher.oai.skipDuplicates" required="true" description="skip duplicated records." />	
45
				<PARAM name="services.publisher.oai.duplicateXPath" required="true" description="records with this xpath are identified as duplicates" />																
46
        	</JOB_INTERFACE>
47
        	<SCAN>
48
        		<FILTERS />
49
        		<FAMILIES />
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/organization_organizations_typologies.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="3bedf36c-de3b-4fae-a77a-ab9bee6e2718_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="OntologyDSResourceType"/>
5
        <RESOURCE_KIND value="OntologyDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
8
    </HEADER>
9
    <BODY>
10
        <CONFIGURATION>
11
            <ONTOLOGY_NAME code="dnet:organization_organizations_typologies">dnet:organization_organizations_typologies</ONTOLOGY_NAME>
12
            <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Organization entities</ONTOLOGY_DESCRIPTION>
13
            <TERMS>
14
                <TERM code="merges" encoding="organizationOrganization_dedup_merges" english_name="merges" native_name="merges">
15
                    <SYNONYMS/>
16
                    <RELATIONS>
17
						<RELATION type="inverseOf" code="isMergedIn"/>
18
					</RELATIONS>                    
19
                </TERM>
20
                <TERM code="isMergedIn" encoding="organizationOrganization_dedup_isMergedIn" english_name="is Merged In" native_name="is Merged In">
21
                    <SYNONYMS/>
22
                    <RELATIONS>
23
						<RELATION type="inverseOf" code="merges"/>
24
					</RELATIONS>                    
25
                </TERM>
26
                <TERM code="isSimilarTo" encoding="organizationOrganization_dedupSimilarity_isSimilarTo" english_name="is similar to" native_name="is similar to">
27
                    <SYNONYMS/>
28
                    <RELATIONS>
29
						<RELATION type="inverseOf" code="isSimilarTo"/>
30
					</RELATIONS>
31
                </TERM>
32
            </TERMS>
33
        </CONFIGURATION>
34
        <STATUS>
35
            <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
36
        </STATUS>
37
        <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
38
    </BODY>
39
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER
4
                value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/>
9
    </HEADER>
10
    <BODY>
11
        <HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce">
12
            <DESCRIPTION>map reduce job that performs tests for notificationBroker service (step 1/2)</DESCRIPTION>
13
            <STATIC_CONFIGURATION>
14

  
15
                <!-- I/O FORMAT -->
16
                <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
17
                <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>
18

  
19
                <!-- MAPPER -->
20
                <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/>
21
                <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
22
                <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>
23

  
24
                <!-- REDUCER -->
25
                <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/>
26
                <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
27
                <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>
28

  
29
                <!-- MISC -->
30
                <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
31
                <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
32
                <PROPERTY key="mapreduce.map.speculative" value="false"/>
33
                <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
34
                <PROPERTY key="mapred.reduce.tasks" value="100"/>
35

  
36
                <!-- <PROPERTY key="user.name" value="dnet" /> -->
37

  
38
                <!--  	Uncomment to override the default lib path -->
39
                <!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> -->
40
            </STATIC_CONFIGURATION>
41
            <JOB_INTERFACE>
42
                <PARAM description="source hbase table" name="hbase.mapred.inputtable" required="true"/>
43
                <PARAM description="source hbase table" name="hbase.mapreduce.inputtable" required="true"/>
44
            </JOB_INTERFACE>
45
            <SCAN>
46
                <FILTERS operator="MUST_PASS_ONE">
47
                    <FILTER type="prefix" value="50"/>
48
                    <FILTER type="prefix" value="10"/>
49
                </FILTERS>
50
                <FAMILIES>
51
                    <FAMILY value="result"/>
52
                    <FAMILY value="datasource"/>
53
                </FAMILIES>
54
            </SCAN>
55
        </HADOOP_JOB>
56
        <STATUS>
57
            <LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/>
58
            <RUNNING_INSTANCES value="0"/>
59
            <CUMULATIVE_RUN value="7"/>
60
        </STATUS>
61
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
62
    </BODY>
63
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/coauthorUpdateJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6d91b311-a7fd-48ff-98d2-1fed70850e3a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="coauthorUpdateJob" type="mapreduce">
11
 			<DESCRIPTION>update coauthors using a map {merged author id --> anchorId}</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT -->
15
				<!-- input format: HBase TableInputFormat; declared exactly once per job -->
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
17
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />			
18
        	
19
        		<!-- MAPPER -->
20
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.CoAuthorUpdateMapper" />
21
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
23
				
24
				
25
				<!-- MISC -->
26
				<PROPERTY key="mapred.output.compress" value="false" />
27
				<PROPERTY key="mapred.compress.map.output" value="true" />	
28
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
29
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
31
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />
32
							
33
				<PROPERTY key="mapred.reduce.tasks" value="0" />
34
				
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
42
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
43
        		<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" /> 
44
        	</JOB_INTERFACE>
45
        	<SCAN>
46
                <FILTERS operator="MUST_PASS_ALL">
47
                    <FILTER type="prefix" value="30"/>
48
                </FILTERS>
49
                <FAMILIES>
50
                    <FAMILY value="person"/>
51
                </FAMILIES>
52
        	</SCAN>
53
        </HADOOP_JOB>
54
        <STATUS>
55
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
56
            <RUNNING_INSTANCES value="0"/>
57
            <CUMULATIVE_RUN value="0" />
58
        </STATUS>
59
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
60
    </BODY>
61
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGrouperJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="6b2d8db3-346f-4ddc-8591-39fd488c1191_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupGrouperJob" type="mapreduce">
11
 			<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13
        	
14
				<!-- I/O FORMAT -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />		        	
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupGrouperMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />
22
				
23
				<!-- MISC -->				
24
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
25
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
26
				<PROPERTY key="mapreduce.map.speculative" value="false" />
27
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />	
28
			
29
				<PROPERTY key="mapred.reduce.tasks" value="0" />
30
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
31
				
32
		<!--  	Uncomment to override the default lib path -->			
33
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
34
        	</STATIC_CONFIGURATION>
35
        	<JOB_INTERFACE>
36
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
37
        		<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" />
38
        		<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
39
        		<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" />     
40
        	</JOB_INTERFACE>
41
        	<SCAN>
42
        		<FILTERS operator="MUST_PASS_ALL">
43
        			<FILTER type="prefix" param="entityTypeId" />
44
        		</FILTERS>
45
        		<FAMILIES>
46
        			<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" />
47
        			<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" />
48
     				<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" />
49
        		</FAMILIES>
50
        	</SCAN>
51
        </HADOOP_JOB>
52
        <STATUS>
53
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
54
            <RUNNING_INSTANCES value="0"/>
55
            <CUMULATIVE_RUN value="0" />
56
        </STATUS>
57
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
58
    </BODY>
59
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupPersonJob.xml
1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="29638605-235b-4cc1-9bf5-a5dd2fc84915_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="dedupPersonJob" type="mapreduce">
11
 			<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
13

  
14
				<!-- I/O FORMAT  -->
15
				<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
16
				<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />
17
        	
18
        		<!-- MAPPER -->
19
				<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonMapper" />
20
				<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" />
21
				<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
22
				
23
				<!-- REDUCER -->
24
				<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonReducer" />
25
				<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />				
26
				<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" />				
27
				
28
				<!-- MISC -->				
29
				<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />	
30
				<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />	
31
				<PROPERTY key="mapreduce.map.speculative" value="false" />		
32
				<PROPERTY key="mapreduce.reduce.speculative" value="false" />				
33
			
34
				<PROPERTY key="mapred.reduce.tasks" value="1000" />
35
				<!-- <PROPERTY key="user.name" value="dnet" /> -->
36
				
37
		<!--  	Uncomment to override the default lib path -->			
38
		<!--	<PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
39
        	</STATIC_CONFIGURATION>
40
        	<JOB_INTERFACE>
41
        		<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff