Revision 47984
Added by Claudio Atzori over 7 years ago
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" ?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet45-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>1.0.16</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16</developerConnection> |
|
16 |
</scm> |
|
17 |
|
|
18 |
<dependencies> |
|
19 |
|
|
20 |
</dependencies> |
|
21 |
</project> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_organization_relations.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="2e4251fe-66a8-4155-a902-72161601b8cd_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="OntologyDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="OntologyDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2016-11-18T10:46:37+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<ONTOLOGY_NAME code="dnet:result_organization_relations">dnet:result_organization_relations</ONTOLOGY_NAME> |
|
12 |
<ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Organization entities</ONTOLOGY_DESCRIPTION> |
|
13 |
<TERMS> |
|
14 |
<TERM code="isAffiliatedWith" encoding="resultOrganization_affiliation_hasAuthorInstitutionOf" english_name="has author institution of" native_name="has author institution of"> |
|
15 |
<SYNONYMS/> |
|
16 |
<RELATIONS> |
|
17 |
<RELATION type="inverseOf" code="isAuthorInstitutionOf"/> <!-- NOTE(review): no TERM in this vocabulary declares code "isAuthorInstitutionOf" (declared codes are "isAffiliatedWith" and "affiliates"); confirm whether the TERM codes or this inverse reference is stale --> |
|
18 |
</RELATIONS> |
|
19 |
</TERM> |
|
20 |
<TERM code="affiliates" encoding="resultOrganization_affiliation_isAuthorInstitutionOf" english_name="is author institution of" native_name="is author institution of"> |
|
21 |
<SYNONYMS/> |
|
22 |
<RELATIONS> |
|
23 |
<RELATION type="inverseOf" code="hasAuthorInstitutionOf"/> <!-- NOTE(review): no TERM in this vocabulary declares code "hasAuthorInstitutionOf" (declared codes are "isAffiliatedWith" and "affiliates"); confirm whether the TERM codes or this inverse reference is stale --> |
|
24 |
</RELATIONS> |
|
25 |
</TERM> |
|
26 |
</TERMS> |
|
27 |
</CONFIGURATION> |
|
28 |
<STATUS> |
|
29 |
<LAST_UPDATE value="2016-11-18T10:46:36Z"/> |
|
30 |
</STATUS> |
|
31 |
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS> |
|
32 |
</BODY> |
|
33 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/> |
|
44 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/person_result_relations.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="12b8832a-0e97-43a9-b5ec-c33ff3c29778_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="OntologyDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="OntologyDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<ONTOLOGY_NAME code="dnet:person_result_relations">dnet:person_result_relations</ONTOLOGY_NAME> |
|
12 |
<ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Person entities</ONTOLOGY_DESCRIPTION> |
|
13 |
<TERMS> |
|
14 |
<TERM code="isAuthorOf" encoding="personResult_authorship_isAuthorOf" english_name="author of" native_name="author of"> |
|
15 |
<SYNONYMS/> |
|
16 |
<RELATIONS> |
|
17 |
<RELATION type="inverseOf" code="hasAuthor"/> |
|
18 |
</RELATIONS> |
|
19 |
</TERM> |
|
20 |
<TERM code="hasAuthor" encoding="personResult_authorship_hasAuthor" english_name="has author" native_name="has author"> |
|
21 |
<SYNONYMS/> |
|
22 |
<RELATIONS> |
|
23 |
<RELATION type="inverseOf" code="isAuthorOf"/> |
|
24 |
</RELATIONS> |
|
25 |
</TERM> |
|
26 |
<TERM code="isContributorOf" encoding="personResult_contribution_isContributorOf" english_name="contributor of" native_name="contributor of"> |
|
27 |
<SYNONYMS/> |
|
28 |
<RELATIONS> |
|
29 |
<RELATION type="inverseOf" code="hasContributor"/> |
|
30 |
</RELATIONS> |
|
31 |
</TERM> |
|
32 |
<TERM code="hasContributor" encoding="personResult_contribution_hasContributor" english_name="has contributor" native_name="has contributor"> |
|
33 |
<SYNONYMS/> |
|
34 |
<RELATIONS> |
|
35 |
<RELATION type="inverseOf" code="isContributorOf"/> |
|
36 |
</RELATIONS> |
|
37 |
</TERM> |
|
38 |
</TERMS> |
|
39 |
</CONFIGURATION> |
|
40 |
<STATUS> |
|
41 |
<LAST_UPDATE value="2013-11-18T10:46:36Z"/> |
|
42 |
</STATUS> |
|
43 |
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS> |
|
44 |
</BODY> |
|
45 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/connectedComponentsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="28e8d911-87e5-4f39-9ce9-0d445126c75f_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="connectedComponentsJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that joins all the vertex ids to build the connected components in the graph</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.ConnectedComponentsReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source path on hdfs"/> |
|
44 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/informationSpaceImportJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="b7d51a07-6996-4841-9a4a-685a044638e3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="informationSpaceImportJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that imports the whole information space table from a sequence file holding a json dump</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dataimport.ImportInformationSpaceDumpMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
26 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
27 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
28 |
|
|
29 |
<!-- Uncomment to override the default lib path --> |
|
30 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
31 |
</STATIC_CONFIGURATION> |
|
32 |
<JOB_INTERFACE> |
|
33 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
34 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
35 |
|
|
36 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
37 |
</JOB_INTERFACE> |
|
38 |
<SCAN> |
|
39 |
<FILTERS /> |
|
40 |
<FAMILIES /> |
|
41 |
</SCAN> |
|
42 |
</HADOOP_JOB> |
|
43 |
<STATUS> |
|
44 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
45 |
<RUNNING_INSTANCES value="0"/> |
|
46 |
<CUMULATIVE_RUN value="0" /> |
|
47 |
</STATUS> |
|
48 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
49 |
</BODY> |
|
50 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupDeleteSimRelsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="6363b833-ac88-421c-8596-440a3dc735db_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="deleteSimRelJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the similarity rels used in the deduplication process (person)</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupDeleteSimRelMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<!-- Writable is defined in org.apache.hadoop.io, not in the HBase io package --> <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Writable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
48 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
49 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_result_relations.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="c2cd2bab-6a9e-4a34-9318-3b11a349ad4c_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="OntologyDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="OntologyDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<ONTOLOGY_NAME code="dnet:result_result_relations">dnet:result_result_relations</ONTOLOGY_NAME> |
|
12 |
<ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result entities</ONTOLOGY_DESCRIPTION> |
|
13 |
<TERMS> |
|
14 |
<TERM code="hasAmongTopNSimilarDocuments" encoding="resultResult_similarity_hasAmongTopNSimilarDocuments" english_name="has Among Top N Similar Documents" native_name="has Among Top N Similar Documents"> |
|
15 |
<SYNONYMS/> |
|
16 |
<RELATIONS> |
|
17 |
<RELATION type="inverseOf" code="isAmongTopNSimilarDocuments"/> |
|
18 |
</RELATIONS> |
|
19 |
</TERM> |
|
20 |
<TERM code="isAmongTopNSimilarDocuments" encoding="resultResult_similarity_isAmongTopNSimilarDocuments" english_name="is Among Top N Similar Documents" native_name="is Among Top N Similar Documents"> |
|
21 |
<SYNONYMS/> |
|
22 |
<RELATIONS> |
|
23 |
<RELATION type="inverseOf" code="hasAmongTopNSimilarDocuments"/> |
|
24 |
</RELATIONS> |
|
25 |
</TERM> |
|
26 |
<TERM code="isRelatedTo" encoding="resultResult_publicationDataset_isRelatedTo" english_name="is related to" native_name="is related to"> |
|
27 |
<SYNONYMS/> |
|
28 |
<RELATIONS> |
|
29 |
<RELATION type="inverseOf" code="isRelatedTo"/> |
|
30 |
</RELATIONS> |
|
31 |
</TERM> |
|
32 |
<TERM code="isSupplementTo" encoding="resultResult_supplement_isSupplementTo" english_name="is supplement to" native_name="is supplement to"> |
|
33 |
<SYNONYMS/> |
|
34 |
<RELATIONS> |
|
35 |
<RELATION type="inverseOf" code="isSupplementedBy"/> |
|
36 |
</RELATIONS> |
|
37 |
</TERM> |
|
38 |
<TERM code="isSupplementedBy" encoding="resultResult_supplement_isSupplementedBy" english_name="is supplemented by" native_name="is supplemented by"> |
|
39 |
<SYNONYMS/> |
|
40 |
<RELATIONS> |
|
41 |
<RELATION type="inverseOf" code="isSupplementTo"/> |
|
42 |
</RELATIONS> |
|
43 |
</TERM> |
|
44 |
<TERM code="isPartOf" encoding="resultResult_part_isPartOf" english_name="is part of" native_name="is part of"> |
|
45 |
<SYNONYMS/> |
|
46 |
<RELATIONS> |
|
47 |
<RELATION type="inverseOf" code="hasPart"/> |
|
48 |
</RELATIONS> |
|
49 |
</TERM> |
|
50 |
<TERM code="hasPart" encoding="resultResult_part_hasPart" english_name="has part" native_name="has part"> |
|
51 |
<SYNONYMS/> |
|
52 |
<RELATIONS> |
|
53 |
<RELATION type="inverseOf" code="isPartOf"/> |
|
54 |
</RELATIONS> |
|
55 |
</TERM> |
|
56 |
<TERM code="merges" encoding="resultResult_dedup_merges" english_name="merges" native_name="merges"> |
|
57 |
<SYNONYMS/> |
|
58 |
<RELATIONS> |
|
59 |
<RELATION type="inverseOf" code="isMergedIn"/> |
|
60 |
</RELATIONS> |
|
61 |
</TERM> |
|
62 |
<TERM code="isMergedIn" encoding="resultResult_dedup_isMergedIn" english_name="is Merged In" native_name="is Merged In"> |
|
63 |
<SYNONYMS/> |
|
64 |
<RELATIONS> |
|
65 |
<RELATION type="inverseOf" code="merges"/> |
|
66 |
</RELATIONS> |
|
67 |
</TERM> |
|
68 |
<TERM code="isSimilarTo" encoding="resultResult_dedupSimilarity_isSimilarTo" english_name="is similar to" native_name="is similar to"> |
|
69 |
<SYNONYMS/> |
|
70 |
<RELATIONS> |
|
71 |
<RELATION type="inverseOf" code="isSimilarTo"/> |
|
72 |
</RELATIONS> |
|
73 |
</TERM> |
|
74 |
</TERMS> |
|
75 |
</CONFIGURATION> |
|
76 |
<STATUS> |
|
77 |
<LAST_UPDATE value="2013-11-18T10:46:36Z"/> |
|
78 |
</STATUS> |
|
79 |
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS> |
|
80 |
</BODY> |
|
81 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFindPersonRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="8ec4731e-4e91-4863-9a4b-7f0a8ca0542e_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupFindPersonRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that finds the root of a similarity group (person)</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFindRootsPersonReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="100" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES> |
|
51 |
<FAMILY param="entityType" /> |
|
52 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
53 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
54 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
55 |
</FAMILIES> |
|
56 |
</SCAN> |
|
57 |
</HADOOP_JOB> |
|
58 |
<STATUS> |
|
59 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
60 |
<RUNNING_INSTANCES value="0"/> |
|
61 |
<CUMULATIVE_RUN value="0" /> |
|
62 |
</STATUS> |
|
63 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
64 |
</BODY> |
|
65 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGTCleanerJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="888ef72f-701a-4d59-8b8a-2ad01986f975_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="gtCleanerJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that deletes the non-GT rows</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.GTCleanerMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Delete" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
25 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
28 |
|
|
29 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
30 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
31 |
|
|
32 |
<!-- Uncomment to override the default lib path --> |
|
33 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
34 |
</STATIC_CONFIGURATION> |
|
35 |
<JOB_INTERFACE> |
|
36 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
37 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
40 |
</JOB_INTERFACE> |
|
41 |
<SCAN> |
|
42 |
<FILTERS operator="MUST_PASS_ALL"> |
|
43 |
<FILTER type="prefix" param="entityTypeId" /> |
|
44 |
</FILTERS> |
|
45 |
<FAMILIES> |
|
46 |
<FAMILY param="entityType" /> |
|
47 |
<FAMILY value="resultResult_dedupSimilarity_isSimilarTo" /> |
|
48 |
<FAMILY value="personPerson_dedupSimilarity_isSimilarTo" /> |
|
49 |
<FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo" /> |
|
50 |
</FAMILIES> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupRootsToCSVJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="027554bd-3d5c-4c50-9170-90d8c4402bc3_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupRootsToCSVJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that exports the representatives as CSV files</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat" /> |
|
17 |
<PROPERTY key="mapreduce.output.lazyoutputformat.outputformat" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupRootsToCsvReducer" /> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
28 |
|
|
29 |
<!-- MULTIPLE OUTPUTS --> |
|
30 |
<PROPERTY key="mapreduce.multipleoutputs" value="NativeGroups Groups NativeEntities" /> |
|
31 |
|
|
32 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
33 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.key" value="org.apache.hadoop.io.Text" /> |
|
34 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeGroups.value" value="org.apache.hadoop.io.Text" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
37 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.key" value="org.apache.hadoop.io.Text" /> |
|
38 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.Groups.value" value="org.apache.hadoop.io.Text" /> |
|
39 |
|
|
40 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.format" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
41 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.key" value="org.apache.hadoop.io.Text" /> |
|
42 |
<PROPERTY key="mapreduce.multipleoutputs.namedOutput.NativeEntities.value" value="org.apache.hadoop.io.Text" /> |
|
43 |
|
|
44 |
<!-- MISC --> |
|
45 |
|
|
46 |
<PROPERTY key="mapred.textoutputformat.wrapper" value="#"/> |
|
47 |
<PROPERTY key="mapred.textoutputformat.separator" value="!"/> |
|
48 |
|
|
49 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
50 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
51 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
52 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
53 |
|
|
54 |
<PROPERTY key="mapred.reduce.tasks" value="3" /> |
|
55 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
56 |
|
|
57 |
<!-- Uncomment to override the default lib path --> |
|
58 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
59 |
</STATIC_CONFIGURATION> |
|
60 |
<JOB_INTERFACE> |
|
61 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
62 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
63 |
|
|
64 |
<PARAM name="mapred.output.dir" required="true" description="target text output path on hdfs" /> |
|
65 |
</JOB_INTERFACE> |
|
66 |
<SCAN> |
|
67 |
<FILTERS operator="MUST_PASS_ALL"> |
|
68 |
<FILTER type="prefix" param="entityTypeId" /> |
|
69 |
</FILTERS> |
|
70 |
<FAMILIES> |
|
71 |
<FAMILY param="entityType" /> |
|
72 |
<FAMILY value="resultResult_dedup_merges" /> |
|
73 |
<FAMILY value="personPerson_dedup_merges" /> |
|
74 |
<FAMILY value="organizationOrganization_dedup_merges" /> |
|
75 |
</FAMILIES> |
|
76 |
</SCAN> |
|
77 |
</HADOOP_JOB> |
|
78 |
<STATUS> |
|
79 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
80 |
<RUNNING_INSTANCES value="0"/> |
|
81 |
<CUMULATIVE_RUN value="0" /> |
|
82 |
</STATUS> |
|
83 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
84 |
</BODY> |
|
85 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/indexFeedJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="1c34963b-75b3-4440-9f42-72445a26c077_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="indexFeedJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces a sequence file containing the xml records</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.index.IndexFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- JOB GLOBAL --> |
|
24 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
25 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.task.timeout" value="1800000"/> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
32 |
<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> |
|
33 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
34 |
|
|
35 |
<!-- Uncomment to override the default lib path --> |
|
36 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
37 |
</STATIC_CONFIGURATION> |
|
38 |
<JOB_INTERFACE> |
|
39 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
40 |
<PARAM name="mapred.output.dir" required="true" description="destination path on hdfs for rotten index xml records" /> |
|
41 |
|
|
42 |
<PARAM name="index.solr.url" required="false" description="url used to instantiate the solr client" /> |
|
43 |
<PARAM name="index.solr.collection" required="true" description="target solr collection to be fed" /> |
|
44 |
|
|
45 |
<PARAM name="id" required="true" description="index DS id" /> |
|
46 |
<PARAM name="index.shutdown.wait.time" required="true" description="wait time before shutting down the solr client pool" /> |
|
47 |
<PARAM name="index.buffer.flush.threshold" required="true" description="indexing buffer flush threshold" /> |
|
48 |
<PARAM name="index.feed.timestamp" required="true" description="timestamp used as ds_version" /> |
|
49 |
<PARAM name="index.solr.sim.mode" required="true" description="boolean value, allows to run this job in simulation mode" /> |
|
50 |
<PARAM name="index.xslt" required="true" description="record transformer created by the MSRO service" /> |
|
51 |
</JOB_INTERFACE> |
|
52 |
<SCAN> |
|
53 |
<FILTERS /> |
|
54 |
<FAMILIES /> |
|
55 |
</SCAN> |
|
56 |
</HADOOP_JOB> |
|
57 |
<STATUS> |
|
58 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
59 |
<RUNNING_INSTANCES value="0"/> |
|
60 |
<CUMULATIVE_RUN value="0" /> |
|
61 |
</STATUS> |
|
62 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
63 |
</BODY> |
|
64 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/datasources_organizations_typologies.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="bf12ea02-8d59-49f0-bba4-cdbb5b741981_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="OntologyDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="OntologyDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<ONTOLOGY_NAME code="dnet:datasources_organizations_typologies">dnet:datasources_organizations_typologies</ONTOLOGY_NAME> |
|
12 |
<ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Datasource and Organization entities</ONTOLOGY_DESCRIPTION> |
|
13 |
<TERMS> |
|
14 |
<TERM code="isProvidedBy" encoding="datasourceOrganization_provision_isProvidedBy" english_name="is provided by" native_name="is provided by"> |
|
15 |
<SYNONYMS/> |
|
16 |
<RELATIONS> |
|
17 |
<RELATION type="inverseOf" code="provides"/> |
|
18 |
</RELATIONS> |
|
19 |
</TERM> |
|
20 |
<TERM code="provides" encoding="datasourceOrganization_provision_provides" english_name="provides" native_name="provides"> |
|
21 |
<SYNONYMS/> |
|
22 |
<RELATIONS> |
|
23 |
<RELATION type="inverseOf" code="isProvidedBy"/> |
|
24 |
</RELATIONS> |
|
25 |
</TERM> |
|
26 |
</TERMS> |
|
27 |
</CONFIGURATION> |
|
28 |
<STATUS> |
|
29 |
<LAST_UPDATE value="2013-11-18T10:46:36Z"/> |
|
30 |
</STATUS> |
|
31 |
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS> |
|
32 |
</BODY> |
|
33 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/publicationAnalysisJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="721fd82c-6444-41c9-ba23-5eb0652ddaeb_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="publicationAnalysisJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that analyses publication features</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.PublicationAnalysisMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" /> |
|
22 |
|
|
23 |
<!-- MISC --> |
|
24 |
|
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
38 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
39 |
</JOB_INTERFACE> |
|
40 |
<SCAN> |
|
41 |
<FILTERS operator="MUST_PASS_ALL"> |
|
42 |
<FILTER type="prefix" value="50" /> |
|
43 |
</FILTERS> |
|
44 |
<FAMILIES> |
|
45 |
<FAMILY value="result" /> |
|
46 |
</FAMILIES> |
|
47 |
</SCAN> |
|
48 |
</HADOOP_JOB> |
|
49 |
<STATUS> |
|
50 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
51 |
<RUNNING_INSTANCES value="0"/> |
|
52 |
<CUMULATIVE_RUN value="0" /> |
|
53 |
</STATUS> |
|
54 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
55 |
</BODY> |
|
56 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/result_project_relations.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="4f06cc59-6eca-4492-a2d1-ac2a80682ec3_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="OntologyDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="OntologyDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<CONFIGURATION> |
|
11 |
<ONTOLOGY_NAME code="dnet:result_project_relations">dnet:result_project_relations</ONTOLOGY_NAME> |
|
12 |
<ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Result and Project entities</ONTOLOGY_DESCRIPTION> |
|
13 |
<TERMS> |
|
14 |
<TERM code="isProducedBy" encoding="resultProject_outcome_isProducedBy" english_name="is produced by" native_name="is produced by"> |
|
15 |
<SYNONYMS/> |
|
16 |
<RELATIONS> |
|
17 |
<RELATION type="inverseOf" code="produces"/> |
|
18 |
</RELATIONS> |
|
19 |
</TERM> |
|
20 |
<TERM code="produces" encoding="resultProject_outcome_produces" english_name="produces" native_name="produces"> |
|
21 |
<SYNONYMS/> |
|
22 |
<RELATIONS> |
|
23 |
<RELATION type="inverseOf" code="isProducedBy"/> |
|
24 |
</RELATIONS> |
|
25 |
</TERM> |
|
26 |
</TERMS> |
|
27 |
</CONFIGURATION> |
|
28 |
<STATUS> |
|
29 |
<LAST_UPDATE value="2013-11-18T10:46:36Z"/> |
|
30 |
</STATUS> |
|
31 |
<SECURITY_PARAMETERS>String</SECURITY_PARAMETERS> |
|
32 |
</BODY> |
|
33 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupAnchorStatsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="2af384c6-9118-426d-9394-d7bbc42d707c_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupAnchorStatsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map only job that calculates statistics on the person anchors</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.experiment.AnchorStatsMapper" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
23 |
|
|
24 |
<!-- JOB GLOBAL --> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
32 |
<!--<PROPERTY key="mapred.fairscheduler.pool" value="solr"/> --> |
|
33 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
34 |
|
|
35 |
<!-- Uncomment to override the default lib path --> |
|
36 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
37 |
</STATIC_CONFIGURATION> |
|
38 |
<JOB_INTERFACE> |
|
39 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
40 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId" /> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType" /> |
|
48 |
</FAMILIES> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0" /> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId"/> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY param="entityType"/> |
|
48 |
<FAMILY value="resultResult_dedup_isMergedIn"/> |
|
49 |
<FAMILY value="personPerson_dedup_isMergedIn"/> |
|
50 |
<FAMILY value="organizationOrganization_dedup_isMergedIn"/> |
|
51 |
</FAMILIES> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0"/> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that builds the roots and redirects the rels</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="500" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
42 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
43 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN caching="10"> |
|
47 |
<FILTERS operator="MUST_PASS_ALL"> |
|
48 |
<FILTER type="prefix" param="entityTypeId" /> |
|
49 |
</FILTERS> |
|
50 |
<FAMILIES/> |
|
51 |
</SCAN> |
|
52 |
</HADOOP_JOB> |
|
53 |
<STATUS> |
|
54 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
55 |
<RUNNING_INSTANCES value="0"/> |
|
56 |
<CUMULATIVE_RUN value="0" /> |
|
57 |
</STATUS> |
|
58 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
59 |
</BODY> |
|
60 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/buildMergedToAnchorMapJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="4e16c8dd-8944-4266-8b5c-62e4b26e3090_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="buildMergedToAnchorMapJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that builds a map {merged author id --> anchorId}</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.TextOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.BuildMergedAnchorMapReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.output.compress" value="false" /> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="true" /> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="1" /> |
|
37 |
<PROPERTY key="dfs.blocksize" value="256M" /> |
|
38 |
<PROPERTY key="mapred.textoutputformat.separator" value="=" /> |
|
39 |
|
|
40 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
41 |
|
|
42 |
<!-- Uncomment to override the default lib path --> |
|
43 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
44 |
</STATIC_CONFIGURATION> |
|
45 |
<JOB_INTERFACE> |
|
46 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
47 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
48 |
<PARAM name="mapred.output.dir" required="true" description="target text output path on hdfs" /> |
|
49 |
</JOB_INTERFACE> |
|
50 |
<SCAN> |
|
51 |
<FILTERS operator="MUST_PASS_ALL"> |
|
52 |
<FILTER type="prefix" value="30"/> |
|
53 |
</FILTERS> |
|
54 |
<FAMILIES> |
|
55 |
<FAMILY value="person"/> |
|
56 |
<FAMILY value="personPerson_dedup_isMergedIn" /> |
|
57 |
</FAMILIES> |
|
58 |
</SCAN> |
|
59 |
</HADOOP_JOB> |
|
60 |
<STATUS> |
|
61 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
62 |
<RUNNING_INSTANCES value="0"/> |
|
63 |
<CUMULATIVE_RUN value="0" /> |
|
64 |
</STATUS> |
|
65 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
66 |
</BODY> |
|
67 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/oaiFeedJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="03d7af20-63bb-4790-a052-6cdbc1e05fce_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2015-02-09T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="oaiFeedJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that feeds the OAI store</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.NullOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.oai.OaiFeedMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.NullWritable" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.NullWritable" /> |
|
22 |
|
|
23 |
<!-- JOB GLOBAL --> |
|
24 |
<!-- <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.NullWritable" /> --> |
|
25 |
<!-- <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.NullWritable"/> --> |
|
26 |
|
|
27 |
<!-- MISC --> |
|
28 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
29 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<PROPERTY key="job.lib" value="/user/dnet/lib/dnet-mapreduce-jobs-assembly-0.0.6.3-SNAPSHOT.jar"/> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs" /> |
|
38 |
<PARAM name="services.publisher.oai.collection" required="true" description="target mongodb collection" /> |
|
39 |
<PARAM name="oaiConfiguration" required="true" description="configuration bean used to guide the OAI feeding" /> |
|
40 |
<PARAM name="oai.feed.date" required="true" description="timestamp" /> |
|
41 |
<PARAM name="services.publisher.oai.host" required="true" description="mongodb host" /> |
|
42 |
<PARAM name="services.publisher.oai.port" required="true" description="mongodb port" /> |
|
43 |
<PARAM name="services.publisher.oai.db" required="true" description="mongodb database name" /> |
|
44 |
<PARAM name="services.publisher.oai.skipDuplicates" required="true" description="skip duplicated records." /> |
|
45 |
<PARAM name="services.publisher.oai.duplicateXPath" required="true" description="records with this xpath are identified as duplicates" /> |
|
46 |
</JOB_INTERFACE> |
|
47 |
<SCAN> |
|
48 |
<FILTERS /> |
|
49 |
<FAMILIES /> |
|
50 |
</SCAN> |
|
51 |
</HADOOP_JOB> |
|
52 |
<STATUS> |
|
53 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
54 |
<RUNNING_INSTANCES value="0"/> |
|
55 |
<CUMULATIVE_RUN value="0" /> |
|
56 |
</STATUS> |
|
57 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
58 |
</BODY> |
|
59 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType/organization_organizations_typologies.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
  <!-- Ontology profile for the dnet:organization_organizations_typologies vocabulary. -->
  <HEADER>
    <RESOURCE_IDENTIFIER value="3bedf36c-de3b-4fae-a77a-ab9bee6e2718_T250b2xvZ3lEU1Jlc291cmNlcy9PbnRvbG9neURTUmVzb3VyY2VUeXBl"/>
    <RESOURCE_TYPE value="OntologyDSResourceType"/>
    <RESOURCE_KIND value="OntologyDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2013-11-18T10:46:37+01:00"/>
  </HEADER>
  <BODY>
    <CONFIGURATION>
      <ONTOLOGY_NAME code="dnet:organization_organizations_typologies">dnet:organization_organizations_typologies</ONTOLOGY_NAME>
      <ONTOLOGY_DESCRIPTION>Vocabulary of the relations between Organization entities</ONTOLOGY_DESCRIPTION>
      <TERMS>
        <!-- "merges" and "isMergedIn" point at each other through inverseOf. -->
        <TERM code="merges"
              encoding="organizationOrganization_dedup_merges"
              english_name="merges"
              native_name="merges">
          <SYNONYMS/>
          <RELATIONS>
            <RELATION code="isMergedIn" type="inverseOf"/>
          </RELATIONS>
        </TERM>
        <TERM code="isMergedIn"
              encoding="organizationOrganization_dedup_isMergedIn"
              english_name="is Merged In"
              native_name="is Merged In">
          <SYNONYMS/>
          <RELATIONS>
            <RELATION code="merges" type="inverseOf"/>
          </RELATIONS>
        </TERM>
        <!-- "isSimilarTo" names itself as its own inverse. -->
        <TERM code="isSimilarTo"
              encoding="organizationOrganization_dedupSimilarity_isSimilarTo"
              english_name="is similar to"
              native_name="is similar to">
          <SYNONYMS/>
          <RELATIONS>
            <RELATION code="isSimilarTo" type="inverseOf"/>
          </RELATIONS>
        </TERM>
      </TERMS>
    </CONFIGURATION>
    <STATUS>
      <LAST_UPDATE value="2013-11-18T10:46:36Z"/>
    </STATUS>
    <SECURITY_PARAMETERS>String</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/calculatePersonDistributionStep1Job.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
  <!-- Hadoop job configuration profile for the mapreduce job "calculatePersonDistributionStep1Job". -->
  <HEADER>
    <RESOURCE_IDENTIFIER value="f61bc720-2821-4871-937d-64b79f098714_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
    <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
    <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2015-06-15T10:50:29+02:00"/>
  </HEADER>
  <BODY>
    <HADOOP_JOB name="calculatePersonDistributionStep1Job" type="mapreduce">
      <DESCRIPTION>map reduce job that perform tests for notificationBroker service (step 1/2)</DESCRIPTION>

      <!-- Properties fixed by this profile. -->
      <STATIC_CONFIGURATION>

        <!-- I/O FORMAT: HBase table in, sequence file out -->
        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
        <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/>

        <!-- MAPPER: emits Text keys and Text values -->
        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Mapper"/>
        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/>

        <!-- REDUCER: emits Text keys and Text values -->
        <PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.broker.CalculatePersonDistributionStep1Reducer"/>
        <PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/>
        <PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/>

        <!-- MISC: speculative execution is disabled under both the mapred.* and the mapreduce.* key names -->
        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
        <PROPERTY key="mapreduce.map.speculative" value="false"/>
        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>
        <PROPERTY key="mapred.reduce.tasks" value="100"/>

        <!-- <PROPERTY key="user.name" value="dnet" /> -->

        <!-- Uncomment to override the default lib path -->
        <!-- <PROPERTY key="job.lib" value="/user/michele.artini/lib/dnet-mapreduce-jobs-assembly-0.0.6.6-SNAPSHOT.jar"/> -->
      </STATIC_CONFIGURATION>

      <!-- Parameters declared as required; the input table is declared under two key spellings. -->
      <JOB_INTERFACE>
        <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
        <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
      </JOB_INTERFACE>

      <!-- Scan setup: two prefix filters joined with MUST_PASS_ONE, restricted to the two listed families. -->
      <SCAN>
        <FILTERS operator="MUST_PASS_ONE">
          <FILTER type="prefix" value="50"/>
          <FILTER type="prefix" value="10"/>
        </FILTERS>
        <FAMILIES>
          <FAMILY value="result"/>
          <FAMILY value="datasource"/>
        </FAMILIES>
      </SCAN>
    </HADOOP_JOB>
    <STATUS>
      <LAST_SUBMISSION_DATE value="2015-06-15T11:10:17+02:00"/>
      <RUNNING_INSTANCES value="0"/>
      <CUMULATIVE_RUN value="7"/>
    </STATUS>
    <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/coauthorUpdateJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
  <!-- Hadoop job configuration profile for the mapreduce job "coauthorUpdateJob". -->
  <HEADER>
    <RESOURCE_IDENTIFIER value="6d91b311-a7fd-48ff-98d2-1fed70850e3a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
    <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
    <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
  </HEADER>
  <BODY>
    <HADOOP_JOB name="coauthorUpdateJob" type="mapreduce">
      <DESCRIPTION>update coauthors using a map {merged author id --> anchorId}</DESCRIPTION>

      <!-- Properties fixed by this profile. -->
      <STATIC_CONFIGURATION>

        <!-- I/O FORMAT: HBase table in, HBase table out.
             The input format is declared once; an identical second declaration was removed. -->
        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" />
        <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" />

        <!-- MAPPER: emits ImmutableBytesWritable keys and Put values -->
        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.gt.CoAuthorUpdateMapper" />
        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" />
        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put" />


        <!-- MISC: speculative execution is disabled under both the mapred.* and the mapreduce.* key names -->
        <PROPERTY key="mapred.output.compress" value="false" />
        <PROPERTY key="mapred.compress.map.output" value="true" />
        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" />
        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false" />
        <PROPERTY key="mapreduce.map.speculative" value="false" />
        <PROPERTY key="mapreduce.reduce.speculative" value="false" />

        <!-- No reducer class is configured and the reduce task count is 0. -->
        <PROPERTY key="mapred.reduce.tasks" value="0" />

        <!-- <PROPERTY key="user.name" value="dnet" /> -->

        <!-- Uncomment to override the default lib path -->
        <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
      </STATIC_CONFIGURATION>

      <!-- Parameters declared as required; the input table is declared under two key spellings. -->
      <JOB_INTERFACE>
        <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" />
        <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" />
        <!-- NOTE(review): the output format above is TableOutputFormat, yet this parameter describes a
             sequence file on hdfs and no output table parameter is declared here. Confirm whether
             hbase.mapred.outputtable / hbase.mapreduce.outputtable should be required instead. -->
        <PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs" />
      </JOB_INTERFACE>

      <!-- Scan setup: a single prefix filter, restricted to the person family. -->
      <SCAN>
        <FILTERS operator="MUST_PASS_ALL">
          <FILTER type="prefix" value="30"/>
        </FILTERS>
        <FAMILIES>
          <FAMILY value="person"/>
        </FAMILIES>
      </SCAN>
    </HADOOP_JOB>
    <STATUS>
      <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
      <RUNNING_INSTANCES value="0"/>
      <CUMULATIVE_RUN value="0" />
    </STATUS>
    <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupGrouperJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE>
  <!-- Hadoop job configuration profile for the mapreduce job "dedupGrouperJob". -->
  <HEADER>
    <RESOURCE_IDENTIFIER value="6b2d8db3-346f-4ddc-8591-39fd488c1191_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
    <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
    <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
    <RESOURCE_URI value=""/>
    <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
  </HEADER>
  <BODY>
    <HADOOP_JOB name="dedupGrouperJob" type="mapreduce">
      <DESCRIPTION>map only job that closes the similarity mesh</DESCRIPTION>

      <!-- Properties fixed by this profile. -->
      <STATIC_CONFIGURATION>

        <!-- I/O FORMAT: HBase table in, HBase table out -->
        <PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/>
        <PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/>

        <!-- MAPPER: emits Text keys and Put values -->
        <PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupGrouperMapper"/>
        <PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/>
        <PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/>

        <!-- MISC: speculative execution is disabled under both the mapred.* and the mapreduce.* key names -->
        <PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/>
        <PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/>
        <PROPERTY key="mapreduce.map.speculative" value="false"/>
        <PROPERTY key="mapreduce.reduce.speculative" value="false"/>

        <!-- Reduce task count is 0, in line with the "map only" description. -->
        <PROPERTY key="mapred.reduce.tasks" value="0"/>
        <!-- <PROPERTY key="user.name" value="dnet" /> -->

        <!-- Uncomment to override the default lib path -->
        <!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> -->
      </STATIC_CONFIGURATION>

      <!-- Parameters declared as required; input and output tables are each declared under two key spellings. -->
      <JOB_INTERFACE>
        <PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/>
        <PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/>
        <PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/>
        <PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/>
      </JOB_INTERFACE>

      <!-- Scan setup: the prefix filter takes its value from the "entityTypeId" param rather than a literal. -->
      <SCAN>
        <FILTERS operator="MUST_PASS_ALL">
          <FILTER type="prefix" param="entityTypeId"/>
        </FILTERS>
        <FAMILIES>
          <FAMILY value="resultResult_dedupSimilarity_isSimilarTo"/>
          <FAMILY value="personPerson_dedupSimilarity_isSimilarTo"/>
          <FAMILY value="organizationOrganization_dedupSimilarity_isSimilarTo"/>
        </FAMILIES>
      </SCAN>
    </HADOOP_JOB>
    <STATUS>
      <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
      <RUNNING_INSTANCES value="0"/>
      <CUMULATIVE_RUN value="0"/>
    </STATUS>
    <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
  </BODY>
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/tags/dnet-openaireplus-profiles-1.0.16/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupPersonJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="29638605-235b-4cc1-9bf5-a5dd2fc84915_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupPersonJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that scans a given entity type and creates the similarRel graph</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.SimpleDedupPersonReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false" /> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="1000" /> |
|
35 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
36 |
|
|
37 |
<!-- Uncomment to override the default lib path --> |
|
38 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
39 |
</STATIC_CONFIGURATION> |
|
40 |
<JOB_INTERFACE> |
|
41 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-profiles-1.0.16