Project

General

Profile

« Previous | Next » 

Revision 39556

Added default threshold parameters (#1209).

View differences:

modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisPreprocessingJob.xml
1 1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisPreprocessingJob" type="oozie">
11
 			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
12
        	<STATIC_CONFIGURATION>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="732656e3-5ac6-4344-9d1f-f5c805f53a06_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisPreprocessingJob" type="oozie">
12
			<DESCRIPTION>IIS preprocessing</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
13 14
				<!-- Cluster wide -->
14
                <PROPERTY key="queueName" value="default"/>
15
				<PROPERTY key="user.name" value="dnet.beta" />
15
				<PROPERTY key="queueName" value="default"/>
16
				<PROPERTY key="user.name" value="dnet.beta"/>
16 17

  
17 18
				<!-- Runtime -->
18
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"  />
19
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"  />
20
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"  />
19
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/preprocessing"/>
20
				<PROPERTY key="oozie.wf.application.path" value="/tmp/integration/apps/preprocessing"/>
21
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
21 22
				<PROPERTY key="export_action_hbase_table_initialize" value="false"/>
22 23
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
24
        	</STATIC_CONFIGURATION>
25
        	<JOB_INTERFACE>
26
 		       	<PARAM name="import_content_object_store_location" required="true" description="mdstore service location" />
27
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
28
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records" />
29
        		<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records" />
30
        		<PARAM name="import_database_service_location" required="true" description="database service endpoint" />
31
        		<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objecstore ids subject to dataset reference extraction" />
32
        		<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objecstore ids for WoS plaintext" />
33
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
34
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
35
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
36
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
37
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
38
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
39
       			<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references" />
40
       			<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references" />
41
       			<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities" />
42
     			<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities" />
43
        	</JOB_INTERFACE>
44
        </HADOOP_JOB>
45
        <STATUS>
46
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
47
            <RUNNING_INSTANCES value="0"/>
48
            <CUMULATIVE_RUN value="0" />
49
        </STATUS>
50
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
51
    </BODY>
24
				<PROPERTY key="metadataextraction_excluded_checksums"
25
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
26
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="1.0"/>
27
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="1.0"/>
28
			</STATIC_CONFIGURATION>
29
			<JOB_INTERFACE>
30
				<PARAM name="import_content_object_store_location" required="true" description="mdstore service location"/>
31
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
32
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore id for dataset records"/>
33
				<PARAM name="import_wos_mdstore_id" required="true" description="mdstore id for WoS records"/>
34
				<PARAM name="import_database_service_location" required="true" description="database service endpoint"/>
35
				<PARAM name="import_content_datacite_objectstores_csv" required="true" description="objecstore ids subject to dataset reference extraction"/>
36
				<PARAM name="import_content_wos_plaintext_objectstores_csv" required="true" description="objecstore ids for WoS plaintext"/>
37
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
38
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
39
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
40
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
41
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
42
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
43
				<PARAM name="export_action_set_id_document_referencedProjects" required="true" description="target action set for project references"/>
44
				<PARAM name="export_action_set_id_document_referencedDatasets" required="true" description="target action set for dataset references"/>
45
				<PARAM name="export_action_set_id_entity_wos" required="true" description="target action set for WoS entities"/>
46
				<PARAM name="export_action_set_id_entity_dataset" required="true" description="target action set for dataset entities"/>
47
			</JOB_INTERFACE>
48
		</HADOOP_JOB>
49
		<STATUS>
50
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
51
			<RUNNING_INSTANCES value="0"/>
52
			<CUMULATIVE_RUN value="0"/>
53
		</STATUS>
54
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
55
	</BODY>
52 56
</RESOURCE_PROFILE>
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/iisMainJob.xml
1 1
<RESOURCE_PROFILE>
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
4
        <RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
5
        <RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2001-12-31T12:00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <HADOOP_JOB name="iisMainJob" type="oozie">
11
            <DESCRIPTION>IIS main workflow</DESCRIPTION>
12
            <STATIC_CONFIGURATION>
13
        	
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER
4
				value="f6e4cbcd-b173-4f8d-9205-e64ba15f03ad_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
5
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
6
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
9
	</HEADER>
10
	<BODY>
11
		<HADOOP_JOB name="iisMainJob" type="oozie">
12
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
13
			<STATIC_CONFIGURATION>
14

  
14 15
				<!-- Cluster wide -->
15
                <PROPERTY key="queueName" value="default"/>
16
                <PROPERTY key="user.name" value="dnet.beta"/>
16
				<PROPERTY key="queueName" value="default"/>
17
				<PROPERTY key="user.name" value="dnet.beta"/>
17 18

  
18 19
				<!-- Runtime -->
19 20
				<PROPERTY key="match_content_with_metadata" value="true"/>
20
                <PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
21
                <PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
22
   				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
23
                <PROPERTY key="export_action_hbase_table_initialize" value="true"/>
24
                <PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
25
                <PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
26
				<PROPERTY key="metadataextraction_excluded_checksums" value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
21
				<PROPERTY key="import_hbase_approved_datasources_csv" value="$UNDEFINED$"/>
22
				<PROPERTY key="import_hbase_input_table" value="information_space-2014-11-05"/>
23
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
24
				<PROPERTY key="export_action_hbase_table_initialize" value="true"/>
25
				<PROPERTY key="oozie.wf.validate.ForkJoin" value="false"/>
26
				<PROPERTY key="workingDir" value="/tmp/integration/working_dir/main"/>
27
				<PROPERTY key="metadataextraction_excluded_checksums"
28
				          value="1e5b574109da731f4918c7f91fc24864,bea4728578070c3d66774bf9454d41fe,da458477233b5561ae47042aa2a73086"/>
27 29
				<PROPERTY key="export_documentssimilarity_threshold" value="0.8"/>
28
                <PROPERTY key="export_trust_level_threshold_document_classes" value="0.9"/>
29
            </STATIC_CONFIGURATION>
30
        	<JOB_INTERFACE>
31
        		<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint" />
32
        		<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing" />
33
        		<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location" />
34
        		<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records" />
35
	      		<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table" />
36
        		<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum" />
37
        		<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port" />
38
        		<PARAM name="nameNode" required="true" description="hdfs name node" />
39
        		<PARAM name="jobTracker" required="true" description="job tracker name" />
40
        		<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path" />
41
                <PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
42
                <PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
43
                <PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative" required="true"/>
44
                <PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
45
                <PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
46
                <PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
47
                <PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
48
                <PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
49
                <PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
50
       		
51
        		<!-- flags to enable/disable IIS modules -->
52
        		<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module" />
53
        		<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module" />
54
        		<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module" />
55
        		<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module" />
56
        		<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module" />
57
        		<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extracion module" />
58
        		<PARAM name="active_referenceextraction_researchinitiative" required="true" description="enable/disable the research initiative extraction module" />
59
        		<PARAM name="active_statistics" required="true" description="enable/disable the statistics module" />
60
        		<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module" />
61
        		 
62
        	</JOB_INTERFACE>
63
        </HADOOP_JOB>
64
        <STATUS>
65
            <LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
66
            <RUNNING_INSTANCES value="0"/>
67
            <CUMULATIVE_RUN value="0"/>
68
        </STATUS>
69
        <SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
70
    </BODY>
30
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.9"/>
31
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="1.0"/>
32
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="1.0"/>
33
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="1.0"/>
34
			</STATIC_CONFIGURATION>
35
			<JOB_INTERFACE>
36
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
37
				<PARAM name="import_content_objectstores_csv" required="true" description="csv list of the available object stores subject to processing"/>
38
				<PARAM name="import_mdstore_service_location" required="true" description="mdstore service location"/>
39
				<PARAM name="import_dataset_mdstore_ids_csv" required="true" description="mdstore ids for dataset records"/>
40
				<PARAM name="export_action_hbase_table_name" required="true" description="destination action manager table"/>
41
				<PARAM name="export_action_hbase_remote_zookeeper_quorum" required="true" description="ZK quorum"/>
42
				<PARAM name="export_action_hbase_remote_zookeeper_clientport" required="true" description="ZK port"/>
43
				<PARAM name="nameNode" required="true" description="hdfs name node"/>
44
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
45
				<PARAM name="oozie.wf.application.path" required="true" description="oozie job application absolute path"/>
46
				<PARAM description="target action set for referenced projects" name="export_action_set_id_document_referencedProjects" required="true"/>
47
				<PARAM description="target action set for referenced datasets" name="export_action_set_id_document_referencedDatasets" required="true"/>
48
				<PARAM description="target action set for research initiative outcome" name="export_action_set_id_document_research_initiative"
49
				       required="true"/>
50
				<PARAM description="target action set for document similarities" name="export_action_set_id_document_similarities_standard" required="true"/>
51
				<PARAM description="target action set for document statistics" name="export_action_set_id_document_statistics" required="true"/>
52
				<PARAM description="target action set for document classification" name="export_action_set_id_document_classes" required="true"/>
53
				<PARAM description="target action set for document citations" name="export_action_set_id_document_referencedDocuments" required="true"/>
54
				<PARAM description="target action set for dataset entities" name="export_action_set_id_entity_dataset" required="true"/>
55
				<PARAM description="target action set for pdb entities" name="export_action_set_id_document_pdb" required="true"/>
56

  
57
				<!-- flags to enable/disable IIS modules -->
58
				<PARAM name="active_metadataextraction_export" required="true" description="enable/disable the affiliation matching module"/>
59
				<PARAM name="active_citationmatching" required="true" description="enable/disable the citation matching module"/>
60
				<PARAM name="active_documentsclassification" required="true" description="enable/disable the document classification module"/>
61
				<PARAM name="active_documentssimilarity" required="true" description="enable/disable the document similarity module"/>
62
				<PARAM name="active_referenceextraction_dataset" required="true" description="enable/disable the dataset reference extraction module"/>
63
				<PARAM name="active_referenceextraction_project" required="true" description="enable/disable the project reference extracion module"/>
64
				<PARAM name="active_referenceextraction_researchinitiative" required="true"
65
				       description="enable/disable the research initiative extraction module"/>
66
				<PARAM name="active_statistics" required="true" description="enable/disable the statistics module"/>
67
				<PARAM name="active_referenceextraction_pdb" required="true" description="enable/disable the protein data bank extraction module"/>
68

  
69
			</JOB_INTERFACE>
70
		</HADOOP_JOB>
71
		<STATUS>
72
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
73
			<RUNNING_INSTANCES value="0"/>
74
			<CUMULATIVE_RUN value="0"/>
75
		</STATUS>
76
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
77
	</BODY>
71 78
</RESOURCE_PROFILE>

Also available in: Unified diff