Project

General

Profile

1
<!--
	Resource profile of type HadoopJobConfigurationDSResourceType describing
	the oozie job "iisMainJobV2" (IIS main workflow).

	STATIC_CONFIGURATION lists properties with fixed values; JOB_INTERFACE
	lists the parameters the job declares, all of them marked required="true".
-->
<RESOURCE_PROFILE>
	<HEADER>
		<RESOURCE_IDENTIFIER
				value="d730a831-a982-4034-a890-de98fd972e87_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/>
		<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/>
		<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/>
		<RESOURCE_URI value=""/>
		<DATE_OF_CREATION value="2001-12-31T12:00:00"/>
	</HEADER>
	<BODY>
		<HADOOP_JOB name="iisMainJobV2" type="oozie">
			<DESCRIPTION>IIS main workflow</DESCRIPTION>
			<STATIC_CONFIGURATION>

				<!-- Cluster wide -->
				<PROPERTY key="user.name" value="dnet.beta"/>
				<!-- Runtime -->
				<PROPERTY key="metadataextraction_default_cache_location" value="/cache/metadataextraction"/>
				<PROPERTY key="export_documentssimilarity_threshold" value="0.7"/>
				<!-- Trust level thresholds, one per exported result type -->
				<PROPERTY key="export_trust_level_threshold_document_classes" value="0.5"/>
				<PROPERTY key="export_trust_level_threshold_document_referencedDatasets" value="0.0"/>
				<PROPERTY key="export_trust_level_threshold_document_referencedProjects" value="0.0"/>
				<PROPERTY key="export_trust_level_threshold_document_pdb" value="0.0"/>
				<PROPERTY key="export_trust_level_threshold_document_software_url" value="0.0"/>
				<PROPERTY key="export_trust_level_threshold_matched_doc_organizations" value="0.0"/>
				<PROPERTY key="output_remote_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/var/lib/dnet/actionManager_BETA"/>
				<!-- NOTE(review): import_hbase_dump_location is also declared as a required PARAM
				     in JOB_INTERFACE below; confirm which of the two values takes precedence. -->
				<PROPERTY key="import_hbase_dump_location" value="webhdfs://namenode.hadoop.dm.openaire.eu:50071/tmp/db_openaireplus_services.export.2016.09.15"/>
				<PROPERTY key="oozieServiceLoc" value="http://iis-cdh5-test-m3:11000/oozie"/>
			</STATIC_CONFIGURATION>
			<JOB_INTERFACE>

				<!-- cluster endpoints and workflow location -->
				<PARAM name="nameNode"   required="true" description="hdfs name node"/>
				<PARAM name="jobTracker" required="true" description="job tracker name"/>
				<PARAM name="import_hbase_dump_location" required="true" description="InfoSpace HBase JSON records dump (not default HBase export outcome!) remote location. Should include protocol prefix and namenode when copying from remote cluster."/>
				<PARAM name="oozie.wf.application.path"  required="true" description="oozie job application absolute path"/>

				<!-- import sources: object stores and mdstores -->
				<PARAM name="import_content_object_store_location" required="true" description="objectStore service endpoint"/>
				<PARAM name="import_content_objectstores_csv"      required="true" description="csv list of the available object stores subject to processing"/>
				<PARAM name="import_mdstore_service_location"      required="true" description="mdstore service location"/>
				<PARAM name="import_dataset_mdstore_ids_csv"       required="true" description="mdstore ids for dataset records"/>


				<!-- target action sets for the exported results -->
				<PARAM name="export_action_set_id_document_referencedDatasets"  required="true" description="target action set for referenced datasets"/>
				<PARAM name="export_action_set_id_document_referencedProjects"  required="true" description="target action set for referenced projects"/>
				<PARAM name="export_action_set_id_document_research_initiative" required="true" description="target action set for research initiative outcome"/>
				<PARAM name="export_action_set_id_document_similarities_standard" required="true" description="target action set for document similarities"   />
				<PARAM name="export_action_set_id_document_classes"               required="true" description="target action set for document classification" />
				<PARAM name="export_action_set_id_document_referencedDocuments"   required="true" description="target action set for document citations"      />
				<PARAM name="export_action_set_id_entity_dataset"                 required="true" description="target action set for dataset entities"        />
				<PARAM name="export_action_set_id_document_pdb"                   required="true" description="target action set for pdb entities"            />
				<PARAM name="export_action_set_id_document_software_url"          required="true" description="software reference extraction actionset identifier"/>
				<PARAM name="export_action_set_id_matched_doc_organizations"      required="true" description="target action set for affiliations"/>


				<!-- flags to enable/disable IIS modules -->
				<PARAM name="active_citationmatching"                       required="true" description="enable/disable the citation matching module"/>
				<PARAM name="active_documentsclassification"                required="true" description="enable/disable the document classification module"/>
				<PARAM name="active_documentssimilarity"                    required="true" description="enable/disable the document similarity module"/>
				<PARAM name="active_referenceextraction_dataset"            required="true" description="enable/disable the dataset reference extraction module"/>
				<PARAM name="active_referenceextraction_project"            required="true" description="enable/disable the project reference extraction module"/>
				<PARAM name="active_referenceextraction_researchinitiative" required="true" description="enable/disable the research initiative extraction module"/>
				<PARAM name="active_referenceextraction_pdb"                required="true" description="enable/disable the protein data bank extraction module"/>
				<PARAM name="active_referenceextraction_software_url"       required="true" description="enable/disable the software reference extraction algorithm"/>

			</JOB_INTERFACE>
		</HADOOP_JOB>
		<STATUS>
			<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/>
			<RUNNING_INSTANCES value="0"/>
			<CUMULATIVE_RUN value="0"/>
		</STATUS>
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
	</BODY>
</RESOURCE_PROFILE>
(34-34/53)