Project

General

Profile

1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="importer_mdrecord">
2
	
3
	<!--
		Workflow input parameters.
		A property declared without a <value> (output, output_report_root_path)
		has no default in this file and has to be provided when the job is submitted.
	-->
	<parameters>
		<!-- Facade used to reach the mdstore service -->
		<property>
			<name>mdstore_facade_factory_classname</name>
			<value>eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacadeFactory</value>
			<description>ServiceFacadeFactory implementation class name producing eu.dnetlib.dhp.wf.importer.facade.WebServiceMDStoreFacade</description>
		</property>
		<!--
			$UNDEFINED$ is a placeholder default.
			NOTE(review): presumably the importer treats this marker as "not set"; confirm against the Java code.
		-->
		<property>
			<name>mdstore_service_location</name>
			<value>$UNDEFINED$</value>
			<description>mdstore service location</description>
		</property>
		<property>
			<name>mdstore_ids_csv</name>
			<value>$UNDEFINED$</value>
			<description>comma separated mdstore identifiers</description>
		</property>
		<property>
			<name>mdstore_record_maxlength</name>
			<value>500000</value>
			<description>maximum allowed length of mdstore record</description>
		</property>

		<!-- Output locations -->
		<property>
			<name>output</name>
			<description>ImportedRecord avro datastore output holding mdrecords</description>
		</property>
		<property>
			<name>output_report_root_path</name>
			<description>base directory for storing reports</description>
		</property>
		<property>
			<name>output_report_relative_path</name>
			<value>import_mdrecord</value>
			<description>directory for storing report (relative to output_report_root_path)</description>
		</property>

		<!-- Client timeouts, all expressed in milliseconds -->
		<property>
			<name>dnet_service_client_read_timeout</name>
			<value>60000</value>
			<description>DNet service client reading timeout (expressed in milliseconds)</description>
		</property>
		<property>
			<name>dnet_service_client_connection_timeout</name>
			<value>60000</value>
			<description>DNet service client connection timeout (expressed in milliseconds)</description>
		</property>
		<property>
			<name>resultset_client_read_timeout</name>
			<value>60000</value>
			<description>result set client reading timeout (expressed in milliseconds)</description>
		</property>
		<property>
			<name>resultset_client_connection_timeout</name>
			<value>60000</value>
			<description>result set client connection timeout (expressed in milliseconds)</description>
		</property>

		<!-- Used by the "report" action to build the report.<prefix>.total and report.<prefix>.invalid.sizeExceeded keys -->
		<property>
			<name>report_properties_prefix</name>
			<value>import.mdrecord</value>
			<description>prefix of the report entries describing imported records (total and size-exceeded counts)</description>
		</property>
	</parameters>
63

    
64
    <!--
        Settings shared by the actions of this workflow: cluster endpoints and
        the queues for the job itself and for the Oozie launcher job.
        jobTracker, nameNode, queueName and oozieLauncherQueueName are not declared
        in this file and are resolved from the job configuration.
    -->
    <global>
        <job-tracker>${jobTracker}</job-tracker>
        <name-node>${nameNode}</name-node>
        <configuration>
            <property>
                <name>mapreduce.job.queuename</name>
                <value>${queueName}</value>
            </property>
            <property>
                <name>oozie.launcher.mapred.job.queue.name</name>
                <value>${oozieLauncherQueueName}</value>
            </property>
        </configuration>
    </global>
78
    
79
	<!-- Entry point: the workflow begins with the mdrecord-importer action -->
	<start to="mdrecord-importer" />
80
	
81
	<!--
		Imports mdstore records into the ${output} datastore.
		Runs ProcessWrapper, passing the importer class name as the first argument,
		followed by -P parameters and the -O output port.
		On success the workflow continues with "report"; on error it goes to "fail".
	-->
	<action name="mdrecord-importer">
		<java>
			<!-- Remove output left by a previous run of this node before it is regenerated -->
			<prepare>
				<delete path="${nameNode}${output}" />
			</prepare>
			<main-class>eu.dnetlib.dhp.common.java.ProcessWrapper</main-class>
			<arg>eu.dnetlib.dhp.wf.importer.mdrecord.MDStoreRecordsImporter</arg>

			<!-- mdstore source settings -->
			<arg>-Pimport.mdstore.service.location=${mdstore_service_location}</arg>
			<arg>-Pimport.mdstore.ids.csv=${mdstore_ids_csv}</arg>
			<arg>-Pimport.mdstore.record.maxlength=${mdstore_record_maxlength}</arg>

			<!-- client timeouts -->
			<arg>-Pimport.resultset.client.read.timeout=${resultset_client_read_timeout}</arg>
			<arg>-Pimport.resultset.client.connection.timeout=${resultset_client_connection_timeout}</arg>
			<arg>-Pdnet.service.client.read.timeout=${dnet_service_client_read_timeout}</arg>
			<arg>-Pdnet.service.client.connection.timeout=${dnet_service_client_connection_timeout}</arg>

			<arg>-Pimport.facade.factory.classname=${mdstore_facade_factory_classname}</arg>
			<arg>-Omdrecords=${output}</arg>
			<!-- Captured output is read by the "report" action through wf:actionData('mdrecord-importer') -->
			<capture-output />
		</java>
		<ok to="report" />
		<error to="fail" />
	</action>
106
    
107
    <!--
        Generates the import report.
        Runs ProcessWrapper with ReportGenerator, passing the TOTAL and SIZE_EXCEEDED
        values captured from the mdrecord-importer action as report entries whose
        keys start with report.${report_properties_prefix}.
        The report is written to ${output_report_root_path}/${output_report_relative_path}.
    -->
    <action name="report">
        <java>
            <main-class>eu.dnetlib.dhp.common.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.dhp.common.report.ReportGenerator</arg>
            <arg>-Preport.${report_properties_prefix}.total=${wf:actionData('mdrecord-importer')['TOTAL']}</arg>
            <arg>-Preport.${report_properties_prefix}.invalid.sizeExceeded=${wf:actionData('mdrecord-importer')['SIZE_EXCEEDED']}</arg>
            <arg>-Oreport=${output_report_root_path}/${output_report_relative_path}</arg>
        </java>
        <ok to="end" />
        <error to="fail" />
    </action>
118
    
119
	<!--
		Failure node targeted by the error transitions of both actions.
		The message embeds the error message of the last node that failed.
	-->
	<kill name="fail">
		<message>Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
123
	<!-- Successful completion node, reached from the "report" action -->
	<end name="end" />
124
</workflow-app>
    (1-1/1)