Project

General

Profile

1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="mainworkflows_primary_main">
2
	
3
	<parameters>
4
		<property>
5
            <name>remove_sideproducts</name>
6
            <value>true</value>
7
            <description>flag indicating inference side products will be erased</description>
8
        </property>
9
		<property>
10
			<name>active_existence_filter</name>
11
			<value>true</value>
12
			<description>flag indicating contents should be filtered against metadata entries retrieved from InformationSpace.
13
			This way only contents having metadata representation will be processed. 
14
			To be disabled when processing new contents whose metadata is not available in hbase.</description>
15
		</property>
16
		<!-- processing modes -->
17
		<property>
18
			<name>active_metadataextraction_export</name>
19
			<value>false</value>
20
			<description>flag indicating metadata extraction export should be enabled</description>
21
		</property>
22
		<property>
23
			<name>active_referenceextraction_project</name>
24
			<value>false</value>
25
			<description>flag indicating project reference extraction should be enabled</description>
26
		</property>
27
		<property>
28
			<name>active_referenceextraction_dataset</name>
29
			<value>false</value>
30
			<description>flag indicating dataset reference extraction should be enabled</description>
31
		</property>
32
		<property>
33
			<!-- currently disabled by default -->
34
			<name>active_referenceextraction_researchinitiative</name>
35
			<value>false</value>
36
			<description>flag indicating researchinitiative reference extraction should be enabled</description>
37
		</property>
38
		<property>
39
			<name>active_documentsclassification</name>
40
			<value>false</value>
41
			<description>flag indicating documents classification should be enabled</description>
42
		</property>
43
		<property>
44
			<name>active_documentssimilarity</name>
45
			<value>false</value>
46
			<description>flag indicating documents similarity should be enabled</description>
47
		</property>
48
		<property>
49
			<name>active_citationmatching</name>
50
			<value>false</value>
51
			<description>flag indicating citation matching should be enabled</description>
52
		</property>
53
		<property>
54
			<name>active_statistics</name>
55
			<value>false</value>
56
			<description>flag indicating statistics generation should be enabled</description>
57
		</property>
58
		<property>
59
			<name>active_websiteusage_analysis</name>
60
			<value>false</value>
61
			<description>flag indicating logs should be imported from HDFS log file into avro datastore 
62
				and website usage analysis should be performed</description>
63
		</property>
64
		<property>
65
			<name>active_export</name>
66
			<value>true</value>
67
			<description>flag indicating export should be performed</description>
68
		</property>
69
		<!-- import concepts related -->
70
		<property>
71
			<name>import_islookup_service_location</name>
72
			<description>IS Lookup service location</description>
73
		</property>
74
		<property>
75
			<name>import_project_concepts_context_ids_csv</name>
76
			<value>fet-fp7,fet-h2020</value>
77
			<description>comma separated list of concepts context identifiers to be picked by ISLookup</description>
78
		</property>
79
		<!-- import metadata related -->
80
		<property>
81
			<name>import_hbase_input_table</name>
82
			<description>HBase input table holding InformationSpace, available on local cluster</description>
83
		</property>
84
		<property>
85
			<name>import_hbase_approved_datasources_csv</name>
86
			<value>$UNDEFINED$</value>
87
			<description>CSV list of datasource ids to be approved during import. Applied on result and person entities.</description>
88
		</property>
89
		<!-- import datacite related -->
90
		<property>
91
			<name>import_mdstore_service_location</name>
92
			<value>$UNDEFINED$</value>
93
			<description>MDStore service (not WSDL) location URL</description>
94
		</property>
95
		<property>
96
			<name>import_dataset_mdstore_ids_csv</name>
97
			<value>$UNDEFINED$</value>
98
			<description>MDStore identifier</description>
99
		</property>
100
		<!-- import content related -->
101
		<property>
102
			<name>import_content_object_store_location</name>
103
			<value>$UNDEFINED$</value>
104
			<description>object store service location required for content retrieval</description>
105
		</property>
106
		<property>
107
			<name>import_content_objectstores_csv</name>
108
			<value>$UNDEFINED$</value>
109
			<description>CSV list of object stores identifiers to be processed</description>
110
		</property>
111
		<property>
112
			<name>import_content_mimetypes_pdf</name>
113
			<value>pdf,application/pdf</value>
114
			<description>pdf mime types</description>
115
		</property>
116
		<property>
117
			<name>import_content_mimetypes_text</name>
118
			<value>text,text/plain</value>
119
			<description>text mime types</description>
120
		</property>
121
		<property>
122
			<name>import_content_mimetypes_html</name>
123
			<value>text/html</value>
124
			<description>html mime types</description>
125
		</property>
126
		<property>
127
			<name>import_content_mimetypes_xml_pmc</name>
128
			<value>xml</value>
129
			<description>xml pmc types</description>
130
		</property>
131
		<property>
132
			<name>import_content_mimetypes_wos</name>
133
			<value>file::WoS</value>
134
			<description>WoS types</description>
135
		</property>
136
		<!-- import timeouts related -->
137
		<property>
138
			<name>import_resultset_client_read_timeout</name>
139
			<value>60000</value>
140
			<description>resultset client read timeout</description>
141
		</property>
142
		<property>
143
			<name>import_content_connection_timeout</name>
144
			<value>60000</value>
145
			<description>import content connection timeout</description>
146
		</property>
147
		<property>
148
			<name>import_content_read_timeout</name>
149
			<value>60000</value>
150
			<description>import content read timeout</description>
151
		</property>
152
		<!-- import logs related -->
153
		<property>
154
			<name>portal_logs_location</name>
155
			<value>/cache/portal-piwik-logs</value>
156
			<description>portal log files HDFS location</description>
157
		</property>
158
		<!-- metadata extraction related -->
159
		<property>
160
			<name>metadataextraction_excluded_checksums</name>
161
			<value>$UNDEFINED$</value>
162
			<description>list of content checksums excluded from metadataextraction processing</description>
163
		</property>
164
		<property>
165
			<name>metadataextraction_max_file_size_mb</name>
166
			<value>500</value>
167
			<description>maximum allowed file size in Megabytes</description>
168
		</property>
169
		<property>
170
			<name>metadataextraction_default_cache_location</name>
171
			<value>/cache/metadataextraction</value>
172
			<description>metadata extraction HDFS cache location</description>
173
		</property>
174
		<property>
175
			<name>metadataextraction_processing_mode</name>
176
			<value>StreamingMetadataExtractorMapper</value>
177
			<description>metadata extraction processing mode</description>
178
		</property>
179
		<property>
180
			<name>metadataextraction_input_classname</name>
181
			<value>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</value>
182
			<description>metadata extraction input classname</description>
183
		</property>
184
		<!-- export related -->
185
		<property>
186
			<name>export_action_hbase_table_name</name>
187
			<description>action manager hbase table name</description>
188
		</property>
189
		<property>
190
			<name>export_action_hbase_table_initialize</name>
191
			<description>flag indicating input table should be initialized</description>
192
		</property>
193
		<!-- action set id properties -->
194
		<property>
195
			<name>export_action_set_id</name>
196
			<value>$UNDEFINED$</value>
197
			<description>action-set identifier of exported data</description>
198
		</property>
199
		<property>
200
			<name>export_action_set_id_dataset_similarities_websiteusage</name>
201
			<value>$UNDEFINED$</value>
202
			<description>dataset_similarities_websiteusage action-set identifier of exported data</description>
203
		</property>
204
		<property>
205
			<name>export_action_set_id_person_similarities_websiteusage</name>
206
			<value>$UNDEFINED$</value>
207
			<description>person_similarities_websiteusage action-set identifier of exported data</description>
208
		</property>
209
		<property>
210
			<name>export_action_set_id_person_statistics</name>
211
			<value>$UNDEFINED$</value>
212
			<description>person_statistics action-set identifier of exported data</description>
213
		</property>
214
		<property>
215
			<name>export_action_set_id_project_statistics</name>
216
			<value>$UNDEFINED$</value>
217
			<description>project_statistics action-set identifier of exported data</description>
218
		</property>
219
		<property>
220
			<name>export_action_set_id_document_similarities_standard</name>
221
			<value>$UNDEFINED$</value>
222
			<description>document_similarities_standard action-set identifier of exported data</description>
223
		</property>
224
		<property>
225
			<name>export_action_set_id_document_similarities_websiteusage</name>
226
			<value>$UNDEFINED$</value>
227
			<description>document_similarities_websiteusage action-set identifier of exported data</description>
228
		</property>
229
		<property>
230
			<name>export_action_set_id_document_statistics</name>
231
			<value>$UNDEFINED$</value>
232
			<description>document_statistics action-set identifier of exported data</description>
233
		</property>
234
		<property>
235
			<name>export_action_set_id_document_classes</name>
236
			<value>$UNDEFINED$</value>
237
			<description>document_classes action-set identifier of exported data</description>
238
		</property>
239
		<property>
240
			<name>export_action_set_id_document_referencedProjects</name>
241
			<value>$UNDEFINED$</value>
242
			<description>document_referencedProjects action-set identifier of exported data</description>
243
		</property>
244
		<property>
245
			<name>export_action_set_id_document_referencedDatasets</name>
246
			<value>$UNDEFINED$</value>
247
			<description>document_referencedDatasets action-set identifier of exported data</description>
248
		</property>
249
		<property>
250
			<name>export_action_set_id_document_referencedDocuments</name>
251
			<value>$UNDEFINED$</value>
252
			<description>document_referencedDocuments action-set identifier of exported data</description>
253
		</property>
254
		<property>
255
			<name>export_action_set_id_document_research_initiative</name>
256
			<value>$UNDEFINED$</value>
257
			<description>document research initiative action-set identifier of exported data</description>
258
		</property>
259
		<!--  -->
260
		<property>
261
			<name>export_action_set_id_entity_dataset</name>
262
			<value>$UNDEFINED$</value>
263
			<description>action-set identifier of exported data containing dataset entities</description>
264
		</property>
265
		<property>
266
			<name>export_action_hbase_remote_zookeeper_quorum</name>
267
			<value>$UNDEFINED$</value>
268
			<description>external hbase zookeeper quorum, set to empty value by default which means data will be exported to local hbase instance</description>
269
		</property>
270
		<property>
271
			<name>export_action_hbase_remote_zookeeper_clientport</name>
272
			<value>$UNDEFINED$</value>
273
			<description>external hbase zookeeper client port, required only when zookeeper quorum property is set</description>
274
		</property>
275
		<property>
276
			<name>export_documentssimilarity_threshold</name>
277
			<value>$UNDEFINED$</value>
278
			<description>documents similarity threshold value below which similarity export is omitted</description>
279
		</property>
280
		<!-- working directory related -->
281
		<property>
282
			<name>execution_environment</name>
283
			<value>primary</value>
284
			<description>execution environment used for workingDir creation</description>
285
		</property>
286
		<property>
287
			<name>workingDir</name>
288
			<value>/user/${user.name}/iis/working_dirs/${execution_environment}</value>
289
			<description>working directory</description>
290
		</property>
291
	</parameters>
292
	
293
	<!-- Shared settings: job tracker, name node and the mapred queue name, each taken from the
	     corresponding workflow property (jobTracker, nameNode, queueName). -->
	<global>
294
        <job-tracker>${jobTracker}</job-tracker>
295
        <name-node>${nameNode}</name-node>
296
        <configuration>
297
            <property>
298
                <name>mapred.job.queue.name</name>
299
                <value>${queueName}</value>
300
            </property>
301
		</configuration>
302
	</global>
303
	
304
	<start to="init-workingDir"/>
305

    
306
	<!-- Resets the working directory: deletes ${nameNode}${workingDir} and recreates it,
	     then continues with copy-version; any error transitions to fail. -->
	<action name="init-workingDir">
307
         <fs>
308
            <delete path="${nameNode}${workingDir}" />
309
			<mkdir path="${nameNode}${workingDir}" />
310
        </fs>
311
        <ok to="copy-version"/>
312
        <error to="fail"/>
313
    </action>
314
	
315
	<!-- Copies version.properties from the workflow application path into the working
	     directory using distcp, then continues with the import action. -->
	<action name="copy-version">
316
       <distcp xmlns="uri:oozie:distcp-action:0.1">
317
       		<!-- distcp doesn't support global parameters so we need to provide them explicitly -->
318
       	   <job-tracker>${jobTracker}</job-tracker>
319
           <name-node>${nameNode}</name-node>
320
           <configuration>
321
               <property>
322
                   <name>mapred.job.queue.name</name>
323
                   <value>${queueName}</value>
324
               </property>
325
           </configuration>
326
           <arg>${wf:appPath()}/version.properties</arg>
327
           <arg>${nameNode}${workingDir}</arg>
328
           </distcp>
329
       <ok to="import"/>
330
       <error to="fail"/>
331
	</action>
332

    
333
	<action name="import">
334
	    <sub-workflow>
335
            <app-path>${wf:appPath()}/mainworkflows_common_import</app-path>
336
            <propagate-configuration/>
337
            <configuration>
338
            	<property>
339
                    <name>workingDir</name>
340
                    <value>${workingDir}/mainworkflows_common_import/working_dir</value>
341
                </property>
342
                <!-- importing modes -->
343
                <property>
344
					<name>active_existence_filter</name>
345
					<value>${active_existence_filter}</value>
346
				</property>
347
				<property>
348
					<name>active_import_metadata</name>
349
					<value>true</value>
350
				</property>
351
				<property>
352
					<name>active_import_dataset</name>
353
					<value>${active_referenceextraction_dataset}</value>
354
				</property>
355
				<property>
356
					<name>active_import_concept</name>
357
					<value>${active_referenceextraction_project}</value>
358
				</property>
359
				<property>
360
					<name>active_ingest_pmc_citations</name>
361
					<value>${active_citationmatching}</value>
362
				</property>
363
                <!-- import metadata related -->
364
				<property>
365
					<name>hbase_input_table</name>
366
					<value>${import_hbase_input_table}</value>
367
				</property>
368
				<property>
369
					<name>hbase_approved_datasources_csv</name>
370
					<value>${import_hbase_approved_datasources_csv}</value>
371
				</property>
372
				<!-- import datacite related -->
373
				<property>
374
					<name>mdstore_service_location</name>
375
					<value>${import_mdstore_service_location}</value>
376
				</property>
377
				<property>
378
					<name>dataset_mdstore_ids_csv</name>
379
					<value>${import_dataset_mdstore_ids_csv}</value>
380
				</property>
381
				<!-- project concept related -->
382
				<property>
383
					<name>islookup_service_location</name>
384
					<value>${import_islookup_service_location}</value>
385
				</property>
386
				<property>
387
					<name>project_concepts_context_ids_csv</name>
388
					<value>${import_project_concepts_context_ids_csv}</value>
389
				</property>
390
				<!-- import content related -->
391
				<property>
392
					<name>objectstore_service_location</name>
393
					<value>${import_content_object_store_location}</value>
394
				</property>
395
				<property>
396
					<name>approved_objectstores_csv</name>
397
					<value>${import_content_objectstores_csv}</value>
398
				</property>
399
				<property>
400
					<name>mimetypes_pdf</name>
401
					<value>${import_content_mimetypes_pdf}</value>
402
				</property>
403
				<property>
404
					<name>mimetypes_text</name>
405
					<value>${import_content_mimetypes_text}</value>
406
				</property>
407
				<property>
408
					<name>mimetypes_html</name>
409
					<value>${import_content_mimetypes_html}</value>
410
				</property>
411
				<property>
412
					<name>mimetypes_xml_pmc</name>
413
					<value>${import_content_mimetypes_xml_pmc}</value>
414
				</property>
415
				<property>
416
					<name>mimetypes_wos</name>
417
					<value>${import_content_mimetypes_wos}</value>
418
				</property>
419
				<!-- import timeouts related -->
420
				<property>
421
					<name>resultset_client_read_timeout</name>
422
					<value>${import_resultset_client_read_timeout}</value>
423
				</property>
424
				<property>
425
					<name>content_connection_timeout</name>
426
					<value>${import_content_connection_timeout}</value>
427
				</property>
428
				<property>
429
					<name>content_read_timeout</name>
430
					<value>${import_content_read_timeout}</value>
431
				</property>
432
				<!-- metadata extraction related -->
433
				<property>
434
					<name>metadataextraction_excluded_checksums</name>
435
					<value>${metadataextraction_excluded_checksums}</value>
436
				</property>
437
				<property>
438
					<name>metadataextraction_max_file_size_mb</name>
439
					<value>${metadataextraction_max_file_size_mb}</value>
440
				</property>
441
				<property>
442
					<name>metadataextraction_default_cache_location</name>
443
					<value>${metadataextraction_default_cache_location}</value>
444
				</property>
445
				<!-- metadatainput and metadataextraction output subdirectory names -->
446
				<property>
447
					<name>metadataimport_output_name_document_meta</name>
448
					<value>docmeta</value>
449
				</property>
450
				<property>
451
					<name>metadataimport_output_name_document_project</name>
452
					<value>docproject</value>
453
				</property>
454
				<property>
455
					<name>metadataimport_output_name_project</name>
456
					<value>project</value>
457
				</property>
458
				<property>
459
					<name>metadataimport_output_name_person</name>
460
					<value>person</value>
461
				</property>
462
				<property>
463
					<name>metadataimport_output_name_dedup_mapping</name>
464
					<value>dedupmapping</value>
465
				</property>
466
				<!-- output parameters -->
467
				<property>
468
					<name>output_extracted_document_metadata</name>
469
					<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
470
				</property>
471
				<property>
472
					<name>output_metadataimport_root</name>
473
					<value>${workingDir}/mainworkflows_common_import/metadataimport</value>
474
				</property>
475
				<property>
476
					<name>output_dataset</name>
477
					<value>${workingDir}/mainworkflows_common_import/dataset</value>
478
				</property>
479
				<property>
480
					<name>output_dataset_to_mdstore</name>
481
					<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
482
				</property>
483
				<property>
484
					<name>output_citation_pmc</name>
485
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
486
				</property>
487
				<property>
488
					<name>output_document_text</name>
489
					<value>${workingDir}/mainworkflows_common_import/document-text</value>
490
				</property>
491
				<property>
492
					<name>output_project_concept</name>
493
					<value>${workingDir}/mainworkflows_common_import/project-concept</value>
494
				</property>
495
				<property>
496
					<name>output_wos</name>
497
					<value>${workingDir}/mainworkflows_common_import/wos</value>
498
				</property>
499
            </configuration>
500
        </sub-workflow>
501
		<ok to="decision-import_logs"/>
502
		<error to="fail" />
503
    </action>
504

    
505
	<!-- Routes to import_logs only when active_websiteusage_analysis equals "true";
	     otherwise goes directly to mainworkflows_primary_processing. -->
	<decision name="decision-import_logs">
506
        <switch>
507
            <case to="import_logs">${active_websiteusage_analysis eq "true"}</case>
508
            <default to="mainworkflows_primary_processing"/>
509
        </switch>
510
    </decision>
511

    
512
	<!-- Runs the import_logs sub-workflow: input is ${portal_logs_location}, output is written to
	     ${workingDir}/websiteusage_analysis/import_logs/output (consumed later as input_logs by
	     mainworkflows_websiteusage_document). Continues with mainworkflows_primary_processing. -->
	<action name="import_logs">
513
		<sub-workflow>
514
            <app-path>${wf:appPath()}/import_logs</app-path>
515
            <propagate-configuration/>
516
            <configuration>
517
            	<property>
518
                    <name>workingDir</name>
519
                    <value>${workingDir}/websiteusage_analysis/import_logs/working_dir</value>
520
                </property>
521
                <property>
522
					<name>input</name>
523
					<value>${portal_logs_location}</value>
524
				</property>
525
            	<property>
526
					<name>output</name>
527
					<value>${workingDir}/websiteusage_analysis/import_logs/output</value>
528
				</property>
529
			</configuration>
530
        </sub-workflow>
531
		<ok to="mainworkflows_primary_processing" />
532
		<error to="fail" />
533
	</action>
534

    
535
	<action name="mainworkflows_primary_processing">
536
		<sub-workflow>
537
            <app-path>${wf:appPath()}/mainworkflows_primary_processing</app-path>
538
            <propagate-configuration/>
539
            <configuration>
540
            	<property>
541
                    <name>input_document_metadata</name>
542
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/docmeta</value>
543
                </property>
544
                <property>
545
                    <name>input_document_to_project</name>
546
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/docproject</value>
547
                </property>
548
                <property>
549
                    <name>input_document_text</name>
550
                    <value>${workingDir}/mainworkflows_common_import/document-text</value>
551
                </property>
552
                <property>
553
                    <name>input_document_text_wos</name>
554
                    <value>${workingDir}/mainworkflows_common_import/wos</value>
555
                </property>
556
                <property>
557
                    <name>input_project</name>
558
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/project</value>
559
                </property>
560
                <property>
561
                    <name>input_person</name>
562
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/person</value>
563
                </property>
564
                <property>
565
                    <name>input_dataset</name>
566
                    <value>${workingDir}/mainworkflows_common_import/dataset</value>
567
                </property>
568
                <property>
569
					<name>input_extracted_document_metadata</name>
570
					<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
571
				</property>
572
				<property>
573
					<name>input_citation_pmc</name>
574
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
575
				</property>
576
				<property>
577
					<name>input_project_concept</name>
578
					<value>${workingDir}/mainworkflows_common_import/project-concept</value>
579
				</property>
580
				<property>
581
                    <name>output_document_to_project</name>
582
                    <value>${workingDir}/exported/document_to_project</value>
583
                </property>
584
                <property>
585
                    <name>output_document_to_project_concepts</name>
586
                    <value>${workingDir}/exported/document_to_project_concepts</value>
587
                </property>
588
                <property>
589
                    <name>output_document_to_dataset</name>
590
                    <value>${workingDir}/exported/document_to_dataset</value>
591
                </property>
592
                <property>
593
                    <name>output_document_to_research_initiatives</name>
594
                    <value>${workingDir}/exported/document_to_research_initiatives</value>
595
                </property>
596
                <property>
597
                    <name>output_document_to_document_classes</name>
598
                    <value>${workingDir}/exported/document_to_document_classes</value>
599
                </property>
600
                <property>
601
                    <name>output_citation</name>
602
                    <value>${workingDir}/exported/citation</value>
603
                </property>
604
                <property>
605
                    <name>output_document_similarity</name>
606
                    <value>${workingDir}/exported/document_similarity</value>
607
                </property>
608
				<property>
609
                    <name>output_document_statistics</name>
610
                    <value>${workingDir}/exported/document_statistics</value>
611
                </property>                
612
                <property>
613
                    <name>output_author_statistics</name>
614
                    <value>${workingDir}/exported/author_statistics</value>
615
                </property>
616
                <property>
617
                    <name>output_project_statistics</name>
618
                    <value>${workingDir}/exported/project_statistics</value>
619
                </property>
620
            </configuration>
621
        </sub-workflow>
622
		<ok to="decision-websiteusage_analysis"/>
623
		<error to="fail" />
624
	</action>
625
    
626
    <!-- website usage analysis block -->
627
    <!-- Routes to websiteusage-idextractor when active_websiteusage_analysis equals "true";
         otherwise to skip-websiteusage. -->
    <decision name="decision-websiteusage_analysis">
628
        <switch>
629
            <case to="websiteusage-idextractor">${active_websiteusage_analysis eq "true"}</case>
630
            <default to="skip-websiteusage"/>
631
        </switch>
632
    </decision>
633
    
634
    <!-- Runs the transformers_idextractor sub-workflow on the imported document metadata
         (metadataimport/docmeta). Its output_identifier path is the same path that
         mainworkflows_websiteusage_document reads as input_document_id. -->
    <action name="websiteusage-idextractor">
635
        <sub-workflow>
636
            <app-path>${wf:appPath()}/transformers_idextractor</app-path>
637
            <propagate-configuration/>
638
            <configuration>
639
                <property>
640
                    <name>workingDir</name>
641
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/working_dir</value>
642
                </property>
643
                <property>
644
                    <name>input_document_metadata</name>
645
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/docmeta</value>
646
                </property>
647
                <property>
648
                    <name>output_identifier</name>
649
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/output</value>
650
                </property>
651
            </configuration>
652
        </sub-workflow>
653
        <ok to="mainworkflows_websiteusage_document"/>
654
        <error to="fail"/>
655
    </action>
656
    
657
    <!-- Runs the mainworkflows_websiteusage_document sub-workflow. Inputs: imported logs
         (import_logs/output), the dedup mapping from metadata import, and the document ids
         produced by websiteusage-idextractor. Output goes to
         ${workingDir}/websiteusage_analysis/output, the same path skip-websiteusage writes
         when the analysis is disabled. -->
    <action name="mainworkflows_websiteusage_document">
658
        <sub-workflow>
659
            <app-path>${wf:appPath()}/mainworkflows_websiteusage_document</app-path>
660
            <propagate-configuration/>
661
            <configuration>
662
                <property>
663
                    <name>workingDir</name>
664
                    <value>${workingDir}/websiteusage_analysis/working_dir</value>
665
                </property>
666
                <property>
667
                    <name>input_logs</name>
668
                    <value>${workingDir}/websiteusage_analysis/import_logs/output</value>
669
                </property>
670
                <property>
671
                    <name>input_id_mapping</name>
672
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/dedupmapping</value>
673
                </property>
674
                <property>
675
                    <name>input_document_id</name>
676
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/output</value>
677
                </property>
678
                <property>
679
                    <name>output</name>
680
                    <value>${workingDir}/websiteusage_analysis/output</value>
681
                </property>
682
            </configuration>
683
        </sub-workflow>
684
        <ok to="decision-transform-metadataextraction-for-export"/>
685
        <error to="fail"/>
686
    </action>    
687
    
688
    <!-- Taken when website usage analysis is disabled. Recreates the websiteusage_analysis
         directory and runs the Producer node (via ProcessWrapper) fed with empty.json, writing to
         ${workingDir}/websiteusage_analysis/output. Presumably this yields an empty datastore
         standing in for the skipped analysis output; confirm against the Producer implementation. -->
    <action name="skip-websiteusage">
689
        <java>
690
			<prepare>
691
				<!-- notice: the directory has to be aligned with the skipped action's output -->
692
				<delete path="${nameNode}${workingDir}/websiteusage_analysis" />
693
				<mkdir path="${nameNode}${workingDir}/websiteusage_analysis" />
694
			</prepare>
695
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
696
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
697
            <arg>-C{websiteusage_analysis_document,
698
				eu.dnetlib.iis.websiteusage.schemas.DocumentsWithWebsiteUsageSimilarities,
699
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
700
            <arg>-SworkingDir=${workingDir}/websiteusage_analysis/working_dir</arg>
701
            <arg>-Owebsiteusage_analysis_document=${workingDir}/websiteusage_analysis/output</arg>
702
        </java>
703
        <ok to="decision-transform-metadataextraction-for-export"/>
704
        <error to="fail"/>
705
    </action>
706
    <!-- end of website usage analysis block -->
707
    
708
    <!-- Routes to transformers_export_documentmetadata when active_metadataextraction_export
         equals "true"; otherwise to skip-transformers_export_documentmetadata. -->
    <decision name="decision-transform-metadataextraction-for-export">
709
        <switch>
710
            <case to="transformers_export_documentmetadata">${active_metadataextraction_export eq "true"}</case>
711
            <default to="skip-transformers_export_documentmetadata"/>
712
        </switch>
713
    </decision>
714
    
715
    <!-- Runs the transformers_export_documentmetadata sub-workflow on the extracted document
         metadata produced by the import step, writing to
         ${workingDir}/transformers_export_documentmetadata/output_metadata.
         Note: this action has no propagate-configuration element; jobTracker, nameNode and
         queueName are passed explicitly instead. -->
    <action name="transformers_export_documentmetadata">
716
        <sub-workflow>
717
            <app-path>${wf:appPath()}/transformers_export_documentmetadata</app-path>
718
            <configuration>
719
                <property>
720
                    <name>jobTracker</name>
721
                    <value>${jobTracker}</value>
722
                </property>
723
                <property>
724
                    <name>nameNode</name>
725
                    <value>${nameNode}</value>
726
                </property>
727
                <property>
728
                    <name>queueName</name>
729
                    <value>${queueName}</value>
730
                </property>
731
                <!-- Working directory of the subworkflow -->
732
                <property>
733
                    <name>workingDir</name>
734
                    <value>${workingDir}/transformers_export_documentmetadata/working_dir</value>
735
                </property>
736
                <property>
737
                    <name>input_extracted_metadata</name>
738
                    <value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
739
                </property>
740
                <property>
741
                    <name>output_metadata</name>
742
                    <value>${workingDir}/transformers_export_documentmetadata/output_metadata</value>
743
                </property>
744
            </configuration>
745
        </sub-workflow>
746
        <ok to="decision-export"/>
747
        <error to="fail"/>
748
    </action>
749
    
750
    <!-- Taken when metadata extraction export is disabled. Recreates the
         transformers_export_documentmetadata directory and runs the Producer node (via
         ProcessWrapper) fed with empty.json, writing to the same output_metadata path the real
         transformer would use. Presumably this yields an empty DocumentMetadata datastore;
         confirm against the Producer implementation. -->
    <action name="skip-transformers_export_documentmetadata">
751
        <java>
752
			<prepare>
753
				<!-- notice: the directory has to be aligned with the skipped action's output -->
754
				<delete path="${nameNode}${workingDir}/transformers_export_documentmetadata" />
755
				<mkdir path="${nameNode}${workingDir}/transformers_export_documentmetadata" />
756
			</prepare>
757
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
758
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
759
            <arg>-C{document_metadata,
760
				eu.dnetlib.iis.export.schemas.DocumentMetadata,
761
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
762
            <arg>-SworkingDir=${workingDir}/transformers_export_documentmetadata/working_dir</arg>
763
            <!-- notice: the output path has to be aligned with the skipped action's output -->
764
            <arg>-Odocument_metadata=${workingDir}/transformers_export_documentmetadata/output_metadata</arg>
765
        </java>
766
        <ok to="decision-export"/>
767
        <error to="fail"/>
768
    </action>
769
    
770
    <!--
        Gate in front of the export step: the export action runs only when the
        active_export parameter equals the string "true"; otherwise the workflow
        finishes at the end node without exporting anything.
    -->
    <decision name="decision-export">
        <switch>
            <case to="export">${active_export eq "true"}</case>
            <default to="end"/>
        </switch>
    </decision>
776
    
777
    <!--
        Runs the mainworkflows_common_export sub-workflow found under this
        application's path. The parent job configuration is propagated to the
        child (propagate-configuration) and the properties below are set on top
        of it: the child's working directory, its input ports, the entity export
        switches and the export_* parameters mapped onto the names the child
        expects.
        Success finishes the workflow at the end node; any error goes to fail.
    -->
    <action name="export">
        <sub-workflow>
            <app-path>${wf:appPath()}/mainworkflows_common_export</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/mainworkflows_common_export/working_dir</value>
                </property>
                <!-- input ports -->
                <!-- written by transformers_export_documentmetadata or by its skip action -->
                <property>
                    <name>input_document_metadata</name>
                    <value>${workingDir}/transformers_export_documentmetadata/output_metadata</value>
                </property>
                <property>
                    <name>input_document_to_project</name>
                    <value>${workingDir}/exported/document_to_project</value>
                </property>
                <property>
                    <name>input_document_to_project_concepts</name>
                    <value>${workingDir}/exported/document_to_project_concepts</value>
                </property>
                <property>
                    <name>input_document_to_dataset</name>
                    <value>${workingDir}/exported/document_to_dataset</value>
                </property>
                <!-- NOTE(review): the port is named document_to_mdstore but the
                     path points at dataset_to_mdstore; verify this is intended. -->
                <property>
                    <name>input_document_to_mdstore</name>
                    <value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
                </property>
                <property>
                    <name>input_document_to_research_initiatives</name>
                    <value>${workingDir}/exported/document_to_research_initiatives</value>
                </property>
                <property>
                    <name>input_document_to_document_classes</name>
                    <value>${workingDir}/exported/document_to_document_classes</value>
                </property>
                <property>
                    <name>input_citations</name>
                    <value>${workingDir}/exported/citation</value>
                </property>
                <property>
                    <name>input_document_similarity</name>
                    <value>${workingDir}/exported/document_similarity</value>
                </property>
                <property>
                    <name>input_document_statistics</name>
                    <value>${workingDir}/exported/document_statistics</value>
                </property>
                <!-- unlike the other ports, read from websiteusage_analysis/output
                     rather than from the exported directory -->
                <property>
                    <name>input_document_websiteusage_similarity</name>
                    <value>${workingDir}/websiteusage_analysis/output</value>
                </property>
                <property>
                    <name>input_author_statistics</name>
                    <value>${workingDir}/exported/author_statistics</value>
                </property>
                <property>
                    <name>input_project_statistics</name>
                    <value>${workingDir}/exported/project_statistics</value>
                </property>
                <!-- entities exporting modes -->
                <!-- follows the dataset reference extraction flag -->
                <property>
                    <name>active_export_referenceddataset_datasets</name>
                    <value>${active_referenceextraction_dataset}</value>
                </property>
                <!-- NOTE(review): hard-coded to false, not driven by a workflow parameter -->
                <property>
                    <name>active_export_referencedproject_entities</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mdstore_service_location</name>
                    <value>${import_mdstore_service_location}</value>
                </property>
                <!-- export related -->
                <property>
                    <name>action_hbase_table_name</name>
                    <value>${export_action_hbase_table_name}</value>
                </property>
                <property>
                    <name>action_hbase_table_initialize</name>
                    <value>${export_action_hbase_table_initialize}</value>
                </property>
                <!-- action set id properties -->
                <property>
                    <name>action_set_id</name>
                    <value>${export_action_set_id}</value>
                </property>
                <property>
                    <name>action_set_id_dataset_similarities_websiteusage</name>
                    <value>${export_action_set_id_dataset_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_person_similarities_websiteusage</name>
                    <value>${export_action_set_id_person_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_person_statistics</name>
                    <value>${export_action_set_id_person_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_project_statistics</name>
                    <value>${export_action_set_id_project_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_document_similarities_standard</name>
                    <value>${export_action_set_id_document_similarities_standard}</value>
                </property>
                <property>
                    <name>action_set_id_document_similarities_websiteusage</name>
                    <value>${export_action_set_id_document_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_document_statistics</name>
                    <value>${export_action_set_id_document_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_document_classes</name>
                    <value>${export_action_set_id_document_classes}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedProjects</name>
                    <value>${export_action_set_id_document_referencedProjects}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedDatasets</name>
                    <value>${export_action_set_id_document_referencedDatasets}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedDocuments</name>
                    <value>${export_action_set_id_document_referencedDocuments}</value>
                </property>
                <property>
                    <name>action_set_id_document_research_initiative</name>
                    <value>${export_action_set_id_document_research_initiative}</value>
                </property>
                <property>
                    <name>action_set_id_entity_dataset</name>
                    <value>${export_action_set_id_entity_dataset}</value>
                </property>
                <!-- remote HBase connection settings -->
                <property>
                    <name>action_hbase_remote_zookeeper_quorum</name>
                    <value>${export_action_hbase_remote_zookeeper_quorum}</value>
                </property>
                <property>
                    <name>action_hbase_remote_zookeeper_clientport</name>
                    <value>${export_action_hbase_remote_zookeeper_clientport}</value>
                </property>
                <property>
                    <name>documentssimilarity_threshold</name>
                    <value>${export_documentssimilarity_threshold}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="end"/>
        <error to="fail"/>
    </action>
935
    
936
	<!--
		Terminal failure node targeted by the error transitions of the actions
		in this workflow. Kills the job and reports the error message of the
		last node that failed. The line break and indentation inside the message
		element are part of the reported text and are kept unchanged.
	-->
	<kill name="fail">
		<message>Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
940
	<!-- Successful termination node: reached after the export action completes,
	     or directly from decision-export when export is not active. -->
	<end name="end" />
941
</workflow-app>