<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="mainworkflows_primary_main">
2
	
3
	<!-- Declared workflow parameters. A property with a <value> carries a default;
		a property without a <value> has no default in this file and must be supplied at submission.
		NOTE(review): $UNDEFINED$ looks like an "unset" placeholder interpreted by the sub-workflows; confirm. -->
	<parameters>
		<property>
			<name>remove_sideproducts</name>
			<value>true</value>
			<description>flag indicating inference side products will be erased</description>
		</property>
		<property>
			<name>match_content_with_metadata</name>
			<value>true</value>
			<description>flag indicating contents should be filtered and their identifiers should be deduplicated against metadata entries retrieved from InformationSpace.
			This way only contents having metadata representation will be processed.
			To be disabled when processing new contents whose metadata is not available in hbase or when original identifiers should be preserved (contents will not be filtered as well).</description>
		</property>
		<!-- processing modes -->
		<property>
			<name>active_metadataextraction_export</name>
			<value>false</value>
			<description>flag indicating metadata extraction export should be enabled</description>
		</property>
		<property>
			<name>active_referenceextraction_project</name>
			<value>false</value>
			<description>flag indicating project reference extraction should be enabled</description>
		</property>
		<property>
			<name>active_referenceextraction_dataset</name>
			<value>false</value>
			<description>flag indicating dataset reference extraction should be enabled</description>
		</property>
		<property>
			<!-- currently disabled by default -->
			<name>active_referenceextraction_researchinitiative</name>
			<value>false</value>
			<description>flag indicating researchinitiative reference extraction should be enabled</description>
		</property>
		<property>
			<!-- currently disabled by default -->
			<name>active_referenceextraction_pdb</name>
			<value>false</value>
			<description>flag indicating protein databank reference extraction should be enabled</description>
		</property>
		<property>
			<name>active_documentsclassification</name>
			<value>false</value>
			<description>flag indicating documents classification should be enabled</description>
		</property>
		<property>
			<name>active_documentssimilarity</name>
			<value>false</value>
			<description>flag indicating documents similarity should be enabled</description>
		</property>
		<property>
			<name>active_citationmatching</name>
			<value>false</value>
			<description>flag indicating citation matching should be enabled</description>
		</property>
		<property>
			<name>active_statistics</name>
			<value>false</value>
			<description>flag indicating statistics generation should be enabled</description>
		</property>
		<property>
			<name>active_websiteusage_analysis</name>
			<value>false</value>
			<description>flag indicating logs should be imported from HDFS log file into avro datastore
				and website usage analysis should be performed</description>
		</property>
		<property>
			<name>active_export_to_hbase</name>
			<value>true</value>
			<description>flag indicating hbase export should be performed</description>
		</property>
		<property>
			<name>active_export_to_json</name>
			<value>false</value>
			<description>flag indicating json export should be performed</description>
		</property>
		<!-- import concepts related -->
		<property>
			<name>import_islookup_service_location</name>
			<description>IS Lookup service location</description>
		</property>
		<property>
			<name>import_project_concepts_context_ids_csv</name>
			<value>fet-fp7,fet-h2020</value>
			<description>comma separated list of concepts context identifiers to be picked by ISLookup</description>
		</property>
		<!-- import metadata related -->
		<property>
			<name>import_hbase_input_table</name>
			<description>HBase input table holding InformationSpace, available on local cluster</description>
		</property>
		<property>
			<name>import_hbase_approved_datasources_csv</name>
			<value>$UNDEFINED$</value>
			<description>CSV list of datasource ids to be approved during import. Applied on result and person entities.</description>
		</property>
		<!-- import datacite related -->
		<property>
			<name>import_mdstore_service_location</name>
			<value>$UNDEFINED$</value>
			<description>MDStore service (not WSDL) location URL</description>
		</property>
		<property>
			<name>import_dataset_mdstore_ids_csv</name>
			<value>$UNDEFINED$</value>
			<description>MDStore identifier</description>
		</property>
		<!-- import content related -->
		<property>
			<name>import_content_object_store_location</name>
			<value>$UNDEFINED$</value>
			<description>object store service location required for content retrieval</description>
		</property>
		<property>
			<name>import_content_objectstores_csv</name>
			<value>$UNDEFINED$</value>
			<description>CSV list of object stores identifiers to be processed</description>
		</property>
		<property>
			<name>import_content_mimetypes_pdf</name>
			<value>pdf,application/pdf</value>
			<description>pdf mime types</description>
		</property>
		<property>
			<name>import_content_mimetypes_text</name>
			<value>text,text/plain</value>
			<description>text mime types</description>
		</property>
		<property>
			<name>import_content_mimetypes_html</name>
			<value>text/html</value>
			<description>html mime types</description>
		</property>
		<property>
			<name>import_content_mimetypes_xml_pmc</name>
			<value>xml</value>
			<description>xml pmc types</description>
		</property>
		<property>
			<name>import_content_mimetypes_wos</name>
			<value>file::WoS</value>
			<description>WoS types</description>
		</property>
		<!-- import timeouts related -->
		<property>
			<name>import_resultset_client_read_timeout</name>
			<value>60000</value>
			<description>resultset client read timeout</description>
		</property>
		<property>
			<name>import_content_connection_timeout</name>
			<value>60000</value>
			<description>import content connection timeout</description>
		</property>
		<property>
			<name>import_content_read_timeout</name>
			<value>60000</value>
			<description>import content read timeout</description>
		</property>
		<!-- import logs related -->
		<property>
			<name>portal_logs_location</name>
			<value>/cache/portal-piwik-logs</value>
			<description>portal log files HDFS location</description>
		</property>
		<!-- metadata extraction related -->
		<property>
			<name>metadataextraction_excluded_checksums</name>
			<value>$UNDEFINED$</value>
			<description>list of content checksums excluded from metadataextraction processing</description>
		</property>
		<property>
			<name>metadataextraction_max_file_size_mb</name>
			<value>500</value>
			<description>maximum allowed file size in Megabytes</description>
		</property>
		<property>
			<name>metadataextraction_default_cache_location</name>
			<value>/cache/metadataextraction</value>
			<description>metadata extraction HDFS cache location</description>
		</property>
		<property>
			<name>metadataextraction_processing_mode</name>
			<value>StreamingMetadataExtractorMapper</value>
			<description>metadata extraction processing mode</description>
		</property>
		<property>
			<name>metadataextraction_input_classname</name>
			<value>eu.dnetlib.iis.importer.auxiliary.schemas.DocumentContentUrl</value>
			<description>metadata extraction input classname</description>
		</property>
		<!-- export related -->
		<property>
			<name>export_action_hbase_table_name</name>
			<description>action manager hbase table name</description>
		</property>
		<property>
			<name>export_action_hbase_table_initialize</name>
			<description>flag indicating input table should be initialized</description>
		</property>
		<!-- action set id properties -->
		<property>
			<name>export_action_set_id</name>
			<value>$UNDEFINED$</value>
			<description>action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_dataset_similarities_websiteusage</name>
			<value>$UNDEFINED$</value>
			<description>dataset_similarities_websiteusage action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_person_similarities_websiteusage</name>
			<value>$UNDEFINED$</value>
			<description>person_similarities_websiteusage action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_person_statistics</name>
			<value>$UNDEFINED$</value>
			<description>person_statistics action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_project_statistics</name>
			<value>$UNDEFINED$</value>
			<description>project_statistics action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_similarities_standard</name>
			<value>$UNDEFINED$</value>
			<description>document_similarities_standard action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_similarities_websiteusage</name>
			<value>$UNDEFINED$</value>
			<description>document_similarities_websiteusage action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_statistics</name>
			<value>$UNDEFINED$</value>
			<description>document_statistics action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_classes</name>
			<value>$UNDEFINED$</value>
			<description>document_classes action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_referencedProjects</name>
			<value>$UNDEFINED$</value>
			<description>document_referencedProjects action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_referencedDatasets</name>
			<value>$UNDEFINED$</value>
			<description>document_referencedDatasets action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_referencedDocuments</name>
			<value>$UNDEFINED$</value>
			<description>document_referencedDocuments action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_research_initiative</name>
			<value>$UNDEFINED$</value>
			<description>document research initiative action-set identifier of exported data</description>
		</property>
		<property>
			<name>export_action_set_id_document_pdb</name>
			<value>$UNDEFINED$</value>
			<description>document to protein databank action-set identifier of exported data</description>
		</property>
		<!-- dataset entities export and remote hbase connection -->
		<property>
			<name>export_action_set_id_entity_dataset</name>
			<value>$UNDEFINED$</value>
			<description>action-set identifier of exported data containing dataset entities</description>
		</property>
		<property>
			<name>export_action_hbase_remote_zookeeper_quorum</name>
			<value>$UNDEFINED$</value>
			<description>external hbase zookeeper quorum, set to empty value by default which means data will be exported to local hbase instance</description>
		</property>
		<property>
			<name>export_action_hbase_remote_zookeeper_clientport</name>
			<value>$UNDEFINED$</value>
			<description>external hbase zookeeper client port, required only when zookeeper quorum property is set</description>
		</property>
		<!-- trust level threshold section -->
		<property>
			<name>export_trust_level_threshold</name>
			<value>$UNDEFINED$</value>
			<description>default trust level threshold of exported data</description>
		</property>
		<property>
			<name>export_trust_level_threshold_document_classes</name>
			<value>$UNDEFINED$</value>
			<description>document_classes trust level threshold</description>
		</property>
		<property>
			<name>export_trust_level_threshold_document_referencedProjects</name>
			<value>$UNDEFINED$</value>
			<description>document_referencedProjects trust level threshold</description>
		</property>
		<property>
			<name>export_trust_level_threshold_document_referencedDatasets</name>
			<value>$UNDEFINED$</value>
			<description>document_referencedDatasets trust level threshold</description>
		</property>
		<property>
			<name>export_trust_level_threshold_document_pdb</name>
			<value>$UNDEFINED$</value>
			<description>document to protein databank trust level threshold</description>
		</property>
		<!-- other export settings -->
		<property>
			<name>export_documentssimilarity_threshold</name>
			<value>$UNDEFINED$</value>
			<description>documents similarity threshold value below which similarity export is omitted</description>
		</property>
		<property>
			<name>export_referenceextraction_pdb_url_root</name>
			<value>http://www.rcsb.org/pdb/explore/explore.do?structureId=</value>
			<description>protein databank URL root part to be concatenated with pdb identifier when forming final URL</description>
		</property>
		<!-- working directory related -->
		<property>
			<name>execution_environment</name>
			<value>primary</value>
			<description>execution environment used for workingDir creation</description>
		</property>
		<property>
			<name>workingDir</name>
			<value>/user/${user.name}/iis/working_dirs/${execution_environment}</value>
			<description>working directory</description>
		</property>
	</parameters>
340
	
341
	<!-- Settings shared by the workflow's actions: job tracker, name node and the
		MapReduce queue name, resolved from the jobTracker, nameNode and queueName variables. -->
	<global>
		<job-tracker>${jobTracker}</job-tracker>
		<name-node>${nameNode}</name-node>
		<configuration>
			<property>
				<name>mapred.job.queue.name</name>
				<value>${queueName}</value>
			</property>
		</configuration>
	</global>
351
	
352
	<!-- Entry point: execution begins at the init-workingDir action. -->
	<start to="init-workingDir"/>
353

    
354
	<!-- Resets the working directory: deletes ${workingDir} on the name node and
		recreates it empty, then continues with copy-version. -->
	<action name="init-workingDir">
		<fs>
			<delete path="${nameNode}${workingDir}" />
			<mkdir path="${nameNode}${workingDir}" />
		</fs>
		<ok to="copy-version"/>
		<error to="fail"/>
	</action>
362
	
363
	<!-- Copies version.properties from the workflow application path into the
		working directory using the distcp action, then continues with import. -->
	<action name="copy-version">
		<distcp xmlns="uri:oozie:distcp-action:0.1">
			<job-tracker>${jobTracker}</job-tracker>
			<name-node>${nameNode}</name-node>
			<!-- source -->
			<arg>${wf:appPath()}/version.properties</arg>
			<!-- destination -->
			<arg>${nameNode}${workingDir}</arg>
		</distcp>
		<ok to="import"/>
		<error to="fail"/>
	</action>
373

    
374
	<!-- Runs the mainworkflows_common_import sub-workflow found under the application path.
		The parent configuration is propagated; the properties below map this workflow's
		import_* parameters onto the names used by the sub-workflow and place all of its
		outputs under ${workingDir}/mainworkflows_common_import. -->
	<action name="import">
		<sub-workflow>
			<app-path>${wf:appPath()}/mainworkflows_common_import</app-path>
			<propagate-configuration/>
			<configuration>
				<property>
					<name>workingDir</name>
					<value>${workingDir}/mainworkflows_common_import/working_dir</value>
				</property>
				<!-- importing modes -->
				<property>
					<name>match_content_with_metadata</name>
					<value>${match_content_with_metadata}</value>
				</property>
				<property>
					<name>active_import_metadata</name>
					<value>true</value>
				</property>
				<property>
					<name>active_import_dataset</name>
					<value>${active_referenceextraction_dataset}</value>
				</property>
				<property>
					<name>active_import_concept</name>
					<value>${active_referenceextraction_project}</value>
				</property>
				<property>
					<name>active_ingest_pmc</name>
					<!-- enabling when either citation matching is enabled or metadata export which may include pmc affiliations -->
					<value>${active_citationmatching eq "true" or active_metadataextraction_export eq "true"}</value>
				</property>
				<!-- import metadata related -->
				<property>
					<name>hbase_input_table</name>
					<value>${import_hbase_input_table}</value>
				</property>
				<property>
					<name>hbase_approved_datasources_csv</name>
					<value>${import_hbase_approved_datasources_csv}</value>
				</property>
				<!-- import datacite related -->
				<property>
					<name>mdstore_service_location</name>
					<value>${import_mdstore_service_location}</value>
				</property>
				<property>
					<name>dataset_mdstore_ids_csv</name>
					<value>${import_dataset_mdstore_ids_csv}</value>
				</property>
				<!-- project concept related -->
				<property>
					<name>islookup_service_location</name>
					<value>${import_islookup_service_location}</value>
				</property>
				<property>
					<name>project_concepts_context_ids_csv</name>
					<value>${import_project_concepts_context_ids_csv}</value>
				</property>
				<!-- import content related -->
				<property>
					<name>objectstore_service_location</name>
					<value>${import_content_object_store_location}</value>
				</property>
				<property>
					<name>approved_objectstores_csv</name>
					<value>${import_content_objectstores_csv}</value>
				</property>
				<property>
					<name>mimetypes_pdf</name>
					<value>${import_content_mimetypes_pdf}</value>
				</property>
				<property>
					<name>mimetypes_text</name>
					<value>${import_content_mimetypes_text}</value>
				</property>
				<property>
					<name>mimetypes_html</name>
					<value>${import_content_mimetypes_html}</value>
				</property>
				<property>
					<name>mimetypes_xml_pmc</name>
					<value>${import_content_mimetypes_xml_pmc}</value>
				</property>
				<property>
					<name>mimetypes_wos</name>
					<value>${import_content_mimetypes_wos}</value>
				</property>
				<!-- import timeouts related -->
				<property>
					<name>resultset_client_read_timeout</name>
					<value>${import_resultset_client_read_timeout}</value>
				</property>
				<property>
					<name>content_connection_timeout</name>
					<value>${import_content_connection_timeout}</value>
				</property>
				<property>
					<name>content_read_timeout</name>
					<value>${import_content_read_timeout}</value>
				</property>
				<!-- metadata extraction related properties are automatically propagated -->
				<!-- metadataimport and metadataextraction output subdirectory names;
					later actions in this workflow read ${workingDir}/mainworkflows_common_import/metadataimport/<name>,
					so these values must stay aligned with those paths -->
				<property>
					<name>metadataimport_output_name_document_meta</name>
					<value>docmeta</value>
				</property>
				<property>
					<name>metadataimport_output_name_document_project</name>
					<value>docproject</value>
				</property>
				<property>
					<name>metadataimport_output_name_project</name>
					<value>project</value>
				</property>
				<property>
					<name>metadataimport_output_name_person</name>
					<value>person</value>
				</property>
				<property>
					<name>metadataimport_output_name_dedup_mapping</name>
					<value>dedupmapping</value>
				</property>
				<!-- output parameters -->
				<property>
					<name>output_extracted_document_metadata</name>
					<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
				</property>
				<property>
					<name>output_metadataimport_root</name>
					<value>${workingDir}/mainworkflows_common_import/metadataimport</value>
				</property>
				<property>
					<name>output_dataset</name>
					<value>${workingDir}/mainworkflows_common_import/dataset</value>
				</property>
				<property>
					<name>output_dataset_to_mdstore</name>
					<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
				</property>
				<property>
					<name>output_citation_pmc</name>
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
				</property>
				<property>
					<name>output_document_text</name>
					<value>${workingDir}/mainworkflows_common_import/document-text</value>
				</property>
				<property>
					<name>output_project_concept</name>
					<value>${workingDir}/mainworkflows_common_import/project-concept</value>
				</property>
				<property>
					<name>output_wos</name>
					<value>${workingDir}/mainworkflows_common_import/wos</value>
				</property>
				<property>
					<name>output_faults</name>
					<value>${workingDir}/mainworkflows_common_import/faults</value>
				</property>
			</configuration>
		</sub-workflow>
		<ok to="decision-import_logs"/>
		<error to="fail" />
	</action>
538

    
539
	<!-- Routes to import_logs when active_websiteusage_analysis equals "true";
		otherwise goes straight to mainworkflows_primary_processing. -->
	<decision name="decision-import_logs">
		<switch>
			<case to="import_logs">${active_websiteusage_analysis eq "true"}</case>
			<default to="mainworkflows_primary_processing"/>
		</switch>
	</decision>
545

    
546
	<!-- Runs the import_logs sub-workflow, reading from ${portal_logs_location} and
		writing to ${workingDir}/websiteusage_analysis/import_logs/output, which
		mainworkflows_websiteusage_document later consumes as input_logs. -->
	<action name="import_logs">
		<sub-workflow>
			<app-path>${wf:appPath()}/import_logs</app-path>
			<propagate-configuration/>
			<configuration>
				<property>
					<name>workingDir</name>
					<value>${workingDir}/websiteusage_analysis/import_logs/working_dir</value>
				</property>
				<property>
					<name>input</name>
					<value>${portal_logs_location}</value>
				</property>
				<property>
					<name>output</name>
					<value>${workingDir}/websiteusage_analysis/import_logs/output</value>
				</property>
			</configuration>
		</sub-workflow>
		<ok to="mainworkflows_primary_processing" />
		<error to="fail" />
	</action>
568

    
569
	<!-- Runs the mainworkflows_primary_processing sub-workflow. Inputs point at the
		directories written by the import action; outputs go under ${workingDir}/exported.
		NOTE(review): unlike the sibling sub-workflow actions, no workingDir override is set here,
		so the propagated parent ${workingDir} is used as is; confirm this is intended. -->
	<action name="mainworkflows_primary_processing">
		<sub-workflow>
			<app-path>${wf:appPath()}/mainworkflows_primary_processing</app-path>
			<propagate-configuration/>
			<configuration>
				<!-- inputs produced by the import action -->
				<property>
					<name>input_document_metadata</name>
					<value>${workingDir}/mainworkflows_common_import/metadataimport/docmeta</value>
				</property>
				<property>
					<name>input_document_to_project</name>
					<value>${workingDir}/mainworkflows_common_import/metadataimport/docproject</value>
				</property>
				<property>
					<name>input_document_text</name>
					<value>${workingDir}/mainworkflows_common_import/document-text</value>
				</property>
				<property>
					<name>input_document_text_wos</name>
					<value>${workingDir}/mainworkflows_common_import/wos</value>
				</property>
				<property>
					<name>input_project</name>
					<value>${workingDir}/mainworkflows_common_import/metadataimport/project</value>
				</property>
				<property>
					<name>input_person</name>
					<value>${workingDir}/mainworkflows_common_import/metadataimport/person</value>
				</property>
				<property>
					<name>input_dataset</name>
					<value>${workingDir}/mainworkflows_common_import/dataset</value>
				</property>
				<property>
					<name>input_extracted_document_metadata</name>
					<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
				</property>
				<property>
					<name>input_citation_pmc</name>
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
				</property>
				<property>
					<name>input_project_concept</name>
					<value>${workingDir}/mainworkflows_common_import/project-concept</value>
				</property>
				<!-- outputs -->
				<property>
					<name>output_document_to_project</name>
					<value>${workingDir}/exported/document_to_project</value>
				</property>
				<property>
					<name>output_document_to_project_concepts</name>
					<value>${workingDir}/exported/document_to_project_concepts</value>
				</property>
				<property>
					<name>output_document_to_dataset</name>
					<value>${workingDir}/exported/document_to_dataset</value>
				</property>
				<property>
					<name>output_document_to_research_initiatives</name>
					<value>${workingDir}/exported/document_to_research_initiatives</value>
				</property>
				<property>
					<name>output_document_to_pdb</name>
					<value>${workingDir}/exported/document_to_pdb</value>
				</property>
				<property>
					<name>output_document_to_document_classes</name>
					<value>${workingDir}/exported/document_to_document_classes</value>
				</property>
				<property>
					<name>output_citation</name>
					<value>${workingDir}/exported/citation</value>
				</property>
				<property>
					<name>output_document_similarity</name>
					<value>${workingDir}/exported/document_similarity</value>
				</property>
				<property>
					<name>output_document_statistics</name>
					<value>${workingDir}/exported/document_statistics</value>
				</property>
				<property>
					<name>output_author_statistics</name>
					<value>${workingDir}/exported/author_statistics</value>
				</property>
				<property>
					<name>output_project_statistics</name>
					<value>${workingDir}/exported/project_statistics</value>
				</property>
			</configuration>
		</sub-workflow>
		<ok to="decision-websiteusage_analysis"/>
		<error to="fail" />
	</action>
663
    
664
    <!-- website usage analysis block -->
665
    <!-- Routes to websiteusage-idextractor when active_websiteusage_analysis equals "true";
         otherwise goes to skip-websiteusage. -->
    <decision name="decision-websiteusage_analysis">
        <switch>
            <case to="websiteusage-idextractor">${active_websiteusage_analysis eq "true"}</case>
            <default to="skip-websiteusage"/>
        </switch>
    </decision>
671
    
672
    <!-- Runs the transformers_idextractor sub-workflow on the imported document metadata;
         its output_identifier directory is consumed by mainworkflows_websiteusage_document
         as input_document_id. -->
    <action name="websiteusage-idextractor">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_idextractor</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/working_dir</value>
                </property>
                <property>
                    <name>input_document_metadata</name>
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/docmeta</value>
                </property>
                <property>
                    <name>output_identifier</name>
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="mainworkflows_websiteusage_document"/>
        <error to="fail"/>
    </action>
694
    
695
    <!-- Runs the mainworkflows_websiteusage_document sub-workflow. Inputs are the imported
         logs, the dedup mapping from the metadata import and the identifiers written by
         websiteusage-idextractor; the result goes to ${workingDir}/websiteusage_analysis/output. -->
    <action name="mainworkflows_websiteusage_document">
        <sub-workflow>
            <app-path>${wf:appPath()}/mainworkflows_websiteusage_document</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/websiteusage_analysis/working_dir</value>
                </property>
                <property>
                    <name>input_logs</name>
                    <value>${workingDir}/websiteusage_analysis/import_logs/output</value>
                </property>
                <property>
                    <name>input_id_mapping</name>
                    <value>${workingDir}/mainworkflows_common_import/metadataimport/dedupmapping</value>
                </property>
                <property>
                    <name>input_document_id</name>
                    <value>${workingDir}/websiteusage_analysis/transformers_idextractor/output</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/websiteusage_analysis/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-transform-metadataextraction-for-export"/>
        <error to="fail"/>
    </action>
725
    
726
    <!-- Default branch of decision-websiteusage_analysis. Runs the Producer node through
         ProcessWrapper with empty.json as the data source and writes the
         websiteusage_analysis_document port to the same output directory that
         mainworkflows_websiteusage_document would have used.
         NOTE(review): presumably this yields an empty datastore so later steps find their input; confirm. -->
    <action name="skip-websiteusage">
        <java>
            <prepare>
                <!-- notice: directory has to be aligned with the skipped action's output -->
                <delete path="${nameNode}${workingDir}/websiteusage_analysis" />
                <mkdir path="${nameNode}${workingDir}/websiteusage_analysis" />
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- the line breaks and leading tabs inside this argument are part of its text; kept as in the original -->
            <arg>-C{websiteusage_analysis_document,
				eu.dnetlib.iis.websiteusage.schemas.DocumentsWithWebsiteUsageSimilarities,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <arg>-Owebsiteusage_analysis_document=${workingDir}/websiteusage_analysis/output</arg>
        </java>
        <ok to="decision-transform-metadataextraction-for-export"/>
        <error to="fail"/>
    </action>
743
    <!-- end of website usage analysis block -->
744
    
745
    <!-- Routes to transformers_export_documentmetadata when active_metadataextraction_export
         equals "true"; otherwise goes to skip-transformers_export_documentmetadata. -->
    <decision name="decision-transform-metadataextraction-for-export">
        <switch>
            <case to="transformers_export_documentmetadata">${active_metadataextraction_export eq "true"}</case>
            <default to="skip-transformers_export_documentmetadata"/>
        </switch>
    </decision>
751
    
752
    <!-- Runs the transformers_export_documentmetadata sub-workflow, which reads
         the extracted document metadata and writes output_metadata for the
         export steps that follow. -->
    <action name="transformers_export_documentmetadata">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_export_documentmetadata</app-path>
            <propagate-configuration/>
            <configuration>
                <!-- dedicated working directory for the sub-workflow -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_export_documentmetadata/working_dir</value>
                </property>
                <!-- input port -->
                <property>
                    <name>input_extracted_metadata</name>
                    <value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
                </property>
                <!-- output port; the same path is consumed by export-to-hbase and export-to-json -->
                <property>
                    <name>output_metadata</name>
                    <value>${workingDir}/transformers_export_documentmetadata/output_metadata</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-export-to-hbase"/>
        <error to="fail"/>
    </action>
774
    
775
    <!-- Skip path of the document metadata export transformer: runs the JSON
         Producer with the empty.json resource and writes the result to the
         output_metadata location used by transformers_export_documentmetadata. -->
    <action name="skip-transformers_export_documentmetadata">
        <java>
            <prepare>
                <!-- Note: this directory and the -O path below have to be aligned
                     with the output of the skipped action. -->
                <delete path="${nameNode}${workingDir}/transformers_export_documentmetadata"/>
                <mkdir path="${nameNode}${workingDir}/transformers_export_documentmetadata"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- port name, record schema class, classpath resource holding the records -->
            <arg>-C{document_metadata,
				eu.dnetlib.iis.export.schemas.DocumentMetadata,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <arg>-Odocument_metadata=${workingDir}/transformers_export_documentmetadata/output_metadata</arg>
        </java>
        <ok to="decision-export-to-hbase"/>
        <error to="fail"/>
    </action>
793
    
794
    <!-- Runs the HBase export only when active_export_to_hbase equals "true";
         otherwise goes straight to the JSON export decision. -->
    <decision name="decision-export-to-hbase">
        <switch>
            <case to="export-to-hbase">${active_export_to_hbase eq "true"}</case>
            <default to="decision-export-to-json"/>
        </switch>
    </decision>
800
    
801
    <!-- Runs the mainworkflows_common_export sub-workflow. All inference outputs
         are wired in as input ports and the export_* parameters of this workflow
         are mapped onto the names expected by the sub-workflow. On success the
         flow continues with the JSON export decision. -->
    <action name="export-to-hbase">
        <sub-workflow>
            <app-path>${wf:appPath()}/mainworkflows_common_export</app-path>
            <propagate-configuration/>
            <configuration>
                <!-- dedicated working directory for the sub-workflow -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/mainworkflows_common_export/working_dir</value>
                </property>

                <!-- input ports -->
                <property>
                    <name>input_document_metadata</name>
                    <value>${workingDir}/transformers_export_documentmetadata/output_metadata</value>
                </property>
                <property>
                    <name>input_document_to_project</name>
                    <value>${workingDir}/exported/document_to_project</value>
                </property>
                <property>
                    <name>input_document_to_project_concepts</name>
                    <value>${workingDir}/exported/document_to_project_concepts</value>
                </property>
                <property>
                    <name>input_document_to_dataset</name>
                    <value>${workingDir}/exported/document_to_dataset</value>
                </property>
                <!-- NOTE(review): the port is named document_to_mdstore while the path
                     points at dataset_to_mdstore; confirm this pairing is intended. -->
                <property>
                    <name>input_document_to_mdstore</name>
                    <value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
                </property>
                <property>
                    <name>input_document_to_research_initiatives</name>
                    <value>${workingDir}/exported/document_to_research_initiatives</value>
                </property>
                <property>
                    <name>input_document_to_pdb</name>
                    <value>${workingDir}/exported/document_to_pdb</value>
                </property>
                <property>
                    <name>input_document_to_document_classes</name>
                    <value>${workingDir}/exported/document_to_document_classes</value>
                </property>
                <property>
                    <name>input_citations</name>
                    <value>${workingDir}/exported/citation</value>
                </property>
                <property>
                    <name>input_document_similarity</name>
                    <value>${workingDir}/exported/document_similarity</value>
                </property>
                <property>
                    <name>input_document_statistics</name>
                    <value>${workingDir}/exported/document_statistics</value>
                </property>
                <property>
                    <name>input_document_websiteusage_similarity</name>
                    <value>${workingDir}/websiteusage_analysis/output</value>
                </property>
                <property>
                    <name>input_author_statistics</name>
                    <value>${workingDir}/exported/author_statistics</value>
                </property>
                <property>
                    <name>input_project_statistics</name>
                    <value>${workingDir}/exported/project_statistics</value>
                </property>

                <!-- entities exporting modes -->
                <property>
                    <name>active_export_referenceddataset_datasets</name>
                    <value>${active_referenceextraction_dataset}</value>
                </property>
                <property>
                    <name>active_export_referencedproject_entities</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mdstore_service_location</name>
                    <value>${import_mdstore_service_location}</value>
                </property>

                <!-- export related -->
                <property>
                    <name>action_hbase_table_name</name>
                    <value>${export_action_hbase_table_name}</value>
                </property>
                <property>
                    <name>action_hbase_table_initialize</name>
                    <value>${export_action_hbase_table_initialize}</value>
                </property>

                <!-- action set id properties -->
                <property>
                    <name>action_set_id</name>
                    <value>${export_action_set_id}</value>
                </property>
                <property>
                    <name>action_set_id_dataset_similarities_websiteusage</name>
                    <value>${export_action_set_id_dataset_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_person_similarities_websiteusage</name>
                    <value>${export_action_set_id_person_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_person_statistics</name>
                    <value>${export_action_set_id_person_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_project_statistics</name>
                    <value>${export_action_set_id_project_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_document_similarities_standard</name>
                    <value>${export_action_set_id_document_similarities_standard}</value>
                </property>
                <property>
                    <name>action_set_id_document_similarities_websiteusage</name>
                    <value>${export_action_set_id_document_similarities_websiteusage}</value>
                </property>
                <property>
                    <name>action_set_id_document_statistics</name>
                    <value>${export_action_set_id_document_statistics}</value>
                </property>
                <property>
                    <name>action_set_id_document_classes</name>
                    <value>${export_action_set_id_document_classes}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedProjects</name>
                    <value>${export_action_set_id_document_referencedProjects}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedDatasets</name>
                    <value>${export_action_set_id_document_referencedDatasets}</value>
                </property>
                <property>
                    <name>action_set_id_document_referencedDocuments</name>
                    <value>${export_action_set_id_document_referencedDocuments}</value>
                </property>
                <property>
                    <name>action_set_id_document_research_initiative</name>
                    <value>${export_action_set_id_document_research_initiative}</value>
                </property>
                <property>
                    <name>action_set_id_document_pdb</name>
                    <value>${export_action_set_id_document_pdb}</value>
                </property>
                <property>
                    <name>action_set_id_entity_dataset</name>
                    <value>${export_action_set_id_entity_dataset}</value>
                </property>

                <!-- trust level thresholds -->
                <property>
                    <name>trust_level_threshold</name>
                    <value>${export_trust_level_threshold}</value>
                </property>
                <property>
                    <name>trust_level_threshold_document_referencedProjects</name>
                    <value>${export_trust_level_threshold_document_referencedProjects}</value>
                </property>
                <property>
                    <name>trust_level_threshold_document_referencedDatasets</name>
                    <value>${export_trust_level_threshold_document_referencedDatasets}</value>
                </property>
                <property>
                    <name>trust_level_threshold_document_classes</name>
                    <value>${export_trust_level_threshold_document_classes}</value>
                </property>
                <property>
                    <name>trust_level_threshold_document_pdb</name>
                    <value>${export_trust_level_threshold_document_pdb}</value>
                </property>

                <!-- remote HBase ZooKeeper settings -->
                <property>
                    <name>action_hbase_remote_zookeeper_quorum</name>
                    <value>${export_action_hbase_remote_zookeeper_quorum}</value>
                </property>
                <property>
                    <name>action_hbase_remote_zookeeper_clientport</name>
                    <value>${export_action_hbase_remote_zookeeper_clientport}</value>
                </property>

                <!-- remaining export parameters -->
                <property>
                    <name>documentssimilarity_threshold</name>
                    <value>${export_documentssimilarity_threshold}</value>
                </property>
                <property>
                    <name>referenceextraction_pdb_url_root</name>
                    <value>${export_referenceextraction_pdb_url_root}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-export-to-json"/>
        <error to="fail"/>
    </action>
992
    
993
    <!-- Runs the JSON export only when active_export_to_json equals "true";
         otherwise the workflow ends here. -->
    <decision name="decision-export-to-json">
        <switch>
            <case to="export-to-json">${active_export_to_json eq "true"}</case>
            <default to="end"/>
        </switch>
    </decision>
999
    
1000
    <!-- Runs the mainworkflows_common_export_to_json sub-workflow, which receives
         the same input ports as export-to-hbase and writes below output_root.
         This is the last step: success leads to the end node. -->
    <action name="export-to-json">
        <sub-workflow>
            <app-path>${wf:appPath()}/mainworkflows_common_export_to_json</app-path>
            <propagate-configuration/>
            <configuration>
                <!-- Dedicated working directory for the sub-workflow. Because the parent
                     configuration is propagated, omitting this property would make the
                     sub-workflow inherit this workflow's own workingDir, unlike the other
                     sub-workflow actions here, which each get a private working_dir. -->
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/mainworkflows_common_export_to_json/working_dir</value>
                </property>

                <!-- input ports -->
                <property>
                    <name>input_document_metadata</name>
                    <value>${workingDir}/transformers_export_documentmetadata/output_metadata</value>
                </property>
                <property>
                    <name>input_document_to_project</name>
                    <value>${workingDir}/exported/document_to_project</value>
                </property>
                <property>
                    <name>input_document_to_project_concepts</name>
                    <value>${workingDir}/exported/document_to_project_concepts</value>
                </property>
                <property>
                    <name>input_document_to_dataset</name>
                    <value>${workingDir}/exported/document_to_dataset</value>
                </property>
                <!-- NOTE(review): the port is named document_to_mdstore while the path
                     points at dataset_to_mdstore; confirm this pairing is intended. -->
                <property>
                    <name>input_document_to_mdstore</name>
                    <value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
                </property>
                <property>
                    <name>input_document_to_research_initiatives</name>
                    <value>${workingDir}/exported/document_to_research_initiatives</value>
                </property>
                <property>
                    <name>input_document_to_pdb</name>
                    <value>${workingDir}/exported/document_to_pdb</value>
                </property>
                <property>
                    <name>input_document_to_document_classes</name>
                    <value>${workingDir}/exported/document_to_document_classes</value>
                </property>
                <property>
                    <name>input_citations</name>
                    <value>${workingDir}/exported/citation</value>
                </property>
                <property>
                    <name>input_document_similarity</name>
                    <value>${workingDir}/exported/document_similarity</value>
                </property>
                <property>
                    <name>input_document_statistics</name>
                    <value>${workingDir}/exported/document_statistics</value>
                </property>
                <property>
                    <name>input_document_websiteusage_similarity</name>
                    <value>${workingDir}/websiteusage_analysis/output</value>
                </property>
                <property>
                    <name>input_author_statistics</name>
                    <value>${workingDir}/exported/author_statistics</value>
                </property>
                <property>
                    <name>input_project_statistics</name>
                    <value>${workingDir}/exported/project_statistics</value>
                </property>

                <!-- output location; resolved against this workflow's workingDir -->
                <property>
                    <name>output_root</name>
                    <value>${workingDir}/exported_as_json</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="end"/>
        <error to="fail"/>
    </action>
1071
    
1072
	<!-- Terminal nodes. Every action's error transition targets "fail", which
	     kills the workflow and reports the error message of the last failed node. -->
	<kill name="fail">
		<message>Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
	<!-- Regular completion of the workflow. -->
	<end name="end"/>
1077
</workflow-app>
(2-2/2)