<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4" name="mainworkflows_primary_processing">

	<!--
		Formal parameters of this workflow.
		Properties declaring a <value> carry a default; properties without a <value>
		have no default and must be supplied in the job configuration at submission.
	-->
	<parameters>
		<property>
			<name>remove_sideproducts</name>
			<value>true</value>
			<description>flag indicating inference side products will be erased</description>
		</property>

		<!-- processing modes -->
		<property>
			<name>active_referenceextraction_project</name>
			<value>true</value>
			<description>flag indicating project reference extraction should be enabled</description>
		</property>
		<property>
			<name>active_referenceextraction_dataset</name>
			<value>true</value>
			<description>flag indicating dataset reference extraction should be enabled</description>
		</property>
		<property>
			<!-- currently disabled by default -->
			<name>active_referenceextraction_researchinitiative</name>
			<value>false</value>
			<description>flag indicating researchinitiative reference extraction should be enabled</description>
		</property>
		<property>
			<!-- currently disabled by default -->
			<name>active_referenceextraction_pdb</name>
			<value>false</value>
			<description>flag indicating protein databank reference extraction should be enabled</description>
		</property>
		<property>
			<name>active_documentsclassification</name>
			<value>true</value>
			<description>flag indicating documents classification should be enabled</description>
		</property>
		<property>
			<name>active_documentssimilarity</name>
			<value>true</value>
			<description>flag indicating documents similarity should be enabled</description>
		</property>
		<property>
			<name>active_citationmatching</name>
			<!-- currently disabled by default -->
			<value>false</value>
			<description>flag indicating citation matching should be enabled</description>
		</property>
		<property>
			<name>active_statistics</name>
			<!-- currently disabled by default -->
			<value>false</value>
			<description>flag indicating statistics generation should be enabled</description>
		</property>

		<!-- input ports -->
		<property>
			<name>input_document_metadata</name>
			<description>input document metadata directory</description>
		</property>
		<property>
			<name>input_document_to_project</name>
			<description>input document to project relation directory</description>
		</property>
		<property>
			<name>input_document_text</name>
			<description>input document text directory</description>
		</property>
		<property>
			<name>input_document_text_wos</name>
			<description>input document text directory holding WOS records</description>
		</property>
		<property>
			<name>input_project</name>
			<description>input project directory</description>
		</property>
		<property>
			<name>input_person</name>
			<description>input person directory</description>
		</property>
		<property>
			<name>input_dataset</name>
			<description>input dataset directory</description>
		</property>
		<property>
			<name>input_extracted_document_metadata</name>
			<description>input extracted document metadata directory</description>
		</property>
		<property>
			<name>input_citation_pmc</name>
			<description>input directory holding citations extracted from PMC</description>
		</property>
		<property>
			<name>input_project_concept</name>
			<description>input project concept directory</description>
		</property>

		<!-- citation matching related -->
		<property>
			<name>cit_genAuthorIdxJavaOpts</name>
			<value>-Xmx8g</value>
			<description>java opts for author index creation for citation purposes</description>
		</property>

		<!-- document similarity related -->
		<property>
			<name>ds_parallel</name>
			<value>20</value>
			<description>document similarity pig parallel</description>
		</property>
		<property>
			<name>ds_mapredChildJavaOpts</name>
			<value>-Xmx20g</value>
			<description>mapred child java opts</description>
		</property>
		<property>
			<name>ds_sample</name>
			<value>1.0</value>
			<description>sample rate</description>
		</property>
		<property>
			<name>ds_removal_rate</name>
			<value>0.99</value>
			<description>document similarity removal rate</description>
		</property>
		<property>
			<name>ds_removal_least_used</name>
			<value>20</value>
			<description>document similarity least used removal</description>
		</property>
		<property>
			<!-- NOTE(review): no description was provided; presumably the number of top TF-IDF terms kept per document - confirm -->
			<name>ds_tfidfTopnTermPerDocument</name>
			<value>20</value>
		</property>
		<property>
			<!-- NOTE(review): no description was provided; presumably the number of most similar documents kept per document - confirm -->
			<name>ds_similarityTopnDocumentPerDocument</name>
			<value>20</value>
		</property>

		<!-- output ports -->
		<property>
			<name>output_document_to_project</name>
			<description>project reference extraction output directory</description>
		</property>
		<property>
			<name>output_document_to_project_concepts</name>
			<description>document to project concepts output directory</description>
		</property>
		<property>
			<name>output_document_to_dataset</name>
			<description>dataset reference extraction output directory</description>
		</property>
		<property>
			<name>output_document_to_research_initiatives</name>
			<description>research initiatives reference extraction output directory</description>
		</property>
		<property>
			<name>output_document_to_pdb</name>
			<description>protein databank reference extraction output directory</description>
		</property>
		<property>
			<name>output_document_to_document_classes</name>
			<description>output document classification directory</description>
		</property>
		<property>
			<name>output_citation</name>
			<description>output containing grouped citations coming from citation matching and pmc ingestion</description>
		</property>
		<property>
			<name>output_document_similarity</name>
			<description>output document similarity directory</description>
		</property>
		<property>
			<name>output_document_statistics</name>
			<description>output document statistics directory</description>
		</property>
		<property>
			<name>output_author_statistics</name>
			<description>output author statistics directory</description>
		</property>
		<property>
			<name>output_project_statistics</name>
			<description>output project statistics directory</description>
		</property>
	</parameters>

	<!--
		Settings shared by the actions of this workflow: job tracker, name node
		and the MapReduce queue, all taken from the job configuration.
	-->
	<global>
		<job-tracker>${jobTracker}</job-tracker>
		<name-node>${nameNode}</name-node>
		<configuration>
			<property>
				<name>mapred.job.queue.name</name>
				<value>${queueName}</value>
			</property>
		</configuration>
	</global>

	<!-- Entry point: processing begins at the "forking" node. -->
	<start to="forking"/>

    <!--
        Launches the concurrent branches: one decision node per reference
        extraction kind plus the metadata merger branch.
        All branches visible in this file end in the "joining" node.
    -->
    <fork name="forking">
        <path start="decision-referenceextraction_project"/>
        <path start="decision-referenceextraction_dataset"/>
        <path start="decision-referenceextraction_researchinitiative"/>
        <path start="decision-referenceextraction_pdb"/>
        <path start="transformers_metadatamerger"/>
    </fork>

    <!-- start of project reference extraction block -->
    <!--
        Runs the extraction only when active_referenceextraction_project equals
        the string "true"; any other value routes to the skip action.
    -->
    <decision name="decision-referenceextraction_project">
        <switch>
            <case to="referenceextraction_project">${active_referenceextraction_project eq "true"}</case>
            <default to="skip-referenceextraction_project"/>
        </switch>
    </decision>

    <!--
        Invokes the referenceextraction_project sub-workflow with the document text
        and project inputs; its result goes to ${output_document_to_project}.
    -->
    <action name="referenceextraction_project">
        <sub-workflow>
            <app-path>${wf:appPath()}/referenceextraction_project</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_project/working_dir</value>
                </property>
                <property>
                    <name>input_document_text</name>
                    <value>${input_document_text}</value>
                </property>
                <property>
                    <name>input_project</name>
                    <value>${input_project}</value>
                </property>
                <property>
                    <name>output_document_to_project</name>
                    <!-- referenceextraction_project directory is created at subworkflow prepare phase -->
                    <value>${output_document_to_project}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_project_toconcept"/>
        <error to="fail"/>
    </action>

    <!--
        Invokes the transformers_project_toconcept sub-workflow. It consumes the
        document-to-project output of the previous step together with the project
        and project concept inputs, and writes into the working directory.
    -->
    <action name="transformers_project_toconcept">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_project_toconcept</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_project_toconcept/working_dir</value>
                </property>
                <property>
                    <name>input_document_to_project</name>
                    <value>${output_document_to_project}</value>
                </property>
                <property>
                    <name>input_project</name>
                    <value>${input_project}</value>
                </property>
                <property>
                    <name>input_concept</name>
                    <value>${input_project_concept}</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/transformers_project_toconcept/out</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_concept_to_researchinitiatives"/>
        <error to="fail"/>
    </action>

    <!--
        Reuses the transformers_export_researchinitiatives sub-workflow to export
        the output of transformers_project_toconcept into
        ${output_document_to_project_concepts}.
    -->
    <action name="transformers_concept_to_researchinitiatives">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_export_researchinitiatives</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_concept_to_researchinitiatives/working_dir</value>
                </property>
                <property>
                    <name>input_document_to_research_initiative</name>
                    <value>${workingDir}/transformers_project_toconcept/out</value>
                </property>
                <property>
                    <name>output_document_to_research_initiatives</name>
                    <value>${output_document_to_project_concepts}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="joining"/>
        <error to="fail"/>
    </action>

    <!--
        Replacement for the project reference extraction chain when it is disabled.
        The prepare phase clears the working directories and outputs of the skipped
        steps and recreates the directories listed below; the Producer is then run
        with the empty.json resource for both output ports.
        NOTE(review): presumably this yields empty datastores in both output
        directories - confirm against the Producer implementation.
    -->
    <action name="skip-referenceextraction_project">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped actions' outputs -->
                <delete path="${nameNode}${workingDir}/referenceextraction_project"/>
                <delete path="${nameNode}${workingDir}/transformers_project_toconcept"/>
                <delete path="${nameNode}${workingDir}/transformers_concept_to_researchinitiatives"/>
                <delete path="${nameNode}${output_document_to_project}"/>
                <delete path="${nameNode}${output_document_to_project_concepts}"/>
                <mkdir path="${nameNode}${workingDir}/referenceextraction_project"/>
                <mkdir path="${nameNode}${output_document_to_project}"/>
                <mkdir path="${nameNode}${output_document_to_project_concepts}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- whitespace inside the multi-line <arg> values is part of the argument text and is kept as is -->
            <arg>-C{referenceextraction_project,
				eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <arg>-C{document_to_project_concepts,
				eu.dnetlib.iis.export.schemas.DocumentToConceptIds,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: directories have to be aligned with the skipped actions' outputs -->
            <arg>-Oreferenceextraction_project=${output_document_to_project}</arg>
            <arg>-Odocument_to_project_concepts=${output_document_to_project_concepts}</arg>
        </java>
        <ok to="joining"/>
        <error to="fail"/>
    </action>
    <!-- end of project reference extraction block -->

    <!-- start of dataset reference extraction block -->
    <!--
        Runs the extraction only when active_referenceextraction_dataset equals
        the string "true"; any other value routes to the skip action.
    -->
    <decision name="decision-referenceextraction_dataset">
        <switch>
            <case to="referenceextraction_dataset">${active_referenceextraction_dataset eq "true"}</case>
            <default to="skip-referenceextraction_dataset"/>
        </switch>
    </decision>

    <!--
        Invokes the referenceextraction_dataset sub-workflow with the document text
        and dataset inputs; its result goes to ${output_document_to_dataset}.
    -->
    <action name="referenceextraction_dataset">
        <sub-workflow>
            <app-path>${wf:appPath()}/referenceextraction_dataset</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_dataset/working_dir</value>
                </property>
                <property>
                    <name>input_document_text</name>
                    <value>${input_document_text}</value>
                </property>
                <property>
                    <name>input_dataset</name>
                    <value>${input_dataset}</value>
                </property>
                <property>
                    <name>output_document_to_dataset</name>
                    <!-- referenceextraction_dataset directory is created at subworkflow prepare phase -->
                    <value>${output_document_to_dataset}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="joining"/>
        <error to="fail"/>
    </action>

    <!--
        Replacement for dataset reference extraction when it is disabled.
        The prepare phase recreates the working and output directories; the
        Producer is then run with the empty.json resource for the output port.
        NOTE(review): presumably this yields an empty datastore in
        ${output_document_to_dataset} - confirm against the Producer implementation.
    -->
    <action name="skip-referenceextraction_dataset">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped action's output -->
                <delete path="${nameNode}${workingDir}/referenceextraction_dataset"/>
                <delete path="${nameNode}${output_document_to_dataset}"/>
                <mkdir path="${nameNode}${workingDir}/referenceextraction_dataset"/>
                <mkdir path="${nameNode}${output_document_to_dataset}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- whitespace inside the multi-line <arg> value is part of the argument text and is kept as is -->
            <arg>-C{referenceextraction_dataset,
				eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: directory has to be aligned with the skipped action's output -->
            <arg>-Oreferenceextraction_dataset=${output_document_to_dataset}</arg>
        </java>
        <ok to="joining"/>
        <error to="fail"/>
    </action>
    <!-- end of dataset reference extraction block -->

    <!-- start of researchinitiative reference extraction block -->
    <!--
        Starts the chain (beginning with the document text collapser) only when
        active_referenceextraction_researchinitiative equals the string "true";
        any other value routes to the skip action.
    -->
    <decision name="decision-referenceextraction_researchinitiative">
        <switch>
            <case to="referenceextraction_researchinitiative_collapser">${active_referenceextraction_researchinitiative eq "true"}</case>
            <default to="skip-referenceextraction_researchinitiative"/>
        </switch>
    </decision>

    <!--
        Invokes the collapsers_multiple_input_collapser sub-workflow on two document
        text inputs (regular and WOS), blocking on the "id" field. The collapsed
        output feeds the researchinitiative reference extraction step.
    -->
    <action name="referenceextraction_researchinitiative_collapser">
        <sub-workflow>
            <app-path>${wf:appPath()}/collapsers_multiple_input_collapser</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_researchinitiative_collapser/working_dir</value>
                </property>

                <!-- first input: regular document text -->
                <property>
                    <name>origin_1</name>
                    <value>document_text</value>
                </property>
                <property>
                    <name>input_1</name>
                    <value>${input_document_text}</value>
                </property>

                <!-- second input: document text holding WOS records -->
                <property>
                    <name>origin_2</name>
                    <value>document_text_wos</value>
                </property>
                <property>
                    <name>input_2</name>
                    <value>${input_document_text_wos}</value>
                </property>

                <property>
                    <name>blocking_field</name>
                    <value>id</value>
                </property>
                <property>
                    <name>schema_input</name>
                    <value>eu.dnetlib.iis.metadataextraction.schemas.DocumentText</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/referenceextraction_researchinitiative_collapser/output</value>
                </property>
                <property>
                    <name>schema_input_envelope</name>
                    <value>eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope</value>
                </property>
                <property>
                    <name>record_collapser</name>
                    <value>eu.dnetlib.iis.collapsers.origins.DocumentTextCollapser</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="referenceextraction_researchinitiative"/>
        <error to="fail"/>
    </action>

    <!--
        Invokes the referenceextraction_researchinitiative sub-workflow on the
        collapsed document text; the result is kept in the working directory and
        exported by the next step.
    -->
    <action name="referenceextraction_researchinitiative">
        <sub-workflow>
            <app-path>${wf:appPath()}/referenceextraction_researchinitiative</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_researchinitiative/working_dir</value>
                </property>
                <property>
                    <name>input_document_text</name>
                    <value>${workingDir}/referenceextraction_researchinitiative_collapser/output</value>
                </property>
                <property>
                    <name>output_document_to_research_initiative</name>
                    <value>${workingDir}/referenceextraction_researchinitiative/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_export_researchinitiatives"/>
        <error to="fail"/>
    </action>

    <!--
        Invokes the transformers_export_researchinitiatives sub-workflow to export
        the extraction result into ${output_document_to_research_initiatives}.
    -->
    <action name="transformers_export_researchinitiatives">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_export_researchinitiatives</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_export_researchinitiatives/working_dir</value>
                </property>
                <property>
                    <name>input_document_to_research_initiative</name>
                    <value>${workingDir}/referenceextraction_researchinitiative/output</value>
                </property>
                <property>
                    <name>output_document_to_research_initiatives</name>
                    <value>${output_document_to_research_initiatives}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="joining"/>
        <error to="fail"/>
    </action>

    <!--
        Replacement for the researchinitiative reference extraction chain when it
        is disabled. The prepare phase clears the working directories of every step
        of the skipped chain (collapser, extraction, export), in line with what
        skip-referenceextraction_project does for its chain, so that no stale side
        products of an earlier run remain. The Producer is then run with the
        empty.json resource for the output port.
        NOTE(review): presumably this yields an empty datastore in
        ${output_document_to_research_initiatives} - confirm against the Producer
        implementation.
    -->
    <action name="skip-referenceextraction_researchinitiative">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped actions' outputs -->
                <delete path="${nameNode}${workingDir}/referenceextraction_researchinitiative_collapser"/>
                <delete path="${nameNode}${workingDir}/referenceextraction_researchinitiative"/>
                <delete path="${nameNode}${workingDir}/transformers_export_researchinitiatives"/>
                <delete path="${nameNode}${output_document_to_research_initiatives}"/>
                <mkdir path="${nameNode}${workingDir}/transformers_export_researchinitiatives"/>
                <mkdir path="${nameNode}${output_document_to_research_initiatives}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- whitespace inside the multi-line <arg> value is part of the argument text and is kept as is -->
            <arg>-C{referenceextraction_researchinitiatives,
				eu.dnetlib.iis.export.schemas.DocumentToConceptIds,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: directory has to be aligned with the skipped action's output -->
            <arg>-Oreferenceextraction_researchinitiatives=${output_document_to_research_initiatives}</arg>
        </java>
        <ok to="joining"/>
        <error to="fail"/>
    </action>
    <!-- end of researchinitiative reference extraction block -->

	<!-- start of pdb reference extraction block -->
	<!--
		Runs the extraction only when active_referenceextraction_pdb equals the
		string "true"; any other value routes to the skip action.
	-->
	<decision name="decision-referenceextraction_pdb">
		<switch>
			<case to="referenceextraction_pdb">${active_referenceextraction_pdb eq "true"}</case>
			<default to="skip-referenceextraction_pdb"/>
		</switch>
	</decision>

    <!--
        Invokes the referenceextraction_pdb sub-workflow on the input document
        text; the result is kept in the working directory and exported by the
        next step.
    -->
    <action name="referenceextraction_pdb">
        <sub-workflow>
            <app-path>${wf:appPath()}/referenceextraction_pdb</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/referenceextraction_pdb/working_dir</value>
                </property>
                <property>
                    <name>input_document_text</name>
                    <value>${input_document_text}</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/referenceextraction_pdb/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_export_pdb"/>
        <error to="fail"/>
    </action>

    <!--
        Reuses the transformers_export_researchinitiatives sub-workflow (hence the
        research initiative property names) to export the pdb extraction result
        into ${output_document_to_pdb}.
    -->
    <action name="transformers_export_pdb">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_export_researchinitiatives</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_export_pdb/working_dir</value>
                </property>
                <property>
                    <name>input_document_to_research_initiative</name>
                    <value>${workingDir}/referenceextraction_pdb/output</value>
                </property>
                <property>
                    <name>output_document_to_research_initiatives</name>
                    <value>${output_document_to_pdb}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="joining"/>
        <error to="fail"/>
    </action>

    <!--
        Replacement for the pdb reference extraction chain when it is disabled.
        The prepare phase clears the working directories of both skipped steps
        (extraction and export), so that no stale side products of an earlier run
        remain, and recreates the export working directory and the output
        directory. The Producer is then run with the empty.json resource.
        NOTE(review): presumably this yields an empty datastore in
        ${output_document_to_pdb} - confirm against the Producer implementation.
    -->
    <action name="skip-referenceextraction_pdb">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped actions' outputs -->
                <delete path="${nameNode}${workingDir}/referenceextraction_pdb"/>
                <delete path="${nameNode}${workingDir}/transformers_export_pdb"/>
                <delete path="${nameNode}${output_document_to_pdb}"/>
                <mkdir path="${nameNode}${workingDir}/transformers_export_pdb"/>
                <mkdir path="${nameNode}${output_document_to_pdb}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <!-- whitespace inside the multi-line <arg> value is part of the argument text and is kept as is -->
            <arg>-C{referenceextraction_pdb,
				eu.dnetlib.iis.export.schemas.DocumentToConceptIds,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: directory has to be aligned with the skipped action's output -->
            <arg>-Oreferenceextraction_pdb=${output_document_to_pdb}</arg>
        </java>
        <ok to="joining"/>
        <error to="fail"/>
    </action>
    <!-- end of pdb reference extraction block -->

    <!-- metadatamerger branch -->
    <!--
        Invokes the transformers_metadatamerger sub-workflow on the base and
        extracted document metadata. The merged output is written to the working
        directory, where the documents classification and citation matching
        transformers read it.
    -->
    <action name="transformers_metadatamerger">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_metadatamerger</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_metadatamerger/working_dir</value>
                </property>
                <property>
                    <name>input_base_metadata</name>
                    <value>${input_document_metadata}</value>
                </property>
                <property>
                    <name>input_extracted_metadata</name>
                    <value>${input_extracted_document_metadata}</value>
                </property>
                <property>
                    <name>output_merged_metadata</name>
                    <value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-documentsclassification"/>
        <error to="fail"/>
    </action>

    <!-- start of documents classification part -->
    <!--
        Runs the classification chain only when active_documentsclassification
        equals the string "true"; any other value routes to the skip action.
    -->
    <decision name="decision-documentsclassification">
        <switch>
            <case to="transformers_documentsclassification">${active_documentsclassification eq "true"}</case>
            <default to="skip-documentsclassification"/>
        </switch>
    </decision>

    <!--
        Invokes the transformers_documentsclassification sub-workflow on the merged
        metadata; its output in the working directory is the input of
        documentsclassification_main.
    -->
    <action name="transformers_documentsclassification">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_documentsclassification</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_documentsclassification/working_dir</value>
                </property>
                <property>
                    <name>input_merged_metadata</name>
                    <value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
                </property>
                <property>
                    <name>output_document_metadata</name>
                    <value>${workingDir}/transformers_documentsclassification/output_document_metadata</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="documentsclassification_main"/>
        <error to="fail"/>
    </action>

    <!--
        Invokes the documentsclassification_main sub-workflow on the transformed
        document metadata; its result goes to ${output_document_to_document_classes}.
    -->
    <action name="documentsclassification_main">
        <sub-workflow>
            <app-path>${wf:appPath()}/documentsclassification_main</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/documentsclassification_main/working_dir</value>
                </property>
                <property>
                    <name>input_document_metadata</name>
                    <value>${workingDir}/transformers_documentsclassification/output_document_metadata</value>
                </property>
                <property>
                    <name>output_document_to_document_classes</name>
                    <value>${output_document_to_document_classes}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="joining"/>
        <error to="fail"/>
    </action>

    <!-- Substitute for documentsclassification_main (presumably selected by a decision node
         defined earlier in the file when documents classification is disabled; verify there).
         Recreates the working and output directories, then invokes the Producer with the
         DocumentToDocumentClasses schema and the empty.json resource, targeting the same
         output path the skipped action would have written. -->
    <action name="skip-documentsclassification">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped action output -->
                <delete path="${nameNode}${workingDir}/documentsclassification_main" />
                <delete path="${nameNode}${output_document_to_document_classes}"/>
                <mkdir path="${nameNode}${workingDir}/documentsclassification_main" />
                <mkdir path="${nameNode}${output_document_to_document_classes}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <arg>-C{documentsclassification,
				eu.dnetlib.iis.documentsclassification.schemas.DocumentToDocumentClasses,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: the output path has to be aligned with the skipped action output -->
            <arg>-Odocumentsclassification=${output_document_to_document_classes}</arg>
        </java>
        <ok to="joining"/>
        <error to="fail"/>
    </action>
    <!-- end of documents classification part -->

    <!-- Join node: documentsclassification_main and skip-documentsclassification both
         transition here; any other incoming paths are defined outside this section.
         Control then continues with the citation matching decision. -->
    <join name="joining" to="decision-citationmatching"/>
    <!-- citation matching part -->
    <!-- Routes to transformers_citationmatching when the active_citationmatching parameter
         equals "true"; otherwise falls back to skip-citationmatching. -->
    <decision name="decision-citationmatching">
        <switch>
            <case to="transformers_citationmatching">${active_citationmatching eq "true"}</case>
            <default to="skip-citationmatching"/>
        </switch>
    </decision>

    <!-- Runs the transformers_citationmatching sub-workflow: takes the merged metadata from
         transformers_metadatamerger plus the input_person parameter and writes
         output_citation_metadata, which citationmatching_chain consumes next. -->
    <action name="transformers_citationmatching">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_citationmatching</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_citationmatching/working_dir</value>
                </property>
                <property>
                    <name>input_metadata</name>
                    <value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
                </property>
                <property>
                    <name>input_person</name>
                    <value>${input_person}</value>
                </property>
                <property>
                    <name>output_citation_metadata</name>
                    <value>${workingDir}/transformers_citationmatching/output_citation_metadata</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="citationmatching_chain"/>
        <error to="fail" />
    </action>

    <!-- Runs the citationmatching_chain sub-workflow on the output of
         transformers_citationmatching and writes to citationmatching_chain/output under the
         working directory. The cit_genAuthorIdxJavaOpts parameter is passed through unchanged. -->
    <action name="citationmatching_chain">
        <sub-workflow>
            <app-path>${wf:appPath()}/citationmatching_chain</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/citationmatching_chain/working_dir</value>
                </property>
                <property>
                    <name>input</name>
                    <value>${workingDir}/transformers_citationmatching/output_citation_metadata</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/citationmatching_chain/output</value>
                </property>
                <property>
                    <name>cit_genAuthorIdxJavaOpts</name>
                    <value>${cit_genAuthorIdxJavaOpts}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-documentssimilarity"/>
        <error to="fail" />
    </action>

    <!-- Default branch of decision-citationmatching. Recreates the citationmatching_chain
         directory, then invokes the Producer with the Citation schema and the empty.json
         resource, targeting the same output path citationmatching_chain would have written,
         so that downstream consumers of that path still find it. -->
    <action name="skip-citationmatching">
        <java>
            <prepare>
                <!-- notice: the directory has to be aligned with the skipped action output -->
                <delete path="${nameNode}${workingDir}/citationmatching_chain" />
                <mkdir path="${nameNode}${workingDir}/citationmatching_chain" />
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <arg>-C{citation,
				eu.dnetlib.iis.citationmatching.schemas.Citation,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: the output path has to be aligned with the skipped action output -->
            <arg>-Ocitation=${workingDir}/citationmatching_chain/output</arg>
        </java>
        <ok to="decision-documentssimilarity"/>
        <error to="fail"/>
    </action>
    <!-- end of citation matching part -->

    <!-- start of documents similarity part -->
    <!-- documents similarity runs sequentially, after all the other KDM modules,
         because executing it in parallel with them led to a lack of memory -->
    <!-- Routes to transformers_documentssimilarity when the active_documentssimilarity
         parameter equals "true"; otherwise falls back to skip-documentssimilarity. -->
    <decision name="decision-documentssimilarity">
        <switch>
            <case to="transformers_documentssimilarity">${active_documentssimilarity eq "true"}</case>
            <default to="skip-documentssimilarity"/>
        </switch>
    </decision>

    <!-- Runs the transformers_documentssimilarity sub-workflow: takes the input_person
         parameter and the merged metadata from transformers_metadatamerger and writes
         output_document_metadata, which documentssimilarity_chain consumes next. -->
    <action name="transformers_documentssimilarity">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_documentssimilarity</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_documentssimilarity/working_dir</value>
                </property>
                <property>
                    <name>input_person</name>
                    <value>${input_person}</value>
                </property>
                <property>
                    <name>input_metadata</name>
                    <value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
                </property>
                <property>
                    <name>output_document_metadata</name>
                    <value>${workingDir}/transformers_documentssimilarity/output_document_metadata</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="documentssimilarity_chain"/>
        <error to="fail" />
    </action>

    <!-- Runs the documentssimilarity_chain sub-workflow on the output of
         transformers_documentssimilarity. The ds_-prefixed workflow parameters are mapped onto
         the sub-workflow's unprefixed parameter names. Note that the sub-workflow parameter is
         named output_documents_similarity (plural) while the value comes from the
         output_document_similarity (singular) workflow parameter. -->
    <action name="documentssimilarity_chain">
        <sub-workflow>
            <app-path>${wf:appPath()}/documentssimilarity_chain</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/documentssimilarity_chain/working_dir</value>
                </property>
                <property>
                    <name>input_document</name>
                    <value>${workingDir}/transformers_documentssimilarity/output_document_metadata</value>
                </property>
                <property>
                    <name>output_documents_similarity</name>
                    <value>${output_document_similarity}</value>
                </property>
                <property>
                    <name>parallel</name>
                    <value>${ds_parallel}</value>
                </property>
                <property>
                    <name>mapredChildJavaOpts</name>
                    <value>${ds_mapredChildJavaOpts}</value>
                </property>
                <property>
                    <name>sample</name>
                    <value>${ds_sample}</value>
                </property>
                <property>
                    <name>removal_rate</name>
                    <value>${ds_removal_rate}</value>
                </property>
                <property>
                    <name>removal_least_used</name>
                    <value>${ds_removal_least_used}</value>
                </property>
                <property>
                    <name>tfidfTopnTermPerDocument</name>
                    <value>${ds_tfidfTopnTermPerDocument}</value>
                </property>
                <property>
                    <name>similarityTopnDocumentPerDocument</name>
                    <value>${ds_similarityTopnDocumentPerDocument}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="decision-statistics"/>
        <error to="fail" />
    </action>

    <!-- Default branch of decision-documentssimilarity. Recreates the working and output
         directories, then invokes the Producer with the DocumentSimilarity schema and the
         empty.json resource, targeting the same output path documentssimilarity_chain would
         have written. -->
    <action name="skip-documentssimilarity">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped action output -->
                <delete path="${nameNode}${workingDir}/documentssimilarity_chain" />
                <delete path="${nameNode}${output_document_similarity}" />
                <mkdir path="${nameNode}${workingDir}/documentssimilarity_chain" />
                <mkdir path="${nameNode}${output_document_similarity}" />
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <arg>-C{documentssimilarity,
				eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- notice: the output path has to be aligned with the skipped action output -->
            <arg>-Odocumentssimilarity=${output_document_similarity}</arg>
        </java>
        <ok to="decision-statistics"/>
        <error to="fail"/>
    </action>
    <!-- end of documents similarity part -->

    <!-- statistics are calculated at the end because they depend on the outcome of two
         forked paths: transformers_metadatamerger and referenceextraction_project -->
    <!-- statistics part -->
    <!-- Routes to transformers_statistics when the active_statistics parameter equals "true";
         otherwise falls back to skip-statistics. -->
    <decision name="decision-statistics">
        <switch>
            <case to="transformers_statistics">${active_statistics eq "true"}</case>
            <default to="skip-statistics"/>
        </switch>
    </decision>

    <!-- Runs the transformers_statistics sub-workflow. Inputs: merged metadata, the
         citationmatching_chain output, the document-to-project output, and the input_person
         and input_project parameters. Outputs: three intermediate datasets under
         transformers_statistics that the statistics action consumes next. -->
    <action name="transformers_statistics">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_statistics</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_statistics/working_dir</value>
                </property>
                <property>
                    <name>input_document</name>
                    <value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
                </property>
                <property>
                    <name>input_citation</name>
                    <value>${workingDir}/citationmatching_chain/output</value>
                </property>
                <property>
                    <!-- NOTICE: reference extraction will have to be enabled to get this input -->
                    <name>input_document_to_project</name>
                    <value>${output_document_to_project}</value>
                </property>
                <property>
                    <name>input_person</name>
                    <value>${input_person}</value>
                </property>
                <property>
                    <name>input_project</name>
                    <value>${input_project}</value>
                </property>
                <property>
                    <name>output_document_authors_citations</name>
                    <value>${workingDir}/transformers_statistics/output_document_authors_citations</value>
                </property>
                <property>
                    <name>output_person_id</name>
                    <value>${workingDir}/transformers_statistics/output_person_id</value>
                </property>
                <property>
                    <name>output_project_id</name>
                    <value>${workingDir}/transformers_statistics/output_project_id</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="statistics"/>
        <error to="fail" />
    </action>

    <!-- Runs the statistics sub-workflow on the three datasets produced by
         transformers_statistics. Document, author and project statistics go to the
         corresponding output_*_statistics workflow parameters; global statistics are written
         under the statistics directory inside the working directory. -->
    <action name="statistics">
        <sub-workflow>
            <app-path>${wf:appPath()}/statistics</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/statistics/working_dir</value>
                </property>
                <property>
                    <name>input_document_authors_citations</name>
                    <value>${workingDir}/transformers_statistics/output_document_authors_citations</value>
                </property>
                <property>
                    <name>input_person_id</name>
                    <value>${workingDir}/transformers_statistics/output_person_id</value>
                </property>
                <property>
                    <name>input_project_id</name>
                    <value>${workingDir}/transformers_statistics/output_project_id</value>
                </property>
                <property>
                    <name>output_document_statistics</name>
                    <value>${output_document_statistics}</value>
                </property>
                <property>
                    <name>output_author_statistics</name>
                    <value>${output_author_statistics}</value>
                </property>
                <property>
                    <name>output_project_statistics</name>
                    <value>${output_project_statistics}</value>
                </property>
                <property>
                    <name>output_global_statistics</name>
                    <value>${workingDir}/statistics/output_global_statistics</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_citations_from_matching"/>
        <error to="fail" />
    </action>

    <!-- Default branch of decision-statistics. Recreates the statistics working directory
         and the three statistics output directories, then invokes the Producer with the
         document, author and project statistics schemas and the empty.json resource,
         targeting the same output paths the statistics action would have written.
         Global statistics are not produced here (see the FIXME notes below). -->
    <action name="skip-statistics">
        <java>
            <prepare>
                <!-- notice: directories have to be aligned with the skipped action output -->
                <delete path="${nameNode}${workingDir}/statistics" />
                <delete path="${nameNode}${output_document_statistics}"/>
                <delete path="${nameNode}${output_author_statistics}"/>
                <delete path="${nameNode}${output_project_statistics}"/>
                <mkdir path="${nameNode}${workingDir}/statistics" />
                <mkdir path="${nameNode}${output_document_statistics}"/>
                <mkdir path="${nameNode}${output_author_statistics}"/>
                <mkdir path="${nameNode}${output_project_statistics}"/>
            </prepare>
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
            <arg>-C{document_statistics,
				eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <arg>-C{author_statistics,
				eu.dnetlib.iis.statistics.schemas.AuthorToAuthorStatistics,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <arg>-C{project_statistics,
				eu.dnetlib.iis.statistics.schemas.ProjectToProjectStatistics,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            <!-- FIXME currently global statistics are not available -->
            <!--
            <arg>-C{global_statistics,
				eu.dnetlib.iis.statistics.schemas.,
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
            -->
            <!-- notice: the output paths have to be aligned with the skipped action output -->
            <arg>-Odocument_statistics=${output_document_statistics}</arg>
            <arg>-Oauthor_statistics=${output_author_statistics}</arg>
            <arg>-Oproject_statistics=${output_project_statistics}</arg>
            <!-- FIXME currently global statistics are not available -->
            <!--
            <arg>-Oglobal_statistics=${workingDir}/statistics/output_global_statistics</arg>
            -->
        </java>
        <ok to="transformers_citations_from_matching"/>
        <error to="fail"/>
    </action>
    <!-- end of statistics part -->

    <!-- normalize and group citations part -->
    <!-- Runs the transformers_citations_from_matching sub-workflow on the
         citationmatching_chain output; its own output later feeds citations_collapser
         as input_2. -->
    <action name="transformers_citations_from_matching">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_citations_from_matching</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_citations_from_matching/working_dir</value>
                </property>
                <property>
                    <name>input</name>
                    <value>${workingDir}/citationmatching_chain/output</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/transformers_citations_from_matching/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_citations_from_ingestpmc"/>
        <error to="fail" />
    </action>

    <!-- Runs the transformers_citations_from_ingestpmc sub-workflow on the path given by the
         input_citation_pmc parameter; its output later feeds citations_collapser as input_1. -->
    <action name="transformers_citations_from_ingestpmc">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_citations_from_ingestpmc</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_citations_from_ingestpmc/working_dir</value>
                </property>
                <property>
                    <name>input</name>
                    <value>${input_citation_pmc}</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="citations_collapser"/>
        <error to="fail" />
    </action>

    <!-- Runs the collapsers_multiple_input_collapser sub-workflow over two citation inputs:
         origin "ingested" (from transformers_citations_from_ingestpmc) and origin "matched"
         (from transformers_citations_from_matching). Records are blocked on sourceDocumentId
         and collapsed with PMCCitationCollapser; the result is written to
         citations_collapser/output under the working directory. -->
    <action name="citations_collapser">
        <sub-workflow>
            <app-path>${wf:appPath()}/collapsers_multiple_input_collapser</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/citations_collapser/working_dir</value>
                </property>
                <!-- Input ports & parameters. -->
                <property>
                    <name>origin_1</name>
                    <value>ingested</value>
                </property>
                <property>
                    <name>input_1</name>
                    <value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
                </property>
                <property>
                    <name>origin_2</name>
                    <value>matched</value>
                </property>
                <property>
                    <name>input_2</name>
                    <value>${workingDir}/transformers_citations_from_matching/output</value>
                </property>
                <property>
                    <name>blocking_field</name>
                    <value>sourceDocumentId</value>
                </property>
                <property>
                    <name>schema_input</name>
                    <value>eu.dnetlib.iis.common.citations.schemas.Citation</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${workingDir}/citations_collapser/output</value>
                </property>
                <property>
                    <name>schema_input_envelope</name>
                    <value>eu.dnetlib.iis.common.citations.schemas.CitationEnvelope</value>
                </property>
                <property>
                    <name>record_collapser</name>
                    <value>eu.dnetlib.iis.collapsers.origins.PMCCitationCollapser</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="transformers_export_citations"/>
        <error to="fail" />
    </action>

    <!-- Runs the transformers_export_citations sub-workflow on the citations_collapser output
         and writes to the path given by the output_citation parameter. This is the last
         action: on success the workflow transitions to the end node. -->
    <action name="transformers_export_citations">
        <sub-workflow>
            <app-path>${wf:appPath()}/transformers_export_citations</app-path>
            <propagate-configuration/>
            <configuration>
                <property>
                    <name>workingDir</name>
                    <value>${workingDir}/transformers_export_citations/working_dir</value>
                </property>
                <property>
                    <name>input</name>
                    <value>${workingDir}/citations_collapser/output</value>
                </property>
                <property>
                    <name>output</name>
                    <value>${output_citation}</value>
                </property>
            </configuration>
        </sub-workflow>
        <ok to="end"/>
        <error to="fail" />
    </action>

    <!-- end of normalize and group citations part -->

	<!-- Kill node targeted by the error transition of every action in this section; the
	     message reports the error message of the last node that failed. -->
	<kill name="fail">
		<message>Unfortunately, the process failed -- error message:
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
	</kill>
	<!-- Successful termination node; transformers_export_citations transitions here on success. -->
	<end name="end" />
</workflow-app>