Revision 29079
Added by Marek Horst about 10 years ago
modules/icm-iis-transformers/trunk/src/test/java/eu/dnetlib/iis/transformers/export/document/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.transformers.export.document; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |
|
10 |
* |
|
11 |
* @author Dominika Tkaczyk |
|
12 |
* |
|
13 |
*/ |
|
14 |
@Category(IntegrationTest.class) |
|
15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testWorkflow() throws Exception { |
|
19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |
|
20 |
wf.setTimeoutInSeconds(720); |
|
21 |
runWorkflow("eu/dnetlib/iis/transformers/export/document/sampledataproducer/oozie_app", wf); |
|
22 |
} |
|
23 |
|
|
24 |
} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer_export_document classpath eu/dnetlib/iis/transformers/export/document/oozie_app |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-transformers_export_document_sampledataproducer"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{extracted_document_metadata, |
|
24 |
eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata, |
|
25 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/extracted_document_metadata.json}</arg> |
|
26 |
<arg>-C{citation, |
|
27 |
eu.dnetlib.iis.citationmatching.schemas.Citation, |
|
28 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/citations.json}</arg> |
|
29 |
<arg>-C{document_to_project, |
|
30 |
eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject, |
|
31 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_project.json}</arg> |
|
32 |
<arg>-C{document_to_dataset, |
|
33 |
eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet, |
|
34 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_dataset.json}</arg> |
|
35 |
<arg>-C{document_to_research_initiative, |
|
36 |
eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToResearchInitiative, |
|
37 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_research_initiative.json}</arg> |
|
38 |
<arg>-C{document_to_document_clusters, |
|
39 |
eu.dnetlib.iis.documentsclustering.schemas.DocumentToDocumentClusters, |
|
40 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_clusters.json}</arg> |
|
41 |
<arg>-C{document_to_document_classes, |
|
42 |
eu.dnetlib.iis.documentsclassification.schemas.DocumentToDocumentClasses, |
|
43 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_classes.json}</arg> |
|
44 |
<arg>-C{document_to_document_statistics, |
|
45 |
eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics, |
|
46 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_statistics.json}</arg> |
|
47 |
<arg>-C{document_with_website_usage_similarities, |
|
48 |
eu.dnetlib.iis.websiteusage.schemas.DocumentsWithWebsiteUsageSimilarities, |
|
49 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_with_website_usage_similarities.json}</arg> |
|
50 |
|
|
51 |
<!-- All input and output ports have to be bound to paths in HDFS; the working |
|
52 |
directory has to be specified as well --> |
|
53 |
<arg>-SworkingDir=${workingDir}/producer/working_dir</arg> |
|
54 |
<arg>-Oextracted_document_metadata=${workingDir}/producer/extracted_document_metadata</arg> |
|
55 |
<arg>-Ocitation=${workingDir}/producer/citation</arg> |
|
56 |
<arg>-Odocument_to_project=${workingDir}/producer/document_to_project</arg> |
|
57 |
<arg>-Odocument_to_dataset=${workingDir}/producer/document_to_dataset</arg> |
|
58 |
<arg>-Odocument_to_research_initiative=${workingDir}/producer/document_to_research_initiative</arg> |
|
59 |
<arg>-Odocument_to_document_clusters=${workingDir}/producer/document_to_document_clusters</arg> |
|
60 |
<arg>-Odocument_to_document_classes=${workingDir}/producer/document_to_document_classes</arg> |
|
61 |
<arg>-Odocument_to_document_statistics=${workingDir}/producer/document_to_document_statistics</arg> |
|
62 |
<arg>-Odocument_with_website_usage_similarities=${workingDir}/producer/document_with_website_usage_similarities</arg> |
|
63 |
</java> |
|
64 |
<ok to="transformer_export_document"/> |
|
65 |
<error to="fail"/> |
|
66 |
</action> |
|
67 |
<action name="transformer_export_document"> |
|
68 |
<sub-workflow> |
|
69 |
<app-path>${wf:appPath()}/transformer_export_document</app-path> |
|
70 |
<configuration> |
|
71 |
<property> |
|
72 |
<name>jobTracker</name> |
|
73 |
<value>${jobTracker}</value> |
|
74 |
</property> |
|
75 |
<property> |
|
76 |
<name>nameNode</name> |
|
77 |
<value>${nameNode}</value> |
|
78 |
</property> |
|
79 |
<property> |
|
80 |
<name>queueName</name> |
|
81 |
<value>${queueName}</value> |
|
82 |
</property> |
|
83 |
<!-- Working directory of the subworkflow --> |
|
84 |
<property> |
|
85 |
<name>workingDir</name> |
|
86 |
<value>${workingDir}/transformer_export_document/working_dir</value> |
|
87 |
</property> |
|
88 |
<!-- Input ports. --> |
|
89 |
<property> |
|
90 |
<name>input_extracted_document_metadata</name> |
|
91 |
<value>${workingDir}/producer/extracted_document_metadata</value> |
|
92 |
</property> |
|
93 |
<property> |
|
94 |
<name>input_citation</name> |
|
95 |
<value>${workingDir}/producer/citation</value> |
|
96 |
</property> |
|
97 |
<property> |
|
98 |
<name>input_document_to_project</name> |
|
99 |
<value>${workingDir}/producer/document_to_project</value> |
|
100 |
</property> |
|
101 |
<property> |
|
102 |
<name>input_document_to_dataset</name> |
|
103 |
<value>${workingDir}/producer/document_to_dataset</value> |
|
104 |
</property> |
|
105 |
<property> |
|
106 |
<name>input_document_to_research_initiative</name> |
|
107 |
<value>${workingDir}/producer/document_to_research_initiative</value> |
|
108 |
</property> |
|
109 |
<property> |
|
110 |
<name>input_document_to_document_clusters</name> |
|
111 |
<value>${workingDir}/producer/document_to_document_clusters</value> |
|
112 |
</property> |
|
113 |
<property> |
|
114 |
<name>input_document_to_document_classes</name> |
|
115 |
<value>${workingDir}/producer/document_to_document_classes</value> |
|
116 |
</property> |
|
117 |
<property> |
|
118 |
<name>input_document_to_document_statistics</name> |
|
119 |
<value>${workingDir}/producer/document_to_document_statistics</value> |
|
120 |
</property> |
|
121 |
<property> |
|
122 |
<name>input_document_with_website_usage_similarities</name> |
|
123 |
<value>${workingDir}/producer/document_with_website_usage_similarities</value> |
|
124 |
</property> |
|
125 |
<!-- Output port bound to given path --> |
|
126 |
<property> |
|
127 |
<name>output_document_with_inferenced_data</name> |
|
128 |
<value>${workingDir}/transformer_export_document/document_with_inferenced_data</value> |
|
129 |
</property> |
|
130 |
</configuration> |
|
131 |
</sub-workflow> |
|
132 |
<ok to="consumer"/> |
|
133 |
<error to="fail"/> |
|
134 |
</action> |
|
135 |
<action name="consumer"> |
|
136 |
<java> |
|
137 |
<job-tracker>${jobTracker}</job-tracker> |
|
138 |
<name-node>${nameNode}</name-node> |
|
139 |
<!-- The data generated by this node is deleted in this section --> |
|
140 |
<prepare> |
|
141 |
<delete path="${nameNode}${workingDir}/consumer" /> |
|
142 |
<mkdir path="${nameNode}${workingDir}/consumer" /> |
|
143 |
</prepare> |
|
144 |
<configuration> |
|
145 |
<property> |
|
146 |
<name>mapred.job.queue.name</name> |
|
147 |
<value>${queueName}</value> |
|
148 |
</property> |
|
149 |
</configuration> |
|
150 |
<!-- This is a simple wrapper for the Java code --> |
|
151 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
152 |
<!-- The business Java code that gets to be executed --> |
|
153 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
154 |
<!-- Specification of the input ports --> |
|
155 |
<arg>-C{document_with_inferenced_data, |
|
156 |
eu.dnetlib.iis.export.schemas.DocumentWithInferencedData, |
|
157 |
eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_with_inferenced_data.json}</arg> |
|
158 |
<!-- All input and output ports have to be bound to paths in HDFS; the working |
|
159 |
directory has to be specified as well --> |
|
160 |
<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg> |
|
161 |
<arg>-Idocument_with_inferenced_data=${workingDir}/transformer_export_document/document_with_inferenced_data</arg> |
|
162 |
</java> |
|
163 |
<ok to="end" /> |
|
164 |
<error to="fail" /> |
|
165 |
</action> |
|
166 |
<kill name="fail"> |
|
167 |
<message>Unfortunately, the workflow failed -- error message: |
|
168 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
169 |
</kill> |
|
170 |
<end name="end"/> |
|
171 |
</workflow-app> |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_classes.json | ||
---|---|---|
1 |
{"documentId": "id-10", "classes": {"arXivClasses": null, "WoSClasses": [{"classLabels": ["TOXICOLOGY"], "confidenceLevel": 0.112}, {"classLabels": ["ONCOLOGY"], "confidenceLevel": 0.098}], "DDCClasses":[{"classLabels": ["Technology", "Agriculture"], "confidenceLevel": 0.403}, {"classLabels": ["Science", "Biology"], "confidenceLevel": 0.286}], "meshEuroPMCClasses": null}} |
|
2 |
{"documentId": "id-2", "classes": {"arXivClasses": [{"classLabels": ["Quantitative Biology", "Cell Behavior"], "confidenceLevel": 0.102}, {"classLabels": ["Quantitative Biology", "Tissues and Organs"], "confidenceLevel": 0.101}], "WoSClasses": null, "DDCClasses":[{"classLabels": ["Science", "Science"], "confidenceLevel": 0.285}, {"classLabels": ["Technology", "Medicine & health"], "confidenceLevel": 0.198}], "meshEuroPMCClasses": null}} |
|
3 |
{"documentId": "id-3", "classes": {"arXivClasses": null, "WoSClasses": [{"classLabels": ["SUBSTANCE ABUSE"], "confidenceLevel": 0.096}, {"classLabels": ["COMPUTER SCIENCE, HARDWARE & ARCHITECTURE"], "confidenceLevel": 0.089}], "DDCClasses": null, "meshEuroPMCClasses": null}} |
|
4 |
{"documentId": "id-9", "classes": null} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_with_website_usage_similarities.json | ||
---|---|---|
1 |
{"documentId": "id-2", "otherDocumentId": "id-5", "covisitedSimilarity": 0.45} |
|
2 |
{"documentId": "id-9", "otherDocumentId": "id-5", "covisitedSimilarity": 0.58} |
|
3 |
{"documentId": "id-2", "otherDocumentId": "id-9", "covisitedSimilarity": 0.97} |
|
4 |
{"documentId": "id-3", "otherDocumentId": "id-1", "covisitedSimilarity": null} |
|
5 |
{"documentId": "id-4", "otherDocumentId": "id-10", "covisitedSimilarity": null} |
|
6 |
{"documentId": "id-5", "otherDocumentId": "id-9", "covisitedSimilarity": 0.58} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/citations.json | ||
---|---|---|
1 |
{"sourceDocumentId": "id-1", "rawText": "abc", "destinationDocumentId": "id-2", "confidenceLevel": null} |
|
2 |
{"sourceDocumentId": "id-1", "rawText": "def", "destinationDocumentId": "id-4", "confidenceLevel": null} |
|
3 |
{"sourceDocumentId": "id-3", "rawText": "abc", "destinationDocumentId": "id-4", "confidenceLevel": null} |
|
4 |
{"sourceDocumentId": "id-4", "rawText": "xxx", "destinationDocumentId": "id-1", "confidenceLevel": null} |
|
5 |
{"sourceDocumentId": "id-4", "rawText": "xyz", "destinationDocumentId": "id-2", "confidenceLevel": null} |
|
6 |
{"sourceDocumentId": "id-7", "rawText": "xyz", "destinationDocumentId": "id-4", "confidenceLevel": null} |
|
7 |
{"sourceDocumentId": "id-7", "rawText": "qwerty", "destinationDocumentId": "id-1", "confidenceLevel": null} |
|
8 |
{"sourceDocumentId": "id-7", "rawText": "bnmm", "destinationDocumentId": "id-2", "confidenceLevel": null} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_research_initiative.json | ||
---|---|---|
1 |
{"documentId":"id-1","egiConceptId":"egi-8095","confidenceLevel":1.5766148184367308} |
|
2 |
{"documentId":"id-8","egiConceptId":"egi-0820","confidenceLevel":0.8652002245558127} |
|
3 |
{"documentId":"id-1","egiConceptId":"egi-5103","confidenceLevel":1.3005493344846906} |
|
4 |
{"documentId":"id-4","egiConceptId":"egi-0763","confidenceLevel":1.6733200530681511} |
|
5 |
{"documentId":"id-4","egiConceptId":"egi-3463","confidenceLevel":1.6733200530681511} |
|
6 |
{"documentId":"id-9","egiConceptId":"egi-5840","confidenceLevel":1.6053482043291596} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/extracted_document_metadata.json | ||
---|---|---|
1 |
{"publisher": "Tor Science Fiction", "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}, {"authorFullName": "author-3", "affiliationPositions": null}], "language": "eng", "title": "Enders Game", "externalIdentifiers": null, "journal": "Journal-2", "id": "id-1", "pages": {"start": "123", "end": "128"}, "volume": null, "references": null, "year": null, "keywords": null, "issue": null, "abstract": null} |
|
2 |
{"publisher": null, "affiliations": null, "authors": [{"authorFullName": "author-1", "affiliationPositions": null}, {"authorFullName": "author-2", "affiliationPositions": null}], "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-2", "pages": null, "volume": "124", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}, {"position": 2, "basicMetadata": {"publisher": null, "title": "The Other Wind", "url": null, "series": null, "authors": null, "volume": "vol.23", "edition": null, "source": null, "year": "2003", "issue": null, "pages": null, "location": null}, "text": "Ursula K. Le Guin, The Other Wind, 2003"}], "year": 1970, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": null, "abstract": "The tales"} |
|
3 |
{"publisher": "Harp3r T0rch", "affiliations": null, "authors": null, "language": "en", "title": "Small Gods", "externalIdentifiers": null, "journal": "Journal", "id": "id-3", "pages": null, "volume": "32", "references": [{"position": 1, "basicMetadata": {"publisher": null, "title": "The Lord of the Rings", "url": null, "series": null, "authors": null, "volume": null, "edition": null, "source": null, "year": "2012", "issue": null, "pages": {"start": "1", "end": "236"}, "location": null}, "text": "J.R.R. Tolkien, The Lord of the Rings, 2012"}], "year": null, "keywords": null, "issue": "4", "abstract": null} |
|
4 |
{"publisher": null, "affiliations": null, "authors": null, "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-6", "pages": null, "volume": "vol3", "references": null, "year": 2011, "keywords": null, "issue": "6", "abstract": "A Game of Thrones"} |
|
5 |
{"publisher": null, "affiliations": null, "authors": null, "language": null, "title": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "id": "id-4", "pages": {"start": "3", "end": "503"}, "volume": null, "references": null, "year": 1997, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "issue": "2", "abstract": "Interview with the Vampire"} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_clusters.json | ||
---|---|---|
1 |
{"documentId": "id-4", "clusters": {"clusteringMethod1ClusterId": 123, "clusteringMethod2ClusterId": 90}} |
|
2 |
{"documentId": "id-10", "clusters": {"clusteringMethod1ClusterId": 123, "clusteringMethod2ClusterId": 90}} |
|
3 |
{"documentId": "id-7", "clusters": {"clusteringMethod1ClusterId": 45, "clusteringMethod2ClusterId": 90}} |
|
4 |
{"documentId": "id-8", "clusters": {"clusteringMethod1ClusterId": 69, "clusteringMethod2ClusterId": null}} |
|
5 |
{"documentId": "id-1", "clusters": null} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_with_inferenced_data.json | ||
---|---|---|
1 |
{"id": "id-1", "title": "Enders Game", "abstract": null, "language": "eng", "keywords": null, "externalIdentifiers": null, "journal": "Journal-2", "year": null, "publisher": "Tor Science Fiction", "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": [{"id" : "id-2", "text" : "abc"},{"id" : "id-4", "text" : "def"}], "referencedDataSetIds": ["8095", "5103"], "researchInitiativeConceptIds" : ["egi-8095", "egi-5103"], "clusters": null, "classes": null, "statistics": {"citationsFromAllPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "citationsFromPublishedPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}}, "websiteUsageSimilarities": null} |
|
2 |
{"id": "id-2", "title": null, "abstract": "The tales", "language": null, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "year": 1970, "publisher": null, "text": null, "projectIds": ["248095"], "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": null, "classes": {"arXivClasses": [{"classLabels": ["Quantitative Biology", "Cell Behavior"], "confidenceLevel": 0.102}, {"classLabels": ["Quantitative Biology", "Tissues and Organs"], "confidenceLevel": 0.101}], "WoSClasses": null, "DDCClasses":[{"classLabels": ["Science", "Science"], "confidenceLevel": 0.285}, {"classLabels": ["Technology", "Medicine & health"], "confidenceLevel": 0.198}], "meshEuroPMCClasses": null}, "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}, "websiteUsageSimilarities": [{"documentId": "id-5", "covisitedSimilarity": 0.45}, {"documentId": "id-9", "covisitedSimilarity": 0.97}]} |
|
3 |
{"id": "id-3", "title": "Small Gods", "abstract": null, "language": "en", "keywords": null, "externalIdentifiers": null, "journal": "Journal", "year": null, "publisher": "Harp3r T0rch", "text": null, "projectIds": ["300820", "275103"], "authorIds": null, "matchedCitationDocumentIds": [{"id" : "id-4", "text" : "abc"}], "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": null, "classes": {"arXivClasses": null, "WoSClasses": [{"classLabels": ["SUBSTANCE ABUSE"], "confidenceLevel": 0.096}, {"classLabels": ["COMPUTER SCIENCE, HARDWARE & ARCHITECTURE"], "confidenceLevel": 0.089}], "DDCClasses": null, "meshEuroPMCClasses": null}, "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}, "websiteUsageSimilarities": [{"documentId": "id-1", "covisitedSimilarity": null}]} |
|
4 |
{"id": "id-4", "title": null, "abstract": "Interview with the Vampire", "language": null, "keywords": ["kwd_1", "kwd_3", "kwd_5"], "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "year": 1997, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": [{"id" : "id-1", "text" : "xxx"},{"id" : "id-2", "text" : "xyz"}], "referencedDataSetIds": ["0763"], "researchInitiativeConceptIds" : ["egi-0763", "egi-3463"], "clusters": {"clusteringMethod1ClusterId": 123, "clusteringMethod2ClusterId": 90}, "classes": null, "statistics": {"citationsFromAllPapers": {"numberOfCitations": 3, "numberOfCitationsPerYear": {"2010": 1, "2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2010": 1, "2001": 1}}}, "websiteUsageSimilarities": [{"documentId": "id-10", "covisitedSimilarity": null}]} |
|
5 |
{"id": "id-5", "title": null, "abstract": null, "language": null, "keywords": null, "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": null, "classes": null, "statistics": null, "websiteUsageSimilarities": [{"documentId": "id-9", "covisitedSimilarity": 0.58}]} |
|
6 |
{"id": "id-6", "title": null, "abstract": "A Game of Thrones", "language": null, "keywords": null, "externalIdentifiers": {"id-1": "val-extr-1", "id-3": "val-extr-3"}, "journal": null, "year": 2011, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": null, "classes": null, "statistics": null, "websiteUsageSimilarities": null} |
|
7 |
{"id": "id-7", "title": null, "abstract": null, "language": null, "keywords": null, "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": [{"id" : "id-4", "text" : "xyz"},{"id" : "id-1", "text" : "qwerty"},{"id" : "id-2", "text" : "bnmm"}], "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": {"clusteringMethod1ClusterId": 45, "clusteringMethod2ClusterId": 90}, "classes": null, "statistics": null, "websiteUsageSimilarities": null} |
|
8 |
{"id": "id-8", "title": null, "abstract": null, "language": null, "keywords": null, "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "text": null, "projectIds": ["240763", "275840"], "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": ["0820"], "researchInitiativeConceptIds" : ["egi-0820"], "clusters": {"clusteringMethod1ClusterId": 69, "clusteringMethod2ClusterId": null}, "classes": null, "statistics": null, "websiteUsageSimilarities": null} |
|
9 |
{"id": "id-9", "title": null, "abstract": null, "language": null, "keywords": null, "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": ["5840"], "researchInitiativeConceptIds" : ["egi-5840"], "clusters": null, "classes": null, "statistics": null, "websiteUsageSimilarities": [{"documentId": "id-5", "covisitedSimilarity": 0.58}]} |
|
10 |
{"id": "id-10", "title": null, "abstract": null, "language": null, "keywords": null, "externalIdentifiers": null, "journal": null, "year": null, "publisher": null, "text": null, "projectIds": null, "authorIds": null, "matchedCitationDocumentIds": null, "referencedDataSetIds": null, "researchInitiativeConceptIds" : null, "clusters": {"clusteringMethod1ClusterId": 123, "clusteringMethod2ClusterId": 90}, "classes": {"arXivClasses": null, "WoSClasses": [{"classLabels": ["TOXICOLOGY"], "confidenceLevel": 0.112}, {"classLabels": ["ONCOLOGY"], "confidenceLevel": 0.098}], "DDCClasses":[{"classLabels": ["Technology", "Agriculture"], "confidenceLevel": 0.403}, {"classLabels": ["Science", "Biology"], "confidenceLevel": 0.286}], "meshEuroPMCClasses": null}, "statistics": null, "websiteUsageSimilarities": null} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_dataset.json | ||
---|---|---|
1 |
{"documentId":"id-1","datasetId":"8095","confidenceLevel":1.5766148184367308} |
|
2 |
{"documentId":"id-8","datasetId":"0820","confidenceLevel":0.8652002245558127} |
|
3 |
{"documentId":"id-1","datasetId":"5103","confidenceLevel":1.3005493344846906} |
|
4 |
{"documentId":"id-4","datasetId":"0763","confidenceLevel":1.6733200530681511} |
|
5 |
{"documentId":"id-9","datasetId":"5840","confidenceLevel":1.6053482043291596} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_project.json | ||
---|---|---|
1 |
{"documentId":"id-2","projectId":"248095","confidenceLevel":1.5766148184367308} |
|
2 |
{"documentId":"id-3","projectId":"300820","confidenceLevel":0.8652002245558127} |
|
3 |
{"documentId":"id-3","projectId":"275103","confidenceLevel":1.3005493344846906} |
|
4 |
{"documentId":"id-8","projectId":"240763","confidenceLevel":1.6733200530681511} |
|
5 |
{"documentId":"id-8","projectId":"275840","confidenceLevel":1.6053482043291596} |
modules/icm-iis-transformers/trunk/src/test/resources/eu/dnetlib/iis/transformers/export/document/sampledataproducer/data/document_to_document_statistics.json | ||
---|---|---|
1 |
{"documentId": "id-1", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "citationsFromPublishedPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}}} |
|
2 |
{"documentId": "id-2", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}} |
|
3 |
{"documentId": "id-3", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}} |
|
4 |
{"documentId": "id-4", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 3, "numberOfCitationsPerYear": {"2010": 1, "2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2010": 1, "2001": 1}}}} |
modules/icm-iis-transformers/trunk/src/main/resources/eu/dnetlib/iis/transformers/export/document/oozie_app/lib/scripts/transformer.pig | ||
---|---|---|
1 |
define avro_load_extracted_document_metadata |
|
2 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
3 |
'input_schema_class', '$schema_input_extracted_document_metadata'); |
|
4 |
|
|
5 |
define avro_load_citation |
|
6 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
7 |
'input_schema_class', '$schema_input_citation'); |
|
8 |
|
|
9 |
define avro_load_document_to_project |
|
10 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
11 |
'input_schema_class', '$schema_input_document_to_project'); |
|
12 |
|
|
13 |
define avro_load_document_to_dataset |
|
14 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
15 |
'input_schema_class', '$schema_input_document_to_dataset'); |
|
16 |
|
|
17 |
define avro_load_document_to_research_initiative |
|
18 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
19 |
'input_schema_class', '$schema_input_document_to_research_initiative'); |
|
20 |
|
|
21 |
define avro_load_document_to_document_clusters |
|
22 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
23 |
'input_schema_class', '$schema_input_document_to_document_clusters'); |
|
24 |
|
|
25 |
define avro_load_document_to_document_classes |
|
26 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
27 |
'input_schema_class', '$schema_input_document_to_document_classes'); |
|
28 |
|
|
29 |
define avro_load_document_to_document_statistics |
|
30 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
31 |
'input_schema_class', '$schema_input_document_to_document_statistics'); |
|
32 |
|
|
33 |
define avro_load_document_with_website_usage_similarities |
|
34 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
35 |
'input_schema_class', '$schema_input_document_with_website_usage_similarities'); |
|
36 |
|
|
37 |
|
|
38 |
define avro_store_document_with_inferenced_data |
|
39 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
40 |
'index', '0', |
|
41 |
'output_schema_class', '$schema_output_document_with_inferenced_data'); |
|
42 |
|
|
43 |
|
|
44 |
define FIRST_NOT_NULL_STR eu.dnetlib.iis.transformers.udfs.StringFirstNotEmpty; |
|
45 |
define NULL_EMPTY eu.dnetlib.iis.transformers.udfs.EmptyBagToNull; |
|
46 |
define NULL_EMPTY_TUPLE_FIELDS eu.dnetlib.iis.transformers.udfs.NullTupleFieldsToNull; |
|
47 |
define CREATE_ARRAY eu.dnetlib.iis.transformers.udfs.NullToEmptyBag; |
|
48 |
|
|
49 |
extractedDocument = load '$input_extracted_document_metadata' using avro_load_extracted_document_metadata; |
|
50 |
citation = load '$input_citation' using avro_load_citation; |
|
51 |
documentToProject = load '$input_document_to_project' using avro_load_document_to_project; |
|
52 |
documentToDataset = load '$input_document_to_dataset' using avro_load_document_to_dataset; |
|
53 |
documentToResearchInitiative = load '$input_document_to_research_initiative' using avro_load_document_to_research_initiative; |
|
54 |
documentToDocumentClusters = load '$input_document_to_document_clusters' using avro_load_document_to_document_clusters; |
|
55 |
documentToDocumentClasses = load '$input_document_to_document_classes' using avro_load_document_to_document_classes; |
|
56 |
documentToDocumentStatistics = load '$input_document_to_document_statistics' using avro_load_document_to_document_statistics; |
|
57 |
documentWithWebsiteUsageSimilarities = load '$input_document_with_website_usage_similarities' using avro_load_document_with_website_usage_similarities; |
|
58 |
|
|
59 |
documentToProjectGroupped = group documentToProject by documentId; |
|
60 |
documentToProjectWithArrays = foreach documentToProjectGroupped { |
|
61 |
projectIds = foreach documentToProject generate projectId; |
|
62 |
generate group as id, projectIds; |
|
63 |
} |
|
64 |
|
|
65 |
documentToDatasetGroupped = group documentToDataset by documentId; |
|
66 |
documentToDatasetWithArrays = foreach documentToDatasetGroupped { |
|
67 |
datasetIds = foreach documentToDataset generate datasetId; |
|
68 |
generate group as id, datasetIds; |
|
69 |
} |
|
70 |
|
|
71 |
-- Collapse citations to one row per citing document (grouped by sourceDocumentId).
-- Each citation is reduced to (id = destinationDocumentId, text = rawText) and the
-- resulting bag is exposed as 'citations'.
citationGroupped = group citation by sourceDocumentId; |

72 |
citationGrouppedWithText = foreach citationGroupped { |

73 |
idWithText = foreach citation generate destinationDocumentId as id, rawText as text; |

74 |
generate group as id, idWithText as citations; |

75 |
} |
|
76 |
|
|
77 |
-- Step 1 of the join chain: full outer join of per-document project ids with
-- per-document citations, so a document present on only one side is still kept.
-- FIRST_NOT_NULL_STR and NULL_EMPTY are defined outside this view; judging by their
-- names they pick the non-null id and replace a null with an empty value (TODO confirm).
-- authorIds is built from a constant null: no author relation is loaded in the visible
-- part of this script.
joined1 = join documentToProjectWithArrays by id full, citationGrouppedWithText by id; |

78 |
joined1Cleaned = foreach joined1 generate |

79 |
FIRST_NOT_NULL_STR(documentToProjectWithArrays::id, citationGrouppedWithText::id) as id, |

80 |
NULL_EMPTY(null) as authorIds, |

81 |
citationGrouppedWithText::citations as matchedCitationDocumentIds, |

82 |
NULL_EMPTY(documentToProjectWithArrays::projectIds) as projectIds; |
|
83 |
|
|
84 |
-- Step 2: full outer join with the per-document dataset ids. Fields collected in
-- step 1 are passed through unchanged; datasetIds goes through NULL_EMPTY, the same
-- treatment projectIds received in step 1.
joined2 = join joined1Cleaned by id full, documentToDatasetWithArrays by id; |

85 |
joined2Cleaned = foreach joined2 generate |

86 |
FIRST_NOT_NULL_STR(joined1Cleaned::id, documentToDatasetWithArrays::id) as id, |

87 |
joined1Cleaned::authorIds as authorIds, |

88 |
joined1Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

89 |
joined1Cleaned::projectIds as projectIds, |

90 |
NULL_EMPTY(documentToDatasetWithArrays::datasetIds) as datasetIds; |
|
91 |
|
|
92 |
-- Step 3: full outer join with documentToDocumentClusters (keyed by its documentId)
-- to add 'clusters'. Unlike projectIds/datasetIds, clusters is not wrapped in
-- NULL_EMPTY, so it is null for documents that have no cluster row.
joined3 = join joined2Cleaned by id full, documentToDocumentClusters by documentId; |

93 |
joined3Cleaned = foreach joined3 generate |

94 |
FIRST_NOT_NULL_STR(joined2Cleaned::id, documentToDocumentClusters::documentId) as id, |

95 |
joined2Cleaned::authorIds as authorIds, |

96 |
joined2Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

97 |
joined2Cleaned::projectIds as projectIds, |

98 |
joined2Cleaned::datasetIds as datasetIds, |

99 |
documentToDocumentClusters::clusters as clusters; |
|
100 |
|
|
101 |
-- Step 4: full outer join with documentToDocumentClasses (keyed by its documentId)
-- to add 'classes'. The field is taken as-is, so it is null for documents that have
-- no classification row.
joined4 = join joined3Cleaned by id full, documentToDocumentClasses by documentId; |

102 |
joined4Cleaned = foreach joined4 generate |

103 |
FIRST_NOT_NULL_STR(joined3Cleaned::id, documentToDocumentClasses::documentId) as id, |

104 |
joined3Cleaned::authorIds as authorIds, |

105 |
joined3Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

106 |
joined3Cleaned::projectIds as projectIds, |

107 |
joined3Cleaned::datasetIds as datasetIds, |

108 |
joined3Cleaned::clusters as clusters, |

109 |
documentToDocumentClasses::classes as classes; |
|
110 |
|
|
111 |
-- Step 5: full outer join with documentToDocumentStatistics (keyed by its documentId)
-- to add 'statistics'. The field is taken as-is, so it is null for documents that
-- have no statistics row.
joined5 = join joined4Cleaned by id full, documentToDocumentStatistics by documentId; |

112 |
joined5Cleaned = foreach joined5 generate |

113 |
FIRST_NOT_NULL_STR(joined4Cleaned::id, documentToDocumentStatistics::documentId) as id, |

114 |
joined4Cleaned::authorIds as authorIds, |

115 |
joined4Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

116 |
joined4Cleaned::projectIds as projectIds, |

117 |
joined4Cleaned::datasetIds as datasetIds, |

118 |
joined4Cleaned::clusters as clusters, |

119 |
joined4Cleaned::classes as classes, |

120 |
documentToDocumentStatistics::statistics as statistics; |
|
121 |
|
|
122 |
-- Collapse website-usage similarities to one row per document (grouped by documentId).
-- Each entry is reduced to (documentId = otherDocumentId, covisitedSimilarity), i.e. the
-- documentId inside the bag names the *other* document of the pair.
documentWithWebsiteUsageSimilaritiesGroupped = group documentWithWebsiteUsageSimilarities by documentId; |

123 |
outputSimilarities = foreach documentWithWebsiteUsageSimilaritiesGroupped { |

124 |
websiteUsageSimilarities = foreach documentWithWebsiteUsageSimilarities generate otherDocumentId as documentId, covisitedSimilarity as covisitedSimilarity; |

125 |
generate group as id, websiteUsageSimilarities; |

126 |
} |
|
127 |
|
|
128 |
-- Step 6: full outer join with the per-document website-usage similarities to add
-- 'websiteUsageSimilarities'; all previously collected fields are passed through.
joined6 = join joined5Cleaned by id full, outputSimilarities by id; |

129 |
joined6Cleaned = foreach joined6 generate |

130 |
FIRST_NOT_NULL_STR(joined5Cleaned::id, outputSimilarities::id) as id, |

131 |
joined5Cleaned::authorIds as authorIds, |

132 |
joined5Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

133 |
joined5Cleaned::projectIds as projectIds, |

134 |
joined5Cleaned::datasetIds as datasetIds, |

135 |
joined5Cleaned::clusters as clusters, |

136 |
joined5Cleaned::classes as classes, |

137 |
joined5Cleaned::statistics as statistics, |

138 |
outputSimilarities::websiteUsageSimilarities as websiteUsageSimilarities; |
|
139 |
|
|
140 |
-- Collapse document-to-research-initiative links to one row per document:
-- (id = documentId, researchInitiativeConceptIds = bag of the egiConceptId of every link).
researchInitiativeGroupped = group documentToResearchInitiative by documentId; |

141 |
researchInitiative = foreach researchInitiativeGroupped { |

142 |
ids = foreach documentToResearchInitiative generate egiConceptId; |

143 |
generate group as id, ids as researchInitiativeConceptIds; |

144 |
} |
|
145 |
|
|
146 |
-- Step 7: full outer join with the per-document research initiative concept ids.
-- The new field is emitted between datasetIds and clusters rather than appended last.
joined7 = join joined6Cleaned by id full, researchInitiative by id; |

147 |
joined7Cleaned = foreach joined7 generate |

148 |
FIRST_NOT_NULL_STR(joined6Cleaned::id, researchInitiative::id) as id, |

149 |
joined6Cleaned::authorIds as authorIds, |

150 |
joined6Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

151 |
joined6Cleaned::projectIds as projectIds, |

152 |
joined6Cleaned::datasetIds as datasetIds, |

153 |
researchInitiative::researchInitiativeConceptIds as researchInitiativeConceptIds, |

154 |
joined6Cleaned::clusters as clusters, |

155 |
joined6Cleaned::classes as classes, |

156 |
joined6Cleaned::statistics as statistics, |

157 |
joined6Cleaned::websiteUsageSimilarities as websiteUsageSimilarities; |
|
158 |
|
|
159 |
-- Final step: full outer join of the extracted document metadata with all inferred
-- data gathered in steps 1-7. Bibliographic fields come from extractedDocument and are
-- null for documents that only appear on the inferred-data side.
-- 'text' is always emitted as a typed null, and datasetIds is renamed to
-- referencedDataSetIds in the output.
joinedFull = join extractedDocument by id full, joined7Cleaned by id; |

160 |
joinedFullCleaned = foreach joinedFull generate |

161 |
FIRST_NOT_NULL_STR(extractedDocument::id, joined7Cleaned::id) as id, |

162 |
extractedDocument::title as title, extractedDocument::abstract as abstract, |

163 |
extractedDocument::language as language, extractedDocument::keywords as keywords, |

164 |
extractedDocument::externalIdentifiers as externalIdentifiers, |

165 |
extractedDocument::journal as journal, extractedDocument::year as year, |

166 |
extractedDocument::publisher as publisher, (chararray)null as text, |

167 |
joined7Cleaned::projectIds as projectIds, |

168 |
joined7Cleaned::authorIds as authorIds, |

169 |
joined7Cleaned::matchedCitationDocumentIds as matchedCitationDocumentIds, |

170 |
joined7Cleaned::datasetIds as referencedDataSetIds, |

171 |
joined7Cleaned::researchInitiativeConceptIds as researchInitiativeConceptIds, |

172 |
joined7Cleaned::clusters as clusters, |

173 |
joined7Cleaned::classes as classes, |

174 |
joined7Cleaned::statistics as statistics, |

175 |
joined7Cleaned::websiteUsageSimilarities as websiteUsageSimilarities; |
|
176 |
|
|
177 |
-- Write the merged records to the location given by '$output_document_with_inferenced_data'
-- using the avro_store_document_with_inferenced_data storer (defined outside this view).
store joinedFullCleaned into '$output_document_with_inferenced_data' using avro_store_document_with_inferenced_data; |
modules/icm-iis-transformers/trunk/src/main/resources/eu/dnetlib/iis/transformers/export/document/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.4" name="transformers_export_document"> |
|
2 |
|
|
3 |
<!-- Workflow parameters: nine input locations and one output location. Each property declares only a name (no default value, no description). -->
<parameters> |

4 |
<property> |

5 |
<name>input_extracted_document_metadata</name> |

6 |
</property> |

7 |
<property> |

8 |
<name>input_citation</name> |

9 |
</property> |

10 |
<property> |

11 |
<name>input_document_to_project</name> |

12 |
</property> |

13 |
<property> |

14 |
<name>input_document_to_dataset</name> |

15 |
</property> |

16 |
<property> |

17 |
<name>input_document_to_research_initiative</name> |

18 |
</property> |

19 |
<property> |

20 |
<name>input_document_to_document_clusters</name> |

21 |
</property> |

22 |
<property> |

23 |
<name>input_document_to_document_classes</name> |

24 |
</property> |

25 |
<property> |

26 |
<name>input_document_to_document_statistics</name> |

27 |
</property> |

28 |
<property> |

29 |
<name>input_document_with_website_usage_similarities</name> |

30 |
</property> |

31 |
<property> |

32 |
<name>output_document_with_inferenced_data</name> |

33 |
</property> |

34 |
</parameters> |
|
35 |
|
|
36 |
<start to="transformer"/> |
|
37 |
<!-- The only action of this workflow: runs lib/scripts/transformer.pig. Every input/output location is forwarded to the script as a param, paired with a schema_* param naming the record class for that location. On success the workflow goes to "end", on error to "fail". -->
<action name="transformer"> |

38 |
<pig> |

39 |
<job-tracker>${jobTracker}</job-tracker> |

40 |
<name-node>${nameNode}</name-node> |

41 |
<!-- The data generated by this node is deleted in this section --> |

42 |
<prepare> |

43 |
<delete path="${nameNode}${workingDir}/transformer" /> |

44 |
<delete path="${nameNode}${output_document_with_inferenced_data}" /> |

45 |
<mkdir path="${nameNode}${workingDir}/transformer" /> |

46 |
</prepare> |

47 |
<configuration> |

48 |
<property> |

49 |
<name>mapred.job.queue.name</name> |

50 |
<value>${queueName}</value> |

51 |
</property> |

52 |
</configuration> |

53 |
<!-- Path to PIG script the workflow executes. --> |

54 |
<script>lib/scripts/transformer.pig</script> |

55 |
<!-- The working directory of the workflow node. --> |

56 |
<param>workingDir=${workingDir}/transformer/working_dir</param> |

57 |


58 |
<param>input_extracted_document_metadata=${input_extracted_document_metadata}</param> |

59 |
<param>schema_input_extracted_document_metadata=eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata</param> |

60 |


61 |
<param>input_citation=${input_citation}</param> |

62 |
<param>schema_input_citation=eu.dnetlib.iis.citationmatching.schemas.Citation</param> |

63 |


64 |
<param>input_document_to_project=${input_document_to_project}</param> |

65 |
<param>schema_input_document_to_project=eu.dnetlib.iis.referenceextraction.project.schemas.DocumentToProject</param> |

66 |


67 |
<param>input_document_to_dataset=${input_document_to_dataset}</param> |

68 |
<param>schema_input_document_to_dataset=eu.dnetlib.iis.referenceextraction.dataset.schemas.DocumentToDataSet</param> |

69 |


70 |
<param>input_document_to_research_initiative=${input_document_to_research_initiative}</param> |

71 |
<param>schema_input_document_to_research_initiative=eu.dnetlib.iis.referenceextraction.researchinitiative.schemas.DocumentToResearchInitiative</param> |

72 |


73 |
<param>input_document_to_document_clusters=${input_document_to_document_clusters}</param> |

74 |
<param>schema_input_document_to_document_clusters=eu.dnetlib.iis.documentsclustering.schemas.DocumentToDocumentClusters</param> |

75 |


76 |
<param>input_document_to_document_classes=${input_document_to_document_classes}</param> |

77 |
<param>schema_input_document_to_document_classes=eu.dnetlib.iis.documentsclassification.schemas.DocumentToDocumentClasses</param> |

78 |


79 |
<param>input_document_to_document_statistics=${input_document_to_document_statistics}</param> |

80 |
<param>schema_input_document_to_document_statistics=eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics</param> |

81 |


82 |
<param>input_document_with_website_usage_similarities=${input_document_with_website_usage_similarities}</param> |

83 |
<param>schema_input_document_with_website_usage_similarities=eu.dnetlib.iis.websiteusage.schemas.DocumentsWithWebsiteUsageSimilarities</param> |

84 |


85 |
<param>output_document_with_inferenced_data=${output_document_with_inferenced_data}</param> |

86 |
<param>schema_output_document_with_inferenced_data=eu.dnetlib.iis.export.schemas.DocumentWithInferencedData</param> |

87 |
</pig> |

88 |
<ok to="end"/> |

89 |
<error to="fail"/> |

90 |
</action> |
|
91 |
<!-- Failure terminal node, targeted by the error transition of the "transformer" action; its message embeds the error message of the last node that failed. -->
<kill name="fail"> |

92 |
<message>Unfortunately, the workflow failed -- error message: |

93 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |

94 |
</kill> |
|
95 |
<end name="end"/> |
|
96 |
</workflow-app> |
Also available in: Unified diff
#354 removing obsolete transformers/export/document transformer along with tests