Revision 39040
Added by Marek Horst over 8 years ago
modules/icm-iis-citationmatching-direct/trunk/deploy.info | ||
---|---|---|
1 |
[ |
|
2 |
{ |
|
3 |
"type_source": "SVN", |
|
4 |
"goal": "package -U -T 4C source:jar", |
|
5 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk/", |
|
6 |
"deploy_repository": "dnet4-snapshots", |
|
7 |
"version": "4", |
|
8 |
"mail": "m.horst@icm.edu.pl", |
|
9 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", |
|
10 |
"name": "icm-iis-citationmatching-direct" |
|
11 |
}, |
|
12 |
{ |
|
13 |
"type_source": "SVN", |
|
14 |
"goal": "clean verify -U -e -X", |
|
15 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk/", |
|
16 |
"nightly" : "true", |
|
17 |
"cron" : "H H * * *", |
|
18 |
"version": "4", |
|
19 |
"mail": "m.horst@icm.edu.pl", |
|
20 |
"name": "icm-iis-citationmatching-direct-embedded-integration-test" |
|
21 |
} |
|
22 |
] |
modules/icm-iis-citationmatching-direct/trunk/src/test/java/eu/dnetlib/iis/citationmatching/direct/WorkflowTest.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.citationmatching.direct; |
|
2 |
|
|
3 |
import eu.dnetlib.iis.IntegrationTest; |
|
4 |
import eu.dnetlib.iis.core.AbstractWorkflowTestCase; |
|
5 |
import eu.dnetlib.iis.core.WorkflowConfiguration; |
|
6 |
import org.junit.Test; |
|
7 |
import org.junit.experimental.categories.Category; |
|
8 |
|
|
9 |
/** |

10 |
* Integration test (JUnit category {@link IntegrationTest}) that runs the sample-data-producer Oozie workflow of the direct citation matching module. |

11 |
* @author mhorst |

12 |
* |

13 |
*/ |

14 |
@Category(IntegrationTest.class) |

15 |
public class WorkflowTest extends AbstractWorkflowTestCase { |

16 |


17 |
// Runs the workflow found under the given oozie_app classpath location with the timeout set to 720 seconds. |
// NOTE(review): runWorkflow is not defined in this class; presumably inherited from AbstractWorkflowTestCase -- confirm its failure semantics there. |
@Test |

18 |
public void testWorkflow() throws Exception { |

19 |
WorkflowConfiguration wf = new WorkflowConfiguration(); |

20 |
wf.setTimeoutInSeconds(720); |

21 |
runWorkflow("eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app", wf); |

22 |
} |

23 |


24 |
} |
|
0 | 25 |
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app/import.txt | ||
---|---|---|
1 |
## This is a classpath-based import file (this header is required) |
|
2 |
transformer classpath eu/dnetlib/iis/citationmatching/direct/oozie_app |
|
0 | 3 |
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-citationmatching_direct_sampledataproducer"> |
|
2 |
<start to="producer"/> |
|
3 |
<action name="producer"> |
|
4 |
<java> |
|
5 |
<job-tracker>${jobTracker}</job-tracker> |
|
6 |
<name-node>${nameNode}</name-node> |
|
7 |
<!-- The data generated by this node is deleted in this section --> |
|
8 |
<prepare> |
|
9 |
<delete path="${nameNode}${workingDir}/producer" /> |
|
10 |
<mkdir path="${nameNode}${workingDir}/producer" /> |
|
11 |
</prepare> |
|
12 |
<configuration> |
|
13 |
<property> |
|
14 |
<name>mapred.job.queue.name</name> |
|
15 |
<value>${queueName}</value> |
|
16 |
</property> |
|
17 |
</configuration> |
|
18 |
<!-- This is a simple wrapper for the Java code --> |
|
19 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
20 |
<!-- The business Java code that gets to be executed --> |
|
21 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
|
22 |
<!-- Specification of the output ports --> |
|
23 |
<arg>-C{metadata, |
|
24 |
eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata, |
|
25 |
eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/metadata.json}</arg> |
|
26 |
<!-- All input and output ports have to be bound to paths in HDFS --> |
|
27 |
<arg>-Ometadata=${workingDir}/producer/metadata</arg> |
|
28 |
</java> |
|
29 |
<ok to="ingest_citations"/> |
|
30 |
<error to="fail"/> |
|
31 |
</action> |
|
32 |
|
|
33 |
<action name="ingest_citations"> |
|
34 |
<sub-workflow> |
|
35 |
<app-path>${wf:appPath()}/transformer</app-path> |
|
36 |
<configuration> |
|
37 |
<property> |
|
38 |
<name>jobTracker</name> |
|
39 |
<value>${jobTracker}</value> |
|
40 |
</property> |
|
41 |
<property> |
|
42 |
<name>nameNode</name> |
|
43 |
<value>${nameNode}</value> |
|
44 |
</property> |
|
45 |
<property> |
|
46 |
<name>queueName</name> |
|
47 |
<value>${queueName}</value> |
|
48 |
</property> |
|
49 |
<!-- Input ports. --> |
|
50 |
<property> |
|
51 |
<name>input</name> |
|
52 |
<value>${workingDir}/producer/metadata</value> |
|
53 |
</property> |
|
54 |
<!-- Output port bound to given path --> |
|
55 |
<property> |
|
56 |
<name>output</name> |
|
57 |
<value>${workingDir}/transformer/citation</value> |
|
58 |
</property> |
|
59 |
</configuration> |
|
60 |
</sub-workflow> |
|
61 |
<ok to="consumer"/> |
|
62 |
<error to="fail"/> |
|
63 |
</action> |
|
64 |
<action name="consumer"> |
|
65 |
<java> |
|
66 |
<job-tracker>${jobTracker}</job-tracker> |
|
67 |
<name-node>${nameNode}</name-node> |
|
68 |
<configuration> |
|
69 |
<property> |
|
70 |
<name>mapred.job.queue.name</name> |
|
71 |
<value>${queueName}</value> |
|
72 |
</property> |
|
73 |
</configuration> |
|
74 |
<!-- This is a simple wrapper for the Java code --> |
|
75 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
|
76 |
<!-- The business Java code that gets to be executed --> |
|
77 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg> |
|
78 |
<!-- Specification of the input ports --> |
|
79 |
<arg>-C{citation, |
|
80 |
eu.dnetlib.iis.citationmatching.direct.schemas.Citation, |
|
81 |
eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/citation.json}</arg> |
|
82 |
<!-- All input and output ports have to be bound to paths in HDFS --> |
|
83 |
<arg>-Icitation=${workingDir}/transformer/citation</arg> |
|
84 |
</java> |
|
85 |
<ok to="end" /> |
|
86 |
<error to="fail" /> |
|
87 |
</action> |
|
88 |
<kill name="fail"> |
|
89 |
<message>Unfortunately, the workflow failed -- error message: |
|
90 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
91 |
</kill> |
|
92 |
<end name="end"/> |
|
93 |
</workflow-app> |
|
0 | 94 |
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/citation.json | ||
---|---|---|
1 |
{ |
|
2 |
"sourceDocumentId": "50|od_______908::c84fe76a7bc6232a6732dab8c72ef9ea", |
|
3 |
"position": 50, |
|
4 |
"destinationDocumentId": "50|od_______908::14ddacb589be0a68489f89818647f27a" |
|
5 |
} |
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/metadata.json | ||
---|---|---|
1 |
{ |
|
2 |
"id": "50|od_______908::c84fe76a7bc6232a6732dab8c72ef9ea", |
|
3 |
"externalIdentifiers": { |
|
4 |
"pmid": "16528104" |
|
5 |
}, |
|
6 |
"publicationTypeName": null, |
|
7 |
"references": [ |
|
8 |
{ |
|
9 |
"position": 1, |
|
10 |
"externalIds": { |
|
11 |
"pmid": "1597408" |
|
12 |
} |
|
13 |
}, |
|
14 |
{ |
|
15 |
"position": 50, |
|
16 |
"externalIds": { |
|
17 |
"pmid": "5490870" |
|
18 |
} |
|
19 |
} |
|
20 |
] |
|
21 |
} |
|
22 |
{ |
|
23 |
"id": "50|od_______908::14ddacb589be0a68489f89818647f27a", |
|
24 |
"externalIdentifiers": { |
|
25 |
"pmid": "5490870" |
|
26 |
}, |
|
27 |
"publicationTypeName": null, |
|
28 |
"references": [] |
|
29 |
} |
modules/icm-iis-citationmatching-direct/trunk/src/main/java/eu/dnetlib/iis/citationmatching/direct/udfs/DeduplicateIdsWithDocumentType.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.citationmatching.direct.udfs; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.util.Iterator; |
|
5 |
|
|
6 |
import org.apache.pig.EvalFunc; |
|
7 |
import org.apache.pig.data.BagFactory; |
|
8 |
import org.apache.pig.data.DataBag; |
|
9 |
import org.apache.pig.data.Tuple; |
|
10 |
import org.apache.pig.impl.logicalLayer.schema.Schema; |
|
11 |
|
|
12 |
import com.google.common.collect.Lists; |
|
13 |
|
|
14 |
/** |

15 |
* Pig UDF that reduces a bag of tuples to at most one tuple, using the value stored at tuple index 1 as the document type (presumably tuple[0] is the pmid -- it is never read here). |

16 |
* The first tuple whose field at index 1 equals 'research-article' takes precedence and is returned alone; otherwise a bag holding exactly one tuple is returned with that tuple. |

17 |
* Null is returned for a null or empty input tuple, a null bag, an empty bag, or a bag with several tuples none of which is a 'research-article'. |
* NOTE(review): no sorting of identifiers is performed in this class; if lexicographic ordering is expected it must happen elsewhere -- TODO confirm. |
* NOTE(review): transformer.pig from the same changeset builds the input tuples as (originalId, newId, publicationTypeName), in which case index 1 would be newId rather than the document type -- TODO confirm which index is intended. |

18 |
* |

19 |
* @author mhorst |

20 |
*/ |

21 |
public class DeduplicateIdsWithDocumentType extends EvalFunc<DataBag> { |

22 |


23 |
// Document type value that wins over any other type during deduplication. |
public static final String DOCUMENT_TYPE_RESEARCH_ARTICLE = "research-article"; |

24 |


25 |
/** |
* Picks a single representative tuple from the bag passed as the first field of the input tuple. |
* |
* @param tuple input whose field 0 is cast to a DataBag of candidate tuples |
* @return a new default bag with one tuple, or null when no single representative can be chosen |
* @throws IOException declared by the overridden method; tuple field access may raise it |
*/ |
@Override |

26 |
public DataBag exec(Tuple tuple) throws IOException { |

27 |
if (tuple == null || tuple.size() == 0) { |

28 |
return null; |

29 |
} |

30 |
DataBag db = (DataBag) tuple.get(0); |

31 |
if (db==null) { |

32 |
return null; |

33 |
} |

34 |
// count tracks how many tuples were visited; firstTuple remembers the first one for the single-element case. |
int count = 0; |

35 |
Tuple firstTuple = null; |

36 |
Iterator<Tuple> it = db.iterator(); |

37 |
while (it.hasNext()) { |

38 |
Tuple currentTuple = it.next(); |

39 |
if (count==0) { |

40 |
firstTuple = currentTuple; |

41 |
} |

42 |
// Short-circuit on the first tuple whose field 1 is 'research-article'; remaining tuples are not inspected. |
if (DOCUMENT_TYPE_RESEARCH_ARTICLE.equals(currentTuple.get(1))) { |

43 |
return BagFactory.getInstance().newDefaultBag( |

44 |
Lists.<Tuple>newArrayList(currentTuple)); |

45 |
} |

46 |
count++; |

47 |
} |

48 |
// No 'research-article' found: a lone tuple is still accepted as the result. |
if (count==1) { |

49 |
return BagFactory.getInstance().newDefaultBag( |

50 |
Lists.<Tuple>newArrayList(firstTuple)); |

51 |
} |

52 |
// fallback: empty bag, or several tuples without a 'research-article' entry -- nothing is selected |

53 |
return null; |

54 |
} |

55 |


56 |
// Returns the input schema unchanged. |
// NOTE(review): the input schema describes the argument tuple wrapping the bag -- confirm this is the intended output schema. |
@Override |

57 |
public Schema outputSchema(Schema input) { |

58 |
return input; |

59 |
} |

60 |


61 |
} |
|
0 | 62 |
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/oozie_app/lib/scripts/transformer/transformer.pig | ||
---|---|---|
1 |
define avro_load_metadata |
|
2 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
3 |
'schema', '$schema_input'); |
|
4 |
|
|
5 |
define avro_store_citation |
|
6 |
org.apache.pig.piggybank.storage.avro.AvroStorage( |
|
7 |
'index', '0', |
|
8 |
'schema', '$schema_output'); |
|
9 |
|
|
10 |
define NullToEmptyBag datafu.pig.bags.NullToEmptyBag(); |
|
11 |
define DeduplicateIdsWithDocumentType eu.dnetlib.iis.citationmatching.direct.udfs.DeduplicateIdsWithDocumentType; |
|
12 |
|
|
13 |
documentMetadata = load '$input' using avro_load_metadata; |
|
14 |
|
|
15 |
-- generate the pmid_to_oaid and doi_to_oaid mappings based on the input |
|
16 |
doi_to_oaid_with_nulls = foreach documentMetadata generate externalIdentifiers#'doi' as originalId:chararray, id as newId:chararray; |
|
17 |
doi_to_oaid_nondedup = filter doi_to_oaid_with_nulls by originalId is not null; |
|
18 |
doi_to_oaid_nondedup_groupped = group doi_to_oaid_nondedup by originalId; |
|
19 |
doi_to_oaid = foreach doi_to_oaid_nondedup_groupped { |
|
20 |
first_record = LIMIT doi_to_oaid_nondedup 1; |
|
21 |
-- FIXME it works but what if empty bag was returned?! |
|
22 |
generate group as originalId, flatten(first_record.newId) as newId; |
|
23 |
} |
|
24 |
|
|
25 |
pmid_to_oaid_with_nulls = foreach documentMetadata generate externalIdentifiers#'pmid' as originalId:chararray, id as newId:chararray, publicationTypeName as publicationTypeName; |
|
26 |
|
|
27 |
-- DEBUG: skipping deduplication (disabled) |
|
28 |
-- pmid_to_oaid = filter pmid_to_oaid_with_nulls by originalId is not null; |
|
29 |
pmid_to_oaid_nondedup = filter pmid_to_oaid_with_nulls by originalId is not null; |
|
30 |
pmid_to_oaid_nondedup_groupped = group pmid_to_oaid_nondedup by originalId; |
|
31 |
pmid_to_oaid = foreach pmid_to_oaid_nondedup_groupped { |
|
32 |
idsWithPublicationType = foreach pmid_to_oaid_nondedup generate originalId, newId, publicationTypeName; |
|
33 |
dedupIdsWithPublicationType = DeduplicateIdsWithDocumentType(idsWithPublicationType); |
|
34 |
-- FIXME it works, but what if null was returned?! |
|
35 |
generate group as originalId, flatten(dedupIdsWithPublicationType.newId) as newId; |
|
36 |
} |
|
37 |
|
|
38 |
docWithRefsFlat = foreach documentMetadata generate id, flatten(NullToEmptyBag(references)); |
|
39 |
docWithBasicMetadataFlat = foreach docWithRefsFlat generate id, flatten(references::externalIds), flatten(references::position); |
|
40 |
|
|
41 |
workingCitation = foreach docWithBasicMetadataFlat generate |
|
42 |
id as sourceId:chararray, |
|
43 |
references::position as position:int, |
|
44 |
null as destinationDocumentId:chararray, |
|
45 |
references::externalIds#'pmid' as pmid:chararray, |
|
46 |
references::externalIds#'doi' as doi:chararray; |
|
47 |
|
|
48 |
-- joining with pmid_to_oaid mappings |
|
49 |
joinedWithPmid = join workingCitation by pmid left, pmid_to_oaid by originalId; |
|
50 |
workingCitationWithDestIdFromPmid = foreach joinedWithPmid generate |
|
51 |
workingCitation::sourceId as sourceId, |
|
52 |
workingCitation::doi as doi, |
|
53 |
workingCitation::position as position, |
|
54 |
pmid_to_oaid::newId as destinationDocumentId; |
|
55 |
|
|
56 |
-- joining with doi_to_oaid mappings |
|
57 |
joinedWithDoi = join workingCitationWithDestIdFromPmid by doi left, doi_to_oaid by originalId; |
|
58 |
|
|
59 |
workingCitationWithDestIdFromPmidAndDoi = foreach joinedWithDoi generate |
|
60 |
workingCitationWithDestIdFromPmid::sourceId as sourceId, |
|
61 |
workingCitationWithDestIdFromPmid::position as position, |
|
62 |
-- overriding pmid matched citation with doi matched citation if found |
|
63 |
(doi_to_oaid::newId is not null ? doi_to_oaid::newId : workingCitationWithDestIdFromPmid::destinationDocumentId) as destinationDocumentId; |
|
64 |
|
|
65 |
output_citation = foreach workingCitationWithDestIdFromPmidAndDoi generate |
|
66 |
sourceId as sourceDocumentId, position, destinationDocumentId; |
|
67 |
|
|
68 |
-- accepting only matched citations |
|
69 |
output_citation_matched = filter output_citation by destinationDocumentId is not null; |
|
70 |
|
|
71 |
store output_citation_matched into '$output' using avro_store_citation; |
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app xmlns="uri:oozie:workflow:0.4" name="transformers_ingest_citations"> |
|
2 |
|
|
3 |
<parameters> |
|
4 |
<property> |
|
5 |
<name>input</name> |
|
6 |
<description>input containing document metadata records</description> |
|
7 |
</property> |
|
8 |
<property> |
|
9 |
<name>output</name> |
|
10 |
<description>extracted citations</description> |
|
11 |
</property> |
|
12 |
</parameters> |
|
13 |
|
|
14 |
<start to="generate-schema"/> |
|
15 |
|
|
16 |
<action name="generate-schema"> |
|
17 |
<java> |
|
18 |
<job-tracker>${jobTracker}</job-tracker> |
|
19 |
<name-node>${nameNode}</name-node> |
|
20 |
<main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class> |
|
21 |
<arg>eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata</arg> |
|
22 |
<arg>eu.dnetlib.iis.citationmatching.direct.schemas.Citation</arg> |
|
23 |
<capture-output /> |
|
24 |
</java> |
|
25 |
<ok to="transformer" /> |
|
26 |
<error to="fail" /> |
|
27 |
</action> |
|
28 |
|
|
29 |
<action name="transformer"> |
|
30 |
<pig> |
|
31 |
<job-tracker>${jobTracker}</job-tracker> |
|
32 |
<name-node>${nameNode}</name-node> |
|
33 |
<!-- The data generated by this node is deleted in this section --> |
|
34 |
<prepare> |
|
35 |
<delete path="${nameNode}${output}" /> |
|
36 |
</prepare> |
|
37 |
<configuration> |
|
38 |
<property> |
|
39 |
<name>mapred.job.queue.name</name> |
|
40 |
<value>${queueName}</value> |
|
41 |
</property> |
|
42 |
</configuration> |
|
43 |
<!-- Path to PIG script the workflow executes. --> |
|
44 |
<script>lib/scripts/transformer/transformer.pig</script> |
|
45 |
|
|
46 |
<param>input=${input}</param> |
|
47 |
<param>schema_input=${wf:actionData('generate-schema')['eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata']}</param> |
|
48 |
|
|
49 |
<param>output=${output}</param> |
|
50 |
<param>schema_output=${wf:actionData('generate-schema')['eu.dnetlib.iis.citationmatching.direct.schemas.Citation']}</param> |
|
51 |
</pig> |
|
52 |
<ok to="end"/> |
|
53 |
<error to="fail"/> |
|
54 |
</action> |
|
55 |
<kill name="fail"> |
|
56 |
<message>Unfortunately, the workflow failed -- error message: |
|
57 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
|
58 |
</kill> |
|
59 |
<end name="end"/> |
|
60 |
</workflow-app> |
|
0 | 61 |
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/job.properties | ||
---|---|---|
1 |
input=/user/marek.horst/transformers/metadatamerger/working_dir/out |
|
2 |
output=${workingDir}/citation |
|
0 | 3 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/test-custom-log4j.properties | ||
---|---|---|
1 |
# |
|
2 |
# Licensed to the Apache Software Foundation (ASF) under one |
|
3 |
# or more contributor license agreements. See the NOTICE file |
|
4 |
# distributed with this work for additional information |
|
5 |
# regarding copyright ownership. The ASF licenses this file |
|
6 |
# to you under the Apache License, Version 2.0 (the |
|
7 |
# "License"); you may not use this file except in compliance |
|
8 |
# with the License. You may obtain a copy of the License at |
|
9 |
# |
|
10 |
# http://www.apache.org/licenses/LICENSE-2.0 |
|
11 |
# |
|
12 |
# Unless required by applicable law or agreed to in writing, software |
|
13 |
# distributed under the License is distributed on an "AS IS" BASIS, |
|
14 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
15 |
# See the License for the specific language governing permissions and |
|
16 |
# limitations under the License. |
|
17 |
# |
|
18 |
|
|
19 |
# http://www.apache.org/licenses/LICENSE-2.0 |
|
20 |
# |
|
21 |
# Unless required by applicable law or agreed to in writing, software |
|
22 |
# distributed under the License is distributed on an "AS IS" BASIS, |
|
23 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
24 |
# See the License for the specific language governing permissions and |
|
25 |
# limitations under the License. See accompanying LICENSE file. |
|
26 |
|
|
27 |
# |
|
28 |
|
|
29 |
log4j.appender.oozie=org.apache.log4j.ConsoleAppender |
|
30 |
log4j.appender.oozie.Target=System.out |
|
31 |
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout |
|
32 |
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n |
|
33 |
|
|
34 |
log4j.appender.null=org.apache.log4j.varia.NullAppender |
|
35 |
|
|
36 |
log4j.logger.org.apache=INFO, oozie |
|
37 |
log4j.logger.org.mortbay=WARN, oozie |
|
38 |
log4j.logger.org.hsqldb=WARN, oozie |
|
39 |
|
|
40 |
log4j.logger.opslog=NONE, null |
|
41 |
log4j.logger.applog=NONE, null |
|
42 |
log4j.logger.instrument=NONE, null |
|
43 |
|
|
44 |
log4j.logger.a=NONE, null |
|
45 |
|
|
0 | 46 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/hsqldb-oozie-site.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
|
3 |
<!-- |
|
4 |
Copyright (c) 2010 Yahoo! Inc. All rights reserved. |
|
5 |
Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 |
you may not use this file except in compliance with the License. |
|
7 |
You may obtain a copy of the License at |
|
8 |
|
|
9 |
http://www.apache.org/licenses/LICENSE-2.0 |
|
10 |
|
|
11 |
Unless required by applicable law or agreed to in writing, software |
|
12 |
distributed under the License is distributed on an "AS IS" BASIS, |
|
13 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 |
See the License for the specific language governing permissions and |
|
15 |
limitations under the License. See accompanying LICENSE file. |
|
16 |
--> |
|
17 |
<configuration> |
|
18 |
<property> |
|
19 |
<name>oozie.action.max.output.data</name> |
|
20 |
<value>8192</value> |
|
21 |
</property> |
|
22 |
<property> |
|
23 |
<name>oozie.service.JPAService.jdbc.driver</name> |
|
24 |
<value>org.hsqldb.jdbcDriver</value> |
|
25 |
</property> |
|
26 |
<property> |
|
27 |
<name>oozie.service.JPAService.jdbc.url</name> |
|
28 |
<value>jdbc:hsqldb:mem:oozie-db;create=true</value> |
|
29 |
</property> |
|
30 |
</configuration> |
|
0 | 31 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/oracle-oozie-site.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
|
3 |
<!-- |
|
4 |
Copyright (c) 2010 Yahoo! Inc. All rights reserved. |
|
5 |
Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 |
you may not use this file except in compliance with the License. |
|
7 |
You may obtain a copy of the License at |
|
8 |
|
|
9 |
http://www.apache.org/licenses/LICENSE-2.0 |
|
10 |
|
|
11 |
Unless required by applicable law or agreed to in writing, software |
|
12 |
distributed under the License is distributed on an "AS IS" BASIS, |
|
13 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 |
See the License for the specific language governing permissions and |
|
15 |
limitations under the License. See accompanying LICENSE file. |
|
16 |
--> |
|
17 |
<configuration> |
|
18 |
<property> |
|
19 |
<name>oozie.action.max.output.data</name> |
|
20 |
<value>8192</value> |
|
21 |
</property> |
|
22 |
<property> |
|
23 |
<name>oozie.service.JPAService.jdbc.driver</name> |
|
24 |
<value>oracle.jdbc.driver.OracleDriver</value> |
|
25 |
</property> |
|
26 |
<property> |
|
27 |
<name>oozie.test.db.port</name> |
|
28 |
<value>1521</value> |
|
29 |
</property> |
|
30 |
<property> |
|
31 |
<name>oozie.test.db.name</name> |
|
32 |
<value>xe</value> |
|
33 |
</property> |
|
34 |
<property> |
|
35 |
<name>oozie.service.JPAService.jdbc.url</name> |
|
36 |
<value>jdbc:oracle:thin:@//${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value> |
|
37 |
</property> |
|
38 |
<property> |
|
39 |
<name>oozie.service.JPAService.jdbc.username</name> |
|
40 |
<value>oozie</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>oozie.service.JPAService.jdbc.password</name> |
|
44 |
<value>oozie</value> |
|
45 |
</property> |
|
46 |
</configuration> |
|
0 | 47 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/mysql-oozie-site.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<!-- |
|
3 |
Licensed to the Apache Software Foundation (ASF) under one |
|
4 |
or more contributor license agreements. See the NOTICE file |
|
5 |
distributed with this work for additional information |
|
6 |
regarding copyright ownership. The ASF licenses this file |
|
7 |
to you under the Apache License, Version 2.0 (the |
|
8 |
"License"); you may not use this file except in compliance |
|
9 |
with the License. You may obtain a copy of the License at |
|
10 |
|
|
11 |
http://www.apache.org/licenses/LICENSE-2.0 |
|
12 |
|
|
13 |
Unless required by applicable law or agreed to in writing, software |
|
14 |
distributed under the License is distributed on an "AS IS" BASIS, |
|
15 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
16 |
See the License for the specific language governing permissions and |
|
17 |
limitations under the License. |
|
18 |
--> |
|
19 |
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
|
20 |
<configuration> |
|
21 |
<property> |
|
22 |
<name>oozie.action.max.output.data</name> |
|
23 |
<value>8192</value> |
|
24 |
</property> |
|
25 |
<property> |
|
26 |
<name>oozie.service.JPAService.jdbc.driver</name> |
|
27 |
<value>com.mysql.jdbc.Driver</value> |
|
28 |
<description>JDBC driver class.</description> |
|
29 |
</property> |
|
30 |
<property> |
|
31 |
<name>oozie.test.db.port</name> |
|
32 |
<value>3306</value> |
|
33 |
</property> |
|
34 |
<property> |
|
35 |
<name>oozie.service.JPAService.jdbc.url</name> |
|
36 |
<value>jdbc:mysql://${oozie.test.db.host}:${oozie.test.db.port}/oozie</value> |
|
37 |
<description>JDBC URL.</description> |
|
38 |
</property> |
|
39 |
<property> |
|
40 |
<name>oozie.service.JPAService.jdbc.username</name> |
|
41 |
<value>oozie</value> |
|
42 |
<description>DB user name.</description> |
|
43 |
</property> |
|
44 |
<property> |
|
45 |
<name>oozie.service.JPAService.jdbc.password</name> |
|
46 |
<value>oozie</value> |
|
47 |
<description> |
|
48 |
DB user password. IMPORTANT: if password is empty leave a 1 space string, the service trims the |
|
49 |
value, if empty Configuration assumes it is NULL. |
|
50 |
</description> |
|
51 |
</property> |
|
52 |
</configuration> |
|
0 | 53 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/postgres-oozie-site.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
|
3 |
<!-- |
|
4 |
Copyright (c) 2010 Yahoo! Inc. All rights reserved. |
|
5 |
Licensed under the Apache License, Version 2.0 (the "License"); |
|
6 |
you may not use this file except in compliance with the License. |
|
7 |
You may obtain a copy of the License at |
|
8 |
|
|
9 |
http://www.apache.org/licenses/LICENSE-2.0 |
|
10 |
|
|
11 |
Unless required by applicable law or agreed to in writing, software |
|
12 |
distributed under the License is distributed on an "AS IS" BASIS, |
|
13 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
14 |
See the License for the specific language governing permissions and |
|
15 |
limitations under the License. See accompanying LICENSE file. |
|
16 |
--> |
|
17 |
<configuration> |
|
18 |
<property> |
|
19 |
<name>oozie.action.max.output.data</name> |
|
20 |
<value>8192</value> |
|
21 |
</property> |
|
22 |
<property> |
|
23 |
<name>oozie.service.JPAService.jdbc.driver</name> |
|
24 |
<value>org.postgresql.Driver</value> |
|
25 |
</property> |
|
26 |
<property> |
|
27 |
<name>oozie.test.db.port</name> |
|
28 |
<value>5432</value> |
|
29 |
</property> |
|
30 |
<property> |
|
31 |
<name>oozie.test.db.name</name> |
|
32 |
<value>oozie</value> |
|
33 |
</property> |
|
34 |
<property> |
|
35 |
<name>oozie.service.JPAService.jdbc.url</name> |
|
36 |
<value>jdbc:postgresql://${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value> |
|
37 |
</property> |
|
38 |
<property> |
|
39 |
<name>oozie.service.JPAService.jdbc.username</name> |
|
40 |
<value>oozie</value> |
|
41 |
</property> |
|
42 |
<property> |
|
43 |
<name>oozie.service.JPAService.jdbc.password</name> |
|
44 |
<value>oozie</value> |
|
45 |
</property> |
|
46 |
</configuration> |
|
0 | 47 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/test-oozie-log4j.properties | ||
---|---|---|
1 |
# |
|
2 |
# Licensed to the Apache Software Foundation (ASF) under one |
|
3 |
# or more contributor license agreements. See the NOTICE file |
|
4 |
# distributed with this work for additional information |
|
5 |
# regarding copyright ownership. The ASF licenses this file |
|
6 |
# to you under the Apache License, Version 2.0 (the |
|
7 |
# "License"); you may not use this file except in compliance |
|
8 |
# with the License. You may obtain a copy of the License at |
|
9 |
# |
|
10 |
# http://www.apache.org/licenses/LICENSE-2.0 |
|
11 |
# |
|
12 |
# Unless required by applicable law or agreed to in writing, software |
|
13 |
# distributed under the License is distributed on an "AS IS" BASIS, |
|
14 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
15 |
# See the License for the specific language governing permissions and |
|
16 |
# limitations under the License. |
|
17 |
# |
|
18 |
|
|
19 |
# http://www.apache.org/licenses/LICENSE-2.0 |
|
20 |
# |
|
21 |
# Unless required by applicable law or agreed to in writing, software |
|
22 |
# distributed under the License is distributed on an "AS IS" BASIS, |
|
23 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
24 |
# See the License for the specific language governing permissions and |
|
25 |
# limitations under the License. See accompanying LICENSE file. |
|
26 |
|
|
27 |
# |
|
28 |
|
|
29 |
log4j.appender.oozie=org.apache.log4j.ConsoleAppender |
|
30 |
log4j.appender.oozie.Target=System.out |
|
31 |
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout |
|
32 |
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n |
|
33 |
|
|
34 |
log4j.appender.null=org.apache.log4j.varia.NullAppender |
|
35 |
|
|
36 |
log4j.logger.org.apache=INFO, oozie |
|
37 |
log4j.logger.org.mortbay=WARN, oozie |
|
38 |
log4j.logger.org.hsqldb=WARN, oozie |
|
39 |
|
|
40 |
log4j.logger.opslog=NONE, null |
|
41 |
log4j.logger.applog=NONE, null |
|
42 |
log4j.logger.instrument=NONE, null |
|
43 |
|
|
44 |
log4j.logger.a=ALL, null |
|
45 |
|
|
0 | 46 |
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/hadoop-config.xml | ||
---|---|---|
1 |
<?xml version="1.0"?> |
|
2 |
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> |
|
3 |
<!-- |
|
4 |
Licensed to the Apache Software Foundation (ASF) under one |
|
5 |
or more contributor license agreements. See the NOTICE file |
|
6 |
distributed with this work for additional information |
|
7 |
regarding copyright ownership. The ASF licenses this file |
|
8 |
to you under the Apache License, Version 2.0 (the |
|
9 |
"License"); you may not use this file except in compliance |
|
10 |
with the License. You may obtain a copy of the License at |
|
11 |
|
|
12 |
http://www.apache.org/licenses/LICENSE-2.0 |
|
13 |
|
|
14 |
Unless required by applicable law or agreed to in writing, software |
|
15 |
distributed under the License is distributed on an "AS IS" BASIS, |
|
16 |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
17 |
See the License for the specific language governing permissions and |
|
18 |
limitations under the License. |
|
19 |
--> |
|
20 |
<configuration> |
|
21 |
|
|
22 |
<property> |
|
23 |
<name>mapreduce.jobtracker.kerberos.principal</name> |
|
24 |
<value>mapred/_HOST@LOCALREALM</value> |
|
25 |
</property> |
|
26 |
|
|
27 |
<property> |
|
28 |
<name>dfs.namenode.kerberos.principal</name> |
|
29 |
<value>hdfs/_HOST@LOCALREALM</value> |
|
30 |
</property> |
|
31 |
|
|
32 |
<property> |
|
33 |
<name>mapreduce.framework.name</name> |
|
34 |
<value>yarn</value> |
|
35 |
</property> |
|
36 |
|
|
37 |
</configuration> |
|
0 | 38 |
modules/icm-iis-citationmatching-direct/trunk/core/README.md | ||
---|---|---|
1 |
This directory and its subdirectories and files are here as a hack to make the Oozie unit tests work. |
|
2 |
|
|
3 |
Details |
|
4 |
------- |
|
5 |
Oozie tests assume that they are placed inside the directory tree of the Oozie source code -- see the source code of the `XTestCase` class, which is an ancestor of the `MiniOozieTestCase` class, which, in turn, your test case class should extend. |
|
6 |
|
|
7 |
How to get the source code of the `XTestCase` class: |
|
8 |
|
|
9 |
- download the source code of Ubuntu's `oozie` package prepared by Cloudera (`apt-get source oozie`). It is version 3.1.3+155 of this package. |
|
10 |
- open file `oozie-3.1.3+155/src/core/src/test/java/org/apache/oozie/test/XTestCase.java` and look at lines 93-105. |
modules/icm-iis-citationmatching-direct/trunk/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>icm-iis-parent-container</artifactId> |
|
6 |
<version>1.0.1-SNAPSHOT</version> |
|
7 |
</parent> |
|
8 |
<modelVersion>4.0.0</modelVersion> |
|
9 |
<artifactId>icm-iis-citationmatching-direct</artifactId> |
|
10 |
<packaging>jar</packaging> |
|
11 |
<version>1.0.1-SNAPSHOT</version> |
|
12 |
|
|
13 |
<scm> |
|
14 |
<developerConnection> |
|
15 |
scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk |
|
16 |
</developerConnection> |
|
17 |
</scm> |
|
18 |
|
|
19 |
<properties> |
|
20 |
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|
21 |
</properties> |
|
22 |
<dependencies> |
|
23 |
<dependency> |
|
24 |
<groupId>eu.dnetlib</groupId> |
|
25 |
<artifactId>icm-iis-core</artifactId> |
|
26 |
<version>[1.0.0,2.0.0)</version> |
|
27 |
</dependency> |
|
28 |
<dependency> |
|
29 |
<groupId>eu.dnetlib</groupId> |
|
30 |
<artifactId>icm-iis-core</artifactId> |
|
31 |
<version>[1.0.0,2.0.0)</version> |
|
32 |
<type>test-jar</type> |
|
33 |
<scope>test</scope> |
|
34 |
</dependency> |
|
35 |
<dependency> |
|
36 |
<groupId>eu.dnetlib</groupId> |
|
37 |
<artifactId>icm-iis-schemas</artifactId> |
|
38 |
<version>[1.0.0,2.0.0)</version> |
|
39 |
</dependency> |
|
40 |
<!-- required after introducing 'provided' scope for hadoop libs --> |
|
41 |
<dependency> |
|
42 |
<groupId>org.apache.hadoop</groupId> |
|
43 |
<artifactId>hadoop-common</artifactId> |
|
44 |
<version>${iis.hadoop.common.version}</version> |
|
45 |
<scope>provided</scope> |
|
46 |
</dependency> |
|
47 |
<!-- Needed by Oozie tests { --> |
|
48 |
<!-- required after introducing 'provided' scope for hadoop dependencies --> |
|
49 |
<dependency> |
|
50 |
<groupId>org.apache.oozie</groupId> |
|
51 |
<artifactId>oozie-core</artifactId> |
|
52 |
<version>${iis.oozie.version}</version> |
|
53 |
<scope>test</scope> |
|
54 |
</dependency> |
|
55 |
<!-- PigMain was moved to oozie-sharelib-pig since cdh4.3.1 --> |
|
56 |
<dependency> |
|
57 |
<groupId>org.apache.oozie</groupId> |
|
58 |
<artifactId>oozie-sharelib-pig</artifactId> |
|
59 |
<version>${iis.oozie.version}</version> |
|
60 |
<scope>test</scope> |
|
61 |
</dependency> |
|
62 |
<dependency> |
|
63 |
<groupId>org.apache.hadoop</groupId> |
|
64 |
<artifactId>hadoop-hdfs</artifactId> |
|
65 |
<version>${iis.hadoop.hdfs.version}</version> |
|
66 |
<scope>test</scope> |
|
67 |
</dependency> |
|
68 |
<!-- end of required after introducing 'provided' scope for hadoop dependencies --> |
|
69 |
<dependency> |
|
70 |
<groupId>org.apache.oozie</groupId> |
|
71 |
<artifactId>oozie-core</artifactId> |
|
72 |
<version>${iis.oozie.version}</version> |
|
73 |
<type>test-jar</type> |
|
74 |
<scope>test</scope> |
|
75 |
</dependency> |
|
76 |
<dependency> |
|
77 |
<groupId>org.apache.hadoop</groupId> |
|
78 |
<artifactId>hadoop-hdfs</artifactId> |
|
79 |
<version>${iis.hadoop.hdfs.version}</version> |
|
80 |
<type>test-jar</type> |
|
81 |
<scope>test</scope> |
|
82 |
</dependency> |
|
83 |
<dependency> |
|
84 |
<groupId>org.apache.hadoop</groupId> |
|
85 |
<artifactId>hadoop-test</artifactId> |
|
86 |
<version>${iis.hadoop.test.version}</version> |
|
87 |
<scope>test</scope> |
|
88 |
</dependency> |
|
89 |
<dependency> |
|
90 |
<groupId>org.apache.hadoop</groupId> |
|
91 |
<artifactId>hadoop-common</artifactId> |
|
92 |
<version>${iis.hadoop.common.version}</version> |
|
93 |
<type>test-jar</type> |
|
94 |
<scope>test</scope> |
|
95 |
</dependency> |
|
96 |
<!-- Needed by Oozie tests } --> |
|
97 |
<!-- Needed to run Pig jobs { --> |
|
98 |
<dependency> |
|
99 |
<groupId>org.apache.pig</groupId> |
|
100 |
<artifactId>pig</artifactId> |
|
101 |
<version>${iis.pig.version}</version> |
|
102 |
<!-- this lib cannot be set to provided --> |
|
103 |
</dependency> |
|
104 |
|
|
105 |
<!-- replacing hacked pig-avrostorage with original pig --> |
|
106 |
<dependency> |
|
107 |
<groupId>eu.dnetlib</groupId> |
|
108 |
<artifactId>icm-iis-3rdparty-pig-avrostorage</artifactId> |
|
109 |
<version>[1.0.0,2.0.0)</version> |
|
110 |
<type>jar</type> |
|
111 |
</dependency> |
|
112 |
<!-- FIXME change to version bound with CDH5 when upgrading cluster --> |
|
113 |
<!-- |
|
114 |
<dependency> |
|
115 |
<groupId>org.apache.pig</groupId> |
|
116 |
<artifactId>piggybank</artifactId> |
|
117 |
<version>${iis.pig.version}</version> |
|
118 |
</dependency> |
|
119 |
--> |
|
120 |
<dependency> |
|
121 |
<groupId>com.linkedin.datafu</groupId> |
|
122 |
<artifactId>datafu</artifactId> |
|
123 |
<version>1.2.0</version> |
|
124 |
</dependency> |
|
125 |
<!-- Needed to run Pig jobs } --> |
|
126 |
</dependencies> |
|
127 |
<repositories> |
|
128 |
<!-- This repository contains our patched |
|
129 |
version of "avro" and "avro-mapred" modules (see the dependencies section) |
|
130 |
This entry might be removed when the patch to these modules becomes |
|
131 |
a part of the official Avro release.--> |
|
132 |
<repository> |
|
133 |
<id>dnet-deps</id> |
|
134 |
<name>dnet dependencies</name> |
|
135 |
<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps</url> |
|
136 |
<releases> |
|
137 |
<enabled>true</enabled> |
|
138 |
</releases> |
|
139 |
<snapshots> |
|
140 |
<enabled>false</enabled> |
|
141 |
</snapshots> |
|
142 |
<layout>default</layout> |
|
143 |
</repository> |
|
144 |
</repositories> |
|
145 |
</project> |
|
0 | 146 |
Also available in: Unified diff
#1498 introducing direct citationmatching module based on pmc citation ingestion