Project

General

Profile

« Previous | Next » 

Revision 39040

Added by Marek Horst over 8 years ago

#1498 introducing direct citationmatching module based on pmc citation ingestion

View differences:

modules/icm-iis-citationmatching-direct/trunk/deploy.info
1
[
2
{
3
  "type_source": "SVN", 
4
  "goal": "package -U -T 4C source:jar", 
5
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk/", 
6
  "deploy_repository": "dnet4-snapshots", 
7
  "version": "4",
8
  "mail": "m.horst@icm.edu.pl",
9
  "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", 
10
  "name": "icm-iis-citationmatching-direct"
11
},
12
{
13
  "type_source": "SVN",
14
  "goal": "clean verify -U -e -X",
15
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk/",
16
  "nightly" : "true",
17
  "cron" : "H H * * *",
18
  "version": "4",
19
  "mail": "m.horst@icm.edu.pl",
20
  "name": "icm-iis-citationmatching-direct-embedded-integration-test"
21
}
22
]
modules/icm-iis-citationmatching-direct/trunk/src/test/java/eu/dnetlib/iis/citationmatching/direct/WorkflowTest.java
1
package eu.dnetlib.iis.citationmatching.direct;
2

  
3
import eu.dnetlib.iis.IntegrationTest;
4
import eu.dnetlib.iis.core.AbstractWorkflowTestCase;
5
import eu.dnetlib.iis.core.WorkflowConfiguration;
6
import org.junit.Test;
7
import org.junit.experimental.categories.Category;
8

  
9
/**
10
 * 
11
 * @author mhorst
12
 *
13
 */
14
@Category(IntegrationTest.class)
15
public class WorkflowTest extends AbstractWorkflowTestCase {
16

  
17
    @Test
18
	public void testWorkflow() throws Exception {
19
        WorkflowConfiguration wf = new WorkflowConfiguration();
20
        wf.setTimeoutInSeconds(720);
21
        runWorkflow("eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app", wf);
22
    }
23

  
24
}
0 25

  
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
transformer classpath eu/dnetlib/iis/citationmatching/direct/oozie_app
0 3

  
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-citationmatching_direct_sampledataproducer">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
            <!-- Specification of the output ports -->
23
            <arg>-C{metadata,
24
            eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata,
25
            eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/metadata.json}</arg>
26
            <!-- All input and output ports have to be bound to paths in HDFS -->
27
            <arg>-Ometadata=${workingDir}/producer/metadata</arg>
28
        </java>
29
        <ok to="ingest_citations"/>
30
        <error to="fail"/>
31
    </action>
32
    
33
    <action name="ingest_citations">
34
        <sub-workflow>
35
            <app-path>${wf:appPath()}/transformer</app-path>
36
            <configuration>
37
                <property>
38
                    <name>jobTracker</name>
39
                    <value>${jobTracker}</value>
40
                </property>
41
                <property>
42
                    <name>nameNode</name>
43
                    <value>${nameNode}</value>
44
                </property>
45
                <property>
46
                    <name>queueName</name>
47
                    <value>${queueName}</value>
48
                </property>
49
                <!-- Input ports. -->
50
                <property>
51
                    <name>input</name>
52
                    <value>${workingDir}/producer/metadata</value>
53
                </property>
54
                <!-- Output port bound to given path -->
55
                <property>
56
                    <name>output</name>
57
                    <value>${workingDir}/transformer/citation</value>
58
                </property>
59
            </configuration>
60
        </sub-workflow>
61
        <ok to="consumer"/>
62
        <error to="fail"/>
63
    </action>
64
    <action name="consumer">
65
		<java>
66
			<job-tracker>${jobTracker}</job-tracker>
67
			<name-node>${nameNode}</name-node>
68
			<configuration>
69
				<property>
70
					<name>mapred.job.queue.name</name>
71
					<value>${queueName}</value>
72
				</property>
73
			</configuration>
74
			<!-- This is simple wrapper for the Java code -->
75
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
76
			<!-- The business Java code that gets to be executed -->
77
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
78
			<!-- Specification of the input ports -->
79
			<arg>-C{citation,
80
                eu.dnetlib.iis.citationmatching.direct.schemas.Citation,
81
                eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/citation.json}</arg>
82
    		<!-- All input and output ports have to be bound to paths in HDFS -->
83
			<arg>-Icitation=${workingDir}/transformer/citation</arg>
84
		</java>
85
		<ok to="end" />
86
		<error to="fail" />
87
	</action>
88
    <kill name="fail">
89
		<message>Unfortunately, the workflow failed -- error message:
90
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
91
    </kill>
92
    <end name="end"/>
93
</workflow-app>
0 94

  
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/citation.json
1
{
2
  "sourceDocumentId": "50|od_______908::c84fe76a7bc6232a6732dab8c72ef9ea",
3
  "position": 50,
4
  "destinationDocumentId": "50|od_______908::14ddacb589be0a68489f89818647f27a"
5
}
modules/icm-iis-citationmatching-direct/trunk/src/test/resources/eu/dnetlib/iis/citationmatching/direct/sampledataproducer/data/metadata.json
1
{
2
  "id": "50|od_______908::c84fe76a7bc6232a6732dab8c72ef9ea",
3
  "externalIdentifiers": {
4
          "pmid": "16528104"
5
  },
6
  "publicationTypeName": null,
7
  "references": [
8
    {
9
      "position": 1,
10
      "externalIds": {
11
          "pmid": "1597408"
12
      }
13
    },
14
    {
15
      "position": 50,
16
      "externalIds": {
17
          "pmid": "5490870"
18
      }
19
    }
20
  ]
21
}
22
{
23
  "id": "50|od_______908::14ddacb589be0a68489f89818647f27a",
24
  "externalIdentifiers": {
25
          "pmid": "5490870"
26
  },
27
  "publicationTypeName": null,
28
  "references": []
29
}
modules/icm-iis-citationmatching-direct/trunk/src/main/java/eu/dnetlib/iis/citationmatching/direct/udfs/DeduplicateIdsWithDocumentType.java
1
package eu.dnetlib.iis.citationmatching.direct.udfs;
2

  
3
import java.io.IOException;
4
import java.util.Iterator;
5

  
6
import org.apache.pig.EvalFunc;
7
import org.apache.pig.data.BagFactory;
8
import org.apache.pig.data.DataBag;
9
import org.apache.pig.data.Tuple;
10
import org.apache.pig.impl.logicalLayer.schema.Schema;
11

  
12
import com.google.common.collect.Lists;
13

  
14
/**
15
 * Deduplicates bag of tuples where tuple[0] is pmid, tuple[1] is document type.
16
 * 'research-article' type has precedence over any other type when more than one entry provided.
17
 * Identifiers are sorted lexicographically.
18
 *
19
 * @author mhorst
20
 */
21
public class DeduplicateIdsWithDocumentType extends EvalFunc<DataBag> {
22

  
23
	public static final String DOCUMENT_TYPE_RESEARCH_ARTICLE = "research-article";
24
	
25
    @Override
26
    public DataBag exec(Tuple tuple) throws IOException {
27
        if (tuple == null || tuple.size() == 0) {
28
            return null;
29
        }
30
        DataBag db = (DataBag) tuple.get(0);
31
        if (db==null) {
32
        	return null;
33
        }
34
    	int count = 0;
35
		Tuple firstTuple = null;
36
		Iterator<Tuple> it = db.iterator();
37
        while (it.hasNext()) {
38
        	Tuple currentTuple = it.next();
39
        	if (count==0) {
40
        		firstTuple = currentTuple;
41
        	}
42
        	if (DOCUMENT_TYPE_RESEARCH_ARTICLE.equals(currentTuple.get(1))) {
43
        		return BagFactory.getInstance().newDefaultBag(
44
        				Lists.<Tuple>newArrayList(currentTuple));
45
        	}
46
        	count++;
47
        }
48
		if (count==1) {
49
			return BagFactory.getInstance().newDefaultBag(
50
    				Lists.<Tuple>newArrayList(firstTuple));
51
		}
52
//		fallback
53
		return null;
54
    }
55

  
56
    @Override
57
    public Schema outputSchema(Schema input) {
58
    	return input;
59
    }
60
    
61
}
0 62

  
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/oozie_app/lib/scripts/transformer/transformer.pig
1
define avro_load_metadata
2
org.apache.pig.piggybank.storage.avro.AvroStorage(
3
'schema', '$schema_input');
4

  
5
define avro_store_citation
6
org.apache.pig.piggybank.storage.avro.AvroStorage(
7
'index', '0',
8
'schema', '$schema_output');
9

  
10
define NullToEmptyBag datafu.pig.bags.NullToEmptyBag();
11
define DeduplicateIdsWithDocumentType eu.dnetlib.iis.citationmatching.direct.udfs.DeduplicateIdsWithDocumentType;
12

  
13
documentMetadata = load '$input' using avro_load_metadata;
14

  
15
-- generating pmid_to_oaid and doi_to_oaid mappings based on the input
16
doi_to_oaid_with_nulls = foreach documentMetadata generate externalIdentifiers#'doi' as originalId:chararray, id as newId:chararray;
17
doi_to_oaid_nondedup = filter doi_to_oaid_with_nulls by originalId is not null;
18
doi_to_oaid_nondedup_groupped = group doi_to_oaid_nondedup by originalId;
19
doi_to_oaid = foreach doi_to_oaid_nondedup_groupped {
20
    first_record = LIMIT doi_to_oaid_nondedup 1;
21
--	FIXME it works but what if empty bag was returned?!
22
    generate group as originalId, flatten(first_record.newId) as newId;
23
}
24

  
25
pmid_to_oaid_with_nulls = foreach documentMetadata generate externalIdentifiers#'pmid' as originalId:chararray, id as newId:chararray, publicationTypeName as publicationTypeName;
26

  
27
-- DEBUG alternative (currently disabled): skip deduplication by using the filtered relation directly
28
-- pmid_to_oaid = filter pmid_to_oaid_with_nulls by originalId is not null;
29
pmid_to_oaid_nondedup = filter pmid_to_oaid_with_nulls by originalId is not null;
30
pmid_to_oaid_nondedup_groupped = group pmid_to_oaid_nondedup by originalId;
31
pmid_to_oaid = foreach pmid_to_oaid_nondedup_groupped {
32
     idsWithPublicationType = foreach pmid_to_oaid_nondedup generate originalId, newId, publicationTypeName;
33
     dedupIdsWithPublicationType = DeduplicateIdsWithDocumentType(idsWithPublicationType);
34
--	 FIXME it works, but what if null was returned?!    
35
     generate group as originalId, flatten(dedupIdsWithPublicationType.newId) as newId;
36
 }
37

  
38
docWithRefsFlat = foreach documentMetadata generate id, flatten(NullToEmptyBag(references));
39
docWithBasicMetadataFlat = foreach docWithRefsFlat generate id, flatten(references::externalIds), flatten(references::position);
40

  
41
workingCitation = foreach docWithBasicMetadataFlat generate
42
	id as sourceId:chararray,
43
	references::position as position:int,
44
	null as destinationDocumentId:chararray,
45
	references::externalIds#'pmid' as pmid:chararray,
46
	references::externalIds#'doi' as doi:chararray;
47

  
48
-- joining with pmid_to_oaid mappings
49
joinedWithPmid = join workingCitation by pmid left, pmid_to_oaid by originalId;
50
workingCitationWithDestIdFromPmid = foreach joinedWithPmid generate
51
	workingCitation::sourceId as sourceId,
52
	workingCitation::doi as doi,
53
	workingCitation::position as position,
54
	pmid_to_oaid::newId as destinationDocumentId;
55

  
56
-- joining with doi_to_oaid mappings
57
joinedWithDoi = join workingCitationWithDestIdFromPmid by doi left, doi_to_oaid by originalId;
58

  
59
workingCitationWithDestIdFromPmidAndDoi = foreach joinedWithDoi generate
60
	workingCitationWithDestIdFromPmid::sourceId as sourceId,
61
	workingCitationWithDestIdFromPmid::position as position,
62
-- 	overriding pmid matched citation with doi matched citation if found 
63
	(doi_to_oaid::newId is not null ? doi_to_oaid::newId : workingCitationWithDestIdFromPmid::destinationDocumentId) as destinationDocumentId;
64

  
65
output_citation = foreach workingCitationWithDestIdFromPmidAndDoi generate
66
	sourceId as sourceDocumentId, position, destinationDocumentId;
67

  
68
-- accepting only matched citations
69
output_citation_matched = filter output_citation by destinationDocumentId is not null;
70

  
71
store output_citation_matched into '$output' using avro_store_citation;
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="transformers_ingest_citations">
2
	
3
	<parameters>
4
		<property>
5
            <name>input</name>
6
            <description>input containing document metadata records</description>
7
        </property>
8
        <property>
9
            <name>output</name>
10
            <description>extracted citations</description>
11
        </property>
12
	</parameters>
13
	 
14
    <start to="generate-schema"/>
15
    
16
    <action name="generate-schema">
17
	    <java>
18
    		<job-tracker>${jobTracker}</job-tracker>
19
        	<name-node>${nameNode}</name-node>
20
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
21
	        <arg>eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata</arg>
22
	        <arg>eu.dnetlib.iis.citationmatching.direct.schemas.Citation</arg>
23
	        <capture-output />
24
	    </java>
25
	    <ok to="transformer" />
26
	    <error to="fail" />
27
	</action>
28
    
29
    <action name="transformer">
30
        <pig>
31
            <job-tracker>${jobTracker}</job-tracker>
32
            <name-node>${nameNode}</name-node>
33
			<!-- The data generated by this node is deleted in this section -->
34
			<prepare>
35
				<delete path="${nameNode}${output}" />
36
			</prepare>
37
            <configuration>
38
                <property>
39
                    <name>mapred.job.queue.name</name>
40
                    <value>${queueName}</value>
41
                </property>
42
            </configuration>
43
            <!-- Path to PIG script the workflow executes. -->
44
            <script>lib/scripts/transformer/transformer.pig</script>
45
            
46
            <param>input=${input}</param>
47
            <param>schema_input=${wf:actionData('generate-schema')['eu.dnetlib.iis.citationmatching.direct.schemas.DocumentMetadata']}</param>
48
            
49
            <param>output=${output}</param>
50
            <param>schema_output=${wf:actionData('generate-schema')['eu.dnetlib.iis.citationmatching.direct.schemas.Citation']}</param>
51
        </pig>
52
        <ok to="end"/>
53
        <error to="fail"/>
54
    </action>
55
    <kill name="fail">
56
		<message>Unfortunately, the workflow failed -- error message:
57
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
58
    </kill>
59
    <end name="end"/>
60
</workflow-app>
0 61

  
modules/icm-iis-citationmatching-direct/trunk/src/main/resources/eu/dnetlib/iis/citationmatching/direct/job.properties
1
input=/user/marek.horst/transformers/metadatamerger/working_dir/out
2
output=${workingDir}/citation
0 3

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/test-custom-log4j.properties
1
#
2
# Licensed to the Apache Software Foundation (ASF) under one
3
# or more contributor license agreements.  See the NOTICE file
4
# distributed with this work for additional information
5
# regarding copyright ownership.  The ASF licenses this file
6
# to you under the Apache License, Version 2.0 (the
7
# "License"); you may not use this file except in compliance
8
# with the License.  You may obtain a copy of the License at
9
# 
10
#      http://www.apache.org/licenses/LICENSE-2.0
11
# 
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
#
18

  
19
#    http://www.apache.org/licenses/LICENSE-2.0
20
#
21
# Unless required by applicable law or agreed to in writing, software
22
# distributed under the License is distributed on an "AS IS" BASIS,
23
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24
# See the License for the specific language governing permissions and
25
# limitations under the License. See accompanying LICENSE file.
26

  
27
#
28

  
29
log4j.appender.oozie=org.apache.log4j.ConsoleAppender
30
log4j.appender.oozie.Target=System.out
31
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
32
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
33

  
34
log4j.appender.null=org.apache.log4j.varia.NullAppender
35

  
36
log4j.logger.org.apache=INFO, oozie
37
log4j.logger.org.mortbay=WARN, oozie
38
log4j.logger.org.hsqldb=WARN, oozie
39

  
40
log4j.logger.opslog=NONE, null
41
log4j.logger.applog=NONE, null
42
log4j.logger.instrument=NONE, null
43

  
44
log4j.logger.a=NONE, null
45

  
0 46

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/hsqldb-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
<property>
19
     <name>oozie.action.max.output.data</name>
20
     <value>8192</value>
21
</property>
22
    <property>
23
        <name>oozie.service.JPAService.jdbc.driver</name>
24
        <value>org.hsqldb.jdbcDriver</value>
25
    </property>
26
    <property>
27
        <name>oozie.service.JPAService.jdbc.url</name>
28
        <value>jdbc:hsqldb:mem:oozie-db;create=true</value>
29
    </property>
30
</configuration>
0 31

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/oracle-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
<property>
19
     <name>oozie.action.max.output.data</name>
20
     <value>8192</value>
21
</property>
22
    <property>
23
        <name>oozie.service.JPAService.jdbc.driver</name>
24
        <value>oracle.jdbc.driver.OracleDriver</value>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.port</name>
28
        <value>1521</value>
29
    </property>
30
    <property>
31
        <name>oozie.test.db.name</name>
32
        <value>xe</value>
33
    </property>
34
    <property>
35
        <name>oozie.service.JPAService.jdbc.url</name>
36
        <value>jdbc:oracle:thin:@//${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
37
    </property>
38
    <property>
39
        <name>oozie.service.JPAService.jdbc.username</name>
40
        <value>oozie</value>
41
    </property>
42
    <property>
43
        <name>oozie.service.JPAService.jdbc.password</name>
44
        <value>oozie</value>
45
    </property>
46
</configuration>
0 47

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/mysql-oozie-site.xml
1
<?xml version="1.0"?>
2
<!--
3
  Licensed to the Apache Software Foundation (ASF) under one
4
  or more contributor license agreements.  See the NOTICE file
5
  distributed with this work for additional information
6
  regarding copyright ownership.  The ASF licenses this file
7
  to you under the Apache License, Version 2.0 (the
8
  "License"); you may not use this file except in compliance
9
  with the License.  You may obtain a copy of the License at
10

  
11
       http://www.apache.org/licenses/LICENSE-2.0
12

  
13
  Unless required by applicable law or agreed to in writing, software
14
  distributed under the License is distributed on an "AS IS" BASIS,
15
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
  See the License for the specific language governing permissions and
17
  limitations under the License.
18
-->
19
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
20
<configuration>
21
<property>
22
     <name>oozie.action.max.output.data</name>
23
     <value>8192</value>
24
</property>
25
    <property>
26
      <name>oozie.service.JPAService.jdbc.driver</name>
27
        <value>com.mysql.jdbc.Driver</value>
28
        <description>JDBC driver class.</description>
29
    </property>
30
    <property>
31
        <name>oozie.test.db.port</name>
32
        <value>3306</value>
33
    </property>
34
    <property>
35
      <name>oozie.service.JPAService.jdbc.url</name>
36
        <value>jdbc:mysql://${oozie.test.db.host}:${oozie.test.db.port}/oozie</value>
37
        <description>JDBC URL.</description>
38
    </property>
39
    <property>
40
        <name>oozie.service.JPAService.jdbc.username</name>
41
        <value>oozie</value>
42
        <description>DB user name.</description>
43
    </property>
44
    <property>
45
        <name>oozie.service.JPAService.jdbc.password</name>
46
        <value>oozie</value>
47
        <description>
48
            DB user password. IMPORTANT: if password is empty leave a 1 space string, the service trims the
49
            value, if empty Configuration assumes it is NULL.
50
        </description>
51
    </property>
52
</configuration>
0 53

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/postgres-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
<property>
19
     <name>oozie.action.max.output.data</name>
20
     <value>8192</value>
21
</property>
22
    <property>
23
        <name>oozie.service.JPAService.jdbc.driver</name>
24
        <value>org.postgresql.Driver</value>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.port</name>
28
        <value>5432</value>
29
    </property>
30
    <property>
31
        <name>oozie.test.db.name</name>
32
        <value>oozie</value>
33
    </property>
34
    <property>
35
        <name>oozie.service.JPAService.jdbc.url</name>
36
        <value>jdbc:postgresql://${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
37
    </property>
38
    <property>
39
        <name>oozie.service.JPAService.jdbc.username</name>
40
        <value>oozie</value>
41
    </property>
42
    <property>
43
        <name>oozie.service.JPAService.jdbc.password</name>
44
        <value>oozie</value>
45
    </property>
46
</configuration>
0 47

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/test-oozie-log4j.properties
1
#
2
# Licensed to the Apache Software Foundation (ASF) under one
3
# or more contributor license agreements.  See the NOTICE file
4
# distributed with this work for additional information
5
# regarding copyright ownership.  The ASF licenses this file
6
# to you under the Apache License, Version 2.0 (the
7
# "License"); you may not use this file except in compliance
8
# with the License.  You may obtain a copy of the License at
9
# 
10
#      http://www.apache.org/licenses/LICENSE-2.0
11
# 
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
#
18

  
19
#    http://www.apache.org/licenses/LICENSE-2.0
20
#
21
# Unless required by applicable law or agreed to in writing, software
22
# distributed under the License is distributed on an "AS IS" BASIS,
23
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24
# See the License for the specific language governing permissions and
25
# limitations under the License. See accompanying LICENSE file.
26

  
27
#
28

  
29
log4j.appender.oozie=org.apache.log4j.ConsoleAppender
30
log4j.appender.oozie.Target=System.out
31
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
32
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
33

  
34
log4j.appender.null=org.apache.log4j.varia.NullAppender
35

  
36
log4j.logger.org.apache=INFO, oozie
37
log4j.logger.org.mortbay=WARN, oozie
38
log4j.logger.org.hsqldb=WARN, oozie
39

  
40
log4j.logger.opslog=NONE, null
41
log4j.logger.applog=NONE, null
42
log4j.logger.instrument=NONE, null
43

  
44
log4j.logger.a=ALL, null
45

  
0 46

  
modules/icm-iis-citationmatching-direct/trunk/core/src/test/resources/hadoop-config.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Licensed to the Apache Software Foundation (ASF) under one
5
  or more contributor license agreements.  See the NOTICE file
6
  distributed with this work for additional information
7
  regarding copyright ownership.  The ASF licenses this file
8
  to you under the Apache License, Version 2.0 (the
9
  "License"); you may not use this file except in compliance
10
  with the License.  You may obtain a copy of the License at
11

  
12
       http://www.apache.org/licenses/LICENSE-2.0
13

  
14
  Unless required by applicable law or agreed to in writing, software
15
  distributed under the License is distributed on an "AS IS" BASIS,
16
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
  See the License for the specific language governing permissions and
18
  limitations under the License.
19
-->
20
<configuration>
21

  
22
    <property>
23
        <name>mapreduce.jobtracker.kerberos.principal</name>
24
        <value>mapred/_HOST@LOCALREALM</value>
25
    </property>
26

  
27
    <property>
28
        <name>dfs.namenode.kerberos.principal</name>
29
        <value>hdfs/_HOST@LOCALREALM</value>
30
    </property>
31

  
32
    <property>
33
        <name>mapreduce.framework.name</name>
34
        <value>yarn</value>
35
    </property>
36

  
37
</configuration>
0 38

  
modules/icm-iis-citationmatching-direct/trunk/core/README.md
1
This directory and its subdirectories and files are here as a hack to make the Oozie unit tests work. 
2

  
3
Details
4
-------
5
Oozie tests assume that they're placed inside directory tree of Oozie source code -- see the source code of class `XTestCase` which is an ancestor of `MiniOozieTestCase` class which, in turn, should be inherited by your test case class. 
6

  
7
How to get the source code of the `XTestCase` class:
8

  
9
- download source code of the Ubuntu's `oozie` package prepared by Cloudera (`apt-get source oozie`). It is version 3.1.3+155 of this package. 
10
- open file `oozie-3.1.3+155/src/core/src/test/java/org/apache/oozie/test/XTestCase.java` and look at lines 93-105.
modules/icm-iis-citationmatching-direct/trunk/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>icm-iis-parent-container</artifactId>
6
		<version>1.0.1-SNAPSHOT</version>
7
	</parent>
8
	<modelVersion>4.0.0</modelVersion>
9
	<artifactId>icm-iis-citationmatching-direct</artifactId>
10
	<packaging>jar</packaging>
11
	<version>1.0.1-SNAPSHOT</version>
12

  
13
	<scm>
14
	  <developerConnection>
15
	    scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-citationmatching-direct/trunk
16
	  </developerConnection>
17
	</scm>
18
	
19
	<properties>
20
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
21
	</properties>
22
	<dependencies>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>icm-iis-core</artifactId>
26
			<version>[1.0.0,2.0.0)</version>
27
		</dependency>
28
		<dependency>
29
			<groupId>eu.dnetlib</groupId>
30
			<artifactId>icm-iis-core</artifactId>
31
			<version>[1.0.0,2.0.0)</version>
32
			<type>test-jar</type>
33
			<scope>test</scope>
34
		</dependency>
35
        <dependency>
36
            <groupId>eu.dnetlib</groupId>
37
            <artifactId>icm-iis-schemas</artifactId>
38
            <version>[1.0.0,2.0.0)</version>
39
        </dependency>
40
        <!-- required after introducing 'provided' scope for hadoop libs -->
41
        <dependency>
42
			<groupId>org.apache.hadoop</groupId>
43
			<artifactId>hadoop-common</artifactId>
44
			<version>${iis.hadoop.common.version}</version>
45
			<scope>provided</scope>
46
		</dependency>
47
		<!-- Needed by Oozie tests { -->
48
		<!-- required after introducing 'provided' scope for hadoop dependencies -->
49
		<dependency>
50
			<groupId>org.apache.oozie</groupId>
51
			<artifactId>oozie-core</artifactId>
52
			<version>${iis.oozie.version}</version>
53
			<scope>test</scope>
54
		</dependency>
55
		<!-- PigMain was moved to oozie-sharelib-pig since cdh4.3.1 -->
56
		<dependency>
57
			<groupId>org.apache.oozie</groupId>
58
			<artifactId>oozie-sharelib-pig</artifactId>
59
			<version>${iis.oozie.version}</version>
60
			<scope>test</scope>
61
		</dependency>
62
		<dependency>
63
			<groupId>org.apache.hadoop</groupId>
64
			<artifactId>hadoop-hdfs</artifactId>
65
			<version>${iis.hadoop.hdfs.version}</version>
66
			<scope>test</scope>
67
		</dependency>
68
		<!-- end of required after introducing 'provided' scope for hadoop dependencies -->
69
		<dependency>
70
			<groupId>org.apache.oozie</groupId>
71
			<artifactId>oozie-core</artifactId>
72
			<version>${iis.oozie.version}</version>
73
			<type>test-jar</type>
74
			<scope>test</scope>
75
		</dependency>
76
		<dependency>
77
			<groupId>org.apache.hadoop</groupId>
78
			<artifactId>hadoop-hdfs</artifactId>
79
			<version>${iis.hadoop.hdfs.version}</version>
80
			<type>test-jar</type>
81
			<scope>test</scope>
82
		</dependency>
83
		<dependency>
84
			<groupId>org.apache.hadoop</groupId>
85
			<artifactId>hadoop-test</artifactId>
86
			<version>${iis.hadoop.test.version}</version>
87
			<scope>test</scope>
88
		</dependency>
89
		<dependency>
90
			<groupId>org.apache.hadoop</groupId>
91
			<artifactId>hadoop-common</artifactId>
92
			<version>${iis.hadoop.common.version}</version>
93
			<type>test-jar</type>
94
			<scope>test</scope>
95
		</dependency>
96
		<!-- Needed by Oozie tests } -->
97
		<!-- Needed to run Pig jobs { -->
98
		<dependency>
99
			<groupId>org.apache.pig</groupId>
100
			<artifactId>pig</artifactId>
101
			<version>${iis.pig.version}</version>
102
			<!-- this lib cannot be set to provided -->
103
		</dependency>
104

  
105
		<!-- replacing hacked pig-avrostorage with original pig -->
106
	    <dependency>
107
			<groupId>eu.dnetlib</groupId>
108
			<artifactId>icm-iis-3rdparty-pig-avrostorage</artifactId>
109
			<version>[1.0.0,2.0.0)</version>
110
			<type>jar</type>
111
		</dependency>
112
		<!-- FIXME change to version bound with CDH5 when upgrading cluster -->
113
		<!--
114
		<dependency>
115
			<groupId>org.apache.pig</groupId>
116
			<artifactId>piggybank</artifactId>
117
			<version>${iis.pig.version}</version>
118
		</dependency>
119
		-->
120
		<dependency>
121
		  <groupId>com.linkedin.datafu</groupId>
122
		  <artifactId>datafu</artifactId>
123
		  <version>1.2.0</version>
124
		</dependency>
125
		<!-- Needed to run Pig jobs } -->
126
	</dependencies>
127
	<repositories>
128
	    <!-- This repository contains our patched 
129
	    version of "avro" and "avro-mapred" modules (see the dependencies section)
130
	    This entry might be removed when the patch to these modules becomes 
131
	    a part of the official Avro release.-->
132
	    <repository>
133
			<id>dnet-deps</id>
134
			<name>dnet dependencies</name>
135
			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps</url>
136
			<releases>
137
				<enabled>true</enabled>
138
			</releases>
139
			<snapshots>
140
				<enabled>false</enabled> 
141
			</snapshots>
142
			<layout>default</layout>
143
		</repository>
144
	</repositories>
145
</project>
0 146

  

Also available in: Unified diff