Revision 30855
Added by Mateusz Kobos almost 10 years ago
modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-generic.sh | ||
---|---|---|
#!/bin/bash
#
# Generate the example Oozie workflow application package for a given Hadoop
# user and host name.
#
# Usage: generate_example_workflow_app-generic.sh HADOOP_USER HADOOP_HOST
#
# Runs "mvn clean" and then "mvn package -Poozie". WORKFLOW_PATH is a relative
# path, so the script is expected to be started from the module's root
# directory (the one Maven is supposed to build).
#
# Exit status: 2 on missing/empty arguments, otherwise the status of the
# failing Maven invocation, or of "mvn package" when everything ran.

# Both values end up inside -D properties (sandbox path, user and host name);
# refuse to build a package around empty ones.
if [ "$#" -lt 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
  echo "usage: $0 HADOOP_USER HADOOP_HOST" >&2
  exit 2
fi

HADOOP_USER=$1
HADOOP_HOST=$2
WORKFLOW_NAME=citation_metrics_advanced_workflow_app
WORKFLOW_PATH=src/main/resources/eu/dnetlib/iis/citation-metrics-advanced

# Do not package on top of stale build output if the clean step fails.
mvn clean || exit

echo "generating package \"$WORKFLOW_NAME\" in directory \"target/$WORKFLOW_NAME\""
# Every property is quoted so that a value containing whitespace or glob
# characters still reaches Maven as a single argument.
mvn package -Poozie \
  "-Doozie.package.file.name=$WORKFLOW_NAME" \
  "-Dworkflow.source.dir=$WORKFLOW_PATH" \
  "-DsandboxDir=/user/$HADOOP_USER/iis_sandbox/$WORKFLOW_NAME" \
  "-Duser.name=$HADOOP_USER" \
  "-Dhost.name=$HADOOP_HOST"
modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-cluster.sh | ||
---|---|---|
#!/bin/bash
#
# Generate a sample workflow app to be run on the OpenAIRE+ Hadoop cluster by
# the user named in HADOOP_USER below.
#
# Delegates to generate_example_workflow_app-generic.sh, which is looked up in
# the same directory as this script.

# Quoted so that a checkout path containing spaces does not get word-split.
SCRIPTS_DIR=$(dirname "$0")
HADOOP_USER=lars.holmnielsen

"$SCRIPTS_DIR/generate_example_workflow_app-generic.sh" "$HADOOP_USER" hadoop-master.openaire.eu
modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-localhost.sh | ||
---|---|---|
#!/bin/bash
#
# Generate a sample workflow app to be run on a local Hadoop instance by the
# current user (taken from the USER environment variable).
#
# Delegates to generate_example_workflow_app-generic.sh, which is looked up in
# the same directory as this script.

# Quoted so that a checkout path containing spaces does not get word-split.
SCRIPTS_DIR=$(dirname "$0")

# "$USER" is quoted so that the host name always stays the second argument,
# even if USER happens to be empty.
"$SCRIPTS_DIR/generate_example_workflow_app-generic.sh" "$USER" localhost
modules/cern-iis-metrics-extended/trunk/src/main/scripts/upload_run.sh | ||
---|---|---|
#!/bin/bash
#
# Upload the packaged workflow application to the remote host, unpack it there
# and run its upload_workflow.sh and run_workflow.sh scripts.
#
# Expects target/citation_metrics_advanced_workflow_app.tar.gz to exist
# relative to the current directory.
#
# Exit status: that of the first failing ssh/scp step, or of the final remote
# command chain.

REMOTE=lars.holmnielsen@hadoop.openaire.eu
REMOTE_DIR=/home/lars.holmnielsen/tmp
PACKAGE=citation_metrics_advanced_workflow_app.tar.gz

# The glob is expanded by the remote shell, not locally.
# NOTE(review): "*.*" only matches names containing a dot, so entries without
# one (e.g. previously extracted directories) are left behind -- confirm that
# this is intended.
ssh "$REMOTE" "rm -Rf $REMOTE_DIR/*.*" || exit

# The package is located inside the local target/ directory.
scp "target/$PACKAGE" "$REMOTE:$REMOTE_DIR/" || exit

# Chained with && so that nothing is uploaded or run after a failed step.
ssh "$REMOTE" "cd $REMOTE_DIR/ && tar -xzvf $PACKAGE && ./upload_workflow.sh && ./run_workflow.sh"
modules/cern-iis-metrics-extended/trunk/src/main/resources/eu/dnetlib/iis/citation-metrics-advanced/oozie_app/workflow.xml | ||
---|---|---|
<?xml version="1.0"?>
<!-- Documentation comments in this file are written in "markdown" syntax
     (including the division into sections). -->
<workflow-app xmlns="uri:oozie:workflow:0.3"
    name="invenio-uploader">
    <start to="invenio_uploader" />
    <action name="invenio_uploader">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- Remove the data left by this node and recreate its directory -->
            <prepare>
                <delete path="${nameNode}${workingDir}/invenio_uploader" />
                <mkdir path="${nameNode}${workingDir}/invenio_uploader" />
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <!-- Generic wrapper that launches the Java class given as first argument -->
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
            <!-- The business Java class that actually gets executed -->
            <arg>eu.dnetlib.iis.citationmetrics.InvenioUploader</arg>
            <!-- Every input and output port has to be bound to an HDFS path,
                 and the working directory has to be given as well -->
            <arg>-SworkingDir=${workingDir}/invenio_uploader/working_dir</arg>
            <!-- Output port bindings, currently disabled:
            <arg>-Operson=${workingDir}/invenio_uploader/person</arg>
            <arg>-Odocument=${workingDir}/invenio_uploader/document</arg>
            -->
        </java>
        <ok to="end" />
        <error to="fail" />
    </action>
    <kill name="fail">
        <message>Unfortunately, the process failed -- error message:
            [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
    <end name="end" />
</workflow-app>
modules/cern-iis-metrics-extended/trunk/src/main/java/eu/dnetlib/iis/citationmetrics/InvenioUploader.java | ||
---|---|---|
1 |
package eu.dnetlib.iis.citationmetrics; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import org.apache.hadoop.fs.FileSystem; |
|
8 |
import org.apache.hadoop.fs.Path; |
|
9 |
|
|
10 |
|
|
11 |
//import eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person; |
|
12 |
//import eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document; |
|
13 |
//import eu.dnetlib.iis.core.examples.StandardDataStoreExamples; |
|
14 |
import eu.dnetlib.iis.core.java.HadoopContext; |
|
15 |
import eu.dnetlib.iis.core.java.PortBindings; |
|
16 |
import eu.dnetlib.iis.core.java.Process; |
|
17 |
import eu.dnetlib.iis.core.java.io.DataStore; |
|
18 |
import eu.dnetlib.iis.core.java.io.FileSystemPath; |
|
19 |
import eu.dnetlib.iis.core.java.porttype.AvroPortType; |
|
20 |
import eu.dnetlib.iis.core.java.porttype.PortType; |
|
21 |
|
|
22 |
/** Invenio Uploader |
|
23 |
* @author Lars Holm Nielsen |
|
24 |
*/ |
|
25 |
public class InvenioUploader implements Process { |
|
26 |
|
|
27 |
// private final static String personPort = "person"; |
|
28 |
// private final static String documentPort = "document"; |
|
29 |
|
|
30 |
public Map<String, PortType> getInputPorts() { |
|
31 |
return new HashMap<String, PortType>(); |
|
32 |
} |
|
33 |
|
|
34 |
@Override |
|
35 |
public Map<String, PortType> getOutputPorts() { |
|
36 |
return createOutputPorts(); |
|
37 |
} |
|
38 |
|
|
39 |
private static HashMap<String, PortType> createOutputPorts(){ |
|
40 |
HashMap<String, PortType> outputPorts = |
|
41 |
new HashMap<String, PortType>(); |
|
42 |
// outputPorts.put(personPort, |
|
43 |
// new AvroPortType(Person.SCHEMA$)); |
|
44 |
// outputPorts.put(documentPort, |
|
45 |
// new AvroPortType(Document.SCHEMA$)); |
|
46 |
return outputPorts; |
|
47 |
} |
|
48 |
|
|
49 |
@Override |
|
50 |
public void run(PortBindings portBindings, HadoopContext context, |
|
51 |
Map<String, String> parameters) throws IOException{ |
|
52 |
System.out.println("Hello World..."); |
|
53 |
// Map<String, Path> output = portBindings.getOutput(); |
|
54 |
// FileSystem fs = FileSystem.get(context.getConfiguration()); |
|
55 |
|
|
56 |
// DataStore.create(StandardDataStoreExamples.getDocument(), |
|
57 |
// new FileSystemPath(fs, output.get(documentPort))); |
|
58 |
// DataStore.create(StandardDataStoreExamples.getPerson(), |
|
59 |
// new FileSystemPath(fs, output.get(personPort))); |
|
60 |
} |
|
61 |
|
|
62 |
} |
modules/cern-iis-metrics-extended/trunk/pom.xml | ||
---|---|---|
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <parent>
    <groupId>eu.dnetlib</groupId>
    <artifactId>icm-iis-parent-container</artifactId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>
  <modelVersion>4.0.0</modelVersion>
  <artifactId>cern-iis-metrics-extended</artifactId>
  <packaging>jar</packaging>
  <!-- <version>0.0.1-SNAPSHOT</version> -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>eu.dnetlib</groupId>
      <artifactId>icm-iis-core</artifactId>
      <version>0.0.1-SNAPSHOT</version>
    </dependency>
    <dependency>
      <groupId>eu.dnetlib</groupId>
      <artifactId>icm-iis-core</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.google.protobuf</groupId>
      <artifactId>protobuf-java</artifactId>
      <version>2.4.1</version>
    </dependency>
    <!-- Required since the hadoop libraries were given the 'provided' scope -->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${iis.hadoop.common.version}</version>
      <scope>provided</scope>
    </dependency>
    <!-- Needed by Oozie tests { -->
    <dependency>
      <groupId>org.apache.oozie</groupId>
      <artifactId>oozie-core</artifactId>
      <version>${iis.oozie.version}</version>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${iis.hadoop.hdfs.version}</version>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-test</artifactId>
      <version>${iis.hadoop.test.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${iis.hadoop.common.version}</version>
      <type>test-jar</type>
      <scope>test</scope>
    </dependency>
    <!-- Needed by Oozie tests } -->
    <!-- Needed to run Pig jobs { -->
    <dependency>
      <groupId>org.apache.pig</groupId>
      <artifactId>pig</artifactId>
      <version>${iis.pig.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.antlr</groupId>
      <artifactId>antlr-runtime</artifactId>
      <version>3.4</version>
    </dependency>
    <!-- Needed to run Pig jobs } -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.10</version>
      <scope>test</scope>
    </dependency>
    <!-- Needed by Avro { -->
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.7.4</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro-mapred</artifactId>
      <version>1.7.4</version>
      <classifier>hadoop2</classifier>
    </dependency>
    <!-- This dependency appears to be necessary: without it a wrong (older)
         version of the library gets used, which is not the one Avro
         expects -->
    <dependency>
      <groupId>org.codehaus.jackson</groupId>
      <artifactId>jackson-core-asl</artifactId>
      <version>1.9.11</version>
    </dependency>
    <!-- This dependency appears to be necessary: without it, Oozie tests of
         Avro-based map-reduce fail with "java.lang.NoClassDefFoundError"
         and a statement that the "Paranamer" class is missing. -->
    <dependency>
      <groupId>com.thoughtworks.paranamer</groupId>
      <artifactId>paranamer</artifactId>
      <version>2.5.2</version>
    </dependency>
    <!-- Needed by Avro } -->
  </dependencies>
  <build>
    <plugins>
      <!-- Generates Java classes from the Avro schemas -->
      <plugin>
        <groupId>org.apache.avro</groupId>
        <artifactId>avro-maven-plugin</artifactId>
        <version>1.7.3</version>
        <executions>
          <execution>
            <phase>generate-sources</phase>
            <goals>
              <goal>schema</goal>
              <goal>idl-protocol</goal>
            </goals>
            <configuration>
              <sourceDirectory>${project.basedir}/src/main/resources/</sourceDirectory>
              <outputDirectory>${project.basedir}/target/generated-sources/java/</outputDirectory>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.6</source>
          <target>1.6</target>
        </configuration>
      </plugin>
      <!-- Keeps "Maven->Update Project Configuration" from forgetting the
           "target/generated-sources/java" source path -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>add-source</id>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>${project.build.directory}/generated-sources/java/</source>
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
    <pluginManagement>
      <plugins>
        <!-- This plugin's configuration only stores Eclipse m2e settings.
             It has no influence on the Maven build itself. -->
        <plugin>
          <groupId>org.eclipse.m2e</groupId>
          <artifactId>lifecycle-mapping</artifactId>
          <version>1.0.0</version>
          <configuration>
            <lifecycleMappingMetadata>
              <pluginExecutions>
                <pluginExecution>
                  <pluginExecutionFilter>
                    <groupId>org.apache.avro</groupId>
                    <artifactId>avro-maven-plugin</artifactId>
                    <versionRange>[1.7.3,)</versionRange>
                    <goals>
                      <goal>idl-protocol</goal>
                      <goal>schema</goal>
                    </goals>
                  </pluginExecutionFilter>
                  <action>
                    <ignore></ignore>
                  </action>
                </pluginExecution>
                <pluginExecution>
                  <pluginExecutionFilter>
                    <groupId>org.codehaus.mojo</groupId>
                    <artifactId>build-helper-maven-plugin</artifactId>
                    <versionRange>[1.7,)</versionRange>
                    <goals>
                      <goal>add-source</goal>
                    </goals>
                  </pluginExecutionFilter>
                  <action>
                    <ignore></ignore>
                  </action>
                </pluginExecution>
              </pluginExecutions>
            </lifecycleMappingMetadata>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
  <repositories>
    <repository>
      <id>cloudera</id>
      <name>Cloudera Repository</name>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
    </repository>
    <!-- Hosts our patched versions of the "avro" and "avro-mapred" modules
         (see the dependencies section). The entry might be removed once the
         patch to these modules becomes part of the official Avro release. -->
    <repository>
      <id>dnet-deps</id>
      <name>dnet dependencies</name>
      <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps</url>
      <releases>
        <enabled>true</enabled>
      </releases>
      <snapshots>
        <enabled>false</enabled>
      </snapshots>
      <layout>default</layout>
    </repository>
  </repositories>
</project>
Also available in: Unified diff
Deleting the project since this functionality was taken over by ICM and implemented in "icm-iis-statistics" project.