Project

General

Profile

« Previous | Next » 

Revision 30855

Added by Mateusz Kobos almost 10 years ago

Deleting the project since this functionality was taken over by ICM and implemented in the "icm-iis-statistics" project.

View differences:

modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-generic.sh
1
#!/bin/bash

## Generate example workflow app for given user and host name.
##
## Usage: generate_example_workflow_app-generic.sh <hadoop_user> <hadoop_host>

# Both arguments are substituted into Maven properties below (sandbox path,
# user name, host name); refuse to build when either one is missing or empty.
if [ -z "$1" ] || [ -z "$2" ]; then
    echo "usage: $0 <hadoop_user> <hadoop_host>" >&2
    exit 1
fi

HADOOP_USER=$1
HADOOP_HOST=$2
WORKFLOW_NAME=citation_metrics_advanced_workflow_app
WORKFLOW_PATH=src/main/resources/eu/dnetlib/iis/citation-metrics-advanced

# Do not package on top of a target/ directory that failed to clean.
mvn clean || exit 1

echo "generating package \"$WORKFLOW_NAME\" in directory \"target/$WORKFLOW_NAME\""
# Every property is quoted so that values containing spaces stay one argument.
mvn package -Poozie \
    "-Doozie.package.file.name=$WORKFLOW_NAME" \
    "-Dworkflow.source.dir=$WORKFLOW_PATH" \
    "-DsandboxDir=/user/$HADOOP_USER/iis_sandbox/$WORKFLOW_NAME" \
    "-Duser.name=$HADOOP_USER" \
    "-Dhost.name=$HADOOP_HOST"
14 0

  
modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-cluster.sh
1
#!/bin/bash

## Generate a sample workflow app to be run on OpenAIRE+ hadoop cluster by user named "lars.holmnielsen"

# Directory containing this script, so the generic script is located relative
# to it rather than to the caller's working directory.
SCRIPTS_DIR=$(dirname "$0")
HADOOP_USER=lars.holmnielsen

# Quoted so that a checkout path containing spaces does not split the command.
"$SCRIPTS_DIR/generate_example_workflow_app-generic.sh" "$HADOOP_USER" hadoop-master.openaire.eu
9 0

  
modules/cern-iis-metrics-extended/trunk/src/main/scripts/generate_example_workflow_app-localhost.sh
1
#!/bin/bash

## Generate a sample workflow app to be run on local Hadoop instance by the current user

# Directory containing this script, so the generic script is located relative
# to it rather than to the caller's working directory.
SCRIPTS_DIR=$(dirname "$0")

# Quoted so that a checkout path containing spaces does not split the command.
"$SCRIPTS_DIR/generate_example_workflow_app-generic.sh" "$USER" localhost
8 0

  
modules/cern-iis-metrics-extended/trunk/src/main/scripts/upload_run.sh
1
# Clear the remote staging directory before uploading a new package.
# NOTE(review): the `*.*` glob only matches names containing a dot, so
# extension-less files and directories are left in place -- confirm intended.
ssh lars.holmnielsen@hadoop.openaire.eu "rm -Rf /home/lars.holmnielsen/tmp/*.*"
# Upload the packaged workflow app. The archive is expected inside the local
# target/ directory (presumably written there by `mvn package` -- verify), so it
# must be addressed as a single path rather than as two separate scp sources.
scp target/citation_metrics_advanced_workflow_app.tar.gz lars.holmnielsen@hadoop.openaire.eu:/home/lars.holmnielsen/tmp/
# Unpack, upload and run on the cluster; `&&` stops the chain at the first
# failing step so the workflow scripts never run after a failed cd or tar.
ssh lars.holmnielsen@hadoop.openaire.eu "cd /home/lars.holmnielsen/tmp/ && tar -xzvf citation_metrics_advanced_workflow_app.tar.gz && ./upload_workflow.sh && ./run_workflow.sh"
modules/cern-iis-metrics-extended/trunk/src/main/resources/eu/dnetlib/iis/citation-metrics-advanced/oozie_app/workflow.xml
1
<?xml version="1.0"?>
2
<!-- Note that documentation placed in comments in this file uses the
3
"markdown" syntax (along with division into sections). -->
4
<!-- Workflow overview: `start` transfers control to the single `invenio_uploader`
java action, which runs `eu.dnetlib.iis.core.java.ProcessWrapper` with
`eu.dnetlib.iis.citationmetrics.InvenioUploader` as its first argument.
On success the action transitions to `end`; on error it transitions to the
`fail` kill node, which reports the error message of the last failed node.
The output port bindings (`-Operson`, `-Odocument`) are currently commented out. -->
<workflow-app xmlns="uri:oozie:workflow:0.3"
5
    name="invenio-uploader">
6
    <start to="invenio_uploader" />
7
    <action name="invenio_uploader">
8
        <java>
9
            <job-tracker>${jobTracker}</job-tracker>
10
            <name-node>${nameNode}</name-node>
11
            <!-- The data generated by this node is deleted in this section -->
12
            <prepare>
13
                <delete path="${nameNode}${workingDir}/invenio_uploader" />
14
                <mkdir path="${nameNode}${workingDir}/invenio_uploader" />
15
            </prepare>
16
            <configuration>
17
                <property>
18
                    <name>mapred.job.queue.name</name>
19
                    <value>${queueName}</value>
20
                </property>
21
            </configuration>
22
            <!-- This is a simple wrapper for the Java code -->
23
            <main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
24
            <!-- The business Java code that gets to be executed -->
25
            <arg>eu.dnetlib.iis.citationmetrics.InvenioUploader</arg>
26
            <!-- All input and output ports have to be bound to paths in HDFS, working
27
                directory has to be specified as well -->
28
            <arg>-SworkingDir=${workingDir}/invenio_uploader/working_dir</arg>
29
            <!--<arg>-Operson=${workingDir}/invenio_uploader/person</arg>
30
            <arg>-Odocument=${workingDir}/invenio_uploader/document</arg>
31
            -->
32
        </java>
33
        <ok to="end" />
34
        <error to="fail" />
35
    </action>
36
    <kill name="fail">
37
        <message>Unfortunately, the process failed -- error message:
38
            [${wf:errorMessage(wf:lastErrorNode())}]
39
        </message>
40
    </kill>
41
    <end name="end" />
42
</workflow-app>
modules/cern-iis-metrics-extended/trunk/src/main/java/eu/dnetlib/iis/citationmetrics/InvenioUploader.java
1
package eu.dnetlib.iis.citationmetrics;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5

  
6
import java.io.IOException;
7
import org.apache.hadoop.fs.FileSystem;
8
import org.apache.hadoop.fs.Path;
9

  
10

  
11
//import eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person;
12
//import eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document;
13
//import eu.dnetlib.iis.core.examples.StandardDataStoreExamples;
14
import eu.dnetlib.iis.core.java.HadoopContext;
15
import eu.dnetlib.iis.core.java.PortBindings;
16
import eu.dnetlib.iis.core.java.Process;
17
import eu.dnetlib.iis.core.java.io.DataStore;
18
import eu.dnetlib.iis.core.java.io.FileSystemPath;
19
import eu.dnetlib.iis.core.java.porttype.AvroPortType;
20
import eu.dnetlib.iis.core.java.porttype.PortType;
21

  
22
/** Invenio Uploader
23
 * @author Lars Holm Nielsen
24
 */
25
public class InvenioUploader implements Process {
26

  
27
    // private final static String personPort = "person";
28
    // private final static String documentPort = "document";
29

  
30
    public Map<String, PortType> getInputPorts() {
31
        return new HashMap<String, PortType>();
32
    }
33

  
34
    @Override
35
    public Map<String, PortType> getOutputPorts() {
36
        return createOutputPorts();
37
    }
38

  
39
    private static HashMap<String, PortType> createOutputPorts(){
40
        HashMap<String, PortType> outputPorts =
41
                new HashMap<String, PortType>();
42
        // outputPorts.put(personPort,
43
        //         new AvroPortType(Person.SCHEMA$));
44
        // outputPorts.put(documentPort,
45
        //         new AvroPortType(Document.SCHEMA$));
46
        return outputPorts;
47
    }
48

  
49
    @Override
50
    public void run(PortBindings portBindings, HadoopContext context,
51
            Map<String, String> parameters) throws IOException{
52
        System.out.println("Hello World...");
53
        // Map<String, Path> output = portBindings.getOutput();
54
        // FileSystem fs = FileSystem.get(context.getConfiguration());
55

  
56
        // DataStore.create(StandardDataStoreExamples.getDocument(),
57
        //         new FileSystemPath(fs, output.get(documentPort)));
58
        // DataStore.create(StandardDataStoreExamples.getPerson(),
59
        //         new FileSystemPath(fs, output.get(personPort)));
60
    }
61

  
62
}
modules/cern-iis-metrics-extended/trunk/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
4
    <parent>
5
        <groupId>eu.dnetlib</groupId>
6
        <artifactId>icm-iis-parent-container</artifactId>
7
        <version>0.0.1-SNAPSHOT</version>
8
    </parent>
9
    <modelVersion>4.0.0</modelVersion>
10
    <artifactId>cern-iis-metrics-extended</artifactId>
11
    <packaging>jar</packaging>
12
    <!-- <version>0.0.1-SNAPSHOT</version> -->
13
    <properties>
14
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15
    </properties>
16
    <dependencies>
17
        <dependency>
18
            <groupId>eu.dnetlib</groupId>
19
            <artifactId>icm-iis-core</artifactId>
20
            <version>0.0.1-SNAPSHOT</version>
21
        </dependency>
22
        <dependency>
23
            <groupId>eu.dnetlib</groupId>
24
            <artifactId>icm-iis-core</artifactId>
25
            <version>0.0.1-SNAPSHOT</version>
26
            <type>test-jar</type>
27
            <scope>test</scope>
28
        </dependency>
29
        <dependency>
30
            <groupId>com.google.protobuf</groupId>
31
            <artifactId>protobuf-java</artifactId>
32
            <version>2.4.1</version>
33
        </dependency>
34
        <!-- required after introducing 'provided' scope for hadoop libs -->
35
        <dependency>
36
			<groupId>org.apache.hadoop</groupId>
37
			<artifactId>hadoop-common</artifactId>
38
			<version>${iis.hadoop.common.version}</version>
39
			<scope>provided</scope>
40
		</dependency>
41
        <!-- Needed by Oozie tests { -->
42
        <dependency>
43
            <groupId>org.apache.oozie</groupId>
44
            <artifactId>oozie-core</artifactId>
45
            <version>${iis.oozie.version}</version>
46
            <type>test-jar</type>
47
            <scope>test</scope>
48
        </dependency>
49
        <dependency>
50
            <groupId>org.apache.hadoop</groupId>
51
            <artifactId>hadoop-hdfs</artifactId>
52
            <version>${iis.hadoop.hdfs.version}</version>
53
            <type>test-jar</type>
54
            <scope>test</scope>
55
        </dependency>
56
        <dependency>
57
            <groupId>org.apache.hadoop</groupId>
58
            <artifactId>hadoop-test</artifactId>
59
            <version>${iis.hadoop.test.version}</version>
60
            <scope>test</scope>
61
        </dependency>
62
        <dependency>
63
            <groupId>org.apache.hadoop</groupId>
64
            <artifactId>hadoop-common</artifactId>
65
            <version>${iis.hadoop.common.version}</version>
66
            <type>test-jar</type>
67
            <scope>test</scope>
68
        </dependency>
69
        <!-- Needed by Oozie tests } -->
70
        <!-- Needed to run Pig jobs { -->
71
        <dependency>
72
            <groupId>org.apache.pig</groupId>
73
            <artifactId>pig</artifactId>
74
            <version>${iis.pig.version}</version>
75
            <scope>test</scope>
76
        </dependency>
77
        <dependency>
78
            <groupId>org.antlr</groupId>
79
            <artifactId>antlr-runtime</artifactId>
80
            <version>3.4</version>
81
        </dependency>
82
        <!-- Needed to run Pig jobs } -->
83
        <dependency>
84
            <groupId>junit</groupId>
85
            <artifactId>junit</artifactId>
86
            <version>4.10</version>
87
            <scope>test</scope>
88
        </dependency>
89
        <!-- Needed by Avro { -->
90
        <dependency>
91
            <groupId>org.apache.avro</groupId>
92
            <artifactId>avro</artifactId>
93
            <version>1.7.4</version>
94
        </dependency>
95
        <dependency>
96
            <groupId>org.apache.avro</groupId>
97
            <artifactId>avro-mapred</artifactId>
98
            <version>1.7.4</version>
99
            <classifier>hadoop2</classifier>
100
        </dependency>
101
        <!-- It seems that this dependency is needed, if it is not here,
102
        a wrong (older) version of this library is used, which is not
103
        what Avro expects -->
104
        <dependency>
105
            <groupId>org.codehaus.jackson</groupId>
106
            <artifactId>jackson-core-asl</artifactId>
107
            <version>1.9.11</version>
108
        </dependency>
109
        <!-- It seems that this dependency is needed, if it is not here,
110
        running Oozie tests of Avro-based map-reduce ends up with
111
        "java.lang.NoClassDefFoundError" exception and statement that
112
        "Paranamer" class is missing.-->
113
        <dependency>
114
            <groupId>com.thoughtworks.paranamer</groupId>
115
            <artifactId>paranamer</artifactId>
116
            <version>2.5.2</version>
117
        </dependency>
118
        <!-- Needed by Avro } -->
119
    </dependencies>
120
    <build>
121
        <plugins>
122
            <!-- Plugin that generates Java classes from Avro schemas -->
123
            <plugin>
124
                <groupId>org.apache.avro</groupId>
125
                <artifactId>avro-maven-plugin</artifactId>
126
                <version>1.7.3</version>
127
                <executions>
128
                    <execution>
129
                        <phase>generate-sources</phase>
130
                        <goals>
131
                            <goal>schema</goal>
132
                            <goal>idl-protocol</goal>
133
                        </goals>
134
                        <configuration>
135
                            <sourceDirectory>${project.basedir}/src/main/resources/</sourceDirectory>
136
                            <!-- Use the configurable build directory so the generated sources land where build-helper-maven-plugin registers them -->
                            <outputDirectory>${project.build.directory}/generated-sources/java/</outputDirectory>
137
                        </configuration>
138
                    </execution>
139
                </executions>
140
            </plugin>
141
            <plugin>
142
                <groupId>org.apache.maven.plugins</groupId>
143
                <artifactId>maven-compiler-plugin</artifactId>
144
                <configuration>
145
                    <source>1.6</source>
146
                    <target>1.6</target>
147
                </configuration>
148
            </plugin>
149
            <!-- This plugin makes the Maven->Update Project Configuration not forget
150
                about the "target/generated-sources/java" source path -->
151
            <plugin>
152
                <groupId>org.codehaus.mojo</groupId>
153
                <artifactId>build-helper-maven-plugin</artifactId>
154
                <executions>
155
                    <execution>
156
                        <id>add-source</id>
157
                        <phase>generate-sources</phase>
158
                        <goals>
159
                            <goal>add-source</goal>
160
                        </goals>
161
                        <configuration>
162
                            <sources>
163
                                <source>${project.build.directory}/generated-sources/java/</source>
164
                            </sources>
165
                        </configuration>
166
                    </execution>
167
                </executions>
168
            </plugin>
169
        </plugins>
170
        <pluginManagement>
171
            <plugins>
172
                <!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
173
                <plugin>
174
                    <groupId>org.eclipse.m2e</groupId>
175
                    <artifactId>lifecycle-mapping</artifactId>
176
                    <version>1.0.0</version>
177
                    <configuration>
178
                        <lifecycleMappingMetadata>
179
                            <pluginExecutions>
180
                                <pluginExecution>
181
                                    <pluginExecutionFilter>
182
                                        <groupId>
183
                                            org.apache.avro
184
                                        </groupId>
185
                                        <artifactId>
186
                                            avro-maven-plugin
187
                                        </artifactId>
188
                                        <versionRange>
189
                                            [1.7.3,)
190
                                        </versionRange>
191
                                        <goals>
192
                                            <goal>idl-protocol</goal>
193
                                            <goal>schema</goal>
194
                                        </goals>
195
                                    </pluginExecutionFilter>
196
                                    <action>
197
                                        <ignore></ignore>
198
                                    </action>
199
                                </pluginExecution>
200
                                <pluginExecution>
201
                                    <pluginExecutionFilter>
202
                                        <groupId>
203
                                            org.codehaus.mojo
204
                                        </groupId>
205
                                        <artifactId>
206
                                            build-helper-maven-plugin
207
                                        </artifactId>
208
                                        <versionRange>
209
                                            [1.7,)
210
                                        </versionRange>
211
                                        <goals>
212
                                            <goal>add-source</goal>
213
                                        </goals>
214
                                    </pluginExecutionFilter>
215
                                    <action>
216
                                        <ignore></ignore>
217
                                    </action>
218
                                </pluginExecution>
219
                            </pluginExecutions>
220
                        </lifecycleMappingMetadata>
221
                    </configuration>
222
                </plugin>
223
            </plugins>
224
        </pluginManagement>
225
    </build>
226
    <repositories>
227
        <repository>
228
            <id>cloudera</id>
229
            <name>Cloudera Repository</name>
230
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
231
            <releases>
232
                <enabled>true</enabled>
233
            </releases>
234
            <snapshots>
235
                <enabled>false</enabled>
236
            </snapshots>
237
        </repository>
238
        <!-- This repository contains our patched
239
        version of "avro" and "avro-mapred" modules (see the dependencies section)
240
        This entry might be removed when the patch to these modules becomes
241
        a part of the official Avro release.-->
242
        <repository>
243
            <id>dnet-deps</id>
244
            <name>dnet dependencies</name>
245
            <url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps</url>
246
            <releases>
247
                <enabled>true</enabled>
248
            </releases>
249
            <snapshots>
250
                <enabled>false</enabled>
251
            </snapshots>
252
            <layout>default</layout>
253
        </repository>
254
    </repositories>
255
</project>

Also available in: Unified diff