Project

General

Profile

« Previous | Next » 

Revision 42697

Added by Eri Katsari about 8 years ago

''

View differences:

modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/run_workflow.sh.667237820.filtered
1
#!/bin/bash
2

  
3
if [ $# == "0" ] ; then
4
    oozie job -oozie http://oozie.hadoop.dm.openaire.eu:11000/oozie -config job.properties -run
5
else
6
    oozie job -oozie http://oozie.hadoop.dm.openaire.eu:11000/oozie -config $1/job.properties -run
7
fi
8

  
9

  
10

  
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data4/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
storageID=DS-49203724-127.0.0.1-33697-1404044726267
3
clusterID=testClusterID
4
cTime=0
5
storageType=DATA_NODE
6
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name1/current/seen_txid
1
1
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/print_working_dir.sh
1
#!/bin/bash
2
echo ""
3
echo "---->Contents of the working directory"
4
hadoop fs -ls /user/eri.katsari/core/javamapreduce/lodinter/working_dir
5

  
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/print_working_dir.sh.1669837483.filtered
1
#!/bin/bash
2
echo ""
3
echo "---->Contents of the working directory"
4
hadoop fs -ls /user/eri.katsari/core/javamapreduce/lodinter/working_dir
5

  
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/run_workflow.sh
1
#!/bin/bash
2

  
3
if [ $# == "0" ] ; then
4
    oozie job -oozie http://oozie.hadoop.dm.openaire.eu:11000/oozie -config job.properties -run
5
else
6
    oozie job -oozie http://oozie.hadoop.dm.openaire.eu:11000/oozie -config $1/job.properties -run
7
fi
8

  
9

  
10

  
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/get_working_dir.sh.822962452.filtered
1
#!/bin/bash
2
hadoop fs -get /user/eri.katsari/core/javamapreduce/lodinter/working_dir
3

  
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name1/current/VERSION
1
#Sun Jun 29 15:25:23 EEST 2014
2
namespaceID=1227977966
3
clusterID=testClusterID
4
cTime=0
5
storageType=NAME_NODE
6
blockpoolID=BP-283802306-127.0.0.1-1404044723812
7
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name1/current/fsimage_0000000000000000000.md5
1
090b992f82e45f2f9ac2ac5328528a4c *fsimage_0000000000000000000
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name2/current/seen_txid
1
1
modules/dnet-openaire-lod-interlinking-wf/target/primed/job.eri.properties
1
# Mon Jan 25 20:56:15 EET 2016
2
#/log_enable=2
3
isLookupEndpoint=http://services.openaire.eu:8280/is/services/isLookUp
4
jobTracker=nmis-hadoop-jt
5
lod_EntitiesInputFile=/tmp/lod_dump/entities-r-00066
6
lod_RelationsInputFile=/tmp/lod_dump/relations-r-00042
7
lod_baseURI=http://lod.openaire.eu/data/
8
lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
9
lod_dataPath=/user/giorgos.alexiou/rdfData
10
lod_defaultGraph=test
11
lod_delim=,
12
lod_enclosing='
13
lod_entitiesPerQuery=10
14
lod_hbase_table=db_openaireplus_services
15
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = 
[legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
16
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/resultType", "27": "http://lod.openaire.eu/vocab/year", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier","5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": 
"http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": 
"http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": 
"http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
17
lod_jsonRels={ "resultResult": [{ "property": "http://lod.openaire.eu/vocab/resultResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://lod.openaire.eu/vocab/resultProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://lod.openaire.eu/vocab/personResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://lod.openaire.eu/vocab/personProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://lod.openaire.eu/vocab/personPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://lod.openaire.eu/vocab/organizationOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] }
18
lod_lastExecutionDate=2015-05-26
19
lod_maxCpart=3
20
lod_minCpart=1
21
lod_output=/tmp/lod_dump/
22
lod_part=5
23
lod_password=virtramvos
24
lod_relationsGraph=relationsTest
25
lod_relationsPerQuery=50
26
lod_seperator=;
27
lod_username=dba
28
numReducers=15
29
nameNode=hdfs://nmis-hadoop-cluster
30
oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
31
oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
32
out1=entities
33
out2=relations
34
queueName=default
35
user.name=eri.katsari
36
workingDir=/user/eri.katsari/core/javamapreduce/lodexport/working_dir
modules/dnet-openaire-lod-interlinking-wf/target/primed/oozie_app/workflow.xml
1
<workflow-app name="lod_generation" xmlns="uri:oozie:workflow:0.4">
2
    <!-- map reduce job that exports hbase data and prepares them for import
3
        to the lod_generation -->
4

  
5
    <global>
6
        <job-tracker>${jobTracker}</job-tracker>
7
        <name-node>${nameNode}</name-node>
8
        <configuration>
9
            <property>
10
                <name>mapred.job.queue.name</name>
11
                <value>${queueName}</value>
12
            </property>
13
            <property>
14
                <name>oozie.sqoop.log.level</name>
15
                <value>DEBUG</value>
16
            </property>
17
        </configuration>
18
    </global>
19

  
20

  
21
    <start to='datasetProcessing'/>
22

  
23
    <action name="datasetProcessing">
24
        <map-reduce>
25

  
26
           <!-- <prepare>
27
                <delete path="${nameNode}${lod_output}"/>
28
            </prepare>
29
-->
30
            <configuration>
31

  
32
                <!-- (an unclosed <property> start tag was removed here: it left the XML not well-formed) -->
33

  
34
                <!-- ZOOKEEPER -->
35

  
36
                <property>
37
                    <name>hbase.zookeeper.quorum</name>
38
                    <value>
39
                        namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
40
                    </value>
41
                    <!-- <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
42
                        </value> -->
43
                </property>
44

  
45
                <property>
46
                    <name>zookeeper.znode.rootserver</name>
47
                    <value>root-region-server</value>
48
                </property>
49

  
50
                <property>
51
                    <name>hbase.zookeeper.property.clientPort</name>
52
                    <value>2181</value>
53

  
54
                </property>
55

  
56

  
57
                <!-- MR IO FOR MULTIPLE INPUTS-->
58

  
59
                <property>
60
                    <name>mapreduce.inputformat.class</name>
61
                    <value>org.apache.hadoop.mapreduce.lib.input.DelegatingInputFormat</value>
62
                </property>
63

  
64
                <property>
65
                    <name>mapreduce.map.class</name>
66
                    <value>org.apache.hadoop.mapreduce.lib.input.DelegatingMapper</value>
67
                </property>
68

  
69
                <property>
70
                    <name>mapreduce.input.multipleinputs.dir.formats</name>
71
                    <value>${nameNode}${sourceInput};org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat,${nameNode}${targetInput};org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat</value>
72
                </property>
73
                <property>
74
                    <name>mapreduce.input.multipleinputs.dir.mappers</name>
75
                    <value>${nameNode}${sourceInput};eu.dnetlib.data.mapreduce.hbase.lodExport.SourceMapper,${nameNode}${targetInput};eu.dnetlib.data.mapreduce.hbase.lodExport.TargetMapper</value>
76
                </property>
77

  
78
                <property>
79
                    <name>mapred.mapoutput.key.class</name>
80
                    <value>org.apache.hadoop.io.Text</value>
81
                </property>
82
                <property>
83
                    <name>mapred.mapoutput.value.class</name>
84
                    <value>org.apache.hadoop.io.Text</value>
85
                </property>
86

  
87

  
88
                <property>
89
                    <name>mapred.output.key.class</name>
90
                    <value>org.apache.hadoop.io.Text</value>
91
                </property>
92

  
93
                <property>
94
                    <name>mapred.output.value.class</name>
95
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
96
                </property>
97

  
98
                <!-- ## This is required for new MapReduce API usage -->
99
                <property>
100
                    <name>mapred.mapper.new-api</name>
101
                    <value>true</value>
102
                </property>
103
                <property>
104
                    <name>mapred.reducer.new-api</name>
105
                    <value>true</value>
106
                </property>
107

  
108
                <!-- # Job-specific options -->
109
                <property>
110
                    <name>dfs.blocksize</name>
111
                    <value>32M</value>
112
                </property>
113
                <property>
114
                    <name>mapred.output.compress</name>
115
                    <value>false</value>
116
                </property>
117
                <property>
118
                    <name>mapred.reduce.tasks.speculative.execution</name>
119
                    <value>false</value>
120
                </property>
121
                <property>
122
                    <!-- disable speculative map attempts; the reduce-side switch is already set by the preceding property -->
                    <name>mapred.map.tasks.speculative.execution</name>
123
                    <value>false</value>
124
                </property>
125

  
126
                <property>
127
                    <name>mapreduce.map.speculative</name>
128
                    <value>false</value>
129
                </property>
130

  
131
                <!-- I/O FORMAT -->
132
                <!-- IMPORTANT: sets default delimiter used by text output writer. Required
133
                    to fix issue with trailing tab added between id and value in multiple outputs -->
134

  
135
                <property>
136
                    <name>mapred.textoutputformat.separator</name>
137
                    <value>${lod_delim}</value>
138
                </property>
139

  
140
                <!-- ## Names of all output ports -->
141
<!--
142

  
143
                <property>
144
                    <name>mapreduce.output</name>
145
                    <value>
146
                        ${out1}
147
                    </value>
148

  
149
                </property>
150
-->
151

  
152

  
153
                <!-- ## Classes of mapper and reducer -->
154

  
155
                <!--<property>
156
                    <name>mapreduce.map.class</name>
157
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodMapper</value>
158
                </property>
159
               -->
160
                <property>
161
                    <name>mapreduce.reduce.class</name>
162
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.DatasetReducer</value>
163
                </property>
164

  
165

  
166
                <property>
167
                    <name>io.serializations</name>
168
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
169
                </property>
170

  
171
                <!-- ## Custom config -->
172

  
173
                <!-- delimiter character used to separate fields in the HDFS dump files -->
174
                <property>
175
                    <name>lod.delim</name>
176
                    <value>${lod_delim}</value>
177
                </property>
178

  
179

  
180
                <!-- This directory does not correspond to a data store. In fact, this
181
                    directory only contains multiple data stores. It has to be set to the name
182
                    of the workflow node. -->
183
                <property>
184
                    <name>mapred.output.dir</name>
185
                    <value>${lod_output}${out}</value>
186
                </property>
187

  
188
                <!-- ## Workflow node parameters -->
189
                <property>
190
                    <name>mapred.reduce.tasks</name>
191
                    <value>${numReducers}</value>
192
                </property>
193

  
194
            </configuration>
195

  
196
        </map-reduce>
197
        <ok to="end"/>
198

  
199
        <error to="fail"/>
200
    </action>
201

  
202
<!--
203

  
204
    <action name='blocking'>
205
        <java>
206
            <prepare>
207
            </prepare>
208
            <configuration>
209
                <property>
210
                    <name>mapred.job.queue.name</name>
211
                    <value>${queueName}</value>
212
                </property>
213
            </configuration>
214
            <main-class>eu.dnetlib.iis.core.workflows.lodexport.ClearGraph</main-class>
215

  
216
            <arg>${lod_relationsGraph}</arg>
217

  
218
        </java>
219
        <ok to="end"/>
220
        <error to="fail"/>
221
    </action>
222
-->
223

  
224

  
225
    <!--
226
    <action name="cleanUpHDFS">
227
        <fs>
228
            <delete path="${lod_output}test"/>
229
        </fs>
230

  
231
        <ok to="end"/>
232
        <error to="fail"/>
233
    </action>
234
-->
235

  
236
    <kill name="fail">
237
        <message>
238
            Unfortunately, the process failed -- error message:
239
            [${wf:errorMessage(wf:lastErrorNode())}]
240
        </message>
241
    </kill>
242
    <end name="end"/>
243
</workflow-app>
modules/dnet-openaire-lod-interlinking-wf/target/oozie-package.tar/job.properties
1
# Wed May 25 23:31:08 EEST 2016
2
jobTracker=dm-cluster-jt
3
lod_baseURI=http://lod.openaire.eu/data/
4
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
5
lod_dataPath=/user/giorgos.alexiou/rdfData
6
lod_defaultGraph=test
7
lod_delim=,
8
lod_enclosing='
9
lod_maxCpart=3
10
lod_minCpart=1
11
lod_output=/tmp/lod/
12
lod_password=eiloobi2Ail6Aisi
13
lod_relationsGraph=relationsTest
14
lod_relationsPerQuery=170
15
lod_seperator=;
16
lod_username=dba
17
nameNode=hdfs://dm-cluster-nn
18
numReducers=17
19
oozie.wf.application.path=hdfs://dm-cluster-nn/user/eri.katsari/lodinter/oozie_app
20
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
21
out=dataset
22
queueName=default
23
sourceInput=/tmp/lod2/source
24
targetInput=/tmp/lod2/target
25
user.name=eri.katsari
26
workingDir=/user/eri.katsari/core/javamapreduce/lodinter/working_dir
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name2/current/VERSION
1
#Sun Jun 29 15:25:23 EEST 2014
2
namespaceID=1227977966
3
clusterID=testClusterID
4
cTime=0
5
storageType=NAME_NODE
6
blockpoolID=BP-283802306-127.0.0.1-1404044723812
7
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/name2/current/fsimage_0000000000000000000.md5
1
090b992f82e45f2f9ac2ac5328528a4c *fsimage_0000000000000000000
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data1/current/BP-283802306-127.0.0.1-1404044723812/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
namespaceID=1227977966
3
cTime=0
4
blockpoolID=BP-283802306-127.0.0.1-1404044723812
5
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data1/current/BP-283802306-127.0.0.1-1404044723812/dncp_block_verification.log.prev
1

  
2
date="2014-06-29 15:25:31,760"	 time="1404044731760"	 genstamp="1002"	 id="6739057604294048250"
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data1/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
storageID=DS-577722861-127.0.0.1-38993-1404044726267
3
clusterID=testClusterID
4
cTime=0
5
storageType=DATA_NODE
6
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data2/current/BP-283802306-127.0.0.1-1404044723812/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
namespaceID=1227977966
3
cTime=0
4
blockpoolID=BP-283802306-127.0.0.1-1404044723812
5
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data2/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
storageID=DS-577722861-127.0.0.1-38993-1404044726267
3
clusterID=testClusterID
4
cTime=0
5
storageType=DATA_NODE
6
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data3/current/BP-283802306-127.0.0.1-1404044723812/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
namespaceID=1227977966
3
cTime=0
4
blockpoolID=BP-283802306-127.0.0.1-1404044723812
5
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data3/current/BP-283802306-127.0.0.1-1404044723812/dncp_block_verification.log.prev
1

  
2
date="2014-06-29 15:25:31,765"	 time="1404044731765"	 genstamp="1002"	 id="6739057604294048250"
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data3/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
storageID=DS-49203724-127.0.0.1-33697-1404044726267
3
clusterID=testClusterID
4
cTime=0
5
storageType=DATA_NODE
6
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/build/test/data/dfs/data/data4/current/BP-283802306-127.0.0.1-1404044723812/current/VERSION
1
#Sun Jun 29 15:25:26 EEST 2014
2
namespaceID=1227977966
3
cTime=0
4
blockpoolID=BP-283802306-127.0.0.1-1404044723812
5
layoutVersion=-40
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/readme.markdown.1781837867.filtered
1
Execute the scripts in the following order:
2

  
3
1. `upload_workflow.sh`
4
2. `run_workflow.sh`
5
3. `print_working_dir.sh` or `get_working_dir.sh`
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/upload_workflow.sh.391051960.filtered
1
#!/bin/bash
2
set -x ## print every executed command
3

  
4

  
5
if [ $# == "0" ] ; then
6
    target_dir_root=`pwd`'/oozie_app'
7
else
8
    target_dir_root=`readlink -f $1`'/oozie_app'
9
fi
10

  
11
# initial phase, creating symbolic links to jars in all subworkflows
12
# currently disabled
13
#libDir=$target_dir_root'/lib'
14
#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
15
#for dir in $dirs
16
#do
17
#        if [ -f $dir/workflow.xml ]
18
#        then
19
#                echo "creating symbolic links to jars in directory: $dir/lib"
20
#                if [ ! -d "$dir/lib" ]; then
21
#                        mkdir $dir/lib
22
#                fi
23
#                find $libDir -type f -exec ln -s \{\} $dir/lib \;
24
#        fi
25
#done
26

  
27

  
28
#uploading
29
hadoop fs -rm -r /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
30
hadoop fs -mkdir /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
31
hadoop fs -mkdir /user/eri.katsari/core/javamapreduce/lodinter/working_dir
32
hadoop fs -put $target_dir_root /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/get_working_dir.sh
1
#!/bin/bash
2
hadoop fs -get /user/eri.katsari/core/javamapreduce/lodinter/working_dir
3

  
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/upload_workflow.sh
1
#!/bin/bash
2
set -x ## print every executed command
3

  
4

  
5
if [ $# == "0" ] ; then
6
    target_dir_root=`pwd`'/oozie_app'
7
else
8
    target_dir_root=`readlink -f $1`'/oozie_app'
9
fi
10

  
11
# initial phase, creating symbolic links to jars in all subworkflows
12
# currently disabled
13
#libDir=$target_dir_root'/lib'
14
#dirs=`find $target_dir_root/* -maxdepth 10 -type d`
15
#for dir in $dirs
16
#do
17
#        if [ -f $dir/workflow.xml ]
18
#        then
19
#                echo "creating symbolic links to jars in directory: $dir/lib"
20
#                if [ ! -d "$dir/lib" ]; then
21
#                        mkdir $dir/lib
22
#                fi
23
#                find $libDir -type f -exec ln -s \{\} $dir/lib \;
24
#        fi
25
#done
26

  
27

  
28
#uploading
29
hadoop fs -rm -r /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
30
hadoop fs -mkdir /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
31
hadoop fs -mkdir /user/eri.katsari/core/javamapreduce/lodinter/working_dir
32
hadoop fs -put $target_dir_root /user/eri.katsari/eu/dnetlib/iis/core/javamapreduce/lodexport
modules/dnet-openaire-lod-interlinking-wf/target/archive-tmp/fileSetFormatter.879782440.tmp/readme.markdown
1
Execute the scripts in the following order:
2

  
3
1. `upload_workflow.sh`
4
2. `run_workflow.sh`
5
3. `print_working_dir.sh` or `get_working_dir.sh`
modules/dnet-openaire-lod-interlinking-wf/target/primed/job.properties
1
lod_baseURI=http://lod.openaire.eu/data/
2
lod_dataPath=/user/giorgos.alexiou/rdfData
3
lod_delim=,
4
lod_enclosing='
5
lod_maxCpart=3
6
lod_minCpart=1
7
lod_output=/tmp/lod/
8
 #---------config for CNR------------
9
#lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
10
#lod_password=virtramvos
11
#Config for DM
12
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
13
lod_password=eiloobi2Ail6Aisi
14
lod_defaultGraph=test
15
lod_relationsGraph=relationsTest
16
lod_relationsPerQuery=170
17
lod_seperator=;
18
lod_username=dba
19
#--------DM Cluster config-------
20
jobTracker=dm-cluster-jt
21
nameNode=hdfs://dm-cluster-nn
22
oozie.wf.application.path=hdfs://dm-cluster-nn/user/eri.katsari/lodinter/oozie_app
23
#oozie.wf.application.path=hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app
24
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
25
#--------CNR cluster config-------
26
#jobTracker=nmis-hadoop-jt
27
#nameNode=hdfs://nmis-hadoop-cluster
28
#oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
29
#oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
30
numReducers=17
31
out=dataset
32
sourceInput=/tmp/lod2/source
33
targetInput=/tmp/lod2/target
34
queueName=default
35
#user.name=giorgos.alexiou
36
#workingDir=/user/giorgos.alexiou/core/javamapreduce/lodexport/working_dir
37
user.name=eri.katsari
38
workingDir=/user/eri.katsari/core/javamapreduce/lodinter/working_dir
modules/dnet-openaire-lod-interlinking-wf/target/oozie-package.tar/oozie_app/lib/hadoop-ant-2.0.0-mr1-cdh4.3.1.pom
1
<?xml version="1.0" encoding="UTF-8"?>
2
<!--
3
   Licensed to the Apache Software Foundation (ASF) under one or more
4
   contributor license agreements.  See the NOTICE file distributed with
5
   this work for additional information regarding copyright ownership.
6
   The ASF licenses this file to You under the Apache License, Version 2.0
7
   (the "License"); you may not use this file except in compliance with
8
   the License.  You may obtain a copy of the License at
9

  
10
       http://www.apache.org/licenses/LICENSE-2.0
11

  
12
   Unless required by applicable law or agreed to in writing, software
13
   distributed under the License is distributed on an "AS IS" BASIS,
14
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
   See the License for the specific language governing permissions and
16
   limitations under the License.
17
-->
18

  
19
<!--
20
NOTE - This POM is used for Cloudera's build and packaging process.
21
-->
22
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
23
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
24
  <modelVersion>4.0.0</modelVersion>
25
  
26
  <parent>
27
    <groupId>com.cloudera.cdh</groupId>
28
    <artifactId>hadoop-root</artifactId>
29
    <version>2.0.0-mr1-cdh4.3.1</version>
30
    <relativePath>cloudera/maven-packaging/pom.xml</relativePath>
31
  </parent>
32
  
33
  <groupId>com.cloudera.cdh</groupId>
34
  <artifactId>hadoop-ant</artifactId>
35
  <version>2.0.0-mr1-cdh4.3.1</version>
36
  <packaging>pom</packaging>
37
  
38
  <description>Maven wrapper for Ant Hadoop build</description>
39
  <name>CDH Hadoop Maven Wrapper</name>
40

  
41
  <profiles>
42
    <!--To allow standalone project to fetch CDH artifacts -->
43
    <!--Activate by default doing a negation of negation   -->
44
    <!-- this trick allows CDH root to switch it off       -->
45
    <profile>
46
      <id>cdh.repos</id>
47
      <activation>
48
        <activeByDefault>false</activeByDefault>
49
        <property>
50
          <name>use.cdh.repos</name>
51
          <value>!false</value>
52
        </property>
53
      </activation>
54
      <repositories>
55
        <repository>
56
          <id>cdh.releases.repo</id>
57
          <url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
58
          <name>CDH Releases Repository</name>
59
          <snapshots>
60
            <enabled>false</enabled>
61
          </snapshots>
62
        </repository>
63
        <repository>
64
          <id>cdh.snapshots.repo</id>
65
          <url>https://repository.cloudera.com/content/repositories/snapshots</url>
66
          <name>CDH Snapshots Repository</name>
67
          <snapshots>
68
            <enabled>true</enabled>
69
          </snapshots>
70
        </repository>
71
      </repositories>
72
    </profile>
73
    <!-- Similar logic to allow us to skip compilation/tests when run -->
74
    <!-- via do-release-build or packaging scripts, since we're already -->
75
    <!-- compiling via direct ant calls. -->
76
    <profile>
77
      <id>non.release.build</id>
78
      <activation>
79
        <activeByDefault>false</activeByDefault>
80
        <property>
81
          <name>not.cdh.release.build</name>
82
          <value>!false</value>
83
        </property>
84
      </activation>
85
      <build>
86
        <plugins>
87
          <plugin>
88
            <groupId>org.apache.maven.plugins</groupId>
89
            <artifactId>maven-antrun-plugin</artifactId>
90
            <version>1.6</version>
91
            <inherited>false</inherited>
92
            <executions>
93
              <execution>
94
                <id>clean</id>
95
                <configuration>
96
                  <target>
97
                    <exec executable="${ant.cmd}" dir="${basedir}" failonerror="true">
98
                      <arg value="clean"/>
99
                      <arg value="-Dcdh.maven=true"/>
100
                      <arg value="-Divy.cache.dir=${ivy.cache.dir}"/>
101
                    </exec>
102
                  </target>
103
                </configuration>
104
                <goals>
105
                  <goal>run</goal>
106
                </goals>
107
                <phase>clean</phase>
108
              </execution>
109
              <execution>
110
                <id>compile</id>
111
                <configuration>
112
                  <target>
113
                    <exec executable="${ant.cmd}" dir="${basedir}" failonerror="true">
114
                      <arg value="bin-package"/>
115
                      <arg value="-Dcdh.maven=true"/>
116
                      <arg value="-Divy.cache.dir=${ivy.cache.dir}"/>
117
                    </exec>
118
                  </target>
119
                </configuration>
120
                <goals>
121
                  <goal>run</goal>
122
                </goals>
123
                <phase>compile</phase>
124
              </execution>
125
              <execution>
126
                <id>test</id>
127
                <configuration>
128
                  <target unless="${skipTests}">
129
                    <exec executable="${ant.cmd}" dir="${basedir}" failonerror="true">
130
                      <arg value="test"/>
131
                      <arg value="-Dcdh.maven=true"/>
132
                      <arg value="-Divy.cache.dir=${ivy.cache.dir}"/>
133
                </exec>
134
                  </target>
135
                </configuration>
136
                <goals>
137
                  <goal>run</goal>
138
                </goals>
139
                <phase>test</phase>
140
              </execution>
141
            </executions>
142
          </plugin>
143
        </plugins>
144
      </build>
145
    </profile>
146
        <!-- Profile that's only activated when we're calling as a release build, to run -->
147
    <!-- with variables set *and* tar target -->
148
    <profile>
149
      <id>cdh.release.build</id>
150
      <activation>
151
        <activeByDefault>false</activeByDefault>
152
        <property>
153
          <name>not.cdh.release.build</name>
154
          <value>false</value>
155
        </property>
156
      </activation>
157
      <build>
158
        <plugins>
159
          <plugin>
160
            <groupId>org.apache.maven.plugins</groupId>
161
            <artifactId>maven-antrun-plugin</artifactId>
162
            <inherited>false</inherited>
163
            <executions>
164
              <execution>
165
                <id>tar</id>
166
                <configuration>
167
                  <target if="do.full.compile">
168
                    <property file="${basedir}/build.properties"/>
169
                    <exec executable="/bin/bash" dir="${basedir}/cloudera" failonerror="true">
170
                      <arg value="do-release-build" />
171
                      <env key="SKIP_MVN_EXPLICIT" value="true" />
172
                    </exec>
173
                  </target>
174
                </configuration>
175
                <goals>
176
                  <goal>run</goal>
177
                </goals>
178
                <phase>compile</phase>
179
              </execution>
180
            </executions>
181
          </plugin>
182
        </plugins>
183
      </build>
184
    </profile>
185
  </profiles>
186
  
187
  <build>
188
    <plugins>
189
      
190
      <plugin>
191
        <groupId>com.cloudera.kitchen</groupId>
192
        <artifactId>maven-deps-to-props-plugin</artifactId>
193
        
194
        <executions>
195
          <execution>
196
            <id>build.properties</id>
197
            <configuration>
198
              <templateFile>${basedir}/cloudera/maven-packaging/templates/build.template</templateFile>
199
              <targetFile>${basedir}/build.properties</targetFile>
200
            </configuration>
201
            <goals>
202
              <goal>generate</goal>
203
            </goals>
204
            <phase>process-resources</phase>
205
          </execution>
206
        </executions>
207
      </plugin>
208
    </plugins>
209
  </build>
210
  
211
</project>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff