Revision 48938
Added by Tsampikos Livisianos about 6 years ago
modules/dnet-openaire-lodexport-wf/install.sh

#!/bin/bash

# Build and install the lodimport module first (tests skipped), then update
# and package/deploy the lodexport workflow application. The paths below are
# the author's local SVN checkouts.
cd /Users/giorgos/Documents/svn/dnet-openaire-lodimport/trunk
svn up
mvn clean install -Dmaven.test.skip=true

cd /Users/giorgos/Documents/svn/dnet-openaire-lodexport-wf
svn up
mvn clean package -Dworkflow.source.dir=eu/dnetlib/iis/core/javamapreduce/stats -Poozie-package,deploy -Diis.hadoop.frontend.home.dir=/home -Duser.name=giorgos.alexiou
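The -Poozie-package,deploy profile pushes the packaged workflow application to the cluster. After a run, the deployment can be sanity-checked in HDFS; a minimal sketch, assuming a configured hadoop client and the application path used in job.properties:

# verify that the Oozie application directory landed where job.properties expects it
hadoop fs -ls hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app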
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/TestMethod.java

package eu.dnetlib.iis.core.workflows.lodexport;

import org.apache.log4j.Logger;

public class TestMethod {

    private static Logger log = Logger.getLogger(TestMethod.class);

    public static void main(String[] args) throws Exception {

        log.debug("Finalizing...");

        // Empty test stub: the try block carries no work yet; the catch
        // clause mirrors the error handling used in Finalize/ClearGraph.
        try {

        } catch (Exception e) {
            log.error("Datasource creation failed: " + e.toString(), e);
            System.out.println("Datasource creation failed: " + e.toString());
        }
    }

}
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/Finalize.java

package eu.dnetlib.iis.core.workflows.lodexport;

import com.jolbox.bonecp.BoneCPDataSource;
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.DB;
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.RDFizer;
import org.apache.log4j.Logger;

import java.sql.Connection;

public class Finalize {

    private static Logger log = Logger.getLogger(Finalize.class);
    private static BoneCPDataSource ds;
    private static Connection conn;

    public static void main(String[] args) throws Exception {
        log.debug("Finalizing...");

        try {
            // args[0..5] feed DB.getDatasource: connection line, username,
            // password, minCpart, maxCpart, part (cf. the clearGraph <java>
            // action arguments in workflow.xml)
            DB db = new DB();
            ds = db.getDatasource(args[0], args[1], args[2], args[3], args[4], args[5]);
        } catch (Exception e) {
            log.error("Datasource creation failed: " + e.toString(), e);
            System.out.println("Datasource creation failed: " + e.toString());
        }

        try {
            conn = ds.getConnection();
            // 120 presumably restores a 120-minute checkpoint interval after
            // the import (ClearGraph disables checkpointing with -1)
            RDFizer.setCheckpoint(conn, 120);
            conn.close();
        } catch (Exception e) {
            // guard against an NPE masking the real error when getConnection() failed
            if (conn != null) {
                conn.close();
            }
            log.error("Failed to set checkpoint: " + e.toString(), e);
        }
    }
}
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/HbaseScannerGenerator.java

package eu.dnetlib.iis.core.workflows.lodexport;

/**
 * Created by eri_k on 2/6/2016.
 */

import org.apache.commons.cli.*;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Base64;

import java.io.*;
import java.util.Arrays;
import java.util.Properties;

// Based on icm-iis-import's importer.mapred.helper.ScanStringGenerator
// by @author mhorst

public class HbaseScannerGenerator {

    public static final String DEFAULT_ENCODING = "utf-8";
    public static final char DEFAULT_CF_CSV_SEPARATOR = ',';

    public static void main(String[] args) throws FileNotFoundException, IOException, ParseException {
        // preparing options
        Options options = new Options();
        options.addOption("c", "cacheSize", true, "scanner caching size: " + "number of rows for caching that will be passed to scanners");
        options.addOption("s", "startWith", true, "element to start iteration with");
        options.addOption("e", "endWith", true, "element to end iteration with");
        options.addOption("r", "rowPrefix", true, "row prefix");
        options.addOption("f", "columnFamilies", true, "CSV containing comma separated " + "supported column families");
        options.addOption("x", "encoding", true, "encoding to be used for building byte[] data from parameters, " + "set to " + DEFAULT_ENCODING + " by default");

        // parsing parameters
        CommandLineParser parser = new GnuParser();
        CommandLine cmdLine = parser.parse(options, args);

        String encoding = cmdLine.hasOption("x") ? cmdLine.getOptionValue("x") : DEFAULT_ENCODING;

        Scan scan = new Scan();
        if (cmdLine.hasOption("c")) {
            scan.setCaching(Integer.valueOf(cmdLine.getOptionValue("c")));
        }
        if (cmdLine.hasOption("s")) {
            scan.setStartRow(cmdLine.getOptionValue("s").getBytes(encoding));
        }
        if (cmdLine.hasOption("e")) {
            scan.setStopRow(cmdLine.getOptionValue("e").getBytes(encoding));
        }
        if (cmdLine.hasOption("r")) {
            // supporting multiple prefixes
            String[] rowPrefixCSV = StringUtils.split(cmdLine.getOptionValue("r"), DEFAULT_CF_CSV_SEPARATOR);
            if (rowPrefixCSV != null) {
                FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
                for (String currentRowPrefix : rowPrefixCSV) {
                    filterList.addFilter(new PrefixFilter(copyArrayWhenNotNull(currentRowPrefix.trim().getBytes(encoding))));
                }
                scan.setFilter(filterList);
            }
        }

        if (cmdLine.hasOption("f")) {
            String[] cfCSV = StringUtils.split(cmdLine.getOptionValue("f"), DEFAULT_CF_CSV_SEPARATOR);
            if (cfCSV != null) {
                for (String currentCf : cfCSV) {
                    scan.addFamily(copyArrayWhenNotNull(currentCf.trim().getBytes(encoding)));
                }
            }
        }

        File file = new File(System.getProperty("oozie.action.output.properties"));
        Properties props = new Properties();
        props.setProperty("scan", convertScanToString(scan));
        OutputStream os = new FileOutputStream(file);
        try {
            props.store(os, "");
        } finally {
            os.close();
        }
    }

    private static String convertScanToString(Scan scan) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(out);
        scan.write(dos);
        return Base64.encodeBytes(out.toByteArray());
    }

    /**
     * Copies array or returns null when source is null.
     *
     * @param source
     * @return copied array
     */
    final public static byte[] copyArrayWhenNotNull(byte[] source) {
        if (source != null) {
            return Arrays.copyOf(source, source.length);
        } else {
            return null;
        }
    }

}
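The generator writes the Base64-serialized scan into the file named by the oozie.action.output.properties system property, which Oozie sets for <java> actions so the value can be read back via wf:actionData. A standalone sketch for testing outside Oozie (the jar name and the "results" column family are hypothetical; the classpath details depend on the cluster):

# hypothetical manual run; Oozie normally supplies oozie.action.output.properties itself
java -Doozie.action.output.properties=/tmp/scan.properties \
     -cp dnet-openaire-lodexport-wf.jar:$(hbase classpath) \
     eu.dnetlib.iis.core.workflows.lodexport.HbaseScannerGenerator \
     -c 500 -f results
cat /tmp/scan.properties   # contains scan=<base64-encoded Scan>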
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/ClearGraph.java

package eu.dnetlib.iis.core.workflows.lodexport;

import com.jolbox.bonecp.BoneCPDataSource;
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.DB;
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.RDFizer;
import org.apache.log4j.Logger;

import java.sql.Connection;

public class ClearGraph {

    private static Logger log = Logger.getLogger(ClearGraph.class);
    private static BoneCPDataSource ds;
    private static Connection conn;

    public static void main(String[] args) throws Exception {
        log.debug("Clearing up Virtuoso Relations Graph...");
        try {
            // args[0..5]: connection line, username, password, minCpart,
            // maxCpart, part — the order in which the clearGraph <java>
            // action of workflow.xml passes them
            DB db = new DB();
            ds = db.getDatasource(args[0], args[1], args[2], args[3], args[4], args[5]);
        } catch (Exception e) {
            log.error("Datasource creation failed: " + e.toString(), e);
            System.out.println("Datasource creation failed: " + e.toString());
        }

        try {
            conn = ds.getConnection();
            // -1 presumably disables automatic checkpointing while the graph is cleared
            RDFizer.setCheckpoint(conn, -1);
            RDFizer.clearGraph(args[6], conn);
            conn.close();
        } catch (Exception e) {
            // guard against an NPE masking the real error when getConnection() failed
            if (conn != null) {
                conn.close();
            }
            log.error("Failed to clear Graph: " + e.toString(), e);
            System.out.println("Failed to clear Graph: " + e.toString());
        }
    }
}
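Outside the workflow, ClearGraph can be exercised by hand with the same seven arguments the clearGraph action passes. A sketch with the values from job.properties (jar name hypothetical, password elided into an environment variable):

java -cp dnet-openaire-lodexport-wf.jar eu.dnetlib.iis.core.workflows.lodexport.ClearGraph \
    "jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1" \
    dba "$LOD_PASSWORD" 1 3 5 relationsTest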
modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_test_on_cluster.sh

mvn clean package -Pattach-test-resources,oozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation

modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_on_cluster.sh

mvn clean package -Poozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner

modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_test_locally.sh

mvn clean package -Pattach-test-resources,oozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation

modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_locally.sh

mvn clean package -Poozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner
modules/dnet-openaire-lodexport-wf/src/main/scripts/update_example_workflow_apps_list.py

#!/usr/bin/env python

## Generates a new version of the "generate_example_workflow_apps.properties"
## file that contains paths to all of the example workflows stored in this
## project. This is done by scanning the directory tree and searching for
## directories that look like they contain workflow definitions.

from __future__ import print_function

import os
import os.path

dir_with_examples = "src/test/resources/eu/dnetlib/iis/core/examples"
dirs_to_ignore = [".svn"]
output_file = "src/main/scripts/generate_example_workflow_apps.properties"

def does_contain_example(dir_path):
    return os.path.exists(os.path.join(dir_path, "oozie_app"))

examples = []

for root, dirs, files in os.walk(dir_with_examples):
    # prune ignored directories (guarded: not every directory contains ".svn")
    for dir_to_ignore in dirs_to_ignore:
        if dir_to_ignore in dirs:
            dirs.remove(dir_to_ignore)
    # collect example directories and stop descending into them
    dirs_to_remove = []
    for dir_ in dirs:
        dir_path = os.path.join(root, dir_)
        if does_contain_example(dir_path):
            examples.append(dir_path)
            dirs_to_remove.append(dir_)
    for dir_to_remove in dirs_to_remove:
        dirs.remove(dir_to_remove)

examples = sorted(examples)
with open(output_file, "w") as f:
    for e in examples:
        print(e, file=f)
    print("# remember to leave '\\n' after the last line\n", file=f)
modules/dnet-openaire-lodexport-wf/src/main/scripts/generate_example_workflow_apps.properties

eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_with_unicode_escape_codes
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_explicit_schema_file
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_subworkflow
eu/dnetlib/iis/core/examples/hadoopstreaming/wordcount_with_distributed_cache
eu/dnetlib/iis/core/examples/java/cloner
eu/dnetlib/iis/core/examples/java/joiner
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer-failing
eu/dnetlib/iis/core/examples/java/line_by_line_copier
eu/dnetlib/iis/core/examples/javamapreduce/cloner
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/cloner_without_reducer
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner_with_explicit_schema
eu/dnetlib/iis/core/examples/javamapreduce/person_by_age_splitter
eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
eu/dnetlib/iis/core/examples/parallel/parallel_joiner
eu/dnetlib/iis/core/examples/pig/basic
eu/dnetlib/iis/core/examples/pig/joiner
eu/dnetlib/iis/core/examples/pig/joiner_with_explicit_schema
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter_with_subworkflow
eu/dnetlib/iis/core/examples/protobuf/java/cloner
eu/dnetlib/iis/core/examples/protobuf/java/joiner
eu/dnetlib/iis/core/examples/protobuf/java/line_by_line_copier
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_with_multiple_output
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_without_reducer
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/person_by_age_splitter
eu/dnetlib/iis/core/examples/protobuf/subworkflow/cloners
eu/dnetlib/iis/core/examples/subworkflow/cloners
# remember to leave '\n' after the last line
modules/dnet-openaire-lodexport-wf/src/main/scripts/README.markdown

It is assumed that the scripts in this directory are executed with the current directory set to the main directory of the project - the one that contains the `pom.xml` file.

The scripts with the `locally` suffix generate example Oozie workflow applications that are supposed to be run on a local installation of Hadoop (in standalone or pseudo-distributed mode). On the other hand, scripts with the `on_cluster` suffix generate example Oozie workflow applications that are supposed to be run on the OpenAIRE+ Hadoop cluster.
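For example, from the directory containing `pom.xml`:

bash src/main/scripts/run_example_locally.sh      # targets a local Hadoop installation
bash src/main/scripts/run_example_on_cluster.sh   # targets the OpenAIRE+ cluster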
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.tl.properties

isLookupEndpoint=http://beta.services.openaire.eu:8280/is/services/isLookUp
lod_baseURI=http://lod.openaire.eu/data/
lod_dataPath=/user/giorgos.alexiou/rdfData
lod_delim=,
lod_enclosing='
lod_entitiesPerQuery=25
lod_hbase_table=db_openaireplus_services
lod_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking], max=1000 }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
#lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/year", "27": "http://lod.openaire.eu/vocab/resultType", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": "http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": "http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": "http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
lod_jsonRels={ "resultResult": [{ "property": "http://purl.org/dc/terms/isPartOf", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultDatasource": [{ "property": "http://www.w3.org/ns/prov#wasDerivedFrom", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://purl.org/dc/terms/creator", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] }
lod_lastExecutionDate=2015-05-26
lod_maxCpart=3
lod_minCpart=1
lod_EntitiesInputFile=/tmp/lod_full/entities
lod_RelationsInputFile=/tmp/lod_full/relations
lod_output=/tmp/tlod/
lod_part=5
#---------config for CNR------------
#lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
#lod_password=virtramvos
#Config for DM
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
lod_password=eiloobi2Ail6Aisi
lod_defaultGraph=test
lod_relationsGraph=relationsTest
lod_relationsPerQuery=170
lod_seperator=;
lod_username=dba
#--------DM Cluster config-------
jobTracker=dm-cluster-jt
nameNode=hdfs://dm-cluster-nn
#oozie.wf.application.path=hdfs://dm-cluster-nn/user/eri.katsari/lod/oozie_app
#oozie.wf.application.path=hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app
oozie.wf.application.path=hdfs://dm-cluster-nn/user/tsampikos.livisianos/core/javamapreduce/lodexport/oozie_app
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
#--------CNR cluster config-------
#jobTracker=nmis-hadoop-jt
#nameNode=hdfs://nmis-hadoop-cluster
#oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
#oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
numReducers=17
out1=entities
out2=relations
queueName=default
#user.name=giorgos.alexiou
user.name=tsampikos.livisianos
workingDir=/user/tsampikos.livisianos/core/javamapreduce/lodexport/working_dir
#user.name=eri.katsari
#workingDir=/user/eri.katsari/core/javamapreduce/lodexport/working_dir
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.properties

isLookupEndpoint=http://beta.services.openaire.eu:8280/is/services/isLookUp
lod_baseURI=http://lod.openaire.eu/data/
lod_dataPath=/user/giorgos.alexiou/rdfData
lod_delim=,
lod_enclosing='
lod_entitiesPerQuery=25
lod_hbase_table=db_openaireplus_services
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/year", "27": "http://lod.openaire.eu/vocab/resultType", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": "http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": "http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": "http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
lod_jsonRels={ "resultResult": [{ "property": "http://purl.org/dc/terms/isPartOf", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultDatasource": [{ "property": "http://www.w3.org/ns/prov#wasDerivedFrom", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://purl.org/dc/terms/creator", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] }
lod_lastExecutionDate=2015-05-26
lod_maxCpart=3
lod_minCpart=1
lod_EntitiesInputFile=/tmp/lod_full/entities/
lod_RelationsInputFile=/tmp/lod_full/relations/
lod_output=/tmp/lod/
lod_part=5
#---------config for CNR------------
#lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
#lod_password=virtramvos
#Config for DM
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
lod_password=eiloobi2Ail6Aisi
lod_defaultGraph=test
lod_relationsGraph=relationsTest
lod_relationsPerQuery=170
lod_seperator=;
lod_username=dba
#--------DM Cluster config-------
jobTracker=dm-cluster-jt
nameNode=hdfs://dm-cluster-nn
oozie.wf.application.path=hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
#--------CNR cluster config-------
#jobTracker=nmis-hadoop-jt
#nameNode=hdfs://nmis-hadoop-cluster
#oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
#oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
numReducers=17
out1=relations
out2=result
out3=person
out4=project
out5=datasource
out6=organization
queueName=default
user.name=giorgos.alexiou
workingDir=/user/giorgos.alexiou/core/javamapreduce/lodexport/working_dir
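With this file in hand, the packaged application would typically be submitted through the Oozie CLI against the oozieServiceLoc defined above (a sketch; assumes the oozie client is available on the frontend):

oozie job -oozie http://oozie.hadoop.dm.openaire.eu:11000/oozie \
    -config job.properties -run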
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/oozie_app/workflow.xml | ||
---|---|---|
1 |
<workflow-app name="lod_generation" xmlns="uri:oozie:workflow:0.4"> |
|
2 |
<!-- map reduce job that exports hbase data and prepares them for import |
|
3 |
to the lod_generation --> |
|
4 |
|
|
5 |
<global> |
|
6 |
<job-tracker>${jobTracker}</job-tracker> |
|
7 |
<name-node>${nameNode}</name-node> |
|
8 |
<configuration> |
|
9 |
<property> |
|
10 |
<name>mapred.job.queue.name</name> |
|
11 |
<value>${queueName}</value> |
|
12 |
</property> |
|
13 |
<property> |
|
14 |
<name>oozie.sqoop.log.level</name> |
|
15 |
<value>DEBUG</value> |
|
16 |
</property> |
|
17 |
|
|
18 |
|
|
19 |
</configuration> |
|
20 |
</global> |
|
21 |
|
|
22 |
<start to="csv_export"/> |
|
23 |
|
|
24 |
<action name="csv_export"> |
|
25 |
<map-reduce> |
|
26 |
|
|
27 |
<prepare> |
|
28 |
<delete path="${nameNode}${lod_output}"/> |
|
29 |
|
|
30 |
</prepare> |
|
31 |
|
|
32 |
<configuration> |
|
33 |
|
|
34 |
<property> |
|
35 |
<name>hbase.mapreduce.scan</name> |
|
36 |
<value>${wf:actionData('get-scanner')['scan']}</value> |
|
37 |
</property> |
|
38 |
<property> |
|
39 |
<name>hbase.rootdir</name> |
|
40 |
<value>$nameNode/hbase</value> |
|
41 |
|
|
42 |
</property> |
|
43 |
|
|
44 |
<property> |
|
45 |
<name>hbase.security.authentication</name> |
|
46 |
<value>simple</value> |
|
47 |
</property> |
|
48 |
<!-- ZOOKEEPER --> |
|
49 |
|
|
50 |
<property> |
|
51 |
<name>hbase.zookeeper.quorum</name> |
|
52 |
<value> |
|
53 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
|
54 |
</value> |
|
55 |
</property> |
|
56 |
<property> |
|
57 |
<name>zookeeper.znode.rootserver</name> |
|
58 |
<value>root-region-server</value> |
|
59 |
|
|
60 |
</property> |
|
61 |
<property> |
|
62 |
<name>hbase.zookeeper.property.clientPort</name> |
|
63 |
<value>2181</value> |
|
64 |
</property> |
|
65 |
|
|
66 |
|
|
67 |
<!-- MR IO --> |
|
68 |
|
|
69 |
<property> |
|
70 |
<name>mapreduce.inputformat.class</name> |
|
71 |
<value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value> |
|
72 |
</property> |
|
73 |
|
|
74 |
<property> |
|
75 |
<name>mapred.mapoutput.key.class</name> |
|
76 |
<value>org.apache.hadoop.io.Text</value> |
|
77 |
</property> |
|
78 |
<property> |
|
79 |
<name>mapred.mapoutput.value.class</name> |
|
80 |
<value>org.apache.hadoop.io.Text</value> |
|
81 |
</property> |
|
82 |
<property> |
|
83 |
<name>mapred.output.key.class</name> |
|
84 |
<value>org.apache.hadoop.io.Text</value> |
|
85 |
</property> |
|
86 |
<property> |
|
87 |
<name>mapred.output.value.class</name> |
|
88 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value> |
|
89 |
</property> |
|
90 |
|
|
91 |
<!-- ## This is required for new MapReduce API usage --> |
|
92 |
<property> |
|
93 |
<name>mapred.mapper.new-api</name> |
|
94 |
<value>true</value> |
|
95 |
</property> |
|
96 |
<property> |
|
97 |
<name>mapred.reducer.new-api</name> |
|
98 |
<value>true</value> |
|
99 |
</property> |
|
100 |
|
|
101 |
<!-- # Job-specific options --> |
|
102 |
<property> |
|
103 |
<name>dfs.blocksize</name> |
|
104 |
<value>32M</value> |
|
105 |
</property> |
|
106 |
<property> |
|
107 |
<name>mapred.output.compress</name> |
|
108 |
<value>false</value> |
|
109 |
</property> |
|
110 |
<property> |
|
111 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
112 |
<value>false</value> |
|
113 |
</property> |
|
114 |
<property> |
|
115 |
<name>mapred.reduce.tasks.speculative.execution</name> |
|
116 |
<value>false</value> |
|
117 |
</property> |
|
118 |
|
|
119 |
<property> |
|
120 |
<name>mapreduce.map.speculative</name> |
|
121 |
<value>false</value> |
|
122 |
</property> |
|
123 |
|
|
124 |
<!-- I/O FORMAT --> |
|
125 |
<!-- IMPORTANT: sets default delimeter used by text output writer. Required |
|
126 |
to fix issue with traling tab added between id and value in multiple outputs --> |
|
127 |
<property> |
|
128 |
<name>mapred.textoutputformat.separator</name> |
|
129 |
<value>${lod_delim}</value> |
|
130 |
</property> |
|
131 |
<!-- ## Names of all output ports --> |
|
132 |
|
|
133 |
<property> |
|
134 |
<name>mapreduce.multipleoutputs</name> |
|
135 |
<value> |
|
136 |
${out1} ${out2} ${out3} ${out4} ${out5} ${out6} |
|
137 |
</value> |
|
138 |
|
|
139 |
</property> |
|
140 |
<property> |
|
141 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name> |
|
142 |
<value>org.apache.hadoop.io.Text</value> |
|
143 |
</property> |
|
144 |
<property> |
|
145 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name> |
|
146 |
<value>org.apache.hadoop.io.Text</value> |
|
147 |
</property> |
|
148 |
<property> |
|
149 |
<name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name> |
|
150 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
151 |
</property> |
|
152 |
|
|
153 |
<!-- result --> |
|
154 |
<property> |
|
155 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name> |
|
156 |
<value>org.apache.hadoop.io.Text</value> |
|
157 |
</property> |
|
158 |
<property> |
|
159 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name> |
|
160 |
<value>org.apache.hadoop.io.Text</value> |
|
161 |
</property> |
|
162 |
<property> |
|
163 |
<name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name> |
|
164 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
165 |
</property> |
|
166 |
|
|
167 |
|
|
168 |
<!-- person --> |
|
169 |
<property> |
|
170 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name> |
|
171 |
<value>org.apache.hadoop.io.Text</value> |
|
172 |
</property> |
|
173 |
<property> |
|
174 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name> |
|
175 |
<value>org.apache.hadoop.io.Text</value> |
|
176 |
</property> |
|
177 |
<property> |
|
178 |
<name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name> |
|
179 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
180 |
</property> |
|
181 |
|
|
182 |
|
|
183 |
<!-- project --> |
|
184 |
<property> |
|
185 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name> |
|
186 |
<value>org.apache.hadoop.io.Text</value> |
|
187 |
</property> |
|
188 |
<property> |
|
189 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name> |
|
190 |
<value>org.apache.hadoop.io.Text</value> |
|
191 |
</property> |
|
192 |
<property> |
|
193 |
<name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name> |
|
194 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
195 |
</property> |
|
196 |
|
|
197 |
|
|
198 |
|
|
199 |
<!-- datasource --> |
|
200 |
<property> |
|
201 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name> |
|
202 |
<value>org.apache.hadoop.io.Text</value> |
|
203 |
</property> |
|
204 |
<property> |
|
205 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name> |
|
206 |
<value>org.apache.hadoop.io.Text</value> |
|
207 |
</property> |
|
208 |
<property> |
|
209 |
<name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name> |
|
210 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
211 |
</property> |
|
212 |
|
|
213 |
|
|
214 |
<!-- organization --> |
|
215 |
<property> |
|
216 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name> |
|
217 |
<value>org.apache.hadoop.io.Text</value> |
|
218 |
</property> |
|
219 |
<property> |
|
220 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name> |
|
221 |
<value>org.apache.hadoop.io.Text</value> |
|
222 |
</property> |
|
223 |
<property> |
|
224 |
<name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name> |
|
225 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
226 |
</property> |
|
227 |
|
|
228 |
|
|
229 |
|
|
230 |
<!-- ## Classes of mapper and reducer --> |
|
231 |
|
|
232 |
<property> |
|
233 |
<name>mapreduce.map.class</name> |
|
234 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodMapper</value> |
|
235 |
</property> |
|
236 |
<property> |
|
237 |
<name>mapreduce.reduce.class</name> |
|
238 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodReducer</value> |
|
239 |
</property> |
|
240 |
<property> |
|
241 |
<name>io.serializations</name> |
|
242 |
<value>org.apache.hadoop.io.serializer.WritableSerialization</value> |
|
243 |
</property> |
|
244 |
|
|
245 |
<!-- ## Custom config --> |
|
246 |
|
|
247 |
<!--delim character used to seperate fields in hdfs dump files <property> --> |
|
248 |
<property> |
|
249 |
<name>lod.delim</name> |
|
250 |
<value>${lod_delim}</value> |
|
251 |
</property> |
|
252 |
|
|
253 |
<property> |
|
254 |
<name>lod.enclosing</name> |
|
255 |
<value>${lod_enclosing}</value> |
|
256 |
</property> |
|
257 |
|
|
258 |
|
|
259 |
<property> |
|
260 |
<name>lod.seperator</name> |
|
261 |
<value>${lod_seperator}</value> |
|
262 |
</property> |
|
263 |
|
|
264 |
|
|
265 |
<!--source hbase table --> |
|
266 |
<property> |
|
267 |
<name>hbase.mapreduce.inputtable</name> |
|
268 |
<value>${lod_hbase_table}</value> |
|
269 |
</property> |
|
270 |
<property> |
|
271 |
<name>hbase.mapred.inputtable</name> |
|
272 |
<value>${lod_hbase_table}</value> |
|
273 |
</property> |
|
274 |
|
|
275 |
<!-- This directory does not correspond to a data store. In fact, this |
|
276 |
directory only contains multiple data stores. It has to be set to the name |
|
277 |
of the workflow node. --> |
|
278 |
<property> |
|
279 |
<name>mapred.output.dir</name> |
|
280 |
<value>${lod_output}</value> |
|
281 |
</property> |
|
282 |
<property> |
|
283 |
<name>index.conf</name> |
|
284 |
<value>${lod_indexConf}</value> |
|
285 |
</property> |
|
286 |
|
|
287 |
<property> |
|
288 |
<name>lod.lastExecutionDate</name> |
|
289 |
<value>${lod_lastExecutionDate}</value> |
|
290 |
</property> |
|
291 |
|
|
292 |
<!-- ## Workflow node parameters --> |
|
293 |
<property> |
|
294 |
<name>mapred.reduce.tasks</name> |
|
295 |
<value>${numReducers}</value> |
|
296 |
</property> |
|
297 |
|
|
298 |
</configuration> |
|
299 |
|
|
300 |
</map-reduce> |
|
301 |
<ok to="end"/> |
|
302 |
|
|
303 |
<error to="fail"/> |
|
304 |
</action> |
|
305 |
|
|
306 |
|
|
307 |
<action name='clearGraph'> |
|
308 |
<java> |
|
309 |
<prepare> |
|
310 |
</prepare> |
|
311 |
<configuration> |
|
312 |
<property> |
|
313 |
<name>mapred.job.queue.name</name> |
|
314 |
<value>${queueName}</value> |
|
315 |
</property> |
|
316 |
</configuration> |
|
317 |
<main-class>eu.dnetlib.iis.core.workflows.lodexport.ClearGraph</main-class> |
|
318 |
<arg>${lod_conLine}</arg> |
|
319 |
<arg>${lod_username}</arg> |
|
320 |
<arg>${lod_password}</arg> |
|
321 |
<arg>${lod_minCpart}</arg> |
|
322 |
<arg>${lod_maxCpart}</arg> |
|
323 |
<arg>${lod_part}</arg> |
|
324 |
<arg>${lod_relationsGraph}</arg> |
|
325 |
</java> |
|
326 |
<ok to="rdf_entities_import"/> |
|
327 |
|
|
328 |
<error to="fail"/> |
|
329 |
</action> |
|
<action name="rdf_entities_import">
    <map-reduce>
        <prepare>
            <delete path="${nameNode}${lod_output}test"/>
        </prepare>

        <configuration>
            <property>
                <name>hbase.security.authentication</name>
                <value>simple</value>
            </property>

            <!-- ZOOKEEPER -->
            <property>
                <name>hbase.zookeeper.quorum</name>
                <value>namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu</value>
            </property>
            <property>
                <name>zookeeper.znode.rootserver</name>
                <value>root-region-server</value>
            </property>

            <!-- CSV PROPS GO HERE -->
            <property>
                <name>hbase.zookeeper.property.clientPort</name>
                <value>2181</value>
            </property>

            <!-- MR IO -->
            <property>
                <name>mapred.input.dir</name>
                <value>${lod_EntitiesInputFile}</value>
            </property>

            <property>
                <name>mapreduce.inputformat.class</name>
                <value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
            </property>

            <property>
                <name>mapred.mapoutput.key.class</name>
                <value>org.apache.hadoop.io.Text</value>
            </property>

            <property>
                <name>mapred.mapoutput.value.class</name>
                <value>org.apache.hadoop.io.Text</value>
            </property>

            <property>
                <name>mapred.output.key.class</name>
                <value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
            </property>

            <property>
                <name>mapred.output.value.class</name>
                <value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
            </property>

            <!-- ## This is required for new MapReduce API usage -->
            <property>
                <name>mapred.mapper.new-api</name>
                <value>true</value>
            </property>
            <property>
                <name>mapred.reducer.new-api</name>
                <value>true</value>
            </property>

            <!-- # Job-specific options -->
            <property>
                <name>dfs.blocksize</name>
                <value>32M</value>
            </property>
            <property>
                <name>mapred.output.compress</name>
                <value>false</value>
            </property>
            <property>
                <name>mapred.reduce.tasks.speculative.execution</name>
                <value>false</value>
            </property>
            <property>
                <name>mapreduce.map.speculative</name>
                <value>false</value>
            </property>

            <property>
                <name>map.output.key.field.separator</name>
                <value>${lod_delim}</value>
            </property>

            <!-- ## Classes of mapper and reducer -->
            <property>
                <name>mapreduce.map.class</name>
                <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value>
            </property>

            <property>
                <name>mapreduce.reduce.class</name>
                <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value>
            </property>

            <property>
                <name>io.serializations</name>
                <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
            </property>

            <!-- ## Custom config -->
            <!-- delim character used to separate fields in hdfs dump files -->
            <property>
                <name>lod.delim</name>
                <value>${lod_delim}</value>
            </property>

            <property>
                <name>lod.enclosing</name>
                <value>${lod_enclosing}</value>
            </property>

            <property>
                <name>lod.seperator</name>
                <value>${lod_seperator}</value>
            </property>

            <property>
                <name>lod.lastExecutionDate</name>
                <value>${lod_lastExecutionDate}</value>
            </property>

            <property>
                <name>lod.conLine</name>
                <value>${lod_conLine}</value>
            </property>

            <property>
                <name>lod.username</name>
                <value>${lod_username}</value>
            </property>

            <property>
                <name>lod.password</name>
                <value>${lod_password}</value>
            </property>

            <property>
                <name>lod.minCpart</name>
                <value>${lod_minCpart}</value>
            </property>

            <property>
                <name>lod.maxCpart</name>
                <value>${lod_maxCpart}</value>
            </property>

            <property>
                <name>lod.part</name>
                <value>${lod_part}</value>
            </property>

            <property>
                <name>lod.jsonRels</name>
                <value>${lod_jsonRels}</value>
            </property>

            <property>
                <name>lod.jsonEntities</name>
                <value>${lod_jsonEntities}</value>
            </property>

            <property>
                <name>lod.defaultGraph</name>
                <value>${lod_defaultGraph}</value>
            </property>

            <property>
                <name>lod.relationsGraph</name>
                <value>${lod_relationsGraph}</value>
            </property>

            <property>
                <name>lod.baseURI</name>
                <value>${lod_baseURI}</value>
            </property>

            <property>
                <name>mapred.reduce.tasks</name>
                <value>${numReducers}</value>
            </property>

            <property>
                <name>lod.inputFile</name>
                <value>${lod_EntitiesInputFile}</value>
            </property>

            <property>
                <name>mapred.output.dir</name>
                <value>${lod_output}test</value>
            </property>

            <property>
                <name>lod.entitiesPerQuery</name>
                <value>${lod_entitiesPerQuery}</value>
            </property>
            <property>
                <name>lod.relationsPerQuery</name>
                <value>${lod_relationsPerQuery}</value>
            </property>

            <property>
                <name>lod.dataPath</name>
                <value>${lod_dataPath}</value>
            </property>

        </configuration>
    </map-reduce>

    <ok to="rdf_relations_import"/>
    <error to="fail"/>
</action>
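
All of the lod.* properties declared above travel with the job configuration, which is the only channel the mapper and reducer have for receiving them. LodImportMapper itself is not shown in this diff; the sketch below only illustrates the new-API pattern the configuration implies, with hypothetical field names. Note that the code has to ask for the keys exactly as spelled in the XML, including lod.seperator.

    import java.io.IOException;
    import java.util.regex.Pattern;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Sketch only: the real eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper
    // is not part of this revision. This shows how a new-API mapper picks up the
    // lod.* job properties set in the workflow's <configuration> block.
    public class LodImportMapperSketch extends Mapper<LongWritable, Text, Text, Text> {

        private String delim;
        private String enclosing;
        private String separator;

        @Override
        protected void setup(Context context) {
            Configuration conf = context.getConfiguration();
            delim = conf.get("lod.delim");         // ${lod_delim}
            enclosing = conf.get("lod.enclosing"); // ${lod_enclosing}
            // The key really is spelled "seperator" in the workflow XML,
            // so it must be requested with the same spelling.
            separator = conf.get("lod.seperator");
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split one line of the HDFS dump on the configured delimiter
            // (quoted so the delimiter is treated literally, not as a regex)...
            String[] fields = value.toString().split(Pattern.quote(delim), -1);
            // ...and re-emit it keyed on the first field (illustrative only).
            context.write(new Text(fields[0]), value);
        }
    }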
|
<action name="rdf_relations_import">
    <map-reduce>
        <prepare>
            <delete path="${nameNode}${lod_output}test"/>
        </prepare>

        <configuration>
            <property>
                <name>hbase.security.authentication</name>
                <value>simple</value>
            </property>

            <!-- ZOOKEEPER -->
            <property>
                <name>hbase.zookeeper.quorum</name>
                <value>quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu</value>
            </property>
            <property>
                <name>zookeeper.znode.rootserver</name>
                <value>root-region-server</value>
            </property>

            <!-- CSV PROPS GO HERE -->
            <property>
                <name>hbase.zookeeper.property.clientPort</name>
                <value>2181</value>
            </property>

            <!-- MR IO -->
            <property>
                <name>mapred.input.dir</name>
                <value>${lod_RelationsInputFile}</value>
            </property>

            <property>
                <name>mapreduce.inputformat.class</name>
                <value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
            </property>

            <property>
                <name>mapred.mapoutput.key.class</name>
                <value>org.apache.hadoop.io.Text</value>
            </property>

            <property>
                <name>mapred.mapoutput.value.class</name>
                <value>org.apache.hadoop.io.Text</value>
            </property>

            <property>
                <name>mapred.output.key.class</name>
                <value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
            </property>

            <property>
                <name>mapred.output.value.class</name>
                <value>org.apache.hadoop.mapreduce.lib.output.NullOutputFormat</value>
            </property>

            <!-- ## This is required for new MapReduce API usage -->
            <property>
                <name>mapred.mapper.new-api</name>
                <value>true</value>
            </property>
            <property>
                <name>mapred.reducer.new-api</name>
                <value>true</value>
            </property>

            <!-- # Job-specific options -->
            <property>
                <name>dfs.blocksize</name>
                <value>32M</value>
            </property>
            <property>
                <name>mapred.output.compress</name>
                <value>false</value>
            </property>
            <property>
                <name>mapred.reduce.tasks.speculative.execution</name>
                <value>false</value>
            </property>
            <property>
                <name>mapreduce.map.speculative</name>
                <value>false</value>
            </property>

            <property>
                <name>map.output.key.field.separator</name>
                <value>${lod_delim}</value>
            </property>

            <!-- ## Classes of mapper and reducer -->
            <property>
                <name>mapreduce.map.class</name>
                <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value>
            </property>

            <property>
                <name>mapreduce.reduce.class</name>
                <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value>
            </property>

            <property>
                <name>io.serializations</name>
                <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
            </property>

            <!-- ## Custom config -->
            <!-- delim character used to separate fields in hdfs dump files -->
            <property>
                <name>lod.delim</name>
                <value>${lod_delim}</value>
            </property>

            <property>
                <name>lod.enclosing</name>
                <value>${lod_enclosing}</value>
            </property>

            <property>
                <name>lod.seperator</name>
                <value>${lod_seperator}</value>
            </property>

            <property>
                <name>lod.lastExecutionDate</name>
                <value>${lod_lastExecutionDate}</value>
            </property>

            <property>
                <name>lod.conLine</name>
                <value>${lod_conLine}</value>
            </property>

            <property>
                <name>lod.username</name>
                <value>${lod_username}</value>
            </property>

            <property>
                <name>lod.password</name>
                <value>${lod_password}</value>
            </property>

            <property>
                <name>lod.minCpart</name>
                <value>${lod_minCpart}</value>
            </property>

            <property>
                <name>lod.maxCpart</name>
                <value>${lod_maxCpart}</value>
            </property>

            <property>
                <name>lod.part</name>
                <value>${lod_part}</value>
            </property>

            <property>
                <name>lod.jsonRels</name>
                <value>${lod_jsonRels}</value>
            </property>

            <property>
                <name>lod.jsonEntities</name>
                <value>${lod_jsonEntities}</value>
            </property>

            <property>
                <name>lod.defaultGraph</name>
                <value>${lod_defaultGraph}</value>
            </property>

            <property>
                <name>lod.relationsGraph</name>
                <value>${lod_relationsGraph}</value>
            </property>

            <property>
                <name>lod.baseURI</name>
                <value>${lod_baseURI}</value>
            </property>

            <property>
                <name>mapred.reduce.tasks</name>
                <value>${numReducers}</value>
            </property>

            <property>
                <name>lod.inputFile</name>
                <value>${lod_RelationsInputFile}</value>
            </property>

            <property>
                <name>mapred.output.dir</name>
                <value>${lod_output}test</value>
            </property>

            <property>
                <name>lod.entitiesPerQuery</name>
                <value>${lod_entitiesPerQuery}</value>
            </property>

            <property>
                <name>lod.relationsPerQuery</name>
                <value>${lod_relationsPerQuery}</value>
            </property>
            <property>
                <name>lod.dataPath</name>
                <value>${lod_dataPath}</value>
            </property>

        </configuration>
    </map-reduce>

    <ok to="finalize"/>
    <error to="fail"/>
</action>
|
<action name='finalize'>
    <java>
        <prepare>
        </prepare>

        <configuration>
            <property>
                <name>mapred.job.queue.name</name>
                <value>${queueName}</value>
            </property>
        </configuration>

        <main-class>eu.dnetlib.iis.core.workflows.lodexport.Finalize</main-class>

        <arg>${lod_conLine}</arg>
        <arg>${lod_username}</arg>
        <arg>${lod_password}</arg>
        <arg>${lod_minCpart}</arg>
        <arg>${lod_maxCpart}</arg>
        <arg>${lod_part}</arg>
        <arg>${lod_relationsGraph}</arg>
    </java>
    <ok to="end"/>
    <error to="fail"/>
</action>
|
<action name="cleanUpHDFS">
    <fs>
        <delete path="${lod_output}test"/>
    </fs>
    <ok to="end"/>
    <error to="fail"/>
</action>
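
cleanUpHDFS relies on Oozie's built-in fs action rather than user code, so the delete above amounts to a recursive HDFS delete of the temporary output directory. (As transcribed here, no transition in this workflow targets cleanUpHDFS -- finalize goes straight to end -- so it only runs if invoked explicitly.) A rough Java equivalent, with the path supplied as a program argument:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // Rough Java equivalent of <fs><delete path="${lod_output}test"/></fs>;
    // the concrete path value comes from the workflow parameters at runtime.
    public class CleanUpHdfsSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration(); // picks up fs.defaultFS
            FileSystem fs = FileSystem.get(conf);
            Path out = new Path(args[0]);             // e.g. the ${lod_output}test dir
            boolean deleted = fs.delete(out, true);   // true = recursive
            System.out.println(out + (deleted ? " deleted" : " not found"));
        }
    }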
|
<kill name="fail">
    <message>
        Unfortunately, the process failed -- error message:
        [${wf:errorMessage(wf:lastErrorNode())}]
    </message>
</kill>

<end name="end"/>
</workflow-app>
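
With the workflow definition complete, a job is started by submitting a property set that binds every ${...} placeholder above (lod_delim, lod_output, numReducers, and so on). The sketch below uses the standard Oozie Java client; the server URL, app path, and property values are placeholders, not part of this revision, and in practice the same submission is usually done with the oozie CLI and a job.properties file.

    import java.util.Properties;

    import org.apache.oozie.client.OozieClient;

    // Sketch of submitting this workflow through the Oozie client API.
    // URL, paths and values below are placeholders.
    public class SubmitLodExport {
        public static void main(String[] args) throws Exception {
            OozieClient oozie = new OozieClient("http://oozie-host:11000/oozie");

            Properties conf = oozie.createConfiguration();
            conf.setProperty(OozieClient.APP_PATH, "hdfs://namenode/path/to/lodexport-wf");
            conf.setProperty("queueName", "default");
            conf.setProperty("numReducers", "8");
            conf.setProperty("lod_delim", ",");
            // ... plus the remaining lod_* parameters referenced in the XML ...

            String jobId = oozie.run(conf);  // submit and start the workflow
            System.out.println("Started workflow " + jobId);
        }
    }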

modules/dnet-openaire-lodexport-wf/src/main/resources/oozie-log4j.properties
## The main job of this file is to make the Oozie tests log messages
## less verbose, but you can change this behavior any way you want
## by editing this file appropriately.

log4j.rootLogger=DEBUG,oozie

log4j.appender.none=org.apache.log4j.varia.NullAppender

# Direct log messages to a rolling log file
log4j.appender.oozie=org.apache.log4j.RollingFileAppender
log4j.appender.oozie.File=/tmp/statsExportLog
log4j.appender.oozie.MaxFileSize=1MB
log4j.appender.oozie.MaxBackupIndex=1
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

#log4j.logger.oozieops=OFF, none
#log4j.logger.oozieinstrumentation=OFF, none
#log4j.logger.oozieaudit=OFF, none

log4j.logger.org.apache.hadoop=DEBUG, oozie
log4j.logger.eu.dnetlib.iis=DEBUG, oozie
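
With this configuration, everything logged below eu.dnetlib.iis is routed through the oozie file appender, whose ConversionPattern renders timestamp, level, short class name, line number, and message. A tiny illustration (the class name and timestamp in the comment are invented for the example):

    import org.apache.log4j.Logger;

    // Shows roughly what the configured ConversionPattern
    // (%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n) produces.
    public class LogPatternDemo {
        private static final Logger log = Logger.getLogger(LogPatternDemo.class);

        public static void main(String[] args) {
            log.debug("export step done");
            // Written to /tmp/statsExportLog roughly as:
            // 2016-02-06 12:34:56 DEBUG LogPatternDemo:9 - export step done
        }
    }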

modules/dnet-openaire-lodexport-wf/core/src/test/resources/test-custom-log4j.properties
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#

log4j.appender.oozie=org.apache.log4j.ConsoleAppender
log4j.appender.oozie.Target=System.out
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n

log4j.appender.null=org.apache.log4j.varia.NullAppender

log4j.logger.org.apache=INFO, oozie
log4j.logger.org.mortbay=WARN, oozie
log4j.logger.org.hsqldb=WARN, oozie

log4j.logger.opslog=OFF, null
log4j.logger.applog=OFF, null
log4j.logger.instrument=OFF, null

log4j.logger.a=OFF, null
|
modules/dnet-openaire-lodexport-wf/core/src/test/resources/hsqldb-oozie-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>org.hsqldb.jdbcDriver</value>
    </property>
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:hsqldb:mem:oozie-db;create=true</value>
    </property>
</configuration>
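
This variant points Oozie's JPA service at a throwaway in-memory HSQLDB database, which is what makes it suitable for unit tests: create=true materializes the schema on first connect, and everything vanishes with the JVM. A minimal sketch of opening that same URL directly (HSQLDB's default sa account with an empty password is assumed):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.Statement;

    // Opens the same in-memory HSQLDB URL the test oozie-site points at.
    // The "sa"/empty-password login is HSQLDB's default, assumed here.
    public class HsqldbSmokeTest {
        public static void main(String[] args) throws Exception {
            Class.forName("org.hsqldb.jdbcDriver");
            try (Connection conn = DriverManager.getConnection(
                    "jdbc:hsqldb:mem:oozie-db;create=true", "sa", "");
                 Statement st = conn.createStatement()) {
                st.execute("CREATE TABLE smoke (id INT)");
                System.out.println("in-memory oozie-db is up");
            }
        }
    }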

modules/dnet-openaire-lodexport-wf/core/src/test/resources/mysql-oozie-site.xml
<?xml version="1.0"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>com.mysql.jdbc.Driver</value>
        <description>JDBC driver class.</description>
    </property>
    <property>
        <name>oozie.test.db.port</name>
        <value>3306</value>
    </property>
    <property>