Project

General

Profile

« Previous | Next » 

Revision 48938

Added by Tsampikos Livisianos over 6 years ago

dnet45

View differences:

modules/dnet-openaire-lodexport-wf/install.sh
1
#!/bin/bash
# Rebuilds the LOD import module, then packages and deploys the LOD export
# workflow via the oozie-package profile.
# NOTE(review): checkout paths and user.name are hard-coded to one developer's
# machine; parameterize before using this anywhere else.

# Abort early instead of running Maven in the wrong directory when cd fails
# (the original script ignored cd errors).
cd /Users/giorgos/Documents/svn/dnet-openaire-lodimport/trunk || exit 1
svn up
mvn clean install -Dmaven.test.skip=true

cd /Users/giorgos/Documents/svn/dnet-openaire-lodexport-wf || exit 1
svn up
mvn clean package  -Dworkflow.source.dir=eu/dnetlib/iis/core/javamapreduce/stats -Poozie-package,deploy  -Diis.hadoop.frontend.home.dir=/home  -Duser.name=giorgos.alexiou
  
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/TestMethod.java
1
package eu.dnetlib.iis.core.workflows.lodexport;
2

  
3
import org.apache.log4j.Logger;
4
public class TestMethod {
5
	
6
	private static Logger log = Logger.getLogger(TestMethod.class);
7

  
8

  
9
	public static void main(String[] args) throws Exception {
10

  
11
			log.debug("Finalizing...");
12

  
13
		try {
14

  
15
		} catch (Exception e) {
16
			log.error("Datasource creation failed: " + e.toString(), e);
17
			System.out.println("Datasource creation failed: " + e.toString());
18
		}
19
	}
20

  
21
}
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/Finalize.java
1
package eu.dnetlib.iis.core.workflows.lodexport;
2

  
3
import com.jolbox.bonecp.BoneCPDataSource;
4
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.DB;
5
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.RDFizer;
6
import org.apache.log4j.Logger;
7

  
8
import java.sql.Connection;
9
public class Finalize {
10
	
11
	private static Logger log = Logger.getLogger(Finalize.class);
12
	private static BoneCPDataSource ds;
13
	private static Connection conn;
14
	public static void main(String[] args) throws Exception {
15
		log.debug("Finalizing...");
16
		
17
		try {
18
			DB db = new DB();
19
			ds = db.getDatasource(args[0], args[1], args[2], args[3], args[4], args[5]);
20

  
21
		} catch (Exception e) {
22
			log.error("Datasource creation failed: " + e.toString(), e);
23
			System.out.println("Datasource creation failed: " + e.toString());
24
		}
25
		try {
26
			conn = ds.getConnection();
27
			RDFizer.setCheckpoint(conn, 120);
28
			conn.close();
29
		} catch (Exception e) {
30
			conn.close();
31
			log.error("Failed to set checkpoint" + e.toString(), e);
32
		}
33

  
34
	}
35
}
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/HbaseScannerGenerator.java
1
package eu.dnetlib.iis.core.workflows.lodexport;

/**
 * Created by eri_k on 2/6/2016.
 */

import org.apache.commons.cli.*;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Base64;

import java.io.*;
import java.util.Arrays;
import java.util.Properties;

//Based  on icm-iis-import's importer.mapred.helper.ScanStringGenerator
//by @author mhorst

/**
 * Command-line helper that builds an HBase {@link Scan} from CLI options and
 * writes its Base64-serialized form under the key "scan" into the properties
 * file named by the "oozie.action.output.properties" system property, so a
 * downstream Oozie action can pick the scanner definition up.
 */
public class HbaseScannerGenerator {

    /** Charset used to turn option strings into row-key / family bytes. */
    public static final String DEFAULT_ENCODING = "utf-8";
    /** Separator for the multi-valued -r and -f options. */
    public static final char DEFAULT_CF_CSV_SEPARATOR = ',';

    /**
     * Parses CLI options, configures a Scan and serializes it for Oozie.
     *
     * @param args supported options: -c cacheSize, -s startWith, -e endWith,
     *             -r rowPrefix (CSV, OR-ed prefix filters), -f columnFamilies
     *             (CSV), -x encoding (defaults to {@link #DEFAULT_ENCODING})
     * @throws ParseException when the command line cannot be parsed
     * @throws IOException    when the output properties file cannot be written
     */
    public static void main(String[] args) throws FileNotFoundException, IOException, ParseException {
        // preparing options
        Options options = new Options();
        options.addOption("c", "cacheSize", true, "scanner caching size: " + "number of rows for caching that will be passed to scanners");
        options.addOption("s", "startWith", true, "element to start iteration with");
        options.addOption("e", "endWith", true, "element to end iteration with");
        options.addOption("r", "rowPrefix", true, "row prefix");
        options.addOption("f", "columnFamilies", true, "CSV containing comma separated " + "supported column families");
        options.addOption("x", "encoding", true, "encoding to be used for building byte[] data from parameters," + "set to " + DEFAULT_ENCODING + " by default");

        // parsing parameters
        CommandLineParser parser = new GnuParser();
        CommandLine cmdLine = parser.parse(options, args);

        String encoding = cmdLine.hasOption("x") ? cmdLine.getOptionValue("x") : DEFAULT_ENCODING;

        Scan scan = new Scan();
        if (cmdLine.hasOption("c")) {
            // parseInt avoids the needless Integer boxing of the original
            // Integer.valueOf call (setCaching takes a primitive int).
            scan.setCaching(Integer.parseInt(cmdLine.getOptionValue("c")));
        }
        if (cmdLine.hasOption("s")) {
            scan.setStartRow(cmdLine.getOptionValue("s").getBytes(encoding));
        }
        if (cmdLine.hasOption("e")) {
            scan.setStopRow(cmdLine.getOptionValue("e").getBytes(encoding));
        }
        if (cmdLine.hasOption("r")) {
            // supporting multiple prefixes: any one matching prefix passes
            String[] rowPrefixCSV = StringUtils.split(cmdLine.getOptionValue("r"), DEFAULT_CF_CSV_SEPARATOR);
            if (rowPrefixCSV != null) {
                FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
                for (String currentRowPrefix : rowPrefixCSV) {
                    filterList.addFilter(new PrefixFilter(copyArrayWhenNotNull(currentRowPrefix.trim().getBytes(encoding))));
                }
                scan.setFilter(filterList);
            }
        }

        if (cmdLine.hasOption("f")) {
            String[] cfCSV = StringUtils.split(cmdLine.getOptionValue("f"), DEFAULT_CF_CSV_SEPARATOR);
            if (cfCSV != null) {
                for (String currentCf : cfCSV) {
                    scan.addFamily(copyArrayWhenNotNull(currentCf.trim().getBytes(encoding)));
                }
            }
        }

        // Hand the serialized scan back to Oozie via its capture-output file.
        File file = new File(System.getProperty("oozie.action.output.properties"));
        Properties props = new Properties();
        props.setProperty("scan", convertScanToString(scan));
        OutputStream os = new FileOutputStream(file);
        try {
            props.store(os, "");
        } finally {
            os.close();
        }
    }

    /**
     * Serializes the scan with Scan.write and Base64-encodes the result.
     *
     * @param scan scan to serialize
     * @return Base64 string representation of the scan
     * @throws IOException when serialization fails
     */
    private static String convertScanToString(Scan scan) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(out);
        scan.write(dos);
        return Base64.encodeBytes(out.toByteArray());
    }

    /**
     * Copies array or returns null when source is null.
     *
     * @param source array to copy, may be null
     * @return copied array, or null when source is null
     */
    public static final byte[] copyArrayWhenNotNull(byte[] source) {
        if (source != null) {
            return Arrays.copyOf(source, source.length);
        } else {
            return null;
        }
    }

}
109

  
modules/dnet-openaire-lodexport-wf/src/main/java/eu/dnetlib/iis/core/workflows/lodexport/ClearGraph.java
1
package eu.dnetlib.iis.core.workflows.lodexport;
2

  
3

  
4
import com.jolbox.bonecp.BoneCPDataSource;
5
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.DB;
6
import eu.dnetlib.data.mapreduce.hbase.lodImport.utils.RDFizer;
7
import org.apache.log4j.Logger;
8

  
9
import java.sql.Connection;
10

  
11
public class ClearGraph {
12
	
13
	private static Logger log = Logger.getLogger(ClearGraph.class);
14
	private static BoneCPDataSource ds;
15
	private static Connection conn;
16
	public static void main(String[] args) throws Exception {
17
		log.debug("Clearing up Virtuoso Relations Graph...");
18
		try {
19
			DB db = new DB();
20
			ds = db.getDatasource(args[0], args[1], args[2], args[3], args[4], args[5]);
21

  
22
		} catch (Exception e) {
23
			log.error("Datasource creation failed: " + e.toString(), e);
24
			System.out.println("Datasource creation failed: " + e.toString());
25
		}
26

  
27
		try {
28
			conn =  ds.getConnection();
29
			RDFizer.setCheckpoint(conn,-1);
30
			RDFizer.clearGraph(args[6], conn);
31
			conn.close();
32
		} catch (Exception e){
33
			conn.close();
34
			log.error("Failed to clear Graph: " + e.toString(),e);
35
			System.out.println("Failed to clear Graph: " + e.toString());
36

  
37
		}
38

  
39

  
40
	}
41
}
modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_test_on_cluster.sh
1
# Packages the reverse_relation java map-reduce example (with its test
# resources attached) using the oozie,deploy profiles.
mvn clean package -Pattach-test-resources,oozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_on_cluster.sh
1
# Packages the cloner example using the oozie,deploy profiles.
mvn clean package -Poozie,deploy -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner
modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_test_locally.sh
1
# Packages the reverse_relation java map-reduce example (with its test
# resources attached) using the oozie,deploy-local profiles.
mvn clean package -Pattach-test-resources,oozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
modules/dnet-openaire-lodexport-wf/src/main/scripts/run_example_locally.sh
1
# Packages the cloner example using the oozie,deploy-local profiles.
mvn clean package -Poozie,deploy-local -Dworkflow.source.dir=eu/dnetlib/iis/core/examples/cloner
modules/dnet-openaire-lodexport-wf/src/main/scripts/update_example_workflow_apps_list.py
1
#!/usr/bin/env python

## Generates a new version of the "generate_example_workflow_apps.properties"
## file that contains paths to all of the example workflows stored in this
## project. This is done by scanning the directory tree and searching for
## directories that look like they contain workflow definitions.

from __future__ import print_function

import os
import os.path

dir_with_examples = "src/test/resources/eu/dnetlib/iis/core/examples"
dirs_to_ignore = [".svn"]
output_file = "src/main/scripts/generate_example_workflow_apps.properties"


def does_contain_example(dir_path):
	"""Return True when dir_path contains an "oozie_app" subdirectory."""
	return os.path.exists(os.path.join(dir_path, "oozie_app"))


def collect_examples(root_dir):
	"""Walk root_dir and return the sorted paths of example directories.

	Directories listed in dirs_to_ignore are skipped, and example
	directories themselves are not descended into.
	"""
	examples = []
	for root, dirs, files in os.walk(root_dir):
		# Prune ignored names in place. The original called
		# dirs.remove(dir_to_ignore) unconditionally, which raised
		# ValueError whenever an ignored name (e.g. ".svn") was absent
		# from the directory being visited.
		dirs[:] = [d for d in dirs if d not in dirs_to_ignore]
		dirs_to_remove = []
		for dir_ in dirs:
			dir_path = os.path.join(root, dir_)
			if does_contain_example(dir_path):
				examples.append(dir_path)
				dirs_to_remove.append(dir_)
		# Do not walk inside example directories.
		for dir_to_remove in dirs_to_remove:
			dirs.remove(dir_to_remove)
	return sorted(examples)


def main():
	# One example path per line, plus the trailing reminder comment the
	# downstream consumer expects.
	examples = collect_examples(dir_with_examples)
	with open(output_file, "w") as f:
		for e in examples:
			print(e, file=f)
		print("# remember to leave '\\n' after the last line\n", file=f)


if __name__ == "__main__":
	main()
modules/dnet-openaire-lodexport-wf/src/main/scripts/generate_example_workflow_apps.properties
1
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_with_unicode_escape_codes
2
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer
3
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_explicit_schema_file
4
eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_subworkflow
5
eu/dnetlib/iis/core/examples/hadoopstreaming/wordcount_with_distributed_cache
6
eu/dnetlib/iis/core/examples/java/cloner
7
eu/dnetlib/iis/core/examples/java/joiner
8
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer
9
eu/dnetlib/iis/core/examples/java/json_based_producer_and_consumer-failing
10
eu/dnetlib/iis/core/examples/java/line_by_line_copier
11
eu/dnetlib/iis/core/examples/javamapreduce/cloner
12
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_explicit_schema
13
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output
14
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_with_explicit_schema
15
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer
16
eu/dnetlib/iis/core/examples/javamapreduce/cloner_with_multiple_output_without_reducer_with_explicit_schema
17
eu/dnetlib/iis/core/examples/javamapreduce/cloner_without_reducer
18
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner
19
eu/dnetlib/iis/core/examples/javamapreduce/oldapi/cloner_with_explicit_schema
20
eu/dnetlib/iis/core/examples/javamapreduce/person_by_age_splitter
21
eu/dnetlib/iis/core/examples/javamapreduce/reverse_relation
22
eu/dnetlib/iis/core/examples/parallel/parallel_joiner
23
eu/dnetlib/iis/core/examples/pig/basic
24
eu/dnetlib/iis/core/examples/pig/joiner
25
eu/dnetlib/iis/core/examples/pig/joiner_with_explicit_schema
26
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter
27
eu/dnetlib/iis/core/examples/pig/person_by_docs_filter_with_subworkflow
28
eu/dnetlib/iis/core/examples/protobuf/java/cloner
29
eu/dnetlib/iis/core/examples/protobuf/java/joiner
30
eu/dnetlib/iis/core/examples/protobuf/java/line_by_line_copier
31
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner
32
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_with_multiple_output
33
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/cloner_without_reducer
34
eu/dnetlib/iis/core/examples/protobuf/javamapreduce/person_by_age_splitter
35
eu/dnetlib/iis/core/examples/protobuf/subworkflow/cloners
36
eu/dnetlib/iis/core/examples/subworkflow/cloners
37
# remember to leave '\n' after the last line
38

  
modules/dnet-openaire-lodexport-wf/src/main/scripts/README.markdown
1
It is assumed that the scripts in this directory are executed with the current directory set to the main directory of the project - the one that contains the `pom.xml` file.
2

  
3
The scripts whose names end with `locally` generate example Oozie workflow applications that are supposed to be run on a local installation of Hadoop (in the standalone or pseudo-distributed mode). On the other hand, scripts whose names end with `on_cluster` generate example Oozie workflow applications that are supposed to be run on the OpenAIRE+ Hadoop cluster.
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.tl.properties
1
isLookupEndpoint=http://beta.services.openaire.eu:8280/is/services/isLookUp
2
lod_baseURI=http://lod.openaire.eu/data/
3
lod_dataPath=/user/giorgos.alexiou/rdfData
4
lod_delim=,
5
lod_enclosing='
6
lod_entitiesPerQuery=25
7
lod_hbase_table=db_openaireplus_services
8
lod_indexConf=index.conf { result { dups = true, links = [ { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking], max=1000 }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype,openairecompatibility] } ]}, organization { dups = true, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, 
expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
9
#lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = 
[legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
10
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/year", "27": "http://lod.openaire.eu/vocab/resultType", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": 
"http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http: //xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": 
"http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http: //www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http: //xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": 
"http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
11
lod_jsonRels={ "resultResult": [{ "property": "http://purl.org/dc/terms/isPartOf", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultDatasource": [{ "property": "http://www.w3.org/ns/prov#wasDerivedFrom", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://purl.org/dc/terms/creator", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] } 
12
lod_lastExecutionDate=2015-05-26
13
lod_maxCpart=3
14
lod_minCpart=1
15
lod_EntitiesInputFile=/tmp/lod_full/entities
16
lod_RelationsInputFile=/tmp/lod_full/relations
17
lod_output=/tmp/tlod/
18
lod_part=5
19
#---------config for CNR------------
20
#lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
21
#lod_password=virtramvos
22
#Config for DM
23
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
24
lod_password=eiloobi2Ail6Aisi
25
lod_defaultGraph=test
26
lod_relationsGraph=relationsTest
27
lod_relationsPerQuery=170
28
lod_seperator=;
29
lod_username=dba
30
#--------DM Cluster config-------
31
jobTracker=dm-cluster-jt
32
nameNode=hdfs://dm-cluster-nn
33
#oozie.wf.application.path=hdfs://dm-cluster-nn/user/eri.katsari/lod/oozie_app
34
#oozie.wf.application.path=hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app
35
oozie.wf.application.path=hdfs://dm-cluster-nn/user/tsampikos.livisianos/core/javamapreduce/lodexport/oozie_app
36
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
37
#--------CNR cluster config-------
38
#jobTracker=nmis-hadoop-jt
39
#nameNode=hdfs://nmis-hadoop-cluster
40
#oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
41
#oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
42
numReducers=17
43
out1=entities
44
out2=relations
45
queueName=default
46
#user.name=giorgos.alexiou
47
user.name=tsampikos.livisianos
48
workingDir=/user/tsampikos.livisianos/core/javamapreduce/lodexport/working_dir
49
#user.name=eri.katsari
50
#workingDir=/user/eri.katsari/core/javamapreduce/lodexport/working_dir
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.properties
1
isLookupEndpoint=http://beta.services.openaire.eu:8280/is/services/isLookUp
2
lod_baseURI=http://lod.openaire.eu/data/
3
lod_dataPath=/user/giorgos.alexiou/rdfData
4
lod_delim=,
5
lod_enclosing='
6
lod_entitiesPerQuery=25
7
lod_hbase_table=db_openaireplus_services
8
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = 
[legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
9
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/year", "27": "http://lod.openaire.eu/vocab/resultType", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": 
"http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": 
"http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": 
"http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
10
lod_jsonRels={ "resultResult": [{ "property": "http://purl.org/dc/terms/isPartOf", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultDatasource": [{ "property": "http://www.w3.org/ns/prov#wasDerivedFrom", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://purl.org/dc/terms/creator", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] } 
11
lod_lastExecutionDate=2015-05-26
12
lod_maxCpart=3
13
lod_minCpart=1
14
lod_EntitiesInputFile=/tmp/lod_full/entities/
15
lod_RelationsInputFile=/tmp/lod_full/relations/
16
lod_output=/tmp/lod/
17
lod_part=5
18
#---------config for CNR------------
19
#lod_conLine=jdbc:virtuoso://virtuoso-openaire.d4science.org:1111/autoReconnect=true/charset=UTF-8/log_enable=1
20
#lod_password=virtramvos
21
#Config for DM
22
lod_conLine=jdbc:virtuoso://virtuoso-beta.openaire.eu:1111/autoReconnect=true/charset=UTF-8/log_enable=1
23
lod_password=eiloobi2Ail6Aisi
24
lod_defaultGraph=test
25
lod_relationsGraph=relationsTest
26
lod_relationsPerQuery=170
27
lod_seperator=;
28
lod_username=dba
29
#--------DM Cluster config-------
30
jobTracker=dm-cluster-jt
31
nameNode=hdfs://dm-cluster-nn
32
oozie.wf.application.path=hdfs://dm-cluster-nn/user/giorgos.alexiou/lod/oozie_app
33
oozieServiceLoc=http://oozie.hadoop.dm.openaire.eu:11000/oozie
34
#--------CNR cluster config-------
35
#jobTracker=nmis-hadoop-jt
36
#nameNode=hdfs://nmis-hadoop-cluster
37
#oozie.wf.application.path=hdfs://nmis-hadoop-cluster/user/eri.katsari/lod/oozie_app
38
#oozieServiceLoc=http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
39
numReducers=17
40
out1=relations
41
out2=result
42
out3=person
43
out4=project
44
out5=datasource
45
out6=organization
46
queueName=default
47
user.name=giorgos.alexiou
48
workingDir=/user/giorgos.alexiou/core/javamapreduce/lodexport/working_dir
modules/dnet-openaire-lodexport-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/oozie_app/workflow.xml
1
<workflow-app name="lod_generation" xmlns="uri:oozie:workflow:0.4">
2
    <!-- map reduce job that exports hbase data and prepares them for import
3
        to the lod_generation -->
4

  
5
    <global>
6
        <job-tracker>${jobTracker}</job-tracker>
7
        <name-node>${nameNode}</name-node>
8
        <configuration>
9
            <property>
10
                <name>mapred.job.queue.name</name>
11
                <value>${queueName}</value>
12
            </property>
13
            <property>
14
                <name>oozie.sqoop.log.level</name>
15
                <value>DEBUG</value>
16
            </property>
17

  
18

  
19
        </configuration>
20
    </global>
21

  
22
    <start to="csv_export"/>
23

  
24
    <action name="csv_export">
25
        <map-reduce>
26

  
27
            <prepare>
28
                <delete path="${nameNode}${lod_output}"/>
29

  
30
            </prepare>
31

  
32
            <configuration>
33

  
34
                <property>
35
                    <name>hbase.mapreduce.scan</name>
36
                    <value>${wf:actionData('get-scanner')['scan']}</value>
37
                </property>
38
                <property>
39
                    <name>hbase.rootdir</name>
40
                    <value>${nameNode}/hbase</value>
41

  
42
                </property>
43

  
44
                <property>
45
                    <name>hbase.security.authentication</name>
46
                    <value>simple</value>
47
                </property>
48
                <!-- ZOOKEEPER -->
49

  
50
                <property>
51
                    <name>hbase.zookeeper.quorum</name>
52
                    <value>
53
                        namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
54
                    </value>
55
                </property>
56
                <property>
57
                    <name>zookeeper.znode.rootserver</name>
58
                    <value>root-region-server</value>
59

  
60
                </property>
61
                <property>
62
                    <name>hbase.zookeeper.property.clientPort</name>
63
                    <value>2181</value>
64
                </property>
65

  
66

  
67
                <!-- MR IO -->
68

  
69
                <property>
70
                    <name>mapreduce.inputformat.class</name>
71
                    <value>org.apache.hadoop.hbase.mapreduce.TableInputFormat</value>
72
                </property>
73

  
74
                <property>
75
                    <name>mapred.mapoutput.key.class</name>
76
                    <value>org.apache.hadoop.io.Text</value>
77
                </property>
78
                <property>
79
                    <name>mapred.mapoutput.value.class</name>
80
                    <value>org.apache.hadoop.io.Text</value>
81
                </property>
82
                <property>
83
                    <name>mapred.output.key.class</name>
84
                    <value>org.apache.hadoop.io.Text</value>
85
                </property>
86
                <property>
87
                    <name>mapred.output.value.class</name>
88
                    <value>org.apache.hadoop.io.Text</value>
89
                </property>
90

  
91
                <!-- ## This is required for new MapReduce API usage -->
92
                <property>
93
                    <name>mapred.mapper.new-api</name>
94
                    <value>true</value>
95
                </property>
96
                <property>
97
                    <name>mapred.reducer.new-api</name>
98
                    <value>true</value>
99
                </property>
100

  
101
                <!-- # Job-specific options -->
102
                <property>
103
                    <name>dfs.blocksize</name>
104
                    <value>32M</value>
105
                </property>
106
                <property>
107
                    <name>mapred.output.compress</name>
108
                    <value>false</value>
109
                </property>
110
                <property>
111
                    <name>mapred.reduce.tasks.speculative.execution</name>
112
                    <value>false</value>
113
                </property>
114
                <property>
115
                    <name>mapred.reduce.tasks.speculative.execution</name>
116
                    <value>false</value>
117
                </property>
118

  
119
                <property>
120
                    <name>mapreduce.map.speculative</name>
121
                    <value>false</value>
122
                </property>
123

  
124
                <!-- I/O FORMAT -->
125
                <!-- IMPORTANT: sets default delimiter used by text output writer. Required
126
                    to fix issue with trailing tab added between id and value in multiple outputs -->
127
                <property>
128
                    <name>mapred.textoutputformat.separator</name>
129
                    <value>${lod_delim}</value>
130
                </property>
131
                <!-- ## Names of all output ports -->
132

  
133
                <property>
134
                    <name>mapreduce.multipleoutputs</name>
135
                    <value>
136
                        ${out1} ${out2} ${out3} ${out4} ${out5} ${out6}
137
                    </value>
138

  
139
                </property>
140
                <property>
141
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.key</name>
142
                    <value>org.apache.hadoop.io.Text</value>
143
                </property>
144
                <property>
145
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.value</name>
146
                    <value>org.apache.hadoop.io.Text</value>
147
                </property>
148
                <property>
149
                    <name>mapreduce.multipleoutputs.namedOutput.${out1}.format</name>
150
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
151
                </property>
152

  
153
                <!-- result -->
154
                <property>
155
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.key</name>
156
                    <value>org.apache.hadoop.io.Text</value>
157
                </property>
158
                <property>
159
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.value</name>
160
                    <value>org.apache.hadoop.io.Text</value>
161
                </property>
162
                <property>
163
                    <name>mapreduce.multipleoutputs.namedOutput.${out2}.format</name>
164
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
165
                </property>
166

  
167

  
168
                <!-- person -->
169
                <property>
170
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.key</name>
171
                    <value>org.apache.hadoop.io.Text</value>
172
                </property>
173
                <property>
174
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.value</name>
175
                    <value>org.apache.hadoop.io.Text</value>
176
                </property>
177
                <property>
178
                    <name>mapreduce.multipleoutputs.namedOutput.${out3}.format</name>
179
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
180
                </property>
181

  
182

  
183
                <!-- project -->
184
                <property>
185
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.key</name>
186
                    <value>org.apache.hadoop.io.Text</value>
187
                </property>
188
                <property>
189
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.value</name>
190
                    <value>org.apache.hadoop.io.Text</value>
191
                </property>
192
                <property>
193
                    <name>mapreduce.multipleoutputs.namedOutput.${out4}.format</name>
194
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
195
                </property>
196

  
197

  
198

  
199
                <!-- datasource -->
200
                <property>
201
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.key</name>
202
                    <value>org.apache.hadoop.io.Text</value>
203
                </property>
204
                <property>
205
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.value</name>
206
                    <value>org.apache.hadoop.io.Text</value>
207
                </property>
208
                <property>
209
                    <name>mapreduce.multipleoutputs.namedOutput.${out5}.format</name>
210
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
211
                </property>
212

  
213

  
214
                <!-- organization -->
215
                <property>
216
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.key</name>
217
                    <value>org.apache.hadoop.io.Text</value>
218
                </property>
219
                <property>
220
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.value</name>
221
                    <value>org.apache.hadoop.io.Text</value>
222
                </property>
223
                <property>
224
                    <name>mapreduce.multipleoutputs.namedOutput.${out6}.format</name>
225
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
226
                </property>
227

  
228

  
229

  
230
                <!-- ## Classes of mapper and reducer -->
231

  
232
                <property>
233
                    <name>mapreduce.map.class</name>
234
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodMapper</value>
235
                </property>
236
                <property>
237
                    <name>mapreduce.reduce.class</name>
238
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.LodReducer</value>
239
                </property>
240
                <property>
241
                    <name>io.serializations</name>
242
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
243
                </property>
244

  
245
                <!-- ## Custom config -->
246

  
247
                <!-- delim character used to separate fields in hdfs dump files -->
248
                <property>
249
                    <name>lod.delim</name>
250
                    <value>${lod_delim}</value>
251
                </property>
252

  
253
                <property>
254
                    <name>lod.enclosing</name>
255
                    <value>${lod_enclosing}</value>
256
                </property>
257

  
258

  
259
                <property>
260
                    <name>lod.seperator</name>
261
                    <value>${lod_seperator}</value>
262
                </property>
263

  
264

  
265
                <!--source hbase table -->
266
                <property>
267
                    <name>hbase.mapreduce.inputtable</name>
268
                    <value>${lod_hbase_table}</value>
269
                </property>
270
                <property>
271
                    <name>hbase.mapred.inputtable</name>
272
                    <value>${lod_hbase_table}</value>
273
                </property>
274

  
275
                <!-- This directory does not correspond to a data store. In fact, this
276
                    directory only contains multiple data stores. It has to be set to the name
277
                    of the workflow node. -->
278
                <property>
279
                    <name>mapred.output.dir</name>
280
                    <value>${lod_output}</value>
281
                </property>
282
                <property>
283
                    <name>index.conf</name>
284
                    <value>${lod_indexConf}</value>
285
                </property>
286

  
287
                <property>
288
                    <name>lod.lastExecutionDate</name>
289
                    <value>${lod_lastExecutionDate}</value>
290
                </property>
291

  
292
                <!-- ## Workflow node parameters -->
293
                <property>
294
                    <name>mapred.reduce.tasks</name>
295
                    <value>${numReducers}</value>
296
                </property>
297

  
298
            </configuration>
299

  
300
        </map-reduce>
301
        <ok to="end"/>
302

  
303
        <error to="fail"/>
304
    </action>
305

  
306

  
307
    <action name='clearGraph'>
308
        <java>
309
            <prepare>
310
            </prepare>
311
            <configuration>
312
                <property>
313
                    <name>mapred.job.queue.name</name>
314
                    <value>${queueName}</value>
315
                </property>
316
            </configuration>
317
            <main-class>eu.dnetlib.iis.core.workflows.lodexport.ClearGraph</main-class>
318
            <arg>${lod_conLine}</arg>
319
            <arg>${lod_username}</arg>
320
            <arg>${lod_password}</arg>
321
            <arg>${lod_minCpart}</arg>
322
            <arg>${lod_maxCpart}</arg>
323
            <arg>${lod_part}</arg>
324
            <arg>${lod_relationsGraph}</arg>
325
        </java>
326
        <ok to="rdf_entities_import"/>
327

  
328
        <error to="fail"/>
329
    </action>
330

  
331
    <action name="rdf_entities_import">
332

  
333
        <map-reduce>
334

  
335
            <prepare>
336
                <delete path="${nameNode}${lod_output}test"/>
337
            </prepare>
338

  
339
            <configuration>
340
                <property>
341
                    <name>hbase.security.authentication</name>
342
                    <value>simple</value>
343
                </property>
344

  
345
                <!-- ZOOKEEPER -->
346
                <property>
347
                    <name>hbase.zookeeper.quorum</name>
348
                    <value> namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
349
                    </value>
350
                </property>
351
                <property>
352
                    <name>zookeeper.znode.rootserver</name>
353
                    <value>root-region-server</value>
354
                </property>
355

  
356
                <!-- CSV PROPS GO HERE -->
357
                <property>
358
                    <name>hbase.zookeeper.property.clientPort</name>
359
                    <value>2181</value>
360
                </property>
361

  
362

  
363
                <!-- MR IO -->
364

  
365
                <property>
366
                    <name>mapred.input.dir</name>
367
                    <value>${lod_EntitiesInputFile}</value>
368
                </property>
369

  
370

  
371
                <property>
372
                    <name>mapreduce.inputformat.class</name>
373
                    <value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
374
                </property>
375

  
376
                <property>
377
                    <name>mapred.mapoutput.key.class</name>
378
                    <value>org.apache.hadoop.io.Text</value>
379
                </property>
380

  
381
                <property>
382
                    <name>mapred.mapoutput.value.class</name>
383
                    <value>org.apache.hadoop.io.Text</value>
384
                </property>
385

  
386
                <property>
387
                    <name>mapred.output.key.class</name>
388
                    <value>org.apache.hadoop.io.NullWritable</value>
389
                </property>
390

  
391
                <property>
392
                    <name>mapred.output.value.class</name>
393
                    <value>org.apache.hadoop.io.NullWritable</value>
394
                </property>
395

  
396
                <!-- ## This is required for new MapReduce API usage -->
397
                <property>
398
                    <name>mapred.mapper.new-api</name>
399
                    <value>true</value>
400
                </property>
401
                <property>
402
                    <name>mapred.reducer.new-api</name>
403
                    <value>true</value>
404
                </property>
405

  
406
                <!-- # Job-specific options -->
407
                <property>
408
                    <name>dfs.blocksize</name>
409
                    <value>32M</value>
410
                </property>
411
                <property>
412
                    <name>mapred.output.compress</name>
413
                    <value>false</value>
414
                </property>
415
                <property>
416
                    <name>mapred.reduce.tasks.speculative.execution</name>
417
                    <value>false</value>
418
                </property>
419
                <property>
420
                    <name>mapred.reduce.tasks.speculative.execution</name>
421
                    <value>false</value>
422
                </property>
423
                <property>
424
                    <name>mapreduce.map.speculative</name>
425
                    <value>false</value>
426
                </property>
427

  
428

  
429
                <property>
430
                    <name>map.output.key.field.separator</name>
431
                    <value>${lod_delim}</value>
432
                </property>
433

  
434

  
435
                <!-- ## Classes of mapper and reducer -->
436
                <property>
437
                    <name>mapreduce.map.class</name>
438
                    <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value>
439
                </property>
440

  
441
                <property>
442
                    <name>mapreduce.reduce.class</name>
443

  
444
                    <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value>
445
                </property>
446

  
447
                <property>
448

  
449
                    <name>io.serializations</name>
450
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
451
                </property>
452

  
453
                <!-- ## Custom config -->
454
                <!-- delim character used to separate fields in hdfs dump files -->
455

  
456
                <property>
457
                    <name>lod.delim</name>
458
                    <value>${lod_delim}</value>
459
                </property>
460

  
461
                <property>
462
                    <name>lod.enclosing</name>
463
                    <value>${lod_enclosing}</value>
464
                </property>
465

  
466
                <property>
467
                    <name>lod.seperator</name>
468
                    <value>${lod_seperator}</value>
469
                </property>
470

  
471
                <property>
472
                    <name>lod.lastExecutionDate</name>
473
                    <value>${lod_lastExecutionDate}</value>
474
                </property>
475

  
476
                <property>
477
                    <name>lod.conLine</name>
478
                    <value>${lod_conLine}</value>
479
                </property>
480

  
481
                <property>
482
                    <name>lod.username</name>
483
                    <value>${lod_username}</value>
484
                </property>
485

  
486
                <property>
487
                    <name>lod.password</name>
488
                    <value>${lod_password}</value>
489
                </property>
490

  
491
                <property>
492
                    <name>lod.minCpart</name>
493
                    <value>${lod_minCpart}</value>
494
                </property>
495

  
496
                <property>
497
                    <name>lod.maxCpart</name>
498
                    <value>${lod_maxCpart}</value>
499
                </property>
500

  
501
                <property>
502
                    <name>lod.part</name>
503
                    <value>${lod_part}</value>
504
                </property>
505

  
506
                <property>
507
                    <name>lod.jsonRels</name>
508
                    <value>${lod_jsonRels}</value>
509
                </property>
510

  
511
                <property>
512
                    <name>lod.jsonEntities</name>
513
                    <value>${lod_jsonEntities}</value>
514
                </property>
515

  
516
                <property>
517
                    <name>lod.defaultGraph</name>
518
                    <value>${lod_defaultGraph}</value>
519
                </property>
520

  
521

  
522
                <property>
523
                    <name>lod.relationsGraph</name>
524
                    <value>${lod_relationsGraph}</value>
525
                </property>
526

  
527

  
528
                <property>
529
                    <name>lod.baseURI</name>
530
                    <value>${lod_baseURI}</value>
531
                </property>
532

  
533
                <property>
534
                    <name>mapred.reduce.tasks</name>
535
                    <value>${numReducers}</value>
536

  
537
                </property>
538

  
539
                <property>
540
                    <name>lod.inputFile</name>
541
                    <value>${lod_EntitiesInputFile}</value>
542
                </property>
543

  
544
                <property>
545
                    <name>mapred.output.dir</name>
546
                    <value>${lod_output}test</value>
547
                </property>
548

  
549
                <property>
550
                    <name>lod.entitiesPerQuery</name>
551
                    <value>${lod_entitiesPerQuery}</value>
552
                </property>
553
                <property>
554
                    <name>lod.relationsPerQuery</name>
555
                    <value>${lod_relationsPerQuery}</value>
556
                </property>
557

  
558
                <property>
559
                    <name>lod.dataPath</name>
560
                    <value>${lod_dataPath}</value>
561
                </property>
562

  
563

  
564
            </configuration>
565
        </map-reduce>
566

  
567
        <ok to="rdf_relations_import"/>
568

  
569
        <error to="fail"/>
570
    </action>
571

  
572

  
573
    <!-- Loads the relations dump into the LOD store via a map-reduce job.
         The job reads the relations CSV dump from HDFS; the reducer pushes
         triples to the triple store, so no meaningful HDFS output is produced
         (the output dir is a throwaway "test" path deleted in <prepare>). -->
    <action name="rdf_relations_import">
        <map-reduce>
            <prepare>
                <delete path="${nameNode}${lod_output}test"/>
            </prepare>

            <configuration>
                <property>
                    <name>hbase.security.authentication</name>
                    <value>simple</value>
                </property>

                <!-- ZOOKEEPER -->
                <property>
                    <name>hbase.zookeeper.quorum</name>
                    <value>
                        quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
                    </value>
                </property>
                <property>
                    <name>zookeeper.znode.rootserver</name>
                    <value>root-region-server</value>
                </property>
                <property>
                    <name>hbase.zookeeper.property.clientPort</name>
                    <value>2181</value>
                </property>

                <!-- MR IO -->
                <property>
                    <name>mapred.input.dir</name>
                    <value>${lod_RelationsInputFile}</value>
                </property>
                <property>
                    <name>mapreduce.inputformat.class</name>
                    <value>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</value>
                </property>
                <property>
                    <name>mapred.mapoutput.key.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <property>
                    <name>mapred.mapoutput.value.class</name>
                    <value>org.apache.hadoop.io.Text</value>
                </property>
                <!-- FIX: these two properties previously pointed at
                     org.apache.hadoop.mapreduce.lib.output.NullOutputFormat,
                     which is an OutputFormat, not a key/value class. The
                     reducer emits nothing to HDFS, so NullWritable is the
                     correct (inert) key/value type here. -->
                <property>
                    <name>mapred.output.key.class</name>
                    <value>org.apache.hadoop.io.NullWritable</value>
                </property>
                <property>
                    <name>mapred.output.value.class</name>
                    <value>org.apache.hadoop.io.NullWritable</value>
                </property>

                <!-- ## This is required for new MapReduce API usage -->
                <property>
                    <name>mapred.mapper.new-api</name>
                    <value>true</value>
                </property>
                <property>
                    <name>mapred.reducer.new-api</name>
                    <value>true</value>
                </property>

                <!-- # Job-specific options -->
                <property>
                    <name>dfs.blocksize</name>
                    <value>32M</value>
                </property>
                <property>
                    <name>mapred.output.compress</name>
                    <value>false</value>
                </property>
                <!-- FIX: this property was declared twice with the same
                     value; the duplicate was removed (last-wins anyway). -->
                <property>
                    <name>mapred.reduce.tasks.speculative.execution</name>
                    <value>false</value>
                </property>
                <property>
                    <name>mapreduce.map.speculative</name>
                    <value>false</value>
                </property>

                <property>
                    <name>map.output.key.field.separator</name>
                    <value>${lod_delim}</value>
                </property>

                <!-- ## Classes of mapper and reducer -->
                <property>
                    <name>mapreduce.map.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportMapper</value>
                </property>
                <property>
                    <name>mapreduce.reduce.class</name>
                    <value>eu.dnetlib.data.mapreduce.hbase.lodImport.LodImportReducer</value>
                </property>
                <property>
                    <name>io.serializations</name>
                    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
                </property>

                <!-- ## Custom config -->
                <!-- delimiter character used to separate fields in HDFS dump files -->
                <property>
                    <name>lod.delim</name>
                    <value>${lod_delim}</value>
                </property>
                <property>
                    <name>lod.enclosing</name>
                    <value>${lod_enclosing}</value>
                </property>
                <!-- NOTE(review): "seperator" is misspelled, but the name is
                     read by the mapper/reducer code — do not rename here
                     without changing the consumers. -->
                <property>
                    <name>lod.seperator</name>
                    <value>${lod_seperator}</value>
                </property>
                <property>
                    <name>lod.lastExecutionDate</name>
                    <value>${lod_lastExecutionDate}</value>
                </property>
                <!-- Triple-store connection line and credentials. -->
                <property>
                    <name>lod.conLine</name>
                    <value>${lod_conLine}</value>
                </property>
                <property>
                    <name>lod.username</name>
                    <value>${lod_username}</value>
                </property>
                <property>
                    <name>lod.password</name>
                    <value>${lod_password}</value>
                </property>
                <property>
                    <name>lod.minCpart</name>
                    <value>${lod_minCpart}</value>
                </property>
                <property>
                    <name>lod.maxCpart</name>
                    <value>${lod_maxCpart}</value>
                </property>
                <property>
                    <name>lod.part</name>
                    <value>${lod_part}</value>
                </property>
                <property>
                    <name>lod.jsonRels</name>
                    <value>${lod_jsonRels}</value>
                </property>
                <property>
                    <name>lod.jsonEntities</name>
                    <value>${lod_jsonEntities}</value>
                </property>
                <property>
                    <name>lod.defaultGraph</name>
                    <value>${lod_defaultGraph}</value>
                </property>
                <property>
                    <name>lod.relationsGraph</name>
                    <value>${lod_relationsGraph}</value>
                </property>
                <property>
                    <name>lod.baseURI</name>
                    <value>${lod_baseURI}</value>
                </property>
                <property>
                    <name>mapred.reduce.tasks</name>
                    <value>${numReducers}</value>
                </property>
                <property>
                    <name>lod.inputFile</name>
                    <value>${lod_RelationsInputFile}</value>
                </property>
                <property>
                    <name>mapred.output.dir</name>
                    <value>${lod_output}test</value>
                </property>
                <property>
                    <name>lod.entitiesPerQuery</name>
                    <value>${lod_entitiesPerQuery}</value>
                </property>
                <property>
                    <name>lod.relationsPerQuery</name>
                    <value>${lod_relationsPerQuery}</value>
                </property>
                <property>
                    <name>lod.dataPath</name>
                    <value>${lod_dataPath}</value>
                </property>
            </configuration>
        </map-reduce>

        <ok to="finalize"/>
        <error to="fail"/>
    </action>
815

  
816

  
817
    <!-- Runs eu.dnetlib.iis.core.workflows.lodexport.Finalize as a plain
         Java action once both import jobs have completed. The first six
         arguments are the datasource/connection settings consumed by
         DB.getDatasource(...); the last is the relations graph name. -->
    <action name='finalize'>
        <java>
            <prepare>
            </prepare>

            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>

            <main-class>eu.dnetlib.iis.core.workflows.lodexport.Finalize</main-class>

            <arg>${lod_conLine}</arg>
            <arg>${lod_username}</arg>
            <arg>${lod_password}</arg>
            <arg>${lod_minCpart}</arg>
            <arg>${lod_maxCpart}</arg>
            <arg>${lod_part}</arg>
            <arg>${lod_relationsGraph}</arg>
        </java>
        <ok to="end"/>
        <error to="fail"/>
    </action>
843

  
844
    <!-- Deletes the throwaway job-output directory from HDFS.
         NOTE(review): no transition into this action is visible in this part
         of the workflow — confirm it is reachable (or intentionally kept as a
         manually-triggered cleanup). -->
    <action name="cleanUpHDFS">
        <fs>
            <delete path="${lod_output}test"/>
        </fs>

        <ok to="end"/>
        <error to="fail"/>
    </action>
852

  
853

  
854
    <!-- Terminal failure node: aborts the workflow and surfaces the error
         message of the last failed action. -->
    <kill name="fail">
        <message>
            Unfortunately, the process failed -- error message:
            [${wf:errorMessage(wf:lastErrorNode())}]
        </message>
    </kill>
860
    <end name="end"/>
861
</workflow-app>
modules/dnet-openaire-lodexport-wf/src/main/resources/oozie-log4j.properties
1
## The main job of this file is to make the Oozie tests log messages
## less verbose, but you can change this behavior any way you want
## by editing this file appropriately.

# Root logger: everything at DEBUG and above goes to the "oozie" appender.
log4j.rootLogger=DEBUG, oozie

# "oozie" appender: rolling log file (1 MB per file, one backup kept).
# FIX: this appender was previously declared twice with conflicting settings,
# and the first declaration set Target/DatePattern, which are not valid
# RollingFileAppender properties (Target belongs to ConsoleAppender,
# DatePattern to DailyRollingFileAppender). A single consistent file
# appender is kept.
log4j.appender.oozie=org.apache.log4j.RollingFileAppender
log4j.appender.oozie.File=/tmp/statsExportLog
log4j.appender.oozie.MaxFileSize=1MB
log4j.appender.oozie.MaxBackupIndex=1
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n

# Appender that silently discards everything sent to it.
log4j.appender.none=org.apache.log4j.varia.NullAppender

#log4j.logger.oozieops=OFF, none
#log4j.logger.oozieinstrumentation=OFF, none
#log4j.logger.oozieaudit=OFF, none

# FIX: this line was "oozie,DEBUG, test" — in log4j the level must come
# first, and no appender named "test" is defined anywhere in this file.
log4j.logger.org.apache.hadoop=DEBUG, oozie
log4j.logger.eu.dnetlib.iis=DEBUG, oozie
modules/dnet-openaire-lodexport-wf/core/src/test/resources/test-custom-log4j.properties
1
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Console appender used while running the Oozie tests.
log4j.appender.oozie=org.apache.log4j.ConsoleAppender
log4j.appender.oozie.Target=System.out
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n

# Appender that silently discards everything sent to it.
log4j.appender.null=org.apache.log4j.varia.NullAppender

log4j.logger.org.apache=INFO, oozie
log4j.logger.org.mortbay=WARN, oozie
log4j.logger.org.hsqldb=WARN, oozie

# FIX: "NONE" is not a log4j level; Level.toLevel() silently falls back to
# DEBUG for unrecognized names, which is the opposite of the intent here.
# OFF is the documented way to disable a logger.
log4j.logger.opslog=OFF, null
log4j.logger.applog=OFF, null
log4j.logger.instrument=OFF, null

log4j.logger.a=OFF, null
modules/dnet-openaire-lodexport-wf/core/src/test/resources/hsqldb-oozie-site.xml
1
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- JPA settings for Oozie unit tests: an in-memory HSQLDB instance. -->
<configuration>
    <!-- JDBC driver class of the embedded HSQLDB database. -->
    <property>
        <name>oozie.service.JPAService.jdbc.driver</name>
        <value>org.hsqldb.jdbcDriver</value>
    </property>
    <!-- In-memory database, created on first connection and discarded
         when the JVM exits. -->
    <property>
        <name>oozie.service.JPAService.jdbc.url</name>
        <value>jdbc:hsqldb:mem:oozie-db;create=true</value>
    </property>
</configuration>
modules/dnet-openaire-lodexport-wf/core/src/test/resources/mysql-oozie-site.xml
1
<?xml version="1.0"?>
2
<!--
3
  Licensed to the Apache Software Foundation (ASF) under one
4
  or more contributor license agreements.  See the NOTICE file
5
  distributed with this work for additional information
6
  regarding copyright ownership.  The ASF licenses this file
7
  to you under the Apache License, Version 2.0 (the
8
  "License"); you may not use this file except in compliance
9
  with the License.  You may obtain a copy of the License at
10

  
11
       http://www.apache.org/licenses/LICENSE-2.0
12

  
13
  Unless required by applicable law or agreed to in writing, software
14
  distributed under the License is distributed on an "AS IS" BASIS,
15
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
  See the License for the specific language governing permissions and
17
  limitations under the License.
18
-->
19
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
20
<configuration>
21
    <property>
22
      <name>oozie.service.JPAService.jdbc.driver</name>
23
        <value>com.mysql.jdbc.Driver</value>
24
        <description>JDBC driver class.</description>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.port</name>
28
        <value>3306</value>
29
    </property>
30
    <property>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff