Revision 44307
Added by Eri Katsari about 8 years ago
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/test/java/PreprocessingTest.java | ||
---|---|---|
1 |
/* |
|
1 | 2 |
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer; |
2 | 3 |
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.SourceBuildMapper; |
3 | 4 |
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.TargetBuildMapper; |
... | ... | |
19 | 20 |
import java.util.List; |
20 | 21 |
import java.util.Scanner; |
21 | 22 |
|
23 |
*/ |
|
22 | 24 |
/** |
23 | 25 |
* Created by eri_k on 8/24/2016. |
24 |
*/ |
|
26 |
*//* |
|
27 |
|
|
25 | 28 |
public class PreprocessingTest { |
26 | 29 |
|
27 | 30 |
MapDriver<LongWritable, Text, Text, Text> mapDriver; |
... | ... | |
54 | 57 |
} |
55 | 58 |
mapDriver.withInput(new Text("1"), input); |
56 | 59 |
|
57 |
/* mapDriver.withConfiguration(configuration) |
|
60 |
*/ |
|
61 |
/* mapDriver.withConfiguration(configuration) |
|
58 | 62 |
.withInput(new LongWritable(1), |
59 | 63 |
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n")) |
60 | 64 |
.withInput(new LongWritable(2), |
61 | 65 |
new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n")); |
62 |
*/ |
|
66 |
*//*
|
|
63 | 67 |
|
64 |
/* mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), |
|
68 |
|
|
69 |
*/ |
|
70 |
/* mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"), |
|
65 | 71 |
new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil MaxEPA enhances pancreatic carcinogenesis in azaserine treated rats.\",")); |
66 |
*/ |
|
72 |
*//* |
|
73 |
|
|
67 | 74 |
mapDriver.runTest(); |
68 | 75 |
} |
69 | 76 |
|
... | ... | |
91 | 98 |
|
92 | 99 |
@Test |
93 | 100 |
public void testMapReduce() { |
94 |
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6")); |
|
101 |
*/ |
|
102 |
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6")); |
|
95 | 103 |
mapReduceDriver.withOutput(new Text("6"), new IntWritable(2)); |
96 |
mapReduceDriver.runTest();*/ |
|
104 |
mapReduceDriver.runTest();*//*
|
|
97 | 105 |
|
106 |
|
|
98 | 107 |
} |
99 | 108 |
} |
100 | 109 |
|
110 |
*/ |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/BlockReducer.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.lodExport.build; |
2 | 2 |
|
3 |
import java.io.IOException; |
|
4 |
import java.nio.charset.Charset; |
|
5 |
import java.util.Iterator; |
|
6 |
|
|
3 |
import com.google.common.collect.Iterables; |
|
4 |
import org.apache.hadoop.io.ArrayWritable; |
|
7 | 5 |
import org.apache.hadoop.io.Text; |
8 | 6 |
import org.apache.hadoop.mapreduce.Reducer; |
9 | 7 |
import org.apache.hadoop.mapreduce.TaskInputOutputContext; |
10 | 8 |
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; |
11 | 9 |
import org.apache.log4j.Logger; |
12 | 10 |
|
13 |
public class BlockReducer extends Reducer<Text, Text, Text, Text> { |
|
11 |
import java.io.IOException; |
|
12 |
import java.lang.reflect.Array; |
|
13 |
import java.nio.charset.Charset; |
|
14 |
import java.util.ArrayList; |
|
15 |
import java.util.Arrays; |
|
16 |
import java.util.Iterator; |
|
17 |
import java.util.List;; |
|
18 |
|
|
19 |
public class BlockReducer extends Reducer<Text, Text, Text, BlockReducer.TextArrayWritable> { |
|
14 | 20 |
private static final String SEPERATOR = ","; |
15 | 21 |
|
16 | 22 |
public static enum BLOCKS_COUNTER { |
17 |
WRITTEN_RECORD_IDS,
|
|
18 |
DISCARDED_RECORD_IDS
|
|
23 |
WRITTEN_BLOCKS,
|
|
24 |
DISCARDED_BLOCKS
|
|
19 | 25 |
} |
20 | 26 |
|
21 | 27 |
private Logger log = Logger.getLogger(BlockReducer.class); |
... | ... | |
29 | 35 |
} |
30 | 36 |
|
31 | 37 |
|
38 |
public static class TextArrayWritable extends ArrayWritable { |
|
39 |
|
|
40 |
public TextArrayWritable(Text[] values) { |
|
41 |
super(Text.class, values); |
|
42 |
} |
|
43 |
|
|
44 |
@Override |
|
45 |
public Text[] get() { |
|
46 |
return (Text[]) super.get(); |
|
47 |
} |
|
48 |
|
|
49 |
@Override |
|
50 |
public String toString() { |
|
51 |
Text[] values = get(); |
|
52 |
return Arrays.toString(values); |
|
53 |
} |
|
54 |
} |
|
55 |
|
|
56 |
|
|
32 | 57 |
@Override |
33 | 58 |
protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException { |
34 | 59 |
Iterator<Text> it = values.iterator(); |
60 |
List<String> valuesList = new ArrayList<>(); |
|
35 | 61 |
try { |
36 | 62 |
//each list is a block |
37 |
StringBuilder field = new StringBuilder(); |
|
63 |
// StringBuilder field = new StringBuilder();
|
|
38 | 64 |
int nunberOfEntities = 0; |
39 | 65 |
|
66 |
boolean hasSource = false; |
|
67 |
boolean hasTarget = false; |
|
68 |
|
|
40 | 69 |
while (it.hasNext()) { |
41 |
field.append(it.next().toString()).append(SEPERATOR); |
|
70 |
String val = it.next().toString(); |
|
71 |
if (val.contains("source_")) { |
|
72 |
hasSource = true; |
|
73 |
} else if (val.contains("target_")) { |
|
74 |
hasTarget = true; |
|
75 |
} |
|
76 |
valuesList.add(val + SEPERATOR); |
|
42 | 77 |
nunberOfEntities++; |
43 | 78 |
} |
79 |
// field.append(val).append(SEPERATOR); |
|
44 | 80 |
|
45 |
if (nunberOfEntities > 1) { |
|
46 |
MultipleOutputWriter.write("b", key, new Text(field.toString()), "blocks/b"); |
|
81 |
if (nunberOfEntities > 1 && hasSource && hasTarget) { |
|
82 |
|
|
83 |
MultipleOutputWriter.write("b", key, valuesList, "blocks/b"); |
|
47 | 84 |
MultipleOutputWriter.write("entitiesNumber", key, new Text(String.valueOf(nunberOfEntities).getBytes(Charset.forName("UTF-8"))), "stats/entitiesNumber"); |
48 |
context.getCounter(BLOCKS_COUNTER.WRITTEN_RECORD_IDS).increment(1);
|
|
85 |
context.getCounter(BLOCKS_COUNTER.WRITTEN_BLOCKS).increment(1);
|
|
49 | 86 |
} else { |
50 |
context.getCounter(BLOCKS_COUNTER.DISCARDED_RECORD_IDS).increment(1);
|
|
87 |
context.getCounter(BLOCKS_COUNTER.DISCARDED_BLOCKS).increment(1);
|
|
51 | 88 |
} |
52 |
} catch (Exception e) { |
|
89 |
} catch ( |
|
90 |
Exception e) |
|
91 |
|
|
92 |
{ |
|
53 | 93 |
throw new InterruptedException(e.getMessage()); |
54 | 94 |
} |
55 | 95 |
|
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/SourceBuildMapper.java | ||
---|---|---|
61 | 61 |
|
62 | 62 |
@Override |
63 | 63 |
protected void map(final LongWritable keyIn, final Text result, final Context context) throws IOException { |
64 |
|
|
65 | 64 |
try { |
66 | 65 |
//get ID |
67 | 66 |
StringBuilder id = new StringBuilder(); |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/StreamingTextOutputFormat.java | ||
---|---|---|
19 | 19 |
import org.apache.hadoop.mapred.TextOutputFormat; |
20 | 20 |
import org.apache.hadoop.util.Progressable; |
21 | 21 |
import org.apache.hadoop.util.ReflectionUtils; |
22 |
import org.apache.log4j.Logger; |
|
22 | 23 |
|
23 | 24 |
public class StreamingTextOutputFormat<K, V> extends TextOutputFormat<K, V> { |
24 |
protected static class StreamingLineRecordWriter<K, V> implements |
|
25 |
RecordWriter<K, V> { |
|
25 |
protected static class StreamingLineRecordWriter<K, V> implements RecordWriter<K, V> { |
|
26 | 26 |
private static final String utf8 = "UTF-8"; |
27 | 27 |
private static final byte[] newline; |
28 |
private Logger log = Logger.getLogger(this.getClass()); |
|
28 | 29 |
|
29 | 30 |
static { |
30 | 31 |
try { |
31 | 32 |
newline = "\n".getBytes(utf8); |
32 | 33 |
} catch (UnsupportedEncodingException uee) { |
33 |
throw new IllegalArgumentException("can't find " + utf8 |
|
34 |
+ " encoding"); |
|
34 |
throw new IllegalArgumentException("can't find " + utf8 + " encoding"); |
|
35 | 35 |
} |
36 | 36 |
} |
37 | 37 |
|
38 | 38 |
protected DataOutputStream out; |
39 | 39 |
private final byte[] keyValueSeparator; |
40 |
private final byte[] valueDelimiter; |
|
40 |
private final byte[] valueDelimiter = ",".getBytes(utf8);
|
|
41 | 41 |
private boolean dataWritten = false; |
42 | 42 |
|
43 |
public StreamingLineRecordWriter(DataOutputStream out, String keyValueSeparator, String valueDelimiter) { |
|
43 |
public StreamingLineRecordWriter(DataOutputStream out, String keyValueSeparator, String valueDelimiter) throws UnsupportedEncodingException {
|
|
44 | 44 |
this.out = out; |
45 | 45 |
try { |
46 | 46 |
this.keyValueSeparator = keyValueSeparator.getBytes(utf8); |
47 |
this.valueDelimiter = valueDelimiter.getBytes(utf8);
|
|
47 |
//valueDelimiter.getBytes(utf8);
|
|
48 | 48 |
} catch (UnsupportedEncodingException uee) { |
49 | 49 |
throw new IllegalArgumentException("can't find " + utf8 + " encoding"); |
50 | 50 |
} |
51 | 51 |
} |
52 | 52 |
|
53 |
public StreamingLineRecordWriter(DataOutputStream out) { |
|
53 |
public StreamingLineRecordWriter(DataOutputStream out) throws UnsupportedEncodingException {
|
|
54 | 54 |
this(out, "\t", ","); |
55 | 55 |
} |
56 | 56 |
|
... | ... | |
64 | 64 |
if (o instanceof Text) { |
65 | 65 |
Text to = (Text) o; |
66 | 66 |
out.write(to.getBytes(), 0, to.getLength()); |
67 |
log.info("writing out first value"); |
|
67 | 68 |
} else { |
68 | 69 |
out.write(o.toString().getBytes(utf8)); |
69 | 70 |
} |
... | ... | |
76 | 77 |
if (nullKey && nullValue) { |
77 | 78 |
return; |
78 | 79 |
} |
80 |
log.info("RECEIVED KEY " + key); |
|
81 |
log.info("RECEIVED VALUE " + value); |
|
79 | 82 |
|
80 | 83 |
if (!nullKey) { |
81 | 84 |
// if we've written data before, append a new line |
82 | 85 |
if (dataWritten) { |
83 | 86 |
out.write(newline); |
87 |
log.info("datawrittern :writting new line" + key); |
|
84 | 88 |
} |
85 | 89 |
|
86 | 90 |
// write out the key and separator |
91 |
log.info("written key" + key); |
|
87 | 92 |
writeObject(key); |
88 | 93 |
out.write(keyValueSeparator); |
89 | 94 |
} else if (!nullValue) { |
95 |
log.info("null key not value : writign out" + valueDelimiter + value); |
|
90 | 96 |
// write out the value delimiter |
91 | 97 |
out.write(valueDelimiter); |
98 |
// write out the value |
|
99 |
writeObject(value); |
|
92 | 100 |
} |
93 | 101 |
|
94 |
// write out the value |
|
95 |
writeObject(value); |
|
96 |
|
|
97 | 102 |
// track that we've written some data |
98 | 103 |
dataWritten = true; |
99 | 104 |
} |
... | ... | |
112 | 117 |
public RecordWriter<K, V> getRecordWriter(FileSystem fileSystem, JobConf job, String name, Progressable progress) throws IOException { |
113 | 118 |
boolean isCompressed = getCompressOutput(job); |
114 | 119 |
String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t"); |
115 |
String valueDelimiter = job.get("mapred.textoutputformat.delimiter", ",");
|
|
120 |
String valueDelimiter = ",";
|
|
116 | 121 |
if (!isCompressed) { |
117 | 122 |
Path file = FileOutputFormat.getTaskOutputPath(job, name); |
118 | 123 |
FileSystem fs = file.getFileSystem(job); |
119 | 124 |
FSDataOutputStream fileOut = fs.create(file, progress); |
120 |
return new StreamingLineRecordWriter<K, V>(fileOut, |
|
121 |
keyValueSeparator, valueDelimiter); |
|
125 |
return new StreamingLineRecordWriter<K, V>(fileOut, keyValueSeparator, valueDelimiter); |
|
122 | 126 |
} else { |
123 | 127 |
Class<? extends CompressionCodec> codecClass = getOutputCompressorClass( |
124 | 128 |
job, GzipCodec.class); |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/BlockStreamingReducer.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.lodExport.build; |
2 | 2 |
|
3 |
import com.lambdaworks.redis.RedisClient; |
|
4 |
import com.lambdaworks.redis.RedisConnection; |
|
3 |
import com.lambdaworks.com.google.common.collect.Iterables; |
|
5 | 4 |
import org.apache.hadoop.fs.FileSystem; |
6 | 5 |
import org.apache.hadoop.fs.Path; |
7 | 6 |
import org.apache.hadoop.io.Text; |
8 | 7 |
import org.apache.hadoop.mapreduce.Reducer; |
9 |
import org.apache.hadoop.mapreduce.TaskInputOutputContext; |
|
10 |
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; |
|
11 | 8 |
import org.apache.log4j.Logger; |
12 | 9 |
|
13 | 10 |
import java.io.BufferedWriter; |
... | ... | |
15 | 12 |
import java.io.InterruptedIOException; |
16 | 13 |
import java.io.OutputStream; |
17 | 14 |
import java.io.OutputStreamWriter; |
18 |
import java.nio.charset.Charset; |
|
19 |
import java.util.HashMap; |
|
20 |
import java.util.Map; |
|
21 | 15 |
import java.util.UUID; |
22 | 16 |
|
17 |
import static com.lambdaworks.com.google.common.collect.Iterables.toArray; |
|
18 |
|
|
23 | 19 |
public class BlockStreamingReducer extends Reducer<Text, Text, Text, Text> { |
24 | 20 |
FileSystem hdfs; |
25 | 21 |
OutputStream os; |
... | ... | |
44 | 40 |
IOException, InterruptedException { |
45 | 41 |
int entitiesNumber = 0; |
46 | 42 |
boolean firstKey = true; |
43 |
//each list is a block |
|
47 | 44 |
for (Text value : values) { |
48 |
context.write(firstKey ? key : null, value); |
|
45 |
if (firstKey) { |
|
46 |
context.write(key, value); |
|
47 |
|
|
48 |
} else { |
|
49 |
context.write(null, value); |
|
50 |
} |
|
51 |
|
|
49 | 52 |
firstKey = false; |
50 | 53 |
entitiesNumber++; |
54 |
|
|
51 | 55 |
} |
52 |
//each list is a block
|
|
56 |
//append to output
|
|
53 | 57 |
try { |
54 |
writeStats(key.toString(), String.valueOf(entitiesNumber)); |
|
58 |
// writeStats(key.toString(), String.valueOf(entitiesNumber));
|
|
55 | 59 |
} catch (Exception e) { |
56 | 60 |
log.error("Cannot write to redis! Error :" + e.toString()); |
57 | 61 |
throw new InterruptedIOException(e.toString()); |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/linkage/LinkMapper.java | ||
---|---|---|
50 | 50 |
protected void map(final Text keyIn, final Text result, final Context context) throws IOException { |
51 | 51 |
try { |
52 | 52 |
|
53 |
log.info("KEY" + keyIn.toString()); |
|
54 |
log.info("VALUE" + result.toString()); |
|
55 |
|
|
56 |
//purge blocks with number of records > optimal |
|
53 | 57 |
int recordsNumber = countRecords(result.toString(), SEPERATOR); |
54 |
//purge blocks with number of records > optimal |
|
58 |
//purge blocks that contain only source or target entities |
|
59 |
boolean hasBothSouceAndTarget = result.toString().contains("source_") && result.toString().contains("target_"); |
|
60 |
//how many comparisons we have purged |
|
55 | 61 |
context.getCounter(TEST_COUNTERS.TOTAL_COMPARISONS).increment(recordsNumber * recordsNumber); |
56 |
if (recordsNumber == 1 || recordsNumber >= optimalBlockSize) { |
|
62 |
|
|
63 |
if (recordsNumber == 1 || recordsNumber >= optimalBlockSize || !hasBothSouceAndTarget) { |
|
57 | 64 |
context.getCounter(TEST_COUNTERS.DISCARDED_BLOCKS).increment(1); |
58 | 65 |
context.getCounter(TEST_COUNTERS.DISCARDED_COMPARISONS).increment(recordsNumber * recordsNumber); |
59 | 66 |
} else { |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/linkage/LimesReducer.java | ||
---|---|---|
61 | 61 |
connection = client.connect(); |
62 | 62 |
} |
63 | 63 |
|
64 |
|
|
65 | 64 |
@Override |
66 | 65 |
protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException { |
67 | 66 |
|
... | ... | |
122 | 121 |
ExecutionEngine engine = ExecutionEngineFactory.getEngine("Default", sourceCache, targetCache, |
123 | 122 |
config.getSourceInfo().getVar(), config.getTargetInfo().getVar()); |
124 | 123 |
|
125 |
Mapping verificationMapping = engine.execute(plan); //mappings for verification
|
|
126 |
|
|
127 |
Mapping acceptanceMapping = verificationMapping.getSubMap(config.getAcceptanceThreshold()); //mappings for acceptance (auta theloume) |
|
128 |
//output |
|
129 |
|
|
130 |
for (String source : acceptanceMapping.getMap().keySet()) {//gia kathe source blepoume ta targets
|
|
131 |
for (String target : acceptanceMapping.getMap().get(source).keySet()) {//gia kathe target blepoume to confidence
|
|
132 |
context.write(new Text(source.replace("source_", source)), new Text(target.replace("_target", "") + "," + acceptanceMapping.getConfidence(source, target)));
|
|
133 |
context.getCounter(LIMES_COUNTERS.WRITTEN_OUT_ENTITIES).increment(1);
|
|
124 |
if (sourceCache.size() > 0 && targetCache.size() > 0) {
|
|
125 |
Mapping verificationMapping = engine.execute(plan); //mappings for verification |
|
126 |
Mapping acceptanceMapping = verificationMapping.getSubMap(config.getAcceptanceThreshold()); //mappings for acceptance (auta theloume)
|
|
127 |
//output
|
|
128 |
for (String source : acceptanceMapping.getMap().keySet()) {//gia kathe source blepoume ta targets |
|
129 |
for (String target : acceptanceMapping.getMap().get(source).keySet()) {//gia kathe target blepoume to confidence
|
|
130 |
context.write(new Text(source.replace("source_", source)), new Text(target.replace("_target", "") + "," + acceptanceMapping.getConfidence(source, target)));
|
|
131 |
context.getCounter(LIMES_COUNTERS.WRITTEN_OUT_ENTITIES).increment(1);
|
|
132 |
}
|
|
134 | 133 |
} |
135 | 134 |
} |
136 |
|
|
137 | 135 |
} catch (Exception e) { |
138 | 136 |
log.error(e); |
139 | 137 |
throw new InterruptedException(e.toString()); |
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.eri.properties | ||
---|---|---|
12 | 12 |
lod_enclosing=' |
13 | 13 |
lod_entitiesPerQuery=10 |
14 | 14 |
lod_hbase_table=db_openaireplus_services |
15 |
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = 
[legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
|
|
15 |
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fieldMap = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fieldMap = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = 
true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
|
|
16 | 16 |
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/resultType", "27": "http://lod.openaire.eu/vocab/year", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier","5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": 
"http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": 
"http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": 
"http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] } |
17 | 17 |
lod_jsonRels={ "resultResult": [{ "property": "http://lod.openaire.eu/vocab/resultResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://lod.openaire.eu/vocab/resultProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://lod.openaire.eu/vocab/personResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://lod.openaire.eu/vocab/personProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://lod.openaire.eu/vocab/personPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://lod.openaire.eu/vocab/organizationOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] } |
18 | 18 |
lod_lastExecutionDate=2015-05-26 |
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.properties | ||
---|---|---|
45 | 45 |
groundTruthPath = /tmp/lodfinal/groundTruth |
46 | 46 |
linkageOutputPath = /tmp/lodfinal/source |
47 | 47 |
statsOutputPath=/tmp/lod_blocks/stats/ |
48 |
lod_sourceMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openair |
|
49 |
e.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publ |
|
50 |
isher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/e |
|
51 |
xternalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/ |
|
52 |
accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/ |
|
53 |
device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://l |
|
54 |
od.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]} |
|
48 | 55 |
lod.configXML=lod_configXML=<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE LIMES SYSTEM "limes.dtd"> <LIMES> <PREFIX> <NAMESPACE>http://www.w3.org/1999/02/22-rdf-syntax-ns#</NAMESPACE> <LABEL>rdf</LABEL> </PREFIX> <PREFIX> <NAMESPACE>http://www.w3.org/2000/01/rdf-schema#</NAMESPACE> <LABEL>rdfs</LABEL> </PREFIX> <SOURCE> <ID>source1</ID> <ENDPOINT>/user/kanakakis/groundTruth/sourceNT</ENDPOINT> <VAR>?x</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?x rdf:type http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity</RESTRICTION> <PROPERTY>http://lod.openaire.eu/vocab/year RENAME Year</PROPERTY> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.eurocris.org/ontologies/cerif/1.3#name AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME publicationName</PROPERTY> </SOURCE> <TARGET> <ID>source2</ID> <ENDPOINT>/user/kanakakis/groundTruth/targetNT</ENDPOINT> <VAR>?y</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?y rdf:type http://swrc.ontoware.org/ontology#Article</RESTRICTION> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/issued RENAME Year</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.w3.org/2000/01/rdf-schema#label AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME articleName</PROPERTY> </TARGET> <METRIC>AND(jaro(x.publicationName,y.articleName)|0.8,jaro(x.Year,y.Year)|1.0)</METRIC> <!-- <METRIC>jaro(x.publicatioName,y.articleName)|0.7</METRIC> --> <ACCEPTANCE> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/accepted_links_0.8_no_purge</FILE> <RELATION>owl:sameAs</RELATION> </ACCEPTANCE> <REVIEW> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/verified_links_0.8</FILE> <RELATION>owl:sameAs</RELATION> </REVIEW> <EXECUTION>Default</EXECUTION> <OUTPUT>TTL</OUTPUT> </LIMES> |
49 | 56 |
lod.limesDTD=<?xml version="1.0" encoding="utf-8"?> <!ELEMENT LIMES (PREFIX*, SOURCE, TARGET, METRIC, ACCEPTANCE, REVIEW, EXECUTION*, GRANULARITY*, OUTPUT*)> <!ELEMENT PREFIX (NAMESPACE, LABEL)> <!ELEMENT NAMESPACE (#PCDATA)> <!ELEMENT LABEL (#PCDATA)> <!ELEMENT SOURCE (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT TARGET (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT ID (#PCDATA)> <!ELEMENT RESTRICTION (#PCDATA)> <!ELEMENT METRIC (#PCDATA)> <!ELEMENT ACCEPTANCE (THRESHOLD, FILE, RELATION)> <!ELEMENT REVIEW (THRESHOLD, FILE, RELATION)> <!ELEMENT RELATION (#PCDATA)> <!ELEMENT ENDPOINT (#PCDATA)> <!ELEMENT GRAPH (#PCDATA)> <!ELEMENT VAR (#PCDATA)> <!ELEMENT CLASS (#PCDATA)> <!ELEMENT PROPERTY (#PCDATA)> <!ELEMENT TYPE (#PCDATA)> <!ELEMENT THRESHOLD (#PCDATA)> <!ELEMENT FILE (#PCDATA)> <!ELEMENT PAGESIZE (#PCDATA)> <!ELEMENT EXECUTION (#PCDATA)> <!ELEMENT GRANULARITY (#PCDATA)> <!ELEMENT OUTPUT (#PCDATA)> |
50 | 57 |
lod_final_output=/tmp/lodfinal/ |
51 |
lod_targetMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publisher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/externalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://lod.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]} |
|
52 |
lod_sourceMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publisher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/externalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://lod.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]} |
|
58 |
lod_configXML=<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE LIMES SYSTEM "limes.dtd"> <LIMES> <PREFIX> <NAMESPACE>http://www.w3.org/1999/02/22-rdf-syntax-ns#</NAMESPACE> <LABEL>rdf</LABEL> </PREFIX> <PREFIX> <NAMESPACE>http://www.w3.org/2000/01/rdf-schema#</NAMESPACE> <LABEL>rdfs</LABEL> </PREFIX> <SOURCE> <ID>source1</ID> <ENDPOINT>/user/kanakakis/groundTruth/sourceNT</ENDPOINT> <VAR>?x</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?x rdf:type http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity</RESTRICTION> <PROPERTY>http://lod.openaire.eu/vocab/year RENAME Year</PROPERTY> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.eurocris.org/ontologies/cerif/1.3#name AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME publicationName</PROPERTY> </SOURCE> <TARGET> <ID>source2</ID> <ENDPOINT>/user/kanakakis/groundTruth/targetNT</ENDPOINT> <VAR>?y</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?y rdf:type http://swrc.ontoware.org/ontology#Article</RESTRICTION> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/issued RENAME Year</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.w3.org/2000/01/rdf-schema#label AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME articleName</PROPERTY> </TARGET> <METRIC>AND(jaro(x.publicationName,y.articleName)|0.8,jaro(x.Year,y.Year)|1.0)</METRIC> <!-- <METRIC>jaro(x.publicatioName,y.articleName)|0.7</METRIC> --> <ACCEPTANCE> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/accepted_links_0.8_no_purge</FILE> <RELATION>owl:sameAs</RELATION> </ACCEPTANCE> <REVIEW> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/verified_links_0.8</FILE> <RELATION>owl:sameAs</RELATION> </REVIEW> <EXECUTION>Default</EXECUTION> <OUTPUT>TTL</OUTPUT> </LIMES> |
|
59 |
lod_limesDTD=<?xml version="1.0" encoding="utf-8"?> <!ELEMENT LIMES (PREFIX*, SOURCE, TARGET, METRIC, ACCEPTANCE, REVIEW, EXECUTION*, GRANULARITY*, OUTPUT*)> <!ELEMENT PREFIX (NAMESPACE, LABEL)> <!ELEMENT NAMESPACE (#PCDATA)> <!ELEMENT LABEL (#PCDATA)> <!ELEMENT SOURCE (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT TARGET (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT ID (#PCDATA)> <!ELEMENT RESTRICTION (#PCDATA)> <!ELEMENT METRIC (#PCDATA)> <!ELEMENT ACCEPTANCE (THRESHOLD, FILE, RELATION)> <!ELEMENT REVIEW (THRESHOLD, FILE, RELATION)> <!ELEMENT RELATION (#PCDATA)> <!ELEMENT ENDPOINT (#PCDATA)> <!ELEMENT GRAPH (#PCDATA)> <!ELEMENT VAR (#PCDATA)> <!ELEMENT CLASS (#PCDATA)> <!ELEMENT PROPERTY (#PCDATA)> <!ELEMENT TYPE (#PCDATA)> <!ELEMENT THRESHOLD (#PCDATA)> <!ELEMENT FILE (#PCDATA)> <!ELEMENT PAGESIZE (#PCDATA)> <!ELEMENT EXECUTION (#PCDATA)> <!ELEMENT GRANULARITY (#PCDATA)> <!ELEMENT OUTPUT (#PCDATA)> |
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/oozie_app/workflow.xml | ||
---|---|---|
19 | 19 |
</global> |
20 | 20 |
|
21 | 21 |
|
22 |
<start to='linkage'/>
|
|
22 |
<start to='build'/>
|
|
23 | 23 |
<action name="preProcessing"> |
24 | 24 |
<map-reduce> |
25 | 25 |
<configuration> |
... | ... | |
104 | 104 |
<!-- Compress Output--> |
105 | 105 |
<property> |
106 | 106 |
<name>mapred.output.compress</name> |
107 |
<value>true</value>
|
|
107 |
<value>false</value>
|
|
108 | 108 |
</property> |
109 |
<!-- |
|
110 |
<property> |
|
111 |
<name>mapred.output.compression.type</name> |
|
112 |
<value>BLOCK</value> |
|
113 |
</property> |
|
109 | 114 |
|
115 |
<property> |
|
116 |
<name>mapred.output.compression.codec</name> |
|
117 |
<value>org.apache.hadoop.io.compress.GzipCodec</value> |
|
118 |
</property> |
|
119 |
--> |
|
110 | 120 |
<property> |
111 |
<name>mapred.output.compression.type</name> |
|
112 |
<value>BLOCK</value> |
|
113 |
</property> |
|
114 |
|
|
115 |
<property> |
|
116 |
<name>mapred.output.compression.codec</name> |
|
117 |
<value>org.apache.hadoop.io.compress.GzipCodec</value> |
|
118 |
</property> |
|
119 |
|
|
120 |
<property> |
|
121 | 121 |
<name>mapreduce.reduce.class</name> |
122 | 122 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer</value> |
123 |
|
|
123 | 124 |
</property> |
124 | 125 |
<!-- I/O FORMAT --> |
125 | 126 |
<!-- IMPORTANT: sets default delimiter used by text output writer. Required |
... | ... | |
276 | 277 |
|
277 | 278 |
<property> |
278 | 279 |
<name>mapred.input.dir</name> |
279 |
<value>/tmp/lod_blocks/blocks</value>
|
|
280 |
<value>${lod_linkage_input}</value>
|
|
280 | 281 |
</property> |
281 | 282 |
|
282 | 283 |
<property> |
283 | 284 |
<name>mapred.output.dir</name> |
284 |
<value>/tmp/lod_final</value>
|
|
285 |
<value>${linkageOutputPath}</value>
|
|
285 | 286 |
</property> |
286 | 287 |
|
287 | 288 |
|
... | ... | |
412 | 413 |
</configuration> |
413 | 414 |
</map-reduce> |
414 | 415 |
|
415 |
<ok to="end"/>
|
|
416 |
<ok to="compareDatasets"/>
|
|
416 | 417 |
|
417 | 418 |
<error to="fail"/> |
418 | 419 |
</action> |
... | ... | |
430 | 431 |
<value> |
431 | 432 |
namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu |
432 | 433 |
</value> |
433 |
|
|
434 | 434 |
</property> |
435 |
|
|
436 | 435 |
<property> |
437 | 436 |
<name>zookeeper.znode.rootserver</name> |
438 | 437 |
<value> |
... | ... | |
461 | 460 |
|
462 | 461 |
|
463 | 462 |
<property> |
464 |
|
|
465 | 463 |
<name>mapred.input.dir.formats</name> |
466 | 464 |
<value> |
467 | 465 |
${nameNode}${sourceBuildInput};org.apache.hadoop.mapreduce.lib.input.TextInputFormat,${nameNode}${targetBuildInput};org.apache.hadoop.mapreduce.lib.input.TextInputFormat |
... | ... | |
496 | 494 |
<value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value> |
497 | 495 |
</property> |
498 | 496 |
|
497 |
|
|
499 | 498 |
<!-- ## This is required for new MapReduce API usage --> |
500 | 499 |
<property> |
501 | 500 |
<name>mapred.mapper.new-api</name> |
... | ... | |
529 | 528 |
<value>false</value> |
530 | 529 |
</property> |
531 | 530 |
|
532 |
<property> |
|
533 |
<name>mapred.output.compression.type</name> |
|
534 |
<value>BLOCK</value> |
|
535 |
</property> |
|
536 | 531 |
|
532 |
|
|
537 | 533 |
<property> |
538 | 534 |
<name>mapreduce.reduce.class</name> |
539 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer</value> |
|
535 |
<!-- <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer</value>--> |
|
536 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockStreamingReducer</value> |
|
540 | 537 |
</property> |
541 | 538 |
<!-- I/O FORMAT --> |
542 | 539 |
|
... | ... | |
555 | 552 |
</property> |
556 | 553 |
|
557 | 554 |
<property> |
555 |
<name>lod.statsOutputPath</name> |
|
556 |
<value>${nameNode}${statsOutputPath}</value> |
|
557 |
</property> |
|
558 |
|
|
559 |
<property> |
|
558 | 560 |
<name>lod.sourceMappings</name> |
559 | 561 |
<value>${lod_sourceMappings}</value> |
560 | 562 |
</property> |
... | ... | |
584 | 586 |
</property> |
585 | 587 |
|
586 | 588 |
|
587 |
<property> |
|
588 |
<name>mapreduce.multipleoutputs</name> |
|
589 |
<value> |
|
590 |
${buildOut1} ${buildOut2} |
|
591 |
</value> |
|
592 |
</property> |
|
589 |
<!-- remove this if streaming doesn't work--> |
|
593 | 590 |
|
594 |
|
|
595 | 591 |
<property> |
596 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.key</name>
|
|
592 |
<name>mapred.output.key.class</name>
|
|
597 | 593 |
<value>org.apache.hadoop.io.Text</value> |
598 | 594 |
</property> |
599 |
<property> |
|
600 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.value</name> |
|
601 |
<value>org.apache.hadoop.io.Text</value> |
|
602 |
</property> |
|
603 |
<property> |
|
604 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.format</name> |
|
605 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
606 |
</property> |
|
607 | 595 |
|
608 |
|
|
609 |
<!--stats--> |
|
610 | 596 |
<property> |
611 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.key</name>
|
|
612 |
<value>org.apache.hadoop.io.Text</value>
|
|
597 |
<name>mapred.output.value.class</name>
|
|
598 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.StreamingTextOutputFormat</value>
|
|
613 | 599 |
</property> |
614 |
<property> |
|
615 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.value</name> |
|
616 |
<value>org.apache.hadoop.io.Text</value> |
|
617 |
</property> |
|
618 |
<property> |
|
619 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.format</name> |
|
620 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
621 |
</property> |
|
622 | 600 |
|
601 |
<!--Multiple Outputs for Blocks --> |
|
602 |
<!-- |
|
603 |
<property> |
|
604 |
<name>mapreduce.multipleoutputs</name> |
|
605 |
<value> |
|
606 |
${buildOut1} ${buildOut2} |
|
607 |
</value> |
|
608 |
</property> |
|
623 | 609 |
|
610 |
<property> |
|
611 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.key</name> |
|
612 |
<value>org.apache.hadoop.io.Text</value> |
|
613 |
</property> |
|
614 |
<property> |
|
615 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.value</name> |
|
616 |
<value>org.apache.hadoop.io.Text</value> |
|
617 |
</property> |
|
618 |
<property> |
|
619 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.format</name> |
|
620 |
<value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.StreamingTextOutputFormat</value> |
|
621 |
</property> |
|
624 | 622 |
|
623 |
--> |
|
624 |
<!--stats--> |
|
625 |
<!-- <property> |
|
626 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.key</name> |
|
627 |
<value>org.apache.hadoop.io.Text</value> |
|
628 |
</property> |
|
629 |
<property> |
|
630 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.value</name> |
|
631 |
<value>org.apache.hadoop.io.Text</value> |
|
632 |
</property> |
|
633 |
<property> |
|
634 |
<name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.format</name> |
|
635 |
<value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value> |
|
636 |
</property> |
|
637 |
--> |
|
625 | 638 |
<!-- ## Workflow node parameters --> |
626 | 639 |
<property> |
627 | 640 |
<name>mapred.reduce.tasks</name> |
... | ... | |
647 | 660 |
</property> |
648 | 661 |
</configuration> |
649 | 662 |
<main-class>eu.dnetlib.data.mapreduce.hbase.lodExport.utils.FrequencyCounter</main-class> |
650 |
|
|
651 | 663 |
<arg>${lod_redisHost}</arg> |
652 | 664 |
<arg>${lod_redisPort}</arg> |
653 |
<arg>${buildOut2}</arg>
|
|
665 |
<arg>${nameNode}${statsOutputPath}</arg>
|
|
654 | 666 |
|
655 | 667 |
</java> |
656 | 668 |
<ok to="linkage"/> |
Also available in: Unified diff
clea