Project

General

Profile

« Previous | Next » 

Revision 44307

Added by Eri Katsari about 8 years ago

clea

View differences:

modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/test/java/PreprocessingTest.java
1
/*
1 2
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer;
2 3
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.SourceBuildMapper;
3 4
import eu.dnetlib.data.mapreduce.hbase.lodExport.build.TargetBuildMapper;
......
19 20
import java.util.List;
20 21
import java.util.Scanner;
21 22

  
23
*/
22 24
/**
23 25
 * Created by eri_k on 8/24/2016.
24
 */
26
 *//*
27

  
25 28
public class PreprocessingTest {
26 29

  
27 30
    MapDriver<LongWritable, Text, Text, Text> mapDriver;
......
54 57
        }
55 58
        mapDriver.withInput(new Text("1"), input);
56 59

  
57
   /*     mapDriver.withConfiguration(configuration)
60
   */
61
/*     mapDriver.withConfiguration(configuration)
58 62
                .withInput(new LongWritable(1),
59 63
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n"))
60 64
                .withInput(new LongWritable(2),
61 65
                        new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.eurocris.org/ontologies/cerif/1.3#name> \"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\" .\n"));
62
*/
66
*//*
63 67

  
64
     /*   mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
68

  
69
     */
70
/*   mapDriver .withOutput(new Text("OA,result,<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76>"),
65 71
                new Text("<http://www.eurocris.org/ontologies/cerif/1.3#name>,\"Dietary fish oil  MaxEPA  enhances pancreatic carcinogenesis in azaserine treated rats.\","));
66
*/
72
*//*
73

  
67 74
        mapDriver.runTest();
68 75
    }
69 76

  
......
91 98

  
92 99
    @Test
93 100
    public void testMapReduce() {
94
        /*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
101
        */
102
/*mapReduceDriver.withInput(new LongWritable(), new Text("655209;1;796764372490213;804422938115889;6"));
95 103
        mapReduceDriver.withOutput(new Text("6"), new IntWritable(2));
96
        mapReduceDriver.runTest();*/
104
        mapReduceDriver.runTest();*//*
97 105

  
106

  
98 107
    }
99 108
}
100 109

  
110
*/
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/BlockReducer.java
1 1
package eu.dnetlib.data.mapreduce.hbase.lodExport.build;
2 2

  
3
import java.io.IOException;
4
import java.nio.charset.Charset;
5
import java.util.Iterator;
6

  
3
import com.google.common.collect.Iterables;
4
import org.apache.hadoop.io.ArrayWritable;
7 5
import org.apache.hadoop.io.Text;
8 6
import org.apache.hadoop.mapreduce.Reducer;
9 7
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
10 8
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
11 9
import org.apache.log4j.Logger;
12 10

  
13
public class BlockReducer extends Reducer<Text, Text, Text, Text> {
11
import java.io.IOException;
12
import java.lang.reflect.Array;
13
import java.nio.charset.Charset;
14
import java.util.ArrayList;
15
import java.util.Arrays;
16
import java.util.Iterator;
17
import java.util.List;;
18

  
19
public class BlockReducer extends Reducer<Text, Text, Text, BlockReducer.TextArrayWritable> {
14 20
    private static final String SEPERATOR = ",";
15 21

  
16 22
    public static enum BLOCKS_COUNTER {
17
        WRITTEN_RECORD_IDS,
18
        DISCARDED_RECORD_IDS
23
        WRITTEN_BLOCKS,
24
        DISCARDED_BLOCKS
19 25
    }
20 26

  
21 27
    private Logger log = Logger.getLogger(BlockReducer.class);
......
29 35
    }
30 36

  
31 37

  
38
    public static class TextArrayWritable extends ArrayWritable {
39

  
40
        public TextArrayWritable(Text[] values) {
41
            super(Text.class, values);
42
        }
43

  
44
        @Override
45
        public Text[] get() {
46
            return (Text[]) super.get();
47
        }
48

  
49
        @Override
50
        public String toString() {
51
            Text[] values = get();
52
            return Arrays.toString(values);
53
        }
54
    }
55

  
56

  
32 57
    @Override
33 58
    protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException {
34 59
        Iterator<Text> it = values.iterator();
60
        List<String> valuesList = new ArrayList<>();
35 61
        try {
36 62
            //each list is a block
37
            StringBuilder field = new StringBuilder();
63
            //  StringBuilder field = new StringBuilder();
38 64
            int nunberOfEntities = 0;
39 65

  
66
            boolean hasSource = false;
67
            boolean hasTarget = false;
68

  
40 69
            while (it.hasNext()) {
41
                field.append(it.next().toString()).append(SEPERATOR);
70
                String val = it.next().toString();
71
                if (val.contains("source_")) {
72
                    hasSource = true;
73
                } else if (val.contains("target_")) {
74
                    hasTarget = true;
75
                }
76
                valuesList.add(val + SEPERATOR);
42 77
                nunberOfEntities++;
43 78
            }
79
            // field.append(val).append(SEPERATOR);
44 80

  
45
            if (nunberOfEntities > 1) {
46
                MultipleOutputWriter.write("b", key, new Text(field.toString()), "blocks/b");
81
            if (nunberOfEntities > 1 && hasSource && hasTarget) {
82

  
83
                MultipleOutputWriter.write("b", key, valuesList, "blocks/b");
47 84
                MultipleOutputWriter.write("entitiesNumber", key, new Text(String.valueOf(nunberOfEntities).getBytes(Charset.forName("UTF-8"))), "stats/entitiesNumber");
48
                context.getCounter(BLOCKS_COUNTER.WRITTEN_RECORD_IDS).increment(1);
85
                context.getCounter(BLOCKS_COUNTER.WRITTEN_BLOCKS).increment(1);
49 86
            } else {
50
                context.getCounter(BLOCKS_COUNTER.DISCARDED_RECORD_IDS).increment(1);
87
                context.getCounter(BLOCKS_COUNTER.DISCARDED_BLOCKS).increment(1);
51 88
            }
52
        } catch (Exception e) {
89
        } catch (
90
                Exception e)
91

  
92
        {
53 93
            throw new InterruptedException(e.getMessage());
54 94
        }
55 95

  
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/SourceBuildMapper.java
61 61

  
62 62
    @Override
63 63
    protected void map(final LongWritable keyIn, final Text result, final Context context) throws IOException {
64

  
65 64
        try {
66 65
            //get ID
67 66
            StringBuilder id = new StringBuilder();
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/StreamingTextOutputFormat.java
19 19
import org.apache.hadoop.mapred.TextOutputFormat;
20 20
import org.apache.hadoop.util.Progressable;
21 21
import org.apache.hadoop.util.ReflectionUtils;
22
import org.apache.log4j.Logger;
22 23

  
23 24
public class StreamingTextOutputFormat<K, V> extends TextOutputFormat<K, V> {
24
    protected static class StreamingLineRecordWriter<K, V> implements
25
            RecordWriter<K, V> {
25
    protected static class StreamingLineRecordWriter<K, V> implements RecordWriter<K, V> {
26 26
        private static final String utf8 = "UTF-8";
27 27
        private static final byte[] newline;
28
        private Logger log = Logger.getLogger(this.getClass());
28 29

  
29 30
        static {
30 31
            try {
31 32
                newline = "\n".getBytes(utf8);
32 33
            } catch (UnsupportedEncodingException uee) {
33
                throw new IllegalArgumentException("can't find " + utf8
34
                        + " encoding");
34
                throw new IllegalArgumentException("can't find " + utf8 + " encoding");
35 35
            }
36 36
        }
37 37

  
38 38
        protected DataOutputStream out;
39 39
        private final byte[] keyValueSeparator;
40
        private final byte[] valueDelimiter;
40
        private final byte[] valueDelimiter = ",".getBytes(utf8);
41 41
        private boolean dataWritten = false;
42 42

  
43
        public StreamingLineRecordWriter(DataOutputStream out, String keyValueSeparator, String valueDelimiter) {
43
        public StreamingLineRecordWriter(DataOutputStream out, String keyValueSeparator, String valueDelimiter) throws UnsupportedEncodingException {
44 44
            this.out = out;
45 45
            try {
46 46
                this.keyValueSeparator = keyValueSeparator.getBytes(utf8);
47
                this.valueDelimiter = valueDelimiter.getBytes(utf8);
47
                //valueDelimiter.getBytes(utf8);
48 48
            } catch (UnsupportedEncodingException uee) {
49 49
                throw new IllegalArgumentException("can't find " + utf8 + " encoding");
50 50
            }
51 51
        }
52 52

  
53
        public StreamingLineRecordWriter(DataOutputStream out) {
53
        public StreamingLineRecordWriter(DataOutputStream out) throws UnsupportedEncodingException {
54 54
            this(out, "\t", ",");
55 55
        }
56 56

  
......
64 64
            if (o instanceof Text) {
65 65
                Text to = (Text) o;
66 66
                out.write(to.getBytes(), 0, to.getLength());
67
                log.info("writing out first value");
67 68
            } else {
68 69
                out.write(o.toString().getBytes(utf8));
69 70
            }
......
76 77
            if (nullKey && nullValue) {
77 78
                return;
78 79
            }
80
            log.info("RECEIVED KEY " + key);
81
            log.info("RECEIVED VALUE " + value);
79 82

  
80 83
            if (!nullKey) {
81 84
                // if we've written data before, append a new line
82 85
                if (dataWritten) {
83 86
                    out.write(newline);
87
                    log.info("datawrittern :writting new line" + key);
84 88
                }
85 89

  
86 90
                // write out the key and separator
91
                log.info("written key" + key);
87 92
                writeObject(key);
88 93
                out.write(keyValueSeparator);
89 94
            } else if (!nullValue) {
95
                log.info("null key not value : writign out" + valueDelimiter + value);
90 96
                // write out the value delimiter
91 97
                out.write(valueDelimiter);
98
                // write out the value
99
                writeObject(value);
92 100
            }
93 101

  
94
            // write out the value
95
            writeObject(value);
96

  
97 102
            // track that we've written some data
98 103
            dataWritten = true;
99 104
        }
......
112 117
    public RecordWriter<K, V> getRecordWriter(FileSystem fileSystem, JobConf job, String name, Progressable progress) throws IOException {
113 118
        boolean isCompressed = getCompressOutput(job);
114 119
        String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
115
        String valueDelimiter = job.get("mapred.textoutputformat.delimiter", ",");
120
        String valueDelimiter = ",";
116 121
        if (!isCompressed) {
117 122
            Path file = FileOutputFormat.getTaskOutputPath(job, name);
118 123
            FileSystem fs = file.getFileSystem(job);
119 124
            FSDataOutputStream fileOut = fs.create(file, progress);
120
            return new StreamingLineRecordWriter<K, V>(fileOut,
121
                    keyValueSeparator, valueDelimiter);
125
            return new StreamingLineRecordWriter<K, V>(fileOut, keyValueSeparator, valueDelimiter);
122 126
        } else {
123 127
            Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(
124 128
                    job, GzipCodec.class);
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/BlockStreamingReducer.java
1 1
package eu.dnetlib.data.mapreduce.hbase.lodExport.build;
2 2

  
3
import com.lambdaworks.redis.RedisClient;
4
import com.lambdaworks.redis.RedisConnection;
3
import com.lambdaworks.com.google.common.collect.Iterables;
5 4
import org.apache.hadoop.fs.FileSystem;
6 5
import org.apache.hadoop.fs.Path;
7 6
import org.apache.hadoop.io.Text;
8 7
import org.apache.hadoop.mapreduce.Reducer;
9
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
10
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
11 8
import org.apache.log4j.Logger;
12 9

  
13 10
import java.io.BufferedWriter;
......
15 12
import java.io.InterruptedIOException;
16 13
import java.io.OutputStream;
17 14
import java.io.OutputStreamWriter;
18
import java.nio.charset.Charset;
19
import java.util.HashMap;
20
import java.util.Map;
21 15
import java.util.UUID;
22 16

  
17
import static com.lambdaworks.com.google.common.collect.Iterables.toArray;
18

  
23 19
public class BlockStreamingReducer extends Reducer<Text, Text, Text, Text> {
24 20
    FileSystem hdfs;
25 21
    OutputStream os;
......
44 40
            IOException, InterruptedException {
45 41
        int entitiesNumber = 0;
46 42
        boolean firstKey = true;
43
        //each list is a block
47 44
        for (Text value : values) {
48
            context.write(firstKey ? key : null, value);
45
            if (firstKey) {
46
                context.write(key, value);
47

  
48
            } else {
49
                context.write(null, value);
50
            }
51

  
49 52
            firstKey = false;
50 53
            entitiesNumber++;
54

  
51 55
        }
52
        //each list is a block
56
        //append to output
53 57
        try {
54
            writeStats(key.toString(), String.valueOf(entitiesNumber));
58
            //  writeStats(key.toString(), String.valueOf(entitiesNumber));
55 59
        } catch (Exception e) {
56 60
            log.error("Cannot write to redis! Error :" + e.toString());
57 61
            throw new InterruptedIOException(e.toString());
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/linkage/LinkMapper.java
50 50
    protected void map(final Text keyIn, final Text result, final Context context) throws IOException {
51 51
        try {
52 52

  
53
            log.info("KEY" + keyIn.toString());
54
            log.info("VALUE" + result.toString());
55

  
56
            //purge blocks with number of  records > optimal
53 57
            int recordsNumber = countRecords(result.toString(), SEPERATOR);
54
            //purge blocks with number of  records > optimal
58
            //purge blocks that contain only source or target entities
59
            boolean hasBothSouceAndTarget = result.toString().contains("source_") && result.toString().contains("target_");
60
            //how many comparisons we have purged
55 61
            context.getCounter(TEST_COUNTERS.TOTAL_COMPARISONS).increment(recordsNumber * recordsNumber);
56
            if (recordsNumber == 1 || recordsNumber >= optimalBlockSize) {
62

  
63
            if (recordsNumber == 1 || recordsNumber >= optimalBlockSize || !hasBothSouceAndTarget) {
57 64
                context.getCounter(TEST_COUNTERS.DISCARDED_BLOCKS).increment(1);
58 65
                context.getCounter(TEST_COUNTERS.DISCARDED_COMPARISONS).increment(recordsNumber * recordsNumber);
59 66
            } else {
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/linkage/LimesReducer.java
61 61
        connection = client.connect();
62 62
    }
63 63

  
64

  
65 64
    @Override
66 65
    protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException {
67 66

  
......
122 121
                ExecutionEngine engine = ExecutionEngineFactory.getEngine("Default", sourceCache, targetCache,
123 122
                        config.getSourceInfo().getVar(), config.getTargetInfo().getVar());
124 123

  
125
                Mapping verificationMapping = engine.execute(plan); //mappings for verification
126

  
127
                Mapping acceptanceMapping = verificationMapping.getSubMap(config.getAcceptanceThreshold()); //mappings for acceptance (these are the ones we want)
128
                //output
129

  
130
                for (String source : acceptanceMapping.getMap().keySet()) {//for each source, look at its targets
131
                    for (String target : acceptanceMapping.getMap().get(source).keySet()) {//for each target, look at its confidence
132
                        context.write(new Text(source.replace("source_", source)), new Text(target.replace("_target", "") + "," + acceptanceMapping.getConfidence(source, target)));
133
                        context.getCounter(LIMES_COUNTERS.WRITTEN_OUT_ENTITIES).increment(1);
124
                if (sourceCache.size() > 0 && targetCache.size() > 0) {
125
                    Mapping verificationMapping = engine.execute(plan); //mappings for verification
126
                    Mapping acceptanceMapping = verificationMapping.getSubMap(config.getAcceptanceThreshold()); //mappings for acceptance (these are the ones we want)
127
                    //output
128
                    for (String source : acceptanceMapping.getMap().keySet()) {//for each source, look at its targets
129
                        for (String target : acceptanceMapping.getMap().get(source).keySet()) {//for each target, look at its confidence
130
                            context.write(new Text(source.replace("source_", source)), new Text(target.replace("_target", "") + "," + acceptanceMapping.getConfidence(source, target)));
131
                            context.getCounter(LIMES_COUNTERS.WRITTEN_OUT_ENTITIES).increment(1);
132
                        }
134 133
                    }
135 134
                }
136

  
137 135
            } catch (Exception e) {
138 136
                log.error(e);
139 137
                throw new InterruptedException(e.toString());
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.eri.properties
12 12
lod_enclosing='
13 13
lod_entitiesPerQuery=10
14 14
lod_hbase_table=db_openaireplus_services
15
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = 
[legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
15
lod_indexConf=index.conf{ result { dups = true, links = [ { relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype,similarity,type] }, { relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [title,dateofacceptance,publisher,resulttype,similarity,type] } ]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fieldMap = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fieldMap = [officialname,websiteurl,datasourcetype,aggregatortype] } ]}, organization { dups = false, links = [ { relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = 
true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] }, { relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fieldMap = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fieldMap = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
16 16
lod_jsonEntities={ "result": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.eurocris.org/ontologies/cerif/1.3#name", "6": "http://purl.org/dc/terms/dateAccepted", "7": "http://purl.org/dc/terms/publisher", "8": "http://purl.org/dc/terms/identifier", "9": "http://purl.org/dc/terms/language", "10": "http://purl.org/dc/terms/date", "11": "http://lod.openaire.eu/vocab/resultSubject", "12": "http://lod.openaire.eu/vocab/externalReference", "13": "http://purl.org/dc/terms/source", "14": "http://purl.org/dc/terms/format", "15": "http://lod.openaire.eu/vocab/context", "16": "http://dbpedia.org/ontology/country", "17": "http://purl.org/dc/terms/accessRights", "18": "http://purl.org/dc/terms/description", "19": "http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name", "20": "http://lod.openaire.eu/vocab/dataSourceType", "21": "http://lod.openaire.eu/vocab/device", "22": "http://lod.openaire.eu/vocab/size", "23": "http://lod.openaire.eu/vocab/version", "24": "http://lod.openaire.eu/vocab/lastMetadataUpdate", "25": "http://lod.openaire.eu/vocab/metadataVersion", "26": "http://lod.openaire.eu/vocab/resultType", "27": "http://lod.openaire.eu/vocab/year", "28": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity" }], "person": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier","5": "http://xmlns.com/foaf/0.1/firstName", "6": "http://xmlns.com/foaf/spec/lastName", "7": "http://xmlns.com/foaf/0.1/name", "8": "http://schema.org/faxNumber", "9": 
"http://xmlns.com/foaf/0.1/mbox", "10": "http://xmlns.com/foaf/0.1/phone", "11": "http://schema.org/nationality", "12": "http://purl.org/dc/terms/identifier", "13": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Person" }], "datasource": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/datasourceType", "6": "http://lod.openaire.eu/vocab/openAIRECompatibility", "7": "http://dbpedia.org/ontology/officialName", "8": "http://lod.openaire.eu/vocab/englishName", "9": "http://schema.org/url", "10": "http://xmlns.com/foaf/0.1/logo", "11": "http://xmlns.com/foaf/0.1/mbox", "12": "http://purl.org/vocab/vann/preferredNamespacePrefix", "13": "http://www.w3.org/2003/01/geo/wgs84_pos#lat", "14": "http://www.w3.org/2003/01/geo/wgs84_pos#long", "15": "http://lod.openaire.eu/vocab/dateOfValidity", "16": "http://purl.org/dc/terms/description", "17": "http://lod.openaire.eu/vocab/subjectList", "18": "http://lod.openaire.eu/numberOfItems", "19": "http://purl.org/dc/terms/date", "20": "http://lod.openaire.eu/vocab/policies", "21": "http://lod.openaire.eu/vocab/languages", "22": "http://lod.openaire.eu/vocab/contentType", "23": "http://lod.openaire.eu/vocab/accessInfoPackage", "24": "http://lod.openaire.eu/vocab/releaseStartDate", "25": "http://lod.openaire.eu/vocab/releaseEndDate", "26": "http://lod.openaire.eu/vocab/missionStatementUrl", "27": "http://www.europeana.eu/schemas/edm/dataProvider", "28": "http://lod.openaire.eu/vocab/serviceProvider", "29": "http://lod.openaire.eu/vocab/databaseAccessType", "30": "http://lod.openaire.eu/vocab/dataUploadType", "31": "http://lod.openaire.eu/vocab/dataUploadRestrictions", "32": "http://lod.openaire.eu/vocab/versioning", "33": 
"http://lod.openaire.eu/vocab/citationGuidelineUrl", "34": "http://lod.openaire.eu/vocab/qualityManagementKind", "35": "http://lod.openaire.eu/vocab/pidSystems", "36": "http://lod.openaire.eu/vocab/certificates", "37": "http://purl.org/dc/terms/accessRights", "38": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.w3.org/ns/prov#Entity" }], "organization": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://www.w3.org/2004/02/skos/core#altLabel", "6": "http://www.w3.org/2004/02/skos/core#prefLabel", "7": "http://lod.openaire.eu/vocab/webSiteUrl", "8": "http://xmlns.com/foaf/0.1/logo", "9": "http://dbpedia.org/ontology/country", "10": "http://lod.openaire.eu/vocab/entityType", "11": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://xmlns.com/foaf/0.1/Organization" }], "project": [{ "0": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "1": "http://purl.org/dc/terms/identifier", "2": "http://lod.openaire.eu/vocab/dateOfTransformation", "3": "http://lod.openaire.eu/vocab/dateOfCollection", "4": "http://purl.org/dc/terms/identifier", "5": "http://lod.openaire.eu/vocab/projectCode", "6": "http://schema.org/url", "7": "http://www.eurocris.org/ontologies/cerif/1.3#acronym", "8": "http://www.eurocris.org/ontologies/cerif/1.3#name", "9": "http://www.eurocris.org/ontologies/cerif/1.3#startDate", "10": "http://www.eurocris.org/ontologies/cerif/1.3#endDate", "11": "http://purl.org/cerif/frapo/hasCallIdentifier", "12": "http://www.eurocris.org/ontologies/cerif/1.3#keyword", "13": "http://www.w3.org/2006/time#hasDurationDescription", "14": "http://lod.openaire.eu/vocab/ec_SC39", "15": "http://lod.openaire.eu/vocab/contractType", "16": 
"http://lod.openaire.eu/vocab/oaMandatePublications", "17": "http://lod.openaire.eu/vocab/projectSubjects", "18": "http://od.openaire.eu/vocab/ec_article29-3", "19": "http://lod.openaire.eu/vocab/funder", "20": "http://lod.openaire.eu/vocab/fundingLevel0", "21": "http://lod.openaire.eu/vocab/fundingLevel1", "22": "http://lod.openaire.eu/vocab/fundingLevel2", "23": "http://lod.openaire.eu/vocab/fundingLevel3", "24": "http://lod.openaire.eu/vocab/trust", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.eurocris.org/ontologies/cerif/1.3#Project" }] }
17 17
lod_jsonRels={ "resultResult": [{ "property": "http://lod.openaire.eu/vocab/resultResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "resultProject": [{ "property": "http://lod.openaire.eu/vocab/resultProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personResult": [{ "property": "http://lod.openaire.eu/vocab/personResult", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personProject": [{ "property": "http://lod.openaire.eu/vocab/personProject", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "personPerson": [{ "property": "http://lod.openaire.eu/vocab/personPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "datasourceOrganization": [{ "property": "http://lod.openaire.eu/vocab/datasourceOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectOrganization": [{ "property": "http://lod.openaire.eu/vocab/projectOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "organizationOrganization": [{ "property": "http://lod.openaire.eu/vocab/organizationOrganization", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "projectPerson": [{ "property": "http://www.eurocris.org/ontologies/cerif/1.3/#linksToPerson", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }], "dedup": [{ "property": "http://www.w3.org/2002/07/owl#sameAs", "sourceType": "1", "sourceId": "2", "targetType": "3", "targetId": "4" }] }
18 18
lod_lastExecutionDate=2015-05-26
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/job.properties
45 45
groundTruthPath = /tmp/lodfinal/groundTruth
46 46
linkageOutputPath = /tmp/lodfinal/source
47 47
statsOutputPath=/tmp/lod_blocks/stats/
48
lod_sourceMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openair
49
e.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publ
50
isher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/e
51
xternalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/
52
accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/
53
device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://l
54
od.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]}
48 55
lod.configXML=lod_configXML=<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE LIMES SYSTEM "limes.dtd"> <LIMES> <PREFIX> <NAMESPACE>http://www.w3.org/1999/02/22-rdf-syntax-ns#</NAMESPACE> <LABEL>rdf</LABEL> </PREFIX> <PREFIX> <NAMESPACE>http://www.w3.org/2000/01/rdf-schema#</NAMESPACE> <LABEL>rdfs</LABEL> </PREFIX> <SOURCE> <ID>source1</ID> <ENDPOINT>/user/kanakakis/groundTruth/sourceNT</ENDPOINT> <VAR>?x</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?x rdf:type http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity</RESTRICTION> <PROPERTY>http://lod.openaire.eu/vocab/year RENAME Year</PROPERTY> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.eurocris.org/ontologies/cerif/1.3#name AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME publicationName</PROPERTY> </SOURCE> <TARGET> <ID>source2</ID> <ENDPOINT>/user/kanakakis/groundTruth/targetNT</ENDPOINT> <VAR>?y</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?y rdf:type http://swrc.ontoware.org/ontology#Article</RESTRICTION> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/issued RENAME Year</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.w3.org/2000/01/rdf-schema#label AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME articleName</PROPERTY> </TARGET> <METRIC>AND(jaro(x.publicationName,y.articleName)|0.8,jaro(x.Year,y.Year)|1.0)</METRIC> <!-- <METRIC>jaro(x.publicatioName,y.articleName)|0.7</METRIC> --> <ACCEPTANCE> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/accepted_links_0.8_no_purge</FILE> <RELATION>owl:sameAs</RELATION> </ACCEPTANCE> <REVIEW> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/verified_links_0.8</FILE> <RELATION>owl:sameAs</RELATION> </REVIEW> <EXECUTION>Default</EXECUTION> <OUTPUT>TTL</OUTPUT> </LIMES>
49 56
lod.limesDTD=<?xml version="1.0" encoding="utf-8"?> <!ELEMENT LIMES (PREFIX*, SOURCE, TARGET, METRIC, ACCEPTANCE, REVIEW, EXECUTION*, GRANULARITY*, OUTPUT*)> <!ELEMENT PREFIX (NAMESPACE, LABEL)> <!ELEMENT NAMESPACE (#PCDATA)> <!ELEMENT LABEL (#PCDATA)> <!ELEMENT SOURCE (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT TARGET (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT ID (#PCDATA)> <!ELEMENT RESTRICTION (#PCDATA)> <!ELEMENT METRIC (#PCDATA)> <!ELEMENT ACCEPTANCE (THRESHOLD, FILE, RELATION)> <!ELEMENT REVIEW (THRESHOLD, FILE, RELATION)> <!ELEMENT RELATION (#PCDATA)> <!ELEMENT ENDPOINT (#PCDATA)> <!ELEMENT GRAPH (#PCDATA)> <!ELEMENT VAR (#PCDATA)> <!ELEMENT CLASS (#PCDATA)> <!ELEMENT PROPERTY (#PCDATA)> <!ELEMENT TYPE (#PCDATA)> <!ELEMENT THRESHOLD (#PCDATA)> <!ELEMENT FILE (#PCDATA)> <!ELEMENT PAGESIZE (#PCDATA)> <!ELEMENT EXECUTION (#PCDATA)> <!ELEMENT GRANULARITY (#PCDATA)> <!ELEMENT OUTPUT (#PCDATA)>
50 57
lod_final_output=/tmp/lodfinal/
51
lod_targetMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publisher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/externalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://lod.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]}
52
lod_sourceMappings={"result":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.eurocris.org/ontologies/cerif/1.3#name","http://purl.org/dc/terms/dateAccepted","http://purl.org/dc/terms/publisher","http://purl.org/dc/terms/identifier","http://purl.org/dc/terms/language","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/resultSubject","http://lod.openaire.eu/vocab/externalReference","http://purl.org/dc/terms/source","http://purl.org/dc/terms/format","http://lod.openaire.eu/vocab/context","http://dbpedia.org/ontology/country","http://purl.org/dc/terms/accessRights","http://purl.org/dc/terms/description","http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name","http://lod.openaire.eu/vocab/dataSourceType","http://lod.openaire.eu/vocab/device","http://lod.openaire.eu/vocab/size","http://lod.openaire.eu/vocab/version","http://lod.openaire.eu/vocab/lastMetadataUpdate","http://lod.openaire.eu/vocab/metadataVersion","http://lod.openaire.eu/vocab/year","http://lod.openaire.eu/vocab/resultType"],"project": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/projectCode","http://schema.org/url","http://www.eurocris.org/ontologies/cerif/1.3#acronym","http://www.eurocris.org/ontologies/cerif/1.3#name","http://www.eurocris.org/ontologies/cerif/1.3#startDate","http://www.eurocris.org/ontologies/cerif/1.3#endDate","http://purl.org/cerif/frapo/hasCallIdentifier","http://www.eurocris.org/ontologies/cerif/1.3#keyword","http://www.w3.org/2006/time#hasDurationDescription","http://lod.openaire.eu/vocab/ec_SC39","http://lod.openaire.eu/vocab/contractType","http://lod.openaire.eu/vocab/oaMandatePublications","http://lod.openaire.eu/vocab/projectSubjects","http://od.openaire.eu/vocab/ec_article29-3","http://lod.openaire.eu/vocab/funder","http://lod.openaire.eu/vocab/fundingLevel0","http://lod.openaire.eu/vocab/fundingLevel1","http://lod.openaire.eu/vocab/fundingLevel2","http://lod.openaire.eu/vocab/fundingLevel3"],"person": ["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier", "http://xmlns.com/foaf/0.1/firstName","http://xmlns.com/foaf/0.1/lastName", "http://xmlns.com/foaf/0.1/name","http://schema.org/faxNumber","http://xmlns.com/foaf/0.1/mbox","http://xmlns.com/foaf/0.1/phone", "http://schema.org/nationality","http://purl.org/dc/terms/identifier", "http://lod.openaire.eu/vocab/trust"],"organization": 
["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://www.w3.org/2004/02/skos/core#altLabel","http://www.w3.org/2004/02/skos/core#prefLabel","http://lod.openaire.eu/vocab/webSiteUrl","http://xmlns.com/foaf/0.1/logo","http://dbpedia.org/ontology/country","http://lod.openaire.eu/vocab/entityType" ],"datasource":["http://www.w3.org/1999/02/22-rdf-syntax-ns#type","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/dateOfTransformation","http://lod.openaire.eu/vocab/dateOfCollection","http://purl.org/dc/terms/identifier","http://lod.openaire.eu/vocab/datasourceType","http://lod.openaire.eu/vocab/openAIRECompatibility","http://dbpedia.org/ontology/officialName","http://lod.openaire.eu/vocab/englishName","http://schema.org/url","http://xmlns.com/foaf/0.1/logo","http://xmlns.com/foaf/0.1/mbox","http://purl.org/vocab/vann/preferredNamespacePrefix","http://www.w3.org/2003/01/geo/wgs84_pos#lat","http://www.w3.org/2003/01/geo/wgs84_pos#long","http://lod.openaire.eu/vocab/dateOfValidity","http://purl.org/dc/terms/description","http://lod.openaire.eu/vocab/subjectList","http://lod.openaire.eu/numberOfItems","http://purl.org/dc/terms/date","http://lod.openaire.eu/vocab/policies","http://lod.openaire.eu/vocab/languages","http://lod.openaire.eu/vocab/contentType","http://lod.openaire.eu/vocab/accessInfoPackage","http://lod.openaire.eu/vocab/releaseStartDate","http://lod.openaire.eu/vocab/releaseEndDate","http://lod.openaire.eu/vocab/missionStatementUrl","http://www.europeana.eu/schemas/edm/dataProvider","http://lod.openaire.eu/vocab/serviceProvider","http://lod.openaire.eu/vocab/databaseAccessType","http://lod.openaire.eu/vocab/dataUploadType","http://lod.openaire.eu/vocab/dataUploadRestrictions","http://lod.openaire.eu/vocab/versioning","http://lod.openaire.eu/vocab/citationGuidelineUrl",
"http://lod.openaire.eu/vocab/qualityManagementKind","http://lod.openaire.eu/vocab/pidSystems","http://lod.openaire.eu/vocab/certificates","http://purl.org/dc/terms/accessRights"]}
58
lod_configXML=<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE LIMES SYSTEM "limes.dtd"> <LIMES> <PREFIX> <NAMESPACE>http://www.w3.org/1999/02/22-rdf-syntax-ns#</NAMESPACE> <LABEL>rdf</LABEL> </PREFIX> <PREFIX> <NAMESPACE>http://www.w3.org/2000/01/rdf-schema#</NAMESPACE> <LABEL>rdfs</LABEL> </PREFIX> <SOURCE> <ID>source1</ID> <ENDPOINT>/user/kanakakis/groundTruth/sourceNT</ENDPOINT> <VAR>?x</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?x rdf:type http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity</RESTRICTION> <PROPERTY>http://lod.openaire.eu/vocab/year RENAME Year</PROPERTY> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.eurocris.org/ontologies/cerif/1.3#name AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME publicationName</PROPERTY> </SOURCE> <TARGET> <ID>source2</ID> <ENDPOINT>/user/kanakakis/groundTruth/targetNT</ENDPOINT> <VAR>?y</VAR> <PAGESIZE>100</PAGESIZE> <RESTRICTION>?y rdf:type http://swrc.ontoware.org/ontology#Article</RESTRICTION> <PROPERTY>http://www.w3.org/1999/02/22-rdf-syntax-ns# RENAME type</PROPERTY> <PROPERTY>http://purl.org/dc/terms/issued RENAME Year</PROPERTY> <PROPERTY>http://purl.org/dc/terms/identifier RENAME id</PROPERTY> <PROPERTY>http://www.w3.org/2000/01/rdf-schema#label AS lowercase->regexreplace("[^A-Za-z0-9]"," ") RENAME articleName</PROPERTY> </TARGET> <METRIC>AND(jaro(x.publicationName,y.articleName)|0.8,jaro(x.Year,y.Year)|1.0)</METRIC> <!-- <METRIC>jaro(x.publicatioName,y.articleName)|0.7</METRIC> --> <ACCEPTANCE> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/accepted_links_0.8_no_purge</FILE> <RELATION>owl:sameAs</RELATION> </ACCEPTANCE> <REVIEW> <THRESHOLD>0.8</THRESHOLD> <FILE>/user/kanakakis/groundTruth/verified_links_0.8</FILE> <RELATION>owl:sameAs</RELATION> </REVIEW> <EXECUTION>Default</EXECUTION> <OUTPUT>TTL</OUTPUT> </LIMES>
59
lod_limesDTD=<?xml version="1.0" encoding="utf-8"?> <!ELEMENT LIMES (PREFIX*, SOURCE, TARGET, METRIC, ACCEPTANCE, REVIEW, EXECUTION*, GRANULARITY*, OUTPUT*)> <!ELEMENT PREFIX (NAMESPACE, LABEL)> <!ELEMENT NAMESPACE (#PCDATA)> <!ELEMENT LABEL (#PCDATA)> <!ELEMENT SOURCE (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT TARGET (ID, ENDPOINT, GRAPH*, VAR, PAGESIZE, RESTRICTION+, PROPERTY+, TYPE*)> <!ELEMENT ID (#PCDATA)> <!ELEMENT RESTRICTION (#PCDATA)> <!ELEMENT METRIC (#PCDATA)> <!ELEMENT ACCEPTANCE (THRESHOLD, FILE, RELATION)> <!ELEMENT REVIEW (THRESHOLD, FILE, RELATION)> <!ELEMENT RELATION (#PCDATA)> <!ELEMENT ENDPOINT (#PCDATA)> <!ELEMENT GRAPH (#PCDATA)> <!ELEMENT VAR (#PCDATA)> <!ELEMENT CLASS (#PCDATA)> <!ELEMENT PROPERTY (#PCDATA)> <!ELEMENT TYPE (#PCDATA)> <!ELEMENT THRESHOLD (#PCDATA)> <!ELEMENT FILE (#PCDATA)> <!ELEMENT PAGESIZE (#PCDATA)> <!ELEMENT EXECUTION (#PCDATA)> <!ELEMENT GRANULARITY (#PCDATA)> <!ELEMENT OUTPUT (#PCDATA)>
modules/dnet-openaire-lod-interlinking-wf/src/main/resources/eu/dnetlib/iis/core/javamapreduce/lodexport/oozie_app/workflow.xml
19 19
    </global>
20 20

  
21 21

  
22
    <start to='linkage'/>
22
    <start to='build'/>
23 23
    <action name="preProcessing">
24 24
        <map-reduce>
25 25
            <configuration>
......
104 104
                <!-- Compress Output-->
105 105
                <property>
106 106
                    <name>mapred.output.compress</name>
107
                    <value>true</value>
107
                    <value>false</value>
108 108
                </property>
109
                <!--
110
                                <property>
111
                                    <name>mapred.output.compression.type</name>
112
                                    <value>BLOCK</value>
113
                                </property>
109 114

  
115
                                <property>
116
                                    <name>mapred.output.compression.codec</name>
117
                                    <value>org.apache.hadoop.io.compress.GzipCodec</value>
118
                                </property>
119
                -->
110 120
                <property>
111
                    <name>mapred.output.compression.type</name>
112
                    <value>BLOCK</value>
113
                </property>
114

  
115
                <property>
116
                    <name>mapred.output.compression.codec</name>
117
                    <value>org.apache.hadoop.io.compress.GzipCodec</value>
118
                </property>
119

  
120
                <property>
121 121
                    <name>mapreduce.reduce.class</name>
122 122
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.preprocessing.DatasetReducer</value>
123

  
123 124
                </property>
124 125
                <!-- I/O FORMAT -->
125 126
                <!-- IMPORTANT: sets default delimiter used by text output writer. Required
......
276 277

  
277 278
                <property>
278 279
                    <name>mapred.input.dir</name>
279
                    <value>/tmp/lod_blocks/blocks</value>
280
                    <value>${lod_linkage_input}</value>
280 281
                </property>
281 282

  
282 283
                <property>
283 284
                    <name>mapred.output.dir</name>
284
                    <value>/tmp/lod_final</value>
285
                    <value>${linkageOutputPath}</value>
285 286
                </property>
286 287

  
287 288

  
......
412 413
            </configuration>
413 414
        </map-reduce>
414 415

  
415
        <ok to="end"/>
416
        <ok to="compareDatasets"/>
416 417

  
417 418
        <error to="fail"/>
418 419
    </action>
......
430 431
                    <value>
431 432
                        namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
432 433
                    </value>
433

  
434 434
                </property>
435

  
436 435
                <property>
437 436
                    <name>zookeeper.znode.rootserver</name>
438 437
                    <value>
......
461 460

  
462 461

  
463 462
                <property>
464

  
465 463
                    <name>mapred.input.dir.formats</name>
466 464
                    <value>
467 465
                        ${nameNode}${sourceBuildInput};org.apache.hadoop.mapreduce.lib.input.TextInputFormat,${nameNode}${targetBuildInput};org.apache.hadoop.mapreduce.lib.input.TextInputFormat
......
496 494
                    <value>org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat</value>
497 495
                </property>
498 496

  
497

  
499 498
                <!-- ## This is required for new MapReduce API usage -->
500 499
                <property>
501 500
                    <name>mapred.mapper.new-api</name>
......
529 528
                    <value>false</value>
530 529
                </property>
531 530

  
532
                <property>
533
                    <name>mapred.output.compression.type</name>
534
                    <value>BLOCK</value>
535
                </property>
536 531

  
532

  
537 533
                <property>
538 534
                    <name>mapreduce.reduce.class</name>
539
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer</value>
535
                    <!--                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockReducer</value>-->
536
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.BlockStreamingReducer</value>
540 537
                </property>
541 538
                <!-- I/O FORMAT -->
542 539

  
......
555 552
                </property>
556 553

  
557 554
                <property>
555
                    <name>lod.statsOutputPath</name>
556
                    <value>${nameNode}${statsOutputPath}</value>
557
                </property>
558

  
559
                <property>
558 560
                    <name>lod.sourceMappings</name>
559 561
                    <value>${lod_sourceMappings}</value>
560 562
                </property>
......
584 586
                </property>
585 587

  
586 588

  
587
                <property>
588
                    <name>mapreduce.multipleoutputs</name>
589
                    <value>
590
                        ${buildOut1} ${buildOut2}
591
                    </value>
592
                </property>
589
                <!-- remove this if streaming does not work -->
593 590

  
594

  
595 591
                <property>
596
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.key</name>
592
                    <name>mapred.output.key.class</name>
597 593
                    <value>org.apache.hadoop.io.Text</value>
598 594
                </property>
599
                <property>
600
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.value</name>
601
                    <value>org.apache.hadoop.io.Text</value>
602
                </property>
603
                <property>
604
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.format</name>
605
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
606
                </property>
607 595

  
608

  
609
                <!--stats-->
610 596
                <property>
611
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.key</name>
612
                    <value>org.apache.hadoop.io.Text</value>
597
                    <name>mapred.output.value.class</name>
598
                    <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.StreamingTextOutputFormat</value>
613 599
                </property>
614
                <property>
615
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.value</name>
616
                    <value>org.apache.hadoop.io.Text</value>
617
                </property>
618
                <property>
619
                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.format</name>
620
                    <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
621
                </property>
622 600

  
601
                <!--Multiple Outputs for Blocks -->
602
                <!--
603
                                <property>
604
                                    <name>mapreduce.multipleoutputs</name>
605
                                    <value>
606
                                        ${buildOut1} ${buildOut2}
607
                                    </value>
608
                                </property>
623 609

  
610
                                <property>
611
                                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.key</name>
612
                                    <value>org.apache.hadoop.io.Text</value>
613
                                </property>
614
                                <property>
615
                                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.value</name>
616
                                    <value>org.apache.hadoop.io.Text</value>
617
                                </property>
618
                                <property>
619
                                    <name>mapreduce.multipleoutputs.namedOutput.${buildOut1}.format</name>
620
                                   <value>eu.dnetlib.data.mapreduce.hbase.lodExport.build.StreamingTextOutputFormat</value>
621
                                </property>
624 622

  
623
                -->
624
                <!--stats-->
625
                <!--              <property>
626
                                  <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.key</name>
627
                                  <value>org.apache.hadoop.io.Text</value>
628
                              </property>
629
                              <property>
630
                                  <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.value</name>
631
                                  <value>org.apache.hadoop.io.Text</value>
632
                              </property>
633
                              <property>
634
                                  <name>mapreduce.multipleoutputs.namedOutput.${buildOut2}.format</name>
635
                   <value>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</value>
636
                              </property>
637
              -->
625 638
                <!-- ## Workflow node parameters -->
626 639
                <property>
627 640
                    <name>mapred.reduce.tasks</name>
......
647 660
                </property>
648 661
            </configuration>
649 662
            <main-class>eu.dnetlib.data.mapreduce.hbase.lodExport.utils.FrequencyCounter</main-class>
650

  
651 663
            <arg>${lod_redisHost}</arg>
652 664
            <arg>${lod_redisPort}</arg>
653
            <arg>${buildOut2}</arg>
665
            <arg>${nameNode}${statsOutputPath}</arg>
654 666

  
655 667
        </java>
656 668
        <ok to="linkage"/>

Also available in: Unified diff