Revision 48937

Added by Tsampikos Livisianos over 6 years ago

dnet45

View differences:

modules/dnet-openaire-lodexport/trunk/src/test/java/Test.java
1
import org.joda.time.format.DateTimeFormat;
2
import org.joda.time.format.DateTimeFormatter;
3
import org.jsoup.Jsoup;
4

  
5
import java.io.*;
6
import java.text.DateFormat;
7
import java.text.ParseException;
8
import java.text.SimpleDateFormat;
9
import java.util.ArrayList;
10
import java.util.Arrays;
11
import java.util.List;
12

  
13
/**
14
 * Created by eri on 1/14/16.
15
 */
16
public class Test {
17

  
18

  
19
    public static void main(String[] args) throws ParseException, IOException {
20

  
21

  
22
        File fs = new File("C:\\Users\\eri_k\\Downloads\\relation_proj");
23

  
24
        StringBuilder sb = new StringBuilder();
25

  
26
        try (BufferedReader br = new BufferedReader(new FileReader(fs))) {
27
            String line;
28
            while ((line = br.readLine()) != null) {
29
                String[] split = line.split("!");
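                // reorder the '!'-delimited columns: keep columns 0 and 1, then emit 4, 3, 2 (i.e. swap the 3rd and 5th fields)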
30
                sb.append(split[0] + "!" + split[1] + "!" + split[4] + "!" + split[3] + "!" + split[2] + "\n");
31
//                resultProject!result!corda__h2020::c0210cb8b2256f4ca08d18391ae137c0!project!od______1108::18ddc87d7409554b58d0a230b533e516!
32

  
33
            }
34
        }
35

  
36
        System.out.println(sb.toString());
37

  
38

  
39
        File fs2 = new File("C:\\Users\\eri_k\\Downloads\\relations_fixed");
40
        FileWriter fw = new FileWriter(fs2);
41

  
42
        fw.write(sb.toString());
43
        fw.close();
44

  
45

  
46

  
47
    /*    String date = "2015-05-26";
48

  
49
        DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-MM-dd");
50
        System.out.println(formatter.parseDateTime(date));
51
        DateFormat df = new SimpleDateFormat("yyyy-mm-dd");
52
        System.out.println(df.parse(date));
53

  
54

  
55
        String value = " \"lala\" /lo {}li?   la ";
56

  
57
        value = "i";
58

  
59
        System.out.println("value " + value);
60
        value = value.replace(" ", "");
61
        System.out.println("value " + value);
62
        value = value.replace(" ", "");
63

  
64
        value="Gary  Richard,Lewin ,Lewin % Gary > Richard>";
65
        value = value.replaceAll("[\"\\r\\\\;]", "");
66

  
67
        value = value.replaceAll("[^a-zA-Z0-9 .-_:/&!@+=]+", " ");
68
        System.out.println("VALUE IS" + value);
69

  
70

  
71
        value="http://dx.doi.org/10.17014/ijog.vol1no2.20063-Papandayan-is-an-A-type-active-strato-volcano-located-at-some-20-km-SW-of-Garut-or-about-70-km-SE-of-Bandung-the-capital-city-of-West-Java-Province.-Geographically-the-summit-of-this-volcano-lies-at-the-intersection-between-07--19--42--S-and-107--44--E.-The-2002-Papandayan-eruption-was-preceded-by-two-felt-earthquakes-8-times-of-A-type-volcanic-earthquakes-and-150-times-of-B--type-volcanic-earthquake.-These-events-were-followed-by-a-phreatic-eruption-that-took-place-on-11-November-at-16.02-local-time.-Field-observation-shows-that-the-summit-region-mainly-around-the-craters-consists-of-rocks-that-have-hydrothermally-altered-to-yield-clay-rich-material.-This-clay-rich-material-covers-most-of-the-crater-fl-oors-and-the-crater-rim.-Mount-Nangklak-that-forms-part-of-the-rim-also-contains-a-lava-plug-from-the-Old-Papandayan-volcano.-This-mountain-is-covered-by-fi-ne-grained-unconsolidated-material-and-altered-rocks.-Much-of-this-altered-rocks-coincides-with-solfataric-and-fumarolic-activities-of-80-to->-300-C.-The-summit-area-also-contains-high-discharge-of-water-either-originating-from-the-springs-or-surface-water.-The-increase-in-seismicity-the-fi-ne-grained-hydrothermal-altered-rocks-and-the-existence-of-some-faults-that-pass-through-the-summit-region-might-have-weaken-the-stability-of-the-summit-area.-As-the-result-a-landslide-occurred-on-the-north-fl-ank-of-Mount-Nangklak-where-the-landslide-material-blocked-the-upper-course-of-Cibeureum-Gede-River.-This-landslide-material-had-formed-big-mudfl-ows-that-caused-several-houses-of-fi-ve-villages-were-partly-burried-some-bridges-were-devastated-and-several-hectares-of-cultivated-land-were-damaged.\n" +
72
                "\tat java.net.URI$Parser.fail(URI.java:2829)";
73
        System.out.println("char is " +value.charAt(1057) + value.charAt(1058));
74

  
75

  
76
        value="http://dx.doi.org/10.5902/1984686X5486-O-objetivo-deste-estudo-foi-descrever-o-processo-de-valida-o-de-conte-do-de-um-instrumento-de-avalia-o-do-esquema-corporal-para-crian-as-comcegueira-cong-nita-com-idade-entre-6-e-9-anos.-A-popula-o-foi-constitu-dapor-sete-professores-universit-rios-os-quais-receberam-uma-primeiravers-o-do-instrumento-eles-analisaram-cada-item-nos-quesitos-clareza-delinguagem-pertin-ncia-te-rica-viabilidade-de-aplica-o-e-adequa-o-aoconstructo.-A-prova-1--obteve-80%-de-concord-ncia-a-Prova-2-recebeu-70%-a-prova-3---60%-e-a-prova-4--75%.-Adotou-se-como-crit-rio-de-perman-nciada-prova-o-m-nimo-de-80%-de-concord-ncia.-A-prova-1-permaneceu-como-naprimeira-vers-o-a-prova-2-foi-exclu-da-e-as-provas-3-e-4-foram-modificadas.-Com-base-nas-sugest-es-dos-ju-zes-e-no--ndice-de-concord-ncia-elaborou-se-uma-segunda-vers-o-do-instrumento-composta-por-tr-s-itens.-Com-isso-foi-poss-vel-considerar-que-as-adapta-es-sugeridas-pelos-ju-zes-foram-importantes-para-garantir-a-validade-de-conte-do-do-instrumento.-Palavras-chave:-Educa-o-Especial-Educa-o-F-sica-adaptada-Cegueira-Avalia-o-Esquema-corporal.\n" +
77
                "java.net.URISyntaxException: Malformed escape pair at index 489: http://dx.doi.org/10.5902/1984686X5486-O-objetivo-deste-estudo-foi-descrever-o-processo-de-valida-o-de-conte-do-de-um-instrumento-de-avalia-o-do-esquema-corporal-para-crian-as-comcegueira-cong-nita-com-idade-entre-6-e-9-anos.-A-popula-o-foi-constitu-dapor-sete-professores-universit-rios-os-quais-receberam-uma-primeiravers-o-do-instrumento-eles-analisaram-cada-item-nos-quesitos-clareza-delinguagem-pertin-ncia-te-rica-viabilidade-de-aplica-o-e-adequa-o-aoconstructo.-A-prova-1--obteve-80%-de-concord-ncia-a-Prova-2-recebeu-70%-a-prova-3---60%-e-a-prova-4--75%.-Adotou-se-como-crit-rio-de-perman-nciada-prova-o-m-nimo-de-80%-de-concord-ncia.-A-prova-1-permaneceu-como-naprimeira-vers-o-a-prova-2-foi-exclu-da-e-as-provas-3-e-4-foram-modificadas.-Com-base-nas-sugest-es-dos-ju-zes-e-no--ndice-de-concord-ncia-elaborou-se-uma-segunda-vers-o-do-instrumento-composta-por-tr-s-itens.-Com-isso-foi-poss-vel-considerar-que-as-adapta-es-sugeridas-pelos-ju-zes-foram-importantes-para-garantir-a-validade-de-conte-do-do-instrumento.-Palavras-chave:-Educa-o-Especial-Educa-o-F-sica-adaptada-Cegueira-Avalia-o-Esquema-corporal.\n" +
78
                "\tat java.net.URI$Parser.fail(URI.java:2829)";
79

  
80

  
81
        System.out.println("malformed escaped pair char is " +value.charAt(489) +value.charAt(490) );
82

  
83

  
84
        *//*value = value.replace("\\r", " ");
85
        value = value.replace("\\", "");
86
		*//*
87

  
88
		 *//*value = value.replace("\\r", " ");
89
         value = value.replace("\\", "");
90
*//*
91

  
92

  
93
        value = "http://dx.doi.org/10.5902/198050985739<br /><br />O presente trabalho objetivou avaliar a capacidade de adesão da madeira serrada de eucalipto proveniente de três sistemas de manejo, empregando-se os adesivos: resorcina formaldeído, e dois adesivos em emulsão aquosa à base de poliacetato de vinila. Os sistemas de manejo foram caracterizados por três estratos, sendo o estrato um (E1) caracterizado por madeira proveniente de talhadia e idade de 70 meses; o estrato dois (E2) caracterizado por madeira de reforma e idade de 166 meses e o estrato três (E3) caracterizado também por reforma aos 70 meses de idade.";
94

  
95

  
96
        System.out.println(Jsoup.parse(value).text());
97

  
98

  
99
        List<String> s = new ArrayList<String>();
100
        s.add("A");
101
        s.add("B");
102
        s.add("C");
103
        s.add("D");
104
        int listSize= s.size();
105
        List<String> newList= new ArrayList<String>();
106
String DELIM=";";
107
        String valueEntity="result";
108

  
109
        for(int i=0; i< s.size()-1;i++)
110
        {
111
            System.out.println(i);
112
            for(int j=i+1; j<s.size();j++)
113
            {System.out.println("j is " +j);
114
                newList.add("dedup" + DELIM +
115
                        valueEntity + DELIM +
116
                        s.get(i) + DELIM +
117
                        valueEntity+ DELIM+
118
                        s.get(j) + DELIM);
119

  
120

  
121
            }
122
        }
123

  
124

  
125
        System.out.println(newList);
126

  
127
*/
128

  
129
        //  MyString.formStrings("");
130
    }
131

  
132
    public static List<List<String>> generatePerm(List<String> original) {
133
        if (original.size() == 0) {
134
            List<List<String>> result = new ArrayList<List<String>>();
135
            result.add(new ArrayList<String>());
136
            return result;
137
        }
138
        String firstElement = original.remove(0);
139
        List<List<String>> returnValue = new ArrayList<List<String>>();
140

  
141
        List<List<String>> permutations = generatePerm(original);
142

  
143
        for (List<String> smallerPermutated : permutations) {
144
            for (int index = 0; index <= smallerPermutated.size(); index++) {
145
                List<String> temp = new ArrayList<String>(smallerPermutated);
146
                temp.add(index, firstElement);
147
                returnValue.add(temp);
148
            }
149
        }
150
        return returnValue;
151
    }
152

  
153
    public static class MyString {
154

  
155
        static final String[] chars = {"a", "b", "c"};
156
        static final int reqLen = 2;
157

  
158
        public static void formStrings(String crtStr) {
159

  
160
            if (crtStr.length() == reqLen) {
161
                System.out.println(crtStr);
162
                return;
163
            } else
164
                for (int i = 1; i < chars.length; i++) {
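                    // Note: the loop starts at i = 1, so chars[0] ("a") is never appended; and since a string can never contain a strictly longer string, the condition below is always true and the else branch is effectively unreachable.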
165
                    if (!crtStr.contains(chars[i] + crtStr))
166
                        formStrings(crtStr + chars[i]);
167
                    else {
168
                        formStrings(crtStr);
169

  
170
                    }
171

  
172
                }
173
        }
174
    }
175

  
176

  
177
    static void fillList(ArrayList list) {
178
        list.add("lala");
179

  
180
    }
181

  
182
}
modules/dnet-openaire-lodexport/trunk/src/test/java/LodReducerTest.java
1

  
2
import eu.dnetlib.data.mapreduce.hbase.lodExport.LodReducer;
3
import org.apache.hadoop.conf.Configuration;
4
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
5
import org.apache.hadoop.io.LongWritable;
6
import org.apache.hadoop.io.Text;
7
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
8
import org.junit.Before;
9
import org.junit.Test;
10

  
11
import java.io.IOException;
12
import java.util.ArrayList;
13

  
14
public class LodReducerTest {
15
    ReduceDriver<Text, Text, Text, Text> reduceDriver;
16

  
17
    @Before
18
    public void setUp() {
19
        Configuration configuration = new Configuration();
20
        configuration.set("lod.delim", ",");
21
        configuration.set("mapreduce.multipleoutputs.namedOutput.result.key", "org.apache.hadoop.io.Text");
22
        configuration.set("mapreduce.multipleoutputs.namedOutput.result.value", "org.apache.hadoop.io.Text");
23
        configuration.set("mapreduce.multipleoutputs.namedOutput.result.format", "org.apache.hadoop.mapreduce.lib.output.TextOutputFormat");
24
        configuration.set("mapreduce.multipleoutputs", "result");
25

  
26

  
27
        LodReducer reducer = new LodReducer();
28
        //String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}";
29
        reduceDriver = ReduceDriver.newReduceDriver(reducer).withConfiguration(configuration);
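        // MRUnit driver: runs LodReducer in memory against the configuration built above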
30

  
31
    }
32

  
33

  
34
    @Test
35
    public void testReducer() throws IOException {
36
        ArrayList<Text> list = new ArrayList<Text>();
37
        String value = "doajarticles::9aacfbb6168b5346aa63a49c79a7a88d,2017-01-14T12:40:47.751Z,2016-10-12T12:23:03.305Z,oai:doaj.org/article:85b1bd2dbf344a6cab93f36c7bdfff69;,Zika virus in Brazil and the danger of infestation by Aedes  tegomyia) mosquitoes,2015-01-01, ociedade Brasileira de Medicina Tropical  BMT), 10.1590/0037-8682-0220-2015;,eng, , Aedes aegypti;Aedes albopictus;Arbovirus; tegomyia;Culicidae;Medicine (General);R5-920;Medicine;R;Arctic medicine. Tropical medicine;RC955-962;, , Revista da  ociedade Brasileira de Medicina Tropical, Iss 0 (2015);, , , ,Open Access, Abstract Zika virus, already widely distributed in Africa and Asia, was recently reported in two Northeastern Brazilian:  tate of Bahia and  tate of Rio Grande do Norte, and one  outheastern:  tate of  ão Paulo. This finding adds a potentially noxious virus to a list of several other viruses that are widely transmitted by Aedes  tegomyia) aegypti and Aedes  tegomyia) albopictus in Brazil. The pathology and epidemiology, including the distribution and vectors associated with Zika virus, are reviewed. This review is focused on viruses transmitted by Aedes  tegomyia) mosquitoes, including dengue, Chikungunya, Zika, Mayaro, and yellow fever virus, to emphasize the risks of occurrence for these arboviruses in Brazil and neighboring countries. Other species of Aedes  tegomyia) are discussed, emphasizing their involvement in arbovirus transmission and the possibility of adaptation to environments modified by human activities and introduction in Brazil.,Revista da  ociedade Brasileira de Medicina Tropical,,,,,,,2015,publication, ,";
38
        list.add(new Text(value));
39

  
40
        reduceDriver.withInput(new Text("result"), list);
41
        reduceDriver.withOutput(new Text("result"), new Text(value));
42
        reduceDriver.run();
43
    }
44

  
45

  
46
}
47

  
48

  
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/ContextTransformer.java
1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

  
3
import javax.xml.transform.Transformer;
4
import javax.xml.transform.TransformerFactory;
5
import javax.xml.transform.stream.StreamResult;
6
import javax.xml.transform.stream.StreamSource;
7
import java.io.ByteArrayInputStream;
8
import java.io.ByteArrayOutputStream;
9
import java.io.InputStream;
10

  
11
public class ContextTransformer {
12

  
13
    public String transformXSL(String xml) throws Exception {
14
        Transformer transformer;
15
        TransformerFactory tFactory = TransformerFactory.newInstance();
16

  
17
        if (xml == null) {
18

  
19
            throw new Exception("NULL XML ENTITY CONFIGURATION   ");
20
        }
21

  
22
        InputStream inputStream = null;
23
        ByteArrayInputStream readerStream = null;
24
        ByteArrayOutputStream writerStream = null;
25
        try {
26
            inputStream = ClassLoader.getSystemResourceAsStream("eu/dnetlib/data/mapreduce/hbase/statsExport/" + "context.xsl");
27
            transformer = tFactory.newTransformer(new StreamSource(inputStream));
28

  
29
            readerStream = new ByteArrayInputStream(xml.getBytes("UTF-8"));
30

  
31
            writerStream = new ByteArrayOutputStream();
32
            transformer.transform(new StreamSource(readerStream), new StreamResult(writerStream));
33

  
34
            return writerStream.toString("UTF-8");
35

  
36
        } catch (Exception e) {
37
            throw new Exception(e.getMessage(), e);
38
        } finally {
39
            if (inputStream != null) inputStream.close();
40
            if (readerStream != null) readerStream.close();
41
            if (writerStream != null) writerStream.close();
42

  
43
        }
44

  
45
    }
46

  
47

  
48
}
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/FundingParser.java
1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

  
3
import org.apache.log4j.Logger;
4

  
5

  
6
public class FundingParser {
7

  
8
	private Logger log = Logger.getLogger(this.getClass());
9

  
10
	public static String getFundingLevel(String funding_level, int level,String delim) {
11
		String NULL_STRING = " ";
12

  
13
		if (funding_level.isEmpty()) {
14
			return NULL_STRING + delim;
15
		}
16

  
17

  
18
		if (!funding_level.contains("<funding_level_" + level + ">")) {
19
			return NULL_STRING + delim;
20
		}
21

  
22
		String[] split = funding_level.split("<funding_level_" + level + ">");
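		// naive string-based parsing of the funding tree XML: take the text after <funding_level_N>, then the content of its first <name> element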
23

  
24
		funding_level = split[1];
25

  
26
		split = funding_level.split("<name>");
27
		funding_level = split[1];
28

  
29
		funding_level = funding_level.substring(0, funding_level.indexOf("</name>"));
30
		funding_level = funding_level.replaceAll("\"", "");
31
		funding_level = funding_level.replaceAll("/>", "");
32
		funding_level = funding_level.replaceAll("<", "");
33
		funding_level = funding_level.replaceAll("&amp;", "");
34

  
35
		if (level == 1) {
36
			if (funding_level.equalsIgnoreCase("SP1")) {
37
				funding_level = "SP1-Cooperation";
38
			} else if (funding_level.equalsIgnoreCase("SP2")) {
39
				funding_level = "SP2-Ideas";
40
			}
41
			if (funding_level.equalsIgnoreCase("SP3")) {
42
				funding_level = "SP3-People";
43
			} else if (funding_level.equalsIgnoreCase("SP4")) {
44
				funding_level = "SP4-Capacities";
45

  
46
			} else if (funding_level.equalsIgnoreCase("SP5")) {
47
				funding_level = "SP5-Euratom";
48
			}
49
		}
50

  
51

  
52
		funding_level = funding_level.replaceAll(">", "");
53

  
54
		funding_level = funding_level.replaceAll("</", "");
55

  
56
		return funding_level + delim;
57
	}
58

  
59
	public static String getFundingInfo(String buff, String delim) {
60
		return FundingParser.getFunder(buff,delim) + getFundingLevel(buff, 0,delim) + (getFundingLevel(buff, 1,delim) + getFundingLevel(buff, 2,delim)
61
                + getFundingLevel(buff, 3,delim));
62
	}
63

  
64
	public static String getFunder(String buff,String delim) {
65

  
66
		String NULL_STRING = " ";
67
		if (buff.isEmpty()) {
68

  
69
				return NULL_STRING +delim;
70

  
71

  
72
		}
73

  
74
		if (!buff.contains("<funder>")) {
75
			return NULL_STRING +delim;
76
		}
77
		String[] split = buff.split("<funder>");
78
		String funder = split[1];
79

  
80
		split = funder.split("<name>");
81

  
82
		funder = split[1];
83

  
84
		funder = funder.substring(0, funder.indexOf("</name>"));
85

  
86
		funder = funder.replaceAll(">", "");
87

  
88
		funder = funder.replaceAll("</", "");
89

  
90
		funder = funder.replaceAll("\"", "");
91
		funder = funder.replaceAll("&amp;", "");
92

  
93

  
94
		return funder + delim;
95
	}
96

  
97
}
98

  
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/Serializer.java
1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

  
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
5
import eu.dnetlib.data.proto.FieldTypeProtos;
6
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
8
import eu.dnetlib.data.proto.OafProtos.Oaf;
9
import eu.dnetlib.data.proto.OafProtos.OafEntity;
10
import eu.dnetlib.data.proto.OafProtos.OafRel;
11
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
12
//import eu.dnetlib.data.proto.PersonProtos;
13
import eu.dnetlib.data.proto.ProjectProtos.Project;
14
import eu.dnetlib.data.proto.ResultProtos.Result;
15
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
16

  
17
import java.util.ArrayList;
18
import java.util.List;
19
import java.util.Set;
20

  
21
/**
22
 * @author eri Simple serializer that parses input Oaf Protos and prepares them
23
 *         for sqoop
24
 */
25
public class Serializer {
26
    private static final String TEXT_PATTERN = "[^a-zA-Z0-9 .-_:/@+=-]";
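    // Note: inside the character class above, '.-_' is a range (U+002E to U+005F), so characters such as ';', '<', '>', '?' and upper-case letters are also preserved; escape the '-' if literal '.', '-', '_' were intended.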
27
    private static final String ID_PATTERN = "[^a-zA-Z0-9.:/_-]";
28
    private static final String DOI_PATTERN = ".*\\/(10.)";
29

  
30
    public static String serialize(Oaf oaf, String DELIM) {
31
        switch (oaf.getKind()) {
32
            case entity:
33
                OafEntity valueEntity = oaf.getEntity();
34
                switch (valueEntity.getType()) {
35
                    case datasource:
36
                        return buildDatasource(valueEntity, DELIM);
37
                    case organization:
38
                        return buildOrganization(valueEntity, DELIM);
39
                    case project:
40
                        return buildProject(valueEntity, DELIM);
41
                    case result:
42
                        return buildResult(valueEntity, DELIM);
43
                        /*
44
                    case person:
45
                        return buildPerson(valueEntity, DELIM);
46
                        */
47
                    default:
48
                        break;
49
                }
50
                break;
51
            case relation:
52
                OafRel valueRel = oaf.getRel();
53

  
54
                return serialize(valueRel, DELIM);
55

  
56
        }
57

  
58
        return null;
59

  
60
    }
61

  
62

  
63
    public static void extractRelations(Oaf oaf, String DELIM, Set<String> relations) {
64
        OafEntity valueEntity = oaf.getEntity();
65
        switch (valueEntity.getType()) {
66
            case result:
67
                getResultDatasources(valueEntity, DELIM, relations);
68
                getDedups(valueEntity, DELIM, relations);
69
            case datasource:
70
                getDedups(valueEntity, DELIM, relations);
71
                /*
72
            case person:
73
                getDedups(valueEntity, DELIM, relations);
74
                */
75
            case organization:
76
                getDedups(valueEntity, DELIM, relations);
77
            default:
78
        }
79

  
80
    }
81

  
82
    public static String serialize(OafRel Rel, String DELIM) {
83
        StringBuilder buff;
84
        switch (Rel.getRelType()) {
85
            case datasourceOrganization:
86
                buff = new StringBuilder();
87
                buff.append(Rel.getRelType().name()).append(DELIM).append("datasource").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
88
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM).append("\n");
89
                return buff.toString();
90
            case resultResult:
91
                buff = new StringBuilder();
92
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
93
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
94
                return buff.toString();
95
                /*
96
            case personPerson:
97
                buff = new StringBuilder();
98
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
99
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
100
                return buff.toString();
101
                */
102
            case organizationOrganization:
103
                buff = new StringBuilder();
104
                buff.append(Rel.getRelType().name()).append(DELIM).append("organization").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
105
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
106
                return buff.toString();
107
                /*
108
            case personResult:
109
                buff = new StringBuilder();
110
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
111
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
112
                return buff.toString();
113
                */
114
            case projectOrganization:
115
                buff = new StringBuilder();
116
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
117
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
118
                return buff.toString();
119
                /*
120
            case projectPerson:
121
                buff = new StringBuilder();
122
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
123
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
124
                return buff.toString();
125
                */
126
            case resultOrganization:
127
                buff = new StringBuilder();
128
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
129
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
130
                return buff.toString();
131
//TODO - SOS RELATION RESULT PROJECT IS INVERTED! SOURCE IS PROJECT, TARGET IS RESULT
132

  
133
            case resultProject:
134
                buff = new StringBuilder();
135
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM)
136
                        .append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM);
137
                return buff.toString();
138
            default:
139

  
140
        }
141

  
142
        return "";
143

  
144
    }
145

  
146
    private static String getHeader(OafEntity data, String DELIM) {
147
        String SEPERATOR = ";";
148

  
149
        StringBuilder buff = new StringBuilder();
150

  
151
        //  EntityType
152
        buff.append(data.getType().name()).append(DELIM);
153

  
154
        // OpenaireID
155
        buff.append(cleanId(data.getId())).append(DELIM);
156
        //  dateOfTransformation
157

  
158
        // TODO  CRITERIA FOR IDENTIFYING UPDATED RECORDS
159
        buff.append(cleanId(data.getDateoftransformation())).append(DELIM);
160

  
161
        //    dateOfCollection
162
        buff.append(clean(data.getDateofcollection())).append(DELIM);
163

  
164
        String dataStr = new String();
165
        // originalId
166

  
167
        for (String oid : data.getOriginalIdList()) {
168
            dataStr += cleanDoi(oid) + SEPERATOR; // oid is not an OpenAIRE id, so use cleanDoi instead of cleanId
169
        }
170

  
171
        buff.append(dataStr).append(DELIM);
172
        return buff.toString();
173
    }
174

  
175

  
176
    private static String buildDatasource(OafEntity data, String DELIM) {
177
        String SEPERATOR = ";";
178

  
179
        StringBuilder buff = new StringBuilder();
180

  
181
        buff.append(getHeader(data, DELIM));
182
        Metadata metadata = data.getDatasource().getMetadata();
183

  
184
        //Datasourcetype
185
        buff.append(clean(metadata.getDatasourcetype().getClassname())).append(DELIM);
186

  
187
        //Openairecompatibility
188
        buff.append(clean(metadata.getOpenairecompatibility().getClassname())).append(DELIM);
189

  
190
        //OfficialName
191
        buff.append(clean(metadata.getOfficialname().getValue())).append(DELIM);
192

  
193
        //  Englishname
194
        buff.append(clean(metadata.getEnglishname().getValue())).append(DELIM);
195

  
196
        //Websiteurl
197
        buff.append(clean(metadata.getWebsiteurl().getValue())).append(DELIM);
198

  
199
        //LogoURL
200
        buff.append(clean(metadata.getLogourl().getValue())).append(DELIM);
201

  
202
        //Contactemail
203
        buff.append(clean(metadata.getContactemail().getValue())).append(DELIM);
204

  
205
        //Namespaceprefix
206
        buff.append(clean(metadata.getNamespaceprefix().getValue())).append(DELIM);
207

  
208
        // latitude
209
        buff.append(clean(metadata.getLatitude().getValue())).append(DELIM);
210

  
211
        // longitude
212
        buff.append(clean(metadata.getLongitude().getValue())).append(DELIM);
213

  
214
        // dateofvalidation,
215
        buff.append(clean(metadata.getDateofvalidation().getValue())).append(DELIM);
216

  
217
        //Description
218
        buff.append(clean(metadata.getDescription().getValue())).append(DELIM);
219

  
220
        //subjects
221
        String subj = new String();
222
        for (StructuredProperty s : metadata.getSubjectsList()) {
223
            subj += clean(s.getValue()) + SEPERATOR;
224
        }
225

  
226
        //subjectList
227
        buff.append(clean(subj)).append(DELIM);
228

  
229
        //Number of items
230
        buff.append(clean(metadata.getOdnumberofitems().getValue())).append(DELIM);
231

  
232
        //Date of number of items
233
        buff.append(clean(metadata.getOdnumberofitemsdate().getValue())).append(DELIM);
234

  
235
        // Policies
236
        buff.append(clean(metadata.getOdpolicies().getValue())).append(DELIM);
237

  
238
        //languages
239
        String dataStr = new String();
240

  
241
        for (StringField lang : metadata.getOdlanguagesList()) {
242
            dataStr += clean(lang.getValue()) + SEPERATOR;
243
        }
244

  
245
        buff.append(dataStr).append(DELIM);
246
247

  
248

  
249
        // Content type
250
        dataStr = " ";
251
        for (StringField c : metadata.getOdcontenttypesList()) {
252
            dataStr += clean(c.getValue()) + SEPERATOR;
253
        }
254
        buff.append(dataStr).append(DELIM);
255

  
256
        //Access info package
257
        dataStr = " ";
258

  
259
        for (StringField c : metadata.getAccessinfopackageList()) {
260
            dataStr += clean(c.getValue()) + SEPERATOR;
261
        }
262

  
263
        buff.append(dataStr).append(DELIM);
264

  
265
        //Release start date
266
        buff.append(clean(metadata.getReleasestartdate().getValue())).append(DELIM);
267

  
268
        //Release end date
269
        buff.append(clean(metadata.getReleaseenddate().getValue())).append(DELIM);
270

  
271
        //Mission statement url
272
        buff.append(clean(metadata.getMissionstatementurl().getValue())).append(DELIM);
273

  
274
        //Data provider
275
        buff.append(clean(String.valueOf(metadata.getDataprovider().getValue()))).append(DELIM);
276

  
277
        //Service provider
278
        buff.append(clean(String.valueOf(metadata.getServiceprovider().getValue()))).append(DELIM);
279

  
280
        //Database access type
281
        buff.append(clean(metadata.getDatabaseaccessrestriction().getValue())).append(DELIM);
282

  
283
        //Data upload type
284
        buff.append(clean(metadata.getDatauploadtype().getValue())).append(DELIM);
285

  
286
        //Data upload restrictions
287
        buff.append(clean(metadata.getDatauploadrestriction().getValue())).append(DELIM);
288

  
289
        //Versioning
290
        buff.append(clean(String.valueOf(metadata.getVersioning().getValue()))).append(DELIM);
291

  
292
        //Citation guideline url
293
        buff.append(clean(metadata.getCitationguidelineurl().getValue())).append(DELIM);
294

  
295
        //Quality management kind
296
        buff.append(clean(metadata.getQualitymanagementkind().getValue())).append(DELIM);
297

  
298
        //PID systems
299
        buff.append(clean(metadata.getPidsystems().getValue())).append(DELIM);
300

  
301
        //Certificates
302
        buff.append(clean(metadata.getCertificates().getValue())).append(DELIM);
303

  
304
        //Policies
305
        dataStr = " ";
306
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
307
            dataStr += clean(property.getValue()) + SEPERATOR;
308
        }
309

  
310
        buff.append(dataStr).append(DELIM);
311

  
312
        buff.append(getTrust(data)).append(DELIM);
313
        return buff.toString();
314
    }
315

  
316

  
317
    private static String buildOrganization(OafEntity data, String DELIM) {
318
        String SEPERATOR = ";";
319

  
320
        StringBuilder buff = new StringBuilder();
321
        buff.append(getHeader(data, DELIM));
322

  
323
        Organization organization = data.getOrganization();
324
        Organization.Metadata metadata = organization.getMetadata();
325

  
326
        //getLegalshortname
327
        buff.append(clean(metadata.getLegalshortname().getValue())).append(DELIM);
328
        // `name`,
329
        buff.append(clean(metadata.getLegalname().getValue())).append(DELIM);
330
        //website URL
331
        String[] split = metadata.getWebsiteurl().getValue().split(",");
332
        String dataStr = new String();
333

  
334
        for (String s : split) {
335
            dataStr += s.replace(DELIM, " ") + SEPERATOR;
336
        }
337

  
338
        buff.append(dataStr).append(DELIM);
339
        //logourl
340
        buff.append(clean(metadata.getLogourl().getValue())).append(DELIM);
341
        // `country`,
342
        buff.append(clean(metadata.getCountry().getClassid())).append(DELIM);
343
        buff.append(getTrust(data)).append(DELIM);
344

  
345
        return buff.toString();
346

  
347
    }
348

  
349
    static String getTrust(OafEntity data) {
350
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
351
            return (info.getTrust());
352

  
353
        }
354
        return " ";
355
    }
356

  
357

  
358
    private static String buildResult(OafEntity data, String DELIM) {
359
        String SEPERATOR = ";";
360

  
361
        Result.Metadata metadata = data.getResult().getMetadata();
362

  
363

  
364
        StringBuilder buff = new StringBuilder();
365

  
366
        buff.append(getHeader(data, DELIM));
367

  
368
        //   titleString
369
        String dataStr = new String();
370

  
371
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
372
            StructuredProperty title = metadata.getTitleList().get(i);
373
            dataStr = clean(title.getValue());
374
            break;
375
        }
376

  
377
        //  pubtitle
378
        buff.append(clean(dataStr)).append(DELIM);
379

  
380
        // date of acceptance CHANGED THIS TO DATE FORMAT
381
        buff.append(clean(metadata.getDateofacceptance().getValue())).append(DELIM);
382

  
383
        // publisher
384
        buff.append(clean(metadata.getPublisher().getValue())).append(DELIM);
385

  
386

  
387
        //PID
388
        dataStr = " ";
389
        for (StructuredProperty p : data.getPidList()) {
390
            dataStr += clean(p.getValue()) + SEPERATOR;
391
        }
392

  
393
        buff.append(dataStr).append(DELIM);
394

  
395
        //language
396
        buff.append(clean(metadata.getLanguage().getClassid())).append(DELIM);
397

  
398
        // RelevantDate
399
        dataStr = " ";
400

  
401
        for (StructuredProperty p : metadata.getRelevantdateList()) {
402
            dataStr += clean(p.getValue());
403
            break;
404
        }
405

  
406
        buff.append(dataStr).append(DELIM);
407

  
408
        //Subject
409
        dataStr = " ";
410
        for (StructuredProperty subj : metadata.getSubjectList()) {
411

  
412
            if (subj.getValue() != null && !subj.getValue().isEmpty()) {
413
                dataStr += clean(subj.getValue()) + SEPERATOR;
414
            }
415
        }
416

  
417
        buff.append(dataStr).append(DELIM);
418

  
419
        //TODO ExternalReference
420

  
421
        buff.append(" ").append(DELIM);
422

  
423
        //Source
424
        dataStr = " ";
425
        for (StringField s : metadata.getSourceList()) {
426
            dataStr += clean(s.getValue()) + SEPERATOR;
427
        }
428

  
429
        buff.append(dataStr).append(DELIM);
430

  
431
        //TODO Format     
432
        buff.append(" ").append(DELIM);
433

  
434
        //Context
435
        dataStr = " ";
436
        for (Result.Context s : metadata.getContextList()) {
437
            dataStr += clean(s.getId()) + SEPERATOR;
438
        }
439
        buff.append(dataStr).append(DELIM);
440

  
441
        //country
442

  
443
        String country = " ";
444

  
445
        for (FieldTypeProtos.Qualifier c : metadata.getCountryList()) {
446
            country += clean(c.getClassid()) + SEPERATOR;
447
        }
448

  
449
        buff.append(country).append(DELIM);
450

  
451
        //Best License
452
        buff.append(getBestLicense(data.getResult())).append(DELIM);
453

  
454
        //Description
455
        dataStr = " ";
456

  
457
        for (StringField desc : metadata.getDescriptionList()) {
458
            dataStr += clean(desc.getValue());
459
            break;
460
        }
461

  
462
        buff.append(dataStr).append(DELIM);
463

  
464
        //Journal  
465
        buff.append(clean(metadata.getJournal().getName())).append(DELIM);  //#null#!
466

  
467

  
468
        // TODO ERI SOS: in getJournal().getDataInfo() the provenance and similarity information can be found
469

  
470
        // TODO isRelatedTo
471

  
472
        //   resource type
473
        buff.append(clean(metadata.getResourcetype().getClassname())).append(DELIM);
474
        //   device
475
        buff.append(clean(metadata.getDevice().getValue())).append(DELIM);
476
        //   size
477
        buff.append(clean(metadata.getSize().getValue())).append(DELIM);
478
        //     version
479
        buff.append(clean(metadata.getVersion().getValue())).append(DELIM);
480

  
481
        //   metadata update
482
        buff.append(clean(metadata.getLastmetadataupdate().getValue())).append(DELIM);
483
        //   metadata version
484
        buff.append(clean(metadata.getMetadataversionnumber().getValue())).append(DELIM);
485

  
486
        // year
487
        buff.append(clean(getYearInt(metadata.getDateofacceptance().getValue()))).append(DELIM);
488

  
489
        // type
490
        buff.append(clean(metadata.getResulttype().getClassname())).append(DELIM);
491

  
492
        buff.append(getTrust(data)).append(DELIM);
493

  
494
        //Authors
495
        /*
496
        dataStr = " ";
497

  
498
        for (StringField author : metadata.getContributorList()) {
499
            dataStr += clean(author.getValue()) + SEPERATOR;
500
        }
501
        buff.append(dataStr).append(DELIM);
502
        */
503

  
504
        return buff.toString();
505
    }
506

  
507

  
508
    private static String buildProject(OafEntity data, String DELIM) {
509
        String SEPERATOR = ";";
510

  
511
        StringBuilder buff = new StringBuilder();
512

  
513
        buff.append(getHeader(data, DELIM));
514
        Project.Metadata metadata = data.getProject().getMetadata();
515

  
516

  
517
        //Code
518
        buff.append(metadata.getCode().getValue()).append(DELIM);
519
        // `Websiteurl`,
520
        buff.append(clean(metadata.getWebsiteurl().getValue())).append(DELIM);
521
        //TODO here
522

  
523
        // `Acronym`,
524
        buff.append(clean(metadata.getAcronym().getValue())).append(DELIM);
525

  
526
        //Title
527
        buff.append(clean(metadata.getTitle().getValue())).append(DELIM);
528

  
529
        // Startdate
530
        buff.append(clean(metadata.getStartdate().getValue())).append(DELIM);
531

  
532
        // Enddate
533
        buff.append(clean(metadata.getEnddate().getValue())).append(DELIM);
534

  
535
        //`Call identifier`
536
        buff.append(clean(metadata.getCallidentifier().getValue())).append(DELIM);
537

  
538
        //`KeyWords`
539
        buff.append(clean(metadata.getKeywords().getValue())).append(DELIM);
540

  
541
        //`Duration`
542
        buff.append(clean(metadata.getDuration().getValue())).append(DELIM);
543

  
544
        //ecsc39
545
        buff.append(clean(metadata.getEcsc39().getValue())).append(DELIM);
546

  
547
        //`Contracttype`
548
        buff.append(clean(metadata.getContracttype().getClassname())).append(DELIM);
549

  
550
        //`OA mandate pubs`  TODO DOES NOT EXIST
551
        buff.append(clean(metadata.getOamandatepublications().getValue())).append(DELIM);
552
        //`Subjects`
553
        String dataStr = new String();
554
        for (StructuredProperty s : metadata.getSubjectsList()) {
555

  
556
            dataStr += clean(s.getValue()) + SEPERATOR;
557
        }
558
        buff.append(dataStr).append(DELIM);
559

  
560
        //`EC293`
561
        buff.append(clean(metadata.getEcarticle293().getValue())).append(DELIM);
562

  
563
        List<StringField> fundList = metadata.getFundingtreeList();
564

  
565
        if (!fundList.isEmpty()) // `funding_lvl0`,
566
        {//TODO funder + 3 funding levels
567
           /* funder text,
568
            funding_lvl0 text,
569
	        funding_lvl1 text,
570
	        funding_lvl2 text,
571
	        funding_lvl3 text,*/
572
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM));
573
        } else {
574

  
575
            buff.append(FundingParser.getFundingInfo("", DELIM));
576
        }
577

  
578
        buff.append(getTrust(data)).append(DELIM);
579

  
580
        return buff.toString();
581

  
582
    }
583

  
584

  
585
    /*
586
    private static String buildPerson(OafEntity data, String DELIM) {
587
        String SEPERATOR = ";";
588

  
589
        PersonProtos.Person person = data.getPerson();
590
        PersonProtos.Person.Metadata metadata = person.getMetadata();
591

  
592
        StringBuilder buff = new StringBuilder();
593

  
594
        buff.append(getHeader(data, DELIM));
595

  
596
        // `firstname`,
597
        buff.append(clean(metadata.getFirstname().getValue())).append(DELIM);
598

  
599
        // `secondNames`,
600
        String dataStr = new String();
601

  
602
        for (StringField s : metadata.getSecondnamesList()) {
603
            dataStr += clean(s.getValue()) + ' ';
604
        }
605

  
606
        buff.append(dataStr).append(DELIM);
607

  
608
        // `fullname`,
609
        buff.append(clean(metadata.getFullname().getValue())).append(DELIM);
610

  
611
        // `Fax`,
612
        buff.append(clean(metadata.getFax().getValue())).append(DELIM);
613

  
614
        // `Email`,
615
        buff.append(clean(metadata.getEmail().getValue())).append(DELIM);
616

  
617
        // `Phone`,
618
        buff.append(clean(metadata.getPhone().getValue())).append(DELIM);
619

  
620
        // `Nationality`,
621
        buff.append(clean(metadata.getNationality().getClassid())).append(DELIM);
622

  
623
        // `PIDS`,
624
        dataStr = " ";
625
        for (StructuredProperty s : data.getPidList()) {
626

  
627
            dataStr += clean(s.getValue()) + ";";
628
        }
629
        buff.append(dataStr).append(DELIM);
630

  
631
        buff.append(getTrust(data)).append(DELIM);
632

  
633
        return buff.toString();
634

  
635
    }
636
    */
637

  
638

  
639
    private static void getResultDatasources(OafEntity valueEntity, String DELIM, Set<String> returnList) {
640
        String SEPERATOR = ";";
641

  
642
        Result result = valueEntity.getResult();
643

  
644
        if (valueEntity.getId().contains("dedup")) return;
645

  
646
        //TODO hosted by
647
        for (Instance instance : (result.getInstanceList())) {
648
            String hostedBy = instance.getHostedby().getKey();
649

  
650
            if (hostedBy != null && !hostedBy.isEmpty()) {
651
                returnList.add("resultDatasource" + DELIM + "result" + DELIM +
652
                        cleanId(valueEntity.getId()) + DELIM + "datasource" + DELIM + cleanId(hostedBy) + DELIM);
653
            }
654
        }
655

  
656
        //TODO  collected froms
657
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
658
            String collectedFrom = collectedFromValue.getKey();
659
            if (collectedFrom != null && !collectedFrom.isEmpty())
660
                returnList.add(("resultDatasource" + DELIM
661
                        + "result" + DELIM + cleanId(valueEntity.getId()) + DELIM
662
                        + "datasource" + DELIM + cleanId(collectedFrom) + DELIM));
663

  
664
        }
665

  
666

  
667
    }
668

  
669

  
670
    public static String cleanId(String value) {
671
        if (value == null) {
672
            return " ";
673
        }
674

  
675
        //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
676
        // to datacite____:: )
677
        // AND REPLACES OCCURRENCES OF DELIM CHARS IN DATA
678
        value = value.replaceFirst(".*\\|", "");
679
        value = value.replaceAll(ID_PATTERN, "");
680
        value = value.replace("#", " ");
681
        value = value.replace("\n", " ");
682
        return value;
683
    }
684

  
685
    private static String clean(String value) {
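        // strips quote, backslash, delimiter and markup characters so field values cannot break the delimiter-separated output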
686
        if (value == null) {
687
            return " ";
688
        }
689

  
690

  
691
        // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
692
        value = value.replaceAll("[\"\\r\\\\;]", "");
693
        value = value.replace("\\", "");
694
        value = value.replaceAll(TEXT_PATTERN, " ");
695

  
696
        value = value.replace(">", " ");
697
        value = value.replace("<", " ");
698
        value = value.replace("\"", " ");
699
        value = value.replace("\\", " ");
700
        value = value.replace("'", " ");
701
        value = value.replace("«", " ");
702
        value = value.replace("»", " ");
703
        value = value.replace("#", " ");
704
        value = value.replace("\\", " ");
705
        value = value.replace("\n", " ");
706
        value = value.replace(",", "");
707
        value = value.replace("#", "");
708
        value = value.replace(";", "");
709

  
710

  
711
        return value;
712
    }
713

  
714
    private static String cleanDoi(String value) {
715
        value = value.replaceAll(DOI_PATTERN, "");
716
        value = value.replace("#", " ");
717
        value = value.replace("\n", " ");
718
        return value;
719
    }
720

  
721

  
722
    //TODO make them in pairs
723
    private static void getDedups(OafEntity valueEntity, String DELIM, Set<String> returnList) {
724
        if (!valueEntity.getChildrenList().isEmpty() && valueEntity.getId().contains("dedup")) {
725
            ArrayList<String> entries = new ArrayList<String>();
726

  
727
            for (OafEntity child : valueEntity.getChildrenList()) {
728
                if (child.getType() == valueEntity.getType() && !child.getId().contains("dedup")) {
729
                    // same entity type and not itself a dedup id: one of the merged (deduplicated) records
730
                    entries.add(cleanId(child.getId()));
731
                }
732
            }
733

  
734
            for (int i = 0; i < entries.size() - 1; i++) {
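                // emit one 'dedup' relation for every unordered pair of merged children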
735
                for (int j = i + 1; j < entries.size(); j++) {
736
                    returnList.add("dedup" + DELIM +
737
                            valueEntity.getType().name() + DELIM +
738
                            entries.get(i) + DELIM +
739
                            valueEntity.getType().name() + DELIM +
740
                            entries.get(j) + DELIM);
741
                }
742
            }
743

  
744
        }
745

  
746
    }
747

  
748

  
749
    private static String getYearInt(String data) {
750
        if (data == null || data.isEmpty() || data.equals("-1")) {
751
            return " ";
752
        }
753
        String[] split = data.split("-");
754

  
755
        if (split == null || split.length == 0) {
756
            return " ";
757
        }
758

  
759

  
760
        return split[0];
761

  
762

  
763
    }
764

  
765

  
766
    private static String getBestLicense(Result result) {
767
        FieldTypeProtos.Qualifier bestLicense = null;
768
        LicenseComparator lc = new LicenseComparator();
769
        for (Instance instance : (result.getInstanceList())) {
770
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
771
                bestLicense = instance.getLicence();
772
            }
773
        }
774
        if (bestLicense != null) {
775
            return bestLicense.getClassname();
776
        } else {
777
            return null;
778
        }
779
    }
780

  
781

  
782
}
783

  
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/ContextExporter.java
1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

  
3
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
4
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
5
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
6
import org.apache.hadoop.conf.Configuration;
7
import org.apache.hadoop.fs.FSDataOutputStream;
8
import org.apache.hadoop.fs.FileSystem;
9
import org.apache.hadoop.fs.Path;
10
import org.apache.log4j.Logger;
11

  
12
import java.util.ArrayList;
13
import java.util.List;
14

  
15
public class ContextExporter {
16
    private ContextTransformer contextTransformer = new ContextTransformer();
17
    private String outputPath;
18
    private Logger log = Logger.getLogger(this.getClass());
19

  
20

  
21
    private ArrayList<String> context = new ArrayList<String>();
22
    private ArrayList<String> category = new ArrayList<String>();
23
    private ArrayList<String> concept = new ArrayList<String>();
24

  
25
    public ContextExporter(String outputPath, String contextMap, boolean readFromUrl) throws Exception {
26
        if (!outputPath.endsWith("/")) {
27
            outputPath += "/";
28
        }
29
        this.outputPath = outputPath;
30
        if (readFromUrl) {
31
            readFromUrl(contextMap);
32
        } else {
33
            readFromBuffer(contextMap);
34
        }
35

  
36
    }
37

  
38
    public void readFromUrl(String url) throws Exception {
39

  
40
        List<String> concepts = getContextResouces(url);
41
        log.info("Returned concept  " + concepts.size()
42
        );
43

  
44
        for (String data : concepts) {
45
            log.info("++++++++++++++ Transforming concept data ");
46
            String res = contextTransformer.transformXSL(data);
47

  
48
            processData(res);
49
        }
50

  
51
        writeData(this.context, "context");
52
        writeData(this.category, "category");
53
        writeData(this.concept, "concept");
54

  
55

  
56
    }
57

  
58
    private void readFromBuffer(String contextMap) throws Exception {
59

  
60
        if (contextMap == null || contextMap.isEmpty()) {
61
            log.error("Context Resources file is empty.");
62
            throw new Exception("Context Resources file is empty.");
63
        }
64

  
65
        String data = contextTransformer.transformXSL(contextMap);
66

  
67
        log.info(data);
68
        processData(data);
69
    }
70

  
71
    private List<String> getContextResouces(String url) throws ISLookUpException {
72
        ISLookUpService lookUpService;
73

  
74
        JaxWsProxyFactoryBean factory = new JaxWsProxyFactoryBean();
75
        factory.setServiceClass(ISLookUpService.class);
76
        factory.setAddress(url);
77

  
78
        lookUpService = (ISLookUpService) factory.create();
79
//		for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']
80
//		[.//RESOURCE_KIND/@value='ContextDSResources'] return  $x
81
        return lookUpService.quickSearchProfile("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType'][.//RESOURCE_KIND/@value='ContextDSResources']");
82
    }
83

  
84

  
85
    private void writeData(ArrayList<String> dataList, String listName) throws Exception {
86
        if (dataList.isEmpty()) return;
87
        log.info(listName + "  size " + dataList.size());
88

  
89
        String data = new String();
90
        for (int i = 0; i < dataList.size(); i++) {
91

  
92
            data += dataList.get(i);
93

  
94

  
95
        }
96

  
97

  
98
        if (data.contains("\n")) data = data.substring(0, data.lastIndexOf("\n")); // guard: avoid StringIndexOutOfBoundsException when there is no newline
99

  
100

  
101
        flushString(data, outputPath + listName);
102

  
103

  
104
    }
105

  
106
    private void processData(String data) throws Exception {
107
        try {
108

  
109
            String[] split = data.split("COPY\n");
110

  
111
            if (split.length > 0) {
112
                context.add(split[0]);
113
            }
114

  
115

  
116
            if (split.length > 1) {
117
                category.add(split[1]);
118
            }
119

  
120
            if (split.length > 2) {
121

  
122
                concept.add(split[2]);
123
            }
124

  
125
        } catch (Exception e) {
126
            String msg = " Unable to create file with context, " + "concept and category values in output path " + outputPath + ". Reason: ";
127
            log.error(msg, e);
128
            throw new Exception(msg, e);
129
        }
130

  
131
    }
132

  
133
    private void flushString(String data, String destination) throws Exception {
134

  
135
        FSDataOutputStream fin = null;
136
        try {
137

  
138

  
139
            log.info("***********************Writing data:***********************\n" + data);
140
            log.info("***********************  data:***********************\n");
141
            FileSystem fs = FileSystem.get(new Configuration());
142
            fin = fs.create(new Path(destination), true);
143

  
144
            fin.write(data.getBytes());
145

  
146
        } catch (Exception e) {
147
            log.error("Failed  to write exported data to a file : ", e);
148
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
149

  
150
        } finally {
151

  
152
            if (fin != null) fin.close();
153

  
154
        }
155
    }
156

  
157
    public String getOutputPath() {
158
        return outputPath;
159
    }
160

  
161
    public void setOutputPath(String outputPath) {
162
        this.outputPath = outputPath;
163
    }
164

  
165
}
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/LodMapper.java
1
package eu.dnetlib.data.mapreduce.hbase.lodExport;
2

  
3
import com.google.protobuf.InvalidProtocolBufferException;
4
import eu.dnetlib.data.mapreduce.hbase.index.config.EntityConfigTable;
5
import eu.dnetlib.data.mapreduce.hbase.index.config.IndexConfig;
6
import eu.dnetlib.data.mapreduce.hbase.index.config.LinkDescriptor;
7
import eu.dnetlib.data.mapreduce.hbase.lodExport.utils.Serializer;
8
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
9
import eu.dnetlib.data.mapreduce.util.UpdateMerger;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafRel;
12
import eu.dnetlib.data.proto.OafProtos.OafRelOrBuilder;
13
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
14
import eu.dnetlib.data.proto.TypeProtos.Type;
15
import org.apache.hadoop.hbase.client.Result;
16
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
17
import org.apache.hadoop.hbase.mapreduce.TableMapper;
18
import org.apache.hadoop.hbase.util.Bytes;
19
import org.apache.hadoop.io.Text;
20
import org.apache.log4j.Logger;
21

  
22
import java.io.IOException;
23
import java.text.SimpleDateFormat;
24
import java.util.*;
25
import java.util.Map.Entry;
26

  
27
/**
28
 * Mapper class that reads HBase contents and prepares them for the LOD
29
 * export.
30
 */
31
public class LodMapper extends TableMapper<Text, Text> {
32
    private static final String DATE_OF_TRANSFORMATION_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";
33
    private static final String LAST_EXECUTION_DATE_PATTERN = "yyyy-MM-dd";
34
    private Logger log = Logger.getLogger(this.getClass());
35
    private EntityConfigTable entityConfigTable;
36
    private String lastExecutionDate = "";
37

  
38

  
39
    public static enum ENTITIES_COUNTER {
40
        RESULT,
41
        PROJECT,
42
        DATASOURCE,
43
        PERSON,
44
        ORGANIZATION,
45
        DELETED_BY_INFERENCE,
46
        NOT_DELETED_BY_INFERENCE,
47
        TOTAL_ENTITIES,
48
        TOTAL_RELATIONS,
49
        UPDATED,
50
        NOT_UPDATED
51

  
52
    }
53

  
54
    ;
55

  
56
    private String DELIM;
57

  
58
    @Override
59
    protected void setup(Context context) throws IOException, InterruptedException {
60
        loadEntityConfig(context);
61
        DELIM = context.getConfiguration().get("lod.delim");
62
        lastExecutionDate = context.getConfiguration().get("lod.lastExecutionDate");
63

  
64
    }
65

  
66

  
67
    @Override
68
    protected void map(final ImmutableBytesWritable keyIn, final Result result, final Context context) throws IOException {
69

  
70
        final OafRowKeyDecoder decoder = OafRowKeyDecoder.decode(keyIn.copyBytes());
71
        final Type type = decoder.getType();
72
        final Oaf oaf = UpdateMerger.mergeBodyUpdates(context, result.getFamilyMap(Bytes.toBytes(type.toString())));
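        // merge the stored body updates for this row's column family into a single Oaf message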
73

  
74
        if (isValid(oaf)) {
75

  
76
            if (deletedByInference(oaf)) {
77
                context.getCounter(ENTITIES_COUNTER.DELETED_BY_INFERENCE).increment(1);
78
            } else {
79
                context.getCounter(ENTITIES_COUNTER.NOT_DELETED_BY_INFERENCE).increment(1);
80
            }
81

  
82
            context.getCounter(ENTITIES_COUNTER.TOTAL_ENTITIES).increment(1);
83
            emitProtos(context, result, oaf);
84
        }
85

  
86
    }
87

  
88
    private boolean isValid(Oaf oaf) {
89
        try {
90
            if (oaf != null && oaf.isInitialized()) {
91
                return true;
92
            }
93

  
94
        } catch (Exception e) {
95
            log.error("invalid proto", e);
96
        }
97

  
98
        return false;
99
    }
100

  
101
    private boolean isUpdated(Oaf oaf) throws IOException {
102
        String dateOfTransformationString = "";
103
        try {
104
            SimpleDateFormat simpleDateFormat = new SimpleDateFormat(DATE_OF_TRANSFORMATION_PATTERN, Locale.getDefault());
105
            simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
106
            dateOfTransformationString = oaf.getEntity().getDateoftransformation();
107
            if (dateOfTransformationString == null || dateOfTransformationString.isEmpty() || dateOfTransformationString.equals(" ")) {
108
                return true;
109
            }
110

  
111
            Date dateOfTransformation = simpleDateFormat.parse(dateOfTransformationString);
112

  
113
            SimpleDateFormat lastExecDateFormatter = new SimpleDateFormat(LAST_EXECUTION_DATE_PATTERN);
114
            lastExecDateFormatter.setTimeZone(TimeZone.getTimeZone("UTC"));
115
            Date lastExecDate = lastExecDateFormatter.parse(lastExecutionDate);
116

  
117
            if (lastExecDate.before(dateOfTransformation)) {
118
                return true;
119
            }
120
        } catch (Exception e) {
121
            log.error("invalid date " + dateOfTransformationString, e);
122
            throw new IOException(e);
123
        }
124

  
125
        return false;
126
    }
127

  
128
    private void emitProtos(Context context, Result result, Oaf oaf) throws IOException {
129
        Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(oaf.getKind()).setDataInfo(oaf.getDataInfo());
130
        Type type = oaf.getEntity().getType();
131
        oafBuilder.setEntity(oaf.getEntity());
132
        // emit relation first so we can cache them to entity protos
133
        emitRelation(context, result, oaf, type, oafBuilder);
134

  
135
        if (isUpdated(oaf)) {
... This diff was truncated because it exceeds the maximum size that can be displayed.
