Project

General

Profile

« Previous | Next » 

Revision 56504

- Made Serializers methods non-static to improve readability and maintainability.
- Code cleanup

View differences:

modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/ContextTransformer.java
1 1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2 2

  
3
import org.apache.log4j.Logger;
4

  
3
import org.apache.commons.io.IOUtils;
5 4
import javax.xml.transform.Transformer;
6 5
import javax.xml.transform.TransformerFactory;
7 6
import javax.xml.transform.stream.StreamResult;
......
9 8
import java.io.ByteArrayInputStream;
10 9
import java.io.ByteArrayOutputStream;
11 10
import java.io.InputStream;
11
import java.nio.charset.StandardCharsets;
12 12

  
13 13
public class ContextTransformer {
14 14

  
15
    private Logger log = Logger.getLogger(this.getClass());
16

  
17 15
    public String transformXSL(String xml) throws Exception {
18 16
        Transformer transformer;
19 17
        TransformerFactory tFactory = TransformerFactory.newInstance();
20 18

  
21 19
        if (xml == null) {
22

  
23
            throw new Exception("NULL XML ENTITY CONFIGURATION   ");
20
            throw new IllegalArgumentException("Input xml should not be null!");
24 21
        }
25 22

  
26 23
        InputStream inputStream = null;
27 24
        ByteArrayInputStream readerStream = null;
28 25
        ByteArrayOutputStream writerStream = null;
26

  
29 27
        try {
30 28
            inputStream = ClassLoader.getSystemResourceAsStream("eu/dnetlib/data/mapreduce/hbase/statsExport/" + "context.xsl");
31 29
            transformer = tFactory.newTransformer(new StreamSource(inputStream));
32 30

  
33
            readerStream = new ByteArrayInputStream(xml.getBytes("UTF-8"));
31
            readerStream = new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
34 32

  
35 33
            writerStream = new ByteArrayOutputStream();
36 34
            transformer.transform(new StreamSource(readerStream), new StreamResult(writerStream));
37 35

  
38 36
            return writerStream.toString("UTF8");
39

  
40
        } catch (Exception e) {
41

  
42
            log.error(e.getMessage());
43
            throw new Exception(e.getMessage(), e);
44 37
        } finally {
45
            inputStream.close();
46
            readerStream.close();
47
            writerStream.close();
48

  
38
            IOUtils.closeQuietly(inputStream);
39
            IOUtils.closeQuietly(readerStream);
40
            IOUtils.closeQuietly(writerStream);
49 41
        }
50

  
51 42
    }
52

  
53

  
54 43
}
modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/FundingParser.java
10 10

  
11 11
public class FundingParser {
12 12

  
13
	private String DELIM;
14
	private String ENCLOSING;
13 15

  
14
	public static String getFundingLevel(String funding_level, int level, String DELIM, String ENCLOSING) {
16
	public FundingParser(String DELIM, String ENCLOSING) {
17
		this.DELIM = DELIM;
18
		this.ENCLOSING = ENCLOSING;
19
	}
15 20

  
21
	public String getFundingLevel(String funding_level, int level) {
22

  
16 23
		if (funding_level.isEmpty()) {
17 24
			return ENCLOSING + " " + ENCLOSING + DELIM;
18 25
		}
......
40 47
			} else if (funding_level.equalsIgnoreCase("SP2")) {
41 48
				funding_level = "SP2-Ideas";
42 49
			}
50

  
43 51
			if (funding_level.equalsIgnoreCase("SP3")) {
44 52
				funding_level = "SP3-People";
45 53
			} else if (funding_level.equalsIgnoreCase("SP4")) {
46 54
				funding_level = "SP4-Capacities";
47

  
48 55
			} else if (funding_level.equalsIgnoreCase("SP5")) {
49 56
				funding_level = "SP5-Euratom";
50 57
			}
51 58
		}
52 59

  
53

  
54 60
		funding_level = funding_level.replaceAll(">", "");
55

  
56 61
		funding_level = funding_level.replaceAll("</", "");
57 62
		funding_level = funding_level.replace(DELIM, " ");
58 63
		funding_level = funding_level.replace(ENCLOSING, " ");
......
60 65
		return ENCLOSING + funding_level + ENCLOSING + DELIM;
61 66
	}
62 67

  
63
	public static String getFundingInfo(String buff, String DELIM, String ENCLOSING) {
64
		return getFunder(buff, DELIM, ENCLOSING) +
65
				getFundingLevel(buff, 0, DELIM, ENCLOSING) + (getFundingLevel(buff, 1, DELIM, ENCLOSING) + getFundingLevel(buff, 2, DELIM, ENCLOSING)
66
				+ getFundingLevel(buff, 3, DELIM, ENCLOSING));
68
	public String getFundingInfo(String buff) {
69
		return getFunder(buff) + getFundingLevel(buff, 0) + (getFundingLevel(buff, 1) + getFundingLevel(buff, 2)
70
				+ getFundingLevel(buff, 3));
67 71
	}
68 72

  
69
	public static String getFunder(String buff, String DELIM, String ENCLOSING) {
73
	public String getFunder(String buff) {
70 74

  
71 75
		if (buff.isEmpty()) {
72 76
			return ENCLOSING + " " + ENCLOSING + DELIM;
......
81 85
		split = funder.split("<name>");
82 86

  
83 87
		funder = split[1];
84

  
85 88
		funder = funder.substring(0, funder.indexOf("</name>"));
86

  
87 89
		funder = funder.replaceAll(">", "");
88

  
89 90
		funder = funder.replaceAll("</", "");
90

  
91 91
		funder = funder.replaceAll("\"", "");
92 92
		funder = funder.replaceAll("&amp;", "");
93 93
		funder = funder.replace(ENCLOSING, " ");
94 94

  
95 95
		return ENCLOSING + funder + ENCLOSING + DELIM;
96 96
	}
97

  
98 97
}
99 98

  
modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java
2 2

  
3 3
import com.google.common.collect.Multimap;
4 4

  
5

  
6 5
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
7 6
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
8 7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
......
14 13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15 14
import eu.dnetlib.data.proto.OafProtos.OafRel;
16 15
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
//import eu.dnetlib.data.proto.PersonProtos;
18 16
import eu.dnetlib.data.proto.ProjectProtos.Project;
19 17
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20 18
import eu.dnetlib.data.proto.ResultProtos.Result;
......
25 23
import java.text.DateFormat;
26 24
import java.text.ParseException;
27 25
import java.text.SimpleDateFormat;
28
import java.util.ArrayList;
29 26
import java.util.Date;
30
import java.util.HashMap;
31 27
import java.util.List;
32 28

  
33 29
import org.w3c.dom.Element;
......
43 39
public class Serializer {
44 40
    private static Logger logger = Logger.getLogger(Serializer.class);
45 41

  
46
    public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
42
    private String DELIM;
43
    private String ENCLOSING;
47 44

  
45
    public Serializer(String DELIM, String ENCLOSING) {
46
        this.DELIM = DELIM;
47
        this.ENCLOSING = ENCLOSING;
48
    }
49

  
50
    public String serialize(Oaf oaf) {
51

  
48 52
        switch (oaf.getKind()) {
49 53
            case entity:
50 54
                OafEntity valueEntity = oaf.getEntity();
......
52 56
                switch (valueEntity.getType()) {
53 57
                    case datasource:
54 58

  
55
                        return buildDatasource(oaf, DELIM, ENCLOSING);
59
                        return buildDatasource(oaf);
56 60

  
57 61
                    case organization:
58 62

  
59
                        return buildOrganization(oaf, DELIM, ENCLOSING);
63
                        return buildOrganization(oaf);
60 64

  
61 65
                    case project:
62 66

  
63
                        return buildProject(oaf, DELIM, ENCLOSING);
67
                        return buildProject(oaf);
64 68
                    case result:
65 69

  
66
                        return buildResult(oaf, DELIM, ENCLOSING);
70
                        return buildResult(oaf);
67 71
                    /*
68 72
                    case person:
69 73
                        return buildPerson(oaf, DELIM, ENCLOSING);
......
74 78
                break;
75 79
            case relation:
76 80

  
77
                return buildRel(oaf.getRel(), DELIM, ENCLOSING);
81
                return buildRel(oaf.getRel());
78 82

  
79 83
        }
80 84
        return null;
81 85
    }
82 86

  
83
    public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
87
    public String serialize(OafRel oaf) {
84 88

  
85 89
        switch (oaf.getRelType()) {
86 90
            case resultProject:
87
                return getResultProject(oaf, DELIM, ENCLOSING);
91
                return getResultProject(oaf);
88 92
            default:
89
                return buildRel(oaf, DELIM, ENCLOSING);
93
                return buildRel(oaf);
90 94
        }
91 95
    }
92 96

  
93
    private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
97
    private String buildRel(OafRel Rel) {
94 98

  
95
        return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
99
        return cleanId(Rel.getTarget()) + DELIM;
96 100

  
97 101
    }
98 102

  
99
    public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
103
    public void extractRelations(Oaf oaf, Multimap<String, String> relations) {
100 104
        OafEntity valueEntity = oaf.getEntity();
101
        getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
105
        getOriginalId(valueEntity, relations);
102 106

  
103 107
        switch (valueEntity.getType()) {
104 108
            case datasource:
105
                getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
106
//                getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
109
                getDatasourceLanguages(valueEntity, relations);
110
//                getDatasourceWebsite(valueEntity, relations);
107 111
            case result:
108
                getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
109
                getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
110
                getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
111
                getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
112
                getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
113
                getResultDois(valueEntity, relations, DELIM, ENCLOSING);
114
                getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
112
                getResultTopics(valueEntity, relations);
113
                getResultLanguages(valueEntity, relations);
114
                getResultClassifications(valueEntity, relations);
115
                getResultDatasources(valueEntity, relations);
116
                getResultConcepts(valueEntity, relations);
117
                getResultDois(valueEntity, relations);
118
                getResultCitations(valueEntity, relations);
115 119
//                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
116 120
//                getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
117 121

  
118 122
            case project:
119
                getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
120
                getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
123
                getProjectKeywords(valueEntity, relations);
124
                getProjectSubjects(valueEntity, relations);
121 125

  
122 126
            default:
123 127
        }
124 128

  
125 129
    }
126 130

  
127
    private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
131
    private void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations) {
128 132
        Datasource d = valueEntity.getDatasource();
129 133
        Metadata metadata = d.getMetadata();
130 134

  
131
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
135
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue()));
132 136
    }
133 137

  
134
    private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
138
    private void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations) {
135 139
        Result result = valueEntity.getResult();
136 140
        Result.Metadata metadata = result.getMetadata();
137 141

  
......
147 151
        }
148 152

  
149 153
        //  pubtitle
150
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
154
        buff.append(getStringField(titleString));
151 155

  
152 156
        String sources = "";
153 157
        for (Instance instance : (result.getInstanceList())) {
154 158
            List<String> urls = instance.getUrlList();
155 159
            for (String url : urls) {
156
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
160
                sources += cleanUrl(url) + " ;";
157 161
            }
158 162
        }
159 163

  
......
164 168
        relations.put("resultExtra", buff.toString());
165 169
    }
166 170

  
167
    private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
171
    private void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations) {
168 172

  
169 173
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
170 174
        for (String oid : oafEntity.getOriginalIdList()) {
171
            relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
175
            relations.put(relName, cleanId(oid));
172 176
        }
173 177

  
174 178
    }
175 179

  
176
    private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
177
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
180
    private void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations) {
181
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue()));
178 182

  
179 183
    }
180 184

  
181
    private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
185
    private void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations) {
182 186
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
183
            relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
187
            relations.put("projectSubject", getStringField(subj.getValue()));
184 188
        }
185 189
    }
186 190

  
187
    private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
191
    private String getResultProject(OafRel oaf) {
188 192
        StringBuilder buff = new StringBuilder();
189
        buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
193
        buff.append(cleanId(oaf.getTarget()) + DELIM);
190 194
        // TODO is declared as int!!!
191 195
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
192 196
        if (diff < 0) {
193 197
            diff = 0;
194 198
        }
195 199

  
196
        buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
200
        buff.append(getNumericField(String.valueOf(diff)));
197 201
        return buff.toString();
198 202
    }
199 203

  
200 204

  
201
    private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
205
    private void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
202 206
        Datasource d = valueEntity.getDatasource();
203 207
        Metadata metadata = d.getMetadata();
204 208

  
205 209
        for (StringField lang : metadata.getOdlanguagesList()) {
206
            rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
210
            rels.put("datasourceLanguage", getStringField(lang.getValue()));
207 211
        }
208 212
    }
209 213

  
210
    private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
214
    private void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
211 215

  
212 216
        Result d = valueEntity.getResult();
213 217
        Result.Metadata metadata = d.getMetadata();
214 218
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
215
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
219
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname()));
216 220
        }
217 221

  
218 222
    }
219 223

  
220
    private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
224
    private void getResultDois(OafEntity valueEntity, Multimap<String, String> rels) {
221 225

  
222 226
        for (StructuredProperty pid : valueEntity.getPidList()) {
223

  
224
            rels.put("resultPid",
225
                    getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
227
            rels.put("resultPid", getStringField(pid.getQualifier().getClassname()) + getStringField(pid.getValue()));
226 228
        }
227 229
    }
228 230

  
229
    private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
231
    private void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels) {
230 232

  
231 233
        Result result = valueEntity.getResult();
232 234

  
233 235
        for (Instance instance : (result.getInstanceList())) {
234 236
            String classification = instance.getInstancetype().getClassname();
237

  
235 238
            if (classification != null && !classification.isEmpty()) {
236
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
237
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
238
//                break;
239
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname()));
239 240
            }
240 241
        }
241 242
    }
242 243

  
243
    private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
244
    private void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels) {
244 245
        Result result = valueEntity.getResult();
245
        //description
246

  
246 247
        for (StringField s : result.getMetadata().getDescriptionList()) {
247

  
248
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
248
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text()));
249 249
        }
250 250
    }
251 251

  
252
    private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
253

  
252
    private void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels) {
254 253
        Result result = valueEntity.getResult();
255 254

  
256

  
257 255
        for (Result.Context context : result.getMetadata().getContextList()) {
258

  
259
            rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
256
            rels.put("resultConcept", cleanId(context.getId()));
260 257
        }
261 258
    }
262 259

  
263
    private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
260
    private void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels) {
264 261
        Result result = valueEntity.getResult();
265 262

  
266 263
//TODO hosted by
267 264
        for (Instance instance : (result.getInstanceList())) {
268 265
            String hostedBy = instance.getHostedby().getKey();
266

  
269 267
            if (hostedBy != null && !hostedBy.isEmpty()) {
270
                rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
268
                rels.put("resultDatasource", cleanId(hostedBy) + DELIM);
271 269
            }
272 270
        }
273 271

  
274 272
//TODO  collected froms
275 273
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
274
            String collectedFrom = collectedFromValue.getKey();
276 275

  
277
            String collectedFrom = collectedFromValue.getKey();
278 276
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
279
                rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
277
                rels.put("resultDatasource", cleanId(collectedFrom) + DELIM);
280 278
            }
281 279
        }
282 280
    }
283 281

  
284
    private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
285

  
282
    private void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels) {
286 283
        Result d = valueEntity.getResult();
287 284
        Result.Metadata metadata = d.getMetadata();
288

  
289 285
        List<StructuredProperty> Topics = metadata.getSubjectList();
290 286

  
291 287
        for (StructuredProperty topic : Topics) {
292
            // TODO result topics
293
            rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
288
            rels.put("resultTopic", getStringField(topic.getValue()));
294 289
        }
295 290
    }
296 291

  
297 292

  
298
    private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
293
    private void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels) {
299 294
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
300 295
            if (extraInfo.getName().equals("result citations")) {
301 296
                DOMParser parser = new DOMParser();
......
329 324
        }
330 325
    }
331 326

  
332
    private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
327
    private String buildDatasource(Oaf oaf) {
333 328
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
334 329
        StringBuilder buff = new StringBuilder();
335 330

  
336 331
        // name
337 332
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
338
            buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
333
            buff.append(getStringField("Unknown Repository"));
339 334
        } else {
340
            buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
335
            buff.append(getStringField(metadata.getOfficialname().getValue()));
341 336
        }
342 337

  
343 338
        // type
344 339
        if (metadata.hasDatasourcetype()) {
345
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
340
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", "")));
346 341
        }
347 342

  
348 343
        // compatibility,
349
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
344
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname()));
350 345

  
351 346
        // latitude
352
        buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING));
347
        buff.append(getLatLongField(metadata.getLatitude().getValue()));
353 348

  
354 349
        // longtitude
355
        buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING));
350
        buff.append(getLatLongField(metadata.getLongitude().getValue()));
356 351

  
357 352
        // dateofvalidation,
358
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
353
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue()));
359 354

  
360 355
        // yearofvalidation,
361
        buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
356
        buff.append(getYearInt(metadata.getDateofvalidation().getValue()));
362 357

  
363 358
        //harvested
364
        buff.append(getStringField("false", DELIM, ENCLOSING));
359
        buff.append(getStringField("false"));
365 360

  
366 361
        //piwik_id
367 362
        String piwik_id = "";
......
371 366
                break;
372 367
            }
373 368
        }
374
        buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
369
        buff.append(getStringField(cleanNumber(piwik_id)));
375 370

  
376 371
        return buff.toString();
377 372

  
378 373
    }
379 374

  
380
    private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
375
    private String buildOrganization(Oaf oaf) {
381 376

  
382 377
        StringBuilder buff = new StringBuilder();
383 378
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
384 379

  
385 380
        // `name`,
386
        buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
381
        buff.append(getStringField(metadata.getLegalname().getValue()));
387 382

  
388 383
        // `country`,
389
        buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
384
        buff.append(getStringField(metadata.getCountry().getClassid()));
390 385

  
391 386
        return buff.toString();
392 387
    }
393 388

  
394
    private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
389
    private String buildResult(Oaf oaf) {
395 390
        StringBuilder buff = new StringBuilder();
396 391

  
397 392
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
......
409 404
        }
410 405

  
411 406
        //  pubtitle
412
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
407
        buff.append(getStringField(titleString));
413 408

  
414 409

  
415 410
        //  publisher
416
        buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
411
        buff.append(getStringField(metadata.getPublisher().getValue()));
417 412

  
418 413
        //  journal
419
        buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING));  //#null#!
414
        buff.append(getStringField(metadata.getJournal().getName()));  //#null#!
420 415

  
421 416
        // year
422
        buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
417
        buff.append(getYearInt(metadata.getDateofacceptance().getValue()));
423 418

  
424 419
        // date
425
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
420
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue()));
426 421

  
427 422
        // bestlicense
428
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
423
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult())));
429 424

  
430 425
        // type
431
        buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
426
        buff.append(getStringField(metadata.getResulttype().getClassname()));
432 427

  
433 428
        // embargo_end_date
434
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
429
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue()));
435 430

  
436 431
        // `authors`,
437 432
        int authors = metadata.getAuthorCount();
......
452 447
            // result.
453 448
            {
454 449
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
455
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
450
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
456 451
                if (Integer.parseInt(daysfromend) > 0) {
457 452
                    delayed = "yes";
458 453
                }
......
460 455
        }
461 456

  
462 457
        // `delayed`,
463
        buff.append(getStringField(delayed, DELIM, ENCLOSING));
458
        buff.append(getStringField(delayed));
464 459
        //authors
465
        buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
460
        buff.append(getNumericField(String.valueOf(authors)));
466 461

  
467 462
        String sources = new String();
468 463

  
......
470 465
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
471 466
            List<String> urls = instance.getUrlList();
472 467
            for (String url : urls) {
473
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
468
                sources += cleanUrl(url) + " ;";
474 469
            }
475 470
        }
476 471

  
......
515 510
    }
516 511
    */
517 512

  
518
    private static String getBestLicense(Result result) {
513
    private String getBestLicense(Result result) {
519 514
        Qualifier bestLicense = null;
520 515
        LicenseComparator lc = new LicenseComparator();
521 516
        for (Instance instance : (result.getInstanceList())) {
......
531 526
    }
532 527

  
533 528
    // TODO here iterate over all values
534
    private static String getAccessMode(Result result) {
529
    private String getAccessMode(Result result) {
535 530
        String accessMode = " ";
536 531
        for (Instance instance : (result.getInstanceList())) {
537 532
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
......
544 539
        return accessMode;
545 540
    }
546 541

  
547
    private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
542
    private String buildProject(Oaf oaf) {
548 543

  
544
        FundingParser fundingParser = new FundingParser(DELIM, ENCLOSING);
549 545
        StringBuilder buff = new StringBuilder();
550 546
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
551 547
        
......
554 550
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
555 551
            acronym = metadata.getTitle().getValue();
556 552
        }
557
        buff.append(getStringField(acronym, DELIM, ENCLOSING));
553
        buff.append(getStringField(acronym));
558 554

  
559 555
        //title
560
        buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
556
        buff.append(getStringField(metadata.getTitle().getValue()));
561 557

  
562 558
        //funding_lvl
563 559
        List<StringField> fundList = metadata.getFundingtreeList();
......
569 565
	        funding_lvl1 text,
570 566
	        funding_lvl2 text,
571 567
	        funding_lvl3 text,*/
572
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
568
            buff.append(fundingParser.getFundingInfo(fundList.get(0).getValue()));
573 569
        } else {
574
            buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
570
            buff.append(fundingParser.getFundingInfo(""));
575 571
        }
576 572

  
577 573
        //sc39
......
581 577
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
582 578
            sc39 = "no";
583 579
        }
584
        buff.append(getStringField(sc39, DELIM, ENCLOSING));
580
        buff.append(getStringField(sc39));
585 581

  
586 582
        //project_type
587
        buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
583
        buff.append(getStringField(metadata.getContracttype().getClassid()));
588 584

  
589 585
        // start_year
590
        buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
586
        buff.append(getYearInt(metadata.getStartdate().getValue()));
591 587

  
592 588
        // end_year
593
        buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
589
        buff.append(getYearInt(metadata.getEnddate().getValue()));
594 590

  
595 591
        // duration enddate-startdate
596
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
592
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue()));
597 593

  
598 594
        // haspubs
599
        buff.append(getStringField("no", DELIM, ENCLOSING));
595
        buff.append(getStringField("no"));
600 596

  
601 597
        // numpubs
602
        buff.append(getNumericField("0", DELIM, ENCLOSING));
598
        buff.append(getNumericField("0"));
603 599

  
604 600
        // enddate
605
        buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
601
        buff.append(getStringDateField(metadata.getEnddate().getValue()));
606 602

  
607 603
        // startdate
608
        buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
604
        buff.append(getStringDateField(metadata.getStartdate().getValue()));
609 605

  
610 606
        // `daysforlastpub`,
611
        buff.append(getNumericField("", DELIM, ENCLOSING));
607
        buff.append(getNumericField(""));
612 608

  
613 609
        // `delayedpubs`,
614
        buff.append(getNumericField("", DELIM, ENCLOSING));
610
        buff.append(getNumericField(""));
615 611

  
616 612
        //call identifier
617
        buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
613
        buff.append(getStringField(metadata.getCallidentifier().getValue()));
618 614

  
619 615
        //code
620
        buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
616
        buff.append(getStringField(metadata.getCode().getValue()));
621 617

  
622 618
        return buff.toString();
623

  
624 619
    }
625 620

  
626 621
    /*
......
674 669
    */
675 670

  
676 671

  
677
    private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
672
    private String getYearDifferenceInteger(String enddate, String startdate) {
678 673

  
679 674
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
680 675

  
......
703 698
        return ENCLOSING + "0" + ENCLOSING + DELIM;
704 699
    }
705 700

  
706
    private static String getYearInt(String data, String DELIM, String ENCLOSING) {
701
    private String getYearInt(String data) {
707 702
        if (data == null || data.isEmpty() || data.equals("-1")) {
708 703
            return ENCLOSING + "0" + ENCLOSING + DELIM;
709 704
        }
......
724 719

  
725 720
    }
726 721

  
727
    private static String cleanNumber(String number) {
722
    private String cleanNumber(String number) {
728 723
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
729 724
        return number;
730 725
    }
731 726

  
732
    private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
727
    private String getLatLongField(String data) {
733 728

  
734
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
729
        if (data == null || data.isEmpty())
730
            return ENCLOSING + "null" + ENCLOSING + DELIM;
735 731

  
736 732
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
737 733

  
738 734
    }
739 735

  
740
    private static String getStringField(String data, String DELIM, String ENCLOSING) {
736
    private String getStringField(String data) {
741 737

  
742
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
738
        if (data == null || data.isEmpty())
739
            return ENCLOSING + "null" + ENCLOSING + DELIM;
743 740

  
744
        return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
741
        return ENCLOSING + clean(data) + ENCLOSING + DELIM;
745 742

  
746 743
    }
747 744

  
748
    private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
745
    private String getStringDateField(String data) {
749 746
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
750 747
            return ENCLOSING + "0" + ENCLOSING + DELIM;
751 748
        } else {
......
762 759
        }
763 760
    }
764 761

  
765
    private static String getNumericField(String data, String DELIM, String ENCLOSING) {
762
    private String getNumericField(String data) {
766 763
        if (data == null || data.isEmpty() || data.equals("")) {
767 764
            return ENCLOSING + "0" + ENCLOSING + DELIM;
768 765
        } else {
......
770 767
        }
771 768
    }
772 769

  
773
    public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
770
    public String getId(Oaf oaf) {
774 771
        switch (oaf.getKind()) {
775 772
            case entity:
776
                return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
773
                return cleanId(oaf.getEntity().getId());
777 774
            case relation:
778
                return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
775
                return cleanId(oaf.getRel().getSource());
779 776

  
780 777
        }
781 778
        return null;
782 779

  
783 780
    }
784 781

  
785
    public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
786
        return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
782
    public String getId(OafRel relOaf) {
783
        return cleanId(relOaf.getSource());
787 784
    }
788 785

  
789
    public static String clean(String value, String DELIM, String ENCLOSING) {
786
    public String clean(String value) {
790 787
        if (value != null) {
791 788

  
792 789
            value = value.replaceAll("[\"\\r\\\\;]", "");
......
801 798
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
802 799

  
803 800
            return value;
804

  
805 801
        } else {
806 802
            return "";
807

  
808 803
        }
809 804

  
810 805
    }
811 806

  
812
    public static String cleanId(String value, String DELIM, String ENCLOSING) {
807
    public String cleanId(String value) {
813 808
        if (value != null) {
814 809
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
815 810

  
......
829 824
    }
830 825

  
831 826

  
832
    public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
827
    public String cleanUrl(String value) {
833 828
        value = value.replace(DELIM, " ");
834 829
        value = value.replace(ENCLOSING, " ");
835 830
        value = value.replace(" ", "");
......
838 833
    }
839 834

  
840 835

  
841
    public static long DATEDIFF(String startDate, String endDate) {
836
    public long DATEDIFF(String startDate, String endDate) {
842 837
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
843 838
        long days = 0l;
844 839
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
......
861 856

  
862 857
        return days;
863 858
    }
864

  
865

  
866 859
}
modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/mapreduce/StatsMapper.java
45 45
        project
46 46
    }
47 47

  
48

  
49 48
    private enum REL_COUNTERS {
50 49
        resultProject,
51 50
        datasourceOrganization,
......
64 63
        rottenRelations
65 64
    }
66 65

  
66
    private Serializer serializer;
67

  
67 68
    //Init class: Load Index config and mapping for DB tables.
68 69
    @Override
69 70
    protected void setup(Context context) {
70 71
        loadEntityConfig(context);
72

  
73
        this.serializer = new Serializer(context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar"));
71 74
    }
72 75

  
73 76

  
......
112 115

  
113 116
    private void emitEntity(Context context, Oaf oaf) {
114 117

  
115
        String serialized = Serializer.serialize(oaf, context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar"));
118
        String serialized = serializer.serialize(oaf);
116 119
        if (serialized != null) {
117 120
                try {
118
                    Text TextKeyOut = new Text(oaf.getEntity().getType().toString() + "," + Serializer.getId(oaf, context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar")));
121
                    Text TextKeyOut = new Text(oaf.getEntity().getType().toString() + "," + serializer.getId(oaf));
119 122
                    context.write((TextKeyOut), new ImmutableBytesWritable(serialized.getBytes()));
120 123
                    context.getCounter(STATS_COUNTERS.valueOf(oaf.getEntity().getType().toString())).increment(1);
121 124

  
......
137 140

  
138 141
            Multimap<String, String> relMap = ArrayListMultimap.create();
139 142

  
140
            Serializer.extractRelations(oaf, context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar"), relMap);
143
            serializer.extractRelations(oaf,relMap);
141 144

  
142 145
            if (!relMap.isEmpty()) {
143 146
                for (Entry<String, String> rel : relMap.entries()) {
144
                    Text TextKeyOut = new Text(rel.getKey() + "," + Serializer.getId(oaf, context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar")));
147
                    Text TextKeyOut = new Text(rel.getKey() + "," + serializer.getId(oaf));
145 148
                    //TODO here output
146 149
                    context.write((TextKeyOut), new ImmutableBytesWritable(rel.getValue().getBytes()));
147 150

  
148 151
                }
149 152
            }
150

  
151

  
152 153
        } catch (Throwable e) {
153 154
            log.error("Error writing relation to M/R output", e);
154 155
        }
155 156

  
156

  
157 157
        // Existing Hbase relations are generated here
158 158
        if (entityConfigTable.getDescriptors(oaf.getEntity().getType()) != null) {
159 159

  
......
169 169

  
170 170
                        //  builder.getEntityBuilder().addCachedRel(rel);
171 171

  
172
                            Text TextKeyOut = new Text(ld.getRelDescriptor().getRelType().toString()
173
                                    + "," + Serializer.getId(rel, context.getConfiguration().get("stats.delim"),
174
                                    context.getConfiguration().get("stats.enclChar")));
172
                            Text TextKeyOut = new Text(ld.getRelDescriptor().getRelType().toString() + "," + serializer.getId(rel));
175 173

  
176
                            String buff = Serializer.serialize(rel, context.getConfiguration().get("stats.delim"), context.getConfiguration().get("stats.enclChar"));
174
                            String buff = serializer.serialize(rel);
177 175

  
178 176
                            context.write((TextKeyOut), new ImmutableBytesWritable(buff.getBytes()));
179

  
180 177
                            context.getCounter(REL_COUNTERS.valueOf(rel.getRelType().toString())).increment(1);
181 178
                        }
182

  
183 179
                    }
184 180
                } catch (Throwable e) {
185 181
                    log.error("Error for record ", e);
186 182
                    context.getCounter(ERROR_COUNTERS.rottenRelations).increment(1);
187 183

  
188 184
                }
189

  
190 185
            }
191 186
        }
192

  
193 187
    }
194 188

  
195 189
    private void decodeRelation(final Oaf body, final Context context, Result result, final LinkDescriptor ld, List<OafRel> rels) {
......
203 197
                for (Entry<byte[], byte[]> e : columnMap.entrySet()) {
204 198

  
205 199
                    Oaf decodedOaf = decodeProto(e.getValue(), context);
200
                    OafRel.Builder relBuilder = OafRel.newBuilder(decodedOaf.getRel());
206 201

  
207
                        OafRel.Builder relBuilder = OafRel.newBuilder(decodedOaf.getRel());
202
                    if (ld.isSymmetric()) {
203
                        RelDescriptor rd = ld.getRelDescriptor();
204
                        relBuilder.setCachedTarget(body.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
205
                    }
208 206

  
209
                        if (ld.isSymmetric()) {
210
                            RelDescriptor rd = ld.getRelDescriptor();
211
                            relBuilder.setCachedTarget(body.getEntity()).setRelType(rd.getRelType()).setSubRelType(rd.getSubRelType());
212
                        }
213

  
214

  
215
                        OafRel oafRel = relBuilder.setChild(ld.isChild()).build();
216
                        rels.add(oafBuilder.setDataInfo(decodedOaf.getDataInfo()).setRel(oafRel).build().getRel());
217

  
218

  
219

  
207
                    OafRel oafRel = relBuilder.setChild(ld.isChild()).build();
208
                    rels.add(oafBuilder.setDataInfo(decodedOaf.getDataInfo()).setRel(oafRel).build().getRel());
220 209
                }
221 210
            }
222 211

  
223 212
        } catch (Throwable throwable) {
224 213
            log.error("Error Decoding relation for: " + body.getRel().getRelType() + " " + body.getEntity().getId() + " ", throwable);
225 214
            context.getCounter(ERROR_COUNTERS.rottenRelations).increment(1);
226

  
227 215
        }
228 216
    }
229 217

  
230

  
231 218
    private Oaf decodeProto(final byte[] body, Context context) {
232 219
        try {
233 220
            return Oaf.parseFrom(body);
234 221
        } catch (Exception e) {
235 222
            log.error(e);
236 223
            context.getCounter(ERROR_COUNTERS.rottenRecords).increment(1);
237

  
238

  
239 224
        }
240 225
        return null;
241 226
    }
......
260 245
        return columnMap != null && !columnMap.isEmpty();
261 246
    }
262 247

  
263
    @Override
264
    protected void cleanup(Context context) throws IOException, InterruptedException {
265

  
266
        super.cleanup(context);
267
    }
268

  
269 248
    public EntityConfigTable getEntityConfigTable() {
270 249
        return entityConfigTable;
271 250
    }
......
273 252
    public void setEntityConfigTable(EntityConfigTable entityConfigTable) {
274 253
        this.entityConfigTable = entityConfigTable;
275 254
    }
276

  
277 255
}
modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/mapreduce/StatsReducer.java
37 37
			String value = new String(it.next().copyBytes(),Charset.forName("UTF-8"));
38 38
            value = value.trim();
39 39
			MultipleOutputWriter.write(type, new Text(id.getBytes(Charset.forName("UTF-8"))), new Text(value.getBytes(Charset.forName("UTF-8"))), type.toString());
40

  
41 40
		}
42

  
43 41
	}
44 42

  
45 43
	@Override

Also available in: Unified diff