Project

General

Profile

1 27955 claudio.at
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2
3 42734 eri.katsar
import com.google.common.collect.Multimap;
4 47072 tsampikos.
5
6 27955 claudio.at
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
8
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
9 34194 eri.katsar
import eu.dnetlib.data.proto.FieldTypeProtos;
10 29712 eri.katsar
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13 27955 claudio.at
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17 48302 tsampikos.
//import eu.dnetlib.data.proto.PersonProtos;
18 27955 claudio.at
import eu.dnetlib.data.proto.ProjectProtos.Project;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.ResultProtos.Result;
21
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
22 34084 eri.katsar
import org.apache.log4j.Logger;
23 42734 eri.katsar
import org.jsoup.Jsoup;
24 27955 claudio.at
25 54431 tsampikos.
import java.text.DateFormat;
26
import java.text.ParseException;
27 34084 eri.katsar
import java.text.SimpleDateFormat;
28
import java.util.ArrayList;
29
import java.util.Date;
30
import java.util.HashMap;
31
import java.util.List;
32
33 47072 tsampikos.
import org.w3c.dom.Element;
34
import org.w3c.dom.NodeList;
35
import org.xml.sax.InputSource;
36
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
37
import org.w3c.dom.Document;
38
39 27955 claudio.at
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
43 28471 eri.katsar
public class Serializer {
44 27955 claudio.at
45 42734 eri.katsar
    public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
46 29375 eri.katsar
47 41790 eri.katsar
        switch (oaf.getKind()) {
48
            case entity:
49
                OafEntity valueEntity = oaf.getEntity();
50 27955 claudio.at
51 41790 eri.katsar
                switch (valueEntity.getType()) {
52
                    case datasource:
53 27955 claudio.at
54 42734 eri.katsar
                        return buildDatasource(oaf, DELIM, ENCLOSING);
55 27955 claudio.at
56 41790 eri.katsar
                    case organization:
57 27955 claudio.at
58 42734 eri.katsar
                        return buildOrganization(oaf, DELIM, ENCLOSING);
59 27955 claudio.at
60 41790 eri.katsar
                    case project:
61 36921 eri.katsar
62 42734 eri.katsar
                        return buildProject(oaf, DELIM, ENCLOSING);
63 41790 eri.katsar
                    case result:
64 36921 eri.katsar
65 42734 eri.katsar
                        return buildResult(oaf, DELIM, ENCLOSING);
66 48302 tsampikos.
                    /*
67 42734 eri.katsar
                    case person:
68
                        return buildPerson(oaf, DELIM, ENCLOSING);
69 48302 tsampikos.
                    */
70 41790 eri.katsar
                    default:
71
                        break;
72
                }
73
                break;
74
            case relation:
75 34202 eri.katsar
76 42734 eri.katsar
                return buildRel(oaf.getRel(), DELIM, ENCLOSING);
77 27955 claudio.at
78 41790 eri.katsar
        }
79
        return null;
80
    }
81 27955 claudio.at
82 42734 eri.katsar
    public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
83 27955 claudio.at
84 41790 eri.katsar
        switch (oaf.getRelType()) {
85
            case resultProject:
86 42734 eri.katsar
                return getResultProject(oaf, DELIM, ENCLOSING);
87 41790 eri.katsar
            default:
88 42734 eri.katsar
                return buildRel(oaf, DELIM, ENCLOSING);
89 41790 eri.katsar
        }
90
    }
91 27955 claudio.at
92 42734 eri.katsar
    private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
93
94
        return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
95
96 41790 eri.katsar
    }
97 27955 claudio.at
98 42734 eri.katsar
    public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
99
        OafEntity valueEntity = oaf.getEntity();
100
        getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
101 36689 eri.katsar
102 41790 eri.katsar
        switch (valueEntity.getType()) {
103
            case datasource:
104 42734 eri.katsar
                getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
105 54431 tsampikos.
                getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
106 41790 eri.katsar
            case result:
107 42734 eri.katsar
                getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
108
                getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
109
                getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
110
                getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
111
                getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
112
                getResultDois(valueEntity, relations, DELIM, ENCLOSING);
113
                getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
114
                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
115 54431 tsampikos.
                getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
116 42734 eri.katsar
117
            case project:
118
                getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
119
                getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
120
121 41790 eri.katsar
            default:
122 42734 eri.katsar
        }
123 29637 eri.katsar
124 42734 eri.katsar
    }
125
126 54431 tsampikos.
    private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
127
        Datasource d = valueEntity.getDatasource();
128
        Metadata metadata = d.getMetadata();
129 42734 eri.katsar
130 54431 tsampikos.
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
131
    }
132
133
    private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
134
        Result result = valueEntity.getResult();
135
        Result.Metadata metadata = result.getMetadata();
136
137
        StringBuilder buff = new StringBuilder();
138
        String titleString = "";
139
140
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
141
            StructuredProperty title = metadata.getTitleList().get(i);
142
143
            titleString = title.getValue().replaceAll("\\s+", " ");
144
            titleString = titleString.replaceAll("\n", " ");
145
            break;
146
        }
147
148
        //  pubtitle
149
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
150
151
        String sources = "";
152
        for (Instance instance : (result.getInstanceList())) {
153
            List<String> urls = instance.getUrlList();
154
            for (String url : urls) {
155
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
156
            }
157
        }
158
159
        //sources
160
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
161
        buff.append(sources);
162
163
        relations.put("resultExtra", buff.toString());
164
    }
165
166 42734 eri.katsar
    private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
167
168
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
169
        for (String oid : oafEntity.getOriginalIdList()) {
170
            relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
171 41790 eri.katsar
        }
172 29739 eri.katsar
173 41790 eri.katsar
    }
174 27955 claudio.at
175 42734 eri.katsar
    private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
176
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
177 29637 eri.katsar
178 42734 eri.katsar
    }
179 27955 claudio.at
180 42734 eri.katsar
    private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
181
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
182
            relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
183
        }
184
    }
185
186
    private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
187
        StringBuilder buff = new StringBuilder();
188
        buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
189 41790 eri.katsar
        // TODO is declared as int!!!
190
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
191
        if (diff < 0) {
192
            diff = 0;
193
        }
194 27955 claudio.at
195 42734 eri.katsar
        buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
196
        return buff.toString();
197 41790 eri.katsar
    }
198 27955 claudio.at
199
200 42734 eri.katsar
    private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
201 41790 eri.katsar
        Datasource d = valueEntity.getDatasource();
202
        Metadata metadata = d.getMetadata();
203 27955 claudio.at
204 41790 eri.katsar
        for (StringField lang : metadata.getOdlanguagesList()) {
205 42734 eri.katsar
            rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
206 41790 eri.katsar
        }
207
    }
208 27955 claudio.at
209 42734 eri.katsar
    private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
210
211 41790 eri.katsar
        Result d = valueEntity.getResult();
212 42734 eri.katsar
        Result.Metadata metadata = d.getMetadata();
213
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
214
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
215
        }
216 27955 claudio.at
217 42734 eri.katsar
    }
218 27955 claudio.at
219 42734 eri.katsar
    private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
220 27955 claudio.at
221 42734 eri.katsar
        for (StructuredProperty pid : valueEntity.getPidList()) {
222
223
            rels.put("resultPid",
224
                    getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
225 41790 eri.katsar
        }
226
    }
227 27955 claudio.at
228 42734 eri.katsar
    private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
229 27955 claudio.at
230 41790 eri.katsar
        Result result = valueEntity.getResult();
231 29735 eri.katsar
232 41790 eri.katsar
        for (Instance instance : (result.getInstanceList())) {
233
            String classification = instance.getInstancetype().getClassname();
234
            if (classification != null && !classification.isEmpty()) {
235 42734 eri.katsar
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
236 41790 eri.katsar
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
237
                break;
238
            }
239
        }
240 42734 eri.katsar
    }
241 27955 claudio.at
242 42734 eri.katsar
    private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
243
        Result result = valueEntity.getResult();
244
        //description
245
        for (StringField s : result.getMetadata().getDescriptionList()) {
246
247
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
248
        }
249 41790 eri.katsar
    }
250 27955 claudio.at
251 42734 eri.katsar
    private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
252 27955 claudio.at
253 41790 eri.katsar
        Result result = valueEntity.getResult();
254 27955 claudio.at
255 47072 tsampikos.
256 42734 eri.katsar
        for (Result.Context context : result.getMetadata().getContextList()) {
257 27955 claudio.at
258 42734 eri.katsar
            rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
259 41790 eri.katsar
        }
260
    }
261 27955 claudio.at
262 42734 eri.katsar
    private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
263 41790 eri.katsar
        Result result = valueEntity.getResult();
264 29382 eri.katsar
265 34200 eri.katsar
//TODO hosted by
266 41790 eri.katsar
        for (Instance instance : (result.getInstanceList())) {
267
            String hostedBy = instance.getHostedby().getKey();
268
            if (hostedBy != null && !hostedBy.isEmpty()) {
269 42734 eri.katsar
                rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
270 41790 eri.katsar
            }
271
        }
272 34084 eri.katsar
273 34200 eri.katsar
//TODO  collected froms
274 41790 eri.katsar
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
275 34084 eri.katsar
276 41790 eri.katsar
            String collectedFrom = collectedFromValue.getKey();
277 42734 eri.katsar
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
278
                rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
279
            }
280 41790 eri.katsar
        }
281
    }
282 29386 eri.katsar
283 42734 eri.katsar
    private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
284 29380 eri.katsar
285 42734 eri.katsar
        Result d = valueEntity.getResult();
286
        Result.Metadata metadata = d.getMetadata();
287 27955 claudio.at
288 42734 eri.katsar
        List<StructuredProperty> Topics = metadata.getSubjectList();
289 29754 eri.katsar
290 42734 eri.katsar
        for (StructuredProperty topic : Topics) {
291
            // TODO result topics
292
            rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
293 41790 eri.katsar
        }
294
    }
295 29957 eri.katsar
296 27955 claudio.at
297 42734 eri.katsar
    private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
298
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
299
            if (extraInfo.getName().equals("result citations")) {
300 47072 tsampikos.
                DOMParser parser = new DOMParser();
301
                try {
302
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
303
                    Document doc = parser.getDocument();
304
                    doc.getDocumentElement().normalize();
305
306
                    NodeList citations = doc.getElementsByTagName("citation");
307
                    for (int temp = 0; temp < citations.getLength(); temp++) {
308
                        Element citation = (Element) citations.item(temp);
309
                        NodeList ids = citation.getElementsByTagName("id");
310
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
311
                            Element id = (Element) ids.item(temp1);
312
                            if(id.getAttribute("type").equals("openaire")){
313
                                //System.out.println(id.getAttribute("value"));
314
                                rels.put("resultCitation", id.getAttribute("value"));
315
                            }
316
                        }
317
                    }
318
                } catch (Exception e) {
319
320
                }
321
322
                /*
323 42734 eri.katsar
                rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
324
                        getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
325 47072 tsampikos.
                        */
326 41790 eri.katsar
            }
327 27955 claudio.at
328 41790 eri.katsar
        }
329
    }
330 27955 claudio.at
331 42734 eri.katsar
    private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
332
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
333
        StringBuilder buff = new StringBuilder();
334 27955 claudio.at
335 41790 eri.katsar
        // name
336
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
337 42734 eri.katsar
            buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
338 41790 eri.katsar
        } else {
339 42734 eri.katsar
            buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
340 41790 eri.katsar
        }
341 54431 tsampikos.
342 41790 eri.katsar
        // type
343 42734 eri.katsar
        if (metadata.hasDatasourcetype()) {
344
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
345 41790 eri.katsar
        }
346 27955 claudio.at
347 41790 eri.katsar
        // compatibility,
348 42734 eri.katsar
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
349 27955 claudio.at
350 41790 eri.katsar
        // dateofvalidation,
351 42734 eri.katsar
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
352 27955 claudio.at
353 41790 eri.katsar
        // yearofvalidation,
354 42734 eri.katsar
        buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
355 27955 claudio.at
356 54431 tsampikos.
        //harvested
357
        buff.append(getStringField("false", DELIM, ENCLOSING));
358 27955 claudio.at
359 45523 tsampikos.
        //piwik_id
360
        String piwik_id = "";
361
        for (String oid : oaf.getEntity().getOriginalIdList()) {
362
            if (oid.contains("piwik")) {
363
                piwik_id = oid.split(":")[1];
364
                break;
365
            }
366
        }
367
        buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
368
369 42734 eri.katsar
        return buff.toString();
370 27955 claudio.at
371 41790 eri.katsar
    }
372 27955 claudio.at
373 42734 eri.katsar
    private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
374 27955 claudio.at
375 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
376
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
377 27955 claudio.at
378 41790 eri.katsar
        // `name`,
379 42734 eri.katsar
        buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
380
381 41790 eri.katsar
        // `country`,
382 53471 tsampikos.
        buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
383 29323 eri.katsar
384 42734 eri.katsar
        return buff.toString();
385 41790 eri.katsar
    }
386 27955 claudio.at
387 42734 eri.katsar
    private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
388
        StringBuilder buff = new StringBuilder();
389 27955 claudio.at
390 42734 eri.katsar
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
391 27955 claudio.at
392 42734 eri.katsar
        //  publisher
393
        buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
394
395
        //  journal
396
        buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING));  //#null#!
397
398 41790 eri.katsar
        // year
399 42734 eri.katsar
        buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
400 27955 claudio.at
401 54431 tsampikos.
        // date
402 42734 eri.katsar
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
403 29211 eri.katsar
404 41790 eri.katsar
        // bestlicense
405 42734 eri.katsar
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
406 29735 eri.katsar
407 41790 eri.katsar
        // type
408 42734 eri.katsar
        buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
409
410 41790 eri.katsar
        // embargo_end_date
411 42734 eri.katsar
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
412 29637 eri.katsar
413 41790 eri.katsar
        // `authors`,
414 54431 tsampikos.
        int authors = metadata.getAuthorCount();
415 42734 eri.katsar
416 54431 tsampikos.
417 41790 eri.katsar
        String delayed = "no";
418 27955 claudio.at
419 42734 eri.katsar
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
420 27955 claudio.at
421 48302 tsampikos.
            /*
422 41790 eri.katsar
            if (rel.getRelType().equals(RelType.personResult)) {
423 29634 eri.katsar
424 41790 eri.katsar
                authors++;
425 48302 tsampikos.
            } else
426
            */
427
            if (rel.getRelType().equals(RelType.resultProject))
428 41790 eri.katsar
            // TODO remember : in result Project, first id is project, second is
429
            // result.
430
            {
431 42734 eri.katsar
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
432
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
433 41790 eri.katsar
                if (Integer.parseInt(daysfromend) > 0) {
434
                    delayed = "yes";
435
                }
436
            }
437
        }
438 42734 eri.katsar
439 41790 eri.katsar
        // `delayed`,
440 42734 eri.katsar
        buff.append(getStringField(delayed, DELIM, ENCLOSING));
441
        //authors
442
        buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
443 29336 eri.katsar
444 42734 eri.katsar
        return buff.toString();
445
446 41790 eri.katsar
    }
447 31183 eri.katsar
448 42734 eri.katsar
449 53034 tsampikos.
    /*
450 42734 eri.katsar
    private static String getBestLicense(Result result) {
451 41790 eri.katsar
        Qualifier bestLicense = null;
452
        LicenseComparator lc = new LicenseComparator();
453
        for (Instance instance : (result.getInstanceList())) {
454 53034 tsampikos.
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
455
                bestLicense = instance.getLicence();
456
            }
457
        }
458
        if (bestLicense != null) {
459
            return bestLicense.getClassname();
460
        } else {
461
            return null;
462
        }
463
    }
464
465
    // TODO here iterate over all values
466
    private static String getAccessMode(Result result) {
467
        String accessMode = " ";
468
        for (Instance instance : (result.getInstanceList())) {
469
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
470
                accessMode = instance.getLicence().getClassname();
471
                break;
472
            }
473
474
        }
475
476
        return accessMode;
477
    }
478
    */
479
480
    private static String getBestLicense(Result result) {
481
        Qualifier bestLicense = null;
482
        LicenseComparator lc = new LicenseComparator();
483
        for (Instance instance : (result.getInstanceList())) {
484 50242 tsampikos.
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
485
                bestLicense = instance.getAccessright();
486 41790 eri.katsar
            }
487
        }
488
        if (bestLicense != null) {
489
            return bestLicense.getClassname();
490
        } else {
491
            return null;
492
        }
493
    }
494 27955 claudio.at
495 41790 eri.katsar
    // TODO here iterate over all values
496 42734 eri.katsar
    private static String getAccessMode(Result result) {
497
        String accessMode = " ";
498 41790 eri.katsar
        for (Instance instance : (result.getInstanceList())) {
499 50242 tsampikos.
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
500
                accessMode = instance.getAccessright().getClassname();
501 41790 eri.katsar
                break;
502
            }
503 27955 claudio.at
504 41790 eri.katsar
        }
505 31279 eri.katsar
506 41790 eri.katsar
        return accessMode;
507
    }
508 27955 claudio.at
509 42734 eri.katsar
    private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
510 27955 claudio.at
511 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
512
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
513 43392 tsampikos.
514 41790 eri.katsar
        // `acronym`,
515
        String acronym = metadata.getAcronym().getValue();
516
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
517
            acronym = metadata.getTitle().getValue();
518
        }
519 42734 eri.katsar
        buff.append(getStringField(acronym, DELIM, ENCLOSING));
520 31183 eri.katsar
521 54431 tsampikos.
        //title
522 42734 eri.katsar
        buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
523 41790 eri.katsar
524 54431 tsampikos.
        //funding_lvl
525 41790 eri.katsar
        List<StringField> fundList = metadata.getFundingtreeList();
526
        if (!fundList.isEmpty()) // `funding_lvl0`,
527
        {
528
            //TODO funder + 3 funding levels
529
           /* funder text,
530
            funding_lvl0 text,
531 37706 eri.katsar
	        funding_lvl1 text,
532
	        funding_lvl2 text,
533
	        funding_lvl3 text,*/
534 42734 eri.katsar
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
535 41790 eri.katsar
        } else {
536 42734 eri.katsar
            buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
537 41790 eri.katsar
        }
538 36995 eri.katsar
539 54431 tsampikos.
        //sc39
540 41790 eri.katsar
        String sc39 = metadata.getEcsc39().getValue().toString();
541
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
542
            sc39 = "yes";
543
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
544
            sc39 = "no";
545
        }
546 42734 eri.katsar
        buff.append(getStringField(sc39, DELIM, ENCLOSING));
547 27955 claudio.at
548 45523 tsampikos.
        //project_type
549
        buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
550
551 41790 eri.katsar
        // start_year
552 42734 eri.katsar
        buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
553 27955 claudio.at
554 41790 eri.katsar
        // end_year
555 42734 eri.katsar
        buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
556 27955 claudio.at
557 41790 eri.katsar
        // duration enddate-startdate
558 42734 eri.katsar
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
559 27955 claudio.at
560 41790 eri.katsar
        // haspubs
561 42734 eri.katsar
        buff.append(getStringField("no", DELIM, ENCLOSING));
562 27955 claudio.at
563 41790 eri.katsar
        // numpubs
564 42734 eri.katsar
        buff.append(getNumericField("0", DELIM, ENCLOSING));
565
566 41790 eri.katsar
        // enddate
567 54431 tsampikos.
        buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
568 42734 eri.katsar
569 41790 eri.katsar
        // startdate
570 54431 tsampikos.
        buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
571 27955 claudio.at
572 41790 eri.katsar
        // `daysforlastpub`,
573 42734 eri.katsar
        buff.append(getNumericField("", DELIM, ENCLOSING));
574
575 41790 eri.katsar
        // `delayedpubs`,
576 42734 eri.katsar
        buff.append(getNumericField("", DELIM, ENCLOSING));
577
578
        //call identifier
579
        buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
580 54431 tsampikos.
581 42734 eri.katsar
        //code
582
        buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
583
584
        return buff.toString();
585 27955 claudio.at
586 41790 eri.katsar
    }
587 27955 claudio.at
588 48302 tsampikos.
    /*
589 42734 eri.katsar
    private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
590
        StringBuilder buff = new StringBuilder();
591 27955 claudio.at
592 42734 eri.katsar
        PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
593 27955 claudio.at
594 41790 eri.katsar
        // `person_id`,
595 43392 tsampikos.
        //buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
596
597
598 41790 eri.katsar
        // person_result
599 42734 eri.katsar
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
600 27955 claudio.at
601 42734 eri.katsar
602
        //firstName
603
        buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
604
605
        //LastName
606
607
        String secondName = new String();
608
        for (StringField s : metadata.getSecondnamesList()) {
609
            secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
610
        }
611
612
        buff.append(getStringField(secondName, DELIM, ENCLOSING));
613
614 41790 eri.katsar
        // `fullname`,
615 42734 eri.katsar
        buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
616 34202 eri.katsar
617 41790 eri.katsar
        // `Nationality`,
618 42734 eri.katsar
        buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
619
620 41790 eri.katsar
        // `Email`,
621 42734 eri.katsar
        buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
622
623 41790 eri.katsar
        // `Phone`,
624 42734 eri.katsar
        buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
625
626
        //deletedByInference
627
628
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
629
630 41790 eri.katsar
        // `number`,
631 42734 eri.katsar
        buff.append(getStringField("1", DELIM, ENCLOSING));
632 34202 eri.katsar
633 42734 eri.katsar
        return buff.toString();
634
635 41790 eri.katsar
    }
636 48302 tsampikos.
    */
637 34202 eri.katsar
638
639 42734 eri.katsar
    private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
640 34202 eri.katsar
641 41790 eri.katsar
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
642 29754 eri.katsar
643 41790 eri.katsar
            String[] split = startdate.split("-");
644 27955 claudio.at
645 41790 eri.katsar
            if (split == null || split.length == 0) {
646 42734 eri.katsar
                return ENCLOSING + "0" + ENCLOSING + DELIM;
647 41790 eri.katsar
            }
648 27955 claudio.at
649 41790 eri.katsar
            int Startdate = Integer.parseInt(split[0]);
650 27955 claudio.at
651 41790 eri.katsar
            split = enddate.split("-");
652 27955 claudio.at
653 41790 eri.katsar
            if (split == null || split.length == 0) {
654 42734 eri.katsar
                return ENCLOSING + "0" + ENCLOSING + DELIM;
655 41790 eri.katsar
            }
656 27955 claudio.at
657 41790 eri.katsar
            int Enddate = Integer.parseInt(split[0]);
658 29211 eri.katsar
659 41790 eri.katsar
            int diff = Enddate - Startdate;
660 29211 eri.katsar
661 42734 eri.katsar
            return ENCLOSING + diff + ENCLOSING + DELIM;
662 29384 eri.katsar
663 41790 eri.katsar
        }
664 31279 eri.katsar
665 42734 eri.katsar
        return ENCLOSING + "0" + ENCLOSING + DELIM;
666 41790 eri.katsar
    }
667 29211 eri.katsar
668 42734 eri.katsar
    private static String getYearInt(String data, String DELIM, String ENCLOSING) {
669 41790 eri.katsar
        if (data == null || data.isEmpty() || data.equals("-1")) {
670 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
671 41790 eri.katsar
        }
672 27955 claudio.at
673 41790 eri.katsar
        String[] split = data.split("-");
674 29384 eri.katsar
675 41790 eri.katsar
        if (split == null || split.length == 0) {
676 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
677 41790 eri.katsar
        }
678 29336 eri.katsar
679 41790 eri.katsar
        String year = split[0];
680 29336 eri.katsar
681 41790 eri.katsar
        year = cleanNumber(year);
682 27955 claudio.at
683 42734 eri.katsar
        if (year == null || year.isEmpty()) year = "0";
684 30977 eri.katsar
685 42734 eri.katsar
        return ENCLOSING + year + ENCLOSING + DELIM;
686 27955 claudio.at
687 41790 eri.katsar
    }
688 30043 eri.katsar
689 42734 eri.katsar
    private static String cleanNumber(String number) {
690 41790 eri.katsar
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
691
        return number;
692
    }
693 30043 eri.katsar
694 43739 tsampikos.
    private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
695
696
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
697
698
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
699
700
    }
701
702 42734 eri.katsar
    private static String getStringField(String data, String DELIM, String ENCLOSING) {
703 30977 eri.katsar
704 42734 eri.katsar
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
705 27955 claudio.at
706 42734 eri.katsar
        return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
707 27955 claudio.at
708 41790 eri.katsar
    }
709 27955 claudio.at
710 42734 eri.katsar
    private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
711 41790 eri.katsar
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
712 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
713 41790 eri.katsar
        } else {
714 42734 eri.katsar
            data = data.replace(DELIM, " ");
715
            data = data.replace(ENCLOSING, " ");
716 53034 tsampikos.
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
717 54431 tsampikos.
            try {
718
                DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
719
                data = format.format(format.parse(data));
720
                return ENCLOSING + data + ENCLOSING + DELIM;
721
            } catch (ParseException e) {
722
                return ENCLOSING + "0" + ENCLOSING + DELIM;
723
            }
724 41790 eri.katsar
        }
725
    }
726 27955 claudio.at
727 42734 eri.katsar
    private static String getNumericField(String data, String DELIM, String ENCLOSING) {
728 41790 eri.katsar
        if (data == null || data.isEmpty() || data.equals("")) {
729 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
730 41790 eri.katsar
        } else {
731 42734 eri.katsar
            return ENCLOSING + data + ENCLOSING + DELIM;
732 41790 eri.katsar
        }
733
    }
734 29634 eri.katsar
735 42734 eri.katsar
    public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
736 41790 eri.katsar
        switch (oaf.getKind()) {
737
            case entity:
738 42734 eri.katsar
                return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
739 41790 eri.katsar
            case relation:
740 36689 eri.katsar
741 42734 eri.katsar
                return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
742 36689 eri.katsar
743 41790 eri.katsar
        }
744
        return null;
745 27955 claudio.at
746 41790 eri.katsar
    }
747 27955 claudio.at
748 42734 eri.katsar
    public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
749
        return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
750 41790 eri.katsar
    }
751 27955 claudio.at
752 42734 eri.katsar
    public static String clean(String value, String DELIM, String ENCLOSING) {
753 41790 eri.katsar
        if (value != null) {
754 36689 eri.katsar
755 42734 eri.katsar
            value = value.replaceAll("[\"\\r\\\\;]", "");
756
            value = value.replace(DELIM, " ");
757
            value = value.replace(ENCLOSING, " ");
758 47371 tsampikos.
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
759
            //value = value.replace("\"", "");
760
            //value = value.replace("'", "");
761
            //value = value.replace("«", " ");
762
            //value = value.replace("»", " ");
763 36689 eri.katsar
764 47371 tsampikos.
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
765 27955 claudio.at
766 42734 eri.katsar
            return value;
767 27955 claudio.at
768 42734 eri.katsar
        } else {
769
            return "";
770
771 41790 eri.katsar
        }
772 27955 claudio.at
773 41790 eri.katsar
    }
774 27955 claudio.at
775 42734 eri.katsar
    public static String cleanId(String value, String DELIM, String ENCLOSING) {
776 41790 eri.katsar
        if (value != null) {
777
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
778 27955 claudio.at
779 41790 eri.katsar
            // to datacite____:: )
780
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
781
            value = value.replaceFirst(".*\\|", "");
782 42734 eri.katsar
            value = value.replace("\n", "");
783
            value = value.replace(ENCLOSING, "");
784
            value = value.replace(DELIM, "");
785
            value = value.replace("\"", "");
786
            value = value.replace("«", " ");
787
            value = value.replace("»", " ");
788 41790 eri.katsar
        }
789 29336 eri.katsar
790 42734 eri.katsar
        return ENCLOSING + value + ENCLOSING;
791
792 41790 eri.katsar
    }
793 31900 eri.katsar
794 42734 eri.katsar
795
    public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
796
        value = value.replace(DELIM, " ");
797
        value = value.replace(ENCLOSING, " ");
798
        value = value.replace(" ", "");
799
        value = value.replace("\n", "");
800
        return value;
801
    }
802
803
804
    public static long DATEDIFF(String startDate, String endDate) {
805 41790 eri.katsar
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
806
        long days = 0l;
807
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
808
        // <startdate>2011-09-01</startdate>
809
        // <enddate>2015-08-31</enddate>
810
        Date dateIni = null;
811
        Date dateFin = null;
812 37693 eri.katsar
813 41790 eri.katsar
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
814
            return 0;
815
        }
816
        try {
817
            dateIni = (Date) format.parse(startDate);
818
            dateFin = (Date) format.parse(endDate);
819
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
820
        } catch (Exception e) {
821 42734 eri.katsar
822 41790 eri.katsar
            return 0;
823
        }
824 37693 eri.katsar
825 41790 eri.katsar
        return days;
826
    }
827 37693 eri.katsar
828
829 27955 claudio.at
}