Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

    
3
import com.google.common.collect.Multimap;
4

    
5

    
6
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
8
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
9
import eu.dnetlib.data.proto.FieldTypeProtos;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
//import eu.dnetlib.data.proto.PersonProtos;
18
import eu.dnetlib.data.proto.ProjectProtos.Project;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.ResultProtos.Result;
21
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
22
import org.apache.log4j.Logger;
23
import org.jsoup.Jsoup;
24

    
25
import java.text.DateFormat;
26
import java.text.ParseException;
27
import java.text.SimpleDateFormat;
28
import java.util.ArrayList;
29
import java.util.Date;
30
import java.util.HashMap;
31
import java.util.List;
32

    
33
import org.w3c.dom.Element;
34
import org.w3c.dom.NodeList;
35
import org.xml.sax.InputSource;
36
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
37
import org.w3c.dom.Document;
38

    
39
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
43
public class Serializer {
44
    private static Logger logger = Logger.getLogger(Serializer.class);
45

    
46
    public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
47

    
48
        switch (oaf.getKind()) {
49
            case entity:
50
                OafEntity valueEntity = oaf.getEntity();
51

    
52
                switch (valueEntity.getType()) {
53
                    case datasource:
54

    
55
                        return buildDatasource(oaf, DELIM, ENCLOSING);
56

    
57
                    case organization:
58

    
59
                        return buildOrganization(oaf, DELIM, ENCLOSING);
60

    
61
                    case project:
62

    
63
                        return buildProject(oaf, DELIM, ENCLOSING);
64
                    case result:
65

    
66
                        return buildResult(oaf, DELIM, ENCLOSING);
67
                    /*
68
                    case person:
69
                        return buildPerson(oaf, DELIM, ENCLOSING);
70
                    */
71
                    default:
72
                        break;
73
                }
74
                break;
75
            case relation:
76

    
77
                return buildRel(oaf.getRel(), DELIM, ENCLOSING);
78

    
79
        }
80
        return null;
81
    }
82

    
83
    public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
84

    
85
        switch (oaf.getRelType()) {
86
            case resultProject:
87
                return getResultProject(oaf, DELIM, ENCLOSING);
88
            default:
89
                return buildRel(oaf, DELIM, ENCLOSING);
90
        }
91
    }
92

    
93
    private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
94

    
95
        return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
96

    
97
    }
98

    
99
    public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
100
        OafEntity valueEntity = oaf.getEntity();
101
        getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
102

    
103
        switch (valueEntity.getType()) {
104
            case datasource:
105
                getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
106
//                getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
107
            case result:
108
                getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
109
                getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
110
                getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
111
                getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
112
                getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
113
                getResultDois(valueEntity, relations, DELIM, ENCLOSING);
114
                getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
115
//                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
116
//                getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
117

    
118
            case project:
119
                getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
120
                getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
121

    
122
            default:
123
        }
124

    
125
    }
126

    
127
    private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
128
        Datasource d = valueEntity.getDatasource();
129
        Metadata metadata = d.getMetadata();
130

    
131
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
132
    }
133

    
134
    private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
135
        Result result = valueEntity.getResult();
136
        Result.Metadata metadata = result.getMetadata();
137

    
138
        StringBuilder buff = new StringBuilder();
139
        String titleString = "";
140

    
141
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
142
            StructuredProperty title = metadata.getTitleList().get(i);
143

    
144
            titleString = title.getValue().replaceAll("\\s+", " ");
145
            titleString = titleString.replaceAll("\n", " ");
146
            break;
147
        }
148

    
149
        //  pubtitle
150
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
151

    
152
        String sources = "";
153
        for (Instance instance : (result.getInstanceList())) {
154
            List<String> urls = instance.getUrlList();
155
            for (String url : urls) {
156
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
157
            }
158
        }
159

    
160
        //sources
161
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
162
        buff.append(sources);
163

    
164
        relations.put("resultExtra", buff.toString());
165
    }
166

    
167
    private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
168

    
169
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
170
        for (String oid : oafEntity.getOriginalIdList()) {
171
            relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
172
        }
173

    
174
    }
175

    
176
    private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
177
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
178

    
179
    }
180

    
181
    private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
182
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
183
            relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
184
        }
185
    }
186

    
187
    private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
188
        StringBuilder buff = new StringBuilder();
189
        buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
190
        // TODO is declared as int!!!
191
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
192
        if (diff < 0) {
193
            diff = 0;
194
        }
195

    
196
        buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
197
        return buff.toString();
198
    }
199

    
200

    
201
    private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
202
        Datasource d = valueEntity.getDatasource();
203
        Metadata metadata = d.getMetadata();
204

    
205
        for (StringField lang : metadata.getOdlanguagesList()) {
206
            rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
207
        }
208
    }
209

    
210
    private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
211

    
212
        Result d = valueEntity.getResult();
213
        Result.Metadata metadata = d.getMetadata();
214
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
215
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
216
        }
217

    
218
    }
219

    
220
    private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
221

    
222
        for (StructuredProperty pid : valueEntity.getPidList()) {
223

    
224
            rels.put("resultPid",
225
                    getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
226
        }
227
    }
228

    
229
    private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
230

    
231
        Result result = valueEntity.getResult();
232

    
233
        for (Instance instance : (result.getInstanceList())) {
234
            String classification = instance.getInstancetype().getClassname();
235
            if (classification != null && !classification.isEmpty()) {
236
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
237
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
238
//                break;
239
            }
240
        }
241
    }
242

    
243
    private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
244
        Result result = valueEntity.getResult();
245
        //description
246
        for (StringField s : result.getMetadata().getDescriptionList()) {
247

    
248
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
249
        }
250
    }
251

    
252
    private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
253

    
254
        Result result = valueEntity.getResult();
255

    
256

    
257
        for (Result.Context context : result.getMetadata().getContextList()) {
258

    
259
            rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
260
        }
261
    }
262

    
263
    private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
264
        Result result = valueEntity.getResult();
265

    
266
//TODO hosted by
267
        for (Instance instance : (result.getInstanceList())) {
268
            String hostedBy = instance.getHostedby().getKey();
269
            if (hostedBy != null && !hostedBy.isEmpty()) {
270
                rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
271
            }
272
        }
273

    
274
//TODO  collected froms
275
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
276

    
277
            String collectedFrom = collectedFromValue.getKey();
278
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
279
                rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
280
            }
281
        }
282
    }
283

    
284
    private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
285

    
286
        Result d = valueEntity.getResult();
287
        Result.Metadata metadata = d.getMetadata();
288

    
289
        List<StructuredProperty> Topics = metadata.getSubjectList();
290

    
291
        for (StructuredProperty topic : Topics) {
292
            // TODO result topics
293
            rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
294
        }
295
    }
296

    
297

    
298
    private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
299
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
300
            if (extraInfo.getName().equals("result citations")) {
301
                DOMParser parser = new DOMParser();
302
                try {
303
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
304
                    Document doc = parser.getDocument();
305
                    doc.getDocumentElement().normalize();
306

    
307
                    NodeList citations = doc.getElementsByTagName("citation");
308
                    for (int temp = 0; temp < citations.getLength(); temp++) {
309
                        Element citation = (Element) citations.item(temp);
310
                        NodeList ids = citation.getElementsByTagName("id");
311
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
312
                            Element id = (Element) ids.item(temp1);
313
                            if(id.getAttribute("type").equals("openaire")){
314
                                //System.out.println(id.getAttribute("value"));
315
                                rels.put("resultCitation", id.getAttribute("value"));
316
                            }
317
                        }
318
                    }
319
                } catch (Exception e) {
320

    
321
                }
322

    
323
                /*
324
                rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
325
                        getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
326
                        */
327
            }
328

    
329
        }
330
    }
331

    
332
    private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
333
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
334
        StringBuilder buff = new StringBuilder();
335

    
336
        // name
337
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
338
            buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
339
        } else {
340
            buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
341
        }
342

    
343
        // type
344
        if (metadata.hasDatasourcetype()) {
345
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
346
        }
347

    
348
        // compatibility,
349
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
350

    
351
        // latitude
352
        buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING));
353

    
354
        // longtitude
355
        buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING));
356

    
357
        // dateofvalidation,
358
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
359

    
360
        // yearofvalidation,
361
        buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
362

    
363
        //harvested
364
        buff.append(getStringField("false", DELIM, ENCLOSING));
365

    
366
        //piwik_id
367
        String piwik_id = "";
368
        for (String oid : oaf.getEntity().getOriginalIdList()) {
369
            if (oid.contains("piwik")) {
370
                piwik_id = oid.split(":")[1];
371
                break;
372
            }
373
        }
374
        buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
375

    
376
        return buff.toString();
377

    
378
    }
379

    
380
    private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
381

    
382
        StringBuilder buff = new StringBuilder();
383
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
384

    
385
        // `name`,
386
        buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
387

    
388
        // `country`,
389
        buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
390

    
391
        return buff.toString();
392
    }
393

    
394
    private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
395
        StringBuilder buff = new StringBuilder();
396

    
397
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
398

    
399
        String titleString = new String();
400

    
401
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
402
            StructuredProperty title = metadata.getTitleList().get(i);
403

    
404
            if (i == 0) {
405
                titleString = title.getValue().replaceAll("\\s+", " ");
406
                titleString = titleString.replaceAll("\n", " ");
407
            }
408
            break;
409
        }
410

    
411
        //  pubtitle
412
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
413

    
414

    
415
        //  publisher
416
        buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
417

    
418
        //  journal
419
        buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING));  //#null#!
420

    
421
        // year
422
        buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
423

    
424
        // date
425
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
426

    
427
        // bestlicense
428
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
429

    
430
        // type
431
        buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
432

    
433
        // embargo_end_date
434
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
435

    
436
        // `authors`,
437
        int authors = metadata.getAuthorCount();
438

    
439

    
440
        String delayed = "no";
441

    
442
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
443

    
444
            /*
445
            if (rel.getRelType().equals(RelType.personResult)) {
446

    
447
                authors++;
448
            } else
449
            */
450
            if (rel.getRelType().equals(RelType.resultProject))
451
            // TODO remember : in result Project, first id is project, second is
452
            // result.
453
            {
454
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
455
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
456
                if (Integer.parseInt(daysfromend) > 0) {
457
                    delayed = "yes";
458
                }
459
            }
460
        }
461

    
462
        // `delayed`,
463
        buff.append(getStringField(delayed, DELIM, ENCLOSING));
464
        //authors
465
        buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
466

    
467
        String sources = new String();
468

    
469

    
470
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
471
            List<String> urls = instance.getUrlList();
472
            for (String url : urls) {
473
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
474
            }
475
        }
476

    
477
        //sources
478
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
479

    
480
        buff.append(sources);
481

    
482
        return buff.toString();
483

    
484
    }
485

    
486

    
487
    /*
488
    private static String getBestLicense(Result result) {
489
        Qualifier bestLicense = null;
490
        LicenseComparator lc = new LicenseComparator();
491
        for (Instance instance : (result.getInstanceList())) {
492
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
493
                bestLicense = instance.getLicence();
494
            }
495
        }
496
        if (bestLicense != null) {
497
            return bestLicense.getClassname();
498
        } else {
499
            return null;
500
        }
501
    }
502

    
503
    // TODO here iterate over all values
504
    private static String getAccessMode(Result result) {
505
        String accessMode = " ";
506
        for (Instance instance : (result.getInstanceList())) {
507
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
508
                accessMode = instance.getLicence().getClassname();
509
                break;
510
            }
511

    
512
        }
513

    
514
        return accessMode;
515
    }
516
    */
517

    
518
    private static String getBestLicense(Result result) {
519
        Qualifier bestLicense = null;
520
        LicenseComparator lc = new LicenseComparator();
521
        for (Instance instance : (result.getInstanceList())) {
522
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
523
                bestLicense = instance.getAccessright();
524
            }
525
        }
526
        if (bestLicense != null) {
527
            return bestLicense.getClassname();
528
        } else {
529
            return "";
530
        }
531
    }
532

    
533
    // TODO here iterate over all values
534
    private static String getAccessMode(Result result) {
535
        String accessMode = " ";
536
        for (Instance instance : (result.getInstanceList())) {
537
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
538
                accessMode = instance.getAccessright().getClassname();
539
                break;
540
            }
541

    
542
        }
543

    
544
        return accessMode;
545
    }
546

    
547
    private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
548

    
549
        StringBuilder buff = new StringBuilder();
550
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
551
        
552
        // `acronym`,
553
        String acronym = metadata.getAcronym().getValue();
554
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
555
            acronym = metadata.getTitle().getValue();
556
        }
557
        buff.append(getStringField(acronym, DELIM, ENCLOSING));
558

    
559
        //title
560
        buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
561

    
562
        //funding_lvl
563
        List<StringField> fundList = metadata.getFundingtreeList();
564
        if (!fundList.isEmpty()) // `funding_lvl0`,
565
        {
566
            //TODO funder + 3 funding levels
567
           /* funder text,
568
            funding_lvl0 text,
569
	        funding_lvl1 text,
570
	        funding_lvl2 text,
571
	        funding_lvl3 text,*/
572
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
573
        } else {
574
            buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
575
        }
576

    
577
        //sc39
578
        String sc39 = metadata.getEcsc39().getValue().toString();
579
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
580
            sc39 = "yes";
581
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
582
            sc39 = "no";
583
        }
584
        buff.append(getStringField(sc39, DELIM, ENCLOSING));
585

    
586
        //project_type
587
        buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
588

    
589
        // start_year
590
        buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
591

    
592
        // end_year
593
        buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
594

    
595
        // duration enddate-startdate
596
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
597

    
598
        // haspubs
599
        buff.append(getStringField("no", DELIM, ENCLOSING));
600

    
601
        // numpubs
602
        buff.append(getNumericField("0", DELIM, ENCLOSING));
603

    
604
        // enddate
605
        buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
606

    
607
        // startdate
608
        buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
609

    
610
        // `daysforlastpub`,
611
        buff.append(getNumericField("", DELIM, ENCLOSING));
612

    
613
        // `delayedpubs`,
614
        buff.append(getNumericField("", DELIM, ENCLOSING));
615

    
616
        //call identifier
617
        buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
618

    
619
        //code
620
        buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
621

    
622
        return buff.toString();
623

    
624
    }
625

    
626
    /*
627
    private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
628
        StringBuilder buff = new StringBuilder();
629

    
630
        PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
631

    
632
        // `person_id`,
633
        //buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
634

    
635

    
636
        // person_result
637
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
638

    
639

    
640
        //firstName
641
        buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
642

    
643
        //LastName
644

    
645
        String secondName = new String();
646
        for (StringField s : metadata.getSecondnamesList()) {
647
            secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
648
        }
649

    
650
        buff.append(getStringField(secondName, DELIM, ENCLOSING));
651

    
652
        // `fullname`,
653
        buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
654

    
655
        // `Nationality`,
656
        buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
657

    
658
        // `Email`,
659
        buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
660

    
661
        // `Phone`,
662
        buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
663

    
664
        //deletedByInference
665

    
666
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
667

    
668
        // `number`,
669
        buff.append(getStringField("1", DELIM, ENCLOSING));
670

    
671
        return buff.toString();
672

    
673
    }
674
    */
675

    
676

    
677
    private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
678

    
679
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
680

    
681
            String[] split = startdate.split("-");
682

    
683
            if (split == null || split.length == 0) {
684
                return ENCLOSING + "0" + ENCLOSING + DELIM;
685
            }
686

    
687
            int Startdate = Integer.parseInt(split[0]);
688

    
689
            split = enddate.split("-");
690

    
691
            if (split == null || split.length == 0) {
692
                return ENCLOSING + "0" + ENCLOSING + DELIM;
693
            }
694

    
695
            int Enddate = Integer.parseInt(split[0]);
696

    
697
            int diff = Enddate - Startdate;
698

    
699
            return ENCLOSING + diff + ENCLOSING + DELIM;
700

    
701
        }
702

    
703
        return ENCLOSING + "0" + ENCLOSING + DELIM;
704
    }
705

    
706
    private static String getYearInt(String data, String DELIM, String ENCLOSING) {
707
        if (data == null || data.isEmpty() || data.equals("-1")) {
708
            return ENCLOSING + "0" + ENCLOSING + DELIM;
709
        }
710

    
711
        String[] split = data.split("-");
712

    
713
        if (split == null || split.length == 0) {
714
            return ENCLOSING + "0" + ENCLOSING + DELIM;
715
        }
716

    
717
        String year = split[0];
718

    
719
        year = cleanNumber(year);
720

    
721
        if (year == null || year.isEmpty()) year = "0";
722

    
723
        return ENCLOSING + year + ENCLOSING + DELIM;
724

    
725
    }
726

    
727
    private static String cleanNumber(String number) {
728
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
729
        return number;
730
    }
731

    
732
    private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
733

    
734
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
735

    
736
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
737

    
738
    }
739

    
740
    private static String getStringField(String data, String DELIM, String ENCLOSING) {
741

    
742
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
743

    
744
        return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
745

    
746
    }
747

    
748
    private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
749
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
750
            return ENCLOSING + "0" + ENCLOSING + DELIM;
751
        } else {
752
            data = data.replace(DELIM, " ");
753
            data = data.replace(ENCLOSING, " ");
754
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
755
            try {
756
                DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
757
                data = format.format(format.parse(data));
758
                return ENCLOSING + data + ENCLOSING + DELIM;
759
            } catch (ParseException e) {
760
                return ENCLOSING + "0" + ENCLOSING + DELIM;
761
            }
762
        }
763
    }
764

    
765
    private static String getNumericField(String data, String DELIM, String ENCLOSING) {
766
        if (data == null || data.isEmpty() || data.equals("")) {
767
            return ENCLOSING + "0" + ENCLOSING + DELIM;
768
        } else {
769
            return ENCLOSING + data + ENCLOSING + DELIM;
770
        }
771
    }
772

    
773
    public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
774
        switch (oaf.getKind()) {
775
            case entity:
776
                return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
777
            case relation:
778
                return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
779

    
780
        }
781
        return null;
782

    
783
    }
784

    
785
    public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
786
        return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
787
    }
788

    
789
    public static String clean(String value, String DELIM, String ENCLOSING) {
790
        if (value != null) {
791

    
792
            value = value.replaceAll("[\"\\r\\\\;]", "");
793
            value = value.replace(DELIM, " ");
794
            value = value.replace(ENCLOSING, " ");
795
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
796
            //value = value.replace("\"", "");
797
            //value = value.replace("'", "");
798
            //value = value.replace("«", " ");
799
            //value = value.replace("»", " ");
800

    
801
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
802

    
803
            return value;
804

    
805
        } else {
806
            return "";
807

    
808
        }
809

    
810
    }
811

    
812
    public static String cleanId(String value, String DELIM, String ENCLOSING) {
813
        if (value != null) {
814
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
815

    
816
            // to datacite____:: )
817
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
818
            value = value.replaceFirst(".*\\|", "");
819
            value = value.replace("\n", "");
820
            value = value.replace(ENCLOSING, "");
821
            value = value.replace(DELIM, "");
822
            value = value.replace("\"", "");
823
            value = value.replace("«", " ");
824
            value = value.replace("»", " ");
825
        }
826

    
827
        return ENCLOSING + value + ENCLOSING;
828

    
829
    }
830

    
831

    
832
    public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
833
        value = value.replace(DELIM, " ");
834
        value = value.replace(ENCLOSING, " ");
835
        value = value.replace(" ", "");
836
        value = value.replace("\n", "");
837
        return value;
838
    }
839

    
840

    
841
    public static long DATEDIFF(String startDate, String endDate) {
842
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
843
        long days = 0l;
844
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
845
        // <startdate>2011-09-01</startdate>
846
        // <enddate>2015-08-31</enddate>
847
        Date dateIni = null;
848
        Date dateFin = null;
849

    
850
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
851
            return 0;
852
        }
853
        try {
854
            dateIni = (Date) format.parse(startDate);
855
            dateFin = (Date) format.parse(endDate);
856
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
857
        } catch (Exception e) {
858

    
859
            return 0;
860
        }
861

    
862
        return days;
863
    }
864

    
865

    
866
}
(3-3/3)