Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

    
3
import com.google.common.collect.Multimap;
4

    
5
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
6
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
8
import eu.dnetlib.data.proto.FieldTypeProtos;
9
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
10
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
12
import eu.dnetlib.data.proto.OafProtos.Oaf;
13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
14
import eu.dnetlib.data.proto.OafProtos.OafRel;
15
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
16
import eu.dnetlib.data.proto.ProjectProtos.Project;
17
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
18
import eu.dnetlib.data.proto.ResultProtos.Result;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import org.apache.log4j.Logger;
21
import org.jsoup.Jsoup;
22

    
23
import java.text.DateFormat;
24
import java.text.ParseException;
25
import java.text.SimpleDateFormat;
26
import java.util.Date;
27
import java.util.List;
28

    
29
import org.w3c.dom.Element;
30
import org.w3c.dom.NodeList;
31
import org.xml.sax.InputSource;
32
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
33
import org.w3c.dom.Document;
34

    
35
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
39
public class Serializer {
40
    private static Logger logger = Logger.getLogger(Serializer.class);
41

    
42
    private String DELIM;
43
    private String ENCLOSING;
44

    
45
    /**
     * Creates a serializer configured with the separators used by the field helpers.
     *
     * @param DELIM     string appended after each serialized field
     * @param ENCLOSING string placed immediately before and after each field value
     */
    public Serializer(String DELIM, String ENCLOSING) {
        this.ENCLOSING = ENCLOSING;
        this.DELIM = DELIM;
    }
49

    
50
    public String serialize(Oaf oaf) {
51

    
52
        switch (oaf.getKind()) {
53
            case entity:
54
                OafEntity valueEntity = oaf.getEntity();
55

    
56
                switch (valueEntity.getType()) {
57
                    case datasource:
58

    
59
                        return buildDatasource(oaf);
60

    
61
                    case organization:
62

    
63
                        return buildOrganization(oaf);
64

    
65
                    case project:
66

    
67
                        return buildProject(oaf);
68
                    case result:
69

    
70
                        return buildResult(oaf);
71
                    /*
72
                    case person:
73
                        return buildPerson(oaf, DELIM, ENCLOSING);
74
                    */
75
                    default:
76
                        break;
77
                }
78
                break;
79
            case relation:
80

    
81
                return buildRel(oaf.getRel());
82

    
83
        }
84
        return null;
85
    }
86

    
87
    public String serialize(OafRel oaf) {
88

    
89
        switch (oaf.getRelType()) {
90
            case resultProject:
91
                return getResultProject(oaf);
92
            default:
93
                return buildRel(oaf);
94
        }
95
    }
96

    
97
    private String buildRel(OafRel Rel) {
98

    
99
        return cleanId(Rel.getTarget()) + DELIM;
100

    
101
    }
102

    
103
    public void extractRelations(Oaf oaf, Multimap<String, String> relations) {
104
        OafEntity valueEntity = oaf.getEntity();
105
        getOriginalId(valueEntity, relations);
106

    
107
        switch (valueEntity.getType()) {
108
            case datasource:
109
                getDatasourceLanguages(valueEntity, relations);
110
//                getDatasourceWebsite(valueEntity, relations);
111
            case result:
112
                getResultTopics(valueEntity, relations);
113
                getResultLanguages(valueEntity, relations);
114
                getResultClassifications(valueEntity, relations);
115
                getResultDatasources(valueEntity, relations);
116
                getResultConcepts(valueEntity, relations);
117
                getResultDois(valueEntity, relations);
118
                getResultCitations(valueEntity, relations);
119
//                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
120
//                getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
121

    
122
            case project:
123
                getProjectKeywords(valueEntity, relations);
124
                getProjectSubjects(valueEntity, relations);
125

    
126
            default:
127
        }
128

    
129
    }
130

    
131
    private void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations) {
132
        Datasource d = valueEntity.getDatasource();
133
        Metadata metadata = d.getMetadata();
134

    
135
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue()));
136
    }
137

    
138
    private void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations) {
139
        Result result = valueEntity.getResult();
140
        Result.Metadata metadata = result.getMetadata();
141

    
142
        StringBuilder buff = new StringBuilder();
143
        String titleString = "";
144

    
145
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
146
            StructuredProperty title = metadata.getTitleList().get(i);
147

    
148
            titleString = title.getValue().replaceAll("\\s+", " ");
149
            titleString = titleString.replaceAll("\n", " ");
150
            break;
151
        }
152

    
153
        //  pubtitle
154
        buff.append(getStringField(titleString));
155

    
156
        String sources = "";
157
        for (Instance instance : (result.getInstanceList())) {
158
            List<String> urls = instance.getUrlList();
159
            for (String url : urls) {
160
                sources += cleanUrl(url) + " ;";
161
            }
162
        }
163

    
164
        //sources
165
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
166
        buff.append(sources);
167

    
168
        relations.put("resultExtra", buff.toString());
169
    }
170

    
171
    private void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations) {
172

    
173
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
174
        for (String oid : oafEntity.getOriginalIdList()) {
175
            relations.put(relName, cleanId(oid));
176
        }
177

    
178
    }
179

    
180
    private void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations) {
181
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue()));
182

    
183
    }
184

    
185
    private void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations) {
186
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
187
            relations.put("projectSubject", getStringField(subj.getValue()));
188
        }
189
    }
190

    
191
    private String getResultProject(OafRel oaf) {
192
        StringBuilder buff = new StringBuilder();
193
        buff.append(cleanId(oaf.getTarget()) + DELIM);
194
        // TODO is declared as int!!!
195
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
196
        if (diff < 0) {
197
            diff = 0;
198
        }
199

    
200
        buff.append(getNumericField(String.valueOf(diff)));
201
        return buff.toString();
202
    }
203

    
204

    
205
    private void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
206
        Datasource d = valueEntity.getDatasource();
207
        Metadata metadata = d.getMetadata();
208

    
209
        for (StringField lang : metadata.getOdlanguagesList()) {
210
            rels.put("datasourceLanguage", getStringField(lang.getValue()));
211
        }
212
    }
213

    
214
    private void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
215

    
216
        Result d = valueEntity.getResult();
217
        Result.Metadata metadata = d.getMetadata();
218
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
219
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname()));
220
        }
221

    
222
    }
223

    
224
    private void getResultDois(OafEntity valueEntity, Multimap<String, String> rels) {
225

    
226
        for (StructuredProperty pid : valueEntity.getPidList()) {
227
            rels.put("resultPid", getStringField(pid.getQualifier().getClassname()) + getStringField(pid.getValue()));
228
        }
229
    }
230

    
231
    private void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels) {
232

    
233
        Result result = valueEntity.getResult();
234

    
235
        for (Instance instance : (result.getInstanceList())) {
236
            String classification = instance.getInstancetype().getClassname();
237

    
238
            if (classification != null && !classification.isEmpty()) {
239
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname()));
240
            }
241
        }
242
    }
243

    
244
    private void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels) {
245
        Result result = valueEntity.getResult();
246

    
247
        for (StringField s : result.getMetadata().getDescriptionList()) {
248
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text()));
249
        }
250
    }
251

    
252
    private void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels) {
253
        Result result = valueEntity.getResult();
254

    
255
        for (Result.Context context : result.getMetadata().getContextList()) {
256
            rels.put("resultConcept", cleanId(context.getId()));
257
        }
258
    }
259

    
260
    private void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels) {
261
        Result result = valueEntity.getResult();
262

    
263
//TODO hosted by
264
        for (Instance instance : (result.getInstanceList())) {
265
            String hostedBy = instance.getHostedby().getKey();
266

    
267
            if (hostedBy != null && !hostedBy.isEmpty()) {
268
                rels.put("resultDatasource", cleanId(hostedBy) + DELIM);
269
            }
270
        }
271

    
272
//TODO  collected froms
273
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
274
            String collectedFrom = collectedFromValue.getKey();
275

    
276
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
277
                rels.put("resultDatasource", cleanId(collectedFrom) + DELIM);
278
            }
279
        }
280
    }
281

    
282
    private void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels) {
283
        Result d = valueEntity.getResult();
284
        Result.Metadata metadata = d.getMetadata();
285
        List<StructuredProperty> Topics = metadata.getSubjectList();
286

    
287
        for (StructuredProperty topic : Topics) {
288
            rels.put("resultTopic", getStringField(topic.getValue()));
289
        }
290
    }
291

    
292

    
293
    private void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels) {
294
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
295
            if (extraInfo.getName().equals("result citations")) {
296
                DOMParser parser = new DOMParser();
297
                try {
298
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
299
                    Document doc = parser.getDocument();
300
                    doc.getDocumentElement().normalize();
301

    
302
                    NodeList citations = doc.getElementsByTagName("citation");
303
                    for (int temp = 0; temp < citations.getLength(); temp++) {
304
                        Element citation = (Element) citations.item(temp);
305
                        NodeList ids = citation.getElementsByTagName("id");
306
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
307
                            Element id = (Element) ids.item(temp1);
308
                            if(id.getAttribute("type").equals("openaire")){
309
                                //System.out.println(id.getAttribute("value"));
310
                                rels.put("resultCitation", id.getAttribute("value"));
311
                            }
312
                        }
313
                    }
314
                } catch (Exception e) {
315

    
316
                }
317

    
318
                /*
319
                rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
320
                        getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
321
                        */
322
            }
323

    
324
        }
325
    }
326

    
327
    private String buildDatasource(Oaf oaf) {
328
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
329
        StringBuilder buff = new StringBuilder();
330

    
331
        // name
332
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
333
            buff.append(getStringField("Unknown Repository"));
334
        } else {
335
            buff.append(getStringField(metadata.getOfficialname().getValue()));
336
        }
337

    
338
        // type
339
        if (metadata.hasDatasourcetype()) {
340
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", "")));
341
        }
342

    
343
        // compatibility,
344
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname()));
345

    
346
        // latitude
347
        buff.append(getLatLongField(metadata.getLatitude().getValue()));
348

    
349
        // longtitude
350
        buff.append(getLatLongField(metadata.getLongitude().getValue()));
351

    
352
        // dateofvalidation,
353
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue()));
354

    
355
        // yearofvalidation,
356
        buff.append(getYearInt(metadata.getDateofvalidation().getValue()));
357

    
358
        //harvested
359
        buff.append(getStringField("false"));
360

    
361
        //piwik_id
362
        String piwik_id = "";
363
        for (String oid : oaf.getEntity().getOriginalIdList()) {
364
            if (oid.contains("piwik")) {
365
                piwik_id = oid.split(":")[1];
366
                break;
367
            }
368
        }
369
        buff.append(getStringField(cleanNumber(piwik_id)));
370

    
371
        return buff.toString();
372

    
373
    }
374

    
375
    private String buildOrganization(Oaf oaf) {
376

    
377
        StringBuilder buff = new StringBuilder();
378
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
379

    
380
        // `name`,
381
        buff.append(getStringField(metadata.getLegalname().getValue()));
382

    
383
        // `country`,
384
        buff.append(getStringField(metadata.getCountry().getClassid()));
385

    
386
        return buff.toString();
387
    }
388

    
389
    private String buildResult(Oaf oaf) {
390
        StringBuilder buff = new StringBuilder();
391

    
392
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
393

    
394
        String titleString = new String();
395

    
396
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
397
            StructuredProperty title = metadata.getTitleList().get(i);
398

    
399
            if (i == 0) {
400
                titleString = title.getValue().replaceAll("\\s+", " ");
401
                titleString = titleString.replaceAll("\n", " ");
402
            }
403
            break;
404
        }
405

    
406
        //  pubtitle
407
        buff.append(getStringField(titleString));
408

    
409

    
410
        //  publisher
411
        buff.append(getStringField(metadata.getPublisher().getValue()));
412

    
413
        //  journal
414
        buff.append(getStringField(metadata.getJournal().getName()));  //#null#!
415

    
416
        // year
417
        buff.append(getYearInt(metadata.getDateofacceptance().getValue()));
418

    
419
        // date
420
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue()));
421

    
422
        // bestlicense
423
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult())));
424

    
425
        // type
426
        buff.append(getStringField(metadata.getResulttype().getClassname()));
427

    
428
        // embargo_end_date
429
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue()));
430

    
431
        // `authors`,
432
        int authors = metadata.getAuthorCount();
433

    
434

    
435
        String delayed = "no";
436

    
437
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
438

    
439
            /*
440
            if (rel.getRelType().equals(RelType.personResult)) {
441

    
442
                authors++;
443
            } else
444
            */
445
            if (rel.getRelType().equals(RelType.resultProject))
446
            // TODO remember : in result Project, first id is project, second is
447
            // result.
448
            {
449
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
450
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
451
                if (Integer.parseInt(daysfromend) > 0) {
452
                    delayed = "yes";
453
                }
454
            }
455
        }
456

    
457
        // `delayed`,
458
        buff.append(getStringField(delayed));
459
        //authors
460
        buff.append(getNumericField(String.valueOf(authors)));
461

    
462
        String sources = new String();
463

    
464

    
465
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
466
            List<String> urls = instance.getUrlList();
467
            for (String url : urls) {
468
                sources += cleanUrl(url) + " ;";
469
            }
470
        }
471

    
472
        //sources
473
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
474

    
475
        buff.append(sources);
476

    
477
        return buff.toString();
478

    
479
    }
480

    
481

    
482
    /*
483
    private static String getBestLicense(Result result) {
484
        Qualifier bestLicense = null;
485
        LicenseComparator lc = new LicenseComparator();
486
        for (Instance instance : (result.getInstanceList())) {
487
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
488
                bestLicense = instance.getLicence();
489
            }
490
        }
491
        if (bestLicense != null) {
492
            return bestLicense.getClassname();
493
        } else {
494
            return null;
495
        }
496
    }
497

    
498
    // TODO here iterate over all values
499
    private static String getAccessMode(Result result) {
500
        String accessMode = " ";
501
        for (Instance instance : (result.getInstanceList())) {
502
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
503
                accessMode = instance.getLicence().getClassname();
504
                break;
505
            }
506

    
507
        }
508

    
509
        return accessMode;
510
    }
511
    */
512

    
513
    private String getBestLicense(Result result) {
514
        Qualifier bestLicense = null;
515
        LicenseComparator lc = new LicenseComparator();
516
        for (Instance instance : (result.getInstanceList())) {
517
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
518
                bestLicense = instance.getAccessright();
519
            }
520
        }
521
        if (bestLicense != null) {
522
            return bestLicense.getClassname();
523
        } else {
524
            return "";
525
        }
526
    }
527

    
528
    // TODO here iterate over all values
529
    private String getAccessMode(Result result) {
530
        String accessMode = " ";
531
        for (Instance instance : (result.getInstanceList())) {
532
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
533
                accessMode = instance.getAccessright().getClassname();
534
                break;
535
            }
536

    
537
        }
538

    
539
        return accessMode;
540
    }
541

    
542
    private String buildProject(Oaf oaf) {
543

    
544
        FundingParser fundingParser = new FundingParser(DELIM, ENCLOSING);
545
        StringBuilder buff = new StringBuilder();
546
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
547
        
548
        // `acronym`,
549
        String acronym = metadata.getAcronym().getValue();
550
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
551
            acronym = metadata.getTitle().getValue();
552
        }
553
        buff.append(getStringField(acronym));
554

    
555
        //title
556
        buff.append(getStringField(metadata.getTitle().getValue()));
557

    
558
        //funding_lvl
559
        List<StringField> fundList = metadata.getFundingtreeList();
560
        if (!fundList.isEmpty()) // `funding_lvl0`,
561
        {
562
            //TODO funder + 3 funding levels
563
           /* funder text,
564
            funding_lvl0 text,
565
	        funding_lvl1 text,
566
	        funding_lvl2 text,
567
	        funding_lvl3 text,*/
568
            buff.append(fundingParser.getFundingInfo(fundList.get(0).getValue()));
569
        } else {
570
            buff.append(fundingParser.getFundingInfo(""));
571
        }
572

    
573
        //sc39
574
        String sc39 = metadata.getEcsc39().getValue().toString();
575
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
576
            sc39 = "yes";
577
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
578
            sc39 = "no";
579
        }
580
        buff.append(getStringField(sc39));
581

    
582
        //project_type
583
        buff.append(getStringField(metadata.getContracttype().getClassid()));
584

    
585
        // start_year
586
        buff.append(getYearInt(metadata.getStartdate().getValue()));
587

    
588
        // end_year
589
        buff.append(getYearInt(metadata.getEnddate().getValue()));
590

    
591
        // duration enddate-startdate
592
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue()));
593

    
594
        // haspubs
595
        buff.append(getStringField("no"));
596

    
597
        // numpubs
598
        buff.append(getNumericField("0"));
599

    
600
        // enddate
601
        buff.append(getStringDateField(metadata.getEnddate().getValue()));
602

    
603
        // startdate
604
        buff.append(getStringDateField(metadata.getStartdate().getValue()));
605

    
606
        // `daysforlastpub`,
607
        buff.append(getNumericField(""));
608

    
609
        // `delayedpubs`,
610
        buff.append(getNumericField(""));
611

    
612
        //call identifier
613
        buff.append(getStringField(metadata.getCallidentifier().getValue()));
614

    
615
        //code
616
        buff.append(getStringField(metadata.getCode().getValue()));
617

    
618
        return buff.toString();
619
    }
620

    
621
    /*
622
    private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
623
        StringBuilder buff = new StringBuilder();
624

    
625
        PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
626

    
627
        // `person_id`,
628
        //buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
629

    
630

    
631
        // person_result
632
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
633

    
634

    
635
        //firstName
636
        buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
637

    
638
        //LastName
639

    
640
        String secondName = new String();
641
        for (StringField s : metadata.getSecondnamesList()) {
642
            secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
643
        }
644

    
645
        buff.append(getStringField(secondName, DELIM, ENCLOSING));
646

    
647
        // `fullname`,
648
        buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
649

    
650
        // `Nationality`,
651
        buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
652

    
653
        // `Email`,
654
        buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
655

    
656
        // `Phone`,
657
        buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
658

    
659
        //deletedByInference
660

    
661
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
662

    
663
        // `number`,
664
        buff.append(getStringField("1", DELIM, ENCLOSING));
665

    
666
        return buff.toString();
667

    
668
    }
669
    */
670

    
671

    
672
    private String getYearDifferenceInteger(String enddate, String startdate) {
673

    
674
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
675

    
676
            String[] split = startdate.split("-");
677

    
678
            if (split == null || split.length == 0) {
679
                return ENCLOSING + "0" + ENCLOSING + DELIM;
680
            }
681

    
682
            int Startdate = Integer.parseInt(split[0]);
683

    
684
            split = enddate.split("-");
685

    
686
            if (split == null || split.length == 0) {
687
                return ENCLOSING + "0" + ENCLOSING + DELIM;
688
            }
689

    
690
            int Enddate = Integer.parseInt(split[0]);
691

    
692
            int diff = Enddate - Startdate;
693

    
694
            return ENCLOSING + diff + ENCLOSING + DELIM;
695

    
696
        }
697

    
698
        return ENCLOSING + "0" + ENCLOSING + DELIM;
699
    }
700

    
701
    private String getYearInt(String data) {
702
        if (data == null || data.isEmpty() || data.equals("-1")) {
703
            return ENCLOSING + "0" + ENCLOSING + DELIM;
704
        }
705

    
706
        String[] split = data.split("-");
707

    
708
        if (split == null || split.length == 0) {
709
            return ENCLOSING + "0" + ENCLOSING + DELIM;
710
        }
711

    
712
        String year = split[0];
713

    
714
        year = cleanNumber(year);
715

    
716
        if (year == null || year.isEmpty()) year = "0";
717

    
718
        return ENCLOSING + year + ENCLOSING + DELIM;
719

    
720
    }
721

    
722
    private String cleanNumber(String number) {
723
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
724
        return number;
725
    }
726

    
727
    private String getLatLongField(String data) {
728

    
729
        if (data == null || data.isEmpty())
730
            return ENCLOSING + "null" + ENCLOSING + DELIM;
731

    
732
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
733

    
734
    }
735

    
736
    private String getStringField(String data) {
737

    
738
        if (data == null || data.isEmpty())
739
            return ENCLOSING + "null" + ENCLOSING + DELIM;
740

    
741
        return ENCLOSING + clean(data) + ENCLOSING + DELIM;
742

    
743
    }
744

    
745
    private String getStringDateField(String data) {
746
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
747
            return ENCLOSING + "0" + ENCLOSING + DELIM;
748
        } else {
749
            data = data.replace(DELIM, " ");
750
            data = data.replace(ENCLOSING, " ");
751
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
752
            try {
753
                DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
754
                data = format.format(format.parse(data));
755
                return ENCLOSING + data + ENCLOSING + DELIM;
756
            } catch (ParseException e) {
757
                return ENCLOSING + "0" + ENCLOSING + DELIM;
758
            }
759
        }
760
    }
761

    
762
    private String getNumericField(String data) {
763
        if (data == null || data.isEmpty() || data.equals("")) {
764
            return ENCLOSING + "0" + ENCLOSING + DELIM;
765
        } else {
766
            return ENCLOSING + data + ENCLOSING + DELIM;
767
        }
768
    }
769

    
770
    public String getId(Oaf oaf) {
771
        switch (oaf.getKind()) {
772
            case entity:
773
                return cleanId(oaf.getEntity().getId());
774
            case relation:
775
                return cleanId(oaf.getRel().getSource());
776

    
777
        }
778
        return null;
779

    
780
    }
781

    
782
    public String getId(OafRel relOaf) {
783
        return cleanId(relOaf.getSource());
784
    }
785

    
786
    public String clean(String value) {
787
        if (value != null) {
788

    
789
            value = value.replaceAll("[\"\\r\\\\;]", "");
790
            value = value.replace(DELIM, " ");
791
            value = value.replace(ENCLOSING, " ");
792
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
793
            //value = value.replace("\"", "");
794
            //value = value.replace("'", "");
795
            //value = value.replace("«", " ");
796
            //value = value.replace("»", " ");
797

    
798
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
799

    
800
            return value;
801
        } else {
802
            return "";
803
        }
804

    
805
    }
806

    
807
    public String cleanId(String value) {
808
        if (value != null) {
809
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
810

    
811
            // to datacite____:: )
812
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
813
            value = value.replaceFirst(".*\\|", "");
814
            value = value.replace("\n", "");
815
            value = value.replace(ENCLOSING, "");
816
            value = value.replace(DELIM, "");
817
            value = value.replace("\"", "");
818
            value = value.replace("«", " ");
819
            value = value.replace("»", " ");
820
        }
821

    
822
        return ENCLOSING + value + ENCLOSING;
823

    
824
    }
825

    
826

    
827
    public String cleanUrl(String value) {
828
        value = value.replace(DELIM, " ");
829
        value = value.replace(ENCLOSING, " ");
830
        value = value.replace(" ", "");
831
        value = value.replace("\n", "");
832
        return value;
833
    }
834

    
835

    
836
    public long DATEDIFF(String startDate, String endDate) {
837
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
838
        long days = 0l;
839
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
840
        // <startdate>2011-09-01</startdate>
841
        // <enddate>2015-08-31</enddate>
842
        Date dateIni = null;
843
        Date dateFin = null;
844

    
845
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
846
            return 0;
847
        }
848
        try {
849
            dateIni = (Date) format.parse(startDate);
850
            dateFin = (Date) format.parse(endDate);
851
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
852
        } catch (Exception e) {
853

    
854
            return 0;
855
        }
856

    
857
        return days;
858
    }
859
}
(3-3/3)