Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
5
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
6
import eu.dnetlib.data.proto.FieldTypeProtos;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
9
import eu.dnetlib.data.proto.OafProtos;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.OafProtos.OafRel;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.ResultProtos;
17
import eu.dnetlib.data.proto.ResultProtos.Result;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import org.apache.log4j.Logger;
20

    
21
import java.text.SimpleDateFormat;
22
import java.util.ArrayList;
23
import java.util.Date;
24
import java.util.List;
25

    
26
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
30
public class Serializer {
31

    
32
    private static String DELIM;
33
    private Logger log = Logger.getLogger(this.getClass());
34
    private String NULL_STRING = null;
35
    private String NULL_NUM = "0";
36

    
37
    private static String ENCLOSED;
38
    private FundingParser fundingParser;
39

    
40

    
41
    /**
     * Creates a serializer that terminates each value with {@code delim} and
     * wraps it in {@code enclosing}; the same pair is handed to the funding parser.
     *
     * @param delim     field delimiter
     * @param enclosing string placed around every value
     */
    public Serializer(String delim, String enclosing) {
        setDELIM(delim);
        setENCLOSED(enclosing);
        fundingParser = new FundingParser(delim, enclosing);
    }
47

    
48
    public String serialize(Oaf oaf) {
49

    
50
        switch (oaf.getKind()) {
51
            case entity:
52
                OafEntity valueEntity = oaf.getEntity();
53

    
54

    
55
                switch (valueEntity.getType()) {
56
                    case datasource:
57

    
58
                        return buildDatasource(valueEntity);
59
                    case organization:
60

    
61
                        return buildOrganization(valueEntity);
62

    
63
                    case project:
64

    
65
                        return buildProject(valueEntity);
66

    
67
                    case result:
68

    
69
                        return buildResult(valueEntity);
70
                    case person:
71

    
72
                        return buildPerson(valueEntity);
73

    
74
                    default:
75
                        log.error("wrong type");
76
                        break;
77
                }
78
                break;
79
            case relation:
80
                OafRel valueRel = oaf.getRel();
81

    
82
                return buildRel(valueRel);
83

    
84
        }
85

    
86
        return null;
87

    
88
    }
89

    
90
    public String serialize(OafRel oaf) {
91

    
92
        switch (oaf.getRelType()) {
93

    
94
            default:
95
                return buildRel(oaf);
96
        }
97

    
98
    }
99

    
100

    
101
    public ArrayList<String> extractRelations(Oaf oaf) {
102
        OafEntity valueEntity = oaf.getEntity();
103
        ArrayList<String> relations = new ArrayList<String>();
104
        switch (valueEntity.getType()) {
105
            case result:
106
                relations.addAll(getResultDatasources(valueEntity));
107
                relations.addAll(getDedups(valueEntity));
108
                return relations;
109
            case datasource:
110
                relations.addAll(getDedups(valueEntity));
111
                return relations;
112
            case person:
113
                relations.addAll(getDedups(valueEntity));
114
                return relations;
115
            case organization:
116
                relations.addAll(getDedups(valueEntity));
117
                return relations;
118
            default:
119
                return relations;
120
        }
121

    
122
    }
123

    
124
    private String buildRel(OafRel Rel) {
125
        return getStringField(Rel.getRelType().name()) + getStringField(Rel.getSource()) + getStringField(Rel.getTarget());
126

    
127
    }
128

    
129
    private String getResultResult(OafRel oaf) {
130

    
131

    
132
        String buff = new String();
133

    
134
        buff += getStringField(oaf.getTarget());
135

    
136
        buff += getStringField(String.valueOf(oaf.getResultResult().getSimilarity().getSimilarity()));
137
      /* •	hasAmongTopNSimilarDocuments: r1 hasAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar, and that we also have r2 isAmongTopNSimilarDocuments of r1.
138
        In OpenAIRE, N so far always equals 20.
139
        •	isAmongTopNSimilarDocuments: r1 isAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar and that r2 hasAmongTopNSimilarDocuments of r1;
140
        •	isRelatedTo: two results are somehow related to each other. OpenAIRE may further refine the semantics of possible types of "relatedness" by adding new classes in the Qualifier.
141
                Scheme "dnet:result_result_relations";
142
oaf. */
143
        return buff;
144
    }
145

    
146
    private String getHeader(OafEntity data) {
147
        String buff = new String();
148

    
149
        //  EntityType
150
        buff += getStringField(data.getType().name());
151
        // OpenaireID
152
        buff += getStringField(getStringDateField(cleanId(data.getId())));
153

    
154
        //  dateOfTransformation
155
        // TODO change to dateOfTransformation here when released
156
        buff += getStringField(data.getDateofcollection());
157
        //    dateOfCollection
158
        buff += getStringField(data.getDateofcollection());
159

    
160
        String originalId = new String();
161
// originalId
162

    
163
        for (String oid : data.getOriginalIdList()) {
164
            originalId += oid + ";";
165
        }
166

    
167
        buff += getStringField(originalId);
168

    
169
        return buff;
170

    
171
    }
172

    
173
    private String buildDatasource(OafEntity data) {
174

    
175

    
176
        Datasource d = data.getDatasource();
177

    
178
        Metadata metadata = d.getMetadata();
179

    
180

    
181
        String buff = getHeader(data);
182

    
183
        //Datasourcetype
184
        if (metadata.hasDatasourcetype()) {
185
            buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
186
        } else {
187
            buff += getStringField(null);
188
        }
189

    
190
        //Openairecompatibility
191
        buff += getStringField(metadata.getOpenairecompatibility().getClassname());
192

    
193
        //OfficialName
194
        buff += getStringField(metadata.getOfficialname().getValue());
195

    
196
        //  Englishname
197
        buff += getStringField(metadata.getEnglishname().getValue());
198

    
199
        //Websiteurl
200
        buff += getStringField(metadata.getWebsiteurl().getValue());
201

    
202
        //LogoURL
203
        buff += getStringField(metadata.getLogourl().getValue());
204

    
205
        //Contactemail
206
        buff += getStringField(metadata.getContactemail().getValue());
207
        //Namespaceprefix
208
        buff += getStringField(metadata.getNamespaceprefix().getValue());
209

    
210
        // latitude
211
        buff += getStringField(metadata.getLatitude().getValue());
212

    
213
        // longtitude
214
        buff += getStringField(metadata.getLongitude().getValue());
215

    
216
        // dateofvalidation,
217
        buff += getStringField(metadata.getDateofvalidation().getValue());
218

    
219
        //Description
220
        buff += getStringField(metadata.getDescription().getValue());
221

    
222
        //subjects
223
        String subj = new String();
224
        for (StructuredProperty s : metadata.getSubjectsList()) {
225
            subj += s.getValue() + ';';
226

    
227
        }
228

    
229
        //subjectList
230
        buff += getStringField(subj);
231

    
232

    
233
        //Number of items
234
        buff += getStringField(metadata.getOdnumberofitems().getValue());
235

    
236
        //Date of number of items
237
        buff += getStringField(metadata.getOdnumberofitemsdate().getValue());
238

    
239
        // Policies
240
        buff += getStringField(metadata.getOdpolicies().getValue());
241

    
242
        //languages
243
        String languages = new String();
244
        for (StringField lang : metadata.getOdlanguagesList()) {
245
            languages += lang.getValue() + ";";
246
        }
247
        buff += getStringField(languages);
248

    
249

    
250
        // Content type
251
        String contentType = new String();
252
        for (StringField c : metadata.getOdcontenttypesList()) {
253
            contentType += c.getValue() + ";";
254
        }
255
        buff += getStringField(contentType);
256

    
257
        //Access info package
258
        String accessInfo = new String();
259
        for (StringField c : metadata.getAccessinfopackageList()) {
260
            accessInfo += c.getValue() + ";";
261
        }
262
        buff += getStringField(accessInfo);
263

    
264
        //Release start date
265
        buff += getStringField(metadata.getReleasestartdate().getValue());
266

    
267

    
268
        //Release end date
269
        buff += getStringField(metadata.getReleaseenddate().getValue());
270

    
271
        //Mission statement url
272
        buff += getStringField(metadata.getMissionstatementurl().getValue());
273

    
274
        //Data provider
275
        buff += getStringField(String.valueOf(metadata.getDataprovider().getValue()));
276

    
277
        //Service provider
278
        buff += getStringField(String.valueOf(metadata.getServiceprovider().getValue()));
279

    
280
        //Database access type
281
        buff += getStringField(metadata.getDatabaseaccessrestriction().getValue());
282

    
283
        //Data upload type
284
        buff += getStringField(metadata.getDatauploadtype().getValue());
285

    
286
        //Data upload restrictions
287
        buff += getStringField(metadata.getDatauploadrestriction().getValue());
288

    
289

    
290
        //Versioning
291
        buff += getStringField(String.valueOf(metadata.getVersioning().getValue()));
292

    
293
        //Citation guideline url
294
        buff += getStringField(String.valueOf(metadata.getCitationguidelineurl().getValue()));
295

    
296
        //Quality management kind
297
        buff += getStringField(String.valueOf(metadata.getQualitymanagementkind().getValue()));
298

    
299
        //PID systems
300
        buff += getStringField(metadata.getPidsystems().getValue());
301

    
302
        //Certificates
303
        buff += getStringField(metadata.getCertificates().getValue());
304

    
305
        //Policies
306
        String policies = new String();
307
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
308
            policies += property.getValue() + ";";
309
        }
310
        buff += getStringField(policies);
311

    
312
        String trust = " ";
313
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
314
            trust = getStringDateField(info.getTrust());
315
            break;
316
        }
317
        buff += getStringField(trust);
318

    
319

    
320
        return buff;
321

    
322
    }
323

    
324

    
325
    private String buildOrganization(OafEntity data) {
326

    
327

    
328
        Organization organization = data.getOrganization();
329
        eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
330

    
331
        String buff = getHeader(data);
332

    
333
        //getLegalshortname
334
        buff += getStringField(metadata.getLegalshortname().getValue());
335
        // `name`,
336
        buff += getStringField(metadata.getLegalname().getValue());
337
        //website URL
338
        buff += getStringField(metadata.getWebsiteurl().getValue());
339
        //logourl
340
        buff += getStringField(metadata.getLogourl().getValue());
341
        // `country`,
342
        buff += getStringField(metadata.getCountry().getClassname());
343

    
344
        String trust = " ";
345
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
346
            trust = getStringDateField(info.getTrust());
347
            break;
348
        }
349
        buff += getStringField(trust);
350

    
351
        return buff;
352

    
353
    }
354

    
355
    private String buildResult(OafEntity data) {
356

    
357
        Result result = data.getResult();
358
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
359

    
360

    
361
        String buff = getHeader(data);
362

    
363

    
364
        //   titleString
365
        String titleString = new String();
366
        String alternativeTitles = new String();
367

    
368
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
369
            StructuredProperty title = metadata.getTitleList().get(i);
370
            if (i == 0) {
371
                titleString = title.getValue().replaceAll("\\s+", " ");
372
                titleString = titleString.replaceAll("\n", " ");
373
            } else {
374
                alternativeTitles += title.getValue().replaceAll("\\s+", " ") + " ; ";
375
                alternativeTitles = alternativeTitles.replaceAll("\n", " ");
376
            }
377
            break;
378
        }
379

    
380
        //  pubtitle
381
        buff += getStringField(titleString);
382
        // date of acceptance CHANGED THIS TO DATE FORMAT
383
        buff += getStringDateField(metadata.getDateofacceptance().getValue());
384

    
385
        // publisher
386
        buff += getStringField(metadata.getPublisher().getValue());
387

    
388

    
389
        //PID
390
        String pids = new String();
391
        for (StructuredProperty p : data.getPidList()) {
392
            pids += p.getValue() + ";";
393
        }
394
        buff += getStringField(pids);
395

    
396
        //language
397
        buff += getStringField(metadata.getLanguage().getClassname());
398

    
399
        // RelevantDate
400
        String reldate = new String();
401

    
402
        for (StructuredProperty p : metadata.getRelevantdateList()) {
403
            reldate += p.getValue();
404
            break;
405
        }
406

    
407
        buff += getStringField(reldate);
408

    
409
        //Subject
410
        String subjects = new String();
411
        for (StructuredProperty subj : metadata.getSubjectList()) {
412

    
413
            if (isValidTopic(subj.getValue())) {
414
                if (!isNumeric(subj.getValue())) {
415
                    subjects += subj.getValue() + ";";
416
                }
417
            }
418
        }
419

    
420
        buff += subjects;
421

    
422

    
423
        //TODO ExternalReference
424

    
425
        buff += getStringField(" ");
426

    
427
        //Source
428
        String source = new String();
429
        for (StringField s : metadata.getSourceList()) {
430
            source += s.getValue() + ";";
431
        }
432

    
433
        buff += getStringField(source);
434

    
435

    
436
        //TODO Format     
437
        buff += getStringField("");
438
        //DOES NOT EXIST
439
          /*String formatString = new String();
440
        for (StringField format : metadata.getFormatList()) {
441
            formatString = format.getValue();
442
            break;}
443
               buff += getStringField(formatString);
444
        }*/
445
        //Context
446
        String context = new String();
447
        for (Result.Context s : metadata.getContextList()) {
448
            context += s.getId() + ";";
449
        }
450
        buff += getStringField(context);
451

    
452

    
453
        //country TODO does not exist; throws error
454
        String country = new String();
455
        // for (Qualifier s : metadata.getCountryList()) {
456
        //  country += s.getClassname() + ";";
457
        // }
458

    
459
        buff += getStringField(country);
460

    
461

    
462
        //Best License
463
        buff += getStringField(getBestLicense(result));
464
        //Description
465
        String description = new String();
466

    
467
        for (StringField desc : metadata.getDescriptionList()) {
468
            description += desc;
469
            break;
470
        }
471
        buff += getStringField(description);
472
        //Journal  
473
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
474

    
475

    
476
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
477

    
478

    
479
        // TODO isRelatedTo
480

    
481
        //   resource type
482
        buff += getStringField(metadata.getResourcetype().getClassname());
483
        //   device
484
        buff += getStringField(metadata.getDevice().getValue());
485
        //   size
486
        buff += getStringField(metadata.getSize().getValue());
487
        //     version
488
        buff += getStringField(metadata.getVersion().getValue());
489

    
490
        //   metadata update
491
        buff += getStringField(metadata.getLastmetadataupdate().getValue());
492
        //   metadata version
493
        buff += getStringField(metadata.getMetadataversionnumber().getValue());
494

    
495

    
496
        // year
497
        buff += getYearInt(metadata.getDateofacceptance().getValue());
498

    
499
        // type
500
        buff += getStringField(metadata.getResulttype().getClassname());
501

    
502
/*
503
        //classification
504
		String classification = new String();
505
		for (Instance instance : (result.getInstanceList())) {
506

    
507
			if (instance.getInstancetype().getClassname() != null && !instance.getInstancetype().getClassname().isEmpty()) {
508
				classification += instance.getInstancetype().getClassname() + ';';
509
			}
510
		}
511

    
512
		buff += getStringField(classification);*/
513

    
514
        String trust = " ";
515
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
516
            trust = getStringDateField(info.getTrust());
517
            break;
518
        }
519

    
520
        buff += getStringField(trust);
521

    
522
        return buff;
523
    }
524

    
525

    
526
    private String buildProject(OafEntity data) {
527

    
528

    
529
        Project project = data.getProject();
530
        eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
531

    
532

    
533
        String buff = getHeader(data);
534

    
535

    
536
        //Code
537
        buff += getStringField(metadata.getCode().getValue());
538
        // `Websiteurl`,
539
        buff += getStringField(metadata.getWebsiteurl().getValue());
540

    
541

    
542
        // `Acronym`,
543
        buff += getStringField(metadata.getAcronym().getValue());
544

    
545
        //Title
546
        String title = getStringField(metadata.getTitle().getValue());
547
        buff += getStringField(title);
548

    
549
        // Startdate
550
        buff += getNumericField(metadata.getStartdate().getValue());
551

    
552
        // Enddate
553
        buff += getNumericField(metadata.getEnddate().getValue());
554

    
555
        //`Call identifer`
556
        buff += getStringField(metadata.getCallidentifier().getValue());
557

    
558
        //`KeyWords`
559
        buff += getStringField(metadata.getKeywords().getValue());
560

    
561
        //`Duration`
562
        buff += getStringField(metadata.getDuration().getValue());
563

    
564
        //esc39
565
        buff += getStringField(metadata.getEcsc39().getValue().toString());
566

    
567
        //`Contracttype`
568
        buff += getStringField(metadata.getContracttype().getClassname());
569

    
570
        //`OA mandate pubs`  TODO DOES NOT EXIST
571
        buff += getStringField(metadata.getOamandatepublications().getValue());
572
        // buff += getStringField("");
573

    
574
        //`Subjects` TODO DOES NOT EXIST EITHER
575
        String subjects = new String();
576
        for (StructuredProperty s : metadata.getSubjectsList()) {
577

    
578
            subjects += s.getValue() + ';';
579
        }
580
        buff += getStringField(subjects);
581

    
582

    
583
        //`EC293`
584
        buff += getStringField(metadata.getEcarticle293().getValue());
585

    
586

    
587
        List<StringField> fundList = metadata.getFundingtreeList();
588

    
589

    
590
        if (!fundList.isEmpty()) // `funding_lvl0`,
591
        {
592
            //TODO funder + 3 funding levels
593
           /* funder text,
594
            funding_lvl0 text,
595
	        funding_lvl1 text,
596
	        funding_lvl2 text,
597
	        funding_lvl3 text,*/
598
            buff += this.fundingParser.getFundingInfo(fundList.get(0).getValue());
599

    
600
        } else {
601

    
602
            buff += this.fundingParser.getFundingInfo("");
603
        }
604

    
605

    
606
        String trust = " ";
607
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
608
            trust = getStringDateField(info.getTrust());
609
            break;
610
        }
611
        buff += getStringField(trust);
612

    
613

    
614
        return buff;
615

    
616
    }
617

    
618

    
619
    private String buildPerson(OafEntity data) {
620

    
621
        PersonProtos.Person person = data.getPerson();
622
        eu.dnetlib.data.proto.PersonProtos.Person.Metadata metadata = person.getMetadata();
623

    
624
        String buff = getHeader(data);
625

    
626

    
627
        // `firstname`,
628
        buff += metadata.getFirstname();
629

    
630
        // `secondNames`,
631

    
632
        String secondNames = new String();
633
        for (StringField s : metadata.getSecondnamesList()) {
634

    
635
            secondNames += s.getValue() + ' ';
636
        }
637

    
638
        buff += getStringField(secondNames);
639

    
640
        // `fullname`,
641
        buff += getStringField(metadata.getFullname().getValue());
642
        // `Fax`,
643
        buff += getStringField(metadata.getFax().getValue());
644
        // `Email`,
645
        buff += getStringField(metadata.getEmail().getValue());
646
        // `Phone`,
647
        buff += getStringField(metadata.getPhone().getValue());
648

    
649
        // `Nationality`,
650
        buff += getStringField(metadata.getNationality().getClassname());
651

    
652
        // `PIDS`,
653
        String pids = new String();
654
        for (StructuredProperty s : data.getPidList()) {
655

    
656
            pids += s.getValue() + ";";
657

    
658

    
659
        }
660
        buff += getStringField(pids);
661

    
662
		/*// `collected from`,
663
        String collectedFrom = new String();
664
		for (FieldTypeProtos.KeyValue s : data.getCollectedfromList()) {
665
			collectedFrom += s.getValue() + ";";
666
		}
667
		buff += getStringField(collectedFrom);*/
668

    
669
        String trust = " ";
670
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
671
            trust = getStringDateField(info.getTrust());
672
            break;
673
        }
674
        buff += getStringField(trust);
675

    
676

    
677
        return buff;
678

    
679
    }
680

    
681

    
682
    private ArrayList<String> getResultDatasources(OafEntity valueEntity) {
683

    
684
        ArrayList<String> buffs = new ArrayList<String>();
685
        Result result = valueEntity.getResult();
686

    
687
        //TODO hosted by
688
        for (Instance instance : (result.getInstanceList())) {
689
            String hostedBy = instance.getHostedby().getKey();
690

    
691
            if (hostedBy != null && !hostedBy.isEmpty()) {
692
                buffs.add(getStringField("resultDatasource") + getStringField(hostedBy));
693
            }
694
        }
695

    
696
        //TODO  collected froms
697
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
698
            String collectedFrom = collectedFromValue.getKey();
699
            if (collectedFrom != null && !collectedFrom.isEmpty())
700
                buffs.add((getStringField("resultDatasource") + getStringField(collectedFrom)));
701

    
702
        }
703
        return buffs;
704

    
705
    }
706

    
707

    
708
    public String cleanId(String value) {
709
        if (value != null) {
710
            //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
711

    
712

    
713
            // to datacite____:: )
714
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
715
            value = value.replaceFirst(".*\\|", "");
716
            value = value.replaceAll("\n", "");
717
            value = value.replaceAll(DELIM, "");
718
            value = value.replaceAll(ENCLOSED, "");
719
            value = value.trim();
720

    
721
        }
722
        if (value == null) {
723
            return null;
724
        }
725
        return ENCLOSED + value + ENCLOSED;
726

    
727
    }
728

    
729

    
730
    private ArrayList<String> getDedups(OafEntity valueEntity) {
731

    
732
        ArrayList<String> buffs = new ArrayList<String>();
733

    
734

    
735
        if (!valueEntity.getChildrenList().isEmpty()) {
736

    
737
            String buff = getStringField("Dedup") + getStringField(cleanId(valueEntity.getId()));
738

    
739
            for (OafEntity child : valueEntity.getChildrenList()) {
740
                {
741
                    if (child.getType() == valueEntity.getType()) { // if it is result, then its the deduplication
742
                        buff += getStringField(cleanId(child.getId()));
743
                    }
744
                }
745
            }
746

    
747
            buffs.add(buff);
748
        }
749

    
750
        return buffs;
751

    
752
    }
753

    
754

    
755
    private String getNumericField(String data) {
756
        if (data == null || data.isEmpty() || data.equals("")) {
757
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
758
        } else {
759

    
760
            return ENCLOSED + data + ENCLOSED + DELIM;
761
        }
762
    }
763

    
764

    
765
    private String getYearDifferenceInteger(String enddate, String startdate) {
766

    
767
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
768

    
769
            String[] split = startdate.split("-");
770

    
771
            if (split == null || split.length == 0) {
772
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
773
            }
774

    
775
            int Startdate = Integer.parseInt(split[0]);
776

    
777
            split = enddate.split("-");
778

    
779
            if (split == null || split.length == 0) {
780
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
781
            }
782

    
783
            int Enddate = Integer.parseInt(split[0]);
784

    
785
            int diff = Enddate - Startdate;
786

    
787
            return ENCLOSED + diff + ENCLOSED + DELIM;
788

    
789
        }
790

    
791
        return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
792
    }
793

    
794
    private String getYearInt(String data) {
795
        if (data == null || data.isEmpty() || data.equals("-1")) {
796
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
797
        }
798

    
799
        String[] split = data.split("-");
800

    
801
        if (split == null || split.length == 0) {
802
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
803
        }
804

    
805
        String year = split[0];
806

    
807
        year = cleanNumber(year);
808

    
809
        return ENCLOSED + year + ENCLOSED + DELIM;
810

    
811

    
812
    }
813

    
814
    private static String cleanNumber(String number) {
815
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
816

    
817
        return number;
818
    }
819

    
820

    
821
    private String getStringField(String data) {
822

    
823
        if (data == null || data.isEmpty() || data.equals("")) {
824

    
825
            return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
826
        } else {
827

    
828
            String field = clean(data);
829
            if (field == null) {
830
                return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
831
            } else {
832
                return field + DELIM;
833
            }
834
        }
835
    }
836

    
837
    private String getStringDateField(String data) {
838

    
839
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
840

    
841
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
842
        } else {
843

    
844
            String field = clean(data);
845
            if (field == null) {
846
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
847
            } else {
848
                return field + DELIM;
849
            }
850
        }
851
    }
852

    
853

    
854
    public String getId(OafProtos.Oaf oaf) {
855
        switch (oaf.getKind()) {
856
            case entity:
857

    
858
                return cleanId(oaf.getEntity().getId());
859
            case relation:
860

    
861
                return cleanId(oaf.getRel().getSource());
862

    
863
        }
864
        return null;
865

    
866
    }
867

    
868
    private boolean isNumeric(String str) {
869

    
870
        str = str.replaceAll("[^A-Za-z0-9 ]", "");
871
        str = str.replaceAll(" ", "");
872
        return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional
873
        // '-' and decimal.
874
    }
875

    
876
    //   there are topics with "null" as value -> replace them
877
    private boolean isValidTopic(String t) {
878

    
879
        if (t == null || t.isEmpty()) {
880
            return false;
881
        }
882

    
883
        if (t.equals("") || t.equals(" ")) {
884
            return false;
885
        }
886
        if (t.equals("null") || t.equals("Null") || t.equals("NULL")) {
887
            return false;
888
        }
889

    
890
        if (t.equals(ENCLOSED + ENCLOSED + DELIM) || t.equals(ENCLOSED + NULL_STRING + ENCLOSED + DELIM)) {
891
            return false;
892
        }
893
        // skip dedups
894
        if (t.contains("ddc:")) {
895

    
896
            return false;
897
        }
898
        return true;
899
    }
900

    
901

    
902
    private String getBestLicense(ResultProtos.Result result) {
903
        FieldTypeProtos.Qualifier bestLicense = null;
904
        LicenseComparator lc = new LicenseComparator();
905
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
906
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
907
                bestLicense = instance.getLicence();
908
            }
909
        }
910
        if (bestLicense != null) {
911
            return bestLicense.getClassname();
912
        } else {
913
            return null;
914
        }
915
    }
916

    
917
    //   here iterate over all values
918
    private String getAccessMode(ResultProtos.Result result) {
919
        String accessMode = null;
920
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
921
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
922
                accessMode = instance.getLicence().getClassname();
923
                break;
924
            }
925

    
926
        }
927

    
928

    
929
        return accessMode;
930
    }
931

    
932

    
933
    private String clean(String value) {
934
        if (value != null) {
935
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
936
            // to datacite____:: )
937
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
938
            value = value.replaceFirst(".*\\|", "");
939
            value = value.replaceAll(DELIM, "");
940
            value = value.replaceAll(",", "");
941
            value = value.replaceAll("\"", "");
942
            value = value.replaceAll("'", "");
943
            value = value.replaceAll(ENCLOSED, "");
944
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
945
            value = value.replaceAll("\\s+", " ");
946
            value = value.replaceAll("(\\r|\\n)", " ");
947
            value = value.replaceAll("\\t", " ");
948

    
949
            // value = value.replaceAll("[^A-Za-z0-9:,____-;:]", " ");
950
            value = value.trim();
951

    
952
        }
953
        if (value == null) {
954
            return null;
955
        }
956
        return ENCLOSED + value + ENCLOSED;
957

    
958
    }
959

    
960

    
961
    public long DATEDIFF(String startDate, String endDate) {
962
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
963
        long days = 0l;
964
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
965
        // <startdate>2011-09-01</startdate>
966
        // <enddate>2015-08-31</enddate>
967
        Date dateIni = null;
968
        Date dateFin = null;
969

    
970
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
971
            return 0;
972
        }
973
        try {
974
            dateIni = (Date) format.parse(startDate);
975
            dateFin = (Date) format.parse(endDate);
976
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
977
        } catch (Exception e) {
978
            log.error(e.toString());
979
            return 0;
980
        }
981

    
982
        return days;
983
    }
984

    
985
    /** @return the field delimiter appended after every serialized value */
    public String getDELIM() {
        return DELIM;
    }

    /** @param dELIM the field delimiter to use from now on */
    public void setDELIM(String dELIM) {
        DELIM = dELIM;
    }

    /** @return the placeholder emitted for missing string values (may be {@code null}) */
    public String getNULL_STRING() {
        return NULL_STRING;
    }

    /** @param nULL_STRING the placeholder to emit for missing string values */
    public void setNULL_STRING(String nULL_STRING) {
        NULL_STRING = nULL_STRING;
    }

    /** @return the placeholder emitted for missing numeric/date values */
    public String getNULL_NUM() {
        return NULL_NUM;
    }

    /** @param nULL_NUM the placeholder to emit for missing numeric/date values */
    public void setNULL_NUM(String nULL_NUM) {
        NULL_NUM = nULL_NUM;
    }

    /** @return the string placed before and after every serialized value */
    public String getENCLOSED() {
        return ENCLOSED;
    }

    /** @param eNCLOSED the string to place before and after every serialized value */
    public void setENCLOSED(String eNCLOSED) {
        ENCLOSED = eNCLOSED;
    }
1016

    
1017

    
1018
}
(4-4/5)