Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
5
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
6
import eu.dnetlib.data.proto.FieldTypeProtos;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
9
import eu.dnetlib.data.proto.OafProtos;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.OafProtos.OafRel;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17
import eu.dnetlib.data.proto.ResultProtos;
18
import eu.dnetlib.data.proto.ResultProtos.Result;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import org.apache.log4j.Logger;
21

    
22
import java.text.SimpleDateFormat;
23
import java.util.Date;
24
import java.util.List;
25

    
26
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
30
public class Serializer {
31

    
32
    private static String DELIM;
33
    private Logger log = Logger.getLogger(this.getClass());
34
    private String NULL_STRING = null;
35
    private String NULL_NUM = "0";
36

    
37
    private static String ENCLOSED;
38
    private FundingParser fundingParser = new FundingParser();
39

    
40

    
41
    /**
     * Creates a serializer. The constructor performs no initialisation of its
     * own; the delimiter and enclosing strings have no default value in this
     * class and are assigned through {@code setDELIM} / {@code setENCLOSED}.
     */
    public Serializer() {
        // intentionally empty
    }
43

    
44
    public String serialize(Oaf oaf) {
45

    
46
        switch (oaf.getKind()) {
47
            case entity:
48
                OafEntity valueEntity = oaf.getEntity();
49

    
50
                switch (valueEntity.getType()) {
51
                    case datasource:
52

    
53
                        return buildDatasource(valueEntity);
54

    
55
                    case organization:
56

    
57
                        return buildOrganization(valueEntity);
58

    
59
                    case project:
60

    
61
                        return buildProject(valueEntity);
62
                    case result:
63

    
64
                        return buildResult(valueEntity);
65
                    case person:
66
                        return buildPerson(valueEntity);
67
                    default:
68
                        log.error("wrong type");
69
                        break;
70
                }
71
                break;
72
            case relation:
73
                OafRel valueRel = oaf.getRel();
74

    
75
                return buildRel(valueRel);
76

    
77
        }
78

    
79
        return null;
80

    
81
    }
82

    
83
    public String serialize(OafRel oaf) {
84

    
85
        switch (oaf.getRelType()) {
86
            case resultProject:
87

    
88
                return getResultProject(oaf);
89
            case resultResult:
90
                return getResultResult(oaf);
91

    
92
            default:
93
                return buildRel(oaf);
94
        }
95
    }
96

    
97
    private String getResultResult(OafRel oaf) {
98

    
99

    
100
        String buff = new String();
101

    
102
        buff += getStringField(oaf.getTarget());
103

    
104
        buff += getStringField(String.valueOf(oaf.getResultResult().getSimilarity().getSimilarity()));
105

    
106
      /* •	hasAmongTopNSimilarDocuments: r1 hasAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar, and that we also have r2 isAmongTopNSimilarDocuments of r1.
107
        In OpenAIRE, N so far always equals 20.
108
        •	isAmongTopNSimilarDocuments: r1 isAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar and that r2 hasAmongTopNSimilarDocuments of r1;
109
        •	isRelatedTo: two results are somehow related to each other. OpenAIRE may further refine the semantics of possible types of "relatedness" by adding new classes in the Qualifier.
110
                Scheme "dnet:result_result_relations";
111
oaf. */
112

    
113
        return buff;
114
    }
115

    
116
    private String buildRel(OafRel Rel) {
117
        return getStringField(Rel.getTarget());
118
    }
119

    
120

    
121
    private String getResultProject(OafRel oaf) {
122
        String buff = new String();
123
        String result = oaf.getTarget();
124

    
125
        buff += getStringField(result);
126
        //   is declared as int!!!
127
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
128
        if (diff < 0) {
129
            diff = 0;
130
        }
131
        buff += getNumericField(String.valueOf(diff));
132

    
133
        return buff;
134
    }
135

    
136

    
137
    private String buildDatasource(OafEntity data) {
138

    
139
        String buff = new String();
140

    
141
        Datasource d = data.getDatasource();
142

    
143
        Metadata metadata = d.getMetadata();
144

    
145
        //`original Id`
146
        String originalId = new String();
147

    
148
        for (String oid : data.getOriginalIdList()) {
149
            originalId += oid + ";";
150
        }
151

    
152
        buff += getStringField(originalId);
153

    
154
        //dateOfCollection
155
        buff += getStringDateField(data.getDateofcollection());
156

    
157
        //Provider Type
158
        if (metadata.hasDatasourcetype()) {
159
            buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
160
        } else {
161
            buff += getStringField(null);
162
        }
163

    
164
        //Compatibility
165
        buff += getStringField(metadata.getOpenairecompatibility().getClassname());
166

    
167
        //Official Name
168
        buff += getStringField(metadata.getOfficialname().getValue());
169

    
170
        // English Name
171
        buff += getStringField(metadata.getEnglishname().getValue());
172

    
173
        //Website URL
174
        buff += getStringField(metadata.getWebsiteurl().getValue());
175

    
176
        //LogoURL
177
        buff += getStringField(metadata.getLogourl().getValue());
178

    
179
        //Email
180
        buff += getStringField(metadata.getContactemail().getValue());
181

    
182
        //Namespaceprefix
183
        buff += getStringField(metadata.getNamespaceprefix().getValue());
184

    
185
        // latitude
186
        buff += getStringField(metadata.getLatitude().getValue());
187

    
188
        // longtitude
189
        buff += getStringField(metadata.getLongitude().getValue());
190

    
191
        // dateofvalidation,
192
        buff += getStringField(metadata.getDateofvalidation().getValue());
193

    
194
        //Description
195
        buff += getStringField(metadata.getDescription().getValue());
196

    
197
        //subjects
198
        String subj = new String();
199
        for (StructuredProperty s : metadata.getSubjectsList()) {
200
            subj += s.getValue() + ';';
201

    
202
        }
203

    
204
        //subjects
205
        buff += getStringField(subj);
206

    
207
        //Number of items
208
        buff += getStringField(metadata.getOdnumberofitems().getValue());
209

    
210
        //Date of number of items
211
        buff += getStringField(metadata.getOdnumberofitemsdate().getValue());
212

    
213
        // Policies
214
        buff += getStringField(metadata.getOdpolicies().getValue());
215

    
216
        //languages
217
        String languages = new String();
218
        for (StringField lang : metadata.getOdlanguagesList()) {
219
            languages += lang.getValue() + ";";
220
        }
221
        buff += getStringField(languages);
222

    
223

    
224
        // Content type
225
        String contentType = new String();
226
        for (StringField c : metadata.getOdcontenttypesList()) {
227
            contentType += c.getValue() + ";";
228
        }
229
        buff += getStringField(contentType);
230

    
231
        //Access info package
232
        String accessInfo = new String();
233
        for (StringField c : metadata.getAccessinfopackageList()) {
234
            accessInfo += c.getValue() + ";";
235
        }
236
        buff += getStringField(accessInfo);
237

    
238
        //Release start date
239
        buff += getStringField(metadata.getReleasestartdate().getValue());
240

    
241
        //Release end date
242
        buff += getStringField(metadata.getReleaseenddate().getValue());
243

    
244
        //Mission statement url
245
        buff += getStringField(metadata.getMissionstatementurl().getValue());
246

    
247
        //Data provider
248
        buff += getStringField(String.valueOf(metadata.getDataprovider().getValue()));
249

    
250
        //Service provider
251
        buff += getStringField(String.valueOf(metadata.getServiceprovider().getValue()));
252

    
253
        //Database access type
254
        buff += getStringField(metadata.getDatabaseaccessrestriction().getValue());
255

    
256
        //Data upload type
257
        buff += getStringField(metadata.getDatauploadtype().getValue());
258

    
259
        //Data upload restrictions
260
        buff += getStringField(metadata.getDatauploadrestriction().getValue());
261

    
262
        //Versioning
263
        buff += getStringField(String.valueOf(metadata.getVersioning().getValue()));
264

    
265
        //Citation guideline url
266
        buff += getStringField(String.valueOf(metadata.getCitationguidelineurl().getValue()));
267

    
268
        //Quality management kind
269
        buff += getStringField(String.valueOf(metadata.getQualitymanagementkind().getValue()));
270

    
271
        //PID systems
272
        buff += getStringField(metadata.getPidsystems().getValue());
273

    
274
        //Certificates
275
        buff += getStringField(metadata.getCertificates().getValue());
276

    
277
        //Policies
278
        String policies = new String();
279
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
280
            policies += property.getValue() + ";";
281
        }
282
        buff += getStringField(policies);
283

    
284
        return buff;
285
    }
286

    
287

    
288
    private String buildOrganization(OafEntity data) {
289

    
290
        String buff = new String();
291

    
292
        Organization organization = data.getOrganization();
293
        eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
294

    
295
        //`original Id`
296
        String originalId = new String();
297
        for (String oid : data.getOriginalIdList()) {
298
            originalId += oid + ";";
299
        }
300

    
301

    
302
        buff += getStringField(originalId);
303
        //dateOfCollection
304
        buff += getStringDateField(data.getDateofcollection());
305

    
306

    
307
        //getLegalshortname
308
        buff += getStringField(metadata.getLegalshortname().getValue());
309
        // `name`,
310
        buff += getStringField(metadata.getLegalname().getValue());
311
        //website URL
312
        buff += getStringField(metadata.getWebsiteurl().getValue());
313
        //logourl
314
        buff += getStringField(metadata.getLogourl().getValue());
315
        // `country`,
316
        buff += getStringField(metadata.getCountry().getClassname());
317

    
318

    
319
        return buff;
320

    
321
    }
322

    
323
    private String buildResult(OafEntity data) {
324
        String buff = new String();
325

    
326
        Result result = data.getResult();
327
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
328

    
329
        //`original Id`
330
        String originalId = new String();
331

    
332
        for (String oid : data.getOriginalIdList()) {
333
            originalId += oid + ";";
334
        }
335

    
336
        buff += getStringField(originalId);
337

    
338
        //dateOfCollection
339
        buff += getStringDateField(data.getDateofcollection());
340
        //   titleString
341
        String titleString = new String();
342
        String alternativeTitles = new String();
343

    
344
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
345
            StructuredProperty title = metadata.getTitleList().get(i);
346
            if (i == 0) {
347
                titleString = title.getValue().replaceAll("\\s+", " ");
348
                titleString = titleString.replaceAll("\n", " ");
349
            } else {
350
                alternativeTitles += title.getValue().replaceAll("\\s+", " ") + " ; ";
351
                alternativeTitles = alternativeTitles.replaceAll("\n", " ");
352
            }
353
            break;
354
        }
355

    
356
        //  pubtitle
357
        buff += getStringField(titleString);
358

    
359
        // alternative titles
360
        //buff += getStringField(alternativeTitles);    //  null#!
361
        // date of acceptance CHANGED THIS TO DATE FORMAT
362
        buff += getStringDateField(metadata.getDateofacceptance().getValue());
363

    
364
        // publisher
365
        buff += getStringField(metadata.getPublisher().getValue());
366

    
367

    
368
        //PID
369
        String pids = new String();
370
        for (StructuredProperty p : data.getPidList()) {
371
            pids += p.getValue() + ";";
372
        }
373
        buff += getStringField(pids);
374

    
375
        //language
376
        buff += getStringField(metadata.getLanguage().getClassname());
377

    
378
        // RelevantDate
379
        String reldate = new String();
380

    
381
        for (StructuredProperty p : metadata.getRelevantdateList()) {
382
            reldate += p.getValue();
383
            break;
384
        }
385
        buff += getStringField(reldate);
386

    
387
        //Subject
388
        String subjects = new String();
389
        for (StructuredProperty subj : metadata.getSubjectList()) {
390

    
391
            if (isValidTopic(subj.getValue())) {
392
                if (!isNumeric(subj.getValue())) {
393
                    subjects += subj.getValue() + ";";
394
                }
395
            }
396
        }
397

    
398
        buff += subjects;
399

    
400
        // TODO Instance
401
        // buff += getStringField();
402

    
403
        //TODO ExternalReference
404

    
405

    
406
        //Source
407
        String source = new String();
408
        for (StringField s : metadata.getSourceList()) {
409
            source += s.getValue() + ";";
410
        }
411

    
412
        buff += getStringField(source);
413

    
414

    
415
        //TODO Format     
416
        buff += getStringField("");
417
        //DOES NOT EXIST
418
          /*String formatString = new String();
419
        for (StringField format : metadata.getFormatList()) {
420
            formatString = format.getValue();
421
            break;}
422
               buff += getStringField(formatString);
423
        }*/
424
        //Context
425
        String context = new String();
426
        for (Result.Context s : metadata.getContextList()) {
427
            context += s.getId() + ";";
428
        }
429
        buff += getStringField(context);
430

    
431
        //country TODO does not exist; throws error
432
        String country = new String();
433
       // for (Qualifier s : metadata.getCountryList()) {
434
          //  country += s.getClassname() + ";";
435
       // }
436

    
437
        buff += getStringField(country);
438

    
439
        // access_mode,
440
        buff += getStringField(getAccessMode(result));
441
        //Best License
442
        buff += getStringField(getBestLicense(result));
443
        //Description
444
        String description = new String();
445

    
446
        for (StringField desc : metadata.getDescriptionList()) {
447
            description += desc;
448
            break;
449
        }
450
        buff += getStringField(description);
451
        //Journal  
452
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
453

    
454
        //journalName                     
455
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
456

    
457
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
458

    
459
        //ISSN                                  
460
        buff += getStringField(metadata.getJournal().getIssnLinking());
461

    
462
        //embargoEndDate    
463
        buff += getStringField(metadata.getEmbargoenddate().getValue());
464

    
465

    
466
        // `authors`,
467
        int authors = 0;
468
        String delayed = "no";
469

    
470
        for (OafRel rel : data.getCachedRelList()) {
471

    
472
            if (rel.getRelType().equals(RelType.personResult)) {
473

    
474
                authors++;
475
            } else if (rel.getRelType().equals(RelType.resultProject))
476
            // TODO remember : in result Project, first id is project, second is  result.
477

    
478
            {
479
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
480
                if (Integer.parseInt(daysfromend) > 0) {
481
                    delayed = "yes";
482
                }
483
            }
484
        }
485

    
486
        buff += getNumericField(String.valueOf(authors));
487

    
488

    
489
        // TODO isRelatedTo
490

    
491
        //   resource type
492
        buff += getStringField(metadata.getResourcetype().getClassname());
493
        //   device
494
        buff += getStringField(metadata.getDevice().getValue());
495
        //   size
496
        buff += getStringField(metadata.getSize().getValue());
497
        //     version
498
        buff += getStringField(metadata.getVersion().getValue());
499
        //   metadata update
500
        buff += getStringField(metadata.getLastmetadataupdate().getValue());
501
        //   metadata version
502
        buff += getStringField(metadata.getMetadataversionnumber().getValue());
503

    
504
        // `delayed`,
505
        buff += getStringField(delayed);
506

    
507
        // year
508
        buff += getYearInt(metadata.getDateofacceptance().getValue());
509

    
510
        // type
511
        buff += getStringField(metadata.getResulttype().getClassname());
512
        //classification
513
        String classification = new String();
514
        for (Instance instance : (result.getInstanceList())) {
515

    
516
            if (instance.getInstancetype().getClassname() != null && !instance.getInstancetype().getClassname().isEmpty()) {
517
                classification += instance.getInstancetype().getClassname() + ';';
518
            }
519
        }
520

    
521
        buff += getStringField(classification);
522

    
523
        //  hosted by
524
        String hostedBy = new String();
525
        for (Instance instance : (result.getInstanceList())) {
526
            String host = instance.getHostedby().getKey();
527
            if (host != null && !host.isEmpty()) {
528
                hostedBy += host + ";";
529
            }
530
        }
531

    
532
        buff += getStringField(hostedBy);
533

    
534
        //   collectedfrom
535
        String collectedFrom = new String();
536
        for (FieldTypeProtos.KeyValue collectedFromValue : (data.getCollectedfromList())) {
537

    
538
            String host = collectedFromValue.getKey();
539
            if (host != null && !host.isEmpty()) {
540
                collectedFrom += host + ';';
541

    
542
            }
543
        }
544
        buff += getStringField(collectedFrom);
545
        return buff;
546
    }
547

    
548

    
549
    private String buildProject(OafEntity data) {
550

    
551
        String buff = new String();
552

    
553
        Project project = data.getProject();
554
        eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
555

    
556
        //`original Id`
557
        String originalId = new String();
558

    
559
        for (String oid : data.getOriginalIdList()) {
560
            originalId += oid + ";";
561
        }
562

    
563
        buff += getStringField(originalId);
564

    
565
        //dateOfCollection
566
        buff += getStringDateField(data.getDateofcollection());
567

    
568
        //Code
569
        buff += getStringField(metadata.getCode().getValue());
570
        // `url`,
571
        buff += getStringField(metadata.getWebsiteurl().getValue());
572

    
573
        // `acronym`,
574
        String acronym = metadata.getAcronym().getValue();
575
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
576
            acronym = metadata.getTitle().getValue();
577
        }
578

    
579
        buff += getStringField(acronym);
580

    
581
        //title!
582
        String title = getStringField(metadata.getTitle().getValue());
583
        buff += getStringField(title);
584

    
585
        // startdate
586
        buff += getNumericField(metadata.getStartdate().getValue());
587

    
588
        // enddate
589
        buff += getNumericField(metadata.getEnddate().getValue());
590

    
591
        //`Call identifer`
592
        buff += getStringField(metadata.getCallidentifier().getValue());
593

    
594
        //`KeyWords`
595
        buff += getStringField(metadata.getKeywords().getValue());
596

    
597
        //`Duration`
598
        buff += getStringField(metadata.getDuration().getValue());
599

    
600
        //esc39
601
        buff += getStringField(metadata.getEcsc39().getValue().toString());
602

    
603
        //`Contracttype`
604
        buff += getStringField(metadata.getContracttype().getClassname());
605

    
606
        //`OA mandate pubs`  TODO DOES NOT EXIST
607
      //  buff += getStringField(metadata.getOamandatepublications().getValue());
608

    
609
        //`Subjects` TODO DOES NOT EXIST EITHER
610
     /*   String subjects = new String();
611
        for (StructuredProperty s : metadata.getSubjectsList()) {
612

    
613
            subjects += s.getValue() + ';';
614
        }
615
        buff += getStringField(subjects);*/
616

    
617

    
618
        //`EC293`
619
        buff += getStringField(metadata.getEcarticle293().getValue());
620

    
621

    
622
        List<StringField> fundList = metadata.getFundingtreeList();
623

    
624
//TODO possible to have multiple funders per project?
625
        if (!fundList.isEmpty()) // `funding_lvl0`,
626
        {
627
            //TODO funder + 3 funding levels
628
           /* funder text,
629
            funding_lvl0 text,
630
	        funding_lvl1 text,
631
	        funding_lvl2 text,
632
	        funding_lvl3 text,*/
633
            buff += this.fundingParser.getFundingInfo(fundList.get(0).getValue());
634

    
635
        } else {
636

    
637
            buff += this.fundingParser.getFundingInfo("");
638
        }
639

    
640
        return buff;
641

    
642
    }
643

    
644

    
645
    private String buildPerson(OafEntity data) {
646

    
647
        String buff = new String();
648

    
649
        PersonProtos.Person person = data.getPerson();
650
        eu.dnetlib.data.proto.PersonProtos.Person.Metadata metadata = person.getMetadata();
651

    
652

    
653
        //`original Id`
654
        String originalId = new String();
655

    
656
        for (String oid : data.getOriginalIdList()) {
657
            originalId += oid + ";";
658
        }
659

    
660
        buff += getStringField(originalId);
661
        //dateOfCollection
662
        buff += getStringDateField(data.getDateofcollection());
663

    
664
        // `firstname`,
665
        buff += metadata.getFirstname();
666

    
667
        // `secondNames`,
668

    
669
        String secondNames = new String();
670
        for (StringField s : metadata.getSecondnamesList()) {
671

    
672
            secondNames += s.getValue() + ' ';
673
        }
674

    
675
        buff += getStringField(secondNames);
676

    
677
        // `fullname`,
678
        buff += getStringField(metadata.getFullname().getValue());
679
        // `Fax`,
680
        buff += getStringField(metadata.getFax().getValue());
681
        // `Email`,
682
        buff += getStringField(metadata.getEmail().getValue());
683
        // `Phone`,
684
        buff += getStringField(metadata.getPhone().getValue());
685

    
686
        // `Nationality`,
687
        buff += getStringField(metadata.getNationality().getClassname());
688

    
689
        // `PIDS`,
690
        String pids = new String();
691
        for (StructuredProperty s : data.getPidList()) {
692

    
693
            pids += s.getValue() + ";";
694

    
695

    
696
        }
697
        buff += getStringField(pids);
698

    
699
        // `collected from`,
700
        String collectedFrom = new String();
701
        for (FieldTypeProtos.KeyValue s : data.getCollectedfromList()) {
702
            collectedFrom += s.getValue() + ";";
703
        }
704
        buff += getStringField(collectedFrom);
705
        return buff;
706

    
707
    }
708

    
709
    public String cleanId(String value) {
710
        if (value != null) {
711
            //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
712

    
713

    
714
            // to datacite____:: )
715
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
716
            value = value.replaceFirst(".*\\|", "");
717
            value = value.replaceAll("\n", "");
718
            value = value.replaceAll(DELIM, "");
719
            value = value.replaceAll(ENCLOSED, "");
720
            value = value.trim();
721

    
722
        }
723
        if (value == null) {
724
            return null;
725
        }
726
        return ENCLOSED + value + ENCLOSED;
727

    
728
    }
729

    
730

    
731
    private String getNumericField(String data) {
732
        if (data == null || data.isEmpty() || data.equals("")) {
733
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
734
        } else {
735

    
736
            return ENCLOSED + data + ENCLOSED + DELIM;
737
        }
738
    }
739

    
740

    
741
    private String getYearDifferenceInteger(String enddate, String startdate) {
742

    
743
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
744

    
745
            String[] split = startdate.split("-");
746

    
747
            if (split == null || split.length == 0) {
748
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
749
            }
750

    
751
            int Startdate = Integer.parseInt(split[0]);
752

    
753
            split = enddate.split("-");
754

    
755
            if (split == null || split.length == 0) {
756
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
757
            }
758

    
759
            int Enddate = Integer.parseInt(split[0]);
760

    
761
            int diff = Enddate - Startdate;
762

    
763
            return ENCLOSED + diff + ENCLOSED + DELIM;
764

    
765
        }
766

    
767
        return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
768
    }
769

    
770
    private String getYearInt(String data) {
771
        if (data == null || data.isEmpty() || data.equals("-1")) {
772
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
773
        }
774

    
775
        String[] split = data.split("-");
776

    
777
        if (split == null || split.length == 0) {
778
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
779
        }
780

    
781
        String year = split[0];
782

    
783
        year = cleanNumber(year);
784

    
785
        return ENCLOSED + year + ENCLOSED + DELIM;
786

    
787

    
788
    }
789

    
790
    private static String cleanNumber(String number) {
791
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
792

    
793
        return number;
794
    }
795

    
796
    private String getStringField(String data) {
797

    
798
        if (data == null || data.isEmpty() || data.equals("")) {
799

    
800
            return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
801
        } else {
802

    
803
            String field = clean(data);
804
            if (field == null) {
805
                return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
806
            } else {
807
                return field + DELIM;
808
            }
809
        }
810
    }
811

    
812
    private String getStringDateField(String data) {
813

    
814
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
815

    
816
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
817
        } else {
818

    
819
            String field = clean(data);
820
            if (field == null) {
821
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
822
            } else {
823
                return field + DELIM;
824
            }
825
        }
826
    }
827

    
828

    
829
    public String getId(OafProtos.Oaf oaf) {
830
        switch (oaf.getKind()) {
831
            case entity:
832

    
833
                return cleanId(oaf.getEntity().getId());
834
            case relation:
835

    
836
                return cleanId(oaf.getRel().getSource());
837

    
838
        }
839
        return null;
840

    
841
    }
842

    
843
    private boolean isNumeric(String str) {
844

    
845
        str = str.replaceAll("[^A-Za-z0-9 ]", "");
846
        str = str.replaceAll(" ", "");
847
        return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional
848
        // '-' and decimal.
849
    }
850

    
851
    //   there are topics with "null" as value -> replace them
852
    private boolean isValidTopic(String t) {
853

    
854
        if (t == null || t.isEmpty()) {
855
            return false;
856
        }
857

    
858
        if (t.equals("") || t.equals(" ")) {
859
            return false;
860
        }
861
        if (t.equals("null") || t.equals("Null") || t.equals("NULL")) {
862
            return false;
863
        }
864

    
865
        if (t.equals(ENCLOSED + ENCLOSED + DELIM) || t.equals(ENCLOSED + NULL_STRING + ENCLOSED + DELIM)) {
866
            return false;
867
        }
868
        // skip dedups
869
        if (t.contains("ddc:")) {
870

    
871
            return false;
872
        }
873
        return true;
874
    }
875

    
876

    
877
    private String getBestLicense(ResultProtos.Result result) {
878
        FieldTypeProtos.Qualifier bestLicense = null;
879
        LicenseComparator lc = new LicenseComparator();
880
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
881
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
882
                bestLicense = instance.getLicence();
883
            }
884
        }
885
        if (bestLicense != null) {
886
            return bestLicense.getClassname();
887
        } else {
888
            return null;
889
        }
890
    }
891

    
892
    //   here iterate over all values
893
    private String getAccessMode(ResultProtos.Result result) {
894
        String accessMode = null;
895
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
896
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
897
                accessMode = instance.getLicence().getClassname();
898
                break;
899
            }
900

    
901
        }
902

    
903

    
904
        return accessMode;
905
    }
906

    
907

    
908
    public String getId(OafProtos.OafRel relOaf) {
909
        return cleanId(relOaf.getSource());
910
    }
911

    
912
    private String clean(String value) {
913
        if (value != null) {
914
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
915
            // to datacite____:: )
916
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
917
            value = value.replaceFirst(".*\\|", "");
918
            value = value.replaceAll(DELIM, "");
919
            value = value.replaceAll(",", "");
920
            value = value.replaceAll("\"", "");
921
            value = value.replaceAll("'", "");
922
            value = value.replaceAll(ENCLOSED, "");
923
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
924
            value = value.replaceAll("\\s+", " ");
925
            value = value.replaceAll("(\\r|\\n)", " ");
926
            value = value.replaceAll("\\t", " ");
927

    
928
            // value = value.replaceAll("[^A-Za-z0-9:,____-;:]", " ");
929
            value = value.trim();
930

    
931
        }
932
        if (value == null) {
933
            return null;
934
        }
935
        return ENCLOSED + value + ENCLOSED;
936

    
937
    }
938

    
939

    
940
    public long DATEDIFF(String startDate, String endDate) {
941
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
942
        long days = 0l;
943
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
944
        // <startdate>2011-09-01</startdate>
945
        // <enddate>2015-08-31</enddate>
946
        Date dateIni = null;
947
        Date dateFin = null;
948

    
949
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
950
            return 0;
951
        }
952
        try {
953
            dateIni = (Date) format.parse(startDate);
954
            dateFin = (Date) format.parse(endDate);
955
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
956
        } catch (Exception e) {
957
            log.error(e.toString());
958
            return 0;
959
        }
960

    
961
        return days;
962
    }
963

    
964
    public String getDELIM() {
965
        return DELIM;
966
    }
967

    
968
    public void setDELIM(String dELIM) {
969
        DELIM = dELIM;
970
    }
971

    
972
    public String getNULL_STRING() {
973
        return NULL_STRING;
974
    }
975

    
976
    public void setNULL_STRING(String nULL_STRING) {
977
        NULL_STRING = nULL_STRING;
978
    }
979

    
980
    public String getNULL_NUM() {
981
        return NULL_NUM;
982
    }
983

    
984
    public void setNULL_NUM(String nULL_NUM) {
985
        NULL_NUM = nULL_NUM;
986
    }
987

    
988
    public String getENCLOSED() {
989
        return ENCLOSED;
990
    }
991

    
992
    public void setENCLOSED(String eNCLOSED) {
993
        ENCLOSED = eNCLOSED;
994
    }
995

    
996
}
(4-4/4)