Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
5
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
6
import eu.dnetlib.data.proto.FieldTypeProtos;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
9
import eu.dnetlib.data.proto.OafProtos;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.OafProtos.OafRel;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17
import eu.dnetlib.data.proto.ResultProtos;
18
import eu.dnetlib.data.proto.ResultProtos.Result;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import org.apache.log4j.Logger;
21

    
22
import java.text.SimpleDateFormat;
23
import java.util.ArrayList;
24
import java.util.Date;
25
import java.util.List;
26

    
27
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
31
public class Serializer {
32

    
33
    private static String DELIM;
34
    private Logger log = Logger.getLogger(this.getClass());
35
    private String NULL_STRING = null;
36
    private String NULL_NUM = "0";
37

    
38
    private static String ENCLOSED;
39
    private FundingParser fundingParser = new FundingParser();
40

    
41

    
42
    /**
     * Creates a serializer. The constructor itself does no work; the instance
     * fields are set by their own initialisers.
     */
    public Serializer() {
        // intentionally empty
    }
44

    
45
    public String serialize(Oaf oaf) {
46

    
47
        switch (oaf.getKind()) {
48
            case entity:
49
                OafEntity valueEntity = oaf.getEntity();
50

    
51

    
52
                switch (valueEntity.getType()) {
53
                    case datasource:
54

    
55
                        return buildDatasource(valueEntity);
56

    
57
                    case organization:
58

    
59
                        return buildOrganization(valueEntity);
60

    
61
                    case project:
62

    
63
                        return buildProject(valueEntity);
64
                    case result:
65

    
66
                        return buildResult(valueEntity);
67
                    case person:
68
                        return buildPerson(valueEntity);
69
                    default:
70
                        log.error("wrong type");
71
                        break;
72
                }
73
                break;
74
            case relation:
75
                OafRel valueRel = oaf.getRel();
76

    
77
                return buildRel(valueRel);
78

    
79
        }
80

    
81
        return null;
82

    
83
    }
84

    
85
    public String serialize(OafRel oaf) {
86

    
87
        switch (oaf.getRelType()) {
88

    
89
            default:
90
                return buildRel(oaf);
91
        }
92

    
93
    }
94

    
95

    
96
    public ArrayList<String> extractRelations(Oaf oaf) {
97
        OafEntity valueEntity = oaf.getEntity();
98
        ArrayList<String> relations = new ArrayList<String>();
99
        switch (valueEntity.getType()) {
100
            case result:
101
                relations.addAll(getResultDatasources(valueEntity));
102
                relations.addAll(getDedups(valueEntity));
103
                return relations;
104
            case datasource:
105
                relations.addAll(getDedups(valueEntity));
106
                return relations;
107
            case person:
108
                relations.addAll(getDedups(valueEntity));
109
                return relations;
110
            default:
111
                return null;
112
        }
113

    
114
    }
115

    
116
    private String buildRel(OafRel Rel) {
117

    
118
        return getStringField(Rel.getRelType().getDescriptorForType().getName()) + getStringField(Rel.getSource()) + getStringField(Rel.getTarget());
119

    
120
    }
121

    
122

    
123
    private String getResultResult(OafRel oaf) {
124

    
125

    
126
        String buff = new String();
127

    
128
        buff += getStringField(oaf.getTarget());
129

    
130
        buff += getStringField(String.valueOf(oaf.getResultResult().getSimilarity().getSimilarity()));
131
      /* •	hasAmongTopNSimilarDocuments: r1 hasAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar, and that we also have r2 isAmongTopNSimilarDocuments of r1.
132
        In OpenAIRE, N so far always equals 20.
133
        •	isAmongTopNSimilarDocuments: r1 isAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar and that r2 hasAmongTopNSimilarDocuments of r1;
134
        •	isRelatedTo: two results are somehow related to each other. OpenAIRE may further refine the semantics of possible types of "relatedness" by adding new classes in the Qualifier.
135
                Scheme "dnet:result_result_relations";
136
oaf. */
137
        return buff;
138
    }
139

    
140
    private String getHeader(OafEntity data) {
141
        String buff = new String();
142

    
143
        //  EntityType
144
        buff += getStringField(data.getType().name());
145
        // OpenaireID
146
        buff += getStringField(getStringDateField(cleanId(data.getId())));
147

    
148
        //  dateOfTransformation
149
        // TODO change to dateOfTransformation here when released
150
        buff += getStringField(data.getDateofcollection());
151
        //    dateOfCollection
152
        buff += getStringField(data.getDateofcollection());
153

    
154
        String originalId = new String();
155
// originalId
156

    
157
        for (String oid : data.getOriginalIdList()) {
158
            originalId += oid + ";";
159
        }
160

    
161
        buff += getStringField(originalId);
162

    
163
        return buff;
164

    
165
    }
166

    
167
    private String buildDatasource(OafEntity data) {
168

    
169

    
170
        Datasource d = data.getDatasource();
171

    
172
        Metadata metadata = d.getMetadata();
173

    
174

    
175
        String buff = getHeader(data);
176

    
177

    
178
/*
179

    
180
        OfficialName
181
                Englishname
182
        Websiteurl
183
                Logourl
184
        Contactemail
185
                Namespaceprefix
186
        Latitude
187
                Longitude
188
        Dateofvalidation
189
                Description
190
        SubjectsList
191
                Odnumberofitems
192
        Odnumberofitemsdate
193
                Odpolicies
194
        OdlanguagesList
195
                OdcontenttypesList
196
        Accessinfopackage
197
                Releasestartdate
198
        Releaseenddate
199
                Missionstatementurl
200
        Dataprovider
201
                Serviceprovider
202
        Databaseaccessrestriction
203
                Datauploadtype
204
        Datauploadrestriction
205
                Versioning
206
        Citationguidelineurl
207
                Qualitymanagementkind
208
        Pidsystems
209
                Certificates
210
        Policies
211
                Trust
212
*/
213

    
214
        //Datasourcetype
215
        if (metadata.hasDatasourcetype()) {
216
            buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
217
        } else {
218
            buff += getStringField(null);
219
        }
220

    
221
        //Openairecompatibility
222
        buff += getStringField(metadata.getOpenairecompatibility().getClassname());
223

    
224
        //Official Name
225
        buff += getStringField(metadata.getOfficialname().getValue());
226

    
227
        // English Name
228
        buff += getStringField(metadata.getEnglishname().getValue());
229

    
230
        //Website URL
231
        buff += getStringField(metadata.getWebsiteurl().getValue());
232

    
233
        //LogoURL
234
        buff += getStringField(metadata.getLogourl().getValue());
235

    
236
        //Email
237
        buff += getStringField(metadata.getContactemail().getValue());
238

    
239
        //Namespaceprefix
240
        buff += getStringField(metadata.getNamespaceprefix().getValue());
241

    
242
        // latitude
243
        buff += getStringField(metadata.getLatitude().getValue());
244

    
245
        // longtitude
246
        buff += getStringField(metadata.getLongitude().getValue());
247

    
248
        // dateofvalidation,
249
        buff += getStringField(metadata.getDateofvalidation().getValue());
250

    
251
        //Description
252
        buff += getStringField(metadata.getDescription().getValue());
253

    
254
        //subjects
255
        String subj = new String();
256
        for (StructuredProperty s : metadata.getSubjectsList()) {
257
            subj += s.getValue() + ';';
258

    
259
        }
260

    
261
        //subjects
262
        buff += getStringField(subj);
263

    
264
        //Number of items
265
        buff += getStringField(metadata.getOdnumberofitems().getValue());
266

    
267
        //Date of number of items
268
        buff += getStringField(metadata.getOdnumberofitemsdate().getValue());
269

    
270
        // Policies
271
        buff += getStringField(metadata.getOdpolicies().getValue());
272

    
273
        //languages
274
        String languages = new String();
275
        for (StringField lang : metadata.getOdlanguagesList()) {
276
            languages += lang.getValue() + ";";
277
        }
278
        buff += getStringField(languages);
279

    
280

    
281
        // Content type
282
        String contentType = new String();
283
        for (StringField c : metadata.getOdcontenttypesList()) {
284
            contentType += c.getValue() + ";";
285
        }
286
        buff += getStringField(contentType);
287

    
288
        //Access info package
289
        String accessInfo = new String();
290
        for (StringField c : metadata.getAccessinfopackageList()) {
291
            accessInfo += c.getValue() + ";";
292
        }
293
        buff += getStringField(accessInfo);
294

    
295
        //Release start date
296
        buff += getStringField(metadata.getReleasestartdate().getValue());
297

    
298
        //Release end date
299
        buff += getStringField(metadata.getReleaseenddate().getValue());
300

    
301
        //Mission statement url
302
        buff += getStringField(metadata.getMissionstatementurl().getValue());
303

    
304
        //Data provider
305
        buff += getStringField(String.valueOf(metadata.getDataprovider().getValue()));
306

    
307
        //Service provider
308
        buff += getStringField(String.valueOf(metadata.getServiceprovider().getValue()));
309

    
310
        //Database access type
311
        buff += getStringField(metadata.getDatabaseaccessrestriction().getValue());
312

    
313
        //Data upload type
314
        buff += getStringField(metadata.getDatauploadtype().getValue());
315

    
316
        //Data upload restrictions
317
        buff += getStringField(metadata.getDatauploadrestriction().getValue());
318

    
319
        //Versioning
320
        buff += getStringField(String.valueOf(metadata.getVersioning().getValue()));
321

    
322
        //Citation guideline url
323
        buff += getStringField(String.valueOf(metadata.getCitationguidelineurl().getValue()));
324

    
325
        //Quality management kind
326
        buff += getStringField(String.valueOf(metadata.getQualitymanagementkind().getValue()));
327

    
328
        //PID systems
329
        buff += getStringField(metadata.getPidsystems().getValue());
330

    
331
        //Certificates
332
        buff += getStringField(metadata.getCertificates().getValue());
333

    
334
        //Policies
335
        String policies = new String();
336
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
337
            policies += property.getValue() + ";";
338
        }
339
        buff += getStringField(policies);
340

    
341
        return buff;
342
    }
343

    
344

    
345
    private String buildOrganization(OafEntity data) {
346

    
347

    
348
        Organization organization = data.getOrganization();
349
        eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
350

    
351
        String buff = getHeader(data);
352

    
353
        //getLegalshortname
354
        buff += getStringField(metadata.getLegalshortname().getValue());
355
        // `name`,
356
        buff += getStringField(metadata.getLegalname().getValue());
357
        //website URL
358
        buff += getStringField(metadata.getWebsiteurl().getValue());
359
        //logourl
360
        buff += getStringField(metadata.getLogourl().getValue());
361
        // `country`,
362
        buff += getStringField(metadata.getCountry().getClassname());
363

    
364

    
365
        return buff;
366

    
367
    }
368

    
369
    private String buildResult(OafEntity data) {
370

    
371
        Result result = data.getResult();
372
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
373

    
374

    
375
        String buff = getHeader(data);
376
        //   titleString
377
        String titleString = new String();
378
        String alternativeTitles = new String();
379

    
380
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
381
            StructuredProperty title = metadata.getTitleList().get(i);
382
            if (i == 0) {
383
                titleString = title.getValue().replaceAll("\\s+", " ");
384
                titleString = titleString.replaceAll("\n", " ");
385
            } else {
386
                alternativeTitles += title.getValue().replaceAll("\\s+", " ") + " ; ";
387
                alternativeTitles = alternativeTitles.replaceAll("\n", " ");
388
            }
389
            break;
390
        }
391

    
392
        //  pubtitle
393
        buff += getStringField(titleString);
394

    
395
        // alternative titles
396
        //buff += getStringField(alternativeTitles);    //  null#!
397
        // date of acceptance CHANGED THIS TO DATE FORMAT
398
        buff += getStringDateField(metadata.getDateofacceptance().getValue());
399

    
400
        // publisher
401
        buff += getStringField(metadata.getPublisher().getValue());
402

    
403

    
404
        //PID
405
        String pids = new String();
406
        for (StructuredProperty p : data.getPidList()) {
407
            pids += p.getValue() + ";";
408
        }
409
        buff += getStringField(pids);
410

    
411
        //language
412
        buff += getStringField(metadata.getLanguage().getClassname());
413

    
414
        // RelevantDate
415
        String reldate = new String();
416

    
417
        for (StructuredProperty p : metadata.getRelevantdateList()) {
418
            reldate += p.getValue();
419
            break;
420
        }
421
        buff += getStringField(reldate);
422

    
423
        //Subject
424
        String subjects = new String();
425
        for (StructuredProperty subj : metadata.getSubjectList()) {
426

    
427
            if (isValidTopic(subj.getValue())) {
428
                if (!isNumeric(subj.getValue())) {
429
                    subjects += subj.getValue() + ";";
430
                }
431
            }
432
        }
433

    
434
        buff += subjects;
435

    
436
        // TODO Instance
437
        // buff += getStringField();
438

    
439
        //TODO ExternalReference
440

    
441

    
442
        //Source
443
        String source = new String();
444
        for (StringField s : metadata.getSourceList()) {
445
            source += s.getValue() + ";";
446
        }
447

    
448
        buff += getStringField(source);
449

    
450

    
451
        //TODO Format     
452
        buff += getStringField("");
453
        //DOES NOT EXIST
454
          /*String formatString = new String();
455
        for (StringField format : metadata.getFormatList()) {
456
            formatString = format.getValue();
457
            break;}
458
               buff += getStringField(formatString);
459
        }*/
460
        //Context
461
        String context = new String();
462
        for (Result.Context s : metadata.getContextList()) {
463
            context += s.getId() + ";";
464
        }
465
        buff += getStringField(context);
466

    
467
        //country TODO does not exist; throws error
468
        String country = new String();
469
        // for (Qualifier s : metadata.getCountryList()) {
470
        //  country += s.getClassname() + ";";
471
        // }
472

    
473
        buff += getStringField(country);
474

    
475
        // access_mode,
476
        buff += getStringField(getAccessMode(result));
477
        //Best License
478
        buff += getStringField(getBestLicense(result));
479
        //Description
480
        String description = new String();
481

    
482
        for (StringField desc : metadata.getDescriptionList()) {
483
            description += desc;
484
            break;
485
        }
486
        buff += getStringField(description);
487
        //Journal  
488
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
489

    
490
        //journalName                     
491
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
492

    
493
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
494

    
495
        //ISSN                                  
496
        buff += getStringField(metadata.getJournal().getIssnLinking());
497

    
498
        //embargoEndDate    
499
        buff += getStringField(metadata.getEmbargoenddate().getValue());
500

    
501

    
502
        // `authors`,
503
        int authors = 0;
504
        String delayed = "no";
505

    
506
        for (OafRel rel : data.getCachedRelList()) {
507

    
508
            if (rel.getRelType().equals(RelType.personResult)) {
509

    
510
                authors++;
511
            } else if (rel.getRelType().equals(RelType.resultProject))
512
            // TODO remember : in result Project, first id is project, second is  result.
513

    
514
            {
515
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
516
                if (Integer.parseInt(daysfromend) > 0) {
517
                    delayed = "yes";
518
                }
519
            }
520
        }
521

    
522
        buff += getNumericField(String.valueOf(authors));
523

    
524

    
525
        // TODO isRelatedTo
526

    
527
        //   resource type
528
        buff += getStringField(metadata.getResourcetype().getClassname());
529
        //   device
530
        buff += getStringField(metadata.getDevice().getValue());
531
        //   size
532
        buff += getStringField(metadata.getSize().getValue());
533
        //     version
534
        buff += getStringField(metadata.getVersion().getValue());
535
        //   metadata update
536
        buff += getStringField(metadata.getLastmetadataupdate().getValue());
537
        //   metadata version
538
        buff += getStringField(metadata.getMetadataversionnumber().getValue());
539

    
540
        // `delayed`,
541
        buff += getStringField(delayed);
542

    
543
        // year
544
        buff += getYearInt(metadata.getDateofacceptance().getValue());
545

    
546
        // type
547
        buff += getStringField(metadata.getResulttype().getClassname());
548
        //classification
549
        String classification = new String();
550
        for (Instance instance : (result.getInstanceList())) {
551

    
552
            if (instance.getInstancetype().getClassname() != null && !instance.getInstancetype().getClassname().isEmpty()) {
553
                classification += instance.getInstancetype().getClassname() + ';';
554
            }
555
        }
556

    
557
        buff += getStringField(classification);
558

    
559
        //  hosted by
560
        String hostedBy = new String();
561
        for (Instance instance : (result.getInstanceList())) {
562
            String host = instance.getHostedby().getKey();
563
            if (host != null && !host.isEmpty()) {
564
                hostedBy += host + ";";
565
            }
566
        }
567

    
568
        buff += getStringField(hostedBy);
569

    
570
        //   collectedfrom
571
        String collectedFrom = new String();
572
        for (FieldTypeProtos.KeyValue collectedFromValue : (data.getCollectedfromList())) {
573

    
574
            String host = collectedFromValue.getKey();
575
            if (host != null && !host.isEmpty()) {
576
                collectedFrom += host + ';';
577

    
578
            }
579
        }
580
        buff += getStringField(collectedFrom);
581
        return buff;
582
    }
583

    
584

    
585
    private String buildProject(OafEntity data) {
586

    
587

    
588
        Project project = data.getProject();
589
        eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
590
        String buff = getHeader(data);
591
        //Code
592
        buff += getStringField(metadata.getCode().getValue());
593
        // `url`,
594
        buff += getStringField(metadata.getWebsiteurl().getValue());
595

    
596
        // `acronym`,
597
        String acronym = metadata.getAcronym().getValue();
598
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
599
            acronym = metadata.getTitle().getValue();
600
        }
601

    
602
        buff += getStringField(acronym);
603

    
604
        //title!
605
        String title = getStringField(metadata.getTitle().getValue());
606
        buff += getStringField(title);
607

    
608
        // startdate
609
        buff += getNumericField(metadata.getStartdate().getValue());
610

    
611
        // enddate
612
        buff += getNumericField(metadata.getEnddate().getValue());
613

    
614
        //`Call identifer`
615
        buff += getStringField(metadata.getCallidentifier().getValue());
616

    
617
        //`KeyWords`
618
        buff += getStringField(metadata.getKeywords().getValue());
619

    
620
        //`Duration`
621
        buff += getStringField(metadata.getDuration().getValue());
622

    
623
        //esc39
624
        buff += getStringField(metadata.getEcsc39().getValue().toString());
625

    
626
        //`Contracttype`
627
        buff += getStringField(metadata.getContracttype().getClassname());
628

    
629
        //`OA mandate pubs`  TODO DOES NOT EXIST
630
        buff += getStringField(metadata.getOamandatepublications().getValue());
631

    
632
        //`Subjects` TODO DOES NOT EXIST EITHER
633
        String subjects = new String();
634
        for (StructuredProperty s : metadata.getSubjectsList()) {
635

    
636
            subjects += s.getValue() + ';';
637
        }
638
        buff += getStringField(subjects);
639

    
640

    
641
        //`EC293`
642
        buff += getStringField(metadata.getEcarticle293().getValue());
643

    
644

    
645
        List<StringField> fundList = metadata.getFundingtreeList();
646

    
647

    
648
        if (!fundList.isEmpty()) // `funding_lvl0`,
649
        {
650
            //TODO funder + 3 funding levels
651
           /* funder text,
652
            funding_lvl0 text,
653
	        funding_lvl1 text,
654
	        funding_lvl2 text,
655
	        funding_lvl3 text,*/
656
            buff += this.fundingParser.getFundingInfo(fundList.get(0).getValue());
657

    
658
        } else {
659

    
660
            buff += this.fundingParser.getFundingInfo("");
661
        }
662

    
663
        return buff;
664

    
665
    }
666

    
667

    
668
    private String buildPerson(OafEntity data) {
669

    
670
        PersonProtos.Person person = data.getPerson();
671
        eu.dnetlib.data.proto.PersonProtos.Person.Metadata metadata = person.getMetadata();
672

    
673
        String buff = getHeader(data);
674

    
675
        // `firstname`,
676
        buff += metadata.getFirstname();
677

    
678
        // `secondNames`,
679

    
680
        String secondNames = new String();
681
        for (StringField s : metadata.getSecondnamesList()) {
682

    
683
            secondNames += s.getValue() + ' ';
684
        }
685

    
686
        buff += getStringField(secondNames);
687

    
688
        // `fullname`,
689
        buff += getStringField(metadata.getFullname().getValue());
690
        // `Fax`,
691
        buff += getStringField(metadata.getFax().getValue());
692
        // `Email`,
693
        buff += getStringField(metadata.getEmail().getValue());
694
        // `Phone`,
695
        buff += getStringField(metadata.getPhone().getValue());
696

    
697
        // `Nationality`,
698
        buff += getStringField(metadata.getNationality().getClassname());
699

    
700
        // `PIDS`,
701
        String pids = new String();
702
        for (StructuredProperty s : data.getPidList()) {
703

    
704
            pids += s.getValue() + ";";
705

    
706

    
707
        }
708
        buff += getStringField(pids);
709

    
710
        // `collected from`,
711
        String collectedFrom = new String();
712
        for (FieldTypeProtos.KeyValue s : data.getCollectedfromList()) {
713
            collectedFrom += s.getValue() + ";";
714
        }
715
        buff += getStringField(collectedFrom);
716
        return buff;
717

    
718
    }
719

    
720

    
721
    private ArrayList<String> getResultDatasources(OafEntity valueEntity) {
722

    
723
        ArrayList<String> buffs = new ArrayList<String>();
724
        Result result = valueEntity.getResult();
725

    
726
        //TODO hosted by
727
        for (Instance instance : (result.getInstanceList())) {
728
            String hostedBy = instance.getHostedby().getKey();
729

    
730
            if (hostedBy != null && !hostedBy.isEmpty()) {
731
                buffs.add(getStringField("resultDatasource") + getStringField(hostedBy));
732
            }
733
        }
734

    
735
        //TODO  collected froms
736
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
737
            String collectedFrom = collectedFromValue.getKey();
738
            if (collectedFrom != null && !collectedFrom.isEmpty())
739
                buffs.add((getStringField("resultDatasource") + getStringField(collectedFrom)));
740

    
741
        }
742
        return buffs;
743

    
744
    }
745

    
746

    
747
    public String cleanId(String value) {
748
        if (value != null) {
749
            //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
750

    
751

    
752
            // to datacite____:: )
753
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
754
            value = value.replaceFirst(".*\\|", "");
755
            value = value.replaceAll("\n", "");
756
            value = value.replaceAll(DELIM, "");
757
            value = value.replaceAll(ENCLOSED, "");
758
            value = value.trim();
759

    
760
        }
761
        if (value == null) {
762
            return null;
763
        }
764
        return ENCLOSED + value + ENCLOSED;
765

    
766
    }
767

    
768

    
769
    private ArrayList<String> getDedups(OafEntity valueEntity) {
770

    
771
        ArrayList<String> buffs = new ArrayList<String>();
772
        String buff = getStringField("Dedup") + getStringField(cleanId(valueEntity.getId()));
773

    
774
        //TODO hosted by
775
        for (OafEntity child : valueEntity.getChildrenList()) {
776
            {
777
                if (child.getType() == valueEntity.getType()) { // if it is result, then its the deduplication
778
                    buff += getStringField(cleanId(child.getId()));
779
                }
780
            }
781
        }
782

    
783
        buffs.add(buff);
784
        return buffs;
785

    
786
    }
787

    
788

    
789
    private String getNumericField(String data) {
790
        if (data == null || data.isEmpty() || data.equals("")) {
791
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
792
        } else {
793

    
794
            return ENCLOSED + data + ENCLOSED + DELIM;
795
        }
796
    }
797

    
798

    
799
    private String getYearDifferenceInteger(String enddate, String startdate) {
800

    
801
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
802

    
803
            String[] split = startdate.split("-");
804

    
805
            if (split == null || split.length == 0) {
806
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
807
            }
808

    
809
            int Startdate = Integer.parseInt(split[0]);
810

    
811
            split = enddate.split("-");
812

    
813
            if (split == null || split.length == 0) {
814
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
815
            }
816

    
817
            int Enddate = Integer.parseInt(split[0]);
818

    
819
            int diff = Enddate - Startdate;
820

    
821
            return ENCLOSED + diff + ENCLOSED + DELIM;
822

    
823
        }
824

    
825
        return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
826
    }
827

    
828
    private String getYearInt(String data) {
829
        if (data == null || data.isEmpty() || data.equals("-1")) {
830
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
831
        }
832

    
833
        String[] split = data.split("-");
834

    
835
        if (split == null || split.length == 0) {
836
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
837
        }
838

    
839
        String year = split[0];
840

    
841
        year = cleanNumber(year);
842

    
843
        return ENCLOSED + year + ENCLOSED + DELIM;
844

    
845

    
846
    }
847

    
848
    private static String cleanNumber(String number) {
849
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
850

    
851
        return number;
852
    }
853

    
854
    private String getStringField(String data) {
855

    
856
        if (data == null || data.isEmpty() || data.equals("")) {
857

    
858
            return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
859
        } else {
860

    
861
            String field = clean(data);
862
            if (field == null) {
863
                return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
864
            } else {
865
                return field + DELIM;
866
            }
867
        }
868
    }
869

    
870
    private String getStringDateField(String data) {
871

    
872
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
873

    
874
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
875
        } else {
876

    
877
            String field = clean(data);
878
            if (field == null) {
879
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
880
            } else {
881
                return field + DELIM;
882
            }
883
        }
884
    }
885

    
886

    
887
    public String getId(OafProtos.Oaf oaf) {
888
        switch (oaf.getKind()) {
889
            case entity:
890

    
891
                return cleanId(oaf.getEntity().getId());
892
            case relation:
893

    
894
                return cleanId(oaf.getRel().getSource());
895

    
896
        }
897
        return null;
898

    
899
    }
900

    
901
    private boolean isNumeric(String str) {
902

    
903
        str = str.replaceAll("[^A-Za-z0-9 ]", "");
904
        str = str.replaceAll(" ", "");
905
        return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional
906
        // '-' and decimal.
907
    }
908

    
909
    //   there are topics with "null" as value -> replace them
910
    private boolean isValidTopic(String t) {
911

    
912
        if (t == null || t.isEmpty()) {
913
            return false;
914
        }
915

    
916
        if (t.equals("") || t.equals(" ")) {
917
            return false;
918
        }
919
        if (t.equals("null") || t.equals("Null") || t.equals("NULL")) {
920
            return false;
921
        }
922

    
923
        if (t.equals(ENCLOSED + ENCLOSED + DELIM) || t.equals(ENCLOSED + NULL_STRING + ENCLOSED + DELIM)) {
924
            return false;
925
        }
926
        // skip dedups
927
        if (t.contains("ddc:")) {
928

    
929
            return false;
930
        }
931
        return true;
932
    }
933

    
934

    
935
    private String getBestLicense(ResultProtos.Result result) {
936
        FieldTypeProtos.Qualifier bestLicense = null;
937
        LicenseComparator lc = new LicenseComparator();
938
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
939
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
940
                bestLicense = instance.getLicence();
941
            }
942
        }
943
        if (bestLicense != null) {
944
            return bestLicense.getClassname();
945
        } else {
946
            return null;
947
        }
948
    }
949

    
950
    //   here iterate over all values
951
    private String getAccessMode(ResultProtos.Result result) {
952
        String accessMode = null;
953
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
954
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
955
                accessMode = instance.getLicence().getClassname();
956
                break;
957
            }
958

    
959
        }
960

    
961

    
962
        return accessMode;
963
    }
964

    
965

    
966
    private String clean(String value) {
967
        if (value != null) {
968
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
969
            // to datacite____:: )
970
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
971
            value = value.replaceFirst(".*\\|", "");
972
            value = value.replaceAll(DELIM, "");
973
            value = value.replaceAll(",", "");
974
            value = value.replaceAll("\"", "");
975
            value = value.replaceAll("'", "");
976
            value = value.replaceAll(ENCLOSED, "");
977
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
978
            value = value.replaceAll("\\s+", " ");
979
            value = value.replaceAll("(\\r|\\n)", " ");
980
            value = value.replaceAll("\\t", " ");
981

    
982
            // value = value.replaceAll("[^A-Za-z0-9:,____-;:]", " ");
983
            value = value.trim();
984

    
985
        }
986
        if (value == null) {
987
            return null;
988
        }
989
        return ENCLOSED + value + ENCLOSED;
990

    
991
    }
992

    
993

    
994
    public long DATEDIFF(String startDate, String endDate) {
995
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
996
        long days = 0l;
997
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
998
        // <startdate>2011-09-01</startdate>
999
        // <enddate>2015-08-31</enddate>
1000
        Date dateIni = null;
1001
        Date dateFin = null;
1002

    
1003
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
1004
            return 0;
1005
        }
1006
        try {
1007
            dateIni = (Date) format.parse(startDate);
1008
            dateFin = (Date) format.parse(endDate);
1009
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
1010
        } catch (Exception e) {
1011
            log.error(e.toString());
1012
            return 0;
1013
        }
1014

    
1015
        return days;
1016
    }
1017

    
1018
    public String getDELIM() {
1019
        return DELIM;
1020
    }
1021

    
1022
    public void setDELIM(String dELIM) {
1023
        DELIM = dELIM;
1024
    }
1025

    
1026
    public String getNULL_STRING() {
1027
        return NULL_STRING;
1028
    }
1029

    
1030
    public void setNULL_STRING(String nULL_STRING) {
1031
        NULL_STRING = nULL_STRING;
1032
    }
1033

    
1034
    public String getNULL_NUM() {
1035
        return NULL_NUM;
1036
    }
1037

    
1038
    public void setNULL_NUM(String nULL_NUM) {
1039
        NULL_NUM = nULL_NUM;
1040
    }
1041

    
1042
    public String getENCLOSED() {
1043
        return ENCLOSED;
1044
    }
1045

    
1046
    public void setENCLOSED(String eNCLOSED) {
1047
        ENCLOSED = eNCLOSED;
1048
    }
1049

    
1050

    
1051
}
(4-4/5)