Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
5
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
6
import eu.dnetlib.data.proto.FieldTypeProtos;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
9
import eu.dnetlib.data.proto.OafProtos;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.OafProtos.OafRel;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17
import eu.dnetlib.data.proto.ResultProtos;
18
import eu.dnetlib.data.proto.ResultProtos.Result;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import org.apache.log4j.Logger;
21

    
22
import java.text.SimpleDateFormat;
23
import java.util.ArrayList;
24
import java.util.Date;
25
import java.util.HashMap;
26
import java.util.List;
27

    
28
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
32
public class Serializer {
33

    
34
    private static String DELIM;
35
    private Logger log = Logger.getLogger(this.getClass());
36
    private String NULL_STRING = null;
37
    private String NULL_NUM = "0";
38

    
39
    private static String ENCLOSED;
40
    private FundingParser fundingParser = new FundingParser();
41

    
42

    
43
    /**
     * Creates a serializer. All instance state comes from the field
     * initialisers, so the constructor body has nothing to do.
     */
    public Serializer() {
        // intentionally empty
    }
45

    
46
    public String serialize(Oaf oaf) {
47

    
48
        switch (oaf.getKind()) {
49
            case entity:
50
                OafEntity valueEntity = oaf.getEntity();
51

    
52
                switch (valueEntity.getType()) {
53
                    case datasource:
54

    
55
                        return buildDatasource(valueEntity);
56

    
57
                    case organization:
58

    
59
                        return buildOrganization(valueEntity);
60

    
61
                    case project:
62

    
63
                        return buildProject(valueEntity);
64
                    case result:
65

    
66
                        return buildResult(valueEntity);
67
                    case person:
68
                        return buildPerson(valueEntity);
69
                    default:
70
                        log.error("wrong type");
71
                        break;
72
                }
73
                break;
74
            case relation:
75
                OafRel valueRel = oaf.getRel();
76

    
77
                return buildRel(valueRel);
78

    
79
        }
80

    
81
        return null;
82

    
83
    }
84

    
85
    public String serialize(OafRel oaf) {
86

    
87
      //  switch (oaf.getRelType()) {
88

    
89
           // default:
90
                return buildRel(oaf);
91
       // }
92
    }
93

    
94

    
95
    public HashMap<String, List<String>> extractRelations(Oaf oaf) {
96
        OafEntity valueEntity = oaf.getEntity();
97
        switch (valueEntity.getType()) {
98
             case result:
99
                HashMap<String, List<String>> relations = new HashMap<String, List<String>>();
100
                 relations.putAll(getResultDatasources(valueEntity));
101

    
102
                return relations;
103
            default:
104

    
105
                return null;
106
        }
107

    
108
    }
109

    
110
    private HashMap<String, List<String>> getResultDatasources(OafEntity valueEntity) {
111

    
112
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
113
        List<String> buffs = new ArrayList<String>();
114
        Result result = valueEntity.getResult();
115
        String relType="resultDatasource" ;
116

    
117

    
118

    
119
       //TODO hosted by
120
        for (Instance instance : (result.getInstanceList())) {
121

    
122

    
123
            String hostedBy = instance.getHostedby().getKey();
124
            if (hostedBy != null && !hostedBy.isEmpty()) {
125
                buffs.add(cleanId(valueEntity.getId()) + DELIM + getStringField(hostedBy));
126
            }
127
        }
128

    
129
//TODO  collected froms
130
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
131
            String collectedFrom = collectedFromValue.getKey();
132
            if (collectedFrom != null && !collectedFrom.isEmpty()) buffs.add((cleanId(valueEntity.getId()) + DELIM + getStringField(collectedFrom)));
133

    
134
        }
135

    
136
        rels.put(relType ,buffs);
137
        return rels;
138

    
139
    }
140

    
141

    
142
    private String getResultResult(OafRel oaf) {
143

    
144

    
145
        String buff = new String();
146

    
147
        buff += getStringField(oaf.getTarget());
148

    
149
        buff += getStringField(String.valueOf(oaf.getResultResult().getSimilarity().getSimilarity()));
150

    
151
      /* 	hasAmongTopNSimilarDocuments: r1 hasAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar, and that we also have r2 isAmongTopNSimilarDocuments of r1.
152
        In OpenAIRE, N so far always equals 20.
153
        	isAmongTopNSimilarDocuments: r1 isAmongTopNSimilarDocuments r2 means that Results r1 and r2 are similar and that r2 hasAmongTopNSimilarDocuments of r1;
154
        	isRelatedTo: two results are somehow related to each other. OpenAIRE may further refine the semantics of possible types of "relatedness" by adding new classes in the Qualifier.
155
                Scheme "dnet:result_result_relations";
156
oaf. */
157

    
158
        return buff;
159
    }
160

    
161
    private String buildRel(OafRel Rel) {
162
        return getStringField(Rel.getSource()) + getStringField(Rel.getTarget());
163
    }
164

    
165

    
166
   /* private String getResultProject(OafRel oaf) {
167
        String buff = new String();+
168
        String result = oaf.getTarget();
169

    
170
        buff += getStringField(result);
171
        //   is declared as int!!!
172
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
173
        if (diff < 0) {
174
            diff = 0;
175
        }
176
        buff += getNumericField(String.valueOf(diff));
177

    
178
        return buff;
179
    }*/
180

    
181

    
182
    private String buildDatasource(OafEntity data) {
183

    
184
        String buff = new String();
185

    
186
        Datasource d = data.getDatasource();
187

    
188
        Metadata metadata = d.getMetadata();
189

    
190
        //`original Id`
191
        String originalId = new String();
192

    
193
        for (String oid : data.getOriginalIdList()) {
194
            originalId += oid + ";";
195
        }
196

    
197
        buff += getStringField(originalId);
198

    
199
        //dateOfCollection
200
        buff += getStringDateField(data.getDateofcollection());
201

    
202
        //Provider Type
203
        if (metadata.hasDatasourcetype()) {
204
            buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
205
        } else {
206
            buff += getStringField(null);
207
        }
208

    
209
        //Compatibility
210
        buff += getStringField(metadata.getOpenairecompatibility().getClassname());
211

    
212
        //Official Name
213
        buff += getStringField(metadata.getOfficialname().getValue());
214

    
215
        // English Name
216
        buff += getStringField(metadata.getEnglishname().getValue());
217

    
218
        //Website URL
219
        buff += getStringField(metadata.getWebsiteurl().getValue());
220

    
221
        //LogoURL
222
        buff += getStringField(metadata.getLogourl().getValue());
223

    
224
        //Email
225
        buff += getStringField(metadata.getContactemail().getValue());
226

    
227
        //Namespaceprefix
228
        buff += getStringField(metadata.getNamespaceprefix().getValue());
229

    
230
        // latitude
231
        buff += getStringField(metadata.getLatitude().getValue());
232

    
233
        // longtitude
234
        buff += getStringField(metadata.getLongitude().getValue());
235

    
236
        // dateofvalidation,
237
        buff += getStringField(metadata.getDateofvalidation().getValue());
238

    
239
        //Description
240
        buff += getStringField(metadata.getDescription().getValue());
241

    
242
        //subjects
243
        String subj = new String();
244
        for (StructuredProperty s : metadata.getSubjectsList()) {
245
            subj += s.getValue() + ';';
246

    
247
        }
248

    
249
        //subjects
250
        buff += getStringField(subj);
251

    
252
        //Number of items
253
        buff += getStringField(metadata.getOdnumberofitems().getValue());
254

    
255
        //Date of number of items
256
        buff += getStringField(metadata.getOdnumberofitemsdate().getValue());
257

    
258
        // Policies
259
        buff += getStringField(metadata.getOdpolicies().getValue());
260

    
261
        //languages
262
        String languages = new String();
263
        for (StringField lang : metadata.getOdlanguagesList()) {
264
            languages += lang.getValue() + ";";
265
        }
266
        buff += getStringField(languages);
267

    
268

    
269
        // Content type
270
        String contentType = new String();
271
        for (StringField c : metadata.getOdcontenttypesList()) {
272
            contentType += c.getValue() + ";";
273
        }
274
        buff += getStringField(contentType);
275

    
276
        //Access info package
277
        String accessInfo = new String();
278
        for (StringField c : metadata.getAccessinfopackageList()) {
279
            accessInfo += c.getValue() + ";";
280
        }
281
        buff += getStringField(accessInfo);
282

    
283
        //Release start date
284
        buff += getStringField(metadata.getReleasestartdate().getValue());
285

    
286
        //Release end date
287
        buff += getStringField(metadata.getReleaseenddate().getValue());
288

    
289
        //Mission statement url
290
        buff += getStringField(metadata.getMissionstatementurl().getValue());
291

    
292
        //Data provider
293
        buff += getStringField(String.valueOf(metadata.getDataprovider().getValue()));
294

    
295
        //Service provider
296
        buff += getStringField(String.valueOf(metadata.getServiceprovider().getValue()));
297

    
298
        //Database access type
299
        buff += getStringField(metadata.getDatabaseaccessrestriction().getValue());
300

    
301
        //Data upload type
302
        buff += getStringField(metadata.getDatauploadtype().getValue());
303

    
304
        //Data upload restrictions
305
        buff += getStringField(metadata.getDatauploadrestriction().getValue());
306

    
307
        //Versioning
308
        buff += getStringField(String.valueOf(metadata.getVersioning().getValue()));
309

    
310
        //Citation guideline url
311
        buff += getStringField(String.valueOf(metadata.getCitationguidelineurl().getValue()));
312

    
313
        //Quality management kind
314
        buff += getStringField(String.valueOf(metadata.getQualitymanagementkind().getValue()));
315

    
316
        //PID systems
317
        buff += getStringField(metadata.getPidsystems().getValue());
318

    
319
        //Certificates
320
        buff += getStringField(metadata.getCertificates().getValue());
321

    
322
        //Policies
323
        String policies = new String();
324
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
325
            policies += property.getValue() + ";";
326
        }
327
        buff += getStringField(policies);
328

    
329
        return buff;
330
    }
331

    
332

    
333
    private String buildOrganization(OafEntity data) {
334

    
335
        String buff = new String();
336

    
337
        Organization organization = data.getOrganization();
338
        eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
339

    
340
        //`original Id`
341
        String originalId = new String();
342
        for (String oid : data.getOriginalIdList()) {
343
            originalId += oid + ";";
344
        }
345

    
346

    
347
        buff += getStringField(originalId);
348
        //dateOfCollection
349
        buff += getStringDateField(data.getDateofcollection());
350

    
351

    
352
        //getLegalshortname
353
        buff += getStringField(metadata.getLegalshortname().getValue());
354
        // `name`,
355
        buff += getStringField(metadata.getLegalname().getValue());
356
        //website URL
357
        buff += getStringField(metadata.getWebsiteurl().getValue());
358
        //logourl
359
        buff += getStringField(metadata.getLogourl().getValue());
360
        // `country`,
361
        buff += getStringField(metadata.getCountry().getClassname());
362

    
363

    
364
        return buff;
365

    
366
    }
367

    
368
    private String buildResult(OafEntity data) {
369
        String buff = new String();
370

    
371
        Result result = data.getResult();
372
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
373

    
374
        //`original Id`
375
        String originalId = new String();
376

    
377
        for (String oid : data.getOriginalIdList()) {
378
            originalId += oid + ";";
379
        }
380

    
381
        buff += getStringField(originalId);
382

    
383
        //dateOfCollection
384
        buff += getStringDateField(data.getDateofcollection());
385
        //   titleString
386
        String titleString = new String();
387
        String alternativeTitles = new String();
388

    
389
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
390
            StructuredProperty title = metadata.getTitleList().get(i);
391
            if (i == 0) {
392
                titleString = title.getValue().replaceAll("\\s+", " ");
393
                titleString = titleString.replaceAll("\n", " ");
394
            } else {
395
                alternativeTitles += title.getValue().replaceAll("\\s+", " ") + " ; ";
396
                alternativeTitles = alternativeTitles.replaceAll("\n", " ");
397
            }
398
            break;
399
        }
400

    
401
        //  pubtitle
402
        buff += getStringField(titleString);
403

    
404
        // alternative titles
405
        //buff += getStringField(alternativeTitles);    //  null#!
406
        // date of acceptance CHANGED THIS TO DATE FORMAT
407
        buff += getStringDateField(metadata.getDateofacceptance().getValue());
408

    
409
        // publisher
410
        buff += getStringField(metadata.getPublisher().getValue());
411

    
412

    
413
        //PID
414
        String pids = new String();
415
        for (StructuredProperty p : data.getPidList()) {
416
            pids += p.getValue() + ";";
417
        }
418
        buff += getStringField(pids);
419

    
420
        //language
421
        buff += getStringField(metadata.getLanguage().getClassname());
422

    
423
        // RelevantDate
424
        String reldate = new String();
425

    
426
        for (StructuredProperty p : metadata.getRelevantdateList()) {
427
            reldate += p.getValue();
428
            break;
429
        }
430
        buff += getStringField(reldate);
431

    
432
        //Subject
433
        String subjects = new String();
434
        for (StructuredProperty subj : metadata.getSubjectList()) {
435

    
436
            if (isValidTopic(subj.getValue())) {
437
                if (!isNumeric(subj.getValue())) {
438
                    subjects += subj.getValue() + ";";
439
                }
440
            }
441
        }
442

    
443
        buff += subjects;
444

    
445
        // TODO Instance
446
        // buff += getStringField();
447

    
448
        //TODO ExternalReference
449

    
450

    
451
        //Source
452
        String source = new String();
453
        for (StringField s : metadata.getSourceList()) {
454
            source += s.getValue() + ";";
455
        }
456

    
457
        buff += getStringField(source);
458

    
459

    
460
        //TODO Format     
461
        buff += getStringField("");
462
        //DOES NOT EXIST
463
          /*String formatString = new String();
464
        for (StringField format : metadata.getFormatList()) {
465
            formatString = format.getValue();
466
            break;}
467
               buff += getStringField(formatString);
468
        }*/
469
        //Context
470
        String context = new String();
471
        for (Result.Context s : metadata.getContextList()) {
472
            context += s.getId() + ";";
473
        }
474
        buff += getStringField(context);
475

    
476
        //country TODO does not exist; throws error
477
        String country = new String();
478
       // for (Qualifier s : metadata.getCountryList()) {
479
          //  country += s.getClassname() + ";";
480
       // }
481

    
482
        buff += getStringField(country);
483

    
484
        // access_mode,
485
        buff += getStringField(getAccessMode(result));
486
        //Best License
487
        buff += getStringField(getBestLicense(result));
488
        //Description
489
        String description = new String();
490

    
491
        for (StringField desc : metadata.getDescriptionList()) {
492
            description += desc;
493
            break;
494
        }
495
        buff += getStringField(description);
496
        //Journal  
497
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
498

    
499
        //journalName                     
500
        buff += getStringField(metadata.getJournal().getName().replaceAll("\n", " "));  //#null#!
501

    
502
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
503

    
504
        //ISSN                                  
505
        buff += getStringField(metadata.getJournal().getIssnLinking());
506

    
507
        //embargoEndDate    
508
        buff += getStringField(metadata.getEmbargoenddate().getValue());
509

    
510

    
511
        // `authors`,
512
        int authors = 0;
513
        String delayed = "no";
514

    
515
        for (OafRel rel : data.getCachedRelList()) {
516

    
517
            if (rel.getRelType().equals(RelType.personResult)) {
518

    
519
                authors++;
520
            } else if (rel.getRelType().equals(RelType.resultProject))
521
            // TODO remember : in result Project, first id is project, second is  result.
522

    
523
            {
524
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
525
                if (Integer.parseInt(daysfromend) > 0) {
526
                    delayed = "yes";
527
                }
528
            }
529
        }
530

    
531
        buff += getNumericField(String.valueOf(authors));
532

    
533

    
534
        // TODO isRelatedTo
535

    
536
        //   resource type
537
        buff += getStringField(metadata.getResourcetype().getClassname());
538
        //   device
539
        buff += getStringField(metadata.getDevice().getValue());
540
        //   size
541
        buff += getStringField(metadata.getSize().getValue());
542
        //     version
543
        buff += getStringField(metadata.getVersion().getValue());
544
        //   metadata update
545
        buff += getStringField(metadata.getLastmetadataupdate().getValue());
546
        //   metadata version
547
        buff += getStringField(metadata.getMetadataversionnumber().getValue());
548

    
549
        // `delayed`,
550
        buff += getStringField(delayed);
551

    
552
        // year
553
        buff += getYearInt(metadata.getDateofacceptance().getValue());
554

    
555
        // type
556
        buff += getStringField(metadata.getResulttype().getClassname());
557
        //classification
558
        String classification = new String();
559
        for (Instance instance : (result.getInstanceList())) {
560

    
561
            if (instance.getInstancetype().getClassname() != null && !instance.getInstancetype().getClassname().isEmpty()) {
562
                classification += instance.getInstancetype().getClassname() + ';';
563
            }
564
        }
565

    
566
        buff += getStringField(classification);
567

    
568
        //  hosted by
569
        String hostedBy = new String();
570
        for (Instance instance : (result.getInstanceList())) {
571
            String host = instance.getHostedby().getKey();
572
            if (host != null && !host.isEmpty()) {
573
                hostedBy += host + ";";
574
            }
575
        }
576

    
577
        buff += getStringField(hostedBy);
578

    
579
        //   collectedfrom
580
        String collectedFrom = new String();
581
        for (FieldTypeProtos.KeyValue collectedFromValue : (data.getCollectedfromList())) {
582

    
583
            String host = collectedFromValue.getKey();
584
            if (host != null && !host.isEmpty()) {
585
                collectedFrom += host + ';';
586

    
587
            }
588
        }
589
        buff += getStringField(collectedFrom);
590
        return buff;
591
    }
592

    
593

    
594
    private String buildProject(OafEntity data) {
595

    
596
        String buff = new String();
597

    
598
        Project project = data.getProject();
599
        eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
600

    
601
        //`original Id`
602
        String originalId = new String();
603

    
604
        for (String oid : data.getOriginalIdList()) {
605
            originalId += oid + ";";
606
        }
607

    
608
        buff += getStringField(originalId);
609

    
610
        //dateOfCollection
611
        buff += getStringDateField(data.getDateofcollection());
612

    
613
        //Code
614
        buff += getStringField(metadata.getCode().getValue());
615
        // `url`,
616
        buff += getStringField(metadata.getWebsiteurl().getValue());
617

    
618
        // `acronym`,
619
        String acronym = metadata.getAcronym().getValue();
620
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
621
            acronym = metadata.getTitle().getValue();
622
        }
623

    
624
        buff += getStringField(acronym);
625

    
626
        //title!
627
        String title = getStringField(metadata.getTitle().getValue());
628
        buff += getStringField(title);
629

    
630
        // startdate
631
        buff += getNumericField(metadata.getStartdate().getValue());
632

    
633
        // enddate
634
        buff += getNumericField(metadata.getEnddate().getValue());
635

    
636
        //`Call identifer`
637
        buff += getStringField(metadata.getCallidentifier().getValue());
638

    
639
        //`KeyWords`
640
        buff += getStringField(metadata.getKeywords().getValue());
641

    
642
        //`Duration`
643
        buff += getStringField(metadata.getDuration().getValue());
644

    
645
        //esc39
646
        buff += getStringField(metadata.getEcsc39().getValue().toString());
647

    
648
        //`Contracttype`
649
        buff += getStringField(metadata.getContracttype().getClassname());
650

    
651
        //`OA mandate pubs`  TODO DOES NOT EXIST
652
       buff += getStringField(metadata.getOamandatepublications().getValue());
653

    
654
        //`Subjects` TODO DOES NOT EXIST EITHER
655
        String subjects = new String();
656
        for (StructuredProperty s : metadata.getSubjectsList()) {
657

    
658
            subjects += s.getValue() + ';';
659
        }
660
        buff += getStringField(subjects);
661

    
662

    
663
        //`EC293`
664
        buff += getStringField(metadata.getEcarticle293().getValue());
665

    
666

    
667
        List<StringField> fundList = metadata.getFundingtreeList();
668

    
669

    
670
        if (!fundList.isEmpty()) // `funding_lvl0`,
671
        {
672
            //TODO funder + 3 funding levels
673
           /* funder text,
674
            funding_lvl0 text,
675
	        funding_lvl1 text,
676
	        funding_lvl2 text,
677
	        funding_lvl3 text,*/
678
            buff += this.fundingParser.getFundingInfo(fundList.get(0).getValue());
679

    
680
        } else {
681

    
682
            buff += this.fundingParser.getFundingInfo("");
683
        }
684

    
685
        return buff;
686

    
687
    }
688

    
689

    
690
    private String buildPerson(OafEntity data) {
691

    
692
        String buff = new String();
693

    
694
        PersonProtos.Person person = data.getPerson();
695
        eu.dnetlib.data.proto.PersonProtos.Person.Metadata metadata = person.getMetadata();
696

    
697

    
698
        //`original Id`
699
        String originalId = new String();
700

    
701
        for (String oid : data.getOriginalIdList()) {
702
            originalId += oid + ";";
703
        }
704

    
705
        buff += getStringField(originalId);
706
        //dateOfCollection
707
        buff += getStringDateField(data.getDateofcollection());
708

    
709
        // `firstname`,
710
        buff += metadata.getFirstname();
711

    
712
        // `secondNames`,
713

    
714
        String secondNames = new String();
715
        for (StringField s : metadata.getSecondnamesList()) {
716

    
717
            secondNames += s.getValue() + ' ';
718
        }
719

    
720
        buff += getStringField(secondNames);
721

    
722
        // `fullname`,
723
        buff += getStringField(metadata.getFullname().getValue());
724
        // `Fax`,
725
        buff += getStringField(metadata.getFax().getValue());
726
        // `Email`,
727
        buff += getStringField(metadata.getEmail().getValue());
728
        // `Phone`,
729
        buff += getStringField(metadata.getPhone().getValue());
730

    
731
        // `Nationality`,
732
        buff += getStringField(metadata.getNationality().getClassname());
733

    
734
        // `PIDS`,
735
        String pids = new String();
736
        for (StructuredProperty s : data.getPidList()) {
737

    
738
            pids += s.getValue() + ";";
739

    
740

    
741
        }
742
        buff += getStringField(pids);
743

    
744
        // `collected from`,
745
        String collectedFrom = new String();
746
        for (FieldTypeProtos.KeyValue s : data.getCollectedfromList()) {
747
            collectedFrom += s.getValue() + ";";
748
        }
749
        buff += getStringField(collectedFrom);
750
        return buff;
751

    
752
    }
753

    
754
    public String cleanId(String value) {
755
        if (value != null) {
756
            //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
757

    
758

    
759
            // to datacite____:: )
760
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
761
            value = value.replaceFirst(".*\\|", "");
762
            value = value.replaceAll("\n", "");
763
            value = value.replaceAll(DELIM, "");
764
            value = value.replaceAll(ENCLOSED, "");
765
            value = value.trim();
766

    
767
        }
768
        if (value == null) {
769
            return null;
770
        }
771
        return ENCLOSED + value + ENCLOSED;
772

    
773
    }
774

    
775

    
776
    private String getNumericField(String data) {
777
        if (data == null || data.isEmpty() || data.equals("")) {
778
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
779
        } else {
780

    
781
            return ENCLOSED + data + ENCLOSED + DELIM;
782
        }
783
    }
784

    
785

    
786
    private String getYearDifferenceInteger(String enddate, String startdate) {
787

    
788
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
789

    
790
            String[] split = startdate.split("-");
791

    
792
            if (split == null || split.length == 0) {
793
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
794
            }
795

    
796
            int Startdate = Integer.parseInt(split[0]);
797

    
798
            split = enddate.split("-");
799

    
800
            if (split == null || split.length == 0) {
801
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
802
            }
803

    
804
            int Enddate = Integer.parseInt(split[0]);
805

    
806
            int diff = Enddate - Startdate;
807

    
808
            return ENCLOSED + diff + ENCLOSED + DELIM;
809

    
810
        }
811

    
812
        return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
813
    }
814

    
815
    private String getYearInt(String data) {
816
        if (data == null || data.isEmpty() || data.equals("-1")) {
817
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
818
        }
819

    
820
        String[] split = data.split("-");
821

    
822
        if (split == null || split.length == 0) {
823
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
824
        }
825

    
826
        String year = split[0];
827

    
828
        year = cleanNumber(year);
829

    
830
        return ENCLOSED + year + ENCLOSED + DELIM;
831

    
832

    
833
    }
834

    
835
    private static String cleanNumber(String number) {
836
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
837

    
838
        return number;
839
    }
840

    
841
    private String getStringField(String data) {
842

    
843
        if (data == null || data.isEmpty() || data.equals("")) {
844

    
845
            return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
846
        } else {
847

    
848
            String field = clean(data);
849
            if (field == null) {
850
                return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
851
            } else {
852
                return field + DELIM;
853
            }
854
        }
855
    }
856

    
857
    private String getStringDateField(String data) {
858

    
859
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
860

    
861
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
862
        } else {
863

    
864
            String field = clean(data);
865
            if (field == null) {
866
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
867
            } else {
868
                return field + DELIM;
869
            }
870
        }
871
    }
872

    
873

    
874
    public String getId(OafProtos.Oaf oaf) {
875
        switch (oaf.getKind()) {
876
            case entity:
877

    
878
                return cleanId(oaf.getEntity().getId());
879
            case relation:
880

    
881
                return cleanId(oaf.getRel().getSource());
882

    
883
        }
884
        return null;
885

    
886
    }
887

    
888
    private boolean isNumeric(String str) {
889

    
890
        str = str.replaceAll("[^A-Za-z0-9 ]", "");
891
        str = str.replaceAll(" ", "");
892
        return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional
893
        // '-' and decimal.
894
    }
895

    
896
    //   there are topics with "null" as value -> replace them
897
    private boolean isValidTopic(String t) {
898

    
899
        if (t == null || t.isEmpty()) {
900
            return false;
901
        }
902

    
903
        if (t.equals("") || t.equals(" ")) {
904
            return false;
905
        }
906
        if (t.equals("null") || t.equals("Null") || t.equals("NULL")) {
907
            return false;
908
        }
909

    
910
        if (t.equals(ENCLOSED + ENCLOSED + DELIM) || t.equals(ENCLOSED + NULL_STRING + ENCLOSED + DELIM)) {
911
            return false;
912
        }
913
        // skip dedups
914
        if (t.contains("ddc:")) {
915

    
916
            return false;
917
        }
918
        return true;
919
    }
920

    
921

    
922
    private String getBestLicense(ResultProtos.Result result) {
923
        FieldTypeProtos.Qualifier bestLicense = null;
924
        LicenseComparator lc = new LicenseComparator();
925
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
926
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
927
                bestLicense = instance.getLicence();
928
            }
929
        }
930
        if (bestLicense != null) {
931
            return bestLicense.getClassname();
932
        } else {
933
            return null;
934
        }
935
    }
936

    
937
    //   here iterate over all values
938
    private String getAccessMode(ResultProtos.Result result) {
939
        String accessMode = null;
940
        for (ResultProtos.Result.Instance instance : (result.getInstanceList())) {
941
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
942
                accessMode = instance.getLicence().getClassname();
943
                break;
944
            }
945

    
946
        }
947

    
948

    
949
        return accessMode;
950
    }
951

    
952

    
953
    public String getId(OafProtos.OafRel relOaf) {
954
        return cleanId(relOaf.getSource());
955
    }
956

    
957
    private String clean(String value) {
958
        if (value != null) {
959
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
960
            // to datacite____:: )
961
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
962
            value = value.replaceFirst(".*\\|", "");
963
            value = value.replaceAll(DELIM, "");
964
            value = value.replaceAll(",", "");
965
            value = value.replaceAll("\"", "");
966
            value = value.replaceAll("'", "");
967
            value = value.replaceAll(ENCLOSED, "");
968
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
969
            value = value.replaceAll("\\s+", " ");
970
            value = value.replaceAll("(\\r|\\n)", " ");
971
            value = value.replaceAll("\\t", " ");
972

    
973
            // value = value.replaceAll("[^A-Za-z0-9:,____-;:]", " ");
974
            value = value.trim();
975

    
976
        }
977
        if (value == null) {
978
            return null;
979
        }
980
        return ENCLOSED + value + ENCLOSED;
981

    
982
    }
983

    
984

    
985
    public long DATEDIFF(String startDate, String endDate) {
986
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
987
        long days = 0l;
988
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
989
        // <startdate>2011-09-01</startdate>
990
        // <enddate>2015-08-31</enddate>
991
        Date dateIni = null;
992
        Date dateFin = null;
993

    
994
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
995
            return 0;
996
        }
997
        try {
998
            dateIni = (Date) format.parse(startDate);
999
            dateFin = (Date) format.parse(endDate);
1000
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
1001
        } catch (Exception e) {
1002
            log.error(e.toString());
1003
            return 0;
1004
        }
1005

    
1006
        return days;
1007
    }
1008

    
1009
    public String getDELIM() {
1010
        return DELIM;
1011
    }
1012

    
1013
    public void setDELIM(String dELIM) {
1014
        DELIM = dELIM;
1015
    }
1016

    
1017
    public String getNULL_STRING() {
1018
        return NULL_STRING;
1019
    }
1020

    
1021
    public void setNULL_STRING(String nULL_STRING) {
1022
        NULL_STRING = nULL_STRING;
1023
    }
1024

    
1025
    public String getNULL_NUM() {
1026
        return NULL_NUM;
1027
    }
1028

    
1029
    public void setNULL_NUM(String nULL_NUM) {
1030
        NULL_NUM = nULL_NUM;
1031
    }
1032

    
1033
    public String getENCLOSED() {
1034
        return ENCLOSED;
1035
    }
1036

    
1037
    public void setENCLOSED(String eNCLOSED) {
1038
        ENCLOSED = eNCLOSED;
1039
    }
1040

    
1041
}
(5-5/5)