Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
5
import eu.dnetlib.data.proto.FieldTypeProtos;
6
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
8
import eu.dnetlib.data.proto.OafProtos.Oaf;
9
import eu.dnetlib.data.proto.OafProtos.OafEntity;
10
import eu.dnetlib.data.proto.OafProtos.OafRel;
11
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
12
import eu.dnetlib.data.proto.PersonProtos;
13
import eu.dnetlib.data.proto.ProjectProtos.Project;
14
import eu.dnetlib.data.proto.ResultProtos.Result;
15
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
16
import org.jsoup.Jsoup;
17

    
18
import java.util.ArrayList;
19
import java.util.List;
20
import java.util.Set;
21

    
22
/**
23
 * @author eri Simple serializer that parses input Oaf Protos and prepares them
24
 *         for sqoop
25
 */
26
public class NewSerializer {
27

    
28
    public static String Serialize(Oaf oaf, String DELIM) {
29

    
30
        switch (oaf.getKind()) {
31
            case entity:
32
                OafEntity valueEntity = oaf.getEntity();
33
                switch (valueEntity.getType()) {
34
                    case datasource:
35

    
36
                        return buildDatasource(valueEntity, DELIM);
37
                    case organization:
38

    
39
                        return buildOrganization(valueEntity, DELIM);
40

    
41
                    case project:
42

    
43
                        return buildProject(valueEntity, DELIM);
44

    
45
                    case result:
46

    
47
                        return buildResult(valueEntity, DELIM);
48
                    case person:
49
                        return buildPerson(valueEntity, DELIM);
50

    
51
                    default:
52

    
53
                        break;
54
                }
55
                break;
56
            case relation:
57
                OafRel valueRel = oaf.getRel();
58

    
59
                return Serialize(valueRel, DELIM);
60

    
61
        }
62

    
63
        return null;
64

    
65
    }
66

    
67

    
68
    public static void extractRelations(Oaf oaf, String DELIM, Set<String> relations) {
69
        OafEntity valueEntity = oaf.getEntity();
70
        switch (valueEntity.getType()) {
71
            case result:
72

    
73
                getResultDatasources(valueEntity, DELIM, relations);
74
                getDedups(valueEntity, DELIM, relations);
75

    
76
            case datasource:
77
                getDedups(valueEntity, DELIM, relations);
78

    
79
            case person:
80
                getDedups(valueEntity, DELIM, relations);
81
            case organization:
82
                getDedups(valueEntity, DELIM, relations);
83

    
84
            default:
85

    
86
        }
87

    
88
    }
89

    
90
    public static String Serialize(OafRel Rel, String DELIM) {
91
        StringBuilder buff ;
92

    
93
        switch (Rel.getRelType()) {
94
            case datasourceOrganization:
95
                buff = new StringBuilder();
96
                buff.append(Rel.getRelType().name()).append(DELIM).append("datasource").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
97
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM).append("\n");
98
                return buff.toString();
99
            case resultResult:
100
                buff = new StringBuilder();
101
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
102
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
103
                return buff.toString();
104
            case personPerson:
105
                buff = new StringBuilder();
106
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
107
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
108
                return buff.toString();
109
            case organizationOrganization:
110
                buff = new StringBuilder();
111
                buff.append(Rel.getRelType().name()).append(DELIM).append("organization").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
112
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
113
                return buff.toString();
114
            case personResult:
115
                buff = new StringBuilder();
116
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
117
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
118
                return buff.toString();
119
            case projectOrganization:
120
                buff = new StringBuilder();
121
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
122
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
123
                return buff.toString();
124
            case projectPerson:
125
                buff = new StringBuilder();
126
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
127
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
128
                return buff.toString();
129
            case resultOrganization:
130
                buff = new StringBuilder();
131
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
132
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
133
                return buff.toString();
134
//TOOD maybe switch them???
135

    
136
            case resultProject:
137
                buff = new StringBuilder();
138
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
139
                        .append("project").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
140
                return buff.toString();
141
            default:
142

    
143
        }
144

    
145
        return "";
146

    
147
    }
148

    
149
    private static String getHeader(OafEntity data, String DELIM) {
150
        String SEPERATOR = ";";
151

    
152
        StringBuilder buff = new StringBuilder();
153

    
154
        //  EntityType
155
        buff.append(data.getType().name() + DELIM);
156

    
157
        // OpenaireID
158
        buff.append(cleanId(data.getId()) + DELIM);
159
        //  dateOfTransformation
160

    
161
        // TODO change to dateOfTransformation here when released
162
        buff.append(cleanId(data.getDateoftransformation()) + DELIM);
163

    
164
        //    dateOfCollection
165
        buff.append(cleanId(data.getDateofcollection()) + DELIM);
166

    
167
        String dataStr = new String();
168
        // originalId
169

    
170
        for (String oid : data.getOriginalIdList()) {
171
            dataStr += cleanId(oid) + SEPERATOR;
172
        }
173

    
174

    
175
        buff.append(dataStr + DELIM);
176

    
177
        return buff.toString();
178
    }
179

    
180

    
181
    private static String buildDatasource(OafEntity data, String DELIM) {
182
        String SEPERATOR = ";";
183

    
184
        StringBuilder buff = new StringBuilder();
185

    
186
        buff.append(getHeader(data, DELIM));
187
        Metadata metadata = data.getDatasource().getMetadata();
188

    
189

    
190
        //Datasourcetype
191
        buff.append(clean(metadata.getDatasourcetype().getClassname()) + DELIM);
192

    
193
        //Openairecompatibility
194
        buff.append(clean(metadata.getOpenairecompatibility().getClassname()) + DELIM);
195

    
196
        //OfficialName
197
        buff.append(clean(metadata.getOfficialname().getValue()) + DELIM);
198

    
199
        //  Englishname
200
        buff.append(clean(metadata.getEnglishname().getValue()) + DELIM);
201

    
202
        //Websiteurl
203
        buff.append(clean(metadata.getWebsiteurl().getValue()) + DELIM);
204

    
205
        //LogoURL
206
        buff.append(clean(metadata.getLogourl().getValue()) + DELIM);
207

    
208
        //Contactemail
209
        buff.append(clean(metadata.getContactemail().getValue()) + DELIM);
210

    
211
        //Namespaceprefix
212
        buff.append(clean(metadata.getNamespaceprefix().getValue()) + DELIM);
213

    
214
        // latitude
215
        buff.append(clean(metadata.getLatitude().getValue()) + DELIM);
216

    
217
        // longtitude
218
        buff.append(clean(metadata.getLongitude().getValue() + DELIM));
219

    
220
        // dateofvalidation,
221
        buff.append(clean(metadata.getDateofvalidation().getValue()) + DELIM);
222

    
223
        //Description
224
        buff.append(cleanText(metadata.getDescription().getValue(), DELIM) + DELIM);
225

    
226
        //subjects
227
        String subj = new String();
228
        for (StructuredProperty s : metadata.getSubjectsList()) {
229
            subj += clean(s.getValue()) + SEPERATOR;
230

    
231
        }
232

    
233
        //subjectList
234
        buff.append(clean(subj) + DELIM);
235

    
236
        //Number of items
237
        buff.append(clean(metadata.getOdnumberofitems().getValue()) + DELIM);
238

    
239
        //Date of number of items
240
        buff.append(clean(metadata.getOdnumberofitemsdate().getValue()) + DELIM);
241

    
242
        // Policies
243
        buff.append(clean(metadata.getOdpolicies().getValue()) + DELIM);
244

    
245
        //languages
246
        String dataStr = new String();
247

    
248
        for (StringField lang : metadata.getOdlanguagesList()) {
249
            dataStr += clean(lang.getValue()) + SEPERATOR;
250
        }
251

    
252
        buff.append(dataStr + DELIM);
253

    
254

    
255
        // Content type
256
        dataStr = " ";
257
        for (StringField c : metadata.getOdcontenttypesList()) {
258
            dataStr += clean(c.getValue()) + SEPERATOR;
259
        }
260
        buff.append(dataStr + DELIM);
261

    
262
        //Access info package
263
        dataStr = " ";
264

    
265
        for (StringField c : metadata.getAccessinfopackageList()) {
266
            dataStr += clean(c.getValue()) + SEPERATOR;
267
        }
268

    
269
        buff.append(dataStr + DELIM);
270

    
271
        //Release start date
272
        buff.append(clean(metadata.getReleasestartdate().getValue()) + DELIM);
273

    
274
        //Release end date
275
        buff.append(clean(metadata.getReleaseenddate().getValue()) + DELIM);
276

    
277
        //Mission statement url
278
        buff.append(clean(metadata.getMissionstatementurl().getValue()) + DELIM);
279

    
280
        //Data provider
281
        buff.append(clean(String.valueOf(metadata.getDataprovider().getValue())) + DELIM);
282

    
283
        //Service provider
284
        buff.append(clean(String.valueOf(metadata.getServiceprovider().getValue())) + DELIM);
285

    
286
        //Database access type
287
        buff.append(clean(metadata.getDatabaseaccessrestriction().getValue()) + DELIM);
288

    
289
        //Data upload type
290
        buff.append(clean(metadata.getDatauploadtype().getValue()) + DELIM);
291

    
292
        //Data upload restrictions
293
        buff.append(clean(metadata.getDatauploadrestriction().getValue()) + DELIM);
294

    
295
        //Versioning
296
        buff.append(clean(String.valueOf(metadata.getVersioning().getValue())) + DELIM);
297

    
298
        //Citation guideline url
299
        buff.append(clean(metadata.getCitationguidelineurl().getValue()) + DELIM);
300

    
301
        //Quality management kind
302
        buff.append(clean(metadata.getQualitymanagementkind().getValue()) + DELIM);
303

    
304
        //PID systems
305
        buff.append(clean(metadata.getPidsystems().getValue()) + DELIM);
306

    
307
        //Certificates
308
        buff.append(clean(metadata.getCertificates().getValue()) + DELIM);
309

    
310
        //Policies
311
        dataStr = " ";
312
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
313
            dataStr += clean(property.getValue()) + SEPERATOR;
314
        }
315

    
316
        buff.append(dataStr + DELIM);
317

    
318
        buff.append(getTrust(data) + DELIM);
319
        return buff.toString();
320
    }
321

    
322

    
323
    private static String buildOrganization(OafEntity data, String DELIM) {
324
        String SEPERATOR = ";";
325

    
326
        StringBuilder buff = new StringBuilder();
327
        buff.append(getHeader(data, DELIM));
328

    
329
        Organization organization = data.getOrganization();
330
        Organization.Metadata metadata = organization.getMetadata();
331

    
332
        //getLegalshortname
333
        buff.append(clean(metadata.getLegalshortname().getValue()) + DELIM);
334
        // `name`,
335
        buff.append(clean(metadata.getLegalname().getValue()) + DELIM);
336
        //website URL
337
        String[] split = metadata.getWebsiteurl().getValue().split(",");
338
        String dataStr = new String();
339

    
340
        for (String s : split) {
341
            dataStr += s.replace(DELIM, " ") + SEPERATOR;
342
        }
343

    
344
        buff.append(dataStr + DELIM);
345
        //logourl
346
        buff.append(cleanUrl(metadata.getLogourl().getValue(), DELIM) + DELIM);
347
        // `country`,
348
        buff.append(clean(metadata.getCountry().getClassid()) + DELIM);
349
        buff.append(getTrust(data) + DELIM);
350

    
351
        return buff.toString();
352

    
353
    }
354

    
355
    static String getTrust(OafEntity data) {
356
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
357
            return (info.getTrust());
358

    
359
        }
360
        return " ";
361
    }
362

    
363

    
364
    private static String buildResult(OafEntity data, String DELIM) {
365
        String SEPERATOR = ";";
366

    
367
        Result.Metadata metadata = data.getResult().getMetadata();
368

    
369

    
370
        StringBuilder buff = new StringBuilder();
371

    
372
        buff.append(getHeader(data, DELIM));
373

    
374
        //   titleString
375
        String dataStr = new String();
376

    
377
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
378
            StructuredProperty title = metadata.getTitleList().get(i);
379
            dataStr = clean(title.getValue());
380
            break;
381
        }
382

    
383
        //  pubtitle
384
        buff.append(clean(dataStr) + DELIM);
385

    
386
        // date of acceptance CHANGED THIS TO DATE FORMAT
387
        buff.append(clean(metadata.getDateofacceptance().getValue()) + DELIM);
388

    
389
        // publisher
390
        buff.append(clean(metadata.getPublisher().getValue()) + DELIM);
391

    
392

    
393
        //PID
394
        dataStr = " ";
395
        for (StructuredProperty p : data.getPidList()) {
396
            dataStr += clean(p.getValue()) + SEPERATOR;
397
        }
398

    
399
        buff.append(dataStr + DELIM);
400

    
401
        //language
402
        buff.append(clean(metadata.getLanguage().getClassid()) + DELIM);
403

    
404
        // RelevantDate
405
        dataStr = " ";
406

    
407
        for (StructuredProperty p : metadata.getRelevantdateList()) {
408
            dataStr += clean(p.getValue());
409
            break;
410
        }
411

    
412
        buff.append(dataStr + DELIM);
413

    
414
        //Subject
415
        dataStr = " ";
416
        for (StructuredProperty subj : metadata.getSubjectList()) {
417

    
418
            if (subj.getValue() != null && !subj.getValue().isEmpty()) {
419
                dataStr += clean(subj.getValue()) + SEPERATOR;
420
            }
421
        }
422

    
423
        buff.append(dataStr + DELIM);
424

    
425
        //TODO ExternalReference
426

    
427
        buff.append(" " + DELIM);
428

    
429
        //Source
430
        dataStr = " ";
431
        for (StringField s : metadata.getSourceList()) {
432
            dataStr += clean(s.getValue()) + SEPERATOR;
433
        }
434

    
435
        buff.append(dataStr + DELIM);
436

    
437
        //TODO Format     
438
        buff.append(" " + DELIM);
439

    
440
        //Context
441
        dataStr = " ";
442
        for (Result.Context s : metadata.getContextList()) {
443
            dataStr += clean(s.getId()) + SEPERATOR;
444
        }
445
        buff.append(dataStr + DELIM);
446

    
447
        //country
448

    
449
        String country = " ";
450

    
451
        for (FieldTypeProtos.Qualifier c : metadata.getCountryList()) {
452
            country += clean(c.getClassid()) + SEPERATOR;
453
        }
454

    
455
        buff.append(country + DELIM);
456

    
457
        //Best License
458
        buff.append(getBestLicense(data.getResult()) + DELIM);
459

    
460
        //Description
461
        dataStr = " ";
462

    
463
        for (StringField desc : metadata.getDescriptionList()) {
464
            dataStr += clean(cleanText(desc.getValue(), DELIM));
465
            break;
466
        }
467

    
468
        buff.append(dataStr + DELIM);
469

    
470
        //Journal  
471
        buff.append(clean(metadata.getJournal().getName()) + DELIM);  //#null#!
472

    
473

    
474
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
475

    
476
        // TODO isRelatedTo
477

    
478
        //   resource type
479
        buff.append(clean(metadata.getResourcetype().getClassname()) + DELIM);
480
        //   device
481
        buff.append(clean(metadata.getDevice().getValue()) + DELIM);
482
        //   size
483
        buff.append(clean(metadata.getSize().getValue()) + DELIM);
484
        //     version
485
        buff.append(clean(metadata.getVersion().getValue()) + DELIM);
486

    
487
        //   metadata update
488
        buff.append(clean(metadata.getLastmetadataupdate().getValue()) + DELIM);
489
        //   metadata version
490
        buff.append(clean(metadata.getMetadataversionnumber().getValue()) + DELIM);
491

    
492

    
493
        // year
494
        buff.append(clean(getYearInt(metadata.getDateofacceptance().getValue())) + DELIM);
495

    
496
        // type
497
        buff.append(clean(metadata.getResulttype().getClassname()) + DELIM);
498

    
499
        buff.append(getTrust(data) + DELIM);
500

    
501

    
502
        return buff.toString();
503
    }
504

    
505

    
506
    private static String buildProject(OafEntity data, String DELIM) {
507
        String SEPERATOR = ";";
508

    
509
        StringBuilder buff = new StringBuilder();
510

    
511
        buff.append(getHeader(data, DELIM));
512
        Project.Metadata metadata = data.getProject().getMetadata();
513

    
514

    
515
        //Code
516
        buff.append(metadata.getCode().getValue() + DELIM);
517
        // `Websiteurl`,
518
        buff.append(clean(metadata.getWebsiteurl().getValue()) + DELIM);
519
        //TODO here
520

    
521
        // `Acronym`,
522
        buff.append(clean(metadata.getAcronym().getValue()) + DELIM);
523

    
524
        //Title
525
        buff.append(clean(metadata.getTitle().getValue()) + DELIM);
526

    
527
        // Startdate
528
        buff.append(clean(metadata.getStartdate().getValue()) + DELIM);
529

    
530
        // Enddate
531
        buff.append(clean(metadata.getEnddate().getValue()) + DELIM);
532

    
533
        //`Call identifer`
534
        buff.append(clean(metadata.getCallidentifier().getValue()) + DELIM);
535

    
536
        //`KeyWords`
537
        buff.append(clean(metadata.getKeywords().getValue()) + DELIM);
538

    
539
        //`Duration`
540
        buff.append(clean(metadata.getDuration().getValue()) + DELIM);
541

    
542
        //esc39
543
        buff.append(clean(metadata.getEcsc39().getValue()) + DELIM);
544

    
545
        //`Contracttype`
546
        buff.append(clean(metadata.getContracttype().getClassname()) + DELIM);
547

    
548
        //`OA mandate pubs`  TODO DOES NOT EXIST
549
        buff.append(clean(metadata.getOamandatepublications().getValue()) + DELIM);
550
        //`Subjects`
551
        String dataStr = new String();
552
        for (StructuredProperty s : metadata.getSubjectsList()) {
553

    
554
            dataStr += clean(s.getValue()) + SEPERATOR;
555
        }
556
        buff.append(dataStr + DELIM);
557

    
558
        //`EC293`
559
        buff.append(clean(metadata.getEcarticle293().getValue()) + DELIM);
560

    
561
        List<StringField> fundList = metadata.getFundingtreeList();
562

    
563
        if (!fundList.isEmpty()) // `funding_lvl0`,
564
        {//TODO funder + 3 funding levels
565
           /* funder text,
566
            funding_lvl0 text,
567
	        funding_lvl1 text,
568
	        funding_lvl2 text,
569
	        funding_lvl3 text,*/
570
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM));
571
        } else {
572

    
573
            buff.append(FundingParser.getFundingInfo("", DELIM));
574
        }
575

    
576
        buff.append(getTrust(data) + DELIM);
577

    
578
        return buff.toString();
579

    
580
    }
581

    
582

    
583
    private static String buildPerson(OafEntity data, String DELIM) {
584
        String SEPERATOR = ";";
585

    
586
        PersonProtos.Person person = data.getPerson();
587
        PersonProtos.Person.Metadata metadata = person.getMetadata();
588

    
589
        StringBuilder buff = new StringBuilder();
590

    
591
        buff.append(getHeader(data, DELIM));
592

    
593
        // `firstname`,
594
        buff.append(clean(metadata.getFirstname().getValue()) + DELIM);
595

    
596
        // `secondNames`,
597
        String dataStr = new String();
598

    
599
        for (StringField s : metadata.getSecondnamesList()) {
600
            dataStr += clean(s.getValue()) + ' ';
601
        }
602

    
603
        buff.append(dataStr + DELIM);
604

    
605
        // `fullname`,
606
        buff.append(clean(metadata.getFullname().getValue()) + DELIM);
607

    
608
        // `Fax`,
609
        buff.append(clean(metadata.getFax().getValue()) + DELIM);
610

    
611
        // `Email`,
612
        buff.append(clean(metadata.getEmail().getValue()) + DELIM);
613

    
614
        // `Phone`,
615
        buff.append(clean(metadata.getPhone().getValue()) + DELIM);
616

    
617
        // `Nationality`,
618
        buff.append(clean(metadata.getNationality().getClassid()) + DELIM);
619

    
620
        // `PIDS`,
621
        dataStr = " ";
622
        for (StructuredProperty s : data.getPidList()) {
623

    
624
            dataStr += cleanId(s.getValue()) + ";";
625
        }
626
        buff.append(dataStr + DELIM);
627

    
628
        buff.append(getTrust(data) + DELIM);
629

    
630
        return buff.toString();
631

    
632
    }
633

    
634

    
635
    private static void getResultDatasources(OafEntity valueEntity, String DELIM, Set<String> returnList) {
636
        String SEPERATOR = ";";
637

    
638
        Result result = valueEntity.getResult();
639

    
640
        if (valueEntity.getId().contains("dedup")) return;
641

    
642
        //TODO hosted by
643
        for (Instance instance : (result.getInstanceList())) {
644
            String hostedBy = instance.getHostedby().getKey();
645

    
646
            if (hostedBy != null && !hostedBy.isEmpty()) {
647
                returnList.add("resultDatasource" + DELIM + "result" + DELIM +
648
                        cleanId(valueEntity.getId()) + DELIM + "datasource" + DELIM + cleanId(hostedBy) + DELIM);
649
            }
650
        }
651

    
652
        //TODO  collected froms
653
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
654
            String collectedFrom = collectedFromValue.getKey();
655
            if (collectedFrom != null && !collectedFrom.isEmpty())
656
                returnList.add(("resultDatasource" + DELIM
657
                        + "result" + DELIM + cleanId(valueEntity.getId()) + DELIM
658
                        + "datasource" + DELIM + cleanId(collectedFrom) + DELIM));
659

    
660
        }
661

    
662

    
663
    }
664

    
665
    public static String cleanText(String value, String DELIM) {
666
        value = value.replace(DELIM, " ");
667

    
668
        value = Jsoup.parse(value).text();
669
        //PROSOXI PROSOXI DEN TO PIANEI AUTO H REGEX GIA TO REPLACE!!!!
670

    
671
        value = value.replace("\n", " ");
672
        value = value.replace(">", " ");
673
        value = value.replace("<", " ");
674
        value = value.replace(",", " ");
675
        value = value.replace("\"", " ");
676
        value = value.replace("'", " ");
677

    
678
        value = value.replace("«", " ");
679
        value = value.replace("»", " ");
680
        value = value.replace("!", " ");
681
        value = value.replace("#", " ");
682

    
683

    
684
        value = value.replaceAll("[^a-zA-Z0-9 .-_:/@+=]+", " ");
685

    
686
        return value;
687
    }
688

    
689
    public static String cleanUrl(String value, String DELIM) {
690
        value = value.replace(DELIM, " ");
691
        value = value.replace(" ", "");
692
        value = value.replace("\"", " ");
693
        value = value.replace("\n", "");
694

    
695

    
696
        value = value.replace(">", " ");
697
        value = value.replace("<", " ");
698
        value = value.replace(",", " ");
699
        value = value.replace("\"", " ");
700
        value = value.replace("'", " ");
701
        value = value.replace("«", " ");
702
        value = value.replace("»", " ");
703

    
704
        value = value.replace("!", " ");
705
        value = value.replace("#", " ");
706

    
707

    
708
        return value;
709
    }
710

    
711

    
712
    public static String cleanId(String value) {
713
        if (value != null) {
714
            //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
715
            // to datacite____:: )
716
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
717
            value = value.replaceFirst(".*\\|", "");
718
            value = value.replace("\n", " ");
719
            value = value.replace("\"", "");
720
            value = value.replace("'", "");
721
            value = value.replace("#", "");
722
            value = value.replace("!", "");
723

    
724
            value = value.replace("«", " ");
725
            value = value.replace("»", " ");
726

    
727
        }
728

    
729

    
730
        return value;
731

    
732
    }
733

    
734

    
735
    //TODO make them in pairs
736
    private static void getDedups(OafEntity valueEntity, String DELIM, Set<String> returnList) {
737
        if (!valueEntity.getChildrenList().isEmpty() && valueEntity.getId().contains("dedup")) {
738
            ArrayList<String> entries = new ArrayList<String>();
739

    
740
            for (OafEntity child : valueEntity.getChildrenList()) {
741
                if (child.getType() == valueEntity.getType() && !child.getId().contains("dedup")) {
742
                    // if it is result, then its the deduplication
743
                    entries.add(cleanId(child.getId()));
744
                }
745
            }
746

    
747
            for (int i = 0; i < entries.size() - 1; i++) {
748
                for (int j = i + 1; j < entries.size(); j++) {
749
                    returnList.add("dedup" + DELIM +
750
                            valueEntity.getType().name() + DELIM +
751
                            entries.get(i) + DELIM +
752
                            valueEntity.getType().name() + DELIM +
753
                            entries.get(j) + DELIM);
754
                }
755
            }
756

    
757
        }
758

    
759
    }
760

    
761

    
762
    private static String getYearInt(String data) {
763
        if (data == null || data.isEmpty() || data.equals("-1")) {
764
            return " ";
765
        }
766
        String[] split = data.split("-");
767

    
768
        if (split == null || split.length == 0) {
769
            return " ";
770
        }
771

    
772

    
773
        return split[0];
774

    
775

    
776
    }
777

    
778

    
779
    private static String getBestLicense(Result result) {
780
        FieldTypeProtos.Qualifier bestLicense = null;
781
        LicenseComparator lc = new LicenseComparator();
782
        for (Instance instance : (result.getInstanceList())) {
783
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
784
                bestLicense = instance.getLicence();
785
            }
786
        }
787
        if (bestLicense != null) {
788
            return bestLicense.getClassname();
789
        } else {
790
            return null;
791
        }
792
    }
793

    
794

    
795
    private static String clean(String value) {
796
        String SEPERATOR = ";";
797
        String enclosing = "\"";
798

    
799

    
800
        if (value != null) {
801
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
802
            // to datacite____:: )
803
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
804
            //       value = value.replaceFirst(".*\\|", "");
805

    
806
            value = value.replaceAll("[\"\\r\\\\;]", "");
807

    
808
            value = value.replace(SEPERATOR, " ");
809
            value = value.replace(">", " ");
810
            value = value.replace("<", " ");
811
            value = value.replace(",", " ");
812
            value = value.replace("\"", " ");
813
            value = value.replace("'", " ");
814
            value = value.replace(enclosing, " ");
815
            value = value.replace("«", " ");
816
            value = value.replace("»", " ");
817
            value = value.replace("!", " ");
818
            value = value.replace("#", " ");
819

    
820
            value = value.replaceAll("[^a-zA-Z0-9 .-_:/@+=]+", " ");
821
        }
822

    
823

    
824
        return value;
825

    
826
    }
827

    
828
}
(4-4/5)