Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodExport.utils;
2

    
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
5
import eu.dnetlib.data.proto.FieldTypeProtos;
6
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
7
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
8
import eu.dnetlib.data.proto.OafProtos.Oaf;
9
import eu.dnetlib.data.proto.OafProtos.OafEntity;
10
import eu.dnetlib.data.proto.OafProtos.OafRel;
11
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
12
//import eu.dnetlib.data.proto.PersonProtos;
13
import eu.dnetlib.data.proto.PersonProtos;
14
import eu.dnetlib.data.proto.ProjectProtos.Project;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
17

    
18
import java.util.ArrayList;
19
import java.util.List;
20
import java.util.Set;
21

    
22
/**
23
 * @author eri Simple serializer that parses input Oaf Protos and prepares them
24
 *         for sqoop
25
 */
26
public class Serializer {
27
    // Negated character class used by clean(): every character NOT listed here is replaced by a space.
    // NOTE(review): inside the class, ".-_" is parsed as a range from '.' to '_', which also admits
    // characters such as '?', '[', ']' and '^' - confirm whether that is intended.
    private static final String TEXT_PATTERN = "[^a-zA-Z0-9 .-_:/@+=-]";
28
    // Negated character class used by cleanId(): every character NOT listed here
    // (letters, digits, '.', ':', '/', '_' and '-') is deleted from identifiers.
    private static final String ID_PATTERN = "[^a-zA-Z0-9.:/_-]";
29
    // Used by cleanDoi(), which deletes whatever this matches: any leading text (greedy), a '/',
    // the digits "10" and one further character.
    // NOTE(review): the '.' after "10" is unescaped, so it matches any character, not only a dot.
    private static final String DOI_PATTERN = ".*\\/(10.)";
30

    
31
    public static String serialize(Oaf oaf, String DELIM) {
32
        switch (oaf.getKind()) {
33
            case entity:
34
                OafEntity valueEntity = oaf.getEntity();
35
                switch (valueEntity.getType()) {
36
                    case datasource:
37
                        return buildDatasource(valueEntity, DELIM);
38
                    case organization:
39
                        return buildOrganization(valueEntity, DELIM);
40
                    case project:
41
                        return buildProject(valueEntity, DELIM);
42
                    case result:
43
                        return buildResult(valueEntity, DELIM);
44
                        /*
45
                    case person:
46
                        return buildPerson(valueEntity, DELIM);
47
                        */
48
                    default:
49
                        break;
50
                }
51
                break;
52
            case relation:
53
                OafRel valueRel = oaf.getRel();
54

    
55
                return serialize(valueRel, DELIM);
56

    
57
        }
58

    
59
        return null;
60

    
61
    }
62

    
63

    
64
    public static void extractRelations(Oaf oaf, String DELIM, Set<String> relations) {
65
        OafEntity valueEntity = oaf.getEntity();
66
        switch (valueEntity.getType()) {
67
            case result:
68
                getResultDatasources(valueEntity, DELIM, relations);
69
                getDedups(valueEntity, DELIM, relations);
70
            case datasource:
71
                getDedups(valueEntity, DELIM, relations);
72
                /*
73
            case person:
74
                getDedups(valueEntity, DELIM, relations);
75
                */
76
            case organization:
77
                getDedups(valueEntity, DELIM, relations);
78
            default:
79
        }
80

    
81
    }
82

    
83
    public static String serialize(OafRel Rel, String DELIM) {
84
        StringBuilder buff;
85
        switch (Rel.getRelType()) {
86
            case datasourceOrganization:
87
                buff = new StringBuilder();
88
                buff.append(Rel.getRelType().name()).append(DELIM).append("datasource").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
89
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM).append("\n");
90
                return buff.toString();
91
            case resultResult:
92
                buff = new StringBuilder();
93
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
94
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
95
                return buff.toString();
96
                /*
97
            case personPerson:
98
                buff = new StringBuilder();
99
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
100
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
101
                return buff.toString();
102
                */
103
            case organizationOrganization:
104
                buff = new StringBuilder();
105
                buff.append(Rel.getRelType().name()).append(DELIM).append("organization").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
106
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
107
                return buff.toString();
108
                /*
109
            case personResult:
110
                buff = new StringBuilder();
111
                buff.append(Rel.getRelType().name()).append(DELIM).append("person").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
112
                        .append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
113
                return buff.toString();
114
                */
115
            case projectOrganization:
116
                buff = new StringBuilder();
117
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
118
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
119
                return buff.toString();
120
                /*
121
            case projectPerson:
122
                buff = new StringBuilder();
123
                buff.append(Rel.getRelType().name()).append(DELIM).append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
124
                        .append("person").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
125
                return buff.toString();
126
                */
127
            case resultOrganization:
128
                buff = new StringBuilder();
129
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM)
130
                        .append("organization").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM);
131
                return buff.toString();
132
//TODO - SOS RELATION RESULT PROJECT IS INVERTED! SOURCE IS PROJECT, TARGET IS RESULT
133

    
134
            case resultProject:
135
                buff = new StringBuilder();
136
                buff.append(Rel.getRelType().name()).append(DELIM).append("result").append(DELIM).append(cleanId(Rel.getTarget())).append(DELIM)
137
                        .append("project").append(DELIM).append(cleanId(Rel.getSource())).append(DELIM);
138
                return buff.toString();
139
            default:
140

    
141
        }
142

    
143
        return "";
144

    
145
    }
146

    
147
    private static String getHeader(OafEntity data, String DELIM) {
148
        String SEPERATOR = ";";
149

    
150
        StringBuilder buff = new StringBuilder();
151

    
152
        //  EntityType
153
        buff.append(data.getType().name()).append(DELIM);
154

    
155
        // OpenaireID
156
        buff.append(cleanId(data.getId())).append(DELIM);
157
        //  dateOfTransformation
158

    
159
        // TODO  CRITERIA FOR IDENTIFYING UPDATED RECORDS
160
        buff.append(cleanId(data.getDateoftransformation())).append(DELIM);
161

    
162
        //    dateOfCollection
163
        buff.append(clean(data.getDateofcollection())).append(DELIM);
164

    
165
        String dataStr = new String();
166
        // originalId
167

    
168
        for (String oid : data.getOriginalIdList()) {
169
            dataStr += cleanDoi(oid) + SEPERATOR; //oid is not openaireid, clean instead of cleanid
170
        }
171

    
172
        buff.append(dataStr).append(DELIM);
173
        return buff.toString();
174
    }
175

    
176

    
177
    private static String buildDatasource(OafEntity data, String DELIM) {
178
        String SEPERATOR = ";";
179

    
180
        StringBuilder buff = new StringBuilder();
181

    
182
        buff.append(getHeader(data, DELIM));
183
        Metadata metadata = data.getDatasource().getMetadata();
184

    
185
        //Datasourcetype
186
        buff.append(clean(metadata.getDatasourcetype().getClassname())).append(DELIM);
187

    
188
        //Openairecompatibility
189
        buff.append(clean(metadata.getOpenairecompatibility().getClassname())).append(DELIM);
190

    
191
        //OfficialName
192
        buff.append(clean(metadata.getOfficialname().getValue())).append(DELIM);
193

    
194
        //  Englishname
195
        buff.append(clean(metadata.getEnglishname().getValue())).append(DELIM);
196

    
197
        //Websiteurl
198
        buff.append(clean(metadata.getWebsiteurl().getValue())).append(DELIM);
199

    
200
        //LogoURL
201
        buff.append(clean(metadata.getLogourl().getValue())).append(DELIM);
202

    
203
        //Contactemail
204
        buff.append(clean(metadata.getContactemail().getValue())).append(DELIM);
205

    
206
        //Namespaceprefix
207
        buff.append(clean(metadata.getNamespaceprefix().getValue())).append(DELIM);
208

    
209
        // latitude
210
        buff.append(clean(metadata.getLatitude().getValue())).append(DELIM);
211

    
212
        // longtitude
213
        buff.append(clean(metadata.getLongitude().getValue())).append(DELIM);
214

    
215
        // dateofvalidation,
216
        buff.append(clean(metadata.getDateofvalidation().getValue())).append(DELIM);
217

    
218
        //Description
219
        buff.append(clean(metadata.getDescription().getValue())).append(DELIM);
220

    
221
        //subjects
222
        String subj = new String();
223
        for (StructuredProperty s : metadata.getSubjectsList()) {
224
            subj += clean(s.getValue()) + SEPERATOR;
225
        }
226

    
227
        //subjectList
228
        buff.append(clean(subj)).append(DELIM);
229

    
230
        //Number of items
231
        buff.append(clean(metadata.getOdnumberofitems().getValue())).append(DELIM);
232

    
233
        //Date of number of items
234
        buff.append(clean(metadata.getOdnumberofitemsdate().getValue())).append(DELIM);
235

    
236
        // Policies
237
        buff.append(clean(metadata.getOdpolicies().getValue())).append(DELIM);
238

    
239
        //languages
240
        String dataStr = new String();
241

    
242
        for (StringField lang : metadata.getOdlanguagesList()) {
243
            dataStr += clean(lang.getValue()) + SEPERATOR;
244
        }
245

    
246
        buff.append(dataStr).append(DELIM);
247
        ;
248

    
249

    
250
        // Content type
251
        dataStr = " ";
252
        for (StringField c : metadata.getOdcontenttypesList()) {
253
            dataStr += clean(c.getValue()) + SEPERATOR;
254
        }
255
        buff.append(dataStr).append(DELIM);
256

    
257
        //Access info package
258
        dataStr = " ";
259

    
260
        for (StringField c : metadata.getAccessinfopackageList()) {
261
            dataStr += clean(c.getValue()) + SEPERATOR;
262
        }
263

    
264
        buff.append(dataStr).append(DELIM);
265

    
266
        //Release start date
267
        buff.append(clean(metadata.getReleasestartdate().getValue())).append(DELIM);
268

    
269
        //Release end date
270
        buff.append(clean(metadata.getReleaseenddate().getValue())).append(DELIM);
271

    
272
        //Mission statement url
273
        buff.append(clean(metadata.getMissionstatementurl().getValue())).append(DELIM);
274

    
275
        //Data provider
276
        buff.append(clean(String.valueOf(metadata.getDataprovider().getValue()))).append(DELIM);
277

    
278
        //Service provider
279
        buff.append(clean(String.valueOf(metadata.getServiceprovider().getValue()))).append(DELIM);
280

    
281
        //Database access type
282
        buff.append(clean(metadata.getDatabaseaccessrestriction().getValue())).append(DELIM);
283

    
284
        //Data upload type
285
        buff.append(clean(metadata.getDatauploadtype().getValue())).append(DELIM);
286

    
287
        //Data upload restrictions
288
        buff.append(clean(metadata.getDatauploadrestriction().getValue())).append(DELIM);
289

    
290
        //Versioning
291
        buff.append(clean(String.valueOf(metadata.getVersioning().getValue()))).append(DELIM);
292

    
293
        //Citation guideline url
294
        buff.append(clean(metadata.getCitationguidelineurl().getValue())).append(DELIM);
295

    
296
        //Quality management kind
297
        buff.append(clean(metadata.getQualitymanagementkind().getValue())).append(DELIM);
298

    
299
        //PID systems
300
        buff.append(clean(metadata.getPidsystems().getValue())).append(DELIM);
301

    
302
        //Certificates
303
        buff.append(clean(metadata.getCertificates().getValue())).append(DELIM);
304

    
305
        //Policies
306
        dataStr = " ";
307
        for (FieldTypeProtos.KeyValue property : metadata.getPoliciesList()) {
308
            dataStr += clean(property.getValue()) + SEPERATOR;
309
        }
310

    
311
        buff.append(dataStr).append(DELIM);
312

    
313
        buff.append(getTrust(data)).append(DELIM);
314
        return buff.toString();
315
    }
316

    
317

    
318
    private static String buildOrganization(OafEntity data, String DELIM) {
319
        String SEPERATOR = ";";
320

    
321
        StringBuilder buff = new StringBuilder();
322
        buff.append(getHeader(data, DELIM));
323

    
324
        Organization organization = data.getOrganization();
325
        Organization.Metadata metadata = organization.getMetadata();
326

    
327
        //getLegalshortname
328
        buff.append(clean(metadata.getLegalshortname().getValue())).append(DELIM);
329
        // `name`,
330
        buff.append(clean(metadata.getLegalname().getValue())).append(DELIM);
331
        //website URL
332
        String[] split = metadata.getWebsiteurl().getValue().split(",");
333
        String dataStr = new String();
334

    
335
        for (String s : split) {
336
            dataStr += s.replace(DELIM, " ") + SEPERATOR;
337
        }
338

    
339
        buff.append(dataStr).append(DELIM);
340
        //logourl
341
        buff.append(clean(metadata.getLogourl().getValue())).append(DELIM);
342
        // `country`,
343
        buff.append(clean(metadata.getCountry().getClassid())).append(DELIM);
344
        buff.append(getTrust(data)).append(DELIM);
345

    
346
        return buff.toString();
347

    
348
    }
349

    
350
    static String getTrust(OafEntity data) {
351
        for (FieldTypeProtos.ExtraInfo info : data.getExtraInfoList()) {
352
            return (info.getTrust());
353

    
354
        }
355
        return " ";
356
    }
357

    
358

    
359
    private static String buildResult(OafEntity data, String DELIM) {
360
        String SEPERATOR = ";";
361

    
362
        Result.Metadata metadata = data.getResult().getMetadata();
363

    
364

    
365
        StringBuilder buff = new StringBuilder();
366

    
367
        buff.append(getHeader(data, DELIM));
368

    
369
        //   titleString
370
        String dataStr = new String();
371

    
372
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
373
            StructuredProperty title = metadata.getTitleList().get(i);
374
            dataStr = clean(title.getValue());
375
            break;
376
        }
377

    
378
        //  pubtitle
379
        buff.append(clean(dataStr)).append(DELIM);
380

    
381
        // date of acceptance CHANGED THIS TO DATE FORMAT
382
        buff.append(clean(metadata.getDateofacceptance().getValue())).append(DELIM);
383

    
384
        // publisher
385
        buff.append(clean(metadata.getPublisher().getValue())).append(DELIM);
386

    
387

    
388
        //PID
389
        dataStr = " ";
390
        for (StructuredProperty p : data.getPidList()) {
391
            dataStr += clean(p.getValue()) + SEPERATOR;
392
        }
393

    
394
        buff.append(dataStr).append(DELIM);
395

    
396
        //language
397
        buff.append(clean(metadata.getLanguage().getClassid())).append(DELIM);
398

    
399
        // RelevantDate
400
        dataStr = " ";
401

    
402
        for (StructuredProperty p : metadata.getRelevantdateList()) {
403
            dataStr += clean(p.getValue());
404
            break;
405
        }
406

    
407
        buff.append(dataStr).append(DELIM);
408

    
409
        //Subject
410
        dataStr = " ";
411
        for (StructuredProperty subj : metadata.getSubjectList()) {
412

    
413
            if (subj.getValue() != null && !subj.getValue().isEmpty()) {
414
                dataStr += clean(subj.getValue()) + SEPERATOR;
415
            }
416
        }
417

    
418
        buff.append(dataStr).append(DELIM);
419

    
420
        //TODO ExternalReference
421

    
422
        buff.append(" ").append(DELIM);
423

    
424
        //Source
425
        dataStr = " ";
426
        for (StringField s : metadata.getSourceList()) {
427
            dataStr += clean(s.getValue()) + SEPERATOR;
428
        }
429

    
430
        buff.append(dataStr).append(DELIM);
431

    
432
        //TODO Format     
433
        buff.append(" ").append(DELIM);
434

    
435
        //Context
436
        dataStr = " ";
437
        for (Result.Context s : metadata.getContextList()) {
438
            dataStr += clean(s.getId()) + SEPERATOR;
439
        }
440
        buff.append(dataStr).append(DELIM);
441

    
442
        //country
443

    
444
        String country = " ";
445

    
446
        for (FieldTypeProtos.Qualifier c : metadata.getCountryList()) {
447
            country += clean(c.getClassid()) + SEPERATOR;
448
        }
449

    
450
        buff.append(country).append(DELIM);
451

    
452
        //Best License
453
        buff.append(getBestLicense(data.getResult())).append(DELIM);
454

    
455
        //Description
456
        dataStr = " ";
457

    
458
        for (StringField desc : metadata.getDescriptionList()) {
459
            dataStr += clean(desc.getValue());
460
            break;
461
        }
462

    
463
        buff.append(dataStr).append(DELIM);
464

    
465
        //Journal  
466
        buff.append(clean(metadata.getJournal().getName())).append(DELIM);  //#null#!
467

    
468

    
469
        // TODO ERI SOS : HERE IN GET JOUTNAL. GET DATA INFO I CAN FIND PROVENANCE AND SIMILARITY
470

    
471
        // TODO isRelatedTo
472

    
473
        //   resource type
474
        buff.append(clean(metadata.getResourcetype().getClassname())).append(DELIM);
475
        //   device
476
        buff.append(clean(metadata.getDevice().getValue())).append(DELIM);
477
        //   size
478
        buff.append(clean(metadata.getSize().getValue())).append(DELIM);
479
        //     version
480
        buff.append(clean(metadata.getVersion().getValue())).append(DELIM);
481

    
482
        //   metadata update
483
        buff.append(clean(metadata.getLastmetadataupdate().getValue())).append(DELIM);
484
        //   metadata version
485
        buff.append(clean(metadata.getMetadataversionnumber().getValue())).append(DELIM);
486

    
487
        // year
488
        buff.append(clean(getYearInt(metadata.getDateofacceptance().getValue()))).append(DELIM);
489

    
490
        // type
491
        buff.append(clean(metadata.getResulttype().getClassname())).append(DELIM);
492

    
493
        buff.append(getTrust(data)).append(DELIM);
494

    
495
        //Authors
496
        dataStr = " ";
497
        for(PersonProtos.Person p : data.getResult().getAuthorList()){
498
            dataStr += clean(p.getMetadata().getFullname().getValue()) + SEPERATOR;
499
        }
500

    
501
        buff.append(dataStr).append(DELIM);
502

    
503
        return buff.toString();
504
    }
505

    
506

    
507
    private static String buildProject(OafEntity data, String DELIM) {
508
        String SEPERATOR = ";";
509

    
510
        StringBuilder buff = new StringBuilder();
511

    
512
        buff.append(getHeader(data, DELIM));
513
        Project.Metadata metadata = data.getProject().getMetadata();
514

    
515

    
516
        //Code
517
        buff.append(metadata.getCode().getValue()).append(DELIM);
518
        // `Websiteurl`,
519
        buff.append(clean(metadata.getWebsiteurl().getValue())).append(DELIM);
520
        //TODO here
521

    
522
        // `Acronym`,
523
        buff.append(clean(metadata.getAcronym().getValue())).append(DELIM);
524

    
525
        //Title
526
        buff.append(clean(metadata.getTitle().getValue())).append(DELIM);
527

    
528
        // Startdate
529
        buff.append(clean(metadata.getStartdate().getValue())).append(DELIM);
530

    
531
        // Enddate
532
        buff.append(clean(metadata.getEnddate().getValue())).append(DELIM);
533

    
534
        //`Call identifer`
535
        buff.append(clean(metadata.getCallidentifier().getValue())).append(DELIM);
536

    
537
        //`KeyWords`
538
        buff.append(clean(metadata.getKeywords().getValue())).append(DELIM);
539

    
540
        //`Duration`
541
        buff.append(clean(metadata.getDuration().getValue())).append(DELIM);
542

    
543
        //esc39
544
        buff.append(clean(metadata.getEcsc39().getValue())).append(DELIM);
545

    
546
        //`Contracttype`
547
        buff.append(clean(metadata.getContracttype().getClassname())).append(DELIM);
548

    
549
        //`OA mandate pubs`  TODO DOES NOT EXIST
550
        buff.append(clean(metadata.getOamandatepublications().getValue())).append(DELIM);
551
        //`Subjects`
552
        String dataStr = new String();
553
        for (StructuredProperty s : metadata.getSubjectsList()) {
554

    
555
            dataStr += clean(s.getValue()) + SEPERATOR;
556
        }
557
        buff.append(dataStr).append(DELIM);
558

    
559
        //`EC293`
560
        buff.append(clean(metadata.getEcarticle293().getValue())).append(DELIM);
561

    
562
        List<StringField> fundList = metadata.getFundingtreeList();
563

    
564
        if (!fundList.isEmpty()) // `funding_lvl0`,
565
        {//TODO funder + 3 funding levels
566
           /* funder text,
567
            funding_lvl0 text,
568
	        funding_lvl1 text,
569
	        funding_lvl2 text,
570
	        funding_lvl3 text,*/
571
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM));
572
        } else {
573

    
574
            buff.append(FundingParser.getFundingInfo("", DELIM));
575
        }
576

    
577
        buff.append(getTrust(data)).append(DELIM);
578

    
579
        return buff.toString();
580

    
581
    }
582

    
583

    
584
    /*
585
    private static String buildPerson(OafEntity data, String DELIM) {
586
        String SEPERATOR = ";";
587

    
588
        PersonProtos.Person person = data.getPerson();
589
        PersonProtos.Person.Metadata metadata = person.getMetadata();
590

    
591
        StringBuilder buff = new StringBuilder();
592

    
593
        buff.append(getHeader(data, DELIM));
594

    
595
        // `firstname`,
596
        buff.append(clean(metadata.getFirstname().getValue())).append(DELIM);
597

    
598
        // `secondNames`,
599
        String dataStr = new String();
600

    
601
        for (StringField s : metadata.getSecondnamesList()) {
602
            dataStr += clean(s.getValue()) + ' ';
603
        }
604

    
605
        buff.append(dataStr).append(DELIM);
606

    
607
        // `fullname`,
608
        buff.append(clean(metadata.getFullname().getValue())).append(DELIM);
609

    
610
        // `Fax`,
611
        buff.append(clean(metadata.getFax().getValue())).append(DELIM);
612

    
613
        // `Email`,
614
        buff.append(clean(metadata.getEmail().getValue())).append(DELIM);
615

    
616
        // `Phone`,
617
        buff.append(clean(metadata.getPhone().getValue())).append(DELIM);
618

    
619
        // `Nationality`,
620
        buff.append(clean(metadata.getNationality().getClassid())).append(DELIM);
621

    
622
        // `PIDS`,
623
        dataStr = " ";
624
        for (StructuredProperty s : data.getPidList()) {
625

    
626
            dataStr += clean(s.getValue()) + ";";
627
        }
628
        buff.append(dataStr).append(DELIM);
629

    
630
        buff.append(getTrust(data)).append(DELIM);
631

    
632
        return buff.toString();
633

    
634
    }
635
    */
636

    
637

    
638
    private static void getResultDatasources(OafEntity valueEntity, String DELIM, Set<String> returnList) {
639
        String SEPERATOR = ";";
640

    
641
        Result result = valueEntity.getResult();
642

    
643
        if (valueEntity.getId().contains("dedup")) return;
644

    
645
        //TODO hosted by
646
        for (Instance instance : (result.getInstanceList())) {
647
            String hostedBy = instance.getHostedby().getKey();
648

    
649
            if (hostedBy != null && !hostedBy.isEmpty()) {
650
                returnList.add("resultDatasource" + DELIM + "result" + DELIM +
651
                        cleanId(valueEntity.getId()) + DELIM + "datasource" + DELIM + cleanId(hostedBy) + DELIM);
652
            }
653
        }
654

    
655
        //TODO  collected froms
656
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
657
            String collectedFrom = collectedFromValue.getKey();
658
            if (collectedFrom != null && !collectedFrom.isEmpty())
659
                returnList.add(("resultDatasource" + DELIM
660
                        + "result" + DELIM + cleanId(valueEntity.getId()) + DELIM
661
                        + "datasource" + DELIM + cleanId(collectedFrom) + DELIM));
662

    
663
        }
664

    
665

    
666
    }
667

    
668

    
669
    public static String cleanId(String value) {
670
        if (value == null) {
671
            return " ";
672
        }
673

    
674
        //   DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
675
        // to datacite____:: )
676
        // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
677
        value = value.replaceFirst(".*\\|", "");
678
        value = value.replaceAll(ID_PATTERN, "");
679
        value = value.replace("#", " ");
680
        value = value.replace("\n", " ");
681
        return value;
682
    }
683

    
684
    private static String clean(String value) {
685
        if (value == null) {
686
            return " ";
687
        }
688

    
689

    
690
        // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
691
        value = value.replaceAll("[\"\\r\\\\;]", "");
692
        value = value.replace("\\", "");
693
        value = value.replaceAll(TEXT_PATTERN, " ");
694

    
695
        value = value.replace(">", " ");
696
        value = value.replace("<", " ");
697
        value = value.replace("\"", " ");
698
        value = value.replace("\\", " ");
699
        value = value.replace("'", " ");
700
        value = value.replace("«", " ");
701
        value = value.replace("»", " ");
702
        value = value.replace("#", " ");
703
        value = value.replace("\\", " ");
704
        value = value.replace("\n", " ");
705
        value = value.replace(",", "");
706
        value = value.replace("#", "");
707
        value = value.replace(";", "");
708

    
709

    
710
        return value;
711
    }
712

    
713
    private static String cleanDoi(String value) {
714
        value = value.replaceAll(DOI_PATTERN, "");
715
        value = value.replace("#", " ");
716
        value = value.replace("\n", " ");
717
        return value;
718
    }
719

    
720

    
721
    //TODO make them in pairs
722
    private static void getDedups(OafEntity valueEntity, String DELIM, Set<String> returnList) {
723
        if (!valueEntity.getChildrenList().isEmpty() && valueEntity.getId().contains("dedup")) {
724
            ArrayList<String> entries = new ArrayList<String>();
725

    
726
            for (OafEntity child : valueEntity.getChildrenList()) {
727
                if (child.getType() == valueEntity.getType() && !child.getId().contains("dedup")) {
728
                    // if it is result, then its the deduplication
729
                    entries.add(cleanId(child.getId()));
730
                }
731
            }
732

    
733
            for (int i = 0; i < entries.size() - 1; i++) {
734
                for (int j = i + 1; j < entries.size(); j++) {
735
                    returnList.add("dedup" + DELIM +
736
                            valueEntity.getType().name() + DELIM +
737
                            entries.get(i) + DELIM +
738
                            valueEntity.getType().name() + DELIM +
739
                            entries.get(j) + DELIM);
740
                }
741
            }
742

    
743
        }
744

    
745
    }
746

    
747

    
748
    private static String getYearInt(String data) {
749
        if (data == null || data.isEmpty() || data.equals("-1")) {
750
            return " ";
751
        }
752
        String[] split = data.split("-");
753

    
754
        if (split == null || split.length == 0) {
755
            return " ";
756
        }
757

    
758

    
759
        return split[0];
760

    
761

    
762
    }
763

    
764

    
765
    private static String getBestLicense(Result result) {
766
        FieldTypeProtos.Qualifier bestLicense = null;
767
        LicenseComparator lc = new LicenseComparator();
768
        for (Instance instance : (result.getInstanceList())) {
769
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
770
                bestLicense = instance.getLicence();
771
            }
772
        }
773
        if (bestLicense != null) {
774
            return bestLicense.getClassname();
775
        } else {
776
            return null;
777
        }
778
    }
779

    
780

    
781
}
(4-4/4)