Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

    
3
import com.google.common.collect.Multimap;
4

    
5

    
6
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
8
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
9
import eu.dnetlib.data.proto.FieldTypeProtos;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
//import eu.dnetlib.data.proto.PersonProtos;
18
import eu.dnetlib.data.proto.ProjectProtos.Project;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.ResultProtos.Result;
21
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
22
import org.apache.log4j.Logger;
23
import org.jsoup.Jsoup;
24

    
25
import java.text.DateFormat;
26
import java.text.ParseException;
27
import java.text.SimpleDateFormat;
28
import java.util.ArrayList;
29
import java.util.Date;
30
import java.util.HashMap;
31
import java.util.List;
32

    
33
import org.w3c.dom.Element;
34
import org.w3c.dom.NodeList;
35
import org.xml.sax.InputSource;
36
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
37
import org.w3c.dom.Document;
38

    
39
/**
40
 * @author eri Simple serializer that parses input Oaf Protos and prepares them
41
 *         for sqoop
42
 */
43
public class Serializer {
44

    
45
    public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
46

    
47
        switch (oaf.getKind()) {
48
            case entity:
49
                OafEntity valueEntity = oaf.getEntity();
50

    
51
                switch (valueEntity.getType()) {
52
                    case datasource:
53

    
54
                        return buildDatasource(oaf, DELIM, ENCLOSING);
55

    
56
                    case organization:
57

    
58
                        return buildOrganization(oaf, DELIM, ENCLOSING);
59

    
60
                    case project:
61

    
62
                        return buildProject(oaf, DELIM, ENCLOSING);
63
                    case result:
64

    
65
                        return buildResult(oaf, DELIM, ENCLOSING);
66
                    /*
67
                    case person:
68
                        return buildPerson(oaf, DELIM, ENCLOSING);
69
                    */
70
                    default:
71
                        break;
72
                }
73
                break;
74
            case relation:
75

    
76
                return buildRel(oaf.getRel(), DELIM, ENCLOSING);
77

    
78
        }
79
        return null;
80
    }
81

    
82
    public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
83

    
84
        switch (oaf.getRelType()) {
85
            case resultProject:
86
                return getResultProject(oaf, DELIM, ENCLOSING);
87
            default:
88
                return buildRel(oaf, DELIM, ENCLOSING);
89
        }
90
    }
91

    
92
    private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
93

    
94
        return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
95

    
96
    }
97

    
98
    public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
99
        OafEntity valueEntity = oaf.getEntity();
100
        getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
101

    
102
        switch (valueEntity.getType()) {
103
            case datasource:
104
                getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
105
                getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
106
            case result:
107
                getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
108
                getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
109
                getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
110
                getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
111
                getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
112
                getResultDois(valueEntity, relations, DELIM, ENCLOSING);
113
                getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
114
                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
115
                getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
116

    
117
            case project:
118
                getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
119
                getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
120

    
121
            default:
122
        }
123

    
124
    }
125

    
126
    private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
127
        Datasource d = valueEntity.getDatasource();
128
        Metadata metadata = d.getMetadata();
129

    
130
        relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
131
    }
132

    
133
    private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
134
        Result result = valueEntity.getResult();
135
        Result.Metadata metadata = result.getMetadata();
136

    
137
        StringBuilder buff = new StringBuilder();
138
        String titleString = "";
139

    
140
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
141
            StructuredProperty title = metadata.getTitleList().get(i);
142

    
143
            titleString = title.getValue().replaceAll("\\s+", " ");
144
            titleString = titleString.replaceAll("\n", " ");
145
            break;
146
        }
147

    
148
        //  pubtitle
149
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
150

    
151
        String sources = "";
152
        for (Instance instance : (result.getInstanceList())) {
153
            List<String> urls = instance.getUrlList();
154
            for (String url : urls) {
155
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
156
            }
157
        }
158

    
159
        //sources
160
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
161
        buff.append(sources);
162

    
163
        relations.put("resultExtra", buff.toString());
164
    }
165

    
166
    private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
167

    
168
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
169
        for (String oid : oafEntity.getOriginalIdList()) {
170
            relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
171
        }
172

    
173
    }
174

    
175
    private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
176
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
177

    
178
    }
179

    
180
    private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
181
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
182
            relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
183
        }
184
    }
185

    
186
    private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
187
        StringBuilder buff = new StringBuilder();
188
        buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
189
        // TODO is declared as int!!!
190
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
191
        if (diff < 0) {
192
            diff = 0;
193
        }
194

    
195
        buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
196
        return buff.toString();
197
    }
198

    
199

    
200
    private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
201
        Datasource d = valueEntity.getDatasource();
202
        Metadata metadata = d.getMetadata();
203

    
204
        for (StringField lang : metadata.getOdlanguagesList()) {
205
            rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
206
        }
207
    }
208

    
209
    private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
210

    
211
        Result d = valueEntity.getResult();
212
        Result.Metadata metadata = d.getMetadata();
213
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
214
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
215
        }
216

    
217
    }
218

    
219
    private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
220

    
221
        for (StructuredProperty pid : valueEntity.getPidList()) {
222

    
223
            rels.put("resultPid",
224
                    getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
225
        }
226
    }
227

    
228
    private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
229

    
230
        Result result = valueEntity.getResult();
231

    
232
        for (Instance instance : (result.getInstanceList())) {
233
            String classification = instance.getInstancetype().getClassname();
234
            if (classification != null && !classification.isEmpty()) {
235
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
236
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
237
                break;
238
            }
239
        }
240
    }
241

    
242
    private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
243
        Result result = valueEntity.getResult();
244
        //description
245
        for (StringField s : result.getMetadata().getDescriptionList()) {
246

    
247
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
248
        }
249
    }
250

    
251
    private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
252

    
253
        Result result = valueEntity.getResult();
254

    
255

    
256
        for (Result.Context context : result.getMetadata().getContextList()) {
257

    
258
            rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
259
        }
260
    }
261

    
262
    private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
263
        Result result = valueEntity.getResult();
264

    
265
//TODO hosted by
266
        for (Instance instance : (result.getInstanceList())) {
267
            String hostedBy = instance.getHostedby().getKey();
268
            if (hostedBy != null && !hostedBy.isEmpty()) {
269
                rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
270
            }
271
        }
272

    
273
//TODO  collected froms
274
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
275

    
276
            String collectedFrom = collectedFromValue.getKey();
277
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
278
                rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
279
            }
280
        }
281
    }
282

    
283
    private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
284

    
285
        Result d = valueEntity.getResult();
286
        Result.Metadata metadata = d.getMetadata();
287

    
288
        List<StructuredProperty> Topics = metadata.getSubjectList();
289

    
290
        for (StructuredProperty topic : Topics) {
291
            // TODO result topics
292
            rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
293
        }
294
    }
295

    
296

    
297
    private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
298
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
299
            if (extraInfo.getName().equals("result citations")) {
300
                DOMParser parser = new DOMParser();
301
                try {
302
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
303
                    Document doc = parser.getDocument();
304
                    doc.getDocumentElement().normalize();
305

    
306
                    NodeList citations = doc.getElementsByTagName("citation");
307
                    for (int temp = 0; temp < citations.getLength(); temp++) {
308
                        Element citation = (Element) citations.item(temp);
309
                        NodeList ids = citation.getElementsByTagName("id");
310
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
311
                            Element id = (Element) ids.item(temp1);
312
                            if(id.getAttribute("type").equals("openaire")){
313
                                //System.out.println(id.getAttribute("value"));
314
                                rels.put("resultCitation", id.getAttribute("value"));
315
                            }
316
                        }
317
                    }
318
                } catch (Exception e) {
319

    
320
                }
321

    
322
                /*
323
                rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
324
                        getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
325
                        */
326
            }
327

    
328
        }
329
    }
330

    
331
    private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
332
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
333
        StringBuilder buff = new StringBuilder();
334

    
335
        // name
336
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
337
            buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
338
        } else {
339
            buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
340
        }
341

    
342
        // type
343
        if (metadata.hasDatasourcetype()) {
344
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
345
        }
346

    
347
        // compatibility,
348
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
349

    
350
        // dateofvalidation,
351
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
352

    
353
        // yearofvalidation,
354
        buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
355

    
356
        //harvested
357
        buff.append(getStringField("false", DELIM, ENCLOSING));
358

    
359
        //piwik_id
360
        String piwik_id = "";
361
        for (String oid : oaf.getEntity().getOriginalIdList()) {
362
            if (oid.contains("piwik")) {
363
                piwik_id = oid.split(":")[1];
364
                break;
365
            }
366
        }
367
        buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
368

    
369
        return buff.toString();
370

    
371
    }
372

    
373
    private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
374

    
375
        StringBuilder buff = new StringBuilder();
376
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
377

    
378
        // `name`,
379
        buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
380

    
381
        // `country`,
382
        buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
383

    
384
        return buff.toString();
385
    }
386

    
387
    private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
388
        StringBuilder buff = new StringBuilder();
389

    
390
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
391

    
392
        //  publisher
393
        buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
394

    
395
        //  journal
396
        buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING));  //#null#!
397

    
398
        // year
399
        buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
400

    
401
        // date
402
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
403

    
404
        // bestlicense
405
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
406

    
407
        // type
408
        buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
409

    
410
        // embargo_end_date
411
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
412

    
413
        // `authors`,
414
        int authors = metadata.getAuthorCount();
415

    
416

    
417
        String delayed = "no";
418

    
419
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
420

    
421
            /*
422
            if (rel.getRelType().equals(RelType.personResult)) {
423

    
424
                authors++;
425
            } else
426
            */
427
            if (rel.getRelType().equals(RelType.resultProject))
428
            // TODO remember : in result Project, first id is project, second is
429
            // result.
430
            {
431
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
432
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
433
                if (Integer.parseInt(daysfromend) > 0) {
434
                    delayed = "yes";
435
                }
436
            }
437
        }
438

    
439
        // `delayed`,
440
        buff.append(getStringField(delayed, DELIM, ENCLOSING));
441
        //authors
442
        buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
443

    
444
        return buff.toString();
445

    
446
    }
447

    
448

    
449
    /*
450
    private static String getBestLicense(Result result) {
451
        Qualifier bestLicense = null;
452
        LicenseComparator lc = new LicenseComparator();
453
        for (Instance instance : (result.getInstanceList())) {
454
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
455
                bestLicense = instance.getLicence();
456
            }
457
        }
458
        if (bestLicense != null) {
459
            return bestLicense.getClassname();
460
        } else {
461
            return null;
462
        }
463
    }
464

    
465
    // TODO here iterate over all values
466
    private static String getAccessMode(Result result) {
467
        String accessMode = " ";
468
        for (Instance instance : (result.getInstanceList())) {
469
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
470
                accessMode = instance.getLicence().getClassname();
471
                break;
472
            }
473

    
474
        }
475

    
476
        return accessMode;
477
    }
478
    */
479

    
480
    private static String getBestLicense(Result result) {
481
        Qualifier bestLicense = null;
482
        LicenseComparator lc = new LicenseComparator();
483
        for (Instance instance : (result.getInstanceList())) {
484
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
485
                bestLicense = instance.getAccessright();
486
            }
487
        }
488
        if (bestLicense != null) {
489
            return bestLicense.getClassname();
490
        } else {
491
            return null;
492
        }
493
    }
494

    
495
    // TODO here iterate over all values
496
    private static String getAccessMode(Result result) {
497
        String accessMode = " ";
498
        for (Instance instance : (result.getInstanceList())) {
499
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
500
                accessMode = instance.getAccessright().getClassname();
501
                break;
502
            }
503

    
504
        }
505

    
506
        return accessMode;
507
    }
508

    
509
    private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
510

    
511
        StringBuilder buff = new StringBuilder();
512
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
513
        
514
        // `acronym`,
515
        String acronym = metadata.getAcronym().getValue();
516
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
517
            acronym = metadata.getTitle().getValue();
518
        }
519
        buff.append(getStringField(acronym, DELIM, ENCLOSING));
520

    
521
        //title
522
        buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
523

    
524
        //funding_lvl
525
        List<StringField> fundList = metadata.getFundingtreeList();
526
        if (!fundList.isEmpty()) // `funding_lvl0`,
527
        {
528
            //TODO funder + 3 funding levels
529
           /* funder text,
530
            funding_lvl0 text,
531
	        funding_lvl1 text,
532
	        funding_lvl2 text,
533
	        funding_lvl3 text,*/
534
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
535
        } else {
536
            buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
537
        }
538

    
539
        //sc39
540
        String sc39 = metadata.getEcsc39().getValue().toString();
541
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
542
            sc39 = "yes";
543
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
544
            sc39 = "no";
545
        }
546
        buff.append(getStringField(sc39, DELIM, ENCLOSING));
547

    
548
        //project_type
549
        buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
550

    
551
        // start_year
552
        buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
553

    
554
        // end_year
555
        buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
556

    
557
        // duration enddate-startdate
558
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
559

    
560
        // haspubs
561
        buff.append(getStringField("no", DELIM, ENCLOSING));
562

    
563
        // numpubs
564
        buff.append(getNumericField("0", DELIM, ENCLOSING));
565

    
566
        // enddate
567
        buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
568

    
569
        // startdate
570
        buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
571

    
572
        // `daysforlastpub`,
573
        buff.append(getNumericField("", DELIM, ENCLOSING));
574

    
575
        // `delayedpubs`,
576
        buff.append(getNumericField("", DELIM, ENCLOSING));
577

    
578
        //call identifier
579
        buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
580

    
581
        //code
582
        buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
583

    
584
        return buff.toString();
585

    
586
    }
587

    
588
    /*
589
    private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
590
        StringBuilder buff = new StringBuilder();
591

    
592
        PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
593

    
594
        // `person_id`,
595
        //buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
596

    
597

    
598
        // person_result
599
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
600

    
601

    
602
        //firstName
603
        buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
604

    
605
        //LastName
606

    
607
        String secondName = new String();
608
        for (StringField s : metadata.getSecondnamesList()) {
609
            secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
610
        }
611

    
612
        buff.append(getStringField(secondName, DELIM, ENCLOSING));
613

    
614
        // `fullname`,
615
        buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
616

    
617
        // `Nationality`,
618
        buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
619

    
620
        // `Email`,
621
        buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
622

    
623
        // `Phone`,
624
        buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
625

    
626
        //deletedByInference
627

    
628
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
629

    
630
        // `number`,
631
        buff.append(getStringField("1", DELIM, ENCLOSING));
632

    
633
        return buff.toString();
634

    
635
    }
636
    */
637

    
638

    
639
    private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
640

    
641
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
642

    
643
            String[] split = startdate.split("-");
644

    
645
            if (split == null || split.length == 0) {
646
                return ENCLOSING + "0" + ENCLOSING + DELIM;
647
            }
648

    
649
            int Startdate = Integer.parseInt(split[0]);
650

    
651
            split = enddate.split("-");
652

    
653
            if (split == null || split.length == 0) {
654
                return ENCLOSING + "0" + ENCLOSING + DELIM;
655
            }
656

    
657
            int Enddate = Integer.parseInt(split[0]);
658

    
659
            int diff = Enddate - Startdate;
660

    
661
            return ENCLOSING + diff + ENCLOSING + DELIM;
662

    
663
        }
664

    
665
        return ENCLOSING + "0" + ENCLOSING + DELIM;
666
    }
667

    
668
    private static String getYearInt(String data, String DELIM, String ENCLOSING) {
669
        if (data == null || data.isEmpty() || data.equals("-1")) {
670
            return ENCLOSING + "0" + ENCLOSING + DELIM;
671
        }
672

    
673
        String[] split = data.split("-");
674

    
675
        if (split == null || split.length == 0) {
676
            return ENCLOSING + "0" + ENCLOSING + DELIM;
677
        }
678

    
679
        String year = split[0];
680

    
681
        year = cleanNumber(year);
682

    
683
        if (year == null || year.isEmpty()) year = "0";
684

    
685
        return ENCLOSING + year + ENCLOSING + DELIM;
686

    
687
    }
688

    
689
    private static String cleanNumber(String number) {
690
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
691
        return number;
692
    }
693

    
694
    private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
695

    
696
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
697

    
698
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
699

    
700
    }
701

    
702
    private static String getStringField(String data, String DELIM, String ENCLOSING) {
703

    
704
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
705

    
706
        return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
707

    
708
    }
709

    
710
    private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
711
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
712
            return ENCLOSING + "0" + ENCLOSING + DELIM;
713
        } else {
714
            data = data.replace(DELIM, " ");
715
            data = data.replace(ENCLOSING, " ");
716
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
717
            try {
718
                DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
719
                data = format.format(format.parse(data));
720
                return ENCLOSING + data + ENCLOSING + DELIM;
721
            } catch (ParseException e) {
722
                return ENCLOSING + "0" + ENCLOSING + DELIM;
723
            }
724
        }
725
    }
726

    
727
    private static String getNumericField(String data, String DELIM, String ENCLOSING) {
728
        if (data == null || data.isEmpty() || data.equals("")) {
729
            return ENCLOSING + "0" + ENCLOSING + DELIM;
730
        } else {
731
            return ENCLOSING + data + ENCLOSING + DELIM;
732
        }
733
    }
734

    
735
    public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
736
        switch (oaf.getKind()) {
737
            case entity:
738
                return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
739
            case relation:
740

    
741
                return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
742

    
743
        }
744
        return null;
745

    
746
    }
747

    
748
    public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
749
        return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
750
    }
751

    
752
    public static String clean(String value, String DELIM, String ENCLOSING) {
753
        if (value != null) {
754

    
755
            value = value.replaceAll("[\"\\r\\\\;]", "");
756
            value = value.replace(DELIM, " ");
757
            value = value.replace(ENCLOSING, " ");
758
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
759
            //value = value.replace("\"", "");
760
            //value = value.replace("'", "");
761
            //value = value.replace("«", " ");
762
            //value = value.replace("»", " ");
763

    
764
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
765

    
766
            return value;
767

    
768
        } else {
769
            return "";
770

    
771
        }
772

    
773
    }
774

    
775
    public static String cleanId(String value, String DELIM, String ENCLOSING) {
776
        if (value != null) {
777
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
778

    
779
            // to datacite____:: )
780
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
781
            value = value.replaceFirst(".*\\|", "");
782
            value = value.replace("\n", "");
783
            value = value.replace(ENCLOSING, "");
784
            value = value.replace(DELIM, "");
785
            value = value.replace("\"", "");
786
            value = value.replace("«", " ");
787
            value = value.replace("»", " ");
788
        }
789

    
790
        return ENCLOSING + value + ENCLOSING;
791

    
792
    }
793

    
794

    
795
    public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
796
        value = value.replace(DELIM, " ");
797
        value = value.replace(ENCLOSING, " ");
798
        value = value.replace(" ", "");
799
        value = value.replace("\n", "");
800
        return value;
801
    }
802

    
803

    
804
    public static long DATEDIFF(String startDate, String endDate) {
805
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
806
        long days = 0l;
807
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
808
        // <startdate>2011-09-01</startdate>
809
        // <enddate>2015-08-31</enddate>
810
        Date dateIni = null;
811
        Date dateFin = null;
812

    
813
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
814
            return 0;
815
        }
816
        try {
817
            dateIni = (Date) format.parse(startDate);
818
            dateFin = (Date) format.parse(endDate);
819
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
820
        } catch (Exception e) {
821

    
822
            return 0;
823
        }
824

    
825
        return days;
826
    }
827

    
828

    
829
}
(3-3/3)