Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

    
3
import com.google.common.collect.Multimap;
4

    
5

    
6
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
8
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
9
import eu.dnetlib.data.proto.FieldTypeProtos;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
//import eu.dnetlib.data.proto.PersonProtos;
18
import eu.dnetlib.data.proto.ProjectProtos.Project;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.ResultProtos.Result;
21
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
22
import org.apache.log4j.Logger;
23
import org.jsoup.Jsoup;
24

    
25
import java.text.SimpleDateFormat;
26
import java.util.ArrayList;
27
import java.util.Date;
28
import java.util.HashMap;
29
import java.util.List;
30

    
31
import org.w3c.dom.Element;
32
import org.w3c.dom.NodeList;
33
import org.xml.sax.InputSource;
34
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
35
import org.w3c.dom.Document;
36

    
37
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
41
public class Serializer {
42

    
43
    public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
44

    
45
        switch (oaf.getKind()) {
46
            case entity:
47
                OafEntity valueEntity = oaf.getEntity();
48

    
49
                switch (valueEntity.getType()) {
50
                    case datasource:
51

    
52
                        return buildDatasource(oaf, DELIM, ENCLOSING);
53

    
54
                    case organization:
55

    
56
                        return buildOrganization(oaf, DELIM, ENCLOSING);
57

    
58
                    case project:
59

    
60
                        return buildProject(oaf, DELIM, ENCLOSING);
61
                    case result:
62

    
63
                        return buildResult(oaf, DELIM, ENCLOSING);
64
                    /*
65
                    case person:
66
                        return buildPerson(oaf, DELIM, ENCLOSING);
67
                    */
68
                    default:
69
                        break;
70
                }
71
                break;
72
            case relation:
73

    
74
                return buildRel(oaf.getRel(), DELIM, ENCLOSING);
75

    
76
        }
77
        return null;
78
    }
79

    
80
    public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
81

    
82
        switch (oaf.getRelType()) {
83
            case resultProject:
84
                return getResultProject(oaf, DELIM, ENCLOSING);
85
            default:
86
                return buildRel(oaf, DELIM, ENCLOSING);
87
        }
88
    }
89

    
90
    private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
91

    
92
        return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
93

    
94
    }
95

    
96
    public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
97
        OafEntity valueEntity = oaf.getEntity();
98
        getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
99

    
100
        switch (valueEntity.getType()) {
101
            case datasource:
102
                getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
103
            case result:
104
                getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
105
                getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
106
                getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
107
                getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
108
                getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
109
                getResultDois(valueEntity, relations, DELIM, ENCLOSING);
110
                getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
111
                getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
112

    
113
            case project:
114
                getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
115
                getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
116

    
117
            default:
118
        }
119

    
120
    }
121

    
122

    
123
    private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
124

    
125
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
126
        for (String oid : oafEntity.getOriginalIdList()) {
127
            relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
128
        }
129

    
130
    }
131

    
132
    private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
133
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
134

    
135
    }
136

    
137
    private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
138
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
139
            relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
140
        }
141
    }
142

    
143
    private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
144
        StringBuilder buff = new StringBuilder();
145
        buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
146
        // TODO is declared as int!!!
147
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
148
        if (diff < 0) {
149
            diff = 0;
150
        }
151

    
152
        buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
153
        return buff.toString();
154
    }
155

    
156

    
157
    private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
158
        Datasource d = valueEntity.getDatasource();
159
        Metadata metadata = d.getMetadata();
160

    
161
        for (StringField lang : metadata.getOdlanguagesList()) {
162
            rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
163
        }
164
    }
165

    
166
    private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
167

    
168
        Result d = valueEntity.getResult();
169
        Result.Metadata metadata = d.getMetadata();
170
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
171
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
172
        }
173

    
174
    }
175

    
176
    private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
177

    
178
        for (StructuredProperty pid : valueEntity.getPidList()) {
179

    
180
            rels.put("resultPid",
181
                    getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
182
        }
183
    }
184

    
185
    private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
186

    
187
        Result result = valueEntity.getResult();
188

    
189
        for (Instance instance : (result.getInstanceList())) {
190
            String classification = instance.getInstancetype().getClassname();
191
            if (classification != null && !classification.isEmpty()) {
192
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
193
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
194
                break;
195
            }
196
        }
197
    }
198

    
199
    private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
200
        Result result = valueEntity.getResult();
201
        //description
202
        for (StringField s : result.getMetadata().getDescriptionList()) {
203

    
204
            rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
205
        }
206
    }
207

    
208
    private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
209

    
210
        Result result = valueEntity.getResult();
211

    
212

    
213
        for (Result.Context context : result.getMetadata().getContextList()) {
214

    
215
            rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
216
        }
217
    }
218

    
219
    private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
220
        Result result = valueEntity.getResult();
221

    
222
//TODO hosted by
223
        for (Instance instance : (result.getInstanceList())) {
224
            String hostedBy = instance.getHostedby().getKey();
225
            if (hostedBy != null && !hostedBy.isEmpty()) {
226
                rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
227
            }
228
        }
229

    
230
//TODO  collected froms
231
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
232

    
233
            String collectedFrom = collectedFromValue.getKey();
234
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
235
                rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
236
            }
237
        }
238
    }
239

    
240
    private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
241

    
242
        Result d = valueEntity.getResult();
243
        Result.Metadata metadata = d.getMetadata();
244

    
245
        List<StructuredProperty> Topics = metadata.getSubjectList();
246

    
247
        for (StructuredProperty topic : Topics) {
248
            // TODO result topics
249
            rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
250
        }
251
    }
252

    
253

    
254
    private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
255
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
256
            if (extraInfo.getName().equals("result citations")) {
257
                DOMParser parser = new DOMParser();
258
                try {
259
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
260
                    Document doc = parser.getDocument();
261
                    doc.getDocumentElement().normalize();
262

    
263
                    NodeList citations = doc.getElementsByTagName("citation");
264
                    for (int temp = 0; temp < citations.getLength(); temp++) {
265
                        Element citation = (Element) citations.item(temp);
266
                        NodeList ids = citation.getElementsByTagName("id");
267
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
268
                            Element id = (Element) ids.item(temp1);
269
                            if(id.getAttribute("type").equals("openaire")){
270
                                //System.out.println(id.getAttribute("value"));
271
                                rels.put("resultCitation", id.getAttribute("value"));
272
                            }
273
                        }
274
                    }
275
                } catch (Exception e) {
276

    
277
                }
278

    
279
                /*
280
                rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
281
                        getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
282
                        */
283
            }
284

    
285
        }
286
    }
287

    
288
    private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
289
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
290
        StringBuilder buff = new StringBuilder();
291

    
292
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
293
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
294
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
295
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
296

    
297
        // name
298
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
299
            buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
300
        } else {
301
            buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
302
        }
303
        // type
304

    
305
        if (metadata.hasDatasourcetype()) {
306
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
307
        }
308

    
309
        // compatibility,
310
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
311

    
312
        // latitude
313
        buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING));
314

    
315
        // longtitude
316
        buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING));
317

    
318
        // dateofvalidation,
319
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
320

    
321
        // yearofvalidation,
322
        buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
323

    
324
        //website
325
        buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
326

    
327
        //piwik_id
328
        String piwik_id = "";
329
        for (String oid : oaf.getEntity().getOriginalIdList()) {
330
            if (oid.contains("piwik")) {
331
                piwik_id = oid.split(":")[1];
332
                break;
333
            }
334
        }
335
        buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
336

    
337
        //harvested
338
        buff.append(getStringField("false", DELIM, ENCLOSING));
339

    
340
        // deletedByInference
341
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
342

    
343
        // number??
344
        buff.append(getStringField("1", DELIM, ENCLOSING));
345

    
346
        return buff.toString();
347

    
348
    }
349

    
350
    private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
351

    
352
        StringBuilder buff = new StringBuilder();
353
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
354

    
355
        // `organization_datasources`,
356
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
357
        // organization_projects
358
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
359
        // `name`,
360
        buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
361

    
362
        // `country`,
363
        buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
364

    
365
        //website
366
        buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
367

    
368
        // deletedByInference
369
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
370

    
371
        // number
372
        buff.append(getStringField("1", DELIM, ENCLOSING));
373

    
374
        return buff.toString();
375
    }
376

    
377
    private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
378
        StringBuilder buff = new StringBuilder();
379

    
380
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
381

    
382
        // result_topics/
383
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
384
        // result_languages
385
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
386
        // `result_projects`,
387
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
388
        // `result_datasources`,
389
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
390
        // `result_classifications`,
391
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
392
        /// `result_infrastructures`,
393
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
394
        // `result_claims`,
395
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
396
        // `result_results`,
397
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
398
        // pubtitle
399

    
400
        String titleString = new String();
401

    
402
        for (int i = 0; i < metadata.getTitleList().size(); i++) {
403
            StructuredProperty title = metadata.getTitleList().get(i);
404

    
405
            if (i == 0) {
406
                titleString = title.getValue().replaceAll("\\s+", " ");
407
                titleString = titleString.replaceAll("\n", " ");
408
            }
409
            break;
410
        }
411

    
412
        //  pubtitle
413
        buff.append(getStringField(titleString, DELIM, ENCLOSING));
414

    
415
        //   format
416
        String formatString = new String();
417

    
418
        for (StringField format : metadata.getFormatList()) {
419
            formatString += format.getValue() + ";";
420

    
421
        }
422

    
423
        buff.append(getStringField(formatString, DELIM, ENCLOSING));
424
        //  publisher
425

    
426
        buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
427

    
428
        //  journal
429

    
430
        buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING));  //#null#!
431

    
432
        // year
433
        buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
434

    
435
        // date CHANGED THIS TO DATE FORMAT
436
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
437

    
438
        // access_mode,
439
        buff.append(getStringField(getAccessMode(oaf.getEntity().getResult()), DELIM, ENCLOSING));
440

    
441
        // bestlicense
442
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
443

    
444
        // type
445
        buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
446

    
447
        // embargo_end_date
448
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
449

    
450
        // `authors`,
451
        int authors = 0;
452

    
453
        String delayed = "no";
454

    
455
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
456

    
457
            /*
458
            if (rel.getRelType().equals(RelType.personResult)) {
459

    
460
                authors++;
461
            } else
462
            */
463
            if (rel.getRelType().equals(RelType.resultProject))
464
            // TODO remember : in result Project, first id is project, second is
465
            // result.
466
            {
467
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
468
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
469
                if (Integer.parseInt(daysfromend) > 0) {
470
                    delayed = "yes";
471
                }
472
            }
473
        }
474

    
475
        // `delayed`,
476
        buff.append(getStringField(delayed, DELIM, ENCLOSING));
477
        //authors
478
        buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
479

    
480
        String sources = new String();
481

    
482

    
483
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
484
            List<String> urls = instance.getUrlList();
485
            for (String url : urls) {
486
                sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
487
            }
488
        }
489

    
490
        //sources
491
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
492

    
493
        buff.append(sources);
494

    
495
        // deletedByInference
496
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
497

    
498

    
499
        // number??
500
        buff.append(getStringField("1", DELIM, ENCLOSING));
501
        return buff.toString();
502

    
503
    }
504

    
505

    
506
    /*
507
    private static String getBestLicense(Result result) {
508
        Qualifier bestLicense = null;
509
        LicenseComparator lc = new LicenseComparator();
510
        for (Instance instance : (result.getInstanceList())) {
511
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
512
                bestLicense = instance.getLicence();
513
            }
514
        }
515
        if (bestLicense != null) {
516
            return bestLicense.getClassname();
517
        } else {
518
            return null;
519
        }
520
    }
521

    
522
    // TODO here iterate over all values
523
    private static String getAccessMode(Result result) {
524
        String accessMode = " ";
525
        for (Instance instance : (result.getInstanceList())) {
526
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
527
                accessMode = instance.getLicence().getClassname();
528
                break;
529
            }
530

    
531
        }
532

    
533
        return accessMode;
534
    }
535
    */
536

    
537
    private static String getBestLicense(Result result) {
538
        Qualifier bestLicense = null;
539
        LicenseComparator lc = new LicenseComparator();
540
        for (Instance instance : (result.getInstanceList())) {
541
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
542
                bestLicense = instance.getAccessright();
543
            }
544
        }
545
        if (bestLicense != null) {
546
            return bestLicense.getClassname();
547
        } else {
548
            return null;
549
        }
550
    }
551

    
552
    // TODO here iterate over all values
553
    private static String getAccessMode(Result result) {
554
        String accessMode = " ";
555
        for (Instance instance : (result.getInstanceList())) {
556
            if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
557
                accessMode = instance.getAccessright().getClassname();
558
                break;
559
            }
560

    
561
        }
562

    
563
        return accessMode;
564
    }
565

    
566
    private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
567

    
568
        StringBuilder buff = new StringBuilder();
569
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
570
        
571
        // project_organizations
572
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
573
        
574
        // project_results
575
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
576

    
577

    
578
        // `acronym`,
579
        String acronym = metadata.getAcronym().getValue();
580
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
581
            acronym = metadata.getTitle().getValue();
582
        }
583

    
584
        buff.append(getStringField(acronym, DELIM, ENCLOSING));
585

    
586
        //title!
587
        buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
588

    
589
        List<StringField> fundList = metadata.getFundingtreeList();
590

    
591
        if (!fundList.isEmpty()) // `funding_lvl0`,
592
        {
593
            //TODO funder + 3 funding levels
594
           /* funder text,
595
            funding_lvl0 text,
596
	        funding_lvl1 text,
597
	        funding_lvl2 text,
598
	        funding_lvl3 text,*/
599
            buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
600

    
601
        } else {
602
            buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
603

    
604
        }
605

    
606
        String sc39 = metadata.getEcsc39().getValue().toString();
607
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
608
            sc39 = "yes";
609
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
610
            sc39 = "no";
611
        }
612

    
613
        buff.append(getStringField(sc39, DELIM, ENCLOSING));
614

    
615

    
616
        //project_type
617
        buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
618

    
619
        // `url`,
620
        buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
621

    
622
        // start_year
623

    
624
        buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
625

    
626
        // end_year
627
        buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
628

    
629
        // duration enddate-startdate
630

    
631
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
632

    
633
        // haspubs
634
        buff.append(getStringField("no", DELIM, ENCLOSING));
635

    
636
        // numpubs
637
        buff.append(getNumericField("0", DELIM, ENCLOSING));
638

    
639
        // enddate
640
        buff.append(getNumericField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
641

    
642
        // startdate
643
        buff.append(getNumericField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
644

    
645
        // `daysforlastpub`,
646
        buff.append(getNumericField("", DELIM, ENCLOSING));
647

    
648
        // `delayedpubs`,
649
        buff.append(getNumericField("", DELIM, ENCLOSING));
650

    
651
        //call identifier
652
        buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
653
        //code
654
        buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
655

    
656
        //esc39
657
        buff.append(getStringField(metadata.getEcsc39().getValue(), DELIM, ENCLOSING));
658

    
659
        //getUrl
660
        String sources = new String();
661

    
662
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
663
            List<String> urls = instance.getUrlList();
664
            for (String u : urls) {
665
                sources += u + ";";
666
            }
667
        }
668

    
669
        sources = cleanUrl(sources, DELIM, ENCLOSING);
670
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
671

    
672
        buff.append(sources);
673

    
674
        // deletedByInference
675
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
676

    
677
        // `number`
678
        buff.append(getStringField("1", DELIM, ENCLOSING));
679
        return buff.toString();
680

    
681
    }
682

    
683
    /*
684
    private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
685
        StringBuilder buff = new StringBuilder();
686

    
687
        PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
688

    
689
        // `person_id`,
690
        //buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
691

    
692

    
693
        // person_result
694
        buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
695

    
696

    
697
        //firstName
698
        buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
699

    
700
        //LastName
701

    
702
        String secondName = new String();
703
        for (StringField s : metadata.getSecondnamesList()) {
704
            secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
705
        }
706

    
707
        buff.append(getStringField(secondName, DELIM, ENCLOSING));
708

    
709
        // `fullname`,
710
        buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
711

    
712
        // `Nationality`,
713
        buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
714

    
715
        // `Email`,
716
        buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
717

    
718
        // `Phone`,
719
        buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
720

    
721
        //deletedByInference
722

    
723
        buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
724

    
725
        // `number`,
726
        buff.append(getStringField("1", DELIM, ENCLOSING));
727

    
728
        return buff.toString();
729

    
730
    }
731
    */
732

    
733

    
734
    private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
735

    
736
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
737

    
738
            String[] split = startdate.split("-");
739

    
740
            if (split == null || split.length == 0) {
741
                return ENCLOSING + "0" + ENCLOSING + DELIM;
742
            }
743

    
744
            int Startdate = Integer.parseInt(split[0]);
745

    
746
            split = enddate.split("-");
747

    
748
            if (split == null || split.length == 0) {
749
                return ENCLOSING + "0" + ENCLOSING + DELIM;
750
            }
751

    
752
            int Enddate = Integer.parseInt(split[0]);
753

    
754
            int diff = Enddate - Startdate;
755

    
756
            return ENCLOSING + diff + ENCLOSING + DELIM;
757

    
758
        }
759

    
760
        return ENCLOSING + "0" + ENCLOSING + DELIM;
761
    }
762

    
763
    private static String getYearInt(String data, String DELIM, String ENCLOSING) {
764
        if (data == null || data.isEmpty() || data.equals("-1")) {
765
            return ENCLOSING + "0" + ENCLOSING + DELIM;
766
        }
767

    
768
        String[] split = data.split("-");
769

    
770
        if (split == null || split.length == 0) {
771
            return ENCLOSING + "0" + ENCLOSING + DELIM;
772
        }
773

    
774
        String year = split[0];
775

    
776
        year = cleanNumber(year);
777

    
778
        if (year == null || year.isEmpty()) year = "0";
779

    
780
        return ENCLOSING + year + ENCLOSING + DELIM;
781

    
782
    }
783

    
784
    private static String cleanNumber(String number) {
785
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
786
        return number;
787
    }
788

    
789
    private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
790

    
791
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
792

    
793
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
794

    
795
    }
796

    
797
    private static String getStringField(String data, String DELIM, String ENCLOSING) {
798

    
799
        if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
800

    
801
        return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
802

    
803
    }
804

    
805
    private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
806
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
807
            return ENCLOSING + "0" + ENCLOSING + DELIM;
808
        } else {
809
            data = data.replace(DELIM, " ");
810
            data = data.replace(ENCLOSING, " ");
811
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
812
            return ENCLOSING + data + ENCLOSING + DELIM;
813
        }
814
    }
815

    
816
    private static String getNumericField(String data, String DELIM, String ENCLOSING) {
817
        if (data == null || data.isEmpty() || data.equals("")) {
818
            return ENCLOSING + "0" + ENCLOSING + DELIM;
819
        } else {
820
            return ENCLOSING + data + ENCLOSING + DELIM;
821
        }
822
    }
823

    
824
    public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
825
        switch (oaf.getKind()) {
826
            case entity:
827
                return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
828
            case relation:
829

    
830
                return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
831

    
832
        }
833
        return null;
834

    
835
    }
836

    
837
    public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
838
        return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
839
    }
840

    
841
    public static String clean(String value, String DELIM, String ENCLOSING) {
842
        if (value != null) {
843

    
844
            value = value.replaceAll("[\"\\r\\\\;]", "");
845
            value = value.replace(DELIM, " ");
846
            value = value.replace(ENCLOSING, " ");
847
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
848
            //value = value.replace("\"", "");
849
            //value = value.replace("'", "");
850
            //value = value.replace("«", " ");
851
            //value = value.replace("»", " ");
852

    
853
            //value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
854

    
855
            return value;
856

    
857
        } else {
858
            return "";
859

    
860
        }
861

    
862
    }
863

    
864
    public static String cleanId(String value, String DELIM, String ENCLOSING) {
865
        if (value != null) {
866
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
867

    
868
            // to datacite____:: )
869
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
870
            value = value.replaceFirst(".*\\|", "");
871
            value = value.replace("\n", "");
872
            value = value.replace(ENCLOSING, "");
873
            value = value.replace(DELIM, "");
874
            value = value.replace("\"", "");
875
            value = value.replace("«", " ");
876
            value = value.replace("»", " ");
877
        }
878

    
879
        return ENCLOSING + value + ENCLOSING;
880

    
881
    }
882

    
883

    
884
    public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
885
        value = value.replace(DELIM, " ");
886
        value = value.replace(ENCLOSING, " ");
887
        value = value.replace(" ", "");
888
        value = value.replace("\n", "");
889
        return value;
890
    }
891

    
892

    
893
    public static long DATEDIFF(String startDate, String endDate) {
894
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
895
        long days = 0l;
896
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
897
        // <startdate>2011-09-01</startdate>
898
        // <enddate>2015-08-31</enddate>
899
        Date dateIni = null;
900
        Date dateFin = null;
901

    
902
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
903
            return 0;
904
        }
905
        try {
906
            dateIni = (Date) format.parse(startDate);
907
            dateFin = (Date) format.parse(endDate);
908
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
909
        } catch (Exception e) {
910

    
911
            return 0;
912
        }
913

    
914
        return days;
915
    }
916

    
917

    
918
}
(3-3/3)