
Revision 35787

Added by Eri Katsari over 9 years ago

creating branch for measuring oaimpact

View differences:

modules/dnet-openaire-stats/branches/oaimpact/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java
1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

  
3
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
4
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
5
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
6
import eu.dnetlib.data.proto.FieldTypeProtos;
7
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
8
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
9
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
10
import eu.dnetlib.data.proto.OafProtos.Oaf;
11
import eu.dnetlib.data.proto.OafProtos.OafEntity;
12
import eu.dnetlib.data.proto.OafProtos.OafRel;
13
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
14
import eu.dnetlib.data.proto.PersonProtos;
15
import eu.dnetlib.data.proto.ProjectProtos.Project;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17
import eu.dnetlib.data.proto.ResultProtos.Result;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import org.apache.log4j.Logger;
20

  
21
import java.text.SimpleDateFormat;
22
import java.util.ArrayList;
23
import java.util.Date;
24
import java.util.HashMap;
25
import java.util.List;
26

  
27
/**
28
 * Simple serializer that parses input Oaf protos and prepares them
29
 * for Sqoop export.
30
 *
 * @author eri
 */
31
public class Serializer {
32

  
33
    private static String DELIM;
34
    private Logger log = Logger.getLogger(this.getClass());
35
    private String NULL_STRING;
36
    private String NULL_NUM;
37
    //TODO no longer used
38
    private static String ENCLOSED;
39

  
40
    public Serializer() {
41
    }
42

  
43
44

  
45
    public String serialize(Oaf oaf) {
46

  
47
        switch (oaf.getKind()) {
48
            case entity:
49
                OafEntity valueEntity = oaf.getEntity();
50

  
51
                switch (valueEntity.getType()) {
52
                    case datasource:
53

  
54
                        return buildDatasource(valueEntity);
55

  
56
                    case organization:
57

  
58
                        return buildOrganization(valueEntity);
59

  
60
                    case project:
61

  
62
                        return buildProject(valueEntity);
63
                    case result:
64

  
65
                        return buildResult(valueEntity);
66
//                    case person:
67
//
68
//                        return buildPerson(valueEntity);
69
                    default:
70
                        log.error("wrong type");
71
                        break;
72
                }
73
                break;
74
            case relation:
75
                OafRel valueRel = oaf.getRel();
76

  
77
                return buildRel(valueRel);
78

  
79
        }
80

  
81
        return null;
82

  
83
    }
84

  
85
    public String serialize(OafRel oaf) {
86

  
87
        switch (oaf.getRelType()) {
88
            case resultProject:
89

  
90
                return getResultProject(oaf);
91

  
92
            default:
93
                return buildRel(oaf);
94
        }
95
    }
96

  
97
    private String buildRel(OafRel rel) {
98

  
99
        return getStringField(rel.getTarget());
100
    }
101

  
102
    public HashMap<String, List<String>> extractRelations(Oaf oaf) {
103
        OafEntity valueEntity = oaf.getEntity();
104
        switch (valueEntity.getType()) {
105
            case datasource:
106

  
107
                return getDatasourceLanguages(valueEntity);
108

  
109
            case result:
110
                HashMap<String, List<String>> relations = new HashMap<String, List<String>>();
111
                relations.putAll(getResultLanguages(valueEntity));
112
                relations.putAll(getResultTopics(valueEntity));
113
                relations.putAll(getResultClassifications(valueEntity));
114
                relations.putAll(getResultDatasources(valueEntity));
115
                relations.putAll(getResultConcepts(valueEntity));
116
                return relations;
117
            default:
118

  
119
                return null;
120
        }
121

  
122
    }
123

  
124
    private String getResultProject(OafRel oaf) {
125
        String buff = new String();
126
        String result = oaf.getTarget();
127

  
128

  
129
        buff += getStringField(result);
130
        // TODO is declared as int!!!
131
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
132
        if (diff < 0) {
133
            diff = 0;
134
        }
135
        buff += getNumericField(String.valueOf(diff));
136

  
137

  
138
        return buff;
139
    }
140

  
141

  
142
    private HashMap<String, List<String>> getDatasourceLanguages(OafEntity valueEntity) {
143
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
144
        List<String> buffs = new ArrayList<String>();
145

  
146

  
147
        Datasource d = valueEntity.getDatasource();
148

  
149
        Metadata metadata = d.getMetadata();
150

  
151
        for (StringField lang : metadata.getOdlanguagesList()) {
152

  
153
            buffs.add(getStringField(lang.getValue()));
154
        }
155
        rels.put("datasourceLanguage", buffs);
156
        return rels;
157
    }
158

  
159
    private HashMap<String, List<String>> getResultLanguages(OafEntity valueEntity) {
160
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
161
        List<String> buffs = new ArrayList<String>();
162
        Result d = valueEntity.getResult();
163

  
164
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = d.getMetadata();
165

  
166
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
167

  
168
            buffs.add(getStringField(metadata.getLanguage().getClassname()));
169
        }
170
        rels.put("resultLanguage", buffs);
171
        return rels;
172

  
173
    }
174

  
175
    private HashMap<String, List<String>> getResultClassifications(OafEntity valueEntity) {
176

  
177
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
178
        List<String> buffs = new ArrayList<String>();
179
        Result result = valueEntity.getResult();
180

  
181
        for (Instance instance : (result.getInstanceList())) {
182
            String classification = instance.getInstancetype().getClassname();
183
            if (classification != null && !classification.isEmpty()) {
184
                buffs.add(getStringField(instance.getInstancetype().getClassname()));
185
                // TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
186
                break;
187
            }
188
        }
189
        rels.put("resultClassification", buffs);
190
        return rels;
191

  
192
    }
193

  
194
    private HashMap<String, List<String>> getResultConcepts(OafEntity valueEntity) {
195
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
196
        List<String> buffs = new ArrayList<String>();
197

  
198
        Result result = valueEntity.getResult();
199

  
200
        for (eu.dnetlib.data.proto.ResultProtos.Result.Context context : result.getMetadata().getContextList()) {
201

  
202
            buffs.add(getStringField(context.getId()));
203
        }
204
        rels.put("resultConcept", buffs);
205
        return rels;
206

  
207
    }
208

  
209
    private HashMap<String, List<String>> getResultDatasources(OafEntity valueEntity) {
210

  
211
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
212
        List<String> buffs = new ArrayList<String>();
213
        Result result = valueEntity.getResult();
214

  
215
//TODO hosted by
216
        for (Instance instance : (result.getInstanceList())) {
217

  
218

  
219
            String hostedBy = instance.getHostedby().getKey();
220
            if (hostedBy != null && !hostedBy.isEmpty()) {
221
                buffs.add((getStringField(hostedBy)));
222
            }
223
        }
224

  
225
//TODO  collected froms
226
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
227

  
228
            String collectedFrom = collectedFromValue.getKey();
229
            if (collectedFrom != null && !collectedFrom.isEmpty())
230
                buffs.add((getStringField(collectedFrom)));
231

  
232
        }
233
        rels.put("resultDatasource", buffs);
234
        return rels;
235

  
236
    }
237

  
238
    public static boolean isNumeric(String str) {
239

  
240
        str = str.replaceAll("[^A-Za-z0-9 ]", "");
241
        str = str.replaceAll(" ", "");
242
        return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional
243
        // '-' and decimal.
244
    }
245

  
246
    // TODO there are topics with "null" as value -> replace them
247
    private boolean isValidTopic(String t) {
248

  
249
        if (t == null || t.isEmpty()) {
250
            return false;
251
        }
252

  
253
        if (t.equals("") || t.equals(" ")) {
254
            return false;
255
        }
256
        if (t.equals("null") || t.equals("Null") || t.equals("NULL")) {
257
            return false;
258
        }
259

  
260
        if (t.equals(ENCLOSED + ENCLOSED + DELIM) || t.equals(ENCLOSED + NULL_STRING + ENCLOSED + DELIM)) {
261
            return false;
262
        }
263
        // skip dedups
264
        if (t.contains("ddc:")) {
265
            return false;
266
        }
267
        return true;
268
    }
269

  
270
    private HashMap<String, List<String>> getResultTopics(OafEntity valueEntity) {
271
        HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
272
        List<String> buffs = new ArrayList<String>();
273
        Result d = valueEntity.getResult();
274

  
275
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = d.getMetadata();
276

  
277
        List<StructuredProperty> Topics = metadata.getSubjectList();
278
        String buff = new String();
279
        for (StructuredProperty topic : Topics) {
280
            // TODOs
281
            if (isValidTopic(topic.getValue())) {
282
                if (!isNumeric(topic.getValue())) {
283
                    String t = getStringField(topic.getValue());
284
                    if (isValidTopic(t)) {
285
                        buff += t + " ";
286

  
287
                    }
288

  
289
                }
290
            }
291

  
292
        }
293
        if (!buff.isEmpty()) {
294
            buff = getStringField(buff);
295
            buffs.add(buff);
296
        }
297
        rels.put("resultTopic", buffs);
298

  
299
        return rels;
300

  
301
    }
302

  
303
    private String buildDatasource(OafEntity data) {
304

  
305
        String buff = new String();
306

  
307
        Datasource d = data.getDatasource();
308

  
309
        Metadata metadata = d.getMetadata();
310
        String full_id = getStringField(data.getId());
311

  
312
        buff += full_id;
313
        buff += full_id;
314
        buff += full_id;
315
        buff += full_id;
316

  
317

  
318
        // name
319
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
320
            buff += getStringField("Unknown Repository");
321
        } else {
322
            buff += getStringField(metadata.getOfficialname().getValue());
323
        }
324
        // type
325

  
326
        if (metadata.hasDatasourcetype())
327

  
328
        {
329
            buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
330

  
331
        } else {
332
            buff += getStringField(null);
333
        }
334

  
335
        // compatibility,
336
        buff += getStringField(metadata.getOpenairecompatibility().getClassname());
337

  
338
        // latitude
339
        buff += getStringField(metadata.getLatitude().getValue());
340

  
341
        // longtitude
342
        buff += getStringField(metadata.getLongitude().getValue());
343

  
344
        // dateofvalidation,
345
        buff += getStringField(metadata.getDateofvalidation().getValue());
346

  
347
        // yearofvalidation,
348

  
349
        // parse year of validation
350
        buff += getYearInt(metadata.getDateofvalidation().getValue());
351

  
352
        // number??
353

  
354
        buff += getStringField("1");
355

  
356
        return buff;
357
    }
358

  
359
    private String buildOrganization(OafEntity data) {
360

  
361
        String buff = new String();
362

  
363
        Organization organization = data.getOrganization();
364
        eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
365

  
366
        // `organization_datasources`,
367
        String full_id = getStringField(data.getId());
368
        buff += full_id;
369
        // organization_projects
370
        buff += full_id;
371
        // `name`,
372
        buff += getStringField(metadata.getLegalname().getValue());
373
        // `country`,
374

  
375

  
376
        buff += getStringField(metadata.getCountry().getClassname());
377

  
378

  
379
        // `number`,
380

  
381
        buff += getStringField("1");
382
        return buff;
383

  
384
    }
385

  
386
    private String buildResult(OafEntity data) {
387

  
388
        String buff = new String();
389

  
390
        Result result = data.getResult();
391
        eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
392

  
393
        // result_topics/
394
        String full_id = getStringField(data.getId());
395

  
396
        buff += full_id;
397

  
398
        // result_languages
399
        buff += full_id;
400

  
401
        // `result_projects`,
402
        buff += full_id;
403

  
404
        // `result_datasources`,
405
        buff += full_id;
406

  
407
        // `result_classifications`,
408
        buff += full_id;
409

  
410
        // / `result_infrastructures`,
411
        buff += full_id;
412

  
413
        // `result_claims`,
414
        buff += full_id;
415

  
416
        // `result_results`,
417
        buff += full_id;
418

  
419
        //TODO pubtitle
420

  
421
        String titleString = NULL_STRING;
422

  
423
        for (StructuredProperty title : metadata.getTitleList()) {
424
            titleString = title.getValue();
425
            break;
426
        }
427

  
428
        buff += getStringField(titleString);
429

  
430
        // TODO format
431
        String formatString = NULL_STRING;
432
        for (StringField format : metadata.getFormatList()) {
433
            formatString = format.getValue();
434
            break;
435
        }
436

  
437
        buff += getStringField(formatString);
438
       //TODO publisher
439

  
440
        buff += getStringField(metadata.getPublisher().getValue());
441
        //TODO journal
442

  
443
        buff += getStringField(metadata.getJournal().getName());
444

  
445
        // year
446
        buff += getYearInt(metadata.getDateofacceptance().getValue());
447

  
448
        // date CHANGED THIS TO DATE FORMAT
449
        buff += getStringDateField(metadata.getDateofacceptance().getValue());
450

  
451
        // access_mode,
452
        buff += getStringField(getAccessMode(result));
453

  
454
        // bestlicense
455

  
456
        buff += getStringField(getBestLicense(result));
457
        // type
458
        buff += getStringField(metadata.getResulttype().getClassname());
459
        // embargo_end_date
460
        buff += getStringField(metadata.getEmbargoenddate().getValue());
461

  
462
        // `authors`,
463
        int authors = 0;
464
        String delayed = "no";
465

  
466
        for (OafRel rel : data.getCachedRelList()) {
467

  
468
            if (rel.getRelType().equals(RelType.personResult)) {
469

  
470
                authors++;
471
            } else if (rel.getRelType().equals(RelType.resultProject))
472
            // TODO remember : in result Project, first id is project, second is
473
            // result.
474
            {
475

  
476
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
477
                // getYearDifferenceInteger returns an ENCLOSED/DELIM-wrapped field, so strip the wrapping before parsing
                String yearDiff = daysfromend.replace(ENCLOSED, "").replace(DELIM, "").trim();
                if (!yearDiff.isEmpty() && !yearDiff.equals(NULL_NUM) && Integer.parseInt(yearDiff) > 0) {
478
                    delayed = "yes";
479
                }
480
            }
481
        }
482
        // `delayed`,
483
        buff += getStringField(delayed);
484

  
485
        // log.info("Result " + full_id +"Author count : " + authors );
486
        buff += getNumericField(String.valueOf(authors));
487

  
488
        // number??
489

  
490
        buff += getStringField("1");
491

  
492
        if (isValid(buff, full_id)) {
493
            return buff;
494
        } else {
495
            return null;
496
        }
497

  
498
    }
499

  
500
    //TODO here see if check is ok
501
    private boolean isValid(String buff, String id) {
502
        if (buff.endsWith(ENCLOSED)) {
503
            log.error("Empty Result with  " + id + " with body: \n" + buff);
504
            return false;
505
        }
506
        return true;
507
    }
508

  
509
    private String getBestLicense(Result result) {
510
        Qualifier bestLicense = null;
511
        LicenseComparator lc = new LicenseComparator();
512
        for (Instance instance : (result.getInstanceList())) {
513
            if (lc.compare(bestLicense, instance.getLicence()) > 0) {
514
                bestLicense = instance.getLicence();
515
            }
516
        }
517
        if (bestLicense != null) {
518
            return bestLicense.getClassname();
519
        } else {
520
            return null;
521
        }
522
    }
523

  
524
    // TODO here iterate over all values
525
    private String getAccessMode(Result result) {
526
        String accessMode = NULL_STRING;
527
        for (Instance instance : (result.getInstanceList())) {
528
            if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
529
                accessMode = instance.getLicence().getClassname();
530
                break;
531
            }
532

  
533
        }
534

  
535
        return accessMode;
536
    }
537

  
538
    private String buildProject(OafEntity data) {
539

  
540
        String buff = new String();
541

  
542
        Project project = data.getProject();
543
        eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
544
        // project_organizations
545

  
546
        String full_id = getStringField(data.getId());
547
        buff += full_id;
548

  
549
        // project_results
550
        buff += full_id;
551
        // `acronym`,
552
        String acronym = metadata.getAcronym().getValue();
553
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
554
            acronym = metadata.getTitle().getValue();
555
        }
556

  
557
        buff += getStringField(acronym);
558

  
559
        //title!
560
        String title = metadata.getTitle().getValue();
561
        buff += getStringField(title);
562
        // `funding_lvl0`,
563
        String funding_tree_0 = NULL_STRING;
564
        String funding_tree_1 = NULL_STRING;
565
        String funding_tree_2 = NULL_STRING;
566

  
567
        List<StringField> fundList = metadata.getFundingtreeList();
568

  
569
        if (!fundList.isEmpty()) // `funding_lvl0`,
570
        {
571
            funding_tree_0 = getFundingLevel(fundList.get(0).getValue(), 0);
572

  
573
            funding_tree_1 = getFundingLevel(fundList.get(0).getValue(), 1);
574
            // log.info(funding_tree_1);
575

  
576
            funding_tree_2 = getFundingLevel(fundList.get(0).getValue(), 2);
577
            // log.info(funding_tree_2);
578

  
579
        }
580
        funding_tree_0 = funding_tree_0.replaceAll("\"", "");
581
        buff += getStringField(funding_tree_0);
582
        // `funding_lvl1`,
583

  
584
        funding_tree_1 = funding_tree_1.replaceAll("\"", "");
585

  
586
        if (funding_tree_1.equalsIgnoreCase("SP1")) {
587

  
588
            funding_tree_1 = "SP1-Cooperation";
589

  
590
        } else if (funding_tree_1.equalsIgnoreCase("SP2")) {
591
            funding_tree_1 = "SP2-Ideas";
592
        } else if (funding_tree_1.equalsIgnoreCase("SP3")) {
593
            funding_tree_1 = "SP3-People";
594
        } else if (funding_tree_1.equalsIgnoreCase("SP4")) {
595
            funding_tree_1 = "SP4-Capacities";
596
        } else if (funding_tree_1.equalsIgnoreCase("SP5")) {
597
            funding_tree_1 = "SP5-Euratom";
598
        }
599

  
600
        buff += getStringField(funding_tree_1);
601

  
602
        // TODO checking for level 2 value present in other funding tree
603
        if (funding_tree_2.equals(NULL_STRING)) {
604
            if (fundList.size() > 1) {
605
                funding_tree_2 = getFundingLevel(fundList.get(1).getValue(), 2);
606
            }
607
        }
608

  
609
        funding_tree_2 = funding_tree_2.replaceAll("\"", "");
610

  
611
        // / `funding_lvl2`,
612
        buff += getStringField(funding_tree_2);
613

  
614
        // `sc39`,
615

  
616
        String sc39 = metadata.getEcsc39().getValue().toString();
617
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
618
            sc39 = "yes";
619
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
620
            sc39 = "no";
621
        }
622

  
623
        buff += getStringField(sc39);
624

  
625
        // `url`,
626

  
627
        buff += getStringField(metadata.getWebsiteurl().getValue());
628

  
629
        // start_year
630

  
631
        buff += getYearInt(metadata.getStartdate().getValue());
632

  
633
        // end_year
634
        buff += getYearInt(metadata.getEnddate().getValue());
635

  
636
        // duration enddate-startdate
637

  
638
        buff += getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue());
639

  
640
        // haspubs
641
        buff += getStringField("no");
642

  
643
        // numpubs
644
        buff += getNumericField("0");
645
        // enddate
646
        buff += getNumericField(metadata.getEnddate().getValue());
647
        // startdate
648
        buff += getNumericField(metadata.getStartdate().getValue());
649

  
650
        // `daysforlastpub`,
651
        buff += getNumericField("");
652
        // `delayedpubs`,
653
        buff += getNumericField("");
654
        // `number`
655
        buff += getStringField("1");
656
        return buff;
657

  
658
    }
659

  
660
    //TODO remove for production - not yet ready
661
    private String buildPerson(OafEntity data) {
662

  
663
        String buff = new String();
664

  
665
        PersonProtos.Person person = data.getPerson();
666
        eu.dnetlib.data.proto.PersonProtos.Person.Metadata metadata = person.getMetadata();
667

  
668
        person.getCoauthorsCount();
669

  
670
        // `person_id`,
671
        String full_id = getStringField(data.getId());
672
        buff += full_id;
673
        // person_result
674
        buff += full_id;
675

  
676
        // `fullname`,
677
        buff += metadata.getFullname();
678

  
679
        // `Nationality`,
680
        buff += metadata.getNationality();
681
        // `Email`,
682
        buff += metadata.getEmail();
683
        // `Phone`,
684
        buff += metadata.getPhone();
685
        // `CoauthorsCount`
686
        buff += person.getCoauthorsCount();
687
        // `number`,
688
        buff += getStringField("1");
689
        return buff;
690

  
691
    }
692

  
693
    private String getFundingLevel(String funding_level, int level) {
694

  
695
        if (funding_level.isEmpty()) {
696
            return NULL_STRING;
697

  
698
        }
699

  
700
        if (!funding_level.contains("funding_level_" + level)) {
701
            return NULL_STRING;
702
        }
703
        String[] split = funding_level.split("funding_level_" + level);
704

  
705
        funding_level = split[1];
706
        split = funding_level.split("name");
707
        split = split[1].split(",");
708

  
709
        funding_level = split[0].replaceAll(".*:\"", "");
710
        funding_level = funding_level.replaceFirst(ENCLOSED, "");
711
        funding_level = funding_level.trim();
712

  
713
        return funding_level;
714
    }
715

  
716
    private String getYearDifferenceInteger(String enddate, String startdate) {
717

  
718
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
719

  
720
            // if (enddate != null && startdate != null&&) {
721

  
722
            String[] split = startdate.split("-");
723

  
724
            int startYear = Integer.parseInt(split[0]);
725

  
726
            split = enddate.split("-");
727

  
728
            int endYear = Integer.parseInt(split[0]);
729

  
730
            int diff = endYear - startYear;
731
            return ENCLOSED + diff + ENCLOSED + DELIM;
732
        }
733

  
734
        return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
735
    }
736

  
737
    private String getYearInt(String data) {
738
        if (data == null || data.isEmpty() || data.equals("-1")) {
739
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
740
        }
741

  
742
        String[] split = data.split("-");
743

  
744
        if (split != null) {
745

  
746
            String year = split[0];
747
            year = cleanNumber(year);
748

  
749
            return ENCLOSED + year + ENCLOSED + DELIM;
750
        } else {
751
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
752
        }
753

  
754
    }
755

  
756
    private String cleanNumber(String number) {
757
        number = number.replaceAll("[^A-Za-z0-9:,____]", "");
758

  
759
        return number;
760
    }
761

  
762
    private String getStringField(String data) {
763

  
764
        if (data == null || data.isEmpty() || data.equals("")) {
765

  
766
            return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
767
        } else {
768

  
769
            String field = clean(data);
770
            if (field == null) {
771
                return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
772
            } else {
773
                return field + DELIM;
774
            }
775
        }
776
    }
777

  
778
    private String getStringDateField(String data) {
779

  
780
        if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
781

  
782
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
783
        } else {
784

  
785
            String field = clean(data);
786
            if (field == null) {
787
                return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
788
            } else {
789
                return field + DELIM;
790
            }
791
        }
792
    }
793

  
794
    private String getNumericField(String data) {
795
        if (data == null || data.isEmpty() || data.equals("")) {
796
            return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
797
        } else {
798

  
799
            return ENCLOSED + data + ENCLOSED + DELIM;
800
        }
801
    }
802

  
803
    public String getId(Oaf oaf) {
804
        switch (oaf.getKind()) {
805
            case entity:
806

  
807
                return cleanId(oaf.getEntity().getId());
808
            case relation:
809

  
810
                return cleanId(oaf.getRel().getSource());
811

  
812
        }
813
        return null;
814

  
815
    }
816

  
817
    public String getId(OafRel relOaf) {
818
        return cleanId(relOaf.getSource());
819
    }
820

  
821
    public static String clean(String value) {
822
        if (value != null) {
823
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
824
            // to datacite____:: )
825
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
826
            value = value.replaceFirst(".*\\|", "");
827
            value = value.replaceAll(DELIM, "");
828
            value = value.replaceAll(ENCLOSED, "");
829

  
830
            // value = value.replaceAll("[^A-Za-z0-9:,____-;:]", " ");
831
            value = value.trim();
832

  
833
        }
834
        if (value == null) {
835
            return null;
836
        }
837
        return ENCLOSED + value + ENCLOSED;
838

  
839
    }
840

  
841
    public static String cleanId(String value) {
842
        if (value != null) {
843
            // TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
844
            // to datacite____:: )
845
            // AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
846
            value = value.replaceFirst(".*\\|", "");
847
            value = value.replaceAll("\n", "");
848
            value = value.replaceAll(DELIM, "");
849
            value = value.replaceAll(ENCLOSED, "");
850
            value = value.trim();
851

  
852
        }
853
        if (value == null) {
854
            return null;
855
        }
856
        return ENCLOSED + value + ENCLOSED;
857

  
858
    }
859

  
860
    public long DATEDIFF(String startDate, String endDate) {
861
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
862
        long days = 0l;
863
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
864
        // <startdate>2011-09-01</startdate>
865
        // <enddate>2015-08-31</enddate>
866
        Date dateIni = null;
867
        Date dateFin = null;
868
        try {
869
            dateIni = (Date) format.parse(startDate);
870
            dateFin = (Date) format.parse(endDate);
871
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
872
        } catch (Exception e) {
873
            e.printStackTrace();
874
        }
875

  
876
        return days;
877
    }
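    // Illustrative note (not part of the original source): with the sample dates
    // shown in the comments above, DATEDIFF("2011-09-01", "2015-08-31") parses both
    // values with the yyyy-MM-dd format and returns roughly 1460 whole days; if
    // either date fails to parse, the exception is swallowed and 0 is returned.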
878

  
879
    public String getDELIM() {
880
        return DELIM;
881
    }
882

  
883
    public void setDELIM(String dELIM) {
884
        DELIM = dELIM;
885
    }
886

  
887
    public String getNULL_STRING() {
888
        return NULL_STRING;
889
    }
890

  
891
    public void setNULL_STRING(String nULL_STRING) {
892
        NULL_STRING = nULL_STRING;
893
    }
894

  
895
    public String getNULL_NUM() {
896
        return NULL_NUM;
897
    }
898

  
899
    public void setNULL_NUM(String nULL_NUM) {
900
        NULL_NUM = nULL_NUM;
901
    }
902

  
903
    public String getENCLOSED() {
904
        return ENCLOSED;
905
    }
906

  
907
    public void setENCLOSED(String eNCLOSED) {
908
        ENCLOSED = eNCLOSED;
909
    }
910

  
911
}
0 912
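For reference, a minimal usage sketch of the serializer above (not part of this revision). It assumes a decoded Oaf protobuf record is available, for example read from an HBase cell, and that the example lives in the same package as Serializer; the delimiter, enclosing character and null markers below are illustrative placeholders that must match whatever the export job is configured with.

package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

import eu.dnetlib.data.proto.OafProtos.Oaf;

import java.util.HashMap;
import java.util.List;

public class SerializerUsageExample {

    // rawBytes would typically be the serialized Oaf value of an HBase cell.
    public static void printRow(byte[] rawBytes) throws Exception {
        Serializer serializer = new Serializer();
        serializer.setDELIM("!");        // field delimiter (placeholder value)
        serializer.setENCLOSED("#");     // enclosing/quote character (placeholder value)
        serializer.setNULL_STRING("");   // marker written for missing string fields
        serializer.setNULL_NUM("0");     // marker written for missing numeric fields

        Oaf oaf = Oaf.parseFrom(rawBytes);
        String row = serializer.serialize(oaf);  // delimited row, or null for unsupported entity types
        String id = serializer.getId(oaf);       // cleaned identifier (e.g. the "50|" prefix removed)
        HashMap<String, List<String>> relations = serializer.extractRelations(oaf); // e.g. "resultLanguage" -> values

        System.out.println(id + " -> " + row + " (relations: " + relations + ")");
    }
}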

  
modules/dnet-openaire-stats/branches/oaimpact/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/ContextExporter.java
1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import org.apache.hadoop.conf.Configuration;
7
import org.apache.hadoop.fs.FSDataOutputStream;
8
import org.apache.hadoop.fs.FileSystem;
9
import org.apache.hadoop.fs.Path;
10
import org.apache.log4j.Logger;
11

  
12
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
13
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
14

  
15
import org.apache.cxf.jaxws.JaxWsProxyFactoryBean;
16

  
17
public class ContextExporter {
18
    private ContextTransformer contextTransformer = new ContextTransformer();
19
    private String outputPath;
20
    private Logger log = Logger.getLogger(this.getClass());
21

  
22

  
23
    private ArrayList<String> context = new ArrayList<String>();
24
    private ArrayList<String> category = new ArrayList<String>();
25
    private ArrayList<String> concept = new ArrayList<String>();
26

  
27
    public ContextExporter(String outputPath, String contextMap, boolean readFromUrl) throws Exception {
28
        if (!outputPath.endsWith("/")) {
29
            outputPath += "/";
30
        }
31
        this.outputPath = outputPath;
32
        if (readFromUrl) {
33
            readFromUrl(contextMap);
34
        } else {
35
            readFromBuffer(contextMap);
36
        }
37

  
38
    }
39

  
40
    public void readFromUrl(String url) throws Exception {
41

  
42
        List<String> concepts = getContextResources(url);
43
        log.info("Returned concepts: " + concepts.size());
44
45

  
46
        for (String data : concepts) {
47
            log.info("++++++++++++++ Transforming concept data ");
48
            String res = contextTransformer.transformXSL(data);
49

  
50
            processData(res);
51

  
52

  
53
        }
54
        writeData(this.context, "context");
55
        writeData(this.category, "category");
56
        writeData(this.concept, "concept");
57

  
58

  
59
    }
60

  
61
    private void readFromBuffer(String contextMap) throws Exception {
62

  
63
        if (contextMap == null || contextMap.isEmpty()) {
64
            log.error("Context Resources file is empty.");
65
            throw new Exception("Context Resources file is empty.");
66
        }
67

  
68
        String data = contextTransformer.transformXSL(contextMap);
69

  
70
        log.info(data);
71
        processData(data);
72
    }
73

  
74
    private List<String> getContextResources(String url) throws ISLookUpException {
75
        ISLookUpService lookUpService;
76

  
77
        JaxWsProxyFactoryBean factory = new JaxWsProxyFactoryBean();
78
        factory.setServiceClass(ISLookUpService.class);
79
        factory.setAddress(url);
80

  
81
        lookUpService = (ISLookUpService) factory.create();
82
//		for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']
83
//		[.//RESOURCE_KIND/@value='ContextDSResources'] return  $x
84
        return lookUpService.quickSearchProfile("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType'][.//RESOURCE_KIND/@value='ContextDSResources']");
85
    }
86

  
87

  
88
    private void writeData(ArrayList<String> dataList, String listName) throws Exception {
89

  
90
        log.info(listName + "  size " + dataList.size());
91

  
92
        String data = new String();
93
        for (int i = 0; i < dataList.size(); i++) {
94

  
95
            data += dataList.get(i);
96

  
97

  
98
        }
99

  
100

  
101
        if (data.contains("\n")) {
            data = data.substring(0, data.lastIndexOf("\n"));
        }
102

  
103

  
104
        flushString(data, outputPath + listName);
105

  
106

  
107
    }
108
    private void processData(String data) throws Exception {
109
        try {
110

  
111
            String[] split = data.split("COPY\n");
112

  
113

  
114
            context.add(split[0]);
115

  
116

  
117
            if (split[1].endsWith("\n")) {
118
                //  split[1] = split[1].substring(0, split[1].lastIndexOf("\n"));
119

  
120
            }
121

  
122
            category.add(split[1]);
123

  
124
            concept.add(split[2]);
125

  
126
        } catch (Exception e) {
127
            String msg = " Unable to create file with context, " + "concept and category values in output path " + outputPath + ". Reason: ";
128
            log.error(msg, e);
129
            throw new Exception(msg, e);
130
        }
131

  
132
    }
133

  
134
    private void flushString(String data, String destination) throws Exception {
135

  
136
        FSDataOutputStream fin = null;
137
        try {
138

  
139

  
140
            log.info("***********************Writing data:***********************\n" + data);
141
            log.info("***********************  data:***********************\n");
142
            FileSystem fs = FileSystem.get(new Configuration());
143
            fin = fs.create(new Path(destination), true);
144

  
145
            fin.write(data.getBytes());
146

  
147
        } catch (Exception e) {
148
            log.error("Failed  to write exported data to a file : ", e);
149
            throw new Exception("Failed  to write exported data to a file : " + e.toString(), e);
150

  
151
        } finally {
152

  
153
            if (fin != null) {
                fin.close();
            }
154

  
155
        }
156
    }
157

  
158
    public String getOutputPath() {
159
        return outputPath;
160
    }
161

  
162
    public void setOutputPath(String outputPath) {
163
        this.outputPath = outputPath;
164
    }
165

  
166
}
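For reference, a minimal usage sketch of the exporter above (not part of this revision), assuming it runs in the same package. The constructor either queries an IS LookUp endpoint for ContextDSResourceType profiles (readFromUrl = true) and writes the "context", "category" and "concept" files under the output path, or transforms a context profile that has already been fetched into a buffer (readFromUrl = false). The endpoint URL and output path below are illustrative placeholders.

package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

public class ContextExporterUsageExample {

    // Variant 1: fetch the context profiles from the IS LookUp service (placeholder URL and path).
    public static void exportFromLookUp() throws Exception {
        new ContextExporter("/tmp/stats-export", "http://example.org/is/services/isLookUp", true);
    }

    // Variant 2: transform a ContextDSResource profile that was already fetched into a buffer;
    // this path only parses the buffer, it does not write the output files itself.
    public static void exportFromBuffer(String contextProfileXml) throws Exception {
        new ContextExporter("/tmp/stats-export", contextProfileXml, false);
    }
}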
modules/dnet-openaire-stats/branches/oaimpact/src/test/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/ExportTest.java
1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

  
3
import java.io.InputStream;
4
import java.util.Properties;
5

  
6
import org.apache.hadoop.conf.Configuration;
7
import org.apache.log4j.Logger;
8
import org.junit.Before;
9
import org.junit.Test;
10

  
11
import eu.dnetlib.data.mapreduce.hbase.statsExport.daos.SqlDAO;
12
import eu.dnetlib.data.mapreduce.hbase.statsExport.daos.SqlStore;
13
import eu.dnetlib.data.mapreduce.hbase.statsExport.drivers.DBDriver;
14
import eu.dnetlib.data.mapreduce.hbase.statsExport.drivers.SqoopDriver;
15
import eu.dnetlib.data.mapreduce.hbase.statsExport.drivers.StatsJobDriver;
16

  
17
public class ExportTest {
18

  
19
	private Logger log = Logger.getLogger(this.getClass());
20

  
21
	private SqlDAO statsDao;
22
	private SqlStore statsStore;
23
	private SqoopDriver sqoopDriver;
24
	private Properties props;
25
	private StatsJobDriver statsJobDriver;
26
	private DBDriver dbDriver;
27

  
28
//	@Before
29
	public void init() throws Exception {
30
		InputStream file = ClassLoader.getSystemResourceAsStream("eu/dnetlib/data/mapreduce/hbase/statsExport/StatsProperties");
31
		props = new Properties();
32
		props.load(file);
33
		file.close();
34

  
35
		statsStore = new SqlStore();
36

  
37
		statsStore.setDbDriver(props.getProperty("Stats_db_Driver"));
38
		//
39
		statsStore.setDB_URL(props.getProperty("Stats_db_Url"));
40
		statsStore.setDbUser(props.getProperty("Stats_db_User"));
41
		statsStore.setDbPassword(props.getProperty("Stats_db_Pass"));
42

  
43
		statsDao = new SqlDAO();
44

  
45
		statsDao.setStore(statsStore);
46

  
47
		dbDriver = new DBDriver();
48
		dbDriver.setStatsDao(statsDao);
49

  
50
		sqoopDriver = new SqoopDriver();
51
		sqoopDriver.setDelim(props.getProperty("Stats_delim_Character"));
52
		sqoopDriver.setOutputPath(props.getProperty("Stats_outputPath"));
53

  
54
		sqoopDriver.setConnectionUrl(props.getProperty("Stats_db_Url"));
55
		sqoopDriver.setDbUser(props.getProperty("Stats_db_User"));
56
		sqoopDriver.setDbPass(props.getProperty("Stats_db_Pass"));
57
		sqoopDriver.setSqoopReducersCount(props.getProperty("Stats_sqoop_ReducersCount"));
58
		sqoopDriver.setRecsPerStatement("1");
59
		sqoopDriver.setStatementPerTrans("1");
60
		sqoopDriver.setBatch(false);
61
		sqoopDriver.setVerbose(true);
62

  
63
		sqoopDriver.setConf(new Configuration());
64
		sqoopDriver.setUseHdfsStore(false);
65
	}
66

  
67
//	 @Test
68
	public void parser() {
69

  
70
		String url = "jdbc:postgresql://kiczora.vls.icm.edu.pl:5432/stats";
71
		if (url.endsWith("/")) {
72
			url = url.substring(0, url.lastIndexOf('/'));
73
			System.out.print("url 1 " + url + "\n");
74

  
75
		}
76
		url = url.substring(0, url.lastIndexOf('/') + 1);
77
		System.out.print("url 2" + url);
78

  
79
	}
80

  
81
	public void runStatsExport() throws Exception {
82
		statsJobDriver = new StatsJobDriver();
83
		statsJobDriver.init();
84
		statsJobDriver.run();
85
	}
86

  
87
//	 @Test
88
	public void runPrepareDB() throws Exception {
89
		
90
		 statsDao.setSearchPathDB(props.getProperty("Stats_db_User"));
91
//		 dbDriver.prepareDB(props.getProperty("Stats_db_User"));
92

  
93
	}
94

  
95
	// @Test
96
	public void runSqoopImport() throws Exception {
97
		// dbDriver.prepareDB(props.getProperty("Stats.dbUser"));
98
		sqoopDriver.initSqoopJob();
99
	}
100

  
101
//	@Test
102
	public void test() throws Exception {
103
		// InputStream in =
104
		// ClassLoader.getSystemResourceAsStream("eu/dnetlib/data/mapreduce/hbase/statsExport/"
105
		// + "ContextResourceProfile.xml");
106

  
107
		// byte[] b = new byte[in.available()];
108

  
109
		// in.read(b);
110
		// String data = new String(b);
111
//		 in.close();
112
		// log.info("data" + data);
113

  
114
		// ContextExporter exp= new
115
		// ContextExporter(props.getProperty("Stats_outputPath"),
116
		// props.getProperty("ConhaspubstextResourceXML"), false);
117
		ContextTransformer tr = new ContextTransformer();
118
		log.info(tr.transformXSL(props.getProperty("ContextResourceXML")));
119

  
120
	}
121

  
122
	
123
//	@Test
124
	public void testExport() throws Exception {
125
		  
126
		ContextExporter exp = new ContextExporter(props.getProperty("Stats_outputPath"), props.getProperty("isLookupEndpoint"), true);
127
		 
128
	}
129

  
130
	
131
//	@Test
132
	public void executeExtraInserts() throws Exception {
133
//		statsDao.buildViews();
134
		statsDao.executeExtraInserts();
135

  
136
	}
137

  
138
	 
139
//	@Test
140
	public void buildIndexesDb() throws Exception {
141

  
142
		statsDao.executeExtraInserts();
143

  
144
	}
145

  
146
//	@Test
147
	public void finalizeDb() throws Exception {
148

  
149
		dbDriver.finalizedDB();
150

  
151
	}
152

  
153
//	@Test
154
	public void clean() throws Exception {
155
 
156
		log.info(cleanId("od_______133::000d16306f933e487892fdad65e1fc59"));
157
	}
158
	public String cleanId(String value) {
159
		if (value != null) {
160
			// TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
161
			// to datacite____:: )
162
			// AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
163
			value = value.replaceFirst(".*\\|", "");
164
			value = value.replaceAll("\n", "");
165
			value = value.replaceAll("#", "");
166
			value = value.replaceAll("!", "");
167
			value = value.trim();
168

  
169
		}
170
		if (value == null) {
171
			return null;
172
		}
173
		return value;
174

  
175
	}
176

  
177
}
modules/dnet-openaire-stats/branches/oaimpact/pom.xml
1
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet-hadoop-parent</artifactId>
6
		<version>1.0.0-SNAPSHOT</version>
7
		<relativePath></relativePath>
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaire-stats</artifactId>
12
	<version>0.0.1-SNAPSHOT</version>
13
	<build>
14
		<plugins>
15
			<!-- <plugin> -->
16
			<!-- <groupId>org.codehaus.mojo</groupId> -->
17
			<!-- <artifactId>exec-maven-plugin</artifactId> -->
18
			<!-- <version>1.2.1</version> -->
19
			<!-- <configuration> -->
20
			<!-- <mainClass>eu.dnetlib.data.mapreduce.hbase.statsExport.drivers.SqoopDriver</mainClass> -->
21
			<!-- </configuration> -->
22
			<!-- </plugin> -->
23
			<!-- <plugin> -->
24
			<!-- <groupId>org.apache.maven.plugins</groupId> -->
25
			<!-- <artifactId>maven-shade-plugin</artifactId> -->
26
			<!-- <version>2.1</version> -->
27
			<!-- <executions> -->
28
			<!-- <execution> -->
29
			<!-- <phase>package</phase> -->
30
			<!-- <goals> -->
31
			<!-- <goal>shade</goal> -->
32
			<!-- </goals> -->
33
			<!-- <configuration> -->
34
			<!-- <transformers> -->
35
			<!-- <transformer -->
36
			<!-- implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> -->
37
			<!-- </transformer> -->
38
			<!-- </transformers> -->
39
			<!-- </configuration> -->
40
			<!-- </execution> -->
41
			<!-- </executions> </plugin> -->
42
		</plugins>
43

  
44
	</build>
45
	<dependencies>
46
		<dependency>
47
			<groupId>javax.xml</groupId>
48
			<artifactId>jaxp-api</artifactId>
49
			<version>1.4.2</version>
50
		</dependency>
51
		<dependency>
52
			<groupId>net.sf.opencsv</groupId>
53
			<artifactId>opencsv</artifactId>
54
			<version>2.3</version>
55
		</dependency>
56
		<dependency>
57
			<groupId>org.apache.poi</groupId>
58
			<artifactId>poi</artifactId>
59
			<version>3.8</version>
60
		</dependency>
61

  
62
		<dependency>
63
			<groupId>jdk.tools</groupId>
64
			<artifactId>jdk.tools</artifactId>
65
			<version>1.7.0_05</version>
66
			<scope>system</scope>
67
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
68
		</dependency>
69
		<dependency>
70
			<groupId>org.apache.logging.log4j</groupId>
71
			<artifactId>log4j-api</artifactId>
72
			<version>2.0-rc2</version>
73
		</dependency>
74
		<dependency>
75
			<groupId>org.apache.logging.log4j</groupId>
76
			<artifactId>log4j-core</artifactId>
77
			<version>2.0-rc2</version>
78
		</dependency>
79
		<dependency>
80
			<groupId>log4j</groupId>
81
			<artifactId>log4j</artifactId>
82
			<version>1.2.17</version>
83
		</dependency>
84
		<dependency>
85
			<groupId>org.slf4j</groupId>
86
			<artifactId>slf4j-api</artifactId>
87
			<version>1.7.6</version>
88
		</dependency>
89
		<dependency>
90
			<groupId>org.slf4j</groupId>
91
			<artifactId>slf4j-log4j12</artifactId>
92
			<version>1.7.6</version>
93
		</dependency>
94
		<dependency>
95
			<groupId>com.google.guava</groupId>
96
			<artifactId>guava-collections</artifactId>
97
			<version>r03</version>
98
		</dependency>
99

  
100
		<dependency>
101
			<groupId>eu.dnetlib</groupId>
102
			<artifactId>dnet-mapreduce-jobs</artifactId>
103
			<version>0.0.6.2-SNAPSHOT</version>
104
		</dependency>
105
        <dependency>
106
            <groupId>eu.dnetlib</groupId>
107
            <artifactId>dnet-openaire-data-protos</artifactId>
108
            <version>[3.0.0,4.0.0)</version>
109
        </dependency>
110
		<dependency>
111
			<groupId>org.apache.httpcomponents</groupId>
112
			<artifactId>httpclient</artifactId>
113
			<version>4.0-alpha4</version>
114
		</dependency>
115
		<dependency>
116
			<groupId>commons-httpclient</groupId>
117
			<artifactId>commons-httpclient</artifactId>
118
			<version>3.1</version>
119
		</dependency>
120
		<dependency>
121
			<groupId>junit</groupId>
122
			<artifactId>junit</artifactId>
123
			<version>4.8.2</version>
124
		</dependency>
125
		<dependency>
126
			<groupId>org.apache.sqoop</groupId>
127
			<artifactId>sqoop</artifactId>
128
			<version>1.4.4</version>
129
		</dependency>
130
		<dependency>
131
			<groupId>postgresql</groupId>
132
			<artifactId>postgresql</artifactId>
133
			<version>9.1-901.jdbc4</version>
134
		</dependency>
135
		<dependency>
136
			<groupId>eu.dnetlib</groupId>
137
			<artifactId>cnr-rmi-api</artifactId>
138
			<version>2.0.0-SNAPSHOT</version>
139
		</dependency>
140
		<dependency>
141
			<groupId>org.apache.cxf</groupId>
142
			<artifactId>cxf-rt-frontend-jaxws</artifactId>
143
			<version>2.7.8</version>
144
		</dependency>
145

  
146
	</dependencies>
147
	<repositories>
148
		<repository>
149
			<id>cloudera</id>
150
			<name>Cloudera Repository</name>
151
			<url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
152
			<releases>
153
				<enabled>true</enabled>
154
			</releases>
155
			<snapshots>
156
				<enabled>false</enabled>
157
			</snapshots>
158
		</repository>
159
	</repositories>
160

  
161
</project>
0 162

  
modules/dnet-openaire-stats/branches/oaimpact/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/HdfsWriter.java
1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

  
3
import java.io.BufferedWriter;
4
import java.io.OutputStreamWriter;
5
import java.text.SimpleDateFormat;
6
import java.util.ArrayList;
7
import java.util.Date;
8
import java.util.List;
9

  
10
import org.apache.hadoop.conf.Configuration;
11
import org.apache.hadoop.fs.FileSystem;
12
import org.apache.hadoop.fs.Path;
13
import org.apache.log4j.Logger;
14

  
15
public class HdfsWriter {
16

  
17

  
18
    public static void write(String data, String filename) throws Exception {
19

  
20
        Configuration conf = new Configuration();
21

  
22
        FileSystem hdfs = FileSystem.get(conf);
23

  
24
        try {
25

  
26

  
27
            Path exportPath = new Path(hdfs.getUri() + filename);
28
            BufferedWriter br = new BufferedWriter(new OutputStreamWriter(hdfs.create(exportPath, false)));
29
            // TO append data to a file, use fs.append(Path f)
30

  
31
            br.write(data);
32
            br.close();
33

  
34
        } catch (Exception e) {
35

  
36

  
37
            throw new Exception("Error while writing file ", e);
38
        }
39

  
40
    }
41

  
42

  
43
}
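A minimal usage sketch (not part of this revision): the Hadoop configuration is picked up from whatever *-site.xml files are on the classpath, and the target path below is an illustrative placeholder. Note that write() creates the file with overwrite disabled, so it fails if the destination already exists.

package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;

public class HdfsWriterUsageExample {

    public static void main(String[] args) throws Exception {
        // Writes the given string as a new file relative to the default FileSystem URI.
        HdfsWriter.write("some exported rows\n", "/tmp/stats-export/example");
    }
}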
... This diff was truncated because it exceeds the maximum size that can be displayed.
