Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2

    
3
import java.util.ArrayList;
4
import java.util.HashMap;
5
import java.util.List;
6

    
7
import org.apache.log4j.Logger;
8

    
9
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
10
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
11
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
12
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
13
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
14
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
15
import eu.dnetlib.data.proto.OafProtos.Oaf;
16
import eu.dnetlib.data.proto.OafProtos.OafEntity;
17
import eu.dnetlib.data.proto.OafProtos.OafRel;
18
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
19
import eu.dnetlib.data.proto.ProjectProtos.Project;
20
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
21
import eu.dnetlib.data.proto.ResultProtos.Result;
22
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
23

    
24
/**
 * Simple serializer that parses input Oaf Protos and prepares them for sqoop.
 *
 * @author eri
 */
29
public class Serializer {
30

    
31
	private String DELIM;
32
	private Logger log = Logger.getLogger(this.getClass());
33
	private String NULL_STRING;
34
	private String NULL_NUM;
35
	private static String ENCLOSED;
36

    
37
	public String serialize(Oaf oaf) {
38

    
39
		switch (oaf.getKind()) {
40
		case entity:
41
			OafEntity valueEntity = oaf.getEntity();
42

    
43
			switch (valueEntity.getType()) {
44
			case datasource:
45

    
46
				return buildDatasource(valueEntity);
47

    
48
			case organization:
49

    
50
				return buildOrganization(valueEntity);
51

    
52
			case project:
53

    
54
				return buildProject(valueEntity);
55
			case result:
56

    
57
				return buildResult(valueEntity);
58
			default:
59
				log.error("wrong type");
60
				break;
61
			}
62
			break;
63
		case relation:
64
			OafRel valueRel = oaf.getRel();
65
			return buildRel(valueRel);
66

    
67
		}
68

    
69
		return null;
70

    
71
	}
72

    
73
	public String serialize(OafRel oaf) {
74

    
75
		switch (oaf.getRelType()) {
76
		case resultProject:
77

    
78
			return buildresultProject(oaf);
79

    
80
		default:
81
			return buildRel(oaf);
82
		}
83
	}
84

    
85
	private String buildRel(OafRel Rel) {
86

    
87
		return getStringField(Rel.getTarget());
88
	}
89

    
90
	public HashMap<String, List<String>> extractRelations(Oaf oaf) {
91
		OafEntity valueEntity = oaf.getEntity();
92
		switch (valueEntity.getType()) {
93
		case datasource:
94

    
95
			return getDatasourceLanguages(valueEntity);
96

    
97
		case result:
98
			HashMap<String, List<String>> relations = new HashMap<String, List<String>>();
99

    
100
//			relations.putAll(getResultLanguages(valueEntity));
101
//			relations.putAll(getResultTopics(valueEntity));
102
			relations.putAll(getResultClassifications(valueEntity));
103
//			relations.putAll(getResultDatasources(valueEntity));
104
//			relations.putAll(getResultConcepts(valueEntity));
105
			return relations;
106
		default:
107

    
108
			return null;
109
		}
110

    
111
	}
112

    
113
	private String buildresultProject(OafRel oaf) {
114
		String buff = new String();
115
		buff += getStringField(oaf.getTarget());
116
		// TODO is declared as int!!!
117
		buff += getYearDifferenceInteger(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
118

    
119
		return buff;
120
	}
121

    
122
	private HashMap<String, List<String>> getDatasourceLanguages(OafEntity valueEntity) {
123
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
124
		List<String> buffs = new ArrayList<String>();
125

    
126
		Datasource d = valueEntity.getDatasource();
127

    
128
		Metadata metadata = d.getMetadata();
129

    
130
		for (StringField lang : metadata.getOdlanguagesList()) {
131

    
132
			buffs.add(getStringField(lang.getValue()));
133
		}
134
		rels.put("datasourceLanguage", buffs);
135
		return rels;
136
	}
137

    
138
	private HashMap<String, List<String>> getResultLanguages(OafEntity valueEntity) {
139
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
140
		List<String> buffs = new ArrayList<String>();
141
		Result d = valueEntity.getResult();
142

    
143
		eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = d.getMetadata();
144

    
145
		if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
146

    
147
			buffs.add(getStringField(metadata.getLanguage().getClassname()));
148
		}
149
		rels.put("resultLanguage", buffs);
150
		return rels;
151

    
152
	}
153

    
154
	private HashMap<String, List<String>> getResultClassifications(OafEntity valueEntity) {
155

    
156
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
157
		List<String> buffs = new ArrayList<String>();
158
		Result result = valueEntity.getResult();
159

    
160
		for (Instance instance : (result.getInstanceList())) {
161

    
162
			buffs.add(getStringField(instance.getInstancetype().getClassname()));
163
		}
164
		rels.put("resultClassification", buffs);
165
		return rels;
166

    
167
	}
168

    
169
	private HashMap<String, List<String>> getResultConcepts(OafEntity valueEntity) {
170
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
171
		List<String> buffs = new ArrayList<String>();
172

    
173
		Result result = valueEntity.getResult();
174

    
175
		for (eu.dnetlib.data.proto.ResultProtos.Result.Context context : result.getMetadata().getContextList()) {
176

    
177
			buffs.add(getStringField(context.getId()));
178

    
179
		}
180
		rels.put("resultConcept", buffs);
181
		return rels;
182

    
183
	}
184

    
185
	private HashMap<String, List<String>> getResultDatasources(OafEntity valueEntity) {
186

    
187
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
188
		List<String> buffs = new ArrayList<String>();
189
		Result result = valueEntity.getResult();
190

    
191
		for (Instance instance : (result.getInstanceList())) {
192
			String hostedBy = null;
193

    
194
			hostedBy = instance.getHostedby().getKey();
195

    
196
			buffs.add((getStringField(hostedBy)));
197
		}
198
		rels.put("resultDatasource", buffs);
199
		return rels;
200

    
201
	}
202

    
203
	private HashMap<String, List<String>> getResultTopics(OafEntity valueEntity) {
204
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
205
		List<String> buffs = new ArrayList<String>();
206
		Result d = valueEntity.getResult();
207

    
208
		eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = d.getMetadata();
209
		List<StructuredProperty> Topics = metadata.getSubjectList();
210
		for (StructuredProperty topic : Topics) {
211
			// TODOs
212
			String buff = new String();
213
			if (topic.getValue() != null && !topic.getValue().isEmpty()) {
214
				buff +=  topic.getValue() ;
215
				buffs.add(getStringField(buff));
216
			}
217
		}
218
		rels.put("resultTopic", buffs);
219

    
220
		return rels;
221

    
222
	}
223

    
224
	private String buildDatasource(OafEntity data) {
225

    
226
		String buff = new String();
227

    
228
		Datasource d = data.getDatasource();
229

    
230
		Metadata metadata = d.getMetadata();
231
		String full_id = getStringField(data.getId());
232

    
233
		buff += full_id;
234
		buff += full_id;
235
		buff += full_id;
236
		buff += full_id;
237

    
238
		// TODO move this here???
239
		// UPDATE "shadow".datasource SET name='Other' where name='Unknown
240
		// Repository';
241

    
242
		// name
243
		if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
244
			buff += getStringField("Unknown Repository");
245
		} else {
246
			buff += getStringField(metadata.getOfficialname().getValue());
247
		}
248
		// type
249

    
250
		if (metadata.hasDatasourcetype())
251

    
252
		{
253
			buff += getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""));
254

    
255
		} else {
256
			buff += getStringField("");
257
		}
258

    
259
		// compatibility,
260
		buff += getStringField(metadata.getOpenairecompatibility().getClassname());
261

    
262
		// latitude
263
		buff += getStringField(metadata.getLatitude().getValue());
264

    
265
		// longtitude
266
		buff += getStringField(metadata.getLongitude().getValue());
267

    
268
		// dateofvalidation,
269
		buff += getStringField(metadata.getDateofvalidation().getValue());
270

    
271
		// yearofvalidation,
272

    
273
		// parse year of validation
274
		buff += getYearInt(metadata.getDateofvalidation().getValue());
275

    
276
		// number??
277

    
278
		buff += getStringField("1");
279

    
280
		return buff;
281
	}
282

    
283
	private String buildOrganization(OafEntity data) {
284

    
285
		String buff = new String();
286

    
287
		Organization organization = data.getOrganization();
288
		eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
289

    
290
		// `organization_datasources`,
291
		String full_id = getStringField(data.getId());
292

    
293
		buff += full_id;
294
		// organization_projects
295
		buff += full_id;
296
		// `name`,
297
		buff += getStringField(metadata.getLegalname().getValue());
298
		// `country`,
299

    
300
		if (metadata.getCountry().getClassname().equals("UNITED KINGDOM"))
301

    
302
		{
303
			buff += buff += getStringField("United Kingdom");
304
		} else if (metadata.getCountry().getClassname().equals("GREECE")) {
305
			buff += getStringField("Greece");
306
		} else
307

    
308
		{
309
			buff += getStringField(metadata.getCountry().getClassname());
310
		}
311

    
312
		// `number`,
313

    
314
		buff += getStringField("1");
315
		return buff;
316

    
317
	}
318

    
319
	private String buildResult(OafEntity data) {
320

    
321
		String buff = new String();
322

    
323
		Result result = data.getResult();
324
		eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
325

    
326
		// result_topics/
327
		String full_id = getStringField(data.getId());
328
		buff += full_id;
329

    
330
		// result_languages
331
		buff += full_id;
332

    
333
		// `result_projects`,
334
		buff += full_id;
335

    
336
		// `result_datasources`,
337
		buff += full_id;
338

    
339
		// `result_classifications`,
340
		buff += full_id;
341

    
342
		// / `result_infrastructures`,
343
		buff += full_id;
344

    
345
		// `result_claims`,
346
		buff += full_id;
347

    
348
		// `result_results`,
349
		buff += full_id;
350
		// year
351
		buff += getYearInt(metadata.getDateofacceptance().getValue());
352

    
353
		// date
354
		buff += getStringField(metadata.getDateofacceptance().getValue());
355

    
356
		// access_mode,
357
		buff += getStringField(getAcessMode(result));
358

    
359
		// bestlicense
360

    
361
		buff += getStringField(getBestLicense(result));
362
		// type
363
		buff += getStringField(metadata.getResulttype().getClassname());
364
		// embargo_end_date
365
		buff += getStringField(metadata.getEmbargoenddate().getValue());
366

    
367
		// `delayed`,
368
		buff += getStringField("no");
369

    
370
		// `authors`,
371
		int authors = 0;
372

    
373
		for (OafRel rel : data.getCachedRelList()) {
374

    
375
			if (rel.getRelType().equals(RelType.personResult)) {
376

    
377
				authors++;
378
			}
379
		}
380
		// log.info("Result " + full_id +"Author count : " + authors );
381
		buff += getNumericField(String.valueOf(authors));
382

    
383
		// number??
384

    
385
		buff += getStringField("1");
386

    
387
		// TODO check if valid or empty protobuff
388
		//TODO do it in all protos?
389
		if (isValid(buff, full_id)) {
390
			return buff;
391
		}
392

    
393
		else {
394
			return null;
395
		}
396

    
397
	}
398

    
399
	private boolean isValid(String buff, String id) {
400
		return buff.endsWith(id);
401
	}
402

    
403
	private String getBestLicense(Result result) {
404
		Qualifier bestLicense = null;
405
		LicenseComparator lc = new LicenseComparator();
406
		for (Instance instance : (result.getInstanceList())) {
407
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
408
				bestLicense = instance.getLicence();
409
			}
410
		}
411
		if (bestLicense != null) {
412
			return bestLicense.getClassname();
413
		} else {
414
			return null;
415
		}
416
	}
417

    
418
	private String getAcessMode(Result result) {
419

    
420
		for (Instance instance : (result.getInstanceList())) {
421
			return instance.getLicence().getClassname();
422

    
423
		}
424

    
425
		return NULL_STRING;
426
	}
427

    
428
	private String buildProject(OafEntity data) {
429

    
430
		String buff = new String();
431

    
432
		Project project = data.getProject();
433
		eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
434
		// project_organizations
435

    
436
		String full_id = getStringField(data.getId());
437
		buff += full_id;
438

    
439
		// project_results
440
		buff += full_id;
441
		// `acronym`,
442
		buff += getStringField(metadata.getAcronym().getValue());
443

    
444
		// `funding_lvl0`,
445
		String funding_tree_0 = NULL_STRING;
446
		String funding_tree_1 = NULL_STRING;
447
		String funding_tree_2 = NULL_STRING;
448

    
449
		List<StringField> fundList = metadata.getFundingtreeList();
450

    
451
		if (!fundList.isEmpty()) // `funding_lvl0`,
452
		{
453
			funding_tree_0 = getFundingLevel(fundList.get(0).getValue(), 0);
454

    
455
			funding_tree_1 = getFundingLevel(fundList.get(0).getValue(), 1);
456
			// log.info(funding_tree_1);
457

    
458
			funding_tree_2 = getFundingLevel(fundList.get(0).getValue(), 2);
459
			// log.info(funding_tree_2);
460

    
461
		}
462

    
463
		buff += getStringField(funding_tree_0);
464
		// `funding_lvl1`,
465
		if (funding_tree_1.equalsIgnoreCase("SP1")) {
466
			funding_tree_1 = "SP1-Cooperation";
467

    
468
		} else if (funding_tree_1.equalsIgnoreCase("SP2")) {
469
			funding_tree_1 = "SP2-Ideas";
470
		} else if (funding_tree_1.equalsIgnoreCase("SP3")) {
471
			funding_tree_1 = "SP3-People";
472
		} else if (funding_tree_1.equalsIgnoreCase("SP4")) {
473
			funding_tree_1 = "SP4-Capacities";
474
		} else if (funding_tree_1.equalsIgnoreCase("SP5")) {
475
			funding_tree_1 = "SP5-Euratom";
476
		}
477

    
478
		buff += getStringField(funding_tree_1);
479

    
480
		// / `funding_lvl2`,
481
		buff += getStringField(funding_tree_2);
482

    
483
		// `sc39`,
484

    
485
		String sc39 = metadata.getEcsc39().getValue();
486
		if (sc39.contains("true")) {
487
			sc39 = "yes";
488
		} else if (sc39.contains("false")) {
489
			sc39 = "no";
490
		}
491

    
492
		buff += getStringField(sc39);
493

    
494
		// `url`,
495

    
496
		buff += getStringField(metadata.getWebsiteurl().getValue());
497

    
498
		// start_year
499

    
500
		buff += getYearInt(metadata.getStartdate().getValue());
501

    
502
		// end_year
503
		buff += getYearInt(metadata.getEnddate().getValue());
504

    
505
		// duration enddate-startdate
506

    
507
		buff += getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue());
508

    
509
		// haspubs
510
		buff += getStringField("");
511

    
512
		// numpubs
513
		buff += getNumericField("");
514
		// enddate
515
		buff += getStringField(metadata.getEnddate().getValue());
516
		// startdate
517
		buff += getStringField(metadata.getStartdate().getValue());
518

    
519
		// `daysforlastpub`,
520
		buff += getNumericField("");
521
		// `delayedpubs`,
522
		buff += getNumericField("");
523
		// `number`
524
		buff += getStringField("1");
525
		return buff;
526

    
527
	}
528

    
529
	private String getFundingLevel(String funding_level, int level) {
530

    
531
		if (funding_level.isEmpty()) {
532
			return NULL_STRING;
533

    
534
		}
535

    
536
		if (!funding_level.contains("funding_level_" + level)) {
537
			return NULL_STRING;
538
		}
539
		String[] split = funding_level.split("funding_level_" + level);
540

    
541
		funding_level = split[1];
542
		split = funding_level.split("name");
543
		split = split[1].split(",");
544

    
545
		funding_level = split[0].replaceAll(".*:\"", "");
546
		funding_level = funding_level.replaceFirst(ENCLOSED, "");
547
		funding_level = funding_level.trim();
548

    
549
		return funding_level;
550
	}
551

    
552
	private String getYearDifferenceInteger(String enddate, String startdate) {
553

    
554
		if (!enddate.isEmpty() && enddate != null && startdate != null && !startdate.isEmpty()) {
555

    
556
			String[] split = startdate.split("-");
557

    
558
			int Startdate = Integer.parseInt(split[0]);
559

    
560
			split = enddate.split("-");
561

    
562
			int Enddate = Integer.parseInt(split[0]);
563

    
564
			int diff = Enddate - Startdate;
565
			return ENCLOSED + diff + ENCLOSED + DELIM;
566
		}
567

    
568
		return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
569
	}
570

    
571
	private String getYearInt(String data) {
572
		if (data == null || data.isEmpty()) {
573
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
574
		}
575

    
576
		String[] split = data.split("-");
577

    
578
		if (split != null) {
579
			return ENCLOSED + split[0] + ENCLOSED + DELIM;
580
		} else {
581
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
582
		}
583

    
584
	}
585

    
586
	private String getStringField(String data) {
587

    
588
		if (data == null || data.isEmpty() || data.equals("")) {
589

    
590
			return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
591
		} else {
592

    
593
			return clean(data) + DELIM;
594

    
595
		}
596
	}
597

    
598
	private String getNumericField(String data) {
599
		if (data == null || data.isEmpty()) {
600
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
601
		} else {
602

    
603
			return ENCLOSED + data + ENCLOSED + DELIM;
604
		}
605
	}
606

    
607
	public String getId(Oaf oaf) {
608
		switch (oaf.getKind()) {
609
		case entity:
610

    
611
			return clean(oaf.getEntity().getId());
612
		case relation:
613

    
614
			return clean(oaf.getRel().getSource());
615

    
616
		}
617
		return null;
618

    
619
	}
620

    
621
	public String getId(OafRel relOaf) {
622
		return clean(relOaf.getSource());
623
	}
624

    
625
	public static String clean(String value) {
626
		if (value != null) {
627
			value = value.replaceFirst(".*\\|", "");
628
			value = value.replaceAll("\n", "");
629
			value = value.trim();
630

    
631
		}
632

    
633
		if (!value.contains(ENCLOSED)) {
634
			return ENCLOSED + value + ENCLOSED;
635
		} else {
636
			return value;
637
		}
638
	}
639

    
640
	public String getName(Oaf oaf) {
641
		switch (oaf.getKind()) {
642
		case entity:
643

    
644
			return oaf.getEntity().getType().name();
645

    
646
		case relation:
647

    
648
			return oaf.getRel().getSource() + oaf.getRel().getTarget();
649

    
650
		}
651
		return null;
652

    
653
	}
654

    
655
	public String getDELIM() {
656
		return DELIM;
657
	}
658

    
659
	public void setDELIM(String dELIM) {
660
		DELIM = dELIM;
661
	}
662

    
663
	public String getNULL_STRING() {
664
		return NULL_STRING;
665
	}
666

    
667
	public void setNULL_STRING(String nULL_STRING) {
668
		NULL_STRING = nULL_STRING;
669
	}
670

    
671
	public String getNULL_NUM() {
672
		return NULL_NUM;
673
	}
674

    
675
	public void setNULL_NUM(String nULL_NUM) {
676
		NULL_NUM = nULL_NUM;
677
	}
678

    
679
	public static String getENCLOSED() {
680
		return ENCLOSED;
681
	}
682

    
683
	public void setENCLOSED(String eNCLOSED) {
684
		ENCLOSED = eNCLOSED;
685
	}
686

    
687
}
(4-4/4)