Project

General

Profile

« Previous | Next » 

Revision 29336

Added by Eri Katsari about 10 years ago

View differences:

modules/dnet-openaire-stats/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java
32 32
	private Logger log = Logger.getLogger(this.getClass());
33 33
	private String NULL_STRING;
34 34
	private String NULL_NUM;
35
	private static String ENCLOSED;
35 36

  
36 37
	public String serialize(Oaf oaf) {
37 38

  
......
111 112

  
112 113
	private String buildresultProject(OafRel oaf) {
113 114
		String buff = new String();
114
		buff +=  getStringField(oaf.getTarget());
115
		buff += getStringField(oaf.getTarget());
115 116
		// TODO is declared as int!!!
116 117
		buff += getYearDifferenceInteger(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
117 118

  
......
191 192
			String hostedBy = null;
192 193

  
193 194
			hostedBy = instance.getHostedby().getKey();
194
			 
195

  
195 196
			buffs.add((getStringField(hostedBy)));
196 197
		}
197 198
		rels.put("resultDatasource", buffs);
......
227 228
		Datasource d = data.getDatasource();
228 229

  
229 230
		Metadata metadata = d.getMetadata();
230
		String full_id =getStringField(data.getId());
231
		String full_id = getStringField(data.getId());
231 232

  
232 233
		buff += full_id;
233 234
		buff += full_id;
......
287 288
		eu.dnetlib.data.proto.OrganizationProtos.Organization.Metadata metadata = organization.getMetadata();
288 289

  
289 290
		// `organization_datasources`,
290
		String full_id =getStringField(data.getId());
291
		String full_id = getStringField(data.getId());
291 292

  
292

  
293
		buff += full_id; 
293
		buff += full_id;
294 294
		// organization_projects
295
		buff += full_id; 
295
		buff += full_id;
296 296
		// `name`,
297 297
		buff += getStringField(metadata.getLegalname().getValue());
298 298
		// `country`,
......
300 300
		if (metadata.getCountry().getClassname().equals("UNITED KINGDOM"))
301 301

  
302 302
		{
303
			buff += 
304
					buff += getStringField("United Kingdom");
303
			buff += buff += getStringField("United Kingdom");
305 304
		} else if (metadata.getCountry().getClassname().equals("GREECE")) {
306
			buff += getStringField ("Greece");
305
			buff += getStringField("Greece");
307 306
		} else
308 307

  
309 308
		{
......
325 324
		eu.dnetlib.data.proto.ResultProtos.Result.Metadata metadata = result.getMetadata();
326 325

  
327 326
		// result_topics/
328
		String full_id =getStringField(data.getId());
327
		String full_id = getStringField(data.getId());
329 328
		buff += full_id;
330 329

  
331 330
		// result_languages
......
384 383
		// number??
385 384

  
386 385
		buff += getStringField("1");
387
	 
388
		return buff;
389 386

  
387
		// TODO check if valid or empty protobuff
388
		//TODO do it in all protos?
389
		if (isValid(buff, full_id)) {
390
			return buff;
391
		}
392

  
393
		else {
394
			return null;
395
		}
396

  
390 397
	}
391 398

  
399
	private boolean isValid(String buff, String id) {
400
		return buff.endsWith(id);
401
	}
402

  
392 403
	private String getBestLicense(Result result) {
393 404
		Qualifier bestLicense = null;
394 405
		LicenseComparator lc = new LicenseComparator();
......
422 433
		eu.dnetlib.data.proto.ProjectProtos.Project.Metadata metadata = project.getMetadata();
423 434
		// project_organizations
424 435

  
425
		String full_id =getStringField(data.getId());
426
		buff +=  full_id;
436
		String full_id = getStringField(data.getId());
437
		buff += full_id;
427 438

  
428 439
		// project_results
429 440
		buff += full_id;
......
532 543
		split = split[1].split(",");
533 544

  
534 545
		funding_level = split[0].replaceAll(".*:\"", "");
535
		funding_level = funding_level.replaceFirst("\"", "");
546
		funding_level = funding_level.replaceFirst(ENCLOSED, "");
536 547
		funding_level = funding_level.trim();
537 548

  
538 549
		return funding_level;
......
551 562
			int Enddate = Integer.parseInt(split[0]);
552 563

  
553 564
			int diff = Enddate - Startdate;
554
			return "\"" + diff + "\"" + DELIM;
565
			return ENCLOSED + diff + ENCLOSED + DELIM;
555 566
		}
556 567

  
557
		return "\"" + NULL_NUM + "\"" + DELIM;
568
		return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
558 569
	}
559 570

  
560 571
	private String getYearInt(String data) {
561 572
		if (data == null || data.isEmpty()) {
562
			return "\"" + NULL_NUM + "\"" + DELIM;
573
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
563 574
		}
564 575

  
565 576
		String[] split = data.split("-");
566 577

  
567 578
		if (split != null) {
568
			return "\"" + split[0] + "\"" + DELIM;
579
			return ENCLOSED + split[0] + ENCLOSED + DELIM;
569 580
		} else {
570
			return "\"" + NULL_NUM + "\"" + DELIM;
581
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
571 582
		}
572 583

  
573 584
	}
574 585

  
575 586
	private String getStringField(String data) {
576
	
587

  
577 588
		if (data == null || data.isEmpty() || data.equals("")) {
578
		
579
			return "\"" + NULL_STRING + "\"" + DELIM;
589

  
590
			return ENCLOSED + NULL_STRING + ENCLOSED + DELIM;
580 591
		} else {
581 592

  
582
		return clean(data) + DELIM;
583
			
593
			return clean(data) + DELIM;
594

  
584 595
		}
585 596
	}
586 597

  
587 598
	private String getNumericField(String data) {
588 599
		if (data == null || data.isEmpty()) {
589
			return "\"" + NULL_NUM + "\"" + DELIM;
600
			return ENCLOSED + NULL_NUM + ENCLOSED + DELIM;
590 601
		} else {
591 602

  
592
			return "\"" + data + "\"" + DELIM;
603
			return ENCLOSED + data + ENCLOSED + DELIM;
593 604
		}
594 605
	}
595 606

  
......
616 627
			value = value.replaceFirst(".*\\|", "");
617 628
			value = value.replaceAll("\n", "");
618 629
			value = value.trim();
619
			
630

  
620 631
		}
621 632

  
622
		if (!value.contains("\"")) {
623
			return "\"" + value + "\"";
633
		if (!value.contains(ENCLOSED)) {
634
			return ENCLOSED + value + ENCLOSED;
624 635
		} else {
625 636
			return value;
626 637
		}
......
665 676
		NULL_NUM = nULL_NUM;
666 677
	}
667 678

  
679
	public static String getENCLOSED() {
680
		return ENCLOSED;
681
	}
682

  
683
	public void setENCLOSED(String eNCLOSED) {
684
		ENCLOSED = eNCLOSED;
685
	}
686

  
668 687
}
modules/dnet-openaire-stats/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/StatsMapper.java
56 56
		serializer.setDELIM(context.getConfiguration().get("stats.delim"));
57 57
		serializer.setNULL_NUM(context.getConfiguration().get("stats.nullNum"));
58 58
		serializer.setNULL_STRING(context.getConfiguration().get("stats.nullString"));
59
		serializer.setENCLOSED(context.getConfiguration().get("stats.enclChar"));
60
		
61
		
59 62
	}
60 63

  
61 64
	@Override
modules/dnet-openaire-stats/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/drivers/SqoopDriver.java
17 17
import org.apache.hadoop.fs.RemoteIterator;
18 18
import org.apache.log4j.Logger;
19 19
import org.apache.sqoop.Sqoop;
20
import org.junit.experimental.runners.Enclosed;
20 21

  
21 22
import com.google.common.collect.ArrayListMultimap;
22 23
import com.google.common.collect.Multimap;
......
45 46
	private boolean batch = true;
46 47
	private boolean verbose = true;
47 48
	private String tableMapConf;
48
	 
49
	private String enclosed;
49 50

  
50 51
	/**
51 52
	 * Driver for the Sqoop tool. Calls the Sqoop Client for each <input file,
......
67 68
			"--connect", connectionUrl, "--table", table.getKey(),
68 69

  
69 70
			"--export-dir", table.getValue(),
70
 
71
			 "--input-fields-terminated-by", delim,
72
			 "--input-enclosed-by", "\"", 
73
			
74
			 "--verbose", "--username", dbUser, "--password", dbPass, "--driver", "org.postgresql.Driver", "--batch", "--mapreduce-job-name", "Sqoop Stats Import Job for " + table.getKey(), "--m", sqoopReducersCount };
75 71

  
72
			"--input-fields-terminated-by", delim, "--input-enclosed-by", enclosed,
73

  
74
			"--verbose", "--username", dbUser, "--password", dbPass, "--driver", "org.postgresql.Driver", "--batch", "--mapreduce-job-name", "Sqoop Stats Import Job for " + table.getKey(), "--m", sqoopReducersCount };
75

  
76 76
			int ret = Sqoop.runTool(str);
77 77

  
78 78
			if (ret != 0) {
......
116 116
				if (split[0].equals(name)) {
117 117

  
118 118
					tables.put((String) e.getValue(), filename);
119
					log.info("    match   " +  e.getValue()  + "  " + filename);
119
					log.info("    match   " + e.getValue() + "  " + filename);
120 120
				}
121 121
			}
122 122
		}
123 123

  
124 124
		long startTime = System.currentTimeMillis();
125
		log.info("    match   " +  tables.entries());
125
		log.info("    match   " + tables.entries());
126 126

  
127 127
		try {
128 128
			this.run(tables);
......
370 370
		this.tableMapConf = tableMapConf;
371 371
	}
372 372

  
373
	 
373
	public String getEnclosed() {
374
		return enclosed;
375
	}
374 376

  
377
	public void setEnclosed(String enclosed) {
378
		this.enclosed = enclosed;
379
	}
380

  
375 381
}

Also available in: Unified diff