Revision 56508
Added by Antonis Lempesis almost 5 years ago
modules/dnet-openaire-stats-export-wf/trunk/dnet-openaire-stats/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java | ||
---|---|---|
18 | 18 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
19 | 19 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
20 | 20 |
import org.apache.log4j.Logger; |
21 |
import org.jsoup.Jsoup; |
|
22 | 21 |
|
23 | 22 |
import java.text.DateFormat; |
24 | 23 |
import java.text.ParseException; |
... | ... | |
140 | 139 |
|
141 | 140 |
private String getResultProject(OafRel oaf) { |
142 | 141 |
StringBuilder buff = new StringBuilder(); |
143 |
buff.append(cleanId(oaf.getTarget()) + DELIM);
|
|
144 |
// TODO is declared as int!!!
|
|
142 |
buff.append(cleanId(oaf.getTarget())).append(DELIM);
|
|
143 |
// is declared as int!!! |
|
145 | 144 |
long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate()); |
145 |
|
|
146 | 146 |
if (diff < 0) { |
147 | 147 |
diff = 0; |
148 | 148 |
} |
... | ... | |
191 | 191 |
} |
192 | 192 |
} |
193 | 193 |
|
194 |
private void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels) { |
|
195 |
Result result = valueEntity.getResult(); |
|
196 |
|
|
197 |
for (StringField s : result.getMetadata().getDescriptionList()) { |
|
198 |
rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text())); |
|
199 |
} |
|
200 |
} |
|
201 |
|
|
202 | 194 |
private void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels) { |
203 | 195 |
Result result = valueEntity.getResult(); |
204 | 196 |
|
... | ... | |
262 | 254 |
} |
263 | 255 |
} |
264 | 256 |
} catch (Exception e) { |
265 |
|
|
257 |
logger.error("Error getting result citations", e); |
|
266 | 258 |
} |
267 | 259 |
} |
268 | 260 |
} |
... | ... | |
335 | 327 |
|
336 | 328 |
Result.Metadata metadata = oaf.getEntity().getResult().getMetadata(); |
337 | 329 |
|
338 |
String titleString = new String();
|
|
330 |
String titleString = "";
|
|
339 | 331 |
|
340 |
for (int i = 0; i < metadata.getTitleList().size(); i++) {
|
|
341 |
StructuredProperty title = metadata.getTitleList().get(i);
|
|
332 |
if (metadata.getTitleList().size() > 0) {
|
|
333 |
StructuredProperty title = metadata.getTitleList().get(0);
|
|
342 | 334 |
|
343 |
if (i == 0) { |
|
344 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
345 |
titleString = titleString.replaceAll("\n", " "); |
|
346 |
} |
|
347 |
break; |
|
335 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
336 |
titleString = titleString.replaceAll("\n", " "); |
|
348 | 337 |
} |
349 | 338 |
|
350 | 339 |
// pubtitle |
... | ... | |
393 | 382 |
//authors |
394 | 383 |
buff.append(getNumericField(String.valueOf(authors))); |
395 | 384 |
|
396 |
String sources = new String();
|
|
385 |
String sources = "";
|
|
397 | 386 |
|
398 | 387 |
|
399 | 388 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
... | ... | |
454 | 443 |
} |
455 | 444 |
|
456 | 445 |
//sc39 |
457 |
String sc39 = metadata.getEcsc39().getValue().toString();
|
|
446 |
String sc39 = metadata.getEcsc39().getValue(); |
|
458 | 447 |
if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) { |
459 | 448 |
sc39 = "yes"; |
460 | 449 |
} else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) { |
... | ... | |
508 | 497 |
|
509 | 498 |
String[] split = startdate.split("-"); |
510 | 499 |
|
511 |
if (split == null || split.length == 0) {
|
|
500 |
if (split.length == 0) { |
|
512 | 501 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
513 | 502 |
} |
514 | 503 |
|
... | ... | |
516 | 505 |
|
517 | 506 |
split = enddate.split("-"); |
518 | 507 |
|
519 |
if (split == null || split.length == 0) {
|
|
508 |
if (split.length == 0) { |
|
520 | 509 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
521 | 510 |
} |
522 | 511 |
|
... | ... | |
538 | 527 |
|
539 | 528 |
String[] split = data.split("-"); |
540 | 529 |
|
541 |
if (split == null || split.length == 0) {
|
|
530 |
if (split.length == 0) { |
|
542 | 531 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
543 | 532 |
} |
544 | 533 |
|
... | ... | |
553 | 542 |
} |
554 | 543 |
|
555 | 544 |
private String cleanNumber(String number) { |
556 |
number = number.replaceAll("[^A-Za-z0-9:,____]", "");
|
|
545 |
number = number.replaceAll("[^A-Za-z0-9:,_]", ""); |
|
557 | 546 |
return number; |
558 | 547 |
} |
559 | 548 |
|
... | ... | |
576 | 565 |
} |
577 | 566 |
|
578 | 567 |
private String getStringDateField(String data) { |
579 |
if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
|
|
568 |
if (data == null || data.isEmpty() || data.equals("-1")) { |
|
580 | 569 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
581 | 570 |
} else { |
582 | 571 |
data = data.replace(DELIM, " "); |
... | ... | |
593 | 582 |
} |
594 | 583 |
|
595 | 584 |
private String getNumericField(String data) { |
596 |
if (data == null || data.isEmpty() || data.equals("")) {
|
|
585 |
if (data == null || data.isEmpty()) { |
|
597 | 586 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
598 | 587 |
} else { |
599 | 588 |
return ENCLOSING + data + ENCLOSING + DELIM; |
... | ... | |
616 | 605 |
return cleanId(relOaf.getSource()); |
617 | 606 |
} |
618 | 607 |
|
619 |
public String clean(String value) {
|
|
608 |
private String clean(String value) {
|
|
620 | 609 |
if (value != null) { |
621 | 610 |
|
622 | 611 |
value = value.replaceAll("[\"\\r\\\\;]", ""); |
... | ... | |
631 | 620 |
|
632 | 621 |
} |
633 | 622 |
|
634 |
public String cleanId(String value) {
|
|
623 |
private String cleanId(String value) {
|
|
635 | 624 |
if (value != null) { |
636 | 625 |
// DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( "5|datacite____::" to "datacite____::") |
637 | 626 |
// AND REPLACES OCCURRENCES OF DELIM CHARS IN DATA |
... | ... | |
647 | 636 |
return ENCLOSING + value + ENCLOSING; |
648 | 637 |
} |
649 | 638 |
|
650 |
public String cleanUrl(String value) {
|
|
639 |
private String cleanUrl(String value) {
|
|
651 | 640 |
value = value.replace(DELIM, " "); |
652 | 641 |
value = value.replace(ENCLOSING, " "); |
653 | 642 |
value = value.replace(" ", ""); |
... | ... | |
655 | 644 |
return value; |
656 | 645 |
} |
657 | 646 |
|
658 |
public long DATEDIFF(String startDate, String endDate) {
|
|
659 |
long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000; |
|
660 |
long days = 0l;
|
|
647 |
private long DATEDIFF(String startDate, String endDate) {
|
|
648 |
long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000L;
|
|
649 |
long days; |
|
661 | 650 |
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss"); |
662 | 651 |
// <startdate>2011-09-01</startdate> |
663 | 652 |
// <enddate>2015-08-31</enddate> |
664 |
Date dateIni = null;
|
|
665 |
Date dateFin = null;
|
|
653 |
Date dateIni; |
|
654 |
Date dateFin; |
|
666 | 655 |
|
667 | 656 |
if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) { |
668 | 657 |
return 0; |
669 | 658 |
} |
670 | 659 |
try { |
671 |
dateIni = (Date) format.parse(startDate);
|
|
672 |
dateFin = (Date) format.parse(endDate);
|
|
660 |
dateIni = format.parse(startDate); |
|
661 |
dateFin = format.parse(endDate); |
|
673 | 662 |
days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY; |
674 | 663 |
} catch (Exception e) { |
675 | 664 |
|
Also available in: Unified diff
Further code cleanup