Revision 54431
Added by Tsampikos Livisianos over 5 years ago
Serializer.java | ||
---|---|---|
22 | 22 |
import org.apache.log4j.Logger; |
23 | 23 |
import org.jsoup.Jsoup; |
24 | 24 |
|
25 |
import java.text.DateFormat; |
|
26 |
import java.text.ParseException; |
|
25 | 27 |
import java.text.SimpleDateFormat; |
26 | 28 |
import java.util.ArrayList; |
27 | 29 |
import java.util.Date; |
... | ... | |
100 | 102 |
switch (valueEntity.getType()) { |
101 | 103 |
case datasource: |
102 | 104 |
getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING); |
105 |
getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING); |
|
103 | 106 |
case result: |
104 | 107 |
getResultTopics(valueEntity, relations, DELIM, ENCLOSING); |
105 | 108 |
getResultLanguages(valueEntity, relations, DELIM, ENCLOSING); |
... | ... | |
109 | 112 |
getResultDois(valueEntity, relations, DELIM, ENCLOSING); |
110 | 113 |
getResultCitations(valueEntity, relations, DELIM, ENCLOSING); |
111 | 114 |
getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING); |
115 |
getResultExtra(valueEntity, relations, DELIM, ENCLOSING); |
|
112 | 116 |
|
113 | 117 |
case project: |
114 | 118 |
getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING); |
... | ... | |
119 | 123 |
|
120 | 124 |
} |
121 | 125 |
|
126 |
private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
|
127 |
Datasource d = valueEntity.getDatasource(); |
|
128 |
Metadata metadata = d.getMetadata(); |
|
122 | 129 |
|
130 |
relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
131 |
} |
|
132 |
|
|
133 |
private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
|
134 |
Result result = valueEntity.getResult(); |
|
135 |
Result.Metadata metadata = result.getMetadata(); |
|
136 |
|
|
137 |
StringBuilder buff = new StringBuilder(); |
|
138 |
String titleString = ""; |
|
139 |
|
|
140 |
for (int i = 0; i < metadata.getTitleList().size(); i++) { |
|
141 |
StructuredProperty title = metadata.getTitleList().get(i); |
|
142 |
|
|
143 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
144 |
titleString = titleString.replaceAll("\n", " "); |
|
145 |
break; |
|
146 |
} |
|
147 |
|
|
148 |
// pubtitle |
|
149 |
buff.append(getStringField(titleString, DELIM, ENCLOSING)); |
|
150 |
|
|
151 |
String sources = ""; |
|
152 |
for (Instance instance : (result.getInstanceList())) { |
|
153 |
List<String> urls = instance.getUrlList(); |
|
154 |
for (String url : urls) { |
|
155 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;"; |
|
156 |
} |
|
157 |
} |
|
158 |
|
|
159 |
//sources |
|
160 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
161 |
buff.append(sources); |
|
162 |
|
|
163 |
relations.put("resultExtra", buff.toString()); |
|
164 |
} |
|
165 |
|
|
123 | 166 |
private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
124 | 167 |
|
125 | 168 |
String relName = oafEntity.getType().toString().toLowerCase() + "Oid"; |
... | ... | |
289 | 332 |
Metadata metadata = oaf.getEntity().getDatasource().getMetadata(); |
290 | 333 |
StringBuilder buff = new StringBuilder(); |
291 | 334 |
|
292 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
293 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
294 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
295 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
296 |
|
|
297 | 335 |
// name |
298 | 336 |
if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) { |
299 | 337 |
buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING)); |
300 | 338 |
} else { |
301 | 339 |
buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING)); |
302 | 340 |
} |
341 |
|
|
303 | 342 |
// type |
304 |
|
|
305 | 343 |
if (metadata.hasDatasourcetype()) { |
306 | 344 |
buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING)); |
307 | 345 |
} |
... | ... | |
309 | 347 |
// compatibility, |
310 | 348 |
buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING)); |
311 | 349 |
|
312 |
// latitude |
|
313 |
buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING)); |
|
314 |
|
|
315 |
// longtitude |
|
316 |
buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING)); |
|
317 |
|
|
318 | 350 |
// dateofvalidation, |
319 | 351 |
buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING)); |
320 | 352 |
|
321 | 353 |
// yearofvalidation, |
322 | 354 |
buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING)); |
323 | 355 |
|
324 |
//website
|
|
325 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
|
|
356 |
//harvested
|
|
357 |
buff.append(getStringField("false", DELIM, ENCLOSING));
|
|
326 | 358 |
|
327 | 359 |
//piwik_id |
328 | 360 |
String piwik_id = ""; |
... | ... | |
334 | 366 |
} |
335 | 367 |
buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING)); |
336 | 368 |
|
337 |
//harvested |
|
338 |
buff.append(getStringField("false", DELIM, ENCLOSING)); |
|
339 |
|
|
340 |
// deletedByInference |
|
341 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
342 |
|
|
343 |
// number?? |
|
344 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
345 |
|
|
346 | 369 |
return buff.toString(); |
347 | 370 |
|
348 | 371 |
} |
... | ... | |
352 | 375 |
StringBuilder buff = new StringBuilder(); |
353 | 376 |
Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata(); |
354 | 377 |
|
355 |
// `organization_datasources`, |
|
356 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
357 |
// organization_projects |
|
358 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
359 | 378 |
// `name`, |
360 | 379 |
buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING)); |
361 | 380 |
|
362 | 381 |
// `country`, |
363 | 382 |
buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING)); |
364 | 383 |
|
365 |
//website |
|
366 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
367 |
|
|
368 |
// deletedByInference |
|
369 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
370 |
|
|
371 |
// number |
|
372 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
373 |
|
|
374 | 384 |
return buff.toString(); |
375 | 385 |
} |
376 | 386 |
|
... | ... | |
379 | 389 |
|
380 | 390 |
Result.Metadata metadata = oaf.getEntity().getResult().getMetadata(); |
381 | 391 |
|
382 |
// result_topics/ |
|
383 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
384 |
// result_languages |
|
385 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
386 |
// `result_projects`, |
|
387 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
388 |
// `result_datasources`, |
|
389 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
390 |
// `result_classifications`, |
|
391 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
392 |
/// `result_infrastructures`, |
|
393 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
394 |
// `result_claims`, |
|
395 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
396 |
// `result_results`, |
|
397 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
398 |
// pubtitle |
|
399 |
|
|
400 |
String titleString = new String(); |
|
401 |
|
|
402 |
for (int i = 0; i < metadata.getTitleList().size(); i++) { |
|
403 |
StructuredProperty title = metadata.getTitleList().get(i); |
|
404 |
|
|
405 |
if (i == 0) { |
|
406 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
407 |
titleString = titleString.replaceAll("\n", " "); |
|
408 |
} |
|
409 |
break; |
|
410 |
} |
|
411 |
|
|
412 |
// pubtitle |
|
413 |
buff.append(getStringField(titleString, DELIM, ENCLOSING)); |
|
414 |
|
|
415 |
// format |
|
416 |
String formatString = new String(); |
|
417 |
|
|
418 |
for (StringField format : metadata.getFormatList()) { |
|
419 |
formatString += format.getValue() + ";"; |
|
420 |
|
|
421 |
} |
|
422 |
|
|
423 |
buff.append(getStringField(formatString, DELIM, ENCLOSING)); |
|
424 | 392 |
// publisher |
425 |
|
|
426 | 393 |
buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING)); |
427 | 394 |
|
428 | 395 |
// journal |
429 |
|
|
430 | 396 |
buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING)); //#null#! |
431 | 397 |
|
432 | 398 |
// year |
433 | 399 |
buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING)); |
434 | 400 |
|
435 |
// date CHANGED THIS TO DATE FORMAT
|
|
401 |
// date |
|
436 | 402 |
buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING)); |
437 | 403 |
|
438 |
// access_mode, |
|
439 |
buff.append(getStringField(getAccessMode(oaf.getEntity().getResult()), DELIM, ENCLOSING)); |
|
440 |
|
|
441 | 404 |
// bestlicense |
442 | 405 |
buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING)); |
443 | 406 |
|
... | ... | |
448 | 411 |
buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING)); |
449 | 412 |
|
450 | 413 |
// `authors`, |
451 |
int authors = 0;
|
|
414 |
int authors = metadata.getAuthorCount();
|
|
452 | 415 |
|
416 |
|
|
453 | 417 |
String delayed = "no"; |
454 | 418 |
|
455 | 419 |
for (OafRel rel : oaf.getEntity().getCachedRelList()) { |
... | ... | |
477 | 441 |
//authors |
478 | 442 |
buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING)); |
479 | 443 |
|
480 |
String sources = new String(); |
|
481 |
|
|
482 |
|
|
483 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
|
484 |
List<String> urls = instance.getUrlList(); |
|
485 |
for (String url : urls) { |
|
486 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;"; |
|
487 |
} |
|
488 |
} |
|
489 |
|
|
490 |
//sources |
|
491 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
492 |
|
|
493 |
buff.append(sources); |
|
494 |
|
|
495 |
// deletedByInference |
|
496 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
497 |
|
|
498 |
|
|
499 |
// number?? |
|
500 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
501 | 444 |
return buff.toString(); |
502 | 445 |
|
503 | 446 |
} |
... | ... | |
568 | 511 |
StringBuilder buff = new StringBuilder(); |
569 | 512 |
Project.Metadata metadata = oaf.getEntity().getProject().getMetadata(); |
570 | 513 |
|
571 |
// project_organizations |
|
572 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
573 |
|
|
574 |
// project_results |
|
575 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
576 |
|
|
577 |
|
|
578 | 514 |
// `acronym`, |
579 | 515 |
String acronym = metadata.getAcronym().getValue(); |
580 | 516 |
if (acronym.equalsIgnoreCase("UNKNOWN")) { |
581 | 517 |
acronym = metadata.getTitle().getValue(); |
582 | 518 |
} |
583 |
|
|
584 | 519 |
buff.append(getStringField(acronym, DELIM, ENCLOSING)); |
585 | 520 |
|
586 |
//title!
|
|
521 |
//title |
|
587 | 522 |
buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING)); |
588 | 523 |
|
524 |
//funding_lvl |
|
589 | 525 |
List<StringField> fundList = metadata.getFundingtreeList(); |
590 |
|
|
591 | 526 |
if (!fundList.isEmpty()) // `funding_lvl0`, |
592 | 527 |
{ |
593 | 528 |
//TODO funder + 3 funding levels |
... | ... | |
597 | 532 |
funding_lvl2 text, |
598 | 533 |
funding_lvl3 text,*/ |
599 | 534 |
buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING)); |
600 |
|
|
601 | 535 |
} else { |
602 | 536 |
buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING)); |
603 |
|
|
604 | 537 |
} |
605 | 538 |
|
539 |
//sc39 |
|
606 | 540 |
String sc39 = metadata.getEcsc39().getValue().toString(); |
607 | 541 |
if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) { |
608 | 542 |
sc39 = "yes"; |
609 | 543 |
} else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) { |
610 | 544 |
sc39 = "no"; |
611 | 545 |
} |
612 |
|
|
613 | 546 |
buff.append(getStringField(sc39, DELIM, ENCLOSING)); |
614 | 547 |
|
615 |
|
|
616 | 548 |
//project_type |
617 | 549 |
buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING)); |
618 | 550 |
|
619 |
// `url`, |
|
620 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
621 |
|
|
622 | 551 |
// start_year |
623 |
|
|
624 | 552 |
buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING)); |
625 | 553 |
|
626 | 554 |
// end_year |
627 | 555 |
buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING)); |
628 | 556 |
|
629 | 557 |
// duration enddate-startdate |
630 |
|
|
631 | 558 |
buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING)); |
632 | 559 |
|
633 | 560 |
// haspubs |
... | ... | |
637 | 564 |
buff.append(getNumericField("0", DELIM, ENCLOSING)); |
638 | 565 |
|
639 | 566 |
// enddate |
640 |
buff.append(getNumericField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
567 |
buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
641 | 568 |
|
642 | 569 |
// startdate |
643 |
buff.append(getNumericField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
570 |
buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
644 | 571 |
|
645 | 572 |
// `daysforlastpub`, |
646 | 573 |
buff.append(getNumericField("", DELIM, ENCLOSING)); |
... | ... | |
650 | 577 |
|
651 | 578 |
//call identifier |
652 | 579 |
buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING)); |
580 |
|
|
653 | 581 |
//code |
654 | 582 |
buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING)); |
655 | 583 |
|
656 |
//esc39 |
|
657 |
buff.append(getStringField(metadata.getEcsc39().getValue(), DELIM, ENCLOSING)); |
|
658 |
|
|
659 |
//getUrl |
|
660 |
String sources = new String(); |
|
661 |
|
|
662 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
|
663 |
List<String> urls = instance.getUrlList(); |
|
664 |
for (String u : urls) { |
|
665 |
sources += u + ";"; |
|
666 |
} |
|
667 |
} |
|
668 |
|
|
669 |
sources = cleanUrl(sources, DELIM, ENCLOSING); |
|
670 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
671 |
|
|
672 |
buff.append(sources); |
|
673 |
|
|
674 |
// deletedByInference |
|
675 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
676 |
|
|
677 |
// `number` |
|
678 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
679 | 584 |
return buff.toString(); |
680 | 585 |
|
681 | 586 |
} |
... | ... | |
809 | 714 |
data = data.replace(DELIM, " "); |
810 | 715 |
data = data.replace(ENCLOSING, " "); |
811 | 716 |
data = data.replaceAll("\\r\\n|\\r|\\n", ""); |
812 |
return ENCLOSING + data + ENCLOSING + DELIM; |
|
717 |
try { |
|
718 |
DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); |
|
719 |
data = format.format(format.parse(data)); |
|
720 |
return ENCLOSING + data + ENCLOSING + DELIM; |
|
721 |
} catch (ParseException e) { |
|
722 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
|
723 |
} |
|
813 | 724 |
} |
814 | 725 |
} |
815 | 726 |
|
Also available in: Unified diff
finalize new-schema for betadb