Revision 35179
Added by Michele Artini about 9 years ago
XmlRecordFactory.java | ||
---|---|---|
23 | 23 |
import org.dom4j.Document; |
24 | 24 |
import org.dom4j.DocumentException; |
25 | 25 |
import org.dom4j.Element; |
26 |
import org.dom4j.Node; |
|
26 | 27 |
import org.dom4j.io.SAXReader; |
27 |
import org.json.JSONException; |
|
28 |
import org.json.JSONObject; |
|
29 | 28 |
|
29 |
import com.google.common.base.Joiner; |
|
30 | 30 |
import com.google.common.base.Predicate; |
31 |
import com.google.common.collect.Iterators; |
|
32 | 31 |
import com.google.common.collect.Lists; |
33 | 32 |
import com.google.common.collect.Maps; |
34 | 33 |
import com.google.common.collect.Sets; |
... | ... | |
407 | 406 |
} |
408 | 407 |
|
409 | 408 |
if (StringField.getDescriptor().equals(fd.getMessageType())) { |
410 |
if (fd.getName().contains("fundingtree")) { |
|
411 |
handleFundingTree(metadata, fd, o, expandingRel); |
|
409 |
String fieldName = fd.getName(); |
|
410 |
|
|
411 |
if (fieldName.equals("fundingtree")) { |
|
412 |
String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString(); |
|
413 |
|
|
414 |
if (expandingRel) { |
|
415 |
metadata.add(getRelFundingTree(xmlTree)); |
|
416 |
fillContextMap(xmlTree); |
|
417 |
} else { |
|
418 |
metadata.add(xmlTree); |
|
419 |
} |
|
412 | 420 |
} else { |
413 | 421 |
StringField sf = (StringField) o; |
414 | 422 |
StringBuilder sb = new StringBuilder("<" + fd.getName()); |
... | ... | |
420 | 428 |
metadata.add(sb.toString()); |
421 | 429 |
} |
422 | 430 |
} |
423 |
|
|
424 | 431 |
if (Journal.getDescriptor().equals(fd.getMessageType()) && o != null) { |
425 | 432 |
Journal j = (Journal) o; |
426 | 433 |
metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\"" |
... | ... | |
527 | 534 |
} |
528 | 535 |
|
529 | 536 |
@SuppressWarnings("unchecked") |
530 |
private void handleFundingTree(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) { |
|
531 |
String xmlTree = asXmlJSon(fd.getName(), o instanceof StringField ? ((StringField) o).getValue() : o.toString()); |
|
532 |
if (expandingRel) { |
|
533 |
try { |
|
534 |
Document ftree = new SAXReader().read(new StringReader(xmlTree)); |
|
537 |
private String getRelFundingTree(final String xmlTree) { |
|
538 |
String funding = "<funding>"; |
|
539 |
try { |
|
540 |
Document ftree = new SAXReader().read(new StringReader(xmlTree)); |
|
541 |
funding = "<funding>"; |
|
542 |
// String _id = ""; |
|
535 | 543 |
|
536 |
int i = 0; |
|
537 |
String funding = "<funding>"; |
|
538 |
String _id = ""; |
|
539 |
|
|
540 |
for (Object id : Lists.reverse(ftree.selectNodes("//fundingtree//name"))) { |
|
541 |
_id += ((Element) id).getText(); |
|
542 |
funding += "<funding_level_" + i + ">" + escapeXml(_id) + "</funding_level_" + i + ">"; |
|
543 |
_id += "::"; |
|
544 |
i++; |
|
545 |
} |
|
546 |
funding += "</funding>"; |
|
547 |
// System.out.println("-------------------------------\n" + xmlTree + "\n" + funding); |
|
548 |
metadata.add(funding); |
|
549 |
} catch (DocumentException e) { |
|
550 |
System.err.println("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); |
|
544 |
for (Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) { |
|
545 |
Element e = (Element) o; |
|
546 |
String _id = e.valueOf("./id"); |
|
547 |
funding += "<" + e.getName() + ">" + escapeXml(_id) + "</" + e.getName() + ">"; |
|
548 |
// _id += "::"; |
|
551 | 549 |
} |
552 |
} else { |
|
553 |
metadata.add(xmlTree); |
|
550 |
} catch (DocumentException e) { |
|
551 |
System.err.println("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage()); |
|
552 |
} finally { |
|
553 |
funding += "</funding>"; |
|
554 | 554 |
} |
555 |
return funding; |
|
555 | 556 |
} |
556 | 557 |
|
557 |
private String asXmlJSon(final String root, final String json) { |
|
558 |
try { |
|
559 |
if (json == null || json.isEmpty()) { return "<" + root + "/>"; } |
|
560 |
JSONObject o = new JSONObject(json.replace("'", "")); |
|
558 |
private void fillContextMap(final String xmlTree) { |
|
561 | 559 |
|
562 |
String contextId = parseFundingJson(o).toLowerCase(); |
|
563 |
contextes.add(contextId); |
|
564 |
|
|
565 |
String xml = org.json.XML.toString(o, root); |
|
566 |
return xml; |
|
567 |
} catch (Exception e) { |
|
568 |
System.err.println("unable to parse json: " + json + "\n" + e.getMessage()); |
|
569 |
return "<" + root + "/>"; |
|
570 |
} |
|
571 |
} |
|
572 |
|
|
573 |
private String parseFundingJson(final JSONObject o) { |
|
560 |
Document fundingPath; |
|
574 | 561 |
try { |
575 |
String key = (String) Iterators.getOnlyElement(o.keys()); |
|
576 |
JSONObject obj = o.getJSONObject(key); |
|
577 |
|
|
578 |
String id = obj.getString("id").toLowerCase(); |
|
579 |
if (id.startsWith("welcometrust::")) { |
|
580 |
id = StringUtils.substringBeforeLast("uk::" + id.replace("welcometrust", "wt"), "::") + "::" + cleanup(id); |
|
581 |
} else if (id.startsWith("wt::wt")) { |
|
582 |
id = StringUtils.substringBeforeLast(id.replaceFirst("wt", "uk"), "::") + "::" + cleanup(id); |
|
583 |
} else if (id.startsWith("corda_______::")) { |
|
584 |
id = id.replace("corda_______::", "ec::"); |
|
585 |
} else if (id.startsWith("fct_________::")) { |
|
586 |
id = "pt::" + id.replace("fct_________", "fct"); |
|
587 |
if (id.endsWith("::fct")) { |
|
588 |
id = StringUtils.substringBeforeLast(id, "::fct"); |
|
589 |
} |
|
590 |
} |
|
591 |
|
|
592 |
String label = obj.getString("name"); |
|
593 |
|
|
594 |
if (key.endsWith("level_0")) { |
|
595 |
|
|
596 |
if (id.equals("uk::wt")) { |
|
597 |
label = "Wellcome Trust Funding Stream"; |
|
598 |
} |
|
599 |
contextMapper.put(id, new ContextDef(id, label, "category", "")); |
|
600 |
|
|
601 |
if (id.startsWith("ec::")) { |
|
602 |
contextMapper.put("ec", new ContextDef("ec", "European Community", "context", "funding")); |
|
603 |
} else if (id.startsWith("uk::")) { |
|
604 |
contextMapper.put("uk", new ContextDef("uk", "United Kingdom", "context", "funding")); |
|
605 |
} else if (id.startsWith("pt::")) { |
|
606 |
contextMapper.put("pt", new ContextDef("pt", "Portugal", "context", "funding")); |
|
607 |
} |
|
608 |
} else { |
|
609 |
contextMapper.put(id, new ContextDef(id, label, "concept", "")); |
|
610 |
parseFundingJson(obj.getJSONObject("parent")); |
|
611 |
} |
|
612 |
|
|
613 |
return id; |
|
614 |
} catch (JSONException e) { |
|
562 |
fundingPath = new SAXReader().read(new StringReader(xmlTree)); |
|
563 |
} catch (DocumentException e) { |
|
615 | 564 |
throw new RuntimeException(e); |
616 | 565 |
} |
617 |
} |
|
566 |
Node funder = fundingPath.selectSingleNode("//funder"); |
|
567 |
String funderShortName = funder.valueOf("./shortname"); |
|
568 |
contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding")); |
|
569 |
Node level0 = fundingPath.selectSingleNode("//funding_level_0"); |
|
570 |
String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name")); |
|
571 |
contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", "")); |
|
572 |
Node level1 = fundingPath.selectSingleNode("//funding_level_1"); |
|
573 |
String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name")); |
|
574 |
contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", "")); |
|
575 |
Node level2 = fundingPath.selectSingleNode("//funding_level_2"); |
|
576 |
if (level2 == null) { |
|
577 |
contextes.add(level1Id); |
|
578 |
} |
|
579 |
else { |
|
580 |
String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name")); |
|
581 |
contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", "")); |
|
582 |
contextes.add(level2Id); |
|
583 |
} |
|
618 | 584 |
|
619 |
private String cleanup(final String id) { |
|
620 |
return StringUtils.substring(StringUtils.deleteWhitespace(StringUtils.substringAfterLast(id, "::").replaceAll("[^a-zA-Z]", "")), 0, 20); |
|
621 | 585 |
} |
622 | 586 |
|
623 | 587 |
private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) { |
Also available in: Unified diff
Reimplemented the funding path and context generation.