Revision 49029
Added by Claudio Atzori over 6 years ago
XsltRowTransformerFactoryTest.java | ||
---|---|---|
47 | 47 |
} |
48 | 48 |
|
49 | 49 |
@Test |
50 |
public void testParseManyAuthors() throws Exception { |
|
51 |
|
|
52 |
final Map<String, Object> xslParams = Maps.newHashMap(); |
|
53 |
|
|
54 |
final Map<String, String> m = Maps.newHashMap(); |
|
55 |
|
|
56 |
m.put("od______2367", "true"); // Puma |
|
57 |
m.put("od______2294", "true"); // UNIBI |
|
58 |
m.put("od________18", "false"); // Arxiv |
|
59 |
|
|
60 |
xslParams.put("mergeIdForHomonymsMap", m); |
|
61 |
|
|
62 |
final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml")); |
|
63 |
int authorCount = 0; |
|
64 |
for (final Row row : rows) { |
|
65 |
for (final Column<String, byte[]> col : row.getColumns()) { |
|
66 |
|
|
67 |
authorCount++; |
|
68 |
final OafDecoder d = OafDecoder.decode(col.getValue()); |
|
69 |
assertNotNull(d); |
|
70 |
|
|
71 |
log.debug(d.getEntity().getPerson().getCoauthorList().size()); |
|
72 |
} |
|
73 |
} |
|
74 |
log.info("authors' count: ---> " + authorCount); |
|
75 |
|
|
76 |
} |
|
77 |
|
|
78 |
@Test |
|
79 |
public void testParseAuthors() throws Exception { |
|
80 |
|
|
81 |
final Map<String, Object> xslParams = Maps.newHashMap(); |
|
82 |
|
|
83 |
final Map<String, String> m = Maps.newHashMap(); |
|
84 |
|
|
85 |
m.put("od______2367", "true"); // Puma |
|
86 |
m.put("od______2294", "true"); // UNIBI |
|
87 |
m.put("od________18", "false"); // Arxiv |
|
88 |
|
|
89 |
xslParams.put("mergeIdForHomonymsMap", m); |
|
90 |
|
|
91 |
final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml")); |
|
92 |
|
|
93 |
for (final Row row : rows) { |
|
94 |
for (final Column<String, byte[]> col : row.getColumns()) { |
|
95 |
|
|
96 |
final OafDecoder d = OafDecoder.decode(col.getValue()); |
|
97 |
log.debug(d.getOaf()); |
|
98 |
} |
|
99 |
} |
|
100 |
} |
|
101 |
|
|
102 |
@Test |
|
103 | 50 |
@Ignore // need to reimplement because claimUpdates_2_hbase.xsl was removed |
104 | 51 |
public void testParseOafClaimUpdate() throws Exception { |
105 | 52 |
doTest(loadFromTransformationProfile("claimUpdates_2_hbase.xsl"), load("recordClaimUpdate.xml")); |
... | ... | |
293 | 240 |
} |
294 | 241 |
|
295 | 242 |
@Test |
296 |
public void testUnpackAuthors() throws Exception { |
|
297 |
|
|
298 |
final Map<String, Object> xslParams = Maps.newHashMap(); |
|
299 |
|
|
300 |
final Map<String, String> m = Maps.newHashMap(); |
|
301 |
|
|
302 |
m.put("od______2367", "true"); // Puma |
|
303 |
m.put("od______2294", "true"); // UNIBI |
|
304 |
m.put("od________18", "false"); // Arxiv |
|
305 |
|
|
306 |
xslParams.put("mergeIdForHomonymsMap", m); |
|
307 |
|
|
308 |
final List<Row> rows = Lists.newArrayList(); |
|
309 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv.xml"))); |
|
310 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv2.xml"))); |
|
311 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma1.xml"))); |
|
312 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma2.xml"))); |
|
313 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordUNIBI.xml"))); |
|
314 |
|
|
315 |
printPersonFullnames(mapAll(buildTable(rows))); |
|
316 |
} |
|
317 |
|
|
318 |
@Test |
|
319 | 243 |
public void testDuplicates() throws Exception { |
320 | 244 |
final String mergeId = "50|dedup_wf_001::08ed625d07e5738b794ff14d6773fd9f"; |
321 | 245 |
final List<Row> rows = Lists.newArrayList(); |
322 | 246 |
|
323 |
final Function<Row, Row> f = new Function<Row, Row>() { |
|
324 |
@Override |
|
325 |
public Row apply(final Row rowIn) { |
|
247 |
final Function<Row, Row> f = rowIn -> { |
|
326 | 248 |
|
327 |
final List<Column<String,byte[]>> cols = Lists.newArrayList();
|
|
328 |
for(Column<String,byte[]> col : rowIn.getColumns()) {
|
|
329 |
if (col.getName().equals("body")) {
|
|
330 |
cols.add(new Column(col.getName(), col.getValue()));
|
|
249 |
final List<Column<String,byte[]>> cols = Lists.newArrayList(); |
|
250 |
for(Column<String,byte[]> col : rowIn.getColumns()) { |
|
251 |
if (col.getName().equals("body")) { |
|
252 |
cols.add(new Column(col.getName(), col.getValue())); |
|
331 | 253 |
|
332 |
} |
|
333 | 254 |
} |
334 |
return new Row("result", rowIn.getKey(), cols); |
|
335 | 255 |
} |
256 |
return new Row("result", rowIn.getKey(), cols); |
|
336 | 257 |
}; |
337 | 258 |
|
338 | 259 |
final List<Row> puma1 = asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordPuma1.xml"), f); |
... | ... | |
628 | 549 |
final String rowKey = row.getKey(); |
629 | 550 |
final String cf = row.getColumnFamily(); |
630 | 551 |
if (!table.containsKey(rowKey)) { |
631 |
table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
|
|
552 |
table.put(rowKey, new HashMap<>()); |
|
632 | 553 |
} |
633 | 554 |
if (!table.get(rowKey).containsKey(cf)) { |
634 |
table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
|
|
555 |
table.get(rowKey).put(row.getColumnFamily(), new HashMap<>()); |
|
635 | 556 |
} |
636 | 557 |
for (final Column<String, byte[]> c : row.getColumns()) { |
637 | 558 |
System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName())); |
... | ... | |
730 | 651 |
print(Sets.newHashSet(Type.values()), builders, null); |
731 | 652 |
} |
732 | 653 |
|
733 |
private void printPerson(final Map<String, XmlRecordFactory> builders) throws DocumentException { |
|
734 |
print(Sets.newHashSet(Type.person), builders, null); |
|
735 |
} |
|
736 |
|
|
737 |
private void printPersonFullnames(final Map<String, XmlRecordFactory> builders) throws DocumentException { |
|
738 |
|
|
739 |
final Map<Type, Set<String>> xpaths = Maps.newHashMap(); |
|
740 |
|
|
741 |
final Set<String> personPaths = Sets.newHashSet(); |
|
742 |
|
|
743 |
personPaths.add("//fullname"); |
|
744 |
|
|
745 |
xpaths.put(Type.person, personPaths); |
|
746 |
|
|
747 |
print(Sets.newHashSet(Type.person), builders, xpaths); |
|
748 |
} |
|
749 |
|
|
750 | 654 |
private void print(final Set<Type> types, final Map<String, XmlRecordFactory> builders, final Map<Type, Set<String>> xpaths) throws DocumentException { |
751 | 655 |
final SAXReader r = new SAXReader(); |
752 | 656 |
|
Also available in: Unified diff
Commit message: getting rid of person entities