Revision 31997
Added by Claudio Atzori over 9 years ago
XsltRowTransformerFactoryTest.java | ||
---|---|---|
3 | 3 |
import static org.junit.Assert.assertFalse; |
4 | 4 |
import static org.junit.Assert.assertNotNull; |
5 | 5 |
|
6 |
import java.io.IOException; |
|
7 | 6 |
import java.io.InputStream; |
8 | 7 |
import java.util.HashMap; |
9 | 8 |
import java.util.List; |
... | ... | |
18 | 17 |
import org.junit.Before; |
19 | 18 |
import org.junit.Test; |
20 | 19 |
|
21 |
import com.google.common.collect.Iterables;
|
|
20 |
import com.google.common.collect.Lists;
|
|
22 | 21 |
import com.google.common.collect.Maps; |
23 | 22 |
import com.google.protobuf.InvalidProtocolBufferException; |
24 | 23 |
|
... | ... | |
37 | 36 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
38 | 37 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
39 | 38 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
40 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
41 | 39 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
42 | 40 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
43 | 41 |
|
... | ... | |
47 | 45 |
|
48 | 46 |
private EntityConfigTable entityConfigTable; |
49 | 47 |
|
50 |
private final InputStream datacite2insertActionsInputStream = getClass().getResourceAsStream("/eu/dnetlib/actionmanager/xslt/datacite2insertActions.xslt"); |
|
48 |
// private final InputStream datacite2insertActionsInputStream = |
|
49 |
// getClass().getResourceAsStream("/eu/dnetlib/actionmanager/xslt/datacite2insertActions.xslt"); |
|
51 | 50 |
|
52 | 51 |
private final InputStream dmf2hbaseInputStream = getClass().getResourceAsStream("dmf_2_hbase.xsl"); |
53 | 52 |
|
... | ... | |
63 | 62 |
|
64 | 63 |
private final InputStream recordClaimInputStream = getClass().getResourceAsStream("recordClaim.xml"); |
65 | 64 |
|
65 |
private final InputStream recordPangaeOafInputStream = getClass().getResourceAsStream("pangaeOAF.xml"); |
|
66 |
|
|
67 |
private final InputStream recordPangaeOdfInputStream = getClass().getResourceAsStream("pangaeODF.xml"); |
|
68 |
|
|
66 | 69 |
@Before |
67 | 70 |
public void setUp() throws Exception { |
68 | 71 |
factory = new XsltRowTransformerFactory(); |
... | ... | |
70 | 73 |
} |
71 | 74 |
|
72 | 75 |
@Test |
73 |
public void testParseClaim() throws IOException {
|
|
76 |
public void testParseClaim() throws Exception { |
|
74 | 77 |
|
75 |
transform(dmf2hbaseInputStream, recordClaimInputStream);
|
|
78 |
doTest(dmf2hbaseInputStream, recordClaimInputStream);
|
|
76 | 79 |
} |
77 | 80 |
|
78 | 81 |
@Test |
79 |
public void testParseProjectWithStats() throws IOException {
|
|
82 |
public void testParseProjectWithStats() throws Exception { |
|
80 | 83 |
|
81 |
transform(project2hbaseInputStream, projectRecordInputStream);
|
|
84 |
doTest(project2hbaseInputStream, projectRecordInputStream);
|
|
82 | 85 |
} |
83 | 86 |
|
84 | 87 |
@Test |
85 |
public void testParseDmf() throws IOException {
|
|
88 |
public void testParseDmf() throws Exception { |
|
86 | 89 |
|
87 |
transform(dmf2hbaseInputStream, recordInputStream);
|
|
90 |
doTest(dmf2hbaseInputStream, recordInputStream);
|
|
88 | 91 |
} |
89 | 92 |
|
90 | 93 |
@Test |
91 |
public void testParseDatacite() throws IOException {
|
|
94 |
public void testParseDatacite() throws Exception { |
|
92 | 95 |
|
93 |
transform(datacite2hbaseInputStream, recordDataciteInputStream);
|
|
96 |
doTest(datacite2hbaseInputStream, recordDataciteInputStream);
|
|
94 | 97 |
} |
95 | 98 |
|
96 |
// |
|
97 |
// @Test |
|
98 |
// public void testParseDmfWithStats() throws IOException { |
|
99 |
// |
|
100 |
// String xslt = IOUtils.toString(dmf2hbaseInputStream); |
|
101 |
// XsltRowTransformer transformer = factory.getTransformer(xslt); |
|
102 |
// assertNotNull(transformer); |
|
103 |
// |
|
104 |
// String record = IOUtils.toString(recordInputStream); |
|
105 |
// List<Row> rows = transformer.apply(record); |
|
106 |
// |
|
107 |
// assertNotNull(rows); |
|
108 |
// assertFalse(rows.isEmpty()); |
|
109 |
// |
|
110 |
// // System.out.println(rows); |
|
111 |
// |
|
112 |
// for (Row row : rows) { |
|
113 |
// if ("result".equals(row.getColumnFamily())) { |
|
114 |
// for (Column<String, byte[]> col : row) { |
|
115 |
// |
|
116 |
// if ("body".equals(col.getName())) { |
|
117 |
// Oaf.Builder oaf = Oaf.newBuilder(Oaf.parseFrom(col.getValue())); |
|
118 |
// eu.dnetlib.data.proto.ResultProtos.Result.Builder resultBuilder = oaf.getEntityBuilder().getResultBuilder(); |
|
119 |
// resultBuilder.addInstance(newInstance(oaf.getEntity().getResult().getInstanceList(), "CLOSED", "CLOSED Access")); |
|
120 |
// |
|
121 |
// resultBuilder.getMetadataBuilder().addSubject( |
|
122 |
// OafTest.getStructuredproperty("subject1::subsubject1", "arxiv classes", "dnet:subjects", OafTest.getDataInfo())); |
|
123 |
// |
|
124 |
// oaf.getEntityBuilder().addExtraInfo( |
|
125 |
// OafTest.extraInfo("result citations", "iis::document_referencedDocuments", "0.9", "citations", |
|
126 |
// XmlRecordFactoryTest.CITATION_XML)); |
|
127 |
// oaf.getEntityBuilder().addExtraInfo( |
|
128 |
// OafTest.extraInfo("result statistics", "iis:document_statistics", "0.87", "statistics", XmlRecordFactoryTest.STATISTICS_JSON)); |
|
129 |
// |
|
130 |
// builder.setMainEntity(OafDecoder.decode(oaf.build())); |
|
131 |
// String projectId = "corda_______::e6608053c55492c6da3ecd446ef12bde"; |
|
132 |
// String resultId = "50|od______1064::fe947e59cf7db2f039b4c8cc25693fb0"; |
|
133 |
// builder.addRelation(OafTest.embed(OafTest.getResultProject(projectId, resultId, OafTest.getProjectFP7(projectId), "produces"), |
|
134 |
// Kind.relation)); |
|
135 |
// builder.addRelation(OafTest.embed( |
|
136 |
// OafTest.getResultProject("welcometrust::50e00ff1da1fa610f44e186b2ba9785f", resultId, OafTest.getProjectWT(), "produces"), |
|
137 |
// Kind.relation)); |
|
138 |
// |
|
139 |
// builder.addRelation(OafTest.embed( |
|
140 |
// OafTest.getSimilarityRel("od______1064::fe947e59cf7db2f039b4c8cc25693fb0", resultId, |
|
141 |
// OafTest.getResult("od______1064::fe947e59cf7db2f039b4c8cc25693fb0"), "isAmongTopNSimilarDocuments"), Kind.relation)); |
|
142 |
// |
|
143 |
// builder.addChild(OafTest.embed(OafTest.getDedupRel(resultId, "dup1", RelType.resultResult, "isMergedIn"), Kind.relation)); |
|
144 |
// |
|
145 |
// // System.out.println(oaf.toString()); |
|
146 |
// } |
|
147 |
// } |
|
148 |
// } |
|
149 |
// } |
|
150 |
// System.out.println(IndentXmlString.apply(builder.build())); |
|
151 |
// } |
|
152 |
// |
|
153 |
// @Test |
|
154 |
// public void testParsePersonWithStats() throws IOException { |
|
155 |
// |
|
156 |
// String xslt = IOUtils.toString(dmf2hbaseInputStream); |
|
157 |
// XsltRowTransformer transformer = factory.getTransformer(xslt); |
|
158 |
// assertNotNull(transformer); |
|
159 |
// |
|
160 |
// String record = IOUtils.toString(recordInputStream); |
|
161 |
// List<Row> rows = transformer.apply(record); |
|
162 |
// |
|
163 |
// assertNotNull(rows); |
|
164 |
// assertFalse(rows.isEmpty()); |
|
165 |
// |
|
166 |
// // System.out.println(rows); |
|
167 |
// |
|
168 |
// for (Row row : rows) { |
|
169 |
// |
|
170 |
// if ("person".equals(row.getColumnFamily())) { |
|
171 |
// for (Column<String, byte[]> col : row) { |
|
172 |
// |
|
173 |
// if ("body".equals(col.getName())) { |
|
174 |
// Oaf.Builder oaf = Oaf.newBuilder(Oaf.parseFrom(col.getValue())); |
|
175 |
// builder.setMainEntity(OafDecoder.decode(oaf.build())); |
|
176 |
// } |
|
177 |
// } |
|
178 |
// } |
|
179 |
// } |
|
180 |
// |
|
181 |
// System.out.println(IndentXmlString.apply(builder.build())); |
|
182 |
// } |
|
99 |
@Test |
|
100 |
public void testParsePangaeOAF() throws Exception { |
|
183 | 101 |
|
184 |
private Instance newInstance(final List<Instance> instances, final String classid, final String className) { |
|
185 |
Instance.Builder instance = Instance.newBuilder(Iterables.getLast(instances)); |
|
186 |
instance.getLicenceBuilder().setClassid(classid).setClassname(className); |
|
187 |
return instance.build(); |
|
102 |
doTest(dmf2hbaseInputStream, recordPangaeOafInputStream); |
|
188 | 103 |
} |
189 | 104 |
|
190 |
private void transform(final InputStream xsltStream, final InputStream recordStream) throws IOException { |
|
191 |
try { |
|
192 |
String xslt = IOUtils.toString(xsltStream); |
|
193 |
XsltRowTransformer transformer = factory.getTransformer(xslt); |
|
194 |
assertNotNull(transformer); |
|
105 |
@Test |
|
106 |
public void testParsePangaeODF() throws Exception { |
|
195 | 107 |
|
196 |
String record = IOUtils.toString(recordStream);
|
|
197 |
List<Row> rows = transformer.apply(record);
|
|
108 |
doTest(datacite2hbaseInputStream, recordPangaeOdfInputStream);
|
|
109 |
}
|
|
198 | 110 |
|
199 |
assertNotNull(rows);
|
|
200 |
assertFalse(rows.isEmpty());
|
|
111 |
@Test
|
|
112 |
public void testLinkPangae() throws Exception {
|
|
201 | 113 |
|
114 |
List<Row> rows = Lists.newArrayList(); |
|
115 |
rows.addAll(asRows(datacite2hbaseInputStream, recordPangaeOdfInputStream)); |
|
116 |
rows.addAll(asRows(dmf2hbaseInputStream, recordPangaeOafInputStream)); |
|
117 |
|
|
118 |
print(mapAll(buildTable(rows))); |
|
119 |
} |
|
120 |
|
|
121 |
private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception { |
|
122 |
try { |
|
123 |
List<Row> rows = asRows(xsltStream, recordStream); |
|
124 |
|
|
202 | 125 |
// System.out.println(rows); |
203 |
Map<String, XmlRecordFactory> builders = Maps.newHashMap(); |
|
204 | 126 |
|
205 |
Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
|
|
127 |
Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
|
|
206 | 128 |
|
207 |
for (Row row : rows) { |
|
208 |
String rowKey = row.getKey(); |
|
209 |
String cf = row.getColumnFamily(); |
|
210 |
if (!table.containsKey(rowKey)) { |
|
211 |
table.put(rowKey, new HashMap<String, Map<String, byte[]>>()); |
|
212 |
} |
|
213 |
if (!table.get(rowKey).containsKey(cf)) { |
|
214 |
table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>()); |
|
215 |
} |
|
216 |
for (Column<String, byte[]> c : row.getColumns()) { |
|
217 |
System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName())); |
|
218 |
table.get(rowKey).get(cf).put(c.getName(), c.getValue()); |
|
219 |
} |
|
220 |
} |
|
221 |
|
|
222 | 129 |
// System.out.println("\n" + table.toString()); |
223 | 130 |
|
224 |
for (Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) { |
|
225 |
map(builders, e.getKey(), e.getValue()); |
|
226 |
} |
|
131 |
Map<String, XmlRecordFactory> builders = mapAll(table); |
|
227 | 132 |
|
228 |
for (Entry<String, XmlRecordFactory> e : builders.entrySet()) { |
|
229 |
if (e.getValue().isValid()) { |
|
230 |
System.out.println(IndentXmlString.apply(e.getValue().build())); |
|
231 |
} else { |
|
232 |
System.out.println("invalid builder: " + e.getKey()); |
|
233 |
} |
|
234 |
} |
|
133 |
print(builders); |
|
235 | 134 |
} catch (InvalidProtocolBufferException e) { |
236 |
throw new IOException(e);
|
|
135 |
throw new Exception(e); |
|
237 | 136 |
} catch (TransformerConfigurationException e) { |
238 |
throw new IOException(e);
|
|
137 |
throw new Exception(e); |
|
239 | 138 |
} catch (TransformerFactoryConfigurationError e) { |
240 |
throw new IOException(e);
|
|
139 |
throw new Exception(e); |
|
241 | 140 |
} catch (DocumentException e) { |
242 |
throw new IOException(e);
|
|
141 |
throw new Exception(e); |
|
243 | 142 |
} |
244 | 143 |
} |
245 | 144 |
|
246 |
private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) |
|
247 |
throws InvalidProtocolBufferException, TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException { |
|
145 |
private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception { |
|
146 |
String xslt = IOUtils.toString(xsltStream); |
|
147 |
XsltRowTransformer transformer = factory.getTransformer(xslt); |
|
148 |
assertNotNull(transformer); |
|
248 | 149 |
|
150 |
String record = IOUtils.toString(recordStream); |
|
151 |
List<Row> rows = transformer.apply(record); |
|
152 |
|
|
153 |
assertNotNull(rows); |
|
154 |
assertFalse(rows.isEmpty()); |
|
155 |
return rows; |
|
156 |
} |
|
157 |
|
|
158 |
private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) { |
|
159 |
Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap(); |
|
160 |
|
|
161 |
for (Row row : rows) { |
|
162 |
String rowKey = row.getKey(); |
|
163 |
String cf = row.getColumnFamily(); |
|
164 |
if (!table.containsKey(rowKey)) { |
|
165 |
table.put(rowKey, new HashMap<String, Map<String, byte[]>>()); |
|
166 |
} |
|
167 |
if (!table.get(rowKey).containsKey(cf)) { |
|
168 |
table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>()); |
|
169 |
} |
|
170 |
for (Column<String, byte[]> c : row.getColumns()) { |
|
171 |
System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName())); |
|
172 |
table.get(rowKey).get(cf).put(c.getName(), c.getValue()); |
|
173 |
} |
|
174 |
} |
|
175 |
return table; |
|
176 |
} |
|
177 |
|
|
178 |
private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception { |
|
179 |
|
|
180 |
Map<String, XmlRecordFactory> builders = Maps.newHashMap(); |
|
181 |
for (Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) { |
|
182 |
map(builders, e.getKey(), e.getValue()); |
|
183 |
} |
|
184 |
return builders; |
|
185 |
} |
|
186 |
|
|
187 |
private void map(final Map<String, XmlRecordFactory> builders, final String rowKey, final Map<String, Map<String, byte[]>> row) throws Exception { |
|
188 |
|
|
249 | 189 |
final Type type = OafRowKeyDecoder.decode(rowKey).getType(); |
250 | 190 |
|
251 | 191 |
Map<String, byte[]> colEntity = row.get(type.toString()); |
... | ... | |
297 | 237 |
|
298 | 238 |
} |
299 | 239 |
|
300 |
private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws TransformerConfigurationException, |
|
301 |
TransformerFactoryConfigurationError, DocumentException { |
|
240 |
private void print(final Map<String, XmlRecordFactory> builders) { |
|
241 |
for (Entry<String, XmlRecordFactory> e : builders.entrySet()) { |
|
242 |
if (e.getValue().isValid()) { |
|
243 |
System.out.println(IndentXmlString.apply(e.getValue().build())); |
|
244 |
} else { |
|
245 |
System.out.println("invalid builder: " + e.getKey()); |
|
246 |
} |
|
247 |
} |
|
248 |
} |
|
249 |
|
|
250 |
private void ensureBuilder(final Map<String, XmlRecordFactory> builders, final String rowKey) throws Exception { |
|
302 | 251 |
if (!builders.containsKey(rowKey)) { |
303 | 252 |
builders.put(rowKey, newBuilder()); |
304 | 253 |
} |
... | ... | |
310 | 259 |
} |
311 | 260 |
|
312 | 261 |
@Test |
313 |
public void test_template() throws IOException {
|
|
262 |
public void test_template() throws Exception { |
|
314 | 263 |
String xslt = IOUtils.toString(dmf2hbaseInputStream); |
315 | 264 |
XsltRowTransformer transformer = factory.getTransformer(xslt); |
316 | 265 |
assertNotNull(transformer); |
Also available in: Unified diff
cleanup & tests