Revision 53408
Added by Claudio Atzori over 5 years ago
modules/dnet-mapreduce-jobs/branches/master/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java | ||
---|---|---|
13 | 13 |
import com.google.common.collect.Maps; |
14 | 14 |
import com.google.common.collect.Sets; |
15 | 15 |
import com.google.protobuf.InvalidProtocolBufferException; |
16 |
import com.googlecode.protobuf.format.JsonFormat; |
|
17 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
18 |
import eu.dnetlib.actionmanager.actions.XsltInfoPackageAction; |
|
19 |
import eu.dnetlib.actionmanager.common.Agent; |
|
20 |
import eu.dnetlib.actionmanager.common.Operation; |
|
21 |
import eu.dnetlib.actionmanager.common.Provenance; |
|
16 | 22 |
import eu.dnetlib.data.mapreduce.hbase.index.config.*; |
17 | 23 |
import eu.dnetlib.data.mapreduce.util.*; |
18 | 24 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
... | ... | |
30 | 36 |
import org.junit.Before; |
31 | 37 |
import org.junit.Ignore; |
32 | 38 |
import org.junit.Test; |
39 |
import org.springframework.core.io.ByteArrayResource; |
|
40 |
import org.springframework.core.io.ClassPathResource; |
|
41 |
import org.springframework.core.io.Resource; |
|
33 | 42 |
|
34 | 43 |
import static org.junit.Assert.*; |
35 | 44 |
|
... | ... | |
450 | 459 |
printAll(mapAll(buildTable(rows))); |
451 | 460 |
} |
452 | 461 |
|
462 |
@Test |
|
463 |
public void testParseOafWithUpdates() throws Exception { |
|
464 |
final List<Row> rows = Lists.newArrayList(); |
|
465 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("oafWithCommunity.xml"))); |
|
466 |
|
|
467 |
ActionFactory actionFactory = new ActionFactory(); |
|
468 |
|
|
469 |
Map<String, Resource> xslts = Maps.newHashMap(); |
|
470 |
|
|
471 |
xslts.put("oaf2hbase", new ByteArrayResource(IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml")).getBytes())); |
|
472 |
actionFactory.setXslts(xslts); |
|
473 |
|
|
474 |
XsltInfoPackageAction pa = actionFactory.generateInfoPackageAction( |
|
475 |
"oaf2hbase", |
|
476 |
"rawset-id", |
|
477 |
new Agent("agent-id", "agent-name", Agent.AGENT_TYPE.algo), |
|
478 |
Operation.UPDATE, |
|
479 |
IOUtils.toString(load("oafUpdateWithCommunity.xml")), |
|
480 |
Provenance.sysimport_mining_aggregator, |
|
481 |
"0.9"); |
|
482 |
|
|
483 |
final String qualifier = "update_" + System.nanoTime(); |
|
484 |
|
|
485 |
IOUtils.readLines(load("country_updates.json")).forEach(line -> { |
|
486 |
|
|
487 |
Oaf.Builder oaf = Oaf.newBuilder(); |
|
488 |
|
|
489 |
try { |
|
490 |
JsonFormat.merge(line, oaf); |
|
491 |
} catch (JsonFormat.ParseException e) { |
|
492 |
throw new IllegalArgumentException(e); |
|
493 |
} |
|
494 |
|
|
495 |
Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), oaf.build().toByteArray()); |
|
496 |
rows.add(new Row("result", oaf.getEntity().getId(), Lists.newArrayList(col))); |
|
497 |
}); |
|
498 |
|
|
499 |
pa.asAtomicActions().forEach(a -> { |
|
500 |
Column<String, byte[]> col = new Column<>("update_" + System.nanoTime(), a.getTargetValue()); |
|
501 |
rows.add(new Row(a.getTargetColumnFamily(), a.getTargetRowKey(), Lists.newArrayList(col))); |
|
502 |
}); |
|
503 |
|
|
504 |
|
|
505 |
/* |
|
506 |
rows.forEach(r -> { |
|
507 |
log.info(r); |
|
508 |
}); |
|
509 |
*/ |
|
510 |
|
|
511 |
mapAll(buildTable(rows)).entrySet().forEach(b -> { |
|
512 |
log.info(b.getKey()); |
|
513 |
log.info(b.getValue()); |
|
514 |
}); |
|
515 |
} |
|
516 |
|
|
453 | 517 |
private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception { |
454 | 518 |
try { |
455 | 519 |
final List<Row> rows = asRows(xsltStream, recordStream); |
modules/dnet-mapreduce-jobs/branches/master/src/test/resources/eu/dnetlib/data/transform/oafUpdateWithCommunity.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<record xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
3 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
> |
|
7 |
<header xmlns="http://namespace.openaire.eu/"> |
|
8 |
<dri:objIdentifier>acm_________::02a8fbd0aa341df6dbb8323f453091f8</dri:objIdentifier> |
|
9 |
<dri:recordIdentifier></dri:recordIdentifier> |
|
10 |
<dri:dateOfCollection/> |
|
11 |
<dri:mdFormat/> |
|
12 |
<dri:mdFormatInterpretation/> |
|
13 |
<dri:repositoryId/> |
|
14 |
<dr:objectIdentifier/> |
|
15 |
<dr:dateOfCollection></dr:dateOfCollection> |
|
16 |
<oaf:datasourceprefix></oaf:datasourceprefix> |
|
17 |
</header> |
|
18 |
<metadata xmlns="http://namespace.openaire.eu/"> |
|
19 |
<oaf:concept id="fam"/> |
|
20 |
<oaf:concept id="dh-ch::subcommunity::2"/> |
|
21 |
</metadata> |
|
22 |
<about> |
|
23 |
<oaf:datainfo> |
|
24 |
<oaf:inferred>false</oaf:inferred> |
|
25 |
<oaf:deletedbyinference>false</oaf:deletedbyinference> |
|
26 |
<oaf:trust>0.9</oaf:trust> |
|
27 |
<oaf:inferenceprovenance/> |
|
28 |
<oaf:provenanceaction classid="sysimport:mining:repository" |
|
29 |
classname="sysimport:mining:repository" |
|
30 |
schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/> |
|
31 |
</oaf:datainfo> |
|
32 |
</about> |
|
33 |
</record> |
modules/dnet-mapreduce-jobs/branches/master/src/test/resources/eu/dnetlib/data/transform/country_updates.json | ||
---|---|---|
1 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "NL","classname": "Netherland","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
|
2 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "IT","classname": "Italy","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
|
3 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "DE","classname": "Germany","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
|
4 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "IT","classname": "Italy","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
|
5 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "GR","classname": "Greece","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
|
6 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"country": [{"classid": "GR","classname": "Greece","schemeid": "dnet:countries","schemename": "dnet:countries","dataInfo": {"inferred": true,"trust": "0.9","inferenceprovenance": "propagation","provenanceaction": {"classid": "propagation::country::instrepos","classname": "Propagation of country information from datasources belonging to institutional repositories","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"}}}]}},"id": "50|acm_________::02a8fbd0aa341df6dbb8323f453091f8"}} |
modules/dnet-mapreduce-jobs/branches/master/pom.xml | ||
---|---|---|
143 | 143 |
<dependency> |
144 | 144 |
<groupId>eu.dnetlib</groupId> |
145 | 145 |
<artifactId>dnet-openaireplus-mapping-utils</artifactId> |
146 |
<version>[6.2.11]</version>
|
|
146 |
<version>[6.2.13]</version>
|
|
147 | 147 |
</dependency> |
148 | 148 |
<dependency> |
149 | 149 |
<groupId>org.antlr</groupId> |
Also available in: Unified diff
using updated mapping-utils module, added unit test to check the merge procedure for context and country updates