Revision 41772
Added by Claudio Atzori about 8 years ago
AbstractProtoPaceTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.pace; |
2 | 2 |
|
3 | 3 |
import java.io.IOException; |
4 |
import java.io.InputStream; |
|
5 | 4 |
import java.io.StringWriter; |
6 | 5 |
import java.util.ArrayList; |
7 |
import java.util.HashMap; |
|
8 | 6 |
import java.util.List; |
9 |
import java.util.Map; |
|
10 | 7 |
|
11 |
import com.google.common.base.Function; |
|
12 |
import com.google.common.base.Predicate; |
|
13 |
import com.google.common.collect.Iterables; |
|
14 | 8 |
import com.google.common.collect.Lists; |
15 |
import com.google.common.collect.Maps; |
|
16 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
17 |
import eu.dnetlib.data.proto.TypeProtos; |
|
18 |
import eu.dnetlib.data.transform.Column; |
|
19 |
import eu.dnetlib.data.transform.Row; |
|
20 |
import eu.dnetlib.data.transform.XsltRowTransformer; |
|
21 |
import eu.dnetlib.data.transform.XsltRowTransformerFactory; |
|
22 | 9 |
import org.apache.commons.io.IOUtils; |
23 | 10 |
import org.apache.commons.lang.RandomStringUtils; |
24 | 11 |
import org.apache.commons.lang.StringUtils; |
... | ... | |
43 | 30 |
import eu.dnetlib.pace.model.ProtoDocumentBuilder; |
44 | 31 |
import eu.dnetlib.pace.model.gt.GTAuthor; |
45 | 32 |
import eu.dnetlib.pace.model.gt.GTAuthorMapper; |
46 |
import org.apache.commons.logging.Log; |
|
47 |
import org.apache.commons.logging.LogFactory; |
|
48 |
import org.dom4j.Document; |
|
49 |
import org.dom4j.DocumentException; |
|
50 |
import org.dom4j.io.SAXReader; |
|
51 | 33 |
|
52 |
import static org.junit.Assert.assertFalse; |
|
53 |
import static org.junit.Assert.assertNotNull; |
|
54 |
import static org.junit.Assert.assertTrue; |
|
55 |
|
|
56 | 34 |
public abstract class AbstractProtoPaceTest extends OafTest { |
57 | 35 |
|
58 |
private static final Log log = LogFactory.getLog(AbstractProtoPaceTest.class); |
|
59 |
|
|
60 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/"; |
|
61 |
|
|
62 |
protected XsltRowTransformerFactory factory; |
|
63 |
|
|
64 | 36 |
protected DedupConfig getResultFullConf() { |
65 | 37 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf")); |
66 | 38 |
} |
... | ... | |
89 | 61 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf")); |
90 | 62 |
} |
91 | 63 |
|
92 |
protected DedupConfig getResultResourceTypeConf() { |
|
93 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.test1.pace.conf")); |
|
94 |
} |
|
95 |
|
|
96 | 64 |
protected MapDocument person(final Config conf, final String id, final Oaf oaf) { |
97 | 65 |
return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model()); |
98 | 66 |
} |
... | ... | |
112 | 80 |
return gta; |
113 | 81 |
} |
114 | 82 |
|
115 |
protected MapDocument getMapDocument(final Config config, final String filename, final String xsltName, final TypeProtos.Type type) throws Exception { |
|
116 |
|
|
117 |
final Iterable<Row> rows = Iterables.filter(asRows(loadFromTransformationProfile(xsltName), load(filename)), new Predicate<Row>() { |
|
118 |
@Override |
|
119 |
public boolean apply(final Row row) { |
|
120 |
final String cf = row.getColumnFamily(); |
|
121 |
|
|
122 |
return cf.equals(type.toString()); |
|
123 |
} |
|
124 |
}); |
|
125 |
|
|
126 |
final Iterable<Oaf> oaf = Iterables.transform(rows, new Function<Row, Oaf>() { |
|
127 |
@Override |
|
128 |
public Oaf apply(final Row row) { |
|
129 |
final Column<String, byte[]> column = row.getColumn("body"); |
|
130 |
final byte[] value = column.getValue(); |
|
131 |
try { |
|
132 |
return Oaf.parseFrom(value); |
|
133 |
} catch (InvalidProtocolBufferException e) { |
|
134 |
throw new IllegalArgumentException(row.toString(), e); |
|
135 |
} |
|
136 |
} |
|
137 |
}); |
|
138 |
|
|
139 |
final MapDocument doc = Iterables.getOnlyElement(Iterables.transform(oaf, new Function<Oaf, MapDocument>() { |
|
140 |
@Override |
|
141 |
public MapDocument apply(final Oaf oaf) { |
|
142 |
return ProtoDocumentBuilder.newInstance(oaf.getEntity().getId(), oaf.getEntity(), config.model()); |
|
143 |
} |
|
144 |
})); |
|
145 |
|
|
146 |
return doc; |
|
147 |
} |
|
148 |
|
|
149 |
protected List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception { |
|
150 |
return asRows(xsltStream, new HashMap<String, Object>(), recordStream); |
|
151 |
} |
|
152 |
|
|
153 |
protected List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception { |
|
154 |
final String xslt = IOUtils.toString(xsltStream); |
|
155 |
final XsltRowTransformer transformer = factory.getTransformer(xslt, params); |
|
156 |
assertNotNull(transformer); |
|
157 |
|
|
158 |
final String record = IOUtils.toString(recordStream); |
|
159 |
final List<Row> rows = transformer.apply(record); |
|
160 |
|
|
161 |
assertNotNull(rows); |
|
162 |
assertFalse(rows.isEmpty()); |
|
163 |
return rows; |
|
164 |
} |
|
165 |
|
|
166 |
protected InputStream loadFromTransformationProfile(final String profilePath) { |
|
167 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
168 |
InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
169 |
SAXReader saxReader = new SAXReader(); |
|
170 |
Document doc = null; |
|
171 |
try { |
|
172 |
doc = saxReader.read(profile); |
|
173 |
} catch (DocumentException e) { |
|
174 |
e.printStackTrace(); |
|
175 |
throw new RuntimeException(e); |
|
176 |
} |
|
177 |
String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML(); |
|
178 |
return IOUtils.toInputStream(xslt); |
|
179 |
} |
|
180 |
|
|
181 |
protected InputStream load(final String fileName) { |
|
182 |
return getClass().getResourceAsStream(fileName); |
|
183 |
} |
|
184 |
|
|
185 | 83 |
private String readFromClasspath(final String filename) { |
186 | 84 |
final StringWriter sw = new StringWriter(); |
187 | 85 |
try { |
Also available in: Unified diff
reverted to r41644