Revision 57188
Added by Claudio Atzori over 4 years ago
ExportFilteredResultMapper.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import com.google.gson.Gson; |
4 | 4 |
import eu.dnetlib.data.mapreduce.hbase.bulktag.ProtoMap; |
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 | 5 |
import org.apache.hadoop.io.Text; |
8 | 6 |
import org.apache.hadoop.mapreduce.Mapper; |
9 | 7 |
import org.dom4j.Document; |
... | ... | |
14 | 12 |
import java.time.Year; |
15 | 13 |
|
16 | 14 |
/** |
17 |
* Exports the result matching the criteria found in the confguration. |
|
15 |
* Exports the result matching the criteria found in the configuration.
|
|
18 | 16 |
* |
19 | 17 |
* @author claudio |
20 | 18 |
*/ |
21 | 19 |
public class ExportFilteredResultMapper extends Mapper<Text, Text, Text, Text> { |
22 | 20 |
|
23 |
/** |
|
24 |
* logger. |
|
25 |
*/ |
|
26 |
private static final Log log = LogFactory.getLog(ExportFilteredResultMapper.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
21 |
private final static String RESULT_TYPE_XPATH = "/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='resulttype']/@classid"; |
|
27 | 22 |
|
28 | 23 |
private Text keyOut; |
29 | 24 |
|
... | ... | |
35 | 30 |
|
36 | 31 |
@Override |
37 | 32 |
protected void setup(final Context context) throws IOException, InterruptedException { |
38 |
super.setup(context); |
|
39 |
|
|
40 | 33 |
keyOut = new Text(""); |
41 | 34 |
valueOut = new Text(); |
42 | 35 |
|
... | ... | |
60 | 53 |
|
61 | 54 |
final Document doc = new SAXReader().read(new StringReader(record)); |
62 | 55 |
|
63 |
if (defaultFilter.matches(doc)) { |
|
56 |
if (defaultFilter.matches(doc, true)) {
|
|
64 | 57 |
|
65 |
if (userFilter.matches(doc)) { |
|
58 |
if (userFilter.matches(doc, false)) {
|
|
66 | 59 |
keyOut.set(keyIn.toString()); |
67 | 60 |
valueOut.set(value.toString()); |
68 | 61 |
|
69 | 62 |
context.write(keyOut, valueOut); |
70 |
context.getCounter("filter", "matched criteria").increment(1);
|
|
63 |
context.getCounter("filter", "matched criteria " +doc.valueOf(RESULT_TYPE_XPATH)).increment(1);
|
|
71 | 64 |
} else { |
72 | 65 |
context.getCounter("filter", "filtered by criteria").increment(1); |
73 | 66 |
} |
Also available in: Unified diff
Removed the project reference from src/test/resources/eu/dnetlib/data/transform/odf.xml; the test didn't include any check against it.