Revision 53591
Added by Sandro La Bruzzo almost 6 years ago
modules/dnet-mapreduce-jobs/branches/master/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/GetInvalidXmlRecordsMapper.java | ||
---|---|---|
64 | 64 |
final DocumentResult result = new DocumentResult(); |
65 | 65 |
final Document document = saxReader.read(new StringReader(value.toString())); |
66 | 66 |
transformer.transform(new DocumentSource(document), result); |
67 |
|
|
68 | 67 |
result.getDocument().asXML(); |
69 | 68 |
|
70 | 69 |
} catch (final Throwable e) { |
71 | 70 |
//log.error("error parsing record\n" + value.toString(), e); |
72 |
|
|
73 | 71 |
context.getCounter("error", e.getClass().getName()).increment(1); |
74 |
|
|
75 | 72 |
final String c = getInvalidXmlChar(e); |
76 | 73 |
if (StringUtils.isNotBlank(c)) { |
77 | 74 |
context.getCounter("invalid char", c).increment(1); |
78 | 75 |
} |
79 |
|
|
80 | 76 |
String doi = getDoi(value.toString()); |
81 | 77 |
if (StringUtils.isNotBlank(doi)) { |
82 |
valueOut.set(doi); |
|
83 |
context.write(key, valueOut); |
|
84 | 78 |
context.getCounter("output", "doi").increment(1); |
85 | 79 |
} |
86 |
|
|
80 |
valueOut.set(value.toString()); |
|
81 |
context.write(key, valueOut); |
|
87 | 82 |
} |
88 | 83 |
} |
89 | 84 |
|
Also available in: Unified diff
Updated the mapper to emit the whole invalid record instead of only its DOI.