Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataexport;
2

    
3
import java.io.IOException;
4

    
5
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
6
import org.apache.commons.codec.binary.Base64;
7
import org.apache.commons.lang.StringUtils;
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.apache.hadoop.io.Text;
11
import org.apache.hadoop.mapreduce.Mapper;
12

    
13
public class ExportSimplifiedRecordsMapper extends Mapper<Text, Text, Text, Text> {
14

    
15
	private static final Log log = LogFactory.getLog(ExportSimplifiedRecordsMapper.class); // NOPMD by marko on 11/24/08 5:02 PM
16

    
17
	private ApplyXslt recordSummarizer;
18

    
19
	private Text valueOut;
20

    
21
	private Text keyOut;
22

    
23
	@Override
24
	protected void setup(final Context context) throws IOException, InterruptedException {
25

    
26
		final String xslt = new String(Base64.decodeBase64(context.getConfiguration().get("xslt")));
27

    
28
		log.info("got xslt: \n" + xslt);
29

    
30
		recordSummarizer = new ApplyXslt(xslt);
31
		valueOut = new Text();
32
		keyOut = new Text("");
33
	}
34

    
35
	@Override
36
	protected void map(final Text key, final Text value, final Context context) throws IOException, InterruptedException {
37

    
38
		final String summary = recordSummarizer.evaluate(value.toString());
39
		if (StringUtils.isNotBlank(summary)) {
40
			valueOut.set(summary.replaceAll("\n","").replaceAll("\t",""));
41
			context.write(keyOut, valueOut);
42
		}
43
	}
44

    
45
}
(7-7/10)