Project

General

Profile

1
package eu.dnetlib.iis.metadataextraction;
2

    
3
import java.io.IOException;
4
import java.nio.ByteBuffer;
5

    
6
import org.apache.avro.mapred.AvroKey;
7
import org.apache.hadoop.io.NullWritable;
8
import org.apache.zookeeper.server.ByteBufferInputStream;
9

    
10
import eu.dnetlib.iis.importer.schemas.DocumentContent;
11

    
12
/**
13
 * Metadata extractor module.
14
 * @author Mateusz Kobos
15
 * @author mhorst
16
 *
17
 */
18
public class MetadataExtractorMapper extends AbstractMetadataExtractorMapper<DocumentContent> {
19
	
20
	/* (non-Javadoc)
21
	 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
22
	 */
23
	@Override
24
	public void map(AvroKey<DocumentContent> key, NullWritable ignore, Context context)
25
			throws IOException, InterruptedException {
26
		DocumentContent content = key.datum();
27
		if (content.getPdf()!=null) {
28
			ByteBuffer byteBuffer = content.getPdf();
29
			processStream(content.getId(), 
30
					new ByteBufferInputStream(byteBuffer), 0);	
31
		} else {
32
			log.warn("no byte data found for id: " + content.getId());
33
		}
34
	}
35

    
36
}
(4-4/8)