/*
 * Reshapes citation records read from Avro and writes them back as Avro.
 *
 * Each input record's rawText, destinationDocumentId and confidenceLevel
 * fields are packed, together with the result of EMPTY_MAP(), into a single
 * tuple named "entry"; sourceDocumentId is kept as a top-level field.
 *
 * Script parameters (must be supplied by the caller; names shown without the
 * parameter sigil so the preprocessor does not try to substitute them here):
 *   input          - location of the Avro input data
 *   schema_input   - value passed as the 'schema' option of the loader
 *   output         - location where the result is stored
 *   schema_output  - value passed as the 'schema' option of the storer
 */

-- Loader: piggybank AvroStorage configured with the caller-supplied input schema.
define avro_load_input_citations
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'schema', '$schema_input');

-- Storer: piggybank AvroStorage configured with the caller-supplied output schema.
-- NOTE(review): 'index' is presumably the AvroStorage store-function index used
-- to tell apart multiple stores in one script -- confirm against piggybank docs.
define avro_store_output_citations
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'index', '0',
        'schema', '$schema_output');

-- Project UDF; presumably yields an empty map -- verify against its implementation.
define EMPTY_MAP eu.dnetlib.iis.transformers.udfs.EmptyMap;

-- Read the citation records.
input_citations = load '$input' using avro_load_input_citations;

-- Keep sourceDocumentId and group the remaining citation fields, plus the
-- EMPTY_MAP() result, into one tuple aliased "entry".
output_citations = foreach input_citations generate
    sourceDocumentId,
    (rawText, destinationDocumentId, confidenceLevel, EMPTY_MAP()) as entry;

-- Write the reshaped records.
store output_citations into '$output' using avro_store_output_citations;