-- Keeps the MDStore assignments of every identifier that occurs as a
-- `datasetId` in the document-to-dataset input and writes them out as
-- (documentId, mdStoreId) records.
--
-- Parameters substituted by Pig before the script runs:
--   $schema_input_document_to_dataset    Avro schema of the document-to-dataset input
--   $schema_input_document_to_mdstore    Avro schema of the document-to-mdstore input
--   $schema_output_document_to_mdstore   Avro schema of the stored output
--   $input_document_to_dataset           location of the document-to-dataset input
--   $input_document_to_mdstore           location of the document-to-mdstore input
--   $output_document_to_mdstore          location the result is stored into

-- Loader for the document-to-dataset records.
define avro_load_document_to_dataset
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'schema', '$schema_input_document_to_dataset');

-- Loader for the document-to-mdstore records.
define avro_load_document_to_mdstore
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'schema', '$schema_input_document_to_mdstore');

-- Storer for the result. In addition to the schema it passes the
-- 'index' option with value '0' to AvroStorage.
-- NOTE(review): confirm the meaning of 'index' against the piggybank
-- AvroStorage documentation before changing or removing it.
define avro_store_document_to_mdstore
    org.apache.pig.piggybank.storage.avro.AvroStorage(
        'index', '0',
        'schema', '$schema_output_document_to_mdstore');

-- Collect the distinct dataset identifiers; the field is renamed to `id`
-- so it can be used as the join key below.
documentToDataset = load '$input_document_to_dataset' using avro_load_document_to_dataset;
datasetIds = foreach documentToDataset generate datasetId as id;
datasetIdsDistinct = distinct datasetIds;

documentToMDStore = load '$input_document_to_mdstore' using avro_load_document_to_mdstore;

-- Inner join: only identifiers present on both sides survive, matched
-- against the `documentId` field of the document-to-mdstore records.
joinedWithMDStore = join datasetIdsDistinct by id, documentToMDStore by documentId;

-- Project the joined relation to the two output fields, disambiguating
-- the source relation of each with the `::` prefix.
outputDocumentToMdstore = foreach joinedWithMDStore generate datasetIdsDistinct::id as documentId, documentToMDStore::mdStoreId as mdStoreId;

store outputDocumentToMdstore into '$output_document_to_mdstore' using avro_store_document_to_mdstore;