Revision 49096
Added by Claudio Atzori over 6 years ago
IndexFeedMapper.java | ||
---|---|---|
7 | 7 |
import java.util.Map.Entry; |
8 | 8 |
import java.util.zip.GZIPOutputStream; |
9 | 9 |
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import eu.dnetlib.data.mapreduce.JobParams; |
|
12 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
13 |
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory; |
|
10 | 14 |
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer; |
11 | 15 |
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer.Mode; |
16 |
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory; |
|
17 |
import eu.dnetlib.miscutils.datetime.HumanTime; |
|
18 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
12 | 19 |
import org.apache.commons.codec.binary.Base64; |
13 | 20 |
import org.apache.commons.lang.exception.ExceptionUtils; |
14 | 21 |
import org.apache.commons.logging.Log; |
... | ... | |
22 | 29 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
23 | 30 |
import org.apache.solr.common.SolrInputDocument; |
24 | 31 |
|
25 |
import com.google.common.collect.Lists; |
|
26 |
|
|
27 |
import eu.dnetlib.data.mapreduce.JobParams; |
|
28 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
29 |
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory; |
|
30 |
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory; |
|
31 |
import eu.dnetlib.miscutils.datetime.HumanTime; |
|
32 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
33 |
|
|
34 | 32 |
public class IndexFeedMapper extends Mapper<Text, Text, Text, Text> { |
35 | 33 |
|
36 | 34 |
private static final Log log = LogFactory.getLog(IndexFeedMapper.class); // NOPMD by marko on 11/24/08 5:02 PM |
35 |
public static final String DNET_RESULT = "dnetResult"; |
|
37 | 36 |
|
38 | 37 |
private InputDocumentFactory documentFactory; |
39 | 38 |
|
... | ... | |
59 | 58 |
|
60 | 59 |
private final static int MAX_FEED_RETRIES = 10; |
61 | 60 |
|
61 |
private boolean compress = false; |
|
62 |
|
|
62 | 63 |
@Override |
63 | 64 |
protected void setup(final Context context) throws IOException, InterruptedException { |
64 | 65 |
|
... | ... | |
72 | 73 |
buffer = Lists.newArrayList(); |
73 | 74 |
simulation = Boolean.parseBoolean(context.getConfiguration().get(JobParams.INDEX_FEED_SIMULATION_MODE)); |
74 | 75 |
|
76 |
compress = context.getConfiguration().getBoolean(JobParams.INDEX_FEED_COMPRESS_RESULT, false); |
|
77 |
|
|
75 | 78 |
final String xslt = new String(Base64.decodeBase64(context.getConfiguration().get(JobParams.INDEX_XSLT))); |
76 | 79 |
|
77 | 80 |
log.info("got xslt: \n" + xslt); |
... | ... | |
130 | 133 |
|
131 | 134 |
try { |
132 | 135 |
indexRecord = dmfToRecord.evaluate(value.toString()); |
133 |
doc = documentFactory.parseDocument(version, indexRecord, dsId, "dnetResult", new ResultTransformer(Mode.base64) { |
|
134 |
@Override |
|
135 |
public String apply(final String s) { |
|
136 | 136 |
|
137 |
return org.apache.solr.common.util.Base64.byteArrayToBase64(zip(s)); |
|
138 |
} |
|
139 |
}); |
|
137 |
if (compress) { |
|
138 |
doc = documentFactory.parseDocument(version, indexRecord, dsId, DNET_RESULT, new ResultTransformer(Mode.base64) { |
|
139 |
@Override |
|
140 |
public String apply(final String s) { |
|
141 |
|
|
142 |
return org.apache.solr.common.util.Base64.byteArrayToBase64(zip(s)); |
|
143 |
} |
|
144 |
}); |
|
145 |
} else { |
|
146 |
doc = documentFactory.parseDocument(version, indexRecord, dsId, DNET_RESULT); |
|
147 |
} |
|
148 |
|
|
140 | 149 |
if ((doc == null) || doc.isEmpty()) throw new EmptySolrDocumentException(); |
141 | 150 |
|
142 | 151 |
} catch (final Throwable e) { |
Also available in: Unified diff
Fixing mapping for license vs. access right (#3128); cleanup.