Revision 48892
Added by Claudio Atzori over 6 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/oai/OAIFeedMapperTest.java | ||
---|---|---|
17 | 17 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
18 | 18 |
import org.apache.commons.io.IOUtils; |
19 | 19 |
import org.apache.hadoop.mapreduce.Counter; |
20 |
import org.apache.solr.common.util.DateUtil; |
|
21 | 20 |
import org.dom4j.DocumentException; |
22 | 21 |
import org.junit.Before; |
23 | 22 |
import org.junit.Test; |
... | ... | |
72 | 71 |
|
73 | 72 |
String feedDateString = DateUtils.now_ISO8601(); |
74 | 73 |
try { |
75 |
feedDate = DateUtil.parseDate(feedDateString); |
|
74 |
feedDate = org.apache.commons.lang.time.DateUtils.parseDate( |
|
75 |
feedDateString, |
|
76 |
new String[]{ "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ssZ" }); |
|
76 | 77 |
} catch (ParseException e) { |
77 | 78 |
e.printStackTrace(System.err); |
78 | 79 |
throw new RuntimeException(e); |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/IndexFeedMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.index; |
2 | 2 |
|
3 |
import java.io.ByteArrayOutputStream; |
|
3 | 4 |
import java.io.IOException; |
5 |
import java.nio.charset.StandardCharsets; |
|
4 | 6 |
import java.util.List; |
5 | 7 |
import java.util.Map.Entry; |
8 |
import java.util.zip.GZIPOutputStream; |
|
6 | 9 |
|
10 |
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer; |
|
11 |
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer.Mode; |
|
7 | 12 |
import org.apache.commons.codec.binary.Base64; |
8 | 13 |
import org.apache.commons.lang.exception.ExceptionUtils; |
9 | 14 |
import org.apache.commons.logging.Log; |
... | ... | |
12 | 17 |
import org.apache.hadoop.io.Text; |
13 | 18 |
import org.apache.hadoop.mapreduce.Mapper; |
14 | 19 |
import org.apache.solr.client.solrj.SolrServerException; |
15 |
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
|
20 |
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
|
16 | 21 |
import org.apache.solr.client.solrj.response.SolrPingResponse; |
17 | 22 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
18 | 23 |
import org.apache.solr.common.SolrInputDocument; |
... | ... | |
32 | 37 |
|
33 | 38 |
private InputDocumentFactory documentFactory; |
34 | 39 |
|
35 |
private CloudSolrServer solrServer;
|
|
40 |
private CloudSolrClient solrServer;
|
|
36 | 41 |
|
37 | 42 |
private String version; |
38 | 43 |
|
... | ... | |
87 | 92 |
try { |
88 | 93 |
count++; |
89 | 94 |
log.info("initializing solr server..."); |
90 |
solrServer = new CloudSolrServer(baseURL); |
|
95 |
solrServer = new CloudSolrClient.Builder() |
|
96 |
.withZkHost(baseURL) |
|
97 |
.build(); |
|
91 | 98 |
|
92 | 99 |
solrServer.connect(); |
93 | 100 |
|
... | ... | |
103 | 110 |
|
104 | 111 |
} catch (final Throwable e) { |
105 | 112 |
if (solrServer != null) { |
106 |
solrServer.shutdown();
|
|
113 |
solrServer.close();
|
|
107 | 114 |
} |
108 | 115 |
context.getCounter("index init", e.getMessage()).increment(1); |
109 | 116 |
log.error(String.format("failed to init solr client wait %dms, error:\n%s", backoffTimeMs, ExceptionUtils.getStackTrace(e))); |
... | ... | |
123 | 130 |
|
124 | 131 |
try { |
125 | 132 |
indexRecord = dmfToRecord.evaluate(value.toString()); |
126 |
doc = documentFactory.parseDocument(version, indexRecord, dsId, "dnetResult"); |
|
133 |
doc = documentFactory.parseDocument(version, indexRecord, dsId, "dnetResult", new ResultTransformer(Mode.base64) { |
|
134 |
@Override |
|
135 |
public String apply(final String s) { |
|
136 |
|
|
137 |
return org.apache.solr.common.util.Base64.byteArrayToBase64(zip(s)); |
|
138 |
} |
|
139 |
}); |
|
127 | 140 |
if ((doc == null) || doc.isEmpty()) throw new EmptySolrDocumentException(); |
128 | 141 |
|
129 | 142 |
} catch (final Throwable e) { |
... | ... | |
146 | 159 |
} |
147 | 160 |
} |
148 | 161 |
|
162 |
public static byte[] zip(final String s) { |
|
163 |
if ((s == null) || (s.length() == 0)) { |
|
164 |
throw new IllegalArgumentException("Cannot zip null or empty string"); |
|
165 |
} |
|
166 |
|
|
167 |
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) { |
|
168 |
try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) { |
|
169 |
gzipOutputStream.write(s.getBytes(StandardCharsets.UTF_8)); |
|
170 |
} |
|
171 |
return byteArrayOutputStream.toByteArray(); |
|
172 |
} catch(IOException e) { |
|
173 |
throw new RuntimeException("Failed to zip content", e); |
|
174 |
} |
|
175 |
} |
|
176 |
|
|
149 | 177 |
private void addDocument(final Context context, final SolrInputDocument doc) throws SolrServerException, IOException, EmptySolrDocumentException { |
150 | 178 |
buffer.add(doc); |
151 | 179 |
if (buffer.size() >= bufferFlushThreshold) { |
... | ... | |
182 | 210 |
} |
183 | 211 |
log.info("\nwaiting " + shutdownWaitTime + "ms before shutdown"); |
184 | 212 |
Thread.sleep(shutdownWaitTime); |
185 |
solrServer.shutdown();
|
|
213 |
solrServer.close();
|
|
186 | 214 |
} catch (final SolrServerException e) { |
187 | 215 |
log.error("couldn't shutdown server " + e.getMessage()); |
188 | 216 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/DedupIndexFeedMapper.java | ||
---|---|---|
15 | 15 |
import org.apache.hadoop.hbase.util.Bytes; |
16 | 16 |
import org.apache.hadoop.io.Text; |
17 | 17 |
import org.apache.solr.client.solrj.SolrServerException; |
18 |
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
|
18 |
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
|
19 | 19 |
import org.apache.solr.client.solrj.response.SolrPingResponse; |
20 | 20 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
21 | 21 |
import org.apache.solr.common.SolrInputDocument; |
... | ... | |
35 | 35 |
|
36 | 36 |
private static final Log log = LogFactory.getLog(DedupIndexFeedMapper.class); // NOPMD by marko on 11/24/08 5:02 PM |
37 | 37 |
|
38 |
private CloudSolrServer solrServer;
|
|
38 |
private CloudSolrClient solrServer;
|
|
39 | 39 |
|
40 | 40 |
private String dsId; |
41 | 41 |
|
... | ... | |
99 | 99 |
while (true) { |
100 | 100 |
try { |
101 | 101 |
log.info("initializing solr server..."); |
102 |
solrServer = new CloudSolrServer(baseURL); |
|
102 |
solrServer = new CloudSolrClient.Builder() |
|
103 |
.withZkHost(baseURL) |
|
104 |
.build(); |
|
103 | 105 |
|
104 | 106 |
solrServer.connect(); |
105 | 107 |
|
... | ... | |
115 | 117 |
|
116 | 118 |
} catch (final Throwable e) { |
117 | 119 |
if (solrServer != null) { |
118 |
solrServer.shutdown();
|
|
120 |
solrServer.close();
|
|
119 | 121 |
} |
120 | 122 |
context.getCounter("index init", e.getMessage()).increment(1); |
121 | 123 |
log.info(String.format("failed to init solr client wait %dms", backoffTimeMs)); |
... | ... | |
205 | 207 |
} |
206 | 208 |
log.info("\nwaiting " + shutdownWaitTime + "ms before shutdown"); |
207 | 209 |
Thread.sleep(shutdownWaitTime); |
208 |
solrServer.shutdown();
|
|
210 |
solrServer.close();
|
|
209 | 211 |
} catch (final SolrServerException e) { |
210 | 212 |
System.err.println("couldn't shutdown server " + e.getMessage()); |
211 | 213 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/oai/OaiFeedMapper.java | ||
---|---|---|
14 | 14 |
import com.google.common.collect.Lists; |
15 | 15 |
import com.google.common.collect.Maps; |
16 | 16 |
import com.google.common.collect.Multimap; |
17 |
import com.mongodb.*; |
|
17 |
import com.mongodb.BasicDBObject; |
|
18 |
import com.mongodb.DBObject; |
|
19 |
import com.mongodb.MongoClient; |
|
20 |
import com.mongodb.WriteConcern; |
|
18 | 21 |
import com.mongodb.client.MongoCollection; |
19 | 22 |
import com.mongodb.client.MongoDatabase; |
20 | 23 |
import eu.dnetlib.data.mapreduce.JobParams; |
... | ... | |
29 | 32 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
30 | 33 |
import org.apache.commons.io.output.ByteArrayOutputStream; |
31 | 34 |
import org.apache.commons.lang.StringUtils; |
35 |
import org.apache.commons.lang.time.DateUtils; |
|
32 | 36 |
import org.apache.hadoop.io.NullWritable; |
33 | 37 |
import org.apache.hadoop.io.Text; |
34 | 38 |
import org.apache.hadoop.mapreduce.Mapper; |
35 |
import org.apache.solr.common.util.DateUtil; |
|
36 | 39 |
import org.bson.types.Binary; |
37 | 40 |
|
38 | 41 |
public class OaiFeedMapper extends Mapper<Text, Text, NullWritable, NullWritable> { |
... | ... | |
113 | 116 |
|
114 | 117 |
String feedDateString = context.getConfiguration().get(JobParams.OAI_FEED_DATE); |
115 | 118 |
try { |
116 |
feedDate = DateUtil.parseDate(feedDateString);
|
|
119 |
feedDate = DateUtils.parseDate(feedDateString, new String[]{"yyyy-MM-dd\'T\'hh:mm:ss\'Z\'"});
|
|
117 | 120 |
} catch (ParseException e) { |
118 | 121 |
e.printStackTrace(System.err); |
119 | 122 |
throw new RuntimeException(e); |
modules/dnet-mapreduce-jobs/trunk/pom.xml | ||
---|---|---|
80 | 80 |
<version>[1.0.0,2.0.0)</version> |
81 | 81 |
</dependency> |
82 | 82 |
<dependency> |
83 |
<groupId>org.apache.solr</groupId> |
|
84 |
<artifactId>solr-solrj</artifactId> |
|
85 |
<version>[4.10.4]</version> |
|
86 |
<exclusions> |
|
87 |
<exclusion> |
|
88 |
<artifactId>wstx-asl</artifactId> |
|
89 |
<groupId>org.codehaus.woodstox</groupId> |
|
90 |
</exclusion> |
|
91 |
</exclusions> |
|
92 |
</dependency> |
|
93 |
<dependency> |
|
94 | 83 |
<groupId>com.mycila</groupId> |
95 | 84 |
<artifactId>xmltool</artifactId> |
96 | 85 |
<version>3.3</version> |
Also available in: Unified diff
Upgraded Solr version to 6.6.0