Project

General

Profile

« Previous | Next » 

Revision 48892

upgraded solr version to 6.6.0

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/oai/OAIFeedMapperTest.java
17 17
import eu.dnetlib.miscutils.datetime.DateUtils;
18 18
import org.apache.commons.io.IOUtils;
19 19
import org.apache.hadoop.mapreduce.Counter;
20
import org.apache.solr.common.util.DateUtil;
21 20
import org.dom4j.DocumentException;
22 21
import org.junit.Before;
23 22
import org.junit.Test;
......
72 71

  
73 72
		String feedDateString = DateUtils.now_ISO8601();
74 73
		try {
75
			feedDate = DateUtil.parseDate(feedDateString);
74
			feedDate = org.apache.commons.lang.time.DateUtils.parseDate(
75
					feedDateString,
76
					new String[]{ "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ssZ" });
76 77
		} catch (ParseException e) {
77 78
			e.printStackTrace(System.err);
78 79
			throw new RuntimeException(e);
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/IndexFeedMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.index;
2 2

  
3
import java.io.ByteArrayOutputStream;
3 4
import java.io.IOException;
5
import java.nio.charset.StandardCharsets;
4 6
import java.util.List;
5 7
import java.util.Map.Entry;
8
import java.util.zip.GZIPOutputStream;
6 9

  
10
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer;
11
import eu.dnetlib.functionality.index.solr.feed.ResultTransformer.Mode;
7 12
import org.apache.commons.codec.binary.Base64;
8 13
import org.apache.commons.lang.exception.ExceptionUtils;
9 14
import org.apache.commons.logging.Log;
......
12 17
import org.apache.hadoop.io.Text;
13 18
import org.apache.hadoop.mapreduce.Mapper;
14 19
import org.apache.solr.client.solrj.SolrServerException;
15
import org.apache.solr.client.solrj.impl.CloudSolrServer;
20
import org.apache.solr.client.solrj.impl.CloudSolrClient;
16 21
import org.apache.solr.client.solrj.response.SolrPingResponse;
17 22
import org.apache.solr.client.solrj.response.UpdateResponse;
18 23
import org.apache.solr.common.SolrInputDocument;
......
32 37

  
33 38
	private InputDocumentFactory documentFactory;
34 39

  
35
	private CloudSolrServer solrServer;
40
	private CloudSolrClient solrServer;
36 41

  
37 42
	private String version;
38 43

  
......
87 92
			try {
88 93
				count++;
89 94
				log.info("initializing solr server...");
90
				solrServer = new CloudSolrServer(baseURL);
95
				solrServer = new CloudSolrClient.Builder()
96
						.withZkHost(baseURL)
97
						.build();
91 98

  
92 99
				solrServer.connect();
93 100

  
......
103 110

  
104 111
			} catch (final Throwable e) {
105 112
				if (solrServer != null) {
106
					solrServer.shutdown();
113
					solrServer.close();
107 114
				}
108 115
				context.getCounter("index init", e.getMessage()).increment(1);
109 116
				log.error(String.format("failed to init solr client wait %dms, error:\n%s", backoffTimeMs, ExceptionUtils.getStackTrace(e)));
......
123 130

  
124 131
		try {
125 132
			indexRecord = dmfToRecord.evaluate(value.toString());
126
			doc = documentFactory.parseDocument(version, indexRecord, dsId, "dnetResult");
133
			doc = documentFactory.parseDocument(version, indexRecord, dsId, "dnetResult", new ResultTransformer(Mode.base64) {
134
				@Override
135
				public String apply(final String s) {
136

  
137
					return org.apache.solr.common.util.Base64.byteArrayToBase64(zip(s));
138
				}
139
			});
127 140
			if ((doc == null) || doc.isEmpty()) throw new EmptySolrDocumentException();
128 141

  
129 142
		} catch (final Throwable e) {
......
146 159
		}
147 160
	}
148 161

  
162
	public static byte[] zip(final String s) {
163
		if ((s == null) || (s.length() == 0)) {
164
			throw new IllegalArgumentException("Cannot zip null or empty string");
165
		}
166

  
167
		try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
168
			try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
169
				gzipOutputStream.write(s.getBytes(StandardCharsets.UTF_8));
170
			}
171
			return byteArrayOutputStream.toByteArray();
172
		} catch(IOException e) {
173
			throw new RuntimeException("Failed to zip content", e);
174
		}
175
	}
176

  
149 177
	private void addDocument(final Context context, final SolrInputDocument doc) throws SolrServerException, IOException, EmptySolrDocumentException {
150 178
		buffer.add(doc);
151 179
		if (buffer.size() >= bufferFlushThreshold) {
......
182 210
			}
183 211
			log.info("\nwaiting " + shutdownWaitTime + "ms before shutdown");
184 212
			Thread.sleep(shutdownWaitTime);
185
			solrServer.shutdown();
213
			solrServer.close();
186 214
		} catch (final SolrServerException e) {
187 215
			log.error("couldn't shutdown server " + e.getMessage());
188 216
		}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/DedupIndexFeedMapper.java
15 15
import org.apache.hadoop.hbase.util.Bytes;
16 16
import org.apache.hadoop.io.Text;
17 17
import org.apache.solr.client.solrj.SolrServerException;
18
import org.apache.solr.client.solrj.impl.CloudSolrServer;
18
import org.apache.solr.client.solrj.impl.CloudSolrClient;
19 19
import org.apache.solr.client.solrj.response.SolrPingResponse;
20 20
import org.apache.solr.client.solrj.response.UpdateResponse;
21 21
import org.apache.solr.common.SolrInputDocument;
......
35 35

  
36 36
	private static final Log log = LogFactory.getLog(DedupIndexFeedMapper.class); // NOPMD by marko on 11/24/08 5:02 PM
37 37

  
38
	private CloudSolrServer solrServer;
38
	private CloudSolrClient solrServer;
39 39

  
40 40
	private String dsId;
41 41

  
......
99 99
		while (true) {
100 100
			try {
101 101
				log.info("initializing solr server...");
102
				solrServer = new CloudSolrServer(baseURL);
102
				solrServer = new CloudSolrClient.Builder()
103
					.withZkHost(baseURL)
104
					.build();
103 105

  
104 106
				solrServer.connect();
105 107

  
......
115 117

  
116 118
			} catch (final Throwable e) {
117 119
				if (solrServer != null) {
118
					solrServer.shutdown();
120
					solrServer.close();
119 121
				}
120 122
				context.getCounter("index init", e.getMessage()).increment(1);
121 123
				log.info(String.format("failed to init solr client wait %dms", backoffTimeMs));
......
205 207
			}
206 208
			log.info("\nwaiting " + shutdownWaitTime + "ms before shutdown");
207 209
			Thread.sleep(shutdownWaitTime);
208
			solrServer.shutdown();
210
			solrServer.close();
209 211
		} catch (final SolrServerException e) {
210 212
			System.err.println("couldn't shutdown server " + e.getMessage());
211 213
		}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/oai/OaiFeedMapper.java
14 14
import com.google.common.collect.Lists;
15 15
import com.google.common.collect.Maps;
16 16
import com.google.common.collect.Multimap;
17
import com.mongodb.*;
17
import com.mongodb.BasicDBObject;
18
import com.mongodb.DBObject;
19
import com.mongodb.MongoClient;
20
import com.mongodb.WriteConcern;
18 21
import com.mongodb.client.MongoCollection;
19 22
import com.mongodb.client.MongoDatabase;
20 23
import eu.dnetlib.data.mapreduce.JobParams;
......
29 32
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
30 33
import org.apache.commons.io.output.ByteArrayOutputStream;
31 34
import org.apache.commons.lang.StringUtils;
35
import org.apache.commons.lang.time.DateUtils;
32 36
import org.apache.hadoop.io.NullWritable;
33 37
import org.apache.hadoop.io.Text;
34 38
import org.apache.hadoop.mapreduce.Mapper;
35
import org.apache.solr.common.util.DateUtil;
36 39
import org.bson.types.Binary;
37 40

  
38 41
public class OaiFeedMapper extends Mapper<Text, Text, NullWritable, NullWritable> {
......
113 116

  
114 117
		String feedDateString = context.getConfiguration().get(JobParams.OAI_FEED_DATE);
115 118
		try {
116
			feedDate = DateUtil.parseDate(feedDateString);
119
			feedDate = DateUtils.parseDate(feedDateString, new String[]{"yyyy-MM-dd\'T\'hh:mm:ss\'Z\'"});
117 120
		} catch (ParseException e) {
118 121
			e.printStackTrace(System.err);
119 122
			throw new RuntimeException(e);
modules/dnet-mapreduce-jobs/trunk/pom.xml
80 80
			<version>[1.0.0,2.0.0)</version>
81 81
		</dependency>
82 82
		<dependency>
83
			<groupId>org.apache.solr</groupId>
84
			<artifactId>solr-solrj</artifactId>
85
			<version>[4.10.4]</version>
86
			<exclusions>
87
				<exclusion>
88
					<artifactId>wstx-asl</artifactId>
89
					<groupId>org.codehaus.woodstox</groupId>
90
				</exclusion>
91
			</exclusions>
92
		</dependency>
93
		<dependency>
94 83
			<groupId>com.mycila</groupId>
95 84
			<artifactId>xmltool</artifactId>
96 85
			<version>3.3</version>

Also available in: Unified diff