Project

General

Profile

« Previous | Next » 

Revision 49817

Adopted solrj:7.1.0; implemented a new way to fetch the lastIndexingDate.

View differences:

modules/dnet-openaire-exporter/trunk/dnet-openaire-exporter.iml
18 18
    <orderEntry type="inheritedJdk" />
19 19
    <orderEntry type="sourceFolder" forTests="false" />
20 20
    <orderEntry type="library" name="Maven: eu.dnetlib:dnet-pace-core:2.5.1" level="project" />
21
    <orderEntry type="library" name="Maven: org.apache.solr:solr-solrj:5.5.4" level="project" />
22
    <orderEntry type="library" name="Maven: org.apache.solr:solr-solrj:6.6.0" level="project" />
23
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.4.1" level="project" />
24
    <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.10" level="project" />
25
    <orderEntry type="library" name="Maven: org.noggit:noggit:0.6" level="project" />
26 21
    <orderEntry type="library" name="Maven: eu.dnetlib:cnr-rmi-api:2.6.2-SNAPSHOT" level="project" />
27 22
    <orderEntry type="library" name="Maven: org.apache.cxf:cxf-core:3.1.5" level="project" />
28 23
    <orderEntry type="library" name="Maven: org.apache.ws.xmlschema:xmlschema-core:2.2.1" level="project" />
......
69 64
    <orderEntry type="module" module-name="dnet-pace-core" />
70 65
    <orderEntry type="library" name="Maven: edu.cmu:secondstring:1.0.0" level="project" />
71 66
    <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
72
    <orderEntry type="library" name="Maven: eu.dnetlib:dnet-index-solr-common:1.3.1" level="project" />
73 67
    <orderEntry type="library" name="Maven: com.googlecode.protobuf-java-format:protobuf-java-format:1.2" level="project" />
74
    <orderEntry type="library" name="Maven: org.apache.solr:solr-solrj:6.6.0" level="project" />
75
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.4.1" level="project" />
68
    <orderEntry type="library" name="Maven: org.apache.solr:solr-solrj:7.1.0" level="project" />
69
    <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.6.1" level="project" />
76 70
    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.3" level="project" />
77 71
    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.6" level="project" />
78 72
    <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpmime:4.5.3" level="project" />
79 73
    <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.10" level="project" />
80 74
    <orderEntry type="library" name="Maven: org.codehaus.woodstox:stax2-api:3.1.4" level="project" />
81 75
    <orderEntry type="library" name="Maven: org.codehaus.woodstox:woodstox-core-asl:4.4.1" level="project" />
82
    <orderEntry type="library" name="Maven: org.noggit:noggit:0.6" level="project" />
76
    <orderEntry type="library" name="Maven: org.noggit:noggit:0.8" level="project" />
83 77
    <orderEntry type="library" name="Maven: org.slf4j:jcl-over-slf4j:1.7.24" level="project" />
84 78
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.24" level="project" />
85 79
    <orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-web:1.5.2.RELEASE" level="project" />
modules/dnet-openaire-exporter/trunk/src/main/java/eu/dnetlib/openaire/exporter/datasource/clients/DatasourceIndexClient.java
1 1
package eu.dnetlib.openaire.exporter.datasource.clients;
2 2

  
3 3
import java.io.IOException;
4
import java.util.Date;
5
import java.util.List;
4
import java.util.Calendar;
6 5
import java.util.Map;
7 6
import java.util.Queue;
8 7
import java.util.concurrent.*;
......
12 11
import com.google.common.collect.Iterables;
13 12
import com.google.common.util.concurrent.*;
14 13
import eu.dnetlib.OpenaireExporterConfig;
15
import eu.dnetlib.data.index.CloudIndexClient;
16
import eu.dnetlib.data.index.CloudIndexClientException;
17
import eu.dnetlib.data.index.CloudIndexClientFactory;
18 14
import eu.dnetlib.miscutils.functional.hash.Hashing;
19 15
import eu.dnetlib.openaire.exporter.datasource.ApiException;
20 16
import eu.dnetlib.openaire.exporter.datasource.clients.utils.DatasourceFunctions;
......
25 21
import org.apache.commons.lang3.exception.ExceptionUtils;
26 22
import org.apache.commons.logging.Log;
27 23
import org.apache.commons.logging.LogFactory;
24
import org.apache.solr.client.solrj.SolrClient;
25
import org.apache.solr.client.solrj.SolrQuery;
26
import org.apache.solr.client.solrj.SolrServerException;
27
import org.apache.solr.client.solrj.impl.CloudSolrClient;
28
import org.apache.solr.client.solrj.impl.CloudSolrClient.Builder;
28 29
import org.apache.solr.client.solrj.response.QueryResponse;
29 30
import org.apache.solr.common.SolrDocument;
30 31
import org.springframework.beans.factory.annotation.Autowired;
32
import org.springframework.cache.annotation.Cacheable;
31 33
import org.springframework.http.HttpStatus;
32 34
import org.springframework.stereotype.Component;
33 35

  
......
47 49

  
48 50
	private ListeningExecutorService executor;
49 51

  
50
	private static Map<String, CloudIndexClient> indexClientMap = new ConcurrentHashMap<>();
52
	private static Map<String, SolrClient> indexClientMap = new ConcurrentHashMap<>();
51 53

  
52 54
	@PostConstruct
53 55
	public void init() {
......
70 72
	public IndexRecordsInfo getIndexInfo(final String dsId, final IndexDsInfo info, final Queue<Throwable> errors) throws ApiException {
71 73
		try {
72 74
			final String collectedFrom = StringUtils.substringBefore(dsId, SEPARATOR) + SEPARATOR + Hashing.md5(StringUtils.substringAfter(dsId, SEPARATOR));
73
			final CloudIndexClient indexClient = getIndexClient(info);
75
			final SolrClient indexClient = getIndexClient(info);
74 76
			final CountDownLatch latch = new CountDownLatch(2);
75 77
			final IndexRecordsInfo indexRecordInfo = new IndexRecordsInfo();
76 78

  
77 79
			Futures.addCallback(
78
				executor.submit(() -> setDateAndTotal(collectedFrom, indexClient)),
80
				executor.submit(() -> setTotal(collectedFrom, indexClient)),
79 81
						new FutureCallback<IndexRecordsInfo>() {
80 82
							@Override
81 83
							public void onSuccess(final IndexRecordsInfo info) {
......
119 121

  
120 122
	private Long setFunded(
121 123
			final String collectedFrom,
122
			final CloudIndexClient indexClient) throws ApiException {
123
		final String query = String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\" AND relprojectid:*", collectedFrom);
124
			final SolrClient indexClient) throws ApiException {
125

  
126
		final SolrQuery query = new SolrQuery(
127
				String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\" AND relprojectid:*", collectedFrom))
128
				.setRows(0);
129

  
124 130
		log.debug(String.format("query: %s", query));
125 131
		try {
126
			return indexClient.query(query, 0).getResults().getNumFound();
132
			return indexClient.query(query).getResults().getNumFound();
127 133
		} catch (Throwable e) {
128 134
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying information system", e);
129 135
		}
130 136
	}
131 137

  
132
	private IndexRecordsInfo setDateAndTotal(
138
	private IndexRecordsInfo setTotal(
133 139
			final String collectedFrom,
134
			final CloudIndexClient indexClient) throws ApiException {
140
			final SolrClient indexClient) throws ApiException {
135 141
		try {
136
			final String query = String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\"", collectedFrom);
142
			final SolrQuery query = new SolrQuery(
143
					String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\"", collectedFrom))
144
					.setRows(1);
137 145
			log.debug(String.format("query: %s", query));
138 146

  
139
			final QueryResponse rsp = indexClient.query(query, 1);
147
			final QueryResponse rsp = indexClient.query(query);
140 148
			final SolrDocument doc = Iterables.getFirst(rsp.getResults(), new SolrDocument());
141 149
			if (log.isDebugEnabled()) {
142 150
				log.debug(String.format("got document %s", doc.toString()));
143 151
			}
144
			//			if (doc.isEmpty()) {
145
			//				throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), String.format("cannot find document matching query: %s", queryTotal));
146
			//			}
152

  
147 153
			return new IndexRecordsInfo()
148
					.setDate(getDate(doc))
154
					.setDate(getLastIndexingDate(indexClient))
149 155
					.setTotal(rsp.getResults().getNumFound());
150 156
		} catch (Throwable e) {
151 157
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying information system", e);
152 158
		}
153 159
	}
154 160

  
155
	private String getDate(final SolrDocument doc) throws ApiException {
156
		final List<Date> dsversion = (List<Date>) doc.get(DSVERSION);
157
		if (dsversion == null || dsversion.isEmpty()) {
158
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), String.format("cannot find %s in matched solr document", DSVERSION));
161
	@Cacheable("index-cache")
162
	public String getLastIndexingDate(final SolrClient indexClient) throws ApiException {
163
		try {
164
			final QueryResponse rsp = indexClient.query(new SolrQuery("oaftype:datasource").setRows(1));
165
			if (rsp.getResults().getNumFound() > 0) {
166
				final SolrDocument doc = Iterables.getFirst(rsp.getResults(), null);
167

  
168
				final String dsversion = doc.get(DSVERSION).toString();
169
				return StringUtils.substringBefore(dsversion, "T");
170
			} else {
171
				final String defaultDate = getDefaultLastIndexingDate();
172
				log.debug("unable to find documents, defaulting to " + defaultDate);
173
				return defaultDate;
174
			}
175
		} catch (SolrServerException | IOException e) {
176
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying index DS profile", e);
159 177
		}
160
		final Date date = Iterables.getLast(dsversion);
161

  
162
		return DateFormatUtils.format(date, DatasourceFunctions.DATE_FORMAT);
163 178
	}
164 179

  
165
	public String getLastIndexingDate(final IndexDsInfo info) throws ApiException {
166
		try {
167
			final QueryResponse rsp = getIndexClient(info).query("oaftype:datasource", 1);
168
			final SolrDocument doc = Iterables.getFirst(rsp.getResults(), null);
169
			final String dsversion = doc.get("__dsversion").toString();
170
			return StringUtils.substringBefore(dsversion, "T");
171
		} catch (CloudIndexClientException e) {
172
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying index DS profile: " + info, e);
173
		}
180
	private String getDefaultLastIndexingDate() {
181
		final Calendar cal = Calendar.getInstance();
182
		cal.add(Calendar.MONTH, -1);
183
		return DateFormatUtils.format(cal.getTime(), DatasourceFunctions.DATE_FORMAT);
174 184
	}
175 185

  
176
	private synchronized CloudIndexClient getIndexClient(final IndexDsInfo info) throws CloudIndexClientException {
186
	private synchronized SolrClient getIndexClient(final IndexDsInfo info) {
177 187
		if (!indexClientMap.containsKey(info.getColl())) {
178
			indexClientMap.put(info.getColl(), CloudIndexClientFactory.newIndexClient(info.getIndexBaseUrl(), info.getColl(), false));
188

  
189
			final CloudSolrClient client = new Builder().withZkHost(info.getIndexBaseUrl()).build();
190
			client.setDefaultCollection(info.getColl());
191

  
192
			indexClientMap.put(info.getColl(), client);
179 193
		}
180 194
		return indexClientMap.get(info.getColl());
181 195
	}
modules/dnet-openaire-exporter/trunk/src/main/java/eu/dnetlib/DNetOpenaireExporterApplication.java
11 11
import org.apache.commons.logging.LogFactory;
12 12
import org.springframework.beans.factory.annotation.Autowired;
13 13
import org.springframework.boot.SpringApplication;
14
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
14 15
import org.springframework.boot.autoconfigure.SpringBootApplication;
16
import org.springframework.boot.autoconfigure.solr.SolrAutoConfiguration;
15 17
import org.springframework.cache.annotation.EnableCaching;
16 18
import org.springframework.context.annotation.EnableAspectJAutoProxy;
17 19
import org.springframework.web.bind.annotation.RequestMapping;
......
23 25
@RestController
24 26
@SpringBootApplication
25 27
@EnableAspectJAutoProxy
28
@EnableAutoConfiguration(exclude = { SolrAutoConfiguration.class })
26 29
public class DNetOpenaireExporterApplication {
27 30

  
28 31
	private static final Log log = LogFactory.getLog(DNetOpenaireExporterApplication.class);
modules/dnet-openaire-exporter/trunk/src/main/resources/ehcache.xml
49 49
		<persistence strategy="localTempSwap" />
50 50
	</cache>
51 51

  
52
	<cache name="index-cache"
53
	       maxEntriesLocalHeap="100"
54
	       maxEntriesLocalDisk="1000"
55
	       eternal="false"
56
	       diskSpoolBufferSizeMB="10"
57
	       timeToIdleSeconds="86400" timeToLiveSeconds="86400"
58
	       memoryStoreEvictionPolicy="LFU"
59
	       transactionalMode="off">
60
		<persistence strategy="localTempSwap" />
61
	</cache>
52 62

  
63

  
53 64
</ehcache>
modules/dnet-openaire-exporter/trunk/src/main/resources/logback-spring.xml
10 10
	<logger name="eu.dnetlib" level="INFO" additivity="false">
11 11
		<appender-ref ref="CONSOLE" />
12 12
	</logger>
13
	<logger name="eu.dnetlib.openaire.exporter" level="DEBUG" additivity="false">
13
	<logger name="eu.dnetlib.openaire.exporter" level="INFO" additivity="false">
14 14
		<appender-ref ref="CONSOLE" />
15 15
	</logger>
16 16
</configuration>
modules/dnet-openaire-exporter/trunk/pom.xml
97 97
					<groupId>eu.dnetlib</groupId>
98 98
					<artifactId>dnet-hadoop-commons</artifactId>
99 99
				</exclusion>
100
				<exclusion>
101
					<groupId>eu.dnetlib</groupId>
102
					<artifactId>dnet-index-solr-common</artifactId>
103
				</exclusion>
104
				<exclusion>
105
					<groupId>org.apache.solr</groupId>
106
					<artifactId>solr-solrj</artifactId>
107
				</exclusion>
100 108
			</exclusions>
101 109
		</dependency>
102 110
		<dependency>
......
240 248

  
241 249
	<properties>
242 250
		<java.version>1.8</java.version>
243
		<apache.solr.version>6.6.0</apache.solr.version>
251
		<apache.solr.version>7.1.0</apache.solr.version>
244 252
		<mongodb.driver.version>3.4.2</mongodb.driver.version>
245 253
		<springfox-version>2.7.0</springfox-version>
246 254
		<prometheus.version>0.0.25</prometheus.version>

Also available in: Unified diff