Project

General

Profile

1
package eu.dnetlib.datasource.publisher.clients;
2

    
3
import java.util.Date;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.concurrent.ConcurrentHashMap;
7

    
8
import com.google.common.collect.Iterables;
9
import eu.dnetlib.data.index.CloudIndexClient;
10
import eu.dnetlib.data.index.CloudIndexClientException;
11

    
12
import eu.dnetlib.data.index.CloudIndexClientFactory;
13
import eu.dnetlib.datasource.publisher.ApiException;
14
import eu.dnetlib.datasource.publisher.model.utils.DatasourceFunctions;
15
import eu.dnetlib.datasource.publisher.model.utils.IndexDsInfo;
16
import eu.dnetlib.datasource.publisher.model.utils.IndexRecordsInfo;
17
import eu.dnetlib.miscutils.functional.hash.Hashing;
18
import org.apache.commons.lang.StringUtils;
19
import org.apache.commons.lang.time.DateFormatUtils;
20
import org.apache.commons.logging.Log;
21
import org.apache.commons.logging.LogFactory;
22
import org.apache.solr.client.solrj.response.QueryResponse;
23
import org.apache.solr.common.SolrDocument;
24
import org.springframework.http.HttpStatus;
25

    
26
/**
27
 * Created by claudio on 20/10/2016.
28
 */
29
public class DatasourceIndexClient {
30

    
31
	private static final Log log = LogFactory.getLog(DatasourceIndexClient.class);
32

    
33
	public static final String SEPARATOR = "::";
34
	public static final String DSVERSION = "__dsversion";
35

    
36
	private static Map<String, CloudIndexClient> indexClientMap = new ConcurrentHashMap<>();
37

    
38
	public IndexRecordsInfo getIndexInfo(final String dsId, final IndexDsInfo info) throws ApiException {
39
		try {
40
			final String collectedFrom = StringUtils.substringBefore(dsId, SEPARATOR) + SEPARATOR + Hashing.md5(StringUtils.substringAfter(dsId, SEPARATOR));
41
			final CloudIndexClient indexClient = getIndexClient(info);
42
			final String query = String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\"", collectedFrom);
43
			log.debug(String.format("query on %s: %s", info.getFormat(), query));
44

    
45
			final QueryResponse rsp = indexClient.query(query, 1);
46

    
47
			long count = rsp.getResults().getNumFound();
48
			final SolrDocument doc = Iterables.getFirst(rsp.getResults(), new SolrDocument());
49
			final String date = getDate(doc);
50

    
51
			return new IndexRecordsInfo(count, date);
52
		} catch (final Throwable e) {
53
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying publications from: " + dsId);
54
		}
55
	}
56

    
57
	private String getDate(final SolrDocument doc) {
58
		final List<Date> dsversion = (List<Date>) doc.get(DSVERSION);
59
		final Date date = Iterables.getLast(dsversion);
60

    
61
		return DateFormatUtils.format(date, DatasourceFunctions.DATE_FORMAT);
62
	}
63

    
64
	public String getLastIndexingDate(final IndexDsInfo info) throws ApiException {
65
		try {
66
			final QueryResponse rsp = getIndexClient(info).query("oaftype:datasource", 1);
67
			final SolrDocument doc = Iterables.getFirst(rsp.getResults(), null);
68
			final String dsversion = doc.get("__dsversion").toString();
69
			return StringUtils.substringBefore(dsversion, "T");
70
		} catch (CloudIndexClientException e) {
71
			throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying index DS profile: " + info);
72
		}
73
	}
74

    
75
	private synchronized CloudIndexClient getIndexClient(final IndexDsInfo info) throws CloudIndexClientException {
76
		if (!indexClientMap.containsKey(info.getColl())) {
77
			indexClientMap.put(info.getColl(), CloudIndexClientFactory.newIndexClient(info.getIndexBaseUrl(), info.getColl(), false));
78
		}
79
		return indexClientMap.get(info.getColl());
80
	}
81

    
82
}
(2-2/7)