1
|
package eu.dnetlib.datasource.publisher.clients;
|
2
|
|
3
|
import java.util.Date;
|
4
|
import java.util.List;
|
5
|
import java.util.Map;
|
6
|
import java.util.concurrent.ConcurrentHashMap;
|
7
|
|
8
|
import com.google.common.collect.Iterables;
|
9
|
import eu.dnetlib.data.index.CloudIndexClient;
|
10
|
import eu.dnetlib.data.index.CloudIndexClientException;
|
11
|
|
12
|
import eu.dnetlib.data.index.CloudIndexClientFactory;
|
13
|
import eu.dnetlib.datasource.publisher.ApiException;
|
14
|
import eu.dnetlib.datasource.publisher.model.utils.DatasourceFunctions;
|
15
|
import eu.dnetlib.datasource.publisher.model.utils.IndexDsInfo;
|
16
|
import eu.dnetlib.datasource.publisher.model.utils.IndexRecordsInfo;
|
17
|
import eu.dnetlib.miscutils.functional.hash.Hashing;
|
18
|
import org.apache.commons.lang.StringUtils;
|
19
|
import org.apache.commons.lang.time.DateFormatUtils;
|
20
|
import org.apache.commons.logging.Log;
|
21
|
import org.apache.commons.logging.LogFactory;
|
22
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
23
|
import org.apache.solr.common.SolrDocument;
|
24
|
import org.springframework.http.HttpStatus;
|
25
|
|
26
|
/**
|
27
|
* Created by claudio on 20/10/2016.
|
28
|
*/
|
29
|
public class DatasourceIndexClient {
|
30
|
|
31
|
private static final Log log = LogFactory.getLog(DatasourceIndexClient.class);
|
32
|
|
33
|
public static final String SEPARATOR = "::";
|
34
|
public static final String DSVERSION = "__dsversion";
|
35
|
|
36
|
private static Map<String, CloudIndexClient> indexClientMap = new ConcurrentHashMap<>();
|
37
|
|
38
|
public IndexRecordsInfo getIndexInfo(final String dsId, final IndexDsInfo info) throws ApiException {
|
39
|
try {
|
40
|
final String collectedFrom = StringUtils.substringBefore(dsId, SEPARATOR) + SEPARATOR + Hashing.md5(StringUtils.substringAfter(dsId, SEPARATOR));
|
41
|
final CloudIndexClient indexClient = getIndexClient(info);
|
42
|
final String query = String.format("oaftype:result AND deletedbyinference:false AND collectedfromdatasourceid:\"%s\"", collectedFrom);
|
43
|
log.debug(String.format("query on %s: %s", info.getFormat(), query));
|
44
|
|
45
|
final QueryResponse rsp = indexClient.query(query, 1);
|
46
|
|
47
|
long count = rsp.getResults().getNumFound();
|
48
|
final SolrDocument doc = Iterables.getFirst(rsp.getResults(), new SolrDocument());
|
49
|
final String date = getDate(doc);
|
50
|
|
51
|
return new IndexRecordsInfo(count, date);
|
52
|
} catch (final Throwable e) {
|
53
|
throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying publications from: " + dsId);
|
54
|
}
|
55
|
}
|
56
|
|
57
|
private String getDate(final SolrDocument doc) {
|
58
|
final List<Date> dsversion = (List<Date>) doc.get(DSVERSION);
|
59
|
final Date date = Iterables.getLast(dsversion);
|
60
|
|
61
|
return DateFormatUtils.format(date, DatasourceFunctions.DATE_FORMAT);
|
62
|
}
|
63
|
|
64
|
public String getLastIndexingDate(final IndexDsInfo info) throws ApiException {
|
65
|
try {
|
66
|
final QueryResponse rsp = getIndexClient(info).query("oaftype:datasource", 1);
|
67
|
final SolrDocument doc = Iterables.getFirst(rsp.getResults(), null);
|
68
|
final String dsversion = doc.get("__dsversion").toString();
|
69
|
return StringUtils.substringBefore(dsversion, "T");
|
70
|
} catch (CloudIndexClientException e) {
|
71
|
throw new ApiException(HttpStatus.INTERNAL_SERVER_ERROR.value(), "Error querying index DS profile: " + info);
|
72
|
}
|
73
|
}
|
74
|
|
75
|
private synchronized CloudIndexClient getIndexClient(final IndexDsInfo info) throws CloudIndexClientException {
|
76
|
if (!indexClientMap.containsKey(info.getColl())) {
|
77
|
indexClientMap.put(info.getColl(), CloudIndexClientFactory.newIndexClient(info.getIndexBaseUrl(), info.getColl(), false));
|
78
|
}
|
79
|
return indexClientMap.get(info.getColl());
|
80
|
}
|
81
|
|
82
|
}
|