Revision 29686
Added by Sandro La Bruzzo almost 10 years ago
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/cql/SolrTypeBasedCqlValueTransformerMapFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.cql; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
|
|
5 |
import org.springframework.beans.factory.annotation.Required; |
|
6 |
|
|
7 |
import eu.dnetlib.functionality.index.model.Any.ValueType; |
|
8 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
9 |
|
|
10 |
/** |
|
11 |
* Factory for the SolrTypeBasedCqlValueTransformerMap class objects |
|
12 |
* |
|
13 |
* @author claudio |
|
14 |
* |
|
15 |
*/ |
|
16 |
public class SolrTypeBasedCqlValueTransformerMapFactory { |
|
17 |
|
|
18 |
/** |
|
19 |
* Map of functions, injected via spring. |
|
20 |
*/ |
|
21 |
private Map<String, UnaryFunction<String, String>> transformerMap; |
|
22 |
|
|
23 |
/** |
|
24 |
* Method returns a new instance of SolrTypeBasedCqlValueTransformerMap. |
|
25 |
* |
|
26 |
* @param schema |
|
27 |
* @return |
|
28 |
*/ |
|
29 |
public SolrTypeBasedCqlValueTransformerMap getIt(final Map<String, ValueType> schema) { |
|
30 |
return new SolrTypeBasedCqlValueTransformerMap(schema, getTransformerMap()); |
|
31 |
} |
|
32 |
|
|
33 |
@Required |
|
34 |
public void setTransformerMap(Map<String, UnaryFunction<String, String>> transformerMap) { |
|
35 |
this.transformerMap = transformerMap; |
|
36 |
} |
|
37 |
|
|
38 |
public Map<String, UnaryFunction<String, String>> getTransformerMap() { |
|
39 |
return transformerMap; |
|
40 |
} |
|
41 |
|
|
42 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/cql/SimpleDateValueTransformer.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.cql; |
|
2 |
|
|
3 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
4 |
|
|
5 |
/** |
|
6 |
* Simply and not very roboust normalizer for solr dates. Basically it handles well yyyy-mm-dd and |
|
7 |
* yyyy-mm-ddThh:mm:ssZ |
|
8 |
* |
|
9 |
* @author marko |
|
10 |
* |
|
11 |
*/ |
|
12 |
public class SimpleDateValueTransformer implements UnaryFunction<String, String> { |
|
13 |
@Override |
|
14 |
public String evaluate(final String value) { |
|
15 |
if (!value.endsWith("Z")) |
|
16 |
return value + "T00:00:00Z"; |
|
17 |
return value; |
|
18 |
} |
|
19 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/cql/SolrTypeBasedCqlValueTransformerMap.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.cql; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
|
|
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 |
import org.apache.solr.common.SolrException; |
|
8 |
|
|
9 |
import eu.dnetlib.functionality.index.cql.CqlValueTransformerMap; |
|
10 |
import eu.dnetlib.functionality.index.model.Any.ValueType; |
|
11 |
import eu.dnetlib.miscutils.functional.IdentityFunction; |
|
12 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
13 |
|
|
14 |
/** |
|
15 |
* This class maps the fields in the given index schema with a transformation rule. |
|
16 |
* |
|
17 |
* @author marko |
|
18 |
* |
|
19 |
*/ |
|
20 |
public class SolrTypeBasedCqlValueTransformerMap implements CqlValueTransformerMap { |
|
21 |
|
|
22 |
/** |
|
23 |
* logger. |
|
24 |
*/ |
|
25 |
private static final Log log = LogFactory.getLog(SolrTypeBasedCqlValueTransformerMap.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
26 |
|
|
27 |
/** |
|
28 |
* Index schema. |
|
29 |
*/ |
|
30 |
private final Map<String, ValueType> schema; |
|
31 |
|
|
32 |
/** |
|
33 |
* Map of functions. |
|
34 |
*/ |
|
35 |
private final Map<String, UnaryFunction<String, String>> transformerMap; |
|
36 |
|
|
37 |
/** |
|
38 |
* Create value transformer map bound to a specific schema |
|
39 |
* |
|
40 |
* @param indexSchema |
|
41 |
*/ |
|
42 |
public SolrTypeBasedCqlValueTransformerMap(final Map<String, ValueType> schema, final Map<String, UnaryFunction<String, String>> transformerMap) { |
|
43 |
this.schema = schema; |
|
44 |
this.transformerMap = transformerMap; |
|
45 |
} |
|
46 |
|
|
47 |
/** |
|
48 |
* {@inheritDoc} |
|
49 |
* |
|
50 |
* @see eu.dnetlib.functionality.index.cql.CqlValueTransformerMap#transformerFor(java.lang.String) |
|
51 |
*/ |
|
52 |
@Override |
|
53 |
public UnaryFunction<String, String> transformerFor(final String fieldName) { |
|
54 |
try { |
|
55 |
final ValueType field = schema.get(fieldName); |
|
56 |
|
|
57 |
if (field != null) { |
|
58 |
UnaryFunction<String, String> res = transformerMap.get(field.name()); |
|
59 |
if (res != null) { |
|
60 |
return res; |
|
61 |
} |
|
62 |
} |
|
63 |
} catch (SolrException e) { |
|
64 |
log.debug("cannot find field", e); |
|
65 |
} |
|
66 |
return new IdentityFunction<String>(); |
|
67 |
} |
|
68 |
|
|
69 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/utils/HighlightUtils.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.utils; |
|
2 |
|
|
3 |
import org.apache.oro.text.perl.Perl5Util; |
|
4 |
|
|
5 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
6 |
|
|
7 |
public class HighlightUtils implements UnaryFunction<String, String> { |
|
8 |
|
|
9 |
public final static String DEFAULT_HL_PRE = "[hl]"; |
|
10 |
|
|
11 |
public final static String DEFAULT_HL_POST = "[/hl]"; |
|
12 |
|
|
13 |
private static String CLEAN_HEADER = "s#\\[/?hl\\]##gm"; |
|
14 |
private static String CLEAN_REGEX_OPEN = "<([^>]*)\\[hl\\]([^>]*)>"; |
|
15 |
private static String CLEAN_REGEX_CLOSE = "<([^>]*)\\[\\/hl\\]([^>]*)>"; |
|
16 |
|
|
17 |
// private static String CLEAN_REGEX_OPEN = "s#<([^>]*)\\[hl\\]([^>]*)>#<$1$2>#gm"; |
|
18 |
// private static String CLEAN_REGEX_CLOSE = "s#<([^>]*)\\[\\/hl\\]([^>]*)>#<$1$2>#gm"; |
|
19 |
|
|
20 |
private Perl5Util p5util = new Perl5Util(); |
|
21 |
|
|
22 |
@Override |
|
23 |
public String evaluate(final String doc) { |
|
24 |
String[] chunk = doc.split("</header>"); |
|
25 |
String string = cleanHeader(chunk[0]) + "</header>" + cleanBody(chunk[1]); |
|
26 |
return string; |
|
27 |
} |
|
28 |
|
|
29 |
private String cleanHeader(final String header) { |
|
30 |
return p5util.substitute(CLEAN_HEADER, header); |
|
31 |
} |
|
32 |
|
|
33 |
// TODO: implement a faster way to do this |
|
34 |
private String cleanBody(final String body) { |
|
35 |
String res = body.replaceAll(CLEAN_REGEX_OPEN, "<$1$2>").replaceAll(CLEAN_REGEX_CLOSE, "<$1$2>"); |
|
36 |
|
|
37 |
if (res.equals(body)) return res; |
|
38 |
|
|
39 |
return cleanBody(res); |
|
40 |
} |
|
41 |
|
|
42 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/feed/SolrServerPool.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.feed; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.net.MalformedURLException; |
|
5 |
import java.net.URI; |
|
6 |
import java.net.URL; |
|
7 |
import java.util.Collection; |
|
8 |
import java.util.Iterator; |
|
9 |
import java.util.List; |
|
10 |
import java.util.regex.Matcher; |
|
11 |
import java.util.regex.Pattern; |
|
12 |
|
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
import org.apache.solr.client.solrj.SolrServer; |
|
16 |
import org.apache.solr.client.solrj.SolrServerException; |
|
17 |
import org.apache.solr.client.solrj.impl.CloudSolrServer; |
|
18 |
import org.apache.solr.client.solrj.impl.HttpSolrServer; |
|
19 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
|
20 |
import org.apache.solr.common.SolrInputDocument; |
|
21 |
|
|
22 |
import com.google.common.base.Splitter; |
|
23 |
import com.google.common.collect.Lists; |
|
24 |
import com.google.common.hash.HashFunction; |
|
25 |
import com.google.common.hash.Hashing; |
|
26 |
|
|
27 |
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions; |
|
28 |
|
|
29 |
@Deprecated |
|
30 |
public class SolrServerPool { |
|
31 |
|
|
32 |
/** |
|
33 |
* logger. |
|
34 |
*/ |
|
35 |
private static final Log log = LogFactory.getLog(SolrServerPool.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
36 |
|
|
37 |
/* |
|
38 |
* We run into problems when using the ConcurrentUpdateSolrServer, so we're sticking to the HttpSolrServer. |
|
39 |
*/ |
|
40 |
private final List<HttpSolrServer> updateServerPool = Lists.newArrayList(); |
|
41 |
|
|
42 |
private CloudSolrServer cloudServer; |
|
43 |
|
|
44 |
private final HashFunction hash = Hashing.murmur3_32(); |
|
45 |
|
|
46 |
public SolrServerPool(final String updateUrlLocal, final String updateUrlList, final String zkHost, final String collection, final boolean localFeeding) { |
|
47 |
for (URL url : parseUrlListPattern(updateUrlLocal, updateUrlList, localFeeding)) { |
|
48 |
updateServerPool.add(new HttpSolrServer(url + "/" + collection)); |
|
49 |
} |
|
50 |
cloudServer = new CloudSolrServer(zkHost); |
|
51 |
cloudServer.setDefaultCollection(collection); |
|
52 |
} |
|
53 |
|
|
54 |
public UpdateResponse add(final SolrInputDocument doc) throws SolrServerException, IOException { |
|
55 |
return updateServerPool.get(hashPick(doc)).add(doc); |
|
56 |
} |
|
57 |
|
|
58 |
public UpdateResponse addAll(final Iterator<SolrInputDocument> docs) throws SolrServerException, IOException { |
|
59 |
if (updateServerPool.size() == 1) return updateServerPool.get(0).add(docs); |
|
60 |
int i = Integer.parseInt(DnetXsltFunctions.randomInt(updateServerPool.size())); |
|
61 |
return updateServerPool.get(i).add(docs); |
|
62 |
} |
|
63 |
|
|
64 |
public UpdateResponse addAll(final Collection<SolrInputDocument> docs) throws SolrServerException, IOException { |
|
65 |
if (updateServerPool.size() == 1) return updateServerPool.get(0).add(docs); |
|
66 |
int i = Integer.parseInt(DnetXsltFunctions.randomInt(updateServerPool.size())); |
|
67 |
return updateServerPool.get(i).add(docs); |
|
68 |
} |
|
69 |
|
|
70 |
public void deleteByQuery(final String query) throws SolrServerException, IOException { |
|
71 |
cloudServer.deleteByQuery(query); |
|
72 |
} |
|
73 |
|
|
74 |
public void commitAll() throws SolrServerException, IOException { |
|
75 |
cloudServer.commit(); |
|
76 |
} |
|
77 |
|
|
78 |
public void shutdownAll() throws SolrServerException { |
|
79 |
cloudServer.shutdown(); |
|
80 |
for (SolrServer server : updateServerPool) { |
|
81 |
server.shutdown(); |
|
82 |
} |
|
83 |
} |
|
84 |
|
|
85 |
// ////////////////// |
|
86 |
|
|
87 |
private int hashPick(final SolrInputDocument doc) { |
|
88 |
final int hashCode = hash.hashBytes(doc.getFieldValue("__indexrecordidentifier").toString().getBytes()).asInt(); |
|
89 |
return Math.abs(hashCode) % updateServerPool.size(); |
|
90 |
} |
|
91 |
|
|
92 |
public List<URL> parseUrlListPattern(final String local, final String list, final boolean localFeeding) { |
|
93 |
final List<URL> res = Lists.newArrayList(); |
|
94 |
try { |
|
95 |
if (localFeeding) { |
|
96 |
res.add(new URL(local)); |
|
97 |
} else { |
|
98 |
Matcher matcher = Pattern.compile("(^.*)\\[(\\d+)\\.\\.(\\d+)\\](.*$)").matcher(list); |
|
99 |
if (matcher.matches()) { |
|
100 |
final String prefix = matcher.group(1); |
|
101 |
int lb = Integer.parseInt(matcher.group(2)); |
|
102 |
int ub = Integer.parseInt(matcher.group(3)); |
|
103 |
final String suffix = matcher.group(4); |
|
104 |
|
|
105 |
for (int i = lb; i <= ub; i++) { |
|
106 |
res.add(new URL(prefix + i + suffix)); |
|
107 |
} |
|
108 |
} |
|
109 |
} |
|
110 |
} catch (MalformedURLException e) { |
|
111 |
throw new IllegalArgumentException("invalid url list: " + list, e); |
|
112 |
} |
|
113 |
|
|
114 |
log.info("parsed url(s): " + res); |
|
115 |
return res; |
|
116 |
} |
|
117 |
|
|
118 |
public List<URL> parseUrlList(final String list) throws MalformedURLException { |
|
119 |
final List<URL> res = Lists.newArrayList(); |
|
120 |
for (final String url : Splitter.on(",").trimResults().split(list)) { |
|
121 |
res.add(URI.create(url).toURL()); |
|
122 |
} |
|
123 |
return res; |
|
124 |
} |
|
125 |
|
|
126 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/feed/StreamingInputDocumentFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.feed; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.ArrayList; |
|
6 |
|
|
7 |
import javax.xml.stream.XMLEventFactory; |
|
8 |
import javax.xml.stream.XMLEventReader; |
|
9 |
import javax.xml.stream.XMLEventWriter; |
|
10 |
import javax.xml.stream.XMLInputFactory; |
|
11 |
import javax.xml.stream.XMLOutputFactory; |
|
12 |
import javax.xml.stream.XMLStreamException; |
|
13 |
import javax.xml.stream.events.Namespace; |
|
14 |
import javax.xml.stream.events.StartElement; |
|
15 |
import javax.xml.stream.events.XMLEvent; |
|
16 |
|
|
17 |
import org.apache.solr.common.SolrInputDocument; |
|
18 |
import org.dom4j.DocumentException; |
|
19 |
|
|
20 |
import com.google.common.collect.Lists; |
|
21 |
|
|
22 |
/** |
|
23 |
* Optimized version of the document parser, drop in replacement of InputDocumentFactory. |
|
24 |
* |
|
25 |
* <p> |
|
26 |
* Faster because: |
|
27 |
* </p> |
|
28 |
* <ul> |
|
29 |
* <li>Doesn't create a DOM for the full document</li> |
|
30 |
* <li>Doesn't execute xpaths agains the DOM</li> |
|
31 |
* <li>Quickly serialize the 'result' element directly in a string.</li> |
|
32 |
* <li>Uses less memory: less pressure on GC and allows more threads to process this in parallel</li> |
|
33 |
* </ul> |
|
34 |
* |
|
35 |
* <p> |
|
36 |
* This class is fully reentrant and can be invoked in parallel. |
|
37 |
* </p> |
|
38 |
* |
|
39 |
* @author marko |
|
40 |
* |
|
41 |
*/ |
|
42 |
public class StreamingInputDocumentFactory extends InputDocumentFactory { |
|
43 |
|
|
44 |
protected static final String DNETRESULT = "result"; |
|
45 |
|
|
46 |
protected static final String TARGETFIELDS = "targetFields"; |
|
47 |
|
|
48 |
protected static final String INDEX_RECORD_ID_ELEMENT = "indexRecordIdentifier"; |
|
49 |
|
|
50 |
protected ThreadLocal<XMLInputFactory> inputFactory = new ThreadLocal<XMLInputFactory>() { |
|
51 |
|
|
52 |
@Override |
|
53 |
protected XMLInputFactory initialValue() { |
|
54 |
return XMLInputFactory.newInstance(); |
|
55 |
} |
|
56 |
}; |
|
57 |
|
|
58 |
protected ThreadLocal<XMLOutputFactory> outputFactory = new ThreadLocal<XMLOutputFactory>() { |
|
59 |
|
|
60 |
@Override |
|
61 |
protected XMLOutputFactory initialValue() { |
|
62 |
return XMLOutputFactory.newInstance(); |
|
63 |
} |
|
64 |
}; |
|
65 |
|
|
66 |
protected ThreadLocal<XMLEventFactory> eventFactory = new ThreadLocal<XMLEventFactory>() { |
|
67 |
|
|
68 |
@Override |
|
69 |
protected XMLEventFactory initialValue() { |
|
70 |
return XMLEventFactory.newInstance(); |
|
71 |
} |
|
72 |
}; |
|
73 |
|
|
74 |
/** |
|
75 |
* {@inheritDoc} |
|
76 |
* |
|
77 |
* @throws XMLStreamException |
|
78 |
* @throws DocumentException |
|
79 |
* |
|
80 |
* @see eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory#parseDocument(eu.dnetlib.functionality.index.solr.feed.IndexDocument, |
|
81 |
* java.lang.String) |
|
82 |
*/ |
|
83 |
@Override |
|
84 |
public SolrInputDocument parseDocument(final String version, final String inputDocument, final String dsId) { |
|
85 |
|
|
86 |
final StringWriter results = new StringWriter(); |
|
87 |
|
|
88 |
try { |
|
89 |
XMLEventReader parser = inputFactory.get().createXMLEventReader(new StringReader(inputDocument)); |
|
90 |
|
|
91 |
final SolrInputDocument indexDocument = new SolrInputDocument(); |
|
92 |
|
|
93 |
while (parser.hasNext()) { |
|
94 |
final XMLEvent event = parser.nextEvent(); |
|
95 |
if ((event != null) && event.isStartElement()) { |
|
96 |
final String localName = event.asStartElement().getName().getLocalPart(); |
|
97 |
|
|
98 |
if (INDEX_RECORD_ID_ELEMENT.equals(localName)) { |
|
99 |
final XMLEvent text = parser.nextEvent(); |
|
100 |
String recordId = getText(text); |
|
101 |
indexDocument.addField(INDEX_RECORD_ID, recordId); |
|
102 |
} else if (TARGETFIELDS.equals(localName)) { |
|
103 |
parseTargetFields(indexDocument, parser); |
|
104 |
} else if (DNETRESULT.equals(localName)) { |
|
105 |
copyResult(indexDocument, results, parser); |
|
106 |
} |
|
107 |
} |
|
108 |
} |
|
109 |
|
|
110 |
if (version != null) { |
|
111 |
indexDocument.addField(DS_VERSION, version); |
|
112 |
} |
|
113 |
|
|
114 |
if (dsId != null) { |
|
115 |
indexDocument.addField(DS_ID, dsId); |
|
116 |
} |
|
117 |
|
|
118 |
if (!indexDocument.containsKey(INDEX_RECORD_ID)) { |
|
119 |
indexDocument.clear(); |
|
120 |
System.err.println("missing indexrecord id:\n" + inputDocument); |
|
121 |
} |
|
122 |
|
|
123 |
return indexDocument; |
|
124 |
} catch (XMLStreamException e) { |
|
125 |
return new SolrInputDocument(); |
|
126 |
} |
|
127 |
} |
|
128 |
|
|
129 |
/** |
|
130 |
* Parse the targetFields block and add fields to the solr document. |
|
131 |
* |
|
132 |
* @param indexDocument |
|
133 |
* @param parser |
|
134 |
* @throws XMLStreamException |
|
135 |
*/ |
|
136 |
protected void parseTargetFields(final SolrInputDocument indexDocument, final XMLEventReader parser) throws XMLStreamException { |
|
137 |
|
|
138 |
boolean hasFields = false; |
|
139 |
|
|
140 |
while (parser.hasNext()) { |
|
141 |
final XMLEvent targetEvent = parser.nextEvent(); |
|
142 |
if (targetEvent.isEndElement() && targetEvent.asEndElement().getName().getLocalPart().equals(TARGETFIELDS)) { |
|
143 |
break; |
|
144 |
} |
|
145 |
|
|
146 |
if (targetEvent.isStartElement()) { |
|
147 |
final String fieldName = targetEvent.asStartElement().getName().getLocalPart(); |
|
148 |
final XMLEvent text = parser.nextEvent(); |
|
149 |
|
|
150 |
String data = getText(text); |
|
151 |
|
|
152 |
addField(indexDocument, fieldName, data); |
|
153 |
hasFields = true; |
|
154 |
} |
|
155 |
} |
|
156 |
|
|
157 |
if (!hasFields) { |
|
158 |
indexDocument.clear(); |
|
159 |
} |
|
160 |
} |
|
161 |
|
|
162 |
/** |
|
163 |
* Copy the /indexRecord/result element and children, preserving namespace declarations etc. |
|
164 |
* |
|
165 |
* @param indexDocument |
|
166 |
* @param results |
|
167 |
* @param parser |
|
168 |
* @throws XMLStreamException |
|
169 |
*/ |
|
170 |
protected void copyResult(final SolrInputDocument indexDocument, final StringWriter results, final XMLEventReader parser) throws XMLStreamException { |
|
171 |
final XMLEventWriter writer = outputFactory.get().createXMLEventWriter(results); |
|
172 |
|
|
173 |
// TODO: newRecord should copy all the namespace prefixes setup in parents |
|
174 |
// fortunately the only parent of the result element is the 'indexrecord', so it should be easy to get |
|
175 |
// the namespaces declared on the root element (and fast) |
|
176 |
|
|
177 |
final ArrayList<Namespace> namespaces = Lists.newArrayList(eventFactory.get().createNamespace("dri", "http://www.driver-repository.eu/namespace/dri"), |
|
178 |
eventFactory.get().createNamespace("dr", "http://www.driver-repository.eu/namespace/dr"), |
|
179 |
eventFactory.get().createNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance"), |
|
180 |
eventFactory.get().createNamespace("dc", "http://purl.org/dc/elements/1.1/"), |
|
181 |
eventFactory.get().createNamespace("oaf", "http://namespace.openaire.eu/oaf")); |
|
182 |
|
|
183 |
StartElement newRecord = eventFactory.get().createStartElement("", null, RESULT, null, namespaces.iterator()); |
|
184 |
|
|
185 |
// new root record |
|
186 |
writer.add(newRecord); |
|
187 |
|
|
188 |
// copy the rest as it is |
|
189 |
while (parser.hasNext()) { |
|
190 |
final XMLEvent resultEvent = parser.nextEvent(); |
|
191 |
|
|
192 |
// TODO: replace with depth tracking instead of close tag tracking. |
|
193 |
if (resultEvent.isEndElement() && resultEvent.asEndElement().getName().getLocalPart().equals(DNETRESULT)) { |
|
194 |
writer.add(eventFactory.get().createEndElement("", null, RESULT)); |
|
195 |
break; |
|
196 |
} |
|
197 |
|
|
198 |
writer.add(resultEvent); |
|
199 |
} |
|
200 |
writer.close(); |
|
201 |
|
|
202 |
indexDocument.addField(INDEX_RESULT, results.toString()); |
|
203 |
} |
|
204 |
|
|
205 |
/** |
|
206 |
* Helper used to add a field to a solr doc. It avoids to add empy fields |
|
207 |
* |
|
208 |
* @param indexDocument |
|
209 |
* @param field |
|
210 |
* @param value |
|
211 |
*/ |
|
212 |
private final void addField(final SolrInputDocument indexDocument, final String field, final String value) { |
|
213 |
String cleaned = value.trim(); |
|
214 |
if (!cleaned.isEmpty()) { |
|
215 |
// log.info("\n\n adding field " + field.toLowerCase() + " value: " + cleaned + "\n"); |
|
216 |
indexDocument.addField(field.toLowerCase(), cleaned); |
|
217 |
} |
|
218 |
} |
|
219 |
|
|
220 |
/** |
|
221 |
* Helper used to get the string from a text element. |
|
222 |
* |
|
223 |
* @param text |
|
224 |
* @return |
|
225 |
*/ |
|
226 |
protected final String getText(final XMLEvent text) { |
|
227 |
if (text.isEndElement()) // log.warn("skipping because isEndOfElement " + text.asEndElement().getName().getLocalPart()); |
|
228 |
return ""; |
|
229 |
|
|
230 |
return text.asCharacters().getData(); |
|
231 |
} |
|
232 |
|
|
233 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/feed/InputDocumentFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.feed; |
|
2 |
|
|
3 |
import java.text.ParseException; |
|
4 |
import java.text.SimpleDateFormat; |
|
5 |
import java.util.Arrays; |
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import javax.xml.stream.XMLStreamException; |
|
9 |
|
|
10 |
import org.apache.solr.common.SolrInputDocument; |
|
11 |
|
|
12 |
/** |
|
13 |
* |
|
14 |
* @author claudio |
|
15 |
* |
|
16 |
*/ |
|
17 |
public abstract class InputDocumentFactory { |
|
18 |
|
|
19 |
public static final String INDEX_FIELD_PREFIX = "__"; |
|
20 |
|
|
21 |
public static final String DS_VERSION = INDEX_FIELD_PREFIX + "dsversion"; |
|
22 |
|
|
23 |
public static final String DS_ID = INDEX_FIELD_PREFIX + "dsid"; |
|
24 |
|
|
25 |
public static final String RESULT = "result"; |
|
26 |
|
|
27 |
public static final String INDEX_RESULT = INDEX_FIELD_PREFIX + RESULT; |
|
28 |
|
|
29 |
public static final String INDEX_RECORD_ID = INDEX_FIELD_PREFIX + "indexrecordidentifier"; |
|
30 |
|
|
31 |
private static final String outFormat = new String("yyyy-MM-dd'T'hh:mm:ss'Z'"); |
|
32 |
|
|
33 |
private final static List<String> dateFormats = Arrays.asList("yyyy-MM-dd'T'hh:mm:ss", "yyyy-MM-dd", "dd-MM-yyyy", "dd/MM/yyyy", "yyyy"); |
|
34 |
|
|
35 |
public abstract SolrInputDocument parseDocument(final String version, String inputDocument, String dsId) throws XMLStreamException; |
|
36 |
|
|
37 |
/** |
|
38 |
* method return a solr-compatible string representation of a date |
|
39 |
* |
|
40 |
* @param date |
|
41 |
* @return |
|
42 |
* @throws DocumentException |
|
43 |
* @throws ParseException |
|
44 |
*/ |
|
45 |
public static String getParsedDateField(final String date) { |
|
46 |
for (String formatString : dateFormats) { |
|
47 |
try { |
|
48 |
return new SimpleDateFormat(outFormat).format(new SimpleDateFormat(formatString).parse(date)); |
|
49 |
} catch (ParseException e) { |
|
50 |
} |
|
51 |
} |
|
52 |
throw new IllegalStateException("unable to parse date: " + date); |
|
53 |
} |
|
54 |
|
|
55 |
public String parseDate(final String date) { |
|
56 |
return getParsedDateField(date); |
|
57 |
} |
|
58 |
|
|
59 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/solr/feed/SolrDocumentMapperFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.solr.feed; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
|
|
5 |
import javax.xml.stream.XMLStreamException; |
|
6 |
|
|
7 |
import org.springframework.beans.factory.annotation.Required; |
|
8 |
|
|
9 |
import com.google.common.base.Function; |
|
10 |
|
|
11 |
import eu.dnetlib.functionality.index.feed.DocumentMapperFactory; |
|
12 |
import eu.dnetlib.functionality.index.model.Any.ValueType; |
|
13 |
import eu.dnetlib.functionality.index.model.document.IndexDocument; |
|
14 |
import eu.dnetlib.functionality.index.model.document.Status; |
|
15 |
import eu.dnetlib.functionality.index.model.util.SolrIndexDocument; |
|
16 |
import eu.dnetlib.functionality.index.utils.MetadataReference; |
|
17 |
|
|
18 |
/** |
|
19 |
* A factory for creating SolrDocumentMapper objects. |
|
20 |
*/ |
|
21 |
public class SolrDocumentMapperFactory implements DocumentMapperFactory { |
|
22 |
|
|
23 |
/** |
|
24 |
* document factory used for the feed process. |
|
25 |
*/ |
|
26 |
private InputDocumentFactory documentFactory; |
|
27 |
|
|
28 |
/** |
|
29 |
* {@inheritDoc} |
|
30 |
* |
|
31 |
* @see eu.dnetlib.functionality.index.feed.DocumentMapperFactory#getRecordMapper(java.util.Map, |
|
32 |
* eu.dnetlib.functionality.index.utils.MetadataReference, java.lang.String, java.lang.String) |
|
33 |
*/ |
|
34 |
@Override |
|
35 |
public Function<String, IndexDocument> getRecordMapper(final Map<String, ValueType> schema, |
|
36 |
final MetadataReference mdRef, |
|
37 |
final String dsId, |
|
38 |
final String version) { |
|
39 |
|
|
40 |
return new Function<String, IndexDocument>() { |
|
41 |
|
|
42 |
@Override |
|
43 |
public IndexDocument apply(final String doc) { |
|
44 |
SolrIndexDocument indexDocument = new SolrIndexDocument(schema, dsId); |
|
45 |
try { |
|
46 |
indexDocument.setContent(documentFactory.parseDocument(version, doc, dsId)); |
|
47 |
} catch (XMLStreamException e) { |
|
48 |
return indexDocument.setMarked(); |
|
49 |
} |
|
50 |
indexDocument.setStatus(Status.OK); |
|
51 |
return indexDocument; |
|
52 |
} |
|
53 |
}; |
|
54 |
} |
|
55 |
|
|
56 |
/** |
|
57 |
* Gets the document factory. |
|
58 |
* |
|
59 |
* @return the documentFactory |
|
60 |
*/ |
|
61 |
public InputDocumentFactory getDocumentFactory() { |
|
62 |
return documentFactory; |
|
63 |
} |
|
64 |
|
|
65 |
/** |
|
66 |
* Sets the document factory. |
|
67 |
* |
|
68 |
* @param documentFactory |
|
69 |
* the documentFactory to set |
|
70 |
*/ |
|
71 |
@Required |
|
72 |
public void setDocumentFactory(final InputDocumentFactory documentFactory) { |
|
73 |
this.documentFactory = documentFactory; |
|
74 |
} |
|
75 |
|
|
76 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/model/util/AnySolrUtil.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.model.util; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
import java.util.Map.Entry; |
|
5 |
|
|
6 |
import org.apache.solr.common.SolrDocumentList; |
|
7 |
import org.apache.solr.common.util.NamedList; |
|
8 |
|
|
9 |
import eu.dnetlib.functionality.index.model.AnyMap; |
|
10 |
import eu.dnetlib.functionality.index.model.DataFactory; |
|
11 |
import eu.dnetlib.functionality.index.model.InvalidValueTypeException; |
|
12 |
import eu.dnetlib.functionality.index.model.Value; |
|
13 |
import eu.dnetlib.functionality.index.model.impl.DefaultDataFactoryImpl; |
|
14 |
|
|
15 |
/** |
|
16 |
* The Class AnySolrUtil. |
|
17 |
*/ |
|
18 |
public class AnySolrUtil extends AnyUtil { |
|
19 |
|
|
20 |
/** |
|
21 |
* Convert named list to any map. |
|
22 |
* |
|
23 |
* @param list |
|
24 |
* the list |
|
25 |
* @param map |
|
26 |
* the map |
|
27 |
* @return the any map |
|
28 |
*/ |
|
29 |
@SuppressWarnings("unchecked") |
|
30 |
public static AnyMap convertNamedListToAnyMap(final NamedList<Object> list, final AnyMap map) { |
|
31 |
final Iterator<Entry<String, Object>> it = list.iterator(); |
|
32 |
while (it.hasNext()) { |
|
33 |
Entry<String, Object> entry = it.next(); |
|
34 |
final String key = entry.getKey(); |
|
35 |
final Object obj = entry.getValue(); |
|
36 |
if (obj instanceof NamedList<?>) { |
|
37 |
final AnyMap subMap = map.getMap(key, true); |
|
38 |
convertNamedListToAnyMap((NamedList<Object>) obj, subMap); |
|
39 |
} else if (obj instanceof SolrDocumentList) { |
|
40 |
SolrDocumentList docList = (SolrDocumentList) obj; |
|
41 |
AnyMap response = DataFactory.DEFAULT.createAnyMap(); |
|
42 |
response.put("numFound", docList.getNumFound()); |
|
43 |
response.put("start", docList.getStart()); |
|
44 |
response.put("maxScore", docList.getMaxScore()); |
|
45 |
response.put("docs", objectToAny(obj)); |
|
46 |
map.put("response", response); |
|
47 |
} else { |
|
48 |
try { |
|
49 |
final Value value = DataFactory.DEFAULT.autoConvertValue(obj); |
|
50 |
map.put(key, value); |
|
51 |
} catch (InvalidValueTypeException exception) { |
|
52 |
; // skip |
|
53 |
} |
|
54 |
} |
|
55 |
} |
|
56 |
return map; |
|
57 |
} |
|
58 |
|
|
59 |
/** |
|
60 |
* Convert named list to any map. |
|
61 |
* |
|
62 |
* @param list |
|
63 |
* the list |
|
64 |
* @return the any map |
|
65 |
*/ |
|
66 |
public static AnyMap convertNamedListToAnyMap(final NamedList<Object> list) { |
|
67 |
return convertNamedListToAnyMap(list, DefaultDataFactoryImpl.INSTANCE.createAnyMap()); |
|
68 |
} |
|
69 |
|
|
70 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/model/util/SolrIndexDocument.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.model.util; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import org.apache.solr.common.SolrInputDocument; |
|
7 |
import org.apache.solr.common.SolrInputField; |
|
8 |
|
|
9 |
import eu.dnetlib.functionality.index.model.Any.ValueType; |
|
10 |
import eu.dnetlib.functionality.index.model.document.AbstractIndexDocument; |
|
11 |
|
|
12 |
// TODO: Auto-generated Javadoc |
|
13 |
/** |
|
14 |
* The Class SolrIndexDocument an implementation of the index document for SOLR. |
|
15 |
*/ |
|
16 |
public class SolrIndexDocument extends AbstractIndexDocument { |
|
17 |
|
|
18 |
/** |
|
19 |
* Instantiates a new solr index document. |
|
20 |
* |
|
21 |
* @param schema |
|
22 |
* the schema |
|
23 |
* @param dsId |
|
24 |
* the ds id |
|
25 |
*/ |
|
26 |
public SolrIndexDocument(final Map<String, ValueType> schema, final String dsId) { |
|
27 |
super(schema, dsId); |
|
28 |
} |
|
29 |
|
|
30 |
/** |
|
31 |
* Instantiates a new solr index document. |
|
32 |
* |
|
33 |
* @param schema |
|
34 |
* the schema |
|
35 |
* @param dsId |
|
36 |
* the ds id |
|
37 |
* @param solrDocument |
|
38 |
* the solr document |
|
39 |
*/ |
|
40 |
public SolrIndexDocument(final Map<String, ValueType> schema, final String dsId, final SolrInputDocument solrDocument) { |
|
41 |
super(schema, dsId); |
|
42 |
addFields(solrDocument); |
|
43 |
} |
|
44 |
|
|
45 |
/** |
|
46 |
* Adds the fields. |
|
47 |
* |
|
48 |
* @param solrDocument |
|
49 |
* the solr document |
|
50 |
*/ |
|
51 |
private void addFields(final SolrInputDocument solrDocument) { |
|
52 |
for (String name : solrDocument.getFieldNames()) { |
|
53 |
Object fieldValue = solrDocument.getFieldValue(name); |
|
54 |
addField(name, fieldValue); |
|
55 |
} |
|
56 |
} |
|
57 |
|
|
58 |
/** |
|
59 |
* Sets the content. |
|
60 |
* |
|
61 |
* @param solrDocument |
|
62 |
* the new content |
|
63 |
*/ |
|
64 |
public void setContent(final SolrInputDocument solrDocument) { |
|
65 |
addFields(solrDocument); |
|
66 |
} |
|
67 |
|
|
68 |
/** |
|
69 |
* Gets the solr document. |
|
70 |
* |
|
71 |
* @return the solr document |
|
72 |
*/ |
|
73 |
public SolrInputDocument getSolrDocument() { |
|
74 |
|
|
75 |
Map<String, SolrInputField> data = new HashMap<String, SolrInputField>(); |
|
76 |
for (String key : fields.keySet()) { |
|
77 |
SolrInputField solrField = new SolrInputField(key); |
|
78 |
for (Object o : fields.get(key)) { |
|
79 |
solrField.addValue(o, 1.0f); |
|
80 |
} |
|
81 |
data.put(key, solrField); |
|
82 |
} |
|
83 |
return new SolrInputDocument(data); |
|
84 |
} |
|
85 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/query/SolrIndexQueryResponseFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.query; |
|
2 |
|
|
3 |
import org.apache.solr.client.solrj.response.QueryResponse; |
|
4 |
|
|
5 |
import eu.dnetlib.functionality.index.client.IndexClientException; |
|
6 |
import eu.dnetlib.functionality.index.utils.MetadataReference; |
|
7 |
|
|
8 |
/** |
|
9 |
* The Class SolrIndexQueryResponseFactory. |
|
10 |
*/ |
|
11 |
public class SolrIndexQueryResponseFactory extends QueryResponseFactory<QueryResponse> { |
|
12 |
|
|
13 |
/** |
|
14 |
* {@inheritDoc} |
|
15 |
* |
|
16 |
* @throws IndexClientException |
|
17 |
* |
|
18 |
* @see eu.dnetlib.functionality.index.query.QueryResponseFactory#getQueryResponseParser(eu.dnetlib.functionality.index.query.IndexQueryResponse, |
|
19 |
* eu.dnetlib.functionality.index.utils.MetadataReference) |
|
20 |
*/ |
|
21 |
@Override |
|
22 |
public QueryResponseParser getQueryResponseParser(final IndexQueryResponse<QueryResponse> queryRsp, final MetadataReference mdRef) |
|
23 |
throws IndexClientException { |
|
24 |
|
|
25 |
QueryResponse response = queryRsp.getContextualQueryResponse(); |
|
26 |
return new SolrResponseParser(highlightUtils, browseAliases.get(mdRef), returnEmptyFields, includeRanking, response); |
|
27 |
} |
|
28 |
|
|
29 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/query/SolrIndexQueryFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.query; |
|
2 |
|
|
3 |
import java.util.Arrays; |
|
4 |
|
|
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 |
import org.apache.solr.common.params.FacetParams; |
|
8 |
|
|
9 |
import com.google.common.collect.BiMap; |
|
10 |
|
|
11 |
import eu.dnetlib.functionality.index.client.AbstractIndexClient; |
|
12 |
import eu.dnetlib.functionality.index.client.IndexClientException; |
|
13 |
import eu.dnetlib.functionality.index.cql.TranslatedQuery; |
|
14 |
import eu.dnetlib.functionality.index.query.Pruner.Result; |
|
15 |
import eu.dnetlib.functionality.index.solr.utils.HighlightUtils; |
|
16 |
import eu.dnetlib.functionality.index.utils.IndexFieldUtility; |
|
17 |
import eu.dnetlib.functionality.index.utils.MetadataReference; |
|
18 |
|
|
19 |
/** |
|
20 |
* A factory for creating SolrIndexQuery objects. |
|
21 |
*/ |
|
22 |
public class SolrIndexQueryFactory extends IndexQueryFactory { |
|
23 |
|
|
24 |
/** |
|
25 |
* logger. |
|
26 |
*/ |
|
27 |
private static final Log log = LogFactory.getLog(SolrIndexQueryFactory.class); |
|
28 |
|
|
29 |
/** The Property name SERVICE_HIGHLIGHT_ENABLE. */ |
|
30 |
private static final String SERVICE_HIGHLIGHT_ENABLE = "service.index.solr.highlight.enable"; |
|
31 |
private static String GROUPBY = "&groupby="; |
|
32 |
private static String AND = " and "; |
|
33 |
private static String CLAUSE_PREFIX = "solr." + FacetParams.FACET_FIELD + "="; |
|
34 |
private static String BROWSE_PREFIX = "(>solr=SOLR solr." + FacetParams.FACET + "=true" + AND; |
|
35 |
private static String GROUP_BY_DELIMITER = ","; |
|
36 |
|
|
37 |
/* |
|
38 |
* (non-Javadoc) |
|
39 |
* |
|
40 |
* @see eu.dnetlib.functionality.index.query.IndexQueryFactory#newInstance(eu.dnetlib.functionality.index.cql.TranslatedQuery, |
|
41 |
* eu.dnetlib.functionality.index.query.Pruner.Result, eu.dnetlib.functionality.index.query.QueryLanguage) |
|
42 |
*/ |
|
43 |
@Override |
|
44 |
protected IndexQuery newInstance(final TranslatedQuery cql, final Result res, final QueryLanguage lang) { |
|
45 |
|
|
46 |
switch (lang) { |
|
47 |
case CQL: |
|
48 |
return new SolrIndexQuery(cql, res.getOptionMap()); |
|
49 |
case SOLR: |
|
50 |
return new SolrIndexQuery(res.getNode().toCQL(), res.getOptionMap()); |
|
51 |
default: |
|
52 |
throw new IllegalArgumentException("invalid query language: " + lang); |
|
53 |
} |
|
54 |
} |
|
55 |
|
|
56 |
/* |
|
57 |
* (non-Javadoc) |
|
58 |
* |
|
59 |
* @see eu.dnetlib.functionality.index.query.IndexQueryFactory#setQueryOptions(eu.dnetlib.functionality.index.query.IndexQuery, |
|
60 |
* eu.dnetlib.functionality.index.IndexServerDAO) |
|
61 |
*/ |
|
62 |
@Override |
|
63 |
protected IndexQuery setQueryOptions(final IndexQuery indexQuery, final AbstractIndexClient client) { |
|
64 |
|
|
65 |
final SolrIndexQuery solrQuery = (SolrIndexQuery) indexQuery; |
|
66 |
|
|
67 |
boolean isHighlightEnabled = Boolean.parseBoolean(client.getServiceProperties().get(SERVICE_HIGHLIGHT_ENABLE)); |
|
68 |
if (solrQuery.getHighlight() & isHighlightEnabled) { |
|
69 |
solrQuery.setHighlightFragsize(0).setHighlightSnippets(1).setHighlightSimplePre(HighlightUtils.DEFAULT_HL_PRE) |
|
70 |
.setHighlightSimplePost(HighlightUtils.DEFAULT_HL_POST).addHighlightField(IndexFieldUtility.RESULT) |
|
71 |
.addField(IndexFieldUtility.INDEX_RECORD_ID); |
|
72 |
} |
|
73 |
|
|
74 |
solrQuery.addField(IndexFieldUtility.RESULT); |
|
75 |
if (solrQuery.getFacetFields() != null) { |
|
76 |
log.debug("getFacetFields() " + Arrays.asList(solrQuery.getFacetFields())); |
|
77 |
solrQuery.setFacetMinCount(1); |
|
78 |
} |
|
79 |
|
|
80 |
return solrQuery; |
|
81 |
} |
|
82 |
|
|
83 |
@Override |
|
84 |
public String getBrowsingFields(String query, final MetadataReference mdRef) throws IndexClientException { |
|
85 |
if (query.contains(GROUPBY)) { |
|
86 |
|
|
87 |
final BiMap<String, String> aliases = browseAliases.get(mdRef); |
|
88 |
|
|
89 |
final String cql = query.split(GROUPBY)[0]; |
|
90 |
String clauses = query.split(GROUPBY)[1].replaceAll(GROUP_BY_DELIMITER, AND); |
|
91 |
String newclauses = ""; |
|
92 |
|
|
93 |
for (String clause : clauses.split(AND)) { |
|
94 |
final String cleanClause = clause.trim().toLowerCase(); |
|
95 |
|
|
96 |
String clauseAlias = cleanClause; |
|
97 |
if ((aliases != null) && !aliases.isEmpty()) { |
|
98 |
clauseAlias = aliases.get(cleanClause) == null ? aliases.inverse().get(cleanClause) : aliases.get(cleanClause); |
|
99 |
} |
|
100 |
|
|
101 |
final String fieldName = clauseAlias != null ? clauseAlias : cleanClause; |
|
102 |
clauses = clauses.replaceAll(clause, CLAUSE_PREFIX + fieldName); |
|
103 |
|
|
104 |
if (!newclauses.isEmpty()) { |
|
105 |
newclauses += AND; |
|
106 |
} |
|
107 |
newclauses += CLAUSE_PREFIX + fieldName; |
|
108 |
} |
|
109 |
query = BROWSE_PREFIX + newclauses + ")" + AND + cql; |
|
110 |
log.debug("low level browse query " + query); |
|
111 |
|
|
112 |
} |
|
113 |
return query; |
|
114 |
} |
|
115 |
|
|
116 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/query/SolrResponseParser.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.query; |
|
2 |
|
|
3 |
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap; |
|
4 |
|
|
5 |
import java.util.Collection; |
|
6 |
import java.util.List; |
|
7 |
import java.util.Map; |
|
8 |
|
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.apache.solr.client.solrj.response.FacetField; |
|
12 |
import org.apache.solr.client.solrj.response.FacetField.Count; |
|
13 |
import org.apache.solr.client.solrj.response.QueryResponse; |
|
14 |
import org.apache.solr.common.SolrDocument; |
|
15 |
import org.apache.solr.common.SolrDocumentList; |
|
16 |
|
|
17 |
import com.google.common.base.Predicate; |
|
18 |
import com.google.common.collect.BiMap; |
|
19 |
import com.google.common.collect.Iterables; |
|
20 |
import com.google.common.collect.Lists; |
|
21 |
|
|
22 |
import eu.dnetlib.data.provision.index.rmi.BrowsingRow; |
|
23 |
import eu.dnetlib.data.provision.index.rmi.GroupResult; |
|
24 |
import eu.dnetlib.functionality.index.utils.IndexFieldUtility; |
|
25 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
|
26 |
|
|
27 |
/** |
|
28 |
* The Class SolrResponseParser. |
|
29 |
*/ |
|
30 |
public class SolrResponseParser extends QueryResponseParser { |
|
31 |
|
|
32 |
/** |
|
33 |
* logger. |
|
34 |
*/ |
|
35 |
private static final Log log = LogFactory.getLog(SolrResponseParser.class); |
|
36 |
|
|
37 |
/** |
|
38 |
* Lower level response. |
|
39 |
*/ |
|
40 |
private QueryResponse queryRsp = null; |
|
41 |
|
|
42 |
/** The wrapper rank. */ |
|
43 |
protected final UnaryFunction<String, SolrDocument> wrapperRank = new UnaryFunction<String, SolrDocument>() { |
|
44 |
|
|
45 |
@Override |
|
46 |
public String evaluate(final SolrDocument doc) { |
|
47 |
return addRanking(getSingleField(doc, IndexFieldUtility.RESULT), getSingleField(doc, IndexFieldUtility.SCORE_FIELD)); |
|
48 |
} |
|
49 |
}; |
|
50 |
|
|
51 |
/** The wrapper no rank. */ |
|
52 |
protected final UnaryFunction<String, SolrDocument> wrapperNoRank = new UnaryFunction<String, SolrDocument>() { |
|
53 |
|
|
54 |
@Override |
|
55 |
public String evaluate(final SolrDocument doc) { |
|
56 |
return wrap(getSingleField(doc, IndexFieldUtility.RESULT)); |
|
57 |
} |
|
58 |
}; |
|
59 |
|
|
60 |
/** |
|
61 |
* The Constructor. |
|
62 |
* |
|
63 |
* @param highlightUtils |
|
64 |
* the highlight utils |
|
65 |
* @param aliases |
|
66 |
* the aliases |
|
67 |
* @param returnEmptyFields |
|
68 |
* the return empty fields |
|
69 |
* @param includeRanking |
|
70 |
* the include ranking |
|
71 |
* @param response |
|
72 |
* the response |
|
73 |
*/ |
|
74 |
public SolrResponseParser(final UnaryFunction<String, String> highlightUtils, final BiMap<String, String> aliases, final boolean returnEmptyFields, |
|
75 |
final boolean includeRanking, final QueryResponse response) { |
|
76 |
super(highlightUtils, aliases, returnEmptyFields, includeRanking); |
|
77 |
this.queryRsp = response; |
|
78 |
} |
|
79 |
|
|
80 |
/** |
|
81 |
* {@inheritDoc} |
|
82 |
* |
|
83 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getNumFound() |
|
84 |
*/ |
|
85 |
@Override |
|
86 |
public long getNumFound() { |
|
87 |
|
|
88 |
return this.queryRsp.getResults().getNumFound(); |
|
89 |
} |
|
90 |
|
|
91 |
/** |
|
92 |
* {@inheritDoc} |
|
93 |
* |
|
94 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getQueryTime() |
|
95 |
*/ |
|
96 |
@Override |
|
97 |
public int getQueryTime() { |
|
98 |
return queryRsp.getQTime(); |
|
99 |
} |
|
100 |
|
|
101 |
/** |
|
102 |
* {@inheritDoc} |
|
103 |
* |
|
104 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getElapsedTime() |
|
105 |
*/ |
|
106 |
@Override |
|
107 |
public long getElapsedTime() { |
|
108 |
return queryRsp.getElapsedTime(); |
|
109 |
} |
|
110 |
|
|
111 |
/** |
|
112 |
* {@inheritDoc} |
|
113 |
* |
|
114 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getStatus() |
|
115 |
*/ |
|
116 |
@Override |
|
117 |
public String getStatus() { |
|
118 |
return String.valueOf(queryRsp.getStatus()); |
|
119 |
} |
|
120 |
|
|
121 |
/** |
|
122 |
* {@inheritDoc} |
|
123 |
* |
|
124 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getCurrentSize() |
|
125 |
*/ |
|
126 |
@Override |
|
127 |
public int getCurrentSize() { |
|
128 |
return queryRsp.getResults().size(); |
|
129 |
} |
|
130 |
|
|
131 |
/** |
|
132 |
* Gets the query response. |
|
133 |
* |
|
134 |
* @return the query response |
|
135 |
*/ |
|
136 |
public QueryResponse getQueryResponse() { |
|
137 |
return queryRsp; |
|
138 |
} |
|
139 |
|
|
140 |
/** |
|
141 |
* {@inheritDoc} |
|
142 |
* |
|
143 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getResults() |
|
144 |
*/ |
|
145 |
@Override |
|
146 |
public List<String> getResults() { |
|
147 |
return asRankedList(queryRsp.getResults()); |
|
148 |
} |
|
149 |
|
|
150 |
/** |
|
151 |
* {@inheritDoc} |
|
152 |
* |
|
153 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getNumberOfBrowsingResults() |
|
154 |
*/ |
|
155 |
@Override |
|
156 |
public Long getNumberOfBrowsingResults() { |
|
157 |
List<FacetField> ffList = queryRsp.getFacetFields(); |
|
158 |
Long maxCount = 0L; |
|
159 |
|
|
160 |
if (ffList != null) { |
|
161 |
for (FacetField ff : ffList) { |
|
162 |
if (ff != null) { |
|
163 |
Long countFacets = countFacets(ff.getValues()); |
|
164 |
if (countFacets > maxCount) { |
|
165 |
maxCount = countFacets; |
|
166 |
} |
|
167 |
} |
|
168 |
} |
|
169 |
} |
|
170 |
return maxCount; |
|
171 |
} |
|
172 |
|
|
173 |
/** |
|
174 |
* {@inheritDoc} |
|
175 |
* |
|
176 |
* @see eu.dnetlib.functionality.index.query.QueryResponseParser#getBrowsingResults() |
|
177 |
*/ |
|
178 |
@Override |
|
179 |
public List<BrowsingRow> getBrowsingResults() { |
|
180 |
List<BrowsingRow> bresList = Lists.newArrayList(); |
|
181 |
List<GroupResult> facets = Lists.newArrayList(); |
|
182 |
|
|
183 |
final List<FacetField> ffList = queryRsp.getFacetFields(); |
|
184 |
|
|
185 |
Long numberOfBrowsingResults = getNumberOfBrowsingResults(); |
|
186 |
for (int i = 0; (ffList != null) && (i < numberOfBrowsingResults); i++) { |
|
187 |
for (FacetField ff : ffList) { |
|
188 |
|
|
189 |
String name = aliases.inverse().get(ff.getName()); |
|
190 |
|
|
191 |
// fix #1456 |
|
192 |
if (name == null) { |
|
193 |
name = ff.getName(); |
|
194 |
} |
|
195 |
|
|
196 |
final Count facet = getFacet(ff, i); |
|
197 |
|
|
198 |
if ((facet != null) && (facet.getCount() > 0)) { |
|
199 |
|
|
200 |
final String value = facet.getName(); |
|
201 |
final int count = (int) facet.getCount(); |
|
202 |
|
|
203 |
if (returnEmptyFields || !value.isEmpty()) { |
|
204 |
facets.add(new GroupResult(name, value, count)); |
|
205 |
} |
|
206 |
} |
|
207 |
} |
|
208 |
|
|
209 |
if (facets.size() > 0) { |
|
210 |
bresList.add(new BrowsingRow(Lists.newArrayList(facets))); |
|
211 |
facets.clear(); |
|
212 |
} |
|
213 |
} |
|
214 |
if (log.isDebugEnabled()) { |
|
215 |
log.debug("BrowsingResult size: " + bresList.size()); |
|
216 |
} |
|
217 |
return bresList; |
|
218 |
} |
|
219 |
|
|
220 |
// /////////////// helpers |
|
221 |
|
|
222 |
/** |
|
223 |
* Gets the facet. |
|
224 |
* |
|
225 |
* @param ff |
|
226 |
* the ff |
|
227 |
* @param pos |
|
228 |
* the pos |
|
229 |
* @return the facet |
|
230 |
*/ |
|
231 |
private Count getFacet(final FacetField ff, final int pos) { |
|
232 |
|
|
233 |
if ((ff.getValues() == null) || (pos >= ff.getValues().size())) return null; |
|
234 |
return ff.getValues().get(pos); |
|
235 |
} |
|
236 |
|
|
237 |
/** |
|
238 |
* Given SolrDocumentList, return a List of Strings, representing it. |
|
239 |
* |
|
240 |
* @param documentList |
|
241 |
* the document list |
|
242 |
* @return the list< string> |
|
243 |
*/ |
|
244 |
private List<String> asRankedList(final SolrDocumentList documentList) { |
|
245 |
|
|
246 |
UnaryFunction<String, SolrDocument> wrapper = includeRanking ? wrapperRank : wrapperNoRank; |
|
247 |
|
|
248 |
if (queryRsp.getHighlighting() != null) return listMap(listMap(documentList, new UnaryFunction<String, SolrDocument>() { |
|
249 |
|
|
250 |
@Override |
|
251 |
public String evaluate(final SolrDocument doc) { |
|
252 |
|
|
253 |
String score = getSingleField(doc, IndexFieldUtility.SCORE_FIELD); |
|
254 |
|
|
255 |
String hl = getHighlighting(getSingleField(doc, IndexFieldUtility.INDEX_RECORD_ID)); |
|
256 |
String res = hl != null ? hl : getSingleField(doc, IndexFieldUtility.RESULT); |
|
257 |
|
|
258 |
return includeRanking ? addRanking(res, score) : wrap(res); |
|
259 |
} |
|
260 |
}), highlightUtils); |
|
261 |
|
|
262 |
return listMap(documentList, wrapper); |
|
263 |
} |
|
264 |
|
|
265 |
/** |
|
266 |
* Converts a String document to |
|
267 |
* |
|
268 |
* <record rank="score"> [document] </record>. |
|
269 |
* |
|
270 |
* @param doc |
|
271 |
* the doc |
|
272 |
* @param score |
|
273 |
* the score |
|
274 |
* @return the string |
|
275 |
*/ |
|
276 |
private String addRanking(final String doc, final String score) { |
|
277 |
return new String("<record rank=\"" + score + "\">" + doc + "</record>"); |
|
278 |
} |
|
279 |
|
|
280 |
/** |
|
281 |
* Wraps the given document as <record> [document] </record>. |
|
282 |
* |
|
283 |
* @param doc |
|
284 |
* the doc |
|
285 |
* @return the string |
|
286 |
*/ |
|
287 |
private String wrap(final String doc) { |
|
288 |
return new String("<record>" + doc + "</record>"); |
|
289 |
} |
|
290 |
|
|
291 |
/** |
|
292 |
* Gets the single field. |
|
293 |
* |
|
294 |
* @param doc |
|
295 |
* the doc |
|
296 |
* @param fieldName |
|
297 |
* the field name |
|
298 |
* @return the single field |
|
299 |
*/ |
|
300 |
@SuppressWarnings("unchecked") |
|
301 |
protected String getSingleField(final SolrDocument doc, final String fieldName) { |
|
302 |
Object value = doc.getFieldValue(fieldName); |
|
303 |
if (value instanceof Collection) return Iterables.getOnlyElement((Iterable<String>) value); |
|
304 |
return String.valueOf(value); |
|
305 |
} |
|
306 |
|
|
307 |
/** |
|
308 |
* Gets the highlighting. |
|
309 |
* |
|
310 |
* @param docId |
|
311 |
* the doc id |
|
312 |
* @return the highlighting |
|
313 |
*/ |
|
314 |
private String getHighlighting(final String docId) { |
|
315 |
final Map<String, List<String>> highlight = queryRsp.getHighlighting().get(docId); |
|
316 |
|
|
317 |
String result = new String(); |
|
318 |
if ((highlight != null) && (highlight.get(IndexFieldUtility.RESULT) != null)) { |
|
319 |
for (String s : highlight.get(IndexFieldUtility.RESULT)) { |
|
320 |
result = result.concat(s); |
|
321 |
} |
|
322 |
return result; |
|
323 |
} |
|
324 |
return null; |
|
325 |
} |
|
326 |
|
|
327 |
/** |
|
328 |
* helper method. |
|
329 |
* |
|
330 |
* @param facets |
|
331 |
* the list of facets to analyze |
|
332 |
* @return the number of non-empty facets in the list whose count is greater than zero |
|
333 |
*/ |
|
334 |
private Long countFacets(final List<Count> facets) { |
|
335 |
|
|
336 |
if (facets == null) return 0L; |
|
337 |
|
|
338 |
return (long) Iterables.size(Iterables.filter(facets, new Predicate<Count>() { |
|
339 |
|
|
340 |
@Override |
|
341 |
public boolean apply(final Count c) { |
|
342 |
return (c != null) && (c.getName() != null) && !c.getName().isEmpty() && (c.getCount() > 0); |
|
343 |
} |
|
344 |
})); |
|
345 |
} |
|
346 |
|
|
347 |
@Override |
|
348 |
public long getStart() { |
|
349 |
// TODO Auto-generated method stub |
|
350 |
return queryRsp.getResults().getStart(); |
|
351 |
} |
|
352 |
|
|
353 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/query/SolrIndexQueryResponse.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.query; |
|
2 |
|
|
3 |
import org.apache.solr.client.solrj.response.QueryResponse; |
|
4 |
|
|
5 |
/** |
|
6 |
* The Class SolrIndexQueryResponse. |
|
7 |
*/ |
|
8 |
public class SolrIndexQueryResponse implements IndexQueryResponse<QueryResponse> { |
|
9 |
|
|
10 |
private QueryResponse solrQueryResponse; |
|
11 |
|
|
12 |
public SolrIndexQueryResponse(final QueryResponse solrQueryResponse) { |
|
13 |
this.solrQueryResponse = solrQueryResponse; |
|
14 |
} |
|
15 |
|
|
16 |
@Override |
|
17 |
public QueryResponse getContextualQueryResponse() { |
|
18 |
return solrQueryResponse; |
|
19 |
} |
|
20 |
|
|
21 |
} |
modules/dnet-index-solr-client/trunk/src/main/java/eu/dnetlib/functionality/index/query/SolrIndexQuery.java | ||
---|---|---|
1 |
package eu.dnetlib.functionality.index.query; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
import org.apache.solr.client.solrj.SolrQuery; |
|
9 |
import org.apache.solr.common.params.ModifiableSolrParams; |
|
10 |
import org.apache.solr.common.params.SolrParams; |
|
11 |
|
|
12 |
import eu.dnetlib.functionality.index.cql.QueryOptions; |
|
13 |
import eu.dnetlib.functionality.index.cql.TranslatedQuery; |
|
14 |
import eu.dnetlib.functionality.index.utils.IndexFieldUtility; |
|
15 |
|
|
16 |
/** |
|
17 |
* The Class SolrIndexQuery. |
|
18 |
* |
|
19 |
* @author claudio, sandro |
|
20 |
*/ |
|
21 |
public class SolrIndexQuery extends SolrQuery implements IndexQuery { |
|
22 |
|
|
23 |
/** The Constant serialVersionUID. */ |
|
24 |
private static final long serialVersionUID = 1L; |
|
25 |
|
|
26 |
/** |
|
27 |
* logger. |
|
28 |
*/ |
|
29 |
private static final Log log = LogFactory.getLog(SolrIndexQuery.class); |
|
30 |
|
|
31 |
/**
 * Instantiates a new solr index query from a translated query: its lucene form becomes the query string, then its
 * options are applied as cql params.
 *
 * @param query
 *            the translated query
 * @param options
 *            the options, handled by the (String, Map) constructor
 */
public SolrIndexQuery(final TranslatedQuery query, final Map<String, List<String>> options) {
    this(query.asLucene(), options);
    setCqlParams(query.getOptions());
    log.debug("internal solr query: " + this);
}
|
46 |
|
|
47 |
/** |
|
48 |
* Instantiates a new solr index query. |
|
49 |
* |
|
50 |
* @param query |
|
51 |
* the query |
|
52 |
* @param options |
|
53 |
* the options |
|
54 |
*/ |
|
55 |
public SolrIndexQuery(final String query, final Map<String, List<String>> options) { |
|
56 |
this(query); |
|
57 |
|
|
58 |
// TODO verify that the input options belongs to solr |
|
59 |
super.add(getQueryParams(options)); |
|
60 |
} |
|
61 |
|
|
62 |
/**
 * Instantiates a new solr index query out of a plain query string.
 *
 * @param query
 *            the query, passed as it is to the SolrQuery constructor
 */
public SolrIndexQuery(final String query) {
    super(query);
}
|
71 |
|
|
72 |
@Override |
|
73 |
public IndexQuery setQueryOffset(final int offset) { |
|
74 |
super.setStart(offset); |
|
75 |
return this; |
|
76 |
} |
|
77 |
|
|
78 |
@Override |
|
79 |
public IndexQuery setQueryLimit(final int limit) { |
|
80 |
super.setRows(limit); |
|
81 |
return this; |
|
82 |
} |
|
83 |
|
Also available in: Unified diff
Implemented index client