Revision 31954
Added by Alessia Bardi over 9 years ago
modules/cnr-cql-utils/trunk/src/test/java/eu/dnetlib/functionality/index/cql/CqlTranslatorImplTest.java | ||
---|---|---|
63 | 63 |
@Test |
64 | 64 |
public void testToSolr_2() throws Exception { |
65 | 65 |
String query = "(_all=faust AND _all=pippo) AND _all<>cinegiornale"; |
66 |
Node node = |
|
67 |
new AndNode(new AndNode(new TermNode("_all", Relation.EQUAL, "faust"), new TermNode("_all", Relation.EQUAL, "pippo")), new TermNode("_all", |
|
68 |
Relation.NOT, "cinegiornale")); |
|
66 |
Node node = new AndNode(new AndNode(new TermNode("_all", Relation.EQUAL, "faust"), new TermNode("_all", Relation.EQUAL, "pippo")), new TermNode("_all", |
|
67 |
Relation.NOT, "cinegiornale")); |
|
69 | 68 |
|
70 | 69 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
71 | 70 |
|
... | ... | |
204 | 203 |
BiMap<String, String> aliases = HashBiMap.create(); |
205 | 204 |
|
206 | 205 |
cqlOptions.put("wildcard", Lists.newArrayList("true")); |
207 |
TranslatedQuery luceneQuery = |
|
208 |
translator.getTranslatedQuery(parsed, new IdentityCqlValueTransformerMap(), cqlOptions, aliases, new HashMap<String, String>());
|
|
206 |
TranslatedQuery luceneQuery = translator.getTranslatedQuery(parsed, new IdentityCqlValueTransformerMap(), cqlOptions, aliases,
|
|
207 |
new HashMap<String, String>()); |
|
209 | 208 |
printQuery(cqlQuery, luceneQuery.asLucene()); |
210 | 209 |
|
211 | 210 |
cqlOptions = Maps.newHashMap(); |
... | ... | |
348 | 347 |
|
349 | 348 |
String query = "(title = ESTUDIO and title = abierto) not (title = mediante)"; |
350 | 349 |
|
351 |
Node node = |
|
352 |
new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new TermNode( |
|
353 |
"title", Relation.EQUAL, "mediante")); |
|
350 |
Node node = new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new TermNode( |
|
351 |
"title", Relation.EQUAL, "mediante")); |
|
354 | 352 |
|
355 | 353 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
356 | 354 |
|
... | ... | |
363 | 361 |
|
364 | 362 |
String query = "(title = ESTUDIO and title = abierto) not (title = mediante or title = verde)"; |
365 | 363 |
|
366 |
Node node = |
|
367 |
new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new OrNode( |
|
368 |
new TermNode("title", Relation.EQUAL, "mediante"), new TermNode("title", Relation.EQUAL, "verde"))); |
|
364 |
Node node = new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new OrNode( |
|
365 |
new TermNode("title", Relation.EQUAL, "mediante"), new TermNode("title", Relation.EQUAL, "verde"))); |
|
369 | 366 |
|
370 | 367 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
371 | 368 |
|
... | ... | |
496 | 493 |
public void testToSolr_27() throws Exception { |
497 | 494 |
|
498 | 495 |
String query = "((((publicationdate =/within \"2000-01-01 2010-01-01\" and title = \"ddd\") and y < 2010) or y <= 2010) or y > 2010) or y >= 2010"; |
499 |
Node node = |
|
500 |
new OrNode(new OrNode(new OrNode(new AndNode(new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), |
|
501 |
new TermNode("title", Relation.EQUAL, "ddd")), new TermNode("y", Relation.LT, "2010")), new TermNode("y", Relation.LTE, "2010")), |
|
502 |
new TermNode("y", Relation.GT, "2010")), new TermNode("y", Relation.GTE, "2010")); |
|
496 |
Node node = new OrNode(new OrNode(new OrNode(new AndNode(new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), |
|
497 |
new TermNode("title", Relation.EQUAL, "ddd")), new TermNode("y", Relation.LT, "2010")), new TermNode("y", Relation.LTE, "2010")), new TermNode( |
|
498 |
"y", Relation.GT, "2010")), new TermNode("y", Relation.GTE, "2010")); |
|
503 | 499 |
|
504 | 500 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
505 | 501 |
|
... | ... | |
512 | 508 |
|
513 | 509 |
String query = "publicationdate =/within \"2000-01-01 2010-01-01\" and (title = \"ddd\" and (y < 2010 or (y <= 2010 or (y > 2010 or y >= 2010))))"; |
514 | 510 |
|
515 |
Node node = |
|
516 |
new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), new AndNode( |
|
517 |
new TermNode("title", Relation.EQUAL, "ddd"), new OrNode(new TermNode("y", Relation.LT, "2010"), new OrNode(new TermNode("y", |
|
518 |
Relation.LTE, "2010"), new OrNode(new TermNode("y", Relation.GT, "2010"), new TermNode("y", Relation.GTE, "2010")))))); |
|
511 |
Node node = new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), new AndNode(new TermNode("title", Relation.EQUAL, |
|
512 |
"ddd"), new OrNode(new TermNode("y", Relation.LT, "2010"), new OrNode(new TermNode("y", Relation.LTE, "2010"), new OrNode(new TermNode("y", |
|
513 |
Relation.GT, "2010"), new TermNode("y", Relation.GTE, "2010")))))); |
|
519 | 514 |
|
520 | 515 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
521 | 516 |
|
... | ... | |
528 | 523 |
|
529 | 524 |
String query = "dateaccepted =/within \"1900-01-01 2000-01-01\" and dateaccepted >= 2010-01-01"; |
530 | 525 |
|
531 |
Node node = |
|
532 |
new AndNode(new TermNode("dateaccepted", Relation.WITHIN, "1900-01-01 2000-01-01"), new TermNode("dateaccepted", Relation.GTE, "2010-01-01"));
|
|
526 |
Node node = new AndNode(new TermNode("dateaccepted", Relation.WITHIN, "1900-01-01 2000-01-01"),
|
|
527 |
new TermNode("dateaccepted", Relation.GTE, "2010-01-01")); |
|
533 | 528 |
|
534 | 529 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
535 | 530 |
|
... | ... | |
550 | 545 |
public void testToSolr_30() throws Exception { |
551 | 546 |
|
552 | 547 |
String query = "a = 1 and b = 2 and c = 3"; |
553 |
Node node = |
|
554 |
new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "1"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", Relation.EQUAL, "3"));
|
|
548 |
Node node = new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "1"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", Relation.EQUAL,
|
|
549 |
"3")); |
|
555 | 550 |
|
556 | 551 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
557 | 552 |
|
... | ... | |
563 | 558 |
public void testToSolr_31() throws Exception { |
564 | 559 |
|
565 | 560 |
String query = "a = \"pippo pluto\" and b = 2 and c = 3"; |
566 |
Node node = |
|
567 |
new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "pippo pluto"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", |
|
568 |
Relation.EQUAL, "3")); |
|
561 |
Node node = new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "pippo pluto"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", |
|
562 |
Relation.EQUAL, "3")); |
|
569 | 563 |
|
570 | 564 |
TranslatedQuery parsed = translator.getTranslatedQuery(query); |
571 | 565 |
|
... | ... | |
635 | 629 |
printQuery(cqlQuery, lucene); |
636 | 630 |
} |
637 | 631 |
|
632 |
@Test |
|
633 |
public void testDateQuery() throws CQLParseException, IOException { |
|
634 |
String cqlQuery = "(resultdateofacceptance <= \"2012-03-15\")"; |
|
635 |
Map<String, List<String>> options = new HashMap<String, List<String>>(); |
|
636 |
String lucene = translator.toLucene(cqlQuery, options); |
|
637 |
printQuery(cqlQuery, lucene); |
|
638 |
} |
|
639 |
|
|
640 |
@Test |
|
641 |
public void testFullISODateQuery() throws CQLParseException, IOException { |
|
642 |
String cqlQuery = "(resultdateofacceptance <= 2012-03-15T00:00:00Z)"; |
|
643 |
Map<String, List<String>> options = new HashMap<String, List<String>>(); |
|
644 |
String lucene = translator.toLucene(cqlQuery, options); |
|
645 |
printQuery(cqlQuery, lucene); |
|
646 |
} |
|
647 |
|
|
648 |
@Test |
|
649 |
public void testNonDateQuery() throws CQLParseException, IOException { |
|
650 |
String cqlQuery = "(resultdateofacceptance <= 2012.03.15T00:00:00Z)"; |
|
651 |
Map<String, List<String>> options = new HashMap<String, List<String>>(); |
|
652 |
String lucene = translator.toLucene(cqlQuery, options); |
|
653 |
printQuery(cqlQuery, lucene); |
|
654 |
} |
|
655 |
|
|
656 |
@Test |
|
657 |
public void testNonDateQuery2() throws CQLParseException, IOException { |
|
658 |
String cqlQuery = "(resultdateofacceptance <= ciao)"; |
|
659 |
Map<String, List<String>> options = new HashMap<String, List<String>>(); |
|
660 |
String lucene = translator.toLucene(cqlQuery, options); |
|
661 |
printQuery(cqlQuery, lucene); |
|
662 |
} |
|
663 |
|
|
664 |
@Test |
|
665 |
public void testDateWrong() throws Exception { |
|
666 |
|
|
667 |
String cqlQuery = "publicationdate =/within \"2000-01-01 2010.99.01\""; |
|
668 |
String luceneQuery = translator.toLucene(cqlQuery); |
|
669 |
|
|
670 |
printQuery(cqlQuery, luceneQuery); |
|
671 |
} |
|
672 |
|
|
638 | 673 |
private void printQuery(final String cql, final String lucene) throws CQLParseException, IOException { |
639 | 674 |
System.out.println("CQL: " + cql); |
640 | 675 |
// System.out.println("PARSED: " + new CQLParser().parse(cql).toCQL()); |
modules/cnr-cql-utils/trunk/src/main/java/eu/dnetlib/functionality/index/parse/TermNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.functionality.index.parse; |
2 | 2 |
|
3 |
import java.text.ParseException; |
|
4 |
import java.text.SimpleDateFormat; |
|
3 | 5 |
import java.util.List; |
4 | 6 |
import java.util.Map; |
5 | 7 |
import java.util.Map.Entry; |
6 | 8 |
import java.util.StringTokenizer; |
7 | 9 |
|
8 |
import org.apache.lucene.queryparser.classic.QueryParser; |
|
10 |
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
|
9 | 11 |
|
10 | 12 |
import com.google.common.collect.BiMap; |
11 | 13 |
import com.google.common.collect.HashBiMap; |
... | ... | |
14 | 16 |
public class TermNode extends Node { |
15 | 17 |
|
16 | 18 |
public static final String dnetDefaultField = "__all"; |
17 |
|
|
19 |
|
|
18 | 20 |
private String field; |
19 | 21 |
private Relation rel; |
20 | 22 |
private String value; |
21 | 23 |
private Map<String, List<String>> options = Maps.newHashMap(); |
22 | 24 |
private BiMap<String, String> aliases = HashBiMap.create(); |
23 | 25 |
private Map<String, String> weights = Maps.newHashMap(); |
24 |
|
|
25 |
public TermNode(String field, Relation rel, String value) { |
|
26 |
private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd"); |
|
27 |
|
|
28 |
public TermNode(final String field, final Relation rel, final String value) { |
|
26 | 29 |
this.field = field; |
27 | 30 |
this.rel = rel; |
28 | 31 |
this.value = value; |
32 |
simpleDateFormat.setLenient(false); |
|
29 | 33 |
} |
30 |
|
|
31 |
public TermNode(String field, Relation rel, String value, Map<String, List<String>> options, BiMap<String, String> aliases, Map<String, String> weights) { |
|
34 |
|
|
35 |
public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases, |
|
36 |
final Map<String, String> weights) { |
|
32 | 37 |
this(field, rel, value); |
33 | 38 |
this.options = options; |
34 | 39 |
this.aliases = aliases; |
35 | 40 |
this.weights = weights; |
36 |
}
|
|
41 |
} |
|
37 | 42 |
|
38 | 43 |
@Override |
39 | 44 |
public String toString() { |
... | ... | |
44 | 49 |
public String toLucene() { |
45 | 50 |
StringTokenizer termTokenizer = new StringTokenizer(value, " "); |
46 | 51 |
StringTokenizer weightTokenizer = new StringTokenizer(value, " "); |
47 |
|
|
52 |
|
|
48 | 53 |
switch (rel) { |
49 |
case EXACT: |
|
50 |
return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value); |
|
51 |
case ALL: |
|
52 |
return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")"; |
|
53 |
case EQUAL: |
|
54 |
case ANY: |
|
55 |
return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")"; |
|
56 |
case NOT: |
|
57 |
return "-" + field + ":" + "\"" + value + "\""; |
|
58 |
case LT: |
|
59 |
return field + ":" + "{* TO " + value + "}" + weight(); |
|
60 |
case GT: |
|
61 |
return field + ":" + "{" + value + " TO *}" + weight(); |
|
62 |
case LTE: |
|
63 |
return field + ":" + "[* TO " + value + "]" + weight(); |
|
64 |
case GTE: |
|
65 |
return field + ":" + "[" + value + " TO *]" + weight(); |
|
66 |
case WITHIN: |
|
67 |
String lower = checkDate(value.split(" ")[0]); |
|
68 |
String upper = checkDate(value.split(" ")[1]); |
|
69 |
return field + ":[" + lower + " TO " + upper + "]" + weight(); |
|
70 |
default: |
|
71 |
throw new RuntimeException("unable to serialize: " + toString()); |
|
54 |
case EXACT: |
|
55 |
return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value); |
|
56 |
case ALL: |
|
57 |
return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")"; |
|
58 |
case EQUAL: |
|
59 |
case ANY: |
|
60 |
return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")"; |
|
61 |
case NOT: |
|
62 |
return "-" + field + ":" + "\"" + value + "\""; |
|
63 |
case LT: |
|
64 |
if (isDate(value)) { |
|
65 |
value = checkDate(value); |
|
66 |
} |
|
67 |
return field + ":" + "{* TO " + value + "}" + weight(); |
|
68 |
case GT: |
|
69 |
if (isDate(value)) { |
|
70 |
value = checkDate(value); |
|
71 |
} |
|
72 |
return field + ":" + "{" + value + " TO *}" + weight(); |
|
73 |
case LTE: |
|
74 |
if (isDate(value)) { |
|
75 |
value = checkDate(value); |
|
76 |
} |
|
77 |
return field + ":" + "[* TO " + value + "]" + weight(); |
|
78 |
case GTE: |
|
79 |
if (isDate(value)) { |
|
80 |
value = checkDate(value); |
|
81 |
} |
|
82 |
return field + ":" + "[" + value + " TO *]" + weight(); |
|
83 |
case WITHIN: |
|
84 |
String lowerValue = value.split(" ")[0]; |
|
85 |
String upperValue = value.split(" ")[1]; |
|
86 |
if (isDate(lowerValue)) { |
|
87 |
lowerValue = checkDate(lowerValue); |
|
88 |
} |
|
89 |
if (isDate(upperValue)) { |
|
90 |
upperValue = checkDate(upperValue); |
|
91 |
} |
|
92 |
return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight(); |
|
93 |
default: |
|
94 |
throw new RuntimeException("unable to serialize: " + toString()); |
|
72 | 95 |
} |
73 | 96 |
} |
74 | 97 |
|
... | ... | |
77 | 100 |
} |
78 | 101 |
|
79 | 102 |
private String weight() { |
80 |
return weights != null && weights.get(field) != null ? "^" + weights.get(field) : "";
|
|
103 |
return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
|
|
81 | 104 |
} |
82 |
|
|
83 |
private String expandTokens(StringTokenizer tokenizer) { |
|
105 |
|
|
106 |
private String expandTokens(final StringTokenizer tokenizer) {
|
|
84 | 107 |
String ret = ""; |
85 | 108 |
while (tokenizer.hasMoreTokens()) { |
86 | 109 |
String token = tokenizer.nextToken(); |
... | ... | |
91 | 114 |
} |
92 | 115 |
return ret.trim(); |
93 | 116 |
} |
94 |
|
|
95 |
private String expand(String token) { |
|
117 |
|
|
118 |
private String expand(final String token) {
|
|
96 | 119 |
String ret = ""; |
97 | 120 |
if (!weights.keySet().contains(field)) { |
98 |
for(Entry<String, String> e : weights.entrySet()) { |
|
121 |
for (Entry<String, String> e : weights.entrySet()) {
|
|
99 | 122 |
ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " "; |
100 | 123 |
} |
101 | 124 |
} |
102 | 125 |
return ret; |
103 |
}
|
|
126 |
} |
|
104 | 127 |
|
105 |
private String handleTokens(StringTokenizer tokenizer, String op) {
|
|
128 |
private String handleTokens(final StringTokenizer tokenizer, final String op) {
|
|
106 | 129 |
String ret = ""; |
107 | 130 |
while (tokenizer.hasMoreTokens()) { |
108 | 131 |
String token = tokenizer.nextToken(); |
... | ... | |
110 | 133 |
} |
111 | 134 |
return ret.trim(); |
112 | 135 |
} |
113 |
|
|
136 |
|
|
114 | 137 |
private String checkEscaping(String token) { |
115 | 138 |
boolean isWildcard = token.contains("*") || token.contains("?"); |
116 |
boolean isWildcardEnabled = (options.get("wildcard") != null && options.get("wildcard").contains("true")) || token.equals("*");
|
|
117 |
|
|
118 |
if (!(isWildcard & isWildcardEnabled) ) {
|
|
119 |
token = QueryParser.escape(token); |
|
139 |
boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
|
|
140 |
|
|
141 |
if (!(isWildcard & isWildcardEnabled)) { |
|
142 |
token = QueryParserBase.escape(token);
|
|
120 | 143 |
} |
121 | 144 |
return token; |
122 | 145 |
} |
123 | 146 |
|
147 |
private boolean isDate(final String aPossibleDate) { |
|
148 |
try { |
|
149 |
simpleDateFormat.parse(aPossibleDate); |
|
150 |
} catch (ParseException pe) { |
|
151 |
return false; |
|
152 |
} |
|
153 |
return true; |
|
154 |
} |
|
155 |
|
|
124 | 156 |
private String checkDate(final String date) { |
125 |
if (!date.endsWith("Z")) { |
|
126 |
return date + "T00:00:00Z"; |
|
127 |
} |
|
157 |
if (!date.endsWith("Z")) return date + "T00:00:00Z"; |
|
128 | 158 |
return date; |
129 | 159 |
} |
130 |
|
|
160 |
|
|
131 | 161 |
public String getField() { |
132 | 162 |
return field; |
133 | 163 |
} |
... | ... | |
138 | 168 |
|
139 | 169 |
public String getValue() { |
140 | 170 |
return value; |
141 |
}
|
|
171 |
} |
|
142 | 172 |
|
143 | 173 |
} |
Also available in: Unified diff
Added a check for date terms in queries: Solr requires dates in full ISO 8601 form (e.g. 2012-03-15T00:00:00Z), so date-only values (yyyy-MM-dd) get "T00:00:00Z" appended.