Project

General

Profile

« Previous | Next » 

Revision 31954

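Added a check for date terms in queries. Solr requires dates in full ISO 8601 date-time format (e.g. 2012-03-15T00:00:00Z), so date-only terms such as 2012-03-15 are expanded by appending T00:00:00Z.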

View differences:

modules/cnr-cql-utils/trunk/src/test/java/eu/dnetlib/functionality/index/cql/CqlTranslatorImplTest.java
63 63
	@Test
64 64
	public void testToSolr_2() throws Exception {
65 65
		String query = "(_all=faust AND _all=pippo) AND _all<>cinegiornale";
66
		Node node =
67
				new AndNode(new AndNode(new TermNode("_all", Relation.EQUAL, "faust"), new TermNode("_all", Relation.EQUAL, "pippo")), new TermNode("_all",
68
						Relation.NOT, "cinegiornale"));
66
		Node node = new AndNode(new AndNode(new TermNode("_all", Relation.EQUAL, "faust"), new TermNode("_all", Relation.EQUAL, "pippo")), new TermNode("_all",
67
				Relation.NOT, "cinegiornale"));
69 68

  
70 69
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
71 70

  
......
204 203
		BiMap<String, String> aliases = HashBiMap.create();
205 204

  
206 205
		cqlOptions.put("wildcard", Lists.newArrayList("true"));
207
		TranslatedQuery luceneQuery =
208
				translator.getTranslatedQuery(parsed, new IdentityCqlValueTransformerMap(), cqlOptions, aliases, new HashMap<String, String>());
206
		TranslatedQuery luceneQuery = translator.getTranslatedQuery(parsed, new IdentityCqlValueTransformerMap(), cqlOptions, aliases,
207
				new HashMap<String, String>());
209 208
		printQuery(cqlQuery, luceneQuery.asLucene());
210 209

  
211 210
		cqlOptions = Maps.newHashMap();
......
348 347

  
349 348
		String query = "(title = ESTUDIO and title = abierto) not (title = mediante)";
350 349

  
351
		Node node =
352
				new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new TermNode(
353
						"title", Relation.EQUAL, "mediante"));
350
		Node node = new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new TermNode(
351
				"title", Relation.EQUAL, "mediante"));
354 352

  
355 353
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
356 354

  
......
363 361

  
364 362
		String query = "(title = ESTUDIO and title = abierto) not (title = mediante or title = verde)";
365 363

  
366
		Node node =
367
				new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new OrNode(
368
						new TermNode("title", Relation.EQUAL, "mediante"), new TermNode("title", Relation.EQUAL, "verde")));
364
		Node node = new NotNode(new AndNode(new TermNode("title", Relation.EQUAL, "ESTUDIO"), new TermNode("title", Relation.EQUAL, "abierto")), new OrNode(
365
				new TermNode("title", Relation.EQUAL, "mediante"), new TermNode("title", Relation.EQUAL, "verde")));
369 366

  
370 367
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
371 368

  
......
496 493
	public void testToSolr_27() throws Exception {
497 494

  
498 495
		String query = "((((publicationdate =/within \"2000-01-01 2010-01-01\" and title = \"ddd\") and y < 2010) or y <= 2010) or y > 2010) or y >= 2010";
499
		Node node =
500
				new OrNode(new OrNode(new OrNode(new AndNode(new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"),
501
						new TermNode("title", Relation.EQUAL, "ddd")), new TermNode("y", Relation.LT, "2010")), new TermNode("y", Relation.LTE, "2010")),
502
						new TermNode("y", Relation.GT, "2010")), new TermNode("y", Relation.GTE, "2010"));
496
		Node node = new OrNode(new OrNode(new OrNode(new AndNode(new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"),
497
				new TermNode("title", Relation.EQUAL, "ddd")), new TermNode("y", Relation.LT, "2010")), new TermNode("y", Relation.LTE, "2010")), new TermNode(
498
				"y", Relation.GT, "2010")), new TermNode("y", Relation.GTE, "2010"));
503 499

  
504 500
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
505 501

  
......
512 508

  
513 509
		String query = "publicationdate =/within \"2000-01-01 2010-01-01\" and (title = \"ddd\" and (y < 2010 or (y <= 2010 or (y > 2010 or y >= 2010))))";
514 510

  
515
		Node node =
516
				new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), new AndNode(
517
						new TermNode("title", Relation.EQUAL, "ddd"), new OrNode(new TermNode("y", Relation.LT, "2010"), new OrNode(new TermNode("y",
518
								Relation.LTE, "2010"), new OrNode(new TermNode("y", Relation.GT, "2010"), new TermNode("y", Relation.GTE, "2010"))))));
511
		Node node = new AndNode(new TermNode("publicationdate", Relation.WITHIN, "2000-01-01 2010-01-01"), new AndNode(new TermNode("title", Relation.EQUAL,
512
				"ddd"), new OrNode(new TermNode("y", Relation.LT, "2010"), new OrNode(new TermNode("y", Relation.LTE, "2010"), new OrNode(new TermNode("y",
513
				Relation.GT, "2010"), new TermNode("y", Relation.GTE, "2010"))))));
519 514

  
520 515
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
521 516

  
......
528 523

  
529 524
		String query = "dateaccepted =/within \"1900-01-01 2000-01-01\" and dateaccepted >= 2010-01-01";
530 525

  
531
		Node node =
532
				new AndNode(new TermNode("dateaccepted", Relation.WITHIN, "1900-01-01 2000-01-01"), new TermNode("dateaccepted", Relation.GTE, "2010-01-01"));
526
		Node node = new AndNode(new TermNode("dateaccepted", Relation.WITHIN, "1900-01-01 2000-01-01"),
527
				new TermNode("dateaccepted", Relation.GTE, "2010-01-01"));
533 528

  
534 529
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
535 530

  
......
550 545
	public void testToSolr_30() throws Exception {
551 546

  
552 547
		String query = "a = 1 and b = 2 and c = 3";
553
		Node node =
554
				new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "1"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", Relation.EQUAL, "3"));
548
		Node node = new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "1"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c", Relation.EQUAL,
549
				"3"));
555 550

  
556 551
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
557 552

  
......
563 558
	public void testToSolr_31() throws Exception {
564 559

  
565 560
		String query = "a = \"pippo pluto\" and b = 2 and c = 3";
566
		Node node =
567
				new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "pippo pluto"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c",
568
						Relation.EQUAL, "3"));
561
		Node node = new AndNode(new AndNode(new TermNode("a", Relation.EQUAL, "pippo pluto"), new TermNode("b", Relation.EQUAL, "2")), new TermNode("c",
562
				Relation.EQUAL, "3"));
569 563

  
570 564
		TranslatedQuery parsed = translator.getTranslatedQuery(query);
571 565

  
......
635 629
		printQuery(cqlQuery, lucene);
636 630
	}
637 631

  
632
	@Test
633
	public void testDateQuery() throws CQLParseException, IOException {
634
		String cqlQuery = "(resultdateofacceptance <= \"2012-03-15\")";
635
		Map<String, List<String>> options = new HashMap<String, List<String>>();
636
		String lucene = translator.toLucene(cqlQuery, options);
637
		printQuery(cqlQuery, lucene);
638
	}
639

  
640
	@Test
641
	public void testFullISODateQuery() throws CQLParseException, IOException {
642
		String cqlQuery = "(resultdateofacceptance <= 2012-03-15T00:00:00Z)";
643
		Map<String, List<String>> options = new HashMap<String, List<String>>();
644
		String lucene = translator.toLucene(cqlQuery, options);
645
		printQuery(cqlQuery, lucene);
646
	}
647

  
648
	@Test
649
	public void testNonDateQuery() throws CQLParseException, IOException {
650
		String cqlQuery = "(resultdateofacceptance <= 2012.03.15T00:00:00Z)";
651
		Map<String, List<String>> options = new HashMap<String, List<String>>();
652
		String lucene = translator.toLucene(cqlQuery, options);
653
		printQuery(cqlQuery, lucene);
654
	}
655

  
656
	@Test
657
	public void testNonDateQuery2() throws CQLParseException, IOException {
658
		String cqlQuery = "(resultdateofacceptance <= ciao)";
659
		Map<String, List<String>> options = new HashMap<String, List<String>>();
660
		String lucene = translator.toLucene(cqlQuery, options);
661
		printQuery(cqlQuery, lucene);
662
	}
663

  
664
	@Test
665
	public void testDateWrong() throws Exception {
666

  
667
		String cqlQuery = "publicationdate =/within \"2000-01-01 2010.99.01\"";
668
		String luceneQuery = translator.toLucene(cqlQuery);
669

  
670
		printQuery(cqlQuery, luceneQuery);
671
	}
672

  
638 673
	private void printQuery(final String cql, final String lucene) throws CQLParseException, IOException {
639 674
		System.out.println("CQL:    " + cql);
640 675
		// System.out.println("PARSED: " + new CQLParser().parse(cql).toCQL());
modules/cnr-cql-utils/trunk/src/main/java/eu/dnetlib/functionality/index/parse/TermNode.java
1 1
package eu.dnetlib.functionality.index.parse;
2 2

  
3
import java.text.ParseException;
4
import java.text.SimpleDateFormat;
3 5
import java.util.List;
4 6
import java.util.Map;
5 7
import java.util.Map.Entry;
6 8
import java.util.StringTokenizer;
7 9

  
8
import org.apache.lucene.queryparser.classic.QueryParser;
10
import org.apache.lucene.queryparser.classic.QueryParserBase;
9 11

  
10 12
import com.google.common.collect.BiMap;
11 13
import com.google.common.collect.HashBiMap;
......
14 16
public class TermNode extends Node {
15 17

  
16 18
	public static final String dnetDefaultField = "__all";
17
	
19

  
18 20
	private String field;
19 21
	private Relation rel;
20 22
	private String value;
21 23
	private Map<String, List<String>> options = Maps.newHashMap();
22 24
	private BiMap<String, String> aliases = HashBiMap.create();
23 25
	private Map<String, String> weights = Maps.newHashMap();
24
	
25
	public TermNode(String field, Relation rel, String value) {
26
	private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
27

  
28
	public TermNode(final String field, final Relation rel, final String value) {
26 29
		this.field = field;
27 30
		this.rel = rel;
28 31
		this.value = value;
32
		simpleDateFormat.setLenient(false);
29 33
	}
30
	
31
	public TermNode(String field, Relation rel, String value, Map<String, List<String>> options, BiMap<String, String> aliases, Map<String, String> weights) {
34

  
35
	public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases,
36
			final Map<String, String> weights) {
32 37
		this(field, rel, value);
33 38
		this.options = options;
34 39
		this.aliases = aliases;
35 40
		this.weights = weights;
36
	}	
41
	}
37 42

  
38 43
	@Override
39 44
	public String toString() {
......
44 49
	public String toLucene() {
45 50
		StringTokenizer termTokenizer = new StringTokenizer(value, " ");
46 51
		StringTokenizer weightTokenizer = new StringTokenizer(value, " ");
47
		
52

  
48 53
		switch (rel) {
49
			case EXACT:
50
				return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value);
51
			case ALL:
52
				return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")";
53
			case EQUAL:				
54
			case ANY:
55
				return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")";
56
			case NOT:
57
				return "-" + field + ":" + "\"" + value + "\"";
58
			case LT:
59
				return field + ":" + "{* TO " + value + "}" + weight();
60
			case GT:
61
				return field + ":" + "{" + value + " TO *}" + weight();
62
			case LTE:
63
				return field + ":" + "[* TO " + value + "]" + weight();
64
			case GTE:
65
				return field + ":" + "[" + value + " TO *]" + weight();
66
			case WITHIN:
67
				String lower = checkDate(value.split(" ")[0]);
68
				String upper = checkDate(value.split(" ")[1]);
69
				return field + ":[" + lower + " TO " + upper + "]" + weight();
70
			default:
71
				throw new RuntimeException("unable to serialize: " + toString());
54
		case EXACT:
55
			return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value);
56
		case ALL:
57
			return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")";
58
		case EQUAL:
59
		case ANY:
60
			return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")";
61
		case NOT:
62
			return "-" + field + ":" + "\"" + value + "\"";
63
		case LT:
64
			if (isDate(value)) {
65
				value = checkDate(value);
66
			}
67
			return field + ":" + "{* TO " + value + "}" + weight();
68
		case GT:
69
			if (isDate(value)) {
70
				value = checkDate(value);
71
			}
72
			return field + ":" + "{" + value + " TO *}" + weight();
73
		case LTE:
74
			if (isDate(value)) {
75
				value = checkDate(value);
76
			}
77
			return field + ":" + "[* TO " + value + "]" + weight();
78
		case GTE:
79
			if (isDate(value)) {
80
				value = checkDate(value);
81
			}
82
			return field + ":" + "[" + value + " TO *]" + weight();
83
		case WITHIN:
84
			String lowerValue = value.split(" ")[0];
85
			String upperValue = value.split(" ")[1];
86
			if (isDate(lowerValue)) {
87
				lowerValue = checkDate(lowerValue);
88
			}
89
			if (isDate(upperValue)) {
90
				upperValue = checkDate(upperValue);
91
			}
92
			return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight();
93
		default:
94
			throw new RuntimeException("unable to serialize: " + toString());
72 95
		}
73 96
	}
74 97

  
......
77 100
	}
78 101

  
79 102
	private String weight() {
80
		return weights != null && weights.get(field) != null ? "^" + weights.get(field) : "";  
103
		return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
81 104
	}
82
	
83
	private String expandTokens(StringTokenizer tokenizer) {
105

  
106
	private String expandTokens(final StringTokenizer tokenizer) {
84 107
		String ret = "";
85 108
		while (tokenizer.hasMoreTokens()) {
86 109
			String token = tokenizer.nextToken();
......
91 114
		}
92 115
		return ret.trim();
93 116
	}
94
	
95
	private String expand(String token) {
117

  
118
	private String expand(final String token) {
96 119
		String ret = "";
97 120
		if (!weights.keySet().contains(field)) {
98
			for(Entry<String, String> e : weights.entrySet()) {
121
			for (Entry<String, String> e : weights.entrySet()) {
99 122
				ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " ";
100 123
			}
101 124
		}
102 125
		return ret;
103
	}	
126
	}
104 127

  
105
	private String handleTokens(StringTokenizer tokenizer, String op) {
128
	private String handleTokens(final StringTokenizer tokenizer, final String op) {
106 129
		String ret = "";
107 130
		while (tokenizer.hasMoreTokens()) {
108 131
			String token = tokenizer.nextToken();
......
110 133
		}
111 134
		return ret.trim();
112 135
	}
113
	
136

  
114 137
	private String checkEscaping(String token) {
115 138
		boolean isWildcard = token.contains("*") || token.contains("?");
116
		boolean isWildcardEnabled = (options.get("wildcard") != null && options.get("wildcard").contains("true")) || token.equals("*");
117
		
118
		if (!(isWildcard & isWildcardEnabled) ) {
119
			token = QueryParser.escape(token);
139
		boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
140

  
141
		if (!(isWildcard & isWildcardEnabled)) {
142
			token = QueryParserBase.escape(token);
120 143
		}
121 144
		return token;
122 145
	}
123 146

  
147
	private boolean isDate(final String aPossibleDate) {
148
		try {
149
			simpleDateFormat.parse(aPossibleDate);
150
		} catch (ParseException pe) {
151
			return false;
152
		}
153
		return true;
154
	}
155

  
124 156
	private String checkDate(final String date) {
125
		if (!date.endsWith("Z")) {
126
			return date + "T00:00:00Z";
127
		}
157
		if (!date.endsWith("Z")) return date + "T00:00:00Z";
128 158
		return date;
129 159
	}
130
	
160

  
131 161
	public String getField() {
132 162
		return field;
133 163
	}
......
138 168

  
139 169
	public String getValue() {
140 170
		return value;
141
	}	
171
	}
142 172

  
143 173
}

Also available in: Unified diff