Project

General

Profile

« Previous | Next » 

Revision 31954

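Added a check for date terms in queries. Solr requires dates to be in full ISO 8601 date-time format (e.g. 2014-01-01T00:00:00Z), so date-only values (yyyy-MM-dd) get "T00:00:00Z" appended.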

View differences:

TermNode.java
1 1
package eu.dnetlib.functionality.index.parse;
2 2

  
3
import java.text.ParseException;
4
import java.text.SimpleDateFormat;
3 5
import java.util.List;
4 6
import java.util.Map;
5 7
import java.util.Map.Entry;
6 8
import java.util.StringTokenizer;
7 9

  
8
import org.apache.lucene.queryparser.classic.QueryParser;
10
import org.apache.lucene.queryparser.classic.QueryParserBase;
9 11

  
10 12
import com.google.common.collect.BiMap;
11 13
import com.google.common.collect.HashBiMap;
......
14 16
public class TermNode extends Node {
15 17

  
16 18
	public static final String dnetDefaultField = "__all";
17
	
19

  
18 20
	private String field;
19 21
	private Relation rel;
20 22
	private String value;
21 23
	private Map<String, List<String>> options = Maps.newHashMap();
22 24
	private BiMap<String, String> aliases = HashBiMap.create();
23 25
	private Map<String, String> weights = Maps.newHashMap();
24
	
25
	public TermNode(String field, Relation rel, String value) {
26
	private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
27

  
28
	public TermNode(final String field, final Relation rel, final String value) {
26 29
		this.field = field;
27 30
		this.rel = rel;
28 31
		this.value = value;
32
		simpleDateFormat.setLenient(false);
29 33
	}
30
	
31
	public TermNode(String field, Relation rel, String value, Map<String, List<String>> options, BiMap<String, String> aliases, Map<String, String> weights) {
34

  
35
	public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases,
36
			final Map<String, String> weights) {
32 37
		this(field, rel, value);
33 38
		this.options = options;
34 39
		this.aliases = aliases;
35 40
		this.weights = weights;
36
	}	
41
	}
37 42

  
38 43
	@Override
39 44
	public String toString() {
......
44 49
	public String toLucene() {
45 50
		StringTokenizer termTokenizer = new StringTokenizer(value, " ");
46 51
		StringTokenizer weightTokenizer = new StringTokenizer(value, " ");
47
		
52

  
48 53
		switch (rel) {
49
			case EXACT:
50
				return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value);
51
			case ALL:
52
				return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")";
53
			case EQUAL:				
54
			case ANY:
55
				return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")";
56
			case NOT:
57
				return "-" + field + ":" + "\"" + value + "\"";
58
			case LT:
59
				return field + ":" + "{* TO " + value + "}" + weight();
60
			case GT:
61
				return field + ":" + "{" + value + " TO *}" + weight();
62
			case LTE:
63
				return field + ":" + "[* TO " + value + "]" + weight();
64
			case GTE:
65
				return field + ":" + "[" + value + " TO *]" + weight();
66
			case WITHIN:
67
				String lower = checkDate(value.split(" ")[0]);
68
				String upper = checkDate(value.split(" ")[1]);
69
				return field + ":[" + lower + " TO " + upper + "]" + weight();
70
			default:
71
				throw new RuntimeException("unable to serialize: " + toString());
54
		case EXACT:
55
			return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value);
56
		case ALL:
57
			return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")";
58
		case EQUAL:
59
		case ANY:
60
			return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")";
61
		case NOT:
62
			return "-" + field + ":" + "\"" + value + "\"";
63
		case LT:
64
			if (isDate(value)) {
65
				value = checkDate(value);
66
			}
67
			return field + ":" + "{* TO " + value + "}" + weight();
68
		case GT:
69
			if (isDate(value)) {
70
				value = checkDate(value);
71
			}
72
			return field + ":" + "{" + value + " TO *}" + weight();
73
		case LTE:
74
			if (isDate(value)) {
75
				value = checkDate(value);
76
			}
77
			return field + ":" + "[* TO " + value + "]" + weight();
78
		case GTE:
79
			if (isDate(value)) {
80
				value = checkDate(value);
81
			}
82
			return field + ":" + "[" + value + " TO *]" + weight();
83
		case WITHIN:
84
			String lowerValue = value.split(" ")[0];
85
			String upperValue = value.split(" ")[1];
86
			if (isDate(lowerValue)) {
87
				lowerValue = checkDate(lowerValue);
88
			}
89
			if (isDate(upperValue)) {
90
				upperValue = checkDate(upperValue);
91
			}
92
			return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight();
93
		default:
94
			throw new RuntimeException("unable to serialize: " + toString());
72 95
		}
73 96
	}
74 97

  
......
77 100
	}
78 101

  
79 102
	private String weight() {
80
		return weights != null && weights.get(field) != null ? "^" + weights.get(field) : "";  
103
		return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
81 104
	}
82
	
83
	private String expandTokens(StringTokenizer tokenizer) {
105

  
106
	private String expandTokens(final StringTokenizer tokenizer) {
84 107
		String ret = "";
85 108
		while (tokenizer.hasMoreTokens()) {
86 109
			String token = tokenizer.nextToken();
......
91 114
		}
92 115
		return ret.trim();
93 116
	}
94
	
95
	private String expand(String token) {
117

  
118
	private String expand(final String token) {
96 119
		String ret = "";
97 120
		if (!weights.keySet().contains(field)) {
98
			for(Entry<String, String> e : weights.entrySet()) {
121
			for (Entry<String, String> e : weights.entrySet()) {
99 122
				ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " ";
100 123
			}
101 124
		}
102 125
		return ret;
103
	}	
126
	}
104 127

  
105
	private String handleTokens(StringTokenizer tokenizer, String op) {
128
	private String handleTokens(final StringTokenizer tokenizer, final String op) {
106 129
		String ret = "";
107 130
		while (tokenizer.hasMoreTokens()) {
108 131
			String token = tokenizer.nextToken();
......
110 133
		}
111 134
		return ret.trim();
112 135
	}
113
	
136

  
114 137
	private String checkEscaping(String token) {
115 138
		boolean isWildcard = token.contains("*") || token.contains("?");
116
		boolean isWildcardEnabled = (options.get("wildcard") != null && options.get("wildcard").contains("true")) || token.equals("*");
117
		
118
		if (!(isWildcard & isWildcardEnabled) ) {
119
			token = QueryParser.escape(token);
139
		boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
140

  
141
		if (!(isWildcard & isWildcardEnabled)) {
142
			token = QueryParserBase.escape(token);
120 143
		}
121 144
		return token;
122 145
	}
123 146

  
147
	private boolean isDate(final String aPossibleDate) {
148
		try {
149
			simpleDateFormat.parse(aPossibleDate);
150
		} catch (ParseException pe) {
151
			return false;
152
		}
153
		return true;
154
	}
155

  
124 156
	private String checkDate(final String date) {
125
		if (!date.endsWith("Z")) {
126
			return date + "T00:00:00Z";
127
		}
157
		if (!date.endsWith("Z")) return date + "T00:00:00Z";
128 158
		return date;
129 159
	}
130
	
160

  
131 161
	public String getField() {
132 162
		return field;
133 163
	}
......
138 168

  
139 169
	public String getValue() {
140 170
		return value;
141
	}	
171
	}
142 172

  
143 173
}

Also available in: Unified diff