Revision 31954
Added by Alessia Bardi over 9 years ago
TermNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.functionality.index.parse; |
2 | 2 |
|
3 |
import java.text.ParseException; |
|
4 |
import java.text.SimpleDateFormat; |
|
3 | 5 |
import java.util.List; |
4 | 6 |
import java.util.Map; |
5 | 7 |
import java.util.Map.Entry; |
6 | 8 |
import java.util.StringTokenizer; |
7 | 9 |
|
8 |
import org.apache.lucene.queryparser.classic.QueryParser; |
|
10 |
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
|
9 | 11 |
|
10 | 12 |
import com.google.common.collect.BiMap; |
11 | 13 |
import com.google.common.collect.HashBiMap; |
... | ... | |
14 | 16 |
public class TermNode extends Node { |
15 | 17 |
|
16 | 18 |
public static final String dnetDefaultField = "__all"; |
17 |
|
|
19 |
|
|
18 | 20 |
private String field; |
19 | 21 |
private Relation rel; |
20 | 22 |
private String value; |
21 | 23 |
private Map<String, List<String>> options = Maps.newHashMap(); |
22 | 24 |
private BiMap<String, String> aliases = HashBiMap.create(); |
23 | 25 |
private Map<String, String> weights = Maps.newHashMap(); |
24 |
|
|
25 |
public TermNode(String field, Relation rel, String value) { |
|
26 |
private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd"); |
|
27 |
|
|
28 |
public TermNode(final String field, final Relation rel, final String value) { |
|
26 | 29 |
this.field = field; |
27 | 30 |
this.rel = rel; |
28 | 31 |
this.value = value; |
32 |
simpleDateFormat.setLenient(false); |
|
29 | 33 |
} |
30 |
|
|
31 |
public TermNode(String field, Relation rel, String value, Map<String, List<String>> options, BiMap<String, String> aliases, Map<String, String> weights) { |
|
34 |
|
|
35 |
public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases, |
|
36 |
final Map<String, String> weights) { |
|
32 | 37 |
this(field, rel, value); |
33 | 38 |
this.options = options; |
34 | 39 |
this.aliases = aliases; |
35 | 40 |
this.weights = weights; |
36 |
}
|
|
41 |
} |
|
37 | 42 |
|
38 | 43 |
@Override |
39 | 44 |
public String toString() { |
... | ... | |
44 | 49 |
public String toLucene() { |
45 | 50 |
StringTokenizer termTokenizer = new StringTokenizer(value, " "); |
46 | 51 |
StringTokenizer weightTokenizer = new StringTokenizer(value, " "); |
47 |
|
|
52 |
|
|
48 | 53 |
switch (rel) { |
49 |
case EXACT: |
|
50 |
return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value); |
|
51 |
case ALL: |
|
52 |
return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")"; |
|
53 |
case EQUAL: |
|
54 |
case ANY: |
|
55 |
return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")"; |
|
56 |
case NOT: |
|
57 |
return "-" + field + ":" + "\"" + value + "\""; |
|
58 |
case LT: |
|
59 |
return field + ":" + "{* TO " + value + "}" + weight(); |
|
60 |
case GT: |
|
61 |
return field + ":" + "{" + value + " TO *}" + weight(); |
|
62 |
case LTE: |
|
63 |
return field + ":" + "[* TO " + value + "]" + weight(); |
|
64 |
case GTE: |
|
65 |
return field + ":" + "[" + value + " TO *]" + weight(); |
|
66 |
case WITHIN: |
|
67 |
String lower = checkDate(value.split(" ")[0]); |
|
68 |
String upper = checkDate(value.split(" ")[1]); |
|
69 |
return field + ":[" + lower + " TO " + upper + "]" + weight(); |
|
70 |
default: |
|
71 |
throw new RuntimeException("unable to serialize: " + toString()); |
|
54 |
case EXACT: |
|
55 |
return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value); |
|
56 |
case ALL: |
|
57 |
return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")"; |
|
58 |
case EQUAL: |
|
59 |
case ANY: |
|
60 |
return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")"; |
|
61 |
case NOT: |
|
62 |
return "-" + field + ":" + "\"" + value + "\""; |
|
63 |
case LT: |
|
64 |
if (isDate(value)) { |
|
65 |
value = checkDate(value); |
|
66 |
} |
|
67 |
return field + ":" + "{* TO " + value + "}" + weight(); |
|
68 |
case GT: |
|
69 |
if (isDate(value)) { |
|
70 |
value = checkDate(value); |
|
71 |
} |
|
72 |
return field + ":" + "{" + value + " TO *}" + weight(); |
|
73 |
case LTE: |
|
74 |
if (isDate(value)) { |
|
75 |
value = checkDate(value); |
|
76 |
} |
|
77 |
return field + ":" + "[* TO " + value + "]" + weight(); |
|
78 |
case GTE: |
|
79 |
if (isDate(value)) { |
|
80 |
value = checkDate(value); |
|
81 |
} |
|
82 |
return field + ":" + "[" + value + " TO *]" + weight(); |
|
83 |
case WITHIN: |
|
84 |
String lowerValue = value.split(" ")[0]; |
|
85 |
String upperValue = value.split(" ")[1]; |
|
86 |
if (isDate(lowerValue)) { |
|
87 |
lowerValue = checkDate(lowerValue); |
|
88 |
} |
|
89 |
if (isDate(upperValue)) { |
|
90 |
upperValue = checkDate(upperValue); |
|
91 |
} |
|
92 |
return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight(); |
|
93 |
default: |
|
94 |
throw new RuntimeException("unable to serialize: " + toString()); |
|
72 | 95 |
} |
73 | 96 |
} |
74 | 97 |
|
... | ... | |
77 | 100 |
} |
78 | 101 |
|
79 | 102 |
private String weight() { |
80 |
return weights != null && weights.get(field) != null ? "^" + weights.get(field) : "";
|
|
103 |
return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
|
|
81 | 104 |
} |
82 |
|
|
83 |
private String expandTokens(StringTokenizer tokenizer) { |
|
105 |
|
|
106 |
private String expandTokens(final StringTokenizer tokenizer) {
|
|
84 | 107 |
String ret = ""; |
85 | 108 |
while (tokenizer.hasMoreTokens()) { |
86 | 109 |
String token = tokenizer.nextToken(); |
... | ... | |
91 | 114 |
} |
92 | 115 |
return ret.trim(); |
93 | 116 |
} |
94 |
|
|
95 |
private String expand(String token) { |
|
117 |
|
|
118 |
private String expand(final String token) {
|
|
96 | 119 |
String ret = ""; |
97 | 120 |
if (!weights.keySet().contains(field)) { |
98 |
for(Entry<String, String> e : weights.entrySet()) { |
|
121 |
for (Entry<String, String> e : weights.entrySet()) {
|
|
99 | 122 |
ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " "; |
100 | 123 |
} |
101 | 124 |
} |
102 | 125 |
return ret; |
103 |
}
|
|
126 |
} |
|
104 | 127 |
|
105 |
private String handleTokens(StringTokenizer tokenizer, String op) {
|
|
128 |
private String handleTokens(final StringTokenizer tokenizer, final String op) {
|
|
106 | 129 |
String ret = ""; |
107 | 130 |
while (tokenizer.hasMoreTokens()) { |
108 | 131 |
String token = tokenizer.nextToken(); |
... | ... | |
110 | 133 |
} |
111 | 134 |
return ret.trim(); |
112 | 135 |
} |
113 |
|
|
136 |
|
|
114 | 137 |
private String checkEscaping(String token) { |
115 | 138 |
boolean isWildcard = token.contains("*") || token.contains("?"); |
116 |
boolean isWildcardEnabled = (options.get("wildcard") != null && options.get("wildcard").contains("true")) || token.equals("*");
|
|
117 |
|
|
118 |
if (!(isWildcard & isWildcardEnabled) ) {
|
|
119 |
token = QueryParser.escape(token); |
|
139 |
boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
|
|
140 |
|
|
141 |
if (!(isWildcard & isWildcardEnabled)) { |
|
142 |
token = QueryParserBase.escape(token);
|
|
120 | 143 |
} |
121 | 144 |
return token; |
122 | 145 |
} |
123 | 146 |
|
147 |
private boolean isDate(final String aPossibleDate) { |
|
148 |
try { |
|
149 |
simpleDateFormat.parse(aPossibleDate); |
|
150 |
} catch (ParseException pe) { |
|
151 |
return false; |
|
152 |
} |
|
153 |
return true; |
|
154 |
} |
|
155 |
|
|
124 | 156 |
private String checkDate(final String date) { |
125 |
if (!date.endsWith("Z")) { |
|
126 |
return date + "T00:00:00Z"; |
|
127 |
} |
|
157 |
if (!date.endsWith("Z")) return date + "T00:00:00Z"; |
|
128 | 158 |
return date; |
129 | 159 |
} |
130 |
|
|
160 |
|
|
131 | 161 |
public String getField() { |
132 | 162 |
return field; |
133 | 163 |
} |
... | ... | |
138 | 168 |
|
139 | 169 |
public String getValue() { |
140 | 170 |
return value; |
141 |
}
|
|
171 |
} |
|
142 | 172 |
|
143 | 173 |
} |
Also available in: Unified diff
Added a check for date terms in queries: Solr requires dates in full ISO 8601 format, so date-only values (yyyy-MM-dd) are expanded by appending "T00:00:00Z".