1
|
package eu.dnetlib.functionality.cql.parse;
|
2
|
|
3
|
import java.text.ParseException;
|
4
|
import java.text.SimpleDateFormat;
|
5
|
import java.util.List;
|
6
|
import java.util.Map;
|
7
|
import java.util.Map.Entry;
|
8
|
|
9
|
import com.google.common.base.Function;
|
10
|
import com.google.common.base.Joiner;
|
11
|
import com.google.common.base.Splitter;
|
12
|
import com.google.common.collect.BiMap;
|
13
|
import com.google.common.collect.HashBiMap;
|
14
|
import com.google.common.collect.Iterables;
|
15
|
import com.google.common.collect.Maps;
|
16
|
import org.apache.commons.lang.StringUtils;
|
17
|
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
18
|
|
19
|
public class TermNode extends Node {
|
20
|
|
21
|
public static final String dnetDefaultField = "__all";
|
22
|
|
23
|
private String field;
|
24
|
private Relation rel;
|
25
|
private String value;
|
26
|
private Map<String, List<String>> options = Maps.newHashMap();
|
27
|
private BiMap<String, String> aliases = HashBiMap.create();
|
28
|
private Map<String, String> weights = Maps.newHashMap();
|
29
|
private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
|
30
|
|
31
|
public TermNode(final String field, final Relation rel, final String value) {
|
32
|
this.field = field;
|
33
|
this.rel = rel;
|
34
|
this.value = value;
|
35
|
simpleDateFormat.setLenient(false);
|
36
|
}
|
37
|
|
38
|
public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases,
|
39
|
final Map<String, String> weights) {
|
40
|
this(field, rel, value);
|
41
|
this.options = options;
|
42
|
this.aliases = aliases;
|
43
|
this.weights = weights;
|
44
|
}
|
45
|
|
46
|
@Override
|
47
|
public String toString() {
|
48
|
return TermNode.class.getSimpleName() + "(" + field + " " + rel + " " + value + ")";
|
49
|
}
|
50
|
|
51
|
@Override
|
52
|
public String toLucene() {
|
53
|
//StringTokenizer termTokenizer = new StringTokenizer(value, " ");
|
54
|
//StringTokenizer weightTokenizer = new StringTokenizer(value, " ");
|
55
|
final Iterable<String> termTokenizer = Splitter.on(" ").omitEmptyStrings().split(value);
|
56
|
final Iterable<String> weightTokenizer = Splitter.on(" ").omitEmptyStrings().split(value);
|
57
|
switch (rel) {
|
58
|
case EXACT:
|
59
|
final String lucene = getFieldName() + ":" + "\"" + value + "\"";
|
60
|
return StringUtils.isNotBlank(weight()) ? lucene + weight() + " " + expand(value) : lucene;
|
61
|
case EQUAL:
|
62
|
case ALL:
|
63
|
return "(" + handleTokens(termTokenizer, "AND") + " " + expandTokens(weightTokenizer) + ")";
|
64
|
case ANY:
|
65
|
return "(" + handleTokens(termTokenizer, "OR") + " " + expandTokens(weightTokenizer) + ")";
|
66
|
case NOT:
|
67
|
return "NOT " + field + ":" + "\"" + value + "\"";
|
68
|
case LT:
|
69
|
if (isDate(value)) {
|
70
|
value = checkDate(value);
|
71
|
}
|
72
|
return field + ":" + "{* TO " + value + "}" + weight();
|
73
|
case GT:
|
74
|
if (isDate(value)) {
|
75
|
value = checkDate(value);
|
76
|
}
|
77
|
return field + ":" + "{" + value + " TO *}" + weight();
|
78
|
case LTE:
|
79
|
if (isDate(value)) {
|
80
|
value = checkDate(value);
|
81
|
}
|
82
|
return field + ":" + "[* TO " + value + "]" + weight();
|
83
|
case GTE:
|
84
|
if (isDate(value)) {
|
85
|
value = checkDate(value);
|
86
|
}
|
87
|
return field + ":" + "[" + value + " TO *]" + weight();
|
88
|
case WITHIN:
|
89
|
String lowerValue = value.split(" ")[0];
|
90
|
String upperValue = value.split(" ")[1];
|
91
|
if (isDate(lowerValue)) {
|
92
|
lowerValue = checkDate(lowerValue);
|
93
|
}
|
94
|
if (isDate(upperValue)) {
|
95
|
upperValue = checkDate(upperValue);
|
96
|
}
|
97
|
return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight();
|
98
|
default:
|
99
|
throw new RuntimeException("unable to serialize: " + toString());
|
100
|
}
|
101
|
}
|
102
|
|
103
|
private String getFieldName() {
|
104
|
return aliases.get(field) != null ? aliases.get(field) : field;
|
105
|
}
|
106
|
|
107
|
private String weight() {
|
108
|
return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
|
109
|
}
|
110
|
|
111
|
private String expandTokens(final Iterable<String> tokens) {
|
112
|
return Joiner.on("").skipNulls().join(Iterables.transform(tokens, new Function<String, String>() {
|
113
|
@Override
|
114
|
public String apply(final String s) {
|
115
|
if (field.equals(dnetDefaultField.toLowerCase()) || field.equals(dnetDefaultField.toLowerCase())) {
|
116
|
return expand(s);
|
117
|
}
|
118
|
return null;
|
119
|
}
|
120
|
})).trim();
|
121
|
}
|
122
|
|
123
|
private String expand(final String token) {
|
124
|
String ret = "";
|
125
|
if (!weights.keySet().contains(field)) {
|
126
|
for (Entry<String, String> e : weights.entrySet()) {
|
127
|
ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " ";
|
128
|
}
|
129
|
}
|
130
|
return ret.trim();
|
131
|
}
|
132
|
|
133
|
private String handleTokens(final Iterable<String> tokens, final String op) {
|
134
|
final String separator = " " + op + " ";
|
135
|
return Joiner.on(separator).join(Iterables.transform(tokens, new Function<String, String>() {
|
136
|
@Override
|
137
|
public String apply(final String s) {
|
138
|
return field + ":" + checkEscaping(s) + weight();
|
139
|
}
|
140
|
})).trim();
|
141
|
}
|
142
|
|
143
|
private String checkEscaping(String token) {
|
144
|
boolean isWildcard = token.contains("*") || token.contains("?");
|
145
|
boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
|
146
|
|
147
|
if (!(isWildcard & isWildcardEnabled)) {
|
148
|
token = QueryParserBase.escape(token);
|
149
|
}
|
150
|
return token;
|
151
|
}
|
152
|
|
153
|
private boolean isDate(final String aPossibleDate) {
|
154
|
try {
|
155
|
simpleDateFormat.parse(aPossibleDate);
|
156
|
} catch (ParseException pe) {
|
157
|
return false;
|
158
|
}
|
159
|
return true;
|
160
|
}
|
161
|
|
162
|
private String checkDate(final String date) {
|
163
|
if (!date.endsWith("Z")) return date + "T00:00:00Z";
|
164
|
return date;
|
165
|
}
|
166
|
|
167
|
public String getField() {
|
168
|
return field;
|
169
|
}
|
170
|
|
171
|
public Relation getRel() {
|
172
|
return rel;
|
173
|
}
|
174
|
|
175
|
public String getValue() {
|
176
|
return value;
|
177
|
}
|
178
|
|
179
|
}
|