Project

General

Profile

1
package eu.dnetlib.functionality.cql.parse;
2

    
3
import java.text.ParseException;
4
import java.text.SimpleDateFormat;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8

    
9
import com.google.common.base.Function;
10
import com.google.common.base.Joiner;
11
import com.google.common.base.Splitter;
12
import com.google.common.collect.BiMap;
13
import com.google.common.collect.HashBiMap;
14
import com.google.common.collect.Iterables;
15
import com.google.common.collect.Maps;
16
import org.apache.commons.lang.StringUtils;
17
import org.apache.lucene.queryparser.classic.QueryParserBase;
18

    
19
public class TermNode extends Node {
20

    
21
	public static final String dnetDefaultField = "__all";
22

    
23
	private String field;
24
	private Relation rel;
25
	private String value;
26
	private Map<String, List<String>> options = Maps.newHashMap();
27
	private BiMap<String, String> aliases = HashBiMap.create();
28
	private Map<String, String> weights = Maps.newHashMap();
29
	private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
30

    
31
	public TermNode(final String field, final Relation rel, final String value) {
32
		this.field = field;
33
		this.rel = rel;
34
		this.value = value;
35
		simpleDateFormat.setLenient(false);
36
	}
37

    
38
	public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases,
39
			final Map<String, String> weights) {
40
		this(field, rel, value);
41
		this.options = options;
42
		this.aliases = aliases;
43
		this.weights = weights;
44
	}
45

    
46
	@Override
47
	public String toString() {
48
		return TermNode.class.getSimpleName() + "(" + field + " " + rel + " " + value + ")";
49
	}
50

    
51
	@Override
52
	public String toLucene() {
53
		//StringTokenizer termTokenizer = new StringTokenizer(value, " ");
54
		//StringTokenizer weightTokenizer = new StringTokenizer(value, " ");
55
		final Iterable<String> termTokenizer = Splitter.on(" ").omitEmptyStrings().split(value);
56
		final Iterable<String> weightTokenizer = Splitter.on(" ").omitEmptyStrings().split(value);
57
		switch (rel) {
58
		case EXACT:
59
			final String lucene = getFieldName() + ":" + "\"" + value + "\"";
60
			return StringUtils.isNotBlank(weight()) ? lucene + weight() + " " + expand(value) : lucene;
61
		case EQUAL:
62
		case ALL:
63
			return "(" + handleTokens(termTokenizer, "AND") + " " + expandTokens(weightTokenizer) + ")";
64
		case ANY:
65
			return "(" + handleTokens(termTokenizer, "OR") + " " + expandTokens(weightTokenizer) + ")";
66
		case NOT:
67
			return "NOT " + field + ":" + "\"" + value + "\"";
68
		case LT:
69
			if (isDate(value)) {
70
				value = checkDate(value);
71
			}
72
			return field + ":" + "{* TO " + value + "}" + weight();
73
		case GT:
74
			if (isDate(value)) {
75
				value = checkDate(value);
76
			}
77
			return field + ":" + "{" + value + " TO *}" + weight();
78
		case LTE:
79
			if (isDate(value)) {
80
				value = checkDate(value);
81
			}
82
			return field + ":" + "[* TO " + value + "]" + weight();
83
		case GTE:
84
			if (isDate(value)) {
85
				value = checkDate(value);
86
			}
87
			return field + ":" + "[" + value + " TO *]" + weight();
88
		case WITHIN:
89
			String lowerValue = value.split(" ")[0];
90
			String upperValue = value.split(" ")[1];
91
			if (isDate(lowerValue)) {
92
				lowerValue = checkDate(lowerValue);
93
			}
94
			if (isDate(upperValue)) {
95
				upperValue = checkDate(upperValue);
96
			}
97
			return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight();
98
		default:
99
			throw new RuntimeException("unable to serialize: " + toString());
100
		}
101
	}
102

    
103
	private String getFieldName() {
104
		return aliases.get(field) != null ? aliases.get(field) : field;
105
	}
106

    
107
	private String weight() {
108
		return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
109
	}
110

    
111
	private String expandTokens(final Iterable<String> tokens) {
112
		return Joiner.on("").skipNulls().join(Iterables.transform(tokens, new Function<String, String>() {
113
			@Override
114
			public String apply(final String s) {
115
				if (field.equals(dnetDefaultField.toLowerCase()) || field.equals(dnetDefaultField.toLowerCase())) {
116
					return expand(s);
117
				}
118
				return null;
119
			}
120
		})).trim();
121
	}
122

    
123
	private String expand(final String token) {
124
		String ret = "";
125
		if (!weights.keySet().contains(field)) {
126
			for (Entry<String, String> e : weights.entrySet()) {
127
				ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " ";
128
			}
129
		}
130
		return ret.trim();
131
	}
132

    
133
	private String handleTokens(final Iterable<String> tokens, final String op) {
134
		final String separator = " " + op + " ";
135
		return Joiner.on(separator).join(Iterables.transform(tokens, new Function<String, String>() {
136
			@Override
137
			public String apply(final String s) {
138
				return field + ":" + checkEscaping(s) + weight();
139
			}
140
		})).trim();
141
	}
142

    
143
	private String checkEscaping(String token) {
144
		boolean isWildcard = token.contains("*") || token.contains("?");
145
		boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
146

    
147
		if (!(isWildcard & isWildcardEnabled)) {
148
			token = QueryParserBase.escape(token);
149
		}
150
		return token;
151
	}
152

    
153
	private boolean isDate(final String aPossibleDate) {
154
		try {
155
			simpleDateFormat.parse(aPossibleDate);
156
		} catch (ParseException pe) {
157
			return false;
158
		}
159
		return true;
160
	}
161

    
162
	private String checkDate(final String date) {
163
		if (!date.endsWith("Z")) return date + "T00:00:00Z";
164
		return date;
165
	}
166

    
167
	public String getField() {
168
		return field;
169
	}
170

    
171
	public Relation getRel() {
172
		return rel;
173
	}
174

    
175
	public String getValue() {
176
		return value;
177
	}
178

    
179
}
(8-8/8)