Project

General

Profile

1
package eu.dnetlib.index.parse;
2

    
3
import java.text.ParseException;
4
import java.text.SimpleDateFormat;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.StringTokenizer;
9

    
10
import com.google.common.collect.BiMap;
11
import com.google.common.collect.HashBiMap;
12
import com.google.common.collect.Maps;
13
import org.apache.lucene.queryparser.classic.QueryParserBase;
14

    
15
public class TermNode extends Node {
16

    
17
	public static final String dnetDefaultField = "__all";
18

    
19
	private String field;
20
	private Relation rel;
21
	private String value;
22
	private Map<String, List<String>> options = Maps.newHashMap();
23
	private BiMap<String, String> aliases = HashBiMap.create();
24
	private Map<String, String> weights = Maps.newHashMap();
25
	private SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
26

    
27
	public TermNode(final String field, final Relation rel, final String value) {
28
		this.field = field;
29
		this.rel = rel;
30
		this.value = value;
31
		simpleDateFormat.setLenient(false);
32
	}
33

    
34
	public TermNode(final String field, final Relation rel, final String value, final Map<String, List<String>> options, final BiMap<String, String> aliases,
35
			final Map<String, String> weights) {
36
		this(field, rel, value);
37
		this.options = options;
38
		this.aliases = aliases;
39
		this.weights = weights;
40
	}
41

    
42
	@Override
43
	public String toString() {
44
		return TermNode.class.getSimpleName() + "(" + field + " " + rel + " " + value + ")";
45
	}
46

    
47
	@Override
48
	public String toLucene() {
49
		StringTokenizer termTokenizer = new StringTokenizer(value, " ");
50
		StringTokenizer weightTokenizer = new StringTokenizer(value, " ");
51

    
52
		switch (rel) {
53
		case EXACT:
54
			return getFieldName() + ":" + "\"" + value + "\"" + weight() + " " + expand(value);
55
		case ALL:
56
			return "(" + handleTokens(termTokenizer, "+") + " " + expandTokens(weightTokenizer) + ")";
57
		case EQUAL:
58
		case ANY:
59
			return "(" + handleTokens(termTokenizer, "") + " " + expandTokens(weightTokenizer) + ")";
60
		case NOT:
61
			return "-" + field + ":" + "\"" + value + "\"";
62
		case LT:
63
			if (isDate(value)) {
64
				value = checkDate(value);
65
			}
66
			return field + ":" + "{* TO " + value + "}" + weight();
67
		case GT:
68
			if (isDate(value)) {
69
				value = checkDate(value);
70
			}
71
			return field + ":" + "{" + value + " TO *}" + weight();
72
		case LTE:
73
			if (isDate(value)) {
74
				value = checkDate(value);
75
			}
76
			return field + ":" + "[* TO " + value + "]" + weight();
77
		case GTE:
78
			if (isDate(value)) {
79
				value = checkDate(value);
80
			}
81
			return field + ":" + "[" + value + " TO *]" + weight();
82
		case WITHIN:
83
			String lowerValue = value.split(" ")[0];
84
			String upperValue = value.split(" ")[1];
85
			if (isDate(lowerValue)) {
86
				lowerValue = checkDate(lowerValue);
87
			}
88
			if (isDate(upperValue)) {
89
				upperValue = checkDate(upperValue);
90
			}
91
			return field + ":[" + lowerValue + " TO " + upperValue + "]" + weight();
92
		default:
93
			throw new RuntimeException("unable to serialize: " + toString());
94
		}
95
	}
96

    
97
	private String getFieldName() {
98
		return aliases.get(field) != null ? aliases.get(field) : field;
99
	}
100

    
101
	private String weight() {
102
		return (weights != null) && (weights.get(field) != null) ? "^" + weights.get(field) : "";
103
	}
104

    
105
	private String expandTokens(final StringTokenizer tokenizer) {
106
		String ret = "";
107
		while (tokenizer.hasMoreTokens()) {
108
			String token = tokenizer.nextToken();
109

    
110
			if (field.equals(dnetDefaultField.toLowerCase()) || field.equals(dnetDefaultField.toLowerCase())) {
111
				ret += expand(token);
112
			}
113
		}
114
		return ret.trim();
115
	}
116

    
117
	private String expand(final String token) {
118
		String ret = "";
119
		if (!weights.keySet().contains(field)) {
120
			for (Entry<String, String> e : weights.entrySet()) {
121
				ret += e.getKey() + ":\"" + checkEscaping(token) + "\"^" + e.getValue() + " ";
122
			}
123
		}
124
		return ret;
125
	}
126

    
127
	private String handleTokens(final StringTokenizer tokenizer, final String op) {
128
		String ret = "";
129
		while (tokenizer.hasMoreTokens()) {
130
			String token = tokenizer.nextToken();
131
			ret += op + field + ":" + checkEscaping(token) + weight() + " ";
132
		}
133
		return ret.trim();
134
	}
135

    
136
	private String checkEscaping(String token) {
137
		boolean isWildcard = token.contains("*") || token.contains("?");
138
		boolean isWildcardEnabled = ((options.get("wildcard") != null) && options.get("wildcard").contains("true")) || token.equals("*");
139

    
140
		if (!(isWildcard & isWildcardEnabled)) {
141
			token = QueryParserBase.escape(token);
142
		}
143
		return token;
144
	}
145

    
146
	private boolean isDate(final String aPossibleDate) {
147
		try {
148
			simpleDateFormat.parse(aPossibleDate);
149
		} catch (ParseException pe) {
150
			return false;
151
		}
152
		return true;
153
	}
154

    
155
	private String checkDate(final String date) {
156
		if (!date.endsWith("Z")) return date + "T00:00:00Z";
157
		return date;
158
	}
159

    
160
	public String getField() {
161
		return field;
162
	}
163

    
164
	public Relation getRel() {
165
		return rel;
166
	}
167

    
168
	public String getValue() {
169
		return value;
170
	}
171

    
172
}
(8-8/8)