Project

General

Profile

1
package eu.dnetlib.clients.index.query;
2

    
3
import java.io.IOException;
4
import java.util.ArrayList;
5
import java.util.HashMap;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.stream.Collectors;
9

    
10
import com.google.common.collect.Iterables;
11
import com.google.common.collect.Lists;
12
import com.google.common.collect.Maps;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.z3950.zing.cql.*;
16

    
17
/**
18
 * Use this class to cleanup a CQL tree and obtain all the options
19
 *
20
 * @author marko & claudio
21
 */
22
public class Pruner {
23

    
24
	/**
25
	 * All options have to be in this namespace.
26
	 */
27
	public static final String DNET_URI = "NAMESPACE";
28
	private static final Log log = LogFactory.getLog(Pruner.class); // NOPMD by marko on 11/24/08 5:02 PM
29
	private String optionUri = DNET_URI;
30

    
31
	/**
32
	 * Helper method, parse a given CQL string.
33
	 *
34
	 * @param cqlQuery
35
	 * @return
36
	 * @throws CQLParseException
37
	 * @throws IOException
38
	 */
39
	CQLNode parse(final String cqlQuery) throws CQLParseException, IOException {
40
		return new CQLParser().parse(cqlQuery);
41
	}
42

    
43
	/**
44
	 * Remove all options from a given CQL AST and return all the options.
45
	 * <p>
46
	 * The CQL tree is modified.
47
	 *
48
	 * @param root cql tree
49
	 * @return pair containing a new root node and a list of options
50
	 */
51
	public Result prune(final CQLNode root) {
52
		return prune(new HashMap<String, String>(), root);
53
	}
54

    
55
	/**
56
	 * Actual recursive implementation, dispatches the implementation to the appropriate overloaded method.
57
	 *
58
	 * @param prefixes
59
	 * @param root
60
	 * @return
61
	 */
62
	public Result prune(final Map<String, String> prefixes, final CQLNode root) {
63

    
64
		if (root instanceof CQLBooleanNode)
65
			return prune(prefixes, (CQLBooleanNode) root);
66

    
67
		if (root instanceof CQLPrefixNode)
68
			return prune(prefixes, (CQLPrefixNode) root);
69

    
70
		if (root instanceof CQLSortNode)
71
			return prune(prefixes, (CQLSortNode) root);
72

    
73
		return new Result(root, new ArrayList<String>());
74
	}
75

    
76
	/**
77
	 * If the current node is a cql "sort" node, just return the inner subtree.
78
	 *
79
	 * @param prefixes
80
	 * @param node
81
	 * @return
82
	 */
83
	public Result prune(final Map<String, String> prefixes, final CQLSortNode node) {
84
		Result res = prune(prefixes, node.subtree);
85
		node.subtree = res.getNode();
86
		res.setNode(node);
87
		return res;
88
	}
89

    
90
	/**
91
	 * If the current node is a cql "prefix" node, add his namespace declaration to the current list of namespaces and
92
	 * return the pruned inner subtree.
93
	 * <p>
94
	 * If the prefix node contains only one single option element, we have to return null. (TODO: perhaps there is a
95
	 * better solution).
96
	 *
97
	 * @param prefixes
98
	 * @param node
99
	 * @return
100
	 */
101
	public Result prune(final Map<String, String> prefixes, final CQLPrefixNode node) {
102
		final HashMap<String, String> subPrefixes = Maps.newHashMap(prefixes);
103
		subPrefixes.put(node.prefix.name, node.prefix.identifier);
104

    
105
		if (isOption(subPrefixes, node.subtree))
106
			return new Result(null, Lists.newArrayList(getOption(node.subtree)));
107

    
108
		boolean pruneThisPrefix = node.prefix.identifier.equals(optionUri);
109
		if (pruneThisPrefix)
110
			return prune(subPrefixes, node.subtree);
111

    
112
		Result res = prune(subPrefixes, node.subtree);
113
		node.subtree = res.getNode();
114
		res.setNode(node);
115
		return res;
116

    
117
	}
118

    
119
	/**
120
	 * boolean prunes are handled in the prune(prefix, node, left, right).
121
	 *
122
	 * @param prefixes
123
	 * @param node
124
	 * @return
125
	 */
126
	public Result prune(final Map<String, String> prefixes, final CQLBooleanNode node) {
127
		return prune(prefixes, node, node.left, node.right);
128
	}
129

    
130
	/**
131
	 * Detects if a left or right side of a boolean node is a option term, and returns the other side (recursively
132
	 * pruned). It also returns the accumulated options along the way.
133
	 *
134
	 * @param prefixes
135
	 * @param bool
136
	 * @param left
137
	 * @param right
138
	 * @return
139
	 */
140
	public Result prune(final Map<String, String> prefixes, final CQLBooleanNode bool, final CQLNode left, final CQLNode right) {
141

    
142
		if (isOption(prefixes, left) && isOption(prefixes, right)) {
143
			List<Result> r = Lists.newArrayList(trimOption(prefixes, left, right), trimOption(prefixes, right, left));
144

    
145
			return new Result(null, r
146
					.stream()
147
					.filter(it -> it != null)
148
					.map(res -> res.getOptions())
149
					.flatMap(strings -> strings.stream())
150
					.collect(Collectors.toList()));
151
		}
152

    
153
		Result res = anyNotNull(trimOption(prefixes, left, right), trimOption(prefixes, right, left));
154

    
155
		if (res != null)
156
			return res;
157

    
158
		final Result leftResult = prune(prefixes, left);
159
		final Result rightResult = prune(prefixes, right);
160

    
161
		bool.left = leftResult.getNode();
162
		bool.right = rightResult.getNode();
163
		return new Result(clean(bool), Iterables.concat(leftResult.getOptions(), rightResult.getOptions()));
164
	}
165

    
166
	public <T> T anyNotNull(T a, T b) {
167
		if (a != null)
168
			return a;
169
		return b;
170
	}
171

    
172
	/**
173
	 * Trims an option from a boolean node if one if it's sides is an option term.
174
	 * <p>
175
	 * Intended to be used once for each sides and then swap.
176
	 *
177
	 * @param prefixes
178
	 * @param a
179
	 * @param b
180
	 * @return
181
	 */
182
	public Result trimOption(final Map<String, String> prefixes, final CQLNode a, final CQLNode b) {
183
		log.debug("trim option?" + prefixes + " a " + a.toCQL());
184
		if (isOption(prefixes, a)) {
185
			log.debug("IS OPTION...");
186
			return trimOption(prefixes, prefixFromOption(a), getOption(a), b);
187
		}
188
		log.debug("IS NOT OPTION");
189
		return null;
190
	}
191

    
192
	/**
193
	 * prune(prefixes, bool, left, right) uses this helper method to do the dirty job:
194
	 * <p>
195
	 * we have to detect if a term node is a term option node. by checking the namespace uri associated with the term
196
	 * prefix according the the current namespace prefix scope (held in prefixes, which is passed down recursively by
197
	 * copy).
198
	 *
199
	 * @param prefixes
200
	 * @param ns
201
	 * @param o
202
	 * @param subtree
203
	 * @return
204
	 */
205
	public Result trimOption(final Map<String, String> prefixes, final String ns, final String o, final CQLNode subtree) {
206
		log.debug("trimming " + prefixes + " ns " + ns + " o " + o);
207

    
208
		final String namespaceUri = prefixes.get(ns);
209

    
210
		if (!optionUri.equals(namespaceUri)) {
211
			return null;
212
		}
213

    
214
		final Result res = prune(prefixes, subtree);
215
		return new Result(res.getNode(), Iterables.concat(Lists.newArrayList(o), res.getOptions()));
216
	}
217

    
218
	/**
219
	 * Drop a boolean node (and, or etc) if one of the sides has been dropped.
220
	 *
221
	 * @param bool
222
	 * @return
223
	 */
224
	private CQLNode clean(final CQLBooleanNode bool) {
225
		if (bool.left == null)
226
			return bool.right;
227
		if (bool.right == null)
228
			return bool.left;
229
		return bool;
230
	}
231

    
232
	public String getOption(final CQLNode node) {
233
		return indexFromOption(node) + "=" + termFromOption(node);
234
	}
235

    
236
	////////////////// helpers
237

    
238
	private String indexFromOption(final CQLNode node) {
239
		return ((CQLTermNode) node).getIndex().replaceAll("[a-z]*\\.(.+)", "$1");
240
	}
241

    
242
	private String termFromOption(final CQLNode node) {
243
		return ((CQLTermNode) node).getTerm();
244
	}
245

    
246
	public String prefixFromOption(final String option) {
247
		return option.replaceAll("([a-z]*)\\..+", "$1");
248
	}
249

    
250
	public String prefixFromOption(final CQLNode node) {
251
		if (node instanceof CQLTermNode)
252
			return prefixFromOption(((CQLTermNode) node).getIndex());
253

    
254
		return null;
255
	}
256

    
257
	public boolean isOption(final Map<String, String> prefixes, final String option) {
258
		return prefixes.containsKey(prefixFromOption(option)) && prefixes.get(prefixFromOption(option)).equals(getOptionUri());
259
	}
260

    
261
	public boolean isOption(final Map<String, String> prefixes, final CQLNode node) {
262
		if (node instanceof CQLTermNode)
263
			return isOption(prefixes, ((CQLTermNode) node).getIndex());
264

    
265
		return false;
266
	}
267

    
268
	public String getOptionUri() {
269
		return optionUri;
270
	}
271

    
272
	public void setOptionUri(String optionUri) {
273
		this.optionUri = optionUri;
274
	}
275

    
276
	class Result {
277

    
278
		private CQLNode node;
279
		private List<String> options;
280

    
281
		public Result(final CQLNode node, final List<String> options) {
282
			super();
283
			this.node = node;
284
			this.options = options;
285
		}
286

    
287
		public Result(final CQLNode node, final Iterable<String> concat) {
288
			this.node = node;
289
			this.options = Lists.newArrayList(concat);
290
		}
291

    
292
		public CQLNode getNode() {
293
			return node;
294
		}
295

    
296
		public void setNode(final CQLNode node) {
297
			this.node = node;
298
		}
299

    
300
		public List<String> getOptions() {
301
			return options;
302
		}
303

    
304
		public void setOptions(final List<String> options) {
305
			this.options = options;
306
		}
307

    
308
		public Map<String, List<String>> getOptionMap() {
309
			Map<String, List<String>> res = new HashMap<String, List<String>>();
310
			for (String opt : options) {
311
				String[] k = opt.split("=");
312
				List<String> l = res.get(k[0]);
313
				if (l == null)
314
					l = new ArrayList<String>();
315
				l.add(k[1]);
316
				res.put(k[0], l);
317
			}
318
			return res;
319
		}
320
	}
321
}
(5-5/9)