1
|
package eu.dnetlib.functionality.index.solr.utils;
|
2
|
|
3
|
import eu.dnetlib.miscutils.functional.UnaryFunction;
|
4
|
import org.apache.oro.text.perl.Perl5Util;
|
5
|
|
6
|
public class HighlightUtils implements UnaryFunction<String, String> {
|
7
|
|
8
|
public final static String DEFAULT_HL_PRE = "[hl]";
|
9
|
|
10
|
public final static String DEFAULT_HL_POST = "[/hl]";
|
11
|
|
12
|
private static String CLEAN_HEADER = "s#\\[/?hl\\]##gm";
|
13
|
private static String CLEAN_REGEX_OPEN = "<([^>]*)\\[hl\\]([^>]*)>";
|
14
|
private static String CLEAN_REGEX_CLOSE = "<([^>]*)\\[\\/hl\\]([^>]*)>";
|
15
|
|
16
|
// private static String CLEAN_REGEX_OPEN = "s#<([^>]*)\\[hl\\]([^>]*)>#<$1$2>#gm";
|
17
|
// private static String CLEAN_REGEX_CLOSE = "s#<([^>]*)\\[\\/hl\\]([^>]*)>#<$1$2>#gm";
|
18
|
|
19
|
private Perl5Util p5util = new Perl5Util();
|
20
|
|
21
|
@Override
|
22
|
public String evaluate(final String doc) {
|
23
|
String[] chunk = doc.split("</header>");
|
24
|
String string = cleanHeader(chunk[0]) + "</header>" + cleanBody(chunk[1]);
|
25
|
return string;
|
26
|
}
|
27
|
|
28
|
private String cleanHeader(final String header) {
|
29
|
return p5util.substitute(CLEAN_HEADER, header);
|
30
|
}
|
31
|
|
32
|
// TODO: implement a faster way to do this
|
33
|
private String cleanBody(final String body) {
|
34
|
String res = body.replaceAll(CLEAN_REGEX_OPEN, "<$1$2>").replaceAll(CLEAN_REGEX_CLOSE, "<$1$2>");
|
35
|
|
36
|
if (res.equals(body)) return res;
|
37
|
|
38
|
return cleanBody(res);
|
39
|
}
|
40
|
|
41
|
}
|