Project

General

Profile

1
package eu.dnetlib.functionality.index.solr.utils;
2

    
3
import eu.dnetlib.miscutils.functional.UnaryFunction;
import java.util.regex.Pattern;
import org.apache.oro.text.perl.Perl5Util;
5

    
6
public class HighlightUtils implements UnaryFunction<String, String> {
7

    
8
	public final static String DEFAULT_HL_PRE = "[hl]";
9

    
10
	public final static String DEFAULT_HL_POST = "[/hl]";
11

    
12
	private static String CLEAN_HEADER = "s#\\[/?hl\\]##gm";
13
	private static String CLEAN_REGEX_OPEN = "<([^>]*)\\[hl\\]([^>]*)>";
14
	private static String CLEAN_REGEX_CLOSE = "<([^>]*)\\[\\/hl\\]([^>]*)>";
15

    
16
	// private static String CLEAN_REGEX_OPEN = "s#<([^>]*)\\[hl\\]([^>]*)>#<$1$2>#gm";
17
	// private static String CLEAN_REGEX_CLOSE = "s#<([^>]*)\\[\\/hl\\]([^>]*)>#<$1$2>#gm";
18

    
19
	private Perl5Util p5util = new Perl5Util();
20

    
21
	@Override
22
	public String evaluate(final String doc) {
23
		String[] chunk = doc.split("</header>");
24
		String string = cleanHeader(chunk[0]) + "</header>" + cleanBody(chunk[1]);
25
		return string;
26
	}
27

    
28
	private String cleanHeader(final String header) {
29
		return p5util.substitute(CLEAN_HEADER, header);
30
	}
31

    
32
	// TODO: implement a faster way to do this
33
	private String cleanBody(final String body) {
34
		String res = body.replaceAll(CLEAN_REGEX_OPEN, "<$1$2>").replaceAll(CLEAN_REGEX_CLOSE, "<$1$2>");
35

    
36
		if (res.equals(body)) return res;
37

    
38
		return cleanBody(res);
39
	}
40

    
41
}
    (1-1/1)