Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataexport;
2

    
3
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
4
import org.apache.commons.lang3.StringUtils;
5
import org.dom4j.Document;
6
import org.dom4j.tree.DefaultText;
7

    
8
import java.text.ParseException;
9
import java.util.Map;
10
import java.util.Objects;
11

    
12
public class RecordFilter {
13

    
14
    private Map<String, String> criteria;
15

    
16
    private String yearXpath;
17
    private int fromYear;
18
    private int toYear;
19

    
20
    public RecordFilter(Map<String, String> criteria, String yearXpath, int fromYear, int toYear) {
21
        this.criteria = criteria;
22
        this.yearXpath = yearXpath;
23
        this.fromYear = fromYear;
24
        this.toYear = toYear;
25
    }
26

    
27
    public boolean matches(final Document record, final boolean strict) throws ParseException {
28

    
29
        final String date = record.valueOf(yearXpath);
30
        if (StringUtils.isBlank(date)) {
31
            return false;
32
        }
33

    
34
        final String yyyy = DnetXsltFunctions.extractYear(date);
35
        if (StringUtils.isBlank(yyyy)) {
36
            return false;
37
        }
38
        final Integer year = Integer.valueOf(yyyy);
39

    
40
        if (year < fromYear | year > toYear) {
41
            return false;
42
        }
43

    
44

    
45
        boolean matched = false;
46
        for(final Map.Entry<String, String> c : criteria.entrySet()) {
47

    
48
            boolean matches = matched = record.selectNodes(c.getKey()).stream()
49
                    .filter(Objects::nonNull)
50
                    .map(o -> textOf(o))
51
                    .map(s -> ((String) s).toLowerCase())
52
                    .map(s -> ((String) s).trim())
53
                    .anyMatch(s -> {
54
                        return ((String) s).matches(c.getValue().toLowerCase());
55
                    });
56

    
57
            if (matches && !strict) {
58
                return true;
59
            }
60
        }
61
        return matched;
62
    }
63

    
64
    private String textOf(final Object o) {
65
        if (o instanceof org.dom4j.tree.DefaultText) {
66
            return ((DefaultText) o).getText();
67
        }
68
        return o.toString();
69
    }
70

    
71
}
(10-10/10)