Project

General

Profile

1
package eu.dnetlib.msro.workers.aggregation.cleaner;
2

    
3
import java.io.StringReader;
4
import java.util.Set;
5

    
6
import org.codehaus.plexus.util.StringUtils;
7
import org.dom4j.Document;
8
import org.dom4j.Element;
9
import org.dom4j.io.SAXReader;
10
import org.springframework.beans.factory.annotation.Autowired;
11
import org.springframework.stereotype.Component;
12

    
13
import com.google.common.base.Splitter;
14
import com.google.common.collect.Sets;
15

    
16
import eu.dnetlib.clients.is.InformationServiceClient;
17
import eu.dnetlib.msro.exceptions.MSROException;
18

    
19
@Component
20
public class CleaningRuleFactory {
21

    
22
	@Autowired
23
	private InformationServiceClient isClient;
24

    
25
	public CleaningRule obtainCleaningRule(final String ruleId) throws MSROException {
26
		try {
27
			final String prof =
28
					isClient.findOne("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
29

    
30
			final SAXReader reader = new SAXReader();
31
			final Document doc = reader.read(new StringReader(prof));
32

    
33
			final CleaningRule rule = new CleaningRule();
34

    
35
			for (final Object o : doc.selectNodes("//RULE")) {
36
				final Element node = (Element) o;
37

    
38
				final String xpath = node.valueOf("@xpath");
39
				final String vocabularies = node.valueOf("@vocabularies");
40
				final String groovyRule = node.valueOf("@groovy");
41
				final String regexRule = node.valueOf("@regex");
42
				final String strict = node.valueOf("@strict");
43

    
44
				final XPATHCleaningRule xpathRule;
45
				if (StringUtils.isNotBlank(vocabularies)) {
46
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
47
					xpathRule = new VocabularyRule(list, isClient);
48
					xpathRule.setStrict("true".equals(strict));
49
				} else if (StringUtils.isNotBlank(groovyRule)) {
50
					xpathRule = new GroovyRule(groovyRule);
51
					xpathRule.setStrict("true".equals(strict));
52
				} else {
53
					xpathRule = new TestRegexValueRule(regexRule);
54
					xpathRule.setStrict(true);
55
				}
56

    
57
				xpathRule.setXpath(xpath);
58
				rule.getXpathRules().add(xpathRule);
59
			}
60
			return rule;
61
		} catch (final Exception e) {
62
			throw new MSROException("Error obtaing cleaner rule " + ruleId, e);
63
		}
64
	}
65

    
66
}
(2-2/6)