Project

General

Profile

1
package eu.dnetlib.msro.workers.aggregation.cleaner;
2

    
3
import java.io.StringReader;
4
import java.util.Set;
5

    
6
import org.dom4j.Document;
7
import org.dom4j.Element;
8
import org.dom4j.io.SAXReader;
9
import org.springframework.beans.factory.annotation.Autowired;
10
import org.springframework.stereotype.Component;
11

    
12
import com.google.common.base.Splitter;
13
import com.google.common.collect.Sets;
14

    
15
import eu.dnetlib.clients.is.InformationServiceClient;
16
import eu.dnetlib.msro.exceptions.MSROException;
17

    
18
@Component
19
public class CleaningRuleFactory {
20

    
21
	@Autowired
22
	private InformationServiceClient isClient;
23

    
24
	public CleaningRule obtainCleaningRule(final String ruleId) throws MSROException {
25
		try {
26
			final String prof =
27
					isClient.findOne("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
28

    
29
			final SAXReader reader = new SAXReader();
30
			final Document doc = reader.read(new StringReader(prof));
31

    
32
			final CleaningRule rule = new CleaningRule();
33

    
34
			for (final Object o : doc.selectNodes("//RULE")) {
35
				final Element node = (Element) o;
36

    
37
				final String xpath = node.valueOf("@xpath");
38
				final String vocabularies = node.valueOf("@vocabularies");
39
				final String groovyRule = node.valueOf("@groovy");
40
				final String strict = node.valueOf("@strict");
41

    
42
				final XPATHCleaningRule xpathRule;
43
				if ((vocabularies != null) && (vocabularies.length() > 0)) {
44
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
45
					xpathRule = new VocabularyRule(list, isClient);
46
				} else {
47
					xpathRule = new GroovyRule(groovyRule);
48
				}
49
				xpathRule.setXpath(xpath);
50
				xpathRule.setStrict("true".equals(strict));
51
				rule.getXpathRules().add(xpathRule);
52
			}
53
			return rule;
54
		} catch (final Exception e) {
55
			throw new MSROException("Error obtaing cleaner rule " + ruleId, e);
56
		}
57
	}
58

    
59
}
(2-2/5)