Project

General

Profile

1
package eu.dnetlib.data.cleaner;
2

    
3
import java.io.StringReader;
4
import java.util.*;
5

    
6
import com.google.common.base.Splitter;
7
import com.google.common.collect.Lists;
8
import com.google.common.collect.Maps;
9
import com.google.common.collect.Sets;
10
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
11
import eu.dnetlib.rmi.data.CleanerException;
12
import eu.dnetlib.rmi.enabling.ISLookUpException;
13
import eu.dnetlib.rmi.enabling.ISLookUpService;
14
import org.dom4j.Document;
15
import org.dom4j.Element;
16
import org.dom4j.io.SAXReader;
17
import org.springframework.beans.factory.annotation.Autowired;
18

    
19
public class CleaningRuleFactory {
20

    
21
	@Autowired
22
	private UniqueServiceLocator serviceLocator;
23

    
24
	public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
25
		try {
26
			final String prof = this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
27
					"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
28

    
29
			final SAXReader reader = new SAXReader();
30
			final Document doc = reader.read(new StringReader(prof));
31

    
32
			final CleaningRule rule = new CleaningRule();
33

    
34
			final ISLookUpService lookup = this.serviceLocator.getService(ISLookUpService.class);
35
			Map<String, String> namespaceUris = Maps.newHashMap();
36

    
37
			for (final Object o : doc.selectNodes("//NAMESPACE")) {
38
				final Element node = (Element) o;
39
				final String ns = node.valueOf("@ns");
40
				final String uri = node.valueOf("@uri");
41
				namespaceUris.put(ns, uri);
42
			}
43

    
44
			for (final Object o : doc.selectNodes("//RULE")) {
45
				final Element node = (Element) o;
46

    
47
				final String xpath = node.valueOf("@xpath");
48
				final String vocabularies = node.valueOf("@vocabularies");
49
				final String groovyRule = node.valueOf("@groovy");
50
				final String strict = node.valueOf("@strict");
51

    
52
				final XPATHCleaningRule xpathRule;
53
				if (vocabularies != null && vocabularies.length() > 0) {
54
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
55
					xpathRule = new VocabularyRule(list, lookup);
56
				} else {
57
					xpathRule = new GroovyRule(groovyRule);
58
				}
59
				xpathRule.setXpath(xpath);
60
				xpathRule.setStrict("true".equals(strict));
61
				xpathRule.setNamesapceMap(namespaceUris);
62
				rule.getXpathRules().add(xpathRule);
63
			}
64
			return rule;
65
		} catch (final Exception e) {
66
			throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
67
		}
68
	}
69

    
70
	public List<String> getRuleIds() throws CleanerException {
71
		try {
72
			final HashSet<String> response = new HashSet<String>();
73

    
74
			final List<String> list = this.serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
75
			if (list != null) {
76
				response.addAll(list);
77
			}
78

    
79
			return Lists.newArrayList(response);
80
		} catch (final ISLookUpException e) {
81
			throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
82
		}
83
	}
84

    
85
}
(3-3/6)