Project

General

Profile

« Previous | Next » 

Revision 47823

[maven-release-plugin] copy for tag cnr-data-utility-cleaner-service-3.0.1

View differences:

modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/cnr-data-utility-cleaner-service/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "cnr-data-utility-cleaner-service"}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/test/java/eu/dnetlib/data/utility/cleaner/VocabularyRuleTest.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5
import static org.junit.Assert.assertNull;
6
import static org.mockito.Matchers.anyString;
7
import static org.mockito.Mockito.times;
8
import static org.mockito.Mockito.verify;
9
import static org.mockito.Mockito.when;
10

  
11
import java.io.StringReader;
12
import java.util.List;
13

  
14
import org.dom4j.Document;
15
import org.dom4j.io.SAXReader;
16
import org.junit.Before;
17
import org.junit.Test;
18
import org.junit.runner.RunWith;
19
import org.mockito.Mock;
20
import org.mockito.runners.MockitoJUnit44Runner;
21

  
22
import com.google.common.collect.Lists;
23
import com.google.common.collect.Sets;
24

  
25
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
26

  
27
@RunWith(MockitoJUnit44Runner.class)
28
public class VocabularyRuleTest {
29

  
30
	private static final String VOCABULARY_NAME_1 = "TEST VOCABULARY 1";
31
	private static final String VOCABULARY_NAME_2 = "TEST VOCABULARY 2";
32
	private static final List<String> VOCABULARY = Lists.newArrayList("XXXX|-:-|AAAA", "YYYY|-:-|AAAA", "ZZZZ|-:-|AAAA");
33

  
34
	/**
35
	 * Class Under Test
36
	 */
37
	private VocabularyRule rule;
38

  
39
	@Mock
40
	private ISLookUpService lookup;
41

  
42
	@Before
43
	public void setUp() throws Exception {
44
		when(lookup.quickSearchProfile(anyString())).thenReturn(VOCABULARY);
45

  
46
		rule = new VocabularyRule(Sets.newHashSet(VOCABULARY_NAME_1, VOCABULARY_NAME_2), lookup);
47
	}
48

  
49
	@Test
50
	public void testSetup() throws Exception {
51
		final String xpath = "/a/b";
52
		rule.setXpath(xpath);
53

  
54
		execute("<a><b>XXXX</b></a>");
55

  
56
		verify(lookup, times(2)).quickSearchProfile(anyString());
57
		assertEquals(VOCABULARY.size(), rule.getVocabularyTerms().size());
58
	}
59

  
60
	@Test
61
	public void testApplyXpathRule() throws Exception {
62
		final String xpath = "/a/b";
63
		rule.setXpath(xpath);
64
		final Document doc = execute("<a><b>XXXX</b></a>");
65
		assertEquals("AAAA", doc.valueOf(xpath));
66
		assertNull(rule.verifyValue("AAAA"));
67
		assertNotNull(rule.verifyValue("XXXX"));
68
	}
69

  
70
	@Test
71
	public void testApplyXpathRule_2() throws Exception {
72
		final String xpath = "/a/b";
73
		rule.setXpath(xpath);
74
		final Document doc = execute("<a><b>XXXX</b></a>");
75
		assertEquals("AAAA", doc.valueOf(xpath));
76
		assertNull(rule.verifyValue("AAAA"));
77
		assertNotNull(rule.verifyValue("XXXX"));
78
	}
79

  
80
	@Test
81
	public void testApplyXpathRule_3() throws Exception {
82
		final String xpath = "/a/b";
83
		rule.setXpath(xpath);
84
		final Document doc = execute("<a><b>XXXX</b></a>");
85
		assertEquals("AAAA", doc.valueOf(xpath));
86
	}
87

  
88
	@Test
89
	public void testApplyXpathRule_attr() throws Exception {
90
		final String xpath = "/a/b/@value";
91
		rule.setXpath(xpath);
92
		final Document doc = execute("<a><b value='XXXX' /></a>");
93
		assertEquals("AAAA", doc.valueOf(xpath));
94
		assertNull(rule.verifyValue("AAAA"));
95
		assertNotNull(rule.verifyValue("XXXX"));
96
	}
97

  
98
	@Test
99
	public void testApplyXpathRule_with_spaces() throws Exception {
100
		final String xpath = "/a/b";
101
		rule.setXpath(xpath);
102
		final Document doc = execute("<a><b>  XXXX  </b></a>");
103
		assertEquals("AAAA", doc.valueOf(xpath));
104
		assertNull(rule.verifyValue("AAAA"));
105
		assertNotNull(rule.verifyValue("  XXXX  "));
106
	}
107

  
108
	@Test
109
	public void testApplyXpathRule_case() throws Exception {
110
		final String xpath = "/a/b";
111
		rule.setXpath(xpath);
112
		final Document doc = execute("<a><b>Xxxx</b></a>");
113
		assertEquals("AAAA", doc.valueOf(xpath));
114
		assertNull(rule.verifyValue("AAAA"));
115
		assertNotNull(rule.verifyValue("Xxxx"));
116
	}
117

  
118
	private Document execute(final String xml) throws Exception {
119
		final SAXReader reader = new SAXReader();
120
		final Document doc = reader.read(new StringReader(xml));
121
		System.out.println("BEFORE:\n" + doc.asXML() + "\n");
122
		rule.applyXpathRule(doc);
123
		System.out.println("AFTER:\n" + doc.asXML() + "\n");
124
		System.out.println("-----------------------------\n");
125
		return doc;
126
	}
127

  
128
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/test/java/eu/dnetlib/data/utility/cleaner/XMLCleaningRuleTest.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertTrue;
5

  
6
import java.util.HashMap;
7
import java.util.Map;
8

  
9
import org.junit.Before;
10
import org.junit.Test;
11
import org.junit.runner.RunWith;
12
import org.mockito.runners.MockitoJUnit44Runner;
13

  
14
import com.google.common.collect.Lists;
15

  
16
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
17

  
18
@RunWith(MockitoJUnit44Runner.class)
19
public class XMLCleaningRuleTest {
20

  
21
	private static final String INPUT_VALID = "<record>" + "	<header />" + "	<metadata>" + "		<a>HELLO</a>" + "	</metadata>" + "</record>";
22

  
23
	private static final String INPUT_INVALID = "<record>" + "	<header />" + "	<metadata>" + "		<a>GOOD BYE</a>" + "	</metadata>" + "</record>";
24

  
25
	/**
26
	 * Class under test.
27
	 */
28
	private CleaningRule xmlRule;
29
	private XPATHCleaningRule mockXpathRule = new XPATHCleaningRule() {
30

  
31
		@Override
32
		protected Map<String, String> verifyValue(final String value) throws CleanerException {
33
			if (value.equals("CIAO")) { return null; }
34

  
35
			Map<String, String> err = new HashMap<String, String>();
36
			err.put("term", value);
37
			return err;
38
		}
39

  
40
		@Override
41
		protected String calculateNewValue(final String oldValue) throws CleanerException {
42
			if (oldValue.equals("HELLO")) { return "CIAO"; }
43
			return oldValue;
44
		}
45
	};;
46

  
47
	@Before
48
	public void setUp() throws Exception {
49
		xmlRule = new CleaningRule();
50

  
51
		mockXpathRule.setStrict(true);
52
		mockXpathRule.setXpath("//a");
53

  
54
		xmlRule.setXpathRules(Lists.newArrayList(mockXpathRule));
55
	}
56

  
57
	@Test
58
	public void testEvaluate_valid() {
59
		String s = xmlRule.evaluate(INPUT_VALID);
60
		assertTrue(s.contains("CIAO"));
61
		assertFalse(s.contains("invalid"));
62
	}
63

  
64
	@Test
65
	public void testEvaluate_invalid() {
66
		String s = xmlRule.evaluate(INPUT_INVALID);
67
		System.out.println(s);
68
		assertFalse(s.contains("CIAO"));
69
		assertTrue(s.contains("invalid"));
70
	}
71

  
72
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/test/java/eu/dnetlib/data/utility/cleaner/CleanerServiceImplTest.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5
import static org.mockito.Mockito.verify;
6
import static org.mockito.Mockito.when;
7

  
8
import javax.xml.ws.wsaddressing.W3CEndpointReference;
9

  
10
import org.junit.Before;
11
import org.junit.Test;
12
import org.junit.runner.RunWith;
13
import org.mockito.Mock;
14
import org.mockito.runners.MockitoJUnit44Runner;
15

  
16
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
17
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
18
import eu.dnetlib.test.utils.EPRTestUtil;
19

  
20
@RunWith(MockitoJUnit44Runner.class)
21
public class CleanerServiceImplTest {
22

  
23
	/**
24
	 * Class under test.
25
	 */
26
	private CleanerServiceImpl service;
27

  
28
	@Mock
29
	private CleaningRuleFactory cleaningRuleFactory;
30
	@Mock
31
	private MappedResultSetFactory mappedResultSetFactory;
32
	@Mock
33
	private CleaningRule cleaningRule;
34

  
35
	private W3CEndpointReference epr_IN = EPRTestUtil.getTestEpr("http://1");
36
	private W3CEndpointReference epr_OUT = EPRTestUtil.getTestEpr("http://2");
37

  
38
	private static final String RULE_ID = "RULE_01";
39

  
40
	@Before
41
	public void setUp() throws Exception {
42
		when(cleaningRuleFactory.obtainCleaningRule(RULE_ID)).thenReturn(cleaningRule);
43
		when(mappedResultSetFactory.createMappedResultSet(epr_IN, cleaningRule)).thenReturn(epr_OUT);
44

  
45
		service = new CleanerServiceImpl();
46
		service.setCleaningRuleFactory(cleaningRuleFactory);
47
		service.setMappedResultSetFactory(mappedResultSetFactory);
48
	}
49

  
50
	@Test
51
	public void testClean() throws CleanerException {
52
		W3CEndpointReference epr = service.clean(epr_IN, RULE_ID);
53
		assertNotNull(epr);
54
		assertEquals(epr_OUT, epr);
55
		verify(cleaningRuleFactory).obtainCleaningRule(RULE_ID);
56
	}
57

  
58
	@Test(expected = CleanerException.class)
59
	public void testClean_null_1() throws CleanerException {
60
		service.clean(epr_IN, null);
61
	}
62

  
63
	@Test(expected = CleanerException.class)
64
	public void testClean_null_2() throws CleanerException {
65
		service.clean(null, RULE_ID);
66
	}
67
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/test/java/eu/dnetlib/data/utility/cleaner/GroovyRuleTest.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertTrue;
5

  
6
import java.io.StringReader;
7
import java.util.List;
8

  
9
import org.dom4j.Document;
10
import org.dom4j.Node;
11
import org.dom4j.io.SAXReader;
12
import org.junit.Test;
13
import org.junit.runner.RunWith;
14
import org.mockito.runners.MockitoJUnit44Runner;
15

  
16
@RunWith(MockitoJUnit44Runner.class)
17
public class GroovyRuleTest {
18

  
19
	@Test
20
	public void testApplyXpathRule_simple_constant() throws Exception {
21
		final GroovyRule rule = new GroovyRule("'YYYY'");
22

  
23
		final String xpath = "/a/b";
24

  
25
		rule.setXpath(xpath);
26

  
27
		final Document doc = execute(rule, "<a><b>XXXX</b></a>");
28

  
29
		assertEquals("YYYY", doc.valueOf(xpath));
30
	}
31

  
32
	@Test
33
	public void testApplyXpathRule_simple_regex() throws Exception {
34
		final GroovyRule rule = new GroovyRule("(input =~ /X/).replaceAll('Y')");
35

  
36
		final String xpath = "/a/b";
37

  
38
		rule.setXpath(xpath);
39

  
40
		final Document doc = execute(rule, "<a><b>aXaXa</b></a>");
41

  
42
		assertEquals("aYaYa", doc.valueOf(xpath));
43
	}
44

  
45
	@Test
46
	public void testApplyXpathRule_simple_upper() throws Exception {
47
		final GroovyRule rule = new GroovyRule("input.toUpperCase()");
48

  
49
		final String xpath = "/a/b";
50

  
51
		rule.setXpath(xpath);
52

  
53
		final Document doc = execute(rule, "<a><b>xyz</b></a>");
54

  
55
		assertEquals("XYZ", doc.valueOf(xpath));
56
	}
57

  
58
	@Test
59
	public void testApplyXpathRule_multi() throws Exception {
60
		final GroovyRule rule = new GroovyRule("'Y'");
61

  
62
		final String xpath = "/a/b";
63

  
64
		rule.setXpath(xpath);
65

  
66
		final Document doc = execute(rule, "<a><b>X</b><b>X</b><b>X</b></a>");
67

  
68
		List<?> list = doc.selectNodes(xpath);
69

  
70
		assertEquals(3, list.size());
71
		for (Object o : list) {
72
			assertEquals("Y", ((Node) o).getText());
73
		}
74

  
75
	}
76

  
77
	@Test
78
	public void testApplyXpathRule_singleAttr() throws Exception {
79
		final GroovyRule rule = new GroovyRule("'BBBB'");
80

  
81
		final String xpath = "/a/b/@value";
82

  
83
		rule.setXpath(xpath);
84

  
85
		final Document doc = execute(rule, "<a><b value='AAAA'>XXXX</b></a>");
86

  
87
		assertEquals("BBBB", doc.valueOf(xpath));
88
		assertEquals("XXXX", doc.valueOf("/a/b"));
89
	}
90

  
91
	@Test
92
	public void testApplyXpathRule_multiAttr() throws Exception {
93
		final GroovyRule rule = new GroovyRule("'B'");
94

  
95
		final String xpath = "/a/b/@value";
96

  
97
		rule.setXpath(xpath);
98

  
99
		final Document doc = execute(rule, "<a><b value='a' /><b value='b' /><b value='c' /></a>");
100

  
101
		final List<?> list = doc.selectNodes(xpath);
102

  
103
		assertEquals(3, list.size());
104
		for (Object o : list) {
105
			assertEquals("B", ((Node) o).getText());
106
		}
107
	}
108

  
109
	@Test
110
	public void testApplyXpathRule_complex() throws Exception {
111
		final GroovyRule rule = new GroovyRule("'B'");
112

  
113
		final String xpath = "/a/b";
114

  
115
		rule.setXpath(xpath);
116

  
117
		final Document doc = execute(rule, "<a><b>X<c>C</c></b></a>");
118

  
119
		assertTrue(doc.valueOf(xpath).contains("B"));
120
		assertEquals("C", doc.valueOf("/a/b/c"));
121
	}
122

  
123
	private Document execute(final GroovyRule rule, final String xml) throws Exception {
124

  
125
		final SAXReader reader = new SAXReader();
126
		final Document doc = reader.read(new StringReader(xml));
127

  
128
		System.out.println("BEFORE:\n" + doc.asXML() + "\n");
129
		rule.applyXpathRule(doc);
130
		System.out.println("AFTER:\n" + doc.asXML() + "\n");
131

  
132
		System.out.println("-----------------------------\n");
133
		return doc;
134
	}
135
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/VocabularyRule.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5
import java.util.Set;
6

  
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9

  
10
import com.google.common.base.Joiner;
11
import com.google.common.collect.Maps;
12
import com.google.common.collect.Sets;
13

  
14
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
15
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
16

  
17
/**
18
 * @author michele
19
 * 
20
 *         Vocabulary rules must be declared in a CleanerDS profile, for each vocabulary must be present the relative VocabularyDS profile:
21
 * 
22
 *         <RULE xpath="..." vocabularies="VOC1" /> <RULE xpath="..." vocabularies="VOC1, VOC2, VOC3" />
23
 */
24

  
25
public class VocabularyRule extends XPATHCleaningRule {
26

  
27
	private Set<String> vocabularies;
28

  
29
	private static final Log log = LogFactory.getLog(VocabularyRule.class); // NOPMD by marko on 11/24/08 5:02 PM
30

  
31
	private Map<String, String> synonyms = Maps.newHashMap();
32
	private Set<String> validTerms = Sets.newHashSet();
33

  
34
	public VocabularyRule(final Set<String> vocabularies, final ISLookUpService lookup) throws CleanerException {
35
		this.vocabularies = vocabularies;
36

  
37
		loadSynonymsAndTerms(lookup);
38
	}
39

  
40
	@Override
41
	protected String calculateNewValue(final String oldValue) throws CleanerException {
42
		log.debug("calculating new value for: " + oldValue);
43

  
44
		if (synonyms.isEmpty()) {
45
			log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
46
		}
47

  
48
		String newValue = null;
49

  
50
		if (synonyms.containsKey(oldValue.toLowerCase())) {
51
			newValue = synonyms.get(oldValue.toLowerCase());
52
		}
53

  
54
		if (newValue == null) {
55
			log.debug("Synonym " + oldValue + " not found in vocabulary");
56
			return oldValue;
57
		}
58

  
59
		return newValue;
60
	}
61

  
62
	private void loadSynonymsAndTerms(final ISLookUpService lookup) throws CleanerException {
63

  
64
		for (final String vocabulary : vocabularies) {
65
			try {
66
				final String query = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')"
67
						+ "//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + vocabulary + "']//TERM return "
68
						+ "( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), "
69
						+ "for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )";
70

  
71
				for (final String s : lookup.quickSearchProfile(query)) {
72
					log.debug("SYNONYM : " + s);
73
					final String[] arr = s.split("\\|-:-\\|");
74
					if (arr[0] == null || arr[0].isEmpty()) {
75
						continue;
76
					}
77
					synonyms.put(arr[0].toLowerCase(), arr[1]);
78
					validTerms.add(arr[1].toLowerCase());
79
				}
80

  
81
				log.info("VOCABULARY " + vocabulary.trim() + " - terms size " + synonyms.size());
82
			} catch (final Exception e) {
83
				throw new CleanerException("Error obtaining vocabulary " + vocabulary, e);
84
			}
85
		}
86

  
87
	}
88

  
89
	@Override
90
	protected Map<String, String> verifyValue(final String value) throws CleanerException {
91
		if (synonyms.isEmpty()) {
92
			log.warn("Vocabulary terms is void, vocabularies: " + this.vocabularies);
93
		}
94

  
95
		if (validTerms.contains(value.toLowerCase())) { return null; }
96

  
97
		final Map<String, String> error = new HashMap<String, String>();
98
		error.put("term", value);
99
		error.put("vocabularies", this.vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
100
		error.put("xpath", this.getXpath());
101
		return error;
102
	}
103

  
104
	public Map<String, String> getVocabularyTerms() {
105
		return synonyms;
106
	}
107

  
108
	@Override
109
	public String toString() {
110
		return "VOCABULARIES: [" + Joiner.on(", ").join(vocabularies) + "]";
111
	}
112

  
113
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/CleaningRuleFactory.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import java.io.StringReader;
4
import java.util.HashSet;
5
import java.util.List;
6
import java.util.Set;
7

  
8
import org.dom4j.Document;
9
import org.dom4j.Element;
10
import org.dom4j.io.SAXReader;
11
import org.springframework.beans.factory.annotation.Required;
12

  
13
import com.google.common.base.Splitter;
14
import com.google.common.collect.Lists;
15
import com.google.common.collect.Sets;
16

  
17
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
18
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
19
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
20
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
21

  
22
public class CleaningRuleFactory {
23

  
24
	private UniqueServiceLocator serviceLocator;
25

  
26
	public CleaningRule obtainCleaningRule(final String ruleId) throws CleanerException {
27
		try {
28
			final String prof = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(
29
					"/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
30

  
31
			final SAXReader reader = new SAXReader();
32
			final Document doc = reader.read(new StringReader(prof));
33

  
34
			final CleaningRule rule = new CleaningRule();
35

  
36
			final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class);
37

  
38
			for (Object o : doc.selectNodes("//RULE")) {
39
				final Element node = (Element) o;
40

  
41
				final String xpath = node.valueOf("@xpath");
42
				final String vocabularies = node.valueOf("@vocabularies");
43
				final String groovyRule = node.valueOf("@groovy");
44
				final String strict = node.valueOf("@strict");
45

  
46
				final XPATHCleaningRule xpathRule;
47
				if (vocabularies != null && vocabularies.length() > 0) {
48
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
49
					xpathRule = new VocabularyRule(list, lookup);
50
				} else {
51
					xpathRule = new GroovyRule(groovyRule);
52
				}
53
				xpathRule.setXpath(xpath);
54
				xpathRule.setStrict("true".equals(strict));
55
				rule.getXpathRules().add(xpathRule);
56
			}
57
			return rule;
58
		} catch (Exception e) {
59
			throw new CleanerException("Error obtaing cleaner rule " + ruleId, e);
60
		}
61
	}
62

  
63
	public List<String> getRuleIds() throws CleanerException {
64
		try {
65
			final HashSet<String> response = new HashSet<String>();
66

  
67
			final List<String> list = serviceLocator.getService(ISLookUpService.class).quickSearchProfile("//CLEANER_NAME");
68
			if (list != null) {
69
				response.addAll(list);
70
			}
71

  
72
			return Lists.newArrayList(response);
73
		} catch (ISLookUpException e) {
74
			throw new CleanerException("Error obtaining IDs of cleaner DSs", e);
75
		}
76
	}
77

  
78
	public UniqueServiceLocator getServiceLocator() {
79
		return serviceLocator;
80
	}
81

  
82
	@Required
83
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
84
		this.serviceLocator = serviceLocator;
85
	}
86
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/inspector/CleanerInspector.java
1
package eu.dnetlib.data.utility.cleaner.inspector;
2

  
3
import java.util.List;
4

  
5
import javax.annotation.Resource;
6

  
7
import org.springframework.stereotype.Controller;
8
import org.springframework.ui.Model;
9
import org.springframework.web.bind.annotation.RequestMapping;
10
import org.springframework.web.bind.annotation.RequestParam;
11

  
12
import com.google.common.collect.Lists;
13

  
14
import eu.dnetlib.data.utility.cleaner.CleaningRule;
15
import eu.dnetlib.data.utility.cleaner.CleaningRuleFactory;
16
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
17
import eu.dnetlib.enabling.inspector.AbstractInspectorController;
18
import eu.dnetlib.miscutils.collections.MappedCollection;
19
import eu.dnetlib.miscutils.functional.UnaryFunction;
20

  
21
@Controller
22
public class CleanerInspector extends AbstractInspectorController {
23

  
24
	@Resource
25
	private CleaningRuleFactory cleaningRuleFactory;
26

  
27
	public static class SelectOption {
28

  
29
		private String value;
30
		private boolean selected;
31

  
32
		public SelectOption(final String value, final boolean selected) {
33
			super();
34
			this.value = value;
35
			this.selected = selected;
36
		}
37

  
38
		public String getValue() {
39
			return value;
40
		}
41

  
42
		public void setValue(final String value) {
43
			this.value = value;
44
		}
45

  
46
		public boolean isSelected() {
47
			return selected;
48
		}
49

  
50
		public void setSelected(final boolean selected) {
51
			this.selected = selected;
52
		}
53
	}
54

  
55
	@RequestMapping(value = "/inspector/cleaner.do")
56
	public void cleaner(final Model model,
57
			@RequestParam(value = "rule", required = false) final String ruleId,
58
			@RequestParam(value = "dirtyText", required = false) final String dirtyText) throws CleanerException {
59

  
60
		List<String> rules = Lists.newArrayList(cleaningRuleFactory.getRuleIds());
61
		model.addAttribute("rules", selectOptions(rules, ruleId));
62

  
63
		if ((ruleId != null) && (dirtyText != null)) {
64
			CleaningRule rule = cleaningRuleFactory.obtainCleaningRule(ruleId);
65
			model.addAttribute("dirtyText", dirtyText);
66
			model.addAttribute("cleanedText", rule.evaluate(dirtyText));
67
		}
68
	}
69

  
70
	/**
71
	 * Given an list of values, return a list of SelectOption instances which have the "selected" boolean field set to true only for the
72
	 * element matching "current".
73
	 * 
74
	 * @param input
75
	 *            list of input strings
76
	 * @param current
77
	 *            current value to select
78
	 * @return
79
	 */
80
	private List<SelectOption> selectOptions(final List<String> input, final String current) {
81
		final UnaryFunction<SelectOption, String> mapper = new UnaryFunction<SelectOption, String>() {
82

  
83
			@Override
84
			public SelectOption evaluate(final String value) {
85
				return new SelectOption(value, value.equals(current));
86
			}
87
		};
88
		return Lists.newArrayList(new MappedCollection<SelectOption, String>(input, mapper));
89
	}
90

  
91
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/CleaningRule.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import java.io.StringReader;
4
import java.util.ArrayList;
5
import java.util.List;
6
import java.util.Map;
7

  
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.dom4j.Document;
11
import org.dom4j.Element;
12
import org.dom4j.Namespace;
13
import org.dom4j.QName;
14
import org.dom4j.io.SAXReader;
15
import org.springframework.beans.factory.annotation.Required;
16

  
17
import eu.dnetlib.miscutils.functional.UnaryFunction;
18

  
19
public class CleaningRule implements UnaryFunction<String, String> {
20

  
21
	private static final Log log = LogFactory.getLog(CleaningRule.class); // NOPMD by marko on 11/24/08 5:02 PM
22

  
23
	private List<XPATHCleaningRule> xpathRules = new ArrayList<XPATHCleaningRule>();
24

  
25
	@Override
26
	public String evaluate(final String text) {
27

  
28
		try {
29
			final List<Map<String, String>> errors = new ArrayList<Map<String, String>>();
30
			final Document doc = (new SAXReader()).read(new StringReader(text));
31
			for (final XPATHCleaningRule r : xpathRules) {
32
				errors.addAll(r.applyXpathRule(doc));
33
			}
34
			if (errors.size() > 0) {
35
				markAsInvalid(doc, errors);
36
			}
37
			return doc.asXML();
38
		} catch (final Exception e) {
39
			log.error("Error evaluating rule", e);
40
		}
41
		return "";
42
	}
43

  
44
	private void markAsInvalid(final Document doc, final List<Map<String, String>> errors) {
45
		final Element element = (Element) doc.selectSingleNode("//*[local-name()='header']");
46
		if (element != null) {
47
			final Element inv = element.addElement(new QName("invalid", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
48
			for (final Map<String, String> e : errors) {
49
				final Element err = inv.addElement(new QName("error", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
50
				for (final Map.Entry<String, String> entry : e.entrySet()) {
51
					err.addAttribute(entry.getKey(), entry.getValue());
52
				}
53
			}
54
			inv.addAttribute("value", "true");
55
		}
56
	}
57

  
58
	public List<XPATHCleaningRule> getXpathRules() {
59
		return xpathRules;
60
	}
61

  
62
	@Required
63
	public void setXpathRules(final List<XPATHCleaningRule> xpathRules) {
64
		this.xpathRules = xpathRules;
65
	}
66

  
67
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/CleanerServiceImpl.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import javax.xml.ws.wsaddressing.W3CEndpointReference;
4

  
5
import org.springframework.beans.factory.annotation.Required;
6

  
7
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
8
import eu.dnetlib.data.utility.cleaner.rmi.CleanerService;
9
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
10
import eu.dnetlib.enabling.tools.AbstractBaseService;
11

  
12
public class CleanerServiceImpl extends AbstractBaseService implements CleanerService {
13

  
14
	private CleaningRuleFactory cleaningRuleFactory;
15

  
16
	private MappedResultSetFactory mappedResultSetFactory;
17

  
18
	@Override
19
	public W3CEndpointReference clean(final W3CEndpointReference epr, final String ruleId) throws CleanerException {
20
		if ((ruleId == null) || (ruleId.isEmpty())) { throw new CleanerException("Invalid ruleId: id is empty"); }
21
		if (epr == null) { throw new CleanerException("Passed epr is empty"); }
22

  
23
		return mappedResultSetFactory.createMappedResultSet(epr, cleaningRuleFactory.obtainCleaningRule(ruleId));
24
	}
25

  
26
	@Required
27
	public MappedResultSetFactory getMappedResultSetFactory() {
28
		return mappedResultSetFactory;
29
	}
30

  
31
	@Required
32
	public void setMappedResultSetFactory(final MappedResultSetFactory mappedResultSetFactory) {
33
		this.mappedResultSetFactory = mappedResultSetFactory;
34
	}
35

  
36
	public CleaningRuleFactory getCleaningRuleFactory() {
37
		return cleaningRuleFactory;
38
	}
39

  
40
	@Required
41
	public void setCleaningRuleFactory(final CleaningRuleFactory cleaningRuleFactory) {
42
		this.cleaningRuleFactory = cleaningRuleFactory;
43
	}
44

  
45
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/GroovyRule.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
4
import groovy.lang.Closure;
5
import groovy.lang.GroovyShell;
6

  
7
import java.util.Map;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11

  
12
/**
13
 * @author michele
14
 * 
15
 *         Groovy rules must be declared in a CleanerDS profile, some examples:
16
 * 
17
 *         <RULE xpath="..." groovy="(input =~ /X/).replaceAll('Y')" /> <RULE xpath="..." groovy="'CONSTANT'" /> <RULE xpath="..."
18
 *         groovy="input.toUpperCase()" />
19
 */
20

  
21
public class GroovyRule extends XPATHCleaningRule {
22

  
23
	private static final Log log = LogFactory.getLog(GroovyRule.class); // NOPMD by marko on 11/24/08 5:02 PM
24

  
25
	private String groovyRule;
26
	private Closure<String> closure;
27

  
28
	private GroovyShell groovyShell = new GroovyShell();
29

  
30
	@SuppressWarnings("unchecked")
31
	public GroovyRule(final String groovyRule) {
32
		this.groovyRule = groovyRule;
33
		this.closure = (Closure<String>) groovyShell.evaluate("{ input -> " + groovyRule + "}");
34
	}
35

  
36
	@Override
37
	protected String calculateNewValue(final String oldValue) throws CleanerException {
38
		try {
39
			log.info("Executing groovy closure on value " + oldValue);
40
			return closure.call(oldValue);
41
		} catch (Exception e) {
42
			log.error("Failed Groovy execution, groovyRule: " + groovyRule + ", input: " + oldValue, e);
43
			throw new CleanerException("Error executing groovy", e);
44
		}
45
	}
46

  
47
	@Override
48
	protected Map<String, String> verifyValue(final String value) throws CleanerException {
49
		return null;
50
	}
51

  
52
	@Override
53
	public String toString() {
54
		return "GROOVY: " + groovyRule;
55
	}
56
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/java/eu/dnetlib/data/utility/cleaner/XPATHCleaningRule.java
1
package eu.dnetlib.data.utility.cleaner;
2

  
3
import java.util.List;
4
import java.util.Map;
5

  
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.dom4j.Document;
9
import org.dom4j.Node;
10
import org.springframework.beans.factory.annotation.Required;
11

  
12
import com.google.common.collect.Lists;
13

  
14
import eu.dnetlib.data.utility.cleaner.rmi.CleanerException;
15

  
16
public abstract class XPATHCleaningRule {
17

  
18
	private String xpath;
19
	private boolean strict = false;
20

  
21
	private static final Log logCleaningRules = LogFactory.getLog("VOCABULARY_RULES");
22

  
23
	public List<Map<String, String>> applyXpathRule(final Document doc) throws CleanerException {
24
		final List<Map<String, String>> errors = Lists.newArrayList();
25

  
26
		final String id = doc.valueOf("//*[local-name()='objIdentifier']");
27

  
28
		for (Object o : doc.selectNodes(xpath)) {
29
			final Node node = (Node) o;
30

  
31
			final String oldValue = node.getText().trim();
32

  
33
			final String newValue = calculateNewValue(oldValue);
34
			if (strict) {
35
				final Map<String, String> err = verifyValue(newValue);
36
				if (err != null) {
37
					errors.add(err);
38

  
39
					if (logCleaningRules.isInfoEnabled()) {
40
						logCleaningRules.info("[" + newValue + "] is INVALID, " + "RULE: " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: "
41
								+ this.getXpath());
42
					}
43
				}
44
			}
45

  
46
			if (logCleaningRules.isInfoEnabled() && !newValue.equals(oldValue)) {
47
				logCleaningRules.info("[" + oldValue + "] => [" + newValue + "], " + toString() + ", " + "RECORD: " + id + ", " + "XPATH: " + this.getXpath());
48
			}
49

  
50
			node.setText(newValue);
51
		}
52

  
53
		return errors;
54
	}
55

  
56
	protected abstract Map<String, String> verifyValue(final String value) throws CleanerException;
57

  
58
	protected abstract String calculateNewValue(final String oldValue) throws CleanerException;
59

  
60
	public String getXpath() {
61
		return xpath;
62
	}
63

  
64
	@Required
65
	public void setXpath(final String xpath) {
66
		this.xpath = xpath;
67
	}
68

  
69
	public boolean isStrict() {
70
		return strict;
71
	}
72

  
73
	public void setStrict(final boolean strict) {
74
		this.strict = strict;
75
	}
76

  
77
}
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/resources/eu/dnetlib/enabling/views/inspector/cleaner.st
1
$inspector/master(it={
2

  
3
<style type="text/css">
4
  #results {
5
    width: 100%;
6
  }
7

  
8
  #results td:first-child {
9
   width: 2em;
10
  }
11

  
12
  #results td {
13
    border: 1px solid #cecece;
14
  }
15
</style>
16

  
17
<h2>Browse indices</h2>
18

  
19
<form method="POST">
20
Cleaner rules:
21
<select name="rule">
22
$rules:{<option $if(it.selected)$selected$endif$>$it.value$</option>}$
23
</select><br /><br />
24

  
25
Dirty Record:<br />
26
<textarea name="dirtyText" cols="80" rows="10">$dirtyText$</textarea>
27
<br /><br />
28
<input type="submit" value="submit"/>
29
</form>
30
<br />
31

  
32
Cleaned Record:<br />
33
<textarea readonly="readonly" cols="80" rows="10">$cleanedText$</textarea>
34

  
35

  
36
})$
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/resources/eu/dnetlib/data/utility/cleaner/inspector/webContext-cleaner-inspector.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4
	xmlns:p="http://www.springframework.org/schema/p"
5
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
6

  
7
	<bean id="cleanerInspectorGroup"
8
		class="eu.dnetlib.enabling.inspector.StaticEntryPointDescriptorGroup"
9
		p:name="cleaner">
10
		<property name="descriptors">
11
			<list>
12
				<bean class="eu.dnetlib.enabling.inspector.StaticEntryPointDescriptor"
13
					p:name="cleaner" p:relativeUrl="cleaner.do"
14
					p:hiddenAsDefault="true"/>
15
			</list>
16
		</property>
17
	</bean>
18

  
19
</beans>
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/resources/eu/dnetlib/data/utility/cleaner/applicationContext-cleaner.properties
1
service.cleaner.mapped.resultset.factory=mappedResultSetFactory
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/src/main/resources/eu/dnetlib/data/utility/cleaner/applicationContext-cleaner.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jaxws="http://cxf.apache.org/jaxws"
4
	xmlns:sec="http://cxf.apache.org/configuration/security" xmlns:wsa="http://cxf.apache.org/ws/addressing"
5
	xmlns:p="http://www.springframework.org/schema/p" xmlns:http="http://cxf.apache.org/transports/http/configuration"
6
	xmlns:t="http://dnetlib.eu/springbeans/t" xmlns:template="http://dnetlib.eu/springbeans/template"
7
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
8
                                    http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd
9
                                    http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd
10
                                    http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd
11
                            http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd
12
                            http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd">
13

  
14
	<!-- beans -->
15
	<bean id="cleanerService" 
16
		class="eu.dnetlib.data.utility.cleaner.CleanerServiceImpl"
17
		init-method="start" destroy-method="stop" p:cleaningRuleFactory-ref="cleaningRuleFactory"
18
		p:mappedResultSetFactory-ref="${service.cleaner.mapped.resultset.factory}" />
19
	
20
	<bean id="cleaningRuleFactory"
21
		class="eu.dnetlib.data.utility.cleaner.CleaningRuleFactory" 
22
		p:serviceLocator-ref="uniqueServiceLocator">
23
	</bean>
24
	
25
	<!-- endpoints -->
26
	<jaxws:endpoint id="cleanerServiceEndpoint" implementor="#cleanerService"
27
		implementorClass="eu.dnetlib.data.utility.cleaner.rmi.CleanerService"
28
		address="/cleaner" />
29
		
30
	<template:instance name="serviceRegistrationManager"
31
		t:serviceRegistrationManagerClass="eu.dnetlib.enabling.tools.registration.ValidatingServiceRegistrationManagerImpl"
32
		t:name="cleanerServiceRegistrationManager" t:service="cleanerService"
33
		t:endpoint="cleanerServiceEndpoint" t:jobScheduler="jobScheduler"
34
		t:serviceRegistrator="blackboardServiceRegistrator" />
35
		
36
</beans>
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/.springBeans
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beansProjectDescription>
3
	<version>1</version>
4
	<pluginVersion><![CDATA[2.2.8.200911091054-RELEASE]]></pluginVersion>
5
	<configSuffixes>
6
		<configSuffix><![CDATA[xml]]></configSuffix>
7
	</configSuffixes>
8
	<enableImports><![CDATA[false]]></enableImports>
9
	<configs>
10
		<!-- Project-relative path of the Spring context; it lives under src/main/resources. -->
		<config>src/main/resources/eu/dnetlib/data/utility/cleaner/applicationContext-cleaner.xml</config>
11
	</configs>
12
	<configSets>
13
	</configSets>
14
</beansProjectDescription>
modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
	</parent>
8
	<modelVersion>4.0.0</modelVersion>
9
	<groupId>eu.dnetlib</groupId>
10
	<artifactId>cnr-data-utility-cleaner-service</artifactId>
11
	<packaging>jar</packaging>
12
	<version>3.0.1</version>
13
	<scm>
14
	  <developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/cnr-data-utility-cleaner-service/tags/cnr-data-utility-cleaner-service-3.0.1</developerConnection>
15
	</scm>
16
	<dependencies>
17
		<dependency>
18
			<groupId>org.mockito</groupId>
19
			<artifactId>mockito-core</artifactId>
20
			<version>1.6</version>
21
			<scope>test</scope>
22
		</dependency>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>cnr-test-utils</artifactId>
26
			<version>[1.0.0,2.0.0)</version>
27
			<scope>test</scope>
28
		</dependency>
29
		<dependency>
30
			<groupId>eu.dnetlib</groupId>
31
			<artifactId>cnr-rmi-api</artifactId>
32
			<version>[2.0.0,3.0.0)</version>
33
		</dependency>
34
		<dependency>
35
			<groupId>eu.dnetlib</groupId>
36
			<artifactId>cnr-service-common</artifactId>
37
			<version>[2.1.2,3.0.0)</version>
38
		</dependency>
39
		<dependency>
40
			<groupId>eu.dnetlib</groupId>
41
			<artifactId>cnr-resultset-service</artifactId>
42
			<version>[2.0.0,3.0.0)</version>
43
		</dependency>
44
		<dependency>
45
			<groupId>com.google.guava</groupId>
46
			<artifactId>guava</artifactId>
47
			<version>${google.guava.version}</version>
48
		</dependency>
49
		<dependency>
50
			<groupId>org.codehaus.groovy</groupId>
51
			<artifactId>groovy</artifactId>
52
			<version>2.1.6</version>
53
		</dependency>
54
		<dependency>
55
			<groupId>eu.dnetlib</groupId>
56
			<artifactId>cnr-inspector</artifactId>
57
			<version>[1.0.0,2.0.0)</version>
58
		</dependency>
59
		<dependency>
60
			<groupId>eu.dnetlib</groupId>
61
			<artifactId>cnr-data-utility-cleaner-rmi</artifactId>
62
			<version>[2.0.0,3.0.0)</version>
63
		</dependency>		
64
		<dependency>
65
			<groupId>junit</groupId>
66
			<artifactId>junit</artifactId>
67
			<version>${junit.version}</version>
68
			<scope>test</scope>
69
		</dependency>
70
	</dependencies>
71
</project>

Also available in: Unified diff