Project

General

Profile

« Previous | Next » 

Revision 49243

View differences:

modules/dnet-isti/trunk/src/test/java/eu/dnetlib/data/mdstore/plugins/EnrichLabsPluginTest.java
1 1
package eu.dnetlib.data.mdstore.plugins;
2 2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertFalse;
3 5
import static org.junit.Assert.assertTrue;
4 6

  
5 7
import java.util.Arrays;
......
10 12
import org.dom4j.io.SAXReader;
11 13
import org.junit.Test;
12 14

  
15
import com.google.common.collect.Lists;
16

  
13 17
import eu.dnetlib.data.mdstore.plugins.objects.CnrAuthor;
14 18

  
15 19
public class EnrichLabsPluginTest {
......
80 84
	}
81 85

  
82 86
	@Test
87
	public void testVerifyMatch_10() {
88
		final List<String> s1 = Arrays.asList("artini", "michele");
89
		final List<String> s2 = Arrays.asList("artini", "mauro");
90
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
91
	}
92

  
93
	@Test
94
	public void testVerifyMatch_11() {
95
		final List<String> s1 = Arrays.asList("artini", "michele");
96
		final List<String> s2 = Arrays.asList("artini", "c");
97
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
98
	}
99

  
100
	@Test
101
	public void testVerifyMatch_12() {
102
		final List<String> s1 = Arrays.asList("artini", "m");
103
		final List<String> s2 = Arrays.asList("artini", "c");
104
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
105
	}
106

  
107
	@Test
108
	public void testVerifyMatch_13() {
109
		final List<String> s1 = Arrays.asList("artini", "michele");
110
		final List<String> s2 = Arrays.asList("donatella", "castelli");
111
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
112
	}
113

  
114
	@Test
115
	public void testVerifyMatch_14() {
116
		final List<String> s1 = Arrays.asList("artini", "michele");
117
		final List<String> s2 = Arrays.asList("de", "bonis", "michele");
118
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
119
	}
120

  
121
	@Test
122
	public void testVerifyMatch_15() {
123
		final List<String> s1 = Arrays.asList("artini", "michele");
124
		final List<String> s2 = Arrays.asList("de", "bonis", "m");
125
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
126
	}
127

  
128
	@Test
129
	public void testVerifyMatch_16() {
130
		final List<String> s1 = Arrays.asList("artini", "m");
131
		final List<String> s2 = Arrays.asList("de", "bonis", "m");
132
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
133
	}
134

  
135
	@Test
136
	public void testVerifyMatch_17() {
137
		final List<String> s1 = Arrays.asList("artini", "m");
138
		final List<String> s2 = Arrays.asList("manghi", "p");
139
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
140
	}
141

  
142
	@Test
143
	public void testVerifyMatch_18() {
144
		final List<String> s1 = Arrays.asList("artini", "michele");
145
		final List<String> s2 = Arrays.asList("manghi", "m");
146
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
147
	}
148

  
149
	@Test
150
	public void testVerifyMatch_19() {
151
		final List<String> s1 = Arrays.asList("artini", "m");
152
		final List<String> s2 = Arrays.asList("manghi", "paolo");
153
		assertFalse(plugin.verifyMatch(s1, s2) || plugin.verifyMatch(s2, s1));
154
	}
155

  
156
	@Test
157
	public void testCleanValue_1() {
158
		final List<String> list = Lists.newArrayList(plugin.cleanValue("Spano L.  D."));
159

  
160
		assertEquals(list.size(), 3);
161
		assertEquals(list.get(0), "spano");
162
		assertEquals(list.get(1), "l");
163
		assertEquals(list.get(2), "d");
164
	}
165

  
166
	@Test
167
	public void testCleanValue_2() {
168
		final List<String> list = Lists.newArrayList(plugin.cleanValue(" "));
169
		assertEquals(list.size(), 0);
170
	}
171

  
172
	@Test
83 173
	public void testVerifyMatch_record() throws Exception {
84 174
		final Document doc = new SAXReader().read(getClass().getResourceAsStream("record.xml"));
85 175

  
86 176
		@SuppressWarnings("unchecked")
87 177
		final List<Element> creatorNodes = doc.selectNodes("//*[local-name() = 'creator']");
88 178

  
179
		System.out.println("NODES: " + creatorNodes.size());
180

  
89 181
		for (final CnrAuthor cnrAuthor : plugin.listCnrAuthors(doc)) {
90 182
			final Element node = plugin.findNodeCreator(creatorNodes, cnrAuthor.getName(), cnrAuthor.getSurname());
91 183
			if (node != null) {
92
				System.out.println("Found");
184
				final String a1 = node.valueOf(".//creatorName");
185
				final String a2 = cnrAuthor.getFullname();
186
				System.out.println(a1 + " <=> " + a2);
93 187
			} else {
188
				System.out.println("CNR Creator not found " + cnrAuthor);
94 189
				throw new Exception("CNR Creator not found " + cnrAuthor);
95 190
			}
96 191
		}
modules/dnet-isti/trunk/src/main/java/eu/dnetlib/data/mdstore/plugins/EnrichLabsPlugin.java
21 21
import org.dom4j.Node;
22 22
import org.dom4j.io.SAXReader;
23 23

  
24
import com.google.common.base.Splitter;
24 25
import com.google.common.collect.Lists;
25 26
import com.mongodb.BasicDBObject;
26 27
import com.mongodb.DBObject;
......
163 164

  
164 165
	protected Element findNodeCreator(final List<Element> nodes, final String name, final String surname) {
165 166

  
166
		final List<String> s1 = cleanValue(name + " " + surname);
167
		final Iterable<String> s1 = cleanValue(name + " " + surname);
167 168

  
168 169
		final Optional<Element> res = nodes.stream()
169 170
				.filter(n -> {
170
					final List<String> s2 = cleanValue(n.valueOf("./*[local-name() = 'creatorName']"));
171
					final Iterable<String> s2 = cleanValue(n.valueOf("./*[local-name() = 'creatorName']"));
171 172
					return verifyMatch(s1, s2) || verifyMatch(s2, s1);
172 173
				})
173 174
				.findFirst();
......
180 181
		return null;
181 182
	}
182 183

  
183
	protected boolean verifyMatch(final List<String> s1, final List<String> s2) {
184
	protected boolean verifyMatch(final Iterable<String> s1, final Iterable<String> s2) {
185

  
184 186
		final ArrayList<String> cs1 = new ArrayList<>();
185
		final ArrayList<String> cs2 = new ArrayList<>(s2);
186

  
187
		final ArrayList<String> cs2 = Lists.newArrayList(s2);
188
		final int start = cs2.size();
187 189
		for (final String s : s1) {
188 190
			if (!cs2.remove(s)) {
189 191
				cs1.add(s);
......
192 194
		for (final String s : cs1) {
193 195
			cs2.remove(s.substring(0, 1));
194 196
		}
195
		return (s2.size() - cs2.size()) <= 2;
197
		return (start - cs2.size()) >= 2;
196 198
	}
197 199

  
198
	private List<String> cleanValue(final String s) {
199
		return Lists.newArrayList(s.toLowerCase()
200
				.trim()
200
	protected Iterable<String> cleanValue(final String s) {
201
		return Splitter.on(" ").omitEmptyStrings().trimResults().split(s.toLowerCase()
201 202
				.replaceAll("[àáâaäææãā]", "a")
202 203
				.replaceAll("[èéêëēėę]", "e")
203 204
				.replaceAll("[îïíīįì]", "i")
204 205
				.replaceAll("[ôöòóœøōõ]", "o")
205 206
				.replaceAll("[ûüùúū]", "u")
206
				.replaceAll("[^a-z]", "")
207
				.split(" "));
207
				.replaceAll("[^a-z\\s]", ""));
208 208
	}
209 209

  
210 210
	private DataRange findDataRange(final int from, final int to) {

Also available in: Unified diff