Revision 37202
Added by Claudio Atzori over 9 years ago
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-pace-core/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-pace-core"} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
|
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import com.google.common.collect.Maps; |
|
10 |
|
|
11 |
import eu.dnetlib.pace.AbstractPaceTest; |
|
12 |
|
|
13 |
public class ClusteringFunctionTest extends AbstractPaceTest { |
|
14 |
|
|
15 |
private Map<String, Integer> params; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() throws Exception { |
|
19 |
params = Maps.newHashMap(); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testNgram() { |
|
24 |
params.put("ngramLen", 2); |
|
25 |
params.put("max", 8); |
|
26 |
params.put("maxPerToken", 1); |
|
27 |
params.put("minNgramLen", 3); |
|
28 |
|
|
29 |
final ClusteringFunction ngram = new Ngrams(params); |
|
30 |
|
|
31 |
final String s = "Search for the Standard Model Higgs Boson"; |
|
32 |
System.out.println(s); |
|
33 |
System.out.println(ngram.apply(Lists.newArrayList(title(s)))); |
|
34 |
} |
|
35 |
|
|
36 |
@Test |
|
37 |
public void testNgramPairs() { |
|
38 |
params.put("ngramLen", 2); |
|
39 |
params.put("max", 4); |
|
40 |
|
|
41 |
final ClusteringFunction np = new NgramPairs(params); |
|
42 |
|
|
43 |
final String s = "Search for the Standard Model Higgs Boson"; |
|
44 |
System.out.println(s); |
|
45 |
System.out.println(np.apply(Lists.newArrayList(title(s)))); |
|
46 |
} |
|
47 |
|
|
48 |
@Test |
|
49 |
public void testAcronym() { |
|
50 |
params.put("max", 4); |
|
51 |
params.put("minLen", 1); |
|
52 |
params.put("maxLen", 3); |
|
53 |
|
|
54 |
final ClusteringFunction acro = new Acronyms(params); |
|
55 |
|
|
56 |
final String s = "Search for the Standard Model Higgs Boson"; |
|
57 |
System.out.println(s); |
|
58 |
System.out.println(acro.apply(Lists.newArrayList(title(s)))); |
|
59 |
} |
|
60 |
|
|
61 |
@Test |
|
62 |
public void testSuffixPrefix() { |
|
63 |
params.put("len", 2); |
|
64 |
params.put("max", 3); |
|
65 |
|
|
66 |
final ClusteringFunction sp = new SuffixPrefix(params); |
|
67 |
|
|
68 |
final String s = "Search for the Standard Model Higgs Boson"; |
|
69 |
System.out.println(s); |
|
70 |
System.out.println(sp.apply(Lists.newArrayList(title(s)))); |
|
71 |
} |
|
72 |
|
|
73 |
@Test |
|
74 |
public void testFieldValue() { |
|
75 |
final ClusteringFunction sp = new SpaceTrimmingFieldValue(params); |
|
76 |
|
|
77 |
final String s = "Search for the Standard Model Higgs Boson"; |
|
78 |
System.out.println(s); |
|
79 |
System.out.println(sp.apply(Lists.newArrayList(title(s)))); |
|
80 |
} |
|
81 |
|
|
82 |
} |
|
0 | 83 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/model/PersonComparatorUtilsSimilarityTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertTrue; |
|
5 |
|
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 |
public class PersonComparatorUtilsSimilarityTest { |
|
9 |
|
|
10 |
@Test |
|
11 |
public void testSimilarity_0() { |
|
12 |
assertTrue(PersonComparatorUtils.areSimilar("Artini Michele", "Michele Artini")); |
|
13 |
} |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testSimilarity_1() { |
|
17 |
assertTrue(PersonComparatorUtils.areSimilar("ARTINI Michele", "Artini, Michele")); |
|
18 |
} |
|
19 |
|
|
20 |
@Test |
|
21 |
public void testSimilarity_2() { |
|
22 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, M.", "Artini Michele")); |
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void testSimilarity_3() { |
|
27 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, M.G.", "Artini, Michele")); |
|
28 |
} |
|
29 |
|
|
30 |
@Test |
|
31 |
public void testSimilarity_4() { |
|
32 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, M.", "Artini, M.G.")); |
|
33 |
} |
|
34 |
|
|
35 |
@Test |
|
36 |
public void testSimilarity_5() { |
|
37 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, M. (sig.)", "Artini, Michele")); |
|
38 |
} |
|
39 |
|
|
40 |
@Test |
|
41 |
public void testSimilarity_6() { |
|
42 |
assertFalse(PersonComparatorUtils.areSimilar("Artini, M.", "Artini, G.")); |
|
43 |
} |
|
44 |
|
|
45 |
@Test |
|
46 |
public void testSimilarity_7() { |
|
47 |
assertFalse(PersonComparatorUtils.areSimilar("Artini, M.G.", "Artini, M.A.")); |
|
48 |
} |
|
49 |
|
|
50 |
@Test |
|
51 |
public void testSimilarity_8() { |
|
52 |
assertFalse(PersonComparatorUtils.areSimilar("Artini, M.", "Artini, Giuseppe")); |
|
53 |
} |
|
54 |
|
|
55 |
@Test |
|
56 |
public void testSimilarity_9() { |
|
57 |
assertFalse(PersonComparatorUtils.areSimilar("Manghi, Paolo", "Artini, Michele")); |
|
58 |
} |
|
59 |
|
|
60 |
@Test |
|
61 |
public void testSimilarity_10() { |
|
62 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, Michele", "Artini, Michele Giovanni")); |
|
63 |
} |
|
64 |
|
|
65 |
@Test |
|
66 |
public void testSimilarity_11() { |
|
67 |
assertFalse(PersonComparatorUtils.areSimilar("Artini, M.A.G.", "Artini, M.B.G.")); |
|
68 |
} |
|
69 |
|
|
70 |
@Test |
|
71 |
public void testSimilarity_12() { |
|
72 |
assertFalse(PersonComparatorUtils.areSimilar("Artini Manghi, M.", "Artini, Michele")); |
|
73 |
} |
|
74 |
|
|
75 |
@Test |
|
76 |
public void testSimilarity_13() { |
|
77 |
assertTrue(PersonComparatorUtils.areSimilar("Artini Manghi, M.", "Artini Manghi Michele")); |
|
78 |
} |
|
79 |
|
|
80 |
@Test |
|
81 |
public void testSimilarity_14() { |
|
82 |
assertFalse(PersonComparatorUtils.areSimilar("Artini, Michele", "Michele, Artini")); |
|
83 |
} |
|
84 |
|
|
85 |
@Test |
|
86 |
public void testSimilarity_15() { |
|
87 |
assertTrue(PersonComparatorUtils.areSimilar("Artini, M.", "Michele ARTINI")); |
|
88 |
} |
|
89 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/model/PersonTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
|
|
5 |
import java.text.Normalizer; |
|
6 |
import java.util.Queue; |
|
7 |
|
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import com.google.common.collect.Lists; |
|
11 |
|
|
12 |
public class PersonTest { |
|
13 |
|
|
14 |
@Test |
|
15 |
public void test_1() { |
|
16 |
check("Atzori, Claudio", "Atzori, Claudio"); |
|
17 |
} |
|
18 |
|
|
19 |
@Test |
|
20 |
public void test_2() { |
|
21 |
check("Atzori, Claudio A.", "Atzori, Claudio A."); |
|
22 |
} |
|
23 |
|
|
24 |
@Test |
|
25 |
public void test_3() { |
|
26 |
check("Claudio ATZORI", "Atzori, Claudio"); |
|
27 |
} |
|
28 |
|
|
29 |
@Test |
|
30 |
public void test_4() { |
|
31 |
check("ATZORI, Claudio", "Atzori, Claudio"); |
|
32 |
} |
|
33 |
|
|
34 |
@Test |
|
35 |
public void test_5() { |
|
36 |
check("Claudio Atzori", "Claudio Atzori"); |
|
37 |
} |
|
38 |
|
|
39 |
@Test |
|
40 |
public void test_6() { |
|
41 |
check(" Manghi , Paolo", "Manghi, Paolo"); |
|
42 |
} |
|
43 |
|
|
44 |
@Test |
|
45 |
public void test_7() { |
|
46 |
check("ATZORI, CLAUDIO", "Atzori, Claudio"); |
|
47 |
} |
|
48 |
|
|
49 |
@Test |
|
50 |
public void test_8() { |
|
51 |
check("ATZORI, CLAUDIO A", "Atzori, Claudio A."); |
|
52 |
} |
|
53 |
|
|
54 |
@Test |
|
55 |
public void test_9() { |
|
56 |
check("Bølviken, B.", "Bølviken, B."); |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void test_10() { |
|
61 |
check("Bñlviken, B.", "B" + Normalizer.normalize("ñ", Normalizer.Form.NFD) + "lviken, B."); |
|
62 |
} |
|
63 |
|
|
64 |
@Test |
|
65 |
public void test_11() { |
|
66 |
check("aáeéiíoóöőuúüű AÁEÉIÍOÓÖŐUÚÜŰ ø", "Aaeeiioooouuuu, Aaeeiioooouuuu Ø.", true); |
|
67 |
} |
|
68 |
|
|
69 |
@Test |
|
70 |
public void test_12() { |
|
71 |
check("aáeéiíoóöőuúüű AÁEÉIÍOÓÖŐUÚÜŰz ø", Normalizer.normalize("aáeéiíoóöőuúüű AÁEÉIÍOÓÖŐUÚÜŰz ø", Normalizer.Form.NFD), false); |
|
72 |
} |
|
73 |
|
|
74 |
@Test |
|
75 |
public void test_13() { |
|
76 |
check("Tkačíková, Daniela", Normalizer.normalize("Tkačíková, Daniela", Normalizer.Form.NFD), false); |
|
77 |
} |
|
78 |
|
|
79 |
@Test |
|
80 |
public void test_hashes() { |
|
81 |
checkHash(" Claudio ATZORI ", "ATZORI Claudio", "Atzori , Claudio", "ATZORI, Claudio"); |
|
82 |
} |
|
83 |
|
|
84 |
private void checkHash(String... ss) { |
|
85 |
Queue<String> q = Lists.newLinkedList(Lists.newArrayList(ss)); |
|
86 |
String h1 = new Person(q.remove(), false).hash(); |
|
87 |
while (!q.isEmpty()) { |
|
88 |
assertEquals(h1, new Person(q.remove(), false).hash()); |
|
89 |
} |
|
90 |
} |
|
91 |
|
|
92 |
private void check(String s, String expectedFullName) { |
|
93 |
check(s, expectedFullName, false); |
|
94 |
} |
|
95 |
|
|
96 |
private void check(String s, String expectedFullName, boolean aggressive) { |
|
97 |
Person p = new Person(s, aggressive); |
|
98 |
|
|
99 |
System.out.println("original: " + p.getOriginal()); |
|
100 |
System.out.println("accurate: " + p.isAccurate()); |
|
101 |
System.out.println("normalised: '" + p.getNormalisedFullname() + "'"); |
|
102 |
if (p.isAccurate()) { |
|
103 |
System.out.println("name: " + p.getNormalisedFirstName()); |
|
104 |
System.out.println("surname: " + p.getNormalisedSurname()); |
|
105 |
} |
|
106 |
System.out.println("hash: " + p.hash()); |
|
107 |
System.out.println(""); |
|
108 |
assertEquals(expectedFullName, p.getNormalisedFullname()); |
|
109 |
} |
|
110 |
|
|
111 |
} |
|
0 | 112 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/model/PersonComparatorUtilsNGramsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertTrue; |
|
5 |
|
|
6 |
import java.util.Set; |
|
7 |
|
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
public class PersonComparatorUtilsNGramsTest { |
|
11 |
|
|
12 |
@Test |
|
13 |
public void testNormaizePerson_1() { |
|
14 |
verifyGetNgramsForPerson("Artini Michele", 2, "a_michele", "m_artini"); |
|
15 |
} |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testNormaizePerson_2() { |
|
19 |
verifyGetNgramsForPerson("Michele Artini", 2, "a_michele", "m_artini"); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testNormaizePerson_3() { |
|
24 |
verifyGetNgramsForPerson("Michele ARTINI", 1, "m_artini"); |
|
25 |
} |
|
26 |
|
|
27 |
@Test |
|
28 |
public void testNormaizePerson_4() { |
|
29 |
verifyGetNgramsForPerson("ARTINI Michele", 1, "m_artini"); |
|
30 |
} |
|
31 |
|
|
32 |
@Test |
|
33 |
public void testNormaizePerson_5() { |
|
34 |
verifyGetNgramsForPerson("Michele G. Artini", 2, "m_artini", "g_artini"); |
|
35 |
} |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testNormaizePerson_6() { |
|
39 |
verifyGetNgramsForPerson(" Artini, Michele ", 1, "m_artini"); |
|
40 |
} |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testNormaizePerson_7() { |
|
44 |
verifyGetNgramsForPerson("Artini, Michele (sig.)", 1, "m_artini"); |
|
45 |
} |
|
46 |
|
|
47 |
@Test |
|
48 |
public void testNormaizePerson_8() { |
|
49 |
verifyGetNgramsForPerson("Artini Michele [sig.] ", 2, "a_michele", "m_artini"); |
|
50 |
} |
|
51 |
|
|
52 |
@Test |
|
53 |
public void testNormaizePerson_9() { |
|
54 |
verifyGetNgramsForPerson("Artini, M", 1, "m_artini"); |
|
55 |
} |
|
56 |
|
|
57 |
@Test |
|
58 |
public void testNormaizePerson_10() { |
|
59 |
verifyGetNgramsForPerson("Artini, M.", 1, "m_artini"); |
|
60 |
} |
|
61 |
|
|
62 |
@Test |
|
63 |
public void testNormaizePerson_11() { |
|
64 |
verifyGetNgramsForPerson("Artini, M. (sig.)", 1, "m_artini"); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
public void testNormaizePerson_12() { |
|
69 |
verifyGetNgramsForPerson("Artini, M[sig.] ", 1, "m_artini"); |
|
70 |
} |
|
71 |
|
|
72 |
@Test |
|
73 |
public void testNormaizePerson_13() { |
|
74 |
verifyGetNgramsForPerson("Artini-SIG, Michele ", 1, "m_artini-sig"); |
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void testNormaizePerson_14() { |
|
79 |
verifyGetNgramsForPerson("Artini - SIG, Michele ", 1, "m_artini-sig"); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
public void testNormaizePerson_15() { |
|
84 |
verifyGetNgramsForPerson("Artini {sig.}, M", 1, "m_artini"); |
|
85 |
} |
|
86 |
|
|
87 |
@Test |
|
88 |
public void testNormaizePerson_16() { |
|
89 |
verifyGetNgramsForPerson("Artini, M., sig.", 1, "m_artini"); |
|
90 |
} |
|
91 |
|
|
92 |
@Test |
|
93 |
public void testNormaizePerson_17() { |
|
94 |
verifyGetNgramsForPerson("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA, BBBBBBBBBBBBBBBBBBBBBBBBBBBBB CCCCCCCCCCCCCCCCCCCC", 0); |
|
95 |
} |
|
96 |
|
|
97 |
@Test |
|
98 |
public void testNormaizePerson_18() { |
|
99 |
verifyGetNgramsForPerson("Dell'amico, Andrea", 1, "a_amico"); |
|
100 |
} |
|
101 |
|
|
102 |
@Test |
|
103 |
public void testNormaizePerson_19() { |
|
104 |
verifyGetNgramsForPerson("Smith, Paul van der", 1, "p_smith"); |
|
105 |
} |
|
106 |
|
|
107 |
@Test |
|
108 |
public void testNormaizePerson_20() { |
|
109 |
verifyGetNgramsForPerson("AAAAAAA, BBBB, CCCC, DDDD, EEEE", 1, "b_aaaaaaa"); |
|
110 |
} |
|
111 |
|
|
112 |
@Test |
|
113 |
public void testNormaizePerson_21() { |
|
114 |
verifyGetNgramsForPerson("Kompetenzzentrum Informelle Bildung (KIB),", 6); |
|
115 |
} |
|
116 |
|
|
117 |
private void verifyGetNgramsForPerson(String name, int expectedSize, String... expectedTokens) { |
|
118 |
Set<String> list = PersonComparatorUtils.getNgramsForPerson(name); |
|
119 |
System.out.println(list); |
|
120 |
assertEquals(expectedSize, list.size()); |
|
121 |
for (String s : expectedTokens) { |
|
122 |
assertTrue(list.contains(s)); |
|
123 |
} |
|
124 |
} |
|
125 |
|
|
126 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/config/ConfigTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.config; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertNotNull; |
|
4 |
|
|
5 |
import java.io.IOException; |
|
6 |
|
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import eu.dnetlib.pace.AbstractPaceTest; |
|
10 |
|
|
11 |
public class ConfigTest extends AbstractPaceTest { |
|
12 |
|
|
13 |
@Test |
|
14 |
public void test() throws IOException { |
|
15 |
final DedupConfig cfg = DedupConfig.load(readFromClasspath("result.pace.conf.json")); |
|
16 |
|
|
17 |
assertNotNull(cfg); |
|
18 |
|
|
19 |
System.out.println(cfg); |
|
20 |
} |
|
21 |
|
|
22 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/distance/DistanceAlgoTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.distance; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.common.AbstractPaceFunctions; |
|
7 |
|
|
8 |
public class DistanceAlgoTest extends AbstractPaceFunctions { |
|
9 |
|
|
10 |
private final static String TEST_STRING = "Toshiba NB550D: è un netbook su piattaforma AMD Fusion⁽¹²⁾."; |
|
11 |
|
|
12 |
@Before |
|
13 |
public void setup() { |
|
14 |
System.out.println("****************************************************************"); |
|
15 |
System.out.println("Test String : " + TEST_STRING); |
|
16 |
} |
|
17 |
|
|
18 |
@Test |
|
19 |
public void testGetNumbers() { |
|
20 |
System.out.println("Numbers : " + getNumbers(TEST_STRING)); |
|
21 |
} |
|
22 |
|
|
23 |
@Test |
|
24 |
public void testRemoveSymbols() { |
|
25 |
System.out.println("Without symbols: " + removeSymbols(TEST_STRING)); |
|
26 |
} |
|
27 |
|
|
28 |
@Test |
|
29 |
public void testFixAliases() { |
|
30 |
System.out.println("Fixed aliases : " + fixAliases(TEST_STRING)); |
|
31 |
} |
|
32 |
|
|
33 |
@Test |
|
34 |
public void testCleanup() { |
|
35 |
System.out.println("cleaned up : " + cleanup(TEST_STRING)); |
|
36 |
} |
|
37 |
|
|
38 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/java/eu/dnetlib/pace/AbstractPaceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
|
|
6 |
import org.apache.commons.io.IOUtils; |
|
7 |
|
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.Field; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
|
|
12 |
public abstract class AbstractPaceTest { |
|
13 |
|
|
14 |
protected String readFromClasspath(final String filename) { |
|
15 |
final StringWriter sw = new StringWriter(); |
|
16 |
try { |
|
17 |
IOUtils.copy(getClass().getResourceAsStream(filename), sw); |
|
18 |
return sw.toString(); |
|
19 |
} catch (final IOException e) { |
|
20 |
throw new RuntimeException("cannot load resource from classpath: " + filename); |
|
21 |
} |
|
22 |
} |
|
23 |
|
|
24 |
protected Field title(final String s) { |
|
25 |
return new FieldValueImpl(Type.String, "title", s); |
|
26 |
} |
|
27 |
|
|
28 |
} |
|
0 | 29 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/resources/eu/dnetlib/pace/config/result.pace.conf.json | ||
---|---|---|
1 |
{ |
|
2 |
"wf" : { |
|
3 |
"threshold" : "0.99", |
|
4 |
"run" : "001", |
|
5 |
"entityType" : "result", |
|
6 |
"orderField" : "title", |
|
7 |
"queueMaxSize" : "2000", |
|
8 |
"groupMaxSize" : "10", |
|
9 |
"slidingWindowSize" : "200", |
|
10 |
"rootBuilder" : [ "result" ], |
|
11 |
"includeChildren" : "true" |
|
12 |
}, |
|
13 |
"pace" : { |
|
14 |
"clustering" : [ |
|
15 |
{ "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} }, |
|
16 |
{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} }, |
|
17 |
{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } } |
|
18 |
], |
|
19 |
"strictConditions" : [ |
|
20 |
{ "name" : "exactMatch", "fields" : [ "pid" ] } |
|
21 |
], |
|
22 |
"conditions" : [ |
|
23 |
{ "name" : "yearMatch", "fields" : [ "dateofacceptance" ] }, |
|
24 |
{ "name" : "titleVersionMatch", "fields" : [ "title" ] }, |
|
25 |
{ "name" : "sizeMatch", "fields" : [ "authors" ] } |
|
26 |
], |
|
27 |
"model" : [ |
|
28 |
{ "name" : "pid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid/value", "overrideMatch" : "true" }, |
|
29 |
{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title/value" }, |
|
30 |
{ "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" } , |
|
31 |
{ "name" : "authors", "algo" : "Null", "type" : "List", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" } |
|
32 |
], |
|
33 |
"blacklists" : { |
|
34 |
"title" : [ |
|
35 |
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$", |
|
36 |
"^(Kiri Karl Morgensternile).*$", |
|
37 |
"^(\\[Eksliibris Aleksandr).*\\]$", |
|
38 |
"^(\\[Eksliibris Aleksandr).*$", |
|
39 |
"^(Eksliibris Aleksandr).*$", |
|
40 |
"^(Kiri A\\. de Vignolles).*$", |
|
41 |
"^(2 kirja Karl Morgensternile).*$", |
|
42 |
"^(Pirita kloostri idaosa arheoloogilised).*$", |
|
43 |
"^(Kiri tundmatule).*$", |
|
44 |
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", |
|
45 |
"^(Eksliibris Nikolai Birukovile).*$", |
|
46 |
"^(Eksliibris Nikolai Issakovile).*$", |
|
47 |
"^(WHP Cruise Summary Information of section).*$", |
|
48 |
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", |
|
49 |
"^(Measurement of the spin\\-dependent structure function).*" |
|
50 |
] } |
|
51 |
} |
|
52 |
|
|
53 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/test/resources/eu/dnetlib/pace/config/title_blacklist.txt | ||
---|---|---|
1 |
^(Corpus Oral Dialectal \(COD\)\.).*$ |
|
2 |
^(Kiri Karl Morgensternile).*$ |
|
3 |
^(\[Eksliibris Aleksandr).*\]$ |
|
4 |
^(Kiri A\. de Vignolles).*$ |
|
5 |
^(2 kirja Karl Morgensternile).*$ |
|
6 |
^(Pirita kloostri idaosa arheoloogilised).*$ |
|
7 |
^(Kiri tundmatule).*$ |
|
8 |
^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$ |
|
9 |
^(Eksliibris Nikolai Birukovile).*$ |
|
10 |
^(Eksliibris Nikolai Issakovile).*$ |
|
11 |
^(\[Eksliibris Aleksandr).*$ |
|
12 |
^(WHP Cruise Summary Information of section).*$ |
|
13 |
^(Measurement of the top quark\-pair production cross section with ATLAS in pp collisions at).*$ |
|
14 |
^(Measurement of the spin\-dependent structure function).* |
|
15 |
^(lorem ipsum).* |
|
0 | 16 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/Clustering.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
/**
 * Identifiers of the known clustering functions.
 *
 * NOTE(review): the lower-case constant names look like the "name" values used in the
 * "clustering" section of the pace configuration; confirm against the configuration loader
 * before renaming any of them.
 */
public enum Clustering {
    acronyms,
    ngrams,
    ngrampairs,
    suffixprefix,
    spacetrimmingfieldvalue
}
|
0 | 6 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.Map; |
|
5 |
import java.util.Set; |
|
6 |
|
|
7 |
import com.google.common.collect.Sets; |
|
8 |
|
|
9 |
public class SuffixPrefix extends AbstractClusteringFunction { |
|
10 |
|
|
11 |
public SuffixPrefix(Map<String, Integer> params) { |
|
12 |
super(params); |
|
13 |
} |
|
14 |
|
|
15 |
@Override |
|
16 |
protected Collection<String> doApply(String s) { |
|
17 |
return suffixPrefix(s, param("len"), param("max")); |
|
18 |
} |
|
19 |
|
|
20 |
private Collection<String> suffixPrefix(String s, int len, int max) { |
|
21 |
final Set<String> bigrams = Sets.newLinkedHashSet(); |
|
22 |
int i = 0; |
|
23 |
while (++i < s.length() && bigrams.size() < max) { |
|
24 |
int j = s.indexOf(" ", i); |
|
25 |
|
|
26 |
int offset = j + len + 1 < s.length() ? j + len + 1 : s.length(); |
|
27 |
|
|
28 |
if (j - len > 0) { |
|
29 |
String bigram = s.substring(j - len, offset).replaceAll(" ", "").trim(); |
|
30 |
if (bigram.length() >= 4) { |
|
31 |
bigrams.add(bigram); |
|
32 |
} |
|
33 |
} |
|
34 |
} |
|
35 |
return bigrams; |
|
36 |
} |
|
37 |
|
|
38 |
} |
|
0 | 39 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import com.google.common.collect.Lists; |
|
8 |
|
|
9 |
public class NgramPairs extends Ngrams { |
|
10 |
|
|
11 |
public NgramPairs(Map<String, Integer> params) { |
|
12 |
super(params); |
|
13 |
} |
|
14 |
|
|
15 |
@Override |
|
16 |
protected Collection<String> doApply(String s) { |
|
17 |
return ngramPairs(Lists.newArrayList(getNgrams(s, param("ngramLen"), param("max") * 2, 1, 2)), param("max")); |
|
18 |
} |
|
19 |
|
|
20 |
private Collection<String> ngramPairs(final List<String> ngrams, int maxNgrams) { |
|
21 |
Collection<String> res = Lists.newArrayList(); |
|
22 |
int j = 0; |
|
23 |
for (int i = 0; i < ngrams.size() && res.size() < maxNgrams; i++) { |
|
24 |
if (++j >= ngrams.size()) { |
|
25 |
break; |
|
26 |
} |
|
27 |
res.add(ngrams.get(i) + ngrams.get(j)); |
|
28 |
//System.out.println("-- " + concatNgrams); |
|
29 |
} |
|
30 |
return res; |
|
31 |
} |
|
32 |
|
|
33 |
} |
|
0 | 34 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import org.apache.commons.lang.RandomStringUtils; |
|
8 |
import org.apache.commons.lang.StringUtils; |
|
9 |
|
|
10 |
import com.google.common.collect.Lists; |
|
11 |
|
|
12 |
public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { |
|
13 |
|
|
14 |
public SpaceTrimmingFieldValue(final Map<String, Integer> params) { |
|
15 |
super(params); |
|
16 |
} |
|
17 |
|
|
18 |
@Override |
|
19 |
protected Collection<String> doApply(final String s) { |
|
20 |
final List<String> res = Lists.newArrayList(); |
|
21 |
|
|
22 |
res.add(StringUtils.isBlank(s) ? RandomStringUtils.random(getParams().get("randomLength")) : s.toLowerCase().replaceAll("\\s+", "")); |
|
23 |
|
|
24 |
return res; |
|
25 |
} |
|
26 |
|
|
27 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import eu.dnetlib.pace.model.Field; |
|
8 |
|
|
9 |
public interface ClusteringFunction { |
|
10 |
|
|
11 |
public Collection<String> apply(List<Field> fields); |
|
12 |
|
|
13 |
public Map<String, Integer> getParams(); |
|
14 |
|
|
15 |
} |
|
0 | 16 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/NGramUtils.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Set; |
|
4 |
|
|
5 |
import org.apache.commons.lang.StringUtils; |
|
6 |
|
|
7 |
import eu.dnetlib.pace.common.AbstractPaceFunctions; |
|
8 |
|
|
9 |
public class NGramUtils extends AbstractPaceFunctions { |
|
10 |
|
|
11 |
private static final int SIZE = 100; |
|
12 |
|
|
13 |
private static Set<String> stopwords = AbstractPaceFunctions.loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); |
|
14 |
|
|
15 |
public static String cleanupForOrdering(String s) { |
|
16 |
NGramUtils utils = new NGramUtils(); |
|
17 |
return (utils.filterStopWords(utils.normalize(s), stopwords) + StringUtils.repeat(" ", SIZE)).substring(0, SIZE).replaceAll(" ", ""); |
|
18 |
} |
|
19 |
|
|
20 |
} |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
public class RandomClusteringFunction extends AbstractClusteringFunction { |
|
7 |
|
|
8 |
public RandomClusteringFunction(Map<String, Integer> params) { |
|
9 |
super(params); |
|
10 |
} |
|
11 |
|
|
12 |
@Override |
|
13 |
protected Collection<String> doApply(String s) { |
|
14 |
// TODO Auto-generated method stub |
|
15 |
return null; |
|
16 |
} |
|
17 |
|
|
18 |
} |
|
0 | 19 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombiner.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.Map.Entry; |
|
7 |
import java.util.Set; |
|
8 |
|
|
9 |
import com.google.common.collect.Iterables; |
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import com.google.common.collect.Maps; |
|
12 |
|
|
13 |
import eu.dnetlib.pace.config.Config; |
|
14 |
import eu.dnetlib.pace.model.Document; |
|
15 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
16 |
import eu.dnetlib.pace.model.MapDocument; |
|
17 |
|
|
18 |
public class BlacklistAwareClusteringCombiner extends ClusteringCombiner { |
|
19 |
|
|
20 |
public static Collection<String> filterAndCombine(final MapDocument a, final Config conf, final Map<String, List<String>> blacklists) { |
|
21 |
|
|
22 |
final Document filtered = new BlacklistAwareClusteringCombiner().filter(a, blacklists); |
|
23 |
return combine(filtered, conf); |
|
24 |
} |
|
25 |
|
|
26 |
private MapDocument filter(final MapDocument a, final Map<String, List<String>> blacklists) { |
|
27 |
final Map<String, FieldListImpl> filtered = Maps.newHashMap(a.getFieldMap()); |
|
28 |
if (blacklists != null) { |
|
29 |
for (final Entry<String, FieldListImpl> e : filtered.entrySet()) { |
|
30 |
|
|
31 |
final FieldListImpl fl = new FieldListImpl(); |
|
32 |
fl.addAll(Lists.newArrayList(Iterables.filter(e.getValue(), new FieldFilter(e.getKey(), blacklists)))); |
|
33 |
filtered.put(e.getKey(), fl); |
|
34 |
} |
|
35 |
} |
|
36 |
return new MapDocument(a.getIdentifier(), filtered); |
|
37 |
} |
|
38 |
|
|
39 |
/** |
|
40 |
* Tries to match the fields in the regex blacklist. |
|
41 |
* |
|
42 |
* @param fieldName |
|
43 |
* @param value |
|
44 |
* @return true if the field matches, false otherwise |
|
45 |
*/ |
|
46 |
protected boolean regexMatches(final String fieldName, final String value, final Map<String, Set<String>> blacklists) { |
|
47 |
if (blacklists.containsKey(fieldName)) { |
|
48 |
for (final String regex : blacklists.get(fieldName)) { |
|
49 |
if (value.matches(regex)) return true; |
|
50 |
} |
|
51 |
} |
|
52 |
return false; |
|
53 |
} |
|
54 |
} |
|
0 | 55 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.LinkedHashSet; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.StringTokenizer; |
|
7 |
|
|
8 |
public class Ngrams extends AbstractClusteringFunction { |
|
9 |
|
|
10 |
public Ngrams(Map<String, Integer> params) { |
|
11 |
super(params); |
|
12 |
} |
|
13 |
|
|
14 |
@Override |
|
15 |
protected Collection<String> doApply(String s) { |
|
16 |
return getNgrams(s, param("ngramLen"), param("max"), param("maxPerToken"), param("minNgramLen")); |
|
17 |
} |
|
18 |
|
|
19 |
protected Collection<String> getNgrams(String s, int ngramLen, int max, int maxPerToken, int minNgramLen) { |
|
20 |
|
|
21 |
final Collection<String> ngrams = new LinkedHashSet<String>(); |
|
22 |
final StringTokenizer st = new StringTokenizer(s); |
|
23 |
|
|
24 |
while (st.hasMoreTokens()) { |
|
25 |
final String token = st.nextToken(); |
|
26 |
if (!token.isEmpty()) { |
|
27 |
|
|
28 |
for (int i = 0; i < maxPerToken && ngramLen + i <= token.length(); i++) { |
|
29 |
String ngram = (token + " ").substring(i, ngramLen + i).trim(); |
|
30 |
if (ngrams.size() >= max) { |
|
31 |
return ngrams; |
|
32 |
} |
|
33 |
if (ngram.length() >= minNgramLen) { |
|
34 |
ngrams.add(ngram); |
|
35 |
} |
|
36 |
} |
|
37 |
} |
|
38 |
} |
|
39 |
//System.out.println(ngrams + " n: " + ngrams.size()); |
|
40 |
return ngrams; |
|
41 |
} |
|
42 |
|
|
43 |
} |
|
0 | 44 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import com.google.common.collect.Sets; |
|
8 |
|
|
9 |
import eu.dnetlib.pace.common.AbstractPaceFunctions; |
|
10 |
import eu.dnetlib.pace.model.Field; |
|
11 |
|
|
12 |
public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction { |
|
13 |
|
|
14 |
protected Map<String, Integer> params; |
|
15 |
|
|
16 |
public AbstractClusteringFunction(final Map<String, Integer> params) { |
|
17 |
this.params = params; |
|
18 |
} |
|
19 |
|
|
20 |
protected abstract Collection<String> doApply(String s); |
|
21 |
|
|
22 |
@Override |
|
23 |
public Collection<String> apply(List<Field> fields) { |
|
24 |
Collection<String> c = Sets.newLinkedHashSet(); |
|
25 |
for(Field f : fields) { |
|
26 |
c.addAll(filterBlacklisted(doApply(filterStopWords(normalize(f.stringValue()), stopwords)), ngramBlacklist)); |
|
27 |
} |
|
28 |
return c; |
|
29 |
} |
|
30 |
|
|
31 |
public Map<String, Integer> getParams() { |
|
32 |
return params; |
|
33 |
} |
|
34 |
|
|
35 |
protected Integer param(String name) { |
|
36 |
return params.get(name); |
|
37 |
} |
|
38 |
} |
|
0 | 39 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/FieldFilter.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.common.base.Predicate; |
|
7 |
|
|
8 |
import eu.dnetlib.pace.model.Field; |
|
9 |
|
|
10 |
public class FieldFilter implements Predicate<Field> { |
|
11 |
|
|
12 |
private Map<String, List<String>> blacklists; |
|
13 |
|
|
14 |
private String filedName; |
|
15 |
|
|
16 |
public FieldFilter(final String fieldName, final Map<String, List<String>> blacklists) { |
|
17 |
this.filedName = fieldName; |
|
18 |
this.blacklists = blacklists; |
|
19 |
} |
|
20 |
|
|
21 |
@Override |
|
22 |
public boolean apply(final Field f) { |
|
23 |
return !regexMatches(filedName, f.stringValue(), blacklists); |
|
24 |
} |
|
25 |
|
|
26 |
/** |
|
27 |
* Tries to match the fields in the regex blacklist. |
|
28 |
* |
|
29 |
* @param fieldName |
|
30 |
* @param value |
|
31 |
* @return true if the field matches, false otherwise |
|
32 |
*/ |
|
33 |
protected boolean regexMatches(final String fieldName, final String value, final Map<String, List<String>> blacklists) { |
|
34 |
if (blacklists.containsKey(fieldName)) { |
|
35 |
final Iterable<String> regexes = blacklists.get(fieldName); |
|
36 |
for (final String regex : regexes) { |
|
37 |
if (value.matches(regex)) return true; |
|
38 |
} |
|
39 |
} |
|
40 |
return false; |
|
41 |
} |
|
42 |
} |
|
0 | 43 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.Map; |
|
5 |
import java.util.Set; |
|
6 |
import java.util.StringTokenizer; |
|
7 |
|
|
8 |
import com.google.common.collect.Sets; |
|
9 |
|
|
10 |
public class Acronyms extends AbstractClusteringFunction { |
|
11 |
|
|
12 |
public Acronyms(Map<String, Integer> params) { |
|
13 |
super(params); |
|
14 |
} |
|
15 |
|
|
16 |
@Override |
|
17 |
protected Collection<String> doApply(String s) { |
|
18 |
return extractAcronyms(s, param("max"), param("minLen"), param("maxLen")); |
|
19 |
} |
|
20 |
|
|
21 |
private Set<String> extractAcronyms(final String s, int maxAcronyms, int minLen, int maxLen) { |
|
22 |
|
|
23 |
final Set<String> acronyms = Sets.newLinkedHashSet(); |
|
24 |
|
|
25 |
for (int i = 0; i < maxAcronyms; i++) { |
|
26 |
|
|
27 |
final StringTokenizer st = new StringTokenizer(s); |
|
28 |
final StringBuilder sb = new StringBuilder(); |
|
29 |
|
|
30 |
while (st.hasMoreTokens()) { |
|
31 |
final String token = st.nextToken(); |
|
32 |
if (sb.length() > maxLen) { |
|
33 |
break; |
|
34 |
} |
|
35 |
if (token.length() > 1 && i < token.length()) { |
|
36 |
sb.append(token.charAt(i)); |
|
37 |
} |
|
38 |
} |
|
39 |
String acronym = sb.toString(); |
|
40 |
if (acronym.length() > minLen) { |
|
41 |
acronyms.add(acronym); |
|
42 |
} |
|
43 |
} |
|
44 |
return acronyms; |
|
45 |
} |
|
46 |
|
|
47 |
} |
|
0 | 48 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/clustering/ClusteringCombiner.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.List; |
|
5 |
|
|
6 |
import com.google.common.collect.Sets; |
|
7 |
|
|
8 |
import eu.dnetlib.pace.config.Config; |
|
9 |
import eu.dnetlib.pace.model.ClusteringDef; |
|
10 |
import eu.dnetlib.pace.model.Document; |
|
11 |
import eu.dnetlib.pace.model.FieldList; |
|
12 |
|
|
13 |
public class ClusteringCombiner { |
|
14 |
|
|
15 |
public static Collection<String> combine(final Document a, final Config conf) { |
|
16 |
return new ClusteringCombiner().doCombine(a, conf.clusterings()); |
|
17 |
} |
|
18 |
|
|
19 |
private Collection<String> doCombine(final Document a, final List<ClusteringDef> defs) { |
|
20 |
final Collection<String> res = Sets.newLinkedHashSet(); |
|
21 |
for (final ClusteringDef cd : defs) { |
|
22 |
for (final String fieldName : cd.getFields()) { |
|
23 |
final FieldList values = a.values(fieldName); |
|
24 |
res.addAll(cd.getClusteringFunction().apply(values)); |
|
25 |
} |
|
26 |
} |
|
27 |
return res; |
|
28 |
} |
|
29 |
|
|
30 |
} |
|
0 | 31 |
modules/dnet-pace-core/tags/dnet-pace-core-2.1.0/src/main/java/eu/dnetlib/pace/model/FieldListImpl.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import java.util.Collection; |
|
4 |
import java.util.Iterator; |
|
5 |
import java.util.List; |
|
6 |
import java.util.ListIterator; |
|
7 |
|
|
8 |
import com.google.common.base.Function; |
|
9 |
import com.google.common.base.Joiner; |
|
10 |
import com.google.common.collect.Iterables; |
|
11 |
import com.google.common.collect.Lists; |
|
12 |
|
|
13 |
import eu.dnetlib.pace.config.Type; |
|
14 |
|
|
15 |
/** |
|
16 |
* The Class FieldListImpl. |
|
17 |
*/ |
|
18 |
public class FieldListImpl extends AbstractField implements FieldList { |
|
19 |
|
|
20 |
/** The fields. */ |
|
21 |
private List<Field> fields; |
|
22 |
|
|
23 |
/** |
|
24 |
* Instantiates a new field list impl. |
|
25 |
*/ |
|
26 |
public FieldListImpl() { |
|
27 |
fields = Lists.newArrayList(); |
|
28 |
} |
|
29 |
|
|
30 |
/** |
|
31 |
* Instantiates a new field list impl. |
|
32 |
* |
|
33 |
* @param name |
|
34 |
* the name |
|
35 |
*/ |
|
36 |
public FieldListImpl(final String name) { |
|
37 |
super(Type.List, name); |
|
38 |
fields = Lists.newArrayList(); |
|
39 |
} |
|
40 |
|
|
41 |
/* |
|
42 |
* (non-Javadoc) |
|
43 |
* |
|
44 |
* @see java.util.List#add(java.lang.Object) |
|
45 |
*/ |
|
46 |
@Override |
|
47 |
public boolean add(final Field f) { |
|
48 |
return fields.add(f); |
|
49 |
} |
|
50 |
|
|
51 |
/* |
|
52 |
* (non-Javadoc) |
|
53 |
* |
|
54 |
* @see java.util.List#add(int, java.lang.Object) |
|
55 |
*/ |
|
56 |
@Override |
|
57 |
public void add(final int i, final Field f) { |
|
58 |
fields.add(i, f); |
|
59 |
} |
|
60 |
|
|
61 |
/* |
|
62 |
* (non-Javadoc) |
|
63 |
* |
|
64 |
* @see java.util.List#addAll(java.util.Collection) |
|
65 |
*/ |
|
66 |
@Override |
|
67 |
public boolean addAll(final Collection<? extends Field> f) { |
|
68 |
return fields.addAll(f); |
|
69 |
} |
|
70 |
|
|
71 |
/* |
|
72 |
* (non-Javadoc) |
|
73 |
* |
|
74 |
* @see java.util.List#addAll(int, java.util.Collection) |
|
75 |
*/ |
|
76 |
@Override |
|
77 |
public boolean addAll(final int i, final Collection<? extends Field> f) { |
|
78 |
return fields.addAll(i, f); |
|
79 |
} |
|
80 |
|
|
81 |
/* |
|
82 |
* (non-Javadoc) |
|
83 |
* |
|
84 |
* @see java.util.List#clear() |
|
85 |
*/ |
|
86 |
@Override |
|
87 |
public void clear() { |
|
88 |
fields.clear(); |
|
89 |
} |
|
90 |
|
|
91 |
/* |
|
92 |
* (non-Javadoc) |
|
93 |
* |
|
94 |
* @see java.util.List#contains(java.lang.Object) |
|
95 |
*/ |
|
96 |
@Override |
|
97 |
public boolean contains(final Object o) { |
|
98 |
return fields.contains(o); |
|
99 |
} |
|
100 |
|
|
101 |
/* |
|
102 |
* (non-Javadoc) |
|
103 |
* |
|
104 |
* @see java.util.List#containsAll(java.util.Collection) |
|
105 |
*/ |
|
106 |
@Override |
|
107 |
public boolean containsAll(final Collection<?> f) { |
|
108 |
return fields.containsAll(f); |
|
109 |
} |
|
110 |
|
|
111 |
/* |
|
112 |
* (non-Javadoc) |
|
113 |
* |
|
114 |
* @see java.util.List#get(int) |
|
115 |
*/ |
|
116 |
@Override |
|
117 |
public Field get(final int i) { |
|
118 |
return fields.get(i); |
|
119 |
} |
|
120 |
|
|
121 |
/* |
|
122 |
* (non-Javadoc) |
|
123 |
* |
|
124 |
* @see java.util.List#indexOf(java.lang.Object) |
|
125 |
*/ |
|
126 |
@Override |
|
127 |
public int indexOf(final Object o) { |
|
128 |
return fields.indexOf(o); |
|
129 |
} |
|
130 |
|
|
131 |
/* |
|
132 |
* (non-Javadoc) |
|
133 |
* |
|
134 |
* @see eu.dnetlib.pace.model.Field#isEmpty() |
|
135 |
*/ |
|
136 |
@Override |
|
137 |
public boolean isEmpty() { |
|
138 |
return fields.isEmpty(); |
|
139 |
} |
|
140 |
|
|
141 |
/* |
|
142 |
* (non-Javadoc) |
|
143 |
* |
|
144 |
* @see java.lang.Iterable#iterator() |
|
145 |
*/ |
|
146 |
@Override |
|
147 |
public Iterator<Field> iterator() { |
|
148 |
return fields.iterator(); |
|
149 |
} |
|
150 |
|
|
151 |
/* |
|
152 |
* (non-Javadoc) |
|
153 |
* |
|
154 |
* @see java.util.List#lastIndexOf(java.lang.Object) |
|
155 |
*/ |
|
156 |
@Override |
|
157 |
public int lastIndexOf(final Object o) { |
|
158 |
return fields.lastIndexOf(o); |
|
159 |
} |
|
160 |
|
|
161 |
/* |
|
162 |
* (non-Javadoc) |
|
163 |
* |
|
164 |
* @see java.util.List#listIterator() |
|
165 |
*/ |
|
166 |
@Override |
|
167 |
public ListIterator<Field> listIterator() { |
|
168 |
return fields.listIterator(); |
|
169 |
} |
|
170 |
|
|
171 |
/* |
|
172 |
* (non-Javadoc) |
|
173 |
* |
|
174 |
* @see java.util.List#listIterator(int) |
|
175 |
*/ |
|
176 |
@Override |
|
177 |
public ListIterator<Field> listIterator(final int i) { |
|
178 |
return fields.listIterator(i); |
|
179 |
} |
|
180 |
|
|
181 |
/* |
|
182 |
* (non-Javadoc) |
|
183 |
* |
|
184 |
* @see java.util.List#remove(java.lang.Object) |
|
185 |
*/ |
|
186 |
@Override |
|
187 |
public boolean remove(final Object o) { |
|
188 |
return fields.remove(o); |
|
189 |
} |
|
190 |
|
|
191 |
/* |
|
192 |
* (non-Javadoc) |
|
193 |
* |
|
194 |
* @see java.util.List#remove(int) |
|
195 |
*/ |
|
196 |
@Override |
|
197 |
public Field remove(final int i) { |
|
198 |
return fields.remove(i); |
|
199 |
} |
|
200 |
|
|
201 |
/* |
|
202 |
* (non-Javadoc) |
|
203 |
* |
|
204 |
* @see java.util.List#removeAll(java.util.Collection) |
|
205 |
*/ |
|
206 |
@Override |
|
207 |
public boolean removeAll(final Collection<?> f) { |
|
208 |
return fields.removeAll(f); |
|
209 |
} |
|
210 |
|
|
211 |
/* |
|
212 |
* (non-Javadoc) |
|
213 |
* |
|
214 |
* @see java.util.List#retainAll(java.util.Collection) |
|
215 |
*/ |
|
216 |
@Override |
|
217 |
public boolean retainAll(final Collection<?> f) { |
|
218 |
return fields.retainAll(f); |
|
219 |
} |
|
220 |
|
|
221 |
/* |
|
222 |
* (non-Javadoc) |
|
223 |
* |
|
224 |
* @see java.util.List#set(int, java.lang.Object) |
|
225 |
*/ |
|
226 |
@Override |
|
227 |
public Field set(final int i, final Field f) { |
|
228 |
return fields.set(i, f); |
|
229 |
} |
|
230 |
|
|
231 |
/* |
|
232 |
* (non-Javadoc) |
|
233 |
* |
|
234 |
* @see java.util.List#size() |
|
235 |
*/ |
|
236 |
@Override |
|
237 |
public int size() { |
|
238 |
return fields.size(); |
|
239 |
} |
|
240 |
|
|
241 |
/* |
|
242 |
* (non-Javadoc) |
|
243 |
* |
|
244 |
* @see java.util.List#subList(int, int) |
|
245 |
*/ |
|
246 |
@Override |
|
247 |
public List<Field> subList(final int from, final int to) { |
|
248 |
return fields.subList(from, to); |
|
249 |
} |
|
250 |
|
|
251 |
/* |
|
252 |
* (non-Javadoc) |
|
253 |
* |
|
254 |
* @see java.util.List#toArray() |
|
255 |
*/ |
|
256 |
@Override |
|
257 |
public Object[] toArray() { |
|
258 |
return fields.toArray(); |
|
259 |
} |
|
260 |
|
|
261 |
/* |
|
262 |
* (non-Javadoc) |
|
263 |
* |
|
264 |
* @see java.util.List#toArray(java.lang.Object[]) |
|
265 |
*/ |
|
266 |
@Override |
|
267 |
public <T> T[] toArray(final T[] t) { |
|
268 |
return fields.toArray(t); |
|
269 |
} |
|
270 |
|
|
271 |
/* |
|
272 |
* (non-Javadoc) |
|
273 |
* |
|
274 |
* @see eu.dnetlib.pace.model.Field#stringValue() |
|
275 |
*/ |
|
276 |
@Override |
|
277 |
public String stringValue() { |
|
278 |
return Joiner.on(" ").join(stringList()); |
|
279 |
} |
|
280 |
|
|
281 |
/* |
|
282 |
* (non-Javadoc) |
|
283 |
* |
|
284 |
* @see eu.dnetlib.pace.model.FieldList#stringList() |
|
285 |
*/ |
|
286 |
@Override |
|
287 |
public List<String> stringList() { |
|
288 |
return Lists.newArrayList(Iterables.transform(fields, new Function<Field, String>() { |
|
289 |
|
|
290 |
@Override |
|
291 |
public String apply(final Field f) { |
|
292 |
return f.stringValue(); |
|
293 |
} |
|
294 |
})); |
|
295 |
} |
|
296 |
|
|
297 |
@Override |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-pace-core-2.1.0