Project

General

Profile

« Previous | Next » 

Revision 38187

[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-3.1.6

View differences:

modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-openaireplus-mapping-utils"}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.Type;
9
import eu.dnetlib.pace.model.FieldListImpl;
10
import eu.dnetlib.pace.model.FieldValueImpl;
11
import eu.dnetlib.pace.model.MapDocument;
12

  
13
public class BlacklistAwareClusteringCombinerTest extends AbstractProtoPaceTest {
14

  
15
	private Config config;
16

  
17
	@Before
18
	public void setUp() {
19
		config = getResultFullConf();
20
	}
21

  
22
	@Test
23
	public void testCombine() {
24
		final MapDocument result =
25
				result(config, "A", "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission", "2013");
26
		final FieldListImpl fl = new FieldListImpl();
27
		fl.add(new FieldValueImpl(Type.String, "desc", "hello world description pipeline"));
28

  
29
		result.getFieldMap().put("desc", fl);
30

  
31
		fl.clear();
32
		fl.add(new FieldValueImpl(Type.String, "title", "lorem ipsum cabalie qwerty"));
33
		final FieldListImpl field = (FieldListImpl) result.getFieldMap().get("title");
34
		field.add(fl);
35

  
36
		System.out.println(BlacklistAwareClusteringCombiner.filterAndCombine(result, config, config.blacklists()));
37
	}
38
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java
1
package eu.dnetlib.pace.clustering;
2

  
3
import org.junit.Before;
4
import org.junit.Test;
5

  
6
import eu.dnetlib.pace.AbstractProtoPaceTest;
7
import eu.dnetlib.pace.config.Config;
8
import eu.dnetlib.pace.config.Type;
9
import eu.dnetlib.pace.model.FieldListImpl;
10
import eu.dnetlib.pace.model.FieldValueImpl;
11
import eu.dnetlib.pace.model.MapDocument;
12

  
13
public class ClusteringCombinerTest extends AbstractProtoPaceTest {
14

  
15
	private Config config;
16

  
17
	@Before
18
	public void setUp() {
19
		config = getResultFullConf();
20
	}
21

  
22
	@Test
23
	public void testCombine() {
24
		String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission";
25
		MapDocument result = result(config, "A", title, "2013");
26

  
27
		FieldListImpl fl = new FieldListImpl();
28
		fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty"));
29

  
30
		result.getFieldMap().put("desc", fl);
31
		System.out.println(title);
32
		System.out.println(ClusteringCombiner.combine(result, config));
33
	}
34

  
35
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java
1
package eu.dnetlib.pace;
2

  
3
import java.io.IOException;
4
import java.io.StringWriter;
5
import java.util.List;
6

  
7
import org.apache.commons.io.IOUtils;
8
import org.apache.commons.lang.RandomStringUtils;
9
import org.apache.commons.lang.StringUtils;
10

  
11
import com.google.gson.Gson;
12

  
13
import eu.dnetlib.data.mapreduce.util.OafTest;
14
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
15
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
16
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
17
import eu.dnetlib.data.proto.OafProtos.Oaf;
18
import eu.dnetlib.data.proto.OafProtos.OafEntity;
19
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
20
import eu.dnetlib.data.proto.PersonProtos.Person;
21
import eu.dnetlib.data.proto.ResultProtos.Result;
22
import eu.dnetlib.pace.config.Config;
23
import eu.dnetlib.pace.config.DedupConfig;
24
import eu.dnetlib.pace.config.Type;
25
import eu.dnetlib.pace.model.Field;
26
import eu.dnetlib.pace.model.FieldValueImpl;
27
import eu.dnetlib.pace.model.MapDocument;
28
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
29
import eu.dnetlib.pace.model.gt.GTAuthor;
30
import eu.dnetlib.pace.model.gt.GTAuthorMapper;
31

  
32
public abstract class AbstractProtoPaceTest extends OafTest {
33

  
34
	protected DedupConfig getResultFullConf() {
35
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf"));
36
	}
37

  
38
	protected DedupConfig getResultSimpleConf() {
39
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf"));
40
	}
41

  
42
	protected DedupConfig getResultConf() {
43
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf"));
44
	}
45

  
46
	protected DedupConfig getOrganizationSimpleConf() {
47
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf"));
48
	}
49

  
50
	protected DedupConfig getResultAuthorsConf() {
51
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf"));
52
	}
53

  
54
	protected DedupConfig getPersonConf() {
55
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf"));
56
	}
57

  
58
	protected DedupConfig getResultProdConf() {
59
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf"));
60
	}
61

  
62
	protected MapDocument person(final Config conf, final String id, final Oaf oaf) {
63
		return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model());
64
	}
65

  
66
	protected Oaf getPersonGT(final String path) {
67
		return new GTAuthorMapper().map(getGTAuthor(path));
68
	}
69

  
70
	protected GTAuthor getGTAuthor(final String path) {
71

  
72
		final Gson gson = new Gson();
73

  
74
		final String json = readFromClasspath("/eu/dnetlib/pace/model/gt.author.manghi1.json");
75

  
76
		final GTAuthor gta = gson.fromJson(json, GTAuthor.class);
77

  
78
		return gta;
79
	}
80

  
81
	private String readFromClasspath(final String filename) {
82
		final StringWriter sw = new StringWriter();
83
		try {
84
			IOUtils.copy(getClass().getResourceAsStream(filename), sw);
85
			return sw.toString();
86
		} catch (final IOException e) {
87
			throw new RuntimeException("cannot load resource from classpath: " + filename);
88
		}
89
	}
90

  
91
	protected MapDocument result(final Config config, final String id, final String title) {
92
		return result(config, id, title, null, null, null);
93
	}
94

  
95
	protected MapDocument result(final Config config, final String id, final String title, final String date) {
96
		return result(config, id, title, date, null, null);
97
	}
98

  
99
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
100
		return result(config, id, title, date, pid, null);
101
	}
102

  
103
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
104
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
105
		if (!StringUtils.isBlank(title)) {
106
			metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
107
			metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
108
		}
109
		if (!StringUtils.isBlank(date)) {
110
			metadata.setDateofacceptance(sf(date));
111
		}
112

  
113
		final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
114
		final Result.Builder result = Result.newBuilder().setMetadata(metadata);
115

  
116
		if (authors != null) {
117
			for (final String author : authors) {
118
				result.addAuthor(person(author));
119
			}
120
		}
121

  
122
		entity.setResult(result);
123

  
124
		if (!StringUtils.isBlank(pid)) {
125
			entity.addPid(sp(pid, "doi"));
126
			entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
127
		}
128

  
129
		final OafEntity build = entity.build();
130
		return ProtoDocumentBuilder.newInstance(id, build, config.model());
131
	}
132

  
133
	private Person.Builder person(final String author) {
134
		final Person.Builder person = Person.newBuilder();
135

  
136
		final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false);
137
		final Person.Metadata.Builder metadata = Person.Metadata.newBuilder();
138
		if (p.isAccurate()) {
139
			metadata.setFirstname(sf(p.getNormalisedFirstName()));
140
			metadata.addSecondnames(sf(p.getNormalisedSurname()));
141
			metadata.setFullname(sf(p.getNormalisedFullname()));
142
		} else {
143
			metadata.setFullname(sf(p.getOriginal()));
144
		}
145

  
146
		return person.setMetadata(metadata);
147
	}
148

  
149
	private OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
150
		final OafEntity.Builder entity = OafEntity.newBuilder().setId(id).setType(type);
151
		return entity;
152
	}
153

  
154
	protected MapDocument organization(final Config config, final String id, final String legalName) {
155
		return organization(config, id, legalName, null);
156
	}
157

  
158
	protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
159
		final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder();
160
		if (legalName != null) {
161
			metadata.setLegalname(sf(legalName));
162
		}
163
		if (legalShortName != null) {
164
			metadata.setLegalshortname(sf(legalShortName));
165
		}
166

  
167
		final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result);
168
		entity.setOrganization(Organization.newBuilder().setMetadata(metadata));
169

  
170
		return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
171
	}
172

  
173
	private StructuredProperty sp(final String pid, final String type) {
174
		final Builder pidSp =
175
				StructuredProperty.newBuilder().setValue(pid)
176
				.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types"));
177
		return pidSp.build();
178
	}
179

  
180
	protected Field title(final String s) {
181
		return new FieldValueImpl(Type.String, "title", s);
182
	}
183

  
184
	protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) {
185
		return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
186
	}
187

  
188
	/*
189
	 * protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); }
190
	 *
191
	 * protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return
192
	 * Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); }
193
	 */
194

  
195
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/model/ProtoDocumentBuilderTest.java
1
package eu.dnetlib.pace.model;
2

  
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertTrue;
5

  
6
import org.junit.Test;
7

  
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Sets;
10
import com.google.common.collect.Sets.SetView;
11

  
12
import eu.dnetlib.pace.AbstractProtoPaceTest;
13
import eu.dnetlib.pace.config.Config;
14

  
15
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest {
16

  
17
	@Test
18
	public void test_serialise1() {
19

  
20
		final String id = "12345";
21

  
22
		final Config config = getResultFullConf();
23

  
24
		final MapDocument document = ProtoDocumentBuilder.newInstance(id, getResult(id), config.model());
25

  
26
		assertFalse(document.fieldNames().isEmpty());
27
		assertFalse(Iterables.isEmpty(document.fields()));
28

  
29
		System.out.println("original:\n" + document);
30

  
31
		final String stringDoc = MapDocumentSerializer.toString(document);
32

  
33
		System.out.println("srialization:\n" + stringDoc);
34

  
35
		final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes());
36

  
37
		final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames());
38

  
39
		assertTrue(diff.isEmpty());
40

  
41
		System.out.println("decoded:\n" + decoded);
42
	}
43

  
44
	@Test
45
	public void test_serialise2() {
46

  
47
		final String id = "12345";
48
		final String path = "/eu/dnetlib/pace/model/gt.author.manghi1.json";
49

  
50
		final Config config = getPersonConf();
51

  
52
		final MapDocument document = ProtoDocumentBuilder.newInstance(id, getPersonGT(path).getEntity(), config.model());
53

  
54
		assertFalse(document.fieldNames().isEmpty());
55
		assertFalse(Iterables.isEmpty(document.fields()));
56

  
57
		System.out.println("original:\n" + document);
58

  
59
		final String stringDoc = MapDocumentSerializer.toString(document);
60

  
61
		System.out.println("srialization:\n" + stringDoc);
62

  
63
		final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes());
64

  
65
		final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames());
66

  
67
		assertTrue(diff.isEmpty());
68

  
69
		System.out.println("decoded:\n" + decoded);
70
	}
71

  
72
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/model/gt/AuthorTest.java
1
package eu.dnetlib.pace.model.gt;
2

  
3
import static org.junit.Assert.assertTrue;
4

  
5
import java.util.Set;
6

  
7
import org.junit.Test;
8

  
9
import com.google.common.collect.Sets;
10

  
11
import eu.dnetlib.pace.model.gt.Author;
12
import eu.dnetlib.pace.model.gt.Authors;
13

  
14
public class AuthorTest {
15

  
16
	@Test
17
	public void test() {
18
		final Set<Author> s1 = getAuthors(3);
19
		final Set<Author> s2 = getAuthors(3);
20

  
21
		final Set<Author> i = Sets.intersection(s1, s2);
22

  
23
		System.out.println(i);
24

  
25
		assertTrue(i.size() == 3);
26

  
27
	}
28

  
29
	@Test
30
	public void test1() {
31
		final Authors a1 = new Authors(a("1", "Wang, M."));
32
		final Authors a2 = new Authors(a("1", "Wang, M."));
33

  
34
		final Set<Author> i = Sets.intersection(a1, a2);
35

  
36
		assertTrue(i.size() == 1);
37

  
38
	}
39

  
40
	private Set<Author> getAuthors(final int n) {
41
		final Set<Author> s = Sets.newHashSet();
42

  
43
		for (int i = 0; i < n; i++) {
44
			s.add(a(i + "", "name" + i));
45
		}
46
		return s;
47
	}
48

  
49
	private Author a(final String id, final String fullname) {
50
		final Author a = new Author();
51
		a.setId(id);
52
		a.setFullname(fullname);
53
		return a;
54
	}
55

  
56
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java
1
package eu.dnetlib.pace.distance;
2

  
3
import static org.junit.Assert.assertTrue;
4

  
5
import java.util.List;
6

  
7
import org.junit.Test;
8

  
9
import com.google.common.collect.Lists;
10

  
11
import eu.dnetlib.pace.AbstractProtoPaceTest;
12
import eu.dnetlib.pace.config.Config;
13
import eu.dnetlib.pace.model.MapDocument;
14

  
15
public class DetectorTest extends AbstractProtoPaceTest {
16

  
17
	@Test
18
	public void testDistanceResultSimple() {
19
		final Config config = getResultSimpleConf();
20

  
21
		final MapDocument resA = result(config, "A", "Recent results from CDF");
22
		final MapDocument resB = result(config, "B", "Recent results from CDF");
23

  
24
		final double d = new PaceDocumentDistance().between(resA, resB, config);
25
		System.out.println(String.format(" d ---> %s", d));
26

  
27
		assertTrue(d == 1.0);
28
	}
29

  
30
	@Test
31
	public void testDistanceResultSimpleMissingDates() {
32
		final Config config = getResultSimpleConf();
33

  
34
		final MapDocument resA = result(config, "A", "Recent results from BES");
35
		final MapDocument resB = result(config, "A", "Recent results from CES");
36

  
37
		final double d = new PaceDocumentDistance().between(resA, resB, config);
38
		System.out.println(String.format(" d ---> %s", d));
39

  
40
		assertTrue(d > 0.97);
41
	}
42

  
43
	@Test
44
	public void testDistanceResultInvalidDate() {
45
		final Config config = getResultConf();
46

  
47
		final MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05");
48
		final MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty");
49

  
50
		final double d = new PaceDocumentDistance().between(resA, resB, config);
51
		System.out.println(String.format(" d ---> %s", d));
52

  
53
		assertTrue(d == 1.0);
54
	}
55

  
56
	@Test
57
	public void testDistanceResultMissingOneDate() {
58
		final Config config = getResultConf();
59

  
60
		final MapDocument resA = result(config, "A", "title title title 6BESR", null);
61
		final MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02");
62

  
63
		final double d = new PaceDocumentDistance().between(resA, resB, config);
64
		System.out.println(String.format(" d ---> %s", d));
65

  
66
		assertTrue((d > 0.9) && (d < 1.0));
67
	}
68

  
69
	@Test
70
	public void testDistanceResult() {
71
		final Config config = getResultConf();
72

  
73
		final MapDocument resA = result(config, "A", "title title title BES", "");
74
		final MapDocument resB = result(config, "B", "title title title CLEO");
75

  
76
		final double d = new PaceDocumentDistance().between(resA, resB, config);
77
		System.out.println(String.format(" d ---> %s", d));
78

  
79
		assertTrue((d > 0.9) && (d < 1.0));
80
	}
81

  
82
	@Test
83
	public void testDistanceResultMissingTwoDate() {
84
		final Config config = getResultConf();
85

  
86
		final MapDocument resA = result(config, "A", "title title title 6BESR");
87
		final MapDocument resB = result(config, "B", "title title title 6CLER");
88

  
89
		final double d = new PaceDocumentDistance().between(resA, resB, config);
90

  
91
		System.out.println(String.format(" d ---> %s", d));
92

  
93
		assertTrue((d > 0.9) && (d < 1.0));
94
	}
95

  
96
	@Test
97
	public void testDistanceOrganizationIgnoreMissing() {
98

  
99
		final Config config = getOrganizationSimpleConf();
100

  
101
		final MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE");
102
		final MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR");
103

  
104
		final double d = new PaceDocumentDistance().between(orgA, orgB, config);
105
		System.out.println(String.format(" d ---> %s", d));
106

  
107
		assertTrue(d == 1.0);
108
	}
109

  
110
	@Test
111
	public void testDistanceResultCase1() {
112

  
113
		final Config config = getResultConf();
114

  
115
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003");
116
		final MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003");
117

  
118
		final double d = new PaceDocumentDistance().between(resA, resB, config);
119
		System.out.println(String.format(" d ---> %s", d));
120

  
121
		assertTrue((d > 0.9) && (d < 1.0));
122
	}
123

  
124
	@Test
125
	public void testDistanceResultCaseDoiMatch1() {
126
		final Config config = getResultConf();
127

  
128
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855");
129
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855");
130

  
131
		final double d = new PaceDocumentDistance().between(resA, resB, config);
132
		System.out.println(String.format(" d ---> %s", d));
133

  
134
		assertTrue("exact DOIs will produce an exact match", d == 1.0);
135
	}
136

  
137
	@Test
138
	public void testDistanceResultCaseDoiMatch2() {
139
		final Config config = getResultConf();
140

  
141
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855");
142
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "doi:10.1594/PANGAEA.726855");
143

  
144
		final double d = new PaceDocumentDistance().between(resA, resB, config);
145
		System.out.println(String.format(" d ---> %s", d));
146

  
147
		assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0);
148
	}
149

  
150
	@Test
151
	public void testDistanceResultCaseDoiMatch3() {
152
		final Config config = getResultConf();
153

  
154
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
155
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003");
156

  
157
		final double d = new PaceDocumentDistance().between(resA, resB, config);
158
		System.out.println(String.format(" d ---> %s", d));
159

  
160
		assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0);
161
	}
162

  
163
	@Test
164
	public void testDistanceResultCaseDoiMatch4() {
165
		final Config config = getResultConf();
166

  
167
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
168
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005");
169

  
170
		final double d = new PaceDocumentDistance().between(resA, resB, config);
171
		System.out.println(String.format(" d ---> %s", d));
172

  
173
		assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0);
174
	}
175

  
176
	@Test
177
	public void testDistanceResultCaseDoiMatch5() {
178

  
179
		final Config config = getResultConf();
180

  
181
		final MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020");
182
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003");
183

  
184
		final double d = new PaceDocumentDistance().between(resA, resB, config);
185
		System.out.println(String.format(" d ---> %s", d));
186

  
187
		assertTrue("a missing DOI, comparsion continues with the following conditions", (d > 0.9) && (d < 1.0));
188
	}
189

  
190
	@Test
191
	public void testDistanceResultCaseDoiMatch6() {
192
		final Config config = getResultConf();
193

  
194
		final MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024");
195
		final MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI");
196

  
197
		final double d = new PaceDocumentDistance().between(resA, resB, config);
198
		System.out.println(String.format(" d ---> %s", d));
199

  
200
		assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0);
201
	}
202

  
203
	// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855
204

  
205
	@Test
206
	public void testDistanceResultCaseAuthor1() {
207

  
208
		final Config config = getResultAuthorsConf();
209

  
210
		final List<String> authorsA = Lists.newArrayList("a", "b", "c", "d");
211
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
212

  
213
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA);
214
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB);
215

  
216
		final double d = new PaceDocumentDistance().between(resA, resB, config);
217
		System.out.println(String.format(" d ---> %s", d));
218

  
219
		assertTrue(d == 0.0);
220
	}
221

  
222
	@Test
223
	public void testDistanceResultCaseAuthor2() {
224

  
225
		final Config config = getResultAuthorsConf();
226

  
227
		final List<String> authorsA = Lists.newArrayList("a", "b", "c");
228
		final List<String> authorsB = Lists.newArrayList("a", "b", "c");
229

  
230
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA);
231
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB);
232

  
233
		final double d = new PaceDocumentDistance().between(resA, resB, config);
234
		System.out.println(String.format(" d ---> %s", d));
235

  
236
		assertTrue(d == 1.0);
237
	}
238

  
239
	@Test
240
	public void testDistanceResultCaseAuthor3() {
241

  
242
		final Config config = getResultAuthorsConf();
243

  
244
		final List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M.");
245
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
246

  
247
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA);
248
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB);
249

  
250
		final double d = new PaceDocumentDistance().between(resA, resB, config);
251
		System.out.println(String.format(" d ---> %s", d));
252

  
253
		assertTrue((d > 0.9) && (d < 1.0));
254
	}
255

  
256
	@Test
257
	public void testDistanceResultCaseAuthor4() {
258

  
259
		final Config config = getResultAuthorsConf();
260

  
261
		final List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a");
262
		final List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele");
263

  
264
		final MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA);
265
		final MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB);
266

  
267
		final double d = new PaceDocumentDistance().between(resA, resB, config);
268
		System.out.println(String.format(" d ---> %s", d));
269

  
270
		// assertTrue(d == 0.0);
271
	}
272

  
273
	@Test
274
	public void testDistanceResultFullConf() {
275

  
276
		final Config config = getResultFullConf();
277

  
278
		final List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva");
279
		final List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie");
280

  
281
		final MapDocument resA =
282
				result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010",
283
						"10.1186/1752-1947-4-299", authorsA);
284
		final MapDocument resB =
285
				result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", null,
286
						authorsB);
287

  
288
		final double d = new PaceDocumentDistance().between(resA, resB, config);
289
		System.out.println(String.format(" d ---> %s", d));
290

  
291
		// assertTrue(d == 0.0);
292
	}
293

  
294
	@Test
295
	public void testDistanceProdConf1() {
296

  
297
		final Config config = getResultProdConf();
298

  
299
		final MapDocument resA =
300
				result(config,
301
						"A",
302
						" Analysis of Transfer Embryo-Derived de-duplication");
303
		final MapDocument resB =
304
				result(config,
305
						"B",
306
						" Analysis of Transfer Embryo Derived deduplication");
307

  
308
		final double d = new PaceDocumentDistance().between(resA, resB, config);
309
		System.out.println(String.format(" d ---> %s", d));
310

  
311
		// assertTrue(d == 0.0);
312
	}
313

  
314
	@Test
315
	public void testDistanceProdConf2() {
316

  
317
		final Config config = getResultProdConf();
318

  
319
		final MapDocument resA =
320
				result(config,
321
						"A",
322
						"qwerty aaabbbbbbbb bbb ccc ddddd");
323
		final MapDocument resB =
324
				result(config,
325
						"B",
326
						"qwert aaabbbbbbbb bbb ccc ddddd");
327

  
328
		final double d = new PaceDocumentDistance().between(resA, resB, config);
329
		System.out.println(String.format(" d ---> %s", d));
330

  
331
		// assertTrue(d == 0.0);
332
	}
333

  
334
	@Test
335
	public void testDistancePersonConf1() {
336

  
337
		final Config config = getPersonConf();
338

  
339
		final MapDocument p1 = person(config, "p1_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi1.json"));
340
		final MapDocument p2 = person(config, "p2_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi2.json"));
341

  
342
		final double d = new PaceDocumentDistance().between(p1, p2, config);
343
		System.out.println(String.format(" d ---> %s", d));
344

  
345
		// assertTrue(d == 0.0);
346
	}
347

  
348
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.util.List;
7

  
8
import org.junit.Test;
9

  
10
import eu.dnetlib.data.proto.KindProtos.Kind;
11
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
12

  
13
public class OafDecoderTest {
14

  
15
	@Test
16
	public void testAsXml() {
17

  
18
		final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity);
19

  
20
		assertNotNull(decoder);
21

  
22
		assertNotNull(decoder.asXml());
23

  
24
		System.out.println(IndentXmlString.apply(decoder.asXml()));
25

  
26
	}
27

  
28
	@Test
29
	public void testGetFieldValues() {
30
		final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity);
31

  
32
		final String path = "result/metadata/title/value";
33
		final List<String> titles = decoder.decodeEntity().getFieldValues(path);
34

  
35
		assertNotNull(titles);
36
		assertFalse(titles.isEmpty());
37
	}
38
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/data/mapreduce/util/OafRowKeyDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import org.junit.Test;
4

  
5
public class OafRowKeyDecoderTest {
6

  
7
	@Test
8
	public void test() {
9

  
10
		String id1 = "50|acnbad______::0a454baf9c61e63d42fb83ab549f8062";
11

  
12
		OafRowKeyDecoder d = OafRowKeyDecoder.decode(id1);
13

  
14
		System.out.println(d.getId());
15
	}
16

  
17
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import static org.junit.Assert.assertEquals;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import org.junit.Before;
7
import org.junit.Test;
8

  
9
import com.google.protobuf.Descriptors.FieldDescriptor;
10

  
11
import eu.dnetlib.data.proto.OafProtos.OafRel;
12
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
13
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
14
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
15

  
16
public class OafRelDecoderTest {
17

  
18
	private OafRel oafRel;
19

  
20
	@Before
21
	public void setUp() {
22
		oafRel = OafTest.getPersonResult("ID_1", "ID_2", "1", "isAuthor");
23
	}
24

  
25
	@Test
26
	public void testSetClass() {
27

  
28
		OafRelDecoder d1 = OafRelDecoder.decode(oafRel);
29

  
30
		assertNotNull(d1);
31
		assertEquals("isAuthor", d1.getRelClass());
32

  
33
		OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("hasAuthor").build());
34

  
35
		assertEquals("hasAuthor", d2.getRelClass());
36
		assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassid());
37
		assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassname());
38

  
39
		FieldDescriptor fd = Authorship.getDescriptor().findFieldByName("ranking");
40
		assertEquals(d1.getSubRel().getField(fd), d2.getSubRel().getField(fd));
41
	}
42

  
43
	@Test
44
	public void testGetCF() {
45
		assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, Authorship.RelName.isAuthorOf));
46
		assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, "isAuthorOf"));
47
	}
48

  
49
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/data/transform/SolrProtoMapperTest.java
1
package eu.dnetlib.data.transform;
2

  
3
import static org.junit.Assert.assertFalse;
4
import static org.junit.Assert.assertNotNull;
5

  
6
import java.io.IOException;
7
import java.io.StringWriter;
8

  
9
import org.apache.commons.codec.binary.Base64;
10
import org.apache.commons.io.IOUtils;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.apache.solr.common.SolrInputDocument;
14
import org.apache.solr.common.SolrInputField;
15
import org.dom4j.DocumentException;
16
import org.junit.Before;
17
import org.junit.Test;
18

  
19
import com.google.protobuf.InvalidProtocolBufferException;
20
import com.googlecode.protobuf.format.JsonFormat;
21

  
22
import eu.dnetlib.data.mapreduce.util.OafTest;
23
import eu.dnetlib.data.proto.KindProtos.Kind;
24
import eu.dnetlib.data.proto.OafProtos.Oaf;
25
import eu.dnetlib.data.proto.OafProtos.OafEntity;
26
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory;
27

  
28
public class SolrProtoMapperTest {
29

  
30
	private static final Log log = LogFactory.getLog(SolrProtoMapper.class); // NOPMD by marko on 11/24/08 5:02 PM
31
	private String fields;
32

  
33
	@Before
34
	public void setUp() throws IOException {
35
		final StringWriter sw = new StringWriter();
36
		IOUtils.copy(getClass().getResourceAsStream("fields.xml"), sw);
37
		fields = sw.toString();
38
		assertNotNull(fields);
39
		assertFalse(fields.isEmpty());
40

  
41
		log.info(fields);
42
	}
43

  
44
	@Test
45
	public void testProto2SolrDocument() throws DocumentException, InvalidProtocolBufferException {
46
		final SolrProtoMapper mapper = new SolrProtoMapper(fields);
47

  
48
		assertNotNull(mapper);
49

  
50
		final OafEntity.Builder entity = OafTest.getResultBuilder("01");
51
		entity.addChildren(OafTest.getResultBuilder("01_children"));
52

  
53
		final Oaf oaf = OafTest.embed(entity.build(), Kind.entity).getOaf();
54

  
55
		assertNotNull(oaf.getEntity().getChildrenList());
56
		assertFalse(oaf.getEntity().getChildrenList().isEmpty());
57

  
58
		log.info("byte[] size: " + oaf.toByteArray().length);
59

  
60
		log.info("json size:   " + JsonFormat.printToString(oaf).length());
61

  
62
		log.info("base64 size: " + Base64.encodeBase64String(oaf.toByteArray()).length());
63

  
64
		final byte[] decodeBase64 = Base64.decodeBase64(Base64.encodeBase64String(oaf.toByteArray()));
65

  
66
		log.info("decoded: " + JsonFormat.printToString(Oaf.parseFrom(decodeBase64)));
67

  
68
		final SolrInputDocument doc = mapper.map(oaf, InputDocumentFactory.getParsedDateField("2015-02-15"), "asd", "action-set");
69

  
70
		assertNotNull(doc);
71

  
72
		for (final SolrInputField f : doc.values()) {
73
			log.info(f);
74
		}
75
	}
76
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/organization.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "organization", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "organization" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {		
14
  		"conditions" : [ ],		
15
		"model" : [
16
			{ "name" : "legalname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.6", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value" },
17
			{ "name" : "legalshortname", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.4", "ignoreMissing" : "true", "path" : "organization/metadata/legalshortname/value" }
18
		],
19
		"blacklists" : { } 		
20
	}
21
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/model/gt.author.manghi1.json
1
{"id":"30|dedup_wf_001::e78059705c3885a10440c8021afbdd4a","author":{"frequency":6,"id":"30|od______2294::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},"merged":[{"id":"30|od______2294::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"id":"30|doaj10829873::6cff63f7eafbb51fd6a1c268f4f0227f","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"id":"30|od______2367::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"id":"30|doaj10829873::f67c45f3bd4e4e4c4942a373799e0457","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"id":"30|doaj10829873::39a29e7c15e04cdef8aecaa062b4f000","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"id":"30|doaj10829873::f9b4de41343d956271d88f93ba68c31d","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"}],"coAuthors":[{"anchorId":"30|dedup_wf_001::73aac4e0ca21747def81773cdf242152","id":"30|od______2367::069aab4b3defa55d5fb573838e54ed10","fullname":"Werf, Titia","firstname":"Titia","secondnames":"Werf"},{"anchorId":"30|dedup_wf_001::cd493693f2e04159d419cfbea6042944","id":"30|doaj10829873::0acf7cdc45040bd02482b94760af9db5","fullname":"Smith, Tim","firstname":"Tim","secondnames":"Smith"},{"anchorId":"30|dedup_wf_001::4867c41ae4006a89a4e00fa5b2ace526","id":"30|od______2294::717184cb6436add31e3ce0ba86ba2476","fullname":"Katifori, Akrivi","firstname":"Akrivi","secondnames":"Katifori"},{"anchorId":"30|dedup_wf_001::d17df318b8ab8b17ac2b016767e183b4","id":"30|od______2367::d24f772460583eb514d0465f77f15030","fullname":"Rettberg, Najla","firstname":"Najla","secondnames":"Rettberg"},{"anchorId":"30|dedup_wf_001::ce0c9316f1d270fe7c021ac010378189","id":"30|od______2367::ef2914f870d83e32764f2e0db122d0c4","fullname":"Houssos, 
Nikos","firstname":"Nikos","secondnames":"Houssos"},{"anchorId":"30|dedup_wf_001::ce0c9316f1d270fe7c021ac010378189","id":"30|od______2294::ef2914f870d83e32764f2e0db122d0c4","fullname":"Houssos, Nikos","firstname":"Nikos","secondnames":"Houssos"},{"anchorId":"30|dedup_wf_001::271f098792b54069a4d77b4fee16b354","id":"30|od______2294::217bb36de851aacdeae92250959e5af9","fullname":"Schmidt, Birgit","firstname":"Birgit","secondnames":"Schmidt"},{"anchorId":"30|dedup_wf_001::19651c25c06f8ddbf8fb582efa51bb10","id":"30|od______2367::c48a8036130fc6f9a73348ba3be40041","fullname":"Biagini, Federico","firstname":"Federico","secondnames":"Biagini"},{"anchorId":"30|dedup_wf_001::49e6185992465c4473a19d74aaf0c774","id":"30|od______2367::44afefb40235eadaa9424f735dc569ad","fullname":"Schirrwagen, Jochen","firstname":"Jochen","secondnames":"Schirrwagen"},{"anchorId":"30|dedup_wf_001::21bff9fc229ffae537509689a3eb2bd1","id":"30|doaj10829873::269dbf736b973d298160217072f127bd","fullname":"Candela, Leonardo","firstname":"Leonardo","secondnames":"Candela"},{"anchorId":"30|dedup_wf_001::84b80c9fc38b022ae4e4a5c25ffa7999","id":"30|od______2367::a93eb6a6cbfce5bcdf86cb743f788077","fullname":"Castelli, Donatella","firstname":"Donatella","secondnames":"Castelli"},{"anchorId":"30|dedup_wf_001::731b5090d3f9c6dac58ad770b1a29e13","id":"30|doaj10829873::c6cc48d8aa310a1d2c87f5df7c5576bb","fullname":"Bardi, Alessia","firstname":"Alessia","secondnames":"Bardi"},{"anchorId":"30|dedup_wf_001::5a835806fb58a8199640d0b54de6b70a","id":"30|od______2367::9c743b25e78b06608d4214ba18f92690","fullname":"Bolikowski, Lukasz","firstname":"Lukasz","secondnames":"Bolikowski"},{"anchorId":"30|dedup_wf_001::d71c73319eda487f9b56e860e2c1794f","id":"30|od______2367::c11bdd64efcd19316ef10c1967392565","fullname":"Peters, Dale","firstname":"Dale","secondnames":"Peters"},{"anchorId":"30|dedup_wf_001::2c561a4cbd689f4729324744fb046995","id":"30|doaj10829873::80724b7b737d6c51b3733eaf9da4b605","fullname":"Mikulicic, 
Marko","firstname":"Marko","secondnames":"Mikulicic"},{"anchorId":"30|dedup_wf_001::cd493693f2e04159d419cfbea6042944","id":"30|od______2367::b340050a287b2601d330547261fe82bc","fullname":"Smith, Tim","firstname":"Tim","secondnames":"Smith"},{"anchorId":"30|dedup_wf_001::49e6185992465c4473a19d74aaf0c774","id":"30|od______2294::44afefb40235eadaa9424f735dc569ad","fullname":"Schirrwagen, Jochen","firstname":"Jochen","secondnames":"Schirrwagen"},{"anchorId":"30|dedup_wf_001::f41b3d7b95be894f9bd7a30b08f50313","id":"30|od______2294::45f6127ede0d24d16a5007ceff0a2d4b","fullname":"Horstmann, Wolfram","firstname":"Wolfram","secondnames":"Horstmann"},{"anchorId":"30|dedup_wf_001::75d72e914bd4d88674bd2a99a01c9f19","id":"30|od______2367::08fc98b1a1e2e2de97e3ecdd1b8174d0","fullname":"Pagano, Pasquale","firstname":"Pasquale","secondnames":"Pagano"},{"anchorId":"30|dedup_wf_001::30530ec3554a58bf9eaa0c497b9c83e5","id":"30|od______2367::c68257b7b29d6c1b372856f371e3ba24","fullname":"Manola, Natalia","firstname":"Natalia","secondnames":"Manola"},{"anchorId":"30|dedup_wf_001::21bff9fc229ffae537509689a3eb2bd1","id":"30|od______2367::8fd116733ee055e8bee96e422b4142ca","fullname":"Candela, Leonardo","firstname":"Leonardo","secondnames":"Candela"},{"anchorId":"30|dedup_wf_001::4867c41ae4006a89a4e00fa5b2ace526","id":"30|od______2367::717184cb6436add31e3ce0ba86ba2476","fullname":"Katifori, Akrivi","firstname":"Akrivi","secondnames":"Katifori"},{"anchorId":"30|dedup_wf_001::2c561a4cbd689f4729324744fb046995","id":"30|od______2367::fe0e336ba013e433486e92b8e30435c1","fullname":"Mikulicic, Marko","firstname":"Marko","secondnames":"Mikulicic"},{"anchorId":"30|dedup_wf_001::21bff9fc229ffae537509689a3eb2bd1","id":"30|od______2294::8fd116733ee055e8bee96e422b4142ca","fullname":"Candela, Leonardo","firstname":"Leonardo","secondnames":"Candela"},{"anchorId":"30|dedup_wf_001::ef5fbc9697bc7bf23f9cedc92f855466","id":"30|od______2367::7aab6e55da61a2f412b9764e3554154a","fullname":"Assante, 
Massimiliano","firstname":"Massimiliano","secondnames":"Assante"},{"anchorId":"30|dedup_wf_001::49e6185992465c4473a19d74aaf0c774","id":"30|doaj10829873::6bb80887219a5ea64915d84d373e3eb5","fullname":"Schirrwagen, Jochen","firstname":"Jochen","secondnames":"Schirrwagen"},{"anchorId":"30|dedup_wf_001::ca57380f88fb4920d67e024c0af0c715","id":"30|od______2294::2ca703acc4dd66064135c1afc5e976b8","fullname":"Horst, Marek","firstname":"Marek","secondnames":"Horst"},{"anchorId":"30|dedup_wf_001::2317e62318581e89976523d2553977e2","id":"30|od______2367::545b78ed508339ab728beee05f312db1","fullname":"Zoppi, Franco","firstname":"Franco","secondnames":"Zoppi"},{"anchorId":"30|dedup_wf_001::152c124f2f79b57d16500a3b8a63b93c","id":"30|od______2367::ee0556ce5974761280b8be0812a586b3","fullname":"Bolikowski, Å<U+0081>ukasz","firstname":"Å<U+0081>ukasz","secondnames":"Bolikowski"},{"anchorId":"30|dedup_wf_001::84b80c9fc38b022ae4e4a5c25ffa7999","id":"30|doaj10829873::a3c59fe161823ef7fadbd1bb216c34bb","fullname":"Castelli, Donatella","firstname":"Donatella","secondnames":"Castelli"},{"anchorId":"30|dedup_wf_001::731b5090d3f9c6dac58ad770b1a29e13","id":"30|od______2367::3fbd0b3533bd0e6089c1fe68b115f772","fullname":"Bardi, Alessia","firstname":"Alessia","secondnames":"Bardi"},{"anchorId":"30|dedup_wf_001::75d72e914bd4d88674bd2a99a01c9f19","id":"30|doaj10829873::9e7037bbf8b59564442aa12b6b424747","fullname":"Pagano, Pasquale","firstname":"Pasquale","secondnames":"Pagano"},{"anchorId":"30|dedup_wf_001::eacced7c9132e343948bf51698a1c927","id":"30|od______2367::22272e3e38161be671abad3f27c68e36","fullname":"Mikulic, Marko","firstname":"Marko","secondnames":"Mikulic"},{"anchorId":"30|dedup_wf_001::30530ec3554a58bf9eaa0c497b9c83e5","id":"30|od______2294::c68257b7b29d6c1b372856f371e3ba24","fullname":"Manola, Natalia","firstname":"Natalia","secondnames":"Manola"},{"anchorId":"30|dedup_wf_001::f41b3d7b95be894f9bd7a30b08f50313","id":"30|od______2367::45f6127ede0d24d16a5007ceff0a2d4b","fullname":"Horstmann, 
Wolfram","firstname":"Wolfram","secondnames":"Horstmann"},{"anchorId":"30|dedup_wf_001::ca57380f88fb4920d67e024c0af0c715","id":"30|od______2367::2ca703acc4dd66064135c1afc5e976b8","fullname":"Horst, Marek","firstname":"Marek","secondnames":"Horst"},{"anchorId":"30|dedup_wf_001::7b1fe51141418a62b56f9c3bd5bccaa7","id":"30|od______2367::2638f44b18089930e67bbd5633f9b103","fullname":"Debole, Franca","firstname":"Franca","secondnames":"Debole"},{"anchorId":"30|dedup_wf_001::d17df318b8ab8b17ac2b016767e183b4","id":"30|od______2294::d24f772460583eb514d0465f77f15030","fullname":"Rettberg, Najla","firstname":"Najla","secondnames":"Rettberg"},{"anchorId":"30|dedup_wf_001::75d72e914bd4d88674bd2a99a01c9f19","id":"30|doaj10829873::b3795485a89b87aabeff16b81f2ff24d","fullname":"Pagano, Pasquale","firstname":"Pasquale","secondnames":"Pagano"},{"anchorId":"30|dedup_wf_001::d71c73319eda487f9b56e860e2c1794f","id":"30|od______2294::c11bdd64efcd19316ef10c1967392565","fullname":"Peters, Dale","firstname":"Dale","secondnames":"Peters"},{"anchorId":"30|dedup_wf_001::9bfaee15b25e210ab56dfa6f57c6417e","id":"30|od______2367::81d934153eb100552ef4b398e243dd45","fullname":"La Bruzzo, Sandro","firstname":"Sandro","secondnames":"La Bruzzo"},{"anchorId":"30|dedup_wf_001::5a835806fb58a8199640d0b54de6b70a","id":"30|doaj10829873::2abbf092ab99aaaf0a52969141fc0070","fullname":"Bolikowski, Lukasz","firstname":"Lukasz","secondnames":"Bolikowski"},{"anchorId":"30|dedup_wf_001::cd493693f2e04159d419cfbea6042944","id":"30|od______2294::b340050a287b2601d330547261fe82bc","fullname":"Smith, Tim","firstname":"Tim","secondnames":"Smith"},{"anchorId":"30|dedup_wf_001::2d413333206599ee864738c521dbe5b3","id":"30|od______2367::39c67e010b85a6a79e54e94b2a789221","fullname":"Savino, Pasquale","firstname":"Pasquale","secondnames":"Savino"},{"anchorId":"30|dedup_wf_001::21bff9fc229ffae537509689a3eb2bd1","id":"30|doaj10829873::515b938f185f85e1a91849337310c4b7","fullname":"Candela, 
Leonardo","firstname":"Leonardo","secondnames":"Candela"},{"anchorId":"30|dedup_wf_001::c4d99fedf82781b5cae2225cf8049426","id":"30|od______2294::e931b1326351ef5b3bb2b89f3fc4244c","fullname":"Kobos, Mateusz","firstname":"Mateusz","secondnames":"Kobos"},{"anchorId":"30|dedup_wf_001::30530ec3554a58bf9eaa0c497b9c83e5","id":"30|doaj10829873::d4df586c008bca6df713f84070d3e1c3","fullname":"Manola, Natalia","firstname":"Natalia","secondnames":"Manola"},{"anchorId":"30|dedup_wf_001::152c124f2f79b57d16500a3b8a63b93c","id":"30|od______2294::029cfdbbd0fcb888f4efbfe1dde81721","fullname":"Bolikowski, Łukasz","firstname":"Łukasz","secondnames":"Bolikowski"},{"anchorId":"30|dedup_wf_001::7b9bd8e96778de90c911e0fcc8821bcc","id":"30|od______2367::c96e9db5145834320c37332ef96802f6","fullname":"Artini, Michele","firstname":"Michele","secondnames":"Artini"},{"anchorId":"30|dedup_wf_001::5a835806fb58a8199640d0b54de6b70a","id":"30|od______2294::9c743b25e78b06608d4214ba18f92690","fullname":"Bolikowski, Lukasz","firstname":"Lukasz","secondnames":"Bolikowski"},{"anchorId":"30|dedup_wf_001::84b80c9fc38b022ae4e4a5c25ffa7999","id":"30|doaj10829873::347ee2414e420e4aec5395146956d8c0","fullname":"Castelli, Donatella","firstname":"Donatella","secondnames":"Castelli"},{"anchorId":"30|dedup_wf_001::5850f94defb717ab4e7be63e6d4d2d12","id":"30|od______2367::de34cde81e9e864703f58d615ea0cf69","fullname":"Siebinga, Sjoerd","firstname":"Sjoerd","secondnames":"Siebinga"},{"anchorId":"30|dedup_wf_001::271f098792b54069a4d77b4fee16b354","id":"30|od______2367::217bb36de851aacdeae92250959e5af9","fullname":"Schmidt, Birgit","firstname":"Birgit","secondnames":"Schmidt"},{"anchorId":"30|dedup_wf_001::ca57ee88d3a6d5e2908b09c5aae07f97","id":"30|od______2367::ddd745ebf411904db3cd353539f6e079","fullname":"Mieldijk, Mario","firstname":"Mario","secondnames":"Mieldijk"},{"anchorId":"30|dedup_wf_001::c4d99fedf82781b5cae2225cf8049426","id":"30|od______2367::e931b1326351ef5b3bb2b89f3fc4244c","fullname":"Kobos, 
Mateusz","firstname":"Mateusz","secondnames":"Kobos"}],"anchor":true}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/model/gt.author.manghi2.json
1
{"id":"30|dedup_wf_001::9044d2ebdd6112d84e5ff279ae3d0a26","author":{"frequency":2,"id":"30|od______1457::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},"merged":[{"id":"30|od______1457::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"},{"score":1.0,"id":"30|od_______307::9897283f935d7c2f4c11cebf65ae3098","fullname":"Manghi, Paolo","firstname":"Paolo","secondnames":"Manghi"}],"coAuthors":[{"anchorId":"30|dedup_wf_001::179d5e120c57d760e8847dedfa7bfb9b","id":"30|od_______307::e0c601b1613eec62abb99e2591168fba","fullname":"Rodrigues, Eloy","firstname":"Eloy","secondnames":"Rodrigues"},{"anchorId":"30|dedup_wf_001::951cb66881f278b74cb366add7f0ce0d","id":"30|od_______307::ef2914f870d83e32764f2e0db122d0c4","fullname":"Houssos, Nikos","firstname":"Nikos","secondnames":"Houssos"},{"anchorId":"30|dedup_wf_001::958d36ee987d35f7d0a4f74a7ad10680","id":"30|od______1457::980daed35d7008f6f4de325f235fde65","fullname":"Príncipe, Pedro","firstname":"Pedro","secondnames":"Príncipe"},{"anchorId":"30|dedup_wf_001::4fe9f7dd4a96f983dcc8f17436238ba6","id":"30|od_______307::ae2f5173f1f1745e8521107ffd9e37ae","fullname":"Dvořák, Jan","firstname":"Jan","secondnames":"Dvořák"},{"anchorId":"30|dedup_wf_001::d09e46b26192d335be7aef7ddcbc9c0c","id":"30|od_______307::56bbbdcfbdda05ec257fabc0c7a596cc","fullname":"Elbæk, Mikael Karstensen","firstname":"Mikael Karstensen","secondnames":"Elbæk"},{"anchorId":"30|dedup_wf_001::d09e46b26192d335be7aef7ddcbc9c0c","id":"30|od______1457::56bbbdcfbdda05ec257fabc0c7a596cc","fullname":"Elbæk, Mikael Karstensen","firstname":"Mikael Karstensen","secondnames":"Elbæk"},{"anchorId":"30|dedup_wf_001::958d36ee987d35f7d0a4f74a7ad10680","id":"30|od_______307::980daed35d7008f6f4de325f235fde65","fullname":"Príncipe, 
Pedro","firstname":"Pedro","secondnames":"Príncipe"},{"anchorId":"30|dedup_wf_001::29611ace913f53e57c9201010b24434b","id":"30|od______1457::99d2bbd6eb21de2603f2d355875fe7b2","fullname":"Jörg, Brigitte","firstname":"Brigitte","secondnames":"Jörg"},{"anchorId":"30|dedup_wf_001::951cb66881f278b74cb366add7f0ce0d","id":"30|od______1457::ef2914f870d83e32764f2e0db122d0c4","fullname":"Houssos, Nikos","firstname":"Nikos","secondnames":"Houssos"},{"anchorId":"30|dedup_wf_001::29611ace913f53e57c9201010b24434b","id":"30|od_______307::99d2bbd6eb21de2603f2d355875fe7b2","fullname":"Jörg, Brigitte","firstname":"Brigitte","secondnames":"Jörg"},{"anchorId":"30|dedup_wf_001::179d5e120c57d760e8847dedfa7bfb9b","id":"30|od______1457::e0c601b1613eec62abb99e2591168fba","fullname":"Rodrigues, Eloy","firstname":"Eloy","secondnames":"Rodrigues"},{"anchorId":"30|dedup_wf_001::4fe9f7dd4a96f983dcc8f17436238ba6","id":"30|od______1457::ae2f5173f1f1745e8521107ffd9e37ae","fullname":"Dvořák, Jan","firstname":"Jan","secondnames":"Dvořák"}],"anchor":true}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/person.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "person", 
6
        "orderField" : "person", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "person" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {
14
	  		"conditions" : [
15
  		 ],		
16
		"model" : [	
17
			{ "name" : "person", "algo" : "PersonDistance", "type" : "JSON", "weight" : "1.0", "ignoreMissing" : "false", "path" : "person" }
18
		],
19
		"blacklists" : { } 		
20
	}
21

  
22
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/result.simple.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {		
14
  		"conditions" : [ ],		
15
		"model" : [
16
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" }
17
		],
18
		"blacklists" : { } 		
19
	}
20

  
21
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/result.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {
14
		"strictConditions" : [
15
  			{ "name" : "doiExactMatch", "fields" : [ "pid" ] }
16
  		],	
17
  		"conditions" : [
18
  			{ "name" : "yearMatch", "fields" : [ "dateofacceptance" ] },
19
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] }		
20
  		 ],		
21
		"model" : [
22
			{ "name" : "pid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value", "overrideMatch" : "true" }, 	
23
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
24
			{ "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" }
25
		],
26
		"blacklists" : { } 		
27
	}
28

  
29
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/result.full.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {		
14
		"clustering" : [
15
			{ "name" : "acronyms", "fields" : [ "title" ], "params" : { "max" : "1", "minLen" : "2", "maxLen" : "4"} },
16
			{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
17
			{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } } 
18
		],		
19
		"strictConditions" : [
20
  			{ "name" : "doiExactMatch", "fields" : [ "pid" ] }
21
  		], 
22
  		"conditions" : [ 
23
  			{ "name" : "yearMatch", "fields" : [ "dateofacceptance" ] },
24
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] },
25
  			{ "name" : "sizeMatch", "fields" : [ "authors" ] } 
26
  		],		
27
		"model" : [
28
			{ "name" : "pid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {doi}]/value", "overrideMatch" : "true" }, 	
29
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
30
			{ "name" : "dateofacceptance", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/metadata/dateofacceptance/value" } ,
31
			{ "name" : "authors", "algo" : "Null", "type" : "List", "weight" : "0.0", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" } 		
32
		],
33
		"blacklists" : {
34
			"title" : [
35
				"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
36
				"^(Kiri Karl Morgensternile).*$",
37
				"^(\\[Eksliibris Aleksandr).*\\]$",
38
				"^(\\[Eksliibris Aleksandr).*$",
39
				"^(Eksliibris Aleksandr).*$",
40
				"^(Kiri A\\. de Vignolles).*$",
41
				"^(2 kirja Karl Morgensternile).*$",
42
				"^(Pirita kloostri idaosa arheoloogilised).*$",
43
				"^(Kiri tundmatule).*$",
44
				"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
45
				"^(Eksliibris Nikolai Birukovile).*$",
46
				"^(Eksliibris Nikolai Issakovile).*$",
47
				"^(WHP Cruise Summary Information of section).*$",
48
				"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
49
				"^(Measurement of the spin\\-dependent structure function).*"
50
			] } 		
51
	}
52

  
53
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/result.prod.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {
14
		"clustering" : [
15
			{ "name" : "ngrampairs", "fields" : [ "title" ], "params" : { "max" : "1", "ngramLen" : "3"} },
16
			{ "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } } 
17
		],	
18
  		"conditions" : [ 
19
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] }
20
  		],		
21
		"model" : [
22
			{ "name" : "title", "algo" : "LevensteinTitle", "type" : "String", "weight" : "1.0", "ignoreMissing" : "false", "path" : "result/metadata/title/value" }
23
		],
24
		"blacklists" : {
25
			"title" : [
26
				"(?i)^Cases? of Puerperal Convulsions$",
27
				"(?i)^Operative Gyna?ecology$",
28
				"(?i)^Mind the gap\\!?\\:?$",
29
				"^Chronic fatigue syndrome\\.?$",
30
				"^Cartas? ao editor Letters? to the Editor$",
31
				"^Note from the Editor$",
32
				"^Anesthesia Abstract$",
33
				
34
				"^Annual report$",
35
				"(?i)^“?THE RADICAL PREVENTION OF VENEREAL DISEASE\\.?”?$",
36
				"(?i)^Graph and Table of Infectious Diseases?$",
37
				"^Presentation$",
38
				"(?i)^Reviews and Information on Publications$",
39
				"(?i)^PUBLIC HEALTH SERVICES?$",
40
				"(?i)^COMBINED TEXT-?BOOK OF OBSTETRICS AND GYN(Æ|ae)COLOGY$",
41
				"(?i)^Adrese autora$",
42
				"(?i)^Systematic Part .*\\. Catalogus Fossilium Austriae, Band 2: Echinoidea neogenica$",
43
				"(?i)^Acknowledgement to Referees$",
44
				"(?i)^Behçet's disease\\.?$",
45
				"(?i)^Isolation and identification of restriction endonuclease.*$",
46
				"(?i)^CEREBROVASCULAR DISEASES?.?$",
47
				"(?i)^Screening for abdominal aortic aneurysms?\\.?$",
48
				"^Event management$",
49
				"(?i)^Breakfast and Crohn's disease.*\\.?$",
50
				"^Cálculo de concentraciones en disoluciones acuosas. Ejercicio interactivo\\..*\\.$",
51
				"(?i)^Genetic and functional analyses of SHANK2 mutations suggest a multiple hit model of Autism spectrum disorders?\.?$",
52
				"^Gushi hakubutsugaku$",
53

  
54
				"^Starobosanski nadpisi u Bosni i Hercegovini \\(.*\\)$",							
55
				"^Intestinal spirocha?etosis$",
56
				"^Treatment of Rodent Ulcer$",
57
				"(?i)^\\W*Cloud Computing\\W*$",
58
				"^Compendio mathematico : en que se contienen todas las materias mas principales de las Ciencias que tratan de la cantidad$",				
59
				"^Free Communications, Poster Presentations: Session [A-F]$",
60
				
61
				"^“The Historical Aspects? of Quackery\\.?”$",
62
				"^A designated centre for people with disabilities operated by St John of God Community Services (Limited|Ltd), Louth$",
63
				"^P(er|re)-Mile Premiums for Auto Insurance\\.?$",
64
				"(?i)^Case Report$",							
65
				"^Boletín Informativo$",
66
				"(?i)^Glioblastoma Multiforme$",
67
				"(?i)^Nuevos táxones animales descritos en la península Ibérica y Macaronesia desde 1994 \\(.*\\)$",
68
				"^Zaměstnanecké výhody$",
69
				"(?i)^The Economics of Terrorism and Counter-Terrorism: A Survey \\(Part .*\\)$",
70
				"(?i)^Carotid body tumours?\\.?$", 
71
				"(?i)^\\[Españoles en Francia : La condición Emigrante.*\\]$",
72
				"^Avant-propos$",
73
				"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Head(s)? and Capital(s)?$",
74
				"(?i)^St\\. Patrick's Cathedral, Dublin, County Dublin - Bases?$",
75
				"(?i)^PUBLIC HEALTH VERSUS THE STATE$",							
76
				"^Viñetas de Cortázar$",
77
				"(?i)^Search for heavy neutrinos and W(\\[|_|\\(|_\\{|-)?R(\\]|\\)|\\})? bosons with right-handed couplings in a left-right symmetric model in pp collisions at.*TeV(\\.)?$",
78
				"(?i)^Measurement of the pseudorapidity and centrality dependence of the transverse energy density in Pb(-?)Pb collisions at.*tev(\\.?)$",				
79
				"(?i)^Search for resonances decaying into top-quark pairs using fully hadronic decays in pp collisions with ATLAS at.*TeV$",
80
				"(?i)^Search for neutral minimal supersymmetric standard model Higgs bosons decaying to tau pairs in pp collisions at.*tev$",
81

  
82
				"(?i)^Relatório de Estágio (de|em) Angiologia e Cirurgia Vascular$",
83
				"^Aus der AGMB$",				
84

  
85
				"^Znanstveno-stručni prilozi$",
86
				"^Zhodnocení finanční situace podniku a návrhy na zlepšení$",
87
				"^Evaluation of the Financial Situation in the Firm and Proposals to its Improvement$",
88
				"^Finanční analýza podniku$",
89
				"^Financial analysis( of business)?$",
90
				"(?i)^Textbook of Gyn(a)?(Æ)?(e)?cology$",
91
				"^Jikken nihon shūshinsho$",
92
				"(?i)^CORONER('|s)(s|') INQUESTS$",
93
				"(?i)^(Μελέτη παραγόντων )?risk management( για ανάπτυξη και εφαρμογή ενός πληροφοριακού συστήματος| και ανάπτυξη συστήματος)?$",				
94
				"(?i)^Consultants' contract(s)?$",
95
				"(?i)^Upute autorima$",
96
				"(?i)^Bijdrage tot de Kennis van den Godsdienst der Dajaks van Lan(d|f)ak en Tajan$",
97
				"^Joshi shin kokubun$",
98
				"^Kōtō shōgaku dokuhon nōson'yō$",
99
				"^Jinjō shōgaku shōka$",
100
				"^Shōgaku shūjichō$",
101
				"^Nihon joshi dokuhon$",
102
				"^Joshi shin dokuhon$",
103
				"^Chūtō kanbun dokuhon$",
104
				"^Wabun dokuhon$",
105
				"(?i)^(Analysis of economy selected village or town|Rozbor hospodaření vybrané obce či města)$",
106
				"(?i)^cardiac rehabilitation$",
107
				"(?i)^Analytical summary$",
108
				"^Thesaurus resolutionum Sacrae Congregationis Concilii$",
109
				"(?i)^Sumario analítico(\\s{1})?(Analitic summary)?$", 
110
				"^Prikazi i osvrti$",
111
				"^Rodinný dům s provozovnou$",
112
				"^Family house with an establishment$",
113
				"^Shinsei chūtō shin kokugun$",
114
				"^Pulmonary alveolar proteinosis(\\.?)$",
115
				"^Shinshū kanbun$",
116
				"^Viñeta(s?) de Rodríguez$",
117
				"(?i)^RUBRIKA UREDNIKA$",
118
				"^A Matching Model of the Academic Publication Market$",
119
				"^Yōgaku kōyō$",
120

  
121
				"^Internetový marketing$",
122
				"^Internet marketing$",
123
				"^Chūtō kokugo dokuhon$",
124
				"^Kokugo dokuhon$",
125
				"^Antibiotic Cover for Dental Extraction(s?)$",
126
				"^Strategie podniku$",				
127
				"^Strategy of an Enterprise$",
128
				"(?i)^respiratory disease(s?)(\\.?)$",
129
				"^Award(s?) for Gallantry in Civil Defence$",
130
				"^Podniková kultura$",
131
				"^Corporate Culture$",
132
				"^Severe hyponatraemia in hospital inpatient(s?)(\\.?)$",
133
				"^Pracovní motivace$",
134
				"^Work Motivation$",
135
				"^Kaitei kōtō jogaku dokuhon$",
136
				"^Konsolidovaná účetní závěrka$",
137
				"^Consolidated Financial Statements$",
138
				"(?i)^intracranial tumour(s?)$",
139
				"^Climate Change Mitigation Options and Directed Technical Change: A Decentralized Equilibrium Analysis$",
140
				"^\\[CERVECERIAS MAHOU(\\.|\\:) INTERIOR\\] \\[Material gráfico\\]$",
141
				"^Housing Market Dynamics(\\:|\\.) On the Contribution of Income Shocks and Credit Constraint(s?)$",
142
				"^\\[Funciones auxiliares de la música en Radio París,.*\\]$",
143
				"^Úroveň motivačního procesu jako způsobu vedení lidí$",
144
				"^The level of motivation process as a leadership$",
145
				"^Pay-beds in N(\\.?)H(\\.?)S(\\.?) Hospitals$",
146
				"(?i)^news and events$",
147
				"(?i)^NOVOSTI I DOGAĐAJI$",
148
				"^Sansū no gakushū$",
149
				"^Posouzení informačního systému firmy a návrh změn$",
150
				"^Information System Assessment and Proposal for ICT Modification$",
151
				"^Stresové zatížení pracovníků ve vybrané profesi$",
152
				"^Stress load in a specific job$",
153
				
154
				"^Sunday: Poster Sessions, Pt.*$",
155
				"^Monday: Poster Sessions, Pt.*$",
156
				"^Wednesday: Poster Sessions, Pt.*$",
157
				"^Tuesday: Poster Sessions, Pt.*$",
158
				
159
				"^Analýza reklamy$",
160
				"^Analysis of advertising$",
161

  
162
				"^Shōgaku shūshinsho$",
163
				"^Shōgaku sansū$",
164
				"^Shintei joshi kokubun$",
165
				"^Taishō joshi kokubun dokuhon$",
166
				"^Joshi kokubun$",				
167
												
168
				"^Účetní uzávěrka a účetní závěrka v ČR$",
169
				"(?i)^The \"?Causes\"? of Cancer$",
170
				"^Normas para la publicación de artículos$",
171
				"^Editor('|s)(s|') [Rr]eply$",
172
				"^Editor(’|s)(s|’) letter$",
173
				"^Redaktoriaus žodis$",		
174
				"^DISCUSSION ON THE PRECEDING PAPER$",
175
				"^Kōtō shōgaku shūshinsho jidōyō$",
176
				"^Shōgaku nihon rekishi$",
177
				"^(Theory of the flow of action currents in isolated myelinated nerve fibers).*$",
178
				"^Préface$",
179
				"^Occupational [Hh]ealth [Ss]ervices.$",
180
				"^In Memoriam Professor Toshiyuki TAKESHIMA$",
181
				"^Účetní závěrka ve vybraném podniku.*$",
182
				"^Financial statements in selected company$",
183
				"^Abdominal [Aa]ortic [Aa]neurysms.*$",
184
				"^Pseudomyxoma peritonei$",
185
				"^Kazalo autora$",			
186
			
187
				"(?i)^uvodna riječ$",
188
				"^Motivace jako způsob vedení lidí$",
189
				"^Motivation as a leadership$",
190
				"^Polyfunkční dům$",
191
				"^Multi\\-funkcional building$",
192
				"^Podnikatelský plán$",
193
				"^Business Plan$",
194
				"^Oceňování nemovitostí$",
195
				"^Marketingová komunikace$",
196
				"^Marketing communication$",
197
				"^Sumario Analítico$",
198
				"^Riječ uredništva$",
199
				"^Savjetovanja i priredbe$",
200
				"^Índice$",
201
				"^(Starobosanski nadpisi).*$",
202
				"^Vzdělávání pracovníků v organizaci$",
203
				"^Staff training in organization$",
204
				"^(Life Histories of North American Geometridae).*$",
205
				"^Strategická analýza podniku$",
206
				"^Strategic Analysis of an Enterprise$",
207
				"^Sadržaj$",
208
				"^Upute suradnicima$",
209
				"^Rodinný dům$",
210
				"(?i)^Fami(l)?ly house$",
211
				"^Upute autorima$",
212
				"^Strategic Analysis$",
213
				"^Finanční analýza vybraného podniku$",
214
				"^Finanční analýza$",
215
				"^Riječ urednika$",
216
				"(?i)^Content(s?)$",
217
				"(?i)^Inhalt$",
218
				"^Jinjō shōgaku shūshinsho jidōyō$",
219
				"(?i)^Index$",
220
				"^Chūgaku kokubun kyōkasho$",
221
				"^Retrato de una mujer$",
222
				"^Retrato de un hombre$",
223
				"^Kōtō shōgaku dokuhon$",
224
				"^Shotōka kokugo$",
225
				"^Shōgaku dokuhon$",
226
				"^Jinjō shōgaku kokugo dokuhon$",
227
				"^Shinsei kokugo dokuhon$",
228
				"^Teikoku dokuhon$",
229
				"^Instructions to Authors$",
230
				"^KİTAP TAHLİLİ$",
231
				"^PRZEGLĄD PIŚMIENNICTWA$",
232
				"(?i)^Presentación$",
233
				"^İçindekiler$",
234
				"(?i)^Tabl?e of contents$",
235
				"^(CODICE DEL BEATO DE LOS REYES FERNANDO I Y SANCHA).*$",
236
				"^(\\[MADRID\\. BIBL\\. NAC\\. N.*KING FERDINAND I.*FROM SAN ISIDORO DE LEON\\. FACUNDUS SCRIPSIT DATED.*\\]).*",
237
				"^Editorial( Board)?$",
238
				"(?i)^Editorial \\(English\\)$",
239
				"^Editörden$",			
240
				"^(Corpus Oral Dialectal \\(COD\\)\\.).*$",
241
				"^(Kiri Karl Morgensternile).*$",
242
				"^(\\[Eksliibris Aleksandr).*\\]$",
243
				"^(\\[Eksliibris Aleksandr).*$",
244
				"^(Eksliibris Aleksandr).*$",
245
				"^(Kiri A\\. de Vignolles).*$",
246
				"^(2 kirja Karl Morgensternile).*$",
247
				"^(Pirita kloostri idaosa arheoloogilised).*$",
248
				"^(Kiri tundmatule).*$",
249
				"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$",
250
				"^(Eksliibris Nikolai Birukovile).*$",
251
				"^(Eksliibris Nikolai Issakovile).*$",
252
				"^(WHP Cruise Summary Information of section).*$",
253
				"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$",
254
				"^(Measurement of the spin\\-dependent structure function).*"
255
			] }	
256
	}
257

  
258
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/pace/result.authors.pace.conf
1
{ 
2
	"wf" : { 
3
        "threshold" : "0.99", 
4
        "run" : "001", 
5
        "entityType" : "result", 
6
        "orderField" : "title", 
7
        "queueMaxSize" : "2000",
8
        "groupMaxSize" : "10",
9
        "slidingWindowSize" : "200",
10
        "rootBuilder" : [ "result" ],
11
        "includeChildren" : "true" 
12
    },
13
	"pace" : {
14
  		"conditions" : [
15
  			{ "name" : "sizeMatch", "fields" : [ "authors" ] },
16
  			{ "name" : "titleVersionMatch", "fields" : [ "title" ] }		
17
  		 ],		
18
		"model" : [	
19
			{ "name" : "title", "algo" : "JaroWinkler", "type" : "String", "weight" : "0.5", "ignoreMissing" : "false", "path" : "result/metadata/title[qualifier#classid = {main title}]/value" },
20
			{ "name" : "authors", "algo" : "SortedLevel2JaroWinkler", "type" : "List", "weight" : "0.5", "ignoreMissing" : "true", "path" : "result/author/metadata/fullname/value" }
21
		],
22
		"blacklists" : { } 		
23
	}
24

  
25
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/data/transform/fields.xml
1
<FIELDS>
2
	<FIELD name="oaftype" path="entity/type" tokenizable="false" indexable="true" stat="false" result="false"/>
3
	<FIELD name="objIdentifier" path="entity/id" tokenizable="false" indexable="true" stat="false" result="false"/>
4
	<FIELD name="collectedfrom" path="entity/collectedfrom/value" tokenizable="false" indexable="true" result="false" stat="false" />
5
	<FIELD name="pid" path="entity/pid[qualifier#classid = {doi}]/value" tokenizable="false" stat="false" result="false" indexable="true"/>
6
	<FIELD name="deletedbyinference" path="dataInfo/deletedbyinference" tokenizable="false" stat="false" result="false" indexable="true"/>        
7
	<FIELD name="inferred" path="dataInfo/inferred" tokenizable="false" stat="false" result="false" indexable="true"/>
8
	
9
	<!-- ORGANIZATION FIELDS -->
10
	<FIELD name="organizationlegalname" path="entity/organization/metadata/legalname/value" type="ngramtext" stat="false" indexable="true" result="false"/>
11
	<FIELD name="organizationlegalshortname" path="entity/organization/metadata/legalshortname/value" type="ngramtext" stat="false" indexable="true" result="false"/>
12
	<FIELD name="organizationwebsiteurl" path="entity/organization/metadata/websiteurl/value" stat="false" indexable="true" result="false"/>
13
	
14
	<!-- PERSON FIELDS -->
15
	<FIELD name="personfirstname" path="entity/person/metadata/firstname/value" stat="false" indexable="true" result="false"/>
16
	<FIELD name="personsecondnames" path="entity/person/metadata/secondnames/value" stat="false" indexable="true" result="false"/>
17
	<FIELD name="personfullname" path="entity/person/metadata/fullname/value" stat="false" indexable="true" result="false"/>
18
	
19
	<!-- RESULT FIELDS -->
20
	<FIELD name="resulttitle" path="entity/result/metadata/title/value | entity/children/result/metadata/title/value" stat="false" result="false" indexable="true"/>
21
	<FIELD name="resultdescription" path="entity/result/metadata/description/value" result="false" indexable="true" stat="false"/>
22
    <FIELD name="resultauthor" path="entity/result/author/metadata/fullname/value | entity/children/result/author/metadata/fullname/value" result="false" indexable="true" stat="false"/>
23

  
24
</FIELDS>
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/resources/eu/dnetlib/data/transform/simpleRecord.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record>
3
  <metadata>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff