1
|
package eu.dnetlib.pace;
|
2
|
|
3
|
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
4
|
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
|
5
|
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
|
6
|
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
|
7
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
8
|
import eu.dnetlib.pace.config.Config;
|
9
|
import eu.dnetlib.pace.config.DynConf;
|
10
|
import eu.dnetlib.pace.config.Type;
|
11
|
import eu.dnetlib.pace.model.Field;
|
12
|
import eu.dnetlib.pace.model.MapDocument;
|
13
|
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
|
14
|
|
15
|
public abstract class AbstractProtoPaceTest {
|
16
|
|
17
|
protected static String cfg = "pace.conf { " + "clustering { " + "acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} }, "
|
18
|
+ "ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} }, " + "suffixprefix { fields = [title], params = { max = 1, len = 3 } } "
|
19
|
+ "}, " + "conditions { " + "yearMatch { fields = [dateofacceptance] }, " + "titleVersionMatch { fields = [title] } }," + "model { "
|
20
|
+ "title/value { algo = JaroWinklerTitle, type = String, weight = 1.0, ignoreMissing = false }, "
|
21
|
+ "dateofacceptance/value { algo = Null, type = String, weight = 0.0, ignoreMissing = true } }, " + "blacklists = {" + "title = ["
|
22
|
+ "\"^(Corpus Oral Dialectal \\\\(COD\\\\).).*$\", " + "\"^(Kiri Karl Morgensternile).*$\", " + "\"^(Kiri A. de Vignolles).*$\", "
|
23
|
+ "\"^(\\\\[Eksliibris Aleksandr).*\\\\]$\", " + "\"^(\\\\[Eksliibris Aleksandr).*$\", " + "\"^(Eksliibris Aleksandr).*$\"" + "] }" + "}";
|
24
|
|
25
|
protected Config getOrganizationSimpleConf() {
|
26
|
Config config =
|
27
|
DynConf.load("pace.conf { " + "conditions { }," + "model { "
|
28
|
+ "legalname/value { algo = JaroWinkler, type = String, weight = 0.6, ignoreMissing = false }, "
|
29
|
+ "legalshortname/value { algo = JaroWinkler, type = String, weight = 0.4, ignoreMissing = true } }" + "}");
|
30
|
return config;
|
31
|
}
|
32
|
|
33
|
protected Config getOrganizationConf() {
|
34
|
Config config =
|
35
|
DynConf.load("pace.conf {" + "clustering {" + "ngrampairs { fields = [legalname], params = { max = 1, ngramLen = 3} },"
|
36
|
+ "suffixprefix { fields = [legalname], params = { max = 1, len = 3 } } }, " + "model { "
|
37
|
+ "legalname/value { algo = JaroWinkler, type = String, weight = 0.6, ignoreMissing = false },"
|
38
|
+ "legalshortname/value { algo = JaroWinkler, type = String, weight = 0.4, ignoreMissing = true } } }");
|
39
|
return config;
|
40
|
}
|
41
|
|
42
|
protected Config getResultConf() {
|
43
|
return DynConf.load("pace.conf { " + "conditions { " + "yearMatch { fields = [dateofacceptance] }, " + "titleVersionMatch { fields = [title] } },"
|
44
|
+ "model { " + "title/value { algo = JaroWinklerTitle, type = String, weight = 1.0, ignoreMissing = false }, "
|
45
|
+ "dateofacceptance/value { algo = Null, type = String, weight = 0.0, ignoreMissing = true } } " + "}");
|
46
|
}
|
47
|
|
48
|
protected Config getResultSimpleConf() {
|
49
|
return DynConf.load("pace.conf { " + "conditions { }," + "model { "
|
50
|
+ "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false } } " + "}");
|
51
|
}
|
52
|
|
53
|
protected Config getResultSimpleConf2() {
|
54
|
return DynConf.load("pace.conf { " + "model { " + "title/value { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false } } " + "}");
|
55
|
}
|
56
|
|
57
|
protected MapDocument result(final Config config, final String id, final String title, final String date) {
|
58
|
Result.Metadata.Builder builder = Result.Metadata.newBuilder();
|
59
|
if (title != null) {
|
60
|
builder.addTitle(getStruct(title, getQualifier("main", "dnet:titles")));
|
61
|
}
|
62
|
if (date != null) {
|
63
|
builder.setDateofacceptance(sf(date));
|
64
|
}
|
65
|
return ProtoDocumentBuilder.newInstance(id, builder.build(), config.fields());
|
66
|
}
|
67
|
|
68
|
protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
|
69
|
Organization.Metadata.Builder builder = Organization.Metadata.newBuilder();
|
70
|
if (legalName != null) {
|
71
|
builder.setLegalname(sf(legalName));
|
72
|
}
|
73
|
if (legalShortName != null) {
|
74
|
builder.setLegalshortname(sf(legalShortName));
|
75
|
}
|
76
|
return ProtoDocumentBuilder.newInstance(id, builder.build(), config.fields());
|
77
|
}
|
78
|
|
79
|
protected Field title(final String s) {
|
80
|
return new Field(Type.String, "title", s);
|
81
|
}
|
82
|
|
83
|
protected static StringField.Builder sf(final String s) {
|
84
|
return StringField.newBuilder().setValue(s);
|
85
|
}
|
86
|
|
87
|
protected static Qualifier.Builder getQualifier(final String classname, final String schemename) {
|
88
|
return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename);
|
89
|
}
|
90
|
|
91
|
protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) {
|
92
|
return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
|
93
|
}
|
94
|
|
95
|
}
|