Project

General

Profile

1
package eu.dnetlib.pace;
2

    
3
import java.io.IOException;
4
import java.io.StringWriter;
5
import java.util.ArrayList;
6
import java.util.List;
7

    
8
import com.google.common.collect.Lists;
9
import com.google.gson.Gson;
10
import eu.dnetlib.data.mapreduce.util.DNGFTest;
11
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
12
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
13
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
14
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
15
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
16
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
17
import eu.dnetlib.data.proto.PersonProtos.Person;
18
import eu.dnetlib.data.proto.PublicationProtos.Publication;
19
import eu.dnetlib.data.proto.TypeProtos;
20
import eu.dnetlib.pace.config.Config;
21
import eu.dnetlib.pace.config.DedupConfig;
22
import eu.dnetlib.pace.config.Type;
23
import eu.dnetlib.pace.model.Field;
24
import eu.dnetlib.pace.model.FieldValueImpl;
25
import eu.dnetlib.pace.model.MapDocument;
26
import eu.dnetlib.pace.model.ProtoDocumentBuilder;
27
import eu.dnetlib.pace.model.gt.GTAuthor;
28
import eu.dnetlib.pace.model.gt.GTAuthorMapper;
29
import org.apache.commons.io.IOUtils;
30
import org.apache.commons.lang3.RandomStringUtils;
31
import org.apache.commons.lang3.StringUtils;
32

    
33
public abstract class AbstractProtoPaceTest extends DNGFTest {
34

    
35
	protected DedupConfig getResultFullConf() {
36
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf"));
37
	}
38

    
39
	protected DedupConfig getResultSimpleConf() {
40
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf"));
41
	}
42

    
43
	protected DedupConfig getResultConf() {
44
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf"));
45
	}
46

    
47
	protected DedupConfig getOrganizationSimpleConf() {
48
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf"));
49
	}
50

    
51
	protected DedupConfig getResultAuthorsConf() {
52
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf"));
53
	}
54

    
55
	protected DedupConfig getPersonConf() {
56
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf"));
57
	}
58

    
59
	protected DedupConfig getResultProdConf() {
60
		return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf"));
61
	}
62

    
63
	protected MapDocument person(final Config conf, final String id, final DNGF oaf) {
64
		return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model());
65
	}
66

    
67
	protected DNGF getPersonGT(final String path) {
68
		return new GTAuthorMapper().map(getGTAuthor(path));
69
	}
70

    
71
	protected GTAuthor getGTAuthor(final String path) {
72

    
73
		final Gson gson = new Gson();
74

    
75
		final String json = readFromClasspath(path);
76

    
77
		final GTAuthor gta = gson.fromJson(json, GTAuthor.class);
78

    
79
		return gta;
80
	}
81

    
82
	private String readFromClasspath(final String filename) {
83
		final StringWriter sw = new StringWriter();
84
		try {
85
			IOUtils.copy(getClass().getResourceAsStream(filename), sw);
86
			return sw.toString();
87
		} catch (final IOException e) {
88
			throw new RuntimeException("cannot load resource from classpath: " + filename);
89
		}
90
	}
91

    
92
	protected MapDocument result(final Config config, final String id, final String title) {
93
		return result(config, id, title, null, new ArrayList<String>(), null);
94
	}
95

    
96
	protected MapDocument result(final Config config, final String id, final String title, final String date) {
97
		return result(config, id, title, date, new ArrayList<String>(), null);
98
	}
99

    
100
	protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) {
101
		return result(config, id, title, date, pid, null);
102
	}
103

    
104
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) {
105
		return result(config, id, title, date, pid, null);
106
	}
107

    
108
	protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) {
109
		return result(config, id, title, date, Lists.newArrayList(pid), authors);
110
	}
111

    
112
	protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) {
113
		final Publication.Metadata.Builder metadata = Publication.Metadata.newBuilder();
114
		if (!StringUtils.isBlank(title)) {
115
			metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles")));
116
			metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles")));
117
		}
118
		if (!StringUtils.isBlank(date)) {
119
			metadata.setDateofacceptance(sf(date));
120
		}
121

    
122
		final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication);
123
		final Publication.Builder result = Publication.newBuilder().setMetadata(metadata);
124

    
125
		if (authors != null) {
126
			for (final String author : authors) {
127
				result.addAuthor(person(author));
128
			}
129
		}
130

    
131
		entity.setPublication(result);
132

    
133
		if (pid != null) {
134
			for(String p : pid) {
135
				if (!StringUtils.isBlank(p)) {
136
					entity.addPid(sp(p, "doi"));
137
					//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai"));
138
				}
139
			}
140
		}
141

    
142
		final DNGFEntity build = entity.build();
143
		return ProtoDocumentBuilder.newInstance(id, build, config.model());
144
	}
145

    
146
	private Person.Builder person(final String author) {
147
		final Person.Builder person = Person.newBuilder();
148

    
149
		final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false);
150
		final Person.Metadata.Builder metadata = Person.Metadata.newBuilder();
151
		if (p.isAccurate()) {
152
			metadata.setFirstname(sf(p.getNormalisedFirstName()));
153
			metadata.addSecondnames(sf(p.getNormalisedSurname()));
154
			metadata.setFullname(sf(p.getNormalisedFullname()));
155
		} else {
156
			metadata.setFullname(sf(p.getOriginal()));
157
		}
158

    
159
		return person.setMetadata(metadata);
160
	}
161

    
162
	private DNGFEntity.Builder dngfEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) {
163
		final DNGFEntity.Builder entity = DNGFEntity.newBuilder().setId(id).setType(type);
164
		return entity;
165
	}
166

    
167
	protected MapDocument organization(final Config config, final String id, final String legalName) {
168
		return organization(config, id, legalName, null);
169
	}
170

    
171
	protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) {
172
		final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder();
173
		if (legalName != null) {
174
			metadata.setLegalname(sf(legalName));
175
		}
176
		if (legalShortName != null) {
177
			metadata.setLegalshortname(sf(legalShortName));
178
		}
179

    
180
		final DNGFEntity.Builder entity = dngfEntity(id, TypeProtos.Type.publication);
181
		entity.setOrganization(Organization.newBuilder().setMetadata(metadata));
182

    
183
		return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model());
184
	}
185

    
186
	private StructuredProperty sp(final String pid, final String type) {
187
		final Builder pidSp =
188
				StructuredProperty.newBuilder().setValue(pid)
189
						.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types"));
190
		return pidSp.build();
191
	}
192

    
193
	protected Field title(final String s) {
194
		return new FieldValueImpl(Type.String, "title", s);
195
	}
196

    
197
	protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) {
198
		return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier);
199
	}
200

    
201
	/*
202
	 * protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); }
203
	 * 
204
	 * protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return
205
	 * Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); }
206
	 */
207

    
208
}
    (1-1/1)