Revision 38187
Added by Claudio Atzori over 9 years ago
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/transform/xml/DmfToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import org.apache.commons.lang.StringUtils; |
|
7 |
import org.w3c.dom.NodeList; |
|
8 |
|
|
9 |
import com.google.common.collect.Iterables; |
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import com.google.protobuf.Descriptors.Descriptor; |
|
12 |
|
|
13 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
15 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
16 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
18 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson; |
|
19 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship; |
|
20 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
21 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
22 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
23 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
24 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
25 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
26 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
27 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
28 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
29 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context; |
|
30 |
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference; |
|
31 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
32 |
import eu.dnetlib.data.proto.ResultProtos.Result.Journal; |
|
33 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; |
|
34 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset; |
|
35 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
36 |
|
|
37 |
public class DmfToHbaseXsltFunctions extends AbstractDNetOafXsltFunctions { |
|
38 |
|
|
39 |
// dnet:oafPersonResultFromDMF($resultId, $oafPerson, position(), "sysimport:crosswalk:repository", "0.9") |
|
40 |
public static String oafPersonResult_Authorship_FromDMF(final String source, |
|
41 |
final String target, |
|
42 |
final int rank, |
|
43 |
final String relClass, |
|
44 |
final String provenanceAction, |
|
45 |
final String trust, |
|
46 |
final NodeList dataInfo) { |
|
47 |
try { |
|
48 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
49 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
50 |
|
|
51 |
final Authorship.Builder auth = Authorship.newBuilder().setRanking("" + rank) |
|
52 |
.setRelMetadata(RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:personroles"))); |
|
53 |
|
|
54 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.personResult, SubRelType.authorship, relClass, false).setPersonResult( |
|
55 |
PersonResult.newBuilder().setAuthorship(auth)); |
|
56 |
|
|
57 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
58 |
} catch (final Throwable e) { |
|
59 |
System.err.println("source: " + source); |
|
60 |
System.err.println("target: " + target); |
|
61 |
System.err.println("provenanceAction: " + provenanceAction); |
|
62 |
System.err.println("trust: " + trust); |
|
63 |
System.err.println("rank: " + rank); |
|
64 |
e.printStackTrace(); |
|
65 |
throw new RuntimeException(e); |
|
66 |
} |
|
67 |
} |
|
68 |
|
|
69 |
// dnet:oafPersonPersonFromMDStore($personId, $coauthorId) |
|
70 |
public static String oafPersonPerson_CoAuthorship_FromDMF(final String source, |
|
71 |
final String target, |
|
72 |
final String relClass, |
|
73 |
final String provenanceAction, |
|
74 |
final String trust, |
|
75 |
final NodeList dataInfo) { |
|
76 |
try { |
|
77 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
78 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
79 |
|
|
80 |
final PersonPerson.Builder pp = PersonPerson.newBuilder(); |
|
81 |
final CoAuthorship.Builder coauth = CoAuthorship.newBuilder().setRelMetadata( |
|
82 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:personroles"))); |
|
83 |
|
|
84 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.personPerson, SubRelType.coauthorship, relClass, false).setPersonPerson( |
|
85 |
pp.setCoauthorship(coauth)); |
|
86 |
|
|
87 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
88 |
} catch (final Throwable e) { |
|
89 |
System.err.println("source: " + source); |
|
90 |
System.err.println("target: " + target); |
|
91 |
System.err.println("provenanceAction: " + provenanceAction); |
|
92 |
System.err.println("trust: " + trust); |
|
93 |
e.printStackTrace(); |
|
94 |
throw new RuntimeException(e); |
|
95 |
} |
|
96 |
} |
|
97 |
|
|
98 |
// dnet:oafPersonFromDMF($personId, ., "sysimport:crosswalk:repository", "0.9") |
|
99 |
public static String oafPerson_FromDMF(final String personId, |
|
100 |
final String fullname, |
|
101 |
final String provenanceAction, |
|
102 |
final String trust, |
|
103 |
final NodeList dataInfo, |
|
104 |
final String collectedFromId, |
|
105 |
final String collectedFromName, |
|
106 |
final String originalId, |
|
107 |
final String dateOfCollection) { |
|
108 |
try { |
|
109 |
final String entityId = OafRowKeyDecoder.decode(personId).getKey(); |
|
110 |
|
|
111 |
final Person.Builder person = Person.newBuilder(); |
|
112 |
final Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
113 |
|
|
114 |
metadata.setFullname(sf(fullname)); |
|
115 |
|
|
116 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false); |
|
117 |
if (p.isAccurate()) { |
|
118 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
119 |
metadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname())); |
|
120 |
// metadata.setFullname(sf(p.getNormalisedFullname())); |
|
121 |
} |
|
122 |
|
|
123 |
// metadata.setNationality(getSimpleQualifier("UNKNOWN", "dnet:countries")); |
|
124 |
final List<StructuredProperty> pids = Lists.newArrayList(getStructuredProperty(originalId, "oai", "oai", "dnet:pid_types", "dnet:pid_types")); |
|
125 |
final OafEntity.Builder entity = getEntity(Type.person, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection, pids) |
|
126 |
.setPerson(person.setMetadata(metadata)); |
|
127 |
|
|
128 |
return base64(getOaf(entity, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
129 |
} catch (final Throwable e) { |
|
130 |
System.err.println("personId: " + personId); |
|
131 |
System.err.println("fullname: " + fullname); |
|
132 |
System.err.println("provenanceAction: " + provenanceAction); |
|
133 |
System.err.println("trust: " + trust); |
|
134 |
System.err.println("collectedFromId: " + collectedFromId); |
|
135 |
System.err.println("collectedFromName: " + collectedFromName); |
|
136 |
System.err.println("originalId: " + originalId); |
|
137 |
System.err.println("dateOfCollection: " + dateOfCollection); |
|
138 |
e.printStackTrace(); |
|
139 |
throw new RuntimeException(e); |
|
140 |
} |
|
141 |
} |
|
142 |
|
|
143 |
// dnet:oafResultProjectFromDMF($resultId, $projectId, "sysimport:crosswalk:repository", "0.9") |
|
144 |
public static String oafResultProject_Outcome_FromDMF(final String source, |
|
145 |
final String target, |
|
146 |
final String relClass, |
|
147 |
final String provenanceAction, |
|
148 |
final String trust, |
|
149 |
final NodeList dataInfo) { |
|
150 |
try { |
|
151 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
152 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
153 |
|
|
154 |
final Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata( |
|
155 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_project_relations"))); |
|
156 |
|
|
157 |
final ResultProject.Builder rp = ResultProject.newBuilder().setOutcome(outcome); |
|
158 |
|
|
159 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.resultProject, SubRelType.outcome, relClass, false).setResultProject(rp); |
|
160 |
|
|
161 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
162 |
} catch (final Throwable e) { |
|
163 |
System.err.println("source: " + source); |
|
164 |
System.err.println("target: " + target); |
|
165 |
System.err.println("provenanceAction: " + provenanceAction); |
|
166 |
System.err.println("trust: " + trust); |
|
167 |
|
|
168 |
e.printStackTrace(); |
|
169 |
throw new RuntimeException(e); |
|
170 |
} |
|
171 |
} |
|
172 |
|
|
173 |
// dnet:oafResultProjectFromDMF($resultId, $projectId, "sysimport:crosswalk:repository", "0.9") |
|
174 |
public static String oafResultProject_Outcome_FromDMF(final String source, |
|
175 |
final String target, |
|
176 |
final String relClass, |
|
177 |
final String provenanceAction, |
|
178 |
final String trust) { |
|
179 |
return oafResultProject_Outcome_FromDMF(source, target, relClass, provenanceAction, trust, null); |
|
180 |
} |
|
181 |
|
|
182 |
public static String oafResultResult_PublicationDataset_FromDMF(final String source, |
|
183 |
final String target, |
|
184 |
final String relClass, |
|
185 |
final String provenanceAction, |
|
186 |
final String trust, |
|
187 |
final NodeList dataInfo) { |
|
188 |
try { |
|
189 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
190 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
191 |
|
|
192 |
final PublicationDataset.Builder pd = PublicationDataset.newBuilder().setRelMetadata( |
|
193 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_result_relations"))); |
|
194 |
|
|
195 |
final ResultResult.Builder rr = ResultResult.newBuilder().setPublicationDataset(pd); |
|
196 |
|
|
197 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.resultResult, SubRelType.publicationDataset, relClass, false).setResultResult(rr); |
|
198 |
|
|
199 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
200 |
} catch (final Throwable e) { |
|
201 |
System.err.println("source: " + source); |
|
202 |
System.err.println("target: " + target); |
|
203 |
System.err.println("provenanceAction: " + provenanceAction); |
|
204 |
System.err.println("trust: " + trust); |
|
205 |
|
|
206 |
e.printStackTrace(); |
|
207 |
throw new RuntimeException(e); |
|
208 |
} |
|
209 |
} |
|
210 |
|
|
211 |
public static String oafResult_FromDMF(final String resultId, |
|
212 |
final String provenanceAction, |
|
213 |
final String trust, |
|
214 |
final NodeList dataInfo, |
|
215 |
final String hostedbyId, |
|
216 |
final String hostedbyName, |
|
217 |
final String collectedFromId, |
|
218 |
final String collectedFromName, |
|
219 |
final String originalId, |
|
220 |
final String dateOfCollection, |
|
221 |
final NodeList nodelist) { |
|
222 |
try { |
|
223 |
final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); |
|
224 |
|
|
225 |
final Result.Builder result = Result.newBuilder(); |
|
226 |
|
|
227 |
final ValueMap values = ValueMap.parseNodeList(nodelist); |
|
228 |
|
|
229 |
final Result.Metadata.Builder metadata = Result.Metadata.newBuilder(); |
|
230 |
final Descriptor mDesc = Result.Metadata.getDescriptor(); |
|
231 |
|
|
232 |
if (values.get("creator") != null) { |
|
233 |
for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) { |
|
234 |
|
|
235 |
final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder(); |
|
236 |
|
|
237 |
authorMetadata.setFullname(sf(fullname)); |
|
238 |
|
|
239 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false); |
|
240 |
if (p.isAccurate()) { |
|
241 |
authorMetadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
242 |
authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname())); |
|
243 |
authorMetadata.setFullname(sf(p.getNormalisedFullname())); |
|
244 |
} |
|
245 |
|
|
246 |
result.addAuthor(Person.newBuilder().setMetadata(authorMetadata)); |
|
247 |
} |
|
248 |
} |
|
249 |
|
|
250 |
addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject").listValues(), "keyword", "dnet:result_subject"); |
|
251 |
addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title").listValues(), "main title", "dnet:dataCite_title"); |
|
252 |
|
|
253 |
for (final String fieldname : Lists.newArrayList("description", "source")) { |
|
254 |
if (values.get(fieldname) != null) { |
|
255 |
for (final String s : values.get(fieldname).listValues()) { |
|
256 |
addField(metadata, mDesc.findFieldByName(fieldname), s); |
|
257 |
} |
|
258 |
} |
|
259 |
} |
|
260 |
|
|
261 |
addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues())); |
|
262 |
addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues()); |
|
263 |
addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues()); |
|
264 |
addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues()); |
|
265 |
addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues()); |
|
266 |
|
|
267 |
addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies")); |
|
268 |
|
|
269 |
addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues()); |
|
270 |
addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues()); |
|
271 |
|
|
272 |
// addField(metadata, Result.Metadata.getDescriptor().findFieldByName("provenanceaction"), |
|
273 |
// getSimpleQualifier("sysimport:crosswalk:repository", "dnet:provenanceActions").build()); |
|
274 |
|
|
275 |
if (values.get("concept") != null) { |
|
276 |
for (final Element e : values.get("concept")) { |
|
277 |
final String id = e.getAttributes().get("id"); |
|
278 |
if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank"); |
|
279 |
metadata.addContext(Context.newBuilder().setId(id)); |
|
280 |
} |
|
281 |
} |
|
282 |
|
|
283 |
if (values.get("journal") != null) { |
|
284 |
for (final Element e : values.get("journal")) { |
|
285 |
|
|
286 |
final Journal.Builder journal = Journal.newBuilder(); |
|
287 |
if (e.getText() != null) { |
|
288 |
journal.setName(e.getText()); |
|
289 |
} |
|
290 |
|
|
291 |
final Map<String, String> attr = e.getAttributes(); |
|
292 |
if (attr != null) { |
|
293 |
if (attr.get("issn") != null) { |
|
294 |
journal.setIssnPrinted(attr.get("issn")); |
|
295 |
} |
|
296 |
if (attr.get("eissn") != null) { |
|
297 |
journal.setIssnOnline(attr.get("eissn")); |
|
298 |
} |
|
299 |
if (attr.get("lissn") != null) { |
|
300 |
journal.setIssnLinking(attr.get("lissn")); |
|
301 |
} |
|
302 |
} |
|
303 |
metadata.setJournal(journal.build()); |
|
304 |
} |
|
305 |
} |
|
306 |
|
|
307 |
final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)); |
|
308 |
|
|
309 |
addField(instance, Instance.getDescriptor().findFieldByName("licence"), |
|
310 |
setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues())); |
|
311 |
addField(instance, Instance.getDescriptor().findFieldByName("instancetype"), |
|
312 |
setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues())); |
|
313 |
|
|
314 |
if (values.get("identifier") != null) { |
|
315 |
addField(instance, Instance.getDescriptor().findFieldByName("url"), |
|
316 |
Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter))); |
|
317 |
} |
|
318 |
|
|
319 |
result.addInstance(instance); |
|
320 |
|
|
321 |
final List<Element> extrefs = values.get("reference"); |
|
322 |
if (!extrefs.isEmpty()) { |
|
323 |
final Descriptor extDesc = ExternalReference.getDescriptor(); |
|
324 |
for (final Element element : extrefs) { |
|
325 |
final ExternalReference.Builder extref = ExternalReference.newBuilder(); |
|
326 |
addField(extref, extDesc.findFieldByName("url"), element.getText()); |
|
327 |
addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source")); |
|
328 |
addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier")); |
|
329 |
addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title")); |
|
330 |
addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query")); |
|
331 |
addField(extref, extDesc.findFieldByName("qualifier"), |
|
332 |
setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type"))) |
|
333 |
.build()); |
|
334 |
|
|
335 |
result.addExternalReference(extref); |
|
336 |
} |
|
337 |
} |
|
338 |
|
|
339 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
340 |
pids.addAll(parsePids(nodelist)); |
|
341 |
pids.add(getStructuredProperty(originalId, "oai", getClassName("oai"), "dnet:pid_types", "dnet:pid_types")); |
|
342 |
|
|
343 |
final OafEntity.Builder entity = getEntity(Type.result, entityId, getKV(collectedFromId, collectedFromName), originalId, dateOfCollection, pids) |
|
344 |
.setResult(result.setMetadata(metadata)); |
|
345 |
|
|
346 |
final Oaf oaf = getOaf(entity, getDataInfo(dataInfo, provenanceAction, trust, false, false)); |
|
347 |
return base64(oaf.toByteArray()); |
|
348 |
} catch (final Throwable e) { |
|
349 |
System.err.println("resultId: " + resultId); |
|
350 |
System.err.println("hostedbyId: " + hostedbyId); |
|
351 |
System.err.println("hostedbyName: " + hostedbyName); |
|
352 |
System.err.println("provenanceAction: " + provenanceAction); |
|
353 |
System.err.println("trust: " + trust); |
|
354 |
System.err.println("collectedFromId: " + collectedFromId); |
|
355 |
System.err.println("collectedFromName: " + collectedFromName); |
|
356 |
System.err.println("originalId: " + originalId); |
|
357 |
System.err.println("dateOfCollection: " + dateOfCollection); |
|
358 |
e.printStackTrace(); |
|
359 |
throw new RuntimeException(e); |
|
360 |
} |
|
361 |
} |
|
362 |
|
|
363 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/transform/xml/DNetMdStoreDataCiteToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import org.w3c.dom.NodeList; |
|
4 |
|
|
5 |
public class DNetMdStoreDataCiteToHbaseXsltFunctions extends DataciteToHbaseXsltFunctions { |
|
6 |
|
|
7 |
public static final String DEFAULT_TRUST = "0.9"; |
|
8 |
public static final String DEFAULT_PROVENANCE = "sysimport:crosswalk:datasetarchive"; |
|
9 |
|
|
10 |
// dnet:oafPersonResultFromMDStore($resultId, $oafPerson, position()) |
|
11 |
public static String oafPersonResult_Authorship_FromDatacite(final String personId, |
|
12 |
final String resultId, |
|
13 |
final int rank, |
|
14 |
final String relClass, |
|
15 |
final NodeList dataInfo) { |
|
16 |
return oafPersonResult_Authorship_FromDatacite(personId, resultId, rank, relClass, DEFAULT_PROVENANCE, DEFAULT_TRUST, dataInfo); |
|
17 |
|
|
18 |
} |
|
19 |
|
|
20 |
// dnet:oafPersonResultFromMDStore($resultId, $oafPerson, position()) |
|
21 |
// public static String oafPersonPersonFromMDStore(final String source, final String target) { |
|
22 |
// return oafPersonPersonFromDMF(source, target, DEFAULT_PROVENANCE, DEFAULT_TRUST); |
|
23 |
// } |
|
24 |
|
|
25 |
// dnet:oafResultProjectFromMDStore($resultId, $projectId) |
|
26 |
public static String oafResultProject_Outcome_FromDatacite(final String sourceId, final String targetId, final String relClass, final NodeList dataInfo) { |
|
27 |
return oafResultProject_Outcome_FromDatacite(sourceId, targetId, relClass, DEFAULT_PROVENANCE, DEFAULT_TRUST, dataInfo); |
|
28 |
} |
|
29 |
|
|
30 |
// dnet:oafPersonFromMDStore($personId, .) |
|
31 |
public static String oafPerson_FromDatacite(final String personId, |
|
32 |
final NodeList dataInfo, |
|
33 |
final String collectedFromId, |
|
34 |
final String collectedFromName, |
|
35 |
final String originalId, |
|
36 |
final String dateOfCollection, |
|
37 |
final String fullname) { |
|
38 |
return oafPerson_FromDatacite(personId, dataInfo, fullname, DEFAULT_PROVENANCE, DEFAULT_TRUST, collectedFromId, collectedFromName, originalId, |
|
39 |
dateOfCollection); |
|
40 |
|
|
41 |
} |
|
42 |
|
|
43 |
public static String oafResult_FromDatacite(final String resultId, |
|
44 |
final NodeList dataInfo, |
|
45 |
final NodeList metadata, |
|
46 |
final NodeList titles, |
|
47 |
final NodeList subjects, |
|
48 |
final NodeList publisher, |
|
49 |
final NodeList descriptions, |
|
50 |
final NodeList dates, |
|
51 |
final NodeList dateaccepted, |
|
52 |
final NodeList resourceTypes, |
|
53 |
final NodeList formats, |
|
54 |
final NodeList sizes, |
|
55 |
final NodeList languages, |
|
56 |
final NodeList cobjcategory, |
|
57 |
final NodeList rights, |
|
58 |
final NodeList version, |
|
59 |
final String hostedbyId, |
|
60 |
final String hostedbyName, |
|
61 |
final String collectedfromId, |
|
62 |
final String collectedfromName, |
|
63 |
final String originalId, |
|
64 |
final String instanceUri, |
|
65 |
final String dateOfCollection) { |
|
66 |
|
|
67 |
return oafResult_FromDatacite(resultId, dataInfo, metadata, titles, subjects, publisher, descriptions, dates, dateaccepted, resourceTypes, formats, |
|
68 |
sizes, languages, cobjcategory, rights, version, DEFAULT_PROVENANCE, DEFAULT_TRUST, hostedbyId, hostedbyName, collectedfromId, |
|
69 |
collectedfromName, originalId, instanceUri, dateOfCollection); |
|
70 |
} |
|
71 |
|
|
72 |
public static String oafResultResult_PublicationDataset_FromDatacite(final String sourceId, |
|
73 |
final String targetId, |
|
74 |
final String relClass, |
|
75 |
final NodeList dataInfo) { |
|
76 |
return oafResultResult_PublicationDataset_FromDatacite(sourceId, targetId, relClass, DEFAULT_PROVENANCE, DEFAULT_TRUST, dataInfo); |
|
77 |
} |
|
78 |
|
|
79 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet-hadoop-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-mapping-utils</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>3.1.6</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6</developerConnection> |
|
16 |
</scm> |
|
17 |
<dependencies> |
|
18 |
<dependency> |
|
19 |
<groupId>com.google.guava</groupId> |
|
20 |
<artifactId>guava</artifactId> |
|
21 |
<version>${google.guava.version}</version> |
|
22 |
</dependency> |
|
23 |
<dependency> |
|
24 |
<groupId>junit</groupId> |
|
25 |
<artifactId>junit</artifactId> |
|
26 |
<version>${junit.version}</version> |
|
27 |
<scope>test</scope> |
|
28 |
</dependency> |
|
29 |
<dependency> |
|
30 |
<groupId>commons-codec</groupId> |
|
31 |
<artifactId>commons-codec</artifactId> |
|
32 |
<version>${commons.codec.version}</version> |
|
33 |
</dependency> |
|
34 |
<dependency> |
|
35 |
<groupId>dom4j</groupId> |
|
36 |
<artifactId>dom4j</artifactId> |
|
37 |
<version>${dom4j.version}</version> |
|
38 |
<exclusions> |
|
39 |
<exclusion> |
|
40 |
<artifactId>xml-apis</artifactId> |
|
41 |
<groupId>xml-apis</groupId> |
|
42 |
</exclusion> |
|
43 |
</exclusions> |
|
44 |
</dependency> |
|
45 |
<dependency> |
|
46 |
<groupId>eu.dnetlib</groupId> |
|
47 |
<artifactId>dnet-openaire-data-protos</artifactId> |
|
48 |
<version>[3.0.0,4.0.0)</version> |
|
49 |
</dependency> |
|
50 |
<dependency> |
|
51 |
<groupId>eu.dnetlib</groupId> |
|
52 |
<artifactId>dnet-pace-core</artifactId> |
|
53 |
<version>[2.0.0,3.0.0)</version> |
|
54 |
</dependency> |
|
55 |
<dependency> |
|
56 |
<groupId>eu.dnetlib</groupId> |
|
57 |
<artifactId>cnr-misc-utils</artifactId> |
|
58 |
<version>[1.0.0,2.0.0)</version> |
|
59 |
</dependency> |
|
60 |
<dependency> |
|
61 |
<groupId>eu.dnetlib</groupId> |
|
62 |
<artifactId>dnet-hadoop-commons</artifactId> |
|
63 |
<version>[1.0.0,2.0.0)</version> |
|
64 |
</dependency> |
|
65 |
<dependency> |
|
66 |
<groupId>eu.dnetlib</groupId> |
|
67 |
<artifactId>dnet-index-solr-common</artifactId> |
|
68 |
<version>[1.0.0,2.0.0)</version> |
|
69 |
</dependency> |
|
70 |
<dependency> |
|
71 |
<groupId>com.googlecode.protobuf-java-format</groupId> |
|
72 |
<artifactId>protobuf-java-format</artifactId> |
|
73 |
<version>1.2</version> |
|
74 |
</dependency> |
|
75 |
</dependencies> |
|
76 |
</project> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/transform/xml/Element.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
|
|
5 |
import org.apache.commons.lang.StringUtils; |
|
6 |
|
|
7 |
import com.google.common.collect.Maps; |
|
8 |
|
|
9 |
public class Element { |
|
10 |
|
|
11 |
private String text; |
|
12 |
private Map<String, String> attributes; |
|
13 |
|
|
14 |
public Element(final String text, final Map<String, String> attributes) { |
|
15 |
this.text = text; |
|
16 |
this.attributes = attributes; |
|
17 |
} |
|
18 |
|
|
19 |
public Element(final String text) { |
|
20 |
this.text = text; |
|
21 |
this.attributes = Maps.newHashMap(); |
|
22 |
} |
|
23 |
|
|
24 |
public Element() { |
|
25 |
this.text = ""; |
|
26 |
this.attributes = Maps.newHashMap(); |
|
27 |
} |
|
28 |
|
|
29 |
public String getText() { |
|
30 |
return text; |
|
31 |
} |
|
32 |
|
|
33 |
public void setText(final String text) { |
|
34 |
this.text = text; |
|
35 |
} |
|
36 |
|
|
37 |
public Map<String, String> getAttributes() { |
|
38 |
return attributes; |
|
39 |
} |
|
40 |
|
|
41 |
public void setAttributes(final Map<String, String> attributes) { |
|
42 |
this.attributes = attributes; |
|
43 |
} |
|
44 |
|
|
45 |
public boolean isEmpty() { |
|
46 |
return !(hasText() || hasAttributes()); |
|
47 |
} |
|
48 |
|
|
49 |
private boolean hasAttributes() { |
|
50 |
return (getAttributes() != null) && !getAttributes().isEmpty(); |
|
51 |
} |
|
52 |
|
|
53 |
public boolean hasText() { |
|
54 |
return (getText() != null) && !getText().isEmpty(); |
|
55 |
} |
|
56 |
|
|
57 |
@Override |
|
58 |
public String toString() { |
|
59 |
return "{ " + StringUtils.left(text, 20) + attributes.toString() + " }"; |
|
60 |
} |
|
61 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/transform/OafUtils.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import java.util.Arrays; |
|
4 |
import java.util.Set; |
|
5 |
|
|
6 |
import com.google.common.base.Function; |
|
7 |
import com.google.common.base.Predicate; |
|
8 |
import com.google.common.collect.Iterables; |
|
9 |
import com.google.common.collect.Lists; |
|
10 |
import com.google.common.collect.Sets; |
|
11 |
import com.google.protobuf.Descriptors.Descriptor; |
|
12 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
13 |
|
|
14 |
import eu.dnetlib.data.mapreduce.util.OafDecoder; |
|
15 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
16 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
18 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
19 |
|
|
20 |
public class OafUtils { |
|
21 |
|
|
22 |
public static Set<String> entities() { |
|
23 |
return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), new Function<Type, String>() { |
|
24 |
|
|
25 |
@Override |
|
26 |
public String apply(final Type t) { |
|
27 |
return t.toString(); |
|
28 |
} |
|
29 |
})); |
|
30 |
} |
|
31 |
|
|
32 |
public static Predicate<Oaf> relationFilter() { |
|
33 |
return new Predicate<Oaf>() { |
|
34 |
|
|
35 |
@Override |
|
36 |
public boolean apply(final Oaf oaf) { |
|
37 |
return oaf.getKind().equals(Kind.relation); |
|
38 |
} |
|
39 |
}; |
|
40 |
} |
|
41 |
|
|
42 |
public static Predicate<Oaf> entityFilter() { |
|
43 |
return new Predicate<Oaf>() { |
|
44 |
|
|
45 |
@Override |
|
46 |
public boolean apply(final Oaf oaf) { |
|
47 |
return oaf.getKind().equals(Kind.entity); |
|
48 |
} |
|
49 |
}; |
|
50 |
} |
|
51 |
|
|
52 |
public static Function<OafDecoder, String> idDecoder() { |
|
53 |
return new Function<OafDecoder, String>() { |
|
54 |
|
|
55 |
@Override |
|
56 |
public String apply(final OafDecoder input) { |
|
57 |
return input.getEntityId(); |
|
58 |
} |
|
59 |
}; |
|
60 |
} |
|
61 |
|
|
62 |
public static Predicate<StructuredProperty> mainTitleFilter() { |
|
63 |
return new Predicate<StructuredProperty>() { |
|
64 |
|
|
65 |
@Override |
|
66 |
public boolean apply(final StructuredProperty sp) { |
|
67 |
return (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title"); |
|
68 |
} |
|
69 |
}; |
|
70 |
} |
|
71 |
|
|
72 |
public static Set<String> getFieldNames(final Descriptor d, final Integer... tag) { |
|
73 |
return Sets.newHashSet(Iterables.transform(Arrays.asList(tag), new Function<Integer, String>() { |
|
74 |
|
|
75 |
@Override |
|
76 |
public String apply(final Integer i) { |
|
77 |
final FieldDescriptor fd = d.findFieldByNumber(i); |
|
78 |
if (fd == null) throw new IllegalArgumentException("undefined tag: " + i + " for type: " + d.getFullName()); |
|
79 |
return fd.getName(); |
|
80 |
} |
|
81 |
})); |
|
82 |
} |
|
83 |
|
|
84 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import com.google.protobuf.GeneratedMessage; |
|
4 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
5 |
|
|
6 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; |
|
7 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; |
|
8 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource; |
|
9 |
import eu.dnetlib.data.proto.DedupProtos.Dedup; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; |
|
11 |
import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo; |
|
12 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
13 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.StringField; |
|
15 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
16 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
17 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
18 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
19 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
20 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
21 |
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; |
|
22 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
23 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
24 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
25 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
26 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
|
27 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
|
28 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson; |
|
29 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
30 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
|
31 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
32 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
33 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
34 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
35 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
36 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
37 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context; |
|
38 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
39 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; |
|
40 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity; |
|
41 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
42 |
|
|
43 |
public class OafTest { |
|
44 |
|
|
45 |
public static final String CITATION_JSON = |
|
46 |
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 
4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>"; |
|
47 |
|
|
48 |
public static final String STATISTICS_JSON = |
|
49 |
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]"; |
|
50 |
|
|
51 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) { |
|
52 |
return getStructuredproperty(value, classname, schemename, null); |
|
53 |
} |
|
54 |
|
|
55 |
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) { |
|
56 |
final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename)); |
|
57 |
if (dataInfo != null) { |
|
58 |
sp.setDataInfo(dataInfo); |
|
59 |
} |
|
60 |
return sp; |
|
61 |
} |
|
62 |
|
|
63 |
public static Qualifier.Builder getQualifier(final String classname, final String schemename) { |
|
64 |
return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); |
|
65 |
} |
|
66 |
|
|
67 |
public static KeyValue getKV(final String id, final String name) { |
|
68 |
return KeyValue.newBuilder().setKey(id).setValue(name).build(); |
|
69 |
} |
|
70 |
|
|
71 |
public static OafEntity getDatasource(final String datasourceId) { |
|
72 |
return OafEntity |
|
73 |
.newBuilder() |
|
74 |
.setType(Type.datasource) |
|
75 |
.setId(datasourceId) |
|
76 |
.setDatasource( |
|
77 |
Datasource.newBuilder().setMetadata( |
|
78 |
Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname")) |
|
79 |
.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage")) |
|
80 |
.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems")) |
|
81 |
.setOdnumberofitemsdate(sf("numberofitems date")) |
|
82 |
// .addOdsubjects("subjects") |
|
83 |
.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes")) |
|
84 |
.setDatasourcetype(getQualifier("type class", "type scheme")))).build(); |
|
85 |
} |
|
86 |
|
|
87 |
public static OafEntity getResult(final String id) { |
|
88 |
return getResultBuilder(id).build(); |
|
89 |
} |
|
90 |
|
|
91 |
public static OafEntity.Builder getResultBuilder(final String id) { |
|
92 |
return OafEntity |
|
93 |
.newBuilder() |
|
94 |
.setType(Type.result) |
|
95 |
.setId(id) |
|
96 |
.setResult( |
|
97 |
Result.newBuilder() |
|
98 |
.setMetadata( |
|
99 |
Result.Metadata |
|
100 |
.newBuilder() |
|
101 |
.addTitle( |
|
102 |
getStructuredproperty( |
|
103 |
"Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population", |
|
104 |
"main title", "dnet:result_titles", getDataInfo())) |
|
105 |
.addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo())) |
|
106 |
.addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects")) |
|
107 |
.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB")) |
|
108 |
.addContext(Context.newBuilder().setId("egi::virtual::970")) |
|
109 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied")) |
|
110 |
.addContext(Context.newBuilder().setId("egi::classification::natsc::math")) |
|
111 |
.addContext(Context.newBuilder().setId("egi::classification::natsc")) |
|
112 |
.addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi")) |
|
113 |
.addDescription(sf("Responsible for making and maintaining the extracellular matrix ...")) |
|
114 |
.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD")) |
|
115 |
.setResulttype(getQualifier("publication", "dnet:result_types")) |
|
116 |
.setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed")) |
|
117 |
.addInstance(getInstance("10|od__10", "arxiv"))) |
|
118 |
.addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive")) |
|
119 |
.addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids")) |
|
120 |
.setDateofcollection(""); |
|
121 |
} |
|
122 |
|
|
123 |
public static DataInfo getDataInfo() { |
|
124 |
return getDataInfo("0.4"); |
|
125 |
} |
|
126 |
|
|
127 |
public static DataInfo getDataInfo(final String trust) { |
|
128 |
return DataInfo.newBuilder().setDeletedbyinference(false).setTrust("0.4").setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy")) |
|
129 |
.build(); |
|
130 |
} |
|
131 |
|
|
132 |
public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) { |
|
133 |
return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setLicence(getQualifier("OpenAccess", "dnet:access_modes")) |
|
134 |
.setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url"); |
|
135 |
|
|
136 |
} |
|
137 |
|
|
138 |
public static OafRel getDedupRel(final String source, final String target, final RelType relType, final String relClass) { |
|
139 |
return OafRel.newBuilder().setSource(source).setTarget(target).setRelType(relType).setSubRelType(SubRelType.dedup).setRelClass(relClass) |
|
140 |
.setChild(false).setCachedTarget(getResult(target)).build(); |
|
141 |
} |
|
142 |
|
|
143 |
public static OafRel getPersonResult(final String source, final String target, final String ranking, final String relClass) { |
|
144 |
return OafRel |
|
145 |
.newBuilder() |
|
146 |
.setSource(source) |
|
147 |
.setTarget(target) |
|
148 |
.setRelType(RelType.personResult) |
|
149 |
.setSubRelType(SubRelType.authorship) |
|
150 |
.setRelClass(relClass) |
|
151 |
.setChild(false) |
|
152 |
.setPersonResult( |
|
153 |
PersonResult.newBuilder().setAuthorship( |
|
154 |
Authorship.newBuilder().setRanking(ranking).setRelMetadata(relMetadata(relClass, "dnet:personResult_relations")))) |
|
155 |
.setCachedTarget(getPerson()).build(); |
|
156 |
} |
|
157 |
|
|
158 |
public static OafEntity getPerson() { |
|
159 |
return OafEntity |
|
160 |
.newBuilder() |
|
161 |
.setType(Type.person) |
|
162 |
.setId("WOS:000277866500014_A._Shirazi-Adl") |
|
163 |
.setPerson( |
|
164 |
Person.newBuilder().setMetadata( |
|
165 |
Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl")) |
|
166 |
.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(getQualifier("EN", "dnet:countries")))) |
|
167 |
.build(); |
|
168 |
} |
|
169 |
|
|
170 |
public static OafRel getProjectOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
171 |
final OafRel.Builder oafRel = OafRel |
|
172 |
.newBuilder() |
|
173 |
.setSource(source) |
|
174 |
.setTarget(target) |
|
175 |
.setRelType(RelType.projectOrganization) |
|
176 |
.setSubRelType(SubRelType.participation) |
|
177 |
.setRelClass(relClass) |
|
178 |
.setChild(false) |
|
179 |
.setProjectOrganization( |
|
180 |
ProjectOrganization.newBuilder().setParticipation( |
|
181 |
Participation.newBuilder().setParticipantnumber("" + 1) |
|
182 |
.setRelMetadata(relMetadata(relClass, "dnet:project_organization_relations")))); |
|
183 |
switch (Participation.RelName.valueOf(relClass)) { |
|
184 |
case hasParticipant: |
|
185 |
oafRel.setCachedTarget(getProjectFP7(target, "SP3")); |
|
186 |
break; |
|
187 |
case isParticipant: |
|
188 |
oafRel.setCachedTarget(getOrganization(target)); |
|
189 |
break; |
|
190 |
default: |
|
191 |
break; |
|
192 |
} |
|
193 |
return oafRel.build(); |
|
194 |
} |
|
195 |
|
|
196 |
public static GeneratedMessage getOrganizationOrganization(final String source, final String target, final String relClass) { |
|
197 |
final OafRel.Builder oafRel = OafRel |
|
198 |
.newBuilder() |
|
199 |
.setSource(source) |
|
200 |
.setTarget(target) |
|
201 |
.setRelType(RelType.organizationOrganization) |
|
202 |
.setSubRelType(SubRelType.dedup) |
|
203 |
.setRelClass(relClass) |
|
204 |
.setChild(true) |
|
205 |
.setOrganizationOrganization( |
|
206 |
OrganizationOrganization.newBuilder().setDedup( |
|
207 |
Dedup.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:organization_organization_relations")))); |
|
208 |
|
|
209 |
switch (Dedup.RelName.valueOf(relClass)) { |
|
210 |
case isMergedIn: |
|
211 |
oafRel.setCachedTarget(getOrganization(source)); |
|
212 |
break; |
|
213 |
case merges: |
|
214 |
oafRel.setCachedTarget(getOrganization(target)); |
|
215 |
break; |
|
216 |
default: |
|
217 |
break; |
|
218 |
} |
|
219 |
return oafRel.build(); |
|
220 |
} |
|
221 |
|
|
222 |
public static OafRel getDatasourceOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
223 |
final OafRel.Builder oafRel = OafRel |
|
224 |
.newBuilder() |
|
225 |
.setSource(source) |
|
226 |
.setTarget(target) |
|
227 |
.setRelType(RelType.datasourceOrganization) |
|
228 |
.setSubRelType(SubRelType.provision) |
|
229 |
.setRelClass(relClass) |
|
230 |
.setChild(false) |
|
231 |
.setDatasourceOrganization( |
|
232 |
DatasourceOrganization.newBuilder().setProvision( |
|
233 |
Provision.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:datasource_organization_relations")))); |
|
234 |
switch (Provision.RelName.valueOf(relClass)) { |
|
235 |
case isProvidedBy: |
|
236 |
oafRel.setCachedTarget(getOrganization(target)); |
|
237 |
break; |
|
238 |
case provides: |
|
239 |
oafRel.setCachedTarget(getDatasource(target)); |
|
240 |
break; |
|
241 |
default: |
|
242 |
break; |
|
243 |
} |
|
244 |
return oafRel.build(); |
|
245 |
} |
|
246 |
|
|
247 |
public static OafRel getProjectPerson(final String sourceId, final String targetId, final String relClass) { |
|
248 |
return OafRel |
|
249 |
.newBuilder() |
|
250 |
.setSource(sourceId) |
|
251 |
.setTarget(targetId) |
|
252 |
.setRelType(RelType.projectPerson) |
|
253 |
.setSubRelType(SubRelType.contactPerson) |
|
254 |
.setRelClass(relClass) |
|
255 |
.setChild(true) |
|
256 |
.setProjectPerson( |
|
257 |
ProjectPerson.newBuilder().setContactPerson( |
|
258 |
ContactPerson.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:project_result_relations")))) |
|
259 |
.setCachedTarget(getPerson()).build(); |
|
260 |
} |
|
261 |
|
|
262 |
public static OafRel getSimilarityRel(final String sourceId, final String targetId, final OafEntity result, final String relClass) { |
|
263 |
return OafRel |
|
264 |
.newBuilder() |
|
265 |
.setSource(sourceId) |
|
266 |
.setTarget(targetId) |
|
267 |
.setRelType(RelType.resultResult) |
|
268 |
.setSubRelType(SubRelType.similarity) |
|
269 |
.setRelClass(relClass) |
|
270 |
.setChild(false) |
|
271 |
.setCachedTarget(result) |
|
272 |
.setResultResult( |
|
273 |
ResultResult.newBuilder().setSimilarity( |
|
274 |
Similarity.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:resultResult_relations")).setSimilarity(.4f) |
|
275 |
.setType(Similarity.Type.STANDARD))).build(); |
|
276 |
} |
|
277 |
|
|
278 |
public static RelMetadata.Builder relMetadata(final String classname, final String schemename) { |
|
279 |
return RelMetadata.newBuilder().setSemantics(getQualifier(classname, schemename)); |
|
280 |
} |
|
281 |
|
|
282 |
public static OafEntity getOrganization(final String orgId) { |
|
283 |
return OafEntity |
|
284 |
.newBuilder() |
|
285 |
.setType(Type.organization) |
|
286 |
.setId(orgId) |
|
287 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
288 |
.setOrganization( |
|
289 |
Organization.newBuilder().setMetadata( |
|
290 |
Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE")) |
|
291 |
.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org")) |
|
292 |
.setCountry(getQualifier("ML", "dnet:countries")))).build(); |
|
293 |
} |
|
294 |
|
|
295 |
public static OafRel getResultProject(final String from, final String to, final OafEntity project, final String relClass) |
|
296 |
throws InvalidProtocolBufferException { |
|
297 |
return OafRel |
|
298 |
.newBuilder() |
|
299 |
.setSource(from) |
|
300 |
.setTarget(to) |
|
301 |
.setRelType(RelType.resultProject) |
|
302 |
.setSubRelType(SubRelType.outcome) |
|
303 |
.setRelClass(relClass) |
|
304 |
.setChild(false) |
|
305 |
.setResultProject( |
|
306 |
ResultProject.newBuilder().setOutcome(Outcome.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:result_project_relations")))) |
|
307 |
.setCachedTarget(project).build(); |
|
308 |
} |
|
309 |
|
|
310 |
public static OafEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException { |
|
311 |
return OafEntity |
|
312 |
.newBuilder() |
|
313 |
.setType(Type.project) |
|
314 |
.setId(projectId) |
|
315 |
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed")) |
|
316 |
.setProject( |
|
317 |
Project.newBuilder() |
|
318 |
.setMetadata( |
|
319 |
Project.Metadata |
|
320 |
.newBuilder() |
|
321 |
.setAcronym(sf("5CYRQOL")) |
|
322 |
.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life")) |
|
323 |
.setStartdate(sf("2007-05-01")) |
|
324 |
.setEnddate(sf("2007-10-31")) |
|
325 |
.setEcsc39(sf("false")) |
|
326 |
.setContracttype(getQualifier("CSA", "ec:FP7contractTypes")) |
|
327 |
.addFundingtree( |
|
328 |
sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::" |
|
329 |
+ fundingProgram |
|
330 |
+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::" |
|
331 |
+ fundingProgram |
|
332 |
+ "</id><description>" |
|
333 |
+ fundingProgram |
|
334 |
+ "-People</description><name>" |
|
335 |
+ fundingProgram |
|
336 |
+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>")))) |
|
337 |
.build(); |
|
338 |
} |
|
339 |
|
|
340 |
public static OafEntity getProjectWT() throws InvalidProtocolBufferException { |
|
341 |
return OafEntity |
|
342 |
.newBuilder() |
|
343 |
.setType(Type.project) |
|
344 |
.setId("project|wt::087536") |
|
345 |
.addCollectedfrom(getKV("wellcomeTrust", "wellcome trust")) |
|
346 |
.setProject( |
|
347 |
Project.newBuilder() |
|
348 |
.setMetadata( |
|
349 |
Project.Metadata |
|
350 |
.newBuilder() |
|
351 |
.setAcronym(sf("UNKNOWN")) |
|
352 |
.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP).")) |
|
353 |
.setStartdate(sf("2007-05-01")) |
|
354 |
.setEnddate(sf("2007-10-31")) |
|
355 |
.setEcsc39(sf("false")) |
|
356 |
.setContracttype(getQualifier("UNKNOWN", "wt:contractTypes")) |
|
357 |
.addFundingtree( |
|
358 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")) |
|
359 |
.addFundingtree( |
|
360 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")))) |
|
361 |
.build(); |
|
362 |
} |
|
363 |
|
|
364 |
public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) { |
|
365 |
final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value); |
|
366 |
return e.build(); |
|
367 |
} |
|
368 |
|
|
369 |
// public static DocumentClasses documentClasses() { |
|
370 |
// DocumentClasses.Builder builder = DocumentClasses.newBuilder(); |
|
371 |
// for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASSES) + 1; i++) { |
|
372 |
// builder.addArXivClasses(getDocumentClass()).addDdcClasses(getDocumentClass()).addWosClasses(getDocumentClass()) |
|
373 |
// .addMeshEuroPMCClasses(getDocumentClass()); |
|
374 |
// } |
|
375 |
// return builder.build(); |
|
376 |
// } |
|
377 |
// |
|
378 |
// private static DocumentClass getDocumentClass() { |
|
379 |
// DocumentClass.Builder builder = DocumentClass.newBuilder(); |
|
380 |
// for (int i = 0; i < RandomUtils.nextInt(N_DOCUMENT_CLASS_LABELS) + 1; i++) { |
|
381 |
// builder.addClassLabels("test_class_" + i); |
|
382 |
// } |
|
383 |
// return builder.setConfidenceLevel(0.5F).build(); |
|
384 |
// } |
|
385 |
// |
|
386 |
// public static DocumentStatistics documentStatistics() { |
|
387 |
// return |
|
388 |
// DocumentStatistics.newBuilder().setCitationsFromAllPapers(basicCitationStatistics()).setCitationsFromPublishedPapers(basicCitationStatistics()) |
|
389 |
// .build(); |
|
390 |
// } |
|
391 |
// |
|
392 |
// private static BasicCitationStatistics basicCitationStatistics() { |
|
393 |
// BasicCitationStatistics.Builder builder = BasicCitationStatistics.newBuilder(); |
|
394 |
// for (int i = 0; i < N_CITATION_STATS; i++) { |
|
395 |
// builder.addNumberOfCitationsPerYear(statisticsKeyValue()); |
|
396 |
// builder.setNumberOfCitations(RandomUtils.nextInt(5) + 1); |
|
397 |
// } |
|
398 |
// return builder.build(); |
|
399 |
// } |
|
400 |
// |
|
401 |
// private static StatisticsKeyValue statisticsKeyValue() { |
|
402 |
// return StatisticsKeyValue.newBuilder().setKey((RandomUtils.nextInt(30) + 1980) + "").setValue(RandomUtils.nextInt(5) + 1).build(); |
|
403 |
// } |
|
404 |
// |
|
405 |
// public static AuthorStatistics authorStatistics() { |
|
406 |
// AuthorStatistics.Builder builder = AuthorStatistics.newBuilder(); |
|
407 |
// builder.setCore(commonCoreStatistics()); |
|
408 |
// for (int i = 0; i < N_COAUTHORS; i++) { |
|
409 |
// builder.addCoAuthors(coAuthor()); |
|
410 |
// } |
|
411 |
// return builder.build(); |
|
412 |
// } |
|
413 |
// |
|
414 |
// private static CoAuthor coAuthor() { |
|
415 |
// CoAuthor.Builder builder = CoAuthor.newBuilder(); |
|
416 |
// builder.setId("30|od______2345::" + Hashing.md5(RandomStringUtils.random(10))); |
|
417 |
// builder.setCoauthoredPapersCount(RandomUtils.nextInt(5) + 1); |
|
418 |
// return builder.build(); |
|
419 |
// } |
|
420 |
// |
|
421 |
// public static CommonCoreStatistics commonCoreStatistics() { |
|
422 |
// CommonCoreStatistics.Builder builder = CommonCoreStatistics.newBuilder(); |
|
423 |
// |
|
424 |
// builder.setAllPapers(coreStatistics()); |
|
425 |
// builder.setPublishedPapers(coreStatistics()); |
|
426 |
// |
|
427 |
// return builder.build(); |
|
428 |
// } |
|
429 |
// |
|
430 |
// private static CoreStatistics coreStatistics() { |
|
431 |
// CoreStatistics.Builder builder = CoreStatistics.newBuilder(); |
|
432 |
// |
|
433 |
// builder.setNumberOfPapers(RandomUtils.nextInt(10)); |
|
434 |
// builder.setCitationsFromAllPapers(extendedStatistics()); |
|
435 |
// builder.setCitationsFromPublishedPapers(extendedStatistics()); |
|
436 |
// |
|
437 |
// return builder.build(); |
|
438 |
// } |
|
439 |
// |
|
440 |
// private static ExtendedStatistics extendedStatistics() { |
|
441 |
// ExtendedStatistics.Builder builder = ExtendedStatistics.newBuilder(); |
|
442 |
// |
|
443 |
// builder.setBasic(basicCitationStatistics()); |
|
444 |
// builder.setAverageNumberOfCitationsPerPaper(RandomUtils.nextFloat()); |
|
445 |
// for (int i = 0; i < N_CITATION_STATS; i++) { |
|
446 |
// builder.addNumberOfPapersCitedAtLeastXTimes(statisticsKeyValue()); |
|
447 |
// } |
|
448 |
// |
|
449 |
// return builder.build(); |
|
450 |
// } |
|
451 |
|
|
452 |
public static StringField sf(final String s) { |
|
453 |
return sf(s, null); |
|
454 |
} |
|
455 |
|
|
456 |
public static StringField sf(final String s, final DataInfo dataInfo) { |
|
457 |
final StringField.Builder sf = StringField.newBuilder().setValue(s); |
|
458 |
if (dataInfo != null) { |
|
459 |
sf.setDataInfo(dataInfo); |
|
460 |
} |
|
461 |
return sf.build(); |
|
462 |
} |
|
463 |
|
|
464 |
public static OafDecoder embed(final GeneratedMessage msg, |
|
465 |
final Kind kind, |
|
466 |
final boolean deletedByInference, |
|
467 |
final boolean inferred, |
|
468 |
final String provenance, |
|
469 |
final String action) { |
|
470 |
|
|
471 |
final Oaf.Builder oaf = Oaf |
|
472 |
.newBuilder() |
|
473 |
.setKind(kind) |
|
474 |
.setTimestamp(System.currentTimeMillis()) |
|
475 |
.setDataInfo( |
|
476 |
DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5") |
|
477 |
.setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action))); |
|
478 |
switch (kind) { |
|
479 |
case entity: |
|
480 |
oaf.setEntity((OafEntity) msg); |
|
481 |
break; |
|
482 |
case relation: |
|
483 |
oaf.setRel((OafRel) msg); |
|
484 |
break; |
|
485 |
default: |
|
486 |
break; |
|
487 |
} |
|
488 |
|
|
489 |
return OafDecoder.decode(oaf.build()); |
|
490 |
} |
|
491 |
|
|
492 |
public static OafDecoder embed(final GeneratedMessage msg, final Kind kind) { |
|
493 |
return embed(msg, kind, false, false, "inference_provenance", "provenance_action"); |
|
494 |
} |
|
495 |
|
|
496 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
4 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
5 |
|
|
6 |
public class RelDescriptor { |
|
7 |
|
|
8 |
private final String it; |
|
9 |
|
|
10 |
private final RelType relType; |
|
11 |
|
|
12 |
private final SubRelType subRelType; |
|
13 |
|
|
14 |
private final String relClass; |
|
15 |
|
|
16 |
public RelDescriptor(final String value) { |
|
17 |
super(); |
|
18 |
this.it = value; |
|
19 |
|
|
20 |
String[] s = value.split("_"); |
|
21 |
|
|
22 |
this.relType = RelType.valueOf(s[0]); |
|
23 |
this.subRelType = SubRelType.valueOf(s[1]); |
|
24 |
this.relClass = s[2]; |
|
25 |
} |
|
26 |
|
|
27 |
public SubRelType getSubRelType() { |
|
28 |
return subRelType; |
|
29 |
} |
|
30 |
|
|
31 |
public RelType getRelType() { |
|
32 |
return relType; |
|
33 |
} |
|
34 |
|
|
35 |
public String getRelClass() { |
|
36 |
return relClass; |
|
37 |
} |
|
38 |
|
|
39 |
public String getIt() { |
|
40 |
return it; |
|
41 |
} |
|
42 |
|
|
43 |
@Override |
|
44 |
public String toString() { |
|
45 |
return getIt(); |
|
46 |
} |
|
47 |
|
|
48 |
@Override |
|
49 |
public int hashCode() { |
|
50 |
final int prime = 31; |
|
51 |
int result = 1; |
|
52 |
result = (prime * result) + ((it == null) ? 0 : it.hashCode()); |
|
53 |
return result; |
|
54 |
} |
|
55 |
|
|
56 |
@Override |
|
57 |
public boolean equals(final Object obj) { |
|
58 |
if (this == obj) return true; |
|
59 |
if (obj == null) return false; |
|
60 |
if (getClass() != obj.getClass()) return false; |
|
61 |
RelDescriptor other = (RelDescriptor) obj; |
|
62 |
if (it == null) { |
|
63 |
if (other.it != null) return false; |
|
64 |
} else if (!it.equals(other.it)) return false; |
|
65 |
return true; |
|
66 |
} |
|
67 |
|
|
68 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/main/java/eu/dnetlib/data/mapreduce/util/OafRelDecoder.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
4 |
import com.google.protobuf.GeneratedMessage; |
|
5 |
import com.google.protobuf.Message.Builder; |
|
6 |
import com.google.protobuf.MessageOrBuilder; |
|
7 |
import com.google.protobuf.ProtocolMessageEnum; |
|
8 |
|
|
9 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
10 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
11 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
12 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
13 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
14 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
15 |
|
|
16 |
public class OafRelDecoder { |
|
17 |
|
|
18 |
// Separator placed between relType, subRelType and relClass by getCF/getCFQ.
// NOTE(review): a lone underscore is a reserved keyword since Java 9, so this identifier does not compile at newer
// source levels; renaming it means touching every use in this class.
private static final String _ = "_"; |
|
19 |
|
|
20 |
private final OafRel oafRel; |
|
21 |
|
|
22 |
public static OafRelDecoder decode(final OafRel oafRel) { |
|
23 |
return new OafRelDecoder(oafRel); |
|
24 |
} |
|
25 |
|
|
26 |
private OafRelDecoder(final OafRel oafRel) { |
|
27 |
this.oafRel = oafRel; |
|
28 |
} |
|
29 |
|
|
30 |
public RelType getRelType() { |
|
31 |
return oafRel.getRelType(); |
|
32 |
} |
|
33 |
|
|
34 |
public String relTypeName() { |
|
35 |
return getRelType().toString(); |
|
36 |
} |
|
37 |
|
|
38 |
public SubRelType getSubRelType() { |
|
39 |
return oafRel.getSubRelType(); |
|
40 |
} |
|
41 |
|
|
42 |
public String relSubTypeName() { |
|
43 |
return getSubRelType().toString(); |
|
44 |
} |
|
45 |
|
|
46 |
public String getCF() { |
|
47 |
return OafRelDecoder.getCF(getRelType(), getSubRelType()); |
|
48 |
} |
|
49 |
|
|
50 |
public String getCFQ() { |
|
51 |
return OafRelDecoder.getCFQ(getRelType(), getSubRelType(), getRelClass()); |
|
52 |
} |
|
53 |
|
|
54 |
public static String getCFQ(final RelType relType, final SubRelType subRelType, final ProtocolMessageEnum relClass) { |
|
55 |
return OafRelDecoder.getCFQ(relType, subRelType, relClass.getValueDescriptor().getName()); |
|
56 |
} |
|
57 |
|
|
58 |
public static String getCFQ(final RelType relType, final SubRelType subRelType, final String relClass) { |
|
59 |
return OafRelDecoder.getCF(relType, subRelType) + _ + relClass; |
|
60 |
} |
|
61 |
|
|
62 |
public static String getCF(final RelType relType, final SubRelType subRelType) { |
|
63 |
return relType + _ + subRelType; |
|
64 |
} |
|
65 |
|
|
66 |
public String getRelClass() { |
|
67 |
return oafRel.getRelClass(); |
|
68 |
} |
|
69 |
|
|
70 |
public RelDescriptor getRelDescriptor() { |
|
71 |
return new RelDescriptor(getCFQ()); |
|
72 |
} |
|
73 |
|
|
74 |
public GeneratedMessage getRel() { |
|
75 |
|
|
76 |
FieldDescriptor fd = oafRel.getDescriptorForType().findFieldByName(relTypeName()); |
|
77 |
return (GeneratedMessage) oafRel.getField(fd); |
|
78 |
} |
|
79 |
|
|
80 |
public GeneratedMessage getSubRel() { |
|
81 |
GeneratedMessage rel = getRel(); |
|
82 |
FieldDescriptor fd = rel.getDescriptorForType().findFieldByName(relSubTypeName()); |
|
83 |
return (GeneratedMessage) rel.getField(fd); |
|
84 |
} |
|
85 |
|
|
86 |
public RelMetadata getRelMetadata() { |
|
87 |
GeneratedMessage rel = getSubRel(); |
|
88 |
FieldDescriptor fd = rel.getDescriptorForType().findFieldByName("relMetadata"); |
|
89 |
return fd != null ? (RelMetadata) rel.getField(fd) : null; |
|
90 |
} |
|
91 |
|
|
92 |
public OafRel.Builder setClassId(final String classid) { |
|
93 |
RelMetadata.Builder relMetadataBuilder = RelMetadata.newBuilder(getRelMetadata()); |
|
94 |
relMetadataBuilder.getSemanticsBuilder().setClassid(classid).setClassname(classid); |
|
95 |
|
|
96 |
OafRel.Builder builder = OafRel.newBuilder(oafRel); |
|
97 |
|
|
98 |
FieldDescriptor fdRel = fd(oafRel, relTypeName()); |
|
99 |
Builder relBuilder = builder.newBuilderForField(fdRel); |
|
100 |
|
|
101 |
FieldDescriptor fdSubRel = fd(relBuilder, relSubTypeName()); |
|
102 |
Builder subRelBuilder = relBuilder.newBuilderForField(fdSubRel).mergeFrom(getSubRel()); |
|
103 |
|
|
104 |
subRelBuilder.setField(fd(getSubRel(), "relMetadata"), relMetadataBuilder.build()); |
|
105 |
|
|
106 |
relBuilder.setField(fdSubRel, subRelBuilder.build()); |
|
107 |
builder.setField(fdRel, relBuilder.build()); |
|
108 |
|
|
109 |
return builder.setRelClass(classid); |
|
110 |
} |
|
111 |
|
|
112 |
public Type getTargetType(final Type sourceType) { |
|
113 |
switch (getRelType()) { |
|
114 |
case datasourceOrganization: |
|
115 |
return sourceType.equals(Type.datasource) ? Type.organization : Type.datasource; |
|
116 |
case organizationOrganization: |
|
117 |
return Type.organization; |
|
118 |
case personPerson: |
|
119 |
return Type.person; |
|
120 |
case personResult: |
|
121 |
return sourceType.equals(Type.person) ? Type.result : Type.person; |
|
122 |
case projectOrganization: |
|
123 |
return sourceType.equals(Type.project) ? Type.organization : Type.project; |
|
124 |
case projectPerson: |
|
125 |
return sourceType.equals(Type.project) ? Type.person : Type.project; |
|
126 |
case resultOrganization: |
|
127 |
return sourceType.equals(Type.result) ? Type.organization : Type.result; |
|
128 |
case resultProject: |
|
129 |
return sourceType.equals(Type.result) ? Type.project : Type.result; |
|
130 |
case resultResult: |
|
131 |
return Type.result; |
|
132 |
default: |
|
133 |
throw new IllegalArgumentException("Unknown relationship type: " + relTypeName()); |
|
134 |
} |
|
135 |
} |
|
136 |
|
|
137 |
protected FieldDescriptor fd(final MessageOrBuilder mb, final int fieldNumber) { |
|
138 |
return mb.getDescriptorForType().findFieldByNumber(fieldNumber); |
|
139 |
} |
|
140 |
|
|
141 |
protected FieldDescriptor fd(final MessageOrBuilder mb, final String fieldName) { |
|
142 |
return mb.getDescriptorForType().findFieldByName(fieldName); |
|
143 |
} |
|
144 |
|
|
145 |
public String getCachedTargedId() { |
|
146 |
|
|
147 |
if (!oafRel.hasCachedTarget()) return null; |
|
148 |
|
|
149 |
final OafEntity entity = oafRel.getCachedTarget(); |
|
150 |
return OafEntityDecoder.decode(entity).getId(); |
|
151 |
} |
|
152 |
|
|
153 |
public String getRelSourceId() { |
|
154 |
return oafRel.getSource(); |
|
155 |
} |
|
156 |
|
|
157 |
public String getRelTargetId() { |
|
158 |
return oafRel.getTarget(); |
|
159 |
} |
|
160 |
|
|
161 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.1.6/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import org.apache.commons.io.IOUtils; |
|
8 |
import org.apache.commons.lang.RandomStringUtils; |
|
9 |
import org.apache.commons.lang.StringUtils; |
|
10 |
|
|
11 |
import com.google.gson.Gson; |
|
12 |
|
|
13 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
15 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
16 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
18 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
19 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
20 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
21 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
22 |
import eu.dnetlib.pace.config.Config; |
|
23 |
import eu.dnetlib.pace.config.DedupConfig; |
|
24 |
import eu.dnetlib.pace.config.Type; |
|
25 |
import eu.dnetlib.pace.model.Field; |
|
26 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
27 |
import eu.dnetlib.pace.model.MapDocument; |
|
28 |
import eu.dnetlib.pace.model.ProtoDocumentBuilder; |
|
29 |
import eu.dnetlib.pace.model.gt.GTAuthor; |
|
30 |
import eu.dnetlib.pace.model.gt.GTAuthorMapper; |
|
31 |
|
|
32 |
public abstract class AbstractProtoPaceTest extends OafTest { |
|
33 |
|
|
34 |
protected DedupConfig getResultFullConf() { |
|
35 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf")); |
|
36 |
} |
|
37 |
|
|
38 |
protected DedupConfig getResultSimpleConf() { |
|
39 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf")); |
|
40 |
} |
|
41 |
|
|
42 |
protected DedupConfig getResultConf() { |
|
43 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf")); |
|
44 |
} |
|
45 |
|
|
46 |
protected DedupConfig getOrganizationSimpleConf() { |
|
47 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf")); |
|
48 |
} |
|
49 |
|
|
50 |
protected DedupConfig getResultAuthorsConf() { |
|
51 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf")); |
|
52 |
} |
|
53 |
|
|
54 |
protected DedupConfig getPersonConf() { |
|
55 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf")); |
|
56 |
} |
|
57 |
|
|
58 |
protected DedupConfig getResultProdConf() { |
|
59 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf")); |
|
60 |
} |
|
61 |
|
|
62 |
protected MapDocument person(final Config conf, final String id, final Oaf oaf) { |
|
63 |
return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model()); |
|
64 |
} |
|
65 |
|
|
66 |
protected Oaf getPersonGT(final String path) { |
|
67 |
return new GTAuthorMapper().map(getGTAuthor(path)); |
|
68 |
} |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-3.1.6