1
|
package eu.dnetlib.data.mapreduce.util;
|
2
|
|
3
|
import com.google.protobuf.GeneratedMessage;
|
4
|
import com.google.protobuf.InvalidProtocolBufferException;
|
5
|
import eu.dnetlib.data.graph.model.DNGFDecoder;
|
6
|
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
|
7
|
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
|
8
|
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
|
9
|
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
|
10
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
11
|
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder;
|
12
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
13
|
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
|
14
|
import eu.dnetlib.data.proto.PersonProtos.Person;
|
15
|
import eu.dnetlib.data.proto.ProjectProtos.Project;
|
16
|
import eu.dnetlib.data.proto.PublicationProtos.Publication;
|
17
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
18
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
19
|
import org.apache.commons.logging.Log;
|
20
|
import org.apache.commons.logging.LogFactory;
|
21
|
|
22
|
public class DNGFTest {
|
23
|
|
24
|
/**
 * Sample citation payload used as fixture data.
 * NOTE(review): despite the {@code _JSON} suffix, the literal is an XML document rooted at
 * {@code <citations>}, holding a list of {@code <citation><rawText>...</rawText></citation>} entries.
 */
public static final String CITATION_JSON =
|
25
|
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 
4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>";
|
26
|
public static final String STATISTICS_JSON =
|
27
|
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]";
|
28
|
/** Commons-logging logger bound to this class; no method of this class currently writes to it. */
private static final Log log = LogFactory.getLog(DNGFTest.class);
|
29
|
/**
 * Resource path string; it is not read by any method of this class.
 * NOTE(review): presumably the classpath location of test profiles - confirm it is still needed.
 */
private static String basePathProfiles = "/eu/dnetlib/test/profiles/OntologyDSResources/OntologyDSResourceType";
|
30
|
|
31
|
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename) {
|
32
|
return getStructuredproperty(value, classname, schemename, null);
|
33
|
}
|
34
|
|
35
|
public static StructuredProperty.Builder getStructuredproperty(final String value, final String classname, final String schemename, final DataInfo dataInfo) {
|
36
|
final Builder sp = StructuredProperty.newBuilder().setValue(value).setQualifier(getQualifier(classname, schemename));
|
37
|
if (dataInfo != null) {
|
38
|
sp.setDataInfo(dataInfo);
|
39
|
}
|
40
|
return sp;
|
41
|
}
|
42
|
|
43
|
public static Qualifier.Builder getQualifier(final String classname, final String schemename) {
|
44
|
return Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename);
|
45
|
}
|
46
|
|
47
|
public static KeyValue getKV(final String id, final String name) {
|
48
|
return KeyValue.newBuilder().setKey(id).setValue(name).build();
|
49
|
}
|
50
|
|
51
|
public static DNGFEntity getDatasource(final String datasourceId) {
|
52
|
return DNGFEntity
|
53
|
.newBuilder()
|
54
|
.setType(Type.datasource)
|
55
|
.setId(datasourceId)
|
56
|
.setDatasource(
|
57
|
Datasource.newBuilder().setMetadata(
|
58
|
Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname"))
|
59
|
.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage"))
|
60
|
.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems"))
|
61
|
.setOdnumberofitemsdate(sf("numberofitems date"))
|
62
|
// .addOdsubjects("subjects")
|
63
|
.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes"))
|
64
|
.setDatasourcetype(getQualifier("type class", "type scheme")))).build();
|
65
|
}
|
66
|
|
67
|
public static DNGFEntity getResult(final String id) {
|
68
|
return getResultBuilder(id).build();
|
69
|
}
|
70
|
|
71
|
public static DNGFEntity.Builder getResultBuilder(final String id) {
|
72
|
return DNGFEntity
|
73
|
.newBuilder()
|
74
|
.setType(Type.publication)
|
75
|
.setId(id)
|
76
|
.setPublication(
|
77
|
Publication.newBuilder()
|
78
|
.setMetadata(
|
79
|
Publication.Metadata
|
80
|
.newBuilder()
|
81
|
.addTitle(
|
82
|
getStructuredproperty(
|
83
|
"Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population",
|
84
|
"main title", "dnet:result_titles", getDataInfo()))
|
85
|
.addTitle(getStructuredproperty("Another title", "alternative title", "dnet:result_titles", getDataInfo()))
|
86
|
.addSubject(getStructuredproperty("Biophysics", "subject", "dnet:result_sujects"))
|
87
|
.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB"))
|
88
|
.addContext(Context.newBuilder().setId("egi::virtual::970"))
|
89
|
.addContext(Context.newBuilder().setId("egi::classification::natsc::math::applied"))
|
90
|
.addContext(Context.newBuilder().setId("egi::classification::natsc::math"))
|
91
|
.addContext(Context.newBuilder().setId("egi::classification::natsc"))
|
92
|
.addContext(Context.newBuilder().setId("egi::classification")).addContext(Context.newBuilder().setId("egi"))
|
93
|
.addDescription(sf("Responsible for making and maintaining the extracellular matrix ..."))
|
94
|
.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD"))
|
95
|
.setResulttype(getQualifier("publication", "dnet:result_types"))
|
96
|
.setLanguage(getQualifier("eng", "dnet:languages"))).addInstance(getInstance("10|od__10", "Uk pubmed"))
|
97
|
.addInstance(getInstance("10|od__10", "arxiv")))
|
98
|
.addCollectedfrom(getKV("opendoar____::1064", "Oxford University Research Archive"))
|
99
|
.addPid(getStructuredproperty("doi:74293", "doi", "dnet:pids")).addPid(getStructuredproperty("oai:74295", "oai", "dnet:pids"))
|
100
|
.setDateofcollection("");
|
101
|
}
|
102
|
|
103
|
public static DataInfo getDataInfo() {
|
104
|
return getDataInfo("0.4");
|
105
|
}
|
106
|
|
107
|
public static DataInfo getDataInfo(final String trust) {
|
108
|
return DataInfo.newBuilder().setDeletedbyinference(false).setTrust("0.4").setInferenceprovenance("algo").setProvenanceaction(getQualifier("xx", "yy"))
|
109
|
.build();
|
110
|
}
|
111
|
|
112
|
public static Instance.Builder getInstance(final String hostedbyId, final String hostedbyName) {
|
113
|
return Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)).setLicence(getQualifier("OpenAccess", "dnet:access_modes"))
|
114
|
.setInstancetype(getQualifier("publication", "dnet:result_typologies")).addUrl("webresource url");
|
115
|
|
116
|
}
|
117
|
|
118
|
public static DNGFRel getDedupRel(final String source, final String target) {
|
119
|
return DNGFRel.newBuilder().setSource(source).setTarget(target).setRelType(getQualifier("dedup", "dedupScheme"))
|
120
|
.setChild(false).setCachedTarget(getResult(target)).build();
|
121
|
}
|
122
|
|
123
|
|
124
|
public static DNGFEntity getPerson() {
|
125
|
return DNGFEntity
|
126
|
.newBuilder()
|
127
|
.setType(Type.person)
|
128
|
.setId("WOS:000277866500014_A._Shirazi-Adl")
|
129
|
.setPerson(
|
130
|
Person.newBuilder().setMetadata(
|
131
|
Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl"))
|
132
|
.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(getQualifier("EN", "dnet:countries"))))
|
133
|
.build();
|
134
|
}
|
135
|
|
136
|
|
137
|
public static RelMetadata.Builder relMetadata(final String classname, final String schemename) {
|
138
|
return RelMetadata.newBuilder().setSemantics(getQualifier(classname, schemename));
|
139
|
}
|
140
|
|
141
|
public static DNGFEntity getOrganization(final String orgId) {
|
142
|
return DNGFEntity
|
143
|
.newBuilder()
|
144
|
.setType(Type.organization)
|
145
|
.setId(orgId)
|
146
|
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed"))
|
147
|
.setOrganization(
|
148
|
Organization.newBuilder().setMetadata(
|
149
|
Organization.Metadata.newBuilder().setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE"))
|
150
|
.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org"))
|
151
|
.setCountry(getQualifier("ML", "dnet:countries")))).build();
|
152
|
}
|
153
|
|
154
|
public static DNGFEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException {
|
155
|
return DNGFEntity
|
156
|
.newBuilder()
|
157
|
.setType(Type.project)
|
158
|
.setId(projectId)
|
159
|
.addCollectedfrom(getKV("opendoar_1234", "UK pubmed"))
|
160
|
.setProject(
|
161
|
Project.newBuilder()
|
162
|
.setMetadata(
|
163
|
Project.Metadata
|
164
|
.newBuilder()
|
165
|
.setAcronym(sf("5CYRQOL"))
|
166
|
.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life"))
|
167
|
.setStartdate(sf("2007-05-01"))
|
168
|
.setEnddate(sf("2007-10-31"))
|
169
|
.setEcsc39(sf("false"))
|
170
|
.setContracttype(getQualifier("CSA", "ec:FP7contractTypes"))
|
171
|
.addFundingtree(
|
172
|
sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::"
|
173
|
+ fundingProgram
|
174
|
+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::"
|
175
|
+ fundingProgram
|
176
|
+ "</id><description>"
|
177
|
+ fundingProgram
|
178
|
+ "-People</description><name>"
|
179
|
+ fundingProgram
|
180
|
+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>"))))
|
181
|
.build();
|
182
|
}
|
183
|
|
184
|
public static DNGFEntity getProjectWT() throws InvalidProtocolBufferException {
|
185
|
return DNGFEntity
|
186
|
.newBuilder()
|
187
|
.setType(Type.project)
|
188
|
.setId("project|wt::087536")
|
189
|
.addCollectedfrom(getKV("wellcomeTrust", "wellcome trust"))
|
190
|
.setProject(
|
191
|
Project.newBuilder()
|
192
|
.setMetadata(
|
193
|
Project.Metadata
|
194
|
.newBuilder()
|
195
|
.setAcronym(sf("UNKNOWN"))
|
196
|
.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP)."))
|
197
|
.setStartdate(sf("2007-05-01"))
|
198
|
.setEnddate(sf("2007-10-31"))
|
199
|
.setEcsc39(sf("false"))
|
200
|
.setContracttype(getQualifier("UNKNOWN", "wt:contractTypes"))
|
201
|
.addFundingtree(
|
202
|
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>"))
|
203
|
.addFundingtree(
|
204
|
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>"))))
|
205
|
.build();
|
206
|
}
|
207
|
|
208
|
public static ExtraInfo extraInfo(final String name, final String provenance, final String trust, final String typology, final String value) {
|
209
|
final ExtraInfo.Builder e = ExtraInfo.newBuilder().setName(name).setProvenance(provenance).setTrust(trust).setTypology(typology).setValue(value);
|
210
|
return e.build();
|
211
|
}
|
212
|
|
213
|
public static StringField sf(final String s) {
|
214
|
return sf(s, null);
|
215
|
}
|
216
|
|
217
|
public static StringField sf(final String s, final DataInfo dataInfo) {
|
218
|
final StringField.Builder sf = StringField.newBuilder().setValue(s);
|
219
|
if (dataInfo != null) {
|
220
|
sf.setDataInfo(dataInfo);
|
221
|
}
|
222
|
return sf.build();
|
223
|
}
|
224
|
|
225
|
public static DNGFDecoder embed(final GeneratedMessage msg,
|
226
|
final Kind kind,
|
227
|
final boolean deletedByInference,
|
228
|
final boolean inferred,
|
229
|
final String provenance,
|
230
|
final String action) {
|
231
|
|
232
|
final DNGF.Builder dngf = DNGF.newBuilder()
|
233
|
.setKind(kind)
|
234
|
.setLastupdatetimestamp(System.currentTimeMillis())
|
235
|
.setDataInfo(
|
236
|
DataInfo.newBuilder().setDeletedbyinference(deletedByInference).setInferred(inferred).setTrust("0.5")
|
237
|
.setInferenceprovenance(provenance).setProvenanceaction(getQualifier(action, action)));
|
238
|
switch (kind) {
|
239
|
case entity:
|
240
|
dngf.setEntity((DNGFEntity) msg);
|
241
|
break;
|
242
|
case relation:
|
243
|
dngf.setRel((DNGFRel) msg);
|
244
|
break;
|
245
|
default:
|
246
|
break;
|
247
|
}
|
248
|
|
249
|
return DNGFDecoder.decode(dngf.build());
|
250
|
}
|
251
|
|
252
|
public static DNGFDecoder embed(final GeneratedMessage msg, final Kind kind) {
|
253
|
return embed(msg, kind, false, false, "inference_provenance", "provenance_action");
|
254
|
}
|
255
|
|
256
|
|
257
|
}
|