1
|
package eu.dnetlib.data.mapreduce.util;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.google.common.collect.Iterables;
|
6
|
import com.google.common.collect.Lists;
|
7
|
import com.google.protobuf.InvalidProtocolBufferException;
|
8
|
import com.googlecode.protobuf.format.JsonFormat;
|
9
|
import eu.dnetlib.data.graph.model.DNGFDecoder;
|
10
|
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
|
11
|
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
|
12
|
import eu.dnetlib.data.proto.*;
|
13
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
14
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
15
|
import eu.dnetlib.data.transform.OntologyLoader;
|
16
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
17
|
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
|
18
|
import org.apache.commons.codec.binary.Base64;
|
19
|
import org.junit.Before;
|
20
|
import org.junit.Test;
|
21
|
|
22
|
import static org.junit.Assert.assertTrue;
|
23
|
import static eu.dnetlib.data.graph.model.DNGFUtils.*;
|
24
|
|
25
|
public class XmlRecordFactoryTest extends AbstractRecordFactoryTest {
|
26
|
|
27
|
public static final String CITATION_XML =
|
28
|
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>";
|
29
|
|
30
|
public static final String STATISTICS_JSON =
|
31
|
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]";
|
32
|
|
33
|
public static final String SCHEMA_LOCATION = "http://namespace.openaire.eu/DNGF http://www.openaire.eu/schema/0.2/DNGF-0.2.xsd";
|
34
|
|
35
|
private XmlRecordFactory builder;
|
36
|
|
37
|
|
38
|
|
39
|
@Before
|
40
|
public void setUp() throws Exception {
|
41
|
builder =
|
42
|
new XmlRecordFactory(IndexConfig.load(IndexConfigTest.loadConfiguration()).getConfigMap(),
|
43
|
ContextMapper.fromXml(ContextMapperTest.loadContext()), OntologyLoader.loadOntologiesFromCp(),
|
44
|
SCHEMA_LOCATION, true, false, false);
|
45
|
}
|
46
|
|
47
|
@Test
|
48
|
public void testJsonProtobuf() {
|
49
|
final DNGFDecoder decoder = embed(getPublication("id"), Kind.entity);
|
50
|
final String json = JsonFormat.printToString(decoder.getDNGF());
|
51
|
System.out.println(json);
|
52
|
System.out.println("json size: " + json.length());
|
53
|
System.out.println("binary size: " + decoder.getDNGF().toByteArray().length);
|
54
|
|
55
|
final String base64String = Base64.encodeBase64String(decoder.getDNGF().toByteArray());
|
56
|
System.out.println("base64 size: " + base64String.length());
|
57
|
|
58
|
System.out.println("decoded " + JsonFormat.printToString(DNGFDecoder.decode(Base64.decodeBase64(base64String)).getDNGF()));
|
59
|
}
|
60
|
|
61
|
@Test
|
62
|
public void testProjectFP7() throws InvalidProtocolBufferException {
|
63
|
|
64
|
final String projectId = "40|ec__________::20012100000000000000000000000000";
|
65
|
final String orgId = "20|WOS_________::organizationId000000000000000000";
|
66
|
|
67
|
builder.setMainEntity(embed(getProjectFP7(projectId, "SP3"), Kind.entity, false, false, "", "corda"));
|
68
|
builder.addRelation(Type.organization, embed(getProjectOrganization(orgId, projectId, "isParticipant"), Kind.relation));
|
69
|
DNGFProtos.DNGFRel projectPerson = getProjectPerson("30|WOS_________::personId000000000000000000000000", projectId, "isContact");
|
70
|
builder.addRelation(Type.person, embed(projectPerson, Kind.relation));
|
71
|
final String xml = builder.build();
|
72
|
|
73
|
System.out.println(XMLIndenter.indent(xml));
|
74
|
}
|
75
|
|
76
|
@Test
|
77
|
public void testOrganization() throws InvalidProtocolBufferException {
|
78
|
|
79
|
final String projectId = "40|ec__________::20012100000000000000000000000000";
|
80
|
final String orgIdRoot = "20|org_________::organizationIdRoot00000000000000";
|
81
|
final String orgIdDup = "20|org_________::organizationIdDup000000000000000";
|
82
|
|
83
|
builder.setMainEntity(embed(getOrganization(orgIdRoot), Kind.entity, false, false, "", "corda"));
|
84
|
builder.addRelation(Type.project, embed(getProjectOrganization(projectId, orgIdRoot, "hasParticipant"), Kind.relation));
|
85
|
builder.addRelation(Type.datasource, embed(getDatasourceOrganization("10|dts_________::datasourceId00000000000000000000", orgIdRoot, "provides"), Kind.relation));
|
86
|
|
87
|
builder.addChild(Type.organization, embed(getOrganizationOrganization(orgIdDup, orgIdRoot, "isMergedIn"), Kind.relation));
|
88
|
|
89
|
final String xml = builder.build();
|
90
|
|
91
|
System.out.println(XMLIndenter.indent(xml));
|
92
|
}
|
93
|
|
94
|
@Test
|
95
|
public void testResultFP7() throws InvalidProtocolBufferException {
|
96
|
final String resultId = "50|WOS_________::00010000000000000000000000000000";
|
97
|
final String projectId1 = "40|ec__________::20012100000000000000000000000000";
|
98
|
final String projectId2 = "40|ec__________::20012200000000000000000000000000";
|
99
|
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "pubmed"));
|
100
|
builder.addRelation(Type.person, embed(getPublicationPerson("30|WOS_________::0001name000000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
|
101
|
builder.addRelation(Type.project,
|
102
|
embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
|
103
|
builder.addRelation(Type.project,
|
104
|
embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP2"), "produces"), Kind.relation));
|
105
|
builder.addRelation(Type.publication,
|
106
|
embed(getSimilarityRel("50|WOS_________::00020000000000000000000000000000", resultId, getPublication(resultId), "isAmongTopNSimilarDocuments"),
|
107
|
Kind.relation));
|
108
|
builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "isMergedIn"), Kind.relation));
|
109
|
// System.err.println(builder.toString());
|
110
|
|
111
|
System.out.println(XMLIndenter.indent(builder.build()));
|
112
|
// System.out.println(builder.build());
|
113
|
}
|
114
|
|
115
|
@Test
|
116
|
public void testResultMerged() throws InvalidProtocolBufferException {
|
117
|
|
118
|
|
119
|
final String resultId = "50|WOS_________::00010000000000000000000000000000";
|
120
|
final String similarResultId = "50|WOS_________::00020000000000000000000000000000";
|
121
|
final String projectId1 = "40|EC__________::20012100000000000000000000000000";
|
122
|
final String projectId2 = "40|EC__________::99999900000000000000000000000000";
|
123
|
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, true, "dedup", "pubmed"));
|
124
|
builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00010name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
|
125
|
builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00020name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
|
126
|
builder.addRelation(Type.project,
|
127
|
embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
|
128
|
builder.addRelation(Type.project,
|
129
|
embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP3"), "produces"), Kind.relation));
|
130
|
|
131
|
builder.addRelation(Type.publication, embed(
|
132
|
getSimilarityRel(similarResultId, resultId, getPublication(similarResultId), "isAmongTopNSimilarDocuments"), Kind.relation));
|
133
|
|
134
|
builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation));
|
135
|
final String xml = builder.build();
|
136
|
|
137
|
System.out.println(XMLIndenter.indent(xml));
|
138
|
}
|
139
|
|
140
|
@Test
|
141
|
public void testDatasource() throws InvalidProtocolBufferException {
|
142
|
final String datasourceId = "10|WOS_________::datasourceId00000000000000000000";
|
143
|
final String orgId = "20|WOS_________::organizationId000000000000000000";
|
144
|
|
145
|
builder.setMainEntity(embed(getDatasource(datasourceId), Kind.entity, false, false, "", "opendoar"));
|
146
|
builder.addRelation(Type.organization, embed(getDatasourceOrganization(orgId, datasourceId, "isProvidedBy"), Kind.relation));
|
147
|
final String xml = builder.build();
|
148
|
|
149
|
System.out.println(XMLIndenter.indent(xml));
|
150
|
}
|
151
|
|
152
|
@Test
|
153
|
public void testProjectWT() throws InvalidProtocolBufferException {
|
154
|
final String projectId = "40|ec__________::20012100000000000000000000000000";
|
155
|
builder.setMainEntity(embed(getProjectWT(), Kind.entity, false, false, "", "wellcometrust"));
|
156
|
builder.addChild(Type.organization, embed(getProjectOrganization(projectId, "20|ec__________::organizationId000000000000000000", "isParticipant"), Kind.relation));
|
157
|
final String xml = builder.build();
|
158
|
|
159
|
System.out.println(XMLIndenter.indent(xml));
|
160
|
}
|
161
|
|
162
|
@Test
|
163
|
public void testResultWT() throws InvalidProtocolBufferException {
|
164
|
final String resultId = "50|WOS_________::00001000000000000000000000000000";
|
165
|
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "arxiv"));
|
166
|
builder.addRelation(Type.person, embed(getPublicationPerson( "50|WOS_________::00001name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
|
167
|
builder.addRelation(Type.project, embed(getPublicationProject("40|wt__________::08753600000000000000000000000000", resultId, getProjectWT(), "produces"), Kind.relation));
|
168
|
builder.addChild(Type.publication, embed(getDedupRel( "50|wt__________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation));
|
169
|
final String xml = builder.build();
|
170
|
|
171
|
System.out.println(XMLIndenter.indent(xml));
|
172
|
}
|
173
|
|
174
|
@Test
|
175
|
public void testUrlFilter() throws InvalidProtocolBufferException {
|
176
|
|
177
|
final List<String> filtered =
|
178
|
Lists.newArrayList(Iterables.filter(Lists.newArrayList("http://www.google.com", "www.google.com"), AbstractDNetXsltFunctions.urlFilter));
|
179
|
|
180
|
assertTrue(filtered.size() == 1);
|
181
|
}
|
182
|
|
183
|
|
184
|
}
|
185
|
|