1
|
package eu.dnetlib.data.mapreduce.util;
|
2
|
|
3
|
import java.util.List;
|
4
|
import java.util.Set;
|
5
|
|
6
|
import com.google.common.collect.Iterables;
|
7
|
import com.google.common.collect.Lists;
|
8
|
import com.google.common.collect.Sets;
|
9
|
import com.google.protobuf.InvalidProtocolBufferException;
|
10
|
import com.googlecode.protobuf.format.JsonFormat;
|
11
|
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
|
12
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
13
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
14
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
15
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
16
|
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
|
17
|
import org.apache.commons.codec.binary.Base64;
|
18
|
import org.junit.Before;
|
19
|
import org.junit.Test;
|
20
|
|
21
|
import static org.junit.Assert.assertTrue;
|
22
|
|
23
|
public class XmlRecordFactoryTest {
|
24
|
|
25
|
/**
 * Sample citations document kept as a fixture: a {@code <citations>} root holding a list of
 * {@code <citation>} elements, each with a single {@code <rawText>} child carrying one
 * bibliographic reference.
 * <p>
 * The constant is not referenced by any test visible in this class; it is public, so it may be
 * consumed from elsewhere.
 * <p>
 * NOTE(review): the literal contains backslash-digit (octal) escape sequences in several author
 * names and symbols; they presumably stem from non-ASCII characters of the source text that were
 * mis-encoded. Confirm before relying on the exact characters.
 */
public static final String CITATION_XML =
|
26
|
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 
4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>";
|
27
|
|
28
|
public static final String STATISTICS_JSON =
|
29
|
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]";
|
30
|
|
31
|
/**
 * Schema location string passed to the {@link XmlRecordFactory} constructor in {@link #setUp()}:
 * two URLs separated by a single space.
 * NOTE(review): presumably a namespace / XSD pair in the xsi:schemaLocation convention - confirm.
 */
public static final String SCHEMA_LOCATION =
        "http://namespace.openaire.eu/oaf"
                + " "
                + "http://www.openaire.eu/schema/0.2/oaf-0.2.xsd";
|
32
|
|
33
|
/**
 * Set of strings passed as the last constructor argument of {@link XmlRecordFactory} in
 * {@link #setUp()}.
 * <p>
 * Declared {@code final} so that the shared, publicly visible reference cannot be reassigned by
 * other code between tests; the set contents themselves are unchanged.
 */
public static final Set<String> specialDatasourceTypes =
        Sets.newHashSet("scholarcomminfra", "infospace", "pubsrepository::mock", "entityregistry", "entityregistry::projects");
|
34
|
|
35
|
/** Factory under test; a new instance is assigned by {@link #setUp()}, which is annotated with {@code @Before}. */
private XmlRecordFactory builder;
|
36
|
|
37
|
@Before
|
38
|
public void setUp() throws Exception {
|
39
|
builder =
|
40
|
new XmlRecordFactory(IndexConfig.load(IndexConfigTest.config).getConfigMap(),
|
41
|
ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml), RelClasses.fromJSon(RelClassesTest.relClassesJson),
|
42
|
SCHEMA_LOCATION, true, false, false, specialDatasourceTypes);
|
43
|
}
|
44
|
|
45
|
@Test
|
46
|
public void testJsonProtobuf() {
|
47
|
JsonFormat jsonFormat = new JsonFormat();
|
48
|
|
49
|
final OafDecoder decoder = OafTest.embed(OafTest.getResult("id"), Kind.entity);
|
50
|
|
51
|
|
52
|
final String json = jsonFormat.printToString(decoder.getOaf());
|
53
|
System.out.println(json);
|
54
|
System.out.println("json size: " + json.length());
|
55
|
System.out.println("binary size: " + decoder.getOaf().toByteArray().length);
|
56
|
|
57
|
final String base64String = Base64.encodeBase64String(decoder.getOaf().toByteArray());
|
58
|
System.out.println("base64 size: " + base64String.length());
|
59
|
|
60
|
System.out.println("decoded " + jsonFormat.printToString(OafDecoder.decode(Base64.decodeBase64(base64String)).getOaf()));
|
61
|
}
|
62
|
|
63
|
@Test
|
64
|
public void testProjectFP7() throws InvalidProtocolBufferException {
|
65
|
|
66
|
final String projectId = "ec::200121";
|
67
|
final String orgId = "20|organizationId";
|
68
|
|
69
|
builder.setMainEntity(OafTest.embed(OafTest.getProjectFP7(projectId, "SP3"), Kind.entity, false, false, "", "corda"));
|
70
|
builder.addRelation(Type.organization, OafTest.embed(OafTest.getProjectOrganization(orgId, projectId, "isParticipant"), Kind.relation));
|
71
|
final String xml = builder.build();
|
72
|
|
73
|
System.out.println(new IndentXmlString().evaluate(xml));
|
74
|
}
|
75
|
|
76
|
@Test
|
77
|
public void testOrganization() throws InvalidProtocolBufferException {
|
78
|
|
79
|
final String projectId = "ec::200121";
|
80
|
final String orgIdRoot = "20|organizationIdRoot";
|
81
|
final String orgIdDup = "20|organizationIdDup";
|
82
|
|
83
|
builder.setMainEntity(OafTest.embed(OafTest.getOrganization(orgIdRoot), Kind.entity, false, false, "", "corda"));
|
84
|
builder.addRelation(Type.project, OafTest.embed(OafTest.getProjectOrganization(projectId, orgIdRoot, "hasParticipant"), Kind.relation));
|
85
|
builder.addRelation(Type.datasource, OafTest.embed(OafTest.getDatasourceOrganization("10|datasourceId", orgIdRoot, "provides"), Kind.relation));
|
86
|
|
87
|
builder.addChild(Type.organization, OafTest.embed(OafTest.getOrganizationOrganization(orgIdDup, orgIdRoot, "isMergedIn"), Kind.relation));
|
88
|
|
89
|
final String xml = builder.build();
|
90
|
|
91
|
System.out.println(new IndentXmlString().evaluate(xml));
|
92
|
}
|
93
|
|
94
|
@Test
|
95
|
public void testResultFP7() throws InvalidProtocolBufferException {
|
96
|
final String resultId = "50|WOS:0001";
|
97
|
final String projectId1 = "40|ec::200121";
|
98
|
final String projectId2 = "40|ec::200122";
|
99
|
builder.setMainEntity(OafTest.embed(OafTest.getResult(resultId), Kind.entity, false, false, "", "pubmed"));
|
100
|
builder.addRelation(Type.project,
|
101
|
OafTest.embed(OafTest.getResultProject(projectId1, resultId, OafTest.getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
|
102
|
builder.addRelation(Type.project,
|
103
|
OafTest.embed(OafTest.getResultProject(projectId2, resultId, OafTest.getProjectFP7(projectId2, "SP2"), "produces"), Kind.relation));
|
104
|
builder.addRelation(Type.result,
|
105
|
OafTest.embed(OafTest.getSimilarityRel("50|WOS:0002", resultId, OafTest.getResult("50|WOS:0002"), "isAmongTopNSimilarDocuments"),
|
106
|
Kind.relation));
|
107
|
builder.addChild(Type.result, OafTest.embed(OafTest.getDedupRel("50|anotherResultId", resultId, RelType.resultResult, "isMergedIn"), Kind.relation));
|
108
|
// System.err.println(builder.toString());
|
109
|
|
110
|
System.out.println(new IndentXmlString().evaluate(builder.build()));
|
111
|
// System.out.println(builder.build());
|
112
|
}
|
113
|
|
114
|
@Test
|
115
|
public void testResultMerged() throws InvalidProtocolBufferException {
|
116
|
final String resultId = "50|WOS:0001";
|
117
|
final String similarResultId = "50|WOS:0002";
|
118
|
final String projectId1 = "40|ec::200121";
|
119
|
final String projectId2 = "40|ec::999999";
|
120
|
builder.setMainEntity(OafTest.embed(OafTest.getResult(resultId), Kind.entity, false, true, "dedup", "pubmed"));
|
121
|
builder.addRelation(Type.project,
|
122
|
OafTest.embed(OafTest.getResultProject(projectId1, resultId, OafTest.getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
|
123
|
builder.addRelation(Type.project,
|
124
|
OafTest.embed(OafTest.getResultProject(projectId2, resultId, OafTest.getProjectFP7(projectId2, "SP3"), "produces"), Kind.relation));
|
125
|
|
126
|
builder.addRelation(Type.result, OafTest.embed(
|
127
|
OafTest.getSimilarityRel(similarResultId, resultId, OafTest.getResult(similarResultId), "isAmongTopNSimilarDocuments"), Kind.relation));
|
128
|
|
129
|
builder.addChild(Type.result, OafTest.embed(OafTest.getDedupRel("50|anotherResultId", resultId, RelType.resultResult, "merges"), Kind.relation));
|
130
|
final String xml = builder.build();
|
131
|
|
132
|
System.out.println(new IndentXmlString().evaluate(xml));
|
133
|
}
|
134
|
|
135
|
@Test
|
136
|
public void testDatasource() throws InvalidProtocolBufferException {
|
137
|
final String datasourceId = "10|datasourceId";
|
138
|
final String orgId = "20|organizationId";
|
139
|
|
140
|
builder.setMainEntity(OafTest.embed(OafTest.getDatasource(datasourceId), Kind.entity, false, false, "", "opendoar"));
|
141
|
builder.addRelation(Type.organization, OafTest.embed(OafTest.getDatasourceOrganization(orgId, datasourceId, "isProvidedBy"), Kind.relation));
|
142
|
final String xml = builder.build();
|
143
|
|
144
|
System.out.println(new IndentXmlString().evaluate(xml));
|
145
|
}
|
146
|
|
147
|
@Test
|
148
|
public void testProjectWT() throws InvalidProtocolBufferException {
|
149
|
final String projectId = "ec::200121";
|
150
|
builder.setMainEntity(OafTest.embed(OafTest.getProjectWT(), Kind.entity, false, false, "", "wellcometrust"));
|
151
|
builder.addChild(Type.organization, OafTest.embed(OafTest.getProjectOrganization(projectId, "20|organizationId", "isParticipant"), Kind.relation));
|
152
|
final String xml = builder.build();
|
153
|
|
154
|
System.out.println(new IndentXmlString().evaluate(xml));
|
155
|
}
|
156
|
|
157
|
@Test
|
158
|
public void testResultWT() throws InvalidProtocolBufferException {
|
159
|
final String resultId = "50|WOS:0001";
|
160
|
builder.setMainEntity(OafTest.embed(OafTest.getResult(resultId), Kind.entity, false, false, "", "arxiv"));
|
161
|
builder.addRelation(Type.project, OafTest.embed(OafTest.getResultProject("40|wt::087536", resultId, OafTest.getProjectWT(), "produces"), Kind.relation));
|
162
|
builder.addChild(Type.result, OafTest.embed(OafTest.getDedupRel("50|anotherResultId", resultId, RelType.resultResult, "merges"), Kind.relation));
|
163
|
final String xml = builder.build();
|
164
|
|
165
|
System.out.println(new IndentXmlString().evaluate(xml));
|
166
|
}
|
167
|
|
168
|
@Test
|
169
|
public void testUrlFilter() throws InvalidProtocolBufferException {
|
170
|
|
171
|
final List<String> filtered =
|
172
|
Lists.newArrayList(Iterables.filter(Lists.newArrayList("http://www.google.com", "www.google.com"), AbstractDNetXsltFunctions.urlFilter));
|
173
|
|
174
|
assertTrue(filtered.size() == 1);
|
175
|
}
|
176
|
|
177
|
}
|