Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.util.List;
4

    
5
import com.google.common.collect.Iterables;
6
import com.google.common.collect.Lists;
7
import com.google.protobuf.InvalidProtocolBufferException;
8
import com.googlecode.protobuf.format.JsonFormat;
9
import eu.dnetlib.data.graph.model.DNGFDecoder;
10
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
11
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
12
import eu.dnetlib.data.proto.*;
13
import eu.dnetlib.data.proto.KindProtos.Kind;
14
import eu.dnetlib.data.proto.TypeProtos.Type;
15
import eu.dnetlib.data.transform.OntologyLoader;
16
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
17
import eu.dnetlib.miscutils.functional.xml.XMLIndenter;
18
import org.apache.commons.codec.binary.Base64;
19
import org.junit.Before;
20
import org.junit.Test;
21

    
22
import static org.junit.Assert.assertTrue;
23
import static eu.dnetlib.data.graph.model.DNGFUtils.*;
24

    
25
public class XmlRecordFactoryTest extends AbstractRecordFactoryTest {
26

    
27
	/**
	 * XML fixture: a {@code <citations>} element wrapping a list of {@code <citation>/<rawText>}
	 * entries, each holding one raw bibliographic reference string.
	 *
	 * Not referenced by any test method in this class; it is public, so it may be used from
	 * other classes (not verifiable from this file).
	 */
	public static final String CITATION_XML =
28
			"<citations>\n  <citation>\n    <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n  </citation>\n  <citation>\n    <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n  </citation>\n  <citation>\n    <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n  </citation>\n  <citation>\n    <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n  </citation>\n  <citation>\n    <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n  </citation>\n  <citation>\n    <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n  </citation>\n  <citation>\n    <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n  </citation>\n  <citation>\n    <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n  </citation>\n  <citation>\n    <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n  </citation>\n  <citation>\n    <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n  </citation>\n  <citation>\n    <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n  </citation>\n  <citation>\n    <rawText>[26] A.F. 
Io\175400e and A.R. Regel, Prog. Semicond. 4, 237 (1960).</rawText>\n  </citation>\n  <citation>\n    <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n  </citation>\n  <citation>\n    <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n  </citation>\n  <citation>\n    <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n  </citation>\n  <citation>\n    <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n  </citation>\n  <citation>\n    <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n  </citation>\n  <citation>\n    <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n  </citation>\n  <citation>\n    <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n  </citation>\n  <citation>\n    <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n  </citation>\n  <citation>\n    <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n  </citation>\n  <citation>\n    <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n  </citation>\n  <citation>\n    <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n  </citation>\n  <citation>\n    <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n  </citation>\n  <citation>\n    <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n  </citation>\n  <citation>\n    <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n  </citation>\n  <citation>\n    <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n  </citation>\n  <citation>\n    <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n  </citation>\n  <citation>\n    <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n  </citation>\n  <citation>\n    <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n  </citation>\n  <citation>\n    <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n  </citation>\n  <citation>\n    <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n  </citation>\n  <citation>\n    <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n  </citation>\n</citations>";
29

    
30
	public static final String STATISTICS_JSON =
31
			"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]";
32

    
33
	/**
	 * Schema location value handed to the {@link XmlRecordFactory} constructor in {@link #setUp()}:
	 * a namespace URI and an XSD URL separated by a single space.
	 */
	public static final String SCHEMA_LOCATION = "http://namespace.openaire.eu/DNGF http://www.openaire.eu/schema/0.2/DNGF-0.2.xsd";
34

    
35
	/** Factory under test; a new instance is assigned in {@link #setUp()}, which is annotated with {@code @Before}. */
	private XmlRecordFactory builder;
36

    
37

    
38

    
39
	@Before
40
	public void setUp() throws Exception {
41
		builder =
42
				new XmlRecordFactory(IndexConfig.load(IndexConfigTest.loadConfiguration()).getConfigMap(),
43
						ContextMapper.fromXml(ContextMapperTest.loadContext()), OntologyLoader.loadOntologiesFromCp(),
44
						SCHEMA_LOCATION, true, false, false);
45
	}
46

    
47
	@Test
48
	public void testJsonProtobuf() {
49
		final DNGFDecoder decoder = embed(getPublication("id"), Kind.entity);
50
		final String json = JsonFormat.printToString(decoder.getDNGF());
51
		System.out.println(json);
52
		System.out.println("json size: " + json.length());
53
		System.out.println("binary size: " + decoder.getDNGF().toByteArray().length);
54

    
55
		final String base64String = Base64.encodeBase64String(decoder.getDNGF().toByteArray());
56
		System.out.println("base64 size: " + base64String.length());
57

    
58
		System.out.println("decoded " + JsonFormat.printToString(DNGFDecoder.decode(Base64.decodeBase64(base64String)).getDNGF()));
59
	}
60

    
61
	@Test
62
	public void testProjectFP7() throws InvalidProtocolBufferException {
63

    
64
		final String projectId = "40|ec__________::20012100000000000000000000000000";
65
		final String orgId =     "20|WOS_________::organizationId000000000000000000";
66

    
67
		builder.setMainEntity(embed(getProjectFP7(projectId, "SP3"), Kind.entity, false, false, "", "corda"));
68
		builder.addRelation(Type.organization, embed(getProjectOrganization(orgId, projectId, "isParticipant"), Kind.relation));
69
		DNGFProtos.DNGFRel projectPerson = getProjectPerson("30|WOS_________::personId000000000000000000000000", projectId, "isContact");
70
		builder.addRelation(Type.person, embed(projectPerson, Kind.relation));
71
		final String xml = builder.build();
72

    
73
		System.out.println(XMLIndenter.indent(xml));
74
	}
75

    
76
	@Test
77
	public void testOrganization() throws InvalidProtocolBufferException {
78

    
79
		final String projectId = "40|ec__________::20012100000000000000000000000000";
80
		final String orgIdRoot = "20|org_________::organizationIdRoot00000000000000";
81
		final String orgIdDup =  "20|org_________::organizationIdDup000000000000000";
82

    
83
		builder.setMainEntity(embed(getOrganization(orgIdRoot), Kind.entity, false, false, "", "corda"));
84
		builder.addRelation(Type.project, embed(getProjectOrganization(projectId, orgIdRoot, "hasParticipant"), Kind.relation));
85
		builder.addRelation(Type.datasource, embed(getDatasourceOrganization("10|dts_________::datasourceId00000000000000000000", orgIdRoot, "provides"), Kind.relation));
86

    
87
		builder.addChild(Type.organization, embed(getOrganizationOrganization(orgIdDup, orgIdRoot, "isMergedIn"), Kind.relation));
88

    
89
		final String xml = builder.build();
90

    
91
		System.out.println(XMLIndenter.indent(xml));
92
	}
93

    
94
	@Test
95
	public void testResultFP7() throws InvalidProtocolBufferException {
96
		final String resultId =   "50|WOS_________::00010000000000000000000000000000";
97
		final String projectId1 = "40|ec__________::20012100000000000000000000000000";
98
		final String projectId2 = "40|ec__________::20012200000000000000000000000000";
99
		builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "pubmed"));
100
		builder.addRelation(Type.person, embed(getPublicationPerson("30|WOS_________::0001name000000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
101
		builder.addRelation(Type.project,
102
				embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
103
		builder.addRelation(Type.project,
104
				embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP2"), "produces"), Kind.relation));
105
		builder.addRelation(Type.publication,
106
				embed(getSimilarityRel("50|WOS_________::00020000000000000000000000000000", resultId, getPublication(resultId), "isAmongTopNSimilarDocuments"),
107
						Kind.relation));
108
		builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "isMergedIn"), Kind.relation));
109
		// System.err.println(builder.toString());
110

    
111
		System.out.println(XMLIndenter.indent(builder.build()));
112
		// System.out.println(builder.build());
113
	}
114

    
115
	@Test
116
	public void testResultMerged() throws InvalidProtocolBufferException {
117

    
118

    
119
		final String resultId = "50|WOS_________::00010000000000000000000000000000";
120
		final String similarResultId = "50|WOS_________::00020000000000000000000000000000";
121
		final String projectId1 = "40|EC__________::20012100000000000000000000000000";
122
		final String projectId2 = "40|EC__________::99999900000000000000000000000000";
123
		builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, true, "dedup", "pubmed"));
124
		builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00010name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
125
		builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00020name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
126
		builder.addRelation(Type.project,
127
				embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation));
128
		builder.addRelation(Type.project,
129
				embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP3"), "produces"), Kind.relation));
130

    
131
		builder.addRelation(Type.publication, embed(
132
				getSimilarityRel(similarResultId, resultId, getPublication(similarResultId), "isAmongTopNSimilarDocuments"), Kind.relation));
133

    
134
		builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation));
135
		final String xml = builder.build();
136

    
137
		System.out.println(XMLIndenter.indent(xml));
138
	}
139

    
140
	@Test
141
	public void testDatasource() throws InvalidProtocolBufferException {
142
		final String datasourceId = "10|WOS_________::datasourceId00000000000000000000";
143
		final String orgId =        "20|WOS_________::organizationId000000000000000000";
144

    
145
		builder.setMainEntity(embed(getDatasource(datasourceId), Kind.entity, false, false, "", "opendoar"));
146
		builder.addRelation(Type.organization, embed(getDatasourceOrganization(orgId, datasourceId, "isProvidedBy"), Kind.relation));
147
		final String xml = builder.build();
148

    
149
		System.out.println(XMLIndenter.indent(xml));
150
	}
151

    
152
	@Test
153
	public void testProjectWT() throws InvalidProtocolBufferException {
154
		final String projectId = "40|ec__________::20012100000000000000000000000000";
155
		builder.setMainEntity(embed(getProjectWT(), Kind.entity, false, false, "", "wellcometrust"));
156
		builder.addChild(Type.organization, embed(getProjectOrganization(projectId, "20|ec__________::organizationId000000000000000000", "isParticipant"), Kind.relation));
157
		final String xml = builder.build();
158

    
159
		System.out.println(XMLIndenter.indent(xml));
160
	}
161

    
162
	@Test
163
	public void testResultWT() throws InvalidProtocolBufferException {
164
		final String resultId = "50|WOS_________::00001000000000000000000000000000";
165
		builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "arxiv"));
166
		builder.addRelation(Type.person, embed(getPublicationPerson(  "50|WOS_________::00001name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation));
167
		builder.addRelation(Type.project, embed(getPublicationProject("40|wt__________::08753600000000000000000000000000", resultId, getProjectWT(), "produces"), Kind.relation));
168
		builder.addChild(Type.publication, embed(getDedupRel(        "50|wt__________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation));
169
		final String xml = builder.build();
170

    
171
		System.out.println(XMLIndenter.indent(xml));
172
	}
173

    
174
	@Test
175
	public void testUrlFilter() throws InvalidProtocolBufferException {
176

    
177
		final List<String> filtered =
178
				Lists.newArrayList(Iterables.filter(Lists.newArrayList("http://www.google.com", "www.google.com"), AbstractDNetXsltFunctions.urlFilter));
179

    
180
		assertTrue(filtered.size() == 1);
181
	}
182

    
183

    
184
}
185

    
(3-3/3)