Project

General

Profile

1 28303 alessia.ba
package eu.dnetlib.data.mapreduce.hbase.oai;
2
3
import java.io.IOException;
4
import java.text.ParseException;
5
import java.util.Date;
6
7 46586 alessia.ba
import com.google.common.collect.ArrayListMultimap;
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Multimap;
10
import com.mongodb.DBObject;
11
import com.mongodb.client.MongoCollection;
12
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
13
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
14
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
15
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
16
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
17 29009 claudio.at
import org.apache.commons.io.IOUtils;
18 29014 alessia.ba
import org.apache.hadoop.mapreduce.Counter;
19 28303 alessia.ba
import org.dom4j.DocumentException;
20
import org.junit.Before;
21
import org.junit.Test;
22 28309 alessia.ba
import org.mockito.Matchers;
23 28303 alessia.ba
import org.mockito.Mock;
24
import org.mockito.Mockito;
25
import org.mockito.MockitoAnnotations;
26 29009 claudio.at
import org.springframework.core.io.ClassPathResource;
27
import org.springframework.core.io.Resource;
28 28303 alessia.ba
29 46586 alessia.ba
import static org.junit.Assert.*;
30 28303 alessia.ba
31
public class OAIFeedMapperTest {
32
33
	private OaiFeedMapper oaiFeedMapper;
34
35 29009 claudio.at
	/** File path where to find the XML configuration profile **/
36
	private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml";
37
38 28303 alessia.ba
	private OAIConfigurationStringReader oaiConfigurationReader;
39
	private OAIConfiguration oaiConfiguration;
40
41 28309 alessia.ba
	@Mock
42 28303 alessia.ba
	private MongoSetCollection mongoSetCollection;
43 34374 alessia.ba
	@Mock
44 46586 alessia.ba
	private MongoCollection<DBObject> discardedCollection;
45 29014 alessia.ba
	@SuppressWarnings("rawtypes")
46
	@Mock
47
	private org.apache.hadoop.mapreduce.Mapper.Context context;
48
	@Mock
49
	private Counter counter;
50 28303 alessia.ba
51
	private Date feedDate;
52
	private String objId1 = "oai:dnet:openaire____::2fa6b215ace86e409dde3ba4b2a6b504";
53 49891 alessia.ba
	private String goodRecord = "<?xml version=\"1.0\"?>\n<record>\n  <result xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n    xmlns:dnet=\"eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions\"\n    xmlns:dr=\"http://www.driver-repository.eu/namespace/dr\"\n    xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n    <header>\n      <dri:objIdentifier>openaire____::2fa6b215ace86e409dde3ba4b2a6b504</dri:objIdentifier>\n      <dri:repositoryId/>\n      <dri:dateOfCollection>2013-10-09</dri:dateOfCollection>\n  <dri:dateOfTransformation>2013-10-09</dri:dateOfTransformation>  </header>\n    <metadata>\n      <oaf:entity xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \n\t\t    xmlns:oaf=\"http://namespace.openaire.eu/oaf\" \n\t\t    xsi:schemaLocation=\"http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.1/oaf-0.1.xsd\">\n\t\t<oaf:datasource>\n\t\t\t<officialname>The Internet Journal of Orthopedic Surgery</officialname><englishname>The Internet Journal of Orthopedic Surgery</englishname><websiteurl>http://www.ispub.com/journal/the-internet-journal-of-orthopedic-surgery/</websiteurl><accessinfopackage/><namespaceprefix>issn15312968</namespaceprefix><datasourcetypeui classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><datasourcetype classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><openairecompatibility classid=\"notCompatible\" classname=\"notCompatible\" schemeid=\"dnet:compatibilityLevel\" schemename=\"dnet:compatibilityLevel\"/><latitude>0.0</latitude><longitude>0.0</longitude><subjects/><policies name=\"\" id=\"\"/><logourl/><contactemail/><dateofvalidation/><description/><odnumberofitems/><odnumberofitemsdate/><odpolicies/><odlanguages/><odcontenttypes/><releasestartdate/><releaseenddate/><missionstatementurl/><dataprovider>false</dataprovider><serviceprovider>false</serviceprovider><databaseaccesstype/><datauploadtype/><databaseaccessrestriction/><datauploadrestriction/><versioning>false</versioning><citationguidelineurl/><qualitymanagementkind/><pidsystems/><certificates/><originalId>openaire____::issn15312968</originalId><collectedfrom name=\"DOAJ-Articles\" id=\"driver______::bee53aa31dc2cbb538c10c2b65fa5824\"/><pid/><datainfo><inferred>false</inferred><deletedbyinference>false</deletedbyinference><trust>0.9</trust><inferenceprovenance/><provenanceaction classid=\"UNKNOWN\" classname=\"UNKNOWN\" schemeid=\"dnet:provenanceActions\" schemename=\"dnet:provenanceActions\"/></datainfo>\n\t\t  <rels>\n\t\t  </rels>\n\t\t  <children>\n\t\t  </children>\n\t\t</oaf:datasource>\n      </oaf:entity>\n    </metadata>\n  </result>\n</record>";
54 34374 alessia.ba
	private String dedupedRecord = "dedupedRecord.xml";
55 37821 alessia.ba
	private String representativeRecord = "representativeRecord.xml";
56 28303 alessia.ba
57
	@Before
58
	public void setUp() throws Exception {
59
		MockitoAnnotations.initMocks(this);
60
		oaiFeedMapper = new OaiFeedMapper();
61 29000 alessia.ba
62 29009 claudio.at
		Resource resource = new ClassPathResource(pathToProfile);
63
		// setting up the parser and the profile as a string
64
		String configurationProfile = IOUtils.toString(resource.getInputStream());
65
66
		System.out.println("oaiConfiguration:\n" + configurationProfile);
67
		oaiConfigurationReader = new OAIConfigurationStringReader(configurationProfile);
68
69 28303 alessia.ba
		oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();
70
71 50157 alessia.ba
		//String feedDateString = DateUtils.now_ISO8601();
72
		String feedDateString = "2017-12-18T12:00:04+00:00";
73 28303 alessia.ba
		try {
74 48892 claudio.at
			feedDate = org.apache.commons.lang.time.DateUtils.parseDate(
75
					feedDateString,
76
					new String[]{ "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ssZ" });
77 28303 alessia.ba
		} catch (ParseException e) {
78
			e.printStackTrace(System.err);
79
			throw new RuntimeException(e);
80
		}
81
82
		oaiFeedMapper.setFeedDate(feedDate);
83
		oaiFeedMapper.setMongoSetCollection(mongoSetCollection);
84
		oaiFeedMapper.setOaiConfiguration(oaiConfiguration);
85
		oaiFeedMapper.setOaiConfigurationReader(oaiConfigurationReader);
86 34374 alessia.ba
		oaiFeedMapper.setDiscardedCollection(discardedCollection);
87
		oaiFeedMapper.setSkipDuplicates(true);
88
		oaiFeedMapper.setDuplicateXPath("//entity//datainfo/deletedbyinference[./text() = 'true']");
89 28303 alessia.ba
90 28309 alessia.ba
		Mockito.when(mongoSetCollection.normalizeSetSpec(Matchers.anyString())).thenReturn("aNormalisedSetName");
91 29014 alessia.ba
		Mockito.doNothing().when(counter).increment(Matchers.anyLong());
92
		Mockito.when(context.getCounter(Matchers.anyString(), Matchers.anyString())).thenReturn(counter);
93 46586 alessia.ba
		//TODO: fix mock
94
		//Mockito.when(discardedCollection.insertOne((DBObject) Matchers.any());).thenReturn(null);
95 29014 alessia.ba
96 28303 alessia.ba
	}
97
98
	@Test
99 28309 alessia.ba
	public void testCreateBasicObject() throws DocumentException, IOException, InterruptedException {
100 29014 alessia.ba
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
101 37821 alessia.ba
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
102 29014 alessia.ba
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, goodRecord, parsedRecord, context);
103 28309 alessia.ba
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
104
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
105
		System.out.println(obj);
106
		assertNotNull(obj);
107 28303 alessia.ba
	}
108 34374 alessia.ba
109
	@Test
110 37821 alessia.ba
	public void testCreateBasicObjectRep() throws DocumentException, IOException, InterruptedException {
111
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
112
		String repRecordString = IOUtils.toString(this.getClass().getResourceAsStream(representativeRecord));
113
		Multimap<String, String> parsedRecord = parser.extractFields(repRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
114
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, repRecordString, parsedRecord, context);
115
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
116
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
117
		System.out.println(obj);
118
		assertNotNull(obj);
119
	}
120
121
	@Test
122 34374 alessia.ba
	public void testParseDeduplicated() throws IOException {
123
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
124
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
125
		parser.setSkipDuplicates(true);
126
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
127 37821 alessia.ba
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
128 34374 alessia.ba
		assertFalse(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
129
	}
130
131
	@Test
132
	public void testParseDeduplicated2() throws IOException {
133
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
134
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
135
		parser.setSkipDuplicates(true);
136
		parser.setDuplicateXPath("//x");
137 37821 alessia.ba
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
138 34374 alessia.ba
		assertTrue(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
139
	}
140
141
	@Test
142
	public void testCheckRecordFieldsDeduplicated() {
143
		Multimap<String, String> recordProps = ArrayListMultimap.create();
144
		recordProps.put("duplicate", "true");
145
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
146
	}
147
148
	@Test
149
	public void testCheckRecordFieldsNotDeduplicated() {
150
		Multimap<String, String> recordProps = ArrayListMultimap.create();
151
		recordProps.put("duplicate", "false");
152
		recordProps.put(OAIConfigurationReader.ID_FIELD, "10|xxxx");
153
		assertTrue(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
154
	}
155
156
	@Test
157
	public void testCheckRecordFieldsEmpty() {
158
		Multimap<String, String> recordProps = ArrayListMultimap.create();
159
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
160
	}
161
162
	@Test
163
	public void testCheckRecordFieldsNull() {
164
		Multimap<String, String> recordProps = null;
165
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
166
	}
167 49891 alessia.ba
168
	@Test
169
	public void testTransformationDate(){
170
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
171
		parser.setSkipDuplicates(true);
172
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
173
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
174
		System.out.println(parsedRecord);
175
		assertTrue(parsedRecord.containsKey("dateOfTransformation"));
176
	}
177 28303 alessia.ba
}