Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.oai;
2

    
3
import java.io.IOException;
4
import java.text.ParseException;
5
import java.util.Date;
6

    
7
import com.google.common.collect.ArrayListMultimap;
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Multimap;
10
import com.mongodb.DBObject;
11
import com.mongodb.client.MongoCollection;
12
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
13
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
14
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
15
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
16
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
17
import eu.dnetlib.miscutils.datetime.DateUtils;
18
import org.apache.commons.io.IOUtils;
19
import org.apache.hadoop.mapreduce.Counter;
20
import org.apache.solr.common.util.DateUtil;
21
import org.dom4j.DocumentException;
22
import org.junit.Before;
23
import org.junit.Test;
24
import org.mockito.Matchers;
25
import org.mockito.Mock;
26
import org.mockito.Mockito;
27
import org.mockito.MockitoAnnotations;
28
import org.springframework.core.io.ClassPathResource;
29
import org.springframework.core.io.Resource;
30

    
31
import static org.junit.Assert.*;
32

    
33
public class OAIFeedMapperTest {
34

    
35
	private OaiFeedMapper oaiFeedMapper;
36

    
37
	/** File path where to find the XML configuration profile **/
38
	private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml";
39

    
40
	private OAIConfigurationStringReader oaiConfigurationReader;
41
	private OAIConfiguration oaiConfiguration;
42

    
43
	@Mock
44
	private MongoSetCollection mongoSetCollection;
45
	@Mock
46
	private MongoCollection<DBObject> discardedCollection;
47
	@SuppressWarnings("rawtypes")
48
	@Mock
49
	private org.apache.hadoop.mapreduce.Mapper.Context context;
50
	@Mock
51
	private Counter counter;
52

    
53
	private Date feedDate;
54
	private String objId1 = "oai:dnet:openaire____::2fa6b215ace86e409dde3ba4b2a6b504";
55
	private String goodRecord = "<?xml version=\"1.0\"?>\n<record>\n  <result xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n    xmlns:dnet=\"eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions\"\n    xmlns:dr=\"http://www.driver-repository.eu/namespace/dr\"\n    xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n    <header>\n      <dri:objIdentifier>openaire____::2fa6b215ace86e409dde3ba4b2a6b504</dri:objIdentifier>\n      <dri:repositoryId/>\n      <dri:dateOfCollection>2013-10-09</dri:dateOfCollection>\n    </header>\n    <metadata>\n      <oaf:entity xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \n\t\t    xmlns:oaf=\"http://namespace.openaire.eu/oaf\" \n\t\t    xsi:schemaLocation=\"http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.1/oaf-0.1.xsd\">\n\t\t<oaf:datasource>\n\t\t\t<officialname>The Internet Journal of Orthopedic Surgery</officialname><englishname>The Internet Journal of Orthopedic Surgery</englishname><websiteurl>http://www.ispub.com/journal/the-internet-journal-of-orthopedic-surgery/</websiteurl><accessinfopackage/><namespaceprefix>issn15312968</namespaceprefix><datasourcetypeui classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><datasourcetype classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><openairecompatibility classid=\"notCompatible\" classname=\"notCompatible\" schemeid=\"dnet:compatibilityLevel\" schemename=\"dnet:compatibilityLevel\"/><latitude>0.0</latitude><longitude>0.0</longitude><subjects/><policies name=\"\" id=\"\"/><logourl/><contactemail/><dateofvalidation/><description/><odnumberofitems/><odnumberofitemsdate/><odpolicies/><odlanguages/><odcontenttypes/><releasestartdate/><releaseenddate/><missionstatementurl/><dataprovider>false</dataprovider><serviceprovider>false</serviceprovider><databaseaccesstype/><datauploadtype/><databaseaccessrestriction/><datauploadrestriction/><versioning>false</versioning><citationguidelineurl/><qualitymanagementkind/><pidsystems/><certificates/><originalId>openaire____::issn15312968</originalId><collectedfrom name=\"DOAJ-Articles\" id=\"driver______::bee53aa31dc2cbb538c10c2b65fa5824\"/><pid/><datainfo><inferred>false</inferred><deletedbyinference>false</deletedbyinference><trust>0.9</trust><inferenceprovenance/><provenanceaction classid=\"UNKNOWN\" classname=\"UNKNOWN\" schemeid=\"dnet:provenanceActions\" schemename=\"dnet:provenanceActions\"/></datainfo>\n\t\t  <rels>\n\t\t  </rels>\n\t\t  <children>\n\t\t  </children>\n\t\t</oaf:datasource>\n      </oaf:entity>\n    </metadata>\n  </result>\n</record>";
56
	private String dedupedRecord = "dedupedRecord.xml";
57
	private String representativeRecord = "representativeRecord.xml";
58

    
59
	@Before
60
	public void setUp() throws Exception {
61
		MockitoAnnotations.initMocks(this);
62
		oaiFeedMapper = new OaiFeedMapper();
63

    
64
		Resource resource = new ClassPathResource(pathToProfile);
65
		// setting up the parser and the profile as a string
66
		String configurationProfile = IOUtils.toString(resource.getInputStream());
67

    
68
		System.out.println("oaiConfiguration:\n" + configurationProfile);
69
		oaiConfigurationReader = new OAIConfigurationStringReader(configurationProfile);
70

    
71
		oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();
72

    
73
		String feedDateString = DateUtils.now_ISO8601();
74
		try {
75
			feedDate = DateUtil.parseDate(feedDateString);
76
		} catch (ParseException e) {
77
			e.printStackTrace(System.err);
78
			throw new RuntimeException(e);
79
		}
80

    
81
		oaiFeedMapper.setFeedDate(feedDate);
82
		oaiFeedMapper.setMongoSetCollection(mongoSetCollection);
83
		oaiFeedMapper.setOaiConfiguration(oaiConfiguration);
84
		oaiFeedMapper.setOaiConfigurationReader(oaiConfigurationReader);
85
		oaiFeedMapper.setDiscardedCollection(discardedCollection);
86
		oaiFeedMapper.setSkipDuplicates(true);
87
		oaiFeedMapper.setDuplicateXPath("//entity//datainfo/deletedbyinference[./text() = 'true']");
88

    
89
		Mockito.when(mongoSetCollection.normalizeSetSpec(Matchers.anyString())).thenReturn("aNormalisedSetName");
90
		Mockito.doNothing().when(counter).increment(Matchers.anyLong());
91
		Mockito.when(context.getCounter(Matchers.anyString(), Matchers.anyString())).thenReturn(counter);
92
		//TODO: fix mock
93
		//Mockito.when(discardedCollection.insertOne((DBObject) Matchers.any());).thenReturn(null);
94

    
95
	}
96

    
97
	@Test
98
	public void testCreateBasicObject() throws DocumentException, IOException, InterruptedException {
99
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
100
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
101
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, goodRecord, parsedRecord, context);
102
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
103
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
104
		System.out.println(obj);
105
		assertNotNull(obj);
106
	}
107

    
108
	@Test
109
	public void testCreateBasicObjectRep() throws DocumentException, IOException, InterruptedException {
110
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
111
		String repRecordString = IOUtils.toString(this.getClass().getResourceAsStream(representativeRecord));
112
		Multimap<String, String> parsedRecord = parser.extractFields(repRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
113
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, repRecordString, parsedRecord, context);
114
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
115
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
116
		System.out.println(obj);
117
		assertNotNull(obj);
118
	}
119

    
120
	@Test
121
	public void testParseDeduplicated() throws IOException {
122
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
123
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
124
		parser.setSkipDuplicates(true);
125
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
126
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
127
		assertFalse(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
128
	}
129

    
130
	@Test
131
	public void testParseDeduplicated2() throws IOException {
132
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
133
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
134
		parser.setSkipDuplicates(true);
135
		parser.setDuplicateXPath("//x");
136
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
137
		assertTrue(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
138
	}
139

    
140
	@Test
141
	public void testCheckRecordFieldsDeduplicated() {
142
		Multimap<String, String> recordProps = ArrayListMultimap.create();
143
		recordProps.put("duplicate", "true");
144
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
145
	}
146

    
147
	@Test
148
	public void testCheckRecordFieldsNotDeduplicated() {
149
		Multimap<String, String> recordProps = ArrayListMultimap.create();
150
		recordProps.put("duplicate", "false");
151
		recordProps.put(OAIConfigurationReader.ID_FIELD, "10|xxxx");
152
		assertTrue(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
153
	}
154

    
155
	@Test
156
	public void testCheckRecordFieldsEmpty() {
157
		Multimap<String, String> recordProps = ArrayListMultimap.create();
158
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
159
	}
160

    
161
	@Test
162
	public void testCheckRecordFieldsNull() {
163
		Multimap<String, String> recordProps = null;
164
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
165
	}
166
}
(1-1/2)