Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.oai;
2

    
3
import java.io.IOException;
4
import java.text.ParseException;
5
import java.util.Date;
6

    
7
import com.google.common.collect.ArrayListMultimap;
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Multimap;
10
import com.mongodb.DBObject;
11
import com.mongodb.client.MongoCollection;
12
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
13
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
14
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
15
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
16
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
17
import org.apache.commons.io.IOUtils;
18
import org.apache.hadoop.mapreduce.Counter;
19
import org.dom4j.DocumentException;
20
import org.junit.Before;
21
import org.junit.Test;
22
import org.mockito.Matchers;
23
import org.mockito.Mock;
24
import org.mockito.Mockito;
25
import org.mockito.MockitoAnnotations;
26
import org.springframework.core.io.ClassPathResource;
27
import org.springframework.core.io.Resource;
28

    
29
import static org.junit.Assert.*;
30

    
31
public class OAIFeedMapperTest {
32

    
33
	private OaiFeedMapper oaiFeedMapper;
34

    
35
	/** File path where to find the XML configuration profile **/
36
	private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml";
37

    
38
	private OAIConfigurationStringReader oaiConfigurationReader;
39
	private OAIConfiguration oaiConfiguration;
40

    
41
	@Mock
42
	private MongoSetCollection mongoSetCollection;
43
	@Mock
44
	private MongoCollection<DBObject> discardedCollection;
45
	@SuppressWarnings("rawtypes")
46
	@Mock
47
	private org.apache.hadoop.mapreduce.Mapper.Context context;
48
	@Mock
49
	private Counter counter;
50

    
51
	private Date feedDate;
52
	private String objId1 = "oai:dnet:openaire____::2fa6b215ace86e409dde3ba4b2a6b504";
53
	private String goodRecord = "<?xml version=\"1.0\"?>\n<record>\n  <result xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n    xmlns:dnet=\"eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions\"\n    xmlns:dr=\"http://www.driver-repository.eu/namespace/dr\"\n    xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n    <header>\n      <dri:objIdentifier>openaire____::2fa6b215ace86e409dde3ba4b2a6b504</dri:objIdentifier>\n      <dri:repositoryId/>\n      <dri:dateOfCollection>2013-10-09</dri:dateOfCollection>\n  <dri:dateOfTransformation>2013-10-09</dri:dateOfTransformation>  </header>\n    <metadata>\n      <oaf:entity xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \n\t\t    xmlns:oaf=\"http://namespace.openaire.eu/oaf\" \n\t\t    xsi:schemaLocation=\"http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.1/oaf-0.1.xsd\">\n\t\t<oaf:datasource>\n\t\t\t<officialname>The Internet Journal of Orthopedic Surgery</officialname><englishname>The Internet Journal of Orthopedic Surgery</englishname><websiteurl>http://www.ispub.com/journal/the-internet-journal-of-orthopedic-surgery/</websiteurl><accessinfopackage/><namespaceprefix>issn15312968</namespaceprefix><datasourcetypeui classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><datasourcetype classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><openairecompatibility classid=\"notCompatible\" classname=\"notCompatible\" schemeid=\"dnet:compatibilityLevel\" schemename=\"dnet:compatibilityLevel\"/><latitude>0.0</latitude><longitude>0.0</longitude><subjects/><policies name=\"\" id=\"\"/><logourl/><contactemail/><dateofvalidation/><description/><odnumberofitems/><odnumberofitemsdate/><odpolicies/><odlanguages/><odcontenttypes/><releasestartdate/><releaseenddate/><missionstatementurl/><dataprovider>false</dataprovider><serviceprovider>false</serviceprovider><databaseaccesstype/><datauploadtype/><databaseaccessrestriction/><datauploadrestriction/><versioning>false</versioning><citationguidelineurl/><qualitymanagementkind/><pidsystems/><certificates/><originalId>openaire____::issn15312968</originalId><collectedfrom name=\"DOAJ-Articles\" id=\"driver______::bee53aa31dc2cbb538c10c2b65fa5824\"/><pid/><datainfo><inferred>false</inferred><deletedbyinference>false</deletedbyinference><trust>0.9</trust><inferenceprovenance/><provenanceaction classid=\"UNKNOWN\" classname=\"UNKNOWN\" schemeid=\"dnet:provenanceActions\" schemename=\"dnet:provenanceActions\"/></datainfo>\n\t\t  <rels>\n\t\t  </rels>\n\t\t  <children>\n\t\t  </children>\n\t\t</oaf:datasource>\n      </oaf:entity>\n    </metadata>\n  </result>\n</record>";
54
	private String dedupedRecord = "dedupedRecord.xml";
55
	private String representativeRecord = "representativeRecord.xml";
56

    
57
	@Before
58
	public void setUp() throws Exception {
59
		MockitoAnnotations.initMocks(this);
60
		oaiFeedMapper = new OaiFeedMapper();
61

    
62
		Resource resource = new ClassPathResource(pathToProfile);
63
		// setting up the parser and the profile as a string
64
		String configurationProfile = IOUtils.toString(resource.getInputStream());
65

    
66
		System.out.println("oaiConfiguration:\n" + configurationProfile);
67
		oaiConfigurationReader = new OAIConfigurationStringReader(configurationProfile);
68

    
69
		oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();
70

    
71
		//String feedDateString = DateUtils.now_ISO8601();
72
		String feedDateString = "2017-12-18T12:00:04+00:00";
73
		try {
74
			feedDate = org.apache.commons.lang.time.DateUtils.parseDate(
75
					feedDateString,
76
					new String[]{ "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ssZ" });
77
		} catch (ParseException e) {
78
			e.printStackTrace(System.err);
79
			throw new RuntimeException(e);
80
		}
81

    
82
		oaiFeedMapper.setFeedDate(feedDate);
83
		oaiFeedMapper.setMongoSetCollection(mongoSetCollection);
84
		oaiFeedMapper.setOaiConfiguration(oaiConfiguration);
85
		oaiFeedMapper.setOaiConfigurationReader(oaiConfigurationReader);
86
		oaiFeedMapper.setDiscardedCollection(discardedCollection);
87
		oaiFeedMapper.setSkipDuplicates(true);
88
		oaiFeedMapper.setDuplicateXPath("//entity//datainfo/deletedbyinference[./text() = 'true']");
89

    
90
		Mockito.when(mongoSetCollection.normalizeSetSpec(Matchers.anyString())).thenReturn("aNormalisedSetName");
91
		Mockito.doNothing().when(counter).increment(Matchers.anyLong());
92
		Mockito.when(context.getCounter(Matchers.anyString(), Matchers.anyString())).thenReturn(counter);
93
		//TODO: fix mock
94
		//Mockito.when(discardedCollection.insertOne((DBObject) Matchers.any());).thenReturn(null);
95

    
96
	}
97

    
98
	@Test
99
	public void testCreateBasicObject() throws DocumentException, IOException, InterruptedException {
100
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
101
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
102
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, goodRecord, parsedRecord, context);
103
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
104
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
105
		System.out.println(obj);
106
		assertNotNull(obj);
107
	}
108

    
109
	@Test
110
	public void testCreateBasicObjectRep() throws DocumentException, IOException, InterruptedException {
111
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
112
		String repRecordString = IOUtils.toString(this.getClass().getResourceAsStream(representativeRecord));
113
		Multimap<String, String> parsedRecord = parser.extractFields(repRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
114
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, repRecordString, parsedRecord, context);
115
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
116
		// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
117
		System.out.println(obj);
118
		assertNotNull(obj);
119
	}
120

    
121
	@Test
122
	public void testParseDeduplicated() throws IOException {
123
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
124
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
125
		parser.setSkipDuplicates(true);
126
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
127
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
128
		assertFalse(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
129
	}
130

    
131
	@Test
132
	public void testParseDeduplicated2() throws IOException {
133
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
134
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
135
		parser.setSkipDuplicates(true);
136
		parser.setDuplicateXPath("//x");
137
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
138
		assertTrue(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
139
	}
140

    
141
	@Test
142
	public void testCheckRecordFieldsDeduplicated() {
143
		Multimap<String, String> recordProps = ArrayListMultimap.create();
144
		recordProps.put("duplicate", "true");
145
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
146
	}
147

    
148
	@Test
149
	public void testCheckRecordFieldsNotDeduplicated() {
150
		Multimap<String, String> recordProps = ArrayListMultimap.create();
151
		recordProps.put("duplicate", "false");
152
		recordProps.put(OAIConfigurationReader.ID_FIELD, "10|xxxx");
153
		assertTrue(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
154
	}
155

    
156
	@Test
157
	public void testCheckRecordFieldsEmpty() {
158
		Multimap<String, String> recordProps = ArrayListMultimap.create();
159
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
160
	}
161

    
162
	@Test
163
	public void testCheckRecordFieldsNull() {
164
		Multimap<String, String> recordProps = null;
165
		assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
166
	}
167

    
168
	@Test
169
	public void testTransformationDate(){
170
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
171
		parser.setSkipDuplicates(true);
172
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
173
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
174
		System.out.println(parsedRecord);
175
		assertTrue(parsedRecord.containsKey("dateOfTransformation"));
176
	}
177
}
(1-1/2)