1
|
package eu.dnetlib.data.mapreduce.hbase.oai;
|
2
|
|
3
|
import java.io.IOException;
|
4
|
import java.text.ParseException;
|
5
|
import java.util.Date;
|
6
|
|
7
|
import com.google.common.collect.ArrayListMultimap;
|
8
|
import com.google.common.collect.Lists;
|
9
|
import com.google.common.collect.Multimap;
|
10
|
import com.mongodb.DBObject;
|
11
|
import com.mongodb.client.MongoCollection;
|
12
|
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration;
|
13
|
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader;
|
14
|
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader;
|
15
|
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection;
|
16
|
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;
|
17
|
import org.apache.commons.io.IOUtils;
|
18
|
import org.apache.hadoop.mapreduce.Counter;
|
19
|
import org.dom4j.DocumentException;
|
20
|
import org.junit.Before;
|
21
|
import org.junit.Test;
|
22
|
import org.mockito.Matchers;
|
23
|
import org.mockito.Mock;
|
24
|
import org.mockito.Mockito;
|
25
|
import org.mockito.MockitoAnnotations;
|
26
|
import org.springframework.core.io.ClassPathResource;
|
27
|
import org.springframework.core.io.Resource;
|
28
|
|
29
|
import static org.junit.Assert.*;
|
30
|
|
31
|
public class OAIFeedMapperTest {
|
32
|
|
33
|
private OaiFeedMapper oaiFeedMapper;
|
34
|
|
35
|
/** Classpath location of the XML configuration profile. */
|
36
|
private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml";
|
37
|
|
38
|
private OAIConfigurationStringReader oaiConfigurationReader;
|
39
|
private OAIConfiguration oaiConfiguration;
|
40
|
|
41
|
@Mock
|
42
|
private MongoSetCollection mongoSetCollection;
|
43
|
@Mock
|
44
|
private MongoCollection<DBObject> discardedCollection;
|
45
|
@SuppressWarnings("rawtypes")
|
46
|
@Mock
|
47
|
private org.apache.hadoop.mapreduce.Mapper.Context context;
|
48
|
@Mock
|
49
|
private Counter counter;
|
50
|
|
51
|
private Date feedDate;
|
52
|
private String objId1 = "oai:dnet:openaire____::2fa6b215ace86e409dde3ba4b2a6b504";
|
53
|
private String goodRecord = "<?xml version=\"1.0\"?>\n<record>\n <result xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n xmlns:dnet=\"eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions\"\n xmlns:dr=\"http://www.driver-repository.eu/namespace/dr\"\n xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n <header>\n <dri:objIdentifier>openaire____::2fa6b215ace86e409dde3ba4b2a6b504</dri:objIdentifier>\n <dri:repositoryId/>\n <dri:dateOfCollection>2013-10-09</dri:dateOfCollection>\n <dri:dateOfTransformation>2013-10-09</dri:dateOfTransformation> </header>\n <metadata>\n <oaf:entity xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \n\t\t xmlns:oaf=\"http://namespace.openaire.eu/oaf\" \n\t\t xsi:schemaLocation=\"http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.1/oaf-0.1.xsd\">\n\t\t<oaf:datasource>\n\t\t\t<officialname>The Internet Journal of Orthopedic Surgery</officialname><englishname>The Internet Journal of Orthopedic Surgery</englishname><websiteurl>http://www.ispub.com/journal/the-internet-journal-of-orthopedic-surgery/</websiteurl><accessinfopackage/><namespaceprefix>issn15312968</namespaceprefix><datasourcetypeui classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><datasourcetype classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><openairecompatibility classid=\"notCompatible\" classname=\"notCompatible\" schemeid=\"dnet:compatibilityLevel\" schemename=\"dnet:compatibilityLevel\"/><latitude>0.0</latitude><longitude>0.0</longitude><subjects/><policies name=\"\" 
id=\"\"/><logourl/><contactemail/><dateofvalidation/><description/><odnumberofitems/><odnumberofitemsdate/><odpolicies/><odlanguages/><odcontenttypes/><releasestartdate/><releaseenddate/><missionstatementurl/><dataprovider>false</dataprovider><serviceprovider>false</serviceprovider><databaseaccesstype/><datauploadtype/><databaseaccessrestriction/><datauploadrestriction/><versioning>false</versioning><citationguidelineurl/><qualitymanagementkind/><pidsystems/><certificates/><originalId>openaire____::issn15312968</originalId><collectedfrom name=\"DOAJ-Articles\" id=\"driver______::bee53aa31dc2cbb538c10c2b65fa5824\"/><pid/><datainfo><inferred>false</inferred><deletedbyinference>false</deletedbyinference><trust>0.9</trust><inferenceprovenance/><provenanceaction classid=\"UNKNOWN\" classname=\"UNKNOWN\" schemeid=\"dnet:provenanceActions\" schemename=\"dnet:provenanceActions\"/></datainfo>\n\t\t <rels>\n\t\t </rels>\n\t\t <children>\n\t\t </children>\n\t\t</oaf:datasource>\n </oaf:entity>\n </metadata>\n </result>\n</record>";
|
54
|
private String dedupedRecord = "dedupedRecord.xml";
|
55
|
private String representativeRecord = "representativeRecord.xml";
|
56
|
|
57
|
@Before
|
58
|
public void setUp() throws Exception {
|
59
|
MockitoAnnotations.initMocks(this);
|
60
|
oaiFeedMapper = new OaiFeedMapper();
|
61
|
|
62
|
Resource resource = new ClassPathResource(pathToProfile);
|
63
|
// setting up the parser and the profile as a string
|
64
|
String configurationProfile = IOUtils.toString(resource.getInputStream());
|
65
|
|
66
|
System.out.println("oaiConfiguration:\n" + configurationProfile);
|
67
|
oaiConfigurationReader = new OAIConfigurationStringReader(configurationProfile);
|
68
|
|
69
|
oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();
|
70
|
|
71
|
//String feedDateString = DateUtils.now_ISO8601();
|
72
|
String feedDateString = "2017-12-18T12:00:04+00:00";
|
73
|
try {
|
74
|
feedDate = org.apache.commons.lang.time.DateUtils.parseDate(
|
75
|
feedDateString,
|
76
|
new String[]{ "yyyy-MM-dd'T'HH:mm:ssXXX", "yyyy-MM-dd'T'HH:mm:ssZ" });
|
77
|
} catch (ParseException e) {
|
78
|
e.printStackTrace(System.err);
|
79
|
throw new RuntimeException(e);
|
80
|
}
|
81
|
|
82
|
oaiFeedMapper.setFeedDate(feedDate);
|
83
|
oaiFeedMapper.setMongoSetCollection(mongoSetCollection);
|
84
|
oaiFeedMapper.setOaiConfiguration(oaiConfiguration);
|
85
|
oaiFeedMapper.setOaiConfigurationReader(oaiConfigurationReader);
|
86
|
oaiFeedMapper.setDiscardedCollection(discardedCollection);
|
87
|
oaiFeedMapper.setSkipDuplicates(true);
|
88
|
oaiFeedMapper.setDuplicateXPath("//entity//datainfo/deletedbyinference[./text() = 'true']");
|
89
|
|
90
|
Mockito.when(mongoSetCollection.normalizeSetSpec(Matchers.anyString())).thenReturn("aNormalisedSetName");
|
91
|
Mockito.doNothing().when(counter).increment(Matchers.anyLong());
|
92
|
Mockito.when(context.getCounter(Matchers.anyString(), Matchers.anyString())).thenReturn(counter);
|
93
|
//TODO: fix mock
|
94
|
//Mockito.when(discardedCollection.insertOne((DBObject) Matchers.any());).thenReturn(null);
|
95
|
|
96
|
}
|
97
|
|
98
|
@Test
|
99
|
public void testCreateBasicObject() throws DocumentException, IOException, InterruptedException {
|
100
|
RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
|
101
|
Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
|
102
|
DBObject obj = oaiFeedMapper.createBasicObject(objId1, goodRecord, parsedRecord, context);
|
103
|
// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
|
104
|
// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
|
105
|
System.out.println(obj);
|
106
|
assertNotNull(obj);
|
107
|
}
|
108
|
|
109
|
@Test
|
110
|
public void testCreateBasicObjectRep() throws DocumentException, IOException, InterruptedException {
|
111
|
RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
|
112
|
String repRecordString = IOUtils.toString(this.getClass().getResourceAsStream(representativeRecord));
|
113
|
Multimap<String, String> parsedRecord = parser.extractFields(repRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
|
114
|
DBObject obj = oaiFeedMapper.createBasicObject(objId1, repRecordString, parsedRecord, context);
|
115
|
// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD,UPDATED_FIELD are not set by the method we are testing, but by the caller
|
116
|
// method (handleRecord) because they values to set depend on the record status (NEW|UPDATED|UNCHANGED)
|
117
|
System.out.println(obj);
|
118
|
assertNotNull(obj);
|
119
|
}
|
120
|
|
121
|
@Test
|
122
|
public void testParseDeduplicated() throws IOException {
|
123
|
RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
|
124
|
String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
|
125
|
parser.setSkipDuplicates(true);
|
126
|
parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
|
127
|
Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
|
128
|
assertFalse(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
|
129
|
}
|
130
|
|
131
|
@Test
|
132
|
public void testParseDeduplicated2() throws IOException {
|
133
|
RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
|
134
|
String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
|
135
|
parser.setSkipDuplicates(true);
|
136
|
parser.setDuplicateXPath("//x");
|
137
|
Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
|
138
|
assertTrue(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
|
139
|
}
|
140
|
|
141
|
@Test
|
142
|
public void testCheckRecordFieldsDeduplicated() {
|
143
|
Multimap<String, String> recordProps = ArrayListMultimap.create();
|
144
|
recordProps.put("duplicate", "true");
|
145
|
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
|
146
|
}
|
147
|
|
148
|
@Test
|
149
|
public void testCheckRecordFieldsNotDeduplicated() {
|
150
|
Multimap<String, String> recordProps = ArrayListMultimap.create();
|
151
|
recordProps.put("duplicate", "false");
|
152
|
recordProps.put(OAIConfigurationReader.ID_FIELD, "10|xxxx");
|
153
|
assertTrue(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
|
154
|
}
|
155
|
|
156
|
@Test
|
157
|
public void testCheckRecordFieldsEmpty() {
|
158
|
Multimap<String, String> recordProps = ArrayListMultimap.create();
|
159
|
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
|
160
|
}
|
161
|
|
162
|
@Test
|
163
|
public void testCheckRecordFieldsNull() {
|
164
|
Multimap<String, String> recordProps = null;
|
165
|
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord));
|
166
|
}
|
167
|
|
168
|
@Test
|
169
|
public void testTransformationDate(){
|
170
|
RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
|
171
|
parser.setSkipDuplicates(true);
|
172
|
parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
|
173
|
Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
|
174
|
System.out.println(parsedRecord);
|
175
|
assertTrue(parsedRecord.containsKey("dateOfTransformation"));
|
176
|
}
|
177
|
}
|