Project

General

Profile

« Previous | Next » 

Revision 52112

Do not add properties to the BasicDBObject that are not listed as fields to index

View differences:

modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/oai/utils/RecordFieldsExtractor.java
63 63
			if (skipDuplicates && isDuplicate(doc)) {
64 64
				recordProps.put("duplicate", "true");
65 65
			}
66
			//dates
66
			//dates: note that this will be used to generate the LAST_COLLECTION_DATE_FIELD and DATESTAMP_FIELD
67 67
			Node coll = doc.selectSingleNode("//*[local-name()='header']/*[local-name()='dateOfCollection']");
68 68
			Node trans = doc.selectSingleNode("//*[local-name()='header']/*[local-name()='dateOfTransformation']");
69 69
			if(coll != null && StringUtils.isNotBlank(coll.getText())){
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/oai/OaiFeedMapper.java
11 11
import java.util.zip.ZipEntry;
12 12
import java.util.zip.ZipOutputStream;
13 13

  
14
import com.google.common.base.Function;
15 14
import com.google.common.base.Splitter;
16 15
import com.google.common.collect.Iterables;
17 16
import com.google.common.collect.Lists;
......
224 223
				Collection<String> values = recordProperties.get(key);
225 224
				if (key.equals(OAIConfigurationReader.SET_FIELD)) {
226 225

  
227
					Iterable<String> setSpecs = Iterables.transform(values, new Function<String, String>() {
228

  
229
						@Override
230
						public String apply(final String s) {
231
							return mongoSetCollection.normalizeSetSpec(s);
232
						}
233

  
234
					});
226
					Iterable<String> setSpecs = Iterables.transform(values, s -> mongoSetCollection.normalizeSetSpec(s));
235 227
					obj.put(key, setSpecs);
236 228
				} else {
237 229
					PublisherField keyField = fieldsToIndex.get(key);
238
					if (keyField == null) {
239
						context.getCounter("oai", key + " found for record but not in configuration. Assuming it is repeatable.").increment(1);
240
					}
241
					// let's check if the key is the name of a repeatable field or not
242
					if ((keyField != null) && !keyField.isRepeatable()) {
243
						if ((values != null) && !values.isEmpty()) {
244
							obj.put(key, values.iterator().next());
230
					//Skipping record properties that are not listed as fields to index (e.g. date of transformation and collection)
231
					if (keyField != null) {
232
						// let's check if the key is the name of a repeatable field or not
233
						if ((keyField != null) && !keyField.isRepeatable()) {
234
							if ((values != null) && !values.isEmpty()) {
235
								obj.put(key, values.iterator().next());
236
							}
237
						} else {
238
							obj.put(key, values);
245 239
						}
246
					} else {
247
						obj.put(key, values);
248 240
					}
249 241
				}
250 242
			}

Also available in: Unified diff