Project

General

Profile

« Previous | Next » 

Revision 46587

Implemented opt-in/opt-out (includeFields/excludeFields) rules for entity fields (#2557).
The build now depends on a specific solrj version (4.10.4), thus excluding the cdh6.X versions.

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfigTest.java
34 34
					+ "{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, "
35 35
					+ "{ relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, "
36 36
					+ "{ relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, "
37
					+ "{ relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }"
37
					+ "{ relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, "
38
					+ "{ relType = resultOrganization_affiliation_isAuthorInstitutionOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }"
38 39
					+ "]}, "
39 40
					+ "project { dups = false, links = ["
40 41
					+ "{ relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, "
41 42
					+ "{ relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, "
42 43
					+ "{ relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } "
43
					+ "]}} ";
44
					+ "], excludeFields = [jsonextrainfo,optional1,optional2]}} ";
44 45

  
46
	public static final String configWithIncludesExcludes =
47
			"index.conf { "
48
					+ "result { dups = true, links = ["
49
					+ "{ relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid] }, "
50
					+ "{ relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid] }, "
51
					+ "{ relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid,url,collectedfrom] },"
52
					+ "{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid] },"
53
					+ "{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid] },"
54
					+ "{ relType = resultResult_supplement_isSupplementTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url,collectedfrom] },"
55
					+ "{ relType = resultResult_supplement_isSupplementedBy, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url,collectedfrom] },"
56
					+ "{ relType = resultResult_part_isPartOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url] },"
57
					+ "{ relType = resultResult_part_hasPart, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url] }"
58
					+ "], excludeFields = [jsonextrainfo] }, "
59
					+ "person { dups = false, links = ["
60
					+ "{ relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking,pid], max = 2 }, "
61
					+ "{ relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } "
62
					+ "], includeFields=[fullname, ranking]}"
63
					+ "}";
64

  
65
	public static final String wrongConfig =
66
			"index.conf { "
67
					+ "person { dups = false, links = ["
68
					+ "{ relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking,pid], max = 2 }, "
69
					+ "{ relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } "
70
					+ "], includeFields=[fullname, ranking], excludeFields=[email] }"
71
					+ "}";
72

  
73

  
45 74
	public static final String context = "[{id:'egi', label:'EGI', element:'context'}, "
46 75
			+ "{id:'egi::classification', label:'EGI classification scheme', element:'category'}, "
47 76
			+ "{id:'egi::classification::natsc', label:'Natural Sciences', element:'concept'}, "
......
88 117
				assertNotNull(relDescriptor.getRelType());
89 118
				assertNotNull(relDescriptor.getSubRelType());
90 119

  
91
				// System.out.println(ld);
120
				System.out.println(ld);
92 121
			}
93 122
		}
94 123
	}
95 124

  
125
	@Test
126
	public void testLoadEntityConfTableIncludeExclude() {
127
		final IndexConfig conf = IndexConfig.load(configWithIncludesExcludes);
128
		final EntityConfigTable map = conf.getConfigMap();
129
		assertNotNull(map);
130
		assertTrue(map.size() > 0);
131
		assertTrue(map.hasIncludeFields(Type.person));
132
		assertTrue(map.hasExcludeFields(Type.result));
133
	}
134

  
135
	@Test(expected = RuntimeException.class)
136
	public void testBothIncludeExclude() {
137
		final IndexConfig conf = IndexConfig.load(wrongConfig);
138
		final EntityConfigTable map = conf.getConfigMap();
139
	}
140

  
141

  
142

  
143

  
96 144
}
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java
155 155
		doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml"));
156 156
	}
157 157

  
158

  
158 159
	@Test
159 160
	public void testParseOaf() throws Exception {
160 161

  
......
272 273
	}
273 274

  
274 275
	@Test
276
	public void testLinkOrganizationAffiliation() throws Exception {
277

  
278
		final List<Row> rows = Lists.newArrayList();
279
		rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml")));
280
		rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml")));
281
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml")));
282

  
283
		printAll(mapAll(buildTable(rows)));
284
	}
285

  
286
	@Test
275 287
	public void testUnpackAuthors() throws Exception {
276 288

  
277 289
		final Map<String, Object> xslParams = Maps.newHashMap();
......
415 427

  
416 428
	@Test
417 429
	public void testProjectExtraInfo() throws Exception {
418

  
419 430
		final List<Row> rows = Lists.newArrayList();
420 431
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml")));
421 432
		printAll(mapAll(buildTable(rows)));
......
730 741
						log.debug(doc.valueOf(xpath));
731 742
					}
732 743
				} else {
733

  
734 744
					log.info(val);
735 745
				}
736 746
			}
......
754 764
	}
755 765

  
756 766
	private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException {
757
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml),
758
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, true, false, XmlRecordFactoryTest.specialDatasourceTypes);
767
		return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml),
768
				RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
759 769
	}
760 770

  
761 771
	private InputStream load(final String fileName) {
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/projectRecordExtraInfo.xml
31 31
	<FIELD name="callidentifier" isNull="true"/>
32 32
	<FIELD name="projectid">arc_________::ANZCCART</FIELD>
33 33
	<FIELD name="dateofcollection">2015-06-19</FIELD>
34
	<FIELD name="enddate">2012-12-31</FIELD>
34
	<FIELD name="enddate"></FIELD>
35 35
	<FIELD name="jsonextrainfo">{"a":"2", "b":"3"}</FIELD>
36 36
</ROW>
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/EntityConfig.java
1 1
package eu.dnetlib.data.mapreduce.hbase.index.config;
2 2

  
3 3
import java.util.HashMap;
4
import java.util.List;
4 5

  
5 6
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
6 7

  
......
10 11

  
11 12
	private HashMap<RelDescriptor, LinkDescriptor> links;
12 13

  
14
	private List<String> includeFields;
15

  
16
	private List<String> excludeFields;
17

  
13 18
	public EntityConfig(final Boolean includeDuplicates, final HashMap<RelDescriptor, LinkDescriptor> links) {
14 19
		super();
15 20
		this.includeDuplicates = includeDuplicates;
16 21
		this.links = links;
17 22
	}
18 23

  
24
	public EntityConfig(final Boolean includeDuplicates,
25
			final HashMap<RelDescriptor, LinkDescriptor> links,
26
			final List<String> includeFields,
27
			final List<String> excludeFields) {
28
		super();
29
		this.includeDuplicates = includeDuplicates;
30
		this.links = links;
31
		this.includeFields = includeFields;
32
		this.excludeFields = excludeFields;
33
	}
34

  
19 35
	public Boolean getIndexDuplicates() {
20 36
		return includeDuplicates;
21 37
	}
......
32 48
		this.links = links;
33 49
	}
34 50

  
51
	public List<String> getIncludeFields() {
52
		return includeFields;
53
	}
54

  
55
	public void setIncludeFields(final List<String> includeFields) {
56
		this.includeFields = includeFields;
57
	}
58

  
59
	public List<String> getExcludeFields() {
60
		return excludeFields;
61
	}
62

  
63
	public void setExcludeFields(final List<String> excludeFields) {
64
		this.excludeFields = excludeFields;
65
	}
35 66
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/EntityConfigTable.java
1 1
package eu.dnetlib.data.mapreduce.hbase.index.config;
2 2

  
3
import java.util.Collection;
4
import java.util.HashMap;
5
import java.util.HashSet;
6
import java.util.Set;
3
import java.util.*;
7 4

  
5
import com.google.common.base.Predicate;
6
import com.google.common.base.Predicates;
8 7
import com.google.common.collect.Lists;
8
import com.google.common.collect.Sets;
9 9
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
10
import eu.dnetlib.data.proto.TypeProtos;
10 11
import eu.dnetlib.data.proto.TypeProtos.Type;
12
import org.apache.commons.collections.CollectionUtils;
11 13

  
12
public class EntityConfigTable extends HashMap<Type, EntityConfig> {
14
public class EntityConfigTable extends HashMap<TypeProtos.Type, EntityConfig> {
13 15

  
14 16
	private static final long serialVersionUID = 6087987206844928698L;
15 17

  
16 18
	public Collection<LinkDescriptor> getDescriptors(final Type source) {
17 19
		final EntityConfig entityConfig = super.get(source);
18 20
		if (entityConfig == null) return Lists.newArrayList();
19
		return entityConfig.getLinks().values();
21
		return Lists.newArrayList(entityConfig.getLinks().values());
20 22
	}
21 23

  
22 24
	public LinkDescriptor getDescriptor(final Type type, final RelDescriptor relDescriptor) {
......
25 27

  
26 28
	public Set<String> getFilter(final Type type, final RelDescriptor relDescriptor) {
27 29
		final LinkDescriptor ld = getDescriptor(type, relDescriptor);
28
		return ld != null ? ld.getFields() : new HashSet<String>();
30
		return ld != null ? Sets.newHashSet(ld.getFields()) : new HashSet<String>();
29 31
	}
30 32

  
31 33
	public boolean includeDuplicates(final Type type) {
32 34
		return super.get(type).getIndexDuplicates();
33 35
	}
34 36

  
35
}
37
	public boolean hasIncludeFields(final Type type){
38
		return CollectionUtils.isNotEmpty(super.get(type).getIncludeFields());
39
	}
40

  
41
	public boolean hasExcludeFields(final Type type){
42
		return CollectionUtils.isNotEmpty(super.get(type).getExcludeFields());
43
	}
44

  
45
	public List<String> getIncludeFields(final Type type){
46
		return super.get(type).getIncludeFields();
47
	}
48

  
49
	public List<String> getExcludeFields(final Type type){
50
		return super.get(type).getExcludeFields();
51
	}
52

  
53
	public Predicate<String> getIncludeFilter(final Type type, final RelDescriptor relDescriptor){
54
		final Set<String> filter = getFilter(type, relDescriptor);
55
		return fieldName -> filter.contains(fieldName);
56
	}
57

  
58
	public Predicate<String> getFilter(final Type type){
59
		if(hasIncludeFields(type)){
60
			return getIncludeFilter(type);
61
		}
62
		if(hasExcludeFields(type)){
63
			return getExcludeFilter(type);
64
		}
65
		return Predicates.alwaysTrue();
66
	}
67

  
68
	private Predicate<String> getIncludeFilter(final Type type) {
69
		return fieldName -> {
70
			if(getIncludeFields(type) == null || getIncludeFields(type).isEmpty()) return false;
71
			return getIncludeFields(type).contains(fieldName);
72
		};
73
	}
74

  
75
	private Predicate<String> getExcludeFilter(final Type type) {
76
		return fieldName -> {
77
			if(getExcludeFields(type) == null || getExcludeFields(type).isEmpty()) return true;
78
			return !getExcludeFields(type).contains(fieldName);
79
		};
80
	}
81

  
82

  
83
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfig.java
14 14
import com.typesafe.config.ConfigFactory;
15 15
import com.typesafe.config.ConfigObject;
16 16
import com.typesafe.config.ConfigValue;
17

  
18 17
import eu.dnetlib.data.mapreduce.OptionalConfig;
19 18
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
20 19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.collections.CollectionUtils;
21 21

  
22 22
public class IndexConfig extends OptionalConfig {
23 23

  
......
55 55

  
56 56
					links.put(relDescriptor, ld);
57 57
				}
58
				return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links);
58
				final List<String> includeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("includeFields");
59
				final List<String> excludeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("excludeFields");
60
				if(CollectionUtils.isNotEmpty(includeFields) & CollectionUtils.isNotEmpty(excludeFields)){
61
					throw new RuntimeException("Cannot create the index configuration: includeFields and excludeFields are both not null");
62
				}
63
				return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links, includeFields, excludeFields);
59 64
			}
60 65
		});
61 66

  
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/TemplateFactory.java
42 42
		return string;
43 43
	}
44 44

  
45
	public String buildRecord(final Type type,
45
	public String buildRecord(
46 46
			final String objIdentifier,
47 47
			final String dateOfCollection,
48 48
			final String dateOfTransformation,
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/XmlRecordFactory.java
13 13
import com.google.common.base.Joiner;
14 14
import com.google.common.base.Predicate;
15 15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Iterables;
16 17
import com.google.common.collect.Lists;
17 18
import com.google.common.collect.Maps;
18 19
import com.google.common.collect.Sets;
......
33 34
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34 35
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35 36
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
37
import eu.dnetlib.data.proto.TypeProtos;
36 38
import eu.dnetlib.data.proto.TypeProtos.Type;
37 39
import eu.dnetlib.miscutils.functional.UnaryFunction;
38 40
import org.apache.commons.lang.StringUtils;
......
67 69
	protected Map<String, Integer> counters = Maps.newHashMap();
68 70
	protected Transformer transformer;
69 71

  
72
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
73
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence");
74
		@Override
75
		public boolean apply(final String s) {
76
			return instanceFieldFilter.contains(s);
77
		}
78
	};
79

  
70 80
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
71 81
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
72 82
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
......
156 166
			// log.info("chi:  " + children);
157 167
			// log.info("=============");
158 168

  
159
			final Type type = entity.getType();
160
			final List<String> metadata = decodeType(entity, null, entityDefaults, false);
169
			final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
170
			final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
161 171

  
162 172
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
163 173
			final List<String> rels = listRelations();
164
			metadata.addAll(buildContexts(type));
174
			metadata.addAll(buildContexts(entity.getType()));
165 175
			metadata.add(parseDataInfo(mainEntity));
166 176

  
167
			final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
177
			final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
168 178

  
169 179
			return templateFactory
170
					.buildRecord(type, key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
180
					.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
171 181
		} catch (final Throwable e) {
172 182
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
173 183
		}
......
188 198
		return sb.toString();
189 199
	}
190 200

  
191
	private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
201
	private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
192 202

  
193 203
		final List<String> metadata = Lists.newArrayList();
194 204
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
......
238 248
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
239 249

  
240 250
				final List<String> metadata = Lists.newArrayList();
241
				final Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
242
				final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
243
				metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
251
				final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
252
				//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
253
				metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
244 254

  
245 255
				String semanticclass = "";
246 256
				String semanticscheme = "";
......
249 259

  
250 260
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
251 261

  
252
					final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
262
					//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
253 263
					final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
254
					metadata.addAll(decodeType(d, filter, relDefaults, true));
264
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
255 265
					if (d.getType().equals(Type.result)) {
256 266
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
257
							metadata.addAll(listFields(i, relFilter, false, true));
267
							metadata.addAll(listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
258 268
						}
259 269
					}
260 270
				}
......
304 314
		final OafEntityDecoder entity = mainEntity.decodeEntity();
305 315
		if (entity.getType().equals(Type.result)) {
306 316
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
307
				final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence");
308
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFieldFilter, false, false),
317

  
318

  
319
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
309 320
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
310 321

  
311 322
							@Override
......
327 338
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
328 339
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
329 340
		incrementCounter(relDescriptor.getSubRelType().toString());
330
		final Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
331
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
341
		final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
342
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
332 343
	}
333 344

  
334
	private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
345
	private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
335 346

  
336 347
		final List<String> metadata = Lists.newArrayList();
337 348

  
338 349
		if (fields != null) {
339 350

  
340 351
			final Set<String> seen = Sets.newHashSet();
341
			for (final Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) {
342 352

  
343
				// final String name = getFieldName(e.getKey().getName());
353
			final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
354
			for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
355

  
344 356
				final String name = e.getKey().getName();
345 357
				seen.add(name);
346

  
347 358
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
348 359
			}
349 360

  
350 361
			if (defaults) {
351
				for (final FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
352
					if (!seen.contains(fd.getName())) {
353
						addFieldValue(metadata, fd, getDefault(fd), expandingRel);
362
				final Iterable<FieldDescriptor> unseen = Iterables.filter(fields.getDescriptorForType().getFields(), new Predicate<FieldDescriptor>() {
363
					@Override
364
					public boolean apply(final FieldDescriptor fd) {
365
						return !seen.contains(fd.getName()) && filter.apply(fd.getName());
354 366
					}
367
				});
368
				for(FieldDescriptor fd : unseen){
369
					addFieldValue(metadata, fd, getDefault(fd), expandingRel);
355 370
				}
356 371
			}
357 372
		}
......
485 500
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
486 501
				final Journal j = (Journal) o;
487 502
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
488
						+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>");
503
						+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
504
						+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
489 505
			}
490 506

  
491 507
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
......
705 721
		return sb.toString();
706 722
	}
707 723

  
708
	private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) {
709 724

  
710
		if (filter != null) {
711
			final Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() {
712

  
713
				@Override
714
				public boolean apply(final FieldDescriptor descriptor) {
715
					if (fields == null) return false;
716
					final String name = descriptor.getName();
717
					return filter.contains(name);
718
				}
719
			};
720
			final Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p);
721
			// log.info(
722
			// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" +
723
			// "builder  " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet()));
724
			return filtered.entrySet();
725
	private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
726
		if(acceptFilter == null) return fields.getAllFields();
727
		final Map<FieldDescriptor, Object> res = Maps.newHashMap();
728
		for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
729
			if (acceptFilter.apply(e.getKey().getName())) {
730
				res.put(e.getKey(), e.getValue());
731
			}
725 732
		}
726
		return fields.getAllFields().entrySet();
733
		return res;
727 734
	}
728 735

  
736

  
737

  
729 738
	private List<String> countersAsXml() {
730 739
		final List<String> out = Lists.newArrayList();
731 740
		for (final Entry<String, Integer> e : counters.entrySet()) {
modules/dnet-mapreduce-jobs/trunk/pom.xml
9 9
	<modelVersion>4.0.0</modelVersion>
10 10
	<groupId>eu.dnetlib</groupId>
11 11
	<artifactId>dnet-mapreduce-jobs</artifactId>
12
	<version>0.0.9.7-PIWIK-SNAPSHOT</version>
12
	<version>1.0.0-SNAPSHOT</version>
13 13
	<packaging>jar</packaging>
14 14
	<scm>
15 15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-mapreduce-jobs/trunk</developerConnection>
......
82 82
		<dependency>
83 83
			<groupId>org.apache.solr</groupId>
84 84
			<artifactId>solr-solrj</artifactId>
85
			<version>[4.7.0, 5.0.0)</version>
85
			<version>[4.10.4]</version>
86 86
			<exclusions>
87 87
				<exclusion>
88 88
					<artifactId>wstx-asl</artifactId>
89 89
					<groupId>org.codehaus.woodstox</groupId>
90 90
				</exclusion>
91
<!--
92
				<exclusion>
93
					<artifactId>httpcore</artifactId>
94
					<groupId>org.apache.httpcomponents</groupId>
95
				</exclusion>
96
				<exclusion>
97
					<artifactId>httpclient</artifactId>
98
					<groupId>org.apache.httpcomponents</groupId>
99
				</exclusion>
100
-->
101 91
			</exclusions>
102 92
		</dependency>
103 93
		<dependency>
......
230 220
			<version>[1.0.0,2.0.0)</version>
231 221
			<scope>test</scope>
232 222
		</dependency>
223
		<dependency>
224
			<groupId>com.google.guava</groupId>
225
			<artifactId>guava</artifactId>
226
			<version>RELEASE</version>
227
		</dependency>
233 228

  
234 229
	</dependencies>
235 230
</project>

Also available in: Unified diff