Revision 46587
Added by Alessia Bardi about 7 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfigTest.java | ||
---|---|---|
34 | 34 |
+ "{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, " |
35 | 35 |
+ "{ relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, " |
36 | 36 |
+ "{ relType = organizationOrganization_dedup_merges, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, " |
37 |
+ "{ relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }" |
|
37 |
+ "{ relType = organizationOrganization_dedup_isMergedIn, targetEntity = organization, expandAs = child, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, " |
|
38 |
+ "{ relType = resultOrganization_affiliation_isAuthorInstitutionOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }" |
|
38 | 39 |
+ "]}, " |
39 | 40 |
+ "project { dups = false, links = [" |
40 | 41 |
+ "{ relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, " |
41 | 42 |
+ "{ relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, " |
42 | 43 |
+ "{ relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } " |
43 |
+ "]}} "; |
|
44 |
+ "], excludeFields = [jsonextrainfo,optional1,optional2]}} ";
|
|
44 | 45 |
|
46 |
public static final String configWithIncludesExcludes = |
|
47 |
"index.conf { " |
|
48 |
+ "result { dups = true, links = [" |
|
49 |
+ "{ relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid] }, " |
|
50 |
+ "{ relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid] }, " |
|
51 |
+ "{ relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,pid,url,collectedfrom] }," |
|
52 |
+ "{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid] }," |
|
53 |
+ "{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid] }," |
|
54 |
+ "{ relType = resultResult_supplement_isSupplementTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url,collectedfrom] }," |
|
55 |
+ "{ relType = resultResult_supplement_isSupplementedBy, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url,collectedfrom] }," |
|
56 |
+ "{ relType = resultResult_part_isPartOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url] }," |
|
57 |
+ "{ relType = resultResult_part_hasPart, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype,similarity,type,pid,url] }" |
|
58 |
+ "], excludeFields = [jsonextrainfo] }, " |
|
59 |
+ "person { dups = false, links = [" |
|
60 |
+ "{ relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking,pid], max = 2 }, " |
|
61 |
+ "{ relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } " |
|
62 |
+ "], includeFields=[fullname, ranking]}" |
|
63 |
+ "}"; |
|
64 |
|
|
65 |
public static final String wrongConfig = |
|
66 |
"index.conf { " |
|
67 |
+ "person { dups = false, links = [" |
|
68 |
+ "{ relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking,pid], max = 2 }, " |
|
69 |
+ "{ relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } " |
|
70 |
+ "], includeFields=[fullname, ranking], excludeFields=[email] }" |
|
71 |
+ "}"; |
|
72 |
|
|
73 |
|
|
45 | 74 |
public static final String context = "[{id:'egi', label:'EGI', element:'context'}, " |
46 | 75 |
+ "{id:'egi::classification', label:'EGI classification scheme', element:'category'}, " |
47 | 76 |
+ "{id:'egi::classification::natsc', label:'Natural Sciences', element:'concept'}, " |
... | ... | |
88 | 117 |
assertNotNull(relDescriptor.getRelType()); |
89 | 118 |
assertNotNull(relDescriptor.getSubRelType()); |
90 | 119 |
|
91 |
// System.out.println(ld);
|
|
120 |
System.out.println(ld); |
|
92 | 121 |
} |
93 | 122 |
} |
94 | 123 |
} |
95 | 124 |
|
125 |
@Test |
|
126 |
public void testLoadEntityConfTableIncludeExclude() { |
|
127 |
final IndexConfig conf = IndexConfig.load(configWithIncludesExcludes); |
|
128 |
final EntityConfigTable map = conf.getConfigMap(); |
|
129 |
assertNotNull(map); |
|
130 |
assertTrue(map.size() > 0); |
|
131 |
assertTrue(map.hasIncludeFields(Type.person)); |
|
132 |
assertTrue(map.hasExcludeFields(Type.result)); |
|
133 |
} |
|
134 |
|
|
135 |
@Test(expected = RuntimeException.class) |
|
136 |
public void testBothIncludeExclude() { |
|
137 |
final IndexConfig conf = IndexConfig.load(wrongConfig); |
|
138 |
final EntityConfigTable map = conf.getConfigMap(); |
|
139 |
} |
|
140 |
|
|
141 |
|
|
142 |
|
|
143 |
|
|
96 | 144 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java | ||
---|---|---|
155 | 155 |
doTest(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordFCT.xml")); |
156 | 156 |
} |
157 | 157 |
|
158 |
|
|
158 | 159 |
@Test |
159 | 160 |
public void testParseOaf() throws Exception { |
160 | 161 |
|
... | ... | |
272 | 273 |
} |
273 | 274 |
|
274 | 275 |
@Test |
276 |
public void testLinkOrganizationAffiliation() throws Exception { |
|
277 |
|
|
278 |
final List<Row> rows = Lists.newArrayList(); |
|
279 |
rows.addAll(asRows(loadFromTransformationProfile("organizations_2_hbase.xsl"), load("organization.xml"))); |
|
280 |
rows.addAll(asRows(loadFromTransformationProfile("resultorganization_2_hbase.xsl"), load("result_organization.xml"))); |
|
281 |
rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"))); |
|
282 |
|
|
283 |
printAll(mapAll(buildTable(rows))); |
|
284 |
} |
|
285 |
|
|
286 |
@Test |
|
275 | 287 |
public void testUnpackAuthors() throws Exception { |
276 | 288 |
|
277 | 289 |
final Map<String, Object> xslParams = Maps.newHashMap(); |
... | ... | |
415 | 427 |
|
416 | 428 |
@Test |
417 | 429 |
public void testProjectExtraInfo() throws Exception { |
418 |
|
|
419 | 430 |
final List<Row> rows = Lists.newArrayList(); |
420 | 431 |
rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordExtraInfo.xml"))); |
421 | 432 |
printAll(mapAll(buildTable(rows))); |
... | ... | |
730 | 741 |
log.debug(doc.valueOf(xpath)); |
731 | 742 |
} |
732 | 743 |
} else { |
733 |
|
|
734 | 744 |
log.info(val); |
735 | 745 |
} |
736 | 746 |
} |
... | ... | |
754 | 764 |
} |
755 | 765 |
|
756 | 766 |
private XmlRecordFactory newBuilder() throws TransformerConfigurationException, TransformerFactoryConfigurationError, DocumentException { |
757 |
return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(eu.dnetlib.data.mapreduce.hbase.index.config.Context.xml),
|
|
758 |
RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, true, false, XmlRecordFactoryTest.specialDatasourceTypes);
|
|
767 |
return new XmlRecordFactory(entityConfigTable, ContextMapper.fromXml(Context.xml), |
|
768 |
RelClasses.fromJSon(RelClassesTest.relClassesJson), XmlRecordFactoryTest.SCHEMA_LOCATION, true, false, false, XmlRecordFactoryTest.specialDatasourceTypes);
|
|
759 | 769 |
} |
760 | 770 |
|
761 | 771 |
private InputStream load(final String fileName) { |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/projectRecordExtraInfo.xml | ||
---|---|---|
31 | 31 |
<FIELD name="callidentifier" isNull="true"/> |
32 | 32 |
<FIELD name="projectid">arc_________::ANZCCART</FIELD> |
33 | 33 |
<FIELD name="dateofcollection">2015-06-19</FIELD> |
34 |
<FIELD name="enddate">2012-12-31</FIELD>
|
|
34 |
<FIELD name="enddate"></FIELD> |
|
35 | 35 |
<FIELD name="jsonextrainfo">{"a":"2", "b":"3"}</FIELD> |
36 | 36 |
</ROW> |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/EntityConfig.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.index.config; |
2 | 2 |
|
3 | 3 |
import java.util.HashMap; |
4 |
import java.util.List; |
|
4 | 5 |
|
5 | 6 |
import eu.dnetlib.data.mapreduce.util.RelDescriptor; |
6 | 7 |
|
... | ... | |
10 | 11 |
|
11 | 12 |
private HashMap<RelDescriptor, LinkDescriptor> links; |
12 | 13 |
|
14 |
private List<String> includeFields; |
|
15 |
|
|
16 |
private List<String> excludeFields; |
|
17 |
|
|
13 | 18 |
public EntityConfig(final Boolean includeDuplicates, final HashMap<RelDescriptor, LinkDescriptor> links) { |
14 | 19 |
super(); |
15 | 20 |
this.includeDuplicates = includeDuplicates; |
16 | 21 |
this.links = links; |
17 | 22 |
} |
18 | 23 |
|
24 |
public EntityConfig(final Boolean includeDuplicates, |
|
25 |
final HashMap<RelDescriptor, LinkDescriptor> links, |
|
26 |
final List<String> includeFields, |
|
27 |
final List<String> excludeFields) { |
|
28 |
super(); |
|
29 |
this.includeDuplicates = includeDuplicates; |
|
30 |
this.links = links; |
|
31 |
this.includeFields = includeFields; |
|
32 |
this.excludeFields = excludeFields; |
|
33 |
} |
|
34 |
|
|
19 | 35 |
public Boolean getIndexDuplicates() { |
20 | 36 |
return includeDuplicates; |
21 | 37 |
} |
... | ... | |
32 | 48 |
this.links = links; |
33 | 49 |
} |
34 | 50 |
|
51 |
public List<String> getIncludeFields() { |
|
52 |
return includeFields; |
|
53 |
} |
|
54 |
|
|
55 |
public void setIncludeFields(final List<String> includeFields) { |
|
56 |
this.includeFields = includeFields; |
|
57 |
} |
|
58 |
|
|
59 |
public List<String> getExcludeFields() { |
|
60 |
return excludeFields; |
|
61 |
} |
|
62 |
|
|
63 |
public void setExcludeFields(final List<String> excludeFields) { |
|
64 |
this.excludeFields = excludeFields; |
|
65 |
} |
|
35 | 66 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/EntityConfigTable.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.index.config; |
2 | 2 |
|
3 |
import java.util.Collection; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.HashSet; |
|
6 |
import java.util.Set; |
|
3 |
import java.util.*; |
|
7 | 4 |
|
5 |
import com.google.common.base.Predicate; |
|
6 |
import com.google.common.base.Predicates; |
|
8 | 7 |
import com.google.common.collect.Lists; |
8 |
import com.google.common.collect.Sets; |
|
9 | 9 |
import eu.dnetlib.data.mapreduce.util.RelDescriptor; |
10 |
import eu.dnetlib.data.proto.TypeProtos; |
|
10 | 11 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
12 |
import org.apache.commons.collections.CollectionUtils; |
|
11 | 13 |
|
12 |
public class EntityConfigTable extends HashMap<Type, EntityConfig> { |
|
14 |
public class EntityConfigTable extends HashMap<TypeProtos.Type, EntityConfig> {
|
|
13 | 15 |
|
14 | 16 |
private static final long serialVersionUID = 6087987206844928698L; |
15 | 17 |
|
16 | 18 |
public Collection<LinkDescriptor> getDescriptors(final Type source) { |
17 | 19 |
final EntityConfig entityConfig = super.get(source); |
18 | 20 |
if (entityConfig == null) return Lists.newArrayList(); |
19 |
return entityConfig.getLinks().values();
|
|
21 |
return Lists.newArrayList(entityConfig.getLinks().values());
|
|
20 | 22 |
} |
21 | 23 |
|
22 | 24 |
public LinkDescriptor getDescriptor(final Type type, final RelDescriptor relDescriptor) { |
... | ... | |
25 | 27 |
|
26 | 28 |
public Set<String> getFilter(final Type type, final RelDescriptor relDescriptor) { |
27 | 29 |
final LinkDescriptor ld = getDescriptor(type, relDescriptor); |
28 |
return ld != null ? ld.getFields() : new HashSet<String>();
|
|
30 |
return ld != null ? Sets.newHashSet(ld.getFields()) : new HashSet<String>();
|
|
29 | 31 |
} |
30 | 32 |
|
31 | 33 |
public boolean includeDuplicates(final Type type) { |
32 | 34 |
return super.get(type).getIndexDuplicates(); |
33 | 35 |
} |
34 | 36 |
|
35 |
} |
|
37 |
public boolean hasIncludeFields(final Type type){ |
|
38 |
return CollectionUtils.isNotEmpty(super.get(type).getIncludeFields()); |
|
39 |
} |
|
40 |
|
|
41 |
public boolean hasExcludeFields(final Type type){ |
|
42 |
return CollectionUtils.isNotEmpty(super.get(type).getExcludeFields()); |
|
43 |
} |
|
44 |
|
|
45 |
public List<String> getIncludeFields(final Type type){ |
|
46 |
return super.get(type).getIncludeFields(); |
|
47 |
} |
|
48 |
|
|
49 |
public List<String> getExcludeFields(final Type type){ |
|
50 |
return super.get(type).getExcludeFields(); |
|
51 |
} |
|
52 |
|
|
53 |
public Predicate<String> getIncludeFilter(final Type type, final RelDescriptor relDescriptor){ |
|
54 |
final Set<String> filter = getFilter(type, relDescriptor); |
|
55 |
return fieldName -> filter.contains(fieldName); |
|
56 |
} |
|
57 |
|
|
58 |
public Predicate<String> getFilter(final Type type){ |
|
59 |
if(hasIncludeFields(type)){ |
|
60 |
return getIncludeFilter(type); |
|
61 |
} |
|
62 |
if(hasExcludeFields(type)){ |
|
63 |
return getExcludeFilter(type); |
|
64 |
} |
|
65 |
return Predicates.alwaysTrue(); |
|
66 |
} |
|
67 |
|
|
68 |
private Predicate<String> getIncludeFilter(final Type type) { |
|
69 |
return fieldName -> { |
|
70 |
if(getIncludeFields(type) == null || getIncludeFields(type).isEmpty()) return false; |
|
71 |
return getIncludeFields(type).contains(fieldName); |
|
72 |
}; |
|
73 |
} |
|
74 |
|
|
75 |
private Predicate<String> getExcludeFilter(final Type type) { |
|
76 |
return fieldName -> { |
|
77 |
if(getExcludeFields(type) == null || getExcludeFields(type).isEmpty()) return true; |
|
78 |
return !getExcludeFields(type).contains(fieldName); |
|
79 |
}; |
|
80 |
} |
|
81 |
|
|
82 |
|
|
83 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfig.java | ||
---|---|---|
14 | 14 |
import com.typesafe.config.ConfigFactory; |
15 | 15 |
import com.typesafe.config.ConfigObject; |
16 | 16 |
import com.typesafe.config.ConfigValue; |
17 |
|
|
18 | 17 |
import eu.dnetlib.data.mapreduce.OptionalConfig; |
19 | 18 |
import eu.dnetlib.data.mapreduce.util.RelDescriptor; |
20 | 19 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
20 |
import org.apache.commons.collections.CollectionUtils; |
|
21 | 21 |
|
22 | 22 |
public class IndexConfig extends OptionalConfig { |
23 | 23 |
|
... | ... | |
55 | 55 |
|
56 | 56 |
links.put(relDescriptor, ld); |
57 | 57 |
} |
58 |
return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links); |
|
58 |
final List<String> includeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("includeFields"); |
|
59 |
final List<String> excludeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("excludeFields"); |
|
60 |
if(CollectionUtils.isNotEmpty(includeFields) & CollectionUtils.isNotEmpty(excludeFields)){ |
|
61 |
throw new RuntimeException("Cannot create the index configuration: includeFields and excludeFields are both not null"); |
|
62 |
} |
|
63 |
return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links, includeFields, excludeFields); |
|
59 | 64 |
} |
60 | 65 |
}); |
61 | 66 |
|
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/TemplateFactory.java | ||
---|---|---|
42 | 42 |
return string; |
43 | 43 |
} |
44 | 44 |
|
45 |
public String buildRecord(final Type type,
|
|
45 |
public String buildRecord( |
|
46 | 46 |
final String objIdentifier, |
47 | 47 |
final String dateOfCollection, |
48 | 48 |
final String dateOfTransformation, |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/XmlRecordFactory.java | ||
---|---|---|
13 | 13 |
import com.google.common.base.Joiner; |
14 | 14 |
import com.google.common.base.Predicate; |
15 | 15 |
import com.google.common.base.Splitter; |
16 |
import com.google.common.collect.Iterables; |
|
16 | 17 |
import com.google.common.collect.Lists; |
17 | 18 |
import com.google.common.collect.Maps; |
18 | 19 |
import com.google.common.collect.Sets; |
... | ... | |
33 | 34 |
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference; |
34 | 35 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
35 | 36 |
import eu.dnetlib.data.proto.ResultProtos.Result.Journal; |
37 |
import eu.dnetlib.data.proto.TypeProtos; |
|
36 | 38 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
37 | 39 |
import eu.dnetlib.miscutils.functional.UnaryFunction; |
38 | 40 |
import org.apache.commons.lang.StringUtils; |
... | ... | |
67 | 69 |
protected Map<String, Integer> counters = Maps.newHashMap(); |
68 | 70 |
protected Transformer transformer; |
69 | 71 |
|
72 |
protected static Predicate<String> instanceFilter = new Predicate<String>() { |
|
73 |
final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence"); |
|
74 |
@Override |
|
75 |
public boolean apply(final String s) { |
|
76 |
return instanceFieldFilter.contains(s); |
|
77 |
} |
|
78 |
}; |
|
79 |
|
|
70 | 80 |
public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses, |
71 | 81 |
final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI) |
72 | 82 |
throws TransformerConfigurationException, TransformerFactoryConfigurationError { |
... | ... | |
156 | 166 |
// log.info("chi: " + children); |
157 | 167 |
// log.info("============="); |
158 | 168 |
|
159 |
final Type type = entity.getType();
|
|
160 |
final List<String> metadata = decodeType(entity, null, entityDefaults, false);
|
|
169 |
final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
|
|
170 |
final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
|
|
161 | 171 |
|
162 | 172 |
// rels has to be processed before the contexts because they enrich the contextMap with the funding info. |
163 | 173 |
final List<String> rels = listRelations(); |
164 |
metadata.addAll(buildContexts(type));
|
|
174 |
metadata.addAll(buildContexts(entity.getType()));
|
|
165 | 175 |
metadata.add(parseDataInfo(mainEntity)); |
166 | 176 |
|
167 |
final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
|
|
177 |
final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
|
|
168 | 178 |
|
169 | 179 |
return templateFactory |
170 |
.buildRecord(type, key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
|
|
180 |
.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml()); |
|
171 | 181 |
} catch (final Throwable e) { |
172 | 182 |
throw new RuntimeException(String.format("error building record '%s'", this.key), e); |
173 | 183 |
} |
... | ... | |
188 | 198 |
return sb.toString(); |
189 | 199 |
} |
190 | 200 |
|
191 |
private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
|
|
201 |
private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
|
192 | 202 |
|
193 | 203 |
final List<String> metadata = Lists.newArrayList(); |
194 | 204 |
metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel)); |
... | ... | |
238 | 248 |
if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) { |
239 | 249 |
|
240 | 250 |
final List<String> metadata = Lists.newArrayList(); |
241 |
final Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType()); |
|
242 |
final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor()); |
|
243 |
metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
|
|
251 |
final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
|
|
252 |
//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
|
|
253 |
metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
|
|
244 | 254 |
|
245 | 255 |
String semanticclass = ""; |
246 | 256 |
String semanticscheme = ""; |
... | ... | |
249 | 259 |
|
250 | 260 |
if ((cachedTarget != null) && cachedTarget.isInitialized()) { |
251 | 261 |
|
252 |
final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor); |
|
262 |
//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
|
|
253 | 263 |
final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget); |
254 |
metadata.addAll(decodeType(d, filter, relDefaults, true));
|
|
264 |
metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
|
|
255 | 265 |
if (d.getType().equals(Type.result)) { |
256 | 266 |
for(Instance i : cachedTarget.getResult().getInstanceList()) { |
257 |
metadata.addAll(listFields(i, relFilter, false, true));
|
|
267 |
metadata.addAll(listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
|
|
258 | 268 |
} |
259 | 269 |
} |
260 | 270 |
} |
... | ... | |
304 | 314 |
final OafEntityDecoder entity = mainEntity.decodeEntity(); |
305 | 315 |
if (entity.getType().equals(Type.result)) { |
306 | 316 |
for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) { |
307 |
final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence"); |
|
308 |
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFieldFilter, false, false), |
|
317 |
|
|
318 |
|
|
319 |
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false), |
|
309 | 320 |
listMap(instance.getUrlList(), new UnaryFunction<String, String>() { |
310 | 321 |
|
311 | 322 |
@Override |
... | ... | |
327 | 338 |
private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) { |
328 | 339 |
final OafEntityDecoder decoder = OafEntityDecoder.decode(target); |
329 | 340 |
incrementCounter(relDescriptor.getSubRelType().toString()); |
330 |
final Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
|
|
331 |
children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
|
|
341 |
final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
|
|
342 |
children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false))); |
|
332 | 343 |
} |
333 | 344 |
|
334 |
private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
|
|
345 |
private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
|
335 | 346 |
|
336 | 347 |
final List<String> metadata = Lists.newArrayList(); |
337 | 348 |
|
338 | 349 |
if (fields != null) { |
339 | 350 |
|
340 | 351 |
final Set<String> seen = Sets.newHashSet(); |
341 |
for (final Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) { |
|
342 | 352 |
|
343 |
// final String name = getFieldName(e.getKey().getName()); |
|
353 |
final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter); |
|
354 |
for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) { |
|
355 |
|
|
344 | 356 |
final String name = e.getKey().getName(); |
345 | 357 |
seen.add(name); |
346 |
|
|
347 | 358 |
addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel); |
348 | 359 |
} |
349 | 360 |
|
350 | 361 |
if (defaults) { |
351 |
for (final FieldDescriptor fd : fields.getDescriptorForType().getFields()) { |
|
352 |
if (!seen.contains(fd.getName())) { |
|
353 |
addFieldValue(metadata, fd, getDefault(fd), expandingRel); |
|
362 |
final Iterable<FieldDescriptor> unseen = Iterables.filter(fields.getDescriptorForType().getFields(), new Predicate<FieldDescriptor>() { |
|
363 |
@Override |
|
364 |
public boolean apply(final FieldDescriptor fd) { |
|
365 |
return !seen.contains(fd.getName()) && filter.apply(fd.getName()); |
|
354 | 366 |
} |
367 |
}); |
|
368 |
for(FieldDescriptor fd : unseen){ |
|
369 |
addFieldValue(metadata, fd, getDefault(fd), expandingRel); |
|
355 | 370 |
} |
356 | 371 |
} |
357 | 372 |
} |
... | ... | |
485 | 500 |
if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) { |
486 | 501 |
final Journal j = (Journal) o; |
487 | 502 |
metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\"" |
488 |
+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>"); |
|
503 |
+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\"" |
|
504 |
+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>"); |
|
489 | 505 |
} |
490 | 506 |
|
491 | 507 |
if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) { |
... | ... | |
705 | 721 |
return sb.toString(); |
706 | 722 |
} |
707 | 723 |
|
708 |
private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) { |
|
709 | 724 |
|
710 |
if (filter != null) { |
|
711 |
final Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() { |
|
712 |
|
|
713 |
@Override |
|
714 |
public boolean apply(final FieldDescriptor descriptor) { |
|
715 |
if (fields == null) return false; |
|
716 |
final String name = descriptor.getName(); |
|
717 |
return filter.contains(name); |
|
718 |
} |
|
719 |
}; |
|
720 |
final Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p); |
|
721 |
// log.info( |
|
722 |
// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" + |
|
723 |
// "builder " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet())); |
|
724 |
return filtered.entrySet(); |
|
725 |
private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) { |
|
726 |
if(acceptFilter == null) return fields.getAllFields(); |
|
727 |
final Map<FieldDescriptor, Object> res = Maps.newHashMap(); |
|
728 |
for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) { |
|
729 |
if (acceptFilter.apply(e.getKey().getName())) { |
|
730 |
res.put(e.getKey(), e.getValue()); |
|
731 |
} |
|
725 | 732 |
} |
726 |
return fields.getAllFields().entrySet();
|
|
733 |
return res;
|
|
727 | 734 |
} |
728 | 735 |
|
736 |
|
|
737 |
|
|
729 | 738 |
private List<String> countersAsXml() { |
730 | 739 |
final List<String> out = Lists.newArrayList(); |
731 | 740 |
for (final Entry<String, Integer> e : counters.entrySet()) { |
modules/dnet-mapreduce-jobs/trunk/pom.xml | ||
---|---|---|
9 | 9 |
<modelVersion>4.0.0</modelVersion> |
10 | 10 |
<groupId>eu.dnetlib</groupId> |
11 | 11 |
<artifactId>dnet-mapreduce-jobs</artifactId> |
12 |
<version>0.0.9.7-PIWIK-SNAPSHOT</version>
|
|
12 |
<version>1.0.0-SNAPSHOT</version>
|
|
13 | 13 |
<packaging>jar</packaging> |
14 | 14 |
<scm> |
15 | 15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-mapreduce-jobs/trunk</developerConnection> |
... | ... | |
82 | 82 |
<dependency> |
83 | 83 |
<groupId>org.apache.solr</groupId> |
84 | 84 |
<artifactId>solr-solrj</artifactId> |
85 |
<version>[4.7.0, 5.0.0)</version>
|
|
85 |
<version>[4.10.4]</version>
|
|
86 | 86 |
<exclusions> |
87 | 87 |
<exclusion> |
88 | 88 |
<artifactId>wstx-asl</artifactId> |
89 | 89 |
<groupId>org.codehaus.woodstox</groupId> |
90 | 90 |
</exclusion> |
91 |
<!-- |
|
92 |
<exclusion> |
|
93 |
<artifactId>httpcore</artifactId> |
|
94 |
<groupId>org.apache.httpcomponents</groupId> |
|
95 |
</exclusion> |
|
96 |
<exclusion> |
|
97 |
<artifactId>httpclient</artifactId> |
|
98 |
<groupId>org.apache.httpcomponents</groupId> |
|
99 |
</exclusion> |
|
100 |
--> |
|
101 | 91 |
</exclusions> |
102 | 92 |
</dependency> |
103 | 93 |
<dependency> |
... | ... | |
230 | 220 |
<version>[1.0.0,2.0.0)</version> |
231 | 221 |
<scope>test</scope> |
232 | 222 |
</dependency> |
223 |
<dependency> |
|
224 |
<groupId>com.google.guava</groupId> |
|
225 |
<artifactId>guava</artifactId> |
|
226 |
<version>RELEASE</version> |
|
227 |
</dependency> |
|
233 | 228 |
|
234 | 229 |
</dependencies> |
235 | 230 |
</project> |
Also available in: Unified diff
Implemented opt-in/opt-out rules (includeFields / excludeFields) for entity fields (#2557).
The module now depends on a specific solrj version (4.10.4), thus excluding the cdh6.X versions.