Revision 53267
Added by Miriam Baglioni over 5 years ago
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/bulktag/BulkTaggingMapper.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.hbase.bulktag; |
2 | 2 |
|
3 |
import com.google.common.base.Splitter; |
|
4 |
import com.google.common.collect.Lists; |
|
3 | 5 |
import eu.dnetlib.data.bulktag.CommunityConfiguration; |
4 | 6 |
import eu.dnetlib.data.bulktag.CommunityConfigurationFactory; |
5 | 7 |
import eu.dnetlib.data.proto.FieldTypeProtos; |
... | ... | |
36 | 38 |
System.out.println("conf = " + conf); |
37 | 39 |
cc = CommunityConfigurationFactory.fromJson(conf); |
38 | 40 |
tagger = new ResultTagger(); |
41 |
tagger.setTrust(context.getConfiguration().get("bulktagging.trust", "0.85")); |
|
39 | 42 |
} |
40 | 43 |
|
41 | 44 |
@Override |
... | ... | |
57 | 60 |
long tagged = oaf.getEntity().getResult().getMetadata().getContextList().stream() |
58 | 61 |
.flatMap(c -> c.getDataInfoList().stream()) |
59 | 62 |
.map(FieldTypeProtos.DataInfo::getInferenceprovenance) |
60 |
.filter(infProv -> "bulktagging::community".equals(infProv))
|
|
63 |
.filter(infProv -> "bulktagging".equals(infProv)) |
|
61 | 64 |
.count(); |
62 | 65 |
context.getCounter("Bulk Tagging", " bulktagged ").increment(tagged); |
63 | 66 |
|
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/bulktag/ResultTagger.java | ||
---|---|---|
25 | 25 |
private final static String SCHEMA_ID = "dnet:provenanceActions"; |
26 | 26 |
private final static String COUNTER_GROUP = "Bulk Tagging"; |
27 | 27 |
|
28 |
private String trust; |
|
28 | 29 |
|
30 |
|
|
29 | 31 |
public OafProtos.Oaf enrichContext(final OafProtos.Oaf oaf, final CommunityConfiguration conf, final Mapper.Context context) { |
30 | 32 |
|
31 | 33 |
//context.getCounter(COUNTER_GROUP, "to enrich").increment(1); |
... | ... | |
120 | 122 |
.setSchemeid(SCHEMA_ID) |
121 | 123 |
.setSchemename(SCHEMA_NAME)) |
122 | 124 |
.setInferenceprovenance(DATA_INFO_TYPE) |
123 |
.setTrust("0.85");
|
|
125 |
.setTrust(trust);
|
|
124 | 126 |
return builder |
125 | 127 |
.build(); |
126 | 128 |
} |
127 | 129 |
|
128 | 130 |
|
131 |
public void setTrust(String s) { |
|
132 |
trust = s; |
|
133 |
} |
|
129 | 134 |
} |
modules/dnet-openaireplus-profiles/trunk/src/main/resources/eu/dnetlib/test/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/bulkTaggingJob.xml | ||
---|---|---|
27 | 27 |
|
28 | 28 |
<PROPERTY key="mapred.reduce.tasks" value="0" /> |
29 | 29 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
30 |
|
|
30 |
|
|
31 |
<!-- trust for the bulktagging operation --> |
|
32 |
<PROPERTY key="bulktagging.trust" value="0.85"/> |
|
33 |
|
|
31 | 34 |
<!-- Uncomment to override the default lib path --> |
32 | 35 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
33 | 36 |
</STATIC_CONFIGURATION> |
Also available in: Unified diff
Modification to use trust as a parameter of the Hadoop job configuration, and a change in the provenance