8 |
8 |
import eu.dnetlib.data.proto.FieldTypeProtos;
|
9 |
9 |
import eu.dnetlib.data.proto.OafProtos;
|
10 |
10 |
import eu.dnetlib.data.proto.ResultProtos;
|
|
11 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
|
11 |
12 |
import org.apache.commons.lang3.StringUtils;
|
12 |
13 |
import org.apache.hadoop.mapreduce.Mapper;
|
13 |
14 |
|
... | ... | |
25 |
26 |
private final static String SCHEMA_ID = "dnet:provenanceActions";
|
26 |
27 |
private final static String COUNTER_GROUP = "Bulk Tagging";
|
27 |
28 |
|
|
29 |
private final static String ZENODO_COMMUNITY_INDICATOR = "https://zenodo.org/communities/";
|
|
30 |
|
28 |
31 |
private String trust = "0.8";
|
29 |
32 |
|
30 |
33 |
|
... | ... | |
36 |
39 |
|
37 |
40 |
if(oaf.getDataInfo().getDeletedbyinference()){
|
38 |
41 |
context.getCounter(COUNTER_GROUP, "deleted by inference").increment(1);
|
|
42 |
//TODO remove the context associated to Zenodo Communities
|
39 |
43 |
return null;
|
40 |
44 |
}
|
41 |
45 |
//context.getCounter(COUNTER_GROUP, "not deleted by inference").increment(1);
|
... | ... | |
49 |
53 |
}
|
50 |
54 |
//communities contains all the communities to be added as context for the result
|
51 |
55 |
final Set<String> communities = new HashSet<>();
|
52 |
|
|
|
56 |
final Set<String> subjects = new HashSet<>();
|
53 |
57 |
oaf.getEntity().getResult().getMetadata().getSubjectList().stream()
|
54 |
58 |
.map(subject -> subject.getValue())
|
55 |
59 |
.filter(StringUtils::isNotBlank)
|
56 |
60 |
.map(String::toLowerCase)
|
57 |
61 |
.map(String::trim)
|
58 |
62 |
.collect(Collectors.toCollection(HashSet::new))
|
59 |
|
.forEach(s -> communities.addAll(conf.getCommunityForSubjectValue(s)));
|
|
63 |
.forEach(s -> subjects.addAll(conf.getCommunityForSubjectValue(s)));
|
60 |
64 |
|
|
65 |
//updating counters for subject matching
|
|
66 |
subjects.forEach(c->context.getCounter(COUNTER_GROUP, "Matching subject for community " + c).increment(1));
|
|
67 |
communities.addAll(subjects);
|
|
68 |
context.getCounter(COUNTER_GROUP,"match found for subjects ").increment(subjects.size());
|
|
69 |
|
|
70 |
final Set<String> datasources = new HashSet<>();
|
61 |
71 |
oaf.getEntity().getResult().getInstanceList()
|
62 |
72 |
.stream()
|
63 |
73 |
.map(i -> new Pair<>(i.getCollectedfrom().getKey(), i.getHostedby().getKey()))
|
64 |
74 |
.flatMap(p -> Stream.of(p.getFst(), p.getSnd()))
|
65 |
75 |
.map(s -> StringUtils.substringAfter(s, "|"))
|
66 |
76 |
.collect(Collectors.toCollection(HashSet::new))
|
67 |
|
.forEach(dsId -> communities.addAll(conf.getCommunityForDatasourceValue(dsId)));
|
|
77 |
.forEach(dsId -> datasources.addAll(conf.getCommunityForDatasourceValue(dsId)));
|
68 |
78 |
|
69 |
|
//TODO: add code for Zenodo Communities
|
|
79 |
datasources.forEach(c->context.getCounter(COUNTER_GROUP,"Matching datasource for community " + c).increment(1));
|
|
80 |
communities.addAll(datasources);
|
|
81 |
context.getCounter(COUNTER_GROUP,"Match found for content providers " ).increment(datasources.size());
|
70 |
82 |
|
|
83 |
final Set<String> czenodo = new HashSet<>();
|
|
84 |
final ResultProtos.Result.Metadata.Builder mBuilder = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
|
|
85 |
mBuilder.getContextBuilderList().stream().filter(cBuilder -> cBuilder.getId().contains(ZENODO_COMMUNITY_INDICATOR))
|
|
86 |
.collect(Collectors.toList())
|
|
87 |
.forEach(c->czenodo.addAll(conf.getCommunityForZenodoCommunityValue(c.getId().substring(c.getId().lastIndexOf("/")+1).trim())));
|
|
88 |
czenodo.forEach(c->context.getCounter(COUNTER_GROUP,"Matching Zenodo community for community " + c).increment(1));
|
|
89 |
context.getCounter(COUNTER_GROUP,"Match found for Zenodo communities " ).increment(czenodo.size());
|
|
90 |
|
|
91 |
communities.addAll(czenodo);
|
|
92 |
List<Context.Builder> clist = mBuilder.getContextBuilderList().stream()
|
|
93 |
.filter(c -> (!c.getId().contains(ZENODO_COMMUNITY_INDICATOR))).collect(Collectors.toList());
|
|
94 |
|
|
95 |
|
|
96 |
mBuilder.clearContext();
|
|
97 |
clist.forEach(c->mBuilder.addContext(c));
|
|
98 |
|
|
99 |
|
|
100 |
final Map<String, ResultProtos.Result.Context.Builder> cBuilders = Maps.newHashMap();
|
|
101 |
|
|
102 |
|
71 |
103 |
if(communities.isEmpty()){
|
72 |
104 |
context.getCounter(COUNTER_GROUP, "list of communities empty").increment(1);
|
73 |
105 |
}else{
|
74 |
106 |
context.getCounter(COUNTER_GROUP, "list of communities has values!").increment(1);
|
75 |
107 |
}
|
76 |
108 |
|
77 |
|
final ResultProtos.Result.Metadata.Builder mBuilder = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
|
78 |
|
|
79 |
|
final Map<String, ResultProtos.Result.Context.Builder> cBuilders = Maps.newHashMap();
|
80 |
109 |
mBuilder.getContextBuilderList().forEach(cBuilder -> {
|
81 |
110 |
cBuilders.put(cBuilder.getId(), cBuilder);
|
82 |
111 |
});
|
update for zenodo community bulk tagging