Revision 57517
Added by Michele Artini over 4 years ago
GenerateSimilaritiesReducer.java | ||
---|---|---|
32 | 32 |
|
33 | 33 |
if (list.size() < 2) { return; } |
34 | 34 |
|
35 |
if (reduceUsingId(OpenOrgsCommon.OPENORGS_MAIN_PREFIX, list, context) |
|
36 |
|| reduceUsingId(OpenOrgsCommon.OPENORGS_CORDA_FP7_PREFIX, list, context) |
|
37 |
|| reduceUsingId(OpenOrgsCommon.OPENORGS_CORDA_H2020_PREFIX, list, context) |
|
38 |
|| reduceUsingId("20|", list, context)) { |
|
39 |
// NOTHING TO DO |
|
35 |
final String mainId = findMainId(OpenOrgsCommon.OPENORGS_MAIN_PREFIX, list); |
|
36 |
|
|
37 |
if (StringUtils.isNotBlank(mainId)) { |
|
38 |
for (final OafEntity o : list) { |
|
39 |
if (!o.getOriginalIdList().contains(mainId)) { |
|
40 |
context.getCounter("organization", "relations to " + OpenOrgsCommon.OPENORGS_MAIN_PREFIX + "*").increment(1); |
|
41 |
emit(newSimilarity(mainId, o), context); |
|
42 |
} |
|
43 |
} |
|
40 | 44 |
} |
41 | 45 |
} catch (final InvalidProtocolBufferException e) { |
42 | 46 |
e.printStackTrace(); |
... | ... | |
44 | 48 |
} |
45 | 49 |
} |
46 | 50 |
|
47 |
private boolean reduceUsingId(final String idPrefix, final List<OafEntity> list, final Context context) { |
|
48 |
final String mainId = findMainId(idPrefix, list); |
|
49 |
|
|
50 |
if (StringUtils.isNotBlank(mainId)) { |
|
51 |
for (final OafEntity o : list) { |
|
52 |
if (!o.getId().equals(mainId)) { |
|
53 |
context.getCounter("organization", "relations to " + idPrefix + "*").increment(1); |
|
54 |
emit(newSimilarity(mainId, o), context); |
|
55 |
} |
|
56 |
} |
|
57 |
|
|
58 |
return true; |
|
59 |
} |
|
60 |
return false; |
|
61 |
} |
|
62 |
|
|
63 | 51 |
private String findMainId(final String idPrefix, final List<OafEntity> list) { |
64 | 52 |
final List<String> valids = new ArrayList<>(); |
65 | 53 |
|
66 | 54 |
for (final OafEntity e : list) { |
67 |
if (e.getId().startsWith(idPrefix)) { |
|
68 |
valids.add(e.getId()); |
|
55 |
for (final String id : e.getOriginalIdList()) { |
|
56 |
if (id.startsWith(idPrefix)) { |
|
57 |
valids.add(id); |
|
58 |
} |
|
69 | 59 |
} |
70 | 60 |
} |
71 | 61 |
if (valids.isEmpty()) { return null; } |
... | ... | |
77 | 67 |
|
78 | 68 |
private void emit(final Similarity simrel, final Context context) { |
79 | 69 |
try { |
80 |
valueOut.set(simrel.toJsonBytes());
|
|
70 |
valueOut.set(simrel.toTsv());
|
|
81 | 71 |
context.getCounter("organization", "relations (total)").increment(1); |
82 | 72 |
context.write(NullWritable.get(), valueOut); |
83 | 73 |
} catch (IOException | InterruptedException e) { |
... | ... | |
88 | 78 |
private Similarity newSimilarity(final String openOrgsId, final OafEntity oafEntity) { |
89 | 79 |
final Similarity s = new Similarity(); |
90 | 80 |
s.setOpenOrgID(openOrgsId); |
91 |
s.setOpenaireId(oafEntity.getId()); |
|
92 | 81 |
s.setOpenaireOriginalId(oafEntity.getOriginalId(0)); |
93 | 82 |
s.setName(oafEntity.getOrganization().getMetadata().getLegalname().getValue()); |
94 | 83 |
s.setAcronym(oafEntity.getOrganization().getMetadata().getLegalshortname().getValue()); |
Also available in: Unified diff
OpenOrgs DB: use of tsv for rels