1
|
package eu.dnetlib.iis.export.actionmanager.module;
|
2
|
|
3
|
import static eu.dnetlib.iis.export.actionmanager.ExportWorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD;
|
4
|
|
5
|
import java.util.ArrayList;
|
6
|
import java.util.Collections;
|
7
|
import java.util.List;
|
8
|
|
9
|
import org.apache.hadoop.conf.Configuration;
|
10
|
import org.apache.log4j.Logger;
|
11
|
|
12
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
13
|
import eu.dnetlib.actionmanager.common.Agent;
|
14
|
import eu.dnetlib.data.mapreduce.util.OafDecoder;
|
15
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
16
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
17
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
18
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
19
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
20
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
|
21
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
|
22
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
|
23
|
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
|
24
|
import eu.dnetlib.iis.common.hbase.HBaseConstants;
|
25
|
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
|
26
|
|
27
|
/**
|
28
|
* {@link DocumentSimilarity} based action builder module.
|
29
|
* @author mhorst
|
30
|
*
|
31
|
*/
|
32
|
public class DocumentSimilarityActionBuilderModuleFactory
|
33
|
implements ActionBuilderFactory<DocumentSimilarity> {
|
34
|
|
35
|
private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
|
36
|
|
37
|
private final Logger log = Logger.getLogger(this.getClass());
|
38
|
|
39
|
class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule
|
40
|
implements ActionBuilderModule<DocumentSimilarity> {
|
41
|
|
42
|
private final Float similarityThreshold;
|
43
|
|
44
|
/**
|
45
|
* Default constructor.
|
46
|
* @param predefinedTrust
|
47
|
* @param threshold similarity threshold, skipped when null
|
48
|
*/
|
49
|
public DocumentSimilarityActionBuilderModule(
|
50
|
String predefinedTrust, Float trustLevelThreshold,
|
51
|
Float similarityThreshold) {
|
52
|
super(predefinedTrust, trustLevelThreshold, algorithmName);
|
53
|
this.similarityThreshold = similarityThreshold;
|
54
|
}
|
55
|
|
56
|
@Override
|
57
|
public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
|
58
|
String actionSetId) {
|
59
|
if (object==null) {
|
60
|
return Collections.emptyList();
|
61
|
}
|
62
|
// checking similarity threshold if set
|
63
|
if (similarityThreshold!=null && object.getSimilarity()!=null &&
|
64
|
object.getSimilarity()<similarityThreshold) {
|
65
|
return Collections.emptyList();
|
66
|
}
|
67
|
// setting relations in both source and target objects
|
68
|
List<AtomicAction> simActions = createActions(
|
69
|
object, actionSetId, agent, false);
|
70
|
List<AtomicAction> reverseSimActions = createActions(
|
71
|
object, actionSetId, agent, true);
|
72
|
List<AtomicAction> results = new ArrayList<AtomicAction>();
|
73
|
if (simActions!=null && !simActions.isEmpty()) {
|
74
|
results.addAll(simActions);
|
75
|
}
|
76
|
if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
|
77
|
results.addAll(reverseSimActions);
|
78
|
}
|
79
|
return results;
|
80
|
}
|
81
|
|
82
|
/**
|
83
|
* Creates similarity related puts.
|
84
|
* @param object
|
85
|
* @param actionSet
|
86
|
* @param agent
|
87
|
* @param backwardMode
|
88
|
* @return similarity related puts
|
89
|
*/
|
90
|
protected List<AtomicAction> createActions(DocumentSimilarity object,
|
91
|
String actionSet, Agent agent, boolean backwardMode) {
|
92
|
Oaf oafObjectRel = buildOAFRel(
|
93
|
object.getDocumentId().toString(),
|
94
|
object.getOtherDocumentId().toString(),
|
95
|
object.getSimilarity(), backwardMode);
|
96
|
if (oafObjectRel==null) {
|
97
|
return Collections.emptyList();
|
98
|
}
|
99
|
List<AtomicAction> actionList = new ArrayList<AtomicAction>();
|
100
|
AtomicAction currentAction = actionFactory.createAtomicAction(
|
101
|
actionSet, agent,
|
102
|
backwardMode?
|
103
|
object.getOtherDocumentId().toString():
|
104
|
object.getDocumentId().toString(),
|
105
|
OafDecoder.decode(oafObjectRel).getCFQ(),
|
106
|
backwardMode?
|
107
|
object.getDocumentId().toString():
|
108
|
object.getOtherDocumentId().toString(),
|
109
|
oafObjectRel.toByteArray());
|
110
|
actionList.add(currentAction);
|
111
|
return actionList;
|
112
|
}
|
113
|
|
114
|
/**
|
115
|
* Builds OAF object.
|
116
|
* @param source
|
117
|
* @param target
|
118
|
* @param score
|
119
|
* @param invert flag indicating source and target should be inverted
|
120
|
* @return OAF object
|
121
|
*/
|
122
|
private Oaf buildOAFRel(String sourceId, String targetDocId,
|
123
|
float score, boolean invert) {
|
124
|
OafRel.Builder relBuilder = OafRel.newBuilder();
|
125
|
if (!invert) {
|
126
|
relBuilder.setSource(sourceId);
|
127
|
relBuilder.setTarget(targetDocId);
|
128
|
|
129
|
} else {
|
130
|
relBuilder.setSource(targetDocId);
|
131
|
relBuilder.setTarget(sourceId);
|
132
|
}
|
133
|
String relClass = invert?
|
134
|
Similarity.RelName.isAmongTopNSimilarDocuments.toString():
|
135
|
Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
|
136
|
relBuilder.setChild(false);
|
137
|
relBuilder.setRelType(RelType.resultResult);
|
138
|
relBuilder.setSubRelType(SubRelType.similarity);
|
139
|
relBuilder.setRelClass(relClass);
|
140
|
ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
|
141
|
Similarity.Builder similarityBuilder = Similarity.newBuilder();
|
142
|
similarityBuilder.setRelMetadata(buildRelMetadata(
|
143
|
HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT,
|
144
|
relClass));
|
145
|
similarityBuilder.setSimilarity(score);
|
146
|
similarityBuilder.setType(Type.STANDARD);
|
147
|
resultResultBuilder.setSimilarity(similarityBuilder.build());
|
148
|
relBuilder.setResultResult(resultResultBuilder.build());
|
149
|
|
150
|
Oaf.Builder oafBuilder = Oaf.newBuilder();
|
151
|
oafBuilder.setKind(Kind.relation);
|
152
|
oafBuilder.setRel(relBuilder.build());
|
153
|
oafBuilder.setDataInfo(buildInference());
|
154
|
oafBuilder.setTimestamp(System.currentTimeMillis());
|
155
|
return oafBuilder.build();
|
156
|
}
|
157
|
}
|
158
|
|
159
|
@Override
|
160
|
public ActionBuilderModule<DocumentSimilarity> instantiate(
|
161
|
String predefinedTrust, Float trustLevelThreshold, Configuration config) {
|
162
|
String thresholdStr = config.get(
|
163
|
EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
|
164
|
Float similarityThreshold = null;
|
165
|
if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
|
166
|
thresholdStr)) {
|
167
|
similarityThreshold = Float.valueOf(thresholdStr);
|
168
|
log.warn("setting documents similarity exporter threshold to: " + similarityThreshold);
|
169
|
}
|
170
|
return new DocumentSimilarityActionBuilderModule(
|
171
|
predefinedTrust, trustLevelThreshold, similarityThreshold);
|
172
|
}
|
173
|
|
174
|
@Override
|
175
|
public AlgorithmName getAlgorithName() {
|
176
|
return algorithmName;
|
177
|
}
|
178
|
}
|