1
|
package eu.dnetlib.iis.export.actionmanager.module;
|
2
|
|
3
|
import java.util.ArrayList;
|
4
|
import java.util.Collections;
|
5
|
import java.util.List;
|
6
|
|
7
|
import org.apache.hadoop.conf.Configuration;
|
8
|
import org.apache.log4j.Logger;
|
9
|
|
10
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
11
|
import eu.dnetlib.actionmanager.common.Agent;
|
12
|
import eu.dnetlib.data.mapreduce.util.OafDecoder;
|
13
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
14
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
15
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
16
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
17
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
18
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
|
19
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
|
20
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
|
21
|
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
|
22
|
import eu.dnetlib.iis.common.hbase.HBaseConstants;
|
23
|
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
|
24
|
|
25
|
/**
|
26
|
* {@link DocumentSimilarity} based action builder module.
|
27
|
* @author mhorst
|
28
|
*
|
29
|
*/
|
30
|
public class DocumentSimilarityActionBuilderModuleFactory
|
31
|
implements ActionBuilderFactory<DocumentSimilarity> {
|
32
|
|
33
|
private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
|
34
|
|
35
|
private final Logger log = Logger.getLogger(this.getClass());
|
36
|
|
37
|
class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule
|
38
|
implements ActionBuilderModule<DocumentSimilarity> {
|
39
|
|
40
|
private final Float threshold;
|
41
|
|
42
|
/**
|
43
|
* Default constructor.
|
44
|
* @param predefinedTrust
|
45
|
* @param threshold similarity threshold, skipped when null
|
46
|
*/
|
47
|
public DocumentSimilarityActionBuilderModule(
|
48
|
String predefinedTrust,
|
49
|
Float threshold) {
|
50
|
super(predefinedTrust, algorithmName);
|
51
|
this.threshold = threshold;
|
52
|
}
|
53
|
|
54
|
@Override
|
55
|
public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
|
56
|
String actionSetId) {
|
57
|
if (object==null) {
|
58
|
return Collections.emptyList();
|
59
|
}
|
60
|
// checking similarity threshold if set
|
61
|
if (threshold!=null && object.getSimilarity()!=null &&
|
62
|
object.getSimilarity()<threshold) {
|
63
|
return Collections.emptyList();
|
64
|
}
|
65
|
// setting relations in both source and target objects
|
66
|
List<AtomicAction> simActions = createActions(
|
67
|
object, actionSetId, agent, false);
|
68
|
List<AtomicAction> reverseSimActions = createActions(
|
69
|
object, actionSetId, agent, true);
|
70
|
List<AtomicAction> results = new ArrayList<AtomicAction>();
|
71
|
if (simActions!=null && !simActions.isEmpty()) {
|
72
|
results.addAll(simActions);
|
73
|
}
|
74
|
if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
|
75
|
results.addAll(reverseSimActions);
|
76
|
}
|
77
|
return results;
|
78
|
}
|
79
|
|
80
|
/**
|
81
|
* Creates similarity related puts.
|
82
|
* @param object
|
83
|
* @param actionSet
|
84
|
* @param agent
|
85
|
* @param backwardMode
|
86
|
* @return similarity related puts
|
87
|
*/
|
88
|
protected List<AtomicAction> createActions(DocumentSimilarity object,
|
89
|
String actionSet, Agent agent, boolean backwardMode) {
|
90
|
Oaf oafObjectRel = buildOAFRel(
|
91
|
object.getDocumentId().toString(),
|
92
|
object.getOtherDocumentId().toString(),
|
93
|
object.getSimilarity(), backwardMode);
|
94
|
if (oafObjectRel==null) {
|
95
|
return Collections.emptyList();
|
96
|
}
|
97
|
List<AtomicAction> actionList = new ArrayList<AtomicAction>();
|
98
|
AtomicAction currentAction = actionFactory.createAtomicAction(
|
99
|
actionSet, agent,
|
100
|
backwardMode?
|
101
|
object.getOtherDocumentId().toString():
|
102
|
object.getDocumentId().toString(),
|
103
|
OafDecoder.decode(oafObjectRel).getCFQ(),
|
104
|
backwardMode?
|
105
|
object.getDocumentId().toString():
|
106
|
object.getOtherDocumentId().toString(),
|
107
|
oafObjectRel.toByteArray());
|
108
|
actionList.add(currentAction);
|
109
|
return actionList;
|
110
|
}
|
111
|
|
112
|
/**
|
113
|
* Builds OAF object.
|
114
|
* @param source
|
115
|
* @param target
|
116
|
* @param score
|
117
|
* @param invert flag indicating source and target should be inverted
|
118
|
* @return OAF object
|
119
|
*/
|
120
|
private Oaf buildOAFRel(String sourceId, String targetDocId,
|
121
|
float score, boolean invert) {
|
122
|
OafRel.Builder relBuilder = OafRel.newBuilder();
|
123
|
if (!invert) {
|
124
|
relBuilder.setSource(sourceId);
|
125
|
relBuilder.setTarget(targetDocId);
|
126
|
|
127
|
} else {
|
128
|
relBuilder.setSource(targetDocId);
|
129
|
relBuilder.setTarget(sourceId);
|
130
|
}
|
131
|
String relClass = invert?
|
132
|
Similarity.RelName.isAmongTopNSimilarDocuments.toString():
|
133
|
Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
|
134
|
relBuilder.setChild(false);
|
135
|
relBuilder.setRelType(RelType.resultResult);
|
136
|
relBuilder.setSubRelType(SubRelType.similarity);
|
137
|
relBuilder.setRelClass(relClass);
|
138
|
ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
|
139
|
Similarity.Builder similarityBuilder = Similarity.newBuilder();
|
140
|
similarityBuilder.setRelMetadata(buildRelMetadata(
|
141
|
HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT,
|
142
|
relClass));
|
143
|
similarityBuilder.setSimilarity(score);
|
144
|
similarityBuilder.setType(Type.STANDARD);
|
145
|
resultResultBuilder.setSimilarity(similarityBuilder.build());
|
146
|
relBuilder.setResultResult(resultResultBuilder.build());
|
147
|
|
148
|
Oaf.Builder oafBuilder = Oaf.newBuilder();
|
149
|
oafBuilder.setKind(Kind.relation);
|
150
|
oafBuilder.setRel(relBuilder.build());
|
151
|
oafBuilder.setDataInfo(buildInference());
|
152
|
oafBuilder.setTimestamp(System.currentTimeMillis());
|
153
|
return oafBuilder.build();
|
154
|
}
|
155
|
|
156
|
@Override
|
157
|
public AlgorithmName getAlgorithName() {
|
158
|
return algorithmName;
|
159
|
}
|
160
|
}
|
161
|
|
162
|
@Override
|
163
|
public ActionBuilderModule<DocumentSimilarity> instantiate(
|
164
|
String predefinedTrust, Configuration config) {
|
165
|
String thresholdStr = config.get(
|
166
|
WorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
|
167
|
Float threshold = null;
|
168
|
if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
|
169
|
thresholdStr)) {
|
170
|
threshold = Float.valueOf(thresholdStr);
|
171
|
log.warn("setting documents similarity exporter threshold to: " + threshold);
|
172
|
}
|
173
|
return new DocumentSimilarityActionBuilderModule(
|
174
|
predefinedTrust, threshold);
|
175
|
}
|
176
|
}
|