1 |
17911
|
marek.hors
|
package eu.dnetlib.iis.export.actionmanager.module;
|
2 |
|
|
|
3 |
22191
|
marek.hors
|
import java.util.ArrayList;
|
4 |
|
|
import java.util.Collections;
|
5 |
21553
|
marek.hors
|
import java.util.List;
|
6 |
|
|
|
7 |
28145
|
marek.hors
|
import org.apache.hadoop.conf.Configuration;
|
8 |
28149
|
marek.hors
|
import org.apache.log4j.Logger;
|
9 |
28145
|
marek.hors
|
|
10 |
25095
|
marek.hors
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
11 |
17911
|
marek.hors
|
import eu.dnetlib.actionmanager.common.Agent;
|
12 |
28141
|
marek.hors
|
import eu.dnetlib.data.mapreduce.util.OafDecoder;
|
13 |
22191
|
marek.hors
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
14 |
|
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
15 |
|
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
16 |
|
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
17 |
28141
|
marek.hors
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
18 |
|
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
|
19 |
|
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
|
20 |
|
|
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
|
21 |
28147
|
marek.hors
|
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
|
22 |
27589
|
marek.hors
|
import eu.dnetlib.iis.common.hbase.HBaseConstants;
|
23 |
21974
|
marek.hors
|
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
|
24 |
17911
|
marek.hors
|
|
25 |
|
|
/**
|
26 |
|
|
* {@link DocumentSimilarity} based action builder module.
|
27 |
|
|
* @author mhorst
|
28 |
|
|
*
|
29 |
|
|
*/
|
30 |
26236
|
marek.hors
|
public class DocumentSimilarityActionBuilderModuleFactory
|
31 |
|
|
implements ActionBuilderFactory<DocumentSimilarity> {
|
32 |
17911
|
marek.hors
|
|
33 |
28234
|
marek.hors
|
private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
|
34 |
|
|
|
35 |
28149
|
marek.hors
|
private final Logger log = Logger.getLogger(this.getClass());
|
36 |
22684
|
marek.hors
|
|
37 |
26236
|
marek.hors
|
class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule
|
38 |
|
|
implements ActionBuilderModule<DocumentSimilarity> {
|
39 |
|
|
|
40 |
28147
|
marek.hors
|
private final Float threshold;
|
41 |
|
|
|
42 |
26236
|
marek.hors
|
/**
|
43 |
|
|
* Default constructor.
|
44 |
|
|
* @param predefinedTrust
|
45 |
28147
|
marek.hors
|
* @param threshold similarity threshold, skipped when null
|
46 |
26236
|
marek.hors
|
*/
|
47 |
|
|
public DocumentSimilarityActionBuilderModule(
|
48 |
|
|
String predefinedTrust,
|
49 |
28147
|
marek.hors
|
Float threshold) {
|
50 |
30163
|
marek.hors
|
super(predefinedTrust, algorithmName);
|
51 |
28147
|
marek.hors
|
this.threshold = threshold;
|
52 |
22191
|
marek.hors
|
}
|
53 |
26236
|
marek.hors
|
|
54 |
|
|
@Override
|
55 |
30163
|
marek.hors
|
public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
|
56 |
|
|
String actionSetId) {
|
57 |
26236
|
marek.hors
|
if (object==null) {
|
58 |
|
|
return Collections.emptyList();
|
59 |
|
|
}
|
60 |
28147
|
marek.hors
|
// checking similarity threshold if set
|
61 |
|
|
if (threshold!=null && object.getSimilarity()!=null &&
|
62 |
|
|
object.getSimilarity()<threshold) {
|
63 |
|
|
return Collections.emptyList();
|
64 |
|
|
}
|
65 |
|
|
// setting relations in both source and target objects
|
66 |
26236
|
marek.hors
|
List<AtomicAction> simActions = createActions(
|
67 |
|
|
object, actionSetId, agent, false);
|
68 |
|
|
List<AtomicAction> reverseSimActions = createActions(
|
69 |
|
|
object, actionSetId, agent, true);
|
70 |
|
|
List<AtomicAction> results = new ArrayList<AtomicAction>();
|
71 |
|
|
if (simActions!=null && !simActions.isEmpty()) {
|
72 |
|
|
results.addAll(simActions);
|
73 |
|
|
}
|
74 |
|
|
if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
|
75 |
|
|
results.addAll(reverseSimActions);
|
76 |
|
|
}
|
77 |
|
|
return results;
|
78 |
22191
|
marek.hors
|
}
|
79 |
26236
|
marek.hors
|
|
80 |
|
|
/**
|
81 |
|
|
* Creates similarity related puts.
|
82 |
|
|
* @param object
|
83 |
|
|
* @param actionSet
|
84 |
|
|
* @param agent
|
85 |
|
|
* @param backwardMode
|
86 |
|
|
* @return similarity related puts
|
87 |
|
|
*/
|
88 |
|
|
protected List<AtomicAction> createActions(DocumentSimilarity object,
|
89 |
|
|
String actionSet, Agent agent, boolean backwardMode) {
|
90 |
|
|
Oaf oafObjectRel = buildOAFRel(
|
91 |
|
|
object.getDocumentId().toString(),
|
92 |
|
|
object.getOtherDocumentId().toString(),
|
93 |
|
|
object.getSimilarity(), backwardMode);
|
94 |
|
|
if (oafObjectRel==null) {
|
95 |
|
|
return Collections.emptyList();
|
96 |
|
|
}
|
97 |
|
|
List<AtomicAction> actionList = new ArrayList<AtomicAction>();
|
98 |
|
|
AtomicAction currentAction = actionFactory.createAtomicAction(
|
99 |
|
|
actionSet, agent,
|
100 |
|
|
backwardMode?
|
101 |
|
|
object.getOtherDocumentId().toString():
|
102 |
|
|
object.getDocumentId().toString(),
|
103 |
31011
|
marek.hors
|
OafDecoder.decode(oafObjectRel).getCFQ(),
|
104 |
26236
|
marek.hors
|
backwardMode?
|
105 |
|
|
object.getDocumentId().toString():
|
106 |
|
|
object.getOtherDocumentId().toString(),
|
107 |
|
|
oafObjectRel.toByteArray());
|
108 |
|
|
actionList.add(currentAction);
|
109 |
|
|
return actionList;
|
110 |
22523
|
marek.hors
|
}
|
111 |
26236
|
marek.hors
|
|
112 |
|
|
/**
|
113 |
|
|
* Builds OAF object.
|
114 |
|
|
* @param source
|
115 |
|
|
* @param target
|
116 |
|
|
* @param score
|
117 |
|
|
* @param invert flag indicating source and target should be inverted
|
118 |
|
|
* @return OAF object
|
119 |
|
|
*/
|
120 |
|
|
private Oaf buildOAFRel(String sourceId, String targetDocId,
|
121 |
|
|
float score, boolean invert) {
|
122 |
|
|
OafRel.Builder relBuilder = OafRel.newBuilder();
|
123 |
|
|
if (!invert) {
|
124 |
|
|
relBuilder.setSource(sourceId);
|
125 |
|
|
relBuilder.setTarget(targetDocId);
|
126 |
|
|
|
127 |
|
|
} else {
|
128 |
|
|
relBuilder.setSource(targetDocId);
|
129 |
|
|
relBuilder.setTarget(sourceId);
|
130 |
|
|
}
|
131 |
28141
|
marek.hors
|
String relClass = invert?
|
132 |
|
|
Similarity.RelName.isAmongTopNSimilarDocuments.toString():
|
133 |
|
|
Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
|
134 |
26236
|
marek.hors
|
relBuilder.setChild(false);
|
135 |
28141
|
marek.hors
|
relBuilder.setRelType(RelType.resultResult);
|
136 |
|
|
relBuilder.setSubRelType(SubRelType.similarity);
|
137 |
|
|
relBuilder.setRelClass(relClass);
|
138 |
|
|
ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
|
139 |
|
|
Similarity.Builder similarityBuilder = Similarity.newBuilder();
|
140 |
|
|
similarityBuilder.setRelMetadata(buildRelMetadata(
|
141 |
|
|
HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT,
|
142 |
|
|
relClass));
|
143 |
|
|
similarityBuilder.setSimilarity(score);
|
144 |
|
|
similarityBuilder.setType(Type.STANDARD);
|
145 |
|
|
resultResultBuilder.setSimilarity(similarityBuilder.build());
|
146 |
|
|
relBuilder.setResultResult(resultResultBuilder.build());
|
147 |
|
|
|
148 |
|
|
Oaf.Builder oafBuilder = Oaf.newBuilder();
|
149 |
26236
|
marek.hors
|
oafBuilder.setKind(Kind.relation);
|
150 |
|
|
oafBuilder.setRel(relBuilder.build());
|
151 |
|
|
oafBuilder.setDataInfo(buildInference());
|
152 |
|
|
oafBuilder.setTimestamp(System.currentTimeMillis());
|
153 |
|
|
return oafBuilder.build();
|
154 |
22191
|
marek.hors
|
}
|
155 |
30163
|
marek.hors
|
|
156 |
|
|
@Override
|
157 |
|
|
public AlgorithmName getAlgorithName() {
|
158 |
|
|
return algorithmName;
|
159 |
|
|
}
|
160 |
22191
|
marek.hors
|
}
|
161 |
|
|
|
162 |
|
|
@Override
|
163 |
26236
|
marek.hors
|
public ActionBuilderModule<DocumentSimilarity> instantiate(
|
164 |
30163
|
marek.hors
|
String predefinedTrust, Configuration config) {
|
165 |
28147
|
marek.hors
|
String thresholdStr = config.get(
|
166 |
|
|
WorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
|
167 |
|
|
Float threshold = null;
|
168 |
|
|
if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
|
169 |
|
|
thresholdStr)) {
|
170 |
|
|
threshold = Float.valueOf(thresholdStr);
|
171 |
28149
|
marek.hors
|
log.warn("setting documents similarity exporter threshold to: " + threshold);
|
172 |
28147
|
marek.hors
|
}
|
173 |
26236
|
marek.hors
|
return new DocumentSimilarityActionBuilderModule(
|
174 |
30163
|
marek.hors
|
predefinedTrust, threshold);
|
175 |
22191
|
marek.hors
|
}
|
176 |
17911
|
marek.hors
|
}
|