Project

General

Profile

1 17911 marek.hors
package eu.dnetlib.iis.export.actionmanager.module;
2
3 22191 marek.hors
import java.util.ArrayList;
4
import java.util.Collections;
5 21553 marek.hors
import java.util.List;
6
7 28145 marek.hors
import org.apache.hadoop.conf.Configuration;
8 28149 marek.hors
import org.apache.log4j.Logger;
9 28145 marek.hors
10 25095 marek.hors
import eu.dnetlib.actionmanager.actions.AtomicAction;
11 17911 marek.hors
import eu.dnetlib.actionmanager.common.Agent;
12 28141 marek.hors
import eu.dnetlib.data.mapreduce.util.OafDecoder;
13 22191 marek.hors
import eu.dnetlib.data.proto.KindProtos.Kind;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17 28141 marek.hors
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
18
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
19
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
20
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
21 28147 marek.hors
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
22 27589 marek.hors
import eu.dnetlib.iis.common.hbase.HBaseConstants;
23 21974 marek.hors
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
24 17911 marek.hors
25
/**
26
 * {@link DocumentSimilarity} based action builder module.
27
 * @author mhorst
28
 *
29
 */
30 26236 marek.hors
public class DocumentSimilarityActionBuilderModuleFactory
31
	implements ActionBuilderFactory<DocumentSimilarity> {
32 17911 marek.hors
33 28234 marek.hors
	private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
34
35 28149 marek.hors
	private final Logger log = Logger.getLogger(this.getClass());
36 22684 marek.hors
37 26236 marek.hors
	class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule
38
	implements ActionBuilderModule<DocumentSimilarity> {
39
40 28147 marek.hors
		private final Float threshold;
41
42 26236 marek.hors
		/**
43
		 * Default constructor.
44
		 * @param predefinedTrust
45 28147 marek.hors
		 * @param threshold similarity threshold, skipped when null
46 26236 marek.hors
		 */
47
		public DocumentSimilarityActionBuilderModule(
48
				String predefinedTrust,
49 28147 marek.hors
				Float threshold) {
50 30163 marek.hors
			super(predefinedTrust, algorithmName);
51 28147 marek.hors
			this.threshold = threshold;
52 22191 marek.hors
		}
53 26236 marek.hors
54
		@Override
55 30163 marek.hors
		public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
56
				String actionSetId) {
57 26236 marek.hors
			if (object==null) {
58
				return Collections.emptyList();
59
			}
60 28147 marek.hors
//			checking similarity threshold if set
61
			if (threshold!=null && object.getSimilarity()!=null &&
62
					object.getSimilarity()<threshold) {
63
				return Collections.emptyList();
64
			}
65
//			setting relations in both source and target objects
66 26236 marek.hors
			List<AtomicAction> simActions = createActions(
67
					object, actionSetId, agent, false);
68
			List<AtomicAction> reverseSimActions = createActions(
69
					object, actionSetId, agent, true);
70
			List<AtomicAction> results = new ArrayList<AtomicAction>();
71
			if (simActions!=null && !simActions.isEmpty()) {
72
				results.addAll(simActions);
73
			}
74
			if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
75
				results.addAll(reverseSimActions);
76
			}
77
			return results;
78 22191 marek.hors
		}
79 26236 marek.hors
80
		/**
81
		 * Creates similarity related puts.
82
		 * @param object
83
		 * @param actionSet
84
		 * @param agent
85
		 * @param backwardMode
86
		 * @return similarity related puts
87
		 */
88
		protected List<AtomicAction> createActions(DocumentSimilarity object,
89
				String actionSet, Agent agent, boolean backwardMode) {
90
			Oaf oafObjectRel = buildOAFRel(
91
					object.getDocumentId().toString(),
92
					object.getOtherDocumentId().toString(),
93
					object.getSimilarity(), backwardMode);
94
			if (oafObjectRel==null) {
95
				return Collections.emptyList();
96
			}
97
			List<AtomicAction> actionList = new ArrayList<AtomicAction>();
98
			AtomicAction currentAction = actionFactory.createAtomicAction(
99
					actionSet, agent,
100
					backwardMode?
101
							object.getOtherDocumentId().toString():
102
								object.getDocumentId().toString(),
103 31011 marek.hors
					OafDecoder.decode(oafObjectRel).getCFQ(),
104 26236 marek.hors
					backwardMode?
105
							object.getDocumentId().toString():
106
								object.getOtherDocumentId().toString(),
107
					oafObjectRel.toByteArray());
108
			actionList.add(currentAction);
109
			return actionList;
110 22523 marek.hors
		}
111 26236 marek.hors
112
		/**
113
		 * Builds OAF object.
114
		 * @param source
115
		 * @param target
116
		 * @param score
117
		 * @param invert flag indicating source and target should be inverted
118
		 * @return OAF object
119
		 */
120
		private Oaf buildOAFRel(String sourceId, String targetDocId,
121
				float score, boolean invert) {
122
			OafRel.Builder relBuilder = OafRel.newBuilder();
123
			if (!invert) {
124
				relBuilder.setSource(sourceId);
125
				relBuilder.setTarget(targetDocId);
126
127
			} else {
128
				relBuilder.setSource(targetDocId);
129
				relBuilder.setTarget(sourceId);
130
			}
131 28141 marek.hors
			String relClass = invert?
132
					Similarity.RelName.isAmongTopNSimilarDocuments.toString():
133
						Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
134 26236 marek.hors
			relBuilder.setChild(false);
135 28141 marek.hors
			relBuilder.setRelType(RelType.resultResult);
136
			relBuilder.setSubRelType(SubRelType.similarity);
137
			relBuilder.setRelClass(relClass);
138
			ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
139
			Similarity.Builder similarityBuilder = Similarity.newBuilder();
140
			similarityBuilder.setRelMetadata(buildRelMetadata(
141
					HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT,
142
					relClass));
143
			similarityBuilder.setSimilarity(score);
144
			similarityBuilder.setType(Type.STANDARD);
145
			resultResultBuilder.setSimilarity(similarityBuilder.build());
146
			relBuilder.setResultResult(resultResultBuilder.build());
147
148
			Oaf.Builder oafBuilder = Oaf.newBuilder();
149 26236 marek.hors
			oafBuilder.setKind(Kind.relation);
150
			oafBuilder.setRel(relBuilder.build());
151
			oafBuilder.setDataInfo(buildInference());
152
			oafBuilder.setTimestamp(System.currentTimeMillis());
153
			return oafBuilder.build();
154 22191 marek.hors
		}
155 30163 marek.hors
156
		@Override
157
		public AlgorithmName getAlgorithName() {
158
			return algorithmName;
159
		}
160 22191 marek.hors
	}
161
162
	@Override
163 26236 marek.hors
	public ActionBuilderModule<DocumentSimilarity> instantiate(
164 30163 marek.hors
			String predefinedTrust, Configuration config) {
165 28147 marek.hors
		String thresholdStr = config.get(
166
				WorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
167
		Float threshold = null;
168
		if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
169
				thresholdStr)) {
170
			threshold = Float.valueOf(thresholdStr);
171 28149 marek.hors
			log.warn("setting documents similarity exporter threshold to: " + threshold);
172 28147 marek.hors
		}
173 26236 marek.hors
		return new DocumentSimilarityActionBuilderModule(
174 30163 marek.hors
				predefinedTrust, threshold);
175 22191 marek.hors
	}
176 17911 marek.hors
}