Project

General

Profile

1
package eu.dnetlib.iis.export.actionmanager.module;
2

    
3
import java.util.ArrayList;
4
import java.util.Collections;
5
import java.util.List;
6

    
7
import org.apache.hadoop.conf.Configuration;
8
import org.apache.log4j.Logger;
9

    
10
import eu.dnetlib.actionmanager.actions.AtomicAction;
11
import eu.dnetlib.actionmanager.common.Agent;
12
import eu.dnetlib.data.mapreduce.util.OafDecoder;
13
import eu.dnetlib.data.proto.KindProtos.Kind;
14
import eu.dnetlib.data.proto.OafProtos.Oaf;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
18
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
19
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
20
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
21
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
22
import eu.dnetlib.iis.common.hbase.HBaseConstants;
23
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
24

    
25
/**
26
 * {@link DocumentSimilarity} based action builder module.
27
 * @author mhorst
28
 *
29
 */
30
public class DocumentSimilarityActionBuilderModuleFactory  
31
	implements ActionBuilderFactory<DocumentSimilarity> {
32

    
33
	private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
34
	
35
	private final Logger log = Logger.getLogger(this.getClass());
36
	
37
	class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule 
38
	implements ActionBuilderModule<DocumentSimilarity> {
39
	
40
		private final Float threshold;
41
		
42
		/**
43
		 * Default constructor.
44
		 * @param predefinedTrust
45
		 * @param threshold similarity threshold, skipped when null
46
		 */
47
		public DocumentSimilarityActionBuilderModule(
48
				String predefinedTrust,
49
				Float threshold) {
50
			super(predefinedTrust, algorithmName);
51
			this.threshold = threshold;
52
		}
53
		
54
		@Override
55
		public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
56
				String actionSetId) {
57
			if (object==null) {
58
				return Collections.emptyList();
59
			}
60
//			checking similarity threshold if set
61
			if (threshold!=null && object.getSimilarity()!=null &&
62
					object.getSimilarity()<threshold) {
63
				return Collections.emptyList();
64
			}
65
//			setting relations in both source and target objects
66
			List<AtomicAction> simActions = createActions(
67
					object, actionSetId, agent, false);
68
			List<AtomicAction> reverseSimActions = createActions(
69
					object, actionSetId, agent, true);
70
			List<AtomicAction> results = new ArrayList<AtomicAction>();
71
			if (simActions!=null && !simActions.isEmpty()) {
72
				results.addAll(simActions);
73
			}
74
			if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
75
				results.addAll(reverseSimActions);
76
			}
77
			return results;
78
		}
79
		
80
		/**
81
		 * Creates similarity related puts.
82
		 * @param object
83
		 * @param actionSet
84
		 * @param agent
85
		 * @param backwardMode
86
		 * @return similarity related puts
87
		 */
88
		protected List<AtomicAction> createActions(DocumentSimilarity object, 
89
				String actionSet, Agent agent, boolean backwardMode) {
90
			Oaf oafObjectRel = buildOAFRel(
91
					object.getDocumentId().toString(), 
92
					object.getOtherDocumentId().toString(),
93
					object.getSimilarity(), backwardMode);
94
			if (oafObjectRel==null) {
95
				return Collections.emptyList();
96
			}
97
			List<AtomicAction> actionList = new ArrayList<AtomicAction>();
98
			AtomicAction currentAction = actionFactory.createAtomicAction(
99
					actionSet, agent, 
100
					backwardMode?
101
							object.getOtherDocumentId().toString():
102
								object.getDocumentId().toString(), 
103
					OafDecoder.decode(oafObjectRel).getCFQ(), 
104
					backwardMode?
105
							object.getDocumentId().toString():
106
								object.getOtherDocumentId().toString(), 
107
					oafObjectRel.toByteArray());
108
			actionList.add(currentAction);
109
			return actionList;
110
		}
111
		
112
		/**
113
		 * Builds OAF object.
114
		 * @param source
115
		 * @param target
116
		 * @param score
117
		 * @param invert flag indicating source and target should be inverted
118
		 * @return OAF object
119
		 */
120
		private Oaf buildOAFRel(String sourceId, String targetDocId, 
121
				float score, boolean invert) {
122
			OafRel.Builder relBuilder = OafRel.newBuilder();
123
			if (!invert) {
124
				relBuilder.setSource(sourceId);
125
				relBuilder.setTarget(targetDocId);
126
				
127
			} else {
128
				relBuilder.setSource(targetDocId);
129
				relBuilder.setTarget(sourceId);
130
			}
131
			String relClass = invert?
132
					Similarity.RelName.isAmongTopNSimilarDocuments.toString():
133
						Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
134
			relBuilder.setChild(false);
135
			relBuilder.setRelType(RelType.resultResult);
136
			relBuilder.setSubRelType(SubRelType.similarity);
137
			relBuilder.setRelClass(relClass);
138
			ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
139
			Similarity.Builder similarityBuilder = Similarity.newBuilder();
140
			similarityBuilder.setRelMetadata(buildRelMetadata(
141
					HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT, 
142
					relClass));
143
			similarityBuilder.setSimilarity(score);
144
			similarityBuilder.setType(Type.STANDARD);
145
			resultResultBuilder.setSimilarity(similarityBuilder.build());
146
			relBuilder.setResultResult(resultResultBuilder.build());
147
			
148
			Oaf.Builder oafBuilder = Oaf.newBuilder();
149
			oafBuilder.setKind(Kind.relation);
150
			oafBuilder.setRel(relBuilder.build());
151
			oafBuilder.setDataInfo(buildInference());
152
			oafBuilder.setTimestamp(System.currentTimeMillis());
153
			return oafBuilder.build();
154
		}
155
		
156
		@Override
157
		public AlgorithmName getAlgorithName() {
158
			return algorithmName;
159
		}
160
	}
161

    
162
	@Override
163
	public ActionBuilderModule<DocumentSimilarity> instantiate(
164
			String predefinedTrust, Configuration config) {
165
		String thresholdStr = config.get(
166
				WorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
167
		Float threshold = null;
168
		if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
169
				thresholdStr)) {
170
			threshold = Float.valueOf(thresholdStr);
171
			log.warn("setting documents similarity exporter threshold to: " + threshold);
172
		}
173
		return new DocumentSimilarityActionBuilderModule(
174
				predefinedTrust, threshold);
175
	}
176
}
(11-11/22)