Project

General

Profile

1
package eu.dnetlib.iis.export.actionmanager.module;
2

    
3
import static eu.dnetlib.iis.export.actionmanager.ExportWorkflowRuntimeParameters.EXPORT_DOCUMENTSSIMILARITY_THRESHOLD;
4

    
5
import java.util.ArrayList;
6
import java.util.Collections;
7
import java.util.List;
8

    
9
import org.apache.hadoop.conf.Configuration;
10
import org.apache.log4j.Logger;
11

    
12
import eu.dnetlib.actionmanager.actions.AtomicAction;
13
import eu.dnetlib.actionmanager.common.Agent;
14
import eu.dnetlib.data.mapreduce.util.OafDecoder;
15
import eu.dnetlib.data.proto.KindProtos.Kind;
16
import eu.dnetlib.data.proto.OafProtos.Oaf;
17
import eu.dnetlib.data.proto.OafProtos.OafRel;
18
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
19
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
20
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
21
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
22
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity.Type;
23
import eu.dnetlib.iis.common.WorkflowRuntimeParameters;
24
import eu.dnetlib.iis.common.hbase.HBaseConstants;
25
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity;
26

    
27
/**
28
 * {@link DocumentSimilarity} based action builder module.
29
 * @author mhorst
30
 *
31
 */
32
public class DocumentSimilarityActionBuilderModuleFactory  
33
	implements ActionBuilderFactory<DocumentSimilarity> {
34

    
35
	private static final AlgorithmName algorithmName = AlgorithmName.document_similarities_standard;
36
	
37
	private final Logger log = Logger.getLogger(this.getClass());
38
	
39
	class DocumentSimilarityActionBuilderModule extends AbstractBuilderModule 
40
	implements ActionBuilderModule<DocumentSimilarity> {
41
	
42
		private final Float similarityThreshold;
43
		
44
		/**
45
		 * Default constructor.
46
		 * @param predefinedTrust
47
		 * @param threshold similarity threshold, skipped when null
48
		 */
49
		public DocumentSimilarityActionBuilderModule(
50
				String predefinedTrust, Float trustLevelThreshold,
51
				Float similarityThreshold) {
52
			super(predefinedTrust, trustLevelThreshold, algorithmName);
53
			this.similarityThreshold = similarityThreshold;
54
		}
55
		
56
		@Override
57
		public List<AtomicAction> build(DocumentSimilarity object, Agent agent,
58
				String actionSetId) {
59
			if (object==null) {
60
				return Collections.emptyList();
61
			}
62
//			checking similarity threshold if set
63
			if (similarityThreshold!=null && object.getSimilarity()!=null &&
64
					object.getSimilarity()<similarityThreshold) {
65
				return Collections.emptyList();
66
			}
67
//			setting relations in both source and target objects
68
			List<AtomicAction> simActions = createActions(
69
					object, actionSetId, agent, false);
70
			List<AtomicAction> reverseSimActions = createActions(
71
					object, actionSetId, agent, true);
72
			List<AtomicAction> results = new ArrayList<AtomicAction>();
73
			if (simActions!=null && !simActions.isEmpty()) {
74
				results.addAll(simActions);
75
			}
76
			if (reverseSimActions!=null && !reverseSimActions.isEmpty()) {
77
				results.addAll(reverseSimActions);
78
			}
79
			return results;
80
		}
81
		
82
		/**
83
		 * Creates similarity related puts.
84
		 * @param object
85
		 * @param actionSet
86
		 * @param agent
87
		 * @param backwardMode
88
		 * @return similarity related puts
89
		 */
90
		protected List<AtomicAction> createActions(DocumentSimilarity object, 
91
				String actionSet, Agent agent, boolean backwardMode) {
92
			Oaf oafObjectRel = buildOAFRel(
93
					object.getDocumentId().toString(), 
94
					object.getOtherDocumentId().toString(),
95
					object.getSimilarity(), backwardMode);
96
			if (oafObjectRel==null) {
97
				return Collections.emptyList();
98
			}
99
			List<AtomicAction> actionList = new ArrayList<AtomicAction>();
100
			AtomicAction currentAction = actionFactory.createAtomicAction(
101
					actionSet, agent, 
102
					backwardMode?
103
							object.getOtherDocumentId().toString():
104
								object.getDocumentId().toString(), 
105
					OafDecoder.decode(oafObjectRel).getCFQ(), 
106
					backwardMode?
107
							object.getDocumentId().toString():
108
								object.getOtherDocumentId().toString(), 
109
					oafObjectRel.toByteArray());
110
			actionList.add(currentAction);
111
			return actionList;
112
		}
113
		
114
		/**
115
		 * Builds OAF object.
116
		 * @param source
117
		 * @param target
118
		 * @param score
119
		 * @param invert flag indicating source and target should be inverted
120
		 * @return OAF object
121
		 */
122
		private Oaf buildOAFRel(String sourceId, String targetDocId, 
123
				float score, boolean invert) {
124
			OafRel.Builder relBuilder = OafRel.newBuilder();
125
			if (!invert) {
126
				relBuilder.setSource(sourceId);
127
				relBuilder.setTarget(targetDocId);
128
				
129
			} else {
130
				relBuilder.setSource(targetDocId);
131
				relBuilder.setTarget(sourceId);
132
			}
133
			String relClass = invert?
134
					Similarity.RelName.isAmongTopNSimilarDocuments.toString():
135
						Similarity.RelName.hasAmongTopNSimilarDocuments.toString();
136
			relBuilder.setChild(false);
137
			relBuilder.setRelType(RelType.resultResult);
138
			relBuilder.setSubRelType(SubRelType.similarity);
139
			relBuilder.setRelClass(relClass);
140
			ResultResult.Builder resultResultBuilder = ResultResult.newBuilder();
141
			Similarity.Builder similarityBuilder = Similarity.newBuilder();
142
			similarityBuilder.setRelMetadata(buildRelMetadata(
143
					HBaseConstants.SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT, 
144
					relClass));
145
			similarityBuilder.setSimilarity(score);
146
			similarityBuilder.setType(Type.STANDARD);
147
			resultResultBuilder.setSimilarity(similarityBuilder.build());
148
			relBuilder.setResultResult(resultResultBuilder.build());
149
			
150
			Oaf.Builder oafBuilder = Oaf.newBuilder();
151
			oafBuilder.setKind(Kind.relation);
152
			oafBuilder.setRel(relBuilder.build());
153
			oafBuilder.setDataInfo(buildInference());
154
			oafBuilder.setTimestamp(System.currentTimeMillis());
155
			return oafBuilder.build();
156
		}
157
	}
158

    
159
	@Override
160
	public ActionBuilderModule<DocumentSimilarity> instantiate(
161
			String predefinedTrust, Float trustLevelThreshold, Configuration config) {
162
		String thresholdStr = config.get(
163
				EXPORT_DOCUMENTSSIMILARITY_THRESHOLD);
164
		Float similarityThreshold = null;
165
		if (thresholdStr!=null && !WorkflowRuntimeParameters.UNDEFINED_NONEMPTY_VALUE.equals(
166
				thresholdStr)) {
167
			similarityThreshold = Float.valueOf(thresholdStr);
168
			log.warn("setting documents similarity exporter threshold to: " + similarityThreshold);
169
		}
170
		return new DocumentSimilarityActionBuilderModule(
171
				predefinedTrust, trustLevelThreshold, similarityThreshold);
172
	}
173
	
174
	@Override
175
	public AlgorithmName getAlgorithName() {
176
		return algorithmName;
177
	}
178
}
(10-10/23)