Project

General

Profile

1
package eu.dnetlib.iis.export.actionmanager.module;
2

    
3
import java.util.ArrayList;
4
import java.util.Collections;
5
import java.util.List;
6
import java.util.Map.Entry;
7
import java.util.SortedSet;
8
import java.util.TreeSet;
9

    
10
import org.apache.commons.lang.StringUtils;
11
import org.apache.hadoop.conf.Configuration;
12

    
13
import eu.dnetlib.actionmanager.actions.AtomicAction;
14
import eu.dnetlib.actionmanager.common.Agent;
15
import eu.dnetlib.data.proto.FieldTypeProtos.ExtraInfo;
16
import eu.dnetlib.data.proto.OafProtos.Oaf;
17
import eu.dnetlib.data.proto.OafProtos.OafEntity;
18
import eu.dnetlib.data.proto.TypeProtos.Type;
19
import eu.dnetlib.iis.common.citations.schemas.CitationEntry;
20
import eu.dnetlib.iis.common.hbase.HBaseConstants;
21
import eu.dnetlib.iis.common.model.extrainfo.ExtraInfoConstants;
22
import eu.dnetlib.iis.common.model.extrainfo.citations.BlobCitationEntry;
23
import eu.dnetlib.iis.common.model.extrainfo.citations.TypedId;
24
import eu.dnetlib.iis.common.model.extrainfo.converter.CitationsExtraInfoConverter;
25
import eu.dnetlib.iis.export.schemas.Citations;
26

    
27

    
28
/**
29
 * {@link Citations} based action builder module.
30
 * @author mhorst
31
 *
32
 */
33
public class CitationsActionBuilderModuleFactory 
34
		implements ActionBuilderFactory<Citations> {
35
	
36
	private static final String EXTRA_INFO_NAME = ExtraInfoConstants.NAME_CITATIONS;
37
	private static final String EXTRA_INFO_TYPOLOGY = ExtraInfoConstants.TYPOLOGY_CITATIONS;
38
	
39
	private static final AlgorithmName algorithmName = AlgorithmName.document_referencedDocuments;
40
	
41
	class CitationActionBuilderModule extends AbstractBuilderModule
42
	implements ActionBuilderModule<Citations> {
43
	
44
		CitationsExtraInfoConverter converter = new CitationsExtraInfoConverter();
45
		
46
		/**
47
		 * Default constructor.
48
		 * @param predefinedTrust
49
		 */
50
		public CitationActionBuilderModule(String predefinedTrust) {
51
			super(predefinedTrust, algorithmName);
52
		}
53
	
54
		@Override
55
		public List<AtomicAction> build(Citations object, Agent agent, String actionSetId) {
56
			Oaf oaf = buildOAFCitations(object);
57
			if (oaf!=null) {
58
				return actionFactory.createUpdateActions(
59
						actionSetId,
60
						agent, object.getDocumentId().toString(), Type.result, 
61
						oaf.toByteArray());	
62
			} else {
63
				return Collections.emptyList();
64
			}
65
		}
66
		
67
		/**
68
		 * Builds OAF object containing document statistics.
69
		 * @param source
70
		 * @return OAF object containing document statistics
71
		 */
72
		protected Oaf buildOAFCitations(Citations source) {
73
			if (source.getCitations()!=null && source.getCitations().size()>0) {
74
				OafEntity.Builder entityBuilder = OafEntity.newBuilder();
75
				if (source.getDocumentId()!=null) {
76
					entityBuilder.setId(source.getDocumentId().toString());	
77
				}
78
				ExtraInfo.Builder extraInfoBuilder = ExtraInfo.newBuilder();
79
				extraInfoBuilder.setValue(converter.serialize(
80
						normalize(source.getCitations())));
81
				extraInfoBuilder.setName(EXTRA_INFO_NAME);
82
				extraInfoBuilder.setTypology(EXTRA_INFO_TYPOLOGY);
83
				extraInfoBuilder.setProvenance(this.inferenceProvenance);
84
				extraInfoBuilder.setTrust(getPredefinedTrust());
85
				entityBuilder.addExtraInfo(extraInfoBuilder.build());
86
				entityBuilder.setType(Type.result);
87
				return buildOaf(entityBuilder.build());
88
			}
89
//			fallback
90
			return null;
91
		}
92
		
93
		/**
94
		 * Performs confidence level normalization. Removes empty lists.
95
		 * Removes 50| prefix from publication identifier.
96
		 * @param source
97
		 * @return {@link BlobCitationEntry} objects having confidence level value normalized.
98
		 */
99
		private SortedSet<BlobCitationEntry> normalize(List<CitationEntry> source) {
100
			if (source!=null) {
101
				SortedSet<BlobCitationEntry> results = new TreeSet<BlobCitationEntry>();
102
				for (CitationEntry currentEntry : source) {
103
					if (currentEntry.getExternalDestinationDocumentIds().isEmpty()) {
104
						currentEntry.setExternalDestinationDocumentIds(null);
105
					}
106
					if (currentEntry.getDestinationDocumentId()!=null) {
107
						currentEntry.setDestinationDocumentId(
108
								StringUtils.split(currentEntry.getDestinationDocumentId().toString(), 
109
										HBaseConstants.ROW_PREFIX_SEPARATOR)[1]);
110
					}
111
					results.add(CitationsActionBuilderModuleFactory.build(
112
							currentEntry, getConfidenceToTrustLevelNormalizationFactor()));
113
				}
114
				return results;
115
			} else {
116
				return null;
117
			}
118
		}
119
		
120
		@Override
121
		public AlgorithmName getAlgorithName() {
122
			return algorithmName;
123
		}
124
	}
125

    
126
	@Override
127
	public ActionBuilderModule<Citations> instantiate(String predefinedTrust,
128
			Configuration config) {
129
		return new CitationActionBuilderModule(predefinedTrust);
130
	}
131
	
132
	public static BlobCitationEntry build(CitationEntry entry, float confidenceToTrustLevelFactor) {
133
		BlobCitationEntry result = new BlobCitationEntry(
134
				entry.getRawText()!=null?entry.getRawText().toString():null);
135
		if (entry.getDestinationDocumentId()!=null) {
136
			result.setIdentifiers(new ArrayList<TypedId>());
137
			result.getIdentifiers().add(new TypedId(
138
					entry.getDestinationDocumentId().toString(),
139
					ExtraInfoConstants.CITATION_TYPE_OPENAIRE,
140
					entry.getConfidenceLevel()!=null?
141
							(entry.getConfidenceLevel()*confidenceToTrustLevelFactor):
142
								1f*confidenceToTrustLevelFactor));
143
		}
144
		if (entry.getExternalDestinationDocumentIds()!=null &&
145
				!entry.getExternalDestinationDocumentIds().isEmpty()) {
146
			if (result.getIdentifiers()==null) {
147
				result.setIdentifiers(new ArrayList<TypedId>());	
148
			}
149
			for (Entry<CharSequence, CharSequence> extId : entry.getExternalDestinationDocumentIds().entrySet()) {
150
				result.getIdentifiers().add(new TypedId(
151
						extId.getValue().toString(),
152
						extId.getKey().toString(),
153
						1f*confidenceToTrustLevelFactor));
154
			}
155
		}
156
		return result;
157
	}
158

    
159
}
(9-9/22)