Project

General

Profile

1
package eu.dnetlib.data.mapreduce.util;
2

    
3
import java.util.List;
4
import java.util.Map;
5
import java.util.Set;
6

    
7
import com.google.common.base.Predicate;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Maps;
11
import com.google.common.collect.Sets;
12
import com.google.protobuf.Descriptors.FieldDescriptor;
13
import com.google.protobuf.Message.Builder;
14

    
15
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
16
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
17
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
18
import eu.dnetlib.data.proto.KindProtos.Kind;
19
import eu.dnetlib.data.proto.OafProtos.Oaf;
20
import eu.dnetlib.data.proto.OafProtos.OafEntity;
21
import eu.dnetlib.data.proto.PersonProtos.Person;
22
import eu.dnetlib.data.proto.ResultProtos.Result;
23
import eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust;
24

    
25
public class OafEntityMerger {
26

    
27
	private final Predicate<StringField> skipEmptyStringField = new Predicate<StringField>() {
28

    
29
		@Override
30
		public boolean apply(StringField s) {
31
			return s != null && s.getValue() != null && !s.getValue().isEmpty();
32
		}
33
	};
34

    
35
	private final Predicate<String> skipEmptyString = new Predicate<String>() {
36

    
37
		@Override
38
		public boolean apply(String s) {
39
			return s != null && !s.isEmpty();
40
		}
41
	};
42

    
43
	public static Oaf.Builder merge(String id, Iterable<Oaf> entities) {
44
		return new OafEntityMerger().mergeEntities(id, entities);
45
	}
46

    
47
	public static Oaf.Builder merge(Oaf.Builder builder) {
48
		return new OafEntityMerger().doMergeEntities(builder);
49
	}
50

    
51
	public Oaf.Builder mergeEntities(String id, Iterable<Oaf> entities) {
52

    
53
		Oaf.Builder builder = Oaf.newBuilder();
54
		String trust = "0.0";
55
		for (Oaf oaf : TrustOrdering.sort(entities)) {
56
			// doublecheck we're dealing only with main entities
57
			if (!oaf.getKind().equals(Kind.entity)) { throw new IllegalArgumentException("expected OafEntity!"); }
58

    
59
			String currentTrust = oaf.getDataInfo().getTrust();
60
			if (!currentTrust.equals(SpecialTrust.NEUTRAL.toString())) {
61
				trust = currentTrust;
62
			}
63
			builder.mergeFrom(oaf);
64
		}
65

    
66
		builder = doMergeEntities(builder);
67
		builder.getEntityBuilder().setId(id);
68
		builder.getDataInfoBuilder().setInferred(true).setDeletedbyinference(false).setTrust(trust);
69

    
70
		return builder;
71
	}
72

    
73
	public Oaf.Builder doMergeEntities(Oaf.Builder builder) {
74

    
75
		switch (builder.getEntity().getType()) {
76
		case datasource:
77
			break;
78
		case organization:
79
			break;
80
		case person:
81
			Person.Metadata.Builder person = builder.getEntityBuilder().getPersonBuilder().getMetadataBuilder();
82
			for (String field : Lists.newArrayList("secondnames")) {
83
				setSingleString(person, field);
84
			}
85
			break;
86
		case project:
87
			break;
88
		case result:
89
			Result.Metadata.Builder result = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
90
			setTitle(result);
91

    
92
			// for (String field : Lists.newArrayList("subject", "relevantdate")) {
93
			for (String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.SUBJECT_FIELD_NUMBER,
94
					Result.Metadata.RELEVANTDATE_FIELD_NUMBER)) {
95
				setStructuredProperty(result, field);
96
			}
97
			for (String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.DESCRIPTION_FIELD_NUMBER)) {
98
				setLongestStringField(result, field);
99
			}
100
			for (String field : OafUtils.getFieldNames(Result.Metadata.getDescriptor(), Result.Metadata.SOURCE_FIELD_NUMBER)) {
101
				setUniqueStringField(result, field);
102
			}
103
			for (String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.COLLECTEDFROM_FIELD_NUMBER)) {
104
				setKeyValues(builder.getEntityBuilder(), field);
105
			}
106
			for (String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.PID_FIELD_NUMBER)) {
107
				setStructuredProperty(builder.getEntityBuilder(), field);
108
			}
109
			for (String field : OafUtils.getFieldNames(OafEntity.getDescriptor(), OafEntity.ORIGINALID_FIELD_NUMBER)) {
110
				setUniqueString(builder.getEntityBuilder(), field);
111
			}
112
			break;
113
		default:
114
			break;
115
		}
116
		return builder;
117
	}
118

    
119
	/**
120
	 * Helper method, avoid duplicated StructuredProperties in the given builder for the given fieldName
121
	 * 
122
	 * @param builder
123
	 * @param fieldName
124
	 */
125
	@SuppressWarnings("unchecked")
126
	private void setStructuredProperty(Builder builder, String fieldName) {
127
		final Map<String, StructuredProperty> map = Maps.newHashMap();
128
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
129
		final List<StructuredProperty> sps = (List<StructuredProperty>) builder.getField(fd);
130

    
131
		if (sps != null && !sps.isEmpty()) {
132
			for (StructuredProperty sp : sps) {
133
				map.put(sp.getValue(), sp);
134
			}
135

    
136
			if (!map.isEmpty()) {
137
				builder.clearField(fd).setField(fd, Lists.newArrayList(map.values()));
138
			}
139
		}
140
	}
141

    
142
	/**
143
	 * Helper method, avoid duplicated KeyValues in the given builder for the given fieldName
144
	 * 
145
	 * @param builder
146
	 * @param fieldName
147
	 */
148
	@SuppressWarnings("unchecked")
149
	private void setKeyValues(Builder builder, String fieldName) {
150
		final Map<String, KeyValue> map = Maps.newHashMap();
151
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
152
		final List<KeyValue> kvs = (List<KeyValue>) builder.getField(fd);
153

    
154
		if (kvs != null && !kvs.isEmpty()) {
155
			for (KeyValue sp : kvs) {
156
				map.put(sp.getKey(), sp);
157
			}
158

    
159
			if (!map.isEmpty()) {
160
				builder.clearField(fd).setField(fd, Lists.newArrayList(map.values()));
161
			}
162
		}
163
	}
164

    
165
	@SuppressWarnings("unchecked")
166
	private void setSingleString(Builder builder, String fieldName) {
167

    
168
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
169
		final List<StringField> field = (List<StringField>) builder.getField(fd);
170
		if (field != null && !field.isEmpty()) {
171
			final StringField s = (StringField) Iterables.getLast(Iterables.filter(field, skipEmptyStringField), "");
172

    
173
			if (s != null && s.getValue() != null && !s.getValue().isEmpty()) {
174
				builder.clearField(fd).setField(fd, Lists.newArrayList(s));
175
			}
176
		}
177
	}
178

    
179
	@SuppressWarnings("unchecked")
180
	private void setLongestStringField(Builder builder, String fieldName) {
181

    
182
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
183
		final List<StringField> field = (List<StringField>) builder.getField(fd);
184

    
185
		if (field != null && !field.isEmpty()) {
186
			StringField.Builder max = StringField.newBuilder().setValue("");
187
			int maxLength = 0;
188
			for (StringField sf : field) {
189
				if (sf.getValue().length() > maxLength) {
190
					maxLength = sf.getValue().length();
191
					max.clear();
192
					max.mergeFrom(sf);
193
				}
194
			}
195

    
196
			builder.clearField(fd).setField(fd, Lists.newArrayList(max.build()));
197
		}
198
	}
199

    
200
	@SuppressWarnings("unchecked")
201
	private void setUniqueStringField(Builder builder, String fieldName) {
202

    
203
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
204
		final List<StringField> field = (List<StringField>) builder.getField(fd);
205
		final Map<String, StringField> map = Maps.newHashMap();
206
		if (field != null && !field.isEmpty()) {
207
			for (StringField s : Iterables.filter(field, skipEmptyStringField)) {
208
				map.put(s.getValue(), s);
209
			}
210

    
211
			builder.clearField(fd).setField(fd, Lists.newArrayList(map.values()));
212
		}
213
	}
214

    
215
	@SuppressWarnings("unchecked")
216
	private void setUniqueString(Builder builder, String fieldName) {
217

    
218
		final FieldDescriptor fd = builder.getDescriptorForType().findFieldByName(fieldName);
219
		final List<String> field = (List<String>) builder.getField(fd);
220
		final Set<String> set = Sets.newHashSet();
221
		if (field != null && !field.isEmpty()) {
222
			for (String s : Iterables.filter(field, skipEmptyString)) {
223
				set.add(s);
224
			}
225

    
226
			builder.clearField(fd).setField(fd, Lists.newArrayList(set));
227
		}
228
	}
229

    
230
	private void setTitle(Result.Metadata.Builder metadata) {
231
		Iterable<StructuredProperty> filtered = Iterables.filter(metadata.getTitleList(), OafUtils.mainTitleFilter());
232

    
233
		if (!Iterables.isEmpty(filtered)) {
234
			metadata.clearTitle().addTitle(Iterables.getLast(filtered));
235
		}
236
	}
237

    
238
}
(3-3/10)