Project

General

Profile

1
package eu.dnetlib.grid.process.utils;
2

    
3
import java.io.FileInputStream;
4
import java.io.FileNotFoundException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7
import java.util.HashSet;
8
import java.util.List;
9
import java.util.Map;
10
import java.util.Set;
11
import java.util.stream.Collectors;
12
import java.util.stream.Stream;
13
import java.util.stream.StreamSupport;
14

    
15
import org.apache.commons.lang3.StringUtils;
16

    
17
import com.fasterxml.jackson.databind.ObjectMapper;
18

    
19
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
20
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
21
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
22
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
23
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
24
import eu.dnetlib.data.proto.KindProtos.Kind;
25
import eu.dnetlib.data.proto.OafProtos.Oaf;
26
import eu.dnetlib.data.proto.OafProtos.OafEntity;
27
import eu.dnetlib.data.proto.OafProtos.OafRel;
28
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
29
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
30
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
31
import eu.dnetlib.data.proto.TypeProtos.Type;
32
import eu.dnetlib.grid.process.model.GridLabel;
33
import eu.dnetlib.grid.process.model.GridOrganization;
34
import eu.dnetlib.grid.process.model.GridRel;
35
import eu.dnetlib.grid.process.model.GridResponse;
36
import eu.dnetlib.miscutils.datetime.DateUtils;
37
import eu.dnetlib.miscutils.functional.hash.Hashing;
38

    
39
public class GridUtils {
40

    
41
	private static final String PRIMARY_TRUST_LEVEL = "0.91";
42

    
43
	private static final String SECONDARY_TRUST_LEVEL = "0.89";
44

    
45
	public static Stream<GridOrganization> streamOrganizations(final String jsonFile) {
46
		try {
47
			return StreamSupport.stream(GridUtils.getOrganizations(new FileInputStream(jsonFile)).spliterator(), false);
48
		} catch (final FileNotFoundException e) {
49
			e.printStackTrace();
50
			return Stream.empty();
51
		}
52
	}
53

    
54
	protected static Iterable<GridOrganization> getOrganizations(final InputStream input) {
55
		try {
56
			return new ObjectMapper().readValue(input, GridResponse.class).getInstitutes();
57
		} catch (final Throwable e) {
58
			e.printStackTrace();
59
			return new ArrayList<>();
60
		}
61
	}
62

    
63
	public static List<Oaf> toProtos(final GridOrganization org, final Map<String, String> parents, final Datasource ds) {
64

    
65
		final String gridId = org.getId();
66
		final String parent = findParentName(org, parents);
67
		final String mainName = applyPrefix(parent, org.getName());
68
		final String shortName = findShortName(org);
69
		final String mainOpenaireId = calculateOpenaireId(ds.getPrefix(), gridId);
70
		final Set<String> alternativeNames = calculateAlternativeNames(org);
71

    
72
		final Map<String, String> orgNames = alternativeNames.stream()
73
				.map(s -> applyPrefix(parent, s))
74
				.collect(Collectors.toMap(
75
						s -> calculateOpenaireId(ds.getPrefix(), gridId, s),
76
						s -> s));
77
		orgNames.put(mainOpenaireId, mainName);
78

    
79
		final Qualifier country = org.getAddresses().stream()
80
				.map(addr -> Qualifier.newBuilder()
81
						.setClassid(addr.getCountry_code())
82
						.setClassname(addr.getCountry())
83
						.setSchemeid("dnet:countries")
84
						.setSchemename("dnet:countries"))
85
				.filter(q -> StringUtils.isNotBlank(q.getClassid()))
86
				.filter(q -> StringUtils.isNotBlank(q.getClassname()))
87
				.map(q -> q.build())
88
				.findFirst()
89
				.orElse(null);
90

    
91
		final KeyValue collectedFrom = KeyValue.newBuilder()
92
				.setKey(ds.getId())
93
				.setValue(ds.getName()).build();
94

    
95
		return orgNames.entrySet()
96
				.stream()
97
				.map(e -> toProtos(e.getKey(),
98
						gridId,
99
						e.getValue(),
100
						shortName,
101
						org.getLinks().stream().findFirst().orElse(""),
102
						country,
103
						e.getKey().equals(mainOpenaireId) ? alternativeNames : new HashSet<>(),
104
						orgNames.keySet(),
105
						collectedFrom,
106
						e.getValue().equals(mainName) ? PRIMARY_TRUST_LEVEL : SECONDARY_TRUST_LEVEL))
107
				.flatMap(l -> l.stream())
108
				.collect(Collectors.toList());
109
	}
110

    
111
	private static List<Oaf> toProtos(final String openaireId,
112
			final String gridId,
113
			final String name,
114
			final String shortName,
115
			final String url,
116
			final Qualifier country,
117
			final Set<String> alternativeNames,
118
			final Set<String> orgRels,
119
			final KeyValue collectedFrom,
120
			final String trust) {
121

    
122
		final OafEntity.Builder entity = OafEntity.newBuilder()
123
				.setId(openaireId)
124
				.addPid(StructuredProperty.newBuilder()
125
						.setValue(gridId)
126
						.setQualifier(Qualifier.newBuilder()
127
								.setClassid("grid")
128
								.setClassname("grid")
129
								.setSchemeid("dnet:pid_types")
130
								.setSchemename("dnet:pid_types")))
131
				.addCollectedfrom(collectedFrom)
132
				.setType(Type.organization)
133
				.setOrganization(Organization.newBuilder()
134
						.setMetadata(Organization.Metadata.newBuilder()
135
								.setLegalname(StringField.newBuilder().setValue(name))
136
								.setLegalshortname(StringField.newBuilder().setValue(shortName))
137
								.setWebsiteurl(StringField.newBuilder().setValue(url))
138
								.addAllAlternativeNames(
139
										alternativeNames.stream().map(a -> StringField.newBuilder().setValue(a).build()).collect(Collectors.toList()))
140
								.setCountry(country)));
141

    
142
		// Relations
143
		final List<Oaf> oafs = orgRels.stream()
144
				.filter(id -> !id.equals(openaireId))
145
				.map(id -> Oaf.newBuilder()
146
						.setKind(Kind.relation)
147
						.setLastupdatetimestamp(DateUtils.now())
148
						.setRel(OafRel.newBuilder()
149
								.setSource(openaireId)
150
								.setTarget(id)
151
								.setRelType(RelType.organizationOrganization)
152
								.setSubRelType(SubRelType.dedupSimilarity)
153
								.setRelClass("isSimilarTo")
154
								.setChild(false))
155
						.build())
156
				.collect(Collectors.toList());
157

    
158
		// Entity
159
		oafs.add(Oaf.newBuilder()
160
				.setKind(Kind.entity)
161
				.setLastupdatetimestamp(DateUtils.now())
162
				.setEntity(entity)
163
				.setDataInfo(DataInfo.newBuilder()
164
						.setTrust(trust)
165
						.setInferred(false)
166
						.setProvenanceaction(Qualifier.newBuilder()
167
								.setClassid("UNKNOWN")
168
								.setClassname("UNKNOWN")
169
								.setSchemeid("dnet:provenanceActions")
170
								.setSchemename("dnet:provenanceActions")))
171
				.build());
172

    
173
		return oafs;
174
	}
175

    
176
	private static String calculateOpenaireId(final String prefix, final String gridId) {
177
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId));
178
	}
179

    
180
	private static String calculateOpenaireId(final String prefix, final String gridId, final String name) {
181
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId + " " + name));
182
	}
183

    
184
	private static String applyPrefix(final String parent, final String simpleName) {
185
		return StringUtils.isBlank(parent) ? simpleName : parent + " - " + simpleName;
186
	}
187

    
188
	private static Set<String> calculateAlternativeNames(final GridOrganization org) {
189
		final Set<String> res = org.getLabels().stream()
190
				.map(GridLabel::getLabel)
191
				.collect(Collectors.toSet());
192
		res.addAll(org.getAcronyms());
193
		return res;
194
	}
195

    
196
	public static String findShortName(final GridOrganization org) {
197
		return org.getAcronyms()
198
				.stream()
199
				.filter(StringUtils::isNotBlank)
200
				.findFirst()
201
				.orElse(org.getName());
202
	}
203

    
204
	private static String findParentName(final GridOrganization org, final Map<String, String> parents) {
205
		return org.getRelationships()
206
				.stream()
207
				.filter(r -> r.getType().equalsIgnoreCase("Parent"))
208
				.map(GridRel::getId)
209
				.map(parents::get)
210
				.filter(StringUtils::isNotBlank)
211
				.findFirst()
212
				.orElse(null);
213
	}
214
}
(3-3/3)