Project

General

Profile

1
package eu.dnetlib.grid.process.utils;
2

    
3
import java.io.FileInputStream;
4
import java.io.FileNotFoundException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Set;
10
import java.util.stream.Collectors;
11
import java.util.stream.Stream;
12
import java.util.stream.StreamSupport;
13

    
14
import org.apache.commons.lang3.StringUtils;
15

    
16
import com.fasterxml.jackson.databind.ObjectMapper;
17

    
18
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
19
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
20
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
21
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
22
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
23
import eu.dnetlib.data.proto.KindProtos.Kind;
24
import eu.dnetlib.data.proto.OafProtos.Oaf;
25
import eu.dnetlib.data.proto.OafProtos.OafEntity;
26
import eu.dnetlib.data.proto.OafProtos.OafRel;
27
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
28
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
29
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
30
import eu.dnetlib.data.proto.TypeProtos.Type;
31
import eu.dnetlib.grid.process.model.GridLabel;
32
import eu.dnetlib.grid.process.model.GridOrganization;
33
import eu.dnetlib.grid.process.model.GridRel;
34
import eu.dnetlib.grid.process.model.GridResponse;
35
import eu.dnetlib.miscutils.datetime.DateUtils;
36
import eu.dnetlib.miscutils.functional.hash.Hashing;
37

    
38
public class GridUtils {
39

    
40
	public static Stream<GridOrganization> streamOrganizations(final String jsonFile) {
41
		try {
42
			return StreamSupport.stream(GridUtils.getOrganizations(new FileInputStream(jsonFile)).spliterator(), false);
43
		} catch (final FileNotFoundException e) {
44
			e.printStackTrace();
45
			return Stream.empty();
46
		}
47
	}
48

    
49
	protected static Iterable<GridOrganization> getOrganizations(final InputStream input) {
50
		try {
51
			return new ObjectMapper().readValue(input, GridResponse.class).getInstitutes();
52
		} catch (final Throwable e) {
53
			e.printStackTrace();
54
			return new ArrayList<>();
55
		}
56
	}
57

    
58
	public static List<Oaf> toProtos(final GridOrganization org, final Map<String, String> parents, final Datasource ds) {
59

    
60
		final String gridId = org.getId();
61
		final String parent = findParentName(org, parents);
62
		final String mainName = calculateName(parent, org.getName());
63
		final String shortName = findShortName(org);
64
		final String mainOpenaireId = calculateOpenaireId(ds.getPrefix(), gridId, mainName);
65

    
66
		final Map<String, String> orgNames = calculateAlternativeNames(parent, org)
67
				.stream()
68
				.collect(Collectors.toMap(
69
						s -> calculateOpenaireId(ds.getPrefix(), gridId, s),
70
						s -> s));
71
		orgNames.put(mainOpenaireId, mainName);
72

    
73
		final Qualifier country = org.getAddresses().stream()
74
				.map(addr -> Qualifier.newBuilder()
75
						.setClassid(addr.getCountry_code())
76
						.setClassname(addr.getCountry())
77
						.setSchemeid("dnet:countries")
78
						.setSchemename("dnet:countries"))
79
				.filter(q -> StringUtils.isNotBlank(q.getClassid()))
80
				.filter(q -> StringUtils.isNotBlank(q.getClassname()))
81
				.map(q -> q.build())
82
				.findFirst()
83
				.orElse(null);
84

    
85
		final KeyValue collectedFrom = KeyValue.newBuilder()
86
				.setKey(ds.getId())
87
				.setValue(ds.getName()).build();
88

    
89
		return orgNames.entrySet()
90
				.stream()
91
				.map(e -> toProtos(e.getKey(),
92
						gridId,
93
						e.getValue(),
94
						shortName,
95
						org.getLinks().stream().findFirst().orElse(""),
96
						country,
97
						e.getKey().equals(mainOpenaireId) ? org.getAcronyms() : new ArrayList<>(),
98
						orgNames.keySet(),
99
						collectedFrom))
100
				.flatMap(l -> l.stream())
101
				.collect(Collectors.toList());
102
	}
103

    
104
	private static List<Oaf> toProtos(final String openaireId,
105
			final String gridId,
106
			final String name,
107
			final String shortName,
108
			final String url,
109
			final Qualifier country,
110
			final List<String> acronyms,
111
			final Set<String> orgRels,
112
			final KeyValue collectedFrom) {
113

    
114
		final OafEntity.Builder entity = OafEntity.newBuilder()
115
				.setId(openaireId)
116
				.addPid(StructuredProperty.newBuilder()
117
						.setValue(gridId)
118
						.setQualifier(Qualifier.newBuilder()
119
								.setClassid("grid")
120
								.setClassname("grid")
121
								.setSchemeid("dnet:pid_types")
122
								.setSchemename("dnet:pid_types")))
123
				.addCollectedfrom(collectedFrom)
124
				.setType(Type.organization)
125
				.setOrganization(Organization.newBuilder()
126
						.setMetadata(Organization.Metadata.newBuilder()
127
								.setLegalname(StringField.newBuilder().setValue(name))
128
								.setLegalshortname(StringField.newBuilder().setValue(shortName))
129
								.setWebsiteurl(StringField.newBuilder().setValue(url))
130
								.addAllAlternativeNames(acronyms.stream().map(a -> StringField.newBuilder().setValue(a).build()).collect(Collectors.toList()))
131
								.setCountry(country)));
132

    
133
		// Relations
134
		final List<Oaf> oafs = orgRels.stream()
135
				.filter(id -> !id.equals(openaireId))
136
				.map(id -> Oaf.newBuilder()
137
						.setKind(Kind.relation)
138
						.setLastupdatetimestamp(DateUtils.now())
139
						.setRel(OafRel.newBuilder()
140
								.setSource(openaireId)
141
								.setTarget(id)
142
								.setRelType(RelType.organizationOrganization)
143
								.setSubRelType(SubRelType.dedupSimilarity)
144
								.setRelClass("isSimilarTo")
145
								.setChild(false))
146
						.build())
147
				.collect(Collectors.toList());
148

    
149
		// Entity
150
		oafs.add(Oaf.newBuilder()
151
				.setKind(Kind.entity)
152
				.setLastupdatetimestamp(DateUtils.now())
153
				.setEntity(entity)
154
				.setDataInfo(DataInfo.newBuilder()
155
						.setTrust("0.90")
156
						.setInferred(false)
157
						.setProvenanceaction(Qualifier.newBuilder()
158
								.setClassid("UNKNOWN")
159
								.setClassname("UNKNOWN")
160
								.setSchemeid("dnet:provenanceActions")
161
								.setSchemename("dnet:provenanceActions")))
162
				.build());
163

    
164
		return oafs;
165
	}
166

    
167
	private static String calculateOpenaireId(final String prefix, final String gridId, final String name) {
168
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId + " " + name));
169
	}
170

    
171
	private static String calculateName(final String parent, final String simpleName) {
172
		return StringUtils.isBlank(parent) ? simpleName : parent + " - " + simpleName;
173
	}
174

    
175
	private static Set<String> calculateAlternativeNames(final String parent, final GridOrganization org) {
176
		final Set<String> res = org.getAcronyms().stream()
177
				.map(s -> calculateName(parent, s))
178
				.collect(Collectors.toSet());
179
		res.addAll(org.getLabels().stream()
180
				.map(GridLabel::getLabel)
181
				.map(s -> calculateName(parent, s))
182
				.collect(Collectors.toSet()));
183
		return res;
184
	}
185

    
186
	public static String findShortName(final GridOrganization org) {
187
		return org.getAcronyms()
188
				.stream()
189
				.filter(StringUtils::isNotBlank)
190
				.findFirst()
191
				.orElse(org.getName());
192
	}
193

    
194
	private static String findParentName(final GridOrganization org, final Map<String, String> parents) {
195
		return org.getRelationships()
196
				.stream()
197
				.filter(r -> r.getType().equalsIgnoreCase("Parent"))
198
				.map(GridRel::getId)
199
				.map(parents::get)
200
				.filter(StringUtils::isNotBlank)
201
				.findFirst()
202
				.orElse(null);
203
	}
204
}
(3-3/3)