Project

General

Profile

1
package eu.dnetlib.grid.process.utils;
2

    
3
import java.io.FileInputStream;
4
import java.io.FileNotFoundException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Set;
10
import java.util.stream.Collectors;
11
import java.util.stream.Stream;
12
import java.util.stream.StreamSupport;
13

    
14
import org.apache.commons.lang3.StringUtils;
15

    
16
import com.fasterxml.jackson.databind.ObjectMapper;
17

    
18
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
19
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
20
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
21
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
22
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
23
import eu.dnetlib.data.proto.KindProtos.Kind;
24
import eu.dnetlib.data.proto.OafProtos.Oaf;
25
import eu.dnetlib.data.proto.OafProtos.OafEntity;
26
import eu.dnetlib.data.proto.OafProtos.OafRel;
27
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
28
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
29
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
30
import eu.dnetlib.data.proto.TypeProtos.Type;
31
import eu.dnetlib.grid.process.model.GridOrganization;
32
import eu.dnetlib.grid.process.model.GridResponse;
33
import eu.dnetlib.miscutils.datetime.DateUtils;
34
import eu.dnetlib.miscutils.functional.hash.Hashing;
35

    
36
public class GridUtils {
37

    
38
	public static Stream<GridOrganization> streamOrganizations(final String jsonFile) {
39
		try {
40
			return StreamSupport.stream(GridUtils.getOrganizations(new FileInputStream(jsonFile)).spliterator(), false);
41
		} catch (final FileNotFoundException e) {
42
			e.printStackTrace();
43
			return Stream.empty();
44
		}
45
	}
46

    
47
	protected static Iterable<GridOrganization> getOrganizations(final InputStream input) {
48
		try {
49
			return new ObjectMapper().readValue(input, GridResponse.class).getInstitutes();
50
		} catch (final Throwable e) {
51
			e.printStackTrace();
52
			return new ArrayList<>();
53
		}
54
	}
55

    
56
	public static List<Oaf> toProtos(final GridOrganization org, final Map<String, String> parents, final Datasource ds) {
57

    
58
		final String gridId = org.getId();
59
		final String parent = parents.get(gridId);
60
		final String mainName = calculateName(parent, org.getName());
61
		final String shortName = findShortName(org);
62
		final String mainOpenaireId = calculateOpenaireId(ds.getPrefix(), gridId, mainName);
63

    
64
		final Map<String, String> orgNames = calculateNames(parent, org.getAcronyms())
65
				.stream()
66
				.collect(Collectors.toMap(
67
						s -> calculateOpenaireId(ds.getPrefix(), gridId, s),
68
						s -> s));
69
		orgNames.put(mainOpenaireId, mainName);
70

    
71
		final Qualifier country = org.getAddresses().stream()
72
				.map(addr -> Qualifier.newBuilder()
73
						.setClassid(addr.getCountry_code())
74
						.setClassname(addr.getCountry())
75
						.setSchemeid("dnet:countries")
76
						.setSchemename("dnet:countries"))
77
				.filter(q -> StringUtils.isNotBlank(q.getClassid()))
78
				.filter(q -> StringUtils.isNotBlank(q.getClassname()))
79
				.map(q -> q.build())
80
				.findFirst()
81
				.orElse(null);
82

    
83
		final KeyValue collectedFrom = KeyValue.newBuilder()
84
				.setKey(ds.getId())
85
				.setValue(ds.getName()).build();
86

    
87
		return orgNames.entrySet()
88
				.stream()
89
				.map(e -> toProtos(e.getKey(),
90
						gridId,
91
						e.getValue(),
92
						shortName,
93
						org.getLinks().stream().findFirst().orElse(""),
94
						country,
95
						e.getKey().equals(mainOpenaireId) ? org.getAcronyms() : new ArrayList<>(),
96
						orgNames, collectedFrom))
97
				.flatMap(l -> l.stream())
98
				.collect(Collectors.toList());
99

    
100
	}
101

    
102
	private static List<Oaf> toProtos(final String openaireId,
103
			final String gridId,
104
			final String name,
105
			final String shortName,
106
			final String url,
107
			final Qualifier country,
108
			final List<String> acronyms,
109
			final Map<String, String> orgRels,
110
			final KeyValue collectedFrom) {
111

    
112
		final OafEntity.Builder entity = OafEntity.newBuilder()
113
				.setId(openaireId)
114
				.addPid(StructuredProperty.newBuilder()
115
						.setValue(gridId)
116
						.setQualifier(Qualifier.newBuilder()
117
								.setClassid("grid")
118
								.setClassname("grid")
119
								.setSchemeid("dnet:pid_types")
120
								.setSchemename("dnet:pid_types")))
121
				.addCollectedfrom(collectedFrom)
122
				.setType(Type.organization)
123
				.setOrganization(Organization.newBuilder()
124
						.setMetadata(Organization.Metadata.newBuilder()
125
								.setLegalname(StringField.newBuilder().setValue(name))
126
								.setLegalshortname(StringField.newBuilder().setValue(shortName))
127
								.setWebsiteurl(StringField.newBuilder().setValue(url))
128
								.addAllAlternativeNames(acronyms.stream().map(a -> StringField.newBuilder().setValue(a).build()).collect(Collectors.toList()))
129
								.setCountry(country)));
130

    
131
		final List<Oaf> oafs = orgRels.entrySet().stream()
132
				.filter(e -> !e.getKey().equals(openaireId))
133
				.map(e -> Oaf.newBuilder()
134
						.setKind(Kind.relation)
135
						.setLastupdatetimestamp(DateUtils.now())
136
						.setRel(OafRel.newBuilder()
137
								.setSource(openaireId)
138
								.setTarget(e.getKey())
139
								.setRelType(RelType.organizationOrganization)
140
								.setSubRelType(SubRelType.dedupSimilarity)
141
								.setRelClass("isSimilarTo")
142
								.setChild(false))
143
						.build())
144
				.collect(Collectors.toList());
145

    
146
		oafs.add(Oaf.newBuilder()
147
				.setKind(Kind.entity)
148
				.setLastupdatetimestamp(DateUtils.now())
149
				.setEntity(entity)
150
				.setDataInfo(DataInfo.newBuilder()
151
						.setTrust("0.90")
152
						.setInferred(false)
153
						.setProvenanceaction(Qualifier.newBuilder()
154
								.setClassid("UNKNOWN")
155
								.setClassname("UNKNOWN")
156
								.setSchemeid("dnet:provenanceActions")
157
								.setSchemename("dnet:provenanceActions")))
158
				.build());
159

    
160
		return oafs;
161
	}
162

    
163
	private static String calculateOpenaireId(final String prefix, final String gridId, final String name) {
164
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId + " " + name));
165
	}
166

    
167
	private static String calculateName(final String parent, final String simpleName) {
168
		return StringUtils.isBlank(parent) ? simpleName : parent + " - " + simpleName;
169
	}
170

    
171
	private static Set<String> calculateNames(final String parent, final List<String> list) {
172
		return list.stream()
173
				.map(s -> calculateName(parent, s))
174
				.collect(Collectors.toSet());
175
	}
176

    
177
	public static String findShortName(final GridOrganization org) {
178
		return org.getAcronyms()
179
				.stream()
180
				.filter(StringUtils::isNotBlank)
181
				.filter(s -> s.length() < 10)
182
				.filter(s -> s.equals(s.toUpperCase()))
183
				.findFirst()
184
				.orElse(org.getName());
185

    
186
	}
187
}
(3-3/3)