Project

General

Profile

1
package eu.dnetlib.grid.process.utils;
2

    
3
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import org.apache.commons.lang3.StringUtils;

import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafRel;
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.grid.process.model.GridOrganization;
import eu.dnetlib.grid.process.model.GridResponse;
import eu.dnetlib.miscutils.datetime.DateUtils;
import eu.dnetlib.miscutils.functional.hash.Hashing;
35

    
36
public class GridUtils {
37

    
38
	public static Stream<GridOrganization> streamOrganizations(final String jsonFile) {
39
		try {
40
			return StreamSupport.stream(GridUtils.getOrganizations(new FileInputStream(jsonFile)).spliterator(), false);
41
		} catch (final FileNotFoundException e) {
42
			e.printStackTrace();
43
			return Stream.empty();
44
		}
45
	}
46

    
47
	protected static Iterable<GridOrganization> getOrganizations(final InputStream input) {
48
		try {
49
			return new ObjectMapper().readValue(input, GridResponse.class).getInstitutes();
50
		} catch (final Throwable e) {
51
			e.printStackTrace();
52
			return new ArrayList<>();
53
		}
54
	}
55

    
56
	public static List<Oaf> toProtos(final GridOrganization org, final Map<String, String> parents, final Datasource ds) {
57

    
58
		final String gridId = org.getId();
59
		final String parent = parents.get(gridId);
60
		final String mainName = calculateName(parent, org.getName());
61
		final String shortName = findShortName(org);
62
		final String mainOpenaireId = calculateOpenaireId(ds.getPrefix(), gridId, mainName);
63

    
64
		final Map<String, String> orgNames = calculateNames(parent, org.getAcronyms())
65
				.stream()
66
				.collect(Collectors.toMap(
67
						s -> calculateOpenaireId(ds.getPrefix(), gridId, s),
68
						s -> s));
69
		orgNames.put(mainOpenaireId, mainName);
70

    
71
		final Qualifier country = org.getAddresses().stream()
72
				.map(addr -> Qualifier.newBuilder()
73
						.setClassid(addr.getCountry_code())
74
						.setClassname(addr.getCountry())
75
						.setSchemeid("dnet:countries")
76
						.setSchemename("dnet:countries"))
77
				.filter(q -> StringUtils.isNotBlank(q.getClassid()))
78
				.filter(q -> StringUtils.isNotBlank(q.getClassname()))
79
				.map(q -> q.build())
80
				.findFirst()
81
				.orElse(null);
82

    
83
		final KeyValue collectedFrom = KeyValue.newBuilder()
84
				.setKey(ds.getId())
85
				.setValue(ds.getName()).build();
86

    
87
		return orgNames.entrySet()
88
				.stream()
89
				.map(e -> toProto(e.getKey(),
90
						gridId,
91
						e.getValue(),
92
						shortName,
93
						org.getLinks().stream().findFirst().orElse(""),
94
						country,
95
						e.getKey().equals(mainOpenaireId) ? org.getAcronyms() : new ArrayList<>(),
96
						orgNames, collectedFrom))
97
				.collect(Collectors.toList());
98

    
99
	}
100

    
101
	private static Oaf toProto(final String openaireId,
102
			final String gridId,
103
			final String name,
104
			final String shortName,
105
			final String url,
106
			final Qualifier country,
107
			final List<String> acronyms,
108
			final Map<String, String> orgRels,
109
			final KeyValue collectedFrom) {
110

    
111
		final OafEntity.Builder entity = OafEntity.newBuilder()
112
				.setId(openaireId)
113
				.addPid(StructuredProperty.newBuilder()
114
						.setValue(gridId)
115
						.setQualifier(Qualifier.newBuilder()
116
								.setClassid("grid")
117
								.setClassname("grid")
118
								.setSchemeid("dnet:pid_types")
119
								.setSchemename("dnet:pid_types")))
120
				.addCollectedfrom(collectedFrom)
121
				.setType(Type.organization)
122
				.setOrganization(Organization.newBuilder()
123
						.setMetadata(Organization.Metadata.newBuilder()
124
								.setLegalname(StringField.newBuilder().setValue(name))
125
								.setLegalshortname(StringField.newBuilder().setValue(shortName))
126
								.setWebsiteurl(StringField.newBuilder().setValue(url))
127
								.addAllAlternativeNames(acronyms.stream().map(a -> StringField.newBuilder().setValue(a).build()).collect(Collectors.toList()))
128
								.setCountry(country)));
129

    
130
		orgRels.entrySet().stream()
131
				.filter(e -> !e.getKey().equals(openaireId))
132
				.forEach(e -> entity.addCachedRel(
133
						OafRel.newBuilder()
134
								.setSource(openaireId)
135
								.setTarget(e.getKey())
136
								.setRelType(RelType.organizationOrganization)
137
								.setSubRelType(SubRelType.similarity)
138
								.setRelClass("") // TODO ???
139
								.setChild(false)
140
								// TODO ??? e.getValue()
141
								.addCollectedfrom(collectedFrom)));
142

    
143
		return Oaf.newBuilder()
144
				.setKind(Kind.entity)
145
				.setLastupdatetimestamp(DateUtils.now())
146
				.setEntity(entity)
147
				.setDataInfo(DataInfo.newBuilder()
148
						.setTrust("0.90")
149
						.setInferred(false)
150
						.setProvenanceaction(Qualifier.newBuilder()
151
								.setClassid("UNKNOWN")
152
								.setClassname("UNKNOWN")
153
								.setSchemeid("dnet:provenanceActions")
154
								.setSchemename("dnet:provenanceActions")))
155
				.build();
156
	}
157

    
158
	private static String calculateOpenaireId(final String prefix, final String gridId, final String name) {
159
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId + " " + name));
160
	}
161

    
162
	private static String calculateName(final String parent, final String simpleName) {
163
		return StringUtils.isBlank(parent) ? simpleName : parent + " - " + simpleName;
164
	}
165

    
166
	private static Set<String> calculateNames(final String parent, final List<String> list) {
167
		return list.stream()
168
				.map(s -> calculateName(parent, s))
169
				.collect(Collectors.toSet());
170
	}
171

    
172
	public static String findShortName(final GridOrganization org) {
173
		return org.getAcronyms()
174
				.stream()
175
				.filter(StringUtils::isNotBlank)
176
				.filter(s -> s.length() < 10)
177
				.filter(s -> s.equals(s.toUpperCase()))
178
				.findFirst()
179
				.orElse(org.getName());
180

    
181
	}
182
}
(3-3/3)