Project

General

Profile

1
package eu.dnetlib.grid.process.utils;
2

    
3
import java.io.FileInputStream;
4
import java.io.FileNotFoundException;
5
import java.io.InputStream;
6
import java.util.ArrayList;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.Set;
10
import java.util.stream.Collectors;
11
import java.util.stream.Stream;
12
import java.util.stream.StreamSupport;
13

    
14
import org.apache.commons.lang3.StringUtils;
15

    
16
import com.fasterxml.jackson.databind.ObjectMapper;
17

    
18
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
19
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
20
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
21
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
22
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
23
import eu.dnetlib.data.proto.KindProtos.Kind;
24
import eu.dnetlib.data.proto.OafProtos.Oaf;
25
import eu.dnetlib.data.proto.OafProtos.OafEntity;
26
import eu.dnetlib.data.proto.OafProtos.OafRel;
27
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
28
import eu.dnetlib.data.proto.TypeProtos.Type;
29
import eu.dnetlib.grid.process.model.GridOrganization;
30
import eu.dnetlib.grid.process.model.GridResponse;
31
import eu.dnetlib.miscutils.datetime.DateUtils;
32
import eu.dnetlib.miscutils.functional.hash.Hashing;
33

    
34
public class GridUtils {
35

    
36
	public static Stream<GridOrganization> streamOrganizations(final String jsonFile) {
37
		try {
38
			return StreamSupport.stream(GridUtils.getOrganizations(new FileInputStream(jsonFile)).spliterator(), false);
39
		} catch (final FileNotFoundException e) {
40
			e.printStackTrace();
41
			return Stream.empty();
42
		}
43
	}
44

    
45
	protected static Iterable<GridOrganization> getOrganizations(final InputStream input) {
46
		try {
47
			return new ObjectMapper().readValue(input, GridResponse.class).getInstitutes();
48
		} catch (final Throwable e) {
49
			e.printStackTrace();
50
			return new ArrayList<>();
51
		}
52
	}
53

    
54
	public static List<Oaf> toProtos(final GridOrganization org, final Map<String, String> parents, final Datasource ds) {
55

    
56
		final String gridId = org.getId();
57
		final String shortName = findShortName(org);
58
		final String parent = parents.get(gridId);
59
		final String mainName = calculateName(parent, org.getName());
60
		final Set<String> otherNames = calculateNames(parent, org.getAcronyms());
61

    
62
		final Map<String, String> orgRels = otherNames.stream()
63
				.collect(Collectors.toMap(s -> calculateOpenaireId(ds.getPrefix(), gridId, s), s -> s));
64
		orgRels.put(calculateOpenaireId(ds.getPrefix(), gridId, mainName), mainName);
65

    
66
		final Qualifier.Builder country = org.getAddresses().stream()
67
				.map(addr -> Qualifier.newBuilder()
68
						.setClassid(addr.getCountry_code())
69
						.setClassname(addr.getCountry())
70
						.setSchemeid("dnet:countries")
71
						.setSchemename("dnet:countries"))
72
				.filter(q -> StringUtils.isNotBlank(q.getClassid()))
73
				.filter(q -> StringUtils.isNotBlank(q.getClassname()))
74
				.findFirst()
75
				.orElse(null);
76

    
77
		final String url = org.getLinks().stream().findFirst().orElse("");
78

    
79
		final List<Oaf> res = new ArrayList<>();
80

    
81
		res.add(toProto(gridId, mainName, shortName, url, country, org.getAcronyms(), orgRels, ds));
82

    
83
		res.addAll(otherNames.stream()
84
				.map(name -> toProto(gridId, name, shortName, url, country, new ArrayList<>(), orgRels, ds))
85
				.collect(Collectors.toList()));
86

    
87
		return res;
88
	}
89

    
90
	private static Oaf toProto(final String gridId,
91
			final String name,
92
			final String shortName,
93
			final String url,
94
			final Qualifier.Builder country,
95
			final List<String> acronyms,
96
			final Map<String, String> orgRels,
97
			final Datasource ds) {
98

    
99
		// TODO ADD ALTERNATIVE NAMES AND RELS
100

    
101
		final String openaireId = calculateOpenaireId(ds.getPrefix(), gridId, name);
102

    
103
		final KeyValue collectedFrom = KeyValue.newBuilder()
104
				.setKey(ds.getId())
105
				.setValue(ds.getName()).build();
106

    
107
		final OafEntity.Builder entity = OafEntity.newBuilder()
108
				.setId(openaireId)
109
				.addPid(StructuredProperty.newBuilder()
110
						.setValue(gridId)
111
						.setQualifier(Qualifier.newBuilder()
112
								.setClassid("grid")
113
								.setClassname("grid")
114
								.setSchemeid("dnet:pid_types")
115
								.setSchemename("dnet:pid_types")))
116
				.addCollectedfrom(collectedFrom)
117
				.setType(Type.organization)
118
				.setOrganization(Organization.newBuilder()
119
						.setMetadata(Organization.Metadata.newBuilder()
120
								.setLegalname(StringField.newBuilder().setValue(name))
121
								.setLegalshortname(StringField.newBuilder().setValue(shortName))
122
								.setWebsiteurl(StringField.newBuilder().setValue(url))
123
								.addAllAlternativeNames(acronyms.stream().map(a -> StringField.newBuilder().setValue(a).build()).collect(Collectors.toList()))
124
								.setCountry(country)));
125

    
126
		orgRels.entrySet().stream()
127
				.filter(e -> !e.getKey().equals(openaireId))
128
				.forEach(e -> entity.addCachedRel(
129
						OafRel.newBuilder()
130
								.setTarget(e.getKey())
131
								// TODO ??? e.getValue()
132
								.addCollectedfrom(collectedFrom)));
133

    
134
		return Oaf.newBuilder()
135
				.setKind(Kind.entity)
136
				.setLastupdatetimestamp(DateUtils.now())
137
				.setEntity(entity)
138
				.setDataInfo(DataInfo.newBuilder()
139
						.setTrust("0.90")
140
						.setInferred(false)
141
						.setProvenanceaction(Qualifier.newBuilder()
142
								.setClassid("UNKNOWN")
143
								.setClassname("UNKNOWN")
144
								.setSchemeid("dnet:provenanceActions")
145
								.setSchemename("dnet:provenanceActions")))
146
				.build();
147
	}
148

    
149
	private static String calculateOpenaireId(final String prefix, final String gridId, final String name) {
150
		return String.format("20|%s::%s", prefix, Hashing.md5(gridId + " " + name));
151
	}
152

    
153
	private static String calculateName(final String parent, final String simpleName) {
154
		return StringUtils.isBlank(parent) ? simpleName : parent + " - " + simpleName;
155
	}
156

    
157
	private static Set<String> calculateNames(final String parent, final List<String> list) {
158
		return list.stream()
159
				.map(s -> calculateName(parent, s))
160
				.collect(Collectors.toSet());
161
	}
162

    
163
	public static String findShortName(final GridOrganization org) {
164
		return org.getAcronyms()
165
				.stream()
166
				.filter(StringUtils::isNotBlank)
167
				.filter(s -> s.length() < 10)
168
				.filter(s -> s.equals(s.toUpperCase()))
169
				.findFirst()
170
				.orElse(org.getName());
171

    
172
	}
173
}
(3-3/3)