1
|
package eu.dnetlib.msro.workflows.nodes;
|
2
|
|
3
|
import static eu.dnetlib.data.proto.dli.ScholixObjectProtos.*;

import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.dli.resolver.PIDResolver;
import eu.dnetlib.dli.resolver.model.*;
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.resolver.parser.DMFResolverParser;
import eu.dnetlib.rmi.common.ResultSet;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.converter.StringHttpMessageConverter;
import org.springframework.web.client.RestTemplate;

import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
|
26
|
|
27
|
public class ResolveAndIndexJobNode extends SimpleJobNode {
|
28
|
|
29
|
private static final Log log = LogFactory.getLog(ResolveAndIndexJobNode.class);
|
30
|
|
31
|
|
32
|
private static final String BASE_CFG_URL = "http://%s:9200/%s/scholix/%s/?pretty";
|
33
|
|
34
|
|
35
|
private String inputEprParam;
|
36
|
|
37
|
private String indexHost;
|
38
|
|
39
|
private String indexName;
|
40
|
|
41
|
@Autowired
|
42
|
private List<PIDResolver> pluginResolver;
|
43
|
|
44
|
@Autowired
|
45
|
private ResultSetClient resultSetClient;
|
46
|
|
47
|
@Override
|
48
|
protected String execute(Env env) throws Exception {
|
49
|
|
50
|
final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
|
51
|
|
52
|
final Iterable<String> records = resultSetClient.iter(rsIn, String.class);
|
53
|
|
54
|
final RestTemplate restTemplate = new RestTemplate();
|
55
|
restTemplate.getMessageConverters()
|
56
|
.add(0, new StringHttpMessageConverter(Charset.forName("UTF-8")));
|
57
|
|
58
|
setIndexHost(indexHost);
|
59
|
setIndexName(indexName);
|
60
|
|
61
|
DMFResolverParser parser = new DMFResolverParser();
|
62
|
|
63
|
|
64
|
for (String record : records) {
|
65
|
final ResolvedObject result = parser.parseObject(record);
|
66
|
if (result == null) {
|
67
|
log.error("error on parsing " + record);
|
68
|
continue;
|
69
|
}
|
70
|
for (final ObjectRelation rels : result.getRelations()) {
|
71
|
final ResolvedObject resolvedRelation = resolveRelation(rels.getTargetPID(), result.getDatasourceProvenance().get(0));
|
72
|
|
73
|
|
74
|
final Scholix.Builder scholix = Scholix.newBuilder();
|
75
|
|
76
|
scholix.addLinkproviderBuilder()
|
77
|
.setName(result.getDatasourceProvenance().get(0).getDatasource())
|
78
|
.addIdentifiersBuilder()
|
79
|
.setIdentifier(result.getDatasourceProvenance().get(0).getDatasourceId())
|
80
|
.setSchema("dnetIdentifier");
|
81
|
|
82
|
scholix.setRelationship(ScholixRelationship.newBuilder()
|
83
|
.setName(rels.getRelationSemantics())
|
84
|
.setInverse(rels.getInverseRelation())
|
85
|
.setSchema("datacite")
|
86
|
.build());
|
87
|
|
88
|
final ScholixResource source = generateResource(result);
|
89
|
final ScholixResource target = generateResource(resolvedRelation);
|
90
|
scholix.setSource(source);
|
91
|
scholix.setTarget(target);
|
92
|
scholix.setPublicationDate(LocalDateTime.now().toString());
|
93
|
|
94
|
|
95
|
// log.error(indexURL);
|
96
|
//
|
97
|
// log.info("json : "+JsonFormat.printToString(scholix.build()));
|
98
|
|
99
|
|
100
|
restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(result, resolvedRelation)), JsonFormat.printToString(scholix.build()));
|
101
|
|
102
|
scholix.setRelationship(ScholixRelationship.newBuilder()
|
103
|
.setInverse(rels.getRelationSemantics())
|
104
|
.setName(rels.getInverseRelation())
|
105
|
.setSchema("datacite")
|
106
|
.build());
|
107
|
scholix.setTarget(source);
|
108
|
scholix.setSource(target);
|
109
|
|
110
|
restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(resolvedRelation, result)), JsonFormat.printToString(scholix.build()));
|
111
|
}
|
112
|
}
|
113
|
return Arc.DEFAULT_ARC;
|
114
|
}
|
115
|
|
116
|
private String generateIdentifier(final ResolvedObject source, ResolvedObject target) {
|
117
|
|
118
|
return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.getPid().toLowerCase().trim(), target.getPid().toLowerCase().trim()));
|
119
|
|
120
|
}
|
121
|
|
122
|
private ScholixResource generateResource(ResolvedObject result) {
|
123
|
final ScholixResource.Builder builder = ScholixResource.newBuilder();
|
124
|
if (result.getDatasourceProvenance() != null)
|
125
|
result.getDatasourceProvenance().forEach(
|
126
|
objectProvenance -> {
|
127
|
builder.addCollectedFrom(ScholixCollectedFrom.newBuilder()
|
128
|
.setProvisionMode(objectProvenance.getProvisionMode())
|
129
|
.setCompletionStatus(objectProvenance.getCompletionStatus())
|
130
|
.setProvider(ScholixEntityId.newBuilder()
|
131
|
.setName(objectProvenance.getDatasource())
|
132
|
.addIdentifiers(ScholixIdentifier.newBuilder().setIdentifier(objectProvenance.getDatasourceId())
|
133
|
.setSchema("dnetIdentifier").build())
|
134
|
.build()));
|
135
|
if (StringUtils.isNotEmpty(objectProvenance.getPublisher())) {
|
136
|
builder.addPublisher(ScholixEntityId.newBuilder()
|
137
|
.setName(objectProvenance.getPublisher())
|
138
|
.build());
|
139
|
}
|
140
|
|
141
|
});
|
142
|
builder.addIdentifier(ScholixIdentifier.newBuilder().
|
143
|
setIdentifier(result.getPid())
|
144
|
.setSchema(result.getPidType())
|
145
|
.build());
|
146
|
builder.setObjectType(result.getType().toString());
|
147
|
if (result.getTitles() != null && result.getTitles().size() > 0)
|
148
|
builder.setTitle(result.getTitles().get(0));
|
149
|
if (result.getAuthors() != null)
|
150
|
result.getAuthors().forEach(author -> builder.addCreator(
|
151
|
ScholixEntityId.newBuilder()
|
152
|
.setName(author)
|
153
|
.build()));
|
154
|
if (StringUtils.isNotBlank(result.getDate())) {
|
155
|
builder.setPublicationDate(result.getDate());
|
156
|
}
|
157
|
|
158
|
String tp = null;
|
159
|
|
160
|
switch (result.getType()) {
|
161
|
case dataset:
|
162
|
tp = "60";
|
163
|
break;
|
164
|
case unknown:
|
165
|
tp = "70";
|
166
|
break;
|
167
|
case publication:
|
168
|
tp = "50";
|
169
|
break;
|
170
|
}
|
171
|
builder.setDnetIdentifier(tp + "|dnet________::" + result.getIdentifier());
|
172
|
return builder.build();
|
173
|
}
|
174
|
|
175
|
|
176
|
private ResolvedObject resolveRelation(final PID currentPid, final ObjectProvenance provenance) {
|
177
|
for (PIDResolver resolver : pluginResolver) {
|
178
|
final ResolvedObject currentIdentifier = resolver.retrievePID(currentPid.getId(), currentPid.getType());
|
179
|
|
180
|
if (currentIdentifier != null &&
|
181
|
!StringUtils.isBlank(currentIdentifier.getPid()) &&
|
182
|
currentIdentifier.getPid().toLowerCase().equals(currentPid.getId().toLowerCase())) {
|
183
|
return currentIdentifier;
|
184
|
}
|
185
|
}
|
186
|
|
187
|
final ResolvedObject resolvedObject = new ResolvedObject();
|
188
|
resolvedObject.setPid(currentPid.getId());
|
189
|
resolvedObject.setPidType(currentPid.getType());
|
190
|
ObjectProvenance resultProvenance = new ObjectProvenance();
|
191
|
resultProvenance.setDatasource(provenance.getDatasource());
|
192
|
resultProvenance.setDatasourceId(provenance.getDatasourceId());
|
193
|
resultProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
|
194
|
resultProvenance.setProvisionMode(ObjectProvisionMode.collected.toString());
|
195
|
resolvedObject.setDatasourceProvenance(Arrays.asList(resultProvenance));
|
196
|
return resolvedObject;
|
197
|
}
|
198
|
|
199
|
public String getInputEprParam() {
|
200
|
return inputEprParam;
|
201
|
}
|
202
|
|
203
|
public void setInputEprParam(String inputEprParam) {
|
204
|
this.inputEprParam = inputEprParam;
|
205
|
}
|
206
|
|
207
|
public String getIndexHost() {
|
208
|
return indexHost;
|
209
|
}
|
210
|
|
211
|
public void setIndexHost(String indexHost) {
|
212
|
this.indexHost = indexHost;
|
213
|
}
|
214
|
|
215
|
public String getIndexName() {
|
216
|
return indexName;
|
217
|
}
|
218
|
|
219
|
public void setIndexName(String indexName) {
|
220
|
this.indexName = indexName;
|
221
|
}
|
222
|
}
|