Project

General

Profile

1
package eu.dnetlib.msro.workflows.nodes;
2

    
3
import com.googlecode.protobuf.format.JsonFormat;
4
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
5

    
6
import eu.dnetlib.dli.resolver.model.*;
7
import eu.dnetlib.enabling.resultset.ResultSetInfo;
8
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
9
import eu.dnetlib.msro.workflows.graph.Arc;
10
import eu.dnetlib.msro.workflows.procs.Env;
11
import eu.dnetlib.msro.workflows.procs.Token;
12
import eu.dnetlib.msro.workflows.util.ProgressProvider;
13
import eu.dnetlib.pid.resolver.PIDResolver;
14
import eu.dnetlib.pid.resolver.model.ObjectProvenance;
15
import eu.dnetlib.pid.resolver.model.ObjectRelation;
16
import eu.dnetlib.pid.resolver.model.PID;
17
import eu.dnetlib.resolver.parser.DMFResolverParser;
18
import eu.dnetlib.rmi.common.ResultSet;
19
import org.apache.commons.lang3.StringUtils;
20
import org.apache.commons.logging.Log;
21
import org.apache.commons.logging.LogFactory;
22
import org.springframework.beans.factory.annotation.Autowired;
23
import org.springframework.http.converter.StringHttpMessageConverter;
24
import org.springframework.web.client.RestTemplate;
25

    
26
import java.nio.charset.Charset;
27
import java.time.LocalDateTime;
28
import java.util.Arrays;
29
import java.util.List;
30

    
31
import static eu.dnetlib.data.proto.dli.ScholixObjectProtos.*;
32

    
33
public class ResolveAndIndexJobNode extends SimpleJobNode implements ProgressProvider {
34

    
35
    private static final Log log = LogFactory.getLog(ResolveAndIndexJobNode.class);
36

    
37

    
38
    private static final String BASE_CFG_URL = "http://%s:9200/%s/scholix/%s/?pretty";
39

    
40
    private String inputEprParam;
41

    
42
    private String indexHost;
43

    
44
    private String indexName;
45

    
46
    private int counter;
47

    
48
    private int total;
49

    
50

    
51
    @Autowired
52
    private List<PIDResolver> pluginResolver;
53

    
54
    @Autowired
55
    private ResultSetClient resultSetClient;
56

    
57
    @Override
58
    protected String execute(Env env) throws Exception {
59

    
60
        final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
61

    
62

    
63
        final Iterable<String> records = resultSetClient.iter(rsIn, String.class);
64

    
65
        ResultSetInfo info = resultSetClient.info(rsIn);
66
        this.total = info.getTotal();
67

    
68
        final RestTemplate restTemplate = new RestTemplate();
69
        restTemplate.getMessageConverters()
70
                .add(0, new StringHttpMessageConverter(Charset.forName("UTF-8")));
71

    
72
        setIndexHost(indexHost);
73
        setIndexName(indexName);
74

    
75
        DMFResolverParser parser = new DMFResolverParser();
76

    
77

    
78
        for (String record : records) {
79
            this.counter++;
80
            final DLIResolvedObject result = parser.parseObject(record);
81
            if (result == null) {
82
                log.error("error on parsing " + record);
83
                continue;
84
            }
85
            for (final ObjectRelation rels : result.getRelations()) {
86
                final DLIResolvedObject resolvedRelation = resolveRelation(rels.getTargetPID(), result.getDatasourceProvenance().get(0));
87
                final Scholix.Builder scholix = Scholix.newBuilder();
88

    
89
                scholix.addLinkproviderBuilder()
90
                        .setName(result.getDatasourceProvenance().get(0).getDatasource())
91
                        .addIdentifiersBuilder()
92
                        .setIdentifier(result.getDatasourceProvenance().get(0).getDatasourceId())
93
                        .setSchema("dnetIdentifier");
94

    
95
                scholix.setRelationship(ScholixRelationship.newBuilder()
96
                        .setName(rels.getRelationSemantics())
97
                        .setInverse(rels.getInverseRelation())
98
                        .setSchema("datacite")
99
                        .build());
100

    
101
                final ScholixResource source = generateResource(result);
102
                final ScholixResource target = generateResource(resolvedRelation);
103
                scholix.setSource(source);
104
                scholix.setTarget(target);
105
                scholix.setPublicationDate(LocalDateTime.now().toString());
106
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(result, resolvedRelation)), JsonFormat.printToString(scholix.build()));
107

    
108
                scholix.setRelationship(ScholixRelationship.newBuilder()
109
                        .setInverse(rels.getRelationSemantics())
110
                        .setName(rels.getInverseRelation())
111
                        .setSchema("datacite")
112
                        .build());
113
                scholix.setTarget(source);
114
                scholix.setSource(target);
115

    
116
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(resolvedRelation, result)), JsonFormat.printToString(scholix.build()));
117
            }
118
        }
119
        return Arc.DEFAULT_ARC;
120
    }
121

    
122
    @Override
123
    protected void beforeStart(final Token token) {
124
        token.setProgressProvider(this);
125
    }
126

    
127

    
128
    private String generateIdentifier(final String source, final String target) {
129
        return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.toLowerCase().trim(), target.toLowerCase().trim()));
130

    
131
    }
132

    
133
    private String generateIdentifier(final DLIResolvedObject source, DLIResolvedObject target) {
134

    
135
        return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.getPid().toLowerCase().trim(), target.getPid().toLowerCase().trim()));
136

    
137
    }
138

    
139
    private ScholixResource generateResource(DLIResolvedObject result) {
140
        final ScholixResource.Builder builder = ScholixResource.newBuilder();
141
        if (result.getDatasourceProvenance() != null)
142
            result.getDatasourceProvenance().forEach(
143
                    objectProvenance -> {
144
                        builder.addCollectedFrom(ScholixCollectedFrom.newBuilder()
145
                                .setProvisionMode(((DLIObjectProvenance) objectProvenance).getProvisionMode())
146
                                .setCompletionStatus(((DLIObjectProvenance) objectProvenance).getCompletionStatus())
147
                                .setProvider(ScholixEntityId.newBuilder()
148
                                        .setName(objectProvenance.getDatasource())
149
                                        .addIdentifiers(ScholixIdentifier.newBuilder().setIdentifier(objectProvenance.getDatasourceId())
150
                                                .setSchema("dnetIdentifier").build())
151
                                        .build()));
152
                        if (StringUtils.isNotEmpty(((DLIObjectProvenance) objectProvenance).getPublisher())) {
153
                            builder.addPublisher(ScholixEntityId.newBuilder()
154
                                    .setName(((DLIObjectProvenance) objectProvenance).getPublisher())
155
                                    .build());
156
                        }
157

    
158
                    });
159
        builder.addIdentifier(ScholixIdentifier.newBuilder().
160
                setIdentifier(result.getPid())
161
                .setSchema(result.getPidType())
162
                .build());
163
        builder.setObjectType(result.getType().toString());
164
        if (result.getTitles() != null && result.getTitles().size() > 0)
165
            builder.setTitle(result.getTitles().get(0));
166
        if (result.getAuthors() != null)
167
            result.getAuthors().forEach(author -> builder.addCreator(
168
                    ScholixEntityId.newBuilder()
169
                            .setName(author)
170
                            .build()));
171
        if (StringUtils.isNotBlank(result.getDate())) {
172
            builder.setPublicationDate(result.getDate());
173
        }
174

    
175
        String tp = null;
176

    
177
        switch (result.getType()) {
178
            case dataset:
179
                tp = "60";
180
                break;
181
            case unknown:
182
                tp = "70";
183
                break;
184
            case publication:
185
                tp = "50";
186
                break;
187
        }
188
        builder.setDnetIdentifier(tp + "|dnet________::" + result.getIdentifier());
189
        return builder.build();
190
    }
191

    
192

    
193
    private DLIResolvedObject resolveRelation(final PID currentPid, final ObjectProvenance provenance) {
194
        for (PIDResolver resolver : pluginResolver) {
195
            final DLIResolvedObject currentIdentifier = (DLIResolvedObject) resolver.retrievePID(currentPid.getId(), currentPid.getType(), false);
196

    
197
            if (currentIdentifier != null &&
198
                    !StringUtils.isBlank(currentIdentifier.getPid()) &&
199
                    currentIdentifier.getPid().toLowerCase().equals(currentPid.getId().toLowerCase())) {
200
                return currentIdentifier;
201
            }
202
        }
203

    
204
        final DLIResolvedObject resolvedObject = new DLIResolvedObject();
205
        resolvedObject.setPid(currentPid.getId());
206
        resolvedObject.setPidType(currentPid.getType());
207
        DLIObjectProvenance resultProvenance = new DLIObjectProvenance();
208
        resultProvenance.setDatasource(provenance.getDatasource());
209
        resultProvenance.setDatasourceId(provenance.getDatasourceId());
210
        resultProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
211
        resultProvenance.setProvisionMode(ObjectProvisionMode.collected.toString());
212
        resolvedObject.setDatasourceProvenance(Arrays.asList(resultProvenance));
213
        return resolvedObject;
214
    }
215

    
216
    public String getInputEprParam() {
217
        return inputEprParam;
218
    }
219

    
220
    public void setInputEprParam(String inputEprParam) {
221
        this.inputEprParam = inputEprParam;
222
    }
223

    
224
    public String getIndexHost() {
225
        return indexHost;
226
    }
227

    
228
    public void setIndexHost(String indexHost) {
229
        this.indexHost = indexHost;
230
    }
231

    
232
    public String getIndexName() {
233
        return indexName;
234
    }
235

    
236
    public void setIndexName(String indexName) {
237
        this.indexName = indexName;
238
    }
239

    
240
    @Override
241
    public String getProgressDescription() {
242
        return this.counter < 0 ? "-" : String.format("%d / %d", this.counter, this.total);
243
    }
244
}
(4-4/4)