Project

General

Profile

1
package eu.dnetlib.msro.workflows.nodes;
2

    
3
import com.googlecode.protobuf.format.JsonFormat;
4
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
5

    
6
import eu.dnetlib.dli.resolver.model.*;
7
import eu.dnetlib.enabling.resultset.ResultSetInfo;
8
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
9
import eu.dnetlib.msro.workflows.graph.Arc;
10
import eu.dnetlib.msro.workflows.procs.Env;
11
import eu.dnetlib.msro.workflows.procs.Token;
12
import eu.dnetlib.msro.workflows.util.ProgressProvider;
13
import eu.dnetlib.pid.resolver.PIDResolver;
14
import eu.dnetlib.pid.resolver.model.ObjectProvenance;
15
import eu.dnetlib.pid.resolver.model.ObjectRelation;
16
import eu.dnetlib.pid.resolver.model.PID;
17
import eu.dnetlib.resolver.parser.DMFResolverParser;
18
import eu.dnetlib.resolver.parser.PMFResolverParser;
19
import eu.dnetlib.rmi.common.ResultSet;
20
import org.apache.commons.lang3.StringUtils;
21
import org.apache.commons.logging.Log;
22
import org.apache.commons.logging.LogFactory;
23
import org.springframework.beans.factory.annotation.Autowired;
24
import org.springframework.http.converter.StringHttpMessageConverter;
25
import org.springframework.web.client.RestTemplate;
26

    
27
import java.nio.charset.Charset;
28
import java.time.LocalDateTime;
29
import java.util.Arrays;
30
import java.util.List;
31

    
32
import static eu.dnetlib.data.proto.dli.ScholixObjectProtos.*;
33

    
34
public class ResolveAndIndexJobNode extends SimpleJobNode implements ProgressProvider {
35

    
36
    private static final Log log = LogFactory.getLog(ResolveAndIndexJobNode.class);
37

    
38

    
39
    private static final String BASE_CFG_URL = "http://%s:9200/%s/scholix/%s/?pretty";
40

    
41
    private String inputEprParam;
42

    
43
    private String indexHost;
44

    
45
    private String indexName;
46

    
47
    private int counter;
48

    
49
    private int total;
50

    
51

    
52
    @Autowired
53
    private List<PIDResolver> pluginResolver;
54

    
55
    @Autowired
56
    private ResultSetClient resultSetClient;
57

    
58
    @Override
59
    protected String execute(Env env) throws Exception {
60

    
61
        final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
62

    
63

    
64
        final Iterable<String> records = resultSetClient.iter(rsIn, String.class);
65

    
66
        ResultSetInfo info = resultSetClient.info(rsIn);
67
        this.total = info.getTotal();
68

    
69
        final RestTemplate restTemplate = new RestTemplate();
70
        restTemplate.getMessageConverters()
71
                .add(0, new StringHttpMessageConverter(Charset.forName("UTF-8")));
72

    
73
        setIndexHost(indexHost);
74
        setIndexName(indexName);
75

    
76
        DMFResolverParser dmfParser = new DMFResolverParser();
77
        PMFResolverParser pmfParser = new PMFResolverParser();
78

    
79

    
80
        for (String record : records) {
81
            this.counter++;
82
            DLIResolvedObject result = dmfParser.parseObject(record);
83
            if (result == null) {
84
                result = pmfParser.parseObject(record);
85
            }
86

    
87

    
88
            if (result == null) {
89
                log.error("error on parsing " + record);
90
                continue;
91
            }
92
            for (final ObjectRelation rels : result.getRelations()) {
93
                final DLIResolvedObject resolvedRelation = resolveRelation(rels.getTargetPID(), result.getDatasourceProvenance().get(0));
94
                final Scholix.Builder scholix = Scholix.newBuilder();
95

    
96
                scholix.addLinkproviderBuilder()
97
                        .setName(result.getDatasourceProvenance().get(0).getDatasource())
98
                        .addIdentifiersBuilder()
99
                        .setIdentifier(result.getDatasourceProvenance().get(0).getDatasourceId())
100
                        .setSchema("dnetIdentifier");
101

    
102
                scholix.setRelationship(ScholixRelationship.newBuilder()
103
                        .setName(rels.getRelationSemantics())
104
                        .setInverse(rels.getInverseRelation())
105
                        .setSchema("datacite")
106
                        .build());
107

    
108
                final ScholixResource source = generateResource(result);
109
                final ScholixResource target = generateResource(resolvedRelation);
110
                scholix.setSource(source);
111
                scholix.setTarget(target);
112
                scholix.setPublicationDate(LocalDateTime.now().toString());
113
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(result, resolvedRelation)), JsonFormat.printToString(scholix.build()));
114

    
115
                scholix.setRelationship(ScholixRelationship.newBuilder()
116
                        .setInverse(rels.getRelationSemantics())
117
                        .setName(rels.getInverseRelation())
118
                        .setSchema("datacite")
119
                        .build());
120
                scholix.setTarget(source);
121
                scholix.setSource(target);
122

    
123
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(resolvedRelation, result)), JsonFormat.printToString(scholix.build()));
124
            }
125
        }
126
        return Arc.DEFAULT_ARC;
127
    }
128

    
129
    @Override
130
    protected void beforeStart(final Token token) {
131
        token.setProgressProvider(this);
132
    }
133

    
134

    
135
    private String generateIdentifier(final String source, final String target) {
136
        return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.toLowerCase().trim(), target.toLowerCase().trim()));
137

    
138
    }
139

    
140
    private String generateIdentifier(final DLIResolvedObject source, DLIResolvedObject target) {
141

    
142
        return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.getPid().toLowerCase().trim(), target.getPid().toLowerCase().trim()));
143

    
144
    }
145

    
146
    public static ScholixResource generateResource(DLIResolvedObject result) {
147
        final ScholixResource.Builder builder = ScholixResource.newBuilder();
148
        if (result.getDatasourceProvenance() != null)
149
            result.getDatasourceProvenance().forEach(
150
                    objectProvenance -> {
151
                        builder.addCollectedFrom(ScholixCollectedFrom.newBuilder()
152
                                .setProvisionMode("collected")
153
                                .setCompletionStatus(((DLIObjectProvenance) objectProvenance).getCompletionStatus())
154
                                .setProvider(ScholixEntityId.newBuilder()
155
                                        .setName(objectProvenance.getDatasource())
156
                                        .addIdentifiers(ScholixIdentifier.newBuilder().setIdentifier(objectProvenance.getDatasourceId())
157
                                                .setSchema("dnetIdentifier").build())
158
                                        .build()));
159
                        if (StringUtils.isNotEmpty(((DLIObjectProvenance) objectProvenance).getPublisher())) {
160
                            builder.addPublisher(ScholixEntityId.newBuilder()
161
                                    .setName(((DLIObjectProvenance) objectProvenance).getPublisher())
162
                                    .build());
163
                        }
164
                    });
165
        builder.addIdentifier(ScholixIdentifier.newBuilder().
166
                setIdentifier(result.getPid())
167
                .setSchema(result.getPidType())
168
                .build());
169
        builder.setObjectType(result.getType().toString());
170
        if (result.getTitles() != null && result.getTitles().size() > 0)
171
            builder.setTitle(result.getTitles().get(0));
172
        if (result.getAuthors() != null)
173
            result.getAuthors().forEach(author -> builder.addCreator(
174
                    ScholixEntityId.newBuilder()
175
                            .setName(author)
176
                            .build()));
177
        if (StringUtils.isNotBlank(result.getDate())) {
178
            builder.setPublicationDate(result.getDate());
179
        }
180

    
181
        String tp = null;
182

    
183
        switch (result.getType()) {
184
            case dataset:
185
                tp = "60";
186
                break;
187
            case unknown:
188
                tp = "70";
189
                break;
190
            case publication:
191
                tp = "50";
192
                break;
193
        }
194
        builder.setDnetIdentifier(tp + "|dnet________::" + result.getIdentifier());
195
        return builder.build();
196
    }
197

    
198

    
199
    private DLIResolvedObject resolveRelation(final PID currentPid, final ObjectProvenance provenance) {
200
        for (PIDResolver resolver : pluginResolver) {
201
            final DLIResolvedObject currentIdentifier = (DLIResolvedObject) resolver.retrievePID(currentPid.getId(), currentPid.getType(), false);
202

    
203
            if (currentIdentifier != null &&
204
                    !StringUtils.isBlank(currentIdentifier.getPid()) &&
205
                    currentIdentifier.getPid().toLowerCase().equals(currentPid.getId().toLowerCase())) {
206
                return currentIdentifier;
207
            }
208
        }
209

    
210
        final DLIResolvedObject resolvedObject = new DLIResolvedObject();
211
        resolvedObject.setPid(currentPid.getId());
212
        resolvedObject.setPidType(currentPid.getType());
213
        DLIObjectProvenance resultProvenance = new DLIObjectProvenance();
214
        resultProvenance.setDatasource(provenance.getDatasource());
215
        resultProvenance.setDatasourceId(provenance.getDatasourceId());
216
        resultProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
217
        resultProvenance.setProvisionMode(ObjectProvisionMode.collected.toString());
218
        resolvedObject.setDatasourceProvenance(Arrays.asList(resultProvenance));
219
        return resolvedObject;
220
    }
221

    
222
    public String getInputEprParam() {
223
        return inputEprParam;
224
    }
225

    
226
    public void setInputEprParam(String inputEprParam) {
227
        this.inputEprParam = inputEprParam;
228
    }
229

    
230
    public String getIndexHost() {
231
        return indexHost;
232
    }
233

    
234
    public void setIndexHost(String indexHost) {
235
        this.indexHost = indexHost;
236
    }
237

    
238
    public String getIndexName() {
239
        return indexName;
240
    }
241

    
242
    public void setIndexName(String indexName) {
243
        this.indexName = indexName;
244
    }
245

    
246
    @Override
247
    public String getProgressDescription() {
248
        return this.counter < 0 ? "-" : String.format("%d / %d", this.counter, this.total);
249
    }
250
}
(4-4/4)