Project

General

Profile

1
package eu.dnetlib.msro.workflows.nodes;
2

    
3
import static eu.dnetlib.data.proto.dli.ScholixObjectProtos.*;
4

    
5
import com.googlecode.protobuf.format.JsonFormat;
6
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
7
import eu.dnetlib.dli.resolver.PIDResolver;
8
import eu.dnetlib.dli.resolver.model.*;
9
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
10
import eu.dnetlib.msro.workflows.graph.Arc;
11
import eu.dnetlib.msro.workflows.procs.Env;
12
import eu.dnetlib.resolver.parser.DMFResolverParser;
13
import eu.dnetlib.rmi.common.ResultSet;
14
import org.apache.commons.lang3.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.springframework.beans.factory.annotation.Autowired;
18
import org.springframework.http.converter.StringHttpMessageConverter;
19
import org.springframework.web.client.RestTemplate;
20

    
21
import java.net.URI;
22
import java.nio.charset.Charset;
23
import java.time.LocalDateTime;
24
import java.util.Arrays;
25
import java.util.List;
26

    
27
public class ResolveAndIndexJobNode extends SimpleJobNode {
28

    
29
    private static final Log log = LogFactory.getLog(ResolveAndIndexJobNode.class);
30

    
31

    
32
    private static final String BASE_CFG_URL = "http://%s:9200/%s/scholix/%s/?pretty";
33

    
34

    
35
    private String inputEprParam;
36

    
37
    private String indexHost;
38

    
39
    private String indexName;
40

    
41
    @Autowired
42
    private List<PIDResolver> pluginResolver;
43

    
44
    @Autowired
45
    private ResultSetClient resultSetClient;
46

    
47
    @Override
48
    protected String execute(Env env) throws Exception {
49

    
50
        final ResultSet<?> rsIn = env.getAttribute(this.inputEprParam, ResultSet.class);
51

    
52
        final Iterable<String> records = resultSetClient.iter(rsIn, String.class);
53

    
54
        final RestTemplate restTemplate = new RestTemplate();
55
        restTemplate.getMessageConverters()
56
                .add(0, new StringHttpMessageConverter(Charset.forName("UTF-8")));
57

    
58
        setIndexHost(indexHost);
59
        setIndexName(indexName);
60

    
61
        DMFResolverParser parser = new DMFResolverParser();
62

    
63

    
64
        for (String record : records) {
65
            final ResolvedObject result = parser.parseObject(record);
66
            if (result == null) {
67
                log.error("error on parsing " + record);
68
                continue;
69
            }
70
            for (final ObjectRelation rels : result.getRelations()) {
71
                final ResolvedObject resolvedRelation = resolveRelation(rels.getTargetPID(), result.getDatasourceProvenance().get(0));
72

    
73

    
74
                final Scholix.Builder scholix = Scholix.newBuilder();
75

    
76
                scholix.addLinkproviderBuilder()
77
                        .setName(result.getDatasourceProvenance().get(0).getDatasource())
78
                        .addIdentifiersBuilder()
79
                        .setIdentifier(result.getDatasourceProvenance().get(0).getDatasourceId())
80
                        .setSchema("dnetIdentifier");
81

    
82
                scholix.setRelationship(ScholixRelationship.newBuilder()
83
                        .setName(rels.getRelationSemantics())
84
                        .setInverse(rels.getInverseRelation())
85
                        .setSchema("datacite")
86
                        .build());
87

    
88
                final ScholixResource source = generateResource(result);
89
                final ScholixResource target = generateResource(resolvedRelation);
90
                scholix.setSource(source);
91
                scholix.setTarget(target);
92
                scholix.setPublicationDate(LocalDateTime.now().toString());
93

    
94

    
95
//                log.error(indexURL);
96
//
97
//                log.info("json :   "+JsonFormat.printToString(scholix.build()));
98

    
99

    
100
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(result, resolvedRelation)), JsonFormat.printToString(scholix.build()));
101

    
102
                scholix.setRelationship(ScholixRelationship.newBuilder()
103
                        .setInverse(rels.getRelationSemantics())
104
                        .setName(rels.getInverseRelation())
105
                        .setSchema("datacite")
106
                        .build());
107
                scholix.setTarget(source);
108
                scholix.setSource(target);
109

    
110
                restTemplate.postForLocation(String.format(BASE_CFG_URL, indexHost, indexName, generateIdentifier(resolvedRelation, result)), JsonFormat.printToString(scholix.build()));
111
            }
112
        }
113
        return Arc.DEFAULT_ARC;
114
    }
115

    
116
    private String generateIdentifier(final ResolvedObject source, ResolvedObject target) {
117

    
118
        return AbstractDNetXsltFunctions.md5(String.format("%s::%s", source.getPid().toLowerCase().trim(), target.getPid().toLowerCase().trim()));
119

    
120
    }
121

    
122
    private ScholixResource generateResource(ResolvedObject result) {
123
        final ScholixResource.Builder builder = ScholixResource.newBuilder();
124
        if (result.getDatasourceProvenance() != null)
125
            result.getDatasourceProvenance().forEach(
126
                    objectProvenance -> {
127
                        builder.addCollectedFrom(ScholixCollectedFrom.newBuilder()
128
                                .setProvisionMode(objectProvenance.getProvisionMode())
129
                                .setCompletionStatus(objectProvenance.getCompletionStatus())
130
                                .setProvider(ScholixEntityId.newBuilder()
131
                                        .setName(objectProvenance.getDatasource())
132
                                        .addIdentifiers(ScholixIdentifier.newBuilder().setIdentifier(objectProvenance.getDatasourceId())
133
                                                .setSchema("dnetIdentifier").build())
134
                                        .build()));
135
                        if (StringUtils.isNotEmpty(objectProvenance.getPublisher())) {
136
                            builder.addPublisher(ScholixEntityId.newBuilder()
137
                                    .setName(objectProvenance.getPublisher())
138
                                    .build());
139
                        }
140

    
141
                    });
142
        builder.addIdentifier(ScholixIdentifier.newBuilder().
143
                setIdentifier(result.getPid())
144
                .setSchema(result.getPidType())
145
                .build());
146
        builder.setObjectType(result.getType().toString());
147
        if (result.getTitles() != null && result.getTitles().size() > 0)
148
            builder.setTitle(result.getTitles().get(0));
149
        if (result.getAuthors() != null)
150
            result.getAuthors().forEach(author -> builder.addCreator(
151
                    ScholixEntityId.newBuilder()
152
                            .setName(author)
153
                            .build()));
154
        if (StringUtils.isNotBlank(result.getDate())) {
155
            builder.setPublicationDate(result.getDate());
156
        }
157

    
158
        String tp = null;
159

    
160
        switch (result.getType()) {
161
            case dataset:
162
                tp = "60";
163
                break;
164
            case unknown:
165
                tp = "70";
166
                break;
167
            case publication:
168
                tp = "50";
169
                break;
170
        }
171
        builder.setDnetIdentifier(tp + "|dnet________::" + result.getIdentifier());
172
        return builder.build();
173
    }
174

    
175

    
176
    private ResolvedObject resolveRelation(final PID currentPid, final ObjectProvenance provenance) {
177
        for (PIDResolver resolver : pluginResolver) {
178
            final ResolvedObject currentIdentifier = resolver.retrievePID(currentPid.getId(), currentPid.getType());
179

    
180
            if (currentIdentifier != null &&
181
                    !StringUtils.isBlank(currentIdentifier.getPid()) &&
182
                    currentIdentifier.getPid().toLowerCase().equals(currentPid.getId().toLowerCase())) {
183
                return currentIdentifier;
184
            }
185
        }
186

    
187
        final ResolvedObject resolvedObject = new ResolvedObject();
188
        resolvedObject.setPid(currentPid.getId());
189
        resolvedObject.setPidType(currentPid.getType());
190
        ObjectProvenance resultProvenance = new ObjectProvenance();
191
        resultProvenance.setDatasource(provenance.getDatasource());
192
        resultProvenance.setDatasourceId(provenance.getDatasourceId());
193
        resultProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
194
        resultProvenance.setProvisionMode(ObjectProvisionMode.collected.toString());
195
        resolvedObject.setDatasourceProvenance(Arrays.asList(resultProvenance));
196
        return resolvedObject;
197
    }
198

    
199
    public String getInputEprParam() {
200
        return inputEprParam;
201
    }
202

    
203
    public void setInputEprParam(String inputEprParam) {
204
        this.inputEprParam = inputEprParam;
205
    }
206

    
207
    public String getIndexHost() {
208
        return indexHost;
209
    }
210

    
211
    public void setIndexHost(String indexHost) {
212
        this.indexHost = indexHost;
213
    }
214

    
215
    public String getIndexName() {
216
        return indexName;
217
    }
218

    
219
    public void setIndexName(String indexName) {
220
        this.indexName = indexName;
221
    }
222
}
(5-5/5)