Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.propagation.orcidthroughproducts;
2

    
3
import com.googlecode.protobuf.format.JsonFormat;
4
import eu.dnetlib.data.mapreduce.hbase.propagation.*;
5
import eu.dnetlib.data.proto.*;
6
import org.apache.hadoop.io.Text;
7

    
8
import java.util.ArrayList;
9
import java.util.Arrays;
10
import java.util.Iterator;
11
import java.util.List;
12
import java.util.stream.Collectors;
13

    
14
public class ResultOrcidIterator extends ResultIterator {
15

    
16
    private Iterator<String> author_iterator;
17
    private List<FieldTypeProtos.Author> autoritative_authors ;
18
    private List<String> relatedResult ;
19

    
20

    
21
    public ResultOrcidIterator(final Iterable<Text> values, final String key) throws NotValidResultSequenceException {
22
        super(values,key);
23
    }
24

    
25
    @Override
26
    protected void checkSequence() throws NotValidResultSequenceException {
27
        if(!it.hasNext()){
28
            throw new NotValidResultSequenceException("Empty information for key");
29
        }
30

    
31
        try {
32
            autoritative_authors = new ArrayList<>();
33
            relatedResult = new ArrayList<>();
34
            analizeValueList();
35

    
36
        }catch(JsonFormat.ParseException e){
37
            throw new NotValidResultSequenceException("Problems recreating the author list from serialization");
38
        }
39

    
40
        List<FieldTypeProtos.Author> authors_with_orcid = autoritative_authors.stream()
41
                .map(a -> {
42
                    if (a.getPidList() == null || a.getPidList().isEmpty())
43
                        return null;
44
                    return a;
45
                })
46
                .filter(a -> a!= null)
47
                .filter(a -> containsOrcid(a.getPidList()))
48
                .collect(Collectors.toList());
49

    
50

    
51
        if(authors_with_orcid.size() == 0 || relatedResult.size() == 0){
52
            resultId = TERMINATOR;
53
            return;
54
        }
55

    
56

    
57
        author_iterator = relatedResult.iterator();
58
        autoritative_authors = authors_with_orcid;
59
        getNext();
60

    
61
    }
62

    
63
    private boolean containsOrcid(List<FieldTypeProtos.KeyValue> pidList){
64
        if(pidList == null)
65
            return false;
66
        return pidList
67
                .stream()
68
                .filter(kv -> kv.getKey().equals(PropagationConstants.AUTHOR_PID))
69
                .collect(Collectors.toList()).size() > 0;
70
    }
71

    
72
    private void getNext(){
73
        if (author_iterator.hasNext())
74
            resultId = author_iterator.next();
75
        else
76
            resultId = TERMINATOR;
77
    }
78

    
79
    @Override
80
    public List<OafProtos.Oaf> next() {
81
        //get the next merged author list
82
        try {
83
            //list of authors in the related result
84
            Emit e = Emit.fromJson(resultId);
85
            List<FieldTypeProtos.Author> author_list = getAuthorList(e);
86

    
87
            ResultProtos.Result.Metadata.Builder metadata = searchMatch(author_list);
88

    
89
            if (metadata != null){
90
                ArrayList<OafProtos.Oaf> ret = new ArrayList<OafProtos.Oaf>(Arrays.asList(getUpdate(metadata, e.getId())));
91
                getNext();
92
                return ret;
93
            }
94

    
95

    
96
        }catch(JsonFormat.ParseException e){
97

    
98
        }
99
        getNext();
100
        return null;
101
    }
102

    
103
    private ResultProtos.Result.Metadata.Builder searchMatch(List<FieldTypeProtos.Author> author_list){
104
        ResultProtos.Result.Metadata.Builder metadataBuilder = ResultProtos.Result.Metadata.newBuilder();
105
        boolean updated = false;
106
//        for (FieldTypeProtos.Author a: autoritative_authors){
107
//            searchAuthor(a,author_list);
108
//        }
109

    
110
        for (FieldTypeProtos.Author a: author_list){
111
            FieldTypeProtos.Author.Builder author = searchAuthor(a, autoritative_authors);
112
            if(author != null){
113
                updated = true;
114
                metadataBuilder.addAuthor(author);
115
            }else{
116
                metadataBuilder.addAuthor(FieldTypeProtos.Author.newBuilder(a));
117
            }
118
        }
119
        if(updated)
120
            return metadataBuilder;
121
        return null;
122
    }
123

    
124

    
125
    private boolean equals(FieldTypeProtos.Author a1, FieldTypeProtos.Author a2){
126
        if(a1.hasSurname()){
127
            if(a2.hasSurname()){
128
                if(!a1.getSurname().trim().equalsIgnoreCase(a2.getSurname().trim())){
129
                    return false;
130
                }
131
                //have the same surname. Check the name
132
                if(a1.hasName()){
133
                    if (a2.hasName()){
134
                        if (a1.getName().trim().equalsIgnoreCase(a2.getName().trim())){
135
                            return true; //same name and same surname in a related research result
136
                        }
137
                        //they could be differently written (i.e. only the initials of the name in one of the two
138
                        return (a1.getName().trim().substring(0,0).equalsIgnoreCase(a2.getName().trim().substring(0,0)));
139
                    }
140
                }
141
            }
142
        }
143
//        if(a1.hasFullname()){
144
//            if (a2.hasFullname()){
145
//                if (a1.getFullname().trim().equalsIgnoreCase(a2.getFullname().trim())){
146
//                    return true;
147
//                }
148
//                //split string containing name and surname
149
//                String[] ns_a1 = a1.getFullname().trim().split(" ");
150
//                String[] ns_a2 = a2.getFullname().trim().split(" ");
151
//
152
//
153
//                if (ns_a1[0].endsWith(".") || ns_a1[0].endsWith(",")){
154
//                    ns_a1[0] = ns_a1[0].substring(0,ns_a1[0].length()-1);
155
//                }
156
//                if (ns_a1[1].endsWith(".") || ns_a1[1].endsWith(",")){
157
//                    ns_a1[1] = ns_a1[1].substring(0,ns_a1[1].length()-1);
158
//                }
159
//
160
//                if (ns_a2[0].endsWith(".") || ns_a2[0].endsWith(",")){
161
//                    ns_a2[0] = ns_a2[0].substring(0,ns_a2[0].length()-1);
162
//                }
163
//                if (ns_a2[1].endsWith(".") || ns_a2[1].endsWith(",")){
164
//                    ns_a2[1] = ns_a2[1].substring(0,ns_a2[1].length()-1);
165
//                }
166
//
167
//                if(ns_a1[0].compareTo(ns_a1[1]) < 0){
168
//                    String tmp = ns_a1[0];
169
//                    ns_a1[0] = ns_a1[1];
170
//                    ns_a1[1] = tmp;
171
//                }
172
//
173
//                if(ns_a2[0].compareTo(ns_a2[1]) < 0){
174
//                    String tmp = ns_a2[0];
175
//                    ns_a2[0] = ns_a2[1];
176
//                    ns_a2[1] = tmp;
177
//
178
//                }
179
//
180
//                if(ns_a1[0].equalsIgnoreCase(ns_a2[0])){
181
//                    if(ns_a1[1].equalsIgnoreCase(ns_a2[1])){//same name and surname
182
//                        return true;
183
//                    }
184
//                    if(ns_a1[1].length() == 1 || ns_a2[1].length() == 1){
185
//                        return ns_a1[1].charAt(0) == ns_a2[1].charAt(0);//same surname and initial of the name
186
//                    }
187
//                    return false;
188
//
189
//                }else{
190
//                    if(ns_a1[1].equalsIgnoreCase(ns_a2[1])){
191
//                        if(ns_a1[0].length() == 1 || ns_a2[0].length()==1)
192
//                            return ns_a1[0].charAt(0) == ns_a2[0].charAt(0);
193
//                        else
194
//                            return false;
195
//                    }
196
//                }
197
//
198
//
199
//
200
//        }
201
//        return false;
202
//    }
203
        return false;
204

    
205
    }
206

    
207
    private FieldTypeProtos.Author.Builder searchAuthor(FieldTypeProtos.Author a, List<FieldTypeProtos.Author> author_list){
208
        if(containsOrcid(a.getPidList()))
209
            return null;
210
        for(FieldTypeProtos.Author autoritative_author : author_list) {
211
                if (equals(autoritative_author, a)) {
212
                    if(!containsOrcid(a.getPidList()))
213
                        return update(a, autoritative_author);
214
                }
215
        }
216
        return  null;
217

    
218
    }
219

    
220
    private void analizeValueList() throws JsonFormat.ParseException {
221
        while(it.hasNext()){
222
            Value v = Value.fromJson(it.next().toString());
223

    
224
            if(v.getType().equals(PropagationConstants.Type.fromresult)){
225
                autoritative_authors.addAll(getAuthorList(Emit.fromJson(v.getValue ())));
226
            }
227
            if(v.getType().equals(PropagationConstants.Type.fromsemrel)){
228
                relatedResult.add(v.getValue());
229
            }
230
        }
231

    
232
    }
233
    private FieldTypeProtos.Author.Builder update(FieldTypeProtos.Author related_author, FieldTypeProtos.Author autoritative_autor ){
234

    
235
        FieldTypeProtos.Author.Builder res = FieldTypeProtos.Author.newBuilder(related_author);
236
        List<FieldTypeProtos.KeyValue> apid_list = autoritative_autor.getPidList();
237
        FieldTypeProtos.KeyValue akv = apid_list.stream().filter(kv -> kv.getKey().equals(PropagationConstants.AUTHOR_PID)).collect(Collectors.toList()).get(0);
238
        FieldTypeProtos.KeyValue.Builder kvb = FieldTypeProtos.KeyValue.newBuilder();
239
        kvb.setKey(akv.getKey()).setValue(akv.getValue());
240
        kvb.setDataInfo(Utils.getDataInfo(
241
                PropagationConstants.ORCID_RESULT_TRUST,
242
                PropagationConstants.CLASS_ORCID_ID,
243
                PropagationConstants.SCHEMA_ID,
244
                PropagationConstants.SCHEMA_NAME,
245
                PropagationConstants.DATA_INFO_TYPE,
246
                PropagationConstants.CLASS_ORCID_NAME)
247
        );
248
        return res.addPid(kvb);
249

    
250

    
251
    }
252

    
253
    private List<FieldTypeProtos.Author> getAuthorList(Emit e) throws JsonFormat.ParseException {
254

    
255
        List<FieldTypeProtos.Author> authors = new ArrayList<>();
256
        for (String author : e.getAuthor_list()) {
257
            FieldTypeProtos.Author.Builder author_builder = FieldTypeProtos.Author.newBuilder();
258
            JsonFormat.merge(author, author_builder);
259
            authors.add(author_builder.build());
260
        }
261

    
262
        return authors;
263

    
264
    }
265

    
266

    
267
    public static OafProtos.Oaf getUpdate(ResultProtos.Result.Metadata.Builder metadata, String resultId) {
268
        final ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder().setMetadata(metadata);
269
        final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder()
270
                .setType(TypeProtos.Type.result)
271
                .setId(resultId)
272
                .setResult(result);
273

    
274
        return OafProtos.Oaf.newBuilder()
275
                .setKind(KindProtos.Kind.entity)
276
                .setEntity(entity)
277
                .build();
278
    }
279
}
(5-5/5)