Project

General

Profile

1
package eu.dnetlib.pid.resolver.parser;
2

    
3
import eu.dnetlib.data.transform.VtdUtilityParser;
4
import eu.dnetlib.pid.resolver.model.ObjectType;
5
import eu.dnetlib.pid.resolver.model.PID;
6
import eu.dnetlib.pid.resolver.model.ResolvedObject;
7
import eu.dnetlib.pid.resolver.model.SubjectType;
8
import org.apache.commons.lang3.StringUtils;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11

    
12
import javax.xml.stream.XMLStreamReader;
13
import java.util.*;
14
import java.util.regex.Matcher;
15
import java.util.regex.Pattern;
16

    
17
public abstract class AbstractResolverParser {
18

    
19
    protected static final Log log = LogFactory.getLog(AbstractResolverParser.class);
20
    final static Pattern pattern = Pattern.compile("10\\.\\d{4,9}/[-._;()/:A-Z0-9]+$", Pattern.CASE_INSENSITIVE);
21
    private List<String> datasetSubTypes = Arrays.asList("dataset", "software", "film", "sound", "physicalobject", "audiovisual", "collection", "other", "study", "metadata");
22

    
23
    public abstract ResolvedObject parseObject(final String record);
24

    
25
    protected Map<String, String> getAttributes(final XMLStreamReader parser) {
26
        final Map<String, String> attributesMap = new HashMap<>();
27
        for (int i = 0; i < parser.getAttributeCount(); i++) {
28
            attributesMap.put(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
29
        }
30
        return attributesMap;
31
    }
32

    
33
    protected void setType(final ResolvedObject object, final String type) {
34
        if (!StringUtils.isBlank(type)) {
35
            if (datasetSubTypes.contains(type.toLowerCase())) {
36
                object.setType(ObjectType.dataset);
37
                return;
38
            } else if (type.toLowerCase().contains("publication")) {
39
                object.setType(ObjectType.publication);
40
                return;
41
            } else {
42
                object.setType(ObjectType.unknown);
43
            }
44
        }
45
    }
46

    
47
    protected void extractSubject(ResolvedObject parsedObject, List<VtdUtilityParser.Node> subjects) {
48
        if (subjects != null && subjects.size() > 0) {
49
            final List<SubjectType> subjectResult = new ArrayList<>();
50
            subjects.forEach(subjectMap -> {
51
                final SubjectType subject = new SubjectType(subjectMap.getAttributes().get("subjectScheme"), subjectMap.getTextValue());
52
                subjectResult.add(subject);
53
            });
54
            parsedObject.setSubjects(subjectResult);
55
        }
56
    }
57

    
58
    protected boolean extractIdentifier(ResolvedObject parsedObject, List<VtdUtilityParser.Node> identifierType) {
59
        return extractIdentifier(parsedObject, identifierType, "identifierType");
60
    }
61

    
62

    
63
    protected boolean extractIdentifier(ResolvedObject parsedObject, List<VtdUtilityParser.Node> identifierType, final String fieldName) {
64
        if (identifierType != null && identifierType.size() > 0) {
65

    
66
            final VtdUtilityParser.Node result = identifierType.get(0);
67
            parsedObject.setPid(result.getTextValue());
68
            parsedObject.setPidType(result.getAttributes().get(fieldName));
69
        } else {
70
            log.debug("Error on parsing record the identifier should not null ");
71
            return true;
72
        }
73
        return false;
74
    }
75

    
76
    protected PID inferPid(final PID input) {
77
        final Matcher matcher = pattern.matcher(input.getId());
78
        if (matcher.find()) {
79
            input.setId(matcher.group());
80
            input.setType("doi");
81
        }
82
        return input;
83
    }
84

    
85
}
    (1-1/1)