Project

General

Profile

1
package eu.dnetlib.wds.parser;
2

    
3
import com.google.common.collect.Lists;
4
import com.ximpleware.AutoPilot;
5
import com.ximpleware.VTDGen;
6
import com.ximpleware.VTDNav;
7
import eu.dnetlib.data.transform.VtdUtilityParser;
8
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
9
import eu.dnetlib.miscutils.collections.Pair;
10
import eu.dnetlib.pid.resolver.model.*;
11
import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
12
import eu.dnetlib.wds.resolver.WDSObjectRelation;
13
import eu.dnetlib.wds.resolver.WDSResolvedObject;
14
import eu.dnetlib.wds.utils.WDSUtils;
15
import org.apache.commons.lang3.StringUtils;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18

    
19
import java.util.ArrayList;
20
import java.util.Arrays;
21
import java.util.List;
22
import java.util.stream.Collectors;
23

    
24
/**
25
 * Created by sandro on 9/13/16.
26
 */
27
public class DMFResolverParser extends AbstractResolverParser {
28

    
29
    private static final Log log = LogFactory.getLog(DMFResolverParser.class);
30

    
31
    @Override
32
    public ResolvedObject parseObject(final String record) {
33
        try {
34
            final WDSResolvedObject parsedObject = new WDSResolvedObject();
35
            final VTDGen vg = new VTDGen();
36
            vg.setDoc(record.getBytes());
37
            vg.parse(true);
38

    
39
            final VTDNav vn = vg.getNav();
40
            final AutoPilot ap = new AutoPilot(vn);
41
            ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
42

    
43

    
44
            final String datasourcePrefix = VtdUtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix");
45
            ap.declareXPathNameSpace("oaf", "http://namespace.dnet.eu/oaf");
46

    
47
            final ObjectProvenance provenance = new ObjectProvenance();
48
            provenance.setDatasourceId(WDSUtils.getIdFromDataSourcePrefix(datasourcePrefix));
49
            provenance.setDatasource(WDSUtils.getNameFromDataSourcePrefix(datasourcePrefix));
50
            parsedObject.setDatasourceProvenance(Arrays.asList(provenance));
51

    
52
            final String publisher = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='publisher']");
53

    
54
            if (StringUtils.isNotEmpty(publisher))
55
                parsedObject.setPublisher(publisher);
56

    
57
            final List<Node> identifierType =
58
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='identifier']", Lists.newArrayList("identifierType"));
59

    
60
            if (extractIdentifier(parsedObject, identifierType)) return null;
61

    
62
            final List<Node> relations =
63
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='relatedIdentifier']", Arrays.asList("relatedIdentifierType", "relationType", "inverseRelationType"));
64

    
65
            if (relations != null && relations.size() > 0) {
66
                final List<ObjectRelation> relationsResult = new ArrayList<>();
67
                relations.forEach(relationMap -> {
68
                    final String relationType = relationMap.getAttributes().get("relationType");
69
                    final String inverseRelationType = relationMap.getAttributes().get("inverseRelationType");
70
                    final String relatedIdentifierType = relationMap.getAttributes().get("relatedIdentifierType");
71
                    final String relatedPid = relationMap.getTextValue();
72
                    final WDSObjectRelation currentRelation = new WDSObjectRelation();
73
                    currentRelation.setTargetPID(inferPid(new PID(relatedPid, relatedIdentifierType)));
74
                    currentRelation.setRelationSemantics(relationType);
75
                    currentRelation.setInverseRelation(inverseRelationType);
76
                    relationsResult.add(currentRelation);
77
                });
78
                parsedObject.setRelations(relationsResult);
79
            }
80

    
81
            final List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='subject']", Arrays.asList("subjectScheme"));
82

    
83
            extractSubject(parsedObject, subjects);
84

    
85

    
86
            final List<String> creators = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='creator']/*[local-name()='creatorName']");
87
            if (creators != null && creators.size() > 0) {
88
                parsedObject.setAuthors(creators);
89
            }
90
            final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='title']");
91
            if (titles != null && titles.size() > 0) {
92
                parsedObject.setTitles(titles);
93
            }
94
            final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='description']");
95
            if (descriptions != null && descriptions.size() > 0) {
96

    
97

    
98
                parsedObject.setDescriptions(descriptions.stream().map(it -> new Pair<>("unknwonw", it)).collect(Collectors.toList()));
99
            }
100

    
101
            List<Node> resourceTypeGeneral = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']", Arrays.asList("resourceTypeGeneral"));
102

    
103

    
104
            if (resourceTypeGeneral != null && !resourceTypeGeneral.isEmpty()) {
105
                final String type = resourceTypeGeneral.get(0).getAttributes().get("resourceTypeGeneral");
106
                setType(parsedObject, type);
107
            }
108

    
109
            if (parsedObject.getType() == ObjectType.unknown) {
110

    
111
                final String type = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']");
112
                setType(parsedObject, type);
113
            }
114

    
115

    
116
            final List<String> dates = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']/*[local-name()='dates']/*[local-name()='date']");
117

    
118
            if (dates != null && dates.size() > 0) {
119
                dates.forEach(it -> parsedObject.addDate("unknown", it));
120
            }
121
            return parsedObject;
122
        } catch (Throwable e) {
123
            log.error("Error on parsing record " + record, e);
124
            return null;
125
        }
126
    }
127

    
128

    
129
}
130

    
(1-1/3)