Project

General

Profile

1
package eu.dnetlib.wds.parser;
2

    
3
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.data.transform.VtdUtilityParser;
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
import eu.dnetlib.miscutils.collections.Pair;
import eu.dnetlib.pid.resolver.model.*;
import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
import eu.dnetlib.wds.resolver.WDSObjectRelation;
import eu.dnetlib.wds.resolver.WDSResolvedObject;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
17

    
18
/**
19
 * Created by sandro on 9/13/16.
20
 */
21
public class PMFResolverParser extends AbstractResolverParser {
22

    
23
    @Override
24
    public ResolvedObject parseObject(final String record) {
25
        try {
26
            final WDSResolvedObject parsedObject = new WDSResolvedObject();
27
            final VTDGen vg = new VTDGen();
28
            vg.setDoc(record.getBytes());
29
            vg.parse(true);
30

    
31
            final VTDNav vn = vg.getNav();
32
            final AutoPilot ap = new AutoPilot(vn);
33

    
34
            ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
35

    
36
            final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//dri:resolvedDate");
37

    
38

    
39
            ap.declareXPathNameSpace("oaf", "http://namespace.dnet.eu/oaf");
40
            final List<Node> pid = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:pid", Arrays.asList("type"));
41

    
42
            if (pid != null && pid.size() > 0) {
43
                final String currentPid = pid.get(0).getTextValue();
44
                final String currentPidType = pid.get(0).getAttributes().get("type");
45
                parsedObject.setPid(currentPid);
46
                parsedObject.setPidType(currentPidType);
47
            } else {
48
                return null;
49
            }
50

    
51
            List<Node> collectedFromNodes =
52
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:collectedFrom", Arrays.asList("name", "id", "mode", "completionStatus"));
53

    
54

    
55
            List<Node> relatedIdentifiers =
56
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:relatedIdentifier",
57
                            Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
58

    
59
            if (relatedIdentifiers != null && relatedIdentifiers.size() > 0) {
60
                final List<ObjectRelation> relations = new ArrayList<>();
61
                relatedIdentifiers.forEach(relation -> {
62
                    final String currentPid = relation.getTextValue();
63
                    final String currentPidType = relation.getAttributes().get("relatedIdentifierType");
64
                    final String currentType = relation.getAttributes().get("entityType");
65
                    final WDSObjectRelation currentRelation = new WDSObjectRelation();
66
                    currentRelation.setTargetPID(inferPid(new PID(currentPid, currentPidType)));
67
                    currentRelation.setSourcePID(inferPid(new PID(parsedObject.getPid(), parsedObject.getPidType())));
68
                    currentRelation.setRelationSemantics(relation.getAttributes().get("relationType"));
69
                    currentRelation.setInverseRelation(relation.getAttributes().get("inverseRelationType"));
70

    
71

    
72
                    currentRelation.setTargetType(ObjectType.valueOf(currentType));
73
//					if (parsedObject.getDatasourceProvenance() != null && parsedObject.getDatasourceProvenance().size() > 0) {
74
//                        final DLIObjectProvenance provenance = parsedObject.getDatasourceProvenance().get(0);
75
//
76
//                        final DLIObjectProvenance newProvenance = new DLIObjectProvenance();
77
//                        newProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
78
//						newProvenance.setDatasourceId(provenance.getDatasourceId());
79
//						newProvenance.setDatasource(provenance.getDatasource());
80
//						newProvenance.setProvisionMode(provenance.getProvisionMode());
81
//						currentRelation.setRelationProvenance(Arrays.asList(newProvenance));
82
//					}
83

    
84
                    relations.add(currentRelation);
85
                });
86
                parsedObject.setRelations(relations);
87
            }
88

    
89
            ap.declareXPathNameSpace("dc", "http://purl.org/dc/elements/1.1/");
90
            final List<String> authorsNode = VtdUtilityParser.getTextValue(ap, vn, "//dc:creator");
91
            parsedObject.setAuthors(authorsNode);
92
            parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//dc:title"));
93

    
94

    
95
            parsedObject.addDescription(new Pair<>("unknown", VtdUtilityParser.getSingleValue(ap, vn, "//dc:description")));
96

    
97
            parsedObject.addDate("unknown", VtdUtilityParser.getSingleValue(ap, vn, "//dc:date"));
98
            List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//dc:subject", Arrays.asList("scheme"));
99
            if (subjects != null && subjects.size() > 0) {
100
                final List<SubjectType> currentSubjects = new ArrayList<>();
101
                subjects.forEach(it -> {
102

    
103
                    String scheme = it.getAttributes().get("scheme");
104
                    if (scheme == null)
105
                        scheme = "unknown";
106
                    currentSubjects.add(new SubjectType(scheme, it.getTextValue()));
107
                });
108

    
109
                parsedObject.setSubjects(currentSubjects);
110
            }
111

    
112
            setType(parsedObject, VtdUtilityParser.getSingleValue(ap, vn, "//dc:type"));
113

    
114
            return parsedObject;
115
        } catch (Throwable e) {
116
            log.debug("Input record: " + record);
117
            log.error("Error on parsing record ", e);
118
            return null;
119
        }
120

    
121
    }
122
}
(2-2/3)