Project

General

Profile

1
package eu.dnetlib.resolver.parser;
2

    
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
6

    
7
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDGen;
9
import com.ximpleware.VTDNav;
10
import eu.dnetlib.dli.resolver.model.*;
11
import eu.dnetlib.data.transform.VtdUtilityParser;
12
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
13
import eu.dnetlib.pid.resolver.model.ObjectType;
14

    
15
/**
16
 * Created by sandro on 9/13/16.
17
 */
18
public class PMFResolverParser extends AbstractResolverParser {
19

    
20
	@Override
21
    public DLIResolvedObject parseObject(final String record) {
22
        try {
23
            final DLIResolvedObject parsedObject = new DLIResolvedObject();
24
            final VTDGen vg = new VTDGen();
25
			vg.setDoc(record.getBytes());
26
			vg.parse(true);
27

    
28
			final VTDNav vn = vg.getNav();
29
			final AutoPilot ap = new AutoPilot(vn);
30

    
31
			ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
32

    
33
			final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//dri:resolvedDate");
34
			parsedObject.setResolvedDate(resolvedDate);
35

    
36
			ap.declareXPathNameSpace("oaf", "http://namespace.dnet.eu/oaf");
37
			final List<Node> pid = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:pid", Arrays.asList("type"));
38

    
39
			if (pid != null && pid.size() > 0) {
40
				final String currentPid = pid.get(0).getTextValue();
41
				final String currentPidType = pid.get(0).getAttributes().get("type");
42
				parsedObject.setPid(currentPid);
43
				parsedObject.setPidType(currentPidType);
44
			} else {
45
				return null;
46
			}
47

    
48
			List<Node> collectedFromNodes =
49
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:collectedFrom", Arrays.asList("name", "id", "mode", "completionStatus"));
50

    
51
            final List<DLIObjectProvenance> provenances = new ArrayList<>();
52

    
53
			if (collectedFromNodes != null && collectedFromNodes.size() > 0) {
54
				collectedFromNodes.forEach(it -> {
55
                    final DLIObjectProvenance provenance = new DLIObjectProvenance();
56
                    provenance.setDatasourceId(it.getAttributes().get("id"));
57
					provenance.setDatasource(it.getAttributes().get("name"));
58
					provenance.setProvisionMode(it.getAttributes().get("mode"));
59
					provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
60
					provenances.add(provenance);
61
				});
62
			}
63

    
64
			parsedObject.setDatasourceProvenance(provenances);
65
			parsedObject.setCompletionStatus(VtdUtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus"));
66

    
67
			List<Node> relatedIdentifiers =
68
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:relatedIdentifier",
69
							Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
70

    
71
			if (relatedIdentifiers != null && relatedIdentifiers.size() > 0) {
72
                final List<DLIObjectRelation> relations = new ArrayList<>();
73
                relatedIdentifiers.forEach(relation -> {
74
					final String currentPid = relation.getTextValue();
75
					final String currentPidType = relation.getAttributes().get("relatedIdentifierType");
76
					final String currentType = relation.getAttributes().get("entityType");
77
                    final DLIObjectRelation currentRelation = new DLIObjectRelation();
78
                    currentRelation.setTargetPID(new PID(currentPid, currentPidType));
79
					currentRelation.setCompletionStatus(CompletionStatus.incomplete.toString());
80
					currentRelation.setSourcePid(parsedObject.getPid());
81
					currentRelation.setRelationSemantics(relation.getAttributes().get("relationType"));
82
					currentRelation.setInverseRelation(relation.getAttributes().get("inverseRelationType"));
83

    
84

    
85
					currentRelation.setTargetType(ObjectType.valueOf(currentType));
86
					if (parsedObject.getDatasourceProvenance() != null && parsedObject.getDatasourceProvenance().size() > 0) {
87
                        final DLIObjectProvenance provenance = parsedObject.getDatasourceProvenance().get(0);
88

    
89
                        final DLIObjectProvenance newProvenance = new DLIObjectProvenance();
90
                        newProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
91
						newProvenance.setDatasourceId(provenance.getDatasourceId());
92
						newProvenance.setDatasource(provenance.getDatasource());
93
						newProvenance.setProvisionMode(provenance.getProvisionMode());
94
						currentRelation.setRelationProvenance(Arrays.asList(newProvenance));
95
					}
96

    
97
					relations.add(currentRelation);
98
				});
99
				parsedObject.setRelations(relations);
100
			}
101

    
102
			ap.declareXPathNameSpace("dc", "http://purl.org/dc/elements/1.1/");
103
			final List<String> authorsNode = VtdUtilityParser.getTextValue(ap, vn, "//dc:creator");
104
			parsedObject.setAuthors(authorsNode);
105
			parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//dc:title"));
106

    
107
			parsedObject.setDescription(VtdUtilityParser.getSingleValue(ap, vn, "//dc:description"));
108
			parsedObject.setDate(VtdUtilityParser.getSingleValue(ap, vn, "//dc:date"));
109
			List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//dc:subject", Arrays.asList("scheme"));
110
			if (subjects != null && subjects.size() > 0) {
111
				final List<SubjectType> currentSubjects = new ArrayList<>();
112
				subjects.forEach(it -> {
113

    
114
					String scheme = it.getAttributes().get("scheme");
115
					if (scheme == null)
116
						scheme = "unknown";
117
					currentSubjects.add(new SubjectType(scheme, it.getTextValue()));
118
				});
119

    
120
				parsedObject.setSubjects(currentSubjects);
121
			}
122

    
123
			setType(parsedObject, VtdUtilityParser.getSingleValue(ap, vn, "//dc:type"));
124

    
125
			return parsedObject;
126
		} catch (Throwable e) {
127
			log.debug("Input record: " + record);
128
			log.error("Error on parsing record ", e);
129
			return null;
130
		}
131

    
132
	}
133
}
(4-4/5)