Project

General

Profile

1
package eu.dnetlib.resolver.parser;
2

    
3
import com.google.common.collect.Lists;
import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.data.transform.VtdUtilityParser;
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
import eu.dnetlib.dli.DLIUtils;
import eu.dnetlib.dli.resolver.model.*;
import eu.dnetlib.pid.resolver.model.ObjectType;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
19

    
20
/**
21
 * Created by sandro on 9/13/16.
22
 */
23
public class DMFResolverParser extends AbstractResolverParser {
24

    
25
	private static final Log log = LogFactory.getLog(DMFResolverParser.class);
26

    
27
	@Override
28
    public DLIResolvedObject parseObject(final String record) {
29
        try {
30
            final DLIResolvedObject parsedObject = new DLIResolvedObject();
31
            final VTDGen vg = new VTDGen();
32
			vg.setDoc(record.getBytes());
33
			vg.parse(true);
34

    
35
			final VTDNav vn = vg.getNav();
36
			final AutoPilot ap = new AutoPilot(vn);
37
			ap.declareXPathNameSpace("dri", "http://www.driver-repository.eu/namespace/dri");
38

    
39
			final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//dri:resolvedDate");
40
			parsedObject.setResolvedDate(resolvedDate);
41

    
42
			final String datasourcePrefix = VtdUtilityParser.getSingleValue(ap, vn, "//dri:datasourceprefix");
43
			ap.declareXPathNameSpace("oaf", "http://namespace.dnet.eu/oaf");
44

    
45
			final String completionStatus = VtdUtilityParser.getSingleValue(ap, vn, "//oaf:completionStatus");
46
			final String provisionMode = VtdUtilityParser.getSingleValue(ap, vn, "//oaf:provisionMode");
47

    
48
            final String publisher = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='publisher']");
49

    
50

    
51
            final DLIObjectProvenance provenance = new DLIObjectProvenance();
52
            provenance.setDatasourceId(DLIUtils.getIdFromDataSourcePrefix(datasourcePrefix));
53
            provenance.setDatasource(DLIUtils.getNameFromDataSourcePrefix(datasourcePrefix));
54
            provenance.setCompletionStatus(completionStatus);
55
            if (!StringUtils.isEmpty(publisher))
56
                provenance.setPublisher(publisher);
57
            provenance.setProvisionMode(provisionMode);
58
			parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance));
59

    
60
			ap.declareXPathNameSpace("datacite", "http://datacite.org/schema/kernel-3");
61
			final List<Node> identifierType =
62
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='identifier']", Lists.newArrayList("identifierType"));
63

    
64
			if (identifierType != null && identifierType.size() > 0) {
65

    
66
				final Node result = identifierType.get(0);
67
				parsedObject.setPid(result.getTextValue());
68
				parsedObject.setPidType(result.getAttributes().get("identifierType"));
69
			} else {
70
				log.debug("Error on parsing record the identifier should not null ");
71
				return null;
72
			}
73

    
74
			final List<Node> relations =
75
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='relatedIdentifier']", Arrays.asList("relatedIdentifierType", "relationType", "inverseRelationType"));
76

    
77
			if (relations != null && relations.size() > 0) {
78
                final List<DLIObjectRelation> relationsResult = new ArrayList<>();
79
                relations.forEach(relationMap -> {
80
					final String relationType = relationMap.getAttributes().get("relationType");
81
                    final String inverseRelationType = relationMap.getAttributes().get("inverseRelationType");
82
                    final String relatedIdentifierType = relationMap.getAttributes().get("relatedIdentifierType");
83
					final String relatedPid = relationMap.getTextValue();
84
                    final DLIObjectRelation currentRelation = new DLIObjectRelation();
85
                    currentRelation.setTargetPID(new PID(relatedPid, relatedIdentifierType));
86
					currentRelation.setRelationSemantics(relationType);
87
                    currentRelation.setInverseRelation(inverseRelationType);
88
                    currentRelation.setCompletionStatus(CompletionStatus.incomplete.toString());
89
					relationsResult.add(currentRelation);
90
				});
91
				parsedObject.setRelations(relationsResult);
92
			}
93

    
94
            final List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='subject']", Arrays.asList("subjectScheme"));
95

    
96
			if (subjects != null && subjects.size() > 0) {
97
				final List<SubjectType> subjectResult = new ArrayList<>();
98
				subjects.forEach(subjectMap -> {
99
					final SubjectType subject = new SubjectType(subjectMap.getAttributes().get("subjectScheme"), subjectMap.getTextValue());
100
					subjectResult.add(subject);
101
				});
102
				parsedObject.setSubjects(subjectResult);
103
			}
104

    
105
			parsedObject.setCompletionStatus(completionStatus);
106

    
107
            final List<String> creators = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='creator']/*[local-name()='creatorName']");
108
            if (creators != null && creators.size() > 0) {
109
				parsedObject.setAuthors(creators);
110
			}
111
            final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='title']");
112
            if (titles != null && titles.size() > 0) {
113
				parsedObject.setTitles(titles);
114
			}
115

    
116
            List<Node> resourceTypeGeneral = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']", Arrays.asList("resourceTypeGeneral"));
117

    
118

    
119
            if (resourceTypeGeneral != null && !resourceTypeGeneral.isEmpty()) {
120
                final String type = resourceTypeGeneral.get(0).getAttributes().get("resourceTypeGeneral");
121
                setType(parsedObject, type);
122
            }
123

    
124
            if (parsedObject.getType() == ObjectType.unknown) {
125

    
126
                final String type = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']");
127
                setType(parsedObject, type);
128
            }
129

    
130

    
131

    
132

    
133

    
134

    
135
            final List<String> dates = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']/*[local-name()='dates']/*[local-name()='date']");
136

    
137
			if (dates != null && dates.size() > 0) {
138
				parsedObject.setDate(dates.get(0));
139
			}
140
			return parsedObject;
141
		} catch (Throwable e) {
142
			log.error("Error on parsing record " + record, e);
143
			return null;
144
		}
145
	}
146

    
147

    
148

    
149
}
150

    
(3-3/5)