Project

General

Profile

1 43783 sandro.lab
package eu.dnetlib.resolver.parser;
2
3
import com.google.common.collect.Lists;
4
import com.ximpleware.AutoPilot;
5
import com.ximpleware.VTDGen;
6
import com.ximpleware.VTDNav;
7 49184 sandro.lab
import eu.dnetlib.data.transform.VtdUtilityParser;
8
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
9 45717 sandro.lab
import eu.dnetlib.dli.DLIUtils;
10 44356 claudio.at
import eu.dnetlib.dli.resolver.model.*;
11 49216 sandro.lab
import eu.dnetlib.pid.resolver.model.ObjectRelation;
12 49184 sandro.lab
import eu.dnetlib.pid.resolver.model.ObjectType;
13 49216 sandro.lab
import eu.dnetlib.pid.resolver.model.PID;
14 49288 sandro.lab
import eu.dnetlib.pid.resolver.model.SubjectType;
15 49332 sandro.lab
import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
16 46216 sandro.lab
import org.apache.commons.lang3.StringUtils;
17 43783 sandro.lab
import org.apache.commons.logging.Log;
18
import org.apache.commons.logging.LogFactory;
19
20 49184 sandro.lab
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
21
import java.util.Arrays;
22
import java.util.List;
23
24 43783 sandro.lab
/**
25
 * Created by sandro on 9/13/16.
26
 */
27
public class DMFResolverParser extends AbstractResolverParser {
28
29
	private static final Log log = LogFactory.getLog(DMFResolverParser.class);
30
31
	@Override
32 49184 sandro.lab
    public DLIResolvedObject parseObject(final String record) {
33
        try {
34
            final DLIResolvedObject parsedObject = new DLIResolvedObject();
35
            final VTDGen vg = new VTDGen();
36 43783 sandro.lab
			vg.setDoc(record.getBytes());
37
			vg.parse(true);
38
39
			final VTDNav vn = vg.getNav();
40
			final AutoPilot ap = new AutoPilot(vn);
41
42 49580 sandro.lab
			final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
43 45835 sandro.lab
			parsedObject.setResolvedDate(resolvedDate);
44
45 49580 sandro.lab
			final String datasourcePrefix = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='datasourceprefix']");
46 43783 sandro.lab
47 49580 sandro.lab
			final String completionStatus = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='completionStatus']");
48
			final String provisionMode = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='provisionMode']");
49 43783 sandro.lab
50 46216 sandro.lab
            final String publisher = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='publisher']");
51
52
53 49184 sandro.lab
            final DLIObjectProvenance provenance = new DLIObjectProvenance();
54 45717 sandro.lab
            provenance.setDatasourceId(DLIUtils.getIdFromDataSourcePrefix(datasourcePrefix));
55
            provenance.setDatasource(DLIUtils.getNameFromDataSourcePrefix(datasourcePrefix));
56
            provenance.setCompletionStatus(completionStatus);
57 46216 sandro.lab
            if (!StringUtils.isEmpty(publisher))
58
                provenance.setPublisher(publisher);
59
            provenance.setProvisionMode(provisionMode);
60 43783 sandro.lab
			parsedObject.setDatasourceProvenance(Lists.newArrayList(provenance));
61
62 49580 sandro.lab
63 51327 sandro.lab
64
65
66 43902 sandro.lab
			final List<Node> identifierType =
67 45628 sandro.lab
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='identifier']", Lists.newArrayList("identifierType"));
68 43783 sandro.lab
69 49332 sandro.lab
            if (extractIdentifier(parsedObject, identifierType)) return null;
70 43783 sandro.lab
71 51327 sandro.lab
72
            List<String> descs = VtdUtilityParser.getTextValue(ap, vn,"//*[local-name()='description']" );
73
            if (descs!=null && descs.size()>0)
74
            parsedObject.setDescription(descs.get(0));
75
76
77 43902 sandro.lab
			final List<Node> relations =
78 54390 sandro.lab
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='relatedIdentifier']", Arrays.asList("relatedIdentifierType", "relationType", "inverseRelationType", "entityType"));
79 43783 sandro.lab
80
			if (relations != null && relations.size() > 0) {
81 49216 sandro.lab
				final List<ObjectRelation> relationsResult = new ArrayList<>();
82
				relations.forEach(relationMap -> {
83 43902 sandro.lab
					final String relationType = relationMap.getAttributes().get("relationType");
84 45425 sandro.lab
                    final String inverseRelationType = relationMap.getAttributes().get("inverseRelationType");
85
                    final String relatedIdentifierType = relationMap.getAttributes().get("relatedIdentifierType");
86 54390 sandro.lab
                    final String relatedEntityType = relationMap.getAttributes().get("entityType");
87 43902 sandro.lab
					final String relatedPid = relationMap.getTextValue();
88 49184 sandro.lab
                    final DLIObjectRelation currentRelation = new DLIObjectRelation();
89 54390 sandro.lab
                    currentRelation.setTargetType(ObjectType.valueOf(relatedEntityType));
90 49184 sandro.lab
                    currentRelation.setTargetPID(new PID(relatedPid, relatedIdentifierType));
91 43783 sandro.lab
					currentRelation.setRelationSemantics(relationType);
92 45425 sandro.lab
                    currentRelation.setInverseRelation(inverseRelationType);
93
                    currentRelation.setCompletionStatus(CompletionStatus.incomplete.toString());
94 51143 sandro.lab
					if (!StringUtils.isBlank(currentRelation.getTargetPID().getId()))
95
						relationsResult.add(currentRelation);
96 43783 sandro.lab
				});
97
				parsedObject.setRelations(relationsResult);
98
			}
99
100 45628 sandro.lab
            final List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']//*[local-name()='subject']", Arrays.asList("subjectScheme"));
101 43783 sandro.lab
102 49332 sandro.lab
            extractSubject(parsedObject, subjects);
103 43783 sandro.lab
104
			parsedObject.setCompletionStatus(completionStatus);
105
106 45628 sandro.lab
            final List<String> creators = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='creator']/*[local-name()='creatorName']");
107
            if (creators != null && creators.size() > 0) {
108 43783 sandro.lab
				parsedObject.setAuthors(creators);
109
			}
110 45628 sandro.lab
            final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']//*[local-name()='title']");
111
            if (titles != null && titles.size() > 0) {
112 43783 sandro.lab
				parsedObject.setTitles(titles);
113
			}
114
115 45943 sandro.lab
            List<Node> resourceTypeGeneral = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']", Arrays.asList("resourceTypeGeneral"));
116 43783 sandro.lab
117 45943 sandro.lab
118
            if (resourceTypeGeneral != null && !resourceTypeGeneral.isEmpty()) {
119
                final String type = resourceTypeGeneral.get(0).getAttributes().get("resourceTypeGeneral");
120
                setType(parsedObject, type);
121
            }
122
123
            if (parsedObject.getType() == ObjectType.unknown) {
124
125
                final String type = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resource']/*[local-name()='resourceType']");
126
                setType(parsedObject, type);
127
            }
128 49332 sandro.lab
            final List<String> dates = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='resource']/*[local-name()='dates']/*[local-name()='date']");
129 45943 sandro.lab
130
131 49332 sandro.lab
            if (dates != null && dates.size() > 0) {
132 43783 sandro.lab
				parsedObject.setDate(dates.get(0));
133
			}
134
			return parsedObject;
135
		} catch (Throwable e) {
136
			log.error("Error on parsing record " + record, e);
137
			return null;
138
		}
139
	}
140
141
142
}