Project

General

Profile

1 43783 sandro.lab
package eu.dnetlib.resolver.parser;
2
3 43939 sandro.lab
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
6 43783 sandro.lab
7 43939 sandro.lab
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDGen;
9
import com.ximpleware.VTDNav;
10 44356 claudio.at
import eu.dnetlib.dli.resolver.model.*;
11 45451 sandro.lab
import eu.dnetlib.data.transform.VtdUtilityParser;
12
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
13 49332 sandro.lab
import eu.dnetlib.pid.resolver.model.*;
14
import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
15 43783 sandro.lab
16
/**
17
 * Created by sandro on 9/13/16.
18
 */
19
public class PMFResolverParser extends AbstractResolverParser {
20
21
	@Override
22 49184 sandro.lab
    public DLIResolvedObject parseObject(final String record) {
23
        try {
24
            final DLIResolvedObject parsedObject = new DLIResolvedObject();
25
            final VTDGen vg = new VTDGen();
26 43939 sandro.lab
			vg.setDoc(record.getBytes());
27
			vg.parse(true);
28 43783 sandro.lab
29 43939 sandro.lab
			final VTDNav vn = vg.getNav();
30
			final AutoPilot ap = new AutoPilot(vn);
31 43783 sandro.lab
32 45835 sandro.lab
33 49580 sandro.lab
34
			final String resolvedDate = VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='resolvedDate']");
35 45835 sandro.lab
			parsedObject.setResolvedDate(resolvedDate);
36
37 43939 sandro.lab
38 49580 sandro.lab
			final List<Node> pid = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='pid']", Arrays.asList("type"));
39
40 49760 sandro.lab
            if (extractIdentifier(parsedObject, pid, "type")) return null;
41 43939 sandro.lab
42
			List<Node> collectedFromNodes =
43 49580 sandro.lab
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='collectedFrom']", Arrays.asList("name", "id", "mode", "completionStatus"));
44 43939 sandro.lab
45 52674 sandro.lab
            List<Node> resolvededFromNodes =
46
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='resolvedFrom']", Arrays.asList("name", "id", "mode", "completionStatus"));
47
48 49332 sandro.lab
            final List<ObjectProvenance> provenances = new ArrayList<>();
49 43939 sandro.lab
50
			if (collectedFromNodes != null && collectedFromNodes.size() > 0) {
51
				collectedFromNodes.forEach(it -> {
52 49184 sandro.lab
                    final DLIObjectProvenance provenance = new DLIObjectProvenance();
53
                    provenance.setDatasourceId(it.getAttributes().get("id"));
54 43939 sandro.lab
					provenance.setDatasource(it.getAttributes().get("name"));
55
					provenance.setProvisionMode(it.getAttributes().get("mode"));
56
					provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
57
					provenances.add(provenance);
58
				});
59
			}
60
61 52674 sandro.lab
            if (resolvededFromNodes != null && resolvededFromNodes.size() > 0) {
62
                resolvededFromNodes.forEach(it -> {
63
                    final DLIObjectProvenance provenance = new DLIObjectProvenance();
64
                    provenance.setDatasourceId(it.getAttributes().get("id"));
65
                    provenance.setDatasource(it.getAttributes().get("name"));
66
                    provenance.setProvisionMode("resolved");
67
                    provenance.setCompletionStatus(it.getAttributes().get("completionStatus"));
68
                    provenances.add(provenance);
69
                });
70
            }
71
72 43939 sandro.lab
			parsedObject.setDatasourceProvenance(provenances);
73 49580 sandro.lab
			parsedObject.setCompletionStatus(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='completionStatus']"));
74 43939 sandro.lab
75
			List<Node> relatedIdentifiers =
76 49580 sandro.lab
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='relatedIdentifier']",
77 45425 sandro.lab
							Arrays.asList("relatedIdentifierType", "relationType", "entityType", "inverseRelationType"));
78 43939 sandro.lab
79
			if (relatedIdentifiers != null && relatedIdentifiers.size() > 0) {
80 49216 sandro.lab
				final List<ObjectRelation> relations = new ArrayList<>();
81
				relatedIdentifiers.forEach(relation -> {
82 44352 sandro.lab
					final String currentPid = relation.getTextValue();
83
					final String currentPidType = relation.getAttributes().get("relatedIdentifierType");
84
					final String currentType = relation.getAttributes().get("entityType");
85 49184 sandro.lab
                    final DLIObjectRelation currentRelation = new DLIObjectRelation();
86
                    currentRelation.setTargetPID(new PID(currentPid, currentPidType));
87 45451 sandro.lab
					currentRelation.setCompletionStatus(CompletionStatus.incomplete.toString());
88
					currentRelation.setSourcePid(parsedObject.getPid());
89
					currentRelation.setRelationSemantics(relation.getAttributes().get("relationType"));
90
					currentRelation.setInverseRelation(relation.getAttributes().get("inverseRelationType"));
91 45425 sandro.lab
92
93 45451 sandro.lab
					currentRelation.setTargetType(ObjectType.valueOf(currentType));
94 43939 sandro.lab
					if (parsedObject.getDatasourceProvenance() != null && parsedObject.getDatasourceProvenance().size() > 0) {
95 49332 sandro.lab
                        final DLIObjectProvenance provenance = (DLIObjectProvenance) parsedObject.getDatasourceProvenance().get(0);
96 43939 sandro.lab
97 49184 sandro.lab
                        final DLIObjectProvenance newProvenance = new DLIObjectProvenance();
98
                        newProvenance.setCompletionStatus(CompletionStatus.incomplete.toString());
99 43939 sandro.lab
						newProvenance.setDatasourceId(provenance.getDatasourceId());
100
						newProvenance.setDatasource(provenance.getDatasource());
101
						newProvenance.setProvisionMode(provenance.getProvisionMode());
102 45451 sandro.lab
						currentRelation.setRelationProvenance(Arrays.asList(newProvenance));
103 43939 sandro.lab
					}
104
105 45451 sandro.lab
					relations.add(currentRelation);
106 43939 sandro.lab
				});
107
				parsedObject.setRelations(relations);
108
			}
109
110 49580 sandro.lab
111
			final List<String> authorsNode = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='creator']");
112 43939 sandro.lab
			parsedObject.setAuthors(authorsNode);
113 49580 sandro.lab
			parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='title']"));
114 43939 sandro.lab
115 49580 sandro.lab
			parsedObject.setDescription(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='description']"));
116
			parsedObject.setDate(VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='date']"));
117
			List<Node> subjects = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='subject']", Arrays.asList("scheme"));
118 49332 sandro.lab
            extractSubject(parsedObject, subjects);
119 43939 sandro.lab
120
121 49580 sandro.lab
			setType(parsedObject, VtdUtilityParser.getSingleValue(ap, vn, "//*[local-name()='type']"));
122 43939 sandro.lab
123
			return parsedObject;
124 43783 sandro.lab
		} catch (Throwable e) {
125 54390 sandro.lab
			log.error("Input record: " + record);
126 43783 sandro.lab
			log.error("Error on parsing record ", e);
127
			return null;
128
		}
129
130
	}
131
}