Project

General

Profile

1 44356 claudio.at
package eu.dnetlib.dli.resolver;
2
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
6
7
import com.ximpleware.AutoPilot;
8
import com.ximpleware.VTDGen;
9
import com.ximpleware.VTDNav;
10 45448 claudio.at
import eu.dnetlib.data.transform.VtdUtilityParser;
11
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
12 49184 sandro.lab
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
13 49288 sandro.lab
import eu.dnetlib.pid.resolver.model.SubjectType;
14 44356 claudio.at
import eu.dnetlib.resolver.parser.AbstractResolverParser;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
18
/**
19
 * Created by sandro on 10/3/16.
20
 */
21
public class OpenAireParser extends AbstractResolverParser {
22
23
	private static final Log log = LogFactory.getLog(OpenAireParser.class);
24
25
	@Override
26 49184 sandro.lab
    public DLIResolvedObject parseObject(final String record) {
27
        try {
28
            final DLIResolvedObject currentObject = new DLIResolvedObject();
29
            final VTDGen vg = new VTDGen();
30 44356 claudio.at
			vg.setDoc(record.getBytes());
31
			vg.parse(true);
32
33
			final VTDNav vn = vg.getNav();
34
			final AutoPilot ap = new AutoPilot(vn);
35
36
			ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf");
37
38
			final List<Node> subjectNodes =
39 45448 claudio.at
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:result//subject", Arrays.asList("classname", "schemename"));
40 44356 claudio.at
41
			//Setting subjects
42
			if (subjectNodes != null && subjectNodes.size() > 0) {
43
				final List<SubjectType> currentSubjects = new ArrayList<>();
44
45
				subjectNodes.forEach(it ->
46
						currentSubjects.add(new SubjectType(it.getAttributes().get("schemename"), it.getTextValue()))
47
				);
48
				currentObject.setSubjects(currentSubjects);
49
			}
50
51
			//Setting Titles
52 45448 claudio.at
			final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result/title");
53 44356 claudio.at
54
			if (titles != null && titles.size() > 0) {
55
				currentObject.setTitles(titles);
56
			}
57
58
			//Setting authors
59 45448 claudio.at
			final List<String> authorNodes = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result//fullname");
60 44356 claudio.at
			if (authorNodes != null && authorNodes.size() > 0) {
61
				currentObject.setAuthors(authorNodes);
62
			}
63
64
			//Setting descriptions
65 45448 claudio.at
			final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result//description");
66 44356 claudio.at
			if (descriptions != null && descriptions.size() > 0) {
67
				currentObject.setDescription(descriptions.get(0));
68
			}
69
70
			//resulttype classid
71 45448 claudio.at
			final List<Node> resutlTypes = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:result/resulttype", Arrays.asList("classid"));
72 44356 claudio.at
			if (resutlTypes != null && resutlTypes.size() > 0) {
73
				final String type = resutlTypes.get(0).getAttributes().get("classid");
74
				setType(currentObject, type);
75
			}
76
77
			return currentObject;
78
		} catch (Throwable e) {
79
			log.error("Error on parsing object ", e);
80
			return null;
81
		}
82
	}
83
84
}