package eu.dnetlib.dli.resolver;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import com.ximpleware.AutoPilot;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import eu.dnetlib.data.transform.VtdUtilityParser;
import eu.dnetlib.data.transform.VtdUtilityParser.Node;
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
import eu.dnetlib.pid.resolver.model.SubjectType;

import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
20
 * Created by sandro on 10/3/16.
21
 */
22
public class OpenAireParser extends AbstractResolverParser {
23
24
	private static final Log log = LogFactory.getLog(OpenAireParser.class);
25
26
	@Override
27 49184 sandro.lab
    public DLIResolvedObject parseObject(final String record) {
28
        try {
29
            final DLIResolvedObject currentObject = new DLIResolvedObject();
30
            final VTDGen vg = new VTDGen();
31 44356 claudio.at
			vg.setDoc(record.getBytes());
32
			vg.parse(true);
33
34
			final VTDNav vn = vg.getNav();
35
			final AutoPilot ap = new AutoPilot(vn);
36
37
			ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf");
38
39
			final List<Node> subjectNodes =
40 45448 claudio.at
					VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:result//subject", Arrays.asList("classname", "schemename"));
41 44356 claudio.at
42
			//Setting subjects
43
			if (subjectNodes != null && subjectNodes.size() > 0) {
44
				final List<SubjectType> currentSubjects = new ArrayList<>();
45
46
				subjectNodes.forEach(it ->
47
						currentSubjects.add(new SubjectType(it.getAttributes().get("schemename"), it.getTextValue()))
48
				);
49
				currentObject.setSubjects(currentSubjects);
50
			}
51
52
			//Setting Titles
53 45448 claudio.at
			final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result/title");
54 44356 claudio.at
55
			if (titles != null && titles.size() > 0) {
56
				currentObject.setTitles(titles);
57
			}
58
59
			//Setting authors
60 45448 claudio.at
			final List<String> authorNodes = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result//fullname");
61 44356 claudio.at
			if (authorNodes != null && authorNodes.size() > 0) {
62
				currentObject.setAuthors(authorNodes);
63
			}
64
65
			//Setting descriptions
66 45448 claudio.at
			final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//oaf:result//description");
67 44356 claudio.at
			if (descriptions != null && descriptions.size() > 0) {
68
				currentObject.setDescription(descriptions.get(0));
69
			}
70
71
			//resulttype classid
72 45448 claudio.at
			final List<Node> resutlTypes = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//oaf:result/resulttype", Arrays.asList("classid"));
73 44356 claudio.at
			if (resutlTypes != null && resutlTypes.size() > 0) {
74
				final String type = resutlTypes.get(0).getAttributes().get("classid");
75
				setType(currentObject, type);
76
			}
77
78
			return currentObject;
79
		} catch (Throwable e) {
80
			log.error("Error on parsing object ", e);
81
			return null;
82
		}
83
	}
84
85
}