Project

General

Profile

1 49384 sandro.lab
package eu.dnetlib.wds.resolver;
2
3
import com.ximpleware.AutoPilot;
4
import com.ximpleware.VTDGen;
5
import com.ximpleware.VTDNav;
6
import eu.dnetlib.data.transform.VtdUtilityParser;
7
import eu.dnetlib.miscutils.collections.Pair;
8
import eu.dnetlib.pid.resolver.model.SubjectType;
9
import eu.dnetlib.pid.resolver.parser.AbstractResolverParser;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
13
import java.util.ArrayList;
14
import java.util.Arrays;
15
import java.util.List;
16
17
18
/**
19
 * Created by sandro on 10/3/16.
20
 */
21
public class OpenAireParser extends AbstractResolverParser {
22
23
    private static final Log log = LogFactory.getLog(OpenAireParser.class);
24
25
    @Override
26
    public WDSResolvedObject parseObject(final String record) {
27
        try {
28
            final WDSResolvedObject currentObject = new WDSResolvedObject();
29
            final VTDGen vg = new VTDGen();
30
            vg.setDoc(record.getBytes());
31
            vg.parse(true);
32
33
            final VTDNav vn = vg.getNav();
34
            final AutoPilot ap = new AutoPilot(vn);
35
36
            ap.declareXPathNameSpace("oaf", "http://namespace.openaire.eu/oaf");
37
38
            final List<VtdUtilityParser.Node> subjectNodes =
39 49434 sandro.lab
                    VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='result']//*[local-name()='subject']", Arrays.asList("classname", "schemename"));
40 49384 sandro.lab
41
            //Setting subjects
42
            if (subjectNodes != null && subjectNodes.size() > 0) {
43
                final List<SubjectType> currentSubjects = new ArrayList<>();
44
45
                subjectNodes.forEach(it ->
46
                        currentSubjects.add(new SubjectType(it.getAttributes().get("schemename"), it.getTextValue()))
47
                );
48
                currentObject.setSubjects(currentSubjects);
49
            }
50
51
            //Setting Titles
52 49434 sandro.lab
            final List<String> titles = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='result']/*[local-name()='title']");
53 49384 sandro.lab
54
            if (titles != null && titles.size() > 0) {
55
                currentObject.setTitles(titles);
56
            }
57
58
            //Setting authors
59 49434 sandro.lab
            final List<String> authorNodes = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='result']//*[local-name()='fullname']");
60 49384 sandro.lab
            if (authorNodes != null && authorNodes.size() > 0) {
61
                currentObject.setAuthors(authorNodes);
62
            }
63
64
            //Setting descriptions
65 49434 sandro.lab
            final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//*[local-name()='result']//*[local-name()='description']");
66 49384 sandro.lab
            if (descriptions != null && descriptions.size() > 0) {
67
                descriptions.forEach(d -> currentObject.addDescription(new Pair<>("unknown", d)));
68
            }
69
70
            //resulttype classid
71 49434 sandro.lab
            final List<VtdUtilityParser.Node> resutlTypes = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, "//*[local-name()='result']/*[local-name()='resulttype']", Arrays.asList("classid"));
72 49384 sandro.lab
            if (resutlTypes != null && resutlTypes.size() > 0) {
73
                final String type = resutlTypes.get(0).getAttributes().get("classid");
74
                setType(currentObject, type);
75
            }
76
77 49434 sandro.lab
            ap.selectXPath("//*[local-name()='rel'][./to/@type='project']");
78 49384 sandro.lab
            while (ap.evalXPath() != -1) {
79
                final List<String> projectTitles = VtdUtilityParser.getTextValue(ap, vn, "./title");
80
                final List<String> projectAcronym = VtdUtilityParser.getTextValue(ap, vn, "./acronym");
81
                final List<String> projectCodes = VtdUtilityParser.getTextValue(ap, vn, "./code");
82
                final List<VtdUtilityParser.Node> funderNames = VtdUtilityParser.getTextValuesWithAttributes(ap, vn, ".//funder", Arrays.asList("name"));
83
84
                final List<String> projectfundings = VtdUtilityParser.getTextValue(ap, vn, ".//*[starts-with(name(), 'funding_level_')]");
85
86
                final String funderName = funderNames != null ? funderNames.stream().findFirst().get().getAttributes().get("name") : "";
87
88
                String funding = projectfundings.size() > 0 ? projectfundings.get(projectfundings.size() - 1) : "";
89
                funding = funding.length() > 2 ? funding.substring(funding.indexOf("::") + 2) : "";
90
91 49432 sandro.lab
                currentObject.addProject(new Project().setAcronym((projectAcronym!=null && projectAcronym.size()>0) ?projectAcronym.get(0):"")
92 49384 sandro.lab
                        .setFundingStream(funding)
93
                        .setFunder(funderName)
94
                        .setGrantID(projectCodes.get(0))
95
                        .setName(projectTitles.get(0)));
96
            }
97 49434 sandro.lab
                return currentObject;
98 49384 sandro.lab
        } catch (Throwable e) {
99
            log.error("Error on parsing object ", e);
100
            return null;
101
        }
102
    }
103
104
}