Project

General

Profile

1
import logging
2
import re
3

    
4
import libxml2
5

    
6
# URL detector used to decide whether a PID can double as its own resolved URL.
# Written as a raw string so that the regex escapes (\w, \d, \+, \\) reach the
# regex engine unchanged instead of relying on Python passing unknown string
# escapes through (which emits a warning on modern interpreters).
# NOTE(review): inside the character class, `\+-=` is a *range* from '+' to '='
# (it therefore also admits ',', '<', digits, ...). That is probably broader
# than intended, but it is kept as-is so matching behaviour does not change.
p = re.compile(r"((https?):((//)|(\\))+([\w\d:#@%/;$()~_?\+-=\\.&](#!)?)*)")

# One or more whitespace characters; used to collapse runs of whitespace
# (including newlines coming from pretty-printed XML) into a single space.
pattern = re.compile(r'\s+')

log = logging.getLogger('dli')
9

    
10

    
11
class DLIRelation(object):
    """A single ``relation`` element of a DLI record, flattened to attributes.

    Attributes set by ``__init__``:
        relatedDnetId, relatedEntityType, typeOfRelation, related_title,
        targetPID: text of the corresponding child element with every run of
            whitespace collapsed to one space; ``""`` when the element is
            absent. When an element occurs several times the last one wins.
        targetPIDType: value of the ``type`` attribute of the ``pid`` element,
            ``""`` when absent.
        authors: list with the stripped, whitespace-collapsed text of every
            descendant ``author`` element.
        relation_provenance: list of dicts, one per
            ``relationProvenance/datasource`` element, holding ``name`` plus
            whichever of ``completionStatus``, ``provisionMode`` and
            ``collectionDate`` are present as attributes.
    """

    # Attributes of a provenance <datasource> that are copied verbatim.
    _PROVENANCE_ATTRIBUTES = ('completionStatus', 'provisionMode', 'collectionDate')

    def __init__(self, node, ctx):
        """Populate the relation from *node*.

        Args:
            node: the ``relation`` element to read.
            ctx: an XPath context belonging to the same document; its context
                node is set to *node* and left there.
        """
        ctx.setContextNode(node)

        # Defaults first, so that every attribute exists even when the
        # corresponding element is missing from the record.
        self.relatedDnetId = self._last_text(ctx, 'dnetIdentifier')
        self.relatedEntityType = self._last_text(ctx, 'entitytype')
        self.typeOfRelation = self._last_text(ctx, 'typeOfRelation')
        self.related_title = self._last_text(ctx, 'title')

        self.targetPID = ""
        self.targetPIDType = ""
        for pid_node in ctx.xpathEval("./*[local-name()='pid']"):
            self.targetPID = pattern.sub(' ', pid_node.content)
            for attr in self._attributes(pid_node):
                if attr.name == 'type':
                    self.targetPIDType = attr.content

        # Authors are looked up at any depth below the relation element.
        self.authors = [
            pattern.sub(' ', author.content).strip()
            for author in ctx.xpathEval(".//*[local-name()='author']")
        ]

        self.relation_provenance = []
        datasources = ctx.xpathEval(
            "./*[local-name()='relationProvenance']/*[local-name()='datasource']")
        for datasource in datasources:
            rel_item = {'name': pattern.sub(' ', datasource.content).strip()}
            for attr in self._attributes(datasource):
                if attr.name in self._PROVENANCE_ATTRIBUTES:
                    rel_item[attr.name] = attr.content
            self.relation_provenance.append(rel_item)

    @staticmethod
    def _last_text(ctx, local_name):
        """Return the whitespace-collapsed text of the last direct child of the
        context node whose local name is *local_name*, or ``""`` if none."""
        text = ""
        for element in ctx.xpathEval("./*[local-name()='%s']" % local_name):
            text = pattern.sub(' ', element.content)
        return text

    @staticmethod
    def _attributes(element):
        """Return an iterable over the attributes of *element*.

        NOTE(review): the libxml2 bindings appear to return ``None`` from
        ``properties`` for an element without attributes, which would make a
        plain ``for`` loop raise ``TypeError`` -- confirm against the bindings.
        The guard is harmless if that is not the case.
        """
        attrs = element.properties
        return () if attrs is None else attrs
55

    
56

    
57
class DLIObject(object):
    """A DLI record parsed from its XML serialisation.

    Attributes (all reset to their defaults at the start of every
    ``initialize_from_xml`` call):
        identifier: text of ``dnetResourceIdentifier`` (default ``""``).
        pid, pid_type, resolved_url: text and ``type`` / ``resolvedUrl``
            attributes of ``originalIdentifier`` (default ``""``).
        completionStatus: stripped text of ``completionStatus`` (default ``""``).
        provenance_record: list of dicts built from ``provenance`` elements.
        objectType: stripped text of ``objectType``; ``"unknown"`` when the
            element is missing or empty.
        title, date: stripped text of ``title`` / ``date`` (default ``""``).
        authors: list of whitespace-collapsed author names.
        relations: list of ``DLIRelation`` instances.
    """

    def __init__(self, input_xml):
        """Build the object by parsing *input_xml* (an XML document string)."""
        log.debug("CREATED OBJECT")
        self.initialize_from_xml(input_xml)

    def _reset_fields(self):
        """Set every public attribute to its empty default."""
        self.identifier = ""
        self.pid = ""
        self.pid_type = ""
        self.resolved_url = ""
        self.completionStatus = ""
        self.provenance_record = []
        self.objectType = "unknown"
        self.title = ""
        self.date = ""
        self.authors = []
        self.relations = []

    @staticmethod
    def _attributes(element):
        """Return an iterable over the attributes of *element*.

        NOTE(review): the libxml2 bindings appear to return ``None`` from
        ``properties`` for an element without attributes, which would make a
        plain ``for`` loop raise ``TypeError`` -- confirm against the bindings.
        The guard is harmless if that is not the case.
        """
        attrs = element.properties
        return () if attrs is None else attrs

    def _associate_identifier(self, ctxt):
        """Read ``dnetResourceIdentifier`` (last occurrence wins) into
        ``self.identifier``; the text is stored unstripped."""
        for node in ctxt.xpathEval("./*[local-name()='dnetResourceIdentifier']"):
            self.identifier = node.content

    def initialize_from_xml(self, input_xml):
        """Parse *input_xml* and populate this object from its ``dliObject`` node.

        The first element named ``dliObject`` anywhere in the document is
        used. When there is none, an error is logged and the attributes keep
        their empty defaults.

        Args:
            input_xml: the XML document as a string.

        Returns:
            None, in every case.
        """
        log.debug("Parsing input %s", input_xml)
        # Defaults are assigned before parsing so the instance is usable
        # (if empty) even when the document has no dliObject element.
        self._reset_fields()

        doc = libxml2.parseDoc(input_xml)
        ctxt = None
        relation_ctxt = None
        try:
            ctxt = doc.xpathNewContext()
            res = ctxt.xpathEval("//*[local-name()='dliObject']")
            if not res:
                log.error("Unable to create DLI object the dli_object node is null")
                return None
            dli_object_node = res[0]

            ctxt.setContextNode(dli_object_node)
            self._associate_identifier(ctxt)
            self._associate_local_PID(ctxt)
            self._associate_complete_status(ctxt)
            self._associate_record_provenance(ctxt)

            # The provenance step moves the context node; point it back at
            # the dliObject element before reading the remaining children.
            ctxt.setContextNode(dli_object_node)
            self._associate_type(ctxt)
            self._associate_title(ctxt)
            self._associate_date(ctxt)
            self._associate_authors(ctxt)

            # Same again: the authors step moves the context node.
            ctxt.setContextNode(dli_object_node)
            # Relations get their own context because each DLIRelation
            # re-targets the context it is given.
            relation_ctxt = doc.xpathNewContext()
            self._associate_relations(ctxt, relation_ctxt)
        finally:
            # libxml2 objects are not garbage collected: release both XPath
            # contexts and the document on every exit path, including the
            # early return above and any exception.
            for context in (relation_ctxt, ctxt):
                if context is not None:
                    context.xpathFreeContext()
            doc.freeDoc()
        return None

    def _associate_local_PID(self, ctxt):
        """Read ``originalIdentifier`` into ``pid``, ``pid_type`` and
        ``resolved_url`` (last occurrence wins)."""
        for node in ctxt.xpathEval("./*[local-name()='originalIdentifier']"):
            self.pid = node.content.strip()
            for prop in self._attributes(node):
                if prop.name == "type":
                    self.pid_type = prop.content
                elif prop.name == "resolvedUrl":
                    self.resolved_url = prop.content
                # A resolved URL of "#" is replaced by the PID itself when
                # the PID matches the URL pattern.
                if self.resolved_url == "#" and p.match(self.pid):
                    self.resolved_url = self.pid

    def _associate_complete_status(self, ctxt):
        """Read the stripped text of ``completionStatus`` (last one wins)."""
        for node in ctxt.xpathEval("./*[local-name()='completionStatus']"):
            self.completionStatus = node.content.strip()

    def _associate_record_provenance(self, ctxt):
        """Append one dict per ``datasourceInfo`` to ``self.provenance_record``.

        Each dict holds ``name`` (whitespace-collapsed datasource text), the
        ``completionStatus`` / ``provisionMode`` attributes when present, and
        ``collectionDate`` when one is found. If a ``datasourceInfo`` contains
        several ``datasource`` children only the last one is recorded; a
        ``datasourceInfo`` without any ``datasource`` child is skipped.

        The context node of *ctxt* is changed by this method; the caller is
        expected to reset it afterwards.
        """
        for node in ctxt.xpathEval("./*[local-name()='provenance']"):
            ctxt.setContextNode(node)
            infos = ctxt.xpathEval(".//*[local-name()='datasourceInfo']")
            for datasource_info in infos:
                ctxt.setContextNode(datasource_info)
                # Start from "nothing found" on every iteration so a stale
                # dict from a previous datasourceInfo is never re-appended.
                item = None
                for datasource in ctxt.xpathEval("./*[local-name()='datasource']"):
                    item = {'name': pattern.sub(' ', datasource.content.strip())}
                    for prop in self._attributes(datasource):
                        if prop.name in ("completionStatus", "provisionMode"):
                            item[prop.name] = prop.content.strip()

                ctxt.setContextNode(node)
                if item is None:
                    continue
                # NOTE(review): the collection date is searched below the whole
                # provenance node, not below this datasourceInfo, so with
                # several datasourceInfo elements each entry receives the last
                # collectionDate in the provenance block -- confirm intent.
                for coll_node in ctxt.xpathEval(".//*[local-name()='collectionDate']"):
                    item['collectionDate'] = coll_node.content
                self.provenance_record.append(item)

    def _associate_type(self, ctxt):
        """Read ``objectType``; fall back to ``"unknown"`` when it is empty."""
        for node in ctxt.xpathEval("./*[local-name()='objectType']"):
            self.objectType = node.content.strip()
        if self.objectType == "":
            self.objectType = "unknown"

    def _associate_title(self, ctxt):
        """Read ``title``, stripped and with whitespace runs collapsed."""
        for node in ctxt.xpathEval("./*[local-name()='title']"):
            self.title = pattern.sub(' ', node.content.strip())

    def _associate_date(self, ctxt):
        """Read the stripped text of ``date`` (last occurrence wins)."""
        for node in ctxt.xpathEval("./*[local-name()='date']"):
            self.date = node.content.strip()

    def _associate_authors(self, ctxt):
        """Append every ``authors/author`` text to ``self.authors``.

        Whitespace runs are collapsed but the result is not stripped. The
        context node of *ctxt* is left on the last ``authors`` element.
        """
        for node in ctxt.xpathEval("./*[local-name()='authors']"):
            ctxt.setContextNode(node)
            for author in ctxt.xpathEval("./*[local-name()='author']"):
                self.authors.append(pattern.sub(' ', author.content))

    def _associate_relations(self, ctxt, newCtxt):
        """Wrap every ``relation`` element of the document in a DLIRelation.

        Args:
            ctxt: context used to locate the relation elements (the XPath is
                absolute, so its context node does not matter).
            newCtxt: separate context handed to each DLIRelation, which
                re-targets it at its own node.
        """
        for relation in ctxt.xpathEval("//*[local-name()='relation']"):
            self.relations.append(DLIRelation(relation, newCtxt))
(2-2/9)