1 |
52209
|
claudio.at
|
package eu.dnetlib.data.transform.xml2;
|
2 |
51942
|
claudio.at
|
|
3 |
|
|
import java.util.List;
|
4 |
|
|
import java.util.Map;
|
5 |
52041
|
claudio.at
|
import java.util.Map.Entry;
|
6 |
52048
|
claudio.at
|
import java.util.Objects;
|
7 |
52041
|
claudio.at
|
import java.util.function.Function;
|
8 |
51942
|
claudio.at
|
|
9 |
52041
|
claudio.at
|
import com.google.common.collect.Streams;
|
10 |
|
|
import com.google.protobuf.Descriptors.Descriptor;
|
11 |
51942
|
claudio.at
|
import com.ximpleware.AutoPilot;
|
12 |
52041
|
claudio.at
|
import com.ximpleware.VTDGen;
|
13 |
51942
|
claudio.at
|
import com.ximpleware.VTDNav;
|
14 |
52041
|
claudio.at
|
import eu.dnetlib.data.proto.FieldTypeProtos;
|
15 |
51953
|
claudio.at
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
16 |
51942
|
claudio.at
|
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
|
17 |
52041
|
claudio.at
|
import eu.dnetlib.data.proto.KindProtos.Kind;
|
18 |
51953
|
claudio.at
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
19 |
|
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
20 |
52041
|
claudio.at
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
21 |
|
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
22 |
|
|
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
|
23 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
24 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result.*;
|
25 |
|
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
26 |
51944
|
sandro.lab
|
import eu.dnetlib.miscutils.collections.Pair;
|
27 |
52041
|
claudio.at
|
import eu.dnetlib.pace.model.Person;
|
28 |
51945
|
sandro.lab
|
import org.apache.commons.lang3.StringUtils;
|
29 |
52041
|
claudio.at
|
import org.apache.commons.lang3.exception.ExceptionUtils;
|
30 |
|
|
import org.apache.commons.logging.Log;
|
31 |
|
|
import org.apache.commons.logging.LogFactory;
|
32 |
51942
|
claudio.at
|
|
33 |
52041
|
claudio.at
|
import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSimpleId;
|
34 |
|
|
import static eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions.oafSplitId;
|
35 |
52209
|
claudio.at
|
import static eu.dnetlib.data.transform.xml2.Utils.*;
|
36 |
|
|
import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.*;
|
37 |
52041
|
claudio.at
|
import static java.lang.String.format;
|
38 |
51942
|
claudio.at
|
|
39 |
52048
|
claudio.at
|
public abstract class AbstractResultVtdParser implements Function<String, Oaf> {
|
40 |
51942
|
claudio.at
|
|
41 |
52048
|
claudio.at
|
/** Class-wide logger; apply() uses it to report records that could not be transformed. */
private static final Log log = LogFactory.getLog(AbstractResultVtdParser.class);

/** Default "invisible" flag passed to buildDataInfo(); a datainfo element found in the record overrides it. */
protected boolean invisible = false;
/** Default provenance action passed to buildDataInfo(); a datainfo element found in the record overrides it. */
protected String provenance = "";
/** Default trust value passed to buildDataInfo(); a datainfo element found in the record overrides it. */
protected String trust = "0.9";

/** Mapping specifications keyed by protobuf descriptor; created by buildSpecs(Map) in the constructor and iterated by apply(). */
protected SpecificationMap specs;
|
48 |
51974
|
claudio.at
|
|
49 |
52209
|
claudio.at
|
/**
 * Creates a parser that keeps the field defaults for the data-info settings
 * (invisible = false, provenance = "", trust = "0.9").
 *
 * @param fields handed to {@link #buildSpecs(Map)}, which looks entries up by field name
 *               (e.g. "title", "author"); the values are presumably the xpath expressions
 *               evaluated by apply() -- TODO confirm against SpecificationDescriptor.put
 */
public AbstractResultVtdParser(final Map<String, String> fields) {
    // NOTE(review): buildSpecs is public and non-final, so a subclass override would run
    // here, before the subclass constructor body has executed.
    this.specs = buildSpecs(fields);
}
|
52 |
|
|
|
53 |
|
|
/**
 * Creates a parser with explicit data-info defaults.
 *
 * @param invisible  default value for the "invisible" flag of the generated DataInfo
 * @param provenance default provenance action used for the generated DataInfo
 * @param trust      default trust value used for the generated DataInfo
 * @param fields     forwarded to the single-argument constructor, which builds the specs from it
 */
public AbstractResultVtdParser(final boolean invisible, final String provenance, final String trust, final Map<String, String> fields) {
    // The specs are built first by the delegated constructor; the defaults are stored afterwards.
    this(fields);
    this.trust = trust;
    this.provenance = provenance;
    this.invisible = invisible;
}
|
59 |
|
|
|
60 |
52209
|
claudio.at
|
/**
 * Resolves the result type for a record.
 * The "resulttype" mapping registered in buildSpecs() calls this with the text value of each
 * matched node and wraps the returned string in a qualifier of the DNET_RESULT_TYPOLOGIES scheme.
 *
 * @param cobjcategory text value of the node matched by the "resulttype" expression
 * @return the value to use as result type qualifier
 */
protected abstract String getResulttype(final String cobjcategory);
|
61 |
|
|
|
62 |
52041
|
claudio.at
|
@Override
|
63 |
|
|
public Oaf apply(final String xml) {
|
64 |
|
|
try {
|
65 |
|
|
final VTDGen vg = parseXml(xml);
|
66 |
|
|
final VTDNav vn = vg.getNav();
|
67 |
|
|
final AutoPilot ap = new AutoPilot(vn);
|
68 |
51942
|
claudio.at
|
|
69 |
52041
|
claudio.at
|
final boolean skiprecord = Boolean.valueOf(getFirstValue(ap, vn, xpath("record", "header", "skipRecord")));
|
70 |
|
|
int metadata = countNodes(ap, vn, format("count(%s)", xpath("record", "metadata")));
|
71 |
51942
|
claudio.at
|
|
72 |
52041
|
claudio.at
|
if (metadata == 0 || skiprecord) {
|
73 |
|
|
return null;
|
74 |
|
|
}
|
75 |
51942
|
claudio.at
|
|
76 |
52041
|
claudio.at
|
final String objIdentifier = oafSimpleId(Type.result.name(), getFirstValue(ap, vn, xpath("record", "header", "objIdentifier")));
|
77 |
|
|
if (StringUtils.isBlank(objIdentifier)) {
|
78 |
|
|
return null;
|
79 |
|
|
}
|
80 |
51942
|
claudio.at
|
|
81 |
52209
|
claudio.at
|
for(final Entry<Descriptor, SpecificationDescriptor> spec : specs.entrySet()) {
|
82 |
|
|
final Descriptor d = spec.getKey();
|
83 |
|
|
final SpecificationDescriptor md = spec.getValue();
|
84 |
|
|
|
85 |
|
|
for(Entry<String, Pair<String, Function<List<Node>, Object>>> entry : md.getFields().entrySet()) {
|
86 |
|
|
final String fieldName = entry.getKey();
|
87 |
|
|
final Pair<String, Function<List<Node>, Object>> pair = entry.getValue();
|
88 |
|
|
final String xpath = pair.getKey();
|
89 |
|
|
final Function<List<Node>, Object> function = pair.getValue();
|
90 |
|
|
try {
|
91 |
|
|
addField(md.getBuilder(), d.findFieldByName(fieldName), function.apply(getNodes(ap, vn, xpath)));
|
92 |
|
|
} catch (Throwable e) {
|
93 |
|
|
throw new VtdException(String.format("Error mapping field '%s' from xpath '%s' for record '%s'", fieldName, xpath, objIdentifier), e);
|
94 |
|
|
}
|
95 |
|
|
}
|
96 |
|
|
}
|
97 |
|
|
|
98 |
|
|
return Oaf.newBuilder()
|
99 |
|
|
.setKind(Kind.entity)
|
100 |
|
|
.setDataInfo(ensureDataInfo(ap, vn, DataInfo.newBuilder()))
|
101 |
|
|
.setEntity(((OafEntity.Builder) specs.get(OafEntity.getDescriptor())
|
102 |
|
|
.getBuilder()
|
103 |
|
|
.setField(
|
104 |
|
|
OafEntity.getDescriptor().findFieldByName(Type.result.name()),
|
105 |
|
|
((Result.Builder) specs.get(Result.getDescriptor()).getBuilder())
|
106 |
|
|
.setMetadata((Metadata) specs.get(Metadata.getDescriptor()).getBuilder().build())
|
107 |
|
|
.addInstance((Instance) specs.get(Instance.getDescriptor()).getBuilder().build())
|
108 |
|
|
.build()))
|
109 |
|
|
.setId(objIdentifier)
|
110 |
|
|
.setOaiprovenance(getOaiProvenance(ap, vn))
|
111 |
|
|
.build())
|
112 |
|
|
.build();
|
113 |
52041
|
claudio.at
|
} catch (Throwable e) {
|
114 |
52062
|
claudio.at
|
log.error(xml);
|
115 |
52041
|
claudio.at
|
log.error(ExceptionUtils.getStackTrace(e));
|
116 |
|
|
return null;
|
117 |
|
|
}
|
118 |
51942
|
claudio.at
|
}
|
119 |
|
|
|
120 |
52209
|
claudio.at
|
/**
 * Builds the mapping specifications used by apply().
 *
 * One SpecificationDescriptor is registered for each of the Result, Instance, Metadata and
 * OafEntity descriptors. Every descriptor gets a fresh protobuf builder plus, per field, the
 * value found in {@code fields} under the field name and a function turning the matched nodes
 * into a stream of field values.
 *
 * @param fields lookup table keyed by field name; a missing key yields {@code null} for that field
 *               (how SpecificationDescriptor.put treats null is not visible here)
 * @return the populated specification map
 */
public SpecificationMap buildSpecs(final Map<String, String> fields) {
    final SpecificationMap specs = new SpecificationMap();

    // ---- Result: external references, built from the node text and its attributes ----
    specs.put(Result.getDescriptor(), SpecificationDescriptor.newInstance())
        .setBuilder(Result.newBuilder())
        .put("externalReference", fields.get("externalReference"), nodes -> nodes.stream()
            .map(node -> {
                final ExternalReference.Builder extref = ExternalReference.newBuilder();
                if (StringUtils.isNotBlank(node.getTextValue())) {
                    extref.setUrl(node.getTextValue());
                }
                // NOTE(review): unlike the "journal" mapper below, attributes are not null-checked here.
                final Map<String, String> a = node.getAttributes();
                final String source = a.get("source");
                if (StringUtils.isNotBlank(source)) {
                    extref.setSitename(source);
                }
                final String identifier = a.get("identifier");
                if (StringUtils.isNotBlank(identifier)) {
                    extref.setRefidentifier(identifier);
                }
                final String title = a.get("title");
                if (StringUtils.isNotBlank(title)) {
                    extref.setLabel(title);
                }
                final String query = a.get("query");
                if (StringUtils.isNotBlank(query)) {
                    extref.setQuery(query);
                }
                final String type = a.get("type");
                if (StringUtils.isNotBlank(type)) {
                    extref.setQualifier(getSimpleQualifier(type, DNET_EXT_REF_TYPOLOGIES));
                }
                return extref.build();
            }));

    // ---- Instance ----
    specs.put(Instance.getDescriptor(), SpecificationDescriptor.newInstance())
        .setBuilder(Instance.newBuilder())
        .put("license", fields.get("license"), nodes -> nodes.stream()
            // "rights" nodes are kept only when their RIGHTS_URI attribute matches URL_REGEX,
            // "license" nodes are always kept, anything else is dropped.
            .filter(node -> {
                final Map<String, String> a = node.getAttributes();
                switch (node.getName()) {
                case "rights":
                    return a.containsKey(RIGHTS_URI) && a.get(RIGHTS_URI).matches(URL_REGEX);
                case "license":
                    return true;
                default:
                    return false;
                }
            })
            .map(Node::getTextValue))
        .put("accessright", fields.get("accessright"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            // values without an entry in mappingAccess become "UNKNOWN"
            .map(rights -> mappingAccess.containsKey(rights) ? mappingAccess.get(rights) : "UNKNOWN")
            .map(code -> getQualifier(code, getClassName(code), DNET_ACCESS_MODES, DNET_ACCESS_MODES)))
        .put("instancetype", fields.get("instancetype"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            .map(code -> getQualifier(code, getClassName(code), DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE)))
        .put("hostedby", fields.get("hostedby"), nodes -> nodes.stream()
            .map(node -> getKV(oafSplitId("datasource", node.getAttributes().get("id")), node.getAttributes().get("name"))))
        .put("url", fields.get("url"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            // only values that look like URLs (after trimming) are kept
            .filter(s -> s.trim().matches(URL_REGEX)))
        .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream()
            .map(Node::getTextValue));

    // ---- Metadata ----
    specs.put(Metadata.getDescriptor(), SpecificationDescriptor.newInstance())
        .setBuilder(Metadata.newBuilder())
        .put("title", fields.get("title"), nodes -> nodes.stream()
            .map(node -> {
                final Qualifier.Builder q = Qualifier.newBuilder().setSchemeid(DNET_TITLE_TYPOLOGIES).setSchemename(DNET_TITLE_TYPOLOGIES);
                // + "" turns a missing attribute into the string "null", which falls into the default branch
                switch (node.getAttributes().get(TITLE_TYPE) + "") {
                case "AlternativeTitle":
                    q.setClassid("alternative title").setClassname("alternative title");
                    break;
                case "Subtitle":
                    q.setClassid("subtitle").setClassname("subtitle");
                    break;
                case "TranslatedTitle":
                    q.setClassid("translated title").setClassname("translated title");
                    break;
                default:
                    q.setClassid("main title").setClassname("main title");
                    break;
                }
                return StructuredProperty.newBuilder().setValue(node.getTextValue()).setQualifier(q).build();
            }))
        .put("description", fields.get("description"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("storagedate", fields.get("storagedate"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("lastmetadataupdate", fields.get("lastmetadataupdate"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("embargoenddate", fields.get("embargoenddate"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        // "dateofacceptance" is registered on both the Instance and the Metadata descriptor
        .put("dateofacceptance", fields.get("dateofacceptance"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("author", fields.get("author"), nodes -> Streams.mapWithIndex(
                nodes.stream()
                    .map(Node::getTextValue),
                (creator, i) -> new Pair<>(i, creator))
            .map(pair -> {
                final Author.Builder author = Author.newBuilder();
                author.setFullname(pair.getValue());
                // the stream index is zero-based, the rank starts at 1
                author.setRank(pair.getKey().intValue() + 1);
                final Person p = new Person(pair.getValue(), false);
                // name and surname are only set when Person reports the parsed name as accurate
                if (p.isAccurate()) {
                    author.setName(p.getNormalisedFirstName());
                    author.setSurname(p.getNormalisedSurname());
                }
                return author.build();
            }))
        .put("contributor", fields.get("contributor"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("subject", fields.get("subject"), nodes -> nodes.stream()
            .map(node -> {
                // blank or missing qualifier attributes fall back to KEYWORD / DNET_SUBJECT_TYPOLOGIES
                final Map<String, String> a = node.getAttributes();
                final String classId = StringUtils.isNotBlank(a.get(CLASSID)) ? a.get(CLASSID) : KEYWORD;
                final String className = StringUtils.isNotBlank(a.get(CLASSNAME)) ? a.get(CLASSNAME) : KEYWORD;
                final String schemeId = StringUtils.isNotBlank(a.get(SCHEMEID)) ? a.get(SCHEMEID) : DNET_SUBJECT_TYPOLOGIES;
                final String schemeName = StringUtils.isNotBlank(a.get(SCHEMENAME)) ? a.get(SCHEMENAME) : DNET_SUBJECT_TYPOLOGIES;
                return getStructuredProperty(node.getTextValue(), classId, className, schemeId, schemeName);
            }))
        .put("format", fields.get("format"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("source", fields.get("source"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("size", fields.get("size"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("version", fields.get("version"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("publisher", fields.get("publisher"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("language", fields.get("language"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            .map(code -> getQualifier(code, getClassName(code), DNET_LANGUAGES, DNET_LANGUAGES)))
        .put("resourcetype", fields.get("resourcetype"), nodes -> nodes.stream()
            .map(node -> node.getAttributes().get("resourceTypeGeneral"))
            .map(resourceType -> getSimpleQualifier(resourceType, DNET_DATA_CITE_RESOURCE)))
        .put("resulttype", fields.get("resulttype"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            // the concrete subclass decides the result type via getResulttype()
            .map(cobjcategory -> getSimpleQualifier(getResulttype(cobjcategory), DNET_RESULT_TYPOLOGIES)))
        .put("concept", fields.get("concept"), nodes -> nodes.stream()
            .filter(node -> node.getAttributes() != null && StringUtils.isNotBlank(node.getAttributes().get("id")))
            // NOTE(review): this mapper yields Context builders, not built messages -- confirm addField accepts builders
            .map(node -> Context.newBuilder().setId(node.getAttributes().get("id"))))
        .put("journal", fields.get("journal"), nodes -> nodes.stream()
            .map(node -> {
                final Journal.Builder journal = Journal.newBuilder();
                if (StringUtils.isNotBlank(node.getTextValue())) {
                    journal.setName(node.getTextValue());
                }
                if (node.getAttributes() != null) {
                    final Map<String, String> a = node.getAttributes();
                    if (StringUtils.isNotBlank(a.get("issn"))) {
                        journal.setIssnPrinted(a.get("issn"));
                    }
                    if (StringUtils.isNotBlank(a.get("eissn"))) {
                        journal.setIssnOnline(a.get("eissn"));
                    }
                    if (StringUtils.isNotBlank(a.get("lissn"))) {
                        journal.setIssnLinking(a.get("lissn"));
                    }
                    if (StringUtils.isNotBlank(a.get("sp"))) {
                        journal.setSp(a.get("sp"));
                    }
                    if (StringUtils.isNotBlank(a.get("ep"))) {
                        journal.setEp(a.get("ep"));
                    }
                    if (StringUtils.isNotBlank(a.get("iss"))) {
                        journal.setIss(a.get("iss"));
                    }
                    if (StringUtils.isNotBlank(a.get("vol"))) {
                        journal.setVol(a.get("vol"));
                    }
                }
                // NOTE(review): returns the builder itself rather than journal.build() -- confirm this is intended
                return journal;
            }));

    // ---- OafEntity ----
    specs.put(OafEntity.getDescriptor(), SpecificationDescriptor.newInstance())
        .setBuilder(OafEntity.newBuilder().setType(Type.result))
        .put("originalId", fields.get("originalId"), nodes -> nodes.stream()
            .map(Node::getTextValue)
            // keep only the part after ID_SEPARATOR when present, then drop values that look like URLs
            .map(s -> StringUtils.contains(s, ID_SEPARATOR) ? StringUtils.substringAfter(s, ID_SEPARATOR) : s)
            .filter(s -> !s.trim().matches(URL_REGEX)))
        .put("collectedfrom", fields.get("collectedfrom"), nodes -> nodes.stream()
            .map(node -> getKV(
                    oafSplitId(Type.datasource.name(), node.getAttributes().get("id")),
                    node.getAttributes().get("name"))))
        .put("pid", fields.get("pid"), nodes -> nodes.stream()
            // a pid node must carry an identifier type or an alternate identifier type ...
            .filter(pid -> {
                final Map<String, String> a = pid.getAttributes();
                return a.containsKey(IDENTIFIER_TYPE) || a.containsKey(ALTERNATE_IDENTIFIER_TYPE);
            })
            // ... and neither of them may be "url" (case-insensitive)
            .filter(pid -> {
                final Map<String, String> a = pid.getAttributes();
                return !"url".equalsIgnoreCase(a.get(IDENTIFIER_TYPE)) && !"url".equalsIgnoreCase(a.get(ALTERNATE_IDENTIFIER_TYPE));
            })
            .map(pid -> {
                final Map<String, String> a = pid.getAttributes();
                final String identifierType = a.get(IDENTIFIER_TYPE);
                final String altIdentifierType = a.get(ALTERNATE_IDENTIFIER_TYPE);
                // the identifier type wins over the alternate identifier type when both are present
                return StructuredProperty.newBuilder()
                        .setValue(pid.getTextValue())
                        .setQualifier(getSimpleQualifier(
                                StringUtils.isNotBlank(identifierType) ?
                                        identifierType : altIdentifierType, DNET_PID_TYPES))
                        .build();
            }))
        .put("dateofcollection", fields.get("dateofcollection"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("dateoftransformation", fields.get("dateoftransformation"), nodes -> nodes.stream()
            .map(Node::getTextValue))
        .put("cachedRel", fields.get("cachedRel"), nodes -> nodes.stream()
            // getOafRel returns null for nodes it cannot turn into a relation; those are skipped
            .map(node -> getOafRel(node,
                    OafRel.newBuilder()
                            .setSource("")
                            .setChild(false)))
            .filter(Objects::nonNull)
            .map(oafRel -> oafRel.build()));
    return specs;
}
|
340 |
|
|
|
341 |
52209
|
claudio.at
|
private static OafRel.Builder getOafRel(final Node node, final OafRel.Builder oafRel) {
|
342 |
52048
|
claudio.at
|
final Map<String, String> a = node.getAttributes();
|
343 |
|
|
|
344 |
52062
|
claudio.at
|
switch (node.getName()) {
|
345 |
52048
|
claudio.at
|
case PROJECTID:
|
346 |
52062
|
claudio.at
|
if (StringUtils.isBlank(node.getTextValue())) {
|
347 |
|
|
return null;
|
348 |
|
|
}
|
349 |
52048
|
claudio.at
|
return oafRel
|
350 |
|
|
.setTarget(oafSplitId(Type.project.name(), StringUtils.trim(node.getTextValue())))
|
351 |
|
|
.setRelType(RelType.resultProject)
|
352 |
|
|
.setSubRelType(SubRelType.outcome)
|
353 |
|
|
.setRelClass("isProducedBy");
|
354 |
|
|
|
355 |
|
|
case RELATED_PUBLICATION:
|
356 |
|
|
case RELATED_DATASET:
|
357 |
52062
|
claudio.at
|
if (StringUtils.isBlank(a.get("id"))) {
|
358 |
|
|
return null;
|
359 |
|
|
}
|
360 |
52048
|
claudio.at
|
return oafRel
|
361 |
|
|
.setTarget(oafSimpleId(Type.result.name(), StringUtils.trim(a.get("id"))))
|
362 |
|
|
.setRelType(RelType.resultResult)
|
363 |
|
|
.setSubRelType(SubRelType.publicationDataset)
|
364 |
|
|
.setRelClass("isRelatedTo");
|
365 |
|
|
|
366 |
|
|
case RELATED_IDENTIFIER:
|
367 |
52062
|
claudio.at
|
if (StringUtils.isBlank(node.getTextValue())) {
|
368 |
|
|
return null;
|
369 |
|
|
}
|
370 |
52048
|
claudio.at
|
return oafRel
|
371 |
|
|
.setTarget(node.getTextValue())
|
372 |
|
|
.setRelType(RelType.resultResult)
|
373 |
|
|
.setSubRelType(SubRelType.relationship)
|
374 |
|
|
.setRelClass(a.get(RELATION_TYPE))
|
375 |
|
|
.setCachedTarget(
|
376 |
|
|
OafEntity.newBuilder()
|
377 |
|
|
.setType(Type.result)
|
378 |
52209
|
claudio.at
|
.setId("") //TODO
|
379 |
52048
|
claudio.at
|
.addPid(
|
380 |
|
|
StructuredProperty.newBuilder()
|
381 |
|
|
.setValue(node.getTextValue())
|
382 |
|
|
.setQualifier(getSimpleQualifier(a.get(RELATED_IDENTIFIER_TYPE), DNET_PID_TYPES))
|
383 |
|
|
.build()));
|
384 |
|
|
default:
|
385 |
|
|
return null;
|
386 |
|
|
}
|
387 |
|
|
}
|
388 |
|
|
|
389 |
52041
|
claudio.at
|
private OriginDescription getOriginDescription(final AutoPilot ap, final VTDNav vn, final String basePath) throws VtdException {
|
390 |
51942
|
claudio.at
|
final OriginDescription.Builder od = OriginDescription.newBuilder();
|
391 |
|
|
if (getNodes(ap, vn, basePath).isEmpty()) {
|
392 |
|
|
return od.build();
|
393 |
|
|
}
|
394 |
51974
|
claudio.at
|
final Map<String, String> odAttr = getNode(ap, vn, basePath).getAttributes();
|
395 |
|
|
|
396 |
|
|
final String harvestDate = odAttr.get("harvestDate");
|
397 |
|
|
if (StringUtils.isNotBlank(harvestDate)) {
|
398 |
|
|
od.setHarvestDate(harvestDate);
|
399 |
|
|
}
|
400 |
|
|
final String altered = odAttr.get("altered");
|
401 |
|
|
if (StringUtils.isNotBlank(altered)) {
|
402 |
|
|
od.setAltered(Boolean.valueOf(altered));
|
403 |
|
|
}
|
404 |
|
|
final String baseUrl = getFirstValue(ap, vn, basePath + xpath("baseURL"));
|
405 |
|
|
if (StringUtils.isNotBlank(basePath)) {
|
406 |
|
|
od.setBaseURL(baseUrl);
|
407 |
|
|
}
|
408 |
|
|
final String identifier = getFirstValue(ap, vn, basePath + xpath("identifier"));
|
409 |
|
|
if (StringUtils.isNotBlank(identifier)) {
|
410 |
|
|
od.setIdentifier(identifier);
|
411 |
|
|
}
|
412 |
|
|
final String datestamp = getFirstValue(ap, vn, basePath + xpath("datestamp"));
|
413 |
|
|
if (StringUtils.isNotBlank(datestamp)) {
|
414 |
|
|
od.setDatestamp(datestamp);
|
415 |
|
|
}
|
416 |
|
|
final String metadataNamespace = getFirstValue(ap, vn, basePath + xpath("metadataNamespace"));
|
417 |
|
|
if (StringUtils.isNotBlank(metadataNamespace)) {
|
418 |
|
|
od.setMetadataNamespace(metadataNamespace);
|
419 |
|
|
}
|
420 |
|
|
final OriginDescription originDescription = getOriginDescription(ap, vn, basePath + xpath("originDescription"));
|
421 |
|
|
if (originDescription.hasHarvestDate()) {
|
422 |
|
|
od.setOriginDescription(originDescription);
|
423 |
|
|
}
|
424 |
|
|
|
425 |
|
|
return od.build();
|
426 |
51942
|
claudio.at
|
}
|
427 |
|
|
|
428 |
52041
|
claudio.at
|
private OAIProvenance getOaiProvenance(final AutoPilot ap, final VTDNav vn) throws VtdException {
|
429 |
|
|
return OAIProvenance.newBuilder()
|
430 |
|
|
.setOriginDescription(getOriginDescription(ap, vn, xpath("record", "about", "provenance", "originDescription")))
|
431 |
|
|
.build();
|
432 |
|
|
}
|
433 |
51944
|
sandro.lab
|
|
434 |
52041
|
claudio.at
|
private FieldTypeProtos.DataInfo.Builder ensureDataInfo(
|
435 |
51953
|
claudio.at
|
final AutoPilot ap, final VTDNav vn,
|
436 |
52041
|
claudio.at
|
final DataInfo.Builder info) throws VtdException {
|
437 |
51944
|
sandro.lab
|
|
438 |
52041
|
claudio.at
|
if (info.isInitialized()) return info;
|
439 |
|
|
return buildDataInfo( ap, vn, invisible, provenance, trust, false, false);
|
440 |
51944
|
sandro.lab
|
}
|
441 |
|
|
|
442 |
52041
|
claudio.at
|
private FieldTypeProtos.DataInfo.Builder buildDataInfo(
|
443 |
|
|
final AutoPilot ap,
|
444 |
|
|
final VTDNav vn,
|
445 |
|
|
final boolean invisible,
|
446 |
|
|
final String defaultProvenanceaction,
|
447 |
|
|
final String defaultTrust,
|
448 |
|
|
final boolean defaultDeletedbyinference,
|
449 |
|
|
final boolean defaultInferred) throws VtdException {
|
450 |
51953
|
claudio.at
|
|
451 |
52041
|
claudio.at
|
final DataInfo.Builder dataInfoBuilder = FieldTypeProtos.DataInfo.newBuilder()
|
452 |
|
|
.setInvisible(invisible)
|
453 |
|
|
.setInferred(defaultInferred)
|
454 |
|
|
.setDeletedbyinference(defaultDeletedbyinference)
|
455 |
|
|
.setTrust(defaultTrust)
|
456 |
|
|
.setProvenanceaction(getSimpleQualifier(defaultProvenanceaction, DNET_PROVENANCE_ACTIONS));
|
457 |
51944
|
sandro.lab
|
|
458 |
52041
|
claudio.at
|
// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
|
459 |
51944
|
sandro.lab
|
|
460 |
52041
|
claudio.at
|
final String xpath = xpath("record", "about", "datainfo");
|
461 |
|
|
if (getNodes(ap, vn, xpath).size() > 0) {
|
462 |
|
|
final Map<String, String> provAction = getNode(ap, vn, xpath + xpath("provenanceaction")).getAttributes();
|
463 |
|
|
dataInfoBuilder
|
464 |
|
|
.setInvisible(Boolean.valueOf(getValue(getNode(ap, vn, xpath + xpath("invisible")), String.valueOf(invisible))))
|
465 |
|
|
.setInferred(Boolean.valueOf(getValue(getNode(ap, vn, xpath + xpath("inferred")), String.valueOf(defaultInferred))))
|
466 |
|
|
.setDeletedbyinference(Boolean.valueOf(
|
467 |
|
|
getValue(getNode(ap, vn, xpath + xpath("deletedbyinference")), String.valueOf(defaultDeletedbyinference))))
|
468 |
|
|
.setTrust(getValue(getNode(ap, vn, xpath + xpath("trust")), defaultTrust))
|
469 |
|
|
.setInferenceprovenance(getValue(getNode(ap, vn, xpath + xpath("inferenceprovenance")), ""))
|
470 |
|
|
.setProvenanceaction(getSimpleQualifier(
|
471 |
|
|
getValue(provAction.get(CLASSID), defaultProvenanceaction),
|
472 |
|
|
DNET_PROVENANCE_ACTIONS));
|
473 |
|
|
}
|
474 |
51944
|
sandro.lab
|
|
475 |
52041
|
claudio.at
|
return dataInfoBuilder;
|
476 |
51944
|
sandro.lab
|
}
|
477 |
|
|
|
478 |
51942
|
claudio.at
|
}
|