1 |
26600
|
sandro.lab
|
package eu.dnetlib.data.mapreduce.util;
|
2 |
|
|
|
3 |
|
|
import java.io.StringReader;
|
4 |
|
|
import java.io.StringWriter;
|
5 |
|
|
import java.util.List;
|
6 |
|
|
import java.util.Map;
|
7 |
|
|
import java.util.Map.Entry;
|
8 |
|
|
import java.util.Set;
|
9 |
40314
|
claudio.at
|
import javax.xml.transform.*;
|
10 |
26600
|
sandro.lab
|
import javax.xml.transform.dom.DOMSource;
|
11 |
|
|
import javax.xml.transform.stream.StreamResult;
|
12 |
|
|
|
13 |
35179
|
michele.ar
|
import com.google.common.base.Joiner;
|
14 |
26600
|
sandro.lab
|
import com.google.common.base.Predicate;
|
15 |
37717
|
claudio.at
|
import com.google.common.base.Splitter;
|
16 |
46587
|
alessia.ba
|
import com.google.common.collect.Iterables;
|
17 |
26600
|
sandro.lab
|
import com.google.common.collect.Lists;
|
18 |
|
|
import com.google.common.collect.Maps;
|
19 |
|
|
import com.google.common.collect.Sets;
|
20 |
|
|
import com.google.protobuf.Descriptors.EnumValueDescriptor;
|
21 |
|
|
import com.google.protobuf.Descriptors.FieldDescriptor;
|
22 |
|
|
import com.google.protobuf.GeneratedMessage;
|
23 |
|
|
import com.mycila.xmltool.XMLDoc;
|
24 |
|
|
import com.mycila.xmltool.XMLTag;
|
25 |
40314
|
claudio.at
|
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
|
26 |
|
|
import eu.dnetlib.data.proto.FieldTypeProtos.*;
|
27 |
26600
|
sandro.lab
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
28 |
|
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
29 |
|
|
import eu.dnetlib.data.proto.ProjectProtos.Project;
|
30 |
|
|
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
|
31 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
32 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
|
33 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
|
34 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
|
35 |
46587
|
alessia.ba
|
import eu.dnetlib.data.proto.TypeProtos;
|
36 |
26600
|
sandro.lab
|
import eu.dnetlib.data.proto.TypeProtos.Type;
|
37 |
52524
|
claudio.at
|
import org.apache.commons.lang3.StringUtils;
|
38 |
40314
|
claudio.at
|
import org.dom4j.Document;
|
39 |
|
|
import org.dom4j.DocumentException;
|
40 |
|
|
import org.dom4j.Element;
|
41 |
|
|
import org.dom4j.Node;
|
42 |
|
|
import org.dom4j.io.SAXReader;
|
43 |
26600
|
sandro.lab
|
|
44 |
40314
|
claudio.at
|
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
|
45 |
|
|
|
46 |
26600
|
sandro.lab
|
public class XmlRecordFactory {
|
47 |
|
|
|
48 |
37616
|
claudio.at
|
// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
|
49 |
37531
|
claudio.at
|
|
50 |
40314
|
claudio.at
|
private final Map<String, Integer> relCounters = Maps.newHashMap();
|
51 |
43428
|
alessia.ba
|
protected Set<String> specialDatasourceTypes;
|
52 |
26600
|
sandro.lab
|
protected TemplateFactory templateFactory = new TemplateFactory();
|
53 |
|
|
protected OafDecoder mainEntity = null;
|
54 |
|
|
protected String key = null;
|
55 |
28226
|
claudio.at
|
protected List<OafDecoder> relations = Lists.newLinkedList();
|
56 |
|
|
protected List<OafDecoder> children = Lists.newLinkedList();
|
57 |
26600
|
sandro.lab
|
protected EntityConfigTable entityConfigTable;
|
58 |
|
|
protected ContextMapper contextMapper;
|
59 |
28094
|
claudio.at
|
protected RelClasses relClasses;
|
60 |
26600
|
sandro.lab
|
protected String schemaLocation;
|
61 |
|
|
protected boolean entityDefaults;
|
62 |
|
|
protected boolean relDefaults;
|
63 |
|
|
protected boolean childDefaults;
|
64 |
|
|
protected Set<String> contextes = Sets.newHashSet();
|
65 |
28094
|
claudio.at
|
protected List<String> extraInfo = Lists.newArrayList();
|
66 |
30968
|
claudio.at
|
protected Map<String, Integer> counters = Maps.newHashMap();
|
67 |
26600
|
sandro.lab
|
protected Transformer transformer;
|
68 |
|
|
|
69 |
46587
|
alessia.ba
|
protected static Predicate<String> instanceFilter = new Predicate<String>() {
|
70 |
58071
|
alessia.ba
|
final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "license", "accessright", "collectedfrom", "dateofacceptance", "distributionlocation", "refereed");
|
71 |
46587
|
alessia.ba
|
@Override
|
72 |
|
|
public boolean apply(final String s) {
|
73 |
|
|
return instanceFieldFilter.contains(s);
|
74 |
|
|
}
|
75 |
|
|
};
|
76 |
|
|
|
77 |
28226
|
claudio.at
|
public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
|
78 |
43428
|
alessia.ba
|
final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults, final Set<String> otherDatasourceTypesUForUI)
|
79 |
33382
|
claudio.at
|
throws TransformerConfigurationException, TransformerFactoryConfigurationError {
|
80 |
26600
|
sandro.lab
|
this.entityConfigTable = entityConfigTable;
|
81 |
|
|
this.contextMapper = contextMapper;
|
82 |
28094
|
claudio.at
|
this.relClasses = relClasses;
|
83 |
26600
|
sandro.lab
|
this.schemaLocation = schemaLocation;
|
84 |
|
|
this.entityDefaults = entityDefaults;
|
85 |
|
|
this.relDefaults = relDefaults;
|
86 |
|
|
this.childDefaults = childDefeaults;
|
87 |
43428
|
alessia.ba
|
this.specialDatasourceTypes = otherDatasourceTypesUForUI;
|
88 |
26600
|
sandro.lab
|
|
89 |
|
|
transformer = TransformerFactory.newInstance().newTransformer();
|
90 |
|
|
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
|
91 |
|
|
}
|
92 |
|
|
|
93 |
40314
|
claudio.at
|
public static String removePrefix(final String s) {
|
94 |
|
|
if (s.contains("|")) return StringUtils.substringAfter(s, "|");
|
95 |
|
|
return s;
|
96 |
|
|
}
|
97 |
|
|
|
98 |
|
|
public static String escapeXml(final String value) {
|
99 |
|
|
return value.replaceAll("&", "&").replaceAll("<", "<").replaceAll(">", ">").replaceAll("\"", """).replaceAll("'", "'");
|
100 |
|
|
}
|
101 |
|
|
|
102 |
37334
|
claudio.at
|
public Map<String, Integer> getRelCounters() {
|
103 |
|
|
return relCounters;
|
104 |
|
|
}
|
105 |
|
|
|
106 |
|
|
public RelClasses getRelClasses() {
|
107 |
|
|
return relClasses;
|
108 |
|
|
}
|
109 |
|
|
|
110 |
26600
|
sandro.lab
|
public String getId() {
|
111 |
|
|
return key;
|
112 |
|
|
}
|
113 |
|
|
|
114 |
|
|
public boolean isValid() {
|
115 |
|
|
return mainEntity != null;
|
116 |
|
|
}
|
117 |
|
|
|
118 |
|
|
public void setMainEntity(final OafDecoder mainEntity) {
|
119 |
|
|
this.mainEntity = mainEntity;
|
120 |
|
|
this.key = mainEntity.decodeEntity().getId();
|
121 |
|
|
}
|
122 |
|
|
|
123 |
37334
|
claudio.at
|
public void addRelation(final Type type, final OafDecoder rel) {
|
124 |
|
|
addRelOrChild(type, relations, rel);
|
125 |
26600
|
sandro.lab
|
}
|
126 |
|
|
|
127 |
37334
|
claudio.at
|
public void addChild(final Type type, final OafDecoder child) {
|
128 |
|
|
addRelOrChild(type, children, child);
|
129 |
26600
|
sandro.lab
|
}
|
130 |
|
|
|
131 |
37334
|
claudio.at
|
private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
|
132 |
|
|
|
133 |
|
|
final OafRel oafRel = decoder.getOafRel();
|
134 |
|
|
final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
|
135 |
|
|
final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
|
136 |
|
|
|
137 |
|
|
if (getRelCounters().get(rd) == null) {
|
138 |
|
|
getRelCounters().put(rd, 0);
|
139 |
|
|
}
|
140 |
|
|
|
141 |
|
|
if (ld == null) {
|
142 |
|
|
list.add(decoder);
|
143 |
|
|
return;
|
144 |
|
|
}
|
145 |
|
|
|
146 |
|
|
if (ld.getMax() < 0) {
|
147 |
|
|
list.add(decoder);
|
148 |
|
|
return;
|
149 |
|
|
}
|
150 |
|
|
|
151 |
|
|
if (getRelCounters().get(rd) < ld.getMax()) {
|
152 |
|
|
getRelCounters().put(rd, getRelCounters().get(rd) + 1);
|
153 |
|
|
list.add(decoder);
|
154 |
|
|
}
|
155 |
26600
|
sandro.lab
|
}
|
156 |
|
|
|
157 |
|
|
public String build() {
|
158 |
37717
|
claudio.at
|
try {
|
159 |
|
|
final OafEntityDecoder entity = mainEntity.decodeEntity();
|
160 |
|
|
// log.info("building");
|
161 |
|
|
// log.info("main: " + mainEntity);
|
162 |
|
|
// log.info("rel: " + relations);
|
163 |
|
|
// log.info("chi: " + children);
|
164 |
|
|
// log.info("=============");
|
165 |
26600
|
sandro.lab
|
|
166 |
46587
|
alessia.ba
|
final Predicate<String> filter = entityConfigTable.getFilter(entity.getType());
|
167 |
|
|
final List<String> metadata = decodeType(entity, filter, entityDefaults, false);
|
168 |
26600
|
sandro.lab
|
|
169 |
37717
|
claudio.at
|
// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
|
170 |
|
|
final List<String> rels = listRelations();
|
171 |
46587
|
alessia.ba
|
metadata.addAll(buildContexts(entity.getType()));
|
172 |
37717
|
claudio.at
|
metadata.add(parseDataInfo(mainEntity));
|
173 |
28226
|
claudio.at
|
|
174 |
46587
|
alessia.ba
|
final String body = templateFactory.buildBody(entity.getType(), metadata, rels, listChildren(), extraInfo);
|
175 |
26600
|
sandro.lab
|
|
176 |
40314
|
claudio.at
|
return templateFactory
|
177 |
46587
|
alessia.ba
|
.buildRecord(key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
|
178 |
37717
|
claudio.at
|
} catch (final Throwable e) {
|
179 |
|
|
throw new RuntimeException(String.format("error building record '%s'", this.key), e);
|
180 |
|
|
}
|
181 |
26600
|
sandro.lab
|
}
|
182 |
|
|
|
183 |
|
|
private String parseDataInfo(final OafDecoder decoder) {
|
184 |
35771
|
claudio.at
|
final DataInfo dataInfo = decoder.getOaf().getDataInfo();
|
185 |
26600
|
sandro.lab
|
|
186 |
35771
|
claudio.at
|
final StringBuilder sb = new StringBuilder();
|
187 |
26600
|
sandro.lab
|
sb.append("<datainfo>");
|
188 |
28094
|
claudio.at
|
sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
|
189 |
|
|
sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
|
190 |
|
|
sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
|
191 |
|
|
sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
|
192 |
|
|
sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
|
193 |
26600
|
sandro.lab
|
sb.append("</datainfo>");
|
194 |
|
|
|
195 |
|
|
return sb.toString();
|
196 |
|
|
}
|
197 |
|
|
|
198 |
46587
|
alessia.ba
|
private List<String> decodeType(final OafEntityDecoder decoder, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
199 |
26600
|
sandro.lab
|
|
200 |
|
|
final List<String> metadata = Lists.newArrayList();
|
201 |
|
|
metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
|
202 |
|
|
metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
|
203 |
|
|
|
204 |
35771
|
claudio.at
|
if ((decoder.getEntity() instanceof Result) && !expandingRel) {
|
205 |
49096
|
claudio.at
|
metadata.add(asXmlElement("bestaccessright", "", getBestAccessright(), null));
|
206 |
26600
|
sandro.lab
|
|
207 |
|
|
metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
|
208 |
|
|
}
|
209 |
35771
|
claudio.at
|
if ((decoder.getEntity() instanceof Project) && !expandingRel) {
|
210 |
26600
|
sandro.lab
|
metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
|
211 |
|
|
}
|
212 |
|
|
|
213 |
|
|
return metadata;
|
214 |
|
|
}
|
215 |
|
|
|
216 |
49096
|
claudio.at
|
private Qualifier getBestAccessright() {
|
217 |
|
|
Qualifier bestAccessRight = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
|
218 |
35771
|
claudio.at
|
final LicenseComparator lc = new LicenseComparator();
|
219 |
|
|
for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
|
220 |
49096
|
claudio.at
|
if (lc.compare(bestAccessRight, instance.getAccessright()) > 0) {
|
221 |
|
|
bestAccessRight = instance.getAccessright();
|
222 |
26600
|
sandro.lab
|
}
|
223 |
|
|
}
|
224 |
49096
|
claudio.at
|
return bestAccessRight;
|
225 |
26600
|
sandro.lab
|
}
|
226 |
|
|
|
227 |
31409
|
claudio.at
|
public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
|
228 |
|
|
return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
|
229 |
|
|
}
|
230 |
|
|
|
231 |
26600
|
sandro.lab
|
private List<String> listRelations() {
|
232 |
|
|
|
233 |
|
|
final List<String> rels = Lists.newArrayList();
|
234 |
|
|
|
235 |
35771
|
claudio.at
|
for (final OafDecoder decoder : this.relations) {
|
236 |
26600
|
sandro.lab
|
|
237 |
|
|
final OafRel rel = decoder.getOafRel();
|
238 |
|
|
final OafEntity cachedTarget = rel.getCachedTarget();
|
239 |
|
|
final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
|
240 |
|
|
|
241 |
28094
|
claudio.at
|
// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
|
242 |
|
|
if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
|
243 |
26600
|
sandro.lab
|
|
244 |
|
|
final List<String> metadata = Lists.newArrayList();
|
245 |
46587
|
alessia.ba
|
final TypeProtos.Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
|
246 |
|
|
//final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
|
247 |
|
|
metadata.addAll(listFields(relDecoder.getSubRel(), entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
|
248 |
26600
|
sandro.lab
|
|
249 |
|
|
String semanticclass = "";
|
250 |
|
|
String semanticscheme = "";
|
251 |
|
|
|
252 |
35771
|
claudio.at
|
final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
|
253 |
28226
|
claudio.at
|
|
254 |
35771
|
claudio.at
|
if ((cachedTarget != null) && cachedTarget.isInitialized()) {
|
255 |
28226
|
claudio.at
|
|
256 |
46587
|
alessia.ba
|
//final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
|
257 |
43896
|
claudio.at
|
final OafEntityDecoder d = OafEntityDecoder.decode(cachedTarget);
|
258 |
46587
|
alessia.ba
|
metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
|
259 |
43896
|
claudio.at
|
if (d.getType().equals(Type.result)) {
|
260 |
|
|
for(Instance i : cachedTarget.getResult().getInstanceList()) {
|
261 |
48697
|
claudio.at
|
final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
|
262 |
|
|
metadata.addAll(fields);
|
263 |
43896
|
claudio.at
|
}
|
264 |
|
|
}
|
265 |
28094
|
claudio.at
|
}
|
266 |
26600
|
sandro.lab
|
|
267 |
35771
|
claudio.at
|
final RelMetadata relMetadata = relDecoder.getRelMetadata();
|
268 |
28094
|
claudio.at
|
// debug
|
269 |
|
|
if (relMetadata == null) {
|
270 |
|
|
// System.err.println(this);
|
271 |
|
|
semanticclass = semanticscheme = "UNKNOWN";
|
272 |
26600
|
sandro.lab
|
} else {
|
273 |
28094
|
claudio.at
|
semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
|
274 |
|
|
semanticscheme = relMetadata.getSemantics().getSchemename();
|
275 |
26600
|
sandro.lab
|
}
|
276 |
|
|
|
277 |
42584
|
claudio.at
|
final String rd = relDescriptor.getSubRelType().toString();
|
278 |
|
|
incrementCounter(rd);
|
279 |
28226
|
claudio.at
|
|
280 |
42584
|
claudio.at
|
final DataInfo info = decoder.getOaf().getDataInfo();
|
281 |
|
|
if (info.getInferred()) {
|
282 |
|
|
incrementCounter(rd + "_inferred");
|
283 |
|
|
} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "sysimport:crosswalk")) {
|
284 |
|
|
incrementCounter(rd + "_collected");
|
285 |
|
|
} else if(StringUtils.startsWith(info.getProvenanceaction().getClassid(), "user:")) {
|
286 |
|
|
incrementCounter(rd + "_claimed");
|
287 |
|
|
}
|
288 |
|
|
|
289 |
35771
|
claudio.at
|
final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
|
290 |
28226
|
claudio.at
|
|
291 |
35771
|
claudio.at
|
final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
|
292 |
28226
|
claudio.at
|
|
293 |
48697
|
claudio.at
|
rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
|
294 |
28226
|
claudio.at
|
info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
|
295 |
26600
|
sandro.lab
|
}
|
296 |
|
|
}
|
297 |
|
|
return rels;
|
298 |
|
|
}
|
299 |
|
|
|
300 |
40314
|
claudio.at
|
// //////////////////////////////////
|
301 |
|
|
|
302 |
26600
|
sandro.lab
|
private List<String> listChildren() {
|
303 |
|
|
|
304 |
|
|
final List<String> children = Lists.newArrayList();
|
305 |
35771
|
claudio.at
|
for (final OafDecoder decoder : this.children) {
|
306 |
|
|
final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
|
307 |
30968
|
claudio.at
|
addChildren(children, cachedTarget, decoder.getRelDescriptor());
|
308 |
26600
|
sandro.lab
|
}
|
309 |
35771
|
claudio.at
|
final OafEntityDecoder entity = mainEntity.decodeEntity();
|
310 |
26600
|
sandro.lab
|
if (entity.getType().equals(Type.result)) {
|
311 |
35771
|
claudio.at
|
for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
|
312 |
58086
|
claudio.at
|
final List<String> instancemetadata = listFields(instance, instanceFilter, false, false);
|
313 |
|
|
|
314 |
|
|
if (instance.hasProcessingchargeamount()) {
|
315 |
|
|
instancemetadata.add("<processingchargeamount currency=\""
|
316 |
|
|
+ instance.getProcessingchargecurrency().getValue() + "\">"
|
317 |
|
|
+ instance.getProcessingchargeamount().getValue()
|
318 |
|
|
+ "</processingchargeamount>");
|
319 |
|
|
}
|
320 |
|
|
|
321 |
|
|
children.add(templateFactory.getInstance(instance.getHostedby().getKey(), instancemetadata,
|
322 |
48697
|
claudio.at
|
listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
|
323 |
26600
|
sandro.lab
|
}
|
324 |
35771
|
claudio.at
|
for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
|
325 |
26600
|
sandro.lab
|
// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
|
326 |
35771
|
claudio.at
|
final List<String> fields = listFields(er, null, false, false);
|
327 |
26600
|
sandro.lab
|
children.add(templateFactory.getChild("externalreference", null, fields));
|
328 |
|
|
}
|
329 |
|
|
}
|
330 |
|
|
|
331 |
|
|
return children;
|
332 |
|
|
}
|
333 |
|
|
|
334 |
28094
|
claudio.at
|
private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
|
335 |
26600
|
sandro.lab
|
final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
|
336 |
30968
|
claudio.at
|
incrementCounter(relDescriptor.getSubRelType().toString());
|
337 |
46587
|
alessia.ba
|
final Predicate<String> filter = entityConfigTable.getIncludeFilter(target.getType(), relDescriptor);
|
338 |
|
|
children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filter, childDefaults, false)));
|
339 |
26600
|
sandro.lab
|
}
|
340 |
|
|
|
341 |
46587
|
alessia.ba
|
private List<String> listFields(final GeneratedMessage fields, final Predicate<String> filter, final boolean defaults, final boolean expandingRel) {
|
342 |
26600
|
sandro.lab
|
|
343 |
|
|
final List<String> metadata = Lists.newArrayList();
|
344 |
|
|
|
345 |
|
|
if (fields != null) {
|
346 |
|
|
|
347 |
35771
|
claudio.at
|
final Set<String> seen = Sets.newHashSet();
|
348 |
26600
|
sandro.lab
|
|
349 |
46587
|
alessia.ba
|
final Map<FieldDescriptor, Object> filtered = filterFields(fields, filter);
|
350 |
|
|
for (final Entry<FieldDescriptor, Object> e : filtered.entrySet()) {
|
351 |
|
|
|
352 |
26600
|
sandro.lab
|
final String name = e.getKey().getName();
|
353 |
|
|
seen.add(name);
|
354 |
|
|
addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
|
355 |
|
|
}
|
356 |
|
|
|
357 |
|
|
if (defaults) {
|
358 |
49029
|
claudio.at
|
final Iterable<FieldDescriptor> unseen =
|
359 |
|
|
Iterables.filter(fields.getDescriptorForType().getFields(), fd -> !seen.contains(fd.getName()) && filter.apply(fd.getName()));
|
360 |
46587
|
alessia.ba
|
for(FieldDescriptor fd : unseen){
|
361 |
|
|
addFieldValue(metadata, fd, getDefault(fd), expandingRel);
|
362 |
26600
|
sandro.lab
|
}
|
363 |
|
|
}
|
364 |
|
|
}
|
365 |
|
|
return metadata;
|
366 |
|
|
}
|
367 |
|
|
|
368 |
|
|
private Object getDefault(final FieldDescriptor fd) {
|
369 |
|
|
switch (fd.getType()) {
|
370 |
|
|
case BOOL:
|
371 |
|
|
return false;
|
372 |
|
|
case BYTES:
|
373 |
|
|
return "".getBytes();
|
374 |
|
|
case MESSAGE: {
|
375 |
35771
|
claudio.at
|
if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
|
376 |
37894
|
alessia.ba
|
if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
|
377 |
|
|
return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
|
378 |
35771
|
claudio.at
|
if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
|
379 |
|
|
if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
|
380 |
37616
|
claudio.at
|
if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
|
381 |
26600
|
sandro.lab
|
return null;
|
382 |
|
|
}
|
383 |
|
|
case SFIXED32:
|
384 |
|
|
case SFIXED64:
|
385 |
|
|
case SINT32:
|
386 |
|
|
case SINT64:
|
387 |
|
|
case INT32:
|
388 |
|
|
case INT64:
|
389 |
|
|
case FIXED32:
|
390 |
|
|
case FIXED64:
|
391 |
57774
|
alessia.ba
|
case DOUBLE:
|
392 |
26600
|
sandro.lab
|
case FLOAT:
|
393 |
|
|
case STRING:
|
394 |
|
|
return "";
|
395 |
|
|
default:
|
396 |
|
|
return null;
|
397 |
|
|
}
|
398 |
|
|
}
|
399 |
|
|
|
400 |
|
|
private Qualifier defaultQualifier() {
|
401 |
|
|
return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
|
402 |
|
|
}
|
403 |
|
|
|
404 |
|
|
@SuppressWarnings("unchecked")
|
405 |
|
|
private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
|
406 |
42501
|
claudio.at
|
if ("dateofcollection".equals(fd.getName()) ||
|
407 |
|
|
"dateoftransformation".equals(fd.getName()) ||
|
408 |
|
|
"id".equals(fd.getName()) ||
|
409 |
|
|
(value == null)) return;
|
410 |
26600
|
sandro.lab
|
|
411 |
|
|
if (fd.getName().equals("datasourcetype")) {
|
412 |
35771
|
claudio.at
|
final String classid = ((Qualifier) value).getClassid();
|
413 |
26600
|
sandro.lab
|
|
414 |
35771
|
claudio.at
|
final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
|
415 |
26600
|
sandro.lab
|
if (specialDatasourceTypes.contains(classid)) {
|
416 |
|
|
q.setClassid("other").setClassname("other");
|
417 |
|
|
}
|
418 |
28094
|
claudio.at
|
metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
|
419 |
26600
|
sandro.lab
|
}
|
420 |
|
|
|
421 |
35771
|
claudio.at
|
if (fd.isRepeated() && (value instanceof List<?>)) {
|
422 |
|
|
for (final Object o : (List<Object>) value) {
|
423 |
26600
|
sandro.lab
|
guessType(metadata, fd, o, expandingRel);
|
424 |
|
|
}
|
425 |
|
|
} else {
|
426 |
|
|
guessType(metadata, fd, value, expandingRel);
|
427 |
|
|
}
|
428 |
|
|
}
|
429 |
|
|
|
430 |
|
|
private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
|
431 |
|
|
|
432 |
|
|
if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
|
433 |
|
|
|
434 |
49029
|
claudio.at
|
if(Author.getDescriptor().equals(fd.getMessageType())) {
|
435 |
|
|
|
436 |
|
|
final Author a = (Author) o;
|
437 |
|
|
|
438 |
|
|
final StringBuilder sb = new StringBuilder("<creator rank=\"" + a.getRank() + "\"");
|
439 |
|
|
if (a.hasName()) {
|
440 |
|
|
sb.append(" name=\"" + escapeXml(a.getName()) + "\"");
|
441 |
|
|
}
|
442 |
|
|
if (a.hasSurname()) {
|
443 |
|
|
sb.append(" surname=\"" + escapeXml(a.getSurname()) + "\"");
|
444 |
|
|
}
|
445 |
52524
|
claudio.at
|
if (a.getPidCount() > 0) {
|
446 |
|
|
a.getPidList().stream()
|
447 |
53371
|
claudio.at
|
.filter(kv -> StringUtils.isNotBlank(kv.getKey()) && StringUtils.isNotBlank(kv.getValue()))
|
448 |
|
|
.forEach(kv -> {
|
449 |
|
|
String pidType = escapeXml(kv.getKey())
|
450 |
|
|
.replaceAll("\\W", "");
|
451 |
|
|
String pidValue = escapeXml(kv.getValue());
|
452 |
|
|
sb.append(String.format(" %s=\"%s\"", pidType, pidValue));
|
453 |
|
|
});
|
454 |
52524
|
claudio.at
|
}
|
455 |
|
|
|
456 |
49029
|
claudio.at
|
sb.append(">" + escapeXml(a.getFullname()) + "</creator>");
|
457 |
|
|
|
458 |
|
|
metadata.add(sb.toString());
|
459 |
|
|
}
|
460 |
|
|
|
461 |
26600
|
sandro.lab
|
if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
|
462 |
35771
|
claudio.at
|
final Qualifier qualifier = (Qualifier) o;
|
463 |
28094
|
claudio.at
|
metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
|
464 |
26600
|
sandro.lab
|
}
|
465 |
|
|
|
466 |
|
|
if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
|
467 |
35771
|
claudio.at
|
final StructuredProperty sp = (StructuredProperty) o;
|
468 |
28094
|
claudio.at
|
metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
|
469 |
41681
|
claudio.at
|
|
470 |
|
|
if (!expandingRel && fd.getName().equals("pid")) {
|
471 |
|
|
if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
|
472 |
|
|
incrementCounter("doi");
|
473 |
|
|
}
|
474 |
|
|
}
|
475 |
26600
|
sandro.lab
|
}
|
476 |
|
|
|
477 |
|
|
if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
|
478 |
35771
|
claudio.at
|
final KeyValue kv = (KeyValue) o;
|
479 |
26600
|
sandro.lab
|
metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
|
480 |
|
|
}
|
481 |
|
|
|
482 |
28094
|
claudio.at
|
if (StringField.getDescriptor().equals(fd.getMessageType())) {
|
483 |
35771
|
claudio.at
|
final String fieldName = fd.getName();
|
484 |
35179
|
michele.ar
|
|
485 |
|
|
if (fieldName.equals("fundingtree")) {
|
486 |
35771
|
claudio.at
|
final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
|
487 |
35179
|
michele.ar
|
|
488 |
|
|
if (expandingRel) {
|
489 |
|
|
metadata.add(getRelFundingTree(xmlTree));
|
490 |
|
|
fillContextMap(xmlTree);
|
491 |
|
|
} else {
|
492 |
|
|
metadata.add(xmlTree);
|
493 |
|
|
}
|
494 |
28094
|
claudio.at
|
} else {
|
495 |
35771
|
claudio.at
|
final StringField sf = (StringField) o;
|
496 |
|
|
final StringBuilder sb = new StringBuilder("<" + fd.getName());
|
497 |
28094
|
claudio.at
|
if (sf.hasDataInfo()) {
|
498 |
35771
|
claudio.at
|
final DataInfo dataInfo = sf.getDataInfo();
|
499 |
28094
|
claudio.at
|
dataInfoAsAttributes(sb, dataInfo);
|
500 |
|
|
}
|
501 |
|
|
sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
|
502 |
|
|
metadata.add(sb.toString());
|
503 |
|
|
}
|
504 |
|
|
}
|
505 |
37616
|
claudio.at
|
|
506 |
|
|
if (BoolField.getDescriptor().equals(fd.getMessageType())) {
|
507 |
|
|
final BoolField bf = (BoolField) o;
|
508 |
|
|
final StringBuilder sb = new StringBuilder("<" + fd.getName());
|
509 |
|
|
if (bf.hasDataInfo()) {
|
510 |
|
|
final DataInfo dataInfo = bf.getDataInfo();
|
511 |
|
|
dataInfoAsAttributes(sb, dataInfo);
|
512 |
|
|
}
|
513 |
|
|
|
514 |
|
|
sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
|
515 |
|
|
metadata.add(sb.toString());
|
516 |
|
|
}
|
517 |
|
|
|
518 |
35771
|
claudio.at
|
if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
519 |
|
|
final Journal j = (Journal) o;
|
520 |
26600
|
sandro.lab
|
metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
|
521 |
46587
|
alessia.ba
|
+ escapeXml(j.getIssnLinking()) + "\" " + "ep=\"" + escapeXml(j.getEp()) + "\" " + "iss=\"" + escapeXml(j.getIss()) + "\" " + "sp=\""
|
522 |
|
|
+ escapeXml(j.getSp()) + "\" " + "vol=\"" + escapeXml(j.getVol()) + "\">" + escapeXml(j.getName()) + "</journal>");
|
523 |
26600
|
sandro.lab
|
}
|
524 |
|
|
|
525 |
35771
|
claudio.at
|
if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
526 |
52751
|
alessia.ba
|
final String contextid = ((Context) o).getId();
|
527 |
|
|
contextes.add(contextid);
|
528 |
|
|
/* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */
|
529 |
|
|
if(contextid.equalsIgnoreCase("dh-ch::subcommunity::2")){
|
530 |
|
|
contextes.add("clarin");
|
531 |
|
|
}
|
532 |
|
|
|
533 |
26600
|
sandro.lab
|
}
|
534 |
|
|
|
535 |
35771
|
claudio.at
|
if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
|
536 |
26600
|
sandro.lab
|
|
537 |
35771
|
claudio.at
|
final ExtraInfo e = (ExtraInfo) o;
|
538 |
|
|
final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
|
539 |
28094
|
claudio.at
|
|
540 |
|
|
sb.append("name=\"" + e.getName() + "\" ");
|
541 |
|
|
sb.append("typology=\"" + e.getTypology() + "\" ");
|
542 |
|
|
sb.append("provenance=\"" + e.getProvenance() + "\" ");
|
543 |
|
|
sb.append("trust=\"" + e.getTrust() + "\"");
|
544 |
|
|
sb.append(">");
|
545 |
30827
|
claudio.at
|
sb.append(e.getValue());
|
546 |
26600
|
sandro.lab
|
sb.append("</" + fd.getName() + ">");
|
547 |
28094
|
claudio.at
|
|
548 |
|
|
extraInfo.add(sb.toString());
|
549 |
26600
|
sandro.lab
|
}
|
550 |
|
|
|
551 |
|
|
} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
|
552 |
35771
|
claudio.at
|
if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
|
553 |
28094
|
claudio.at
|
metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
|
554 |
26600
|
sandro.lab
|
} else {
|
555 |
57774
|
alessia.ba
|
if(o instanceof String && o.equals("")){
|
556 |
|
|
metadata.add(asXmlElement(fd.getName(), "", null, null));
|
557 |
57758
|
alessia.ba
|
}
|
558 |
57774
|
alessia.ba
|
else {
|
559 |
|
|
switch (fd.getType()) {
|
560 |
|
|
case SFIXED32:
|
561 |
|
|
case SFIXED64:
|
562 |
|
|
case SINT32:
|
563 |
|
|
case SINT64:
|
564 |
|
|
case INT32:
|
565 |
|
|
case INT64:
|
566 |
|
|
case FIXED32:
|
567 |
|
|
case FIXED64:
|
568 |
|
|
metadata.add(asXmlElement(fd.getName(), String.format("%s", o), null, null));
|
569 |
|
|
break;
|
570 |
|
|
case DOUBLE:
|
571 |
|
|
case FLOAT:
|
572 |
|
|
metadata.add(asXmlElement(fd.getName(), String.format("%.2f", ((Float) o)), null, null));
|
573 |
|
|
break;
|
574 |
|
|
default:
|
575 |
|
|
metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
|
576 |
|
|
}
|
577 |
|
|
}
|
578 |
26600
|
sandro.lab
|
}
|
579 |
|
|
}
|
580 |
|
|
|
581 |
28226
|
claudio.at
|
private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
|
582 |
28094
|
claudio.at
|
sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
|
583 |
|
|
sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
|
584 |
30863
|
claudio.at
|
sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
|
585 |
28094
|
claudio.at
|
sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
|
586 |
|
|
return sb;
|
587 |
26600
|
sandro.lab
|
}
|
588 |
|
|
|
589 |
|
|
private List<String> buildContexts(final Type type) {
|
590 |
|
|
final List<String> res = Lists.newArrayList();
|
591 |
|
|
|
592 |
35771
|
claudio.at
|
if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
|
593 |
26600
|
sandro.lab
|
|
594 |
|
|
XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
|
595 |
|
|
|
596 |
37717
|
claudio.at
|
for (final String context : contextes) {
|
597 |
26600
|
sandro.lab
|
|
598 |
37717
|
claudio.at
|
String id = "";
|
599 |
|
|
for (final String token : Splitter.on("::").split(context)) {
|
600 |
|
|
id += token;
|
601 |
26600
|
sandro.lab
|
|
602 |
37717
|
claudio.at
|
final ContextDef def = contextMapper.get(id);
|
603 |
26600
|
sandro.lab
|
|
604 |
51221
|
claudio.at
|
if (def == null) {
|
605 |
|
|
continue;
|
606 |
|
|
// throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
|
607 |
|
|
}
|
608 |
26600
|
sandro.lab
|
|
609 |
|
|
if (def.getName().equals("context")) {
|
610 |
35771
|
claudio.at
|
final String xpath = "//context/@id='" + def.getId() + "'";
|
611 |
26600
|
sandro.lab
|
if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
|
612 |
|
|
document = addContextDef(document.gotoRoot(), def);
|
613 |
|
|
}
|
614 |
|
|
}
|
615 |
|
|
|
616 |
|
|
if (def.getName().equals("category")) {
|
617 |
35771
|
claudio.at
|
final String rootId = StringUtils.substringBefore(def.getId(), "::");
|
618 |
26600
|
sandro.lab
|
document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
|
619 |
|
|
}
|
620 |
|
|
|
621 |
|
|
if (def.getName().equals("concept")) {
|
622 |
|
|
document = addContextDef(document, def).gotoParent();
|
623 |
|
|
}
|
624 |
37717
|
claudio.at
|
id += "::";
|
625 |
26600
|
sandro.lab
|
}
|
626 |
|
|
}
|
627 |
|
|
|
628 |
35771
|
claudio.at
|
for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
|
629 |
26600
|
sandro.lab
|
try {
|
630 |
|
|
res.add(asStringElement(x));
|
631 |
35771
|
claudio.at
|
} catch (final TransformerException e) {
|
632 |
26600
|
sandro.lab
|
throw new RuntimeException(e);
|
633 |
|
|
}
|
634 |
|
|
}
|
635 |
|
|
}
|
636 |
|
|
|
637 |
|
|
return res;
|
638 |
|
|
}
|
639 |
|
|
|
640 |
|
|
private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
|
641 |
|
|
tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
|
642 |
35771
|
claudio.at
|
if ((def.getType() != null) && !def.getType().isEmpty()) {
|
643 |
26600
|
sandro.lab
|
tag.addAttribute("type", def.getType());
|
644 |
|
|
}
|
645 |
|
|
return tag;
|
646 |
|
|
}
|
647 |
|
|
|
648 |
|
|
private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
|
649 |
35771
|
claudio.at
|
final StringWriter buffer = new StringWriter();
|
650 |
26600
|
sandro.lab
|
transformer.transform(new DOMSource(element), new StreamResult(buffer));
|
651 |
|
|
return buffer.toString();
|
652 |
|
|
}
|
653 |
|
|
|
654 |
|
|
@SuppressWarnings("unchecked")
|
655 |
35179
|
michele.ar
|
private String getRelFundingTree(final String xmlTree) {
|
656 |
|
|
String funding = "<funding>";
|
657 |
|
|
try {
|
658 |
35771
|
claudio.at
|
final Document ftree = new SAXReader().read(new StringReader(xmlTree));
|
659 |
35179
|
michele.ar
|
funding = "<funding>";
|
660 |
|
|
// String _id = "";
|
661 |
26600
|
sandro.lab
|
|
662 |
37531
|
claudio.at
|
funding += getFunderElement(ftree);
|
663 |
|
|
|
664 |
35771
|
claudio.at
|
for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
|
665 |
|
|
final Element e = (Element) o;
|
666 |
|
|
final String _id = e.valueOf("./id");
|
667 |
37894
|
alessia.ba
|
funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
|
668 |
35179
|
michele.ar
|
// _id += "::";
|
669 |
26600
|
sandro.lab
|
}
|
670 |
35771
|
claudio.at
|
} catch (final DocumentException e) {
|
671 |
37531
|
claudio.at
|
throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
|
672 |
35179
|
michele.ar
|
} finally {
|
673 |
|
|
funding += "</funding>";
|
674 |
26600
|
sandro.lab
|
}
|
675 |
35179
|
michele.ar
|
return funding;
|
676 |
26600
|
sandro.lab
|
}
|
677 |
|
|
|
678 |
37531
|
claudio.at
|
private String getFunderElement(final Document ftree) {
|
679 |
|
|
final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
|
680 |
|
|
final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
|
681 |
|
|
final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
|
682 |
|
|
final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
|
683 |
|
|
|
684 |
|
|
return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
|
685 |
|
|
+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
|
686 |
|
|
}
|
687 |
|
|
|
688 |
35179
|
michele.ar
|
private void fillContextMap(final String xmlTree) {
|
689 |
26600
|
sandro.lab
|
|
690 |
35179
|
michele.ar
|
Document fundingPath;
|
691 |
26600
|
sandro.lab
|
try {
|
692 |
35179
|
michele.ar
|
fundingPath = new SAXReader().read(new StringReader(xmlTree));
|
693 |
35771
|
claudio.at
|
} catch (final DocumentException e) {
|
694 |
26600
|
sandro.lab
|
throw new RuntimeException(e);
|
695 |
|
|
}
|
696 |
37273
|
claudio.at
|
try {
|
697 |
|
|
final Node funder = fundingPath.selectSingleNode("//funder");
|
698 |
41468
|
claudio.at
|
|
699 |
|
|
if (funder != null) {
|
700 |
|
|
|
701 |
|
|
final String funderShortName = funder.valueOf("./shortname");
|
702 |
|
|
contextes.add(funderShortName);
|
703 |
|
|
|
704 |
|
|
contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
|
705 |
|
|
final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
|
706 |
|
|
if (level0 != null) {
|
707 |
|
|
final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
|
708 |
|
|
contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
|
709 |
|
|
final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
|
710 |
|
|
if (level1 == null) {
|
711 |
|
|
contextes.add(level0Id);
|
712 |
|
|
} else {
|
713 |
|
|
final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
|
714 |
|
|
contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
|
715 |
|
|
final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
|
716 |
|
|
if (level2 == null) {
|
717 |
|
|
contextes.add(level1Id);
|
718 |
|
|
} else {
|
719 |
|
|
final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
|
720 |
|
|
contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
|
721 |
|
|
contextes.add(level2Id);
|
722 |
|
|
}
|
723 |
|
|
}
|
724 |
37273
|
claudio.at
|
}
|
725 |
|
|
}
|
726 |
|
|
} catch (final NullPointerException e) {
|
727 |
|
|
throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
|
728 |
35179
|
michele.ar
|
}
|
729 |
26600
|
sandro.lab
|
}
|
730 |
|
|
|
731 |
28094
|
claudio.at
|
private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
|
732 |
26600
|
sandro.lab
|
StringBuilder sb = new StringBuilder();
|
733 |
|
|
sb.append("<");
|
734 |
|
|
sb.append(name);
|
735 |
|
|
if (q != null) {
|
736 |
|
|
sb.append(getAttributes(q));
|
737 |
|
|
}
|
738 |
28094
|
claudio.at
|
if (dataInfo != null) {
|
739 |
|
|
sb = dataInfoAsAttributes(sb, dataInfo);
|
740 |
|
|
}
|
741 |
35771
|
claudio.at
|
if ((value == null) || value.isEmpty()) {
|
742 |
26600
|
sandro.lab
|
sb.append("/>");
|
743 |
|
|
return sb.toString();
|
744 |
|
|
// return "<" + name + getAttributes(q) + "/>";
|
745 |
|
|
}
|
746 |
|
|
|
747 |
|
|
sb.append(">");
|
748 |
|
|
// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
|
749 |
|
|
sb.append(escapeXml(value));
|
750 |
|
|
sb.append("</");
|
751 |
|
|
sb.append(name);
|
752 |
|
|
sb.append(">");
|
753 |
|
|
|
754 |
|
|
return sb.toString();
|
755 |
|
|
// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
|
756 |
|
|
}
|
757 |
|
|
|
758 |
|
|
private String getAttributes(final Qualifier q) {
|
759 |
35771
|
claudio.at
|
if (q == null) return "";
|
760 |
26600
|
sandro.lab
|
|
761 |
35771
|
claudio.at
|
final StringBuilder sb = new StringBuilder();
|
762 |
|
|
for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
|
763 |
26600
|
sandro.lab
|
// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
|
764 |
|
|
sb.append(" ");
|
765 |
|
|
sb.append(e.getKey().getName());
|
766 |
|
|
sb.append("=\"");
|
767 |
|
|
sb.append(escapeXml(e.getValue().toString()));
|
768 |
|
|
sb.append("\"");
|
769 |
|
|
}
|
770 |
|
|
return sb.toString();
|
771 |
|
|
}
|
772 |
|
|
|
773 |
|
|
|
774 |
46587
|
alessia.ba
|
private Map<FieldDescriptor, Object> filterFields(final GeneratedMessage fields, final Predicate<String> acceptFilter) {
|
775 |
|
|
if(acceptFilter == null) return fields.getAllFields();
|
776 |
|
|
final Map<FieldDescriptor, Object> res = Maps.newHashMap();
|
777 |
|
|
for(Entry<FieldDescriptor, Object> e : fields.getAllFields().entrySet()) {
|
778 |
|
|
if (acceptFilter.apply(e.getKey().getName())) {
|
779 |
|
|
res.put(e.getKey(), e.getValue());
|
780 |
|
|
}
|
781 |
26600
|
sandro.lab
|
}
|
782 |
46587
|
alessia.ba
|
return res;
|
783 |
26600
|
sandro.lab
|
}
|
784 |
|
|
|
785 |
46587
|
alessia.ba
|
|
786 |
|
|
|
787 |
30968
|
claudio.at
|
private List<String> countersAsXml() {
|
788 |
35771
|
claudio.at
|
final List<String> out = Lists.newArrayList();
|
789 |
|
|
for (final Entry<String, Integer> e : counters.entrySet()) {
|
790 |
30968
|
claudio.at
|
out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
|
791 |
|
|
}
|
792 |
|
|
return out;
|
793 |
|
|
}
|
794 |
|
|
|
795 |
|
|
private void incrementCounter(final String type) {
|
796 |
|
|
if (!counters.containsKey(type)) {
|
797 |
|
|
counters.put(type, 1);
|
798 |
|
|
} else {
|
799 |
|
|
counters.put(type, counters.get(type) + 1);
|
800 |
|
|
}
|
801 |
|
|
}
|
802 |
|
|
|
803 |
26600
|
sandro.lab
|
@Override
|
804 |
|
|
public String toString() {
|
805 |
35771
|
claudio.at
|
final StringBuilder sb = new StringBuilder();
|
806 |
26600
|
sandro.lab
|
sb.append("################################################\n");
|
807 |
|
|
sb.append("ID: ").append(key).append("\n");
|
808 |
|
|
if (mainEntity != null) {
|
809 |
|
|
sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
|
810 |
|
|
}
|
811 |
|
|
if (relations != null) {
|
812 |
|
|
sb.append("\nRELATIONS:\n");
|
813 |
35771
|
claudio.at
|
for (final OafDecoder decoder : relations) {
|
814 |
26600
|
sandro.lab
|
sb.append(decoder.getOafRel().toString() + "\n");
|
815 |
|
|
}
|
816 |
|
|
}
|
817 |
|
|
if (children != null) {
|
818 |
|
|
sb.append("\nCHILDREN:\n");
|
819 |
35771
|
claudio.at
|
for (final OafDecoder decoder : children) {
|
820 |
26600
|
sandro.lab
|
sb.append(decoder.getOafRel().toString() + "\n");
|
821 |
|
|
}
|
822 |
|
|
}
|
823 |
|
|
return sb.toString();
|
824 |
|
|
}
|
825 |
|
|
|
826 |
|
|
}
|