Revision 56610
Added by Claudio Atzori over 5 years ago
modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oaisets/OaiSetsIterator.java | ||
---|---|---|
97 | 97 |
try { |
98 | 98 |
doc = reader.read(new StringReader(xml)); |
99 | 99 |
} catch (DocumentException e) { |
100 |
log.warn("Error parsing xml, I try to clean it: " + xml, e);
|
|
100 |
log.warn("Error parsing xml, I try to clean it", e);
|
|
101 | 101 |
final String cleaned = XmlCleaner.cleanAllEntities(xml); |
102 | 102 |
try { |
103 | 103 |
doc = reader.read(new StringReader(cleaned)); |
104 | 104 |
} catch (DocumentException e1) { |
105 |
throw new CollectorServiceException("Error parsing cleaned document:" + cleaned, e1);
|
|
105 |
throw new CollectorServiceException("Error parsing cleaned document", e1);
|
|
106 | 106 |
} |
107 | 107 |
} |
108 | 108 |
|
... | ... | |
126 | 126 |
} |
127 | 127 |
if (sawAllSets) { |
128 | 128 |
log.warn("URL " + baseUrl + " keeps returning the same OAI sets. Please contact the repo admin."); |
129 |
System.out.println("URL " + baseUrl + " keeps returning the same OAI sets. Please contact the repo admin."); |
|
130 | 129 |
return null; |
131 | 130 |
} else return doc.valueOf("//*[local-name()='resumptionToken']"); |
132 | 131 |
} |
modules/dnet-modular-collector-service/trunk/src/main/java/eu/dnetlib/data/collector/plugins/oai/OaiIterator.java | ||
---|---|---|
134 | 134 |
try { |
135 | 135 |
doc = reader.read(new StringReader(xml)); |
136 | 136 |
} catch (DocumentException e) { |
137 |
log.warn("Error parsing xml, I try to clean it: " + xml, e);
|
|
137 |
log.warn("Error parsing xml, I try to clean it", e);
|
|
138 | 138 |
final String cleaned = XmlCleaner.cleanAllEntities(xml); |
139 | 139 |
try { |
140 | 140 |
doc = reader.read(new StringReader(cleaned)); |
141 | 141 |
} catch (DocumentException e1) { |
142 | 142 |
final String resumptionToken = extractResumptionToken(xml); |
143 | 143 |
if (resumptionToken == null) |
144 |
throw new CollectorServiceException("Error parsing cleaned document:" + cleaned, e1);
|
|
144 |
throw new CollectorServiceException("Error parsing cleaned document", e1);
|
|
145 | 145 |
return resumptionToken; |
146 | 146 |
} |
147 | 147 |
} |
Also available in: Unified diff
Do not log or propagate erroneous and potentially huge XML records: they might clog the information system.