Revision 53685
Added by Giorgos Papanikos over 5 years ago
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/schemaorg/Utils.java | ||
---|---|---|
164 | 164 |
return null; |
165 | 165 |
} |
166 | 166 |
|
167 |
public static Boolean validateXml(String xml){ |
|
168 |
try { |
|
169 |
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); |
|
170 |
DocumentBuilder builder = factory.newDocumentBuilder(); |
|
171 |
InputSource is = new InputSource(new StringReader(xml)); |
|
172 |
builder.parse(is); |
|
173 |
return true; |
|
174 |
}catch(Exception ex){ |
|
175 |
return false; |
|
176 |
} |
|
177 |
} |
|
178 |
|
|
167 | 179 |
public static void writeFiles(final Iterable<String> iterable, final String outDir) throws DocumentException, IOException { |
168 | 180 |
|
169 | 181 |
int skipped = 0; |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/schemaorg/httpapi/kaggle/KaggleRepositoryIterable.java | ||
---|---|---|
117 | 117 |
public void bootstrap() { |
118 | 118 |
this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize()); |
119 | 119 |
|
120 |
ExecutorService executor = Executors.newSingleThreadExecutor(); |
|
121 |
executor.execute(new Harvester()); |
|
122 |
executor.shutdown(); |
|
120 |
Thread ft = new Thread(new Harvester() ); |
|
121 |
ft.start(); |
|
122 |
// ExecutorService executor = Executors.newSingleThreadExecutor(); |
|
123 |
// executor.execute(new Harvester()); |
|
124 |
// executor.shutdown(); |
|
123 | 125 |
} |
124 | 126 |
|
125 | 127 |
@Override |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/schemaorg/sitemapindex/SitemapIndexRepositoryIterable.java | ||
---|---|---|
64 | 64 |
public void bootstrap() { |
65 | 65 |
this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize()); |
66 | 66 |
|
67 |
ExecutorService executor = Executors.newSingleThreadExecutor(); |
|
68 |
executor.execute(new Harvester()); |
|
69 |
executor.shutdown(); |
|
67 |
Thread ft = new Thread(new Harvester() ); |
|
68 |
ft.start(); |
|
69 |
// ExecutorService executor = Executors.newSingleThreadExecutor(); |
|
70 |
// executor.execute(new Harvester()); |
|
71 |
// executor.shutdown(); |
|
70 | 72 |
} |
71 | 73 |
|
72 | 74 |
@Override |
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/schemaorg/DatasetMappingIterator.java | ||
---|---|---|
111 | 111 |
else { |
112 | 112 |
log.debug("building document"); |
113 | 113 |
xml = this.buildDataset(document); |
114 |
if (xml == null){ |
|
114 |
if (!Utils.validateXml(xml)) { |
|
115 |
log.debug("xml not valid. setting to empty"); |
|
116 |
xml = null; |
|
117 |
} |
|
118 |
if (xml == null) { |
|
115 | 119 |
log.debug("could not build xml. returning empty"); |
116 | 120 |
xml = DatasetDocument.emptyXml(); |
117 | 121 |
} |
Also available in: Unified diff
Added safeguard parsing of the produced XML to catch badly escaped illegal characters (e.g. etf: \001 escaped as &#1;). Changed harvesting thread execution from a single-thread ExecutorService to Thread.start().