1
|
//package eu.dnetlib.data.collector.plugins.schemaorg;
|
2
|
//
|
3
|
//import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
|
4
|
//import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapIndexIterator;
|
5
|
//import org.apache.commons.logging.Log;
|
6
|
//import org.apache.commons.logging.LogFactory;
|
7
|
//
|
8
|
//import java.net.URL;
|
9
|
//import java.util.Iterator;
|
10
|
//import java.util.concurrent.ArrayBlockingQueue;
|
11
|
//import java.util.concurrent.ExecutorService;
|
12
|
//import java.util.concurrent.Executors;
|
13
|
//
|
14
|
//public class SchemaOrgIterableOLD implements Iterable<String> {
|
15
|
// private static final Log log = LogFactory.getLog(SchemaOrgIterable.class);
|
16
|
//
|
17
|
// public static class Options {
|
18
|
// private SchemaOrgIterator.Options schemaOrgIteratorOptions;
|
19
|
// private SitemapIndexIterator.Options sitemapIndexIteratorOptions;
|
20
|
// private SitemapFileIterator.Options sitemapFileIteratorOptions;
|
21
|
// private EndpointAccessIterator.Options endpointAccessIteratorOptions;
|
22
|
// private DatasetMappingIterator.Options datasetMappingIteratorOptions;
|
23
|
//
|
24
|
// private int queueSize;
|
25
|
//
|
26
|
// public DatasetMappingIterator.Options getDatasetMappingIteratorOptions() {
|
27
|
// return datasetMappingIteratorOptions;
|
28
|
// }
|
29
|
//
|
30
|
// public void setDatasetMappingIteratorOptions(DatasetMappingIterator.Options datasetMappingIteratorOptions) {
|
31
|
// this.datasetMappingIteratorOptions = datasetMappingIteratorOptions;
|
32
|
// }
|
33
|
//
|
34
|
// public EndpointAccessIterator.Options getEndpointAccessIteratorOptions() {
|
35
|
// return endpointAccessIteratorOptions;
|
36
|
// }
|
37
|
//
|
38
|
// public void setEndpointAccessIteratorOptions(EndpointAccessIterator.Options endpointAccessIteratorOptions) {
|
39
|
// this.endpointAccessIteratorOptions = endpointAccessIteratorOptions;
|
40
|
// }
|
41
|
//
|
42
|
// public SitemapFileIterator.Options getSitemapFileIteratorOptions() {
|
43
|
// return sitemapFileIteratorOptions;
|
44
|
// }
|
45
|
//
|
46
|
// public void setSitemapFileIteratorOptions(SitemapFileIterator.Options sitemapFileIteratorOptions) {
|
47
|
// this.sitemapFileIteratorOptions = sitemapFileIteratorOptions;
|
48
|
// }
|
49
|
//
|
50
|
// public SitemapIndexIterator.Options getSitemapIndexIteratorOptions() {
|
51
|
// return sitemapIndexIteratorOptions;
|
52
|
// }
|
53
|
//
|
54
|
// public void setSitemapIndexIteratorOptions(SitemapIndexIterator.Options sitemapIndexIteratorOptions) {
|
55
|
// this.sitemapIndexIteratorOptions = sitemapIndexIteratorOptions;
|
56
|
// }
|
57
|
//
|
58
|
// public SchemaOrgIterator.Options getSchemaOrgIteratorOptions() {
|
59
|
// return schemaOrgIteratorOptions;
|
60
|
// }
|
61
|
//
|
62
|
// public void setSchemaOrgIteratorOptions(SchemaOrgIterator.Options schemaOrgIteratorOptions) {
|
63
|
// this.schemaOrgIteratorOptions = schemaOrgIteratorOptions;
|
64
|
// }
|
65
|
//
|
66
|
// public int getQueueSize() {
|
67
|
// return queueSize;
|
68
|
// }
|
69
|
//
|
70
|
// public void setQueueSize(int queueSize) {
|
71
|
// this.queueSize = queueSize;
|
72
|
// }
|
73
|
// }
|
74
|
//
|
75
|
// private Options options;
|
76
|
// private ArrayBlockingQueue<String> queue;
|
77
|
//
|
78
|
// public SchemaOrgIterable(Options options) {
|
79
|
// this.options = options;
|
80
|
// this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize(), true);
|
81
|
// }
|
82
|
//
|
83
|
// public void bootstrap() {
|
84
|
// ExecutorService executor = Executors.newSingleThreadExecutor();
|
85
|
// executor.execute(new Harvester());
|
86
|
// executor.shutdown();
|
87
|
// }
|
88
|
//
|
89
|
// @Override
|
90
|
// public Iterator<String> iterator() {
|
91
|
// return new SchemaOrgIterator(this.options.getSchemaOrgIteratorOptions(), this.queue);
|
92
|
// }
|
93
|
//
|
94
|
// private class Harvester implements Runnable{
|
95
|
//
|
96
|
// @Override
|
97
|
// public void run() {
|
98
|
// this.execute();
|
99
|
// }
|
100
|
//
|
101
|
// private void execute(){
|
102
|
// try {
|
103
|
// SitemapIndexIterator sitemapIndexIterator = new SitemapIndexIterator(options.getSitemapIndexIteratorOptions());
|
104
|
// sitemapIndexIterator.bootstrap();
|
105
|
//
|
106
|
// while (sitemapIndexIterator.hasNext()) {
|
107
|
// String sitemapFile = sitemapIndexIterator.next();
|
108
|
// if(sitemapFile == null) continue;
|
109
|
//
|
110
|
// SitemapFileIterator.Options sitemapFileIteratorOptions = (SitemapFileIterator.Options)options.getSitemapFileIteratorOptions().clone();
|
111
|
// sitemapFileIteratorOptions.setFileUrl(new URL(sitemapFile));
|
112
|
// SitemapFileIterator sitemapFileIterator = new SitemapFileIterator(sitemapFileIteratorOptions);
|
113
|
// sitemapFileIterator.bootstrap();
|
114
|
//
|
115
|
// EndpointAccessIterator endpointAccessIterator = new EndpointAccessIterator(options.getEndpointAccessIteratorOptions(), sitemapFileIterator);
|
116
|
// DatasetMappingIterator datasetMappingIterator = new DatasetMappingIterator(options.getDatasetMappingIteratorOptions(), endpointAccessIterator);
|
117
|
//
|
118
|
// while (datasetMappingIterator.hasNext()) {
|
119
|
// String xml = datasetMappingIterator.next();
|
120
|
// if(xml == null) continue;
|
121
|
//
|
122
|
// queue.put(xml);
|
123
|
// }
|
124
|
// }
|
125
|
// }catch(Exception ex){
|
126
|
// log.error("problem execution harvesting", ex);
|
127
|
// }
|
128
|
// finally {
|
129
|
// try {
|
130
|
// queue.put(Conventions.TerminateHint);
|
131
|
// } catch (Exception ex) {
|
132
|
// log.fatal("could not add termination hint. the process will not terminate gracefully", ex);
|
133
|
// }
|
134
|
// }
|
135
|
// }
|
136
|
// }
|
137
|
//}
|