Revision 62135
Added by Michele Artini over 2 years ago
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/consistency/FindInvalidRepoProfilesJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.consistency; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.concurrent.atomic.AtomicInteger; |
|
7 |
import java.util.stream.Collectors; |
|
8 |
|
|
9 |
import org.apache.commons.lang.StringUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
import org.springframework.beans.factory.annotation.Autowired; |
|
13 |
|
|
14 |
import com.googlecode.sarasvati.Arc; |
|
15 |
import com.googlecode.sarasvati.NodeToken; |
|
16 |
|
|
17 |
import eu.dnetlib.enabling.datasources.DatasourceManagerClients; |
|
18 |
import eu.dnetlib.enabling.datasources.LocalOpenaireDatasourceManager; |
|
19 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
20 |
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryService; |
|
21 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
22 |
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode; |
|
23 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
24 |
import eu.dnetlib.msro.workflows.util.ProgressProvider; |
|
25 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
|
26 |
|
|
27 |
public class FindInvalidRepoProfilesJobNode extends SimpleJobNode implements ProgressJobNode { |
|
28 |
|
|
29 |
@Autowired |
|
30 |
private LocalOpenaireDatasourceManager dsManager; |
|
31 |
|
|
32 |
@Autowired |
|
33 |
private DatasourceManagerClients dsManagerClient; |
|
34 |
|
|
35 |
@Autowired |
|
36 |
private UniqueServiceLocator serviceLocator; |
|
37 |
|
|
38 |
private int current = 0; |
|
39 |
private int total = 0; |
|
40 |
|
|
41 |
private boolean deleteInvalidProfiles = false; |
|
42 |
private boolean registerMissingProfiles = false; |
|
43 |
|
|
44 |
private static final Log log = LogFactory.getLog(FindInvalidRepoProfilesJobNode.class); |
|
45 |
|
|
46 |
@Override |
|
47 |
protected String execute(final NodeToken token) throws Exception { |
|
48 |
|
|
49 |
final Map<String, AtomicInteger> validIds = dsManagerClient.searchSQL("SELECT id FROM dsm_datasources", new HashMap<>()) |
|
50 |
.stream() |
|
51 |
.map(m -> (String) m.get("id")) |
|
52 |
.collect(Collectors.toMap(s -> s, s -> new AtomicInteger(0))); |
|
53 |
|
|
54 |
final List<String> list = serviceLocator.getService(ISLookUpService.class).quickSearchProfile( |
|
55 |
"for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') return concat($x//DATASOURCE_ORIGINAL_ID, ' @@@ ', $x//RESOURCE_IDENTIFIER/@value)"); |
|
56 |
|
|
57 |
this.current = 0; |
|
58 |
this.total = list.size() + validIds.size(); |
|
59 |
int errorInvalidProfiles = 0; |
|
60 |
int errorUnregisteredProfiles = 0; |
|
61 |
int errorTooManyProfiles = 0; |
|
62 |
|
|
63 |
for (final String s : list) { |
|
64 |
current++; |
|
65 |
final String oid = StringUtils.substringBefore(s, "@@@").trim(); |
|
66 |
final String profId = StringUtils.substringAfter(s, "@@@").trim(); |
|
67 |
log.info("Evaluating ds: " + oid); |
|
68 |
if (validIds.containsKey(oid)) { |
|
69 |
validIds.get(oid).incrementAndGet(); |
|
70 |
} else { |
|
71 |
log.warn("Invalid profile " + profId + ", openaireId " + oid + " not registered properly"); |
|
72 |
errorInvalidProfiles++; |
|
73 |
// token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "invalidProfile:" + oid, profId); |
|
74 |
if (deleteInvalidProfiles) { |
|
75 |
log.warn(" - Deleting profile " + profId); |
|
76 |
serviceLocator.getService(ISRegistryService.class).deleteProfile(profId); |
|
77 |
} |
|
78 |
} |
|
79 |
} |
|
80 |
|
|
81 |
for (final Map.Entry<String, AtomicInteger> e : validIds.entrySet()) { |
|
82 |
current++; |
|
83 |
final String dsId = e.getKey(); |
|
84 |
final int n = e.getValue().get(); |
|
85 |
if (n == 0) { |
|
86 |
errorUnregisteredProfiles++; |
|
87 |
// token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "unregisterdProfile:" + dsId, dsId); |
|
88 |
log.warn("Missing profile for ds " + dsId); |
|
89 |
if (registerMissingProfiles) { |
|
90 |
dsManager.setManaged(dsId, dsManager.isManaged(dsId)); // This command should regenerate the repo profile |
|
91 |
} |
|
92 |
} else if (n > 1) { |
|
93 |
errorTooManyProfiles++; |
|
94 |
log.error("Too many profiles registerd " + n + " for ds " + dsId); |
|
95 |
// token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "tooManyProfiles:" + dsId, n); |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "errorInvalidProfiles", errorInvalidProfiles); |
|
100 |
token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "errorUnregisteredProfiles", errorUnregisteredProfiles); |
|
101 |
token.getEnv().setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "errorTooManyProfiles", errorTooManyProfiles); |
|
102 |
|
|
103 |
return Arc.DEFAULT_ARC; |
|
104 |
} |
|
105 |
|
|
106 |
@Override |
|
107 |
public ProgressProvider getProgressProvider() { |
|
108 |
return new ProgressProvider() { |
|
109 |
|
|
110 |
@Override |
|
111 |
public int getTotalValue() { |
|
112 |
return total; |
|
113 |
} |
|
114 |
|
|
115 |
@Override |
|
116 |
public int getCurrentValue() { |
|
117 |
return current; |
|
118 |
} |
|
119 |
|
|
120 |
@Override |
|
121 |
public boolean isInaccurate() { |
|
122 |
return false; |
|
123 |
} |
|
124 |
}; |
|
125 |
} |
|
126 |
|
|
127 |
public boolean isDeleteInvalidProfiles() { |
|
128 |
return deleteInvalidProfiles; |
|
129 |
} |
|
130 |
|
|
131 |
public void setDeleteInvalidProfiles(final boolean deleteInvalidProfiles) { |
|
132 |
this.deleteInvalidProfiles = deleteInvalidProfiles; |
|
133 |
} |
|
134 |
|
|
135 |
public boolean isRegisterMissingProfiles() { |
|
136 |
return registerMissingProfiles; |
|
137 |
} |
|
138 |
|
|
139 |
public void setRegisterMissingProfiles(final boolean registerMissingProfiles) { |
|
140 |
this.registerMissingProfiles = registerMissingProfiles; |
|
141 |
} |
|
142 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/patch-db/patch.sql | ||
---|---|---|
1 |
ALTER TABLE datasources |
|
2 |
ADD COLUMN issn CHARACTER VARYING(20); |
|
3 |
ALTER TABLE datasources |
|
4 |
ADD COLUMN eissn CHARACTER VARYING(20); |
|
5 |
ALTER TABLE datasources |
|
6 |
ADD COLUMN lissn CHARACTER VARYING(20); |
|
7 |
ALTER TABLE datasources |
|
8 |
ALTER COLUMN databaseaccesstype TYPE VARCHAR(32); |
|
9 |
ALTER TABLE datasources |
|
10 |
ALTER COLUMN datauploadtype TYPE VARCHAR(32); |
|
11 |
ALTER TABLE datasources |
|
12 |
ALTER COLUMN databaseaccessrestriction TYPE VARCHAR(32); |
|
13 |
ALTER TABLE datasources |
|
14 |
ALTER COLUMN datauploadrestriction TYPE VARCHAR(32); |
|
15 |
ALTER TABLE datasources |
|
16 |
ALTER COLUMN qualitymanagementkind TYPE VARCHAR(32); |
|
17 |
|
|
18 |
ALTER TABLE datasources |
|
19 |
ADD COLUMN registeredby CHARACTER VARYING(255); |
|
20 |
|
|
21 |
DELETE FROM apicollections |
|
22 |
WHERE api NOT LIKE 'api_________::%'; |
|
23 |
DELETE FROM api |
|
24 |
WHERE id NOT LIKE 'api_________::%'; |
|
25 |
|
|
26 |
ALTER TABLE apicollections |
|
27 |
DROP COLUMN name; |
|
28 |
ALTER TABLE apicollections |
|
29 |
RENAME COLUMN accessparam TO param; |
|
30 |
ALTER TABLE apicollections |
|
31 |
RENAME COLUMN accessvalue TO original; |
|
32 |
ALTER TABLE apicollections |
|
33 |
ADD COLUMN edited VARCHAR(255) DEFAULT NULL; |
|
34 |
ALTER TABLE apicollections |
|
35 |
ADD COLUMN accessparam BOOLEAN DEFAULT TRUE; |
|
36 |
ALTER TABLE apicollections |
|
37 |
DROP CONSTRAINT apicollection_pkey; |
|
38 |
ALTER TABLE apicollections |
|
39 |
ALTER COLUMN original SET DEFAULT ''; |
|
40 |
|
|
41 |
UPDATE apicollections |
|
42 |
SET param = 'set' |
|
43 |
WHERE param = 'SET'; |
|
44 |
UPDATE apicollections |
|
45 |
SET param = 'format' |
|
46 |
WHERE param = 'FORMAT'; |
|
47 |
UPDATE apicollections |
|
48 |
SET param = 'baseUrl' |
|
49 |
WHERE param = 'BASEURL'; |
|
50 |
|
|
51 |
-- FIX A PROBLEM WITH MULTIPLE SET |
|
52 |
DELETE FROM apicollections |
|
53 |
WHERE api = 'api_________::opendoar____::1560::1' AND param = 'set'; |
|
54 |
INSERT INTO apicollections (param, original, api) VALUES |
|
55 |
('set', 'hdl_10251_3829, hdl_10251_321, hdl_10251_11256, hdl_10251_11206, hdl_10251_8702, hdl_10251_3026, hdl_10251_11076', |
|
56 |
'api_________::opendoar____::1560::1'); |
|
57 |
DELETE FROM apicollections |
|
58 |
WHERE api = 'api_________::driver______::d0b68287-678d-4efc-b354-c47cbacaed3c::0' AND param = 'set'; |
|
59 |
INSERT INTO apicollections (param, original, api) VALUES |
|
60 |
('set', 'pub-type:article, pub-type:conf-proceeding, pub-type:lecture, pub-type:paper, pub-type:report', |
|
61 |
'api_________::driver______::d0b68287-678d-4efc-b354-c47cbacaed3c::0'); |
|
62 |
DELETE FROM apicollections |
|
63 |
WHERE api = 'api_________::driver______::79476713-c428-4f23-bf97-8f27500f754a::0' AND param = 'set'; |
|
64 |
INSERT INTO apicollections (param, original, api) |
|
65 |
VALUES ('set', 'dad:SIQG, jhs,sal:CNF+PA', 'api_________::driver______::79476713-c428-4f23-bf97-8f27500f754a::0'); |
|
66 |
DELETE FROM apicollections |
|
67 |
WHERE api = 'api_________::opendoar____::202::0' AND param = 'set'; |
|
68 |
INSERT INTO apicollections (param, original, api) VALUES ('set', 'hdl_1887_4540, hdl_1887_4951', 'api_________::opendoar____::202::0'); |
|
69 |
DELETE FROM apicollections |
|
70 |
WHERE api = 'api_________::driver______::c75b9f23-25a8-4e45-aadd-ba449211e2a8::0' AND param = 'set'; |
|
71 |
INSERT INTO apicollections (param, original, api) VALUES ('set', 'ijdc:ART, ijdc:PAP', 'api_________::driver______::c75b9f23-25a8-4e45-aadd-ba449211e2a8::0'); |
|
72 |
-- AND FIX |
|
73 |
|
|
74 |
ALTER TABLE apicollections |
|
75 |
ADD CONSTRAINT apicollection_pkey PRIMARY KEY (api, param); |
|
76 |
|
|
77 |
INSERT INTO apicollections (param, original, edited, api) SELECT |
|
78 |
'baseUrl', |
|
79 |
COALESCE(original, ''), |
|
80 |
COALESCE(edited, ''), |
|
81 |
id |
|
82 |
FROM api |
|
83 |
WHERE originalprotocolclass = editedprotocolclass; |
|
84 |
INSERT INTO apicollections (param, original, api) SELECT |
|
85 |
'baseUrl', |
|
86 |
COALESCE(original, ''), |
|
87 |
id |
|
88 |
FROM api |
|
89 |
WHERE originalprotocolclass != editedprotocolclass OR editedprotocolclass IS NULL; |
|
90 |
UPDATE apicollections |
|
91 |
SET edited = NULL |
|
92 |
WHERE edited = ''; |
|
93 |
|
|
94 |
UPDATE datasources |
|
95 |
SET datasourceclass = 'websource' |
|
96 |
WHERE id = 'openaire____::webcrawl'; |
|
97 |
|
|
98 |
INSERT INTO apicollections (param, original, api) VALUES ('splitOnElement', 'repository', 'api_________::opendoar::0'); |
|
99 |
INSERT INTO apicollections (param, original, api) VALUES ('splitOnElement', 'ROW', 'api_________::re3data::0'); |
|
100 |
INSERT INTO apicollections (param, original, api) VALUES ('splitOnElement', 'Record', 'api_________::wellcometrust::0'); |
|
101 |
INSERT INTO apicollections (param, original, api) VALUES ('splitOnElement', 'ROW', 'api_________::corda::0'); |
|
102 |
|
|
103 |
UPDATE apicollections |
|
104 |
SET _dnet_resource_identifier_ = api || '@@' || param; |
|
105 |
|
|
106 |
INSERT INTO scheme (_dnet_resource_identifier_, code, name) |
|
107 |
VALUES ('dnet:content_description_typologies', 'dnet:content_description_typologies', 'D-Net Content Description Typologies'); |
|
108 |
|
|
109 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('UNKNOWN', 'UNKNOWN', 'UNKNOWN'); |
|
110 |
|
|
111 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('native', 'native', 'native'); |
|
112 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('native@@dnet:compatibilityLevel', 'native', 'dnet:compatibilityLevel'); |
|
113 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('files', 'files', 'files'); |
|
114 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('files@@dnet:compatibilityLevel', 'files', 'dnet:compatibilityLevel'); |
|
115 |
|
|
116 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('metadata', 'metadata', 'metadata'); |
|
117 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('file::WoS', 'file::WoS', 'file::WoS'); |
|
118 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('file::PDF', 'file::PDF', 'file::PDF'); |
|
119 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('file::EuropePMC', 'file::EuropePMC', 'file::EuropePMC'); |
|
120 |
|
|
121 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) |
|
122 |
VALUES ('metadata@@dnet:content_description_typologies', 'metadata', 'dnet:content_description_typologies'); |
|
123 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) |
|
124 |
VALUES ('file::WoS@@dnet:content_description_typologies', 'file::WoS', 'dnet:content_description_typologies'); |
|
125 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) |
|
126 |
VALUES ('file::PDF@@dnet:content_description_typologies', 'file::PDF', 'dnet:content_description_typologies'); |
|
127 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) |
|
128 |
VALUES ('file::EuropePMC@@dnet:content_description_typologies', 'file::EuropePMC', 'dnet:content_description_typologies'); |
|
129 |
|
|
130 |
ALTER TABLE api |
|
131 |
ADD COLUMN contentdescriptionclass VARCHAR(255) DEFAULT 'metadata'; |
|
132 |
ALTER TABLE api |
|
133 |
ADD COLUMN contentdescriptionscheme VARCHAR(255) DEFAULT 'dnet:content_description_typologies'; |
|
134 |
ALTER TABLE api |
|
135 |
DROP COLUMN editedprotocolclass; |
|
136 |
ALTER TABLE api |
|
137 |
DROP COLUMN editedprotocolscheme; |
|
138 |
ALTER TABLE api |
|
139 |
DROP COLUMN original; |
|
140 |
ALTER TABLE api |
|
141 |
DROP COLUMN edited; |
|
142 |
ALTER TABLE api |
|
143 |
DROP COLUMN useedited; |
|
144 |
ALTER TABLE api |
|
145 |
RENAME COLUMN originalprotocolclass TO protocolclass; |
|
146 |
ALTER TABLE api |
|
147 |
RENAME COLUMN originalprotocolscheme TO protocolscheme; |
|
148 |
ALTER TABLE api |
|
149 |
ADD COLUMN active BOOLEAN DEFAULT FALSE; |
|
150 |
ALTER TABLE api |
|
151 |
ADD COLUMN removable BOOLEAN DEFAULT FALSE; |
|
152 |
ALTER TABLE api |
|
153 |
ADD COLUMN typologyclass VARCHAR(255) DEFAULT 'UNKNOWN'; |
|
154 |
ALTER TABLE api |
|
155 |
ADD COLUMN typologyscheme VARCHAR(255) DEFAULT 'dnet:datasource_typologies'; |
|
156 |
ALTER TABLE api |
|
157 |
ADD CONSTRAINT api_typologyclass_fkey FOREIGN KEY (typologyclass) REFERENCES class (code); |
|
158 |
ALTER TABLE api |
|
159 |
ADD CONSTRAINT api_typologyscheme_fkey FOREIGN KEY (typologyscheme) REFERENCES scheme (code); |
|
160 |
ALTER TABLE api |
|
161 |
ADD COLUMN compatibilityclass VARCHAR(255) DEFAULT 'UNKNOWN'; |
|
162 |
ALTER TABLE api |
|
163 |
ADD COLUMN compatibilityscheme VARCHAR(255) DEFAULT 'dnet:compatibilityLevel'; |
|
164 |
ALTER TABLE api |
|
165 |
ADD CONSTRAINT api_compatibilityclass_fkey FOREIGN KEY (compatibilityclass) REFERENCES class (code); |
|
166 |
ALTER TABLE api |
|
167 |
ADD CONSTRAINT api_compatibilityscheme_fkey FOREIGN KEY (compatibilityscheme) REFERENCES scheme (code); |
|
168 |
|
|
169 |
ALTER TABLE api |
|
170 |
ADD CONSTRAINT api_contentdescriptionclass_fkey FOREIGN KEY (contentdescriptionclass) REFERENCES class (code); |
|
171 |
ALTER TABLE api |
|
172 |
ADD CONSTRAINT api_contentdescriptionscheme_fkey FOREIGN KEY (contentdescriptionscheme) REFERENCES scheme (code); |
|
173 |
|
|
174 |
UPDATE api |
|
175 |
SET typologyclass = d.datasourceclass FROM datasources d |
|
176 |
WHERE datasource = d.id; |
|
177 |
--UPDATE api SET compatibilityclass = 'driver' where typology = 'driver'; |
|
178 |
--UPDATE api SET compatibilityclass = 'openaire2.0' FROM apicollections ac WHERE typology = 'openaire' AND id = ac.api AND ac.param = 'set' AND ac.original like '%ec_fundedresources%'; |
|
179 |
--UPDATE api SET compatibilityclass = 'openaire3.0' FROM apicollections ac WHERE typology = 'openaire' AND id = ac.api AND ac.param = 'set' AND ac.original like '%openaire%'; |
|
180 |
|
|
181 |
-- Jochen version |
|
182 |
UPDATE api |
|
183 |
SET compatibilityclass = ds.openairecompatibilityclass FROM datasources ds |
|
184 |
WHERE ds.id = api.datasource; |
|
185 |
UPDATE api |
|
186 |
SET compatibilityclass = 'driver' |
|
187 |
WHERE compatibilityclass = 'driver-openaire2.0' AND typology = 'driver'; |
|
188 |
UPDATE api |
|
189 |
SET compatibilityclass = 'openaire2.0' |
|
190 |
WHERE compatibilityclass = 'driver-openaire2.0' AND typology = 'openaire'; |
|
191 |
UPDATE api |
|
192 |
SET compatibilityclass = 'UNKNOWN' |
|
193 |
WHERE compatibilityclass = 'openaire2.0-openaire3.0' |
|
194 |
-- END Jochen |
|
195 |
|
|
196 |
UPDATE api |
|
197 |
SET (typologyclass, compatibilityclass) = ('entityregistry', 'native') |
|
198 |
WHERE id = 'api_________::opendoar::0'; |
|
199 |
UPDATE api |
|
200 |
SET (typologyclass, compatibilityclass) = ('entityregistry', 'native') |
|
201 |
WHERE id = 'api_________::re3data::0'; |
|
202 |
UPDATE api |
|
203 |
SET (typologyclass, compatibilityclass) = ('entityregistry', 'native') |
|
204 |
WHERE id = 'api_________::wellcometrust::0'; |
|
205 |
UPDATE api |
|
206 |
SET (typologyclass, compatibilityclass) = ('entityregistry', 'native') |
|
207 |
WHERE id = 'api_________::corda::0'; |
|
208 |
|
|
209 |
ALTER TABLE api |
|
210 |
DROP COLUMN typology; |
|
211 |
ALTER TABLE datasources |
|
212 |
DROP COLUMN openairecompatibilityclass; |
|
213 |
ALTER TABLE datasources |
|
214 |
DROP COLUMN openairecompatibilityscheme; |
|
215 |
|
|
216 |
DELETE FROM class_scheme |
|
217 |
WHERE scheme = 'dnet:compatibilityLevel' AND class = 'driver-openaire2.0'; |
|
218 |
DELETE FROM class_scheme |
|
219 |
WHERE scheme = 'dnet:compatibilityLevel' AND class = 'driver-openaire2.0-openaire3.0'; |
|
220 |
DELETE FROM class_scheme |
|
221 |
WHERE scheme = 'dnet:compatibilityLevel' AND class = 'driver-openaire3.0'; |
|
222 |
DELETE FROM class_scheme |
|
223 |
WHERE scheme = 'dnet:compatibilityLevel' AND class = 'openaire2.0-openaire3.0'; |
|
224 |
DELETE FROM class |
|
225 |
WHERE code = 'driver-openaire2.0'; |
|
226 |
DELETE FROM class |
|
227 |
WHERE code = 'driver-openaire2.0-openaire3.0'; |
|
228 |
DELETE FROM class |
|
229 |
WHERE code = 'driver-openaire3.0'; |
|
230 |
DELETE FROM class |
|
231 |
WHERE code = 'openaire2.0-openaire3.0'; |
|
232 |
|
|
233 |
DELETE FROM apicollections |
|
234 |
WHERE api = 'api_________::openaire____::webcrawl::0'; |
|
235 |
DELETE FROM api |
|
236 |
WHERE id = 'api_________::openaire____::webcrawl::0'; |
|
237 |
|
|
238 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('fileCSV', 'fileCSV', 'fileCSV'); |
|
239 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('fileCSV@@dnet:protocols', 'fileCSV', 'dnet:protocols'); |
|
240 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('files_from_mdstore', 'files_from_mdstore', 'files_from_mdstore'); |
|
241 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('files_from_mdstore@@dnet:protocols', 'files_from_mdstore', 'dnet:protocols'); |
|
242 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('files_from_metadata', 'files_from_metadata', 'files_from_metadata'); |
|
243 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('files_from_metadata@@dnet:protocols', 'files_from_metadata', 'dnet:protocols'); |
|
244 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('files_by_rpc', 'files_by_rpc', 'files_by_rpc'); |
|
245 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('files_by_rpc@@dnet:protocols', 'files_by_rpc', 'dnet:protocols'); |
|
246 |
INSERT INTO class (_dnet_resource_identifier_, code, name) VALUES ('filesystem', 'filesystem', 'filesystem'); |
|
247 |
INSERT INTO class_scheme (_dnet_resource_identifier_, class, scheme) VALUES ('filesystem@@dnet:protocols', 'filesystem', 'dnet:protocols'); |
|
248 |
|
|
249 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass) |
|
250 |
VALUES ('api_________::openaire____::webcrawl::csv', 'api_________::openaire____::webcrawl::csv', 'fileCSV', 'openaire____::webcrawl', 'websource', 'native'); |
|
251 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
252 |
('api_________::openaire____::webcrawl::files', 'api_________::openaire____::webcrawl::files', 'files_from_mdstore', 'openaire____::webcrawl', 'websource', |
|
253 |
'files', 'file::WoS'); |
|
254 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
255 |
('api_________::opendoar____::18::files', 'api_________::opendoar____::18::files', 'files_from_metadata', 'opendoar____::18', 'pubsrepository::unknown', |
|
256 |
'files', 'file::PDF'); |
|
257 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
258 |
('api_________::opendoar____::908::files', 'api_________::opendoar____::908::files', 'files_from_metadata', 'opendoar____::908', 'pubsrepository::unknown', |
|
259 |
'files', 'file::PDF'); |
|
260 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
261 |
('api_________::opendoar____::2367::files', 'api_________::opendoar____::2367::files', 'files_from_metadata', 'opendoar____::2367', 'pubsrepository::unknown', |
|
262 |
'files', 'file::PDF'); |
|
263 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
264 |
('api_________::opendoar____::165::files', 'api_________::opendoar____::165::files', 'files_from_metadata', 'opendoar____::165', 'pubsrepository::unknown', |
|
265 |
'files', 'file::PDF'); |
|
266 |
INSERT INTO api (_dnet_resource_identifier_, id, protocolclass, datasource, typologyclass, compatibilityclass, contentdescriptionclass) VALUES |
|
267 |
('api_________::infrastruct_::openaire::jdbc', 'api_________::infrastruct_::openaire::jdbc', 'jdbc', 'infrastruct_::openaire', 'scholarcomminfra', |
|
268 |
'openaire2.0', 'metadata'); |
|
269 |
|
|
270 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', 'jdbc:postgresql://localhost:5432', 'api_________::infrastruct_::openaire::jdbc'); |
|
271 |
INSERT INTO apicollections (param, original, api) VALUES ('dbName', 'dnet_openaireplus', 'api_________::infrastruct_::openaire::jdbc'); |
|
272 |
INSERT INTO apicollections (param, original, api) VALUES |
|
273 |
('sql', 'SELECT regexp_replace(xml, ''&(?!amp;)'', ''&'', ''g'') as xml FROM claims WHERE type = ''dmf2actions'' and set = ''userclaim_dmf''', |
|
274 |
'api_________::infrastruct_::openaire::jdbc'); |
|
275 |
INSERT INTO apicollections (param, original, api) VALUES ('format', 'OAF', 'api_________::infrastruct_::openaire::jdbc'); |
|
276 |
|
|
277 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', 'file:///var/lib/dnet/wos/metadata.tsv', 'api_________::openaire____::webcrawl::csv'); |
|
278 |
INSERT INTO apicollections (param, original, api) VALUES ('separator', E'\\t', 'api_________::openaire____::webcrawl::csv'); |
|
279 |
INSERT INTO apicollections (param, original, api) VALUES ('identifier', '56', 'api_________::openaire____::webcrawl::csv'); |
|
280 |
INSERT INTO apicollections (param, original, api) VALUES ('header', 'true', 'api_________::openaire____::webcrawl::csv'); |
|
281 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', '', 'api_________::openaire____::webcrawl::files'); |
|
282 |
INSERT INTO apicollections (param, original, api) VALUES ('mdstoreId', '', 'api_________::openaire____::webcrawl::files'); |
|
283 |
INSERT INTO apicollections (param, original, api) VALUES ('xpath', '//column[./@isID=''true'']', 'api_________::openaire____::webcrawl::files'); |
|
284 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', '', 'api_________::opendoar____::18::files'); |
|
285 |
INSERT INTO apicollections (param, original, api) VALUES ('mdstoreId', '', 'api_________::opendoar____::18::files'); |
|
286 |
INSERT INTO apicollections (param, original, api) VALUES ('xpath', '//dc:identifier', 'api_________::opendoar____::18::files'); |
|
287 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', '', 'api_________::opendoar____::908::files'); |
|
288 |
INSERT INTO apicollections (param, original, api) VALUES ('mdstoreId', '', 'api_________::opendoar____::908::files'); |
|
289 |
INSERT INTO apicollections (param, original, api) VALUES ('xpath', '//dc:identifier', 'api_________::opendoar____::908::files'); |
|
290 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', '', 'api_________::opendoar____::2367::files'); |
|
291 |
INSERT INTO apicollections (param, original, api) VALUES ('mdstoreId', '', 'api_________::opendoar____::2367::files'); |
|
292 |
INSERT INTO apicollections (param, original, api) VALUES ('xpath', '//dc:identifier', 'api_________::opendoar____::2367::files'); |
|
293 |
|
|
294 |
INSERT INTO apicollections (param, original, api) VALUES ('baseUrl', '', 'api_________::opendoar____::165::files'); |
|
295 |
INSERT INTO apicollections (param, original, api) VALUES ('mdstoreId', '', 'api_________::opendoar____::165::files'); |
|
296 |
INSERT INTO apicollections (param, original, api) VALUES ('xpath', '//dc:identifier', 'api_________::opendoar____::165::files'); |
|
297 |
|
|
298 |
INSERT INTO apicollections (param, original, api, accessparam) VALUES ('metadata_identifier_path', '//repository/@rID', 'api_________::opendoar::0', FALSE); |
|
299 |
INSERT INTO apicollections (param, original, api, accessparam) |
|
300 |
VALUES ('metadata_identifier_path', '//*[local-name()=''repository'']/*[local-name()=''identifier'']', 'api_________::re3data::0', FALSE); |
|
301 |
INSERT INTO apicollections (param, original, api, accessparam) VALUES ('metadata_identifier_path', '//Position', 'api_________::wellcometrust::0', FALSE); |
|
302 |
INSERT INTO apicollections (param, original, api, accessparam) VALUES ('metadata_identifier_path', '//ProjectId', 'api_________::corda::0', FALSE); |
|
303 |
|
|
304 |
DELETE FROM apicollections |
|
305 |
WHERE param = 'splitOnElement' AND api = 'api_________::corda::0'; |
|
306 |
INSERT INTO apicollections (param, original, api) VALUES ('filter', '*.xml', 'api_________::corda::0'); |
|
307 |
INSERT INTO apicollections (param, original, api) VALUES ('username', '***', 'api_________::corda::0'); |
|
308 |
INSERT INTO apicollections (param, original, api) VALUES ('password', '***', 'api_________::corda::0'); |
|
309 |
INSERT INTO apicollections (param, original, api) VALUES ('recursive', 'false', 'api_________::corda::0'); |
|
310 |
|
|
311 |
UPDATE apicollections |
|
312 |
SET original = 'ftp://fts.ec.europa.eu' |
|
313 |
WHERE api = 'api_________::corda::0' AND param = 'baseUrl'; |
|
314 |
UPDATE api |
|
315 |
SET protocolclass = 'ftp' |
|
316 |
WHERE id = 'api_________::corda::0'; |
|
317 |
UPDATE api |
|
318 |
SET protocolclass = 'filesystem' |
|
319 |
WHERE id = 'api_________::re3data::0'; |
|
320 |
UPDATE apicollections |
|
321 |
SET original = '/var/lib/dnet/re3data' |
|
322 |
WHERE api = 'api_________::re3data::0' AND param = 'baseUrl'; |
|
323 |
UPDATE api |
|
324 |
SET compatibilityclass = 'native' |
|
325 |
WHERE id = 'api_________::datacite::0'; |
|
326 |
|
|
327 |
-- metadata_identifier_path for Datacite and WoS |
|
328 |
INSERT INTO apicollections (param, original, api, accessparam) |
|
329 |
VALUES ('metadata_identifier_path', '//*[local-name()=''header'']/*[local-name()=''identifier'']', 'api_________::datacite::0', FALSE); |
|
330 |
INSERT INTO apicollections (param, original, api, accessparam) |
|
331 |
VALUES ('metadata_identifier_path', '//*[local-name()=''column'' and @isID=''true'']', 'api_________::openaire____::webcrawl::csv', FALSE); |
|
332 |
|
|
333 |
UPDATE apicollections |
|
334 |
SET _dnet_resource_identifier_ = api || '@@' || param; |
|
335 |
|
|
336 |
-- get the orgs whose legalshortname is in upper case (probably good sort name, i.e. acronym) |
|
337 |
select id, legalshortname from dsm_organizations where id like 're3data%' and legalshortname ~ '^[^a-z]*$'; |
|
338 |
-- removed legalshortnames of re3data orgs when the legalshortname is not all uppercase |
|
339 |
update dsm_organizations set legalshortname = '' where id like 're3data%' and legalshortname !~ '^[^a-z]*$'; |
|
340 |
|
|
341 |
|
|
342 |
|
|
343 |
|
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/patch-db/migrate_dsm.sql | ||
---|---|---|
1 |
-- noinspection SqlNoDataSourceInspectionForFile |
|
2 |
|
|
3 |
-- cleanup the unnecessary functions: the following SQL produces the deletion statement list |
|
4 |
SELECT 'DROP FUNCTION ' || ns.nspname || '.' || proname || '(' || oidvectortypes(proargtypes) || ') CASCADE;' |
|
5 |
FROM pg_proc |
|
6 |
INNER JOIN pg_namespace ns ON (pg_proc.pronamespace = ns.oid) |
|
7 |
WHERE ns.nspname = 'public' |
|
8 |
ORDER BY proname; |
|
9 |
|
|
10 |
-- cleanup the unnecessary triggers: the following SQL produces the deletion statement list |
|
11 |
SELECT 'DROP TRIGGER ' || trg.tgname || ' ON ' || tbl.relname || ';' |
|
12 |
FROM pg_trigger trg |
|
13 |
JOIN pg_class tbl on trg.tgrelid = tbl.oid |
|
14 |
JOIN pg_namespace ns ON ns.oid = tbl.relnamespace |
|
15 |
WHERE trg.tgname like 'notify_%'; |
|
16 |
|
|
17 |
-- export the new dnet_dsm schema using |
|
18 |
-- pg_dump --schema-only -h localhost -U dnet dnet_dsm | gzip -c > /tmp/dnet_dsm.sql.gz |
|
19 |
-- rename tables and index names in order to include prefix "dsm_" |
|
20 |
-- import the dsm table schema to the existing openaire database: |
|
21 |
-- psql -h localhost -U dnet dnet_openaireplus -f dnet_dsm.sql |
|
22 |
|
|
23 |
ALTER TABLE dsm_datasources ALTER COLUMN officialname TYPE VARCHAR(512); |
|
24 |
ALTER TABLE dsm_datasources ALTER COLUMN englishname TYPE VARCHAR(512); |
|
25 |
ALTER TABLE dsm_datasources ALTER COLUMN missionstatementurl TYPE VARCHAR(512); |
|
26 |
ALTER TABLE dsm_datasources ALTER COLUMN citationguidelineurl TYPE VARCHAR(512); |
|
27 |
ALTER TABLE dsm_datasources ALTER COLUMN languages TYPE text; |
|
28 |
ALTER TABLE dsm_datasources ALTER COLUMN od_contenttypes TYPE text; |
|
29 |
ALTER TABLE dsm_datasources ALTER COLUMN subjects TYPE text; |
|
30 |
ALTER TABLE dsm_datasources ALTER COLUMN description TYPE text; |
|
31 |
ALTER TABLE dsm_datasources ALTER COLUMN certificates TYPE text; |
|
32 |
ALTER TABLE dsm_datasources ALTER COLUMN pidsystems TYPE text; |
|
33 |
|
|
34 |
-- insert into the new dsm_* tables picking the information from the old tables |
|
35 |
insert into dsm_datasources (id, officialname, englishname, websiteurl, logourl, contactemail, latitude, longitude, timezone, namespaceprefix, languages, od_contenttypes, collectedfrom, dateofvalidation, |
|
36 |
typology, provenanceaction, dateofcollection, platform, activationid, description,releasestartdate, releaseenddate, missionstatementurl, dataprovider, serviceprovider, |
|
37 |
databaseaccesstype, datauploadtype,databaseaccessrestriction, datauploadrestriction, versioning, citationguidelineurl, qualitymanagementkind,pidsystems, certificates, |
|
38 |
aggregator, issn, eissn, lissn, registeredby, subjects) |
|
39 |
select d.id, |
|
40 |
d.officialname, |
|
41 |
d.englishname, |
|
42 |
d.websiteurl, |
|
43 |
d.logourl, |
|
44 |
d.contactemail, |
|
45 |
d.latitude, |
|
46 |
d.longitude, |
|
47 |
d.timezone, |
|
48 |
d.namespaceprefix as namespaceprefix, |
|
49 |
d.od_languages as languages, |
|
50 |
d.od_contenttypes, |
|
51 |
d.collectedfrom, |
|
52 |
d.dateofvalidation, |
|
53 |
d.datasourceclass as typology, |
|
54 |
d.provenanceactionclass as provenanceaction, |
|
55 |
d.dateofcollection, |
|
56 |
d.typology as platform, |
|
57 |
d.activationid, |
|
58 |
d.description, |
|
59 |
d.releasestartdate, |
|
60 |
d.releaseenddate, |
|
61 |
d.missionstatementurl, |
|
62 |
d.dataprovider as dataprovider, |
|
63 |
d.serviceprovider as serviceprovider, |
|
64 |
d.databaseaccesstype, |
|
65 |
d.datauploadtype, |
|
66 |
d.databaseaccessrestriction, |
|
67 |
d.datauploadrestriction, |
|
68 |
d.versioning, |
|
69 |
d.citationguidelineurl, |
|
70 |
d.qualitymanagementkind, |
|
71 |
d.pidsystems, |
|
72 |
d.certificates, |
|
73 |
d.aggregator, |
|
74 |
d.issn, |
|
75 |
d.eissn, |
|
76 |
d.lissn, |
|
77 |
d.registeredby, |
|
78 |
ARRAY_AGG(s.name) as subjects |
|
79 |
from datasources d |
|
80 |
left outer join datasource_subject sd on (d.id = sd.datasource) |
|
81 |
left outer join subjects s on (sd.subject = s.id) |
|
82 |
group by |
|
83 |
d.id; |
|
84 |
|
|
85 |
insert into dsm_api (id, protocol, datasource, contentdescription, active, removable, typology, compatibility) |
|
86 |
select a.id, |
|
87 |
a.protocolclass as protocol, |
|
88 |
a.datasource, |
|
89 |
a.contentdescriptionclass as contentdescription, |
|
90 |
a.active, |
|
91 |
a.removable, |
|
92 |
a.typologyclass as typology, |
|
93 |
a.compatibilityclass as compatibility |
|
94 |
from api a; |
|
95 |
|
|
96 |
update dsm_api da set metadata_identifier_path = ac.original from apicollections ac where da.id = ac.api and ac.param = 'metadata_identifier_path' ; |
|
97 |
update dsm_api da set last_collection_total = ac.original::integer from apicollections ac where da.id = ac.api and ac.param = 'last_collection_total' ; |
|
98 |
update dsm_api da set last_collection_date = ac.original::date from apicollections ac where da.id = ac.api and ac.param = 'last_collection_date' ; |
|
99 |
update dsm_api da set last_collection_mdid = ac.original from apicollections ac where da.id = ac.api and ac.param = 'last_collection_mdId' ; |
|
100 |
update dsm_api da set last_aggregation_total = ac.original::integer from apicollections ac where da.id = ac.api and ac.param = 'last_aggregation_total' ; |
|
101 |
update dsm_api da set last_aggregation_date = ac.original::date from apicollections ac where da.id = ac.api and ac.param = 'last_aggregation_date' ; |
|
102 |
update dsm_api da set last_aggregation_mdid = ac.original from apicollections ac where da.id = ac.api and ac.param = 'last_aggregation_mdId' ; |
|
103 |
update dsm_api da set last_download_total = ac.original::integer from apicollections ac where da.id = ac.api and ac.param = 'last_download_total' ; |
|
104 |
update dsm_api da set last_download_date = ac.original::date from apicollections ac where da.id = ac.api and ac.param = 'last_download_date' ; |
|
105 |
update dsm_api da set last_download_objid = ac.original from apicollections ac where da.id = ac.api and ac.param = 'last_download_objId' ; |
|
106 |
update dsm_api da set last_validation_job = ac.original from apicollections ac where da.id = ac.api and ac.param = 'last_validation_job' ; |
|
107 |
update dsm_api da set baseurl = CASE WHEN (ac.edited is not null and ac.edited != '') THEN ac.edited ELSE ac.original END from apicollections ac where da.id = ac.api and ac.param = 'baseUrl' ; |
|
108 |
|
|
109 |
insert into dsm_apiparams (param,value, api) |
|
110 |
select |
|
111 |
a.param, |
|
112 |
CASE |
|
113 |
WHEN (a.edited is not null and a.edited != '') THEN a.edited ELSE a.original |
|
114 |
END AS value, |
|
115 |
a.api |
|
116 |
from apicollections a |
|
117 |
where param != 'baseUrl' AND accessparam is true; |
|
118 |
|
|
119 |
|
|
120 |
INSERT INTO dsm_organizations (id, legalshortname,legalname,websiteurl, logourl,ec_legalbody, ec_legalperson, ec_nonprofit, ec_researchorganization, ec_highereducation, ec_internationalorganizationeurinterests, |
|
121 |
ec_internationalorganization, ec_enterprise, ec_smevalidated, ec_nutscode, country, collectedfrom, dateofcollection, provenanceaction) |
|
122 |
SELECT |
|
123 |
o.id, |
|
124 |
o.legalshortname, |
|
125 |
o.legalname, |
|
126 |
o.websiteurl, |
|
127 |
o.logourl, |
|
128 |
o.ec_legalbody, |
|
129 |
o.ec_legalperson, |
|
130 |
o.ec_nonprofit, |
|
131 |
o.ec_researchorganization, |
|
132 |
o.ec_highereducation, |
|
133 |
o.ec_internationalorganizationeurinterests, |
|
134 |
o.ec_internationalorganization, |
|
135 |
o.ec_enterprise, |
|
136 |
o.ec_smevalidated, |
|
137 |
o.ec_nutscode, |
|
138 |
o.countryclass AS country, |
|
139 |
o.collectedfrom, |
|
140 |
o.dateofcollection, |
|
141 |
o.provenanceactionclass AS provenanceaction |
|
142 |
FROM organizations o; |
|
143 |
|
|
144 |
INSERT INTO dsm_datasource_organization (datasource, organization) |
|
145 |
SELECT |
|
146 |
dorg.datasource, |
|
147 |
dorg.organization |
|
148 |
FROM datasource_organization dorg; |
|
149 |
|
|
150 |
|
|
151 |
INSERT INTO dsm_identities (pid, issuertype) |
|
152 |
SELECT |
|
153 |
i.pid, |
|
154 |
i.issuertypeclass AS issuertype |
|
155 |
FROM identities i; |
|
156 |
|
|
157 |
INSERT INTO dsm_datasourcepids (datasource, pid) |
|
158 |
SELECT |
|
159 |
dp.datasource, |
|
160 |
dp.pid |
|
161 |
FROM datasourcepids dp; |
|
162 |
|
|
163 |
-- is this table needed? -- |
|
164 |
INSERT INTO dsm_organizationpids (organization, pid) |
|
165 |
SELECT |
|
166 |
op.organization, |
|
167 |
op.pid |
|
168 |
FROM organizationpids op; |
|
169 |
|
|
170 |
|
|
171 |
-- Migrate the managed status |
|
172 |
update dsm_datasources d set managed = true where d.id in ( |
|
173 |
SELECT d.id |
|
174 |
FROM dsm_datasources d |
|
175 |
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) |
|
176 |
LEFT OUTER JOIN apicollections ap ON (ap.api = a.id) |
|
177 |
WHERE a.active = TRUE |
|
178 |
OR a.removable = TRUE |
|
179 |
OR ap.edited IS NOT NULL |
|
180 |
GROUP BY d.id ); |
|
181 |
|
|
182 |
|
|
183 |
-- !!!!! data insert complete !!!!! |
|
184 |
|
|
185 |
|
|
186 |
delete from organizations where id like 'opendoar%' ; |
|
187 |
delete from organizations where id like 're3data%' ; |
|
188 |
delete from organizations where collectedfrom is null ; |
|
189 |
|
|
190 |
delete from dsm_organizations where collectedfrom = 'openaire____::corda' ; |
|
191 |
delete from dsm_organizations where collectedfrom = 'openaire____::snsf' ; |
|
192 |
delete from dsm_organizations where collectedfrom = 'openaire____::conicytf' ; |
|
193 |
delete from dsm_organizations where collectedfrom = 'openaire____::irb_hr' ; |
|
194 |
delete from dsm_organizations where collectedfrom = 'openaire____::fwf' ; |
|
195 |
delete from dsm_organizations where collectedfrom = 'openaire____::sfi' ; |
|
196 |
delete from dsm_organizations where collectedfrom = 'openaire____::wellcometrust' ; |
|
197 |
delete from dsm_organizations where collectedfrom = 'openaire____::nsf' ; |
|
198 |
delete from dsm_organizations where collectedfrom = 'openaire____::corda_h2020' ; |
|
199 |
|
|
200 |
|
|
201 |
|
|
202 |
|
|
203 |
|
|
204 |
|
|
205 |
|
|
206 |
|
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/patch-db/increase_size_re3data_fields.sql | ||
---|---|---|
1 |
ALTER TABLE dsm_datasources ALTER COLUMN databaseaccesstype TYPE varchar(64); |
|
2 |
ALTER TABLE dsm_datasources ALTER COLUMN datauploadtype TYPE varchar(64); |
|
3 |
ALTER TABLE dsm_datasources ALTER COLUMN databaseaccessrestriction TYPE varchar(64); |
|
4 |
ALTER TABLE dsm_datasources ALTER COLUMN datauploadrestriction TYPE varchar(64); |
|
5 |
ALTER TABLE dsm_datasources ALTER COLUMN qualitymanagementkind TYPE varchar(64); |
|
6 |
|
|
7 |
ALTER TABLE dsm_datasources ALTER COLUMN missionstatementurl TYPE text; |
|
8 |
|
|
9 |
|
modules/dnet-openaireplus-workflows/trunk/pom.xml | ||
---|---|---|
10 | 10 |
<groupId>eu.dnetlib</groupId> |
11 | 11 |
<artifactId>dnet-openaireplus-workflows</artifactId> |
12 | 12 |
<packaging>jar</packaging> |
13 |
<version>7.1.19-SNAPSHOT</version>
|
|
13 |
<version>8.0.0-SNAPSHOT</version>
|
|
14 | 14 |
|
15 | 15 |
<scm> |
16 | 16 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-workflows/trunk</developerConnection> |
... | ... | |
40 | 40 |
<dependency> |
41 | 41 |
<groupId>eu.dnetlib</groupId> |
42 | 42 |
<artifactId>dnet-msro-service</artifactId> |
43 |
<version>[3.0.0,4.0.0)</version>
|
|
43 |
<version>[4.0.0,5.0.0)</version>
|
|
44 | 44 |
</dependency> |
45 | 45 |
<dependency> |
46 | 46 |
<groupId>eu.dnetlib</groupId> |
... | ... | |
50 | 50 |
<dependency> |
51 | 51 |
<groupId>eu.dnetlib</groupId> |
52 | 52 |
<artifactId>dnet-openaire-datasource-manager</artifactId> |
53 |
<version>[1.0.0-SNAPSHOT,2.0.0)</version>
|
|
53 |
<version>[2.0.0,3.0.0)</version>
|
|
54 | 54 |
</dependency> |
55 | 55 |
<dependency> |
56 | 56 |
<groupId>eu.dnetlib</groupId> |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/FilterManagedDatasourcesJobNode.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import java.io.StringReader; |
4 | 4 |
import java.util.Set; |
5 |
|
|
5 | 6 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
6 | 7 |
|
7 |
import com.googlecode.sarasvati.Arc; |
|
8 |
import com.googlecode.sarasvati.NodeToken; |
|
9 |
import eu.dnetlib.enabling.datasources.LocalOpenaireDatasourceManager; |
|
10 |
import eu.dnetlib.enabling.resultset.MappedResultSetFactory; |
|
11 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
|
12 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
13 | 8 |
import org.apache.commons.logging.Log; |
14 | 9 |
import org.apache.commons.logging.LogFactory; |
15 | 10 |
import org.dom4j.Document; |
... | ... | |
18 | 13 |
import org.dom4j.io.SAXReader; |
19 | 14 |
import org.springframework.beans.factory.annotation.Autowired; |
20 | 15 |
|
16 |
import com.googlecode.sarasvati.Arc; |
|
17 |
import com.googlecode.sarasvati.NodeToken; |
|
18 |
|
|
19 |
import eu.dnetlib.enabling.datasources.LocalOpenaireDatasourceManager; |
|
20 |
import eu.dnetlib.enabling.resultset.MappedResultSetFactory; |
|
21 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
|
22 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
23 |
|
|
21 | 24 |
public class FilterManagedDatasourcesJobNode extends SimpleJobNode { |
22 | 25 |
|
23 | 26 |
private static final Log log = LogFactory.getLog(FilterManagedDatasourcesJobNode.class); |
... | ... | |
48 | 51 |
|
49 | 52 |
/** |
50 | 53 |
* Extracts the datasource id from the input record and checks its existence in the given set. |
54 |
* |
|
51 | 55 |
* @param data |
52 | 56 |
* @param filter |
53 | 57 |
* @return The |
54 | 58 |
* @throws IllegalStateException |
55 | 59 |
*/ |
56 |
private String filterManaged(final String data, Set<String> filter) throws IllegalStateException { |
|
60 |
private String filterManaged(final String data, final Set<String> filter) throws IllegalStateException {
|
|
57 | 61 |
try { |
58 | 62 |
final Document doc = new SAXReader().read(new StringReader(data)); |
59 | 63 |
|
60 |
final String dsId = doc.valueOf("/record/metadata/ROWS/ROW[@table = 'dsm_datasources']/FIELD[@name = 'id']/text()");
|
|
64 |
final String dsId = doc.valueOf("/record/metadata/ROWS/ROW[@table = 'dsm_services']/FIELD[@name = 'id']/text()");
|
|
61 | 65 |
if (filter.contains(dsId)) { |
62 | 66 |
doc.selectSingleNode("/record/metadata/ROWS").detach(); |
63 | 67 |
((Element) doc.selectSingleNode("/record/metadata")).addElement("ROWS"); |
64 | 68 |
return doc.asXML(); |
65 |
} else return data; |
|
66 |
} catch (DocumentException e) { |
|
69 |
} else { |
|
70 |
return data; |
|
71 |
} |
|
72 |
} catch (final DocumentException e) { |
|
67 | 73 |
throw new IllegalStateException(e); |
68 | 74 |
} |
69 | 75 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/ObtainOpenaireDataSourceParamsJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes; |
2 | 2 |
|
3 |
import javax.annotation.Resource;
|
|
3 |
import org.springframework.beans.factory.annotation.Autowired;
|
|
4 | 4 |
|
5 | 5 |
import com.googlecode.sarasvati.Arc; |
6 | 6 |
import com.googlecode.sarasvati.NodeToken; |
7 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
8 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
7 |
|
|
8 |
import eu.dnetlib.enabling.datasources.common.Api; |
|
9 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
|
10 |
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager; |
|
9 | 11 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
10 | 12 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
11 | 13 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
... | ... | |
14 | 16 |
|
15 | 17 |
private String providerId; |
16 | 18 |
|
17 |
@Resource
|
|
18 |
private UniqueServiceLocator serviceLocator;
|
|
19 |
@Autowired
|
|
20 |
private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager;
|
|
19 | 21 |
|
20 | 22 |
@Override |
21 | 23 |
protected String execute(final NodeToken token) throws Exception { |
22 |
final String query = "let $x := /*[.//RESOURCE_IDENTIFIER/@value='" + providerId + "']//EXTRA_FIELDS\n" |
|
23 |
+ "return concat($x/FIELD[./key='OpenAireDataSourceId']/value, ' @@@ ', $x/FIELD[./key='NamespacePrefix']/value)"; |
|
24 |
final String nsPrefix = dsManager.getDs(providerId).getNamespaceprefix(); |
|
24 | 25 |
|
25 |
final String[] arr = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(query).split("@@@");
|
|
26 |
token.getEnv().setAttribute("parentDatasourceId", providerId);
|
|
26 | 27 |
|
27 |
final String origId = arr[0].trim(); |
|
28 |
final String nsPrefix = arr[1].trim(); |
|
29 |
//this is needed by the mdbuilder |
|
30 |
//TODO: update mdbuilder to use the env attributes below, whose names are defined in WorkflowConstants |
|
31 |
token.getEnv().setAttribute("parentDatasourceId", origId); |
|
32 | 28 |
token.getEnv().setAttribute("namespacePrefix", nsPrefix); |
33 | 29 |
token.getEnv().setAttribute("dateOfCollection", DateUtils.now_ISO8601()); |
34 | 30 |
|
35 |
//these are needed for validation and fill hostedby |
|
36 |
token.getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, origId);
|
|
31 |
// these are needed for validation and fill hostedby
|
|
32 |
token.getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, providerId);
|
|
37 | 33 |
token.getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_NAMESPACE_PREFIX, nsPrefix); |
38 | 34 |
|
39 |
token.getFullEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, origId);
|
|
35 |
token.getFullEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, providerId);
|
|
40 | 36 |
token.getFullEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_NAMESPACE_PREFIX, nsPrefix); |
41 | 37 |
|
42 |
token.getProcess().getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, origId);
|
|
38 |
token.getProcess().getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_ORIGINALID, providerId);
|
|
43 | 39 |
token.getProcess().getEnv().setAttribute(WorkflowsConstants.DATAPROVIDER_NAMESPACE_PREFIX, nsPrefix); |
44 | 40 |
|
45 | 41 |
return Arc.DEFAULT_ARC; |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/OpenaireMdBuilderJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes; |
2 | 2 |
|
3 |
import javax.annotation.Resource; |
|
3 |
import java.io.UnsupportedEncodingException; |
|
4 |
import java.net.URLEncoder; |
|
5 |
|
|
4 | 6 |
import javax.xml.ws.wsaddressing.W3CEndpointReference; |
5 | 7 |
|
8 |
import org.antlr.stringtemplate.StringTemplate; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
12 |
import org.springframework.beans.factory.annotation.Required; |
|
13 |
|
|
6 | 14 |
import com.googlecode.sarasvati.Arc; |
7 | 15 |
import com.googlecode.sarasvati.NodeToken; |
16 |
|
|
17 |
import eu.dnetlib.enabling.datasources.common.Api; |
|
18 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
|
19 |
import eu.dnetlib.enabling.datasources.common.DsmException; |
|
20 |
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager; |
|
8 | 21 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
9 | 22 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
10 | 23 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
... | ... | |
12 | 25 |
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils; |
13 | 26 |
import eu.dnetlib.msro.rmi.MSROException; |
14 | 27 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
15 |
import org.antlr.stringtemplate.StringTemplate; |
|
16 |
import org.apache.commons.logging.Log; |
|
17 |
import org.apache.commons.logging.LogFactory; |
|
18 |
import org.springframework.beans.factory.annotation.Required; |
|
19 | 28 |
|
20 |
import java.io.UnsupportedEncodingException; |
|
21 |
import java.net.URLEncoder; |
|
22 |
|
|
23 | 29 |
public class OpenaireMdBuilderJobNode extends SimpleJobNode { |
24 | 30 |
|
25 | 31 |
private static final Log log = LogFactory.getLog(OpenaireMdBuilderJobNode.class); |
... | ... | |
42 | 48 |
|
43 | 49 |
private XSLTMappedResultSetFactory xsltMappedResultSetFactory; |
44 | 50 |
|
45 |
@Resource |
|
51 |
@Autowired |
|
52 |
private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager; |
|
53 |
|
|
54 |
@Autowired |
|
46 | 55 |
private UniqueServiceLocator serviceLocator; |
47 | 56 |
|
48 | 57 |
@Override |
49 | 58 |
protected String execute(final NodeToken token) throws MSROException { |
50 | 59 |
final String inputEpr = token.getEnv().getAttribute(inputEprParam); |
51 | 60 |
|
52 |
if ((inputEpr == null) || inputEpr.isEmpty()) throw new MSROException("InputEprParam (" + inputEprParam + ") not found in ENV");
|
|
61 |
if (inputEpr == null || inputEpr.isEmpty()) { throw new MSROException("InputEprParam (" + inputEprParam + ") not found in ENV"); }
|
|
53 | 62 |
final StringTemplate st = new StringTemplate(getMdBuilderTemplateXslt().getTemplate()); |
54 | 63 |
try { |
55 |
st.setAttribute("xpath", getMetadataIdentifierXPath()); |
|
64 |
final Api<?> api = dsManager.getApis(datasourceId) |
|
65 |
.stream() |
|
66 |
.filter(a -> a.getId().equals(datasourceInterface)) |
|
67 |
.findFirst() |
|
68 |
.orElseThrow(() -> new MSROException("Api not found: " + datasourceInterface)); |
|
69 |
st.setAttribute("xpath", api.getMetadataIdentifierPath()); |
|
56 | 70 |
st.setAttribute("datasourceId", datasourceId); |
57 | 71 |
if (token.getEnv().hasAttribute("namespacePrefix")) { |
58 | 72 |
st.setAttribute("namespacePrefix", token.getEnv().getAttribute("namespacePrefix")); |
... | ... | |
66 | 80 |
st.setAttribute("trust", getTrust()); |
67 | 81 |
st.setAttribute("provenanceactionclassname", getProvenanceactionclassname()); |
68 | 82 |
st.setAttribute("provenanceactionclassid", getProvenanceactionclassid()); |
69 |
st.setAttribute("baseurl", URLEncoder.encode(getBaseUrl(), "UTF-8")); |
|
70 |
st.setAttribute("metadatanamespace", getMetadataNamespace()); |
|
83 |
st.setAttribute("baseurl", URLEncoder.encode(api.getBaseurl(), "UTF-8")); |
|
84 |
st.setAttribute("metadatanamespace", getMetadataNamespace(api.getApiParams() |
|
85 |
.stream() |
|
86 |
.filter(p -> p.getParam().equalsIgnoreCase("format")) |
|
87 |
.map(p -> p.getValue()) |
|
88 |
.findFirst() |
|
89 |
.orElse(null))); |
|
71 | 90 |
|
72 | 91 |
final W3CEndpointReference epr = xsltMappedResultSetFactory.createMappedResultSet(new EPRUtils().getEpr(inputEpr), st.toString()); |
73 | 92 |
|
74 | 93 |
token.getEnv().setAttribute(outputEprParam, epr.toString()); |
75 | 94 |
|
76 | 95 |
return Arc.DEFAULT_ARC; |
77 |
} catch (ISLookUpException e) {
|
|
96 |
} catch (final DsmException e) {
|
|
78 | 97 |
throw new MSROException("Error while initializing mdBuilder template for datasource " + datasourceId, e); |
79 |
} catch (UnsupportedEncodingException e) { |
|
98 |
} catch (final UnsupportedEncodingException e) {
|
|
80 | 99 |
log.error("Cannot encode baseUrl in UTF-8"); |
81 | 100 |
throw new MSROException(e); |
82 | 101 |
} |
83 | 102 |
} |
84 | 103 |
|
85 |
private String getMetadataIdentifierXPath() throws ISLookUpException { |
|
86 |
String xQuery = |
|
87 |
String.format("for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') " |
|
88 |
+ "return $x//INTERFACE[@id='%s']/INTERFACE_EXTRA_FIELD[@name='metadata_identifier_path']/string()", getDatasourceInterface()); |
|
89 |
|
|
90 |
return hasOverridingMetadataIdentifierXPath() ? getOverridingMetadataIdentifierXPath() : serviceLocator.getService(ISLookUpService.class) |
|
91 |
.getResourceProfileByQuery(xQuery); |
|
92 |
} |
|
93 |
|
|
94 |
private String getBaseUrl() throws ISLookUpException { |
|
95 |
String xQuery = "/*[.//RESOURCE_IDENTIFIER/@value='{datasourceId}']//INTERFACE[@id='{interfaceId}']//BASE_URL/string()"; |
|
96 |
xQuery = xQuery.replace("{interfaceId}", datasourceInterface).replace("{datasourceId}", datasourceId); |
|
97 |
return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xQuery); |
|
98 |
} |
|
99 |
|
|
100 |
private String getMetadataNamespace() { |
|
104 |
private String getMetadataNamespace(final String format) { |
|
101 | 105 |
try { |
102 | 106 |
String xQuery = |
103 |
"let $x := /*[.//RESOURCE_IDENTIFIER/@value='{datasourceId}']//INTERFACE[@id='{interfaceId}']/ACCESS_PROTOCOL/@format/string() " |
|
104 |
+ "return /*[.//RESOURCE_TYPE/@value='MetadataFormatDSResourceType']//METADATAFORMAT[@Prefix=$x]/@NameSpace/string()"; |
|
105 |
xQuery = xQuery.replace("{interfaceId}", datasourceInterface).replace("{datasourceId}", datasourceId); |
|
107 |
"/*[.//RESOURCE_TYPE/@value='MetadataFormatDSResourceType']//METADATAFORMAT[@Prefix='{format}']/@NameSpace/string()"; |
|
108 |
xQuery = xQuery.replace("{format}", format); |
|
106 | 109 |
return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xQuery); |
107 |
} catch (ISLookUpException e) { |
|
110 |
} catch (final ISLookUpException e) {
|
|
108 | 111 |
log.error("The interface is not OAI or the format is not found in the MetadataFormatDSResourceType, thus metadata format in the <about> section " |
109 |
+ "cannot managed here and it will be leaved empty (for the time being)");
|
|
112 |
+ "cannot managed here and it will be leaved empty (for the time being)"); |
|
110 | 113 |
return ""; |
111 | 114 |
} |
112 | 115 |
} |
113 | 116 |
|
114 |
private boolean hasOverridingMetadataIdentifierXPath() { |
|
115 |
return !getOverridingMetadataIdentifierXPath().isEmpty(); |
|
116 |
} |
|
117 |
|
|
118 | 117 |
public String getInputEprParam() { |
119 | 118 |
return inputEprParam; |
120 | 119 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/UpdateRepositoryProfilesJobNode.java | ||
---|---|---|
13 | 13 |
public class UpdateRepositoryProfilesJobNode extends SimpleJobNode { |
14 | 14 |
|
15 | 15 |
@Autowired |
16 |
private LocalDatasourceManager<Datasource<?, ?>, Api<?>> dsManager; |
|
16 |
private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager;
|
|
17 | 17 |
|
18 | 18 |
@Override |
19 | 19 |
protected String execute(final NodeToken token) throws Exception { |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/consistency/FindInvalidMetaWorkflowsJobNode.java | ||
---|---|---|
11 | 11 |
import com.googlecode.sarasvati.Arc; |
12 | 12 |
import com.googlecode.sarasvati.NodeToken; |
13 | 13 |
|
14 |
import eu.dnetlib.enabling.datasources.common.Api; |
|
15 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
|
16 |
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager; |
|
14 | 17 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
15 | 18 |
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryService; |
16 | 19 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
... | ... | |
22 | 25 |
@Autowired |
23 | 26 |
private UniqueServiceLocator serviceLocator; |
24 | 27 |
|
28 |
@Autowired |
|
29 |
private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager; |
|
30 |
|
|
25 | 31 |
private static final Log log = LogFactory.getLog(FindInvalidMetaWorkflowsJobNode.class); |
26 | 32 |
|
27 | 33 |
private boolean deleteInvalidProfiles = false; |
... | ... | |
32 | 38 |
final ISLookUpService lookup = serviceLocator.getService(ISLookUpService.class); |
33 | 39 |
final ISRegistryService registry = serviceLocator.getService(ISRegistryService.class); |
34 | 40 |
|
35 |
final String q1 = "for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType')\n" + |
|
36 |
"let $id := $x//RESOURCE_IDENTIFIER/@value\n" + |
|
37 |
"for $api in $x//INTERFACE/@id\n" + |
|
38 |
"return concat($id,'|',$api)"; |
|
41 |
final Set<String> valids = new HashSet<>(); |
|
42 |
for (final String dsId : dsManager.listManagedDatasourceIds()) { |
|
43 |
for (final Api<?> api : dsManager.getApis(dsId)) { |
|
44 |
valids.add(dsId + "|" + api.getId()); |
|
45 |
} |
|
46 |
} |
|
39 | 47 |
|
40 |
final String q2 = "for $x in collection(' /db/DRIVER/MetaWorkflowDSResources/MetaWorkflowDSResourceType') \n" +
|
|
41 |
"where $x//DATAPROVIDER/@id and $x//DATAPROVIDER/@interface\n" +
|
|
42 |
"return concat($x//RESOURCE_IDENTIFIER/@value,'@@@',$x//DATAPROVIDER/@id,'|',$x//DATAPROVIDER/@interface)";
|
|
48 |
final String q = "for $x in collection(' /db/DRIVER/MetaWorkflowDSResources/MetaWorkflowDSResourceType') \n" + |
|
49 |
"where $x//DATAPROVIDER/@id and $x//DATAPROVIDER/@interface\n" + |
|
50 |
"return concat($x//RESOURCE_IDENTIFIER/@value,'@@@',$x//DATAPROVIDER/@id,'|',$x//DATAPROVIDER/@interface)"; |
|
43 | 51 |
|
44 |
final Set<String> valids = new HashSet<>(lookup.quickSearchProfile(q1)); |
|
45 |
|
|
46 | 52 |
int count = 0; |
47 | 53 |
int countInvalids = 0; |
48 | 54 |
int countDeleted = 0; |
49 |
for (final String s : lookup.quickSearchProfile(q2)) {
|
|
55 |
for (final String s : lookup.quickSearchProfile(q)) { |
|
50 | 56 |
count++; |
51 | 57 |
final String metaWfId = StringUtils.substringBefore(s, "@@@"); |
52 | 58 |
final String dsapi = StringUtils.substringAfter(s, "@@@"); |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/consistency/FixRepoMdstoreSizesJobNode.java | ||
---|---|---|
3 | 3 |
import java.io.IOException; |
4 | 4 |
import java.io.StringReader; |
5 | 5 |
import java.util.Date; |
6 |
import java.util.HashMap; |
|
7 | 6 |
import java.util.List; |
8 |
import java.util.Map; |
|
9 | 7 |
import java.util.Objects; |
10 | 8 |
import java.util.Set; |
11 | 9 |
import java.util.stream.Collectors; |
... | ... | |
30 | 28 |
import eu.dnetlib.data.mdstore.modular.connector.MDStoreDao; |
31 | 29 |
import eu.dnetlib.data.objectstore.modular.connector.ObjectStoreDao; |
32 | 30 |
import eu.dnetlib.data.objectstore.rmi.ObjectStoreServiceException; |
33 |
import eu.dnetlib.enabling.datasources.LocalOpenaireDatasourceManager; |
|
34 | 31 |
import eu.dnetlib.enabling.datasources.common.Api; |
35 |
import eu.dnetlib.enabling.datasources.common.ApiParam;
|
|
32 |
import eu.dnetlib.enabling.datasources.common.Datasource;
|
|
36 | 33 |
import eu.dnetlib.enabling.datasources.common.DsmException; |
34 |
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager; |
|
37 | 35 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; |
38 | 36 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
39 | 37 |
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryException; |
... | ... | |
48 | 46 |
public class FixRepoMdstoreSizesJobNode extends SimpleJobNode implements ProgressJobNode { |
49 | 47 |
|
50 | 48 |
@Autowired |
51 |
private LocalOpenaireDatasourceManager dsManager;
|
|
49 |
private LocalDatasourceManager<Datasource<?, ?, ?>, Api<?>> dsManager;;
|
|
52 | 50 |
|
53 |
@Autowired |
|
54 |
private UniqueServiceLocator serviceLocator; |
|
55 |
|
|
56 | 51 |
@Resource(name = "mongodbMDStoreDao") |
57 | 52 |
private MDStoreDao mdstoreDao; |
58 | 53 |
|
59 | 54 |
@Autowired |
55 |
private UniqueServiceLocator serviceLocator; |
|
56 |
|
|
57 |
@Autowired |
|
60 | 58 |
private ObjectStoreDao objectStoreDao; |
61 | 59 |
|
62 | 60 |
private final DateUtils dateUtils = new DateUtils(); |
... | ... | |
64 | 62 |
private int current = 0; |
65 | 63 |
private int total = 0; |
66 | 64 |
|
67 |
private ISRegistryService registry; |
|
65 |
private boolean alwaysUpdate = false; |
|
66 |
|
|
68 | 67 |
private ISLookUpService lookup; |
69 | 68 |
|
70 |
private final Map<String, String> openaireIds = new HashMap<>(); |
|
71 |
private boolean alwaysUpdate = false; |
|
69 |
private ISRegistryService registry; |
|
72 | 70 |
|
73 | 71 |
private static final Log log = LogFactory.getLog(FixRepoMdstoreSizesJobNode.class); |
74 | 72 |
|
... | ... | |
77 | 75 |
this.total = total; |
78 | 76 |
this.lookup = serviceLocator.getService(ISLookUpService.class); |
79 | 77 |
this.registry = serviceLocator.getService(ISRegistryService.class); |
80 |
try { |
|
81 |
openaireIds.putAll(lookup.quickSearchProfile( |
|
82 |
"for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') return concat($x//DATASOURCE_ORIGINAL_ID, ' @@@ ', $x//RESOURCE_IDENTIFIER/@value)") |
|
83 |
.stream() |
|
84 |
.collect(Collectors.toMap( |
|
85 |
s -> StringUtils.substringBefore(s, "@@@").trim(), |
|
86 |
s -> StringUtils.substringAfter(s, "@@@").trim()))); |
|
87 |
} catch (final ISLookUpException e) { |
|
88 |
// TODO Auto-generated catch block |
|
89 |
e.printStackTrace(); |
|
90 |
} |
|
91 |
|
|
92 | 78 |
} |
93 | 79 |
|
94 | 80 |
@Override |
... | ... | |
103 | 89 |
current++; |
104 | 90 |
|
105 | 91 |
try { |
106 |
for (final Api<ApiParam> api : dsManager.getApis(dsId)) {
|
|
92 |
for (final Api<?> api : dsManager.getApis(dsId)) {
|
|
107 | 93 |
verifyApi(dsId, api); |
108 | 94 |
} |
109 | 95 |
} catch (final Throwable e) { |
... | ... | |
115 | 101 |
return Arc.DEFAULT_ARC; |
116 | 102 |
} |
117 | 103 |
|
118 |
private void verifyApi(final String dsId, final Api<ApiParam> api)
|
|
119 |
throws DsmException, ISRegistryException, IOException, ISLookUpException, MDStoreServiceException, ObjectStoreServiceException {
|
|
104 |
private void verifyApi(final String dsId, final Api<?> api)
|
|
105 |
throws DsmException, ISRegistryException, IOException, ISLookUpException, MDStoreServiceException, ObjectStoreServiceException { |
|
120 | 106 |
|
121 | 107 |
for (final Document doc : listCollectionMdStores(dsId, api.getId())) { |
122 | 108 |
final String mdId = doc.valueOf("//RESOURCE_IDENTIFIER/@value"); |
... | ... | |
170 | 156 |
|
171 | 157 |
private List<Document> executeXquery(final String template, final String dsId, final String apiId) throws ISLookUpException, IOException { |
172 | 158 |
final StringTemplate st = new StringTemplate(IOUtils.toString(getClass().getResourceAsStream(template))); |
173 |
st.setAttribute("dsId", openaireIds.get(dsId));
|
|
159 |
st.setAttribute("dsId", dsId);
|
|
174 | 160 |
st.setAttribute("apiId", apiId); |
175 | 161 |
|
176 | 162 |
final SAXReader reader = new SAXReader(); |
177 | 163 |
|
178 | 164 |
return lookup.quickSearchProfile(st.toString()) |
179 |
.stream()
|
|
180 |
.map(s -> {
|
|
181 |
try {
|
|
182 |
return reader.read(new StringReader(s));
|
|
183 |
} catch (final DocumentException e) {
|
|
184 |
return null;
|
|
185 |
}
|
|
186 |
})
|
|
187 |
.filter(Objects::nonNull)
|
|
188 |
.collect(Collectors.toList());
|
|
165 |
.stream() |
|
166 |
.map(s -> { |
|
167 |
try { |
|
168 |
return reader.read(new StringReader(s)); |
|
169 |
} catch (final DocumentException e) { |
|
170 |
return null; |
|
171 |
} |
|
172 |
}) |
|
173 |
.filter(Objects::nonNull) |
|
174 |
.collect(Collectors.toList()); |
|
189 | 175 |
} |
190 | 176 |
|
191 | 177 |
private void updateMdStoreProfile(final String mdId, final Document doc, final int size) throws ISRegistryException { |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/dhp/CreateMDStoreHadoopJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.dhp; |
2 |
import java.net.URI; |
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.Map; |
|
5 | 2 |
|
6 |
import com.google.gson.Gson; |
|
7 |
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; |
|
8 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
9 |
import eu.dnetlib.msro.rmi.MSROException; |
|
10 |
import org.apache.commons.lang3.StringUtils; |
|
3 |
import javax.annotation.Resource; |
|
4 |
|
|
11 | 5 |
import org.apache.commons.logging.Log; |
12 | 6 |
import org.apache.commons.logging.LogFactory; |
13 | 7 |
import org.springframework.beans.factory.annotation.Required; |
14 | 8 |
import org.springframework.web.client.RestTemplate; |
15 |
import org.springframework.web.util.UriComponentsBuilder; |
|
16 | 9 |
|
10 |
import com.google.gson.Gson; |
|
17 | 11 |
import com.googlecode.sarasvati.Arc; |
18 | 12 |
import com.googlecode.sarasvati.NodeToken; |
19 | 13 |
|
20 | 14 |
import eu.dnetlib.data.mdstore.manager.common.model.MDStoreWithInfo; |
15 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
21 | 16 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
22 | 17 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
23 | 18 |
|
24 |
import javax.annotation.Resource; |
|
25 |
|
|
26 | 19 |
public class CreateMDStoreHadoopJobNode extends SimpleJobNode { |
27 | 20 |
|
28 |
private static final Log log = LogFactory.getLog(CreateMDStoreHadoopJobNode.class);
|
|
29 |
public static final String MDSTORE_CREATE_URL_AND_PARAMS = "/new/{format}/{layout}/{interpretation}?dsName={dsName}&dsId={dsid}&apiId={apiId}";
|
|
21 |
private static final Log log = LogFactory.getLog(CreateMDStoreHadoopJobNode.class);
|
|
22 |
public static final String MDSTORE_CREATE_URL_AND_PARAMS = "/new/{format}/{layout}/{interpretation}?dsName={dsName}&dsId={dsid}&apiId={apiId}";
|
|
30 | 23 |
|
31 |
/* Workflow params */
|
|
32 |
private String format;
|
|
33 |
private String layout;
|
|
34 |
private String interpretation;
|
|
35 |
private String outputPrefix = "mdstore";
|
|
24 |
/* Workflow params */
|
|
25 |
private String format;
|
|
26 |
private String layout;
|
|
27 |
private String interpretation;
|
|
28 |
private String outputPrefix = "mdstore";
|
|
36 | 29 |
|
37 |
/* Spring managed params */
|
|
38 |
private String mdStoreManagerUrl;
|
|
30 |
/* Spring managed params */
|
|
31 |
private String mdStoreManagerUrl;
|
|
39 | 32 |
|
40 |
@Resource
|
|
41 |
private UniqueServiceLocator serviceLocator;
|
|
33 |
@Resource
|
|
34 |
private UniqueServiceLocator serviceLocator;
|
|
42 | 35 |
|
43 |
@Override
|
|
44 |
protected String execute(final NodeToken token) throws Exception {
|
|
36 |
@Override
|
|
37 |
protected String execute(final NodeToken token) throws Exception {
|
|
45 | 38 |
|
46 |
final String repositoryId = token.getFullEnv().getAttribute(WorkflowsConstants.DATAPROVIDER_ID); |
|
47 |
final String xquery = String.format( |
|
48 |
"for $x in collection('/db/DRIVER/RepositoryServiceResources/RepositoryServiceResourceType') \n" + |
|
49 |
"where .//RESOURCE_IDENTIFIER[./@value='%s']\n" + |
|
50 |
"return $x//EXTRA_FIELDS/FIELD[./key/text() = 'OpenAireDataSourceId']/value/text()", |
|
51 |
repositoryId); |
|
39 |
final String openAireDataSourceId = token.getFullEnv().getAttribute(WorkflowsConstants.DATAPROVIDER_ID); |
|
52 | 40 |
|
53 |
log.info("resolving openaAireDatasourceId: " + xquery);
|
|
41 |
final String urlTemplate = getMdStoreManagerUrl() + MDSTORE_CREATE_URL_AND_PARAMS;
|
|
54 | 42 |
|
55 |
final String openAireDataSourceId = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery); |
|
43 |
final RestTemplate restTemplate = new RestTemplate(); |
|
44 |
final MDStoreWithInfo result = |
|
45 |
restTemplate.getForObject(urlTemplate, MDStoreWithInfo.class, getFormat(), getLayout(), getInterpretation(), token.getFullEnv() |
|
46 |
.getAttribute(WorkflowsConstants.DATAPROVIDER_NAME), openAireDataSourceId, token.getFullEnv() |
|
47 |
.getAttribute(WorkflowsConstants.DATAPROVIDER_INTERFACE)); |
|
56 | 48 |
|
57 |
if (StringUtils.isBlank(openAireDataSourceId)) { |
|
58 |
throw new MSROException("unable to find openAireDataSourceId in repository profile " + repositoryId); |
|
59 |
} |
|
49 |
log.info("created mdstore: " + new Gson().toJson(result)); |
|
60 | 50 |
|
61 |
log.info(String.format("got OpenAIRE datasource id '%s' for repository profile '%s'", openAireDataSourceId, repositoryId)); |
|
51 |
token.getEnv().setAttribute(getOutputPrefix() + "format", format); |
|
52 |
token.getEnv().setAttribute(getOutputPrefix() + "layout", layout); |
|
53 |
token.getEnv().setAttribute(getOutputPrefix() + "interpretation", interpretation); |
|
54 |
token.getEnv().setAttribute(getOutputPrefix() + "id", result.getId()); |
|
62 | 55 |
|
63 |
final String urlTemplate = getMdStoreManagerUrl() + MDSTORE_CREATE_URL_AND_PARAMS; |
|
56 |
return Arc.DEFAULT_ARC; |
|
57 |
} |
|
64 | 58 |
|
65 |
final RestTemplate restTemplate = new RestTemplate(); |
|
66 |
final MDStoreWithInfo result = restTemplate.getForObject( |
|
67 |
urlTemplate, |
|
68 |
MDStoreWithInfo.class, |
|
69 |
getFormat(), |
|
70 |
getLayout(), |
|
71 |
getInterpretation(), |
|
72 |
token.getFullEnv().getAttribute(WorkflowsConstants.DATAPROVIDER_NAME), |
|
73 |
openAireDataSourceId, |
|
74 |
token.getFullEnv().getAttribute(WorkflowsConstants.DATAPROVIDER_INTERFACE)); |
|
59 |
public String getFormat() { |
|
60 |
return format; |
|
61 |
} |
|
75 | 62 |
|
76 |
log.info("created mdstore: " + new Gson().toJson(result)); |
|
63 |
public void setFormat(final String format) { |
|
64 |
this.format = format; |
|
65 |
} |
|
77 | 66 |
|
78 |
token.getEnv().setAttribute(getOutputPrefix() + "format", format); |
|
79 |
token.getEnv().setAttribute(getOutputPrefix() + "layout", layout); |
|
80 |
token.getEnv().setAttribute(getOutputPrefix() + "interpretation", interpretation); |
|
81 |
token.getEnv().setAttribute(getOutputPrefix() + "id", result.getId()); |
|
67 |
public String getLayout() { |
|
68 |
return layout; |
|
69 |
} |
|
82 | 70 |
|
83 |
return Arc.DEFAULT_ARC; |
|
84 |
} |
|
71 |
public void setLayout(final String layout) { |
|
72 |
this.layout = layout; |
|
73 |
} |
|
85 | 74 |
|
86 |
public String getFormat() {
|
|
87 |
return format;
|
|
88 |
}
|
|
75 |
public String getInterpretation() {
|
|
76 |
return interpretation;
|
|
77 |
}
|
|
89 | 78 |
|
90 |
public void setFormat(final String format) {
|
|
91 |
this.format = format;
|
|
92 |
}
|
|
79 |
public void setInterpretation(final String interpretation) {
|
|
80 |
this.interpretation = interpretation;
|
|
81 |
}
|
|
93 | 82 |
|
94 |
public String getLayout() {
|
|
95 |
return layout;
|
|
96 |
}
|
|
83 |
public String getOutputPrefix() {
|
|
84 |
return outputPrefix;
|
|
85 |
}
|
|
97 | 86 |
|
98 |
public void setLayout(final String layout) {
|
|
99 |
this.layout = layout;
|
|
100 |
}
|
|
87 |
public void setOutputPrefix(final String outputPrefix) {
|
|
88 |
this.outputPrefix = outputPrefix;
|
|
89 |
}
|
|
101 | 90 |
|
102 |
public String getInterpretation() {
|
|
103 |
return interpretation;
|
|
104 |
}
|
|
91 |
public String getMdStoreManagerUrl() {
|
|
92 |
return mdStoreManagerUrl;
|
|
93 |
}
|
|
105 | 94 |
|
106 |
public void setInterpretation(final String interpretation) { |
|
107 |
this.interpretation = interpretation; |
|
108 |
} |
|
95 |
@Required |
|
96 |
public void setMdStoreManagerUrl(final String mdStoreManagerUrl) { |
|
97 |
this.mdStoreManagerUrl = mdStoreManagerUrl; |
|
98 |
} |
|
109 | 99 |
|
110 |
public String getOutputPrefix() { |
|
111 |
return outputPrefix; |
|
112 |
} |
|
113 |
|
|
114 |
public void setOutputPrefix(final String outputPrefix) { |
|
115 |
this.outputPrefix = outputPrefix; |
|
116 |
} |
|
117 |
|
|
118 |
public String getMdStoreManagerUrl() { |
|
119 |
return mdStoreManagerUrl; |
|
120 |
} |
|
121 |
|
|
122 |
@Required |
|
123 |
public void setMdStoreManagerUrl(final String mdStoreManagerUrl) { |
|
124 |
this.mdStoreManagerUrl = mdStoreManagerUrl; |
|
125 |
} |
|
126 |
|
|
127 | 100 |
} |
modules/dnet-openaireplus-workflows/trunk/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/dhp/PrepareEnvCollectHadoopJobNode.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.dhp; |
2 | 2 |
|
3 |
import static eu.dnetlib.dhp.common.Constants.COLLECTION_MODE; |
|
4 |
import static eu.dnetlib.dhp.common.Constants.CONNECT_TIMEOUT; |
|
5 |
import static eu.dnetlib.dhp.common.Constants.DNET_MESSAGE_MGR_URL; |
|
6 |
import static eu.dnetlib.dhp.common.Constants.MAX_NUMBER_OF_RETRY; |
|
7 |
import static eu.dnetlib.dhp.common.Constants.METADATA_ENCODING; |
|
8 |
import static eu.dnetlib.dhp.common.Constants.OOZIE_WF_PATH; |
|
9 |
import static eu.dnetlib.dhp.common.Constants.READ_TIMEOUT; |
|
10 |
import static eu.dnetlib.dhp.common.Constants.REQUEST_DELAY; |
|
11 |
import static eu.dnetlib.dhp.common.Constants.RETRY_DELAY; |
|
12 |
|
|
3 | 13 |
import java.text.SimpleDateFormat; |
4 | 14 |
import java.util.Date; |
5 | 15 |
import java.util.Iterator; |
... | ... | |
7 | 17 |
import java.util.Optional; |
8 | 18 |
import java.util.stream.Collectors; |
9 | 19 |
|
10 |
import eu.dnetlib.common.logging.DnetLogger; |
|
11 |
import eu.dnetlib.dhp.collection.ApiDescriptor; |
|
12 |
import eu.dnetlib.dhp.model.mdstore.Provenance; |
|
13 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
|
14 |
import eu.dnetlib.enabling.datasources.common.DsmNotFoundException; |
|
15 |
import eu.dnetlib.msro.rmi.MSROException; |
|
20 |
import javax.annotation.Resource; |
|
21 |
|
|
16 | 22 |
import org.apache.commons.lang.math.NumberUtils; |
17 | 23 |
import org.apache.commons.lang3.StringUtils; |
18 | 24 |
import org.apache.commons.logging.Log; |
19 | 25 |
import org.apache.commons.logging.LogFactory; |
20 | 26 |
import org.springframework.beans.factory.annotation.Autowired; |
27 |
import org.springframework.beans.factory.annotation.Value; |
|
21 | 28 |
|
22 | 29 |
import com.google.gson.Gson; |
23 | 30 |
import com.googlecode.sarasvati.Arc; |
24 | 31 |
import com.googlecode.sarasvati.NodeToken; |
25 | 32 |
|
33 |
import eu.dnetlib.common.logging.DnetLogger; |
|
34 |
import eu.dnetlib.dhp.collection.ApiDescriptor; |
|
35 |
import eu.dnetlib.dhp.model.mdstore.Provenance; |
|
26 | 36 |
import eu.dnetlib.enabling.datasources.common.ApiParam; |
37 |
import eu.dnetlib.enabling.datasources.common.Datasource; |
|
38 |
import eu.dnetlib.enabling.datasources.common.DsmNotFoundException; |
|
27 | 39 |
import eu.dnetlib.enabling.datasources.common.LocalDatasourceManager; |
40 |
import eu.dnetlib.msro.rmi.MSROException; |
|
28 | 41 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
29 | 42 |
import eu.dnetlib.msro.workflows.util.WorkflowsConstants; |
30 |
import org.springframework.beans.factory.annotation.Value; |
|
31 | 43 |
|
32 |
import javax.annotation.Resource; |
|
33 |
|
|
34 |
import static eu.dnetlib.dhp.common.Constants.*; |
|
35 |
|
|
36 | 44 |
public class PrepareEnvCollectHadoopJobNode extends SimpleJobNode { |
37 | 45 |
|
38 |
private static final Log log = LogFactory.getLog(PrepareEnvCollectHadoopJobNode.class);
|
|
46 |
private static final Log log = LogFactory.getLog(PrepareEnvCollectHadoopJobNode.class);
|
|
39 | 47 |
|
40 |
public static final String METADATA_IDENTIFIER_PATH = "metadata_identifier_path";
|
|
41 |
public static final String DATE_FORMAT = "yyyy-MM-dd";
|
|
48 |
public static final String METADATA_IDENTIFIER_PATH = "metadata_identifier_path";
|
|
49 |
public static final String DATE_FORMAT = "yyyy-MM-dd";
|
|
42 | 50 |
|
43 |
private long ONE_DAY = 1000 * 60 * 60 * 24;
|
|
51 |
private final long ONE_DAY = 1000 * 60 * 60 * 24;
|
|
44 | 52 |
|
45 |
@Resource(name = "msroWorkflowLogger")
|
|
46 |
private DnetLogger dnetLogger;
|
|
53 |
@Resource(name = "msroWorkflowLogger")
|
|
54 |
private DnetLogger dnetLogger;
|
|
47 | 55 |
|
48 |
@Autowired |
|
49 |
private LocalDatasourceManager<?, ?> dsManager; |
|
56 |
@Autowired |
Also available in: Unified diff
merge from eosc_services