Revision 48139
Added by Alessia Bardi almost 7 years ago
queryDatasources.sql | ||
---|---|---|
1 |
SELECT d.id AS datasourceid, |
|
2 |
--regexp_replace(d.id, '^.*::', '') AS originalid, |
|
1 |
SELECT |
|
2 |
d.id AS datasourceid, |
|
3 |
--regexp_replace(d.id, '^.*::', '') AS originalid, |
|
3 | 4 |
-- array_agg(DISTINCT (d.id, i.pid)) as identities, |
4 |
ARRAY[d.id, i.pid] as identities,
|
|
5 |
ARRAY [d.id, i.pid] AS identities,
|
|
5 | 6 |
--SELECT ARRAY[dd.id, ii.pid] as identities from datasources dd left outer join datasourcepids dps2 on (dps2.datasource = dd.id) |
6 | 7 |
-- left outer join identities ii on (ii.pid = dps2.pid) |
7 | 8 |
|
8 |
-- d.id AS originalid, |
|
9 |
d.officialname AS officialname,
|
|
10 |
d.englishname AS englishname,
|
|
9 |
-- d.id AS originalid,
|
|
10 |
d.officialname AS officialname,
|
|
11 |
d.englishname AS englishname,
|
|
11 | 12 |
|
12 |
CASE |
|
13 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['driver', 'openaire2.0'])
|
|
14 |
THEN |
|
15 |
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
16 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['driver'])
|
|
17 |
THEN |
|
18 |
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
19 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire2.0'])
|
|
20 |
THEN |
|
21 |
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
22 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire3.0'])
|
|
23 |
THEN |
|
24 |
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
25 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire2.0_data'])
|
|
26 |
THEN |
|
27 |
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
28 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['native'])
|
|
29 |
THEN |
|
30 |
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
31 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['hostedBy'])
|
|
32 |
THEN |
|
33 |
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
34 |
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['notCompatible'])
|
|
35 |
THEN |
|
36 |
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
37 |
ELSE |
|
38 |
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
39 |
END AS openairecompatibility,
|
|
13 |
CASE
|
|
14 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
|
15 |
THEN
|
|
16 |
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
17 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver'])
|
|
18 |
THEN
|
|
19 |
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
20 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0'])
|
|
21 |
THEN
|
|
22 |
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
23 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire3.0'])
|
|
24 |
THEN
|
|
25 |
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
26 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0_data'])
|
|
27 |
THEN
|
|
28 |
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
29 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['native'])
|
|
30 |
THEN
|
|
31 |
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
32 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['hostedBy'])
|
|
33 |
THEN
|
|
34 |
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
35 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['notCompatible'])
|
|
36 |
THEN
|
|
37 |
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
38 |
ELSE
|
|
39 |
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
40 |
END AS openairecompatibility,
|
|
40 | 41 |
|
41 |
d.websiteurl AS websiteurl, |
|
42 |
d.logourl AS logourl, |
|
43 |
d.contactemail AS contactemail, |
|
44 |
array_agg( DISTINCT CASE |
|
45 |
WHEN apc.param='baseUrl' and api.protocolclass='oai' THEN |
|
46 |
CASE WHEN COALESCE(apc.edited, '') = '' THEN apc.original ELSE apc.edited END |
|
47 |
ELSE |
|
48 |
NULL |
|
49 |
END ) AS accessinfopackage, -- we need baseUrl param only -when available-, but we can not use WHERE clause on apc.param, otherwise we exclude datasources without a baseUrl param and the returned rows are less than we expect. |
|
50 |
d.latitude as latitude, |
|
51 |
d.longitude as longitude, |
|
52 |
d.namespaceprefix as namespaceprefix, |
|
53 |
d.od_numberofitems as odnumberofitems, |
|
54 |
d.od_numberofitemsdate as odnumberofitemsdate, |
|
55 |
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) as subjects, |
|
56 |
d.description as description, |
|
57 |
d.od_policies as odpolicies, |
|
58 |
ARRAY(select trim(s) from unnest(string_to_array(d.od_languages, ',')) as s) as odlanguages, |
|
59 |
ARRAY(select trim(s) from unnest(string_to_array(d.od_contenttypes, '-')) as s) as odcontenttypes, |
|
60 |
d.inferred as inferred, |
|
61 |
d.deletedbyinference as deletedbyinference, |
|
62 |
d.trust as trust, |
|
63 |
d.inferenceprovenance as inferenceprovenance, |
|
64 |
d.dateofcollection as dateofcollection, |
|
65 |
d.dateofvalidation as dateofvalidation, |
|
66 |
|
|
67 |
-- re3data fields |
|
68 |
d.releasestartdate as releasestartdate, |
|
69 |
d.releaseenddate as releaseenddate, |
|
70 |
d.missionstatementurl as missionstatementurl, |
|
71 |
d.dataprovider as dataprovider, |
|
72 |
d.serviceprovider as serviceprovider, |
|
73 |
d.databaseaccesstype as databaseaccesstype, |
|
74 |
d.datauploadtype as datauploadtype, |
|
75 |
d.databaseaccessrestriction as databaseaccessrestriction, |
|
76 |
d.datauploadrestriction as datauploadrestriction, |
|
77 |
d.versioning as versioning, |
|
78 |
d.citationguidelineurl as citationguidelineurl, |
|
79 |
d.qualitymanagementkind as qualitymanagementkind, |
|
80 |
d.pidsystems as pidsystems, |
|
81 |
d.certificates as certificates, |
|
82 |
array_agg(DISTINCT p.name || '&&&' || p.url) as policies, |
|
83 |
-- end of re3data fields |
|
84 |
|
|
85 |
dc.id as collectedfromid, |
|
86 |
dc.officialname as collectedfromname, |
|
87 |
|
|
88 |
tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name as datasourcetype, |
|
89 |
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name as provenanceaction |
|
42 |
d.websiteurl AS websiteurl, |
|
43 |
d.logourl AS logourl, |
|
44 |
d.contactemail AS contactemail, |
|
45 |
array_agg(DISTINCT CASE |
|
46 |
WHEN apc.param = 'baseUrl' AND api.protocolclass = 'oai' |
|
47 |
THEN |
|
48 |
CASE WHEN COALESCE(apc.edited, '') = '' |
|
49 |
THEN apc.original |
|
50 |
ELSE apc.edited END |
|
51 |
ELSE |
|
52 |
NULL |
|
53 |
END) AS accessinfopackage, |
|
54 |
-- we need baseUrl param only -when available-, but we can not use WHERE clause on apc.param, otherwise we exclude datasources without a baseUrl param and the returned rows are less than we expect. |
|
55 |
d.latitude AS latitude, |
|
56 |
d.longitude AS longitude, |
|
57 |
d.namespaceprefix AS namespaceprefix, |
|
58 |
d.od_numberofitems AS odnumberofitems, |
|
59 |
d.od_numberofitemsdate AS odnumberofitemsdate, |
|
60 |
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects, |
|
61 |
d.description AS description, |
|
62 |
d.od_policies AS odpolicies, |
|
63 |
ARRAY(SELECT trim(s) |
|
64 |
FROM unnest(string_to_array(d.od_languages, ',')) AS s) AS odlanguages, |
|
65 |
ARRAY(SELECT trim(s) |
|
66 |
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, |
|
67 |
d.inferred AS inferred, |
|
68 |
d.deletedbyinference AS deletedbyinference, |
|
69 |
d.trust AS trust, |
|
70 |
d.inferenceprovenance AS inferenceprovenance, |
|
71 |
d.dateofcollection AS dateofcollection, |
|
72 |
d.dateofvalidation AS dateofvalidation, |
|
90 | 73 |
|
91 |
-- merged into identities above |
|
92 |
-- array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) as pid |
|
93 |
|
|
74 |
-- re3data fields |
|
75 |
d.releasestartdate AS releasestartdate, |
|
76 |
d.releaseenddate AS releaseenddate, |
|
77 |
d.missionstatementurl AS missionstatementurl, |
|
78 |
d.dataprovider AS dataprovider, |
|
79 |
d.serviceprovider AS serviceprovider, |
|
80 |
d.databaseaccesstype AS databaseaccesstype, |
|
81 |
d.datauploadtype AS datauploadtype, |
|
82 |
d.databaseaccessrestriction AS databaseaccessrestriction, |
|
83 |
d.datauploadrestriction AS datauploadrestriction, |
|
84 |
d.versioning AS versioning, |
|
85 |
d.citationguidelineurl AS citationguidelineurl, |
|
86 |
d.qualitymanagementkind AS qualitymanagementkind, |
|
87 |
d.pidsystems AS pidsystems, |
|
88 |
d.certificates AS certificates, |
|
89 |
array_agg(DISTINCT p.name || '&&&' || p.url) AS policies, |
|
90 |
-- end of re3data fields |
|
94 | 91 |
|
92 |
dc.id AS collectedfromid, |
|
93 |
dc.officialname AS collectedfromname, |
|
94 |
|
|
95 |
tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name AS datasourcetype, |
|
96 |
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction |
|
97 |
|
|
98 |
-- merged into identities above |
|
99 |
-- array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) as pid |
|
100 |
|
|
101 |
|
|
95 | 102 |
FROM datasources d |
96 |
left outer join class tc on (tc.code = d.datasourceclass) |
|
97 |
left outer join scheme ts on (ts.code = d.datasourcescheme) |
|
98 |
|
|
99 |
left outer join class pac on (pac.code = d.provenanceactionclass) |
|
100 |
left outer join scheme pas on (pas.code = d.provenanceactionscheme) |
|
103 |
LEFT OUTER JOIN class tc ON (tc.code = d.datasourceclass) |
|
104 |
LEFT OUTER JOIN scheme ts ON (ts.code = d.datasourcescheme) |
|
101 | 105 |
|
102 |
left outer join datasourcepids dps on (dps.datasource = d.id)
|
|
103 |
left outer join identities i on (i.pid = dps.pid)
|
|
106 |
LEFT OUTER JOIN class pac ON (pac.code = d.provenanceactionclass)
|
|
107 |
LEFT OUTER JOIN scheme pas ON (pas.code = d.provenanceactionscheme)
|
|
104 | 108 |
|
105 |
left outer join datasources dc on (dc.id = d.collectedfrom) |
|
106 |
left outer join api on (api.datasource = d.id) |
|
107 |
left outer join apicollections apc on (apc.api = api.id) |
|
108 |
|
|
109 |
left outer join temp_compliances tcc on (tcc.api = api.id) |
|
110 |
|
|
111 |
left outer join datasource_subject ds on (ds.datasource = d.id) |
|
112 |
left outer join subjects s on (s.id = ds.subject) |
|
113 |
|
|
114 |
left outer join class sc on (sc.code = s.semanticclass) |
|
115 |
left outer join scheme ss on (ss.code = s.semanticscheme) |
|
116 |
|
|
117 |
left outer join datasource_policy dp on (dp.datasource = d.id) |
|
118 |
left outer join policies p on (p.id = dp.policy) |
|
119 |
|
|
109 |
LEFT OUTER JOIN datasourcepids dps ON (dps.datasource = d.id) |
|
110 |
LEFT OUTER JOIN identities i ON (i.pid = dps.pid) |
|
111 |
|
|
112 |
LEFT OUTER JOIN datasources dc ON (dc.id = d.collectedfrom) |
|
113 |
LEFT OUTER JOIN api ON (api.datasource = d.id) |
|
114 |
LEFT OUTER JOIN apicollections apc ON (apc.api = api.id) |
|
115 |
|
|
116 |
LEFT OUTER JOIN temp_compliances tcc ON (tcc.api = api.id) |
|
117 |
|
|
118 |
LEFT OUTER JOIN datasource_subject ds ON (ds.datasource = d.id) |
|
119 |
LEFT OUTER JOIN subjects s ON (s.id = ds.subject) |
|
120 |
|
|
121 |
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) |
|
122 |
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) |
|
123 |
|
|
124 |
LEFT OUTER JOIN datasource_policy dp ON (dp.datasource = d.id) |
|
125 |
LEFT OUTER JOIN policies p ON (p.id = dp.policy) |
|
126 |
|
|
120 | 127 |
-- These clauses have been commented to allow the import of all the datasource on the index |
121 | 128 |
-- WHERE d.datasourceclass != 'entityregistry' |
122 | 129 |
-- AND d.openairecompatibilityclass IS NOT null |
123 | 130 |
-- AND d.openairecompatibilityclass != 'UNKNOWN' |
124 | 131 |
-- AND d.openairecompatibilityclass != 'notCompatible' |
125 |
|
|
126 |
GROUP BY |
|
127 |
d.id, |
|
128 |
d.officialname, |
|
129 |
d.englishname, |
|
130 |
d.websiteurl, |
|
131 |
d.logourl, |
|
132 |
d.contactemail, |
|
133 |
d.namespaceprefix, |
|
134 |
d.description, |
|
135 |
d.od_numberofitems, |
|
136 |
d.od_numberofitemsdate, |
|
137 |
d.od_policies, |
|
138 |
d.od_languages, |
|
139 |
d.od_contenttypes, |
|
140 |
d.latitude, |
|
141 |
d.longitude, |
|
142 |
d.inferred, |
|
143 |
d.deletedbyinference, |
|
144 |
d.trust, |
|
145 |
d.inferenceprovenance, |
|
146 |
d.dateofcollection, |
|
147 |
d.dateofvalidation, |
|
148 |
dc.id, |
|
149 |
d.releasestartdate, |
|
150 |
d.releaseenddate, |
|
151 |
d.missionstatementurl, |
|
152 |
d.dataprovider, |
|
153 |
d.serviceprovider, |
|
154 |
d.databaseaccesstype, |
|
155 |
d.datauploadtype, |
|
156 |
d.databaseaccessrestriction, |
|
157 |
d.datauploadrestriction, |
|
158 |
d.versioning, |
|
159 |
d.citationguidelineurl, |
|
160 |
d.qualitymanagementkind, |
|
161 |
d.pidsystems, |
|
162 |
d.certificates, |
|
163 |
dc.officialname, |
|
164 |
tc.code, tc.name, ts.code, ts.name, |
|
165 |
pac.code, pac.name, pas.code, pas.name, |
|
166 |
sc.code, sc.name, ss.code, ss.name, |
|
167 |
i.pid |
|
132 |
|
|
133 |
GROUP BY |
|
134 |
d.id, |
|
135 |
d.officialname, |
|
136 |
d.englishname, |
|
137 |
d.websiteurl, |
|
138 |
d.logourl, |
|
139 |
d.contactemail, |
|
140 |
d.namespaceprefix, |
|
141 |
d.description, |
|
142 |
d.od_numberofitems, |
|
143 |
d.od_numberofitemsdate, |
|
144 |
d.od_policies, |
|
145 |
d.od_languages, |
|
146 |
d.od_contenttypes, |
|
147 |
d.latitude, |
|
148 |
d.longitude, |
|
149 |
d.inferred, |
|
150 |
d.deletedbyinference, |
|
151 |
d.trust, |
|
152 |
d.inferenceprovenance, |
|
153 |
d.dateofcollection, |
|
154 |
d.dateofvalidation, |
|
155 |
dc.id, |
|
156 |
d.releasestartdate, |
|
157 |
d.releaseenddate, |
|
158 |
d.missionstatementurl, |
|
159 |
d.dataprovider, |
|
160 |
d.serviceprovider, |
|
161 |
d.databaseaccesstype, |
|
162 |
d.datauploadtype, |
|
163 |
d.databaseaccessrestriction, |
|
164 |
d.datauploadrestriction, |
|
165 |
d.versioning, |
|
166 |
d.citationguidelineurl, |
|
167 |
d.qualitymanagementkind, |
|
168 |
d.pidsystems, |
|
169 |
d.certificates, |
|
170 |
dc.officialname, |
|
171 |
tc.code, tc.name, ts.code, ts.name, |
|
172 |
pac.code, pac.name, pas.code, pas.name, |
|
173 |
sc.code, sc.name, ss.code, ss.name, |
|
174 |
i.pid |
Also available in: Unified diff
integrated (hopefully) all required changes from dnet40