Revision 50489
Added by Michele Artini about 6 years ago
queryDatasources.sql | ||
---|---|---|
1 | 1 |
SELECT |
2 | 2 |
d.id AS datasourceid, |
3 |
--regexp_replace(d.id, '^.*::', '') AS originalid, |
|
4 |
-- array_agg(DISTINCT (d.id, i.pid)) as identities, |
|
5 |
ARRAY [d.id, i.pid] AS identities, |
|
6 |
--SELECT ARRAY[dd.id, ii.pid] as identities from datasources dd left outer join datasourcepids dps2 on (dps2.datasource = dd.id) |
|
7 |
-- left outer join identities ii on (ii.pid = dps2.pid) |
|
8 |
|
|
9 |
-- d.id AS originalid, |
|
3 |
d.id || array_agg(di.pid) AS identities, |
|
10 | 4 |
d.officialname AS officialname, |
11 | 5 |
d.englishname AS englishname, |
12 |
|
|
13 | 6 |
CASE |
14 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
|
15 |
THEN |
|
16 |
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
17 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver'])
|
|
18 |
THEN |
|
19 |
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
20 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0'])
|
|
21 |
THEN |
|
22 |
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
23 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire3.0'])
|
|
24 |
THEN |
|
25 |
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
26 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0_data'])
|
|
27 |
THEN |
|
28 |
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
29 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['native'])
|
|
30 |
THEN |
|
31 |
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
32 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['hostedBy'])
|
|
33 |
THEN |
|
34 |
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
|
35 |
WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['notCompatible'])
|
|
36 |
THEN |
|
7 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility):: TEXT) @> ARRAY ['driver', 'openaire2.0'])
|
|
8 |
THEN
|
|
9 |
'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
10 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['driver'])
|
|
11 |
THEN
|
|
12 |
'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
13 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0'])
|
|
14 |
THEN
|
|
15 |
'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
16 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire3.0'])
|
|
17 |
THEN
|
|
18 |
'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
19 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['openaire2.0_data'])
|
|
20 |
THEN
|
|
21 |
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
22 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['native'])
|
|
23 |
THEN
|
|
24 |
'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
25 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['hostedBy'])
|
|
26 |
THEN
|
|
27 |
'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
|
|
28 |
WHEN (array_agg(DISTINCT COALESCE (a.compatibility_override, a.compatibility) :: TEXT) @> ARRAY ['notCompatible'])
|
|
29 |
THEN
|
|
37 | 30 |
'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
38 | 31 |
ELSE |
39 | 32 |
'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel' |
40 | 33 |
END AS openairecompatibility, |
41 |
|
|
42 | 34 |
d.websiteurl AS websiteurl, |
43 | 35 |
d.logourl AS logourl, |
44 |
d.contactemail AS contactemail, |
|
45 |
array_agg(DISTINCT CASE |
|
46 |
WHEN apc.param = 'baseUrl' AND api.protocolclass = 'oai' |
|
47 |
THEN |
|
48 |
CASE WHEN COALESCE(apc.edited, '') = '' |
|
49 |
THEN apc.original |
|
50 |
ELSE apc.edited END |
|
51 |
ELSE |
|
52 |
NULL |
|
53 |
END) AS accessinfopackage, |
|
54 |
-- we need baseUrl param only -when available-, but we can not use WHERE clause on apc.param, otherwise we exclude datasources without a baseUrl param and the returned rows are less than we expect. |
|
36 |
array_agg(DISTINCT CASE WHEN a.protocol = 'oai' THEN a.baseurl ELSE NULL END) AS accessinfopackage, |
|
55 | 37 |
d.latitude AS latitude, |
56 | 38 |
d.longitude AS longitude, |
57 | 39 |
d.namespaceprefix AS namespaceprefix, |
58 |
d.od_numberofitems AS odnumberofitems,
|
|
59 |
d.od_numberofitemsdate AS odnumberofitemsdate,
|
|
60 |
array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
|
|
40 |
NULL AS odnumberofitems,
|
|
41 |
NULL AS odnumberofitemsdate,
|
|
42 |
(SELECT array_agg(s|| '###unknown@@@unknown@@@dnet:subjects@@@dnet:subjects') FROM UNNEST(d.subjects::text[]) AS s) AS subjects,
|
|
61 | 43 |
d.description AS description, |
62 |
d.od_policies AS odpolicies, |
|
63 |
ARRAY(SELECT trim(s) |
|
64 |
FROM unnest(string_to_array(d.od_languages, ',')) AS s) AS odlanguages, |
|
65 |
ARRAY(SELECT trim(s) |
|
66 |
FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s) AS odcontenttypes, |
|
67 |
d.inferred AS inferred, |
|
68 |
d.deletedbyinference AS deletedbyinference, |
|
69 |
d.trust AS trust, |
|
70 |
d.inferenceprovenance AS inferenceprovenance, |
|
44 |
NULL AS odpolicies, |
|
45 |
d.languages AS odlanguages, |
|
46 |
d.od_contenttypes AS odcontenttypes, |
|
47 |
false AS inferred, |
|
48 |
false AS deletedbyinference, |
|
49 |
0.9 AS trust, |
|
50 |
NULL AS inferenceprovenance, |
|
71 | 51 |
d.dateofcollection AS dateofcollection, |
72 | 52 |
d.dateofvalidation AS dateofvalidation, |
73 |
|
|
74 |
-- re3data fields |
|
53 |
-- re3data fields |
|
75 | 54 |
d.releasestartdate AS releasestartdate, |
76 | 55 |
d.releaseenddate AS releaseenddate, |
77 | 56 |
d.missionstatementurl AS missionstatementurl, |
... | ... | |
86 | 65 |
d.qualitymanagementkind AS qualitymanagementkind, |
87 | 66 |
d.pidsystems AS pidsystems, |
88 | 67 |
d.certificates AS certificates, |
89 |
array_agg(DISTINCT p.name || '&&&' || p.url) AS policies, |
|
90 |
-- end of re3data fields |
|
91 |
|
|
68 |
ARRAY[]::text[] AS policies, |
|
92 | 69 |
dc.id AS collectedfromid, |
93 | 70 |
dc.officialname AS collectedfromname, |
71 |
d.typology || '@@@' || d.typology || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype, |
|
72 |
'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction |
|
94 | 73 |
|
95 |
tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name AS datasourcetype, |
|
96 |
pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction |
|
74 |
FROM dsm_datasources d |
|
97 | 75 |
|
98 |
-- merged into identities above |
|
99 |
-- array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) as pid |
|
76 |
LEFT OUTER JOIN dsm_datasources dc on (d.collectedfrom = dc.id) |
|
77 |
LEFT OUTER JOIN dsm_api a ON (d.id = a.datasource) |
|
78 |
LEFT OUTER JOIN dsm_datasourcepids di ON (d.id = di.datasource) |
|
100 | 79 |
|
101 |
|
|
102 |
FROM datasources d |
|
103 |
LEFT OUTER JOIN class tc ON (tc.code = d.datasourceclass) |
|
104 |
LEFT OUTER JOIN scheme ts ON (ts.code = d.datasourcescheme) |
|
105 |
|
|
106 |
LEFT OUTER JOIN class pac ON (pac.code = d.provenanceactionclass) |
|
107 |
LEFT OUTER JOIN scheme pas ON (pas.code = d.provenanceactionscheme) |
|
108 |
|
|
109 |
LEFT OUTER JOIN datasourcepids dps ON (dps.datasource = d.id) |
|
110 |
LEFT OUTER JOIN identities i ON (i.pid = dps.pid) |
|
111 |
|
|
112 |
LEFT OUTER JOIN datasources dc ON (dc.id = d.collectedfrom) |
|
113 |
LEFT OUTER JOIN api ON (api.datasource = d.id) |
|
114 |
LEFT OUTER JOIN apicollections apc ON (apc.api = api.id) |
|
115 |
|
|
116 |
LEFT OUTER JOIN temp_compliances tcc ON (tcc.api = api.id) |
|
117 |
|
|
118 |
LEFT OUTER JOIN datasource_subject ds ON (ds.datasource = d.id) |
|
119 |
LEFT OUTER JOIN subjects s ON (s.id = ds.subject) |
|
120 |
|
|
121 |
LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass) |
|
122 |
LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme) |
|
123 |
|
|
124 |
LEFT OUTER JOIN datasource_policy dp ON (dp.datasource = d.id) |
|
125 |
LEFT OUTER JOIN policies p ON (p.id = dp.policy) |
|
126 |
|
|
127 |
-- These clauses have been commented to allow the import of all the datasource on the index |
|
128 |
-- WHERE d.datasourceclass != 'entityregistry' |
|
129 |
-- AND d.openairecompatibilityclass IS NOT null |
|
130 |
-- AND d.openairecompatibilityclass != 'UNKNOWN' |
|
131 |
-- AND d.openairecompatibilityclass != 'notCompatible' |
|
132 |
|
|
133 | 80 |
GROUP BY |
134 | 81 |
d.id, |
135 | 82 |
d.officialname, |
... | ... | |
139 | 86 |
d.contactemail, |
140 | 87 |
d.namespaceprefix, |
141 | 88 |
d.description, |
142 |
d.od_numberofitems, |
|
143 |
d.od_numberofitemsdate, |
|
144 |
d.od_policies, |
|
145 |
d.od_languages, |
|
146 |
d.od_contenttypes, |
|
147 | 89 |
d.latitude, |
148 | 90 |
d.longitude, |
149 |
d.inferred, |
|
150 |
d.deletedbyinference, |
|
151 |
d.trust, |
|
152 |
d.inferenceprovenance, |
|
153 | 91 |
d.dateofcollection, |
154 | 92 |
d.dateofvalidation, |
155 |
dc.id, |
|
156 | 93 |
d.releasestartdate, |
157 | 94 |
d.releaseenddate, |
158 | 95 |
d.missionstatementurl, |
... | ... | |
167 | 104 |
d.qualitymanagementkind, |
168 | 105 |
d.pidsystems, |
169 | 106 |
d.certificates, |
170 |
dc.officialname, |
|
171 |
tc.code, tc.name, ts.code, ts.name, |
|
172 |
pac.code, pac.name, pas.code, pas.name, |
|
173 |
sc.code, sc.name, ss.code, ss.name, |
|
174 |
i.pid |
|
107 |
dc.id, |
|
108 |
dc.officialname |
Also available in: Unified diff
merged branch dsm into trunk