Project

General

Profile

« Previous | Next » 

Revision 48139

integrated (hopefully) all required changes from dnet40

View differences:

queryDatasources.sql
1
SELECT d.id AS datasourceid,
2
--regexp_replace(d.id, '^.*::', '') AS originalid,
1
SELECT
2
	d.id                                                                                                       AS datasourceid,
3
	--regexp_replace(d.id, '^.*::', '') AS originalid,
3 4
	-- array_agg(DISTINCT (d.id, i.pid)) as identities,
4
	ARRAY[d.id, i.pid] as identities,
5
	ARRAY [d.id, i.pid]                                                                                        AS identities,
5 6
	--SELECT ARRAY[dd.id, ii.pid] as identities from datasources dd left outer join datasourcepids dps2 on (dps2.datasource = dd.id)
6 7
	--	left outer join identities ii on (ii.pid = dps2.pid)
7 8

  
8
-- d.id           AS originalid,
9
d.officialname AS officialname,
10
d.englishname AS englishname,
9
	-- d.id           AS originalid,
10
	d.officialname                                                                                             AS officialname,
11
	d.englishname                                                                                              AS englishname,
11 12

  
12
CASE
13
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['driver', 'openaire2.0'])
14
THEN
15
				'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
16
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['driver'])
17
THEN
18
				'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
19
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire2.0'])
20
THEN
21
				'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
22
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire3.0'])
23
THEN
24
				'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
25
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['openaire2.0_data'])
26
THEN
27
'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
28
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['native'])
29
THEN
30
				'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
31
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['hostedBy'])
32
THEN
33
	'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
34
WHEN (array_agg( DISTINCT tcc.compliance :: TEXT ) @> ARRAY ['notCompatible'])
35
THEN
36
				'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
37
ELSE
38
				'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
39
END AS openairecompatibility,
13
	CASE
14
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver', 'openaire2.0'])
15
		THEN
16
			'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
17
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['driver'])
18
		THEN
19
			'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
20
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0'])
21
		THEN
22
			'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
23
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire3.0'])
24
		THEN
25
			'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
26
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['openaire2.0_data'])
27
		THEN
28
			'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
29
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['native'])
30
		THEN
31
			'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
32
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['hostedBy'])
33
		THEN
34
			'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
35
	WHEN (array_agg(DISTINCT tcc.compliance :: TEXT) @> ARRAY ['notCompatible'])
36
		THEN
37
			'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
38
	ELSE
39
		'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
40
	END                                                                                                        AS openairecompatibility,
40 41

  
41
d.websiteurl AS websiteurl,
42
d.logourl AS logourl,
43
d.contactemail AS contactemail,
44
array_agg( DISTINCT CASE
45
			WHEN apc.param='baseUrl' and api.protocolclass='oai' THEN
46
				CASE WHEN COALESCE(apc.edited, '') = ''  THEN apc.original ELSE apc.edited END
47
			ELSE
48
				NULL
49
END ) AS accessinfopackage, -- we need baseUrl param only -when available-, but we can not use WHERE clause on apc.param, otherwise we exclude datasources without a baseUrl param and the returned rows are less than we expect.
50
		d.latitude					as latitude,
51
		d.longitude					as longitude,
52
		d.namespaceprefix			as namespaceprefix,
53
		d.od_numberofitems			as odnumberofitems,          	
54
		d.od_numberofitemsdate		as odnumberofitemsdate,      		
55
		array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) as subjects,
56
		d.description				as description,
57
		d.od_policies				as odpolicies,
58
		ARRAY(select trim(s) from unnest(string_to_array(d.od_languages, ',')) as s)  as odlanguages,
59
		ARRAY(select trim(s) from unnest(string_to_array(d.od_contenttypes, '-')) as s) as odcontenttypes,
60
		d.inferred					as inferred,
61
		d.deletedbyinference		as deletedbyinference,
62
		d.trust						as trust,
63
		d.inferenceprovenance		as inferenceprovenance,
64
		d.dateofcollection			as dateofcollection,
65
		d.dateofvalidation			as dateofvalidation,
66
		
67
		-- re3data fields
68
		d.releasestartdate			as releasestartdate,
69
		d.releaseenddate			as releaseenddate,
70
		d.missionstatementurl		as missionstatementurl,
71
		d.dataprovider				as dataprovider,
72
		d.serviceprovider			as serviceprovider,
73
		d.databaseaccesstype		as databaseaccesstype,
74
		d.datauploadtype			as datauploadtype,
75
		d.databaseaccessrestriction	as databaseaccessrestriction,
76
		d.datauploadrestriction		as datauploadrestriction,
77
		d.versioning				as versioning,
78
		d.citationguidelineurl		as citationguidelineurl,
79
		d.qualitymanagementkind		as qualitymanagementkind,
80
		d.pidsystems				as pidsystems,
81
		d.certificates				as certificates,
82
		array_agg(DISTINCT p.name || '&&&' || p.url) as policies,
83
		-- end of re3data fields
84
		
85
		dc.id					as collectedfromid,
86
		dc.officialname			as collectedfromname,
87
		
88
		tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name	as datasourcetype,
89
		pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name	as provenanceaction
42
	d.websiteurl                                                                                               AS websiteurl,
43
	d.logourl                                                                                                  AS logourl,
44
	d.contactemail                                                                                             AS contactemail,
45
	array_agg(DISTINCT CASE
46
	                   WHEN apc.param = 'baseUrl' AND api.protocolclass = 'oai'
47
		                   THEN
48
			                   CASE WHEN COALESCE(apc.edited, '') = ''
49
				                   THEN apc.original
50
			                   ELSE apc.edited END
51
	                   ELSE
52
		                   NULL
53
	                   END)                                                                                    AS accessinfopackage,
54
	-- we need baseUrl param only -when available-, but we can not use WHERE clause on apc.param, otherwise we exclude datasources without a baseUrl param and the returned rows are less than we expect.
55
	d.latitude                                                                                                 AS latitude,
56
	d.longitude                                                                                                AS longitude,
57
	d.namespaceprefix                                                                                          AS namespaceprefix,
58
	d.od_numberofitems                                                                                         AS odnumberofitems,
59
	d.od_numberofitemsdate                                                                                     AS odnumberofitemsdate,
60
	array_agg(DISTINCT s.name || '###' || sc.code || '@@@' || sc.name || '@@@' || ss.code || '@@@' || ss.name) AS subjects,
61
	d.description                                                                                              AS description,
62
	d.od_policies                                                                                              AS odpolicies,
63
	ARRAY(SELECT trim(s)
64
	      FROM unnest(string_to_array(d.od_languages, ',')) AS s)                                              AS odlanguages,
65
	ARRAY(SELECT trim(s)
66
	      FROM unnest(string_to_array(d.od_contenttypes, '-')) AS s)                                           AS odcontenttypes,
67
	d.inferred                                                                                                 AS inferred,
68
	d.deletedbyinference                                                                                       AS deletedbyinference,
69
	d.trust                                                                                                    AS trust,
70
	d.inferenceprovenance                                                                                      AS inferenceprovenance,
71
	d.dateofcollection                                                                                         AS dateofcollection,
72
	d.dateofvalidation                                                                                         AS dateofvalidation,
90 73

  
91
	  -- merged into identities above
92
		-- array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) as pid
93
		
74
	-- re3data fields
75
	d.releasestartdate                                                                                         AS releasestartdate,
76
	d.releaseenddate                                                                                           AS releaseenddate,
77
	d.missionstatementurl                                                                                      AS missionstatementurl,
78
	d.dataprovider                                                                                             AS dataprovider,
79
	d.serviceprovider                                                                                          AS serviceprovider,
80
	d.databaseaccesstype                                                                                       AS databaseaccesstype,
81
	d.datauploadtype                                                                                           AS datauploadtype,
82
	d.databaseaccessrestriction                                                                                AS databaseaccessrestriction,
83
	d.datauploadrestriction                                                                                    AS datauploadrestriction,
84
	d.versioning                                                                                               AS versioning,
85
	d.citationguidelineurl                                                                                     AS citationguidelineurl,
86
	d.qualitymanagementkind                                                                                    AS qualitymanagementkind,
87
	d.pidsystems                                                                                               AS pidsystems,
88
	d.certificates                                                                                             AS certificates,
89
	array_agg(DISTINCT p.name || '&&&' || p.url)                                                               AS policies,
90
	-- end of re3data fields
94 91

  
92
	dc.id                                                                                                      AS collectedfromid,
93
	dc.officialname                                                                                            AS collectedfromname,
94

  
95
	tc.code || '@@@' || tc.name || '@@@' || ts.code || '@@@' || ts.name                                        AS datasourcetype,
96
	pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name                                    AS provenanceaction
97

  
98
-- merged into identities above
99
-- array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) as pid
100

  
101

  
95 102
FROM datasources d
96
	left outer join class tc on (tc.code = d.datasourceclass)	
97
	left outer join scheme ts on (ts.code = d.datasourcescheme)
98
	
99
	left outer join class pac on (pac.code = d.provenanceactionclass)	
100
	left outer join scheme pas on (pas.code = d.provenanceactionscheme)		
103
	LEFT OUTER JOIN class tc ON (tc.code = d.datasourceclass)
104
	LEFT OUTER JOIN scheme ts ON (ts.code = d.datasourcescheme)
101 105

  
102
	left outer join datasourcepids dps on (dps.datasource = d.id)
103
	left outer join identities i on (i.pid = dps.pid)
106
	LEFT OUTER JOIN class pac ON (pac.code = d.provenanceactionclass)
107
	LEFT OUTER JOIN scheme pas ON (pas.code = d.provenanceactionscheme)
104 108

  
105
	left outer join datasources dc on (dc.id = d.collectedfrom)
106
	left outer join api on (api.datasource = d.id)
107
	left outer join apicollections apc on (apc.api = api.id)	
108
	
109
	left outer join temp_compliances tcc on (tcc.api = api.id)	
110
	
111
	left outer join datasource_subject ds on (ds.datasource = d.id)
112
	left outer join subjects s on (s.id = ds.subject) 
113
	
114
	left outer join class sc on (sc.code = s.semanticclass)
115
	left outer join scheme ss on (ss.code = s.semanticscheme)
116
	
117
	left outer join datasource_policy dp on (dp.datasource = d.id)
118
	left outer join policies p on (p.id = dp.policy)
119
	
109
	LEFT OUTER JOIN datasourcepids dps ON (dps.datasource = d.id)
110
	LEFT OUTER JOIN identities i ON (i.pid = dps.pid)
111

  
112
	LEFT OUTER JOIN datasources dc ON (dc.id = d.collectedfrom)
113
	LEFT OUTER JOIN api ON (api.datasource = d.id)
114
	LEFT OUTER JOIN apicollections apc ON (apc.api = api.id)
115

  
116
	LEFT OUTER JOIN temp_compliances tcc ON (tcc.api = api.id)
117

  
118
	LEFT OUTER JOIN datasource_subject ds ON (ds.datasource = d.id)
119
	LEFT OUTER JOIN subjects s ON (s.id = ds.subject)
120

  
121
	LEFT OUTER JOIN class sc ON (sc.code = s.semanticclass)
122
	LEFT OUTER JOIN scheme ss ON (ss.code = s.semanticscheme)
123

  
124
	LEFT OUTER JOIN datasource_policy dp ON (dp.datasource = d.id)
125
	LEFT OUTER JOIN policies p ON (p.id = dp.policy)
126

  
120 127
--  These clauses have been commented to allow the import of all the datasource on the index 
121 128
-- WHERE d.datasourceclass != 'entityregistry'
122 129
--	AND d.openairecompatibilityclass IS NOT null
123 130
--	AND d.openairecompatibilityclass != 'UNKNOWN'
124 131
--	AND d.openairecompatibilityclass != 'notCompatible'
125
	
126
GROUP BY 
127
		d.id,
128
		d.officialname, 
129
		d.englishname,
130
		d.websiteurl,
131
		d.logourl,
132
		d.contactemail,
133
		d.namespaceprefix,
134
		d.description,
135
		d.od_numberofitems,          	
136
		d.od_numberofitemsdate,      		
137
		d.od_policies,
138
		d.od_languages,         		
139
		d.od_contenttypes,
140
		d.latitude,
141
		d.longitude,
142
		d.inferred,
143
		d.deletedbyinference,
144
		d.trust,
145
		d.inferenceprovenance,
146
		d.dateofcollection,
147
		d.dateofvalidation,
148
		dc.id,
149
		d.releasestartdate,
150
		d.releaseenddate,
151
		d.missionstatementurl,
152
		d.dataprovider,
153
		d.serviceprovider,
154
		d.databaseaccesstype,
155
		d.datauploadtype,
156
		d.databaseaccessrestriction,
157
		d.datauploadrestriction,
158
		d.versioning,
159
		d.citationguidelineurl,
160
		d.qualitymanagementkind,
161
		d.pidsystems,
162
		d.certificates,
163
		dc.officialname,
164
		tc.code, tc.name, ts.code, ts.name, 
165
		pac.code, pac.name, pas.code, pas.name,
166
		sc.code, sc.name, ss.code, ss.name,
167
	  i.pid
132

  
133
GROUP BY
134
	d.id,
135
	d.officialname,
136
	d.englishname,
137
	d.websiteurl,
138
	d.logourl,
139
	d.contactemail,
140
	d.namespaceprefix,
141
	d.description,
142
	d.od_numberofitems,
143
	d.od_numberofitemsdate,
144
	d.od_policies,
145
	d.od_languages,
146
	d.od_contenttypes,
147
	d.latitude,
148
	d.longitude,
149
	d.inferred,
150
	d.deletedbyinference,
151
	d.trust,
152
	d.inferenceprovenance,
153
	d.dateofcollection,
154
	d.dateofvalidation,
155
	dc.id,
156
	d.releasestartdate,
157
	d.releaseenddate,
158
	d.missionstatementurl,
159
	d.dataprovider,
160
	d.serviceprovider,
161
	d.databaseaccesstype,
162
	d.datauploadtype,
163
	d.databaseaccessrestriction,
164
	d.datauploadrestriction,
165
	d.versioning,
166
	d.citationguidelineurl,
167
	d.qualitymanagementkind,
168
	d.pidsystems,
169
	d.certificates,
170
	dc.officialname,
171
	tc.code, tc.name, ts.code, ts.name,
172
	pac.code, pac.name, pas.code, pas.name,
173
	sc.code, sc.name, ss.code, ss.name,
174
	i.pid

Also available in: Unified diff