Project

General

Profile

« Previous | Next » 

Revision 57437

sql queries for open orgs similarities

View differences:

modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/querySimilarityFromOpenOrgsDB.sql
1
SELECT local_id AS id1, oa_original_id AS id2 FROM openaire_simrels WHERE reltype = 'is_similar'
2

  
3
UNION ALL
4

  
5
SELECT
6
	o.id                                                     AS id1,
7
	'openorgsmesh'||substring(o.id, 13)||'-'||md5(a.acronym) AS id2
8
FROM acronyms a
9
	LEFT OUTER JOIN organizations o ON (a.id = o.id)
10

  
11
UNION ALL
12
		
13
SELECT
14
	o.id                                                     AS id1,
15
	'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name)    AS id2
16
FROM other_names n
17
	LEFT OUTER JOIN organizations o ON (n.id = o.id)
modules/dnet-openaireplus-workflows/trunk/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizationsFromOpenOrgsDB.sql
24 24
	o.name,
25 25
	o.modification_date,
26 26
	o.country
27
	
28
UNION ALL
29
		
30
SELECT
31
	'openorgsmesh'||substring(o.id, 13)||'-'||md5(a.acronym)                                                                      AS organizationid,
32
	a.acronym                                                                                                                     AS legalshortname,
33
	a.acronym                                                                                                                     AS legalname,
34
	ARRAY[]::text[]                                                                                                               AS "alternativeNames",
35
	(array_agg(u.url))[1]                                                                                                         AS websiteurl,
36
	o.modification_date                                                                                                           AS dateoftransformation,
37
	false                                                                                                                         AS inferred,
38
	false                                                                                                                         AS deletedbyinference,
39
	0.88                                                                                                                          AS trust,
40
	''                                                                                                                            AS inferenceprovenance,
41
	'openaire____::openorgs'                                                                                                      AS collectedfromid,
42
	'OpenOrgs Database'                                                                                                           AS collectedfromname,
43
	o.country || '@@@dnet:countries'                                                                                              AS country,
44
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
45
	array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types')                                                       AS pid
46
FROM acronyms a
47
	LEFT OUTER JOIN organizations o ON (a.id = o.id)
48
	LEFT OUTER JOIN urls u          ON (u.id = o.id)
49
	LEFT OUTER JOIN other_ids i     ON (i.id = o.id)
50
GROUP BY
51
	o.id, o.modification_date, o.country, a.acronym
27 52

  
28 53

  
54
UNION ALL
55
		
56
SELECT
57
	'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name)                                                                         AS organizationid,
58
	n.name                                                                                                                        AS legalshortname,
59
	n.name                                                                                                                        AS legalname,
60
	ARRAY[]::text[]                                                                                                               AS "alternativeNames",
61
	(array_agg(u.url))[1]                                                                                                         AS websiteurl,
62
	o.modification_date                                                                                                           AS dateoftransformation,
63
	false                                                                                                                         AS inferred,
64
	false                                                                                                                         AS deletedbyinference,
65
	0.88                                                                                                                          AS trust,
66
	''                                                                                                                            AS inferenceprovenance,
67
	'openaire____::openorgs'                                                                                                      AS collectedfromid,
68
	'OpenOrgs Database'                                                                                                           AS collectedfromname,
69
	o.country || '@@@dnet:countries'                                                                                              AS country,
70
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
71
	array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types')                                                       AS pid
72
FROM other_names n
73
	LEFT OUTER JOIN organizations o ON (n.id = o.id)
74
	LEFT OUTER JOIN urls u          ON (u.id = o.id)
75
	LEFT OUTER JOIN other_ids i     ON (i.id = o.id)
76
GROUP BY
77
	o.id, o.modification_date, o.country, n.name
78

  
79

  

Also available in: Unified diff