Project

General

Profile

« Previous | Next » 

Revision 29637

Added by Eri Katsari about 10 years ago

View differences:

modules/dnet-openaire-stats/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/GeneralTests.java
81 81
		log.info(q);
82 82
		String[] excludedIds = q.split("\n");
83 83
		log.info(excludedIds.length);
84
	
84

  
85 85
		in.close();
86 86

  
87
		String value = "2222ce7e70525366b85135104043bd4d25c5lll";
88
		String value2 = "66b85135104043bd4d2";
89

  
90
		for (String id : excludedIds) {
91

  
92
			if (value.contains(id)) {
93
				log.info("contained");
94
			}
95
		}
87 96
	}
88 97

  
89 98
	public static boolean isNumeric(String str) {
modules/dnet-openaire-stats/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java
116 116
	private String getResultProject(OafRel oaf) {
117 117
		String buff = new String();
118 118
		String result = oaf.getTarget();
119
		if (isValidProjectResult(id)) {
120
			buff += getStringField(oaf.getTarget());
119

  
120
		if (isValidProjectResult(result)) {
121
			buff += getStringField(result);
121 122
			// TODO is declared as int!!!
122 123
			buff += getYearDifferenceInteger(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
123 124

  
124 125
			// DELETE FROM "shadow"."project_results" WHERE result like
125 126
			// '%ce7e70525366b85135104043bd4d25c5%';
126
		}
127
		else
128
		{
127
		} else {
129 128
			return null;
130 129
		}
131 130
		return buff;
132 131
	}
133 132

  
133
	private boolean isValidProjectResult(String id) {
134
		for (String excludedId : excludedIds) {
135

  
136
			if (id.contains(excludedId)) {
137
				log.info("contained");
138
				return false;
139
			}
140
		}
141
		return true;
142
	}
143

  
134 144
	private HashMap<String, List<String>> getDatasourceLanguages(OafEntity valueEntity) {
135 145
		HashMap<String, List<String>> rels = new HashMap<String, List<String>>();
136 146
		List<String> buffs = new ArrayList<String>();
......
412 422
		// embargo_end_date
413 423
		buff += getStringField(metadata.getEmbargoenddate().getValue());
414 424

  
415
		// `delayed`,
416
		buff += getStringField("no");
417

  
418 425
		// `authors`,
419 426
		int authors = 0;
427
		String delayed = "no";
420 428

  
429
		// UPDATE "shadow"."result" SET delayed = 'yes' WHERE id IN
430
		// (SELECT id FROM "shadow"."result_projects"
431
		// WHERE "shadow"."result_projects".id = "shadow"."result".id AND
432
		// daysfromend > 0);
433

  
421 434
		for (OafRel rel : data.getCachedRelList()) {
422 435

  
423 436
			if (rel.getRelType().equals(RelType.personResult)) {
424 437

  
425 438
				authors++;
439
			} else if (rel.getRelType().equals(RelType.resultProject))
440
			// TODO remember : in result Project, first id is project, second is
441
			// result.
442
			{
443
				String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
444
				if (Integer.parseInt(daysfromend) > 0) {
445
					delayed = "yes";
446
				}
426 447
			}
427 448
		}
449
		// `delayed`,
450
		buff += getStringField(delayed);
451

  
428 452
		// log.info("Result " + full_id +"Author count : " + authors );
429 453
		buff += getNumericField(String.valueOf(authors));
430 454

  
modules/dnet-openaire-stats/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/ContextExporter.java
55 55
		// log.info("***********context map  buffer" + contextMap);
56 56

  
57 57
		String data = contextTransformer.transformXSL(contextMap);
58
		log.info("++++++++++++++ Transformed data " + data);
58
//		log.info("++++++++++++++ Transformed data " + data);
59 59
		log.info(data);
60 60
		processData(data);
61 61
	}
......
77 77
		try {
78 78

  
79 79
			String[] split = data.split("COPY");
80
			log.info("Importing context, concept and category...");
81
			split[0] = split[0].replaceFirst("", "");
82
			log.info("Importing context" + split[0]);
80
//			log.info("Importing context, concept and category...");
81
			split[0] = split[0].replaceFirst("\n", "");
82
//			log.info("Importing context" + split[0]);
83 83
			writeData(split[0], outputPath + "context");
84
			log.info("Importing category" + split[1]);
84
//			log.info("Importing category" + split[1]);
85 85
			writeData(split[1], outputPath + "category");
86
			log.info("concept" + split[2]);
86
//			log.info("concept" + split[2]);
87 87
			writeData(split[2], outputPath + "concept");
88 88
		} catch (Exception e) {
89 89
			String msg = " Unable to create file with context, " + "concept and category values in output path " + outputPath + ". Reason: ";
modules/dnet-openaire-stats/trunk/src/main/resources/eu/dnetlib/data/mapreduce/hbase/statsExport/stats_db_schema.sql
70 70
 
71 71
BEGIN
72 72
   
73
 CREATE OR REPLACE VIEW  "shadow".datasource_results   as  SELECT datasource as id  , id  as  result FROM "shadow".result_datasources ;
73
 CREATE OR REPLACE    VIEW  "shadow".datasource_results   as  SELECT datasource as id  , id  as  result FROM "shadow".result_datasources ;
74 74
 
75
 CREATE OR REPLACE VIEW "shadow".organization_datasources as SELECT   organization  as id , id as datasource  FROM "shadow".datasource_organizations ;
75
 CREATE OR REPLACE   VIEW "shadow".organization_datasources as SELECT   organization  as id , id as datasource  FROM "shadow".datasource_organizations ;
76 76
 
77
 CREATE OR REPLACE VIEW "shadow".organization_projects as SELECT  id  as project, organization as id FROM "shadow".project_organizations ;
77
 CREATE OR REPLACE   VIEW "shadow".organization_projects as SELECT  id  as project, organization as id FROM "shadow".project_organizations ;
78 78
 
79
CREATE OR REPLACE VIEW shadow.result_projects AS
79
CREATE OR REPLACE   VIEW shadow.result_projects AS
80 80
    SELECT  project_results.result AS id,  project_results.id AS project, daysfromend as daysfromend FROM project_results;
81 81
 
82
 CREATE OR REPLACE VIEW "shadow".datasource_topics AS 
83
 SELECT distinct datasource.id, result_topics.topic
84
   FROM datasource, datasource_results, result, result_topics
85
  WHERE datasource.datasource_results = datasource_results.id AND datasource_results.result = result.id AND result_topics.id = result.result_topics;
82
 CREATE OR REPLACE   VIEW "shadow".datasource_topics AS 
83
 SELECT distinct "shadow".datasource.id, "shadow".result_topics.topic
84
   FROM "shadow".datasource, "shadow".datasource_results, "shadow".result, "shadow".result_topics
85
  WHERE "shadow".datasource.datasource_results = "shadow".datasource_results.id AND "shadow".datasource_results.result = "shadow".result.id AND 
86
  "shadow".result_topics.id = "shadow".result.result_topics;
86 87
 
87

  
88 88
  END;$$;
89
   
90
 
89 91

  
90

  
91 92
ALTER FUNCTION shadow.create_views() OWNER TO sqoop;
92 93
 
93 94
  
......
104 105
UPDATE "shadow".datasource SET name='Other' where name='Unknown Repository';
105 106

  
106 107
UPDATE "shadow".result_datasources SET datasource=(SELECT id FROM "shadow".datasource WHERE name='Other') WHERE datasource NOT IN (SELECT id FROM "shadow".datasource);
107

  
108
DELETE FROM "shadow"."project_results" WHERE result like '%ce7e70525366b85135104043bd4d25c5%';
109
DELETE FROM "shadow"."project_results" WHERE result like '%f2c58f68ce24b1620892e58f513fe434%';
110
DELETE FROM "shadow"."project_results" WHERE result like '%2c5c62d74859b5c438644e0b16b1df72%';
111
DELETE FROM "shadow"."project_results" WHERE result like '%4f63371fe55931ad54fb0a064fbf492a%';
112
DELETE FROM "shadow"."project_results" WHERE result like '%fc2728fd8417eb840e55d493c9e24df1%';
113
DELETE FROM "shadow"."project_results" WHERE result like '%e252d94b8ee43343dce676797eb1852b%';
114
DELETE FROM "shadow"."project_results" WHERE result like '%bd23d2798f64de9e22f9c181418a41cb%';
115
DELETE FROM "shadow"."project_results" WHERE result like '%a71b2b8b4ae7967d6de49257ca072a27%';
116
DELETE FROM "shadow"."project_results" WHERE result like '%d566db9a4134c79a345482cee7d72068%';
117
DELETE FROM "shadow"."project_results" WHERE result like '%7f96945916d250894d0228713538f881%';
118
DELETE FROM "shadow"."project_results" WHERE result like '%e0364b5223b1238a92b0f14e228317fb%';
119
DELETE FROM "shadow"."project_results" WHERE result like '%e0364b5223b1238a92b0f14e228317fb%';
120
DELETE FROM "shadow"."project_results" WHERE result like '%0220669ba22117ac5bb926ecff57b66d%';
121
DELETE FROM "shadow"."project_results" WHERE result like '%fc662c8ec9fb0a90414b9979883f1749%';
122
DELETE FROM "shadow"."project_results" WHERE result like '%7f910c6ce811f621f818398f9d1c8f0f%';
123
DELETE FROM "shadow"."project_results" WHERE result like '%c94fed40368ed0df90deb1f0c18cc662%';
124
DELETE FROM "shadow"."project_results" WHERE result like '%f977ba88a386905527806320a3d4340b%';
125
DELETE FROM "shadow"."project_results" WHERE result like '%23179e252a801676eb4020235d1db07a%';
126
DELETE FROM "shadow"."project_results" WHERE result like '%17569bd9b84297cb2b5b33431b440b65%';
127
DELETE FROM "shadow"."project_results" WHERE result like '%666e4829393c1e71567a7184066adde5%';
128
DELETE FROM "shadow"."project_results" WHERE result like '%2aec5aeec0c64ef1b60fff18b7ec3ede%';
129
DELETE FROM "shadow"."project_results" WHERE result like '%87269afe6918f6982b6431a2a6365661%';
130
DELETE FROM "shadow"."project_results" WHERE result like '%f7e4365c6edf8b41d7e753163a5f9111%';
131
DELETE FROM "shadow"."project_results" WHERE result like '%87f8032a82b519fc16c4545b7a3c4818%';
132
DELETE FROM "shadow"."project_results" WHERE result like '%99665f6386eaad3ceb0019b3decd93d2%';
133
DELETE FROM "shadow"."project_results" WHERE result like '%143e3f503cf55a5275d7e14d26d3af7f%';
134
DELETE FROM "shadow"."project_results" WHERE result like '%6043a0e49c8142f519a666080247ec9a%';
135
DELETE FROM "shadow"."project_results" WHERE result like '%ee92c0232a0631c3aaed95f77e15f561%';
136
DELETE FROM "shadow"."project_results" WHERE result like '%3661e02c0699fe7cbbcfdf457d20c22e%';
137
DELETE FROM "shadow"."project_results" WHERE result like '%ead9a00b10810705f6d5f49b5dd1fc37%';
138
DELETE FROM "shadow"."project_results" WHERE result like '%050cbbba648acd5f4c84b2f246ce330b%';
139

  
140
   
141
--INSERT INTO "shadow".datasource_results (id, result) (SELECT datasource, id FROM "shadow".result_datasources);
142 108
  
143
--INSERT INTO "shadow".organization_datasources ( datasource, id ) (SELECT  id ,organization   FROM "shadow".datasourceOrganization);
144
 
145
--INSERT INTO "shadow".organization_projects (project , id) (SELECT id, organization FROM "shadow".project_organizations );
146
 
147
--INSERT INTO "shadow".project_results (id, result) (SELECT project, id FROM "shadow".result_projects);
148 109
TRUNCATE TABLE "shadow".defaults;
149 110
INSERT INTO "shadow".defaults VALUES ('result', 'year', 'number', 'count', '', 'column', 1);
150 111
INSERT INTO "shadow".defaults VALUES ('project', 'funding_lvl1', 'number', 'count', '', 'column', 2);
151 112
INSERT INTO "shadow".defaults VALUES ('organization', 'country', 'number', 'count', '', 'column',  3);
152 113
INSERT INTO "shadow".defaults VALUES ('datasource', 'oa_compatible', 'number', 'count', '', 'column', 4);
153 114

  
154
UPDATE "shadow"."project" SET haspubs='yes' WHERE id in (select project from shadow.result_projects, shadow.result where shadow.result.result_projects = shadow.result_projects.id and "shadow"."result_projects".project = "shadow"."project".id and shadow.result.type = 'publication'); -- where id IN (SELECT project from "shadow"."result_projects");
155 115

  
156
UPDATE "shadow"."project" SET numpubs=(SELECT count(*) from "shadow"."result_projects", shadow.result WHERE shadow.result.result_projects = shadow.result_projects.id and project="shadow"."project".id and shadow.result.type='publication');
157
UPDATE "shadow"."project" SET delayedpubs = (SELECT count(*) from "shadow"."result_projects", shadow.result WHERE shadow.result.result_projects = shadow.result_projects.id and project="shadow"."project".id AND daysfromend > 0 and shadow.result.type='publication');
158
UPDATE "shadow"."project" SET daysforlastpub = (SELECT max(daysfromend) FROM "shadow"."result_projects", shadow.result WHERE shadow.result.result_projects = shadow.result_projects.id and shadow.result.type='publication' and project="shadow"."project".id);
159 116

  
160
UPDATE "shadow"."result" SET delayed = 'yes' WHERE id IN (SELECT id FROM "shadow"."result_projects" WHERE "shadow"."result_projects".id = "shadow"."result".id AND daysfromend > 0);
117
CREATE OR REPLACE    VIEW shadow.results_publication AS 
118
SELECT  "shadow".results.id AS result,  "shadow".results.result_projects AS result_projects 
119
FROM  "shadow".results where  "shadow".result.type='publication';
120
  
121
CREATE OR REPLACE   VIEW shadow.results_projects_publication AS
122
  SELECT  "shadow".results_publication.result AS result,  "shadow".results_publication.result_projects AS result_projects,
123
 "shadow". project.project_results as project_results, "shadow".project.id as project
124
    FROM  "shadow".results_publication, "shadow".project  where  "shadow".results_publication.result="shadow".project.project_results;
125

  
126

  
127
UPDATE "shadow"."project" SET haspubs='yes'
128
WHERE id in (select project from shadow.result_projects,
129
shadow.results_publication where shadow.results_publication.result_projects
130
= shadow.result_projects.id and "shadow"."result_projects".project = "shadow"."project".id); -- where id IN (SELECT project from "shadow"."result_projects");
131

  
132
UPDATE "shadow"."project" SET numpubs=(SELECT count(*) from 
133
"shadow"."result_projects", shadow.result_projects WHERE shadow.result_projects.result_projects = shadow.result_projects.id 
134
and project="shadow"."project".id );
135

  
136
UPDATE "shadow"."project" SET delayedpubs = (SELECT count(*) from "shadow"."result_projects", 
137
shadow.result_projects WHERE shadow.result_projects.result_projects = shadow.result_projects.id and project="shadow"."project".id 
138
AND daysfromend > 0);
139
--UPDATE "shadow"."project" SET daysforlastpub = 
140
--(SELECT max(daysfromend) FROM "shadow"."result_projects",  "shadow".results_publication 
141
--WHERE    "shadow".results_publication.result_projects = shadow.result_projects.id  and project="shadow"."project".id);
142
--
143
--
144
UPDATE "shadow"."project" SET daysforlastpub = 
145
(SELECT max(daysfromend) FROM "shadow"."result_projects", shadow.results_publication 
146
WHERE shadow.results_publication.result_projects = shadow.result_projects.id and project="shadow"."project".id);
147

  
148
--UPDATE "shadow"."project" SET daysforlastpub = 
149
--(SELECT max(daysfromend) FROM "shadow"."result_projects", shadow.result 
150
--WHERE shadow.result.result_projects = shadow.result_projects.id and 
151
--shadow.result.type='publication' and project="shadow"."project".id);
152
-- 
153

  
154
--shadow.results_publication
155
--UPDATE "shadow"."result" SET delayed = 'yes' WHERE id IN (SELECT id FROM "shadow"."result_projects" WHERE "shadow"."result_projects".id = "shadow"."result".id AND daysfromend > 0);
161 156
 
162 157
 
163 158
END

Also available in: Unified diff