Revision 54431
Added by Tsampikos Livisianos over 5 years ago
modules/dnet-openaire-stats/branches/new-schema/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/utils/Serializer.java | ||
---|---|---|
22 | 22 |
import org.apache.log4j.Logger; |
23 | 23 |
import org.jsoup.Jsoup; |
24 | 24 |
|
25 |
import java.text.DateFormat; |
|
26 |
import java.text.ParseException; |
|
25 | 27 |
import java.text.SimpleDateFormat; |
26 | 28 |
import java.util.ArrayList; |
27 | 29 |
import java.util.Date; |
... | ... | |
100 | 102 |
switch (valueEntity.getType()) { |
101 | 103 |
case datasource: |
102 | 104 |
getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING); |
105 |
getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING); |
|
103 | 106 |
case result: |
104 | 107 |
getResultTopics(valueEntity, relations, DELIM, ENCLOSING); |
105 | 108 |
getResultLanguages(valueEntity, relations, DELIM, ENCLOSING); |
... | ... | |
109 | 112 |
getResultDois(valueEntity, relations, DELIM, ENCLOSING); |
110 | 113 |
getResultCitations(valueEntity, relations, DELIM, ENCLOSING); |
111 | 114 |
getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING); |
115 |
getResultExtra(valueEntity, relations, DELIM, ENCLOSING); |
|
112 | 116 |
|
113 | 117 |
case project: |
114 | 118 |
getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING); |
... | ... | |
119 | 123 |
|
120 | 124 |
} |
121 | 125 |
|
126 |
private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
|
127 |
Datasource d = valueEntity.getDatasource(); |
|
128 |
Metadata metadata = d.getMetadata(); |
|
122 | 129 |
|
130 |
relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
131 |
} |
|
132 |
|
|
133 |
private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
|
134 |
Result result = valueEntity.getResult(); |
|
135 |
Result.Metadata metadata = result.getMetadata(); |
|
136 |
|
|
137 |
StringBuilder buff = new StringBuilder(); |
|
138 |
String titleString = ""; |
|
139 |
|
|
140 |
for (int i = 0; i < metadata.getTitleList().size(); i++) { |
|
141 |
StructuredProperty title = metadata.getTitleList().get(i); |
|
142 |
|
|
143 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
144 |
titleString = titleString.replaceAll("\n", " "); |
|
145 |
break; |
|
146 |
} |
|
147 |
|
|
148 |
// pubtitle |
|
149 |
buff.append(getStringField(titleString, DELIM, ENCLOSING)); |
|
150 |
|
|
151 |
String sources = ""; |
|
152 |
for (Instance instance : (result.getInstanceList())) { |
|
153 |
List<String> urls = instance.getUrlList(); |
|
154 |
for (String url : urls) { |
|
155 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;"; |
|
156 |
} |
|
157 |
} |
|
158 |
|
|
159 |
//sources |
|
160 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
161 |
buff.append(sources); |
|
162 |
|
|
163 |
relations.put("resultExtra", buff.toString()); |
|
164 |
} |
|
165 |
|
|
123 | 166 |
private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) { |
124 | 167 |
|
125 | 168 |
String relName = oafEntity.getType().toString().toLowerCase() + "Oid"; |
... | ... | |
289 | 332 |
Metadata metadata = oaf.getEntity().getDatasource().getMetadata(); |
290 | 333 |
StringBuilder buff = new StringBuilder(); |
291 | 334 |
|
292 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
293 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
294 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
295 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
296 |
|
|
297 | 335 |
// name |
298 | 336 |
if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) { |
299 | 337 |
buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING)); |
300 | 338 |
} else { |
301 | 339 |
buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING)); |
302 | 340 |
} |
341 |
|
|
303 | 342 |
// type |
304 |
|
|
305 | 343 |
if (metadata.hasDatasourcetype()) { |
306 | 344 |
buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING)); |
307 | 345 |
} |
... | ... | |
309 | 347 |
// compatibility, |
310 | 348 |
buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING)); |
311 | 349 |
|
312 |
// latitude |
|
313 |
buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING)); |
|
314 |
|
|
315 |
// longtitude |
|
316 |
buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING)); |
|
317 |
|
|
318 | 350 |
// dateofvalidation, |
319 | 351 |
buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING)); |
320 | 352 |
|
321 | 353 |
// yearofvalidation, |
322 | 354 |
buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING)); |
323 | 355 |
|
324 |
//website
|
|
325 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
|
|
356 |
//harvested
|
|
357 |
buff.append(getStringField("false", DELIM, ENCLOSING));
|
|
326 | 358 |
|
327 | 359 |
//piwik_id |
328 | 360 |
String piwik_id = ""; |
... | ... | |
334 | 366 |
} |
335 | 367 |
buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING)); |
336 | 368 |
|
337 |
//harvested |
|
338 |
buff.append(getStringField("false", DELIM, ENCLOSING)); |
|
339 |
|
|
340 |
// deletedByInference |
|
341 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
342 |
|
|
343 |
// number?? |
|
344 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
345 |
|
|
346 | 369 |
return buff.toString(); |
347 | 370 |
|
348 | 371 |
} |
... | ... | |
352 | 375 |
StringBuilder buff = new StringBuilder(); |
353 | 376 |
Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata(); |
354 | 377 |
|
355 |
// `organization_datasources`, |
|
356 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
357 |
// organization_projects |
|
358 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
359 | 378 |
// `name`, |
360 | 379 |
buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING)); |
361 | 380 |
|
362 | 381 |
// `country`, |
363 | 382 |
buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING)); |
364 | 383 |
|
365 |
//website |
|
366 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
367 |
|
|
368 |
// deletedByInference |
|
369 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
370 |
|
|
371 |
// number |
|
372 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
373 |
|
|
374 | 384 |
return buff.toString(); |
375 | 385 |
} |
376 | 386 |
|
... | ... | |
379 | 389 |
|
380 | 390 |
Result.Metadata metadata = oaf.getEntity().getResult().getMetadata(); |
381 | 391 |
|
382 |
// result_topics/ |
|
383 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
384 |
// result_languages |
|
385 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
386 |
// `result_projects`, |
|
387 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
388 |
// `result_datasources`, |
|
389 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
390 |
// `result_classifications`, |
|
391 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
392 |
/// `result_infrastructures`, |
|
393 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
394 |
// `result_claims`, |
|
395 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
396 |
// `result_results`, |
|
397 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
398 |
// pubtitle |
|
399 |
|
|
400 |
String titleString = new String(); |
|
401 |
|
|
402 |
for (int i = 0; i < metadata.getTitleList().size(); i++) { |
|
403 |
StructuredProperty title = metadata.getTitleList().get(i); |
|
404 |
|
|
405 |
if (i == 0) { |
|
406 |
titleString = title.getValue().replaceAll("\\s+", " "); |
|
407 |
titleString = titleString.replaceAll("\n", " "); |
|
408 |
} |
|
409 |
break; |
|
410 |
} |
|
411 |
|
|
412 |
// pubtitle |
|
413 |
buff.append(getStringField(titleString, DELIM, ENCLOSING)); |
|
414 |
|
|
415 |
// format |
|
416 |
String formatString = new String(); |
|
417 |
|
|
418 |
for (StringField format : metadata.getFormatList()) { |
|
419 |
formatString += format.getValue() + ";"; |
|
420 |
|
|
421 |
} |
|
422 |
|
|
423 |
buff.append(getStringField(formatString, DELIM, ENCLOSING)); |
|
424 | 392 |
// publisher |
425 |
|
|
426 | 393 |
buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING)); |
427 | 394 |
|
428 | 395 |
// journal |
429 |
|
|
430 | 396 |
buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING)); //#null#! |
431 | 397 |
|
432 | 398 |
// year |
433 | 399 |
buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING)); |
434 | 400 |
|
435 |
// date CHANGED THIS TO DATE FORMAT
|
|
401 |
// date |
|
436 | 402 |
buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING)); |
437 | 403 |
|
438 |
// access_mode, |
|
439 |
buff.append(getStringField(getAccessMode(oaf.getEntity().getResult()), DELIM, ENCLOSING)); |
|
440 |
|
|
441 | 404 |
// bestlicense |
442 | 405 |
buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING)); |
443 | 406 |
|
... | ... | |
448 | 411 |
buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING)); |
449 | 412 |
|
450 | 413 |
// `authors`, |
451 |
int authors = 0;
|
|
414 |
int authors = metadata.getAuthorCount();
|
|
452 | 415 |
|
416 |
|
|
453 | 417 |
String delayed = "no"; |
454 | 418 |
|
455 | 419 |
for (OafRel rel : oaf.getEntity().getCachedRelList()) { |
... | ... | |
477 | 441 |
//authors |
478 | 442 |
buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING)); |
479 | 443 |
|
480 |
String sources = new String(); |
|
481 |
|
|
482 |
|
|
483 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
|
484 |
List<String> urls = instance.getUrlList(); |
|
485 |
for (String url : urls) { |
|
486 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;"; |
|
487 |
} |
|
488 |
} |
|
489 |
|
|
490 |
//sources |
|
491 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
492 |
|
|
493 |
buff.append(sources); |
|
494 |
|
|
495 |
// deletedByInference |
|
496 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
497 |
|
|
498 |
|
|
499 |
// number?? |
|
500 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
501 | 444 |
return buff.toString(); |
502 | 445 |
|
503 | 446 |
} |
... | ... | |
568 | 511 |
StringBuilder buff = new StringBuilder(); |
569 | 512 |
Project.Metadata metadata = oaf.getEntity().getProject().getMetadata(); |
570 | 513 |
|
571 |
// project_organizations |
|
572 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
573 |
|
|
574 |
// project_results |
|
575 |
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM); |
|
576 |
|
|
577 |
|
|
578 | 514 |
// `acronym`, |
579 | 515 |
String acronym = metadata.getAcronym().getValue(); |
580 | 516 |
if (acronym.equalsIgnoreCase("UNKNOWN")) { |
581 | 517 |
acronym = metadata.getTitle().getValue(); |
582 | 518 |
} |
583 |
|
|
584 | 519 |
buff.append(getStringField(acronym, DELIM, ENCLOSING)); |
585 | 520 |
|
586 |
//title!
|
|
521 |
//title |
|
587 | 522 |
buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING)); |
588 | 523 |
|
524 |
//funding_lvl |
|
589 | 525 |
List<StringField> fundList = metadata.getFundingtreeList(); |
590 |
|
|
591 | 526 |
if (!fundList.isEmpty()) // `funding_lvl0`, |
592 | 527 |
{ |
593 | 528 |
//TODO funder + 3 funding levels |
... | ... | |
597 | 532 |
funding_lvl2 text, |
598 | 533 |
funding_lvl3 text,*/ |
599 | 534 |
buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING)); |
600 |
|
|
601 | 535 |
} else { |
602 | 536 |
buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING)); |
603 |
|
|
604 | 537 |
} |
605 | 538 |
|
539 |
//sc39 |
|
606 | 540 |
String sc39 = metadata.getEcsc39().getValue().toString(); |
607 | 541 |
if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) { |
608 | 542 |
sc39 = "yes"; |
609 | 543 |
} else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) { |
610 | 544 |
sc39 = "no"; |
611 | 545 |
} |
612 |
|
|
613 | 546 |
buff.append(getStringField(sc39, DELIM, ENCLOSING)); |
614 | 547 |
|
615 |
|
|
616 | 548 |
//project_type |
617 | 549 |
buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING)); |
618 | 550 |
|
619 |
// `url`, |
|
620 |
buff.append(getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING)); |
|
621 |
|
|
622 | 551 |
// start_year |
623 |
|
|
624 | 552 |
buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING)); |
625 | 553 |
|
626 | 554 |
// end_year |
627 | 555 |
buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING)); |
628 | 556 |
|
629 | 557 |
// duration enddate-startdate |
630 |
|
|
631 | 558 |
buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING)); |
632 | 559 |
|
633 | 560 |
// haspubs |
... | ... | |
637 | 564 |
buff.append(getNumericField("0", DELIM, ENCLOSING)); |
638 | 565 |
|
639 | 566 |
// enddate |
640 |
buff.append(getNumericField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
567 |
buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
641 | 568 |
|
642 | 569 |
// startdate |
643 |
buff.append(getNumericField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
570 |
buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
644 | 571 |
|
645 | 572 |
// `daysforlastpub`, |
646 | 573 |
buff.append(getNumericField("", DELIM, ENCLOSING)); |
... | ... | |
650 | 577 |
|
651 | 578 |
//call identifier |
652 | 579 |
buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING)); |
580 |
|
|
653 | 581 |
//code |
654 | 582 |
buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING)); |
655 | 583 |
|
656 |
//esc39 |
|
657 |
buff.append(getStringField(metadata.getEcsc39().getValue(), DELIM, ENCLOSING)); |
|
658 |
|
|
659 |
//getUrl |
|
660 |
String sources = new String(); |
|
661 |
|
|
662 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
|
663 |
List<String> urls = instance.getUrlList(); |
|
664 |
for (String u : urls) { |
|
665 |
sources += u + ";"; |
|
666 |
} |
|
667 |
} |
|
668 |
|
|
669 |
sources = cleanUrl(sources, DELIM, ENCLOSING); |
|
670 |
sources = ENCLOSING + sources + ENCLOSING + DELIM; |
|
671 |
|
|
672 |
buff.append(sources); |
|
673 |
|
|
674 |
// deletedByInference |
|
675 |
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING)); |
|
676 |
|
|
677 |
// `number` |
|
678 |
buff.append(getStringField("1", DELIM, ENCLOSING)); |
|
679 | 584 |
return buff.toString(); |
680 | 585 |
|
681 | 586 |
} |
... | ... | |
809 | 714 |
data = data.replace(DELIM, " "); |
810 | 715 |
data = data.replace(ENCLOSING, " "); |
811 | 716 |
data = data.replaceAll("\\r\\n|\\r|\\n", ""); |
812 |
return ENCLOSING + data + ENCLOSING + DELIM; |
|
717 |
try { |
|
718 |
DateFormat format = new SimpleDateFormat("yyyy-MM-dd"); |
|
719 |
data = format.format(format.parse(data)); |
|
720 |
return ENCLOSING + data + ENCLOSING + DELIM; |
|
721 |
} catch (ParseException e) { |
|
722 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
|
723 |
} |
|
813 | 724 |
} |
814 | 725 |
} |
815 | 726 |
|
modules/dnet-openaire-stats/branches/new-schema/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/daos/StatsDAO.java | ||
---|---|---|
130 | 130 |
log.info("Schema renamed"); |
131 | 131 |
} |
132 | 132 |
|
133 |
public void addArrayColumns() throws Exception { |
|
134 |
log.info("Adding new columns ..."); |
|
135 |
Connection con = statsDatasource.getConnection(); |
|
136 |
|
|
137 |
startTime = System.currentTimeMillis(); |
|
138 |
String q = "{call shadow.create_arrays()}"; |
|
139 |
|
|
140 |
CallableStatement st = con.prepareCall(q); |
|
141 |
st.execute(); |
|
142 |
|
|
143 |
st.close(); |
|
144 |
con.close(); |
|
145 |
|
|
146 |
endtime = System.currentTimeMillis(); |
|
147 |
log.info("Time to add columns: " + ((endtime - startTime) / 60000) + " minutes "); |
|
148 |
} |
|
149 |
|
|
133 | 150 |
public void cleanTables() throws Exception { |
134 | 151 |
log.info(" Cleaning Tables..."); |
135 | 152 |
Connection con = statsDatasource.getConnection(); |
... | ... | |
169 | 186 |
Connection con = statsDatasource.getConnection(); |
170 | 187 |
|
171 | 188 |
startTime = System.currentTimeMillis(); |
172 |
String q = "{call create_charts()}"; |
|
189 |
String q = "{call shadow.create_charts()}";
|
|
173 | 190 |
|
174 | 191 |
CallableStatement st = con.prepareCall(q); |
175 | 192 |
st.execute(); |
... | ... | |
191 | 208 |
Connection con = statsDatasource.getConnection(); |
192 | 209 |
|
193 | 210 |
startTime = System.currentTimeMillis(); |
194 |
String q = "{call create_chart_indexes()}"; |
|
211 |
String q = "{call shadow.create_chart_indexes()}";
|
|
195 | 212 |
|
196 | 213 |
CallableStatement st = con.prepareCall(q); |
197 | 214 |
st.execute(); |
... | ... | |
212 | 229 |
log.info(" Building Database Indexes..."); |
213 | 230 |
Connection con = statsDatasource.getConnection(); |
214 | 231 |
startTime = System.currentTimeMillis(); |
215 |
String q = "{call create_indexes()}"; |
|
232 |
String q = "{call shadow.create_indexes()}";
|
|
216 | 233 |
CallableStatement st = con.prepareCall(q); |
217 | 234 |
st.execute(); |
218 | 235 |
st.close(); |
... | ... | |
231 | 248 |
Connection con = statsDatasource.getConnection(); |
232 | 249 |
|
233 | 250 |
startTime = System.currentTimeMillis(); |
234 |
String q = "{call create_views()}"; |
|
251 |
String q = "{call shadow.create_views()}";
|
|
235 | 252 |
|
236 | 253 |
CallableStatement st = con.prepareCall(q); |
237 | 254 |
st.execute(); |
... | ... | |
254 | 271 |
startTime = System.currentTimeMillis(); |
255 | 272 |
|
256 | 273 |
populateDefaults(); |
257 |
update_project_results(); |
|
258 |
//update_project_has_pubs();
|
|
259 |
//update_project_pubs_count();
|
|
260 |
//update_project_delated_pubs();
|
|
261 |
//update_project_daysforlastpub();
|
|
262 |
//update_project_delayed_pubs();
|
|
274 |
//update_project_results();
|
|
275 |
update_project_has_pubs(); |
|
276 |
update_project_pubs_count(); |
|
277 |
update_project_delated_pubs(); |
|
278 |
update_project_daysforlastpub(); |
|
279 |
update_project_delayed_pubs(); |
|
263 | 280 |
cleanUp(); |
264 | 281 |
|
265 | 282 |
endtime = System.currentTimeMillis(); |
... | ... | |
274 | 291 |
Connection con = statsDatasource.getConnection(); |
275 | 292 |
|
276 | 293 |
startTime = System.currentTimeMillis(); |
277 |
String q = "{call extra_defaults_datasource()}"; |
|
294 |
String q = "{call shadow.extra_defaults_datasource()}";
|
|
278 | 295 |
|
279 | 296 |
CallableStatement st = con.prepareCall(q); |
280 | 297 |
st.execute(); |
... | ... | |
296 | 313 |
Connection con = statsDatasource.getConnection(); |
297 | 314 |
|
298 | 315 |
startTime = System.currentTimeMillis(); |
299 |
String q = "{call update_project_results()}"; |
|
316 |
String q = "{call shadow.update_project_results()}";
|
|
300 | 317 |
|
301 | 318 |
CallableStatement st = con.prepareCall(q); |
302 | 319 |
st.execute(); |
... | ... | |
319 | 336 |
Connection con = statsDatasource.getConnection(); |
320 | 337 |
|
321 | 338 |
startTime = System.currentTimeMillis(); |
322 |
String q = "{call project_has_pubs()}"; |
|
339 |
String q = "{call shadow.project_has_pubs()}";
|
|
323 | 340 |
|
324 | 341 |
CallableStatement st = con.prepareCall(q); |
325 | 342 |
st.execute(); |
... | ... | |
341 | 358 |
Connection con = statsDatasource.getConnection(); |
342 | 359 |
|
343 | 360 |
startTime = System.currentTimeMillis(); |
344 |
String q = "{call project_pubs_count()}"; |
|
361 |
String q = "{call shadow.project_pubs_count()}";
|
|
345 | 362 |
|
346 | 363 |
CallableStatement st = con.prepareCall(q); |
347 | 364 |
st.execute(); |
... | ... | |
363 | 380 |
Connection con = statsDatasource.getConnection(); |
364 | 381 |
|
365 | 382 |
startTime = System.currentTimeMillis(); |
366 |
String q = "{call project_delayedpubs()}"; |
|
383 |
String q = "{call shadow.project_delayedpubs()}";
|
|
367 | 384 |
|
368 | 385 |
CallableStatement st = con.prepareCall(q); |
369 | 386 |
st.execute(); |
... | ... | |
387 | 404 |
Connection con = statsDatasource.getConnection(); |
388 | 405 |
|
389 | 406 |
startTime = System.currentTimeMillis(); |
390 |
String q = "{call project_daysforlastpub()}"; |
|
407 |
String q = "{call shadow.project_daysforlastpub()}";
|
|
391 | 408 |
|
392 | 409 |
CallableStatement st = con.prepareCall(q); |
393 | 410 |
st.execute(); |
... | ... | |
411 | 428 |
Connection con = statsDatasource.getConnection(); |
412 | 429 |
|
413 | 430 |
startTime = System.currentTimeMillis(); |
414 |
String q = "{call project_delayed()}"; |
|
431 |
String q = "{call shadow.project_delayed()}";
|
|
415 | 432 |
|
416 | 433 |
CallableStatement st = con.prepareCall(q); |
417 | 434 |
st.execute(); |
... | ... | |
434 | 451 |
Connection con = statsDatasource.getConnection(); |
435 | 452 |
|
436 | 453 |
startTime = System.currentTimeMillis(); |
437 |
String q = "{call cleanTemps()}"; |
|
454 |
String q = "{call shadow.cleanTemps()}";
|
|
438 | 455 |
|
439 | 456 |
CallableStatement st = con.prepareCall(q); |
440 | 457 |
st.execute(); |
modules/dnet-openaire-stats/branches/new-schema/src/main/java/eu/dnetlib/data/mapreduce/hbase/statsExport/drivers/DBDriver.java | ||
---|---|---|
39 | 39 |
public void finalizedDB() throws Exception { |
40 | 40 |
|
41 | 41 |
try { |
42 |
statsDao.cleanTables(); |
|
42 |
//statsDao.cleanTables(); |
|
43 |
//statsDao.addArrayColumns(); |
|
43 | 44 |
statsDao.buildIndexes(); |
44 | 45 |
statsDao.buildViews(); |
45 | 46 |
statsDao.executeExtraInserts(); |
modules/dnet-openaire-stats/branches/new-schema/src/main/resources/eu/dnetlib/data/mapreduce/hbase/statsExport/stats_db_schema.sql | ||
---|---|---|
43 | 43 |
|
44 | 44 |
CREATE TABLE shadow.datasource ( |
45 | 45 |
id text NOT NULL, |
46 |
datasource_results text NOT NULL, |
|
47 |
datasource_organizations text NOT NULL, |
|
48 |
datasource_languages text NOT NULL, |
|
49 |
datasource_topics text NOT NULL, |
|
50 | 46 |
name text , |
51 | 47 |
type text, |
52 | 48 |
compatibility text, |
53 |
latitude text, |
|
54 |
longitude text, |
|
55 | 49 |
dateofvalidation text, |
56 | 50 |
yearofvalidation integer, |
57 |
websiteurl text, |
|
58 |
piwik_id integer, |
|
59 | 51 |
harvested TEXT DEFAULT 'false', |
60 |
deletedbyinference TEXT, |
|
61 |
number integer DEFAULT 1 NOT NULL |
|
52 |
piwik_id integer |
|
62 | 53 |
); |
63 | 54 |
|
64 | 55 |
-- |
... | ... | |
70 | 61 |
language text |
71 | 62 |
); |
72 | 63 |
|
64 |
-- |
|
65 |
-- Name: datasource_websites; Type: TABLE; Schema: shadow; Owner: sqoop; Tablespace: |
|
66 |
-- |
|
73 | 67 |
|
68 |
CREATE TABLE shadow.datasource_websites ( |
|
69 |
id text NOT NULL, |
|
70 |
website text |
|
71 |
); |
|
72 |
|
|
74 | 73 |
-- |
75 | 74 |
-- Name: datasource_organizations; Type: TABLE; Schema: shadow; Owner: sqoop; Tablespace: |
76 | 75 |
-- |
... | ... | |
152 | 151 |
|
153 | 152 |
CREATE TABLE shadow.organization ( |
154 | 153 |
id text NOT NULL, |
155 |
organization_datasources text NOT NULL, |
|
156 |
organization_projects text NOT NULL, |
|
157 | 154 |
name text, |
158 |
country text, |
|
159 |
websiteurl text, |
|
160 |
deletedbyinference TEXT, |
|
161 |
number integer DEFAULT 1 NOT NULL |
|
155 |
country text |
|
162 | 156 |
); |
163 | 157 |
|
164 | 158 |
|
... | ... | |
203 | 197 |
|
204 | 198 |
CREATE TABLE shadow.project ( |
205 | 199 |
id text NOT NULL, |
206 |
project_organizations text NOT NULL, |
|
207 |
project_results text NOT NULL, |
|
208 | 200 |
acronym text , |
209 | 201 |
title text , |
210 | 202 |
funder text, |
211 | 203 |
funding_lvl0 text, |
212 | 204 |
funding_lvl1 text, |
213 | 205 |
funding_lvl2 text, |
214 |
funding_lvl3 text,
|
|
206 |
funding_lvl3 text, |
|
215 | 207 |
sc39 text, |
216 | 208 |
type text, |
217 |
url text, |
|
218 | 209 |
start_year integer, |
219 | 210 |
end_year integer, |
220 | 211 |
duration integer, |
... | ... | |
225 | 216 |
daysforlastpub integer, |
226 | 217 |
delayedpubs integer, |
227 | 218 |
callidentifier text, |
228 |
code text, |
|
229 |
ecarticle293 text, |
|
230 |
sources text, |
|
231 |
deletedbyinference TEXT, |
|
232 |
number text |
|
219 |
code text |
|
233 | 220 |
); |
234 | 221 |
|
235 | 222 |
-- |
... | ... | |
292 | 279 |
-- |
293 | 280 |
|
294 | 281 |
CREATE TABLE shadow.result ( |
295 |
id text NOT NULL, |
|
296 |
result_topics text NOT NULL, |
|
297 |
result_languages text NOT NULL, |
|
298 |
result_projects text NOT NULL, |
|
299 |
result_datasources text NOT NULL, |
|
300 |
result_classifications text NOT NULL, |
|
301 |
result_infrastructures text NOT NULL, |
|
302 |
result_claims text NOT NULL, |
|
303 |
result_results text NOT NULL, |
|
304 |
title text, |
|
305 |
format text, |
|
282 |
id text NOT NULL, |
|
306 | 283 |
publisher text, |
307 | 284 |
journal text, |
308 | 285 |
year integer, |
309 | 286 |
date text , |
310 |
access_mode text, |
|
311 | 287 |
bestlicense text, |
312 | 288 |
type text , |
313 | 289 |
embargo_end_date text, |
314 | 290 |
delayed text, |
315 |
authors integer, |
|
316 |
source text, |
|
317 |
deletedbyinference TEXT, |
|
318 |
number integer DEFAULT 1 NOT NULL |
|
291 |
authors integer |
|
319 | 292 |
); |
320 | 293 |
|
294 |
-- |
|
295 |
-- Name: result_extra; Type: TABLE; Schema: shadow; Owner: sqoop; Tablespace: |
|
296 |
-- |
|
321 | 297 |
|
298 |
CREATE TABLE shadow.result_extra ( |
|
299 |
id text NOT NULL, |
|
300 |
title text, |
|
301 |
source text |
|
302 |
); |
|
322 | 303 |
|
323 | 304 |
-- |
324 | 305 |
-- Name: result_claims; Type: TABLE; Schema: shadow; Owner: sqoop; Tablespace: |
... | ... | |
438 | 419 |
$BODY$ |
439 | 420 |
BEGIN |
440 | 421 |
|
441 |
INSERT INTO "shadow".datasource (id,datasource_results,datasource_organizations,datasource_languages,datasource_topics,name,type,compatibility,number) (SELECT 'other','other','other','other','other','Other','Repository','unknown',1 WHERE NOT EXISTS (SELECT 1 FROM "shadow".datasource WHERE name='Unknown Repository'));
|
|
422 |
INSERT INTO "shadow".datasource (id,name,type,compatibility) (SELECT 'other','Other','Repository','unknown' WHERE NOT EXISTS (SELECT 1 FROM "shadow".datasource WHERE name='Unknown Repository'));
|
|
442 | 423 |
UPDATE "shadow".datasource SET name='Other' where name='Unknown Repository'; |
443 | 424 |
UPDATE "shadow".result_datasources SET datasource=(SELECT id FROM "shadow".datasource WHERE name='Other') WHERE datasource NOT IN (SELECT id FROM "shadow".datasource); |
444 | 425 |
|
... | ... | |
451 | 432 |
UPDATE "shadow".datasource SET yearofvalidation=null WHERE yearofvalidation='-1'; |
452 | 433 |
|
453 | 434 |
|
454 |
UPDATE shadow.project SET funder='FCT' WHERE funder='Fundação para a Ciência e a Tecnologia, I.P.'; |
|
435 |
-- UPDATE shadow.project SET funder='FCT' WHERE funder='Fundação para a Ciência e a Tecnologia, I.P.';
|
|
455 | 436 |
|
456 |
update shadow.datasource set harvested ='true' where datasource.id in ( select distinct d.id from datasource d, datasource_results dr where d.id=dr.id);
|
|
437 |
UPDATE shadow.datasource set harvested ='true' WHERE datasource.id IN (SELECT DISTINCT d.id FROM datasource d, result_datasources rd where d.id=rd.datasource);
|
|
457 | 438 |
|
458 | 439 |
END |
459 | 440 |
$BODY$ |
... | ... | |
482 | 463 |
as daysfromend FROM "shadow".result_projects, |
483 | 464 |
"shadow".result, |
484 | 465 |
"shadow".project |
485 |
where "shadow".result_projects.id="shadow".result.result_projects
|
|
466 |
where "shadow".result_projects.id="shadow".result.id
|
|
486 | 467 |
and "shadow".result.type='publication' |
487 |
and "shadow".project.project_results= "shadow".result_projects.project;
|
|
468 |
and "shadow".project.id= "shadow".result_projects.project;
|
|
488 | 469 |
|
489 | 470 |
END |
490 | 471 |
$BODY$ |
... | ... | |
503 | 484 |
$BODY$ |
504 | 485 |
BEGIN |
505 | 486 |
|
487 |
UPDATE shadow.project SET haspubs='yes' WHERE id IN (SELECT pr.id FROM shadow.project_results pr, shadow.result r WHERE pr.result=r.id AND r.type='publication'); |
|
506 | 488 |
|
507 |
CREATE OR REPLACE VIEW shadow.project_pub_count AS |
|
508 |
SELECT count(*) AS count, project_results_publication.project_results |
|
509 |
FROM shadow.project_results_publication |
|
510 |
GROUP BY project_results_publication.project_results; |
|
511 |
|
|
512 |
--HAS PUBS |
|
513 |
UPDATE "shadow"."project" SET haspubs='yes' WHERE project_results in (select project_results from shadow.project_results_publication ); |
|
514 |
|
|
515 |
|
|
516 | 489 |
END |
517 | 490 |
$BODY$ |
518 | 491 |
LANGUAGE plpgsql VOLATILE |
... | ... | |
531 | 504 |
$BODY$ |
532 | 505 |
BEGIN |
533 | 506 |
|
534 |
--COUNT PUBS
|
|
507 |
UPDATE shadow.project p SET numpubs=prr.np FROM (SELECT pr.id, count(distinct pr.result) AS np FROM shadow.project_results pr, shadow.result r WHERE pr.result=r.id AND r.type='publication' GROUP BY pr.id) AS prr WHERE prr.id=p.id;
|
|
535 | 508 |
|
536 |
UPDATE "shadow"."project" SET numpubs=( SELECT count from shadow.project_pub_count |
|
537 |
WHERE shadow.project_pub_count.project_results = shadow.project.project_results ) |
|
538 |
where "shadow"."project".project_results in ( SELECT "shadow".project_results_publication.project_results FROM shadow.project_results_publication ); |
|
539 |
|
|
540 |
|
|
541 | 509 |
END |
542 | 510 |
$BODY$ |
543 | 511 |
LANGUAGE plpgsql VOLATILE |
... | ... | |
555 | 523 |
RETURNS void AS |
556 | 524 |
$BODY$ |
557 | 525 |
BEGIN |
558 |
--delayedpubs PUBS |
|
559 | 526 |
|
560 |
create or replace view shadow.delayedpubs as SELECT count(*) , project_results from "shadow"."project_results_publication" |
|
561 |
WHERE "shadow"."project_results_publication".daysfromend > 0 group by project_results; |
|
527 |
UPDATE shadow.project pp SET delayedpubs = prr.dp FROM (SELECT p.id, count(distinct r.id) as dp FROM project p, project_results pr, result r WHERE p.id=pr.id AND pr.result=r.id AND r.type='publication' AND to_date(r.date, 'YYYY-MM-DD')-to_date(p.enddate, 'YYYY-MM-DD') > 0 GROUP BY p.id) AS prr WHERE pp.id=prr.id; |
|
562 | 528 |
|
563 |
UPDATE "shadow"."project" SET delayedpubs = (SELECT "shadow"."delayedpubs".count from "shadow"."delayedpubs" |
|
564 |
WHERE "shadow"."delayedpubs".project_results=project.project_results ) where "shadow".project.project_results in ( select project_results from "shadow"."delayedpubs" ); |
|
565 |
|
|
566 |
|
|
567 | 529 |
END |
568 | 530 |
$BODY$ |
569 | 531 |
LANGUAGE plpgsql VOLATILE |
... | ... | |
581 | 543 |
$BODY$ |
582 | 544 |
BEGIN |
583 | 545 |
|
546 |
UPDATE shadow.project pp SET daysforlastpub = prr.dp FROM (SELECT p.id, max(to_date(r.date, 'YYYY-MM-DD')-to_date(p.enddate, 'YYYY-MM-DD')) as dp FROM project p, project_results pr, result r WHERE p.id=pr.id AND pr.result=r.id AND r.type='publication' AND to_date(r.date, 'YYYY-MM-DD')-to_date(p.enddate, 'YYYY-MM-DD') > 0 GROUP BY p.id) AS prr WHERE pp.id=prr.id; |
|
584 | 547 |
|
585 |
--daysforlastpub |
|
586 |
|
|
587 |
UPDATE "shadow"."project" SET daysforlastpub = ( |
|
588 |
SELECT max(daysfromend) |
|
589 |
FROM "shadow"."project_results_publication" WHERE shadow.project.project_results = shadow.project_results_publication.project_results AND "shadow"."project_results_publication".daysfromend > 0 ) |
|
590 |
where "shadow".project.project_results in ( select project_results from "shadow"."delayedpubs" ); |
|
591 |
|
|
592 |
|
|
593 | 548 |
END |
594 | 549 |
$BODY$ |
595 | 550 |
LANGUAGE plpgsql VOLATILE |
... | ... | |
607 | 562 |
$BODY$ |
608 | 563 |
BEGIN |
609 | 564 |
|
610 |
--delayed
|
|
565 |
UPDATE shadow.result SET delayed = 'yes' WHERE id IN (SELECT distinct r.id FROM result r, project_results pr, project p WHERE r.id=pr.result AND pr.id=p.id AND to_date(r.date, 'YYYY-MM-DD')-to_date(p.enddate, 'YYYY-MM-DD') > 0);
|
|
611 | 566 |
|
612 |
UPDATE "shadow"."result" SET delayed = 'yes' WHERE result.id IN |
|
613 |
(SELECT result from shadow.project_results_publication where daysfromend >0); |
|
614 |
|
|
615 |
|
|
616 | 567 |
END |
617 | 568 |
$BODY$ |
618 | 569 |
LANGUAGE plpgsql VOLATILE |
... | ... | |
644 | 595 |
|
645 | 596 |
---functions |
646 | 597 |
|
598 |
|
|
647 | 599 |
-- |
600 |
-- Name: create_arrays(); Type FUNCTION; Schema: shadow; Owner: sqoop |
|
601 |
-- |
|
602 |
CREATE OR REPLACE FUNCTION shadow.create_arrays() |
|
603 |
RETURNS void AS |
|
604 |
$BODY$ |
|
605 |
BEGIN |
|
606 |
|
|
607 |
ALTER TABLE shadow.result ADD COLUMN funders text[], ADD COLUMN funding_lvl0 text[], ADD COLUMN projects text[], ADD COLUMN datasources text[]; |
|
608 |
UPDATE shadow.result r SET funders = prr.funders FROM (SELECT pr.result AS rid, array_agg(distinct funder) AS funders FROM shadow.project p, shadow.project_results pr WHERE p.id=pr.id GROUP BY pr.result) AS prr WHERE r.id = prr.rid; |
|
609 |
UPDATE shadow.result r SET funding_lvl0 = prr.funding_lvl0 FROM (SELECT pr.result AS rid, array_agg(distinct funding_lvl0) AS funding_lvl0 FROM shadow.project p, shadow.project_results pr WHERE p.id=pr.id GROUP BY pr.result) AS prr WHERE r.id = prr.rid; |
|
610 |
UPDATE shadow.result r SET projects = prr.ids FROM (SELECT pr.result AS rid, array_agg(distinct pr.id) AS ids FROM shadow.project_results pr GROUP BY pr.result) AS prr WHERE r.id = prr.rid; |
|
611 |
-- UPDATE shadow.result r SET datasources = drr.ids FROM (SELECT rd.id AS rid, array_agg(distinct rd.datasource) AS ids FROM shadow.result_datasources rd GROUP BY rd.id) AS drr WHERE r.id = drr.rid; |
|
612 |
|
|
613 |
CREATE TABLE shadow.result_temp AS SELECT r.id, publisher, journal, year, date, bestlicense, type, embargo_end_date, delayed, authors, funders, funding_lvl0, projects, array_agg(distinct rd.datasource) as datasources FROM shadow.result r, shadow.result_datasources rd WHERE r.id=rd.id GROUP BY r.id, r.publisher, r.journal, r.year, r.date, r.bestlicense, r.type, r.embargo_end_date, r.delayed, r.authors, r.funders, r.funding_lvl0, r.projects; |
|
614 |
DROP TABLE shadow.result CASCADE; |
|
615 |
ALTER TABLE shadow.result_temp RENAME TO result; |
|
616 |
|
|
617 |
END |
|
618 |
$BODY$ |
|
619 |
LANGUAGE plpgsql VOLATILE |
|
620 |
COST 100; |
|
621 |
ALTER FUNCTION shadow.create_arrays() |
|
622 |
OWNER TO sqoop; |
|
623 |
|
|
624 |
-- |
|
648 | 625 |
-- Name: create_indexes(); Type: FUNCTION; Schema: shadow; Owner: sqoop |
649 | 626 |
-- |
650 | 627 |
|
... | ... | |
654 | 631 |
|
655 | 632 |
BEGIN |
656 | 633 |
|
657 |
CREATE INDEX datasource_datasource_languages ON "shadow".datasource USING btree (datasource_languages); |
|
658 |
CREATE INDEX datasource_datasource_organizations ON "shadow".datasource USING btree (datasource_organizations); |
|
659 |
CREATE INDEX datasource_datasource_results ON "shadow".datasource USING btree (datasource_results); |
|
660 |
CREATE INDEX datasource_datasource_topics ON "shadow".datasource USING btree (datasource_topics); |
|
661 | 634 |
CREATE INDEX datasource_id ON "shadow".datasource USING btree (id); |
662 | 635 |
CREATE INDEX datasource_type ON "shadow".datasource USING btree (type); |
663 | 636 |
CREATE INDEX datasource_name ON "shadow".datasource USING btree (name); |
664 | 637 |
CREATE INDEX datasource_piwik_id ON "shadow".datasource USING btree (piwik_id); |
665 |
CREATE INDEX result_access_mode ON "shadow".result USING btree (access_mode); |
|
638 |
|
|
666 | 639 |
CREATE INDEX result_authors ON "shadow".result USING btree (authors); |
667 | 640 |
CREATE INDEX result_id ON "shadow".result USING btree (id); |
668 |
CREATE INDEX result_result_datasources ON "shadow".result USING btree (result_datasources); |
|
669 |
CREATE INDEX result_result_languages ON "shadow".result USING btree (result_languages); |
|
670 |
CREATE INDEX result_result_projects ON "shadow".result USING btree (result_projects); |
|
671 |
CREATE INDEX result_result_topics ON "shadow".result USING btree (result_topics); |
|
672 | 641 |
CREATE INDEX result_year ON "shadow".result USING btree (year); |
673 | 642 |
CREATE INDEX result_date ON "shadow"."result" USING btree ("date"); |
674 | 643 |
CREATE INDEX result_type ON "shadow"."result" USING btree ("type"); |
... | ... | |
677 | 646 |
CREATE INDEX project_acronym ON "shadow"."project" USING btree (acronym); |
678 | 647 |
CREATE INDEX project_enddate ON "shadow"."project" USING btree (enddate); |
679 | 648 |
CREATE INDEX project_id ON "shadow"."project" USING btree (id); |
680 |
CREATE INDEX project_project_results ON "shadow"."project" USING btree (project_results); |
|
681 | 649 |
CREATE INDEX project_results_result ON "shadow"."project_results" USING btree (result); |
682 | 650 |
CREATE INDEX project_results_project ON "shadow"."project_results" USING btree (id); |
683 | 651 |
|
... | ... | |
688 | 656 |
CREATE INDEX result_pids_type ON "shadow".result_pids USING btree (type COLLATE pg_catalog."default"); |
689 | 657 |
CREATE INDEX result_pids_pid ON "shadow".result_pids USING btree(pid COLLATE pg_catalog."default"); |
690 | 658 |
|
659 |
CREATE INDEX result_extra_id ON "shadow".result_extra USING btree(id COLLATE pg_catalog."default"); |
|
691 | 660 |
|
692 | 661 |
CREATE INDEX datasource_oids_id ON "shadow".datasource_oids USING btree (id COLLATE pg_catalog."default"); |
693 | 662 |
CREATE INDEX datasource_oids_orid ON "shadow".datasource_oids USING btree(orid COLLATE pg_catalog."default"); |
663 |
CREATE INDEX datasource_websites_id ON "shadow".datasource_websites USING btree(id COLLATE pg_catalog."default"); |
|
694 | 664 |
|
695 | 665 |
|
696 | 666 |
|
... | ... | |
737 | 707 |
USING btree |
738 | 708 |
(country COLLATE pg_catalog."default"); |
739 | 709 |
|
740 |
CREATE INDEX org_dtsrc |
|
741 |
ON shadow.organization |
|
742 |
USING btree |
|
743 |
(organization_datasources COLLATE pg_catalog."default"); |
|
744 |
|
|
745 |
CREATE INDEX org_proj |
|
746 |
ON shadow.organization |
|
747 |
USING btree |
|
748 |
(organization_projects COLLATE pg_catalog."default"); |
|
749 |
|
|
750 | 710 |
CREATE INDEX proj_funder |
751 | 711 |
ON shadow.project |
752 | 712 |
USING btree |
... | ... | |
762 | 722 |
USING btree |
763 | 723 |
(funding_lvl0 COLLATE pg_catalog."default"); |
764 | 724 |
|
765 |
|
|
766 | 725 |
CREATE INDEX proj_fndlvl1 |
767 | 726 |
ON shadow.project |
768 | 727 |
USING btree |
... | ... | |
773 | 732 |
USING btree |
774 | 733 |
(funding_lvl2 COLLATE pg_catalog."default"); |
775 | 734 |
|
776 |
|
|
777 | 735 |
CREATE INDEX proj_org_org |
778 | 736 |
ON shadow.project_organizations |
779 | 737 |
USING btree |
... | ... | |
784 | 742 |
USING btree |
785 | 743 |
(id COLLATE pg_catalog."default"); |
786 | 744 |
|
787 |
|
|
788 |
|
|
789 |
|
|
790 |
|
|
791 |
|
|
792 |
|
|
793 | 745 |
CREATE INDEX res_res_id |
794 | 746 |
ON shadow.result_results |
795 | 747 |
USING btree |
796 | 748 |
(id COLLATE pg_catalog."default"); |
797 | 749 |
|
798 |
|
|
799 |
|
|
800 | 750 |
CREATE INDEX res_res_res |
801 | 751 |
ON shadow.result_results |
802 | 752 |
USING btree |
... | ... | |
806 | 756 |
ON shadow.result_languages |
807 | 757 |
USING btree |
808 | 758 |
(id COLLATE pg_catalog."default"); |
759 |
|
|
809 | 760 |
CREATE INDEX res_conc_id |
810 | 761 |
ON shadow.result_concepts |
811 | 762 |
USING btree |
... | ... | |
816 | 767 |
USING btree |
817 | 768 |
(concept COLLATE pg_catalog."default"); |
818 | 769 |
|
819 |
CREATE INDEX res_class |
|
820 |
ON shadow.result |
|
821 |
USING btree |
|
822 |
(result_classifications COLLATE pg_catalog."default"); |
|
823 |
|
|
824 | 770 |
CREATE INDEX res_bestlicense |
825 | 771 |
ON shadow.result |
826 | 772 |
USING btree |
... | ... | |
830 | 776 |
ON shadow.project |
831 | 777 |
USING btree |
832 | 778 |
(sc39 COLLATE pg_catalog."default"); |
779 |
|
|
833 | 780 |
CREATE INDEX proj_fndlvl3 |
834 | 781 |
ON shadow.project |
835 | 782 |
USING btree |
... | ... | |
860 | 807 |
USING btree |
861 | 808 |
(organization COLLATE pg_catalog."default"); |
862 | 809 |
|
810 |
CREATE INDEX result_funders_idx ON shadow.result USING gin (funders); |
|
811 |
CREATE INDEX result_funding_lvl0_idx ON shadow.result USING gin (funding_lvl0); |
|
812 |
CREATE INDEX result_projects_idx ON shadow.result USING gin (projects); |
|
813 |
CREATE INDEX result_datasources_idx ON shadow.result USING gin (datasources); |
|
863 | 814 |
|
815 |
|
|
864 | 816 |
END;$$; |
865 | 817 |
|
866 | 818 |
|
... | ... | |
876 | 828 |
|
877 | 829 |
BEGIN |
878 | 830 |
|
879 |
CREATE OR REPLACE VIEW "shadow".datasource_results as SELECT datasource as id , id as result FROM "shadow".result_datasources ;
|
|
831 |
CREATE OR REPLACE VIEW shadow.datasource_results AS SELECT datasource AS id, id AS result FROM shadow.result_datasources;
|
|
880 | 832 |
|
881 |
CREATE OR REPLACE VIEW "shadow".organization_datasources as SELECT organization as id , id as datasource FROM "shadow".datasource_organizations ;
|
|
833 |
CREATE OR REPLACE VIEW shadow.organization_datasources AS SELECT organization AS id, id AS datasource FROM shadow.datasource_organizations;
|
|
882 | 834 |
|
883 |
CREATE OR REPLACE VIEW "shadow".organization_projects as SELECT id as project, organization as id FROM "shadow".project_organizations ;
|
|
835 |
CREATE OR REPLACE VIEW shadow.organization_projects AS SELECT id AS project, organization as id FROM shadow.project_organizations;
|
|
884 | 836 |
|
885 | 837 |
|
886 |
|
|
887 | 838 |
CREATE OR REPLACE VIEW shadow.result_projects AS SELECT shadow.project_results.result AS id, |
888 | 839 |
shadow.project_results.id AS project, ( select to_date("shadow"."result"."date", 'YYYY-MM-DD')- to_date("shadow"."project"."enddate", 'YYYY-MM-DD') |
889 | 840 |
from shadow.result, shadow.project where shadow.result.id = shadow.project_results.result |
... | ... | |
893 | 844 |
CREATE OR REPLACE VIEW "shadow".datasource_topics AS |
894 | 845 |
SELECT distinct "shadow".datasource.id, "shadow".result_topics.topic |
895 | 846 |
FROM "shadow".datasource, "shadow".datasource_results, "shadow".result, "shadow".result_topics |
896 |
WHERE "shadow".datasource.datasource_results = "shadow".datasource_results.id AND "shadow".datasource_results.result = "shadow".result.id AND
|
|
897 |
"shadow".result_topics.id = "shadow".result.result_topics;
|
|
847 |
WHERE "shadow".datasource.id = "shadow".datasource_results.id AND "shadow".datasource_results.result = "shadow".result.id AND
|
|
848 |
"shadow".result_topics.id = "shadow".result.id;
|
|
898 | 849 |
|
899 | 850 |
|
900 | 851 |
END;$$; |
... | ... | |
925 | 876 |
CREATE TABLE "shadow".chart_datasource_projects_data AS SELECT rd.datasource, p.title, count(distinct rd.id) FROM "shadow".result_datasources rd, "shadow".project p, "shadow".project_results pr, "shadow".result r WHERE p.id=pr.id AND pr.result=rd.id AND pr.result=r.id and r.type='dataset' GROUP BY rd.datasource, p.title; |
926 | 877 |
|
927 | 878 |
-- project charts |
928 |
CREATE TABLE "shadow".chart_project_year AS SELECT p.id, r.year, count( distinct r.id) FROM "shadow".result r, "shadow".result_projects rp, "shadow".project p WHERE r.id=rp.id AND p.id=rp.project AND r.year>=p.start_year GROUP BY p.id, r.year;
|
|
879 |
CREATE TABLE "shadow".chart_project_year AS SELECT p.id, r.year, count( distinct r.id) FROM "shadow".result r, "shadow".project_results pr, "shadow".project p WHERE r.id=pr.result AND p.id=pr.id AND r.year>=p.start_year GROUP BY p.id, r.year;
|
|
929 | 880 |
CREATE TABLE "shadow".chart_project_license AS SELECT pr.id, r.bestlicense, count(distinct r.id) FROM "shadow".result r, "shadow".project_results pr WHERE r.id=pr.result AND r.type='publication' GROUP BY pr.id, r.bestlicense; |
930 | 881 |
CREATE TABLE "shadow".chart_project_repos AS SELECT pr.id, d.name, count (distinct r.id) FROM "shadow".result r, "shadow".project_results pr, "shadow".datasource d, "shadow".datasource_results dr WHERE r.id=dr.result AND d.id=dr.id AND r.id=pr.result AND r.type='publication' GROUP BY pr.id, d.name; |
931 | 882 |
|
... | ... | |
998 | 949 |
|
999 | 950 |
END;$$; |
1000 | 951 |
|
1001 |
--DROP type if exists infra_report_rec cascade; |
|
1002 |
|
|
1003 |
--CREATE TYPE infra_report_rec as |
|
1004 |
--(Publications varchar(50), Open_Access_Publications varchar(50), |
|
1005 |
--FP7_Publications varchar(50), FP7_Closed_Access_Publications varchar(50), |
|
1006 |
--FP7_Open_Access_Publications varchar(50), |
|
1007 |
--FP7_Restricted_Access_Publications varchar(50), FP7_Embargo_Publications varchar(50) ); |
|
1008 |
|
|
1009 |
--CREATE OR REPLACE FUNCTION shadow.getInfraReports() |
|
1010 |
-- RETURNS infra_report_rec |
|
1011 |
--AS |
|
1012 |
-- $$ |
|
1013 |
--DECLARE |
|
1014 |
-- result_record infra_report_rec; |
|
1015 |
--BEGIN |
|
1016 |
-- |
|
1017 |
-- SELECT count(*) INTO result_record.Publications FROM shadow.result where type='publication'; |
|
1018 |
-- SELECT count(*) INTO result_record.Open_Access_Publications FROM shadow.result WHERE bestlicense='Open Access' and type='publication'; |
|
1019 |
-- |
|
1020 |
--SELECT count (distinct result_projects.id) INTO result_record.FP7_Publications FROM shadow.result, shadow.result_projects, shadow.project |
|
1021 |
--WHERE result.result_projects = result_projects.id and type='publication' and result_projects.project = project.id and funding_lvl0 = 'FP7'; |
|
1022 |
-- |
|
1023 |
--SELECT count (distinct shadow.result_projects.id) |
|
1024 |
-- INTO result_record.FP7_Closed_Access_Publications |
|
1025 |
-- FROM shadow.result, |
|
1026 |
-- shadow.result_projects, |
|
1027 |
-- shadow.project |
|
1028 |
-- WHERE result.result_projects = result_projects.id |
|
1029 |
-- AND result_projects.project = project.id |
|
1030 |
-- AND funding_lvl0 = 'FP7' |
|
1031 |
-- AND bestlicense='Closed Access' |
|
1032 |
-- AND type='publication'; |
|
1033 |
-- |
|
1034 |
-- |
|
1035 |
--SELECT count(distinct result_projects.id) INTO result_record.FP7_Open_Access_Publications FROM shadow.result, |
|
1036 |
-- shadow.result_projects, shadow.project |
|
1037 |
-- WHERE result_projects = result_projects.id AND result_projects.project = project.id and type='publication' and funding_lvl0 = 'FP7' and bestlicense='Open Access'; |
|
1038 |
-- |
|
1039 |
--SELECT count(distinct result_projects.id) INTO result_record.FP7_Restricted_Access_Publications |
|
1040 |
-- FROM shadow.result, shadow.result_projects, shadow.project |
|
1041 |
-- WHERE result.result_projects=result_projects.id |
|
1042 |
-- AND result_projects.project = project.id and funding_lvl0 = 'FP7' and bestlicense='Restricted' and type='publication'; |
|
1043 |
-- |
|
1044 |
-- |
|
1045 |
--SELECT count(distinct result_projects.id) INTO result_record.FP7_Embargo_Publications FROM shadow.result, shadow.result_projects, shadow.project |
|
1046 |
--WHERE result.result_projects = result_projects.id AND result_projects.project = project.id and funding_lvl0 = 'FP7' and bestlicense='Embargo' and type='publication'; |
|
1047 |
-- |
|
1048 |
--RETURN result_record; |
|
1049 |
-- |
|
1050 |
--END |
|
1051 |
--$$ LANGUAGE plpgsql; |
|
1052 |
-- |
|
1053 |
--drop type if exists fp7_report_rec cascade; |
|
1054 |
-- |
|
1055 |
--CREATE TYPE fp7_report_rec as |
|
1056 |
--(FP7_Publications varchar(50), |
|
1057 |
-- FP7_Projects varchar(50), |
|
1058 |
-- FP7_SC39_Projects varchar(50), |
|
1059 |
-- FP7_SC39_Publications varchar(50), |
|
1060 |
-- FP7_SC39_Open_Access varchar(50)); |
|
1061 |
-- |
|
1062 |
-- |
|
1063 |
--CREATE OR REPLACE FUNCTION shadow.getFp7Reports() |
|
1064 |
-- RETURNS fp7_report_rec |
|
1065 |
--AS |
|
1066 |
-- $$ |
|
1067 |
-- |
|
1068 |
--DECLARE |
|
1069 |
-- result_record fp7_report_rec; |
|
1070 |
-- |
|
1071 |
--BEGIN |
|
1072 |
-- |
|
1073 |
----FP7 WITH PUBS |
|
1074 |
--SELECT count(distinct project.id) into result_record.FP7_Publications FROM shadow.result, shadow.result_projects, shadow.project |
|
1075 |
--WHERE result.result_projects = result_projects.id and type='publication' |
|
1076 |
-- and result_projects.project = project.id and funding_lvl0='FP7' ; |
|
1077 |
-- |
|
1078 |
--SELECT count(id) into result_record.FP7_Projects FROM shadow.project WHERE funding_lvl0 = 'FP7'; |
|
1079 |
-- SELECT count(number) into result_record.FP7_SC39_Projects from shadow.project where funding_lvl0='FP7' and sc39='yes'; |
|
1080 |
-- |
|
1081 |
--SELECT count(distinct project.id) into result_record.FP7_SC39_Publications FROM shadow.result, shadow.result_projects, shadow.project |
|
1082 |
-- WHERE result_projects.project=project.id and funding_lvl0 = 'FP7' and sc39='yes' and result.result_projects = result_projects.id and type='publication'; |
|
1083 |
-- |
|
1084 |
--SELECT count(distinct result_projects.id) into result_record.FP7_SC39_Open_Access FROM shadow.result, shadow.result_projects, shadow.project |
|
1085 |
-- WHERE result.result_projects = result_projects.id AND result_projects.project = project.id and funding_lvl0 = 'FP7' and bestlicense='Open Access' and sc39='yes' and type='publication'; |
|
1086 |
-- |
|
1087 |
--RETURN result_record; |
|
1088 |
-- |
|
1089 |
--END |
|
1090 |
--$$ LANGUAGE plpgsql; |
|
1091 |
-- |
|
1092 |
-- |
|
1093 |
--drop type wt_report_rec cascade; |
|
1094 |
-- |
|
1095 |
--CREATE TYPE wt_report_rec as |
|
1096 |
--(WT_Publications varchar(50), |
|
1097 |
-- WT_Projects varchar(50), |
|
1098 |
-- WT_Open_Access varchar(50), |
|
1099 |
-- WT_Restricted_Access varchar(50), |
|
1100 |
-- WT_Embargo varchar(50)); |
|
1101 |
-- |
|
1102 |
-- |
|
1103 |
--CREATE OR REPLACE FUNCTION shadow.getWTReports() |
|
1104 |
-- RETURNS wt_report_rec |
|
1105 |
--AS |
|
1106 |
-- $$ |
|
1107 |
-- |
|
1108 |
--DECLARE |
|
1109 |
-- result_record wt_report_rec; |
|
1110 |
-- |
|
1111 |
--BEGIN |
|
1112 |
-- |
|
1113 |
--SELECT count(distinct project.id) into result_record.WT_Publications FROM shadow.result, shadow.project, shadow.result_projects |
|
1114 |
--where result_projects.project = project.id and project.funding_lvl0='WT' and result.result_projects = result_projects.id and type='publication'; |
|
1115 |
-- |
|
1116 |
--SELECT count(id) into result_record.WT_Projects FROM project WHERE funding_lvl0 = 'WT'; |
|
1117 |
-- |
|
1118 |
--SELECT count(distinct result_projects.id) into result_record.WT_Open_Access FROM shadow.result, shadow.result_projects, shadow.project WHERE result.result_projects= result_projects.id |
|
1119 |
--AND result_projects.project = project.id and funding_lvl0 = 'WT' and bestlicense='Open Access' and type='publication'; |
|
1120 |
-- |
|
1121 |
--SELECT count(distinct result_projects.id) into result_record.WT_Restricted_Access FROM shadow.result, shadow.result_projects, |
|
1122 |
--shadow.project WHERE result.result_projects = result_projects.id AND |
|
1123 |
--result_projects.project = project.id and funding_lvl0 = 'WT' and |
|
1124 |
--bestlicense='Restricted' and type='publication'; |
|
1125 |
-- |
|
1126 |
--SELECT count(distinct result_projects.id) into result_record.WT_Embargo FROM shadow.result, shadow.result_projects, shadow.project |
|
1127 |
--WHERE result.result_projects=result_projects.id AND result_projects.project = project.id and funding_lvl0 = 'WT' |
|
1128 |
--and bestlicense='Embargo' and type='publication'; |
|
1129 |
-- |
|
1130 |
--RETURN result_record; |
|
1131 |
-- |
|
1132 |
--END |
|
1133 |
--$$ LANGUAGE plpgsql; |
|
1134 |
-- |
|
1135 |
-- |
|
1136 |
-- |
|
1137 |
--drop type if exists erc_report_rec cascade; |
|
1138 |
-- |
|
1139 |
--CREATE TYPE erc_report_rec as |
|
1140 |
--(ERC_Publications varchar(50), |
|
1141 |
-- ERC_Projects varchar(50), |
|
1142 |
-- ERC_Open_Access varchar(50), |
|
1143 |
-- ERC_Restricted_Access varchar(50), |
|
1144 |
-- ERC_Embargo varchar(50)); |
|
1145 |
-- |
|
1146 |
-- |
|
1147 |
-- |
|
1148 |
--CREATE OR REPLACE FUNCTION shadow.getERCReports() |
|
1149 |
-- RETURNS erc_report_rec |
|
1150 |
--AS |
|
1151 |
-- $$ |
|
1152 |
-- |
|
1153 |
--DECLARE |
|
1154 |
-- result_record erc_report_rec; |
|
1155 |
-- |
|
1156 |
--BEGIN |
|
1157 |
-- |
|
1158 |
--SELECT count(distinct project.id) into result_record.ERC_Publications FROM shadow.result, shadow.project, shadow.result_projects |
|
1159 |
--where result_projects.project = project.id and project.funding_lvl2='ERC' and result.result_projects = result_projects.id and type='publication'; |
|
1160 |
-- |
|
1161 |
--SELECT count(id) into result_record.ERC_Projects FROM shadow.project WHERE funding_lvl2 = 'ERC'; |
|
1162 |
-- |
|
1163 |
-- |
|
1164 |
--SELECT count(distinct result_projects.id) into result_record.ERC_Open_Access FROM shadow.result, |
|
1165 |
--shadow.result_projects, shadow.project WHERE result.result_projects= result_projects.id |
|
1166 |
--AND result_projects.project = project.id and funding_lvl2 = 'ERC' and bestlicense='Open Access' and type='publication'; |
|
1167 |
-- |
|
1168 |
-- |
|
1169 |
--SELECT count(distinct result_projects.id) into result_record.ERC_Restricted_Access FROM shadow.result, shadow.result_projects, shadow.project WHERE |
|
1170 |
-- result.result_projects = result_projects.id AND result_projects.project = project.id and funding_lvl2 = 'ERC' and bestlicense='Restricted' and type='publication'; |
|
1171 |
-- |
|
1172 |
--SELECT count(distinct result_projects.id) into result_record.ERC_Embargo FROM shadow.result, shadow.result_projects, shadow.project |
|
1173 |
-- WHERE result.result_projects=result_projects.id AND result_projects.project = project.id and funding_lvl2 = 'ERC' and bestlicense='Embargo' and type='publication'; |
|
1174 |
-- |
|
1175 |
-- |
|
1176 |
--RETURN result_record; |
|
1177 |
-- |
|
1178 |
--END |
|
1179 |
--$$ LANGUAGE plpgsql; |
|
1180 |
|
|
1181 | 952 |
-- sqoopQL database dump complete |
1182 | 953 |
-- |
modules/dnet-openaire-stats/branches/new-schema/pom.xml | ||
---|---|---|
11 | 11 |
<modelVersion>4.0.0</modelVersion> |
12 | 12 |
<groupId>eu.dnetlib</groupId> |
13 | 13 |
<artifactId>dnet-openaire-stats</artifactId> |
14 |
<version>3.0.0-SNAPSHOT</version>
|
|
14 |
<version>4.0.0-SNAPSHOT</version>
|
|
15 | 15 |
|
16 | 16 |
<build> |
17 | 17 |
<plugins> |
Also available in: Unified diff
finalize new-schema for betadb