Project

General

Profile

« Previous | Next » 

Revision 29633

Added by Eri Katsari about 10 years ago

View differences:

modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/job.properties
12 12
Stats_sqoop_StatementPerTrans = 100000
13 13
Stats_sqoop_ReducersCount=4
14 14
Stats_Hbase_Source_Table=db_openaireplus_node6_t
15
Stats_output_Path=/tmp/test_stats/
15
Stats_output_Path=/tmp/test_stats_new/
16 16
Stats_null_String_Field=NULL
17 17
Stats_null_Numeric_Field=-1
18 18
Stats_delim_Character=!
......
39 39
out19=concept
40 40
out20=category 
41 41
isLookupEndpoint=http://node6.t.openaire.research-infrastructures.eu:8280/is/services/isLookUp
42
Stats_Column_Families=datasource,project,organization
43
Stats_indexConf=index.conf { result { dups = true, links = [{ relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] },{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]},organization { dups = false, links = [{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
42
Stats_Column_Families=result
43
Stats_indexConf=index.conf { result { dups = true, links = []}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]},organization { dups = false, links = [{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
44
#Stats_indexConf=index.conf { result { dups = true, links = [{ relType = personResult_authorship_hasAuthor, targetEntity = person, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_isMergedIn, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_dedup_merges, targetEntity = result, expandAs = child, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] }, { relType = resultResult_publicationDataset_isRelatedTo, targetEntity = result, expandAs = rel, symmetric = true, fields = [title,dateofacceptance,publisher,resulttype] },{ relType = resultResult_similarity_isAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] },{ relType = resultResult_similarity_hasAmongTopNSimilarDocuments, targetEntity = result, expandAs = rel, symmetric = false, fields = [title,dateofacceptance,publisher,resulttype,similarity,type] }]}, person { dups = false, links = [ { relType = personResult_authorship_isAuthorOf, targetEntity = result, expandAs = rel, symmetric = true, fields = [fullname,ranking] }, { relType = projectPerson_contactPerson_isContact, targetEntity = project, expandAs = rel, symmetric = true, fields = [fullname,email,fax,phone] } ]}, datasource { dups = false, links = [ { relType = datasourceOrganization_provision_provides, targetEntity = organization, expandAs = rel, symmetric = true, fields = [officialname,websiteurl,datasourcetype,aggregatortype] } ]},organization { dups = false, links = [{ relType = projectOrganization_participation_isParticipant, targetEntity = project, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] }, { relType = datasourceOrganization_provision_isProvidedBy, targetEntity = datasource, expandAs = rel, symmetric = true, fields = [legalname,legalshortname,websiteurl,country] } ]}, project { dups = false, links = [ { relType = projectOrganization_participation_hasParticipant, targetEntity = organization, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = resultProject_outcome_produces, targetEntity = result, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] }, { relType = projectPerson_contactPerson_hasContact, targetEntity = person, expandAs = rel, symmetric = true, fields = [code,acronym,title,websiteurl,contracttype,fundingtree] } ]}}
modules/dnet-openaire-stats-workflow/src/main/resources/eu/dnetlib/iis/core/javamapreduce/stats/oozie_app/workflow.xml
22 22
			</main-class>
23 23
			<!-- column families: -->
24 24
			<arg>	<!-- ${Stats_Column_Families} -->
25
				-f datasource,datasourceOrganization_provision_provides ,
25
				-f result
26
			<!-- datasource,datasourceOrganization_provision_provides ,
26 27
				organization,
27 28
				project, projectOrganization_participation_hasParticipant ,result,resultProject_outcome_produces,
28 29
				personResult_authorship_hasAuthor,resultResult_publicationDataset_isRelatedTo 
29
							</arg>
30
				-->			</arg>
30 31

  
31 32
			<capture-output />
32 33
		</java>
......
602 603
			<arg>-PStats_enclosing_Character=${Stats_enclosing_Character} </arg>
603 604

  
604 605
		</java>
605
		<ok to="finalizeDatabase" />
606
		<ok to="end"/>
606 607
		<error to="fail" />
607 608
	</action>
608 609

  
modules/dnet-openaire-stats-workflow/run.sh
1
#!/bin/bash
2
rm -rf /home/eri.katsari/stats/*
3
hadoop fs -rm -r /user/eri.katsari/stats/oozie_app/*
4
mv /home/eri.katsari/oozie-package.tar.gz  /home/eri.katsari/stats/oozie-package.tar.gz
5
tar -xvf  /home/eri.katsari/stats/oozie-package.tar.gz -C /home/eri.katsari/stats/
6
hadoop fs -put /home/eri.katsari/stats/oozie_app/*  /user/eri.katsari/stats/oozie_app/
7

  
0 8

  
modules/dnet-openaire-stats-workflow/upload.sh
1
#!/bin/bash
2

  
3
cd /home/eri/workspace/dnet40/modules/dnet-openaire-stats/trunk
4
svn up
5
mvn clean install -Dmaven.test.skip=true
6
cd /home/eri/workspace/dnet40/modules/dnet-openaire-stats-workflow
7
mvn clean package  -Dworkflow.source.dir=eu/dnetlib/iis/core/javamapreduce/stats -Poozie  -Diis.hadoop.frontend.home.dir=/home  -Duser.name=eri.katsari
8
sudo scp ./target/oozie-package.tar.gz    eri.katsari@146.48.123.66:/home/eri.katsari/
9

  
0 10

  

Also available in: Unified diff