Project

General

Profile

1
package eu.dnetlib.msro.workflows.hadoop.utils;
2

    
3
import java.util.Collection;
4
import java.util.Set;
5

    
6
import com.google.common.base.Function;
7
import com.google.common.base.Predicate;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Sets;
11

    
12
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
13
import eu.dnetlib.data.proto.DedupProtos.Dedup;
14
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
15
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship;
16
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
17
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
18
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
21
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
22
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
23
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
24
import eu.dnetlib.data.proto.TypeProtos.Type;
25

    
26
/**
27
 * Common static utility methods to manage the hbase tables
28
 *
29
 * @author claudio
30
 *
31
 */
32
public class HBaseTableUtils {
33

    
34
	private static final String _ = "_";
35
	private static Function<Type, String> typeName = new Function<Type, String>() {
36

    
37
		@Override
38
		public String apply(final Type type) {
39
			return type.toString();
40
		}
41
	};
42

    
43
	public static Set<String> listAllColumns() {
44
		final Set<String> union = Sets.union(listEntities(), listRelationships());
45
		return Sets.union(union, listDedupRelationships());
46
	}
47

    
48
	public static Set<String> listDedupColumns(final Collection<Type> entityTypes) {
49
		final Set<String> entities = listEntities(Lists.newArrayList(Iterables.transform(entityTypes, typeName)));
50
		return Sets.union(entities, listDedupRelationships());
51
	}
52

    
53
	private static Set<String> listDedupRelationships() {
54
		final Set<String> cfs = Sets.newHashSet();
55
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
56
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
57
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
58

    
59
		cfs.add(RelType.personPerson + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
60
		cfs.add(RelType.personPerson + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
61
		cfs.add(RelType.personPerson + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
62

    
63
		cfs.add(RelType.resultResult + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
64
		cfs.add(RelType.resultResult + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
65
		cfs.add(RelType.resultResult + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
66

    
67
		return cfs;
68
	}
69

    
70
	private static Set<String> listEntities(final Collection<String> entityType) {
71
		return Sets.newHashSet(Iterables.filter(Iterables.transform(Lists.newArrayList(Type.values()), typeName), new Predicate<String>() {
72

    
73
			@Override
74
			public boolean apply(final String s) {
75
				return entityType.contains(s);
76
			}
77
		}));
78
	}
79

    
80
	public static Set<String> listEntities() {
81
		return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), typeName));
82
	}
83

    
84
	public static Set<String> listRelationships() {
85
		final Set<String> cfs = Sets.newHashSet();
86
		cfs.add(RelType.datasourceOrganization + _ + SubRelType.provision + _ + Provision.RelName.isProvidedBy);
87
		cfs.add(RelType.datasourceOrganization + _ + SubRelType.provision + _ + Provision.RelName.provides);
88

    
89
		cfs.add(RelType.personPerson + _ + SubRelType.coauthorship + _ + CoAuthorship.RelName.isCoauthorOf);
90

    
91
		cfs.add(RelType.personResult + _ + SubRelType.authorship + _ + Authorship.RelName.isAuthorOf);
92
		cfs.add(RelType.personResult + _ + SubRelType.authorship + _ + Authorship.RelName.hasAuthor);
93

    
94
		cfs.add(RelType.projectOrganization + _ + SubRelType.participation + _ + Participation.RelName.hasParticipant);
95
		cfs.add(RelType.projectOrganization + _ + SubRelType.participation + _ + Participation.RelName.isParticipant);
96

    
97
		cfs.add(RelType.projectPerson + _ + SubRelType.contactPerson + _ + ContactPerson.RelName.isContact);
98
		cfs.add(RelType.projectPerson + _ + SubRelType.contactPerson + _ + ContactPerson.RelName.hasContact);
99

    
100
		cfs.add(RelType.resultProject + _ + SubRelType.outcome + _ + Outcome.RelName.isProducedBy);
101
		cfs.add(RelType.resultProject + _ + SubRelType.outcome + _ + Outcome.RelName.produces);
102

    
103
		cfs.add(RelType.resultResult + _ + SubRelType.similarity + _ + Similarity.RelName.hasAmongTopNSimilarDocuments);
104
		cfs.add(RelType.resultResult + _ + SubRelType.similarity + _ + Similarity.RelName.isAmongTopNSimilarDocuments);
105

    
106
		cfs.add(RelType.resultResult + _ + SubRelType.publicationDataset + _ + PublicationDataset.RelName.isRelatedTo);
107

    
108
		return cfs;
109
	}
110

    
111
}
(1-1/3)