Project

General

Profile

1
package eu.dnetlib.openaire.hadoop.utils;
2

    
3
import java.util.Collection;
4
import java.util.Set;
5

    
6
import com.google.common.base.Function;
7
import com.google.common.base.Predicate;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Sets;
11
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
12
import eu.dnetlib.data.proto.DedupProtos.Dedup;
13
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
14
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship;
15
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
16
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
17
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson;
18
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
19
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
20
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
21
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
22
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part;
23
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
24
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
25
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement;
26
import eu.dnetlib.data.proto.TypeProtos.Type;
27

    
28
/**
29
 * Common static utility methods to manage the hbase tables
30
 *
31
 * @author claudio
32
 */
33
public class HBaseTableUtils {
34

    
35
	private static final String SEPARATOR = "_";
36

    
37
	private static final Function<Type, String> typeName = new Function<Type, String>() {
38

    
39
		@Override
40
		public String apply(final Type type) {
41
			return type.toString();
42
		}
43
	};
44

    
45
	public static Set<String> listAllColumns() {
46
		final Set<String> union = Sets.union(listEntities(), listRelationships());
47
		return Sets.union(union, listDedupRelationships());
48
	}
49

    
50
	public static Set<String> listDedupColumns(final Collection<Type> entityTypes) {
51
		final Set<String> entities = listEntities(Lists.newArrayList(Iterables.transform(entityTypes, typeName)));
52
		return Sets.union(entities, listDedupRelationships());
53
	}
54

    
55
	private static Set<String> listDedupRelationships() {
56
		final Set<String> cfs = Sets.newHashSet();
57
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
58
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
59
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
60

    
61
		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
62
		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
63
		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
64

    
65
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
66
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
67
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
68

    
69
		return cfs;
70
	}
71

    
72
	private static Set<String> listEntities(final Collection<String> entityType) {
73
		return Sets.newHashSet(Iterables.filter(Iterables.transform(Lists.newArrayList(Type.values()), typeName), new Predicate<String>() {
74

    
75
			@Override
76
			public boolean apply(final String s) {
77
				return entityType.contains(s);
78
			}
79
		}));
80
	}
81

    
82
	public static Set<String> listEntities() {
83
		return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), typeName));
84
	}
85

    
86
	public static Set<String> listRelationships() {
87
		final Set<String> cfs = Sets.newHashSet();
88
		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.isProvidedBy);
89
		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.provides);
90

    
91
		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.coauthorship + SEPARATOR + CoAuthorship.RelName.isCoauthorOf);
92

    
93
		cfs.add(RelType.personResult + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf);
94
		cfs.add(RelType.personResult + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor);
95

    
96
		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.hasParticipant);
97
		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.isParticipant);
98

    
99
		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.isContact);
100
		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.hasContact);
101

    
102
		cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
103
		cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
104

    
105
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
106
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
107

    
108
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementedBy);
109
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementTo);
110
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.isPartOf);
111
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.hasPart);
112

    
113
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.publicationDataset + SEPARATOR + PublicationDataset.RelName.isRelatedTo);
114

    
115
		cfs.add(RelType.resultOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.isAuthorInstitutionOf);
116
		cfs.add(RelType.resultOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.hasAuthorInstitution);
117

    
118
		return cfs;
119
	}
120

    
121
	/**
	 * Names of the column families treated as volatile by
	 * {@link #isVolatile(String)}.
	 */
	public enum VolatileColumnFamily {
		dedup, dedupPerson; // instance is here to remove the old protos

		/**
		 * Tells whether the given column name is exactly the name of one of the
		 * constants of this enum. The comparison is case sensitive.
		 *
		 * @param columnName
		 *            the column name to check, may be {@code null}
		 * @return {@code true} if a constant has that name; {@code false}
		 *         otherwise, including for {@code null}
		 */
		public static boolean isVolatile(final String columnName) {
			// Scan the constants instead of calling valueOf() inside a catch-all:
			// no exception is used for control flow and no JVM Error is swallowed.
			for (final VolatileColumnFamily family : values()) {
				if (family.name().equals(columnName)) {
					return true;
				}
			}
			return false;
		}
	}
132

    
133
}
    (1-1/1)