Project

General

Profile

1
package eu.dnetlib.openaire.hadoop.utils;
2

    
3
import java.util.Collection;
4
import java.util.Set;
5

    
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Sets;
10

    
11
import eu.dnetlib.data.proto.TypeProtos.Type;
12

    
13
/**
14
 * Common static utility methods to manage the hbase tables
15
 *
16
 * @author claudio
17
 */
18
public class HBaseTableUtils {
19

    
20
	private static final String SEPARATOR = "_";
21

    
22
	private static final Function<Type, String> typeName = type -> type.toString();
23

    
24
	public static Set<String> listAllColumns() {
25
		final Set<String> union = Sets.union(listEntities(), listRelationships());
26
		return Sets.union(union, listDedupRelationships());
27
	}
28

    
29
	public static Set<String> listDedupColumns(final Collection<Type> entityTypes) {
30
		final Set<String> entities = listEntities(Lists.newArrayList(Iterables.transform(entityTypes, typeName)));
31
		return Sets.union(entities, listDedupRelationships());
32
	}
33

    
34
	private static Set<String> listDedupRelationships() {
35
		final Set<String> cfs = Sets.newHashSet();
36
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
37
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
38
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
39
//
40
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
41
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
42
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
43
//
44
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
45
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
46
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
47
//
48
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
49
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.isRelatedTo + SEPARATOR + IsRelatedTo.RelName.isRelatedTo);
50
//
51
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
52
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
53
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
54

    
55
		return cfs;
56
	}
57

    
58
	private static Set<String> listEntities(final Collection<String> entityType) {
59
        return Sets.newHashSet(
60
                Iterables.filter(
61
                        Iterables.transform(Lists.newArrayList(Type.values()), typeName),
62
                        s -> entityType.contains(s)));
63
    }
64

    
65
	public static Set<String> listEntities() {
66
		return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), typeName));
67
	}
68

    
69
	public static Set<String> listRelationships() {
70
		final Set<String> cfs = Sets.newHashSet();
71
//		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.isProvidedBy);
72
//		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.provides);
73
//
74
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.coauthorship + SEPARATOR + CoAuthorship.RelName.isCoauthorOf);
75
//
76
//		cfs.add(RelType.personPublication + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf);
77
//		cfs.add(RelType.personPublication + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor);
78
//
79
//		cfs.add(RelType.personDataset + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf);
80
//		cfs.add(RelType.personDataset + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor);
81
//
82
//		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.hasParticipant);
83
//		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.isParticipant);
84
//
85
//		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.isContact);
86
//		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.hasContact);
87
//
88
//		cfs.add(RelType.publicationProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
89
//		cfs.add(RelType.publicationProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
90
//
91
//		cfs.add(RelType.datasetProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
92
//		cfs.add(RelType.datasetProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
93
//
94
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
95
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
96
//
97
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementedBy);
98
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementTo);
99
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.isPartOf);
100
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.hasPart);
101
//
102
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.isRelatedTo + SEPARATOR + IsRelatedTo.RelName.isRelatedTo);
103
//
104
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
105
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
106
//
107
//		cfs.add(RelType.publicationOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.isAffiliatedWith);
108

    
109
		return cfs;
110
	}
111

    
112
	public enum VolatileColumnFamily {
113
		dedup, dedupPerson; // instance is here to remove the old protos
114

    
115
		public static boolean isVolatile(final String columnName) {
116
			try {
117
				return VolatileColumnFamily.valueOf(columnName) != null;
118
			} catch (final Throwable e) {
119
				return false;
120
			}
121
		}
122
	}
123

    
124
}
    (1-1/1)