Project

General

Profile

1 39712 claudio.at
package eu.dnetlib.openaire.hadoop.utils;
2 35797 claudio.at
3 48679 claudio.at
import java.util.Arrays;
4 39712 claudio.at
import java.util.Set;
5 48679 claudio.at
import java.util.stream.Collectors;
6
import java.util.stream.Stream;
7 39712 claudio.at
8 35797 claudio.at
import com.google.common.collect.Sets;
9
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
10
import eu.dnetlib.data.proto.DedupProtos.Dedup;
11
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
12
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
13
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
14
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
15 38308 claudio.at
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
16 35797 claudio.at
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
17 48679 claudio.at
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.*;
18 35797 claudio.at
import eu.dnetlib.data.proto.TypeProtos.Type;
19
20
/**
21
 * Common static utility methods to manage the hbase tables
22
 *
23
 * @author claudio
24
 */
25
public class HBaseTableUtils {
26
27 44868 claudio.at
	private static final String SEPARATOR = "_";
28 35797 claudio.at
29
	public static Set<String> listAllColumns() {
30
		final Set<String> union = Sets.union(listEntities(), listRelationships());
31
		return Sets.union(union, listDedupRelationships());
32
	}
33
34 54137 claudio.at
	public static Set<String> listDedupRelationships() {
35 35797 claudio.at
		final Set<String> cfs = Sets.newHashSet();
36 44868 claudio.at
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
37
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
38
		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
39 35797 claudio.at
40 44868 claudio.at
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
41
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
42
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
43 35797 claudio.at
44
		return cfs;
45
	}
46
47 48679 claudio.at
	private static Stream<String> types() {
48
		return Arrays.stream(Type.values())
49
				.map(Enum::toString);
50 35797 claudio.at
	}
51
52
	public static Set<String> listEntities() {
53 48679 claudio.at
		return types()
54
				.collect(Collectors.toSet());
55 35797 claudio.at
	}
56
57
	public static Set<String> listRelationships() {
58
		final Set<String> cfs = Sets.newHashSet();
59 44868 claudio.at
		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.isProvidedBy);
60
		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.provides);
61 35797 claudio.at
62 44868 claudio.at
		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.hasParticipant);
63
		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.isParticipant);
64 35797 claudio.at
65 44868 claudio.at
		cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
66
		cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
67 35797 claudio.at
68 44868 claudio.at
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
69
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
70 35797 claudio.at
71 44868 claudio.at
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementedBy);
72
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementTo);
73
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.isPartOf);
74
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.hasPart);
75 40179 claudio.at
76 44868 claudio.at
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.publicationDataset + SEPARATOR + PublicationDataset.RelName.isRelatedTo);
77 35797 claudio.at
78 44880 claudio.at
		cfs.add(RelType.resultOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.isAuthorInstitutionOf);
79 46585 alessia.ba
		cfs.add(RelType.resultOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.hasAuthorInstitution);
80 38308 claudio.at
81 48679 claudio.at
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.version + SEPARATOR + SoftwareSoftware.RelName.isVersionOf);
82
		cfs.add(RelType.resultResult + SEPARATOR + SubRelType.relationship + SEPARATOR + Relationship.RelName.isRelatedTo);
83
84 35797 claudio.at
		return cfs;
85
	}
86
87
}