Project

General

Profile

« Previous | Next » 

Revision 44953

ongoing refactoring

View differences:

modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/openaire/hadoop/utils/HBaseTableUtils.java
1
package eu.dnetlib.openaire.hadoop.utils;
2

  
3
import java.util.Collection;
4
import java.util.Set;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.common.collect.Lists;
9
import com.google.common.collect.Sets;
10

  
11
import eu.dnetlib.data.proto.TypeProtos.Type;
12

  
13
/**
14
 * Common static utility methods to manage the hbase tables
15
 *
16
 * @author claudio
17
 */
18
public class HBaseTableUtils {
19

  
20
	private static final String SEPARATOR = "_";
21

  
22
	private static final Function<Type, String> typeName = type -> type.toString();
23

  
24
	public static Set<String> listAllColumns() {
25
		final Set<String> union = Sets.union(listEntities(), listRelationships());
26
		return Sets.union(union, listDedupRelationships());
27
	}
28

  
29
	public static Set<String> listDedupColumns(final Collection<Type> entityTypes) {
30
		final Set<String> entities = listEntities(Lists.newArrayList(Iterables.transform(entityTypes, typeName)));
31
		return Sets.union(entities, listDedupRelationships());
32
	}
33

  
34
	private static Set<String> listDedupRelationships() {
35
		final Set<String> cfs = Sets.newHashSet();
36
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
37
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
38
//		cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
39
//
40
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
41
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
42
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
43
//
44
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
45
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
46
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
47
//
48
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
49
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.isRelatedTo + SEPARATOR + IsRelatedTo.RelName.isRelatedTo);
50
//
51
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges);
52
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn);
53
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo);
54

  
55
		return cfs;
56
	}
57

  
58
	private static Set<String> listEntities(final Collection<String> entityType) {
59
        return Sets.newHashSet(
60
                Iterables.filter(
61
                        Iterables.transform(Lists.newArrayList(Type.values()), typeName),
62
                        s -> entityType.contains(s)));
63
    }
64

  
65
	public static Set<String> listEntities() {
66
		return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), typeName));
67
	}
68

  
69
	public static Set<String> listRelationships() {
70
		final Set<String> cfs = Sets.newHashSet();
71
//		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.isProvidedBy);
72
//		cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.provides);
73
//
74
//		cfs.add(RelType.personPerson + SEPARATOR + SubRelType.coauthorship + SEPARATOR + CoAuthorship.RelName.isCoauthorOf);
75
//
76
//		cfs.add(RelType.personPublication + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf);
77
//		cfs.add(RelType.personPublication + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor);
78
//
79
//		cfs.add(RelType.personDataset + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf);
80
//		cfs.add(RelType.personDataset + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor);
81
//
82
//		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.hasParticipant);
83
//		cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.isParticipant);
84
//
85
//		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.isContact);
86
//		cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.hasContact);
87
//
88
//		cfs.add(RelType.publicationProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
89
//		cfs.add(RelType.publicationProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
90
//
91
//		cfs.add(RelType.datasetProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy);
92
//		cfs.add(RelType.datasetProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces);
93
//
94
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
95
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
96
//
97
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementedBy);
98
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.supplement + SEPARATOR + Supplement.RelName.isSupplementTo);
99
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.isPartOf);
100
//		cfs.add(RelType.publicationPublication + SEPARATOR + SubRelType.part + SEPARATOR + Part.RelName.hasPart);
101
//
102
//		cfs.add(RelType.publicationDataset + SEPARATOR + SubRelType.isRelatedTo + SEPARATOR + IsRelatedTo.RelName.isRelatedTo);
103
//
104
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.hasAmongTopNSimilarDocuments);
105
//		cfs.add(RelType.datasetDataset + SEPARATOR + SubRelType.similarity + SEPARATOR + Similarity.RelName.isAmongTopNSimilarDocuments);
106
//
107
//		cfs.add(RelType.publicationOrganization + SEPARATOR + SubRelType.affiliation + SEPARATOR + Affiliation.RelName.isAffiliatedWith);
108

  
109
		return cfs;
110
	}
111

  
112
	public enum VolatileColumnFamily {
113
		dedup, dedupPerson; // instance is here to remove the old protos
114

  
115
		public static boolean isVolatile(final String columnName) {
116
			try {
117
				return VolatileColumnFamily.valueOf(columnName) != null;
118
			} catch (final Throwable e) {
119
				return false;
120
			}
121
		}
122
	}
123

  
124
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/DNGFRowKeyDecoder.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import eu.dnetlib.data.proto.TypeProtos.Type;
4
import org.apache.commons.lang3.StringUtils;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
public class DNGFRowKeyDecoder {
9

  
10
	public final static String ID_REGEX = "^[0-9][0-9]\\|.{12}::[a-zA-Z0-9]{32}$";
11
	/**
12
	 * logger.
13
	 */
14
	private static final Log log = LogFactory.getLog(DNGFRowKeyDecoder.class); // NOPMD by marko on 11/24/08 5:02 PM
15
	private static final String SEPARATOR = "|";
16
	private String key;
17

  
18
	private Type type = null;
19

  
20
	private String id = null;
21

  
22
	private DNGFRowKeyDecoder(final String key) throws IllegalArgumentException {
23
		this.key = key;
24

  
25
		if (!key.matches(ID_REGEX)) {
26
			String msg = "invalid key: '" + key + "'";
27
			log.error(msg);
28
			throw new IllegalArgumentException(msg);
29
		}
30

  
31
		int tag = Integer.parseInt(StringUtils.substringBefore(key, SEPARATOR));
32
		this.type = Type.valueOf(tag);
33
		this.id = StringUtils.substringAfter(key, SEPARATOR);
34

  
35
		// System.out.println(OafRowTypeDecoder.class.getName() +" decoded key: " + split);
36
	}
37

  
38
	public static DNGFRowKeyDecoder decode(final byte[] key) throws IllegalArgumentException {
39
		return new DNGFRowKeyDecoder(new String(key));
40
	}
41

  
42
	public static DNGFRowKeyDecoder decode(final String key) throws IllegalArgumentException {
43
		return new DNGFRowKeyDecoder(key);
44
	}
45

  
46
	public String getKey() {
47
		return key;
48
	}
49

  
50
	public Type getType() {
51
		return type;
52
	}
53

  
54
	public String getId() {
55
		return id;
56
	}
57
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/DNGFRelDecoder.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
4
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
5
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
6
import eu.dnetlib.data.proto.TypeProtos.Type;
7

  
8
public class DNGFRelDecoder {
9

  
10
	private final DNGFRel dgnfRel;
11

  
12
	private DNGFRelDecoder(final DNGFRel DNGFRel) {
13
		this.dgnfRel = DNGFRel;
14
	}
15

  
16
	public static DNGFRelDecoder decode(final DNGFRel DNGFRel) {
17
		return new DNGFRelDecoder(DNGFRel);
18
	}
19

  
20
	public Qualifier getRelType() {
21
		return dgnfRel.getRelType();
22
	}
23

  
24
	public String getRelClass() {
25
		return dgnfRel.getRelType().getClassid();
26
	}
27

  
28
	public String getRelScheme() {
29
		return dgnfRel.getRelType().getSchemeid();
30
	}
31

  
32
	public DNGFRel.Builder setClassId(final String classId) {
33
		final DNGFRel.Builder relBuilder = DNGFRel.newBuilder(dgnfRel);
34
		relBuilder.getRelTypeBuilder().setClassid(classId);
35
		return relBuilder;
36
	}
37

  
38
	public Type getTargetType() {
39
		return dgnfRel.getTargetType();
40
	}
41

  
42
	public Type getSourceType() {
43
		return dgnfRel.getSourceType();
44
	}
45

  
46
//	protected FieldDescriptor fd(final MessageOrBuilder mb, final int fieldNumber) {
47
//		return mb.getDescriptorForType().findFieldByNumber(fieldNumber);
48
//	}
49
//
50
//	protected FieldDescriptor fd(final MessageOrBuilder mb, final String fieldName) {
51
//		return mb.getDescriptorForType().findFieldByName(fieldName);
52
//	}
53

  
54
	public String getCachedTargedId() {
55

  
56
		if (!dgnfRel.hasCachedTarget()) return null;
57

  
58
		final DNGFEntity entity = dgnfRel.getCachedTarget();
59
		return DNGFEntityDecoder.decode(entity).getId();
60
	}
61

  
62
	public String getRelSourceId() {
63
		return dgnfRel.getSource();
64
	}
65

  
66
	public String getRelTargetId() {
67
		return dgnfRel.getTarget();
68
	}
69

  
70
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/DNGFEntityDecoder.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.util.List;
4

  
5
import com.google.common.base.Function;
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.FieldDescriptor;
9
import com.google.protobuf.GeneratedMessage;
10
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
11
import eu.dnetlib.data.proto.TypeProtos.Type;
12
import eu.dnetlib.data.transform.AbstractProtoMapper;
13

  
14
public class DNGFEntityDecoder extends AbstractProtoMapper {
15

  
16
	private final DNGFEntity dngfEntity;
17

  
18
	private DNGFEntityDecoder(final DNGFEntity oafEntity) {
19
		this.dngfEntity = oafEntity;
20
	}
21

  
22
	public static DNGFEntityDecoder decode(final DNGFEntity oafEntity) {
23
		return new DNGFEntityDecoder(oafEntity);
24
	}
25

  
26
	public Type getType() {
27
		return dngfEntity.getType();
28
	}
29

  
30
	public String getId() {
31
		return dngfEntity.getId();
32
	}
33

  
34
	public GeneratedMessage getDNGFEntity() {
35
		return dngfEntity;
36
	}
37

  
38
	public GeneratedMessage getEntity() {
39
		final FieldDescriptor fd = dngfEntity.getDescriptorForType().findFieldByName(typeName());
40
		return (GeneratedMessage) dngfEntity.getField(fd);
41
	}
42

  
43
	public String typeName() {
44
		return dngfEntity.getType().toString();
45
	}
46

  
47
	public GeneratedMessage getMetadata() {
48
		final GeneratedMessage entity = getEntity();
49
		final FieldDescriptor fd = entity.getDescriptorForType().findFieldByName("metadata");
50
		final GeneratedMessage field = (GeneratedMessage) entity.getField(fd);
51
		return field;
52
	}
53

  
54
	public List<String> getFieldValues(final String path) {
55
		return Lists.newLinkedList(Iterables.transform(processPath(getDNGFEntity(), path, eu.dnetlib.pace.config.Type.String), new Function<Object, String>() {
56

  
57
			@Override
58
			public String apply(final Object o) {
59
				return o.toString();
60
			}
61
		}));
62
	}
63

  
64
	public String getDateOfCollection() {
65
		return dngfEntity.getDateofcollection();
66
	}
67

  
68
	public String getDateOfTransformation() {
69
		return dngfEntity.getDateoftransformation();
70
	}
71
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3

  
4
/**
 * Describes a relationship by a code of the form {@code <ontologyCode>_<termCode>}.
 *
 * <p>Two descriptors are equal when their full codes are equal.
 */
public class RelDescriptor {

	/** Separator between the ontology code and the term code. */
	private static final String SEPARATOR = "_";

	/** The full code, as given to the constructor. */
	private final String code;

	/** First token of the code. */
	private final String ontologyCode;

	/** Second token of the code. */
	private final String termCode;

	/**
	 * Parses the given code.
	 *
	 * @param value
	 *            the code, made of at least two tokens separated by '_'; tokens after the second
	 *            one are ignored
	 * @throws NullPointerException
	 *             if {@code value} is null
	 * @throws IllegalArgumentException
	 *             if {@code value} does not contain both an ontology code and a term code
	 */
	public RelDescriptor(final String value) {
		super();
		this.code = java.util.Objects.requireNonNull(value, "value must not be null");

		final String[] parts = value.split(SEPARATOR);
		// fail with a descriptive message instead of an ArrayIndexOutOfBoundsException on parts[1]
		if (parts.length < 2) {
			throw new IllegalArgumentException(
					"invalid relationship descriptor, expected '<ontologyCode>" + SEPARATOR + "<termCode>': '" + value + "'");
		}

		this.ontologyCode = parts[0];
		this.termCode = parts[1];
	}

	/** @return the full code */
	public String getCode() {
		return code;
	}

	/** @return the token preceding the first separator */
	public String getOntologyCode() {
		return ontologyCode;
	}

	/** @return the token following the first separator */
	public String getTermCode() {
		return termCode;
	}

	@Override
	public String toString() {
		return getCode();
	}

	@Override
	public int hashCode() {
		// same value as the former (31 * 1) + code.hashCode(); code is never null
		return 31 + code.hashCode();
	}

	@Override
	public boolean equals(final Object obj) {
		if (this == obj) {
			return true;
		}
		if ((obj == null) || (getClass() != obj.getClass())) {
			return false;
		}
		return code.equals(((RelDescriptor) obj).code);
	}

}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/DNGFDecoder.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import com.google.protobuf.Descriptors.EnumValueDescriptor;
4
import com.google.protobuf.Descriptors.FieldDescriptor;
5
import com.google.protobuf.ExtensionRegistry;
6
import com.google.protobuf.GeneratedMessage;
7
import com.google.protobuf.GeneratedMessage.GeneratedExtension;
8
import com.google.protobuf.InvalidProtocolBufferException;
9
import com.google.protobuf.Message;
10
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
11
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
12
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
13
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
14
import eu.dnetlib.data.proto.KindProtos.Kind;
15

  
16
import java.util.List;
17
import java.util.Map.Entry;
18

  
19
/**
20
 * Helper class, to be used as accessor helper over the DNGF structure.
21
 *
22
 * @author claudio
23
 *
24
 */
25
public class DNGFDecoder {
26

  
27
	/**
28
	 * DNGF object
29
	 */
30
	private DNGF dngf;
31

  
32
	/**
33
	 * Cached sub decoder
34
	 */
35
	private DNGFEntityDecoder entityDecoder = null;
36

  
37
	/**
38
	 * Cached sub decoder
39
	 */
40
	private DNGFRelDecoder relDecoder = null;
41

  
42
	protected DNGFDecoder(final byte[] value) {
43
		this(value, null);
44
	}
45

  
46
	protected DNGFDecoder(final byte[] value, final GeneratedExtension... ge) {
47
		try {
48
			final ExtensionRegistry registry = ExtensionRegistry.newInstance();
49
			if (ge != null) {
50
				for(GeneratedExtension e : ge) {
51
					registry.add(e);
52
				}
53
			}
54
			this.dngf = DNGF.parseFrom(value, registry);
55

  
56
		} catch (InvalidProtocolBufferException e) {
57
			throw new RuntimeException("unable to deserialize proto: " + new String(value));
58
		}
59
	}
60

  
61
	private DNGFDecoder(final DNGF dngf) {
62
		this.dngf = dngf;
63
	}
64

  
65
    public static DNGFDecoder decode(final DNGF dngf) {
66
        return new DNGFDecoder(dngf);
67
    }
68

  
69
    public static DNGFDecoder decode(final byte[] b) {
70
        return new DNGFDecoder(b);
71
    }
72

  
73
    public static DNGFDecoder decode(final byte[] b, final GeneratedExtension... ge) {
74
        return new DNGFDecoder(b, ge);
75
    }
76

  
77
    private static String escapeXml(final String value) {
78
        return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
79
    }
80

  
81
	public Kind getKind() {
82
		return dngf.getKind();
83
	}
84

  
85
    // / Entity
86

  
87
	public DNGF getDNGF() {
88
		return dngf;
89
	}
90

  
91
	public GeneratedMessage getMetadata() {
92
		return decodeEntity().getMetadata();
93
	}
94

  
95
	public GeneratedMessage getDNGFEntity() {
96
		return decodeEntity().getEntity();
97
	}
98

  
99
	public String getEntityId() {
100
		return decodeEntity().getId();
101
	}
102

  
103
	public DNGFEntity getEntity() {
104
		return dngf.getEntity();
105
	}
106

  
107
    // / Rel
108

  
109
	public DNGFEntityDecoder decodeEntity() {
110
		if (entityDecoder == null) {
111
			entityDecoder = DNGFEntityDecoder.decode(getEntity());
112
		}
113
		return entityDecoder;
114
	}
115

  
116
	public DNGFRel getDNGFRel() {
117
		return dngf.getRel();
118
	}
119

  
120
	public Qualifier relType() {
121
		return decodeRel().getRelType();
122
	}
123

  
124
	public String relTypeName() {
125
		return relType().toString();
126
	}
127

  
128
	public String relSourceId() {
129
		return decodeRel().getRelSourceId();
130
	}
131

  
132
	public String relTargetId() {
133
		return decodeRel().getRelTargetId();
134
	}
135

  
136
	private DNGFRelDecoder decodeRel() {
137
		if (relDecoder == null) {
138
			relDecoder = DNGFRelDecoder.decode(getDNGFRel());
139
		}
140
		return relDecoder;
141
	}
142

  
143
	public byte[] toByteArray() {
144
		return dngf.toByteArray();
145
	}
146

  
147
	public String asXml() {
148
		StringBuilder sb = new StringBuilder("<oaf>");
149

  
150
		for (Entry<FieldDescriptor, Object> e : dngf.getAllFields().entrySet()) {
151
			asXml(sb, e.getKey(), e.getValue());
152
		}
153
		sb.append("</oaf>");
154
		return sb.toString();
155
	}
156

  
157
	@SuppressWarnings("unchecked")
158
	private void asXml(final StringBuilder sb, final FieldDescriptor fd, final Object value) {
159

  
160
		if (fd.isRepeated() && (value instanceof List<?>)) {
161
			for (Object o : ((List<Object>) value)) {
162
				asXml(sb, fd, o);
163
			}
164
		} else if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
165
			sb.append("<" + fd.getName() + ">");
166
			for (Entry<FieldDescriptor, Object> e : ((Message) value).getAllFields().entrySet()) {
167
				asXml(sb, e.getKey(), e.getValue());
168
			}
169
			sb.append("</" + fd.getName() + ">");
170
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
171
			sb.append("<" + fd.getName() + ">");
172
			sb.append(((EnumValueDescriptor) value).getName());
173
			sb.append("</" + fd.getName() + ">");
174
		} else {
175
			sb.append("<" + fd.getName() + ">");
176
			sb.append(escapeXml(value.toString()));
177
			sb.append("</" + fd.getName() + ">");
178
		}
179
	}
180

  
181
}
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFDecoderTest.java
5 5

  
6 6
import java.util.List;
7 7

  
8
import eu.dnetlib.data.graph.model.DNGFDecoder;
8 9
import org.junit.Test;
9 10

  
10 11
import eu.dnetlib.data.proto.KindProtos.Kind;
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFRowKeyDecoderTest.java
1 1
package eu.dnetlib.data.mapreduce.util;
2 2

  
3
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
3 4
import org.junit.Test;
4 5

  
5 6
public class DNGFRowKeyDecoderTest {
modules/dnet-graph-domain/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/DNGFTest.java
2 2

  
3 3
import com.google.protobuf.GeneratedMessage;
4 4
import com.google.protobuf.InvalidProtocolBufferException;
5
import eu.dnetlib.data.graph.model.DNGFDecoder;
5 6
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
6 7
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
7 8
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/model/DNGFEntityDecoder.java
1
package eu.dnetlib.data.graph.model;
2

  
3
import java.util.List;
4

  
5
import com.google.common.base.Function;
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.FieldDescriptor;
9
import com.google.protobuf.GeneratedMessage;
10
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
11
import eu.dnetlib.data.proto.TypeProtos.Type;
12
import eu.dnetlib.data.transform.AbstractProtoMapper;
13

  
14
public class DNGFEntityDecoder extends AbstractProtoMapper {
15

  
16
	private final DNGFEntity dngfEntity;
17

  
18
	private DNGFEntityDecoder(final DNGFEntity oafEntity) {
19
		this.dngfEntity = oafEntity;
20
	}
21

  
22
	public static DNGFEntityDecoder decode(final DNGFEntity oafEntity) {
23
		return new DNGFEntityDecoder(oafEntity);
24
	}
25

  
26
	public Type getType() {
27
		return dngfEntity.getType();
28
	}
29

  
30
	public String getId() {
31
		return dngfEntity.getId();
32
	}
33

  
34
	public GeneratedMessage getDNGFEntity() {
35
		return dngfEntity;
36
	}
37

  
38
	public GeneratedMessage getEntity() {
39
		final FieldDescriptor fd = dngfEntity.getDescriptorForType().findFieldByName(typeName());
40
		return (GeneratedMessage) dngfEntity.getField(fd);
41
	}
42

  
43
	public String typeName() {
44
		return dngfEntity.getType().toString();
45
	}
46

  
47
	public GeneratedMessage getMetadata() {
48
		final GeneratedMessage entity = getEntity();
49
		final FieldDescriptor fd = entity.getDescriptorForType().findFieldByName("metadata");
50
		final GeneratedMessage field = (GeneratedMessage) entity.getField(fd);
51
		return field;
52
	}
53

  
54
	public List<String> getFieldValues(final String path) {
55
		return Lists.newLinkedList(Iterables.transform(processPath(getDNGFEntity(), path, eu.dnetlib.pace.config.Type.String), new Function<Object, String>() {
56

  
57
			@Override
58
			public String apply(final Object o) {
59
				return o.toString();
60
			}
61
		}));
62
	}
63

  
64
	public String getDateOfCollection() {
65
		return dngfEntity.getDateofcollection();
66
	}
67

  
68
	public String getDateOfTransformation() {
69
		return dngfEntity.getDateoftransformation();
70
	}
71
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/model/DNGFDecoder.java
1
package eu.dnetlib.data.graph.model;
2

  
3
import java.util.List;
4
import java.util.Map.Entry;
5

  
6
import com.google.protobuf.*;
7
import com.google.protobuf.Descriptors.EnumValueDescriptor;
8
import com.google.protobuf.Descriptors.FieldDescriptor;
9

  
10
import com.google.protobuf.GeneratedMessage.GeneratedExtension;
11
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
12
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
13
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
14
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
15
import eu.dnetlib.data.proto.KindProtos.Kind;
16

  
17
/**
18
 * Helper class, to be used as accessor helper over the DNGF structure.
19
 *
20
 * @author claudio
21
 *
22
 */
23
public class DNGFDecoder {
24

  
25
	/**
26
	 * DNGF object
27
	 */
28
	private DNGF dngf;
29

  
30
	/**
31
	 * Cached sub decoder
32
	 */
33
	private DNGFEntityDecoder entityDecoder = null;
34

  
35
	/**
36
	 * Cached sub decoder
37
	 */
38
	private DNGFRelDecoder relDecoder = null;
39

  
40
	protected DNGFDecoder(final byte[] value) {
41
		this(value, null);
42
	}
43

  
44
	protected DNGFDecoder(final byte[] value, final GeneratedExtension... ge) {
45
		try {
46
			final ExtensionRegistry registry = ExtensionRegistry.newInstance();
47
			if (ge != null) {
48
				for(GeneratedExtension e : ge) {
49
					registry.add(e);
50
				}
51
			}
52
			this.dngf = DNGF.parseFrom(value, registry);
53

  
54
		} catch (InvalidProtocolBufferException e) {
55
			throw new RuntimeException("unable to deserialize proto: " + new String(value));
56
		}
57
	}
58

  
59
	private DNGFDecoder(final DNGF dngf) {
60
		this.dngf = dngf;
61
	}
62

  
63
    public static DNGFDecoder decode(final DNGF dngf) {
64
        return new DNGFDecoder(dngf);
65
    }
66

  
67
    public static DNGFDecoder decode(final byte[] b) {
68
        return new DNGFDecoder(b);
69
    }
70

  
71
    public static DNGFDecoder decode(final byte[] b, final GeneratedExtension... ge) {
72
        return new DNGFDecoder(b, ge);
73
    }
74

  
75
    private static String escapeXml(final String value) {
76
        return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
77
    }
78

  
79
	public Kind getKind() {
80
		return dngf.getKind();
81
	}
82

  
83
    // / Entity
84

  
85
	public DNGF getDNGF() {
86
		return dngf;
87
	}
88

  
89
	public GeneratedMessage getMetadata() {
90
		return decodeEntity().getMetadata();
91
	}
92

  
93
	public GeneratedMessage getDNGFEntity() {
94
		return decodeEntity().getEntity();
95
	}
96

  
97
	public String getEntityId() {
98
		return decodeEntity().getId();
99
	}
100

  
101
	public DNGFEntity getEntity() {
102
		return dngf.getEntity();
103
	}
104

  
105
    // / Rel
106

  
107
	public DNGFEntityDecoder decodeEntity() {
108
		if (entityDecoder == null) {
109
			entityDecoder = DNGFEntityDecoder.decode(getEntity());
110
		}
111
		return entityDecoder;
112
	}
113

  
114
	public DNGFRel getDNGFRel() {
115
		return dngf.getRel();
116
	}
117

  
118
	public GeneratedMessage getRel() {
119
		return decodeRel().getRel();
120
	}
121

  
122
	public Qualifier relType() {
123
		return decodeRel().getRelType();
124
	}
125

  
126
	public String relTypeName() {
127
		return relType().toString();
128
	}
129

  
130
	public String relSourceId() {
131
		return decodeRel().getRelSourceId();
132
	}
133

  
134
	public String relTargetId() {
135
		return decodeRel().getRelTargetId();
136
	}
137

  
138
	private DNGFRelDecoder decodeRel() {
139
		if (relDecoder == null) {
140
			relDecoder = DNGFRelDecoder.decode(getDNGFRel());
141
		}
142
		return relDecoder;
143
	}
144

  
145
	public byte[] toByteArray() {
146
		return dngf.toByteArray();
147
	}
148

  
149
	public String asXml() {
150
		StringBuilder sb = new StringBuilder("<oaf>");
151

  
152
		for (Entry<FieldDescriptor, Object> e : dngf.getAllFields().entrySet()) {
153
			asXml(sb, e.getKey(), e.getValue());
154
		}
155
		sb.append("</oaf>");
156
		return sb.toString();
157
	}
158

  
159
	@SuppressWarnings("unchecked")
160
	private void asXml(final StringBuilder sb, final FieldDescriptor fd, final Object value) {
161

  
162
		if (fd.isRepeated() && (value instanceof List<?>)) {
163
			for (Object o : ((List<Object>) value)) {
164
				asXml(sb, fd, o);
165
			}
166
		} else if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
167
			sb.append("<" + fd.getName() + ">");
168
			for (Entry<FieldDescriptor, Object> e : ((Message) value).getAllFields().entrySet()) {
169
				asXml(sb, e.getKey(), e.getValue());
170
			}
171
			sb.append("</" + fd.getName() + ">");
172
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
173
			sb.append("<" + fd.getName() + ">");
174
			sb.append(((EnumValueDescriptor) value).getName());
175
			sb.append("</" + fd.getName() + ">");
176
		} else {
177
			sb.append("<" + fd.getName() + ">");
178
			sb.append(escapeXml(value.toString()));
179
			sb.append("</" + fd.getName() + ">");
180
		}
181
	}
182

  
183
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/model/DNGFRowKeyDecoder.java
1
package eu.dnetlib.data.graph.model;
2

  
3
import eu.dnetlib.data.proto.TypeProtos.Type;
4
import org.apache.commons.lang3.StringUtils;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
public class DNGFRowKeyDecoder {
9

  
10
	public final static String ID_REGEX = "^[0-9][0-9]\\|.{12}::[a-zA-Z0-9]{32}$";
11
	/**
12
	 * logger.
13
	 */
14
	private static final Log log = LogFactory.getLog(DNGFRowKeyDecoder.class); // NOPMD by marko on 11/24/08 5:02 PM
15
	private static final String SEPARATOR = "|";
16
	private String key;
17

  
18
	private Type type = null;
19

  
20
	private String id = null;
21

  
22
	private DNGFRowKeyDecoder(final String key) throws IllegalArgumentException {
23
		this.key = key;
24

  
25
		if (!key.matches(ID_REGEX)) {
26
			String msg = "invalid key: '" + key + "'";
27
			log.error(msg);
28
			throw new IllegalArgumentException(msg);
29
		}
30

  
31
		int tag = Integer.parseInt(StringUtils.substringBefore(key, SEPARATOR));
32
		this.type = Type.valueOf(tag);
33
		this.id = StringUtils.substringAfter(key, SEPARATOR);
34

  
35
		// System.out.println(OafRowTypeDecoder.class.getName() +" decoded key: " + split);
36
	}
37

  
38
	public static DNGFRowKeyDecoder decode(final byte[] key) throws IllegalArgumentException {
39
		return new DNGFRowKeyDecoder(new String(key));
40
	}
41

  
42
	public static DNGFRowKeyDecoder decode(final String key) throws IllegalArgumentException {
43
		return new DNGFRowKeyDecoder(key);
44
	}
45

  
46
	public String getKey() {
47
		return key;
48
	}
49

  
50
	public Type getType() {
51
		return type;
52
	}
53

  
54
	public String getId() {
55
		return id;
56
	}
57
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/model/DNGFRelDecoder.java
1
package eu.dnetlib.data.graph.model;
2

  
3
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
4
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
5
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
6
import eu.dnetlib.data.proto.TypeProtos.Type;
7

  
8
public class DNGFRelDecoder {
9

  
10
	private final DNGFRel dgnfRel;
11

  
12
	private DNGFRelDecoder(final DNGFRel DNGFRel) {
13
		this.dgnfRel = DNGFRel;
14
	}
15

  
16
	public static DNGFRelDecoder decode(final DNGFRel DNGFRel) {
17
		return new DNGFRelDecoder(DNGFRel);
18
	}
19

  
20
	public Qualifier getRelType() {
21
		return dgnfRel.getRelType();
22
	}
23

  
24
	public String getRelClass() {
25
		return dgnfRel.getRelType().getClassid();
26
	}
27

  
28
	public String getRelScheme() {
29
		return dgnfRel.getRelType().getSchemeid();
30
	}
31

  
32
	public DNGFRel.Builder setClassId(final String classId) {
33
		final DNGFRel.Builder relBuilder = DNGFRel.newBuilder(dgnfRel);
34
		relBuilder.getRelTypeBuilder().setClassid(classId);
35
		return relBuilder;
36
	}
37

  
38
	public Type getTargetType() {
39
		return dgnfRel.getTargetType();
40
	}
41

  
42
	public Type getSourceType() {
43
		return dgnfRel.getSourceType();
44
	}
45

  
46
//	protected FieldDescriptor fd(final MessageOrBuilder mb, final int fieldNumber) {
47
//		return mb.getDescriptorForType().findFieldByNumber(fieldNumber);
48
//	}
49
//
50
//	protected FieldDescriptor fd(final MessageOrBuilder mb, final String fieldName) {
51
//		return mb.getDescriptorForType().findFieldByName(fieldName);
52
//	}
53

  
54
	public String getCachedTargedId() {
55

  
56
		if (!dgnfRel.hasCachedTarget()) return null;
57

  
58
		final DNGFEntity entity = dgnfRel.getCachedTarget();
59
		return DNGFEntityDecoder.decode(entity).getId();
60
	}
61

  
62
	public String getRelSourceId() {
63
		return dgnfRel.getSource();
64
	}
65

  
66
	public String getRelTargetId() {
67
		return dgnfRel.getTarget();
68
	}
69

  
70
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/utils/RelDescriptor.java
1
package eu.dnetlib.data.graph.utils;
2

  
3
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
4
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
5

  
6
public class RelDescriptor {
7

  
8
	private final String it;
9

  
10
	private final RelType relType;
11

  
12
	private final SubRelType subRelType;
13

  
14
	private final String relClass;
15

  
16
	public RelDescriptor(final String value) {
17
		super();
18
		this.it = value;
19

  
20
		String[] s = value.split("_");
21

  
22
		this.relType = RelType.valueOf(s[0]);
23
		this.subRelType = SubRelType.valueOf(s[1]);
24
		this.relClass = s[2];
25
	}
26

  
27
	public SubRelType getSubRelType() {
28
		return subRelType;
29
	}
30

  
31
	public RelType getRelType() {
32
		return relType;
33
	}
34

  
35
	public String getRelClass() {
36
		return relClass;
37
	}
38

  
39
	public String getIt() {
40
		return it;
41
	}
42

  
43
	@Override
44
	public String toString() {
45
		return getIt();
46
	}
47

  
48
	@Override
49
	public int hashCode() {
50
		final int prime = 31;
51
		int result = 1;
52
		result = (prime * result) + ((it == null) ? 0 : it.hashCode());
53
		return result;
54
	}
55

  
56
	@Override
57
	public boolean equals(final Object obj) {
58
		if (this == obj) return true;
59
		if (obj == null) return false;
60
		if (getClass() != obj.getClass()) return false;
61
		RelDescriptor other = (RelDescriptor) obj;
62
		if (it == null) {
63
			if (other.it != null) return false;
64
		} else if (!it.equals(other.it)) return false;
65
		return true;
66
	}
67

  
68
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/utils/HBaseTableUtils.java
1
package eu.dnetlib.data.graph.utils;
2

  
3
import java.util.Set;
4

  
5
import com.google.common.collect.Sets;
6

  
7
/**
8
 * Common static utility methods to manage the hbase tables
9
 *
10
 * @author claudio
11
 */
12
public class HBaseTableUtils {
13

  
14
	private static final Set<String> cfs = Sets.newHashSet("metadata", "rel");
15

  
16
	public Set<String> getColumns() {
17
		return cfs;
18
	}
19

  
20
}
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/DNGFUtils.java
11 11
import com.google.protobuf.Descriptors.Descriptor;
12 12
import com.google.protobuf.Descriptors.FieldDescriptor;
13 13

  
14
import eu.dnetlib.data.mapreduce.util.DNGFDecoder;
14
import eu.dnetlib.data.graph.model.DNGFDecoder;
15 15
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
16 16
import eu.dnetlib.data.proto.KindProtos.Kind;
17 17
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java
6 6
import com.google.common.collect.Iterables;
7 7
import com.google.common.collect.Lists;
8 8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder;
9
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
10 10
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
11 11
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
12 12
import eu.dnetlib.data.proto.FieldTypeProtos.*;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java
3 3
import com.google.common.collect.Lists;
4 4
import com.google.protobuf.Descriptors.FieldDescriptor;
5 5
import com.google.protobuf.Message.Builder;
6
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder;
6
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
7 7
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
8 8
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
9 9
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java
5 5

  
6 6
import com.google.common.collect.Lists;
7 7
import com.google.common.collect.Maps;
8
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder;
8
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
9 9
import eu.dnetlib.data.proto.DNGFProtos;
10 10
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
11 11
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/CommonDNetXsltFunctions.java
5 5

  
6 6
import com.google.common.collect.Lists;
7 7
import com.google.common.collect.Maps;
8
import eu.dnetlib.data.mapreduce.util.DNGFRowKeyDecoder;
8
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder;
9 9
import eu.dnetlib.data.proto.DNGFProtos.DNGF;
10 10
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity;
11 11
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel;
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/OntologyTerm.java
16 16
		return new OntologyTerm();
17 17
	}
18 18

  
19

  
20
	public String get
21

  
19 22
	public String getCode() {
20 23
		return code;
21 24
	}
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/buildMergedToAnchorMapJob.xml
54 54
                <FAMILIES>
55 55
                    <FAMILY value="person"/>
56 56
	                <FAMILY value="personPerson_dedup_isMergedIn" />
57
	                <FAMILY value="rel:dnet:result_result_relations" />
57 58
                </FAMILIES>
58 59
        	</SCAN>
59 60
        </HADOOP_JOB>

Also available in: Unified diff