Revision 51225
Added by Claudio Atzori over 6 years ago
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/model/DNGFUtils.java | ||
---|---|---|
1 |
package eu.dnetlib.data.graph.model; |
|
2 |
|
|
3 |
import com.google.common.base.Function; |
|
4 |
import com.google.common.base.Predicate; |
|
5 |
import com.google.common.collect.Iterables; |
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import com.google.common.collect.Sets; |
|
8 |
import com.google.protobuf.Descriptors; |
|
9 |
import com.google.protobuf.GeneratedMessage; |
|
10 |
import eu.dnetlib.data.proto.*; |
|
11 |
|
|
12 |
import java.util.Arrays; |
|
13 |
import java.util.Set; |
|
14 |
|
|
15 |
/** |
|
16 |
* Created by sandro on 12/13/16. |
|
17 |
*/ |
|
18 |
public class DNGFUtils { |
|
19 |
|
|
20 |
public static Set<String> entities() { |
|
21 |
return Sets.newHashSet(Iterables.transform(Lists.newArrayList(TypeProtos.Type.values()), t -> t.toString())); |
|
22 |
} |
|
23 |
|
|
24 |
public static Predicate<DNGFProtos.DNGF> relationFilter() { |
|
25 |
return dngf -> dngf.getKind().equals(KindProtos.Kind.relation); |
|
26 |
} |
|
27 |
|
|
28 |
public static Predicate<DNGFProtos.DNGF> entityFilter() { |
|
29 |
return dngf -> dngf.getKind().equals(KindProtos.Kind.entity); |
|
30 |
} |
|
31 |
|
|
32 |
public static Function<DNGFDecoder, String> idDecoder() { |
|
33 |
return input -> input.getEntityId(); |
|
34 |
} |
|
35 |
|
|
36 |
public static Predicate<FieldTypeProtos.StructuredProperty> mainTitleFilter() { |
|
37 |
return sp -> (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title"); |
|
38 |
} |
|
39 |
|
|
40 |
public static Set<String> getFieldNames(final Descriptors.Descriptor d, final Integer... tag) { |
|
41 |
return Sets.newHashSet(Iterables.transform(Arrays.asList(tag), i -> { |
|
42 |
final Descriptors.FieldDescriptor fd = d.findFieldByNumber(i); |
|
43 |
if (fd == null) throw new IllegalArgumentException("undefined tag: " + i + " for type: " + d.getFullName()); |
|
44 |
return fd.getName(); |
|
45 |
})); |
|
46 |
} |
|
47 |
|
|
48 |
public static FieldTypeProtos.StringField sf(final String value) { |
|
49 |
return FieldTypeProtos.StringField.newBuilder().setValue(value).build(); |
|
50 |
} |
|
51 |
|
|
52 |
public static FieldTypeProtos.StructuredProperty sp(final String value, final String classid, final String schemeid) { |
|
53 |
return sp(value, classid, schemeid, di()); |
|
54 |
} |
|
55 |
|
|
56 |
public static FieldTypeProtos.StructuredProperty sp(final String value, final String classid, final String schemeid, final FieldTypeProtos.DataInfo di) { |
|
57 |
return FieldTypeProtos.StructuredProperty.newBuilder().setValue(value).setQualifier(simpleQualifier(classid, schemeid)).setDataInfo(di).build(); |
|
58 |
} |
|
59 |
|
|
60 |
public static FieldTypeProtos.KeyValue kv(final String k, final String v) { |
|
61 |
return FieldTypeProtos.KeyValue.newBuilder().setKey(k).setValue(v).build(); |
|
62 |
} |
|
63 |
|
|
64 |
public static FieldTypeProtos.Qualifier simpleQualifier(final String classid, final String schemeid) { |
|
65 |
return q(classid, classid, schemeid, schemeid); |
|
66 |
} |
|
67 |
|
|
68 |
public static FieldTypeProtos.Qualifier q(final String classid, final String classname, final String schemeid, final String schemename) { |
|
69 |
return FieldTypeProtos.Qualifier.newBuilder() |
|
70 |
.setClassid(classid) |
|
71 |
.setClassname(classname) |
|
72 |
.setSchemename(schemeid) |
|
73 |
.setSchemeid(schemename).build(); |
|
74 |
} |
|
75 |
|
|
76 |
public static FieldTypeProtos.DataInfo di(final String trust) { |
|
77 |
return di(false, "", false, simpleQualifier("", ""), trust); |
|
78 |
} |
|
79 |
|
|
80 |
public static FieldTypeProtos.DataInfo di() { |
|
81 |
return di(""); |
|
82 |
} |
|
83 |
|
|
84 |
public static FieldTypeProtos.DataInfo di(Boolean deletedByInference, String inferenceProvenance, Boolean inferred, FieldTypeProtos.Qualifier provenanceAction, String trust) { |
|
85 |
return FieldTypeProtos.DataInfo.newBuilder() |
|
86 |
.setDeletedbyinference(deletedByInference) |
|
87 |
.setInferenceprovenance(inferenceProvenance) |
|
88 |
.setInferred(inferred) |
|
89 |
.setProvenanceaction(provenanceAction) |
|
90 |
.setTrust(trust).build(); |
|
91 |
} |
|
92 |
|
|
93 |
public static FieldTypeProtos.Context context(final String id) { |
|
94 |
return FieldTypeProtos.Context.newBuilder().setId(id).build(); |
|
95 |
} |
|
96 |
|
|
97 |
|
|
98 |
public static DNGFDecoder embed(final GeneratedMessage msg, |
|
99 |
final KindProtos.Kind kind, |
|
100 |
final boolean deletedByInference, |
|
101 |
final boolean inferred, |
|
102 |
final String provenance, |
|
103 |
final String action) { |
|
104 |
|
|
105 |
final DNGFProtos.DNGF.Builder DNGF = DNGFProtos.DNGF |
|
106 |
.newBuilder() |
|
107 |
.setKind(kind) |
|
108 |
.setLastupdatetimestamp(System.currentTimeMillis()) |
|
109 |
.setDataInfo(di(deletedByInference, provenance, inferred, simpleQualifier(action, action), "0.5")); |
|
110 |
switch (kind) { |
|
111 |
case entity: |
|
112 |
DNGF.setEntity((DNGFProtos.DNGFEntity) msg); |
|
113 |
break; |
|
114 |
case relation: |
|
115 |
DNGF.setRel((DNGFProtos.DNGFRel) msg); |
|
116 |
break; |
|
117 |
default: |
|
118 |
break; |
|
119 |
} |
|
120 |
|
|
121 |
return DNGFDecoder.decode(DNGF.build()); |
|
122 |
} |
|
123 |
|
|
124 |
public static DNGFDecoder embed(final GeneratedMessage msg, final KindProtos.Kind kind) { |
|
125 |
return embed(msg, kind, false, false, "inference_provenance", "provenance_action"); |
|
126 |
} |
|
127 |
|
|
128 |
|
|
129 |
} |
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/graph/utils/RelDescriptor.java | ||
---|---|---|
1 |
package eu.dnetlib.data.graph.utils; |
|
2 |
|
|
3 |
import java.util.regex.Matcher; |
|
4 |
import java.util.regex.Pattern; |
|
5 |
|
|
6 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
7 |
import org.apache.commons.lang3.StringUtils; |
|
8 |
|
|
9 |
public class RelDescriptor { |
|
10 |
|
|
11 |
public static final String ONTOLOGY_SEPARATOR = "_"; |
|
12 |
|
|
13 |
public static final String QUALIFIER_SEPARATOR = ":"; |
|
14 |
|
|
15 |
public static final String RELTYPE_REGEX = "(\\w+_\\w+)_(\\w+)"; |
|
16 |
public static final String ID_REGEX = "([0-9][0-9]\\|.{12}::[a-zA-Z0-9]{32})"; |
|
17 |
|
|
18 |
public static final Pattern QUALIFIER_PATTERN = Pattern.compile("^" + RELTYPE_REGEX + "$"); |
|
19 |
public static final Pattern FULL_QUALIFIER_PATTERN = Pattern.compile("^" + RELTYPE_REGEX + QUALIFIER_SEPARATOR + ID_REGEX + "$"); |
|
20 |
|
|
21 |
private String code; |
|
22 |
|
|
23 |
private String ontologyCode; |
|
24 |
|
|
25 |
private String termCode; |
|
26 |
|
|
27 |
private String targetId = null; |
|
28 |
|
|
29 |
public RelDescriptor(final String ontologyCode, final String termCode) { |
|
30 |
this.ontologyCode = ontologyCode; |
|
31 |
this.termCode = termCode; |
|
32 |
this.code = ontologyCode + ONTOLOGY_SEPARATOR + termCode; |
|
33 |
} |
|
34 |
|
|
35 |
public RelDescriptor(final String ontologyCode, final String termCode, final String targetId) { |
|
36 |
this(ontologyCode, termCode); |
|
37 |
this.targetId = targetId; |
|
38 |
} |
|
39 |
|
|
40 |
public RelDescriptor(final String value) { |
|
41 |
if (StringUtils.isBlank(value)) { |
|
42 |
throw new IllegalArgumentException("empty value is not permitted"); |
|
43 |
} |
|
44 |
|
|
45 |
final Matcher matcher = QUALIFIER_PATTERN.matcher(value); |
|
46 |
if (matcher.matches()) { |
|
47 |
this.code = value; |
|
48 |
this.ontologyCode = matcher.group(1); |
|
49 |
this.termCode = matcher.group(2); |
|
50 |
} else { |
|
51 |
final Matcher fullMatcher = FULL_QUALIFIER_PATTERN.matcher(value); |
|
52 |
if (fullMatcher.matches()) { |
|
53 |
this.code = fullMatcher.group(1) + ONTOLOGY_SEPARATOR + fullMatcher.group(2); |
|
54 |
this.ontologyCode = fullMatcher.group(1); |
|
55 |
this.termCode = fullMatcher.group(2); |
|
56 |
this.targetId = fullMatcher.group(3); |
|
57 |
} else { |
|
58 |
throw new IllegalArgumentException(String.format("invalid qualifier format: '%s'", value)); |
|
59 |
} |
|
60 |
} |
|
61 |
} |
|
62 |
|
|
63 |
public String shortQualifier() { |
|
64 |
return getOntologyCode() + ONTOLOGY_SEPARATOR + getTermCode(); |
|
65 |
} |
|
66 |
|
|
67 |
public String qualifier() { |
|
68 |
return qualifier(getTargetId()); |
|
69 |
} |
|
70 |
|
|
71 |
public String qualifier(final String targetId) { |
|
72 |
if (StringUtils.isBlank(targetId)) { |
|
73 |
throw new IllegalStateException("incomplete qualifier"); |
|
74 |
} |
|
75 |
|
|
76 |
return shortQualifier() + QUALIFIER_SEPARATOR + targetId; |
|
77 |
} |
|
78 |
|
|
79 |
public static String asString(final Qualifier qualifier) { |
|
80 |
if (qualifier == null) { |
|
81 |
throw new IllegalArgumentException("invalid qualifier"); |
|
82 |
} |
|
83 |
return qualifier.getSchemeid() + ONTOLOGY_SEPARATOR + qualifier.getClassid(); |
|
84 |
} |
|
85 |
|
|
86 |
public Qualifier asQualifier() { |
|
87 |
return Qualifier.newBuilder() |
|
88 |
.setClassid(getTermCode()).setClassname(getTermCode()) |
|
89 |
.setSchemeid(getOntologyCode()).setSchemename(getOntologyCode()).build(); |
|
90 |
} |
|
91 |
|
|
92 |
public String getCode() { |
|
93 |
return code; |
|
94 |
} |
|
95 |
|
|
96 |
public String getOntologyCode() { |
|
97 |
return ontologyCode; |
|
98 |
} |
|
99 |
|
|
100 |
public String getTermCode() { |
|
101 |
return termCode; |
|
102 |
} |
|
103 |
|
|
104 |
@Override |
|
105 |
public String toString() { |
|
106 |
return getCode(); |
|
107 |
} |
|
108 |
|
|
109 |
@Override |
|
110 |
public int hashCode() { |
|
111 |
final int prime = 31; |
|
112 |
int result = 1; |
|
113 |
result = (prime * result) + ((code == null) ? 0 : code.hashCode()); |
|
114 |
return result; |
|
115 |
} |
|
116 |
|
|
117 |
@Override |
|
118 |
public boolean equals(final Object obj) { |
|
119 |
if (this == obj) return true; |
|
120 |
if (obj == null) return false; |
|
121 |
if (getClass() != obj.getClass()) return false; |
|
122 |
RelDescriptor other = (RelDescriptor) obj; |
|
123 |
if (code == null) { |
|
124 |
if (other.code != null) return false; |
|
125 |
} else if (!code.equals(other.code)) return false; |
|
126 |
return true; |
|
127 |
} |
|
128 |
|
|
129 |
public String getTargetId() { |
|
130 |
return targetId; |
|
131 |
} |
|
132 |
|
|
133 |
public void setTargetId(final String targetId) { |
|
134 |
this.targetId = targetId; |
|
135 |
} |
|
136 |
} |
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorMapper.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model.gt; |
|
2 |
|
|
3 |
import com.google.common.base.Function; |
|
4 |
import com.google.common.collect.Iterables; |
|
5 |
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; |
|
6 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
7 |
import eu.dnetlib.data.proto.FieldTypeProtos.StringField; |
|
8 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
9 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
10 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
11 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
12 |
import eu.dnetlib.data.proto.PersonProtos.Person.CoAuthor; |
|
13 |
import eu.dnetlib.data.proto.PersonProtos.Person.MergedPerson; |
|
14 |
import eu.dnetlib.data.proto.PersonProtos.Person.Metadata; |
|
15 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
16 |
import org.apache.commons.lang3.StringUtils; |
|
17 |
|
|
18 |
public class GTAuthorMapper { |
|
19 |
|
|
20 |
public DNGF map(final GTAuthor gta) { |
|
21 |
|
|
22 |
final DNGF.Builder oaf = DNGF.newBuilder(); |
|
23 |
|
|
24 |
oaf.setDataInfo(getDataInfo()); |
|
25 |
oaf.setLastupdatetimestamp(System.currentTimeMillis()); |
|
26 |
oaf.setKind(Kind.entity); |
|
27 |
oaf.setEntity(getDNGFEntity(gta)); |
|
28 |
|
|
29 |
return oaf.build(); |
|
30 |
} |
|
31 |
|
|
32 |
private DNGFEntity getDNGFEntity(final GTAuthor gta) { |
|
33 |
final DNGFEntity.Builder entity = DNGFEntity.newBuilder(); |
|
34 |
entity.setType(Type.person); |
|
35 |
entity.setId(gta.getId()); |
|
36 |
entity.setPerson(getPerson(gta)); |
|
37 |
return entity.build(); |
|
38 |
} |
|
39 |
|
|
40 |
private Person getPerson(final GTAuthor gta) { |
|
41 |
final Person.Builder person = Person.newBuilder(); |
|
42 |
|
|
43 |
if (gta.getAuthor() != null) { |
|
44 |
final Person.Metadata.Builder m = Person.Metadata.newBuilder(); |
|
45 |
|
|
46 |
if (StringUtils.isNotBlank(gta.getAuthor().getFullname())) { |
|
47 |
m.setFullname(sf(gta.getAuthor().getFullname())); |
|
48 |
} |
|
49 |
if (StringUtils.isNotBlank(gta.getAuthor().getFirstname())) { |
|
50 |
m.setFirstname(sf(gta.getAuthor().getFirstname())); |
|
51 |
} |
|
52 |
if (StringUtils.isNotBlank(gta.getAuthor().getSecondnames())) { |
|
53 |
m.addSecondnames(sf(gta.getAuthor().getSecondnames())); |
|
54 |
} |
|
55 |
|
|
56 |
person.setMetadata(m.build()); |
|
57 |
} |
|
58 |
|
|
59 |
person.setAnchor(true); |
|
60 |
person.addAllMergedperson(Iterables.transform(gta.getMerged(), new Function<Author, MergedPerson>() { |
|
61 |
|
|
62 |
@Override |
|
63 |
public MergedPerson apply(final Author a) { |
|
64 |
final MergedPerson.Builder mp = MergedPerson.newBuilder(); |
|
65 |
mp.setId(a.getId()); |
|
66 |
mp.setMetadata(getMetadata(a)); |
|
67 |
return mp.build(); |
|
68 |
} |
|
69 |
|
|
70 |
})); |
|
71 |
|
|
72 |
person.addAllCoauthor(Iterables.transform(gta.getCoAuthors(), new Function<eu.dnetlib.pace.model.gt.CoAuthor, CoAuthor>() { |
|
73 |
|
|
74 |
@Override |
|
75 |
public CoAuthor apply(final eu.dnetlib.pace.model.gt.CoAuthor ca) { |
|
76 |
final CoAuthor.Builder coAuthor = CoAuthor.newBuilder(); |
|
77 |
coAuthor.setId(ca.getId()); |
|
78 |
if (StringUtils.isNotBlank(ca.getAnchorId())) { |
|
79 |
coAuthor.setAnchorId(ca.getAnchorId()); |
|
80 |
} |
|
81 |
coAuthor.setMetadata(getMetadata(ca)); |
|
82 |
return coAuthor.build(); |
|
83 |
} |
|
84 |
})); |
|
85 |
|
|
86 |
return person.build(); |
|
87 |
} |
|
88 |
|
|
89 |
private Metadata getMetadata(final Author a) { |
|
90 |
final Metadata.Builder m = Metadata.newBuilder(); |
|
91 |
m.setFullname(sf(a.getFullname())); |
|
92 |
if (a.isWellFormed()) { |
|
93 |
m.setFirstname(sf(a.getFirstname())); |
|
94 |
m.addSecondnames(sf(a.getSecondnames())); |
|
95 |
} |
|
96 |
return m.build(); |
|
97 |
} |
|
98 |
|
|
99 |
private DataInfo getDataInfo() { |
|
100 |
final DataInfo.Builder d = DataInfo.newBuilder(); |
|
101 |
d.setDeletedbyinference(false); |
|
102 |
d.setInferred(true); |
|
103 |
d.setTrust("0.5"); |
|
104 |
d.setInferenceprovenance("dedup-person-groundtruth"); |
|
105 |
d.setProvenanceaction(Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("")); |
|
106 |
return d.build(); |
|
107 |
} |
|
108 |
|
|
109 |
private StringField sf(final String s) { |
|
110 |
final StringField.Builder sf = StringField.newBuilder(); |
|
111 |
sf.setValue(s); |
|
112 |
return sf.build(); |
|
113 |
} |
|
114 |
|
|
115 |
} |
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/elasticsearchFeedWDSDataJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="4bf4bdc4-3668-4dc5-977c-a98862ad7a24_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="elasticsearchFeedWDSDataJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that builds WDS records and index them on ES</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.wds.IndexWDSMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.io.Text"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="es.nodes" |
|
26 |
value="ip-90-147-167-137.ct1.garrservices.it:9200, ip-90-147-167-126.ct1.garrservices.it:9200, ip-90-147-167-13.ct1.garrservices.it:9200, ip-90-147-167-125.ct1.garrservices.it:9200"/> |
|
27 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
28 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
29 |
|
|
30 |
<PROPERTY key="es.resource" value="wds/item"/> |
|
31 |
<PROPERTY key="es.mapping.id" value="id"/> |
|
32 |
<PROPERTY key="es.input.json" value="yes"/> |
|
33 |
|
|
34 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
35 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
36 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
37 |
|
|
38 |
</STATIC_CONFIGURATION> |
|
39 |
<JOB_INTERFACE> |
|
40 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
41 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
42 |
</JOB_INTERFACE> |
|
43 |
<SCAN> |
|
44 |
<FILTERS/> |
|
45 |
<FAMILIES/> |
|
46 |
</SCAN> |
|
47 |
</HADOOP_JOB> |
|
48 |
<STATUS> |
|
49 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
50 |
<RUNNING_INSTANCES value="0"/> |
|
51 |
<CUMULATIVE_RUN value="0"/> |
|
52 |
</STATUS> |
|
53 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
54 |
</BODY> |
|
55 |
</RESOURCE_PROFILE> |
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupBuildRootsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="895ce6a9-4131-4954-b9ed-949ff78f5448_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
4 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<HADOOP_JOB name="dedupBuildRootsJob" type="mapreduce"> |
|
11 |
<DESCRIPTION>map reduce job that build the roots and redirects the rels</DESCRIPTION> |
|
12 |
<STATIC_CONFIGURATION> |
|
13 |
|
|
14 |
<!-- I/O FORMAT --> |
|
15 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat" /> |
|
16 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat" /> |
|
17 |
|
|
18 |
<!-- MAPPER --> |
|
19 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsMapper" /> |
|
20 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text" /> |
|
21 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
22 |
|
|
23 |
<!-- REDUCER --> |
|
24 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupBuildRootsReducer" /> |
|
25 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable" /> |
|
26 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Writable" /> |
|
27 |
|
|
28 |
<!-- MISC --> |
|
29 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false" /> |
|
30 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false" /> |
|
31 |
<PROPERTY key="mapreduce.map.speculative" value="false" /> |
|
32 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
33 |
<PROPERTY key="mapred.task.timeout" value="2400000"/> |
|
34 |
|
|
35 |
<PROPERTY key="mapred.reduce.tasks" value="10" /> |
|
36 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
37 |
|
|
38 |
<!-- Uncomment to override the default lib path --> |
|
39 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
40 |
</STATIC_CONFIGURATION> |
|
41 |
<JOB_INTERFACE> |
|
42 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table" /> |
|
43 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table" /> |
|
44 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table" /> |
|
45 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table" /> |
|
46 |
</JOB_INTERFACE> |
|
47 |
<SCAN caching="10"> |
|
48 |
<FILTERS operator="MUST_PASS_ALL"> |
|
49 |
<FILTER type="prefix" param="entityTypeId" /> |
|
50 |
</FILTERS> |
|
51 |
<FAMILIES/> |
|
52 |
</SCAN> |
|
53 |
</HADOOP_JOB> |
|
54 |
<STATUS> |
|
55 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
56 |
<RUNNING_INSTANCES value="0"/> |
|
57 |
<CUMULATIVE_RUN value="0" /> |
|
58 |
</STATUS> |
|
59 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
60 |
</BODY> |
|
61 |
</RESOURCE_PROFILE> |
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/elasticsearchFeedDliScholixDataJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="6e96b76e-8398-4afe-a74b-56cea0b1dc68_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="elasticsearchFeedDliScholixDataJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that joins the entities on the hbase table and produces scholix records that are indexed on ES</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.elasticsearch.hadoop.mr.EsOutputFormat" /> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dli.PrepareScholixDataMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.DliKey"/> |
|
22 |
<PROPERTY key="mapreduce.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.DliKey"/> |
|
23 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
24 |
|
|
25 |
<!-- PARTITIONER --> |
|
26 |
<PROPERTY key="mapred.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.NaturalKeyPartitioner"/> |
|
27 |
<PROPERTY key="mapreduce.partitioner.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.NaturalKeyPartitioner"/> |
|
28 |
<PROPERTY key="mapred.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.NaturalKeyGroupingComparator"/> |
|
29 |
<PROPERTY key="mapreduce.output.value.groupfn.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.NaturalKeyGroupingComparator"/> |
|
30 |
<!--<PROPERTY key="mapred.output.key.comparator.class" value="eu.dnetlib.data.mapreduce.hbase.dli.kv.CompositeKeyComparator"/>--> |
|
31 |
|
|
32 |
<!-- REDUCER --> |
|
33 |
<PROPERTY key="mapreduce.reduce.class" |
|
34 |
value="eu.dnetlib.data.mapreduce.hbase.dli.PrepareScholixDataReducer"/> |
|
35 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
36 |
<PROPERTY key="mapred.output.value.class" value="org.apache.hadoop.io.Text"/> |
|
37 |
|
|
38 |
<!-- MISC --> |
|
39 |
<PROPERTY key="es.nodes" |
|
40 |
value="90.147.167.27:9200,90.147.167.25:9200,90.147.167.28:9200,90.147.167.26:9200"/> |
|
41 |
<PROPERTY key="es.nodes.resolve.hostname" value="false"/> |
|
42 |
<PROPERTY key="es.nodes.wan.only" value="true"/> |
|
43 |
<PROPERTY key="es.resource" value="dli/scholix"/> |
|
44 |
<PROPERTY key="es.input.json" value="yes"/> |
|
45 |
<PROPERTY key="mapred.reduce.tasks" value="10"/> |
|
46 |
|
|
47 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
48 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
49 |
|
|
50 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
51 |
|
|
52 |
<!-- Uncomment to override the default lib path --> |
|
53 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
54 |
</STATIC_CONFIGURATION> |
|
55 |
<JOB_INTERFACE> |
|
56 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
57 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
58 |
</JOB_INTERFACE> |
|
59 |
<SCAN> |
|
60 |
<FILTERS/> |
|
61 |
<FAMILIES/> |
|
62 |
</SCAN> |
|
63 |
</HADOOP_JOB> |
|
64 |
<STATUS> |
|
65 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
66 |
<RUNNING_INSTANCES value="0"/> |
|
67 |
<CUMULATIVE_RUN value="0"/> |
|
68 |
</STATUS> |
|
69 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
70 |
</BODY> |
|
71 |
</RESOURCE_PROFILE> |
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/actionmanager/actions/AtomicActionDeserialiser.java | ||
---|---|---|
1 |
package eu.dnetlib.data.actionmanager.actions; |
|
2 |
|
|
3 |
import java.lang.reflect.Type; |
|
4 |
|
|
5 |
import com.google.gson.*; |
|
6 |
import com.googlecode.protobuf.format.JsonFormat; |
|
7 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
8 |
import eu.dnetlib.rmi.data.hadoop.actionmanager.Agent; |
|
9 |
import eu.dnetlib.rmi.data.hadoop.actionmanager.actions.AbstractActionSerializer; |
|
10 |
import eu.dnetlib.rmi.data.hadoop.actionmanager.actions.AtomicAction; |
|
11 |
import org.apache.commons.codec.binary.Base64; |
|
12 |
import org.apache.commons.lang3.StringUtils; |
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
|
|
16 |
/** |
|
17 |
* Created by claudio on 30/09/15. |
|
18 |
*/ |
|
19 |
public class AtomicActionDeserialiser extends AbstractActionSerializer implements JsonDeserializer<AtomicAction> { |
|
20 |
|
|
21 |
private static final Log log = LogFactory.getLog(AtomicActionDeserialiser.class); |
|
22 |
|
|
23 |
public static AtomicAction fromJSON(String s) { |
|
24 |
final GsonBuilder gson = new GsonBuilder(); |
|
25 |
|
|
26 |
gson.registerTypeAdapter(AtomicAction.class, new AtomicActionDeserialiser()); |
|
27 |
|
|
28 |
return gson.create().fromJson(s, AtomicAction.class); |
|
29 |
} |
|
30 |
|
|
31 |
@Override |
|
32 |
public AtomicAction deserialize(final JsonElement json, final Type typeOfT, final JsonDeserializationContext context) throws JsonParseException { |
|
33 |
|
|
34 |
if (json.isJsonObject()) { |
|
35 |
final JsonObject j = (JsonObject) json; |
|
36 |
|
|
37 |
final Agent a = new Agent(); |
|
38 |
|
|
39 |
final JsonObject aj = j.get(agent).getAsJsonObject(); |
|
40 |
a.setId(aj.get(agent_id).getAsString()); |
|
41 |
a.setName(aj.get(agent_name).getAsString()); |
|
42 |
a.setType(Agent.AGENT_TYPE.valueOf(aj.get(agent_type).getAsString())); |
|
43 |
|
|
44 |
AtomicAction aa = new AtomicAction(j.get(rawSet).getAsString(), a); |
|
45 |
|
|
46 |
aa.setTargetColumn(j.get(targetColumn).getAsString()); |
|
47 |
aa.setTargetColumnFamily(j.get(targetColumnFamily).getAsString()); |
|
48 |
aa.setTargetRowKey(j.get(targetRowKey).getAsString()); |
|
49 |
|
|
50 |
aa.setRowKey(j.get(rowKey).getAsString()); |
|
51 |
aa.setTargetValue(decodeTargetValue(j.get(targetValue).getAsString())); |
|
52 |
|
|
53 |
return aa; |
|
54 |
} |
|
55 |
|
|
56 |
throw new JsonParseException("input is not a json object"); |
|
57 |
} |
|
58 |
|
|
59 |
private byte[] decodeTargetValue(final String s) { |
|
60 |
if (StringUtils.isBlank(s)) { |
|
61 |
return null; |
|
62 |
} |
|
63 |
try { |
|
64 |
final String json = new String(Base64.decodeBase64(s.getBytes())); |
|
65 |
|
|
66 |
DNGF.Builder oaf = DNGF.newBuilder(); |
|
67 |
JsonFormat.merge(json, oaf); |
|
68 |
|
|
69 |
return oaf.build().toByteArray(); |
|
70 |
} catch (JsonFormat.ParseException e) { |
|
71 |
log.error("unable to parse proto", e); |
|
72 |
return null; |
|
73 |
} |
|
74 |
} |
|
75 |
|
|
76 |
} |
|
77 |
|
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupFixRelationsJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="5b99d940-7477-40db-a458-549d301419c5_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupFixRelationsJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that redirects the rels to fix the graph consistency</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION><!-- I/O FORMAT --> |
|
14 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
15 |
<PROPERTY key="mapreduce.outputformat.class" |
|
16 |
value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
17 |
<!-- MAPPER --> |
|
18 |
<PROPERTY key="mapreduce.map.class" |
|
19 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFixRelationMapper"/> |
|
20 |
<PROPERTY key="mapreduce.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKey"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKey"/> |
|
22 |
<PROPERTY key="mapreduce.mapoutput.value.class" |
|
23 |
value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
24 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
25 |
<!-- PARTITIONER / SECONDARY SORTING --> |
|
26 |
<PROPERTY key="mapreduce.partitioner.class" |
|
27 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKeyPartitioner"/> |
|
28 |
<PROPERTY key="mapred.partitioner.class" |
|
29 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKeyPartitioner"/> |
|
30 |
|
|
31 |
<PROPERTY key="mapred.output.value.groupfn.class" |
|
32 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKeyGroupingComparator"/> |
|
33 |
<PROPERTY key="mapreduce.output.value.groupfn.class" |
|
34 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKeyGroupingComparator"/> |
|
35 |
<!-- REDUCER --> |
|
36 |
<PROPERTY key="mapreduce.reduce.class" |
|
37 |
value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupFixRelationReducer"/> |
|
38 |
<PROPERTY key="mapreduce.output.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
39 |
<PROPERTY key="mapreduce.output.value.class" value="org.apache.hadoop.io.Writable"/> |
|
40 |
<!-- MISC --> |
|
41 |
<PROPERTY key="mapreduce.reduce.tasks.speculative.execution" value="false"/> |
|
42 |
<PROPERTY key="mapreduce.map.tasks.speculative.execution" value="false"/> |
|
43 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
44 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
45 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
46 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
47 |
<PROPERTY key="mapred.map.speculative" value="false"/> |
|
48 |
<PROPERTY key="mapred.reduce.speculative" value="false"/> |
|
49 |
<PROPERTY key="mapred.reduce.tasks" value="100"/> |
|
50 |
|
|
51 |
</STATIC_CONFIGURATION> |
|
52 |
<JOB_INTERFACE> |
|
53 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
54 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/> |
|
55 |
<PARAM name="ontologies" required="true" |
|
56 |
description="ontology values fetched from the information system"/> |
|
57 |
</JOB_INTERFACE> |
|
58 |
<SCAN caching="10"> |
|
59 |
<FILTERS operator="MUST_PASS_ALL"> |
|
60 |
<FILTER type="prefix" param="entityTypeId"/> |
|
61 |
</FILTERS> |
|
62 |
<FAMILIES/> |
|
63 |
</SCAN> |
|
64 |
</HADOOP_JOB> |
|
65 |
<STATUS> |
|
66 |
<LAST_SUBMISSION_DATE value="2017-03-16T10:45:48+01:00"/> |
|
67 |
<RUNNING_INSTANCES value="0"/> |
|
68 |
<CUMULATIVE_RUN value="2"/> |
|
69 |
</STATUS> |
|
70 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
71 |
</BODY> |
|
72 |
</RESOURCE_PROFILE> |
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java | ||
---|---|---|
1 |
package eu.dnetlib.data.index;

import java.util.List;

import com.google.common.base.Function;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;

/**
 * Thin client wrapper around a {@link CloudSolrServer} used to feed and query index records.
 * Every operation wraps any failure (including Errors, via catch Throwable) into a
 * {@link CloudIndexClientException}.
 *
 * Created by michele on 11/11/15.
 */
public class CloudIndexClient {

	private static final Log log = LogFactory.getLog(CloudIndexClient.class);
	// Solr field under which the original record payload is stored.
	private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult";

	private final CloudSolrServer solrServer;

	// Protected: instances are expected to be created by a factory/subclass, not directly.
	protected CloudIndexClient(final CloudSolrServer solrServer) {
		this.solrServer = solrServer;
	}

	/**
	 * Transforms the given record with {@code toIndexRecord} and feeds it to the index.
	 *
	 * @param record        the source record
	 * @param indexDsId     the index datasource identifier
	 * @param toIndexRecord transformation applied to the record before indexing
	 * @return the solr update status code (0 on success)
	 * @throws CloudIndexClientException when the document is invalid or feeding fails
	 */
	public int feed(final String record, final String indexDsId, final Function<String, String> toIndexRecord) throws CloudIndexClientException {
		try {
			final SolrInputDocument doc = prepareSolrDocument(record, indexDsId, toIndexRecord);
			// NOTE(review): prepareSolrDocument currently always returns null (see below),
			// so this path always throws "Invalid solr document".
			if ((doc == null) || doc.isEmpty()) throw new CloudIndexClientException("Invalid solr document");
			return feed(doc);
		} catch (final Throwable e) {
			throw new CloudIndexClientException("Error feeding document", e);
		}
	}

	/**
	 * Feeds a single document and commits immediately.
	 *
	 * @param document the solr document to index
	 * @return the solr update status code (always 0, non-zero raises)
	 * @throws CloudIndexClientException when the add/commit fails or returns a non-zero status
	 */
	public int feed(final SolrInputDocument document) throws CloudIndexClientException {
		try {
			final UpdateResponse res = solrServer.add(document);
			//log.debug("feed time for single records, elapsed time: " + HumanTime.exactly(res.getElapsedTime()));
			if (res.getStatus() != 0) {
				throw new CloudIndexClientException("bad status: " + res.getStatus());
			}
			solrServer.commit();
			return res.getStatus();
		} catch (final Throwable e) {
			throw new CloudIndexClientException("Error feeding document", e);
		}
	}

	/**
	 * Feeds a batch of documents, commits, then invokes the optional callback.
	 * Note: unlike the single-document overload, the commit happens BEFORE the
	 * status check, so a bad status still leaves the batch committed.
	 *
	 * @param docs     documents to index
	 * @param callback invoked with the update response after commit; may be null
	 * @throws CloudIndexClientException when feeding fails or the status is non-zero
	 */
	public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback) throws CloudIndexClientException {
		try {
			final UpdateResponse res = solrServer.add(docs);
			//log.debug("feed time for " + docs.size() + " records, elapsed tipe: : " + HumanTime.exactly(res.getElapsedTime()));
			solrServer.commit();
			if (callback != null) {
				callback.doAfterFeeding(res);
			}
			if (res.getStatus() != 0) throw new CloudIndexClientException("bad status: " + res.getStatus());
		} catch (final Throwable e) {
			throw new CloudIndexClientException("Error feeding documents", e);
		}
	}

	/**
	 * Builds the solr document for a record.
	 *
	 * NOTE(review): the actual implementation is commented out (it depended on
	 * StreamingInputDocumentFactory) and this method currently always returns null,
	 * which makes {@link #feed(String, String, Function)} always fail. Restore or
	 * remove before relying on it.
	 */
	public SolrInputDocument prepareSolrDocument(final String record, final String indexDsId, final Function<String, String> toIndexRecord)
			throws CloudIndexClientException {
		try {
			/*
			final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory();

			final String version = (new SimpleDateFormat("yyyy-MM-dd\'T\'hh:mm:ss\'Z\'")).format(new Date());
			final String indexRecord = toIndexRecord.apply(record);

			if (log.isDebugEnabled()) {
				log.debug("***************************************\nSubmitting index record:\n" + indexRecord + "\n***************************************\n");
			}

			return documentFactory.parseDocument(version, indexRecord, indexDsId, INDEX_RECORD_RESULT_FIELD);*/
			return null;
		} catch (final Throwable e) {
			throw new CloudIndexClientException("Error creating solr document", e);
		}
	}

	/**
	 * @param id the value matched against the "objidentifier" field
	 * @return true when at least one document with the given objidentifier exists
	 * @throws CloudIndexClientException when the query fails
	 */
	public boolean isRecordIndexed(final String id) throws CloudIndexClientException {
		try {
			final SolrQuery query = new SolrQuery();
			query.setQuery("objidentifier:\"" + id + "\"");
			final QueryResponse res = solrServer.query(query);
			return res.getResults().size() > 0;
		} catch (final Throwable e) {
			throw new CloudIndexClientException("Error searching documents", e);
		}
	}

	/** Shuts down the underlying solr server connection; safe to call when never initialised. */
	public void close() {
		if (solrServer != null) {
			solrServer.shutdown();
		}
	}

	/** Hook invoked after a batch feed has been committed. */
	public interface AfterFeedingCallback {

		void doAfterFeeding(final UpdateResponse response);
	}
}
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMarkDeletedEntityJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="667fe203-ee51-4dff-8c9c-b90e66e96eb4_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMarkDeletedEntityJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map only job that marks the deleted entities</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.hbase.mapreduce.TableOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.DedupMarkDeletedEntityMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.hbase.io.ImmutableBytesWritable"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="org.apache.hadoop.hbase.client.Put"/> |
|
23 |
|
|
24 |
<!-- MISC --> |
|
25 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
26 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
27 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
28 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
29 |
|
|
30 |
<PROPERTY key="mapred.reduce.tasks" value="0"/> |
|
31 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
32 |
|
|
33 |
<!-- Uncomment to override the default lib path --> |
|
34 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
35 |
</STATIC_CONFIGURATION> |
|
36 |
<JOB_INTERFACE> |
|
37 |
<PARAM name="hbase.mapred.inputtable" required="true" description="source hbase table"/> |
|
38 |
<PARAM name="hbase.mapred.outputtable" required="true" description="target hbase table"/> |
|
39 |
<PARAM name="hbase.mapreduce.inputtable" required="true" description="source hbase table"/> |
|
40 |
<PARAM name="hbase.mapreduce.outputtable" required="true" description="target hbase table"/> |
|
41 |
</JOB_INTERFACE> |
|
42 |
<SCAN> |
|
43 |
<FILTERS operator="MUST_PASS_ALL"> |
|
44 |
<FILTER type="prefix" param="entityTypeId"/> |
|
45 |
</FILTERS> |
|
46 |
<FAMILIES> |
|
47 |
<FAMILY value="metadata"/> |
|
48 |
<FAMILY value="rels"/> |
|
49 |
</FAMILIES> |
|
50 |
</SCAN> |
|
51 |
</HADOOP_JOB> |
|
52 |
<STATUS> |
|
53 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
54 |
<RUNNING_INSTANCES value="0"/> |
|
55 |
<CUMULATIVE_RUN value="0"/> |
|
56 |
</STATUS> |
|
57 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
58 |
</BODY> |
|
59 |
</RESOURCE_PROFILE> |
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClientException.java | ||
---|---|---|
1 |
package eu.dnetlib.data.index; |
|
2 |
|
|
3 |
/**
 * Exception raised by {@code CloudIndexClient} operations; always wraps the
 * underlying failure as its cause when one is available.
 *
 * Created by michele on 23/11/15.
 */
public class CloudIndexClientException extends Exception {

	// Exception is Serializable: pin the serial form explicitly.
	private static final long serialVersionUID = 1L;

	/**
	 * @param message description of the failure
	 */
	public CloudIndexClientException(final String message) {
		super(message);
	}

	/**
	 * @param message description of the failure
	 * @param cause   the underlying exception, preserved in the chain
	 */
	public CloudIndexClientException(final String message, final Throwable cause) {
		super(message, cause);
	}
}
modules/dnet-graph-domain/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/HadoopJobConfigurationDSResources/HadoopJobConfigurationDSResourceType/dedupMinDistGraphJob.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER |
|
4 |
value="de888da6-2d10-4d42-a624-a44d4083414a_SGFkb29wSm9iQ29uZmlndXJhdGlvbkRTUmVzb3VyY2VzL0hhZG9vcEpvYkNvbmZpZ3VyYXRpb25EU1Jlc291cmNlVHlwZQ=="/> |
|
5 |
<RESOURCE_TYPE value="HadoopJobConfigurationDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="HadoopJobConfigurationDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2001-12-31T12:00:00"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<HADOOP_JOB name="dedupMinDistGraphJob" type="mapreduce"> |
|
12 |
<DESCRIPTION>map reduce job that finds the minimum vertex in each connected component in the input graph (as adjacency lists)</DESCRIPTION> |
|
13 |
<STATIC_CONFIGURATION> |
|
14 |
|
|
15 |
<!-- I/O FORMAT --> |
|
16 |
<PROPERTY key="mapreduce.inputformat.class" value="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"/> |
|
17 |
<PROPERTY key="mapreduce.outputformat.class" value="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"/> |
|
18 |
|
|
19 |
<!-- MAPPER --> |
|
20 |
<PROPERTY key="mapreduce.map.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchMapper"/> |
|
21 |
<PROPERTY key="mapred.mapoutput.key.class" value="org.apache.hadoop.io.Text"/> |
|
22 |
<PROPERTY key="mapred.mapoutput.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
23 |
|
|
24 |
<!-- REDUCER --> |
|
25 |
<PROPERTY key="mapreduce.reduce.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.MindistSearchReducer"/> |
|
26 |
<PROPERTY key="mapred.output.key.class" value="org.apache.hadoop.io.Text"/> |
|
27 |
<PROPERTY key="mapred.output.value.class" value="eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable"/> |
|
28 |
|
|
29 |
<!-- MISC --> |
|
30 |
<PROPERTY key="mapred.compress.map.output" value="false"/> |
|
31 |
<PROPERTY key="mapred.reduce.tasks.speculative.execution" value="false"/> |
|
32 |
<PROPERTY key="mapred.map.tasks.speculative.execution" value="false"/> |
|
33 |
<PROPERTY key="mapreduce.map.speculative" value="false"/> |
|
34 |
<PROPERTY key="mapreduce.reduce.speculative" value="false"/> |
|
35 |
|
|
36 |
<PROPERTY key="mapred.reduce.tasks" value="100"/> |
|
37 |
<!-- <PROPERTY key="user.name" value="dnet" /> --> |
|
38 |
|
|
39 |
<!-- Uncomment to override the default lib path --> |
|
40 |
<!-- <PROPERTY key="job.lib" value="/user/dnet/dnet-mapreduce-jobs-0.0.2-SNAPSHOT-jar-with-dependencies.jar"/> --> |
|
41 |
</STATIC_CONFIGURATION> |
|
42 |
<JOB_INTERFACE> |
|
43 |
<PARAM name="mapred.input.dir" required="true" description="source sequence file on hdfs"/> |
|
44 |
<PARAM name="mapred.output.dir" required="true" description="target sequence file on hdfs"/> |
|
45 |
</JOB_INTERFACE> |
|
46 |
<SCAN> |
|
47 |
<FILTERS/> |
|
48 |
<FAMILIES/> |
|
49 |
</SCAN> |
|
50 |
</HADOOP_JOB> |
|
51 |
<STATUS> |
|
52 |
<LAST_SUBMISSION_DATE value="2001-12-31T12:00:00"/> |
|
53 |
<RUNNING_INSTANCES value="0"/> |
|
54 |
<CUMULATIVE_RUN value="0"/> |
|
55 |
</STATUS> |
|
56 |
<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS> |
|
57 |
</BODY> |
|
58 |
</RESOURCE_PROFILE> |
|
59 |
|
modules/dnet-graph-domain/trunk/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import com.google.common.base.Predicate; |
|
4 |
import com.google.common.base.Predicates; |
|
5 |
import com.google.common.base.Splitter; |
|
6 |
import com.google.common.collect.Iterables; |
|
7 |
import com.google.common.collect.Lists; |
|
8 |
import com.google.protobuf.Descriptors.Descriptor; |
|
9 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
10 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
11 |
import com.google.protobuf.Message; |
|
12 |
import com.google.protobuf.Message.Builder; |
|
13 |
import com.google.protobuf.ProtocolMessageEnum; |
|
14 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
15 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
16 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFRel; |
|
17 |
import eu.dnetlib.data.proto.FieldTypeProtos.*; |
|
18 |
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription; |
|
19 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
20 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
21 |
import eu.dnetlib.miscutils.collections.Pair; |
|
22 |
import org.apache.commons.codec.binary.Base64; |
|
23 |
import org.apache.commons.codec.binary.Hex; |
|
24 |
import org.apache.commons.lang3.StringUtils; |
|
25 |
import org.apache.commons.lang3.math.NumberUtils; |
|
26 |
import org.w3c.dom.NamedNodeMap; |
|
27 |
import org.w3c.dom.Node; |
|
28 |
import org.w3c.dom.NodeList; |
|
29 |
|
|
30 |
import java.nio.charset.Charset; |
|
31 |
import java.security.MessageDigest; |
|
32 |
import java.text.ParseException; |
|
33 |
import java.text.SimpleDateFormat; |
|
34 |
import java.util.Date; |
|
35 |
import java.util.Iterator; |
|
36 |
import java.util.List; |
|
37 |
import java.util.Map; |
|
38 |
|
|
39 |
public abstract class AbstractDNetXsltFunctions { |
|
40 |
|
|
41 |
// Matches strings beginning with the http://, https:// or ftp:// schemes.
public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
// NOTE(review): not referenced in the visible portion of this class — confirm usage elsewhere.
private static final int MAX_NSPREFIX_LEN = 12;


// True when the trimmed string looks like a URL according to URL_REGEX.
public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
|
46 |
|
|
47 |
// Builder for Entities |
|
48 |
protected static DNGF getOaf(final DNGFEntity.Builder entity, final DataInfo.Builder info) { |
|
49 |
return _getOaf(DNGF.newBuilder(), info).setKind(Kind.entity).setEntity(entity).build(); |
|
50 |
} |
|
51 |
|
|
52 |
// Builder for Rels |
|
53 |
protected static DNGF getOaf(final DNGFRel.Builder rel, final DataInfo.Builder info) { |
|
54 |
return _getOaf(DNGF.newBuilder(), info).setKind(Kind.relation).setRel(rel).build(); |
|
55 |
} |
|
56 |
|
|
57 |
private static DNGF.Builder _getOaf(final DNGF.Builder oaf, final DataInfo.Builder info) { |
|
58 |
if (info != null) { |
|
59 |
return oaf.setDataInfo(ensureDataInfo(info)); |
|
60 |
} else return oaf; |
|
61 |
} |
|
62 |
|
|
63 |
protected static DataInfo.Builder ensureDataInfo(final DataInfo.Builder info) { |
|
64 |
if (info.isInitialized()) return info; |
|
65 |
return getDataInfo(null, "UNKNOWN", "0.9", false, false); |
|
66 |
} |
|
67 |
|
|
68 |
protected static KeyValue getKV(final String id, final String name) { |
|
69 |
return KeyValue.newBuilder().setKey(id).setValue(name).build(); |
|
70 |
} |
|
71 |
|
|
72 |
protected static DNGFRel.Builder getRel( |
|
73 |
final String sourceId, |
|
74 |
final Type sourceType, |
|
75 |
final String targetId, |
|
76 |
final Type targetType, |
|
77 |
final Qualifier relType, |
|
78 |
final boolean isChild) { |
|
79 |
return DNGFRel.newBuilder().setSource(sourceId).setTarget(targetId) |
|
80 |
.setSourceType(sourceType) |
|
81 |
.setTargetType(targetType).setRelType(relType) |
|
82 |
.setChild(isChild); |
|
83 |
} |
|
84 |
|
|
85 |
protected static DNGFEntity.Builder getEntity(final Type type, |
|
86 |
final String id, |
|
87 |
final KeyValue collectedFrom, |
|
88 |
final List<String> originalIds, |
|
89 |
final String dateOfCollection, |
|
90 |
final String dateOfTransformation, |
|
91 |
final List<StructuredProperty> pids) { |
|
92 |
final DNGFEntity.Builder builder = DNGFEntity.newBuilder().setType(type).setId(id); |
|
93 |
if (collectedFrom != null) builder.addCollectedfrom(collectedFrom); |
|
94 |
builder.setDateoftransformation(StringUtils.isBlank(dateOfTransformation) ? "" : dateOfTransformation); |
|
95 |
builder.setDateofcollection(StringUtils.isBlank(dateOfCollection) ? "" : dateOfCollection); |
|
96 |
|
|
97 |
if ((originalIds != null) && !originalIds.isEmpty()) { |
|
98 |
builder.addAllOriginalId(Iterables.filter(originalIds, getPredicateNotBlankString())); |
|
99 |
} |
|
100 |
|
|
101 |
if ((pids != null) && !pids.isEmpty()) { |
|
102 |
builder.addAllPid(Iterables.filter(pids, Predicates.notNull())); |
|
103 |
} |
|
104 |
|
|
105 |
return builder; |
|
106 |
} |
|
107 |
|
|
108 |
public static Predicate<String> getPredicateNotBlankString() { |
|
109 |
return s -> StringUtils.isNotBlank(s); |
|
110 |
} |
|
111 |
|
|
112 |
|
|
113 |
public static Node getDataInfo(final NodeList about) { |
|
114 |
if (about.getLength() > 0) { |
|
115 |
final NodeList aboutChildren = about.item(0).getChildNodes(); |
|
116 |
for (int i = 0; i < aboutChildren.getLength(); i++) { |
|
117 |
final Node currentNode = aboutChildren.item(i); |
|
118 |
if ("datainfo".equals(currentNode.getLocalName())) { |
|
119 |
return currentNode; |
|
120 |
} |
|
121 |
} |
|
122 |
} |
|
123 |
return null; |
|
124 |
} |
|
125 |
|
|
126 |
|
|
127 |
/**
 * Builds a DataInfo from the {@code <about>} section of a record, falling back
 * to the supplied defaults for every field that is missing.
 *
 * The builder is first seeded with the default values; when an
 * {@code <about><datainfo>} element is present, each child element
 * (inferred, deletedbyinference, trust, inferenceprovenance, provenanceaction)
 * overrides the corresponding default.
 *
 * @param about              the record's about nodes, may be null/empty
 * @param provenanceaction   default provenance action class (scheme "dnet:provenanceActions")
 * @param trust              default trust value
 * @param deletedbyinference default deleted-by-inference flag
 * @param inferred           default inferred flag
 * @return a DataInfo builder, never null
 */
public static DataInfo.Builder getDataInfo(final NodeList about,
		final String provenanceaction,
		final String trust,
		final boolean deletedbyinference,
		final boolean inferred) {

	// Seed with the caller-supplied defaults.
	final DataInfo.Builder dataInfoBuilder = DataInfo.newBuilder();
	dataInfoBuilder.setInferred(Boolean.valueOf(inferred));
	dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(deletedbyinference));
	dataInfoBuilder.setTrust(trust);
	dataInfoBuilder.setProvenanceaction(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());

	// checking instanceof because when receiving an empty <oaf:datainfo> we don't want to parse it.
	if (((about != null) && (about.getLength() > 0)) /* && (dataInfo instanceof org.w3c.dom.Element) */) {

		final org.w3c.dom.Element dataInfoElement = getDirectChild((org.w3c.dom.Element) about.item(0), "datainfo");
		if (dataInfoElement != null) {
			// Each child element overrides the default when present;
			// getStringValue falls back to the default when the element is missing.
			org.w3c.dom.Element elem = getDirectChild(dataInfoElement, "inferred");
			dataInfoBuilder.setInferred(Boolean.valueOf(getStringValue(elem, String.valueOf(inferred))));

			elem = getDirectChild(dataInfoElement, "deletedbyinference");
			dataInfoBuilder.setDeletedbyinference(Boolean.valueOf(getStringValue(elem, String.valueOf(deletedbyinference))));

			elem = getDirectChild(dataInfoElement, "trust");
			dataInfoBuilder.setTrust(getStringValue(elem, trust));

			elem = getDirectChild(dataInfoElement, "inferenceprovenance");
			dataInfoBuilder.setInferenceprovenance(getStringValue(elem));

			// provenanceaction is read from attributes when present, otherwise the default qualifier is kept.
			elem = getDirectChild(dataInfoElement, "provenanceaction");
			final Qualifier.Builder pBuilder = Qualifier.newBuilder();
			if (elem != null && elem.hasAttributes()) {
				final NamedNodeMap attributes = elem.getAttributes();
				pBuilder.setClassid(getAttributeValue(attributes, "classid"));
				pBuilder.setClassname(getAttributeValue(attributes, "classname"));
				pBuilder.setSchemeid(getAttributeValue(attributes, "schemeid"));
				pBuilder.setSchemename(getAttributeValue(attributes, "schemename"));
			} else {
				pBuilder.mergeFrom(getSimpleQualifier(provenanceaction, "dnet:provenanceActions").build());
			}
			dataInfoBuilder.setProvenanceaction(pBuilder);
		}
	}

	return dataInfoBuilder;
}
|
173 |
|
|
174 |
|
|
175 |
|
|
176 |
protected static OAIProvenance getOAIProvenance(final NodeList about) { |
|
177 |
|
|
178 |
OAIProvenance.Builder oaiProv = OAIProvenance.newBuilder(); |
|
179 |
|
|
180 |
if (((about != null) && (about.getLength() > 0))) { |
|
181 |
|
|
182 |
final org.w3c.dom.Element provenance = getDirectChild((org.w3c.dom.Element) about.item(0), "provenance"); |
|
183 |
|
|
184 |
if (provenance != null) { |
|
185 |
final org.w3c.dom.Element origDesc = getDirectChild(provenance, "originDescription"); |
|
186 |
oaiProv.setOriginDescription(buildOriginDescription(origDesc, OriginDescription.newBuilder())); |
|
187 |
} |
|
188 |
} |
|
189 |
|
|
190 |
return oaiProv.build(); |
|
191 |
} |
|
192 |
|
|
193 |
/**
 * Recursively maps an OAI {@code originDescription} element onto the protobuf
 * OriginDescription message: harvestDate/altered come from attributes, the
 * simple fields from child elements (empty string when missing), and a nested
 * {@code originDescription} child recurses one level deeper.
 *
 * @param origDesc the originDescription element; must not be null
 * @param od       the builder to populate
 * @return the built message
 */
private static OriginDescription buildOriginDescription(final org.w3c.dom.Element origDesc, final OriginDescription.Builder od) {
	od.setHarvestDate(origDesc.getAttribute("harvestDate")).setAltered(Boolean.valueOf(origDesc.getAttribute("altered")));

	org.w3c.dom.Element elem = getDirectChild(origDesc, "baseURL");
	od.setBaseURL(getStringValue(elem));

	elem = getDirectChild(origDesc, "identifier");
	od.setIdentifier(getStringValue(elem));

	elem = getDirectChild(origDesc, "datestamp");
	od.setDatestamp(getStringValue(elem));

	elem = getDirectChild(origDesc, "metadataNamespace");
	od.setMetadataNamespace(getStringValue(elem));

	// A nested originDescription records the previous hop of the harvesting chain.
	elem = getDirectChild(origDesc, "originDescription");

	if (elem != null) {

		od.setOriginDescription(buildOriginDescription(elem, OriginDescription.newBuilder()));
	}

	return od.build();
}
|
217 |
|
|
218 |
protected static String getStringValue(final org.w3c.dom.Element elem, final String defaultValue) { |
|
219 |
return (elem != null && elem.getTextContent() != null) ? elem.getTextContent() : defaultValue; |
|
220 |
} |
|
221 |
|
|
222 |
// Convenience overload: empty string when the element or its text is missing.
protected static String getStringValue(final org.w3c.dom.Element elem) {
	return getStringValue(elem, "");
}
|
225 |
|
|
226 |
protected static String getAttributeValue(final NamedNodeMap attributes, final String name) { |
|
227 |
final Node attr = attributes.getNamedItem(name); |
|
228 |
if (attr == null) return ""; |
|
229 |
final String value = attr.getNodeValue(); |
|
230 |
return value != null ? value : ""; |
|
231 |
} |
|
232 |
|
|
233 |
protected static org.w3c.dom.Element getDirectChild(final org.w3c.dom.Element parent, final String name) { |
|
234 |
for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) { |
|
235 |
if ((child instanceof org.w3c.dom.Element) && name.equals(child.getLocalName())) return (org.w3c.dom.Element) child; |
|
236 |
} |
|
237 |
return null; |
|
238 |
} |
|
239 |
|
|
240 |
// Qualifier where classid==classname and schemeid==schemename.
protected static Qualifier.Builder getSimpleQualifier(final String classname, final String schemename) {
	return getQualifier(classname, classname, schemename, schemename);
}
|
243 |
|
|
244 |
protected static Qualifier.Builder getSimpleQualifier(final ProtocolMessageEnum classname, final String schemename) { |
|
245 |
return getQualifier(classname.toString(), classname.toString(), schemename, schemename); |
|
246 |
} |
|
247 |
|
|
248 |
protected static Qualifier.Builder getQualifier(final String classid, final String classname, final String schemeid, final String schemename) { |
|
249 |
return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemeid).setSchemename(schemename); |
|
250 |
} |
|
251 |
|
|
252 |
protected static Qualifier.Builder setQualifier(final Qualifier.Builder qualifier, final List<String> fields) { |
|
253 |
if ((fields == null) || fields.isEmpty() || fields.get(0).isEmpty()) return null; |
|
254 |
|
|
255 |
if ((fields != null) && !fields.isEmpty() && (fields.get(0) != null)) { |
|
256 |
qualifier.setClassid(fields.get(0)); |
|
257 |
qualifier.setClassname(fields.get(0)); |
|
258 |
} |
|
259 |
return qualifier; |
|
260 |
} |
|
261 |
|
|
262 |
protected static void addStructuredProps(final Builder builder, |
|
263 |
final FieldDescriptor fd, |
|
264 |
final List<String> values, |
|
265 |
final String classid, |
|
266 |
final String schemeid) { |
|
267 |
if (values != null) { |
|
268 |
for (final String s : values) { |
|
269 |
addField(builder, fd, getStructuredProperty(s, classid, classid, schemeid, schemeid)); |
|
270 |
} |
|
271 |
} |
|
272 |
} |
|
273 |
|
|
274 |
/**
 * Extracts persistent identifiers from a node list of {@code <identifier>} /
 * {@code <alternateIdentifier>} elements (DataCite style).
 *
 * The pid type is read from the identifierType / alternateIdentifierType
 * attribute (lowercased); entries with a missing, empty, or "url" type are
 * skipped. The pid value is the element's first text child.
 *
 * @param nodelist candidate identifier elements
 * @return the parsed pids, scheme "dnet:pid_types"; possibly empty, never null
 */
protected static List<StructuredProperty> parsePids(final NodeList nodelist) {

	final List<StructuredProperty> pids = Lists.newArrayList();

	for (int i = 0; i < nodelist.getLength(); i++) {
		final Node node = nodelist.item(i);
		Node pidType = null;
		if (node.getNodeType() == Node.ELEMENT_NODE) {
			if (node.getLocalName().equalsIgnoreCase("identifier")) {
				pidType = node.getAttributes().getNamedItem("identifierType");
			}
			//this is to handle dataset pids
			if (node.getLocalName().equalsIgnoreCase("alternateIdentifier")) {
				pidType = node.getAttributes().getNamedItem("alternateIdentifierType");
			}

			// Use the first text child as the pid value; "url"-typed identifiers are not pids.
			for (int j = 0; j < node.getChildNodes().getLength(); j++) {
				final Node child = node.getChildNodes().item(j);

				if ((child.getNodeType() == Node.TEXT_NODE) && (pidType != null) && (pidType.getNodeValue() != null) && !pidType.getNodeValue().isEmpty()
						&& !pidType.getNodeValue().equalsIgnoreCase("url")) {

					final String type = pidType.getNodeValue().toLowerCase();

					final String value = child.getTextContent();

					pids.add(getStructuredProperty(value, type, type, "dnet:pid_types", "dnet:pid_types"));
					break;
				}
			}
		}
	}
	return pids;
}
|
308 |
|
|
309 |
@SuppressWarnings("unchecked") |
|
310 |
protected static void addField(final Builder builder, final FieldDescriptor descriptor, Object value) { |
|
311 |
|
|
312 |
if (value == null) return; |
|
313 |
|
|
314 |
if (value instanceof List<?>) { |
|
315 |
for (final Object o : (List<Object>) value) { |
|
316 |
addField(builder, descriptor, o); |
|
317 |
} |
|
318 |
} else { |
|
319 |
Object fieldValue = value; |
|
320 |
switch (descriptor.getType()) { |
|
321 |
case BOOL: |
|
322 |
fieldValue = Boolean.valueOf(value.toString()); |
|
323 |
break; |
|
324 |
case BYTES: |
|
325 |
fieldValue = value.toString().getBytes(Charset.forName("UTF-8")); |
|
326 |
break; |
|
327 |
case DOUBLE: |
|
328 |
fieldValue = Double.valueOf(value.toString()); |
|
329 |
break; |
|
330 |
case FLOAT: |
|
331 |
fieldValue = Float.valueOf(value.toString()); |
|
332 |
break; |
|
333 |
case INT32: |
|
334 |
case INT64: |
|
335 |
case SINT32: |
|
336 |
case SINT64: |
|
337 |
fieldValue = Integer.valueOf(value.toString()); |
|
338 |
break; |
|
339 |
case MESSAGE: |
|
340 |
final Builder q = builder.newBuilderForField(descriptor); |
|
341 |
|
|
342 |
if (value instanceof Builder) { |
|
343 |
value = ((Builder) value).build(); |
|
344 |
final byte[] b = ((Message) value).toByteArray(); |
|
345 |
try { |
|
346 |
q.mergeFrom(b); |
|
347 |
} catch (final InvalidProtocolBufferException e) { |
|
348 |
throw new IllegalArgumentException("Unable to merge value: " + value + " with builder: " + q.getDescriptorForType().getName()); |
|
349 |
} |
|
350 |
} else if (Qualifier.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
351 |
if (value instanceof Qualifier) { |
|
352 |
q.mergeFrom((Qualifier) value); |
|
353 |
} else { |
|
354 |
parseMessage(q, Qualifier.getDescriptor(), value.toString(), "@@@"); |
|
355 |
} |
|
356 |
} else if (StructuredProperty.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
357 |
if (value instanceof StructuredProperty) { |
|
358 |
q.mergeFrom((StructuredProperty) value); |
|
359 |
} else { |
|
360 |
parseMessage(q, StructuredProperty.getDescriptor(), value.toString(), "###"); |
|
361 |
} |
|
362 |
} else if (KeyValue.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
363 |
if (value instanceof KeyValue) { |
|
364 |
q.mergeFrom((KeyValue) value); |
|
365 |
} else { |
|
366 |
parseMessage(q, KeyValue.getDescriptor(), value.toString(), "&&&"); |
|
367 |
} |
|
368 |
} else if (StringField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
369 |
if (value instanceof StringField) { |
|
370 |
q.mergeFrom((StringField) value); |
|
371 |
} else { |
|
372 |
q.setField(StringField.getDescriptor().findFieldByName("value"), value); |
|
373 |
} |
|
374 |
} else if (BoolField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
375 |
if (value instanceof BoolField) { |
|
376 |
q.mergeFrom((BoolField) value); |
|
377 |
} else if (value instanceof String) { |
|
378 |
q.setField(BoolField.getDescriptor().findFieldByName("value"), Boolean.valueOf((String) value)); |
|
379 |
} else { |
|
380 |
q.setField(BoolField.getDescriptor().findFieldByName("value"), value); |
|
381 |
} |
|
382 |
} else if (IntField.getDescriptor().getName().equals(q.getDescriptorForType().getName())) { |
|
383 |
if (value instanceof IntField) { |
|
384 |
q.mergeFrom((IntField) value); |
|
385 |
} else if (value instanceof String) { |
|
386 |
q.setField(IntField.getDescriptor().findFieldByName("value"), NumberUtils.toInt((String) value)); |
|
387 |
} else { |
|
388 |
q.setField(IntField.getDescriptor().findFieldByName("value"), value); |
|
389 |
} |
|
390 |
} |
|
391 |
|
|
392 |
fieldValue = q.buildPartial(); |
|
393 |
break; |
|
394 |
default: |
|
395 |
break; |
|
396 |
} |
|
397 |
|
|
398 |
doAddField(builder, descriptor, fieldValue); |
|
399 |
} |
|
400 |
|
|
401 |
} |
|
402 |
|
|
403 |
protected static void doAddField(final Builder builder, final FieldDescriptor fd, final Object value) { |
|
404 |
if (value != null) { |
|
405 |
if (fd.isRepeated()) { |
|
406 |
builder.addRepeatedField(fd, value); |
|
407 |
} else if (fd.isOptional() || fd.isRequired()) { |
|
408 |
builder.setField(fd, value); |
|
409 |
} |
|
410 |
} |
|
411 |
} |
|
412 |
|
|
413 |
/**
 * Populates a message builder from a delimited string: the string is split on
 * {@code split} (trimmed) and the tokens are assigned positionally to the
 * descriptor's fields. Iteration stops at the shorter of the two sequences, so
 * extra tokens or extra fields are silently ignored.
 *
 * @param builder    the builder to populate
 * @param descriptor descriptor providing the field order
 * @param value      the delimited string
 * @param split      the delimiter (e.g. "@@@" for Qualifier)
 */
protected static void parseMessage(final Builder builder, final Descriptor descriptor, final String value, final String split) {

	// Lazily pairs each field descriptor with the corresponding token.
	Iterable<Pair> iterablePair = () -> {

		final Iterator<FieldDescriptor> fields = descriptor.getFields().iterator();
		final Iterator<String> values = Lists.newArrayList(Splitter.on(split).trimResults().split(value)).iterator();

		return new Iterator<Pair>() {
			@Override
			public boolean hasNext() {
				// Zip semantics: exhausted as soon as either side runs out.
				return fields.hasNext() && values.hasNext();
			}

			@Override
			public Pair next() {
				final FieldDescriptor field = fields.next();
				final String value1 = values.next();
				return new Pair(field, value1);
			}

			@Override
			public void remove() {
				throw new UnsupportedOperationException("cannot remove");
			}
		};
	};

	// final IterablePair<FieldDescriptor, String> iterablePair =
	// new IterablePair<FieldDescriptor, String>(descriptor.getFields(), Lists.newArrayList(Splitter
	// .on(split).trimResults().split(value)));

	for (final Pair<FieldDescriptor, String> p : iterablePair) {
		addField(builder, p.getKey(), p.getValue());
	}
}
|
448 |
|
|
449 |
protected static String base64(final byte[] data) { |
|
450 |
final byte[] bytes = Base64.encodeBase64(data); |
|
451 |
return new String(bytes); |
|
452 |
} |
|
453 |
|
|
454 |
public static String replace(final String s, final String regex, final String replacement) { |
|
455 |
return s.replaceAll(regex, replacement); |
|
456 |
} |
|
457 |
|
|
458 |
public static String trim(final String s) { |
|
459 |
return s.trim(); |
|
460 |
} |
|
461 |
|
|
462 |
protected static String removePrefix(final Type type, final String s) { |
|
463 |
return removePrefix(type.toString(), s); |
|
464 |
} |
|
465 |
|
|
466 |
private static String removePrefix(final String prefix, final String s) { |
|
467 |
return StringUtils.removeStart("" + s, prefix + "|"); |
|
468 |
} |
|
469 |
|
|
470 |
protected static Qualifier.Builder getDefaultQualifier(final String scheme) { |
|
471 |
final Qualifier.Builder qualifier = Qualifier.newBuilder().setSchemeid(scheme).setSchemename(scheme); |
|
472 |
return qualifier; |
|
473 |
} |
|
474 |
|
|
475 |
protected static StructuredProperty getStructuredProperty(final String value, |
|
476 |
final String classid, |
|
477 |
final String classname, |
Also available in: Unified diff
backported from dnet50