Revision 53896
Added by Giorgos Alexiou almost 6 years ago
modules/dnet-openaire-lodexport/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/LodMapper.java | ||
---|---|---|
20 | 20 |
import org.apache.log4j.Logger; |
21 | 21 |
|
22 | 22 |
import java.io.IOException; |
23 |
import java.text.ParseException; |
|
23 | 24 |
import java.text.SimpleDateFormat; |
24 | 25 |
import java.util.*; |
25 | 26 |
import java.util.Map.Entry; |
... | ... | |
29 | 30 |
* export |
30 | 31 |
*/ |
31 | 32 |
public class LodMapper extends TableMapper<Text, Text> { |
32 |
private static final String DATE_OF_TRANSFORMATION_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";
|
|
33 |
private static final String LAST_EXECUTION_DATE_PATTERN = "yyyy-MM-dd";
|
|
34 |
private Logger log = Logger.getLogger(this.getClass());
|
|
35 |
private EntityConfigTable entityConfigTable;
|
|
36 |
private String lastExecutionDate = "";
|
|
33 |
private static final String DATE_OF_TRANSFORMATION_PATTERN = "yyyy-MM-dd'T'HH:mm:ss";
|
|
34 |
private static final String LAST_EXECUTION_DATE_PATTERN = "yyyy-MM-dd";
|
|
35 |
private Logger log = Logger.getLogger(this.getClass());
|
|
36 |
private EntityConfigTable entityConfigTable;
|
|
37 |
private String lastExecutionDate = "";
|
|
37 | 38 |
|
39 |
public static enum ENTITIES_COUNTER { |
|
40 |
RESULT, PROJECT, DATASOURCE, PERSON, ORGANIZATION, RESULT_NO_DATE, PROJECT_NO_DATE, DATASOURCE_NO_DATE, |
|
41 |
PERSON_NO_DATE, RGANIZATION_NO_DATE, DELETED_BY_INFERENCE, NOT_DELETED_BY_INFERENCE, TOTAL_ENTITIES, |
|
42 |
TOTAL_RELATIONS, UPDATED, NOT_UPDATED, NO_DATE |
|
38 | 43 |
|
39 |
public static enum ENTITIES_COUNTER { |
|
40 |
RESULT, |
|
41 |
PROJECT, |
|
42 |
DATASOURCE, |
|
43 |
PERSON, |
|
44 |
ORGANIZATION, |
|
45 |
RESULT_NO_DATE, |
|
46 |
PROJECT_NO_DATE, |
|
47 |
DATASOURCE_NO_DATE, |
|
48 |
PERSON_NO_DATE, |
|
49 |
RGANIZATION_NO_DATE, |
|
50 |
DELETED_BY_INFERENCE, |
|
51 |
NOT_DELETED_BY_INFERENCE, |
|
52 |
TOTAL_ENTITIES, |
|
53 |
TOTAL_RELATIONS, |
|
54 |
UPDATED, |
|
55 |
NOT_UPDATED, |
|
56 |
NO_DATE |
|
44 |
} |
|
57 | 45 |
|
58 |
}
|
|
46 |
;
|
|
59 | 47 |
|
60 |
;
|
|
48 |
private String DELIM;
|
|
61 | 49 |
|
62 |
private String DELIM; |
|
50 |
@Override |
|
51 |
protected void setup(Context context) throws IOException, InterruptedException { |
|
52 |
loadEntityConfig(context); |
|
53 |
DELIM = context.getConfiguration().get("lod.delim"); |
|
54 |
lastExecutionDate = context.getConfiguration().get("lod.lastExecutionDate"); |
|
63 | 55 |
|
64 |
@Override |
|
65 |
protected void setup(Context context) throws IOException, InterruptedException { |
|
66 |
loadEntityConfig(context); |
|
67 |
DELIM = context.getConfiguration().get("lod.delim"); |
|
68 |
lastExecutionDate = context.getConfiguration().get("lod.lastExecutionDate"); |
|
56 |
} |
|
69 | 57 |
|
70 |
} |
|
58 |
@Override |
|
59 |
protected void map(final ImmutableBytesWritable keyIn, final Result result, final Context context) |
|
60 |
throws IOException { |
|
71 | 61 |
|
62 |
final OafRowKeyDecoder decoder = OafRowKeyDecoder.decode(keyIn.copyBytes()); |
|
63 |
final Type type = decoder.getType(); |
|
64 |
final Oaf oaf = UpdateMerger.mergeBodyUpdates(context, result.getFamilyMap(Bytes.toBytes(type.toString()))); |
|
72 | 65 |
|
73 |
@Override |
|
74 |
protected void map(final ImmutableBytesWritable keyIn, final Result result, final Context context) throws IOException { |
|
66 |
if (isValid(oaf)) { |
|
75 | 67 |
|
76 |
final OafRowKeyDecoder decoder = OafRowKeyDecoder.decode(keyIn.copyBytes()); |
|
77 |
final Type type = decoder.getType(); |
|
78 |
final Oaf oaf = UpdateMerger.mergeBodyUpdates(context, result.getFamilyMap(Bytes.toBytes(type.toString()))); |
|
68 |
if (deletedByInference(oaf)) { |
|
69 |
context.getCounter(ENTITIES_COUNTER.DELETED_BY_INFERENCE).increment(1); |
|
70 |
} else { |
|
71 |
context.getCounter(ENTITIES_COUNTER.NOT_DELETED_BY_INFERENCE).increment(1); |
|
72 |
} |
|
79 | 73 |
|
80 |
if (isValid(oaf)) { |
|
74 |
context.getCounter(ENTITIES_COUNTER.TOTAL_ENTITIES).increment(1); |
|
75 |
try { |
|
76 |
emitProtos(context, result, oaf); |
|
77 |
} catch (ParseException e) { |
|
78 |
// TODO Auto-generated catch block |
|
79 |
e.printStackTrace(); |
|
80 |
} |
|
81 |
} |
|
81 | 82 |
|
82 |
if (deletedByInference(oaf)) { |
|
83 |
context.getCounter(ENTITIES_COUNTER.DELETED_BY_INFERENCE).increment(1); |
|
84 |
} else { |
|
85 |
context.getCounter(ENTITIES_COUNTER.NOT_DELETED_BY_INFERENCE).increment(1); |
|
86 |
} |
|
83 |
} |
|
87 | 84 |
|
88 |
context.getCounter(ENTITIES_COUNTER.TOTAL_ENTITIES).increment(1); |
|
89 |
emitProtos(context, result, oaf); |
|
90 |
} |
|
85 |
private boolean isValid(Oaf oaf) { |
|
86 |
try { |
|
87 |
if (oaf != null && oaf.isInitialized()) { |
|
88 |
return true; |
|
89 |
} |
|
91 | 90 |
|
92 |
} |
|
91 |
} catch (Exception e) { |
|
92 |
log.error("invalid proto", e); |
|
93 |
} |
|
93 | 94 |
|
94 |
private boolean isValid(Oaf oaf) { |
|
95 |
try { |
|
96 |
if (oaf != null && oaf.isInitialized()) { |
|
97 |
return true; |
|
98 |
} |
|
95 |
return false; |
|
96 |
} |
|
99 | 97 |
|
100 |
} catch (Exception e) { |
|
101 |
log.error("invalid proto", e); |
|
102 |
} |
|
98 |
private boolean isUpdated(Oaf oaf, Context context) throws IOException, ParseException { |
|
99 |
String dateOfTransformationString = ""; |
|
100 |
try { |
|
101 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(DATE_OF_TRANSFORMATION_PATTERN, |
|
102 |
Locale.getDefault()); |
|
103 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
104 |
dateOfTransformationString = oaf.getEntity().getDateoftransformation(); |
|
105 |
if (dateOfTransformationString == null || dateOfTransformationString.isEmpty() |
|
106 |
|| dateOfTransformationString.equals(" ")) { |
|
107 |
context.getCounter(ENTITIES_COUNTER.NO_DATE).increment(1); |
|
108 |
return true; |
|
109 |
} |
|
103 | 110 |
|
104 |
return false; |
|
105 |
} |
|
111 |
Date dateOfTransformation = simpleDateFormat.parse(dateOfTransformationString); |
|
106 | 112 |
|
107 |
private boolean isUpdated(Oaf oaf, Context context) throws IOException { |
|
108 |
String dateOfTransformationString = ""; |
|
109 |
try { |
|
110 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(DATE_OF_TRANSFORMATION_PATTERN, Locale.getDefault()); |
|
111 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
112 |
dateOfTransformationString = oaf.getEntity().getDateoftransformation(); |
|
113 |
if (dateOfTransformationString == null || dateOfTransformationString.isEmpty() || dateOfTransformationString.equals(" ")) { |
|
114 |
context.getCounter(ENTITIES_COUNTER.NO_DATE).increment(1); |
|
115 |
return true; |
|
116 |
} |
|
113 |
SimpleDateFormat lastExecDateFormatter = new SimpleDateFormat(LAST_EXECUTION_DATE_PATTERN); |
|
114 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
115 |
Date lastExecDate = lastExecDateFormatter.parse(lastExecutionDate); |
|
117 | 116 |
|
118 |
Date dateOfTransformation = simpleDateFormat.parse(dateOfTransformationString); |
|
117 |
if (lastExecDate.before(dateOfTransformation)) { |
|
118 |
context.getCounter(ENTITIES_COUNTER.UPDATED).increment(1); |
|
119 |
return true; |
|
120 |
} |
|
121 |
} catch (ParseException pe) { |
|
122 |
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(LAST_EXECUTION_DATE_PATTERN, Locale.getDefault()); |
|
123 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
124 |
dateOfTransformationString = oaf.getEntity().getDateoftransformation(); |
|
125 |
if (dateOfTransformationString == null || dateOfTransformationString.isEmpty() |
|
126 |
|| dateOfTransformationString.equals(" ")) { |
|
127 |
context.getCounter(ENTITIES_COUNTER.NO_DATE).increment(1); |
|
128 |
return true; |
|
129 |
} |
|
119 | 130 |
|
120 |
SimpleDateFormat lastExecDateFormatter = new SimpleDateFormat(LAST_EXECUTION_DATE_PATTERN); |
|
121 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
122 |
Date lastExecDate = lastExecDateFormatter.parse(lastExecutionDate); |
|
131 |
Date dateOfTransformation = simpleDateFormat.parse(dateOfTransformationString); |
|
132 |
SimpleDateFormat lastExecDateFormatter = new SimpleDateFormat(LAST_EXECUTION_DATE_PATTERN); |
|
133 |
simpleDateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); |
|
134 |
Date lastExecDate = lastExecDateFormatter.parse(lastExecutionDate); |
|
123 | 135 |
|
124 |
if (lastExecDate.before(dateOfTransformation)) {
|
|
125 |
context.getCounter(ENTITIES_COUNTER.UPDATED).increment(1);
|
|
126 |
return true;
|
|
127 |
}
|
|
128 |
} catch (Exception e) {
|
|
129 |
log.error("invalid date " + dateOfTransformationString, e);
|
|
130 |
throw new IOException(e);
|
|
131 |
}
|
|
132 |
context.getCounter(ENTITIES_COUNTER.NOT_UPDATED).increment(1);
|
|
133 |
return false;
|
|
134 |
}
|
|
136 |
if (lastExecDate.before(dateOfTransformation)) {
|
|
137 |
context.getCounter(ENTITIES_COUNTER.UPDATED).increment(1);
|
|
138 |
return true;
|
|
139 |
}
|
|
140 |
} catch (Exception e) {
|
|
141 |
log.error("invalid date " + dateOfTransformationString, e);
|
|
142 |
throw new IOException(e);
|
|
143 |
}
|
|
144 |
context.getCounter(ENTITIES_COUNTER.NOT_UPDATED).increment(1);
|
|
145 |
return false;
|
|
146 |
}
|
|
135 | 147 |
|
136 |
private void emitProtos(Context context, Result result, Oaf oaf) throws IOException {
|
|
137 |
Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(oaf.getKind()).setDataInfo(oaf.getDataInfo());
|
|
138 |
Type type = oaf.getEntity().getType();
|
|
139 |
oafBuilder.setEntity(oaf.getEntity());
|
|
140 |
// emit relation first so we can cache them to entity protos
|
|
141 |
emitRelation(context, result, oaf, type, oafBuilder);
|
|
148 |
private void emitProtos(Context context, Result result, Oaf oaf) throws IOException, ParseException {
|
|
149 |
Oaf.Builder oafBuilder = Oaf.newBuilder().setKind(oaf.getKind()).setDataInfo(oaf.getDataInfo());
|
|
150 |
Type type = oaf.getEntity().getType();
|
|
151 |
oafBuilder.setEntity(oaf.getEntity());
|
|
152 |
// emit relation first so we can cache them to entity protos
|
|
153 |
emitRelation(context, result, oaf, type, oafBuilder);
|
|
142 | 154 |
|
143 |
if (isUpdated(oaf,context)) {
|
|
144 |
emitEntity(context, oaf, oafBuilder);
|
|
145 |
}
|
|
146 |
}
|
|
155 |
if (isUpdated(oaf, context)) {
|
|
156 |
emitEntity(context, oaf, oafBuilder);
|
|
157 |
}
|
|
158 |
}
|
|
147 | 159 |
|
148 |
private void emitEntity(Context context, Oaf oaf, Oaf.Builder oafBuilder) {
|
|
149 |
String serialized = Serializer.serialize(oafBuilder.build(), DELIM);
|
|
160 |
private void emitEntity(Context context, Oaf oaf, Oaf.Builder oafBuilder) {
|
|
161 |
String serialized = Serializer.serialize(oafBuilder.build(), DELIM);
|
|
150 | 162 |
|
151 |
if (serialized != null && !oaf.getEntity().getId().contains("dedup")) {
|
|
163 |
if (serialized != null && !oaf.getEntity().getId().contains("dedup")) {
|
|
152 | 164 |
|
153 |
try { |
|
154 |
context.getCounter(ENTITIES_COUNTER.valueOf(oaf.getEntity().getType().toString().toUpperCase())).increment(1); |
|
155 |
//output entity types so we can have seperate files for each type |
|
156 |
Text TextKeyOut = new Text(oaf.getEntity().getType().toString()); |
|
157 |
context.write((TextKeyOut), new Text(serialized)); |
|
158 |
|
|
159 |
if (oaf.getEntity().getDateoftransformation().isEmpty()) { |
|
160 |
context.getCounter(ENTITIES_COUNTER.valueOf(oaf.getEntity().getType().toString().toUpperCase()+"_NO_DATE")).increment(1); |
|
165 |
try { |
|
166 |
context.getCounter(ENTITIES_COUNTER.valueOf(oaf.getEntity().getType().toString().toUpperCase())) |
|
167 |
.increment(1); |
|
168 |
// output entity types so we can have seperate files for each type |
|
169 |
Text TextKeyOut = new Text(oaf.getEntity().getType().toString()); |
|
170 |
context.write((TextKeyOut), new Text(serialized)); |
|
171 |
|
|
172 |
if (oaf.getEntity().getDateoftransformation().isEmpty()) { |
|
173 |
context.getCounter( |
|
174 |
ENTITIES_COUNTER.valueOf(oaf.getEntity().getType().toString().toUpperCase() + "_NO_DATE")) |
|
175 |
.increment(1); |
|
161 | 176 |
// context.getCounter(ENTITIES_COUNTER.UPDATED).increment(1); |
162 |
}
|
|
177 |
}
|
|
163 | 178 |
// else { |
164 | 179 |
// context.getCounter(ENTITIES_COUNTER.NOT_UPDATED).increment(1); |
165 | 180 |
// } |
166 | 181 |
|
167 |
} catch (Exception e) {
|
|
168 |
log.error("Error writing entity to M/R output", e);
|
|
169 |
}
|
|
170 |
}
|
|
182 |
} catch (Exception e) {
|
|
183 |
log.error("Error writing entity to M/R output", e);
|
|
184 |
}
|
|
185 |
}
|
|
171 | 186 |
|
172 |
}
|
|
187 |
}
|
|
173 | 188 |
|
174 |
// may have multiple relations per each field
|
|
175 |
private void emitRelation(Context context, Result result, Oaf oaf, Type type, Oaf.Builder builder) {
|
|
176 |
// derived relations; ResultLanguages, resultConcepts etc are
|
|
177 |
// created here
|
|
189 |
// may have multiple relations per each field
|
|
190 |
private void emitRelation(Context context, Result result, Oaf oaf, Type type, Oaf.Builder builder) {
|
|
191 |
// derived relations; ResultLanguages, resultConcepts etc are
|
|
192 |
// created here
|
|
178 | 193 |
|
179 |
if (!oaf.getEntity().getId().contains("dedup")) {
|
|
180 |
// Existing Hbase relations are generated here
|
|
181 |
if (entityConfigTable.getDescriptors(type) != null && !entityConfigTable.getDescriptors(type).isEmpty()) {
|
|
182 |
for (LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
|
|
183 |
try {
|
|
194 |
if (!oaf.getEntity().getId().contains("dedup")) {
|
|
195 |
// Existing Hbase relations are generated here
|
|
196 |
if (entityConfigTable.getDescriptors(type) != null && !entityConfigTable.getDescriptors(type).isEmpty()) {
|
|
197 |
for (LinkDescriptor ld : entityConfigTable.getDescriptors(type)) {
|
|
198 |
try {
|
|
184 | 199 |
|
185 |
final Map<byte[], byte[]> columnMap = result.getFamilyMap(Bytes.toBytes(ld.getRelDescriptor().getIt())); |
|
200 |
final Map<byte[], byte[]> columnMap = result |
|
201 |
.getFamilyMap(Bytes.toBytes(ld.getRelDescriptor().getIt())); |
|
186 | 202 |
|
187 |
List<OafRel> relOaf = decodeRelation(oaf, columnMap, ld);
|
|
203 |
List<OafRel> relOaf = decodeRelation(oaf, columnMap, ld);
|
|
188 | 204 |
|
205 |
for (OafRel rel : relOaf) { |
|
206 |
builder.getEntityBuilder().addCachedRel(rel); |
|
207 |
try { |
|
208 |
// keep all relations to one file |
|
209 |
Text TextKeyOut = new Text("relations"); |
|
189 | 210 |
|
190 |
for (OafRel rel : relOaf) { |
|
191 |
builder.getEntityBuilder().addCachedRel(rel); |
|
192 |
try { |
|
193 |
//keep all relations to one file |
|
194 |
Text TextKeyOut = new Text("relations"); |
|
211 |
String buff = Serializer.serialize(rel, DELIM); |
|
195 | 212 |
|
196 |
String buff = Serializer.serialize(rel, DELIM); |
|
213 |
if (!buff.isEmpty() && !rel.getTarget().contains("dedup")) { |
|
214 |
context.write((TextKeyOut), new Text(buff)); |
|
215 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
216 |
} |
|
197 | 217 |
|
198 |
if (!buff.isEmpty() && !rel.getTarget().contains("dedup")) { |
|
199 |
context.write((TextKeyOut), new Text(buff)); |
|
200 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
201 |
} |
|
218 |
} catch (Exception e) { |
|
219 |
log.error("Error while writing Relation Proto to M/R output", e); |
|
220 |
} |
|
202 | 221 |
|
203 |
} catch (Exception e) { |
|
204 |
log.error("Error while writing Relation Proto to M/R output", e); |
|
205 |
} |
|
222 |
} |
|
206 | 223 |
|
207 |
} |
|
224 |
relOaf.clear(); |
|
225 |
} catch (Exception e) { |
|
226 |
log.error("Error while decoding Relation Proto from HBase Body", e); |
|
208 | 227 |
|
209 |
relOaf.clear(); |
|
210 |
} catch (Exception e) { |
|
211 |
log.error("Error while decoding Relation Proto from HBase Body", e); |
|
228 |
} |
|
212 | 229 |
|
213 |
} |
|
230 |
} |
|
231 |
} |
|
232 |
} |
|
214 | 233 |
|
215 |
} |
|
216 |
} |
|
217 |
} |
|
234 |
Set<String> relationsList = new HashSet<String>(); |
|
218 | 235 |
|
236 |
Serializer.extractRelations(oaf, DELIM, relationsList); |
|
219 | 237 |
|
220 |
Set<String> relationsList = new HashSet<String>(); |
|
238 |
for (String rel : relationsList) { |
|
239 |
try { |
|
221 | 240 |
|
222 |
Serializer.extractRelations(oaf, DELIM, relationsList);
|
|
241 |
Text TextKeyOut = new Text("relations");
|
|
223 | 242 |
|
224 |
for (String rel : relationsList) { |
|
225 |
try { |
|
243 |
if (!oaf.getEntity().getId().contains("dedup")) { |
|
244 |
if (!rel.contains("dedup")) { |
|
245 |
context.write((TextKeyOut), new Text(rel)); |
|
246 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
226 | 247 |
|
227 |
Text TextKeyOut = new Text("relations"); |
|
248 |
} |
|
249 |
} else { |
|
250 |
// for dedup entities write only dedup relationships: all the permutations |
|
251 |
// of children |
|
252 |
if (rel.contains("dedup")) { |
|
253 |
context.write((TextKeyOut), new Text(rel)); |
|
254 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
255 |
} |
|
228 | 256 |
|
229 |
if (!oaf.getEntity().getId().contains("dedup")) { |
|
230 |
if (!rel.contains("dedup")) { |
|
231 |
context.write((TextKeyOut), new Text(rel)); |
|
232 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
257 |
} |
|
233 | 258 |
|
234 |
} |
|
235 |
} else { |
|
236 |
//for dedup entities write only dedup relationships: all the permutations |
|
237 |
// of children |
|
238 |
if (rel.contains("dedup")) { |
|
239 |
context.write((TextKeyOut), new Text(rel)); |
|
240 |
context.getCounter(ENTITIES_COUNTER.TOTAL_RELATIONS).increment(1); |
|
241 |
} |
|
259 |
} catch (Exception e) { |
|
260 |
log.error("Error writing relations to output : " + rel); |
|
261 |
} |
|
262 |
} |
|
263 |
relationsList.clear(); |
|
264 |
} |
|
242 | 265 |
|
243 |
} |
|
266 |
private ArrayList<OafRel> decodeRelation(final Oaf body, Map<byte[], byte[]> columnMap, final LinkDescriptor ld) |
|
267 |
throws IOException, InterruptedException { |
|
244 | 268 |
|
245 |
} catch (Exception e) { |
|
246 |
log.error("Error writing relations to output : " + rel); |
|
247 |
} |
|
248 |
} |
|
249 |
relationsList.clear(); |
|
250 |
} |
|
269 |
ArrayList<OafRel> rels = new ArrayList<OafRel>(); |
|
251 | 270 |
|
271 |
if (hasData(columnMap)) { |
|
252 | 272 |
|
253 |
private ArrayList<OafRel> decodeRelation(final Oaf body, Map<byte[], byte[]> columnMap, |
|
254 |
final LinkDescriptor ld) throws IOException, InterruptedException { |
|
273 |
for (Entry<byte[], byte[]> e : columnMap.entrySet()) { |
|
255 | 274 |
|
256 |
ArrayList<OafRel> rels = new ArrayList<OafRel>(); |
|
275 |
final Oaf decodedOaf = decodeProto(e.getValue()); |
|
276 |
if (isValid(decodedOaf)) { |
|
277 |
OafRel.Builder relBuilder = OafRel.newBuilder(decodedOaf.getRel()); |
|
257 | 278 |
|
258 |
if (hasData(columnMap)) {
|
|
279 |
// skip dedups
|
|
259 | 280 |
|
260 |
for (Entry<byte[], byte[]> e : columnMap.entrySet()) { |
|
281 |
if (ld.getRelDescriptor().getIt().contains(SubRelType.dedup.toString()) |
|
282 |
&& isDedupSelf(relBuilder)) { |
|
283 |
// log.error("invalid protto", e); |
|
261 | 284 |
|
262 |
final Oaf decodedOaf = decodeProto(e.getValue()); |
|
263 |
if (isValid(decodedOaf)) { |
|
264 |
OafRel.Builder relBuilder = OafRel.newBuilder(decodedOaf.getRel()); |
|
285 |
} else { |
|
286 |
OafRel oafRel = relBuilder.setRelType(ld.getRelDescriptor().getRelType()).build(); |
|
287 |
relBuilder.setCachedTarget(body.getEntity()); |
|
288 |
rels.add(oafRel); |
|
289 |
} |
|
290 |
} |
|
265 | 291 |
|
266 |
// skip dedups |
|
292 |
} |
|
293 |
} |
|
267 | 294 |
|
268 |
if (ld.getRelDescriptor().getIt().contains(SubRelType.dedup.toString()) && isDedupSelf(relBuilder)) {
|
|
269 |
// log.error("invalid protto", e);
|
|
295 |
return rels;
|
|
296 |
}
|
|
270 | 297 |
|
271 |
} else { |
|
272 |
OafRel oafRel = relBuilder.setRelType(ld.getRelDescriptor().getRelType()).build(); |
|
273 |
relBuilder.setCachedTarget(body.getEntity()); |
|
274 |
rels.add(oafRel); |
|
275 |
} |
|
276 |
} |
|
298 |
private Oaf decodeProto(final byte[] body) { |
|
299 |
try { |
|
300 |
return Oaf.parseFrom(body); |
|
301 |
} catch (InvalidProtocolBufferException e) { |
|
302 |
log.error("Invalid Protos", e); |
|
303 |
} |
|
304 |
return null; |
|
305 |
} |
|
277 | 306 |
|
278 |
}
|
|
279 |
}
|
|
307 |
private void loadEntityConfig(Context context) throws InterruptedException {
|
|
308 |
String indexConf = context.getConfiguration().get("index.conf");
|
|
280 | 309 |
|
281 |
return rels; |
|
282 |
} |
|
310 |
if (indexConf == null || indexConf.isEmpty()) { |
|
311 |
log.error("NULL ENTITY CONFIGURATION TABLE IN MAPPER : "); |
|
312 |
throw new InterruptedException("Null enitity configuration in mappper"); |
|
283 | 313 |
|
284 |
private Oaf decodeProto(final byte[] body) { |
|
285 |
try { |
|
286 |
return Oaf.parseFrom(body); |
|
287 |
} catch (InvalidProtocolBufferException e) { |
|
288 |
log.error("Invalid Protos", e); |
|
289 |
} |
|
290 |
return null; |
|
291 |
} |
|
314 |
} |
|
292 | 315 |
|
316 |
entityConfigTable = IndexConfig.load(indexConf).getConfigMap(); |
|
293 | 317 |
|
294 |
private void loadEntityConfig(Context context) throws InterruptedException { |
|
295 |
String indexConf = context.getConfiguration().get("index.conf"); |
|
318 |
} |
|
296 | 319 |
|
297 |
if (indexConf == null || indexConf.isEmpty()) {
|
|
298 |
log.error("NULL ENTITY CONFIGURATION TABLE IN MAPPER : ");
|
|
299 |
throw new InterruptedException("Null enitity configuration in mappper");
|
|
320 |
private boolean isDedupSelf(final OafRelOrBuilder rel) {
|
|
321 |
return rel.getSource().contains(rel.getTarget());
|
|
322 |
}
|
|
300 | 323 |
|
301 |
} |
|
324 |
private boolean hasData(final Map<byte[], byte[]> columnMap) { |
|
325 |
return columnMap != null && !columnMap.isEmpty(); |
|
326 |
} |
|
302 | 327 |
|
303 |
entityConfigTable = IndexConfig.load(indexConf).getConfigMap(); |
|
328 |
private boolean deletedByInference(final Oaf oaf) { |
|
329 |
return oaf.getDataInfo().getDeletedbyinference(); |
|
330 |
} |
|
304 | 331 |
|
305 |
} |
|
332 |
@Override |
|
333 |
protected void cleanup(Context context) throws IOException, InterruptedException { |
|
334 |
super.cleanup(context); |
|
335 |
} |
|
306 | 336 |
|
307 |
private boolean isDedupSelf(final OafRelOrBuilder rel) {
|
|
308 |
return rel.getSource().contains(rel.getTarget());
|
|
309 |
}
|
|
337 |
public EntityConfigTable getEntityConfigTable() {
|
|
338 |
return entityConfigTable;
|
|
339 |
}
|
|
310 | 340 |
|
311 |
private boolean hasData(final Map<byte[], byte[]> columnMap) {
|
|
312 |
return columnMap != null && !columnMap.isEmpty();
|
|
313 |
}
|
|
341 |
public void setEntityConfigTable(EntityConfigTable entityConfigTable) {
|
|
342 |
this.entityConfigTable = entityConfigTable;
|
|
343 |
}
|
|
314 | 344 |
|
315 |
private boolean deletedByInference(final Oaf oaf) { |
|
316 |
return oaf.getDataInfo().getDeletedbyinference(); |
|
317 |
} |
|
318 |
|
|
319 |
@Override |
|
320 |
protected void cleanup(Context context) throws IOException, InterruptedException { |
|
321 |
super.cleanup(context); |
|
322 |
} |
|
323 |
|
|
324 |
|
|
325 |
public EntityConfigTable getEntityConfigTable() { |
|
326 |
return entityConfigTable; |
|
327 |
} |
|
328 |
|
|
329 |
public void setEntityConfigTable(EntityConfigTable entityConfigTable) { |
|
330 |
this.entityConfigTable = entityConfigTable; |
|
331 |
} |
|
332 |
|
|
333 |
|
|
334 | 345 |
} |
Also available in: Unified diff
catch parse exception to date of traformation