package eu.dnetlib.msro.openaireplus.workflows.nodes.claims;

import java.util.concurrent.atomic.AtomicInteger;

import com.googlecode.sarasvati.Arc;
import com.googlecode.sarasvati.NodeToken;
import eu.dnetlib.data.hadoop.rmi.HadoopService;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.msro.rmi.MSROException;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.StringUtils;

/**
 * Created by alessia on 23/10/15.
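 * <p>
 * Applies claim updates from the claims database to HBase: each claim adds a context to the row of its
 * target result.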
 */
public class ApplyClaimUpdatesJobNode extends AbstractClaimsToHBASE {

	private static final Log log = LogFactory.getLog(ApplyClaimUpdatesJobNode.class);

	@Override
	protected String execute(NodeToken token) throws Exception {
		//TODO: use claim.claim_date from the claim db
		long timestamp = System.currentTimeMillis();
		setTotal(getClaimDatabaseUtils().count(getCountQuery()));

		final AtomicInteger discardedClaims = new AtomicInteger(0);
		final HadoopService hadoopService = getServiceLocator().getService(HadoopService.class);
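
		// for each claim, encode a context update for the target result and store it on that row
		// as a new column in the "result" family (qualifier "update_<nanoTime>")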
		getClaimDatabaseUtils().query(getSql()).forEach(claim -> {
			try {
				log.debug(claim);
				String contextId = claim.getSource();
				String rowKey = getFullId(getOpenAIREType(claim.getTargetType()), claim.getTarget());

				String value = getValue(rowKey, contextId, timestamp);
				hadoopService.addHBaseColumn(getClusterName(), getTableName(), rowKey, "result", "update_" + System.nanoTime(), value);
				incrementProcessed();
			} catch (Exception e) {
				log.error("Discarding claim " + claim + ". Cause: " + e.getMessage());
				discardedClaims.incrementAndGet();
			}
		});
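
		// report the counters and expose them in the workflow token environment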
		log.info("Total Claim Updates: " + getTotal());
		token.getEnv().setAttribute("claimUpdatesSize", getTotal());
		log.info("Claim updates: " + getProcessed());
		token.getEnv().setAttribute("claimUpdates", getProcessed());
		log.info("Discarded Claim Updates: " + discardedClaims.intValue());
		token.getEnv().setAttribute("discardedClaimUpdates", discardedClaims.intValue());

		return Arc.DEFAULT_ARC;
	}
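
	/**
	 * Serializes the update as a Base64-encoded Oaf message: an entity of type result whose metadata carries
	 * the given context, stamped with the given last-update timestamp.
	 */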
	protected String getValue(final String rowkey, final String contextid, final long timestamp) throws MSROException {
		log.debug(StringUtils.format("%s --  %s", rowkey, contextid));

		Result.Builder resultBuilder = Result.newBuilder().setMetadata(Result.Metadata.newBuilder().addContext(getContext(contextid)));
		OafEntity.Builder entityBuilder = OafEntity.newBuilder().setId(rowkey).setType(Type.result).setResult(resultBuilder);
		Oaf.Builder builder = Oaf.newBuilder().setKind(Kind.entity).setLastupdatetimestamp(timestamp).setEntity(entityBuilder);

		return Base64.encodeBase64String(builder.build().toByteArray());
	}

	private Context getContext(final String sourceId) {
		return Context.newBuilder().addDataInfo(getDataInfo()).setId(sourceId).build();
	}
}