Project

General

Profile

1
package eu.dnetlib.msro.openaireplus.workflows.nodes.claims;
2

    
3
import java.util.concurrent.atomic.AtomicInteger;
4

    
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7
import eu.dnetlib.data.hadoop.rmi.HadoopService;
8
import eu.dnetlib.data.proto.KindProtos.Kind;
9
import eu.dnetlib.data.proto.OafProtos.Oaf;
10
import eu.dnetlib.data.proto.OafProtos.OafEntity;
11
import eu.dnetlib.data.proto.ResultProtos.Result;
12
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
13
import eu.dnetlib.data.proto.TypeProtos.Type;
14
import eu.dnetlib.msro.rmi.MSROException;
15
import org.apache.commons.codec.binary.Base64;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18
import org.apache.hadoop.util.StringUtils;
19

    
20
/**
21
 * Created by alessia on 23/10/15.
22
 */
23
public class ApplyClaimUpdatesJobNode extends AbstractClaimsToHBASE {
24

    
25
	private static final Log log = LogFactory.getLog(ApplyClaimUpdatesJobNode.class);
26

    
27
	@Override
28
	protected String execute(NodeToken token) throws Exception {
29
		//TODO: use claim.claim_date from the claim db
30
		long timestamp = System.currentTimeMillis();
31
		setTotal(getClaimDatabaseUtils().count(getCountQuery()));
32

    
33
		final AtomicInteger discardedClaims = new AtomicInteger(0);
34
		final HadoopService hadoopService = getServiceLocator().getService(HadoopService.class);
35

    
36
		getClaimDatabaseUtils().query(getSql()).forEach(claim -> {
37
			try {
38
				log.debug(claim);
39
				String contextId = claim.getSource();
40
				String rowKey = getFullId(getOpenAIREType(claim.getTargetType()), claim.getTarget());
41

    
42
				String value = getValue(rowKey, contextId, timestamp);
43
				hadoopService.addHBaseColumn(getClusterName(), getTableName(), rowKey, "result", "update_" + System.nanoTime(), value);
44
				incrementProcessed();
45
			} catch (Exception e) {
46
				log.error("Discarding claim " + claim + ". Cause: " + e.getMessage());
47
				discardedClaims.incrementAndGet();
48
			}
49
		});
50
		log.info("Total Claim Updates: " + getTotal());
51
		token.getEnv().setAttribute("claimUpdatesSize", getTotal());
52
		log.info("Claim updates: " + getProcessed());
53
		token.getEnv().setAttribute("claimUpdates", getProcessed());
54
		log.info("Discarded Claim Updates: " + discardedClaims.intValue());
55
		token.getEnv().setAttribute("discardedClaimUpdates", discardedClaims.intValue());
56

    
57
		return Arc.DEFAULT_ARC;
58
	}
59

    
60
	protected String getValue(final String rowkey, final String contextid, final long timestamp) throws MSROException {
61
		log.debug(StringUtils.format("%s --  %s", rowkey, contextid));
62

    
63
		Result.Builder resultBuilder = Result.newBuilder().setMetadata(Result.Metadata.newBuilder().addContext(getContext(contextid)));
64
		OafEntity.Builder entityBuilder = OafEntity.newBuilder().setId(rowkey).setType(Type.result).setResult(resultBuilder);
65
		Oaf.Builder builder = Oaf.newBuilder().setKind(Kind.entity).setLastupdatetimestamp(timestamp).setEntity(entityBuilder);
66

    
67
		return Base64.encodeBase64String(builder.build().toByteArray());
68
	}
69

    
70
	private Context getContext(final String sourceId) {
71
		return Context.newBuilder().setDataInfo(getDataInfo()).setId(sourceId).build();
72
	}
73
}
(3-3/6)