Project

General

Profile

1
package eu.dnetlib.msro.openaireplus.workflows.nodes.claims;
2

    
3
import java.util.List;
4

    
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7
import eu.dnetlib.data.hadoop.rmi.HadoopService;
8
import eu.dnetlib.data.proto.KindProtos.Kind;
9
import eu.dnetlib.data.proto.OafProtos.Oaf;
10
import eu.dnetlib.data.proto.OafProtos.OafEntity;
11
import eu.dnetlib.data.proto.ResultProtos.Result;
12
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
13
import eu.dnetlib.data.proto.TypeProtos.Type;
14
import eu.dnetlib.msro.rmi.MSROException;
15
import org.apache.commons.codec.binary.Base64;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18
import org.apache.hadoop.util.StringUtils;
19

    
20
/**
21
 * Created by alessia on 23/10/15.
22
 */
23
public class ApplyClaimUpdatesJobNode extends AbstractClaimsToHBASE {
24

    
25
	private static final Log log = LogFactory.getLog(ApplyClaimUpdatesJobNode.class);
26

    
27
	@Override
28
	protected String execute(NodeToken token) throws Exception {
29
		//TODO: use claim.claim_date from the claim db
30
		long timestamp = System.currentTimeMillis();
31

    
32
		List<Claim> claimUpdates = this.getClaimDatabaseUtils().query(getSql());
33
		int totalClaims = 0;
34
		int totalWrites = 0;
35
		int discardedClaims = 0;
36

    
37
		HadoopService hadoopService = getServiceLocator().getService(HadoopService.class);
38

    
39
		for (Claim claim : claimUpdates) {
40
			try{
41
			log.debug(claim);
42
			totalClaims++;
43
			String contextId = claim.getSource();
44
			String rowKey = getFullId(getOpenAIREType(claim.getTargetType()), claim.getTarget());
45

    
46
			String value = getValue(contextId, rowKey, claim.getSemantics(), timestamp);
47
			hadoopService.addHBaseColumn(getClusterName(), getTableName(), rowKey, "result", "update_" + System.nanoTime(), value);
48
			totalWrites++;
49
			incrementProcessed();
50
			} catch (IllegalArgumentException e) {
51
				log.error("Discarding claim " + claim + ". Cause: " + e.getMessage());
52
				discardedClaims++;
53
			}
54
		}
55

    
56
		log.info("Total Claim Updates: " + totalClaims);
57
		token.getEnv().setAttribute("claimUpdatesSize", totalClaims);
58
		log.info("Claim updates writeOps: " + totalWrites);
59
		token.getEnv().setAttribute("claimUpdatesWriteOps", totalWrites);
60
		log.info("Discarded Claim Updates: " + discardedClaims);
61
		token.getEnv().setAttribute("discardedClaimUpdates", discardedClaims);
62

    
63
		return Arc.DEFAULT_ARC;
64
	}
65

    
66
	protected String getValue(final String sourceId, final String semantics, final String targetId, final long timestamp) throws MSROException {
67
		log.debug(StringUtils.format("%s -- %s -- %s", sourceId, semantics, targetId));
68

    
69
		Result.Builder resultBuilder = Result.newBuilder().setMetadata(Result.Metadata.newBuilder().addContext(getContext(sourceId)));
70
		OafEntity.Builder entityBuilder = OafEntity.newBuilder().setId(targetId).setType(Type.result).setResult(resultBuilder);
71
		Oaf.Builder builder = Oaf.newBuilder().setKind(Kind.entity).setLastupdatetimestamp(timestamp).setEntity(entityBuilder);
72

    
73
		return Base64.encodeBase64String(builder.build().toByteArray());
74
	}
75

    
76
	private Context getContext(final String sourceId) {
77
		return Context.newBuilder().setDataInfo(getDataInfo()).setId(sourceId).build();
78
	}
79
}
(3-3/6)