Project

General

Profile

« Previous | Next » 

Revision 63100

Added by Claudio Atzori 9 months ago

[maven-release-plugin] copy for tag dnet-deduplication-2.0.1

View differences:

modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/DefineHBaseOpenaireSchemaJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import java.util.Set;
4

  
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7
import eu.dnetlib.openaire.hadoop.utils.HBaseTableUtils;
8
import org.apache.commons.lang3.StringUtils;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11

  
12
public class DefineHBaseOpenaireSchemaJobNode extends AbstractHBaseAdminJobNode {
13

  
14
	private static final Log log = LogFactory.getLog(DefineHBaseOpenaireSchemaJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
15

  
16
	private String schema;
17

  
18
	@Override
19
	protected String execute(final NodeToken token) throws Exception {
20

  
21
		final String schemaOverride = StringUtils.isNotBlank(getSchema()) ? getSchema() : asCSV(HBaseTableUtils.listAllColumns());
22
		log.info("table definition: " + schemaOverride);
23
		token.getEnv().setAttribute(getTableColumnsParamName(), schemaOverride);
24

  
25
		return Arc.DEFAULT_ARC;
26
	}
27

  
28
	public String getSchema() {
29
		return schema;
30
	}
31

  
32
	public void setSchema(final String schema) {
33
		this.schema = schema;
34
	}
35
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/DropHBaseTableJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5

  
6
import com.googlecode.sarasvati.Arc;
7
import com.googlecode.sarasvati.NodeToken;
8

  
9
import eu.dnetlib.data.hadoop.rmi.HadoopService;
10

  
11
/**
12
 * The Class DropHBaseTableJobNode.
13
 */
14
public class DropHBaseTableJobNode extends AbstractHBaseAdminJobNode {
15

  
16
	/** The Constant log. */
17
	private static final Log log = LogFactory.getLog(DropHBaseTableJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
18

  
19
	/*
20
	 * (non-Javadoc)
21
	 * 
22
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
23
	 */
24
	@Override
25
	protected String execute(final NodeToken token) throws Exception {
26

  
27
		final String tableName = tableName(token);
28
		final String cluster = cluster(token);
29

  
30
		log.info("Dropping hbase table '" + tableName + "' on cluster: '" + cluster + "'");
31

  
32
		getServiceLocator().getService(HadoopService.class).dropHbaseTable(cluster, tableName);
33

  
34
		return Arc.DEFAULT_ARC;
35
	}
36

  
37
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/PrepareMDStoreImportJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop;
2

  
3
import java.io.IOException;
4

  
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
8
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
9
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
10
import eu.dnetlib.msro.rmi.MSROException;
11
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
12
import org.apache.commons.lang.StringUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.springframework.beans.factory.annotation.Autowired;
16

  
17
public class PrepareMDStoreImportJobNode extends SimpleJobNode {
18

  
19
	private static final Log log = LogFactory.getLog(PrepareMDStoreImportJobNode.class);
20

  
21
	@Autowired
22
	private UniqueServiceLocator serviceLocator;
23

  
24
	private String hdfsPathParam;
25

  
26
	private String hdfsPath;
27

  
28
	private String mappingParam;
29

  
30
	private String mapping;
31

  
32
	@Override
33
	protected String execute(final NodeToken token) throws Exception {
34

  
35
		token.getEnv().setAttribute(getHdfsPathParam(), getHdfsPath());
36
		token.getEnv().setAttribute(getMappingParam(), readXslt(getMapping()));
37

  
38
		return Arc.DEFAULT_ARC;
39
	}
40

  
41
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException {
42
		if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id");
43

  
44
		log.info("loading mapping from profile id: " + profileId);
45

  
46
		final String xquery =
47
				String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId);
48
		return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
49
	}
50

  
51
	public String getHdfsPathParam() {
52
		return hdfsPathParam;
53
	}
54

  
55
	public void setHdfsPathParam(final String hdfsPathParam) {
56
		this.hdfsPathParam = hdfsPathParam;
57
	}
58

  
59
	public String getHdfsPath() {
60
		return hdfsPath;
61
	}
62

  
63
	public void setHdfsPath(final String hdfsPath) {
64
		this.hdfsPath = hdfsPath;
65
	}
66

  
67
	public String getMapping() {
68
		return mapping;
69
	}
70

  
71
	public void setMapping(final String mapping) {
72
		this.mapping = mapping;
73
	}
74

  
75
	public String getMappingParam() {
76
		return mappingParam;
77
	}
78

  
79
	public void setMappingParam(final String mappingParam) {
80
		this.mappingParam = mappingParam;
81
	}
82

  
83
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/AbstractHBaseJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import java.io.IOException;
4
import java.util.Map;
5

  
6
import com.googlecode.sarasvati.Engine;
7
import com.googlecode.sarasvati.NodeToken;
8
import com.googlecode.sarasvati.env.Env;
9
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
10
import eu.dnetlib.data.hadoop.rmi.HadoopService;
11
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
12
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
14
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
15
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
16
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
17
import eu.dnetlib.msro.rmi.MSROException;
18
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
19
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode;
20
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
21
import eu.dnetlib.msro.workflows.resultset.ProcessCountingResultSetFactory;
22
import eu.dnetlib.msro.workflows.util.ProgressProvider;
23
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
24
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
25
import org.apache.commons.lang.StringUtils;
26
import org.apache.commons.logging.Log;
27
import org.apache.commons.logging.LogFactory;
28
import org.springframework.beans.factory.annotation.Autowired;
29
import org.springframework.beans.factory.annotation.Required;
30

  
31
/**
32
 * Created by claudio on 08/04/16.
33
 */
34
public abstract class AbstractHBaseJobNode extends BlackboardJobNode implements ProgressJobNode {
35

  
36
	private static final Log log = LogFactory.getLog(StoreHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
37

  
38
	private final String INPUT_HBASE_TABLE_PARAM = "hbaseTable";
39
	private final String INPUT_EPR_PARAM = "input_epr";
40
	private final String INPUT_CLUSTER_PARAM = "cluster";
41

  
42
	private final String XSLT_PARAM = "xslt";
43

  
44
	private final String OUTPUT_HBASE_TABLE_PARAM = "table";
45
	private final String OUTPUT_CLUSTER_PARAM = "cluster";
46
	private final String SIMULATION_PARAM = "simulation";
47

  
48
	@Autowired
49
	protected UniqueServiceLocator serviceLocator;
50

  
51
	protected String inputEprParam;
52
	protected String hbaseTableProperty;
53
	protected String cluster;
54
	protected String mapping;
55

  
56
	protected boolean simulation = false;
57

  
58
	protected ProgressProvider progressProvider;
59

  
60
	protected ProcessCountingResultSetFactory processCountingResultSetFactory;
61

  
62
	protected abstract HadoopBlackboardActions getAction();
63

  
64
	@Override
65
	protected String obtainServiceId(final NodeToken token) {
66
		return getServiceLocator().getServiceId(HadoopService.class);
67
	}
68

  
69
	@Override
70
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
71
		log.info("Invoking blackboard method: " + getAction().toString());
72

  
73
		job.setAction(getAction().toString());
74
		job.getParameters().put(INPUT_EPR_PARAM, DnetXsltFunctions.encodeBase64(prepareEpr(token)));
75
		job.getParameters().put(XSLT_PARAM, DnetXsltFunctions.encodeBase64(readXslt(getMapping())));
76
		job.getParameters().put(OUTPUT_HBASE_TABLE_PARAM, tableName(token));
77
		job.getParameters().put(OUTPUT_CLUSTER_PARAM, cluster(token));
78
		job.getParameters().put(SIMULATION_PARAM, String.valueOf(isSimulation()));
79
	}
80

  
81
	@Override
82
	protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) {
83
		return new BlackboardWorkflowJobListener(engine, token) {
84

  
85
			@Override
86
			protected void populateEnv(final Env env, final Map<String, String> responseParams) {
87
				final String count = responseParams.get("count");
88
				log.info(String.format("%s %s objects to HBase table %s, cluster %s", getAction().toString(), count, tableName(token), cluster(token)));
89
				env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + getName() + ":count", count);
90
			}
91
		};
92
	}
93

  
94
	protected String tableName(final NodeToken token) {
95
		if (token.getEnv().hasAttribute(INPUT_HBASE_TABLE_PARAM)) {
96
			String table = token.getEnv().getAttribute(INPUT_HBASE_TABLE_PARAM);
97
			log.debug("found override value in wfEnv for 'hbaseTable' param: " + table);
98
			return table;
99
		}
100
		return getPropertyFetcher().getProperty(getHbaseTableProperty());
101
	}
102

  
103
	protected String cluster(final NodeToken token) {
104
		if (token.getEnv().hasAttribute(INPUT_CLUSTER_PARAM)) {
105
			String cluster = token.getEnv().getAttribute("cluster");
106
			log.debug("found override value in wfEnv for 'cluster' param: " + cluster);
107
			return cluster;
108
		}
109
		return getCluster();
110
	}
111

  
112
	private String prepareEpr(final NodeToken token) throws ResultSetException {
113
		final String epr = token.getEnv().getAttribute(inputEprParam);
114
		final ResultsetProgressProvider resultsetProgressProvider = processCountingResultSetFactory.createProgressProvider(token.getProcess(), epr);
115

  
116
		setProgressProvider(resultsetProgressProvider);
117

  
118
		return resultsetProgressProvider.getEpr().toString();
119
	}
120

  
121
	private String readXslt(final String profileId) throws IOException, MSROException, ISLookUpException {
122
		if (StringUtils.isBlank(profileId)) throw new MSROException("missing profile id");
123

  
124
		log.info("loading mapping from profile id: " + profileId);
125

  
126
		final String xquery =
127
				String.format("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='%s']/BODY/CONFIGURATION/SCRIPT/CODE/*[local-name()='stylesheet']", profileId);
128
		return serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
129
	}
130

  
131
	public String getInputEprParam() {
132
		return inputEprParam;
133
	}
134

  
135
	public void setInputEprParam(final String inputEprParam) {
136
		this.inputEprParam = inputEprParam;
137
	}
138

  
139
	public String getHbaseTableProperty() {
140
		return hbaseTableProperty;
141
	}
142

  
143
	public void setHbaseTableProperty(final String hbaseTableProperty) {
144
		this.hbaseTableProperty = hbaseTableProperty;
145
	}
146

  
147
	@Override
148
	public ProgressProvider getProgressProvider() {
149
		return progressProvider;
150
	}
151

  
152
	public void setProgressProvider(final ProgressProvider progressProvider) {
153
		this.progressProvider = progressProvider;
154
	}
155

  
156
	public ProcessCountingResultSetFactory getProcessCountingResultSetFactory() {
157
		return processCountingResultSetFactory;
158
	}
159

  
160
	@Required
161
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
162
		this.processCountingResultSetFactory = processCountingResultSetFactory;
163
	}
164

  
165
	public String getMapping() {
166
		return mapping;
167
	}
168

  
169
	public void setMapping(final String mapping) {
170
		this.mapping = mapping;
171
	}
172

  
173
	public String getCluster() {
174
		return cluster;
175
	}
176

  
177
	public void setCluster(final String cluster) {
178
		this.cluster = cluster;
179
	}
180

  
181
	public boolean isSimulation() {
182
		return simulation;
183
	}
184

  
185
	public void setSimulation(final boolean simulation) {
186
		this.simulation = simulation;
187
	}
188

  
189
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/ExistHBaseTableJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import com.googlecode.sarasvati.NodeToken;
4
import eu.dnetlib.data.hadoop.rmi.HadoopService;
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
public class ExistHBaseTableJobNode extends AbstractHBaseAdminJobNode {
9

  
10
	private static final Log log = LogFactory.getLog(ExistHBaseTableJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
11

  
12
	private String existOutNode;
13

  
14
	private String dontExistOutNode;
15

  
16
	@Override
17
	protected String execute(final NodeToken token) throws Exception {
18
		final String tableName = tableName(token);
19
		final String cluster = cluster(token);
20

  
21
		log.info("checking table existance: '" + tableName + "' on cluster: '" + cluster + "'");
22

  
23
		final HadoopService hadoopService = getServiceLocator().getService(HadoopService.class);
24
		boolean exists = hadoopService.existHbaseTable(cluster, tableName);
25

  
26
		log.info("table '" + tableName + "' exists: " + exists);
27

  
28
		if (exists) {
29
			final String tableDesc = hadoopService.describeHBaseTableConfiguration(cluster, tableName);
30
			token.getEnv().setAttribute(getTableConfigurationParamName(), tableDesc);
31
		}
32

  
33
		return exists ? getExistOutNode() : getDontExistOutNode();
34
	}
35

  
36
	public String getExistOutNode() {
37
		return existOutNode;
38
	}
39

  
40
	public void setExistOutNode(final String existOutNode) {
41
		this.existOutNode = existOutNode;
42
	}
43

  
44
	public String getDontExistOutNode() {
45
		return dontExistOutNode;
46
	}
47

  
48
	public void setDontExistOutNode(final String dontExistOutNode) {
49
		this.dontExistOutNode = dontExistOutNode;
50
	}
51

  
52
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-deduplication</artifactId>
12
	<packaging>jar</packaging>
13
	<version>2.0.1</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-deduplication/tags/dnet-deduplication-2.0.1</developerConnection>
16
	</scm>
17
	<dependencies>
18
		<dependency>
19
			<groupId>eu.dnetlib</groupId>
20
			<artifactId>dnet-msro-service</artifactId>
21
			<version>[4.0.0,5.0.0)</version>
22
		</dependency>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>dnet-hadoop-service-rmi</artifactId>
26
			<version>[1.0.0,2.0.0)</version>
27
		</dependency>
28
		<dependency>
29
			<groupId>eu.dnetlib</groupId>
30
			<artifactId>dnet-actionmanager-api</artifactId>
31
			<version>[4.0.0,5.0.0)</version>
32
		</dependency>
33
		<dependency>
34
			<groupId>eu.dnetlib</groupId>
35
			<artifactId>dnet-modular-ui</artifactId>
36
			<version>[3.0.0,4.0.0)</version>
37
		</dependency>
38

  
39
		<dependency>
40
			<groupId>eu.dnetlib</groupId>
41
			<artifactId>dnet-index-client</artifactId>
42
			<version>[3.0.0,4.0.0)</version>
43
		</dependency>
44

  
45
		<dependency>
46
			<groupId>eu.dnetlib</groupId>
47
			<artifactId>dnet-openaireplus-mapping-utils</artifactId>
48
			<version>[7.0.0,8.0.0)</version>
49
			<exclusions>
50
				<exclusion>
51
					<groupId>eu.dnetlib</groupId>
52
					<artifactId>dnet-hadoop-commons</artifactId>
53
				</exclusion>
54
			</exclusions>
55
		</dependency>
56

  
57

  
58
		<dependency>
59
			<groupId>javax.servlet</groupId>
60
			<artifactId>javax.servlet-api</artifactId>
61
			<version>${javax.servlet.version}</version>
62
			<scope>provided</scope>
63
		</dependency>
64
		<dependency>
65
			<groupId>com.fasterxml.jackson.core</groupId>
66
			<artifactId>jackson-databind</artifactId>
67
			<version>${jackson.version}</version>
68
		</dependency>
69
		<dependency>
70
			<groupId>com.google.guava</groupId>
71
			<artifactId>guava</artifactId>
72
			<version>${google.guava.version}</version>
73
		</dependency>
74

  
75
		<dependency>
76
			<groupId>junit</groupId>
77
			<artifactId>junit</artifactId>
78
			<version>${junit.version}</version>
79
			<scope>test</scope>
80
		</dependency>
81

  
82
	</dependencies>
83
</project>
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/AbstractHBaseAdminJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import java.util.Map.Entry;
4
import java.util.Set;
5
import javax.annotation.Resource;
6

  
7
import com.google.common.base.Joiner;
8
import com.google.common.base.Splitter;
9
import com.google.common.collect.Sets;
10
import com.googlecode.sarasvati.NodeToken;
11
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
12
import eu.dnetlib.msro.rmi.MSROException;
13
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
14
import org.apache.commons.lang.StringUtils;
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17

  
18
public abstract class AbstractHBaseAdminJobNode extends SimpleJobNode {
19

  
20
	/** The Constant log. */
21
	private static final Log log = LogFactory.getLog(AbstractHBaseAdminJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
22

  
23
	private String tableColumnsParamName = "columns";
24
	private String tableConfigurationParamName = "tableConf";
25
	private String hbaseTableProperty;
26
	private String cluster;
27

  
28
	@Resource
29
	private UniqueServiceLocator serviceLocator;
30

  
31
	@Override
32
	protected void beforeStart(final NodeToken token) {
33
		for (Entry<String, String> e : parseJsonParameters(token).entrySet()) {
34
			token.getEnv().setAttribute(e.getKey(), e.getValue());
35
		}
36
	}
37

  
38
	protected String tableName(final NodeToken token) {
39
		if (token.getEnv().hasAttribute("hbaseTable")) {
40
			String table = token.getEnv().getAttribute("hbaseTable");
41
			log.debug("found override value in wfEnv for 'hbaseTable' param: " + table);
42
			return table;
43
		}
44
		return getPropertyFetcher().getProperty(getHbaseTableProperty());
45
	}
46

  
47
	protected String cluster(final NodeToken token) {
48
		if (token.getEnv().hasAttribute("cluster")) {
49
			String cluster = token.getEnv().getAttribute("cluster");
50
			log.debug("found override value in wfEnv for 'cluster' param: " + cluster);
51
			return cluster;
52
		}
53
		return getCluster();
54
	}
55

  
56
	protected Set<String> getColumns(final NodeToken token) throws MSROException {
57
		String envCols = token.getEnv().getAttribute(getTableColumnsParamName());
58
		if (StringUtils.isBlank(envCols)) { throw new MSROException("cannot find table description"); }
59
		log.debug("using columns from env: " + envCols);
60
		return Sets.newHashSet(Splitter.on(",").omitEmptyStrings().split(envCols));
61
	}
62

  
63
	protected String asCSV(final Iterable<String> columns) {
64
		return Joiner.on(",").skipNulls().join(columns);
65
	}
66

  
67
	public String getCluster() {
68
		return cluster;
69
	}
70

  
71
	public void setCluster(final String cluster) {
72
		this.cluster = cluster;
73
	}
74

  
75
	public String getHbaseTableProperty() {
76
		return hbaseTableProperty;
77
	}
78

  
79
	public void setHbaseTableProperty(final String hbaseTableProperty) {
80
		this.hbaseTableProperty = hbaseTableProperty;
81
	}
82

  
83
	public String getTableColumnsParamName() {
84
		return tableColumnsParamName;
85
	}
86

  
87
	public void setTableColumnsParamName(final String tableColumnsParamName) {
88
		this.tableColumnsParamName = tableColumnsParamName;
89
	}
90

  
91
	public UniqueServiceLocator getServiceLocator() {
92
		return serviceLocator;
93
	}
94

  
95
	public String getTableConfigurationParamName() {
96
		return tableConfigurationParamName;
97
	}
98

  
99
	public void setTableConfigurationParamName(final String tableConfigurationParamName) {
100
		this.tableConfigurationParamName = tableConfigurationParamName;
101
	}
102
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/CreateHdfsDirectoryJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop;
2

  
3
import com.googlecode.sarasvati.NodeToken;
4
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
5
import eu.dnetlib.data.hadoop.rmi.HadoopService;
6
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
7
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
8

  
9
public class CreateHdfsDirectoryJobNode extends BlackboardJobNode {
10

  
11
	private String cluster;
12

  
13
	private boolean force = false;
14

  
15
	@Override
16
	protected String obtainServiceId(final NodeToken token) {
17
		return getServiceLocator().getServiceId(HadoopService.class);
18
	}
19

  
20
	@Override
21
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
22

  
23
		job.setAction(HadoopBlackboardActions.CREATE_HDFS_DIR.toString());
24
		job.getParameters().put("cluster", getCluster());
25
		job.getParameters().put("force", String.valueOf(isForce()));
26

  
27
		// The "path" parameter is set by the following call
28
		job.getParameters().putAll(parseJsonParameters(token));
29
	}
30

  
31
	public String getCluster() {
32
		return cluster;
33
	}
34

  
35
	public void setCluster(final String cluster) {
36
		this.cluster = cluster;
37
	}
38

  
39
	public boolean isForce() {
40
		return force;
41
	}
42

  
43
	public void setForce(boolean force) {
44
		this.force = force;
45
	}
46
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/SetClusterAndTableJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop;
2

  
3
import javax.annotation.Resource;
4

  
5
import org.apache.commons.lang.StringUtils;
6
import org.springframework.beans.factory.annotation.Value;
7

  
8
import com.googlecode.sarasvati.Arc;
9
import com.googlecode.sarasvati.NodeToken;
10

  
11
import eu.dnetlib.data.hadoop.rmi.HadoopService;
12
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
13
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
14

  
15
/**
16
 * The Class SetClusterAndTableJobNode.
17
 */
18
public class SetClusterAndTableJobNode extends AsyncJobNode {
19

  
20
	/** The cluster. */
21
	private String cluster;
22

  
23
	/** The table. */
24
	@Value("${hbase.mapred.datatable}")
25
	private String table;
26

  
27
	/** The table param. */
28
	private String tableParam;
29

  
30
	/** The service locator. */
31
	@Resource
32
	private UniqueServiceLocator serviceLocator;
33

  
34
	/*
35
	 * (non-Javadoc)
36
	 * 
37
	 * @see eu.dnetlib.msro.workflows.nodes.AsyncJobNode#execute(com.googlecode.sarasvati.NodeToken)
38
	 */
39
	@Override
40
	protected String execute(final NodeToken token) throws Exception {
41

  
42
		if (StringUtils.isBlank(getCluster())) throw new IllegalArgumentException("missing cluster name parameter");
43
		if (StringUtils.isBlank(getTable())) throw new IllegalArgumentException("missing table name parameter");
44
		if (!serviceLocator.getService(HadoopService.class).existHbaseTable(getCluster(), getTable()))
45
			throw new IllegalArgumentException(String.format("unexisting table %s on cluster %s", getTable(), getCluster()));
46

  
47
		token.getEnv().setAttribute("cluster", getCluster());
48
		token.getEnv().setAttribute(getTableParam(), getTable());
49

  
50
		return Arc.DEFAULT_ARC;
51
	}
52

  
53
	/**
54
	 * Gets the cluster.
55
	 *
56
	 * @return the cluster
57
	 */
58
	public String getCluster() {
59
		return cluster;
60
	}
61

  
62
	/**
63
	 * Sets the cluster.
64
	 *
65
	 * @param cluster
66
	 *            the new cluster
67
	 */
68
	public void setCluster(final String cluster) {
69
		this.cluster = cluster;
70
	}
71

  
72
	/**
73
	 * Gets the table param.
74
	 *
75
	 * @return the table param
76
	 */
77
	public String getTableParam() {
78
		return tableParam;
79
	}
80

  
81
	/**
82
	 * Sets the table param.
83
	 *
84
	 * @param tableParam
85
	 *            the new table param
86
	 */
87
	public void setTableParam(final String tableParam) {
88
		this.tableParam = tableParam;
89
	}
90

  
91
	public String getTable() {
92
		return table;
93
	}
94

  
95
	public void setTable(String table) {
96
		this.table = table;
97
	}
98

  
99
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestrationTest.java
1
package eu.dnetlib.msro.workflows.dedup.conf;
2

  
3
import static org.junit.Assert.assertNotNull;
4
import static org.junit.Assert.assertTrue;
5

  
6
import java.io.IOException;
7
import java.nio.charset.Charset;
8
import java.util.Queue;
9

  
10
import org.apache.commons.io.IOUtils;
11
import org.junit.Before;
12
import org.junit.Test;
13
import org.junit.Ignore;
14

  
15
import com.google.common.collect.Lists;
16

  
17
import eu.dnetlib.pace.config.DedupConfig;
18

  
19
@Ignore
20
public class DedupConfigurationOrchestrationTest {
21

  
22
	public DedupConfigurationOrchestration dco;
23

  
24
	@Before
25
	public void setUp() throws IOException {
26

  
27
		final Entity e = new Entity("result", "50", "Publication");
28

  
29
		final String actionSetId = "001";
30
		final Queue<DedupConfig> configurations = Lists.newLinkedList();
31

  
32
		configurations.add(DedupConfig.loadDefault());
33

  
34
		dco = new DedupConfigurationOrchestration(e, actionSetId, configurations);
35
		assertNotNull(dco);
36
		assertNotNull(dco.getActionSetId());
37
		assertNotNull(dco.getEntity());
38
		assertNotNull(dco.getConfigurations());
39
	}
40

  
41
	@Test
42
	public void testSerialization() {
43

  
44
		final String json = dco.toString();
45
		final DedupConfigurationOrchestration anotherDco = DedupConfigurationOrchestration.fromJSON(json);
46
		assertNotNull(anotherDco);
47
		assertTrue(json.equals(anotherDco.toString()));
48
	}
49

  
50
	@Test
51
	public void testSerializationOrgs() throws IOException {
52

  
53
		final Entity e = new Entity("organization", "20", "Organization");
54

  
55
		final String actionSetId = "001";
56
		final Queue<DedupConfig> configurations = Lists.newLinkedList();
57

  
58
		configurations.add(DedupConfig.load(IOUtils.toString(getClass().getResourceAsStream("organisation.conf.json"), Charset.forName("UTF-8"))));
59
		dco = new DedupConfigurationOrchestration(e, actionSetId, configurations);
60

  
61
		System.out.println(dco.toString());
62

  
63
	}
64
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/StoreHdfsRecordsJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop;
2

  
3
import java.util.Map;
4

  
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7
import org.springframework.beans.factory.annotation.Required;
8

  
9
import com.googlecode.sarasvati.Engine;
10
import com.googlecode.sarasvati.NodeToken;
11
import com.googlecode.sarasvati.env.Env;
12

  
13
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
14
import eu.dnetlib.data.hadoop.rmi.HadoopService;
15
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
16
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
17
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
18
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
19
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode;
20
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
21
import eu.dnetlib.msro.workflows.resultset.ProcessCountingResultSetFactory;
22
import eu.dnetlib.msro.workflows.util.ProgressProvider;
23
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
24
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
25

  
26
public class StoreHdfsRecordsJobNode extends BlackboardJobNode implements ProgressJobNode {
27

  
28
	private static final Log log = LogFactory.getLog(StoreHdfsRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
29

  
30
	private String inputEprParam;
31
	private String hdfsPathParam;
32
	private String cluster;
33

  
34
	private ProgressProvider progressProvider;
35

  
36
	private ProcessCountingResultSetFactory processCountingResultSetFactory;
37

  
38
	@Override
39
	protected String obtainServiceId(final NodeToken token) {
40
		return getServiceLocator().getServiceId(HadoopService.class);
41
	}
42

  
43
	@Override
44
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
45
		log.info("Invoking blackboard method");
46

  
47
		job.setAction(HadoopBlackboardActions.IMPORT_EPR_HDFS.toString());
48
		job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token)));
49
		job.getParameters().put("path", token.getEnv().getAttribute(getHdfsPathParam()));
50
		job.getParameters().put("cluster", getCluster());
51
	}
52

  
53
	@Override
54
	protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) {
55
		return new BlackboardWorkflowJobListener(engine, token) {
56

  
57
			@Override
58
			protected void populateEnv(final Env env, final Map<String, String> responseParams) {
59
				env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + getName() + ":count", responseParams.get("count"));
60
			}
61
		};
62
	}
63

  
64
	private String prepareEpr(final NodeToken token) throws ResultSetException {
65
		final String epr = token.getEnv().getAttribute(inputEprParam);
66
		final ResultsetProgressProvider resultsetProgressProvider = processCountingResultSetFactory.createProgressProvider(token.getProcess(), epr);
67

  
68
		setProgressProvider(resultsetProgressProvider);
69

  
70
		return resultsetProgressProvider.getEpr().toString();
71
	}
72

  
73
	public String getInputEprParam() {
74
		return inputEprParam;
75
	}
76

  
77
	public void setInputEprParam(final String inputEprParam) {
78
		this.inputEprParam = inputEprParam;
79
	}
80

  
81
	@Required
82
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
83
		this.processCountingResultSetFactory = processCountingResultSetFactory;
84
	}
85

  
86
	@Override
87
	public ProgressProvider getProgressProvider() {
88
		return progressProvider;
89
	}
90

  
91
	public void setProgressProvider(final ProgressProvider progressProvider) {
92
		this.progressProvider = progressProvider;
93
	}
94

  
95
	public ProcessCountingResultSetFactory getProcessCountingResultSetFactory() {
96
		return processCountingResultSetFactory;
97
	}
98

  
99
	public String getCluster() {
100
		return cluster;
101
	}
102

  
103
	public void setCluster(final String cluster) {
104
		this.cluster = cluster;
105
	}
106

  
107
	public String getHdfsPathParam() {
108
		return hdfsPathParam;
109
	}
110

  
111
	public void setHdfsPathParam(final String hdfsPathParam) {
112
		this.hdfsPathParam = hdfsPathParam;
113
	}
114

  
115
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/java/eu/dnetlib/msro/workflows/dedup/SerializationTest.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import java.io.IOException;
4

  
5
import com.google.common.collect.Iterables;
6
import com.googlecode.protobuf.format.JsonFormat;
7
import eu.dnetlib.data.proto.OafProtos.Oaf;
8
import org.apache.commons.codec.binary.Base64;
9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.junit.Test;
13

  
14
/**
15
 * Created by claudio on 05/04/16.
16
 */
17
public class SerializationTest {
18

  
19
	private static final Log log = LogFactory.getLog(SerializationTest.class);
20

  
21
	@Test
22
	public void test() throws IOException {
23

  
24
		final String data = Iterables.getFirst(IOUtils.readLines(getClass().getResourceAsStream("oaf_data.base64")), "");
25

  
26
		final byte[] oafBytes = Base64.decodeBase64(data);
27

  
28
		Oaf oaf = Oaf.parseFrom(oafBytes);
29

  
30
		JsonFormat jsonFormat = new JsonFormat();
31
		String asJson = jsonFormat.printToString(oaf);
32

  
33
		log.info(asJson);
34

  
35
	}
36

  
37
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/GetHBaseTableDescriptionJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import java.util.List;
4

  
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
import com.googlecode.sarasvati.Arc;
9
import com.googlecode.sarasvati.NodeToken;
10

  
11
import eu.dnetlib.data.hadoop.rmi.HadoopService;
12

  
13
public class GetHBaseTableDescriptionJobNode extends AbstractHBaseAdminJobNode {
14

  
15
	private static final Log log = LogFactory.getLog(GetHBaseTableDescriptionJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
16

  
17
	@Override
18
	protected String execute(final NodeToken token) throws Exception {
19
		final String tableName = tableName(token);
20
		final String cluster = cluster(token);
21

  
22
		log.info("getting table description: '" + tableName + "' on cluster: '" + cluster + "'");
23

  
24
		final List<String> columns = getServiceLocator().getService(HadoopService.class).describeHbaseTable(cluster, tableName);
25
		log.debug(String.format("table '%s': " + columns, tableName));
26
		token.getEnv().setAttribute(getTableColumnsParamName(), asCSV(columns));
27

  
28
		return Arc.DEFAULT_ARC;
29
	}
30

  
31
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/java/eu/dnetlib/msro/workflows/dedup/SimilarityMeshBuilderTest.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import java.util.List;
4

  
5
import org.junit.Before;
6
import org.junit.Test;
7

  
8
import com.google.common.collect.Lists;
9

  
10
import eu.dnetlib.data.proto.TypeProtos.Type;
11
import eu.dnetlib.msro.workflows.hadoop.utils.Similarity;
12
import eu.dnetlib.msro.workflows.hadoop.utils.SimilarityMeshBuilder;
13

  
14
public class SimilarityMeshBuilderTest {
15

  
16
	private List<String> list;
17

  
18
	@Before
19
	public void setUp() throws Exception {
20
		list = Lists.newArrayList();
21
		for (int i = 0; i < 10; i++) {
22
			list.add(i + "");
23
		}
24
	}
25

  
26
	@Test
27
	public void test() {
28
		final List<Similarity> combinations = SimilarityMeshBuilder.build(Type.result, list);
29

  
30
		System.out.println(combinations);
31
		System.out.println(combinations.size());
32

  
33
	}
34

  
35
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/java/eu/dnetlib/data/dedup/DedupInspectorFunctionsTest.java
1
package eu.dnetlib.data.dedup;
2

  
3
import com.google.common.collect.Lists;
4
import eu.dnetlib.data.proto.OafProtos.Oaf;
5
import com.google.common.base.Function;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.junit.Before;
9
import org.junit.Test;
10

  
11
import java.util.Map;
12

  
13
public class DedupInspectorFunctionsTest {
14

  
15
    private static final Log log = LogFactory.getLog(DedupInspectorFunctionsTest.class);
16

  
17
    private String orgFromIndex = "<record>CAESigMIFCK7ARK4ARIlCiNJbnRlcm5hdGlvbmFsIFRlbm5pcyBGZWRlcmF0aW9uIEx0ZCoHCgVm\r\nYWxzZTIHCgVmYWxzZToHCgVmYWxzZUIHCgVmYWxzZUoHCgVmYWxzZVIHCgVmYWxzZVoHCgVmYWxz\r\nZWIHCgVmYWxzZWoHCgVmYWxzZXIHCgVmYWxzZYIBNAoCR0ISDlVuaXRlZCBLaW5nZG9tGg5kbmV0\r\nOmNvdW50cmllcyIOZG5ldDpjb3VudHJpZXNCMnJjdWtfX19fX19fXzo6ODYxOTIxQzMtNjcyMy00\r\nOUMwLUIyQ0UtRDcyODJBOUMxRTk2SkkKMTEwfG9wZW5haXJlX19fXzo6YWIyZDMzMTA3NDFlYTgw\r\nZDNiODcyNmY2NTE1MDI4NTgSFFJlc2VhcmNoIENvdW5jaWxzIFVLWgoyMDE3LTExLTA0YjEyMHxy\r\nY3VrX19fX19fX186OjVjYjFmZTZhYjg1NDcwMTBiMjJkOTAyN2U3MjUyMzVkagoyMDE5LTA1LTE5\r\nIqsBCAEQARoDMC45IiRkZWR1cC1zaW1pbGFyaXR5LW9yZ2FuaXphdGlvbi1zaW1wbGUqegoic3lz\r\naW1wb3J0OmNyb3Nzd2FsazplbnRpdHlyZWdpc3RyeRIic3lzaW1wb3J0OmNyb3Nzd2FsazplbnRp\r\ndHlyZWdpc3RyeRoXZG5ldDpwcm92ZW5hbmNlX2FjdGlvbnMiF2RuZXQ6cHJvdmVuYW5jZV9hY3Rp\r\nb25z\r\n\n</record>";
18

  
19
    private DedupIndexDAO dao;
20

  
21
    @Before
22
    public void setUp() {
23
        dao = new DedupIndexDAO();
24
    }
25

  
26
    @Test
27
    public void test_1() {
28

  
29
        Oaf oaf = dao.getXml2OafFunction().apply(orgFromIndex);
30
        System.out.println(oaf);
31

  
32

  
33
        Map<String, String> map = dao.getOaf2FieldMapFunction("organization", Lists.newArrayList("legalname", "legalshortname", "country", "websiteurl", "provenance")).apply(oaf);
34

  
35
        System.out.println(map);
36

  
37

  
38
    }
39

  
40
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/data/dedup/DedupDbDAO.java
1
package eu.dnetlib.data.dedup;
2

  
3
import java.util.List;
4
import java.util.Map.Entry;
5
import java.util.Set;
6

  
7
import javax.annotation.Resource;
8

  
9
import org.apache.commons.lang.exception.ExceptionUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.springframework.beans.factory.annotation.Value;
13

  
14
import com.google.common.collect.Lists;
15

  
16
import eu.dnetlib.data.proto.TypeProtos.Type;
17
import eu.dnetlib.enabling.database.rmi.DatabaseService;
18
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
19
import eu.dnetlib.functionality.modular.ui.dedup.SimilarityGroup;
20
import eu.dnetlib.msro.workflows.hadoop.utils.Similarity;
21
import eu.dnetlib.msro.workflows.hadoop.utils.SimilarityMeshBuilder;
22

  
23
public class DedupDbDAO {
24

  
25
	private static final Log log = LogFactory.getLog(DedupDbDAO.class);
26

  
27
	@Value("${dnet.dedup.db.name}")
28
	private String dbName;
29

  
30
	@Resource
31
	private UniqueServiceLocator serviceLocator;
32

  
33
	public boolean commit(final SimilarityGroup group) throws Exception {
34

  
35
		final DatabaseService dbService = serviceLocator.getService(DatabaseService.class);
36

  
37
		log.info("adding similarities");
38
		updateGroupSql(dbService, group);
39

  
40
		log.info("adding dissimilarities");
41
		dissimilaritiesSql(dbService, group);
42

  
43
		return true;
44
	}
45

  
46
	private void updateGroupSql(final DatabaseService dbService, final SimilarityGroup group) throws Exception {
47

  
48
		log.info("cleanup similarities");
49
		for (final String id : group.getGroup()) {
50
			// sql.append(String.format("DELETE FROM entities WHERE objidentifier = '%s'; ", id));
51
			safeUpdateSql(dbService, dbName, String.format("DELETE FROM similarity_groups WHERE objidentifier = '%s'; ", id));
52
		}
53

  
54
		// cleanup empty groups
55
		log.info("cleanup empty groups");
56
		safeUpdateSql(dbService, dbName, "DELETE FROM groups WHERE id NOT IN (SELECT groupid FROM similarity_groups)");
57

  
58
		// look for dissimilarities to remove
59
		log.info("reverting dissimilarities");
60
		for (Similarity s : mesh(group)) {
61
			safeUpdateSql(dbService, dbName, String.format("DELETE FROM dissimilarities WHERE actionsetid = '%s' AND id1 = '%s' AND id2 = '%s'; ",
62
					group.getActionSet(), s.getPair().getKey(), s.getPair().getValue()));
63
		}
64

  
65
		log.info("adding new group, size: " + group.getGroup().size());
66
		final String type = group.getEntityType().getType();
67
		safeUpdateSql(dbService, dbName,
68
				String.format("INSERT INTO groups(id, entitytype, date, actionsetid) VALUES('%s', '%s', '%s', '%s'); ", group.getId(), type, group.getDate(),
69
						group.getActionSet()));
70
		for (final String id : group.getGroup()) {
71
			// add new entity if needed
72
			if (!dbService.contains(dbName, "entities", "id", id)) {
73
				safeUpdateSql(dbService, dbName, String.format("INSERT INTO entities(id, entitytype) VALUES('%s', '%s'); ", id, type));
74
			}
75

  
76
			// throw new Exception("id already defined in a similarity group.");
77
			safeUpdateSql(dbService, dbName, String.format("INSERT INTO similarity_groups(groupid, objidentifier) VALUES('%s', '%s'); ", group.getId(), id));
78
		}
79
	}
80

  
81
	private List<Similarity> mesh(final SimilarityGroup group) {
82
		return SimilarityMeshBuilder.build(Type.valueOf(group.getEntityType().getType()), Lists.newArrayList(group.getGroup()), false);
83
	}
84

  
85
	private void dissimilaritiesSql(final DatabaseService dbService, final SimilarityGroup group) throws Exception {
86

  
87
		final String type = group.getEntityType().getType();
88

  
89
		// add potential new entities
90
		for (final Entry<String, Set<String>> e : group.getDissimilar().entrySet()) {
91
			if (!dbService.contains(dbName, "entities", "id", e.getKey())) {
92
				safeUpdateSql(dbService, dbName, String.format("INSERT INTO entities(id, entitytype) VALUES('%s', '%s'); ", e.getKey(), type));
93
			}
94
			for (final String id : e.getValue()) {
95
				if (!dbService.contains(dbName, "entities", "id", id)) {
96
					safeUpdateSql(dbService, dbName, String.format("INSERT INTO entities(id, entitytype) VALUES('%s', '%s'); ", id, type));
97
				}
98
			}
99
		}
100

  
101
		log.info("store dissimilarities");
102
		for (final Entry<String, Set<String>> e : group.getDissimilar().entrySet()) {
103
			for (final String id : e.getValue()) {
104
				safeUpdateSql(dbService, dbName,
105
						String.format("INSERT INTO dissimilarities(id1, id2, actionsetid) VALUES('%s', '%s', '%s'); ", e.getKey(), id, group.getActionSet()));
106
			}
107
		}
108
	}
109

  
110
	private void safeUpdateSql(final DatabaseService dbService, final String dbName, final String sql) throws Exception {
111
		try {
112
			log.info(sql);
113
			dbService.updateSQL(dbName, sql);
114
		} catch (final Exception e) {
115
			log.error(e.getMessage());
116
			log.debug(ExceptionUtils.getFullStackTrace(e));
117
			throw e;
118
		}
119
	}
120

  
121
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/resources/eu/dnetlib/msro/workflows/dedup/conf/organisation.conf.json
1
{
2
  "wf" : {
3
    "threshold" : "0.9",
4
    "dedupRun" : "001",
5
    "entityType" : "organization",
6
    "orderField" : "legalname",
7
    "queueMaxSize" : "2000",
8
    "groupMaxSize" : "50",
9
    "slidingWindowSize" : "200",
10
    "rootBuilder" : [ "organization", "projectOrganization_participation_isParticipant", "datasourceOrganization_provision_isProvidedBy" ],
11
    "includeChildren" : "true"
12
  },
13
  "pace" : {
14
    "clustering" : [
15
      { "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} },
16
      { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } },
17
      { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }
18
    ],
19
    "strictConditions" : [
20
      { "name" : "exactMatch", "fields" : [ "gridid" ] }
21
    ],
22
    "conditions" : [
23
      { "name" : "exactMatch", "fields" : [ "country" ] },
24
      { "name" : "DomainExactMatch", "fields" : [ "websiteurl" ] }
25
    ],
26
    "model" : [
27
      { "name" : "country", "algo" : "Null", "type" : "String", "weight" : "0", "ignoreMissing" : "false", "path" : "organization/metadata/country/classid" },
28
      { "name" : "legalshortname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.1", "ignoreMissing" : "false", "path" : "organization/metadata/legalshortname/value" },
29
      { "name" : "legalname", "algo" : "JaroWinklerNormalizedName", "type" : "String", "weight" : "0.9", "ignoreMissing" : "false", "path" : "organization/metadata/legalname/value", "params" : {"windowSize" : 4, "threshold" : 0.5} },
30
      { "name" : "websiteurl", "algo" : "Null", "type" : "URL", "weight" : "0", "ignoreMissing" : "true", "path" : "organization/metadata/websiteurl/value", "params" : { "host" : 0.5, "path" : 0.5 } },
31
      { "name" : "gridid", "algo" : "Null", "type" : "String", "weight" : "0.0", "ignoreMissing" : "true", "path" : "pid[qualifier#classid = {grid}]/value" }
32
    ],
33
    "blacklists" : { }
34
  }
35
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/data/dedup/AbstractDedupDAO.java
1
package eu.dnetlib.data.dedup;
2

  
3
import javax.annotation.Resource;
4

  
5
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
6

  
7
public abstract class AbstractDedupDAO {
8

  
9
	@Resource
10
	protected UniqueServiceLocator serviceLocator;
11

  
12
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/IndexDSUpdateJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop;
2

  
3
import javax.annotation.Resource;
4

  
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7

  
8
import com.googlecode.sarasvati.Arc;
9
import com.googlecode.sarasvati.NodeToken;
10

  
11
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryException;
12
import eu.dnetlib.enabling.is.registry.rmi.ISRegistryService;
13
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
14
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
15

  
16
public class IndexDSUpdateJobNode extends AsyncJobNode {
17

  
18
	private static final Log log = LogFactory.getLog(IndexDSUpdateJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
19

  
20
	@Resource
21
	private UniqueServiceLocator serviceLocator;
22

  
23
	@Override
24
	public String execute(final NodeToken token) throws Exception {
25

  
26
		final String dsId = token.getEnv().getAttribute("index_id");
27
		final String version = token.getEnv().getAttribute("index.feed.timestamp");
28
		log.info("updating indexDS: " + dsId + " version: " + version);
29
		updateIndexDS(dsId, version);
30

  
31
		return Arc.DEFAULT_ARC;
32
	}
33

  
34
	/**
35
	 * method updates the given indexDataStructureId INDEX_SIZE, INDEX_LAST_UPDATE
36
	 * 
37
	 * @param dsId
38
	 * @param version
39
	 * @return true if the update was performed successfully, false otherwise
40
	 * @throws ISRegistryException
41
	 */
42
	private boolean updateIndexDS(final String dsId, final String version) throws ISRegistryException {
43

  
44
		final String xquery = "for $x in collection('')/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '" + dsId + "']"
45
				+ " return update value $x//INDEX_LAST_UPDATE with '" + version + "'";
46

  
47
		log.debug("\n\n updating indexDataStructure: " + xquery + "\n\n");
48

  
49
		return serviceLocator.getService(ISRegistryService.class).executeXUpdate(xquery);
50
	}
51

  
52
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/test/resources/eu/dnetlib/msro/workflows/dedup/oaf_data.base64
1
CAES6D0IMjrDDRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5ldDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAyLTE3QkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM4NTgy+AEaOQoET1BFThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzU3NzL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzMzQyMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM3OTMy+AEaOQoET1BF
ThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzQ1MEIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNzkyQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM4MzdCGG9haTpldXJvcGVwbWMub3JnOjk5MzYyOUIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzZKSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUkYKGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MhIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUjgKClBNQzI0MDMzNDISKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM4MzcSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1I4CgpQTUMyNDAzNzkzEioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNjI5EioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSOAoKUE1DMjQwMzQ1MBIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5MzQ2MRIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUjgKClBNQzI0MDM1NzcSKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1I4CgpQTUMyNDAzODU4EioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNTM2EioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNaGDIwMTUtMDItMDZUMTM6MjU6MzMuODU0WmIxNTB8ZGVkdXBfd2ZfMDAxOjo0MTA0YTc3NDBhMTZhMjIzOWUzODcxYTEzYzZlNWQxY4IBlAgIMjrXBRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5l
dDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAyLTAzQkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM4NThCGG9haTpldXJvcGVwbWMub3JnOjk5MzgzN0pKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxSOAoKUE1DMjQwMzg1OBIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5MzgzNxIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzWhgyMDE1LTAyLTA2VDEzOjI0OjQzLjM1NVpiMTUwfG9kX19fX19fXzkwODo6OTQxMTFjYzhhNTYwMTgzOGQ5MjQ1NTFmZWViMTI4OTSKAQCCAZQICDI61wUS2QMKSAoIQXJ0aWNsZXMSPAoHa2V5d29yZBIHa2V5d29yZBoTZG5ldDpyZXN1bHRfc3ViamVjdCITZG5ldDpyZXN1bHRfc3ViamVjdBL9AQq2AUFuYWx5c2lzIGFuZCBSZXBvcnQgb24gT3JpZ2luYWwgRG9jdW1lbnRhcnkgRXZpZGVuY2UgQ29uY2VybmluZyB0aGUgVXNlIG9mIE9waXVtIGluIEluZGlhOiBbRnVybmlzaGVkIHRvIHRoZSDigJxCcml0aXNoIE1lZGljYWwgSm91cm5hbOKAnSBieSB1cHdhcmRzIG9mIDEwMCBJbmRpYW4gTWVkaWNhbCBPZmZpY2Vycy5dEkIKCm1haW4gdGl0bGUSCm1haW4gdGl0bGUaE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiDAoKMTg5NC0wMi0xMEJKCgtwdWJsaWNhdGlvbhILcHVibGljYXRpb24aFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXMiFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXNiLgoDZW5nEgdFbmdsaXNoGg5kbmV0Omxhbmd1YWdlcyIOZG5ldDpsYW5ndWFnZXPKAQIKADL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzNTc3QhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM2MjlKSgoxMTB8b3BlbmRvYXJf
X19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUjgKClBNQzI0MDM1NzcSKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM2MjkSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1oYMjAxNS0wMi0wNlQxMzoyMDoyMi43ODhaYjE1MHxvZF9fX19fX185MDg6OmJhNGY4OGYwMjhiOWJkM2FlYzYxZjc2NGZkNTA4MGQzigEAggGUCAgyOtcFEtkDCkgKCEFydGljbGVzEjwKB2tleXdvcmQSB2tleXdvcmQaE2RuZXQ6cmVzdWx0X3N1YmplY3QiE2RuZXQ6cmVzdWx0X3N1YmplY3QS/QEKtgFBbmFseXNpcyBhbmQgUmVwb3J0IG9uIE9yaWdpbmFsIERvY3VtZW50YXJ5IEV2aWRlbmNlIENvbmNlcm5pbmcgdGhlIFVzZSBvZiBPcGl1bSBpbiBJbmRpYTogW0Z1cm5pc2hlZCB0byB0aGUg4oCcQnJpdGlzaCBNZWRpY2FsIEpvdXJuYWzigJ0gYnkgdXB3YXJkcyBvZiAxMDAgSW5kaWFuIE1lZGljYWwgT2ZmaWNlcnMuXRJCCgptYWluIHRpdGxlEgptYWluIHRpdGxlGhNkbmV0OmRhdGFDaXRlX3RpdGxlIhNkbmV0OmRhdGFDaXRlX3RpdGxlIgwKCjE4OTQtMDEtMDZCSgoLcHVibGljYXRpb24SC3B1YmxpY2F0aW9uGhZkbmV0OnJlc3VsdF90eXBvbG9naWVzIhZkbmV0OnJlc3VsdF90eXBvbG9naWVzYi4KA2VuZxIHRW5nbGlzaBoOZG5ldDpsYW5ndWFnZXMiDmRuZXQ6bGFuZ3VhZ2VzygECCgAy+AEaOQoET1BFThILT3BlbiBBY2Nlc3MaEWRuZXQ6YWNjZXNzX21vZGVzIhFkbmV0OmFjY2Vzc19tb2RlcyJFCgQwMDAxEgdBcnRpY2xlGhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlIhlkbmV0OnB1YmxpY2F0aW9uX3Jlc291cmNlKkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbEooaHR0cDovL2V1cm9wZXBtYy5vcmcvYXJ0aWNsZXMvUE1DMjQwMzM0MkIYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxSkoKMTEwfG9wZW5kb2FyX19fXzo6OGI2ZGQ3ZGI5YWY0OWU2NzMwNmZlYjU5YThiZGM1MmMSFUV1cm9wZSBQdWJNZWQgQ2VudHJhbFI4CgpQTUMyNDAzMzQyEioKA3BtYxIDcG1jGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNSRgoYb2FpOmV1cm9wZXBtYy5vcmc6OTkzNDYxEioKA29haRIDb2FpGg5kbmV0OnBpZF90eXBlcyIOZG5ldDpwaWRfdHlwZXNaGDIwMTUtMDItMDZUMTM6MjQ6NTQuMjE4WmIxNTB8b2RfX19fX19fOTA4Ojo0OGNiZmYxOTk1MDc0YzI0NmU2MGY1YzM5YmZiMzVjNooBAIIBlAgIMjrXBRLZAwpICghBcnRpY2xlcxI8CgdrZXl3b3JkEgdrZXl3b3JkGhNkbmV0OnJlc3VsdF9zdWJqZWN0IhNkbmV0OnJlc3VsdF9zdWJqZWN0Ev0BCrYBQW5hbHlzaXMgYW5kIFJlcG9ydCBvbiBPcmlnaW5hbCBEb2N1bWVudGFyeSBFdmlkZW5jZSBDb25jZXJuaW5nIHRoZSBVc2Ugb2YgT3BpdW0gaW4gSW5kaWE6
IFtGdXJuaXNoZWQgdG8gdGhlIOKAnEJyaXRpc2ggTWVkaWNhbCBKb3VybmFs4oCdIGJ5IHVwd2FyZHMgb2YgMTAwIEluZGlhbiBNZWRpY2FsIE9mZmljZXJzLl0SQgoKbWFpbiB0aXRsZRIKbWFpbiB0aXRsZRoTZG5ldDpkYXRhQ2l0ZV90aXRsZSITZG5ldDpkYXRhQ2l0ZV90aXRsZSIMCgoxODk0LTAxLTEzQkoKC3B1YmxpY2F0aW9uEgtwdWJsaWNhdGlvbhoWZG5ldDpyZXN1bHRfdHlwb2xvZ2llcyIWZG5ldDpyZXN1bHRfdHlwb2xvZ2llc2IuCgNlbmcSB0VuZ2xpc2gaDmRuZXQ6bGFuZ3VhZ2VzIg5kbmV0Omxhbmd1YWdlc8oBAgoAMvgBGjkKBE9QRU4SC09wZW4gQWNjZXNzGhFkbmV0OmFjY2Vzc19tb2RlcyIRZG5ldDphY2Nlc3NfbW9kZXMiRQoEMDAwMRIHQXJ0aWNsZRoZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSIZZG5ldDpwdWJsaWNhdGlvbl9yZXNvdXJjZSpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxKKGh0dHA6Ly9ldXJvcGVwbWMub3JnL2FydGljbGVzL1BNQzI0MDM3OTNCGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MkpKCjExMHxvcGVuZG9hcl9fX186OjhiNmRkN2RiOWFmNDllNjczMDZmZWI1OWE4YmRjNTJjEhVFdXJvcGUgUHViTWVkIENlbnRyYWxSOAoKUE1DMjQwMzc5MxIqCgNwbWMSA3BtYxoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzUkYKGG9haTpldXJvcGVwbWMub3JnOjk5Mzc5MhIqCgNvYWkSA29haRoOZG5ldDpwaWRfdHlwZXMiDmRuZXQ6cGlkX3R5cGVzWhgyMDE1LTAyLTA2VDEzOjI0OjQzLjEyMlpiMTUwfG9kX19fX19fXzkwODo6NDEwNGE3NzQwYTE2YTIyMzllMzg3MWExM2M2ZTVkMWOKAQCCAZQICDI61wUS2QMKSAoIQXJ0aWNsZXMSPAoHa2V5d29yZBIHa2V5d29yZBoTZG5ldDpyZXN1bHRfc3ViamVjdCITZG5ldDpyZXN1bHRfc3ViamVjdBL9AQq2AUFuYWx5c2lzIGFuZCBSZXBvcnQgb24gT3JpZ2luYWwgRG9jdW1lbnRhcnkgRXZpZGVuY2UgQ29uY2VybmluZyB0aGUgVXNlIG9mIE9waXVtIGluIEluZGlhOiBbRnVybmlzaGVkIHRvIHRoZSDigJxCcml0aXNoIE1lZGljYWwgSm91cm5hbOKAnSBieSB1cHdhcmRzIG9mIDEwMCBJbmRpYW4gTWVkaWNhbCBPZmZpY2Vycy5dEkIKCm1haW4gdGl0bGUSCm1haW4gdGl0bGUaE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiE2RuZXQ6ZGF0YUNpdGVfdGl0bGUiDAoKMTg5NC0wMi0xN0JKCgtwdWJsaWNhdGlvbhILcHVibGljYXRpb24aFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXMiFmRuZXQ6cmVzdWx0X3R5cG9sb2dpZXNiLgoDZW5nEgdFbmdsaXNoGg5kbmV0Omxhbmd1YWdlcyIOZG5ldDpsYW5ndWFnZXPKAQIKADL4ARo5CgRPUEVOEgtPcGVuIEFjY2VzcxoRZG5ldDphY2Nlc3NfbW9kZXMiEWRuZXQ6YWNjZXNzX21vZGVzIkUKBDAwMDESB0FydGljbGUaGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UiGWRuZXQ6cHVibGljYXRpb25fcmVzb3VyY2UqSgoxMTB8b3BlbmRvYXJf
X19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsSihodHRwOi8vZXVyb3BlcG1jLm9yZy9hcnRpY2xlcy9QTUMyNDAzNDUwQhhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzZKSgoxMTB8b3BlbmRvYXJfX19fOjo4YjZkZDdkYjlhZjQ5ZTY3MzA2ZmViNTlhOGJkYzUyYxIVRXVyb3BlIFB1Yk1lZCBDZW50cmFsUjgKClBNQzI0MDM0NTASKgoDcG1jEgNwbWMaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1JGChhvYWk6ZXVyb3BlcG1jLm9yZzo5OTM1MzYSKgoDb2FpEgNvYWkaDmRuZXQ6cGlkX3R5cGVzIg5kbmV0OnBpZF90eXBlc1oYMjAxNS0wMi0wNlQxMzoyNTozMy44NTRaYjE1MHxvZF9fX19fX185MDg6OmMzYjJiMjA4OWE2YTJkMmUzNDZiZDg1YWQ5NmM3MzlligEAigEAIoEBCAEQABoDMC45IiJkZWR1cC1zaW1pbGFyaXR5LXJlc3VsdC1sZXZlbnN0ZWluKlIKD3N5c2ltcG9ydDpkZWR1cBIPc3lzaW1wb3J0OmRlZHVwGhZkbmV0OnByb3ZlbmFuY2VBY3Rpb25zIhZkbmV0OnByb3ZlbmFuY2VBY3Rpb25zKUT7BZlTAQAA
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/hadoop/utils/Similarity.java
1
package eu.dnetlib.msro.workflows.hadoop.utils;
2

  
3
import com.google.gson.Gson;
4

  
5
import eu.dnetlib.data.proto.TypeProtos.Type;
6
import eu.dnetlib.miscutils.collections.Pair;
7

  
8
public class Similarity {
9

  
10
	private Pair<String, String> pair;
11
	private Type type;
12

  
13
	public Similarity(final Pair<String, String> pair, final Type type) {
14
		super();
15
		this.setPair(pair);
16
		this.setType(type);
17
	}
18

  
19
	public Pair<String, String> getPair() {
20
		return pair;
21
	}
22

  
23
	public void setPair(final Pair<String, String> pair) {
24
		this.pair = pair;
25
	}
26

  
27
	public Type getType() {
28
		return type;
29
	}
30

  
31
	public void setType(final Type type) {
32
		this.type = type;
33
	}
34

  
35
	@Override
36
	public String toString() {
37
		return new Gson().toJson(this, Similarity.class);
38
	}
39
}
modules/dnet-deduplication/tags/dnet-deduplication-2.0.1/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestration.java
1
package eu.dnetlib.msro.workflows.dedup.conf;
2

  
3
import java.io.IOException;
4
import java.util.Queue;
5

  
6
import com.google.gson.Gson;
7
import com.google.gson.GsonBuilder;
8

  
9
import eu.dnetlib.pace.config.DedupConfig;
10
import eu.dnetlib.pace.util.PaceException;
11
import org.codehaus.jackson.map.ObjectMapper;
12

  
13
/**
14
 * The Class DedupConfigurationOrchestration.
15
 */
16
public class DedupConfigurationOrchestration {
17

  
18
	/** The entity. */
19
	private Entity entity;
20

  
21
	/** The action set id. */
22
	private String actionSetId;
23

  
24
	/** The configurations. */
25
	private Queue<DedupConfig> configurations;
26

  
27
	/**
	 * Creates an instance with entity, action set id and configurations unset; populate them through the setters.
	 */
	public DedupConfigurationOrchestration() {
	}
28

  
29
	/**
30
	 * Instantiates a new dedup configuration orchestration.
31
	 *
32
	 * @param entity
33
	 *            the entity
34
	 * @param actionSetId
35
	 *            the action set id
36
	 * @param configurations
37
	 *            the configurations
38
	 */
39
	public DedupConfigurationOrchestration(final Entity entity, final String actionSetId, final Queue<DedupConfig> configurations) {
		// initialise through the setters, in the same order as the parameters
		setEntity(entity);
		setActionSetId(actionSetId);
		setConfigurations(configurations);
	}
45

  
46
	/**
47
	 * Gets the entity.
48
	 *
49
	 * @return the entity
50
	 */
51
	public Entity getEntity() {
52
		return entity;
53
	}
54

  
55
	/**
56
	 * Gets the action set id.
57
	 *
58
	 * @return the action set id
59
	 */
60
	public String getActionSetId() {
61
		return actionSetId;
62
	}
63

  
64
	/**
65
	 * Gets the configurations.
66
	 *
67
	 * @return the configurations
68
	 */
69
	public Queue<DedupConfig> getConfigurations() {
70
		return configurations;
71
	}
72

  
73
	public void setEntity(final Entity entity) {
74
		this.entity = entity;
75
	}
76

  
77
	public void setActionSetId(final String actionSetId) {
78
		this.actionSetId = actionSetId;
79
	}
80

  
81
	public void setConfigurations(final Queue<DedupConfig> configurations) {
82
		this.configurations = configurations;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff