Project

General

Profile

« Previous | Next » 

Revision 37206

[maven-release-plugin] copy for tag dnet-deduplication-1.1.3

View differences:

modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/resources/eu/dnetlib/msro/workflows/applicationContext-workflow-nodes.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
4
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
5

  
6
	<bean id="wfNodePrepareMDStoreImport"
7
		class="eu.dnetlib.msro.workflows.hadoop.PrepareMDStoreImportJobNode"
8
		scope="prototype" />
9
		
10
	<bean id="wfNodeReuseHdfsRecords"
11
		class="eu.dnetlib.msro.workflows.hadoop.ReuseHdfsRecordsJobNode"
12
		scope="prototype" />
13
		
14
	<bean id="wfNodeMDStoreBatchExporter"
15
		class="eu.dnetlib.msro.workflows.hadoop.MDStoreBatchExporterJobNode"
16
		scope="prototype" />		
17
		
18
	<bean id="wfNodeStoreHdfsRecords"
19
		class="eu.dnetlib.msro.workflows.hadoop.StoreHdfsRecordsJobNode"
20
		scope="prototype" p:processCountingResultSetFactory-ref="msroProcessCountingResultSetFactory" />		
21
		
22
	<bean id="wfNodeDeleteHdfsPathJob"
23
		class="eu.dnetlib.msro.workflows.hadoop.DeleteHdfsPathJobNode"
24
		scope="prototype" />		
25
		
26
	<bean id="wfNodeSubmitHadoopJob"
27
		class="eu.dnetlib.msro.workflows.hadoop.SubmitHadoopJobNode"
28
		scope="prototype" />
29
		
30
	<bean id="wfNodeSetClusterAndTable"
31
		class="eu.dnetlib.msro.workflows.hadoop.SetClusterAndTableJobNode"
32
		scope="prototype" />
33
		
34
	<bean id="wfNodeSetDedupConfiguration"
35
		class="eu.dnetlib.msro.workflows.dedup.DedupConfigurationSetterJobNode"
36
		scope="prototype" />
37
		
38
	<bean id="wfNodePrepareDedupIndexJob"
39
		class="eu.dnetlib.msro.workflows.dedup.PrepareDedupIndexJobNode"
40
		scope="prototype" p:hbaseTable="${hbase.mapred.datatable}" />
41
		
42
	<bean id="wfNodeFinalizeDedupIndexFeeding"
43
		class="eu.dnetlib.msro.workflows.dedup.FinalizeDedupIndexJobNode"
44
		scope="prototype" />				
45
		
46
	<!-- RESET -->		
47
		
48
	<bean id="wfNodeCheckHBaseTable"
49
		class="eu.dnetlib.msro.workflows.hadoop.hbase.ExistHBaseTableJobNode"
50
		scope="prototype" />
51
		
52
	<bean id="wfNodeDropHBaseTable"
53
		class="eu.dnetlib.msro.workflows.hadoop.hbase.DropHBaseTableJobNode"
54
		scope="prototype" />
55
		
56
	<bean id="wfNodeDefineHBaseOpenaireSchema"
57
		class="eu.dnetlib.msro.workflows.hadoop.hbase.DefineHBaseOpenaireSchemaJobNode"
58
		scope="prototype" />
59
		
60
	<bean id="wfNodeCreateHBaseTable"
61
		class="eu.dnetlib.msro.workflows.hadoop.hbase.CreateHBaseTableJobNode"
62
		scope="prototype" />
63
		
64
	<bean id="wfNodeFindIndex"
65
		class="eu.dnetlib.msro.workflows.hadoop.FindIndexJobNode"
66
		scope="prototype" />
67
		
68
	<bean id="wfNodeIndexDsUpdateJob"
69
		class="eu.dnetlib.msro.workflows.hadoop.IndexDSUpdateJobNode"
70
		scope="prototype" />
71
		
72
	<bean id="wfNodeQueryUserActionDbJob"
73
		class="eu.dnetlib.msro.workflows.dedup.QueryUserActionDbJobNode"
74
		scope="prototype" />
75
		
76
	<bean id="wfNodeBuildSimilarityMeshJob"
77
		class="eu.dnetlib.msro.workflows.dedup.BuildSimilarityMeshJobNode"
78
		p:similarity-ref="similarityTemplate" scope="prototype" />		
79
		
80
	<bean id="similarityTemplate"
81
		class="eu.dnetlib.springutils.stringtemplate.StringTemplateFactory"
82
		p:template="classpath:/eu/dnetlib/msro/workflows/dedup/similarityTemplate.st"
83
		scope="prototype" />	
84
		
85
	<bean id="wfNodeStoreHBase"
86
		class="eu.dnetlib.msro.workflows.hadoop.hbase.StoreHBaseRecordsJobNode"
87
		scope="prototype" p:processCountingResultSetFactory-ref="msroProcessCountingResultSetFactory" />		
88
		
89
	<bean id="wfNodeDeleteFromHBase"
90
		class="eu.dnetlib.msro.workflows.hadoop.hbase.DeleteHBaseRecordsJobNode"
91
		scope="prototype" p:processCountingResultSetFactory-ref="msroProcessCountingResultSetFactory" />		
92
		
93
	<bean id="wfNodePromoteActions"
94
		class="eu.dnetlib.msro.workflows.actions.PromoteActionsJobNode"
95
		scope="prototype" />
96
		
97
		
98
	<!-- DEDUP -->
99
	
100
	
101
	<bean id="dedupOrchestrationLoader"
102
		class="eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestrationLoader"
103
		scope="prototype" />
104

  
105
	<bean id="wfNodeCheckEntitySequence"
106
		class="eu.dnetlib.msro.workflows.dedup.DedupCheckEntitySequenceJobNode"
107
		scope="prototype" />
108

  
109
	<bean id="wfNodeFetchRelClasses"
110
		class="eu.dnetlib.msro.workflows.hadoop.FetchRelClassesJobNode"
111
		scope="prototype" />
112

  
113
	<bean id="wfNodeDedupSimilarityToActionsJobNode" class="eu.dnetlib.msro.workflows.dedup.DedupSimilarityToActionsJobNode"
114
		scope="prototype" />
115

  
116
	<bean id="wfNodeDedupGrouperJob" class="eu.dnetlib.msro.workflows.dedup.DedupGrouperJobNode"
117
		scope="prototype" />
118
		
119
	<bean id="wfNodeDuplicateScanJob" class="eu.dnetlib.msro.workflows.dedup.DedupDuplicateScanJobNode"
120
		scope="prototype"/>
121

  
122
	<bean id="wfNodeDedupConfigurationAwareJob"
123
		class="eu.dnetlib.msro.workflows.dedup.DedupConfigurationAwareJobNode"
124
		scope="prototype" />
125

  
126
	<!-- / DEDUP -->	
127

  
128
	<bean id="wfNodeUpdateActionSets" 
129
		class="eu.dnetlib.msro.workflows.actions.UpdateSetsJobNode"
130
		scope="prototype" />
131

  
132
	<bean id="wfNodePrepareActionSets" 
133
		class="eu.dnetlib.msro.workflows.actions.PrepareActionSetsJobNode"
134
		scope="prototype" />
135

  
136
	<bean id="wfNodePrepareConfiguredActionSet" 
137
		class="eu.dnetlib.msro.workflows.actions.PrepareConfiguredActionSetJobNode"
138
		scope="prototype" />
139
	
140
	<bean id="wfNodeCleanActionSetsProfile"
141
		class="eu.dnetlib.msro.workflows.actions.CleanActionSetsProfileJobNode"
142
		p:xupdate="${dnet.actionmanager.garbage.sets.xupdate}" scope="prototype" />
143
		
144
	<bean id="wfNodeGarbageSets"
145
		class="eu.dnetlib.msro.workflows.actions.GarbageSetsJobNode"
146
		scope="prototype" />		
147

  
148
</beans>
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestrationLoader.java
1
package eu.dnetlib.msro.workflows.dedup.conf;
2

  
3
import java.io.StringReader;
4
import java.util.List;
5
import java.util.Queue;
6

  
7
import javax.annotation.Resource;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.DocumentException;
13
import org.dom4j.Element;
14
import org.dom4j.io.SAXReader;
15

  
16
import com.google.common.collect.Lists;
17

  
18
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
19
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
20
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
21
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
22
import eu.dnetlib.pace.config.DedupConfig;
23

  
24
/**
25
 * The Class DedupConfigurationOrchestrationLoader.
26
 */
27
public class DedupConfigurationOrchestrationLoader {
28

  
29
	/** The Constant log. */
30
	private static final Log log = LogFactory.getLog(DedupConfigurationOrchestrationLoader.class);
31

  
32
	/** The service locator. */
33
	@Resource
34
	private UniqueServiceLocator serviceLocator;
35

  
36
	/**
37
	 * Load the dedup orchestration profile from the IS.
38
	 *
39
	 * @param id
40
	 *            the id
41
	 * @return the dedup configuration orchestration
42
	 * @throws ISLookUpDocumentNotFoundException
43
	 *             the IS look up document not found exception
44
	 * @throws ISLookUpException
45
	 *             the IS look up exception
46
	 * @throws DocumentException
47
	 *             the document exception
48
	 */
49
	public DedupConfigurationOrchestration loadByActionSetId(final String id) throws Exception {
50

  
51
		final ISLookUpService isLookUpService = serviceLocator.getService(ISLookUpService.class);
52

  
53
		final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ACTION_SET/@id = '%s']", id);
54
		log.info("loading dedup orchestration: " + xquery);
55

  
56
		return parseOrchestrationProfile(isLookUpService, isLookUpService.getResourceProfileByQuery(xquery));
57
	}
58

  
59
	public List<DedupConfigurationOrchestration> loadByEntityName(final String entityName) throws Exception {
60

  
61
		final ISLookUpService isLookUpService = serviceLocator.getService(ISLookUpService.class);
62

  
63
		final String xquery = String.format("/RESOURCE_PROFILE[.//DEDUPLICATION/ENTITY/@name = '%s']", entityName);
64
		log.info("loading dedup orchestration: " + xquery);
65

  
66
		final List<DedupConfigurationOrchestration> res = Lists.newArrayList();
67

  
68
		for (final String profile : isLookUpService.quickSearchProfile(xquery)) {
69
			res.add(parseOrchestrationProfile(isLookUpService, profile));
70
		}
71

  
72
		return res;
73
	}
74

  
75
	private DedupConfigurationOrchestration parseOrchestrationProfile(final ISLookUpService isLookUpService, final String dedupOrchestation)
76
			throws DocumentException,
77
			ISLookUpException, ISLookUpDocumentNotFoundException {
78
		final Document doc = new SAXReader().read(new StringReader(dedupOrchestation));
79

  
80
		final Element e = (Element) doc.selectSingleNode("//DEDUPLICATION/ENTITY");
81
		final Entity entity = new Entity(e.attributeValue("name"), e.attributeValue("code"), e.attributeValue("label"));
82

  
83
		final String actionSetId = doc.valueOf("//DEDUPLICATION/ACTION_SET/@id");
84
		final Queue<DedupConfig> configurations = Lists.newLinkedList();
85

  
86
		for (final Object o : doc.selectNodes("//SCAN_SEQUENCE/SCAN")) {
87
			configurations.add(loadConfig(isLookUpService, actionSetId, o));
88
		}
89

  
90
		final DedupConfigurationOrchestration dco = new DedupConfigurationOrchestration(entity, actionSetId, configurations);
91

  
92
		log.debug("loaded dedup configuration orchestration: " + dco.toString());
93
		log.info("loaded dedup configuration orchestration, size: " + dco.getConfigurations().size());
94
		return dco;
95
	}
96

  
97
	private DedupConfig loadConfig(final ISLookUpService isLookUpService, final String actionSetId, final Object o) throws ISLookUpException,
98
			ISLookUpDocumentNotFoundException {
99
		final Element s = (Element) o;
100
		final String configProfileId = s.attributeValue("id");
101
		final String conf =
102
				isLookUpService.getResourceProfileByQuery(String.format(
103
						"for $x in /RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '%s'] return $x//DEDUPLICATION/text()",
104
						configProfileId));
105

  
106
		final DedupConfig dedupConfig = DedupConfig.load(conf);
107
		dedupConfig.getWf().setConfigurationId(actionSetId);
108
		return dedupConfig;
109
	}
110
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/resources/eu/dnetlib/functionality/modular/ui/workflows/values/applicationContext-ui.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
4
	xmlns:p="http://www.springframework.org/schema/p" xmlns:util="http://www.springframework.org/schema/util"
5
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
6
		http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd
7
		http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd">
8

  
9
	<bean id="listDedupOrchestrationValues" lazy-init="true" 
10
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListDedupOrchestrationValues"
11
		p:name="dedupOrchestrations" />      
12
      
13
	<bean id="listDedupConfigValues" lazy-init="true" 
14
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListDedupConfigValues"
15
		p:name="dedupConfigs" />    
16
        
17
	<bean id="listActionManagerSetsValues" lazy-init="true" 
18
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListActionManagerSetsValues"
19
		p:name="actionSets" />
20
		
21
	<bean id="listHBaseTables" lazy-init="true"
22
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListHBaseTables"
23
		p:name="hbaseTables"/>
24
		
25
	<bean id="listHadoopClusters" lazy-init="true"
26
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListHadoopClusters"
27
		p:name="hadoopClusters" />	
28
	
29
	<bean id="listHBaseMappings" lazy-init="true"
30
		class="eu.dnetlib.functionality.modular.ui.workflows.values.ListHBaseMappingTitleValues"
31
		p:name="hbaseMappings" />
32

  
33
</beans>
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupCheckEntitySequenceJobNode.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import java.util.Queue;
4

  
5
import org.apache.commons.lang.StringUtils;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.springframework.beans.factory.annotation.Autowired;
9

  
10
import com.google.common.base.Function;
11
import com.google.common.base.Splitter;
12
import com.google.common.collect.Iterables;
13
import com.google.common.collect.Lists;
14
import com.googlecode.sarasvati.Arc;
15
import com.googlecode.sarasvati.NodeToken;
16

  
17
import eu.dnetlib.msro.rmi.MSROException;
18
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration;
19
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestrationLoader;
20
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
21

  
22
public class DedupCheckEntitySequenceJobNode extends AsyncJobNode {
23

  
24
	private static final Log log = LogFactory.getLog(DedupCheckEntitySequenceJobNode.class);
25

  
26
	@Autowired
27
	private DedupConfigurationOrchestrationLoader dedupOrchestrationLoader;
28

  
29
	private String dedupConfigSequenceParam;
30

  
31
	private String entitySequence;
32

  
33
	@Override
34
	protected String execute(final NodeToken token) throws Exception {
35

  
36
		if (StringUtils.isBlank(getEntitySequence())) throw new MSROException("missing entity sequence, e.g. a csv: organization,person,result");
37

  
38
		if (token.getFullEnv().hasAttribute(DedupGrouperJobNode.DEDUP_GROUPER_LOOPER)) {
39
			log.info("reset env variable: " + DedupGrouperJobNode.DEDUP_GROUPER_LOOPER + " to zero");
40
			token.getFullEnv().setAttribute(DedupGrouperJobNode.DEDUP_GROUPER_LOOPER, 0);
41
		}
42

  
43
		if (!token.getEnv().hasAttribute("entitySequence")) {
44

  
45
			log.info("parsing config sequence: " + getEntitySequence());
46

  
47
			token.getEnv().setAttribute("entitySequence", getEntitySequence());
48

  
49
			final Iterable<String> sequence = Splitter.on(",").omitEmptyStrings().split(getEntitySequence());
50
			final Queue<DedupConfigurationOrchestration> q =
51
					Lists.newLinkedList(Iterables.transform(sequence, new Function<String, DedupConfigurationOrchestration>() {
52

  
53
						@Override
54
						public DedupConfigurationOrchestration apply(final String entityName) {
55
							try {
56
								final DedupConfigurationOrchestration dco = Iterables.getFirst(dedupOrchestrationLoader.loadByEntityName(entityName), null);
57
								if (dco == null) throw new RuntimeException("unable to find DedupOrchestration profile for entity type: " + entityName);
58
								return dco;
59
							} catch (final Throwable e) {
60
								throw new RuntimeException("", e);
61
							}
62
						}
63
					}));
64

  
65
			log.info("built sequence of dedup orchestration profiles, size: " + q.size());
66
			final DedupConfigurationOrchestration dco = q.remove();
67
			log.info("closing mesh for entity: " + dco.getEntity().getName());
68
			setDedupConfParams(token, dco);
69
			token.getEnv().setTransientAttribute("entitySequenceQueue", q);
70

  
71
			return Arc.DEFAULT_ARC;
72
		}
73

  
74
		@SuppressWarnings("unchecked")
75
		final Queue<DedupConfigurationOrchestration> q = (Queue<DedupConfigurationOrchestration>) token.getEnv().getTransientAttribute("entitySequenceQueue");
76

  
77
		if (!q.isEmpty()) {
78
			log.info("remaining dedup orchestration profiles: " + q.size());
79
			final DedupConfigurationOrchestration dco = q.remove();
80
			log.info("closing mesh for entity: " + dco.getEntity().getName());
81

  
82
			setDedupConfParams(token, dco);
83
			return Arc.DEFAULT_ARC;
84
		}
85

  
86
		log.info("completed closing mesh for entities: " + getEntitySequence());
87
		return "done";
88

  
89
	}
90

  
91
	private void setDedupConfParams(final NodeToken token, final DedupConfigurationOrchestration dco) {
92
		token.getEnv().setAttribute("entityType", dco.getEntity().getName());
93
		token.getEnv().setAttribute("entityTypeId", dco.getEntity().getCode());
94
		token.getEnv().setAttribute(getDedupConfigSequenceParam(), dco.toString());
95
	}
96

  
97
	public String getEntitySequence() {
98
		return entitySequence;
99
	}
100

  
101
	public void setEntitySequence(final String entitySequence) {
102
		this.entitySequence = entitySequence;
103
	}
104

  
105
	public String getDedupConfigSequenceParam() {
106
		return dedupConfigSequenceParam;
107
	}
108

  
109
	public void setDedupConfigSequenceParam(final String dedupConfigSequenceParam) {
110
		this.dedupConfigSequenceParam = dedupConfigSequenceParam;
111
	}
112

  
113
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/resources/eu/dnetlib/functionality/modular/ui/webContext-modular-ui-dedup.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns="http://www.springframework.org/schema/beans"
3
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jaxws="http://cxf.apache.org/jaxws"
4
	xmlns:sec="http://cxf.apache.org/configuration/security" xmlns:wsa="http://cxf.apache.org/ws/addressing"
5
	xmlns:p="http://www.springframework.org/schema/p" xmlns:http="http://cxf.apache.org/transports/http/configuration"
6
	xmlns:t="http://dnetlib.eu/springbeans/t" xmlns:template="http://dnetlib.eu/springbeans/template"
7
	xmlns:util="http://www.springframework.org/schema/util" xmlns:context="http://www.springframework.org/schema/context"
8
	xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
9
						http://cxf.apache.org/ws/addressing http://cxf.apache.org/schemas/ws-addr-conf.xsd
10
						http://cxf.apache.org/configuration/security http://cxf.apache.org/schemas/configuration/security.xsd
11
						http://cxf.apache.org/transports/http/configuration http://cxf.apache.org/schemas/configuration/http-conf.xsd
12
						http://cxf.apache.org/jaxws http://cxf.apache.org/schemas/jaxws.xsd
13
						http://dnetlib.eu/springbeans/template http://dnetlib.eu/springbeans/template.xsd
14
						http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd
15
						http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context.xsd">
16

  
17

  
18
	<bean name="/ui/dedupInspector.do"
19
		class="eu.dnetlib.functionality.modular.ui.dedup.DedupServiceEntryPointController"
20
		p:menu="Dedup Service Inspector" 
21
		p:title="Dedup Service Inspector"
22
		p:description="Dedup Service Inspector"
23
		p:order="5" 
24
		p:group="Tools">
25
		<property name="permissionLevels">
26
			<set>
27
				<value>IS_ADMIN</value>
28
			</set>
29
		</property>
30
	</bean>
31
	
32
</beans>
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupDuplicateScanJobNode.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import org.apache.commons.collections.CollectionUtils;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

  
7
import com.googlecode.sarasvati.Arc;
8
import com.googlecode.sarasvati.Engine;
9
import com.googlecode.sarasvati.NodeToken;
10

  
11
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
12
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration;
13
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
14

  
15
public class DedupDuplicateScanJobNode extends DedupConfigurationAwareJobNode {
16

  
17
	private static final Log log = LogFactory.getLog(DedupDuplicateScanJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
18

  
19
	private class DedupBlackboardWorkflowJobListener extends BlackboardWorkflowJobListener {
20

  
21
		public DedupBlackboardWorkflowJobListener(final Engine engine, final NodeToken token) {
22
			super(engine, token);
23
		}
24

  
25
		@Override
26
		protected void onDone(final BlackboardJob job) {
27

  
28
			final DedupConfigurationOrchestration confs = dedupConfigurations(getToken());
29

  
30
			confs.getConfigurations().poll();
31

  
32
			log.info("checking dedup configs queue, size: " + confs.getConfigurations().size());
33

  
34
			if (CollectionUtils.isEmpty(confs.getConfigurations())) {
35
				log.info("dedup similarity scan done");
36
				super.complete(job, "done");
37
			} else {
38
				log.debug("remaining confs: " + confs);
39

  
40
				getToken().getEnv().setAttribute(getDedupConfigSequenceParam(), confs.toString());
41

  
42
				super.complete(job, Arc.DEFAULT_ARC);
43
			}
44
		}
45
	}
46

  
47
	@Override
48
	protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) {
49
		return new DedupBlackboardWorkflowJobListener(engine, token);
50
	}
51

  
52
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupSimilarityToActionsJobNode.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import org.apache.commons.lang.StringUtils;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

  
7
import com.googlecode.sarasvati.NodeToken;
8

  
9
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
10
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
11
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
12
import eu.dnetlib.data.proto.TypeProtos.Type;
13
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
14
import eu.dnetlib.msro.rmi.MSROException;
15

  
16
public class DedupSimilarityToActionsJobNode extends DedupConfigurationAwareJobNode {
17

  
18
	private static final Log log = LogFactory.getLog(DedupSimilarityToActionsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
19

  
20
	@Override
21
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
22
		super.prepareJob(job, token);
23

  
24
		final String entityType = token.getEnv().getAttribute("entityType");
25
		if (StringUtils.isBlank(entityType))
26
			throw new MSROException("unable to find wf param: entityType");
27

  
28
		String cf = "_" + SubRelType.dedupSimilarity + "_" + DedupSimilarity.RelName.isSimilarTo;
29
		switch (Type.valueOf(entityType)) {
30

  
31
		case organization:
32
			cf = RelType.organizationOrganization + cf;
33
			break;
34
		case person:
35
			cf = RelType.personPerson + cf;
36
			break;
37
		case result:
38
			cf = RelType.resultResult + cf;
39
			break;
40
		default:
41
			throw new MSROException("invalid parameter entityType: " + entityType);
42
		}
43

  
44
		log.info("using similarity CF: " + cf);
45
		job.getParameters().put("similarityCF", cf);
46
		token.getEnv().setAttribute("similarityCF", cf);
47
	}
48

  
49
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/BuildSimilarityMeshJobNode.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import java.io.StringReader;
4
import java.util.Iterator;
5
import java.util.List;
6
import java.util.Queue;
7

  
8
import javax.annotation.Resource;
9
import javax.xml.ws.wsaddressing.W3CEndpointReference;
10

  
11
import org.antlr.stringtemplate.StringTemplate;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14
import org.dom4j.Document;
15
import org.dom4j.DocumentException;
16
import org.dom4j.Node;
17
import org.dom4j.io.SAXReader;
18
import org.springframework.beans.factory.annotation.Required;
19

  
20
import com.google.common.collect.Lists;
21
import com.google.common.collect.Queues;
22
import com.googlecode.sarasvati.Arc;
23
import com.googlecode.sarasvati.NodeToken;
24

  
25
import eu.dnetlib.data.proto.TypeProtos.Type;
26
import eu.dnetlib.enabling.resultset.IterableResultSetFactory;
27
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
28
import eu.dnetlib.msro.workflows.hadoop.utils.Similarity;
29
import eu.dnetlib.msro.workflows.hadoop.utils.SimilarityMeshBuilder;
30
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
31

  
32
public class BuildSimilarityMeshJobNode extends AsyncJobNode {
33

  
34
	private static final Log log = LogFactory.getLog(BuildSimilarityMeshJobNode.class);
35

  
36
	/** The result set factory. */
37
	@Resource(name = "iterableResultSetFactory")
38
	private IterableResultSetFactory resultSetFactory;
39

  
40
	/** The result set client factory. */
41
	@Resource(name = "resultSetClientFactory")
42
	private ResultSetClientFactory resultSetClientFactory;
43

  
44
	private StringTemplate similarity;
45

  
46
	private String inputEprParam;
47

  
48
	private String outputEprParam;
49

  
50
	@Override
51
	protected String execute(final NodeToken token) throws Exception {
52

  
53
		final String inputEpr = token.getEnv().getAttribute(getInputEprParam());
54

  
55
		final Iterator<String> rsClient = resultSetClientFactory.getClient(inputEpr).iterator();
56
		final Queue<Object> queue = Queues.newLinkedBlockingQueue();
57
		final SAXReader reader = new SAXReader();
58

  
59
		if (rsClient.hasNext()) {
60
			populateQueue(queue, reader, rsClient.next());
61
		}
62

  
63
		final W3CEndpointReference eprOut = resultSetFactory.createIterableResultSet(new Iterable<String>() {
64

  
65
			@Override
66
			public Iterator<String> iterator() {
67
				return new Iterator<String>() {
68

  
69
					@Override
70
					public boolean hasNext() {
71
						synchronized (queue) {
72
							return !queue.isEmpty();
73
						}
74
					}
75

  
76
					@Override
77
					public String next() {
78
						synchronized (queue) {
79
							final Object o = queue.poll();
80
							while (queue.isEmpty() && rsClient.hasNext()) {
81
								populateQueue(queue, reader, rsClient.next());
82
							}
83
							return buildSimilarity((Similarity) o);
84
						}
85
					}
86

  
87
					@Override
88
					public void remove() {
89
						throw new UnsupportedOperationException();
90
					}
91
				};
92
			}
93
		});
94

  
95
		token.getEnv().setAttribute(getOutputEprParam(), eprOut.toString());
96

  
97
		return Arc.DEFAULT_ARC;
98
	}
99

  
100
	private void populateQueue(final Queue<Object> q, final SAXReader r, final String xml) {
101
		try {
102
			final Document d = r.read(new StringReader(xml));
103
			final String groupid = d.valueOf("//FIELD[@name='id']");
104
			final List<?> items = d.selectNodes("//FIELD[@name='group']/ITEM");
105
			final String entitytype = d.valueOf("//FIELD[@name='entitytype']");
106
			final List<String> group = Lists.newArrayList();
107
			for (final Object id : items) {
108
				group.add(((Node) id).getText());
109
			}
110
			// compute the full mesh
111
			final Type type = Type.valueOf(entitytype);
112

  
113
			final List<Similarity> mesh = SimilarityMeshBuilder.build(type, group);
114
			// total += mesh.size();
115
			if (log.isDebugEnabled()) {
116
				log.debug(String.format("built mesh for group '%s', size %d", groupid, mesh.size()));
117
			}
118
			for (final Similarity s : mesh) {
119
				if (log.isDebugEnabled()) {
120
					log.debug(String.format("adding to queue: %s", s.toString()));
121
				}
122
				q.add(s);
123
			}
124
		} catch (final DocumentException e) {
125
			log.error("invalid document: " + xml);
126
		}
127
	}
128

  
129
	private String buildSimilarity(final Similarity s) {
130
		final StringTemplate template = new StringTemplate(getSimilarity().getTemplate());
131

  
132
		template.setAttribute("source", s.getPair().getKey());
133
		template.setAttribute("target", s.getPair().getValue());
134
		template.setAttribute("type", s.getType().toString());
135

  
136
		final String res = template.toString();
137
		return res;
138
	}
139

  
140
	public String getInputEprParam() {
141
		return inputEprParam;
142
	}
143

  
144
	public void setInputEprParam(final String inputEprParam) {
145
		this.inputEprParam = inputEprParam;
146
	}
147

  
148
	public String getOutputEprParam() {
149
		return outputEprParam;
150
	}
151

  
152
	public void setOutputEprParam(final String outputEprParam) {
153
		this.outputEprParam = outputEprParam;
154
	}
155

  
156
	public StringTemplate getSimilarity() {
157
		return similarity;
158
	}
159

  
160
	@Required
161
	public void setSimilarity(final StringTemplate similarity) {
162
		this.similarity = similarity;
163
	}
164

  
165
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/DedupConfigurationSetterJobNode.java
1
package eu.dnetlib.msro.workflows.dedup;
2

  
3
import org.apache.commons.lang.StringUtils;
4
import org.springframework.beans.factory.annotation.Autowired;
5

  
6
import com.googlecode.sarasvati.Arc;
7
import com.googlecode.sarasvati.NodeToken;
8

  
9
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestration;
10
import eu.dnetlib.msro.workflows.dedup.conf.DedupConfigurationOrchestrationLoader;
11
import eu.dnetlib.msro.workflows.nodes.AsyncJobNode;
12

  
13
public class DedupConfigurationSetterJobNode extends AsyncJobNode {
14

  
15
	private String dedupConfigSequence;
16

  
17
	private String dedupConfigSequenceParam;
18

  
19
	@Autowired
20
	private DedupConfigurationOrchestrationLoader dedupOrchestrationLoader;
21

  
22
	@Override
23
	protected String execute(final NodeToken token) throws Exception {
24

  
25
		if (StringUtils.isBlank(getDedupConfigSequence())) throw new IllegalArgumentException("missing configuration sequence");
26

  
27
		final DedupConfigurationOrchestration dedupOrchestration = dedupOrchestrationLoader.loadByActionSetId(getDedupConfigSequence());
28

  
29
		token.getEnv().setAttribute("entityType", dedupOrchestration.getEntity().getName());
30
		token.getEnv().setAttribute("entityTypeId", dedupOrchestration.getEntity().getCode());
31

  
32
		token.getEnv().setAttribute(getDedupConfigSequenceParam(), dedupOrchestration.toString());
33

  
34
		return Arc.DEFAULT_ARC;
35
	}
36

  
37
	public String getDedupConfigSequence() {
38
		return dedupConfigSequence;
39
	}
40

  
41
	public void setDedupConfigSequence(final String dedupConfigSequence) {
42
		this.dedupConfigSequence = dedupConfigSequence;
43
	}
44

  
45
	public String getDedupConfigSequenceParam() {
46
		return dedupConfigSequenceParam;
47
	}
48

  
49
	public void setDedupConfigSequenceParam(final String dedupConfigSequenceParam) {
50
		this.dedupConfigSequenceParam = dedupConfigSequenceParam;
51
	}
52

  
53
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/hadoop/utils/HBaseTableUtils.java
1
package eu.dnetlib.msro.workflows.hadoop.utils;
2

  
3
import java.util.Collection;
4
import java.util.Set;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.base.Predicate;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Lists;
10
import com.google.common.collect.Sets;
11

  
12
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
13
import eu.dnetlib.data.proto.DedupProtos.Dedup;
14
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
15
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship;
16
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship;
17
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
18
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson;
19
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
20
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
21
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
22
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
23
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
24
import eu.dnetlib.data.proto.TypeProtos.Type;
25

  
26
/**
27
 * Common static utility methods to manage the hbase tables
28
 *
29
 * @author claudio
30
 *
31
 */
32
public class HBaseTableUtils {
33

  
34
	private static final String _ = "_";
35
	private static Function<Type, String> typeName = new Function<Type, String>() {
36

  
37
		@Override
38
		public String apply(final Type type) {
39
			return type.toString();
40
		}
41
	};
42

  
43
	public static Set<String> listAllColumns() {
44
		final Set<String> union = Sets.union(listEntities(), listRelationships());
45
		return Sets.union(union, listDedupRelationships());
46
	}
47

  
48
	public static Set<String> listDedupColumns(final Collection<Type> entityTypes) {
49
		final Set<String> entities = listEntities(Lists.newArrayList(Iterables.transform(entityTypes, typeName)));
50
		return Sets.union(entities, listDedupRelationships());
51
	}
52

  
53
	private static Set<String> listDedupRelationships() {
54
		final Set<String> cfs = Sets.newHashSet();
55
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
56
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
57
		cfs.add(RelType.organizationOrganization + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
58

  
59
		cfs.add(RelType.personPerson + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
60
		cfs.add(RelType.personPerson + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
61
		cfs.add(RelType.personPerson + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
62

  
63
		cfs.add(RelType.resultResult + _ + SubRelType.dedup + _ + Dedup.RelName.merges);
64
		cfs.add(RelType.resultResult + _ + SubRelType.dedup + _ + Dedup.RelName.isMergedIn);
65
		cfs.add(RelType.resultResult + _ + SubRelType.dedupSimilarity + _ + DedupSimilarity.RelName.isSimilarTo);
66

  
67
		return cfs;
68
	}
69

  
70
	private static Set<String> listEntities(final Collection<String> entityType) {
71
		return Sets.newHashSet(Iterables.filter(Iterables.transform(Lists.newArrayList(Type.values()), typeName), new Predicate<String>() {
72

  
73
			@Override
74
			public boolean apply(final String s) {
75
				return entityType.contains(s);
76
			}
77
		}));
78
	}
79

  
80
	public static Set<String> listEntities() {
81
		return Sets.newHashSet(Iterables.transform(Lists.newArrayList(Type.values()), typeName));
82
	}
83

  
84
	public static Set<String> listRelationships() {
85
		final Set<String> cfs = Sets.newHashSet();
86
		cfs.add(RelType.datasourceOrganization + _ + SubRelType.provision + _ + Provision.RelName.isProvidedBy);
87
		cfs.add(RelType.datasourceOrganization + _ + SubRelType.provision + _ + Provision.RelName.provides);
88

  
89
		cfs.add(RelType.personPerson + _ + SubRelType.coauthorship + _ + CoAuthorship.RelName.isCoauthorOf);
90

  
91
		cfs.add(RelType.personResult + _ + SubRelType.authorship + _ + Authorship.RelName.isAuthorOf);
92
		cfs.add(RelType.personResult + _ + SubRelType.authorship + _ + Authorship.RelName.hasAuthor);
93

  
94
		cfs.add(RelType.projectOrganization + _ + SubRelType.participation + _ + Participation.RelName.hasParticipant);
95
		cfs.add(RelType.projectOrganization + _ + SubRelType.participation + _ + Participation.RelName.isParticipant);
96

  
97
		cfs.add(RelType.projectPerson + _ + SubRelType.contactPerson + _ + ContactPerson.RelName.isContact);
98
		cfs.add(RelType.projectPerson + _ + SubRelType.contactPerson + _ + ContactPerson.RelName.hasContact);
99

  
100
		cfs.add(RelType.resultProject + _ + SubRelType.outcome + _ + Outcome.RelName.isProducedBy);
101
		cfs.add(RelType.resultProject + _ + SubRelType.outcome + _ + Outcome.RelName.produces);
102

  
103
		cfs.add(RelType.resultResult + _ + SubRelType.similarity + _ + Similarity.RelName.hasAmongTopNSimilarDocuments);
104
		cfs.add(RelType.resultResult + _ + SubRelType.similarity + _ + Similarity.RelName.isAmongTopNSimilarDocuments);
105

  
106
		cfs.add(RelType.resultResult + _ + SubRelType.publicationDataset + _ + PublicationDataset.RelName.isRelatedTo);
107

  
108
		return cfs;
109
	}
110

  
111
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/dedup/conf/DedupConfigurationOrchestration.java
1
package eu.dnetlib.msro.workflows.dedup.conf;
2

  
3
import java.util.Queue;
4

  
5
import com.google.gson.Gson;
6
import com.google.gson.GsonBuilder;
7

  
8
import eu.dnetlib.pace.config.DedupConfig;
9

  
10
/**
11
 * The Class DedupConfigurationOrchestration.
12
 */
13
public class DedupConfigurationOrchestration {
14

  
15
	/** The entity. */
16
	private Entity entity;
17

  
18
	/** The action set id. */
19
	private String actionSetId;
20

  
21
	/** The configurations. */
22
	private Queue<DedupConfig> configurations;
23

  
24
	public DedupConfigurationOrchestration() {}
25

  
26
	/**
27
	 * Instantiates a new dedup configuration orchestration.
28
	 *
29
	 * @param entity
30
	 *            the entity
31
	 * @param actionSetId
32
	 *            the action set id
33
	 * @param configurations
34
	 *            the configurations
35
	 */
36
	public DedupConfigurationOrchestration(final Entity entity, final String actionSetId, final Queue<DedupConfig> configurations) {
37
		super();
38
		this.setEntity(entity);
39
		this.setActionSetId(actionSetId);
40
		this.setConfigurations(configurations);
41
	}
42

  
43
	/**
44
	 * Gets the entity.
45
	 *
46
	 * @return the entity
47
	 */
48
	public Entity getEntity() {
49
		return entity;
50
	}
51

  
52
	/**
53
	 * Gets the action set id.
54
	 *
55
	 * @return the action set id
56
	 */
57
	public String getActionSetId() {
58
		return actionSetId;
59
	}
60

  
61
	/**
62
	 * Gets the configurations.
63
	 *
64
	 * @return the configurations
65
	 */
66
	public Queue<DedupConfig> getConfigurations() {
67
		return configurations;
68
	}
69

  
70
	public void setEntity(final Entity entity) {
71
		this.entity = entity;
72
	}
73

  
74
	public void setActionSetId(final String actionSetId) {
75
		this.actionSetId = actionSetId;
76
	}
77

  
78
	public void setConfigurations(final Queue<DedupConfig> configurations) {
79
		this.configurations = configurations;
80
	}
81

  
82
	/**
83
	 * From json.
84
	 *
85
	 * @param json
86
	 *            the json
87
	 * @return the dedup configuration orchestration
88
	 */
89
	public static DedupConfigurationOrchestration fromJSON(final String json) {
90
		return new Gson().fromJson(json, DedupConfigurationOrchestration.class);
91
	}
92

  
93
	/*
94
	 * (non-Javadoc)
95
	 * 
96
	 * @see java.lang.Object#toString()
97
	 */
98
	@Override
99
	public String toString() {
100
		return new GsonBuilder().setPrettyPrinting().create().toJson(this);
101
	}
102

  
103
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-deduplication</artifactId>
12
	<packaging>jar</packaging>
13
	<version>1.1.3</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-deduplication/tags/dnet-deduplication-1.1.3</developerConnection>
16
	</scm>
17
	<dependencies>
18
		<dependency>
19
			<groupId>eu.dnetlib</groupId>
20
			<artifactId>dnet-msro-service</artifactId>
21
			<version>[3.0.0,4.0.0)</version>
22
		</dependency>
23
		<dependency>
24
			<groupId>eu.dnetlib</groupId>
25
			<artifactId>dnet-hadoop-service-rmi</artifactId>
26
			<version>[1.0.0,2.0.0)</version>
27
		</dependency>
28
		<dependency>
29
			<groupId>eu.dnetlib</groupId>
30
			<artifactId>dnet-actionmanager-api</artifactId>
31
			<version>[3.0.0,4.0.0)</version>
32
		</dependency>
33
		<dependency>
34
			<groupId>eu.dnetlib</groupId>
35
			<artifactId>dnet-modular-ui</artifactId>
36
			<version>[3.0.0,4.0.0)</version>
37
		</dependency>
38

  
39
		<dependency>
40
			<groupId>eu.dnetlib</groupId>
41
			<artifactId>dnet-index-solr-client</artifactId>
42
			<version>[2.0.0,3.0.0)</version>
43
		</dependency>
44

  
45
		<dependency>
46
			<groupId>eu.dnetlib</groupId>
47
			<artifactId>dnet-openaireplus-mapping-utils</artifactId>
48
			<version>[3.0.0,4.0.0)</version>
49
		</dependency>
50

  
51

  
52
		<dependency>
53
			<groupId>javax.servlet</groupId>
54
			<artifactId>javax.servlet-api</artifactId>
55
			<version>${javax.servlet.version}</version>
56
			<scope>provided</scope>
57
		</dependency>
58
		<dependency>
59
			<groupId>com.fasterxml.jackson.core</groupId>
60
			<artifactId>jackson-databind</artifactId>
61
			<version>2.4.3</version>
62
		</dependency>
63
		<dependency>
64
			<groupId>com.google.guava</groupId>
65
			<artifactId>guava</artifactId>
66
			<version>${google.guava.version}</version>
67
		</dependency>
68

  
69
		<dependency>
70
			<groupId>junit</groupId>
71
			<artifactId>junit</artifactId>
72
			<version>${junit.version}</version>
73
			<scope>test</scope>
74
		</dependency>
75

  
76
	</dependencies>
77
</project>
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/java/eu/dnetlib/msro/workflows/hadoop/hbase/DeleteHBaseRecordsJobNode.java
1
package eu.dnetlib.msro.workflows.hadoop.hbase;
2

  
3
import java.io.IOException;
4
import java.util.Map;
5

  
6
import org.apache.commons.io.IOUtils;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.springframework.beans.factory.annotation.Required;
10

  
11
import com.googlecode.sarasvati.Engine;
12
import com.googlecode.sarasvati.NodeToken;
13
import com.googlecode.sarasvati.env.Env;
14

  
15
import eu.dnetlib.data.hadoop.rmi.HadoopBlackboardActions;
16
import eu.dnetlib.data.hadoop.rmi.HadoopService;
17
import eu.dnetlib.enabling.resultset.rmi.ResultSetException;
18
import eu.dnetlib.enabling.tools.blackboard.BlackboardJob;
19
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions;
20
import eu.dnetlib.msro.workflows.nodes.BlackboardJobNode;
21
import eu.dnetlib.msro.workflows.nodes.ProgressJobNode;
22
import eu.dnetlib.msro.workflows.nodes.blackboard.BlackboardWorkflowJobListener;
23
import eu.dnetlib.msro.workflows.resultset.ProcessCountingResultSetFactory;
24
import eu.dnetlib.msro.workflows.util.ProgressProvider;
25
import eu.dnetlib.msro.workflows.util.ResultsetProgressProvider;
26
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
27

  
28
public class DeleteHBaseRecordsJobNode extends BlackboardJobNode implements ProgressJobNode {
29

  
30
	private static final Log log = LogFactory.getLog(DeleteHBaseRecordsJobNode.class); // NOPMD by marko on 11/24/08 5:02 PM
31

  
32
	private String inputEprParam;
33
	private String hbaseTableProperty;
34
	private String cluster;
35
	private String xslt;
36

  
37
	private boolean simulation = false;
38

  
39
	private ProgressProvider progressProvider;
40

  
41
	private ProcessCountingResultSetFactory processCountingResultSetFactory;
42

  
43
	@Override
44
	protected String obtainServiceId(final NodeToken token) {
45
		return getServiceLocator().getServiceId(HadoopService.class);
46
	}
47

  
48
	@Override
49
	protected void prepareJob(final BlackboardJob job, final NodeToken token) throws Exception {
50
		log.info("Invoking blackboard method");
51

  
52
		job.setAction(HadoopBlackboardActions.DELETE_EPR_HBASE.toString());
53
		job.getParameters().put("input_epr", DnetXsltFunctions.encodeBase64(prepareEpr(token)));
54
		job.getParameters().put("xslt", DnetXsltFunctions.encodeBase64(prepareXslt()));
55
		job.getParameters().put("table", getPropertyFetcher().getProperty(getHbaseTableProperty()));
56
		job.getParameters().put("cluster", cluster);
57
		job.getParameters().put("simulation", String.valueOf(isSimulation()));
58
	}
59

  
60
	@Override
61
	protected BlackboardWorkflowJobListener generateBlackboardListener(final Engine engine, final NodeToken token) {
62
		return new BlackboardWorkflowJobListener(engine, token) {
63

  
64
			@Override
65
			protected void populateEnv(final Env env, final Map<String, String> responseParams) {
66
				final String count = responseParams.get("count");
67
				log.info(String.format("Deleted %s objects from HBase table %s, cluster %s", count, getPropertyFetcher().getProperty(getHbaseTableProperty()),
68
						getCluster()));
69
				env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + getName() + ":count", count);
70
			}
71
		};
72
	}
73

  
74
	private String prepareEpr(final NodeToken token) throws ResultSetException {
75
		final String epr = token.getEnv().getAttribute(inputEprParam);
76
		final ResultsetProgressProvider resultsetProgressProvider = processCountingResultSetFactory.createProgressProvider(token.getProcess(), epr);
77

  
78
		setProgressProvider(resultsetProgressProvider);
79

  
80
		return resultsetProgressProvider.getEpr().toString();
81
	}
82

  
83
	private String prepareXslt() throws IOException {
84
		return (xslt == null) || xslt.isEmpty() ? "" : IOUtils.toString(getClass().getResourceAsStream(xslt));
85
	}
86

  
87
	public String getInputEprParam() {
88
		return inputEprParam;
89
	}
90

  
91
	public void setInputEprParam(final String inputEprParam) {
92
		this.inputEprParam = inputEprParam;
93
	}
94

  
95
	public String getHbaseTableProperty() {
96
		return hbaseTableProperty;
97
	}
98

  
99
	public void setHbaseTableProperty(final String hbaseTableProperty) {
100
		this.hbaseTableProperty = hbaseTableProperty;
101
	}
102

  
103
	@Required
104
	public void setProcessCountingResultSetFactory(final ProcessCountingResultSetFactory processCountingResultSetFactory) {
105
		this.processCountingResultSetFactory = processCountingResultSetFactory;
106
	}
107

  
108
	@Override
109
	public ProgressProvider getProgressProvider() {
110
		return progressProvider;
111
	}
112

  
113
	public void setProgressProvider(final ProgressProvider progressProvider) {
114
		this.progressProvider = progressProvider;
115
	}
116

  
117
	public ProcessCountingResultSetFactory getProcessCountingResultSetFactory() {
118
		return processCountingResultSetFactory;
119
	}
120

  
121
	public String getXslt() {
122
		return xslt;
123
	}
124

  
125
	public void setXslt(final String xslt) {
126
		this.xslt = xslt;
127
	}
128

  
129
	public String getCluster() {
130
		return cluster;
131
	}
132

  
133
	public void setCluster(final String cluster) {
134
		this.cluster = cluster;
135
	}
136

  
137
	public boolean isSimulation() {
138
		return simulation;
139
	}
140

  
141
	public void setSimulation(final boolean simulation) {
142
		this.simulation = simulation;
143
	}
144

  
145
}
modules/dnet-deduplication/tags/dnet-deduplication-1.1.3/src/main/resources/eu/dnetlib/web/resources/js/dedup_inspector_controllers.js
1
var dedupInspectorControllers = angular.module('dedupInspectorControllers', []);
2

  
3
function common_init($scope, $http, $sce, $location) {
4
	initSpinner();
5
	$scope.showError        = function(error)   { show_notification("error", error); }
6
	$scope.showNotification = function(message) { show_notification("info", message); }
7
	$scope.showSpinner      = function()        { showSpinner(); }
8
	$scope.hideSpinner      = function()        { hideSpinner(); }
9
	$scope.to_trusted       = function(html)    { return $sce.trustAsHtml(html); }
10
	$scope.go               = function(path)    { document.location.href='#'+path; }
11
	$scope.encodeValue      = function(val)     { return val; }
12
	
13
	
14
	$scope.encodeDateParam = function(date) {
15
		var year = date.getFullYear();
16
		var month = ("00" + (date.getMonth() + 1)).slice(-2);
17
		var day = ("00" + date.getDate()).slice(-2);
18
		return year + month + day;
19
	}
20

  
21
	$scope.decodeDateParam = function(s) {
22
		var year = s.substring(0,4);
23
		var month = s.substring(4,6);
24
		var day = s.substring(6,8);
25

  
26
		return new Date(year,month-1,day);
27
	}
28
}
29

  
30
dedupInspectorControllers.controller('addSimRelsCtrl', [
31
	'$scope', '$http', '$sce', '$location', '$routeParams', 'groupService',
32
	function ($scope, $http, $sce, $location, $routeParams, groupService) {
33
	
34
		common_init($scope, $http, $sce, $location);
35
	
36
		$scope.validentityTypes = { 
37
			'result' 		: 	{id:'50', type:'result', label:'Publication'}, 
38
			'organization' 	: 	{id:'20', type:'organization', label:'Organization'},
39
			'person' 		: 	{id:'30', type:'person', label:'Person'} 
40
		};
41
		$scope.group = groupService.getGroup();
42
		$scope.dissimilar = groupService.getDissimilar();
43
		
44
		if ($scope.group == null || $scope.group.entityType.type != $scope.validentityTypes[$routeParams.entityType].type) {
45
			$scope.group = {
46
				actionSet : $routeParams.actionSet,
47
				details : {},
48
				byRoot : {},
49
				byMerged : {},
50
				group : [],
51
				rootIds : [],
52
				detailList : [],
53
				rootList : [],
54
				entityType : $scope.validentityTypes[$routeParams.entityType]
55
			}
56
			groupService.setGroup($scope.group);
57
		};
58
		
59
		$scope.group.actionSet = $routeParams.actionSet;
60
		
61
		if($scope.dissimilar == null || $scope.group.entityType.type != $scope.validentityTypes[$routeParams.entityType].type) {
62
			$scope.dissimilar = {
63
				group : {},
64
				entityType : $scope.validentityTypes[$routeParams.entityType]
65
			};
66
			groupService.setDissimilar($scope.dissimilar);
67
		}
68

  
69
		$scope.groupIdDetails = {
70
			map : {},
71
			list : []
72
		};
73
		
74
		if($routeParams.query && $routeParams.query.length > 0) {
75
			$scope.query = $routeParams.query;
76
		}
77
		if($routeParams.start) {
78
			$scope.start = parseInt($routeParams.start);
79
		} else {
80
			$scope.start = 0;
81
		}
82
		$scope.rows = 20;
83
	
84
		$scope.search = function() {
85
			$scope.showSpinner();
86
			var q = '';
87
			if($scope.query.match('^[0-9][0-9]\|.{12}::[a-zA-Z0-9]{32}$')) {
88
				q = 'objidentifier exact "' + $scope.query + '"';
89
			} else {
90
				q = $scope.query;
91
			}
92

  
93
			$http.defaults.headers.post["Content-Type"] = "application/x-www-form-urlencoded; charset=UTF-8";
94
			$http.post('dedup/search.do', $.param({
95
				'entityType' : $scope.group.entityType.type,
96
				'query'  	: q,
97
				'actionSet' : $scope.group.actionSet,
98
				'start'	 	: $scope.start,
99
				'rows'	 	: $scope.rows,
100
				'fields' 	: $scope.defaultFields()
101
			})).success(function(res) {
102
            	if(res) {
103
            		$scope.results = res;
104
            		$scope.hideSpinner();
105
            	} else {
106
                	$scope.hideSpinner();
107
            		$scope.showError('Registration failed');
108
            	}
109
			}).error(function(err) {
110
				$scope.hideSpinner();
111
				$scope.showError('Registration failed: ' + err.message);
112
			});			
113
		}
114
		
115
		$scope.defaultFields = function() {
116
			switch($scope.group.entityType.type) { 
117
			  case "result": 
118
				  return "title,dateofacceptance,description";
119
			  case "organization": 
120
				  return "legalname,legalshortname,websiteurl";
121
			  case "person": 
122
				  return "fullname";
123
			  default: 
124
				  return "";
125
			}	
126
		}
127
		
128
		$scope.allFields = function() {
129
			switch($scope.group.entityType.type) { 
130
			  case "result": 
131
				  return "title,dateofacceptance,author,description";
132
			  case "organization": 
133
				  return "legalname,legalshortname,country,websiteurl";
134
			  default: 
135
				  return "";
136
			}	
137
		}		
138
		
139
		if($scope.query) {
140
			$scope.search();
141
		}
142
		
143
		$scope.searchById = function(id, type, fields, responseCallback) {
144
			$http.post('dedup/searchById.do', $.param({
145
				'entityType' : type,
146
				'objidentifier' : id,
147
				'fields' : fields
148
			})).success(function(res) {
149
	        	if(res) {
150
	        		responseCallback(id, res);
151
	        	} else {
152
	        		$scope.showError('Unable to query: ' + mId);
153
	        	}
154
			}).error(function(err) {
155
				$scope.showError('Unable to query id "' + mId + '", error: ' + err.message);
156
			});
157
		}		
158
		
159
		$scope.resetForm = function() {
160
			if (confirm('Reset group?')) {
161
				$scope.doResetForm();
162
			}
163
		}
164
		$scope.doResetForm = function() {
165
			$scope.group.group = [];
166
			$scope.group.rootIds = [];
167
			$scope.group.details = {};
168
			$scope.group.byRoot = {};
169
			$scope.group.byMerged = {};
170
			$scope.group.rootList = [];
171
			$scope.group.detailList = [];
172
			
173
			$scope.groupIdDetails.map = {};
174
			$scope.groupIdDetails.list = [];
175

  
176
			$scope.dissimilar = {
177
				group : {},
178
				entityType : $scope.validentityTypes[$routeParams.entityType]
179
			};
180
		}
181
				
182
		$scope.commit = function() {
183
			if (confirm('Commit actions?')) {
184
				$scope.showSpinner();
185
				var g = {
186
					actionSet : $scope.group.actionSet,
187
					entityType : $scope.group.entityType,
188
					group : $scope.group.group,
189
					rootIds : $scope.group.rootIds,
190
					dissimilar : $scope.dissimilar.group
191
				}
192
				$http.defaults.headers.post["Content-Type"] = "application/json; charset=UTF-8";
193
				$http.post('dedup/commit.do', g).success(function(res) {
194
	            	if(res) {
195
	            		$scope.showNotification('Commit done');
196
	            		$scope.search();
197
	            		$scope.hideSpinner();
198
	            	} else {
199
	                	$scope.hideSpinner();
200
	            		$scope.showError('Commit failed');
201
	            	}
202
				}).error(function(err) {
203
					$scope.hideSpinner();
204
					$scope.showError('Commit failed: ' + err.message);
205
				});
206
				$scope.doResetForm();
207
			}
208
		}
209

  
210
		$scope.removeFromResultList = function(ids, results) {
211
			var tmp = [];	
212
			angular.forEach(results, function(r) {
213
		    	if(jQuery.grep(ids, function(val, i) {
214
		    		  return val == r.id;
215
				}).length == 0) {
216
		    		tmp.push(r);
217
		    	};
218
		    });		    
219
		    return tmp;			
220
		}
221
		
222
		$scope.removeFromIdList = function(ids, list) {
223
			var tmp = [];	
224
			angular.forEach(list, function(id) {
225
		    	if(jQuery.grep(ids, function(val, i) {
226
		    		  return val == id;
227
				}).length == 0) {
228
		    		tmp.push(id);
229
		    	};
230
		    });		    
231
		    return tmp;			
232
		}			
233
		
234
		$scope.add = function(id, idList) {
235
			var size = $scope.group.group.length;
236
			var ids = idList.split(",");
237
		    angular.forEach(ids, function(id) {
238
		    	if(jQuery.grep($scope.group.group, function(val, i) {
239
		    		  return val == id;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff