Project

General

Profile

« Previous | Next » 

Revision 47707

import cleaner service

View differences:

modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/controllers/MsroWorkerController.java
26 26
import eu.dnetlib.enabling.annotations.DnetService;
27 27
import eu.dnetlib.enabling.annotations.DnetServiceType;
28 28
import eu.dnetlib.msro.exceptions.MSROException;
29
import eu.dnetlib.msro.workers.aggregation.collect.CollectorPluginEnumerator;
30 29
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
31 30
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
32 31
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
......
34 33
import eu.dnetlib.msro.workflows.ProcessStatus;
35 34
import eu.dnetlib.msro.workflows.WorkflowInstance;
36 35
import eu.dnetlib.msro.workflows.WorkflowProcessInfo;
36
import eu.dnetlib.msro.workflows.nodes.collect.CollectorPluginEnumerator;
37 37
import eu.dnetlib.msro.workflows.procs.ProcessExecutor;
38 38
import eu.dnetlib.msro.workflows.procs.ProcessFactory;
39 39
import eu.dnetlib.msro.workflows.procs.ProcessRegistry;
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/repohi/RegisterWorkflowFromTemplateJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.repohi;
2

  
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.stream.Collectors;
8

  
9
import org.apache.commons.lang3.StringEscapeUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.dom4j.Document;
13
import org.dom4j.Element;
14
import org.dom4j.Node;
15
import org.dom4j.io.SAXReader;
16
import org.springframework.beans.factory.annotation.Autowired;
17
import org.springframework.context.annotation.Scope;
18
import org.springframework.stereotype.Component;
19

  
20
import eu.dnetlib.clients.is.InformationServiceClient;
21
import eu.dnetlib.miscutils.templates.TemplateUtil;
22
import eu.dnetlib.msro.annotations.ProcessNode;
23
import eu.dnetlib.msro.workflows.Arc;
24
import eu.dnetlib.msro.workflows.WorkflowStatus;
25
import eu.dnetlib.msro.workflows.nodes.ParallelProcessNode;
26
import eu.dnetlib.msro.workflows.procs.ProcessAware;
27
import eu.dnetlib.msro.workflows.procs.Token;
28
import eu.dnetlib.msro.workflows.procs.WorkflowProcess;
29

  
30
@Component
31
@Scope("prototype")
32
@ProcessNode("RegisterWorkflowFromTemplate")
33
public class RegisterWorkflowFromTemplateJobNode extends ParallelProcessNode implements ProcessAware {
34

  
35
	private String wfName;
36
	private String wfDescription;
37
	private String wfTemplate;
38
	private Map<String, String> params;
39

  
40
	@Autowired
41
	private TemplateUtil templateUtil;
42

  
43
	@Autowired
44
	private InformationServiceClient isClient;
45

  
46
	private WorkflowProcess process;
47

  
48
	private static final Log log = LogFactory.getLog(RegisterWorkflowFromTemplateJobNode.class);
49

  
50
	@Override
51
	public String execute(final Token token) throws Exception {
52

  
53
		final Map<String, Object> map = new HashMap<>();
54

  
55
		if (params != null) {
56
			params.forEach((k, v) -> map.put(k, StringEscapeUtils.escapeXml11(v)));
57
		}
58

  
59
		map.put("name", StringEscapeUtils.escapeXml11(wfName));
60
		map.put("desc", StringEscapeUtils.escapeXml11(wfDescription));
61
		map.put("worker", StringEscapeUtils.escapeXml11(process.getWorkerId()));
62
		map.put("dsId", StringEscapeUtils.escapeXml11(process.getDsId()));
63
		map.put("iface", StringEscapeUtils.escapeXml11(process.getDsInterface()));
64

  
65
		final String wfProfile = templateUtil.processTemplateProfile(wfTemplate, map).get("wf");
66

  
67
		final Document doc = new SAXReader().read(new StringReader(wfProfile));
68
		final Node paramsNode = doc.selectSingleNode("//CONFIGURATION/PARAMETERS");
69

  
70
		final List<?> emptySysParams = paramsNode.selectNodes("//PARAM[@managedBy='system' and @required='true' and not(text())]");
71
		if (emptySysParams.size() > 0) {
72
			((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.WAIT_SYS_SETTINGS.toString());
73
			log.warn("The following system parameters are empty: "
74
					+ emptySysParams.stream().map(p -> ((Node) p).valueOf("@name")).collect(Collectors.joining()));
75
		} else if (paramsNode.selectNodes("//PARAM[@managedBy='user' and @required='true' and not(text())]").size() > 0) {
76
			((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.WAIT_USER_SETTINGS.toString());
77
		} else {
78
			((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.EXECUTABLE.toString());
79
		}
80

  
81
		final String wfId = isClient.register(doc.asXML());
82

  
83
		token.getEnv().setAttribute("wfId", wfId);
84

  
85
		return Arc.DEFAULT_ARC;
86

  
87
	}
88

  
89
	public String getWfName() {
90
		return wfName;
91
	}
92

  
93
	public void setWfName(final String wfName) {
94
		this.wfName = wfName;
95
	}
96

  
97
	public String getWfDescription() {
98
		return wfDescription;
99
	}
100

  
101
	public void setWfDescription(final String wfDescription) {
102
		this.wfDescription = wfDescription;
103
	}
104

  
105
	public String getWfTemplate() {
106
		return wfTemplate;
107
	}
108

  
109
	public void setWfTemplate(final String wfTemplate) {
110
		this.wfTemplate = wfTemplate;
111
	}
112

  
113
	public Map<String, String> getParams() {
114
		return params;
115
	}
116

  
117
	public void setParams(final Map<String, String> params) {
118
		this.params = params;
119
	}
120

  
121
	@Override
122
	public void setProcess(final WorkflowProcess process) {
123
		this.process = process;
124
	}
125

  
126
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/repohi/RemoveApiExtraFieldsJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.repohi;
2

  
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Set;
8
import java.util.stream.Collectors;
9

  
10
import org.dom4j.Document;
11
import org.dom4j.Node;
12
import org.dom4j.io.SAXReader;
13
import org.springframework.beans.factory.annotation.Autowired;
14
import org.springframework.context.annotation.Scope;
15
import org.springframework.stereotype.Component;
16

  
17
import eu.dnetlib.clients.dsManager.DsManagerClient;
18
import eu.dnetlib.clients.is.InformationServiceClient;
19
import eu.dnetlib.clients.locators.ServiceClientFactory;
20
import eu.dnetlib.msro.annotations.ProcessNode;
21
import eu.dnetlib.msro.workflows.Arc;
22
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
23

  
24
@Component
25
@Scope("prototype")
26
@ProcessNode("RemoveApiExtraFields")
27
public class RemoveApiExtraFieldsJobNode extends SimpleParallelProcessNode {
28

  
29
	private String datasourceId;
30
	private String datasourceInterface;
31
	private List<String> fields;
32

  
33
	@Autowired
34
	private ServiceClientFactory clientFactory;
35

  
36
	@Override
37
	protected String execute() throws Exception {
38
		if (fields != null) {
39
			final Set<String> invalidFields = fields.stream().map(String::toLowerCase).collect(Collectors.toSet());
40
			final Map<String, String> map = calculateValidExtraFields(datasourceId, datasourceInterface, invalidFields);
41
			clientFactory.getClient(DsManagerClient.class).bulkUpdateApiExtraFields(datasourceId, datasourceInterface, map);
42
		}
43
		return Arc.DEFAULT_ARC;
44
	}
45

  
46
	private Map<String, String> calculateValidExtraFields(final String repoId, final String ifaceId, final Set<String> invalidFields) throws Exception {
47
		final Map<String, String> res = new HashMap<>();
48

  
49
		final String profile = clientFactory.getClient(InformationServiceClient.class).getProfile(repoId);
50

  
51
		final SAXReader reader = new SAXReader();
52
		final Document doc = reader.read(new StringReader(profile));
53

  
54
		final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + ifaceId + "']");
55
		if (ifcNode != null) {
56
			for (final Object o : ifcNode.selectNodes("./INTERFACE_EXTRA_FIELD")) {
57
				final String name = ((Node) o).valueOf("@name");
58
				if (!invalidFields.contains(name.toLowerCase())) {
59
					res.put(name, ((Node) o).getText());
60
				}
61
			}
62
		}
63

  
64
		return res;
65
	}
66

  
67
	public List<String> getFields() {
68
		return fields;
69
	}
70

  
71
	public void setFields(final List<String> fields) {
72
		this.fields = fields;
73
	}
74

  
75
	public String getDatasourceId() {
76
		return datasourceId;
77
	}
78

  
79
	public void setDatasourceId(final String datasourceId) {
80
		this.datasourceId = datasourceId;
81
	}
82

  
83
	public String getDatasourceInterface() {
84
		return datasourceInterface;
85
	}
86

  
87
	public void setDatasourceInterface(final String datasourceInterface) {
88
		this.datasourceInterface = datasourceInterface;
89
	}
90

  
91
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/xslt/ApplyXsltJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.xslt;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5
import java.util.function.Function;
6

  
7
import org.springframework.context.annotation.Scope;
8
import org.springframework.core.io.ClassPathResource;
9
import org.springframework.stereotype.Component;
10

  
11
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
12
import eu.dnetlib.msro.annotations.ProcessNode;
13
import eu.dnetlib.msro.exceptions.MSROException;
14
import eu.dnetlib.msro.workflows.nodes.AbstractApplyFunctionJobNode;
15

  
16
@Component
17
@Scope("prototype")
18
@ProcessNode("ApplyXslt")
19
public class ApplyXsltJobNode extends AbstractApplyFunctionJobNode<String, String> {
20

  
21
	private String xsltClasspath;
22
	private Map<String, String> xsltParams = new HashMap<>();
23

  
24
	@Override
25
	protected Function<String, String> getFunction() throws MSROException {
26
		return new ApplyXslt(new ClassPathResource(xsltClasspath), xsltParams);
27
	}
28

  
29
	public String getXsltClasspath() {
30
		return xsltClasspath;
31
	}
32

  
33
	public void setXsltClasspath(final String xsltClasspath) {
34
		this.xsltClasspath = xsltClasspath;
35
	}
36

  
37
	public Map<String, String> getXsltParams() {
38
		return xsltParams;
39
	}
40

  
41
	public void setXsltParams(final Map<String, String> xsltParams) {
42
		this.xsltParams = xsltParams;
43
	}
44

  
45
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MDStoreToApiExtraFieldsJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.mdstore;
2

  
3
import java.io.StringReader;
4
import java.util.HashMap;
5
import java.util.Map;
6

  
7
import org.dom4j.Document;
8
import org.dom4j.Node;
9
import org.dom4j.io.SAXReader;
10
import org.springframework.beans.factory.annotation.Autowired;
11
import org.springframework.context.annotation.Scope;
12
import org.springframework.stereotype.Component;
13

  
14
import eu.dnetlib.clients.dsManager.DsManagerClient;
15
import eu.dnetlib.clients.is.InformationServiceClient;
16
import eu.dnetlib.clients.locators.ServiceClientFactory;
17
import eu.dnetlib.msro.annotations.ProcessNode;
18
import eu.dnetlib.msro.workflows.Arc;
19
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
20

  
21
@Component
22
@Scope("prototype")
23
@ProcessNode("MDStoreToApiExtraFields")
24
public class MDStoreToApiExtraFieldsJobNode extends SimpleParallelProcessNode {
25

  
26
	private String mdId;
27
	private String datasourceId;
28
	private String datasourceInterface;
29
	private String extraFieldForTotal;
30
	private String extraFieldForDate;
31
	private String extraFieldForMdId;
32

  
33
	@Autowired
34
	private InformationServiceClient isClient;
35

  
36
	@Autowired
37
	private ServiceClientFactory serviceClientFactory;
38

  
39
	@Override
40
	protected String execute() throws Exception {
41
		final String xq = "doc('/db/DRIVER/" + mdId + "')/concat(.//NUMBER_OF_RECORDS, ' @=@ ', .//LAST_STORAGE_DATE)";
42

  
43
		final String s = isClient.findOne(xq);
44

  
45
		final String[] arr = s.split(" @=@ ");
46

  
47
		final Map<String, String> map = getCurrentExtraFields(datasourceId, datasourceInterface);
48
		map.put(extraFieldForTotal, arr[0].trim());
49
		map.put(extraFieldForDate, arr[1].trim());
50
		map.put(extraFieldForMdId, mdId);
51

  
52
		serviceClientFactory.getClient(DsManagerClient.class).bulkUpdateApiExtraFields(datasourceId, datasourceInterface, map);
53

  
54
		return Arc.DEFAULT_ARC;
55
	}
56

  
57
	private Map<String, String> getCurrentExtraFields(final String dsId, final String ifaceId) throws Exception {
58
		final Map<String, String> res = new HashMap<>();
59

  
60
		final String profile = isClient.getProfile(dsId);
61

  
62
		final SAXReader reader = new SAXReader();
63
		final Document doc = reader.read(new StringReader(profile));
64

  
65
		final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + ifaceId + "']");
66
		if (ifcNode != null) {
67
			for (final Object o : ifcNode.selectNodes("./INTERFACE_EXTRA_FIELD")) {
68
				res.put(((Node) o).valueOf("@name"), ((Node) o).getText());
69
			}
70
		}
71

  
72
		return res;
73
	}
74

  
75
	public String getMdId() {
76
		return mdId;
77
	}
78

  
79
	public void setMdId(final String mdId) {
80
		this.mdId = mdId;
81
	}
82

  
83
	public String getDatasourceId() {
84
		return datasourceId;
85
	}
86

  
87
	public void setDatasourceId(final String datasourceId) {
88
		this.datasourceId = datasourceId;
89
	}
90

  
91
	public String getDatasourceInterface() {
92
		return datasourceInterface;
93
	}
94

  
95
	public void setDatasourceInterface(final String datasourceInterface) {
96
		this.datasourceInterface = datasourceInterface;
97
	}
98

  
99
	public String getExtraFieldForTotal() {
100
		return extraFieldForTotal;
101
	}
102

  
103
	public void setExtraFieldForTotal(final String extraFieldForTotal) {
104
		this.extraFieldForTotal = extraFieldForTotal;
105
	}
106

  
107
	public String getExtraFieldForDate() {
108
		return extraFieldForDate;
109
	}
110

  
111
	public void setExtraFieldForDate(final String extraFieldForDate) {
112
		this.extraFieldForDate = extraFieldForDate;
113
	}
114

  
115
	public String getExtraFieldForMdId() {
116
		return extraFieldForMdId;
117
	}
118

  
119
	public void setExtraFieldForMdId(final String extraFieldForMdId) {
120
		this.extraFieldForMdId = extraFieldForMdId;
121
	}
122

  
123
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MdBuilderJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.mdstore;
2

  
3
import java.net.URLEncoder;
4
import java.util.function.Function;
5

  
6
import org.antlr.stringtemplate.StringTemplate;
7
import org.apache.commons.io.IOUtils;
8
import org.apache.commons.lang3.StringUtils;
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.springframework.beans.factory.annotation.Autowired;
12
import org.springframework.beans.factory.annotation.Value;
13
import org.springframework.context.annotation.Scope;
14
import org.springframework.core.io.Resource;
15
import org.springframework.stereotype.Component;
16

  
17
import eu.dnetlib.clients.is.InformationServiceClient;
18
import eu.dnetlib.exceptions.InformationServiceException;
19
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
20
import eu.dnetlib.msro.annotations.ProcessNode;
21
import eu.dnetlib.msro.exceptions.MSROException;
22
import eu.dnetlib.msro.workflows.nodes.AbstractApplyFunctionJobNode;
23
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
24

  
25
@Component
26
@Scope("prototype")
27
@ProcessNode("MdBuilder")
28
public class MdBuilderJobNode extends AbstractApplyFunctionJobNode<String, String> {
29

  
30
	private String datasourceId;
31
	private String datasourceInterface;
32

  
33
	@Value("${msro.worker.mdstore.mdbuilder.xslt.template}")
34
	private Resource mdBuilderTemplateXslt;
35

  
36
	@Autowired
37
	private InformationServiceClient isClient;
38

  
39
	private static final Log log = LogFactory.getLog(SimpleParallelProcessNode.class);
40

  
41
	@Override
42
	protected Function<String, String> getFunction() throws MSROException {
43

  
44
		if (StringUtils.isBlank(datasourceId)) { throw new MSROException("Missing datasourceId"); }
45
		if (StringUtils.isBlank(datasourceInterface)) { throw new MSROException("Missing datasourceInterface"); }
46

  
47
		final String xq = "for $x in doc('/db/DRIVER/" + datasourceId + "') "
48
				+ "return concat("
49
				+ "$x//BASE_URL, ' @@@ ', "
50
				+ "$x//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix'], ' @@@ ', "
51
				+ "$x//INTERFACE[@id='" + datasourceInterface + "']/INTERFACE_EXTRA_FIELD[@name='metadata_identifier_path'], ' @@@ ', "
52
				+ "$x//INTERFACE[@id='" + datasourceInterface + "']/ACCESS_PROTOCOL/@format)";
53
		try {
54
			final String[] arr = isClient.findOne(xq).split("@@@");
55

  
56
			final StringTemplate st = new StringTemplate(IOUtils.toString(mdBuilderTemplateXslt.getInputStream()));
57
			st.setAttribute("datasourceId", datasourceId);
58
			st.setAttribute("baseurl", URLEncoder.encode(arr[0].trim(), "UTF-8"));
59
			st.setAttribute("namespacePrefix", arr[1].trim());
60
			st.setAttribute("xpath", arr[2].trim());
61
			st.setAttribute("metadatanamespace", getMetadataNamespace(arr[3].trim()));
62

  
63
			return new ApplyXslt(st.toString());
64
		} catch (final Exception e) {
65
			log.error("Error preparing MDBuilder function", e);
66
			throw new MSROException("Error preparing MDBuilder function", e);
67
		}
68
	}
69

  
70
	private String getMetadataNamespace(final String format) {
71
		final String xQuery = "collection('/db/DRIVER/conf/mdFormats')//METADATAFORMAT[@name='" + format + "' or @prefix='" + format + "']/@namespace/string()";
72
		try {
73
			return isClient.findOne(xQuery);
74
		} catch (final InformationServiceException e) {
75
			log.warn("Format " + format + " not registered on IS");
76
			return "";
77
		}
78

  
79
	}
80

  
81
	public String getDatasourceId() {
82
		return datasourceId;
83
	}
84

  
85
	public void setDatasourceId(final String datasourceId) {
86
		this.datasourceId = datasourceId;
87
	}
88

  
89
	public String getDatasourceInterface() {
90
		return datasourceInterface;
91
	}
92

  
93
	public void setDatasourceInterface(final String datasourceInterface) {
94
		this.datasourceInterface = datasourceInterface;
95
	}
96

  
97
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MDStoreFeederJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.mdstore;
2

  
3
import java.util.stream.Stream;
4

  
5
import org.apache.commons.logging.Log;
6
import org.apache.commons.logging.LogFactory;
7
import org.springframework.beans.factory.annotation.Autowired;
8
import org.springframework.context.annotation.Scope;
9
import org.springframework.stereotype.Component;
10

  
11
import eu.dnetlib.data.mdstore.MDStoreService;
12
import eu.dnetlib.data.mdstore.MDStoreService.FeedMode;
13
import eu.dnetlib.msro.annotations.ProcessNode;
14
import eu.dnetlib.msro.workflows.Arc;
15
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
16

  
17
@Component
18
@Scope("prototype")
19
@ProcessNode("Store")
20
public class MDStoreFeederJobNode extends SimpleParallelProcessNode {
21

  
22
	private static final Log log = LogFactory.getLog(MDStoreFeederJobNode.class);
23

  
24
	@Autowired
25
	private MDStoreService mdstoreService;
26

  
27
	private String mdId;
28

  
29
	private Stream<String> inputStream;
30

  
31
	private FeedMode mode;
32

  
33
	@Override
34
	protected String execute() throws Exception {
35
		log.info("Start feeding mdstore " + mdId);
36
		mdstoreService.store(mdId, inputStream, mode);
37
		return Arc.DEFAULT_ARC;
38
	}
39

  
40
	public String getMdId() {
41
		return mdId;
42
	}
43

  
44
	public void setMdId(final String mdId) {
45
		this.mdId = mdId;
46
	}
47

  
48
	public Stream<String> getInputStream() {
49
		return inputStream;
50
	}
51

  
52
	public void setInputStream(final Stream<String> inputStream) {
53
		this.inputStream = inputStream;
54
	}
55

  
56
	public FeedMode getMode() {
57
		return mode;
58
	}
59

  
60
	public void setMode(final FeedMode mode) {
61
		this.mode = mode;
62
	}
63

  
64
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/DeleteMDStoreJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.mdstore;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5
import org.springframework.beans.factory.annotation.Autowired;
6
import org.springframework.context.annotation.Scope;
7
import org.springframework.stereotype.Component;
8

  
9
import eu.dnetlib.data.mdstore.MDStoreService;
10
import eu.dnetlib.msro.annotations.ProcessNode;
11
import eu.dnetlib.msro.workflows.Arc;
12
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
13

  
14
@Component
15
@Scope("prototype")
16
@ProcessNode("DeleteMDStore")
17
public class DeleteMDStoreJobNode extends SimpleParallelProcessNode {
18

  
19
	private static final Log log = LogFactory.getLog(DeleteMDStoreJobNode.class);
20

  
21
	@Autowired
22
	private MDStoreService mdstoreService;
23

  
24
	private String mdId;
25

  
26
	@Override
27
	protected String execute() throws Exception {
28
		mdstoreService.delete(mdId);
29
		log.info("Mdstore deleted: " + mdId);
30
		return Arc.DEFAULT_ARC;
31
	}
32

  
33
	public String getMdId() {
34
		return mdId;
35
	}
36

  
37
	public void setMdId(final String mdId) {
38
		this.mdId = mdId;
39
	}
40
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/CreateMDStoreJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.mdstore;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5
import org.springframework.beans.factory.annotation.Autowired;
6
import org.springframework.context.annotation.Scope;
7
import org.springframework.stereotype.Component;
8

  
9
import eu.dnetlib.data.mdstore.MDStoreService;
10
import eu.dnetlib.msro.annotations.ProcessNode;
11
import eu.dnetlib.msro.workflows.Arc;
12
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
13

  
14
@Component
15
@Scope("prototype")
16
@ProcessNode("CreateMDStore")
17
public class CreateMDStoreJobNode extends SimpleParallelProcessNode {
18

  
19
	private static final Log log = LogFactory.getLog(CreateMDStoreJobNode.class);
20

  
21
	@Autowired
22
	private MDStoreService mdstoreService;
23

  
24
	private String format;
25

  
26
	private String layout;
27

  
28
	private String interpretation;
29

  
30
	private String resultMdId;
31

  
32
	@Override
33
	protected String execute() throws Exception {
34
		resultMdId = mdstoreService.create(format, layout, interpretation);
35
		log.info("New mdstore " + resultMdId);
36
		return Arc.DEFAULT_ARC;
37
	}
38

  
39
	public String getFormat() {
40
		return format;
41
	}
42

  
43
	public void setFormat(final String format) {
44
		this.format = format;
45
	}
46

  
47
	public String getLayout() {
48
		return layout;
49
	}
50

  
51
	public void setLayout(final String layout) {
52
		this.layout = layout;
53
	}
54

  
55
	public String getInterpretation() {
56
		return interpretation;
57
	}
58

  
59
	public void setInterpretation(final String interpretation) {
60
		this.interpretation = interpretation;
61
	}
62

  
63
	public String getResultMdId() {
64
		return resultMdId;
65
	}
66

  
67
	public void setResultMdId(final String resultMdId) {
68
		this.resultMdId = resultMdId;
69
	}
70

  
71
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectException.java
1
package eu.dnetlib.msro.workers.aggregation.collect;
2

  
3
import eu.dnetlib.msro.exceptions.MSROException;
4

  
5
public class CollectException extends MSROException {
6

  
7
	private static final long serialVersionUID = -5882744781341998714L;
8

  
9
	public CollectException(final String message) {
10
		super(message);
11
	}
12

  
13
	public CollectException(final String message, final Throwable cause) {
14
		super(message, cause);
15
	}
16

  
17
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectJobNode.java
1
package eu.dnetlib.msro.workers.aggregation.collect;
2

  
3
import java.io.StringReader;
4
import java.util.stream.Stream;
5

  
6
import org.dom4j.Document;
7
import org.dom4j.Node;
8
import org.dom4j.io.SAXReader;
9
import org.springframework.beans.factory.annotation.Autowired;
10
import org.springframework.context.annotation.Scope;
11
import org.springframework.stereotype.Component;
12

  
13
import eu.dnetlib.clients.is.InformationServiceClient;
14
import eu.dnetlib.msro.annotations.ProcessNode;
15
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
16
import eu.dnetlib.msro.workflows.Arc;
17
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode;
18

  
19
@Component
20
@Scope("prototype")
21
@ProcessNode("Collect")
22
public class CollectJobNode extends SimpleParallelProcessNode {
23

  
24
	@Autowired
25
	private InformationServiceClient isLookup;
26

  
27
	@Autowired
28
	private MetadataCollector collector;
29

  
30
	private String datasourceId;
31
	private String datasourceInterface;
32
	private Stream<String> outputStream;
33

  
34
	@Override
35
	protected String execute() throws Exception {
36
		final String profile = isLookup.getProfile(datasourceId);
37
		final Document doc = new SAXReader().read(new StringReader(profile));
38
		final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + datasourceInterface + "']");
39

  
40
		final InterfaceDescriptor interfaceDescriptor = InterfaceDescriptor.newInstance(ifcNode);
41

  
42
		outputStream = collector.collect(interfaceDescriptor);
43

  
44
		return Arc.DEFAULT_ARC;
45
	}
46

  
47
	public String getDatasourceId() {
48
		return datasourceId;
49
	}
50

  
51
	public void setDatasourceId(final String datasourceId) {
52
		this.datasourceId = datasourceId;
53
	}
54

  
55
	public String getDatasourceInterface() {
56
		return datasourceInterface;
57
	}
58

  
59
	public void setDatasourceInterface(final String datasourceInterface) {
60
		this.datasourceInterface = datasourceInterface;
61
	}
62

  
63
	public Stream<String> getOutputStream() {
64
		return outputStream;
65
	}
66

  
67
	public void setOutputStream(final Stream<String> outputStream) {
68
		this.outputStream = outputStream;
69
	}
70

  
71
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectorPluginEnumerator.java
1
package eu.dnetlib.msro.workers.aggregation.collect;
2

  
3
import java.util.List;
4
import java.util.stream.Collectors;
5

  
6
import org.springframework.beans.BeansException;
7
import org.springframework.context.ApplicationContext;
8
import org.springframework.context.ApplicationContextAware;
9
import org.springframework.stereotype.Component;
10

  
11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
13

  
14
@Component
15
public class CollectorPluginEnumerator implements ApplicationContextAware {
16

  
17
	private ApplicationContext applicationContext;
18

  
19
	public List<CollectorPlugin> getAll() {
20
		return applicationContext.getBeansOfType(CollectorPlugin.class)
21
				.values()
22
				.stream()
23
				.filter(o -> o.getClass().isAnnotationPresent(DnetCollectorPlugin.class))
24
				.collect(Collectors.toList());
25
	}
26

  
27
	public CollectorPlugin get(final String protocol) throws CollectException {
28
		for (final CollectorPlugin cp : getAll()) {
29
			if (protocol.equalsIgnoreCase(cp.getProtocol())) { return cp; }
30
		}
31
		throw new CollectException("plugin not found for protocol: " + protocol);
32
	}
33

  
34
	@Override
35
	public void setApplicationContext(final ApplicationContext applicationContext) throws BeansException {
36
		this.applicationContext = applicationContext;
37
	}
38

  
39
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/MetadataCollector.java
1
package eu.dnetlib.msro.workers.aggregation.collect;
2

  
3
import java.util.Set;
4
import java.util.stream.Stream;
5

  
6
import org.springframework.beans.factory.annotation.Autowired;
7
import org.springframework.stereotype.Component;
8

  
9
import com.google.common.collect.Sets;
10

  
11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
12
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
13

  
14
@Component
15
public class MetadataCollector {
16

  
17
	@Autowired
18
	private CollectorPluginEnumerator collectorPluginEnumerator;
19

  
20
	public Stream<String> collect(final InterfaceDescriptor ifDescriptor) throws CollectException {
21
		return dateRangeCollect(ifDescriptor, null, null);
22
	}
23

  
24
	public Stream<String> dateRangeCollect(
25
			final InterfaceDescriptor ifDescriptor, final String from, final String until)
26
			throws CollectException {
27
		final CollectorPlugin plugin = collectorPluginEnumerator.get(ifDescriptor.getProtocol());
28

  
29
		if (!verifyParams(ifDescriptor.getParams().keySet(), Sets.newHashSet(plugin.listNameParameters()))) { throw new CollectException(
30
				"Invalid parameters, valid: " + plugin.listNameParameters() + ", current: " + ifDescriptor.getParams().keySet()); }
31

  
32
		return plugin.collect(ifDescriptor, from, until);
33
	}
34

  
35
	private boolean verifyParams(final Set<String> curr, final Set<String> valid) {
36
		return valid.containsAll(curr);
37
	}
38
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/mongo/MongoDumpPlugin.java
8 8

  
9 9
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
10 10
import eu.dnetlib.msro.exceptions.CollectorServiceRuntimeException;
11
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
13 12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
14 13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
15

  
15 16
import org.springframework.stereotype.Component;
16 17

  
17 18
@Component
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/archive/zip/ZipCollectorPlugin.java
6 6
import java.util.stream.Stream;
7 7

  
8 8
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
13 12
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
13
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
14

  
14 15
import org.springframework.stereotype.Component;
15 16

  
16 17
// import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/archive/targz/TarGzCollectorPlugin.java
8 8
import org.springframework.stereotype.Component;
9 9

  
10 10
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
11
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
13 12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
14 13
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
15 14
// import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
16 15
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
16
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
17 17

  
18 18
/**
19 19
 * Collector pluging for collecting a .tar.gz folder of records
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/filesystem/FilesystemCollectorPlugin.java
12 12
import org.springframework.stereotype.Component;
13 13

  
14 14
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
15
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
16 15
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
17 16
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
18 17
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
19 18
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
20 19
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
21 20
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
21
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
22 22

  
23 23
/**
24 24
 * @author andrea
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/CollectorPlugin.java
5 5
import java.util.stream.Collectors;
6 6
import java.util.stream.Stream;
7 7

  
8
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
9 8
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
9
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
10 10

  
11 11
public interface CollectorPlugin {
12 12

  
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/filesfrommetadata/FilesFromMetadataCollectorPlugin.java
7 7

  
8 8
import org.springframework.stereotype.Component;
9 9

  
10
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
13 12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
14 13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
15 15

  
16 16
/**
17 17
 * @author sandro
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oaisets/OaiSetsIterator.java
9 9
import com.google.common.collect.Sets;
10 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector;
11 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
12
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
12
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
13

  
13 14
import org.apache.commons.logging.Log;
14 15
import org.apache.commons.logging.LogFactory;
15 16
import org.dom4j.Document;
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oaisets/OaiSetsCollectorPlugin.java
6 6
import org.springframework.stereotype.Component;
7 7

  
8 8
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector;
13 12
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
13
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
14 14

  
15 15
@Component
16 16
@DnetCollectorPlugin("oai_sets")
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/OaiIterator.java
14 14
import org.dom4j.Node;
15 15
import org.dom4j.io.SAXReader;
16 16

  
17
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
18 17
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector;
19 18
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner;
19
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
20 20

  
21 21
public class OaiIterator implements Iterator<String> {
22 22

  
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/engine/HttpConnector.java
26 26
import org.apache.commons.logging.LogFactory;
27 27
import org.springframework.stereotype.Component;
28 28

  
29
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
29
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
30 30

  
31 31
/**
32 32
 * @author jochen, michele, andrea
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/OaiCollectorPlugin.java
13 13
import com.google.common.collect.Lists;
14 14

  
15 15
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
16
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
17 16
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
18 17
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
19 18
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
20 19
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
21 20
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector;
22 21
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
22
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
23 23

  
24 24
@Component
25 25
@DnetCollectorPlugin(value = "OAI", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/ftp/FtpCollectorPlugin.java
6 6
import com.google.common.base.Splitter;
7 7
import com.google.common.collect.Sets;
8 8
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
13 12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
14 13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
15

  
15 16
import org.springframework.stereotype.Component;
16 17

  
17 18
/**
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/csv/FileCSVCollectorPlugin.java
20 20
import org.dom4j.Element;
21 21
import org.springframework.stereotype.Component;
22 22

  
23
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
24 23
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
25 24
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
26 25
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
27 26
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
27
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
28 28

  
29 29
@Component
30 30
@DnetCollectorPlugin(value = "fileCSV", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/csv/HttpCSVCollectorPlugin.java
20 20
import com.google.common.collect.Iterators;
21 21

  
22 22
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
23
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
24 23
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
25 24
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
26 25
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
27 26
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
27
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
28 28

  
29 29
/**
30 30
 * The Class HttpCSVCollectorPlugin.
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/httplist/HttpListCollectorPlugin.java
3 3
import java.util.stream.Stream;
4 4

  
5 5
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
6
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
7 6
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
8 7
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
9 8
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
10 9
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
10
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
11

  
11 12
import org.springframework.stereotype.Component;
12 13

  
13 14
@Component
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/FileGZipCollectorPlugin.java
7 7

  
8 8
import org.springframework.stereotype.Component;
9 9

  
10
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
12
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
13 13

  
14 14
@Component
15 15
@DnetCollectorPlugin(value = "fileGzip", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/FileCollectorPlugin.java
6 6

  
7 7
import org.springframework.stereotype.Component;
8 8

  
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
11
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
12 12

  
13 13
@Component
14 14
@DnetCollectorPlugin(value = "file", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/HttpCollectorPlugin.java
10 10
import org.apache.http.impl.client.HttpClients;
11 11
import org.springframework.stereotype.Component;
12 12

  
13
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
14 13
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
15 14
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
15
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
16 16

  
17 17
@Component
18 18
@DnetCollectorPlugin(value = "http", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/ClasspathCollectorPlugin.java
5 5

  
6 6
import org.springframework.stereotype.Component;
7 7

  
8
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
9 8
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
10
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
11 11

  
12 12
@Component
13 13
@DnetCollectorPlugin(value = "classpath", parameters = {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/AbstractSplittedRecordPlugin.java
10 10
import org.apache.commons.lang3.StringUtils;
11 11

  
12 12
import eu.dnetlib.miscutils.iterators.xml.XMLIterator;
13
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
14 13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
15 15
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
16 16

  
17 17
public abstract class AbstractSplittedRecordPlugin implements CollectorPlugin {
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/sftp/SftpCollectorPlugin.java
6 6
import com.google.common.base.Splitter;
7 7
import com.google.common.collect.Sets;
8 8
import eu.dnetlib.miscutils.streams.DnetStreamSupport;
9
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
10 9
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin;
11 10
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam;
12 11
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin;
13 12
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType;
14 13
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor;
14
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
15

  
15 16
import org.springframework.stereotype.Component;
16 17

  
17 18
/**
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/VocabularyRule.java
1
package eu.dnetlib.msro.workers.aggregation.cleaner;
2

  
3
import java.util.HashMap;
4
import java.util.HashSet;
5
import java.util.Map;
6
import java.util.Set;
7

  
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10

  
11
import com.google.common.base.Joiner;
12

  
13
import eu.dnetlib.clients.is.InformationServiceClient;
14
import eu.dnetlib.msro.exceptions.MSROException;
15

  
16
/**
17
 * @author michele
18
 *
19
 *         Vocabulary rules must be declared in a CleanerDS profile, for each vocabulary must be present the relative VocabularyDS profile:
20
 *
21
 *         <RULE xpath="..." vocabularies="VOC1" /> <RULE xpath="..." vocabularies="VOC1, VOC2, VOC3" />
22
 */
23

  
24
public class VocabularyRule extends XPATHCleaningRule {
25

  
26
	private static final Log log = LogFactory.getLog(VocabularyRule.class); // NOPMD by marko on 11/24/08 5:02 PM
27
	private final Set<String> vocabularies;
28
	private final Map<String, String> synonyms = new HashMap<>();
29
	private final Set<String> validTerms = new HashSet<>();
30

  
31
	public VocabularyRule(final Set<String> vocabularies, final InformationServiceClient isClient) throws MSROException {
32
		this.vocabularies = vocabularies;
33
		loadSynonymsAndTerms(isClient);
34
	}
35

  
36
	@Override
37
	protected String calculateNewValue(final String oldValue) throws MSROException {
38
		log.debug("calculating new value for: " + oldValue);
39

  
40
		if (synonyms.isEmpty()) {
41
			log.debug("Vocabulary terms is void, vocabularies: " + vocabularies);
42
		}
43

  
44
		String newValue = null;
45

  
46
		if (synonyms.containsKey(oldValue.toLowerCase())) {
47
			newValue = synonyms.get(oldValue.toLowerCase());
48
		}
49

  
50
		if (newValue == null) {
51
			log.debug("Synonym " + oldValue + " not found in vocabulary");
52
			return oldValue;
53
		}
54

  
55
		return newValue;
56
	}
57

  
58
	private void loadSynonymsAndTerms(final InformationServiceClient isClient) throws MSROException {
59

  
60
		for (final String vocabulary : vocabularies) {
61
			try {
62
				final String query = "for $x in collection('/db/DRIVER/conf/vocabulary')"
63
						+ "//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + vocabulary + "']//TERM return "
64
						+ "( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), "
65
						+ "for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )";
66

  
67
				for (final String s : isClient.find(query)) {
68
					log.debug("SYNONYM : " + s);
69
					final String[] arr = s.split("\\|-:-\\|");
70
					if ((arr[0] == null) || arr[0].isEmpty()) {
71
						continue;
72
					}
73
					synonyms.put(arr[0].toLowerCase(), arr[1]);
74
					validTerms.add(arr[1].toLowerCase());
75
				}
76

  
77
				log.debug("VOCABULARY " + vocabulary.trim() + " - terms size " + synonyms.size());
78
			} catch (final Exception e) {
79
				throw new MSROException("Error obtaining vocabulary " + vocabulary, e);
80
			}
81
		}
82

  
83
	}
84

  
85
	@Override
86
	protected Map<String, String> verifyValue(final String value) throws MSROException {
87
		if (synonyms.isEmpty()) {
88
			log.debug("Vocabulary terms is void, vocabularies: " + vocabularies);
89
		}
90

  
91
		if (validTerms.contains(value.toLowerCase())) { return null; }
92

  
93
		final Map<String, String> error = new HashMap<String, String>();
94
		error.put("term", value);
95
		error.put("vocabularies", vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
96
		error.put("xpath", getXpath());
97
		return error;
98
	}
99

  
100
	public Map<String, String> getVocabularyTerms() {
101
		return synonyms;
102
	}
103

  
104
	@Override
105
	public String toString() {
106
		return "VOCABULARIES: [" + Joiner.on(", ").join(vocabularies) + "]";
107
	}
108

  
109
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/CleaningRuleFactory.java
1
package eu.dnetlib.msro.workers.aggregation.cleaner;
2

  
3
import java.io.StringReader;
4
import java.util.Set;
5

  
6
import org.dom4j.Document;
7
import org.dom4j.Element;
8
import org.dom4j.io.SAXReader;
9
import org.springframework.beans.factory.annotation.Autowired;
10
import org.springframework.stereotype.Component;
11

  
12
import com.google.common.base.Splitter;
13
import com.google.common.collect.Sets;
14

  
15
import eu.dnetlib.clients.is.InformationServiceClient;
16
import eu.dnetlib.msro.exceptions.MSROException;
17

  
18
@Component
19
public class CleaningRuleFactory {
20

  
21
	@Autowired
22
	private InformationServiceClient isClient;
23

  
24
	public CleaningRule obtainCleaningRule(final String ruleId) throws MSROException {
25
		try {
26
			final String prof =
27
					isClient.findOne("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION");
28

  
29
			final SAXReader reader = new SAXReader();
30
			final Document doc = reader.read(new StringReader(prof));
31

  
32
			final CleaningRule rule = new CleaningRule();
33

  
34
			for (final Object o : doc.selectNodes("//RULE")) {
35
				final Element node = (Element) o;
36

  
37
				final String xpath = node.valueOf("@xpath");
38
				final String vocabularies = node.valueOf("@vocabularies");
39
				final String groovyRule = node.valueOf("@groovy");
40
				final String strict = node.valueOf("@strict");
41

  
42
				final XPATHCleaningRule xpathRule;
43
				if ((vocabularies != null) && (vocabularies.length() > 0)) {
44
					final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies));
45
					xpathRule = new VocabularyRule(list, isClient);
46
				} else {
47
					xpathRule = new GroovyRule(groovyRule);
48
				}
49
				xpathRule.setXpath(xpath);
50
				xpathRule.setStrict("true".equals(strict));
51
				rule.getXpathRules().add(xpathRule);
52
			}
53
			return rule;
54
		} catch (final Exception e) {
55
			throw new MSROException("Error obtaing cleaner rule " + ruleId, e);
56
		}
57
	}
58

  
59
}
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/CleaningRule.java
1
package eu.dnetlib.msro.workers.aggregation.cleaner;
2

  
3
import java.io.StringReader;
4
import java.util.ArrayList;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.function.Function;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.Element;
13
import org.dom4j.Namespace;
14
import org.dom4j.QName;
15
import org.dom4j.io.SAXReader;
16
import org.springframework.beans.factory.annotation.Required;
17

  
18
public class CleaningRule implements Function<String, String> {
19

  
20
	private static final Log log = LogFactory.getLog(CleaningRule.class); // NOPMD by marko on 11/24/08 5:02 PM
21

  
22
	private List<XPATHCleaningRule> xpathRules = new ArrayList<XPATHCleaningRule>();
23

  
24
	@Override
25
	public String apply(final String text) {
26

  
27
		try {
28
			final List<Map<String, String>> errors = new ArrayList<>();
29
			final Document doc = (new SAXReader()).read(new StringReader(text));
30
			for (final XPATHCleaningRule r : xpathRules) {
31
				errors.addAll(r.applyXpathRule(doc));
32
			}
33
			if (errors.size() > 0) {
34
				markAsInvalid(doc, errors);
35
			}
36
			return doc.asXML();
37
		} catch (final Exception e) {
38
			log.error("Error evaluating rule", e);
39
		}
40
		return "";
41
	}
42

  
43
	private void markAsInvalid(final Document doc, final List<Map<String, String>> errors) {
44
		final Element element = (Element) doc.selectSingleNode("//*[local-name()='header']");
45
		if (element != null) {
46
			final Element inv = element.addElement(new QName("invalid", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
47
			for (final Map<String, String> e : errors) {
48
				final Element err = inv.addElement(new QName("error", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri")));
49
				for (final Map.Entry<String, String> entry : e.entrySet()) {
50
					err.addAttribute(entry.getKey(), entry.getValue());
51
				}
52
			}
53
			inv.addAttribute("value", "true");
54
		}
55
	}
56

  
57
	public List<XPATHCleaningRule> getXpathRules() {
58
		return xpathRules;
59
	}
60

  
61
	@Required
62
	public void setXpathRules(final List<XPATHCleaningRule> xpathRules) {
63
		this.xpathRules = xpathRules;
64
	}
65

  
66
}
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff