Revision 47707
Added by Michele Artini almost 7 years ago
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/controllers/MsroWorkerController.java | ||
---|---|---|
26 | 26 |
import eu.dnetlib.enabling.annotations.DnetService; |
27 | 27 |
import eu.dnetlib.enabling.annotations.DnetServiceType; |
28 | 28 |
import eu.dnetlib.msro.exceptions.MSROException; |
29 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectorPluginEnumerator; |
|
30 | 29 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
31 | 30 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
32 | 31 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
... | ... | |
34 | 33 |
import eu.dnetlib.msro.workflows.ProcessStatus; |
35 | 34 |
import eu.dnetlib.msro.workflows.WorkflowInstance; |
36 | 35 |
import eu.dnetlib.msro.workflows.WorkflowProcessInfo; |
36 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectorPluginEnumerator; |
|
37 | 37 |
import eu.dnetlib.msro.workflows.procs.ProcessExecutor; |
38 | 38 |
import eu.dnetlib.msro.workflows.procs.ProcessFactory; |
39 | 39 |
import eu.dnetlib.msro.workflows.procs.ProcessRegistry; |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/repohi/RegisterWorkflowFromTemplateJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.repohi; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.stream.Collectors; |
|
8 |
|
|
9 |
import org.apache.commons.lang3.StringEscapeUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
import org.dom4j.Document; |
|
13 |
import org.dom4j.Element; |
|
14 |
import org.dom4j.Node; |
|
15 |
import org.dom4j.io.SAXReader; |
|
16 |
import org.springframework.beans.factory.annotation.Autowired; |
|
17 |
import org.springframework.context.annotation.Scope; |
|
18 |
import org.springframework.stereotype.Component; |
|
19 |
|
|
20 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
21 |
import eu.dnetlib.miscutils.templates.TemplateUtil; |
|
22 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
23 |
import eu.dnetlib.msro.workflows.Arc; |
|
24 |
import eu.dnetlib.msro.workflows.WorkflowStatus; |
|
25 |
import eu.dnetlib.msro.workflows.nodes.ParallelProcessNode; |
|
26 |
import eu.dnetlib.msro.workflows.procs.ProcessAware; |
|
27 |
import eu.dnetlib.msro.workflows.procs.Token; |
|
28 |
import eu.dnetlib.msro.workflows.procs.WorkflowProcess; |
|
29 |
|
|
30 |
@Component |
|
31 |
@Scope("prototype") |
|
32 |
@ProcessNode("RegisterWorkflowFromTemplate") |
|
33 |
public class RegisterWorkflowFromTemplateJobNode extends ParallelProcessNode implements ProcessAware { |
|
34 |
|
|
35 |
private String wfName; |
|
36 |
private String wfDescription; |
|
37 |
private String wfTemplate; |
|
38 |
private Map<String, String> params; |
|
39 |
|
|
40 |
@Autowired |
|
41 |
private TemplateUtil templateUtil; |
|
42 |
|
|
43 |
@Autowired |
|
44 |
private InformationServiceClient isClient; |
|
45 |
|
|
46 |
private WorkflowProcess process; |
|
47 |
|
|
48 |
private static final Log log = LogFactory.getLog(RegisterWorkflowFromTemplateJobNode.class); |
|
49 |
|
|
50 |
@Override |
|
51 |
public String execute(final Token token) throws Exception { |
|
52 |
|
|
53 |
final Map<String, Object> map = new HashMap<>(); |
|
54 |
|
|
55 |
if (params != null) { |
|
56 |
params.forEach((k, v) -> map.put(k, StringEscapeUtils.escapeXml11(v))); |
|
57 |
} |
|
58 |
|
|
59 |
map.put("name", StringEscapeUtils.escapeXml11(wfName)); |
|
60 |
map.put("desc", StringEscapeUtils.escapeXml11(wfDescription)); |
|
61 |
map.put("worker", StringEscapeUtils.escapeXml11(process.getWorkerId())); |
|
62 |
map.put("dsId", StringEscapeUtils.escapeXml11(process.getDsId())); |
|
63 |
map.put("iface", StringEscapeUtils.escapeXml11(process.getDsInterface())); |
|
64 |
|
|
65 |
final String wfProfile = templateUtil.processTemplateProfile(wfTemplate, map).get("wf"); |
|
66 |
|
|
67 |
final Document doc = new SAXReader().read(new StringReader(wfProfile)); |
|
68 |
final Node paramsNode = doc.selectSingleNode("//CONFIGURATION/PARAMETERS"); |
|
69 |
|
|
70 |
final List<?> emptySysParams = paramsNode.selectNodes("//PARAM[@managedBy='system' and @required='true' and not(text())]"); |
|
71 |
if (emptySysParams.size() > 0) { |
|
72 |
((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.WAIT_SYS_SETTINGS.toString()); |
|
73 |
log.warn("The following system parameters are empty: " |
|
74 |
+ emptySysParams.stream().map(p -> ((Node) p).valueOf("@name")).collect(Collectors.joining())); |
|
75 |
} else if (paramsNode.selectNodes("//PARAM[@managedBy='user' and @required='true' and not(text())]").size() > 0) { |
|
76 |
((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.WAIT_USER_SETTINGS.toString()); |
|
77 |
} else { |
|
78 |
((Element) doc.selectSingleNode("//CONFIGURATION")).addAttribute("status", WorkflowStatus.EXECUTABLE.toString()); |
|
79 |
} |
|
80 |
|
|
81 |
final String wfId = isClient.register(doc.asXML()); |
|
82 |
|
|
83 |
token.getEnv().setAttribute("wfId", wfId); |
|
84 |
|
|
85 |
return Arc.DEFAULT_ARC; |
|
86 |
|
|
87 |
} |
|
88 |
|
|
89 |
public String getWfName() { |
|
90 |
return wfName; |
|
91 |
} |
|
92 |
|
|
93 |
public void setWfName(final String wfName) { |
|
94 |
this.wfName = wfName; |
|
95 |
} |
|
96 |
|
|
97 |
public String getWfDescription() { |
|
98 |
return wfDescription; |
|
99 |
} |
|
100 |
|
|
101 |
public void setWfDescription(final String wfDescription) { |
|
102 |
this.wfDescription = wfDescription; |
|
103 |
} |
|
104 |
|
|
105 |
public String getWfTemplate() { |
|
106 |
return wfTemplate; |
|
107 |
} |
|
108 |
|
|
109 |
public void setWfTemplate(final String wfTemplate) { |
|
110 |
this.wfTemplate = wfTemplate; |
|
111 |
} |
|
112 |
|
|
113 |
public Map<String, String> getParams() { |
|
114 |
return params; |
|
115 |
} |
|
116 |
|
|
117 |
public void setParams(final Map<String, String> params) { |
|
118 |
this.params = params; |
|
119 |
} |
|
120 |
|
|
121 |
@Override |
|
122 |
public void setProcess(final WorkflowProcess process) { |
|
123 |
this.process = process; |
|
124 |
} |
|
125 |
|
|
126 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/repohi/RemoveApiExtraFieldsJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.repohi; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.Set; |
|
8 |
import java.util.stream.Collectors; |
|
9 |
|
|
10 |
import org.dom4j.Document; |
|
11 |
import org.dom4j.Node; |
|
12 |
import org.dom4j.io.SAXReader; |
|
13 |
import org.springframework.beans.factory.annotation.Autowired; |
|
14 |
import org.springframework.context.annotation.Scope; |
|
15 |
import org.springframework.stereotype.Component; |
|
16 |
|
|
17 |
import eu.dnetlib.clients.dsManager.DsManagerClient; |
|
18 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
19 |
import eu.dnetlib.clients.locators.ServiceClientFactory; |
|
20 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
21 |
import eu.dnetlib.msro.workflows.Arc; |
|
22 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
23 |
|
|
24 |
@Component |
|
25 |
@Scope("prototype") |
|
26 |
@ProcessNode("RemoveApiExtraFields") |
|
27 |
public class RemoveApiExtraFieldsJobNode extends SimpleParallelProcessNode { |
|
28 |
|
|
29 |
private String datasourceId; |
|
30 |
private String datasourceInterface; |
|
31 |
private List<String> fields; |
|
32 |
|
|
33 |
@Autowired |
|
34 |
private ServiceClientFactory clientFactory; |
|
35 |
|
|
36 |
@Override |
|
37 |
protected String execute() throws Exception { |
|
38 |
if (fields != null) { |
|
39 |
final Set<String> invalidFields = fields.stream().map(String::toLowerCase).collect(Collectors.toSet()); |
|
40 |
final Map<String, String> map = calculateValidExtraFields(datasourceId, datasourceInterface, invalidFields); |
|
41 |
clientFactory.getClient(DsManagerClient.class).bulkUpdateApiExtraFields(datasourceId, datasourceInterface, map); |
|
42 |
} |
|
43 |
return Arc.DEFAULT_ARC; |
|
44 |
} |
|
45 |
|
|
46 |
private Map<String, String> calculateValidExtraFields(final String repoId, final String ifaceId, final Set<String> invalidFields) throws Exception { |
|
47 |
final Map<String, String> res = new HashMap<>(); |
|
48 |
|
|
49 |
final String profile = clientFactory.getClient(InformationServiceClient.class).getProfile(repoId); |
|
50 |
|
|
51 |
final SAXReader reader = new SAXReader(); |
|
52 |
final Document doc = reader.read(new StringReader(profile)); |
|
53 |
|
|
54 |
final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + ifaceId + "']"); |
|
55 |
if (ifcNode != null) { |
|
56 |
for (final Object o : ifcNode.selectNodes("./INTERFACE_EXTRA_FIELD")) { |
|
57 |
final String name = ((Node) o).valueOf("@name"); |
|
58 |
if (!invalidFields.contains(name.toLowerCase())) { |
|
59 |
res.put(name, ((Node) o).getText()); |
|
60 |
} |
|
61 |
} |
|
62 |
} |
|
63 |
|
|
64 |
return res; |
|
65 |
} |
|
66 |
|
|
67 |
public List<String> getFields() { |
|
68 |
return fields; |
|
69 |
} |
|
70 |
|
|
71 |
public void setFields(final List<String> fields) { |
|
72 |
this.fields = fields; |
|
73 |
} |
|
74 |
|
|
75 |
public String getDatasourceId() { |
|
76 |
return datasourceId; |
|
77 |
} |
|
78 |
|
|
79 |
public void setDatasourceId(final String datasourceId) { |
|
80 |
this.datasourceId = datasourceId; |
|
81 |
} |
|
82 |
|
|
83 |
public String getDatasourceInterface() { |
|
84 |
return datasourceInterface; |
|
85 |
} |
|
86 |
|
|
87 |
public void setDatasourceInterface(final String datasourceInterface) { |
|
88 |
this.datasourceInterface = datasourceInterface; |
|
89 |
} |
|
90 |
|
|
91 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/xslt/ApplyXsltJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.xslt; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.Map; |
|
5 |
import java.util.function.Function; |
|
6 |
|
|
7 |
import org.springframework.context.annotation.Scope; |
|
8 |
import org.springframework.core.io.ClassPathResource; |
|
9 |
import org.springframework.stereotype.Component; |
|
10 |
|
|
11 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
12 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
13 |
import eu.dnetlib.msro.exceptions.MSROException; |
|
14 |
import eu.dnetlib.msro.workflows.nodes.AbstractApplyFunctionJobNode; |
|
15 |
|
|
16 |
@Component |
|
17 |
@Scope("prototype") |
|
18 |
@ProcessNode("ApplyXslt") |
|
19 |
public class ApplyXsltJobNode extends AbstractApplyFunctionJobNode<String, String> { |
|
20 |
|
|
21 |
private String xsltClasspath; |
|
22 |
private Map<String, String> xsltParams = new HashMap<>(); |
|
23 |
|
|
24 |
@Override |
|
25 |
protected Function<String, String> getFunction() throws MSROException { |
|
26 |
return new ApplyXslt(new ClassPathResource(xsltClasspath), xsltParams); |
|
27 |
} |
|
28 |
|
|
29 |
public String getXsltClasspath() { |
|
30 |
return xsltClasspath; |
|
31 |
} |
|
32 |
|
|
33 |
public void setXsltClasspath(final String xsltClasspath) { |
|
34 |
this.xsltClasspath = xsltClasspath; |
|
35 |
} |
|
36 |
|
|
37 |
public Map<String, String> getXsltParams() { |
|
38 |
return xsltParams; |
|
39 |
} |
|
40 |
|
|
41 |
public void setXsltParams(final Map<String, String> xsltParams) { |
|
42 |
this.xsltParams = xsltParams; |
|
43 |
} |
|
44 |
|
|
45 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MDStoreToApiExtraFieldsJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.mdstore; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.Map; |
|
6 |
|
|
7 |
import org.dom4j.Document; |
|
8 |
import org.dom4j.Node; |
|
9 |
import org.dom4j.io.SAXReader; |
|
10 |
import org.springframework.beans.factory.annotation.Autowired; |
|
11 |
import org.springframework.context.annotation.Scope; |
|
12 |
import org.springframework.stereotype.Component; |
|
13 |
|
|
14 |
import eu.dnetlib.clients.dsManager.DsManagerClient; |
|
15 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
16 |
import eu.dnetlib.clients.locators.ServiceClientFactory; |
|
17 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
18 |
import eu.dnetlib.msro.workflows.Arc; |
|
19 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
20 |
|
|
21 |
@Component |
|
22 |
@Scope("prototype") |
|
23 |
@ProcessNode("MDStoreToApiExtraFields") |
|
24 |
public class MDStoreToApiExtraFieldsJobNode extends SimpleParallelProcessNode { |
|
25 |
|
|
26 |
private String mdId; |
|
27 |
private String datasourceId; |
|
28 |
private String datasourceInterface; |
|
29 |
private String extraFieldForTotal; |
|
30 |
private String extraFieldForDate; |
|
31 |
private String extraFieldForMdId; |
|
32 |
|
|
33 |
@Autowired |
|
34 |
private InformationServiceClient isClient; |
|
35 |
|
|
36 |
@Autowired |
|
37 |
private ServiceClientFactory serviceClientFactory; |
|
38 |
|
|
39 |
@Override |
|
40 |
protected String execute() throws Exception { |
|
41 |
final String xq = "doc('/db/DRIVER/" + mdId + "')/concat(.//NUMBER_OF_RECORDS, ' @=@ ', .//LAST_STORAGE_DATE)"; |
|
42 |
|
|
43 |
final String s = isClient.findOne(xq); |
|
44 |
|
|
45 |
final String[] arr = s.split(" @=@ "); |
|
46 |
|
|
47 |
final Map<String, String> map = getCurrentExtraFields(datasourceId, datasourceInterface); |
|
48 |
map.put(extraFieldForTotal, arr[0].trim()); |
|
49 |
map.put(extraFieldForDate, arr[1].trim()); |
|
50 |
map.put(extraFieldForMdId, mdId); |
|
51 |
|
|
52 |
serviceClientFactory.getClient(DsManagerClient.class).bulkUpdateApiExtraFields(datasourceId, datasourceInterface, map); |
|
53 |
|
|
54 |
return Arc.DEFAULT_ARC; |
|
55 |
} |
|
56 |
|
|
57 |
private Map<String, String> getCurrentExtraFields(final String dsId, final String ifaceId) throws Exception { |
|
58 |
final Map<String, String> res = new HashMap<>(); |
|
59 |
|
|
60 |
final String profile = isClient.getProfile(dsId); |
|
61 |
|
|
62 |
final SAXReader reader = new SAXReader(); |
|
63 |
final Document doc = reader.read(new StringReader(profile)); |
|
64 |
|
|
65 |
final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + ifaceId + "']"); |
|
66 |
if (ifcNode != null) { |
|
67 |
for (final Object o : ifcNode.selectNodes("./INTERFACE_EXTRA_FIELD")) { |
|
68 |
res.put(((Node) o).valueOf("@name"), ((Node) o).getText()); |
|
69 |
} |
|
70 |
} |
|
71 |
|
|
72 |
return res; |
|
73 |
} |
|
74 |
|
|
75 |
public String getMdId() { |
|
76 |
return mdId; |
|
77 |
} |
|
78 |
|
|
79 |
public void setMdId(final String mdId) { |
|
80 |
this.mdId = mdId; |
|
81 |
} |
|
82 |
|
|
83 |
public String getDatasourceId() { |
|
84 |
return datasourceId; |
|
85 |
} |
|
86 |
|
|
87 |
public void setDatasourceId(final String datasourceId) { |
|
88 |
this.datasourceId = datasourceId; |
|
89 |
} |
|
90 |
|
|
91 |
public String getDatasourceInterface() { |
|
92 |
return datasourceInterface; |
|
93 |
} |
|
94 |
|
|
95 |
public void setDatasourceInterface(final String datasourceInterface) { |
|
96 |
this.datasourceInterface = datasourceInterface; |
|
97 |
} |
|
98 |
|
|
99 |
public String getExtraFieldForTotal() { |
|
100 |
return extraFieldForTotal; |
|
101 |
} |
|
102 |
|
|
103 |
public void setExtraFieldForTotal(final String extraFieldForTotal) { |
|
104 |
this.extraFieldForTotal = extraFieldForTotal; |
|
105 |
} |
|
106 |
|
|
107 |
public String getExtraFieldForDate() { |
|
108 |
return extraFieldForDate; |
|
109 |
} |
|
110 |
|
|
111 |
public void setExtraFieldForDate(final String extraFieldForDate) { |
|
112 |
this.extraFieldForDate = extraFieldForDate; |
|
113 |
} |
|
114 |
|
|
115 |
public String getExtraFieldForMdId() { |
|
116 |
return extraFieldForMdId; |
|
117 |
} |
|
118 |
|
|
119 |
public void setExtraFieldForMdId(final String extraFieldForMdId) { |
|
120 |
this.extraFieldForMdId = extraFieldForMdId; |
|
121 |
} |
|
122 |
|
|
123 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MdBuilderJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.mdstore; |
|
2 |
|
|
3 |
import java.net.URLEncoder; |
|
4 |
import java.util.function.Function; |
|
5 |
|
|
6 |
import org.antlr.stringtemplate.StringTemplate; |
|
7 |
import org.apache.commons.io.IOUtils; |
|
8 |
import org.apache.commons.lang3.StringUtils; |
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
12 |
import org.springframework.beans.factory.annotation.Value; |
|
13 |
import org.springframework.context.annotation.Scope; |
|
14 |
import org.springframework.core.io.Resource; |
|
15 |
import org.springframework.stereotype.Component; |
|
16 |
|
|
17 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
18 |
import eu.dnetlib.exceptions.InformationServiceException; |
|
19 |
import eu.dnetlib.miscutils.functional.xml.ApplyXslt; |
|
20 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
21 |
import eu.dnetlib.msro.exceptions.MSROException; |
|
22 |
import eu.dnetlib.msro.workflows.nodes.AbstractApplyFunctionJobNode; |
|
23 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
24 |
|
|
25 |
@Component |
|
26 |
@Scope("prototype") |
|
27 |
@ProcessNode("MdBuilder") |
|
28 |
public class MdBuilderJobNode extends AbstractApplyFunctionJobNode<String, String> { |
|
29 |
|
|
30 |
private String datasourceId; |
|
31 |
private String datasourceInterface; |
|
32 |
|
|
33 |
@Value("${msro.worker.mdstore.mdbuilder.xslt.template}") |
|
34 |
private Resource mdBuilderTemplateXslt; |
|
35 |
|
|
36 |
@Autowired |
|
37 |
private InformationServiceClient isClient; |
|
38 |
|
|
39 |
private static final Log log = LogFactory.getLog(SimpleParallelProcessNode.class); |
|
40 |
|
|
41 |
@Override |
|
42 |
protected Function<String, String> getFunction() throws MSROException { |
|
43 |
|
|
44 |
if (StringUtils.isBlank(datasourceId)) { throw new MSROException("Missing datasourceId"); } |
|
45 |
if (StringUtils.isBlank(datasourceInterface)) { throw new MSROException("Missing datasourceInterface"); } |
|
46 |
|
|
47 |
final String xq = "for $x in doc('/db/DRIVER/" + datasourceId + "') " |
|
48 |
+ "return concat(" |
|
49 |
+ "$x//BASE_URL, ' @@@ ', " |
|
50 |
+ "$x//EXTRA_FIELDS/FIELD/value[../key='NamespacePrefix'], ' @@@ ', " |
|
51 |
+ "$x//INTERFACE[@id='" + datasourceInterface + "']/INTERFACE_EXTRA_FIELD[@name='metadata_identifier_path'], ' @@@ ', " |
|
52 |
+ "$x//INTERFACE[@id='" + datasourceInterface + "']/ACCESS_PROTOCOL/@format)"; |
|
53 |
try { |
|
54 |
final String[] arr = isClient.findOne(xq).split("@@@"); |
|
55 |
|
|
56 |
final StringTemplate st = new StringTemplate(IOUtils.toString(mdBuilderTemplateXslt.getInputStream())); |
|
57 |
st.setAttribute("datasourceId", datasourceId); |
|
58 |
st.setAttribute("baseurl", URLEncoder.encode(arr[0].trim(), "UTF-8")); |
|
59 |
st.setAttribute("namespacePrefix", arr[1].trim()); |
|
60 |
st.setAttribute("xpath", arr[2].trim()); |
|
61 |
st.setAttribute("metadatanamespace", getMetadataNamespace(arr[3].trim())); |
|
62 |
|
|
63 |
return new ApplyXslt(st.toString()); |
|
64 |
} catch (final Exception e) { |
|
65 |
log.error("Error preparing MDBuilder function", e); |
|
66 |
throw new MSROException("Error preparing MDBuilder function", e); |
|
67 |
} |
|
68 |
} |
|
69 |
|
|
70 |
private String getMetadataNamespace(final String format) { |
|
71 |
final String xQuery = "collection('/db/DRIVER/conf/mdFormats')//METADATAFORMAT[@name='" + format + "' or @prefix='" + format + "']/@namespace/string()"; |
|
72 |
try { |
|
73 |
return isClient.findOne(xQuery); |
|
74 |
} catch (final InformationServiceException e) { |
|
75 |
log.warn("Format " + format + " not registered on IS"); |
|
76 |
return ""; |
|
77 |
} |
|
78 |
|
|
79 |
} |
|
80 |
|
|
81 |
public String getDatasourceId() { |
|
82 |
return datasourceId; |
|
83 |
} |
|
84 |
|
|
85 |
public void setDatasourceId(final String datasourceId) { |
|
86 |
this.datasourceId = datasourceId; |
|
87 |
} |
|
88 |
|
|
89 |
public String getDatasourceInterface() { |
|
90 |
return datasourceInterface; |
|
91 |
} |
|
92 |
|
|
93 |
public void setDatasourceInterface(final String datasourceInterface) { |
|
94 |
this.datasourceInterface = datasourceInterface; |
|
95 |
} |
|
96 |
|
|
97 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/MDStoreFeederJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.mdstore; |
|
2 |
|
|
3 |
import java.util.stream.Stream; |
|
4 |
|
|
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 |
import org.springframework.beans.factory.annotation.Autowired; |
|
8 |
import org.springframework.context.annotation.Scope; |
|
9 |
import org.springframework.stereotype.Component; |
|
10 |
|
|
11 |
import eu.dnetlib.data.mdstore.MDStoreService; |
|
12 |
import eu.dnetlib.data.mdstore.MDStoreService.FeedMode; |
|
13 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
14 |
import eu.dnetlib.msro.workflows.Arc; |
|
15 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
16 |
|
|
17 |
@Component |
|
18 |
@Scope("prototype") |
|
19 |
@ProcessNode("Store") |
|
20 |
public class MDStoreFeederJobNode extends SimpleParallelProcessNode { |
|
21 |
|
|
22 |
private static final Log log = LogFactory.getLog(MDStoreFeederJobNode.class); |
|
23 |
|
|
24 |
@Autowired |
|
25 |
private MDStoreService mdstoreService; |
|
26 |
|
|
27 |
private String mdId; |
|
28 |
|
|
29 |
private Stream<String> inputStream; |
|
30 |
|
|
31 |
private FeedMode mode; |
|
32 |
|
|
33 |
@Override |
|
34 |
protected String execute() throws Exception { |
|
35 |
log.info("Start feeding mdstore " + mdId); |
|
36 |
mdstoreService.store(mdId, inputStream, mode); |
|
37 |
return Arc.DEFAULT_ARC; |
|
38 |
} |
|
39 |
|
|
40 |
public String getMdId() { |
|
41 |
return mdId; |
|
42 |
} |
|
43 |
|
|
44 |
public void setMdId(final String mdId) { |
|
45 |
this.mdId = mdId; |
|
46 |
} |
|
47 |
|
|
48 |
public Stream<String> getInputStream() { |
|
49 |
return inputStream; |
|
50 |
} |
|
51 |
|
|
52 |
public void setInputStream(final Stream<String> inputStream) { |
|
53 |
this.inputStream = inputStream; |
|
54 |
} |
|
55 |
|
|
56 |
public FeedMode getMode() { |
|
57 |
return mode; |
|
58 |
} |
|
59 |
|
|
60 |
public void setMode(final FeedMode mode) { |
|
61 |
this.mode = mode; |
|
62 |
} |
|
63 |
|
|
64 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/DeleteMDStoreJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.mdstore; |
|
2 |
|
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
import org.springframework.beans.factory.annotation.Autowired; |
|
6 |
import org.springframework.context.annotation.Scope; |
|
7 |
import org.springframework.stereotype.Component; |
|
8 |
|
|
9 |
import eu.dnetlib.data.mdstore.MDStoreService; |
|
10 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
11 |
import eu.dnetlib.msro.workflows.Arc; |
|
12 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
13 |
|
|
14 |
@Component |
|
15 |
@Scope("prototype") |
|
16 |
@ProcessNode("DeleteMDStore") |
|
17 |
public class DeleteMDStoreJobNode extends SimpleParallelProcessNode { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(DeleteMDStoreJobNode.class); |
|
20 |
|
|
21 |
@Autowired |
|
22 |
private MDStoreService mdstoreService; |
|
23 |
|
|
24 |
private String mdId; |
|
25 |
|
|
26 |
@Override |
|
27 |
protected String execute() throws Exception { |
|
28 |
mdstoreService.delete(mdId); |
|
29 |
log.info("Mdstore deleted: " + mdId); |
|
30 |
return Arc.DEFAULT_ARC; |
|
31 |
} |
|
32 |
|
|
33 |
public String getMdId() { |
|
34 |
return mdId; |
|
35 |
} |
|
36 |
|
|
37 |
public void setMdId(final String mdId) { |
|
38 |
this.mdId = mdId; |
|
39 |
} |
|
40 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/mdstore/CreateMDStoreJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.mdstore; |
|
2 |
|
|
3 |
import org.apache.commons.logging.Log; |
|
4 |
import org.apache.commons.logging.LogFactory; |
|
5 |
import org.springframework.beans.factory.annotation.Autowired; |
|
6 |
import org.springframework.context.annotation.Scope; |
|
7 |
import org.springframework.stereotype.Component; |
|
8 |
|
|
9 |
import eu.dnetlib.data.mdstore.MDStoreService; |
|
10 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
11 |
import eu.dnetlib.msro.workflows.Arc; |
|
12 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
13 |
|
|
14 |
@Component |
|
15 |
@Scope("prototype") |
|
16 |
@ProcessNode("CreateMDStore") |
|
17 |
public class CreateMDStoreJobNode extends SimpleParallelProcessNode { |
|
18 |
|
|
19 |
private static final Log log = LogFactory.getLog(CreateMDStoreJobNode.class); |
|
20 |
|
|
21 |
@Autowired |
|
22 |
private MDStoreService mdstoreService; |
|
23 |
|
|
24 |
private String format; |
|
25 |
|
|
26 |
private String layout; |
|
27 |
|
|
28 |
private String interpretation; |
|
29 |
|
|
30 |
private String resultMdId; |
|
31 |
|
|
32 |
@Override |
|
33 |
protected String execute() throws Exception { |
|
34 |
resultMdId = mdstoreService.create(format, layout, interpretation); |
|
35 |
log.info("New mdstore " + resultMdId); |
|
36 |
return Arc.DEFAULT_ARC; |
|
37 |
} |
|
38 |
|
|
39 |
public String getFormat() { |
|
40 |
return format; |
|
41 |
} |
|
42 |
|
|
43 |
public void setFormat(final String format) { |
|
44 |
this.format = format; |
|
45 |
} |
|
46 |
|
|
47 |
public String getLayout() { |
|
48 |
return layout; |
|
49 |
} |
|
50 |
|
|
51 |
public void setLayout(final String layout) { |
|
52 |
this.layout = layout; |
|
53 |
} |
|
54 |
|
|
55 |
public String getInterpretation() { |
|
56 |
return interpretation; |
|
57 |
} |
|
58 |
|
|
59 |
public void setInterpretation(final String interpretation) { |
|
60 |
this.interpretation = interpretation; |
|
61 |
} |
|
62 |
|
|
63 |
public String getResultMdId() { |
|
64 |
return resultMdId; |
|
65 |
} |
|
66 |
|
|
67 |
public void setResultMdId(final String resultMdId) { |
|
68 |
this.resultMdId = resultMdId; |
|
69 |
} |
|
70 |
|
|
71 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectException.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.collect; |
|
2 |
|
|
3 |
import eu.dnetlib.msro.exceptions.MSROException; |
|
4 |
|
|
5 |
public class CollectException extends MSROException { |
|
6 |
|
|
7 |
private static final long serialVersionUID = -5882744781341998714L; |
|
8 |
|
|
9 |
public CollectException(final String message) { |
|
10 |
super(message); |
|
11 |
} |
|
12 |
|
|
13 |
public CollectException(final String message, final Throwable cause) { |
|
14 |
super(message, cause); |
|
15 |
} |
|
16 |
|
|
17 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.collect; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.stream.Stream; |
|
5 |
|
|
6 |
import org.dom4j.Document; |
|
7 |
import org.dom4j.Node; |
|
8 |
import org.dom4j.io.SAXReader; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
import org.springframework.context.annotation.Scope; |
|
11 |
import org.springframework.stereotype.Component; |
|
12 |
|
|
13 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
14 |
import eu.dnetlib.msro.annotations.ProcessNode; |
|
15 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
|
16 |
import eu.dnetlib.msro.workflows.Arc; |
|
17 |
import eu.dnetlib.msro.workflows.nodes.SimpleParallelProcessNode; |
|
18 |
|
|
19 |
@Component |
|
20 |
@Scope("prototype") |
|
21 |
@ProcessNode("Collect") |
|
22 |
public class CollectJobNode extends SimpleParallelProcessNode { |
|
23 |
|
|
24 |
@Autowired |
|
25 |
private InformationServiceClient isLookup; |
|
26 |
|
|
27 |
@Autowired |
|
28 |
private MetadataCollector collector; |
|
29 |
|
|
30 |
private String datasourceId; |
|
31 |
private String datasourceInterface; |
|
32 |
private Stream<String> outputStream; |
|
33 |
|
|
34 |
@Override |
|
35 |
protected String execute() throws Exception { |
|
36 |
final String profile = isLookup.getProfile(datasourceId); |
|
37 |
final Document doc = new SAXReader().read(new StringReader(profile)); |
|
38 |
final Node ifcNode = doc.selectSingleNode("//INTERFACE[@id='" + datasourceInterface + "']"); |
|
39 |
|
|
40 |
final InterfaceDescriptor interfaceDescriptor = InterfaceDescriptor.newInstance(ifcNode); |
|
41 |
|
|
42 |
outputStream = collector.collect(interfaceDescriptor); |
|
43 |
|
|
44 |
return Arc.DEFAULT_ARC; |
|
45 |
} |
|
46 |
|
|
47 |
public String getDatasourceId() { |
|
48 |
return datasourceId; |
|
49 |
} |
|
50 |
|
|
51 |
public void setDatasourceId(final String datasourceId) { |
|
52 |
this.datasourceId = datasourceId; |
|
53 |
} |
|
54 |
|
|
55 |
public String getDatasourceInterface() { |
|
56 |
return datasourceInterface; |
|
57 |
} |
|
58 |
|
|
59 |
public void setDatasourceInterface(final String datasourceInterface) { |
|
60 |
this.datasourceInterface = datasourceInterface; |
|
61 |
} |
|
62 |
|
|
63 |
public Stream<String> getOutputStream() { |
|
64 |
return outputStream; |
|
65 |
} |
|
66 |
|
|
67 |
public void setOutputStream(final Stream<String> outputStream) { |
|
68 |
this.outputStream = outputStream; |
|
69 |
} |
|
70 |
|
|
71 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/CollectorPluginEnumerator.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.collect; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.stream.Collectors; |
|
5 |
|
|
6 |
import org.springframework.beans.BeansException; |
|
7 |
import org.springframework.context.ApplicationContext; |
|
8 |
import org.springframework.context.ApplicationContextAware; |
|
9 |
import org.springframework.stereotype.Component; |
|
10 |
|
|
11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
|
12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
|
13 |
|
|
14 |
@Component |
|
15 |
public class CollectorPluginEnumerator implements ApplicationContextAware { |
|
16 |
|
|
17 |
private ApplicationContext applicationContext; |
|
18 |
|
|
19 |
public List<CollectorPlugin> getAll() { |
|
20 |
return applicationContext.getBeansOfType(CollectorPlugin.class) |
|
21 |
.values() |
|
22 |
.stream() |
|
23 |
.filter(o -> o.getClass().isAnnotationPresent(DnetCollectorPlugin.class)) |
|
24 |
.collect(Collectors.toList()); |
|
25 |
} |
|
26 |
|
|
27 |
public CollectorPlugin get(final String protocol) throws CollectException { |
|
28 |
for (final CollectorPlugin cp : getAll()) { |
|
29 |
if (protocol.equalsIgnoreCase(cp.getProtocol())) { return cp; } |
|
30 |
} |
|
31 |
throw new CollectException("plugin not found for protocol: " + protocol); |
|
32 |
} |
|
33 |
|
|
34 |
@Override |
|
35 |
public void setApplicationContext(final ApplicationContext applicationContext) throws BeansException { |
|
36 |
this.applicationContext = applicationContext; |
|
37 |
} |
|
38 |
|
|
39 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/MetadataCollector.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.collect; |
|
2 |
|
|
3 |
import java.util.Set; |
|
4 |
import java.util.stream.Stream; |
|
5 |
|
|
6 |
import org.springframework.beans.factory.annotation.Autowired; |
|
7 |
import org.springframework.stereotype.Component; |
|
8 |
|
|
9 |
import com.google.common.collect.Sets; |
|
10 |
|
|
11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
|
12 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
|
13 |
|
|
14 |
@Component |
|
15 |
public class MetadataCollector { |
|
16 |
|
|
17 |
@Autowired |
|
18 |
private CollectorPluginEnumerator collectorPluginEnumerator; |
|
19 |
|
|
20 |
public Stream<String> collect(final InterfaceDescriptor ifDescriptor) throws CollectException { |
|
21 |
return dateRangeCollect(ifDescriptor, null, null); |
|
22 |
} |
|
23 |
|
|
24 |
public Stream<String> dateRangeCollect( |
|
25 |
final InterfaceDescriptor ifDescriptor, final String from, final String until) |
|
26 |
throws CollectException { |
|
27 |
final CollectorPlugin plugin = collectorPluginEnumerator.get(ifDescriptor.getProtocol()); |
|
28 |
|
|
29 |
if (!verifyParams(ifDescriptor.getParams().keySet(), Sets.newHashSet(plugin.listNameParameters()))) { throw new CollectException( |
|
30 |
"Invalid parameters, valid: " + plugin.listNameParameters() + ", current: " + ifDescriptor.getParams().keySet()); } |
|
31 |
|
|
32 |
return plugin.collect(ifDescriptor, from, until); |
|
33 |
} |
|
34 |
|
|
35 |
private boolean verifyParams(final Set<String> curr, final Set<String> valid) { |
|
36 |
return valid.containsAll(curr); |
|
37 |
} |
|
38 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/mongo/MongoDumpPlugin.java | ||
---|---|---|
8 | 8 |
|
9 | 9 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
10 | 10 |
import eu.dnetlib.msro.exceptions.CollectorServiceRuntimeException; |
11 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
14 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
15 |
|
|
15 | 16 |
import org.springframework.stereotype.Component; |
16 | 17 |
|
17 | 18 |
@Component |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/archive/zip/ZipCollectorPlugin.java | ||
---|---|---|
6 | 6 |
import java.util.stream.Stream; |
7 | 7 |
|
8 | 8 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
9 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
13 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
14 |
|
|
14 | 15 |
import org.springframework.stereotype.Component; |
15 | 16 |
|
16 | 17 |
// import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/archive/targz/TarGzCollectorPlugin.java | ||
---|---|---|
8 | 8 |
import org.springframework.stereotype.Component; |
9 | 9 |
|
10 | 10 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
11 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner; |
15 | 14 |
// import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
16 | 15 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
16 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
17 | 17 |
|
18 | 18 |
/** |
19 | 19 |
* Collector pluging for collecting a .tar.gz folder of records |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/filesystem/FilesystemCollectorPlugin.java | ||
---|---|---|
12 | 12 |
import org.springframework.stereotype.Component; |
13 | 13 |
|
14 | 14 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
15 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
16 | 15 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
17 | 16 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
18 | 17 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
19 | 18 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType; |
20 | 19 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner; |
21 | 20 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
21 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
22 | 22 |
|
23 | 23 |
/** |
24 | 24 |
* @author andrea |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/CollectorPlugin.java | ||
---|---|---|
5 | 5 |
import java.util.stream.Collectors; |
6 | 6 |
import java.util.stream.Stream; |
7 | 7 |
|
8 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
9 | 8 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
9 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
10 | 10 |
|
11 | 11 |
public interface CollectorPlugin { |
12 | 12 |
|
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/filesfrommetadata/FilesFromMetadataCollectorPlugin.java | ||
---|---|---|
7 | 7 |
|
8 | 8 |
import org.springframework.stereotype.Component; |
9 | 9 |
|
10 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
14 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
15 | 15 |
|
16 | 16 |
/** |
17 | 17 |
* @author sandro |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oaisets/OaiSetsIterator.java | ||
---|---|---|
9 | 9 |
import com.google.common.collect.Sets; |
10 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector; |
11 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner; |
12 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
12 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
13 |
|
|
13 | 14 |
import org.apache.commons.logging.Log; |
14 | 15 |
import org.apache.commons.logging.LogFactory; |
15 | 16 |
import org.dom4j.Document; |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oaisets/OaiSetsCollectorPlugin.java | ||
---|---|---|
6 | 6 |
import org.springframework.stereotype.Component; |
7 | 7 |
|
8 | 8 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
9 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
13 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
14 | 14 |
|
15 | 15 |
@Component |
16 | 16 |
@DnetCollectorPlugin("oai_sets") |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/OaiIterator.java | ||
---|---|---|
14 | 14 |
import org.dom4j.Node; |
15 | 15 |
import org.dom4j.io.SAXReader; |
16 | 16 |
|
17 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
18 | 17 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector; |
19 | 18 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.XmlCleaner; |
19 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
20 | 20 |
|
21 | 21 |
public class OaiIterator implements Iterator<String> { |
22 | 22 |
|
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/engine/HttpConnector.java | ||
---|---|---|
26 | 26 |
import org.apache.commons.logging.LogFactory; |
27 | 27 |
import org.springframework.stereotype.Component; |
28 | 28 |
|
29 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException;
|
|
29 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException;
|
|
30 | 30 |
|
31 | 31 |
/** |
32 | 32 |
* @author jochen, michele, andrea |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/oai/OaiCollectorPlugin.java | ||
---|---|---|
13 | 13 |
import com.google.common.collect.Lists; |
14 | 14 |
|
15 | 15 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
16 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
17 | 16 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
18 | 17 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
19 | 18 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
20 | 19 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType; |
21 | 20 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.oai.engine.HttpConnector; |
22 | 21 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
22 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
23 | 23 |
|
24 | 24 |
@Component |
25 | 25 |
@DnetCollectorPlugin(value = "OAI", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/ftp/FtpCollectorPlugin.java | ||
---|---|---|
6 | 6 |
import com.google.common.base.Splitter; |
7 | 7 |
import com.google.common.collect.Sets; |
8 | 8 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
9 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType; |
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
14 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
15 |
|
|
15 | 16 |
import org.springframework.stereotype.Component; |
16 | 17 |
|
17 | 18 |
/** |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/csv/FileCSVCollectorPlugin.java | ||
---|---|---|
20 | 20 |
import org.dom4j.Element; |
21 | 21 |
import org.springframework.stereotype.Component; |
22 | 22 |
|
23 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
24 | 23 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
25 | 24 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
26 | 25 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
27 | 26 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
27 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
28 | 28 |
|
29 | 29 |
@Component |
30 | 30 |
@DnetCollectorPlugin(value = "fileCSV", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/csv/HttpCSVCollectorPlugin.java | ||
---|---|---|
20 | 20 |
import com.google.common.collect.Iterators; |
21 | 21 |
|
22 | 22 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
23 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
24 | 23 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
25 | 24 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
26 | 25 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
27 | 26 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
27 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
28 | 28 |
|
29 | 29 |
/** |
30 | 30 |
* The Class HttpCSVCollectorPlugin. |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/httplist/HttpListCollectorPlugin.java | ||
---|---|---|
3 | 3 |
import java.util.stream.Stream; |
4 | 4 |
|
5 | 5 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
6 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
7 | 6 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
8 | 7 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
9 | 8 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
10 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
11 |
|
|
11 | 12 |
import org.springframework.stereotype.Component; |
12 | 13 |
|
13 | 14 |
@Component |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/FileGZipCollectorPlugin.java | ||
---|---|---|
7 | 7 |
|
8 | 8 |
import org.springframework.stereotype.Component; |
9 | 9 |
|
10 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
12 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
13 | 13 |
|
14 | 14 |
@Component |
15 | 15 |
@DnetCollectorPlugin(value = "fileGzip", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/FileCollectorPlugin.java | ||
---|---|---|
6 | 6 |
|
7 | 7 |
import org.springframework.stereotype.Component; |
8 | 8 |
|
9 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
11 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
12 | 12 |
|
13 | 13 |
@Component |
14 | 14 |
@DnetCollectorPlugin(value = "file", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/HttpCollectorPlugin.java | ||
---|---|---|
10 | 10 |
import org.apache.http.impl.client.HttpClients; |
11 | 11 |
import org.springframework.stereotype.Component; |
12 | 12 |
|
13 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
15 | 14 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
15 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
16 | 16 |
|
17 | 17 |
@Component |
18 | 18 |
@DnetCollectorPlugin(value = "http", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/ClasspathCollectorPlugin.java | ||
---|---|---|
5 | 5 |
|
6 | 6 |
import org.springframework.stereotype.Component; |
7 | 7 |
|
8 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
9 | 8 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
10 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
11 | 11 |
|
12 | 12 |
@Component |
13 | 13 |
@DnetCollectorPlugin(value = "classpath", parameters = { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/split/AbstractSplittedRecordPlugin.java | ||
---|---|---|
10 | 10 |
import org.apache.commons.lang3.StringUtils; |
11 | 11 |
|
12 | 12 |
import eu.dnetlib.miscutils.iterators.xml.XMLIterator; |
13 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
14 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
15 | 15 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
16 | 16 |
|
17 | 17 |
public abstract class AbstractSplittedRecordPlugin implements CollectorPlugin { |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/collect/plugins/sftp/SftpCollectorPlugin.java | ||
---|---|---|
6 | 6 |
import com.google.common.base.Splitter; |
7 | 7 |
import com.google.common.collect.Sets; |
8 | 8 |
import eu.dnetlib.miscutils.streams.DnetStreamSupport; |
9 |
import eu.dnetlib.msro.workers.aggregation.collect.CollectException; |
|
10 | 9 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.CollectorPlugin; |
11 | 10 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorParam; |
12 | 11 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.DnetCollectorPlugin; |
13 | 12 |
import eu.dnetlib.msro.workers.aggregation.collect.plugins.ProtocolParameterType; |
14 | 13 |
import eu.dnetlib.msro.workers.aggregation.objects.InterfaceDescriptor; |
14 |
import eu.dnetlib.msro.workflows.nodes.collect.CollectException; |
|
15 |
|
|
15 | 16 |
import org.springframework.stereotype.Component; |
16 | 17 |
|
17 | 18 |
/** |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/VocabularyRule.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.cleaner; |
|
2 |
|
|
3 |
import java.util.HashMap; |
|
4 |
import java.util.HashSet; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.Set; |
|
7 |
|
|
8 |
import org.apache.commons.logging.Log; |
|
9 |
import org.apache.commons.logging.LogFactory; |
|
10 |
|
|
11 |
import com.google.common.base.Joiner; |
|
12 |
|
|
13 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
14 |
import eu.dnetlib.msro.exceptions.MSROException; |
|
15 |
|
|
16 |
/** |
|
17 |
* @author michele |
|
18 |
* |
|
19 |
* Vocabulary rules must be declared in a CleanerDS profile, for each vocabulary must be present the relative VocabularyDS profile: |
|
20 |
* |
|
21 |
* <RULE xpath="..." vocabularies="VOC1" /> <RULE xpath="..." vocabularies="VOC1, VOC2, VOC3" /> |
|
22 |
*/ |
|
23 |
|
|
24 |
public class VocabularyRule extends XPATHCleaningRule { |
|
25 |
|
|
26 |
private static final Log log = LogFactory.getLog(VocabularyRule.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
27 |
private final Set<String> vocabularies; |
|
28 |
private final Map<String, String> synonyms = new HashMap<>(); |
|
29 |
private final Set<String> validTerms = new HashSet<>(); |
|
30 |
|
|
31 |
public VocabularyRule(final Set<String> vocabularies, final InformationServiceClient isClient) throws MSROException { |
|
32 |
this.vocabularies = vocabularies; |
|
33 |
loadSynonymsAndTerms(isClient); |
|
34 |
} |
|
35 |
|
|
36 |
@Override |
|
37 |
protected String calculateNewValue(final String oldValue) throws MSROException { |
|
38 |
log.debug("calculating new value for: " + oldValue); |
|
39 |
|
|
40 |
if (synonyms.isEmpty()) { |
|
41 |
log.debug("Vocabulary terms is void, vocabularies: " + vocabularies); |
|
42 |
} |
|
43 |
|
|
44 |
String newValue = null; |
|
45 |
|
|
46 |
if (synonyms.containsKey(oldValue.toLowerCase())) { |
|
47 |
newValue = synonyms.get(oldValue.toLowerCase()); |
|
48 |
} |
|
49 |
|
|
50 |
if (newValue == null) { |
|
51 |
log.debug("Synonym " + oldValue + " not found in vocabulary"); |
|
52 |
return oldValue; |
|
53 |
} |
|
54 |
|
|
55 |
return newValue; |
|
56 |
} |
|
57 |
|
|
58 |
private void loadSynonymsAndTerms(final InformationServiceClient isClient) throws MSROException { |
|
59 |
|
|
60 |
for (final String vocabulary : vocabularies) { |
|
61 |
try { |
|
62 |
final String query = "for $x in collection('/db/DRIVER/conf/vocabulary')" |
|
63 |
+ "//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + vocabulary + "']//TERM return " |
|
64 |
+ "( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), " |
|
65 |
+ "for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )"; |
|
66 |
|
|
67 |
for (final String s : isClient.find(query)) { |
|
68 |
log.debug("SYNONYM : " + s); |
|
69 |
final String[] arr = s.split("\\|-:-\\|"); |
|
70 |
if ((arr[0] == null) || arr[0].isEmpty()) { |
|
71 |
continue; |
|
72 |
} |
|
73 |
synonyms.put(arr[0].toLowerCase(), arr[1]); |
|
74 |
validTerms.add(arr[1].toLowerCase()); |
|
75 |
} |
|
76 |
|
|
77 |
log.debug("VOCABULARY " + vocabulary.trim() + " - terms size " + synonyms.size()); |
|
78 |
} catch (final Exception e) { |
|
79 |
throw new MSROException("Error obtaining vocabulary " + vocabulary, e); |
|
80 |
} |
|
81 |
} |
|
82 |
|
|
83 |
} |
|
84 |
|
|
85 |
@Override |
|
86 |
protected Map<String, String> verifyValue(final String value) throws MSROException { |
|
87 |
if (synonyms.isEmpty()) { |
|
88 |
log.debug("Vocabulary terms is void, vocabularies: " + vocabularies); |
|
89 |
} |
|
90 |
|
|
91 |
if (validTerms.contains(value.toLowerCase())) { return null; } |
|
92 |
|
|
93 |
final Map<String, String> error = new HashMap<String, String>(); |
|
94 |
error.put("term", value); |
|
95 |
error.put("vocabularies", vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", "")); |
|
96 |
error.put("xpath", getXpath()); |
|
97 |
return error; |
|
98 |
} |
|
99 |
|
|
100 |
public Map<String, String> getVocabularyTerms() { |
|
101 |
return synonyms; |
|
102 |
} |
|
103 |
|
|
104 |
@Override |
|
105 |
public String toString() { |
|
106 |
return "VOCABULARIES: [" + Joiner.on(", ").join(vocabularies) + "]"; |
|
107 |
} |
|
108 |
|
|
109 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/CleaningRuleFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.cleaner; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.Set; |
|
5 |
|
|
6 |
import org.dom4j.Document; |
|
7 |
import org.dom4j.Element; |
|
8 |
import org.dom4j.io.SAXReader; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
import org.springframework.stereotype.Component; |
|
11 |
|
|
12 |
import com.google.common.base.Splitter; |
|
13 |
import com.google.common.collect.Sets; |
|
14 |
|
|
15 |
import eu.dnetlib.clients.is.InformationServiceClient; |
|
16 |
import eu.dnetlib.msro.exceptions.MSROException; |
|
17 |
|
|
18 |
@Component |
|
19 |
public class CleaningRuleFactory { |
|
20 |
|
|
21 |
@Autowired |
|
22 |
private InformationServiceClient isClient; |
|
23 |
|
|
24 |
public CleaningRule obtainCleaningRule(final String ruleId) throws MSROException { |
|
25 |
try { |
|
26 |
final String prof = |
|
27 |
isClient.findOne("/RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value='" + ruleId + "' or .//CLEANER_NAME='" + ruleId + "']//CONFIGURATION"); |
|
28 |
|
|
29 |
final SAXReader reader = new SAXReader(); |
|
30 |
final Document doc = reader.read(new StringReader(prof)); |
|
31 |
|
|
32 |
final CleaningRule rule = new CleaningRule(); |
|
33 |
|
|
34 |
for (final Object o : doc.selectNodes("//RULE")) { |
|
35 |
final Element node = (Element) o; |
|
36 |
|
|
37 |
final String xpath = node.valueOf("@xpath"); |
|
38 |
final String vocabularies = node.valueOf("@vocabularies"); |
|
39 |
final String groovyRule = node.valueOf("@groovy"); |
|
40 |
final String strict = node.valueOf("@strict"); |
|
41 |
|
|
42 |
final XPATHCleaningRule xpathRule; |
|
43 |
if ((vocabularies != null) && (vocabularies.length() > 0)) { |
|
44 |
final Set<String> list = Sets.newHashSet(Splitter.on(",").omitEmptyStrings().trimResults().split(vocabularies)); |
|
45 |
xpathRule = new VocabularyRule(list, isClient); |
|
46 |
} else { |
|
47 |
xpathRule = new GroovyRule(groovyRule); |
|
48 |
} |
|
49 |
xpathRule.setXpath(xpath); |
|
50 |
xpathRule.setStrict("true".equals(strict)); |
|
51 |
rule.getXpathRules().add(xpathRule); |
|
52 |
} |
|
53 |
return rule; |
|
54 |
} catch (final Exception e) { |
|
55 |
throw new MSROException("Error obtaing cleaner rule " + ruleId, e); |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
} |
modules/dnet-springboot-apps/trunk/dnet-simple-aggregation-worker/src/main/java/eu/dnetlib/msro/workers/aggregation/cleaner/CleaningRule.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workers.aggregation.cleaner; |
|
2 |
|
|
3 |
import java.io.StringReader; |
|
4 |
import java.util.ArrayList; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.function.Function; |
|
8 |
|
|
9 |
import org.apache.commons.logging.Log; |
|
10 |
import org.apache.commons.logging.LogFactory; |
|
11 |
import org.dom4j.Document; |
|
12 |
import org.dom4j.Element; |
|
13 |
import org.dom4j.Namespace; |
|
14 |
import org.dom4j.QName; |
|
15 |
import org.dom4j.io.SAXReader; |
|
16 |
import org.springframework.beans.factory.annotation.Required; |
|
17 |
|
|
18 |
public class CleaningRule implements Function<String, String> { |
|
19 |
|
|
20 |
private static final Log log = LogFactory.getLog(CleaningRule.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
21 |
|
|
22 |
private List<XPATHCleaningRule> xpathRules = new ArrayList<XPATHCleaningRule>(); |
|
23 |
|
|
24 |
@Override |
|
25 |
public String apply(final String text) { |
|
26 |
|
|
27 |
try { |
|
28 |
final List<Map<String, String>> errors = new ArrayList<>(); |
|
29 |
final Document doc = (new SAXReader()).read(new StringReader(text)); |
|
30 |
for (final XPATHCleaningRule r : xpathRules) { |
|
31 |
errors.addAll(r.applyXpathRule(doc)); |
|
32 |
} |
|
33 |
if (errors.size() > 0) { |
|
34 |
markAsInvalid(doc, errors); |
|
35 |
} |
|
36 |
return doc.asXML(); |
|
37 |
} catch (final Exception e) { |
|
38 |
log.error("Error evaluating rule", e); |
|
39 |
} |
|
40 |
return ""; |
|
41 |
} |
|
42 |
|
|
43 |
private void markAsInvalid(final Document doc, final List<Map<String, String>> errors) { |
|
44 |
final Element element = (Element) doc.selectSingleNode("//*[local-name()='header']"); |
|
45 |
if (element != null) { |
|
46 |
final Element inv = element.addElement(new QName("invalid", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri"))); |
|
47 |
for (final Map<String, String> e : errors) { |
|
48 |
final Element err = inv.addElement(new QName("error", new Namespace("dri", "http://www.driver-repository.eu/namespace/dri"))); |
|
49 |
for (final Map.Entry<String, String> entry : e.entrySet()) { |
|
50 |
err.addAttribute(entry.getKey(), entry.getValue()); |
|
51 |
} |
|
52 |
} |
|
53 |
inv.addAttribute("value", "true"); |
|
54 |
} |
|
55 |
} |
|
56 |
|
|
57 |
public List<XPATHCleaningRule> getXpathRules() { |
|
58 |
return xpathRules; |
|
59 |
} |
|
60 |
|
|
61 |
@Required |
|
62 |
public void setXpathRules(final List<XPATHCleaningRule> xpathRules) { |
|
63 |
this.xpathRules = xpathRules; |
|
64 |
} |
|
65 |
|
|
66 |
} |
Also available in: Unified diff
import cleaner service