Project

General

Profile

« Previous | Next » 

Revision 39021

[maven-release-plugin] copy for tag dnet-eagle-workflows-3.0.3

View differences:

modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-eagle-workflows/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-eagle-workflows"}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/ui/DownloadPluginValues.java
1
package eu.dnetlib.msro.eagle.ui;
2

  
3
import java.util.List;
4
import java.util.Map;
5

  
6
import javax.annotation.Resource;
7

  
8
import com.google.common.base.Function;
9
import com.google.common.collect.Iterables;
10
import com.google.common.collect.Lists;
11

  
12
import eu.dnetlib.data.download.rmi.DownloadPluginEnumerator;
13
import eu.dnetlib.msro.workflows.util.ValidNodeValuesFetcher;
14

  
15
public class DownloadPluginValues extends ValidNodeValuesFetcher {
16

  
17
	/** The download plugin enumerator. */
18
	@Resource
19
	private DownloadPluginEnumerator downloadPluginEnumerator;
20

  
21
	@Override
22
	protected List<DnetParamValue> obtainValues(final Map<String, String> params) throws Exception {
23

  
24
		return Lists.newArrayList(Iterables.transform(downloadPluginEnumerator.getAll().keySet(), new Function<String, DnetParamValue>() {
25

  
26
			@Override
27
			public DnetParamValue apply(final String input) {
28
				return new DnetParamValue(input, input);
29
			}
30
		}));
31
	}
32

  
33
	/**
34
	 * @return the downloadPluginEnumerator
35
	 */
36
	public DownloadPluginEnumerator getDownloadPluginEnumerator() {
37
		return downloadPluginEnumerator;
38
	}
39

  
40
	/**
41
	 * @param downloadPluginEnumerator
42
	 *            the downloadPluginEnumerator to set
43
	 */
44
	public void setDownloadPluginEnumerator(final DownloadPluginEnumerator downloadPluginEnumerator) {
45
		this.downloadPluginEnumerator = downloadPluginEnumerator;
46
	}
47
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/composition/ComposeEprJobNode.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.composition;
2

  
3
import java.io.StringReader;
4
import java.util.Iterator;
5
import java.util.List;
6

  
7
import javax.xml.ws.wsaddressing.W3CEndpointReference;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.DocumentException;
13
import org.dom4j.Element;
14
import org.dom4j.io.SAXReader;
15
import org.springframework.beans.factory.annotation.Required;
16

  
17
import com.googlecode.sarasvati.Arc;
18
import com.googlecode.sarasvati.NodeToken;
19

  
20
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
21
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
22
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
23
import eu.dnetlib.miscutils.functional.UnaryFunction;
24
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
25

  
26
public class ComposeEprJobNode extends SimpleJobNode {
27

  
28
	private static final Log log = LogFactory.getLog(ComposeEprJobNode.class);
29

  
30
	private String mainEprParam;
31
	private String alternateEprParam;
32
	private String xpathToInjectionPoint;
33
	private String xpathToInjectedElements;
34
	private String outputEprParam;
35

  
36
	private MappedResultSetFactory mappedResultSetFactory;
37
	private ResultSetClientFactory resultSetClientFactory;
38

  
39
	@Override
40
	protected String execute(final NodeToken token) throws Exception {
41
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(mainEprParam));
42
		final Iterator<String> alternateRsClient = resultSetClientFactory.getClient(token.getEnv().getAttribute(alternateEprParam)).iterator();
43
		final SAXReader reader = new SAXReader();
44

  
45
		final W3CEndpointReference outputEpr = mappedResultSetFactory.createMappedResultSet(inputEpr, new UnaryFunction<String, String>() {
46

  
47
			@Override
48
			public String evaluate(final String current) {
49
				try {
50
					Document main = reader.read(new StringReader(current));
51
					Document alternate = reader.read(new StringReader(alternateRsClient.next()));
52
					compose(main, alternate);
53
					return main.asXML();
54
				} catch (DocumentException e) {
55
					log.error("Error composing EPRs", e);
56
					throw new IllegalStateException(e);
57
				}
58
			}
59

  
60
			private void compose(final Document main, final Document alternate) {
61
				// Prepare injected fragments (source)
62
				List<Element> injectedElementList = alternate.selectNodes(getXpathToInjectedElements());
63
				// Retrieve injection point (destination)
64
				Element injectionPointElement = (Element) main.selectSingleNode(getXpathToInjectionPoint());
65

  
66
				if ((injectedElementList != null) && (injectionPointElement != null)) {
67
					Element parent = injectionPointElement.getParent();
68
					// injectionPointElement.detach();
69
					for (Element injectedElement : injectedElementList) {
70
						injectedElement.detach();
71
						// Move node around
72
						parent.add(injectedElement);
73
					}
74
				}
75
			}
76
		});
77

  
78
		token.getEnv().setAttribute(outputEprParam, outputEpr.toString());
79
		return Arc.DEFAULT_ARC;
80
	}
81

  
82
	public MappedResultSetFactory getMappedResultSetFactory() {
83
		return mappedResultSetFactory;
84
	}
85

  
86
	@Required
87
	public void setMappedResultSetFactory(
88
			final MappedResultSetFactory mappedResultSetFactory) {
89
		this.mappedResultSetFactory = mappedResultSetFactory;
90
	}
91

  
92
	public ResultSetClientFactory getResultSetClientFactory() {
93
		return resultSetClientFactory;
94
	}
95

  
96
	@Required
97
	public void setResultSetClientFactory(
98
			final ResultSetClientFactory resultSetClientFactory) {
99
		this.resultSetClientFactory = resultSetClientFactory;
100
	}
101

  
102
	public String getInputEprParam() {
103
		return mainEprParam;
104
	}
105

  
106
	public String getMainEprParam() {
107
		return mainEprParam;
108
	}
109

  
110
	public void setMainEprParam(final String mainEprParam) {
111
		this.mainEprParam = mainEprParam;
112
	}
113

  
114
	public String getAlternateEprParam() {
115
		return alternateEprParam;
116
	}
117

  
118
	public void setAlternateEprParam(final String alternateEprParam) {
119
		this.alternateEprParam = alternateEprParam;
120
	}
121

  
122
	public String getOutputEprParam() {
123
		return outputEprParam;
124
	}
125

  
126
	public void setOutputEprParam(final String outputEprParam) {
127
		this.outputEprParam = outputEprParam;
128
	}
129

  
130
	public String getXpathToInjectionPoint() {
131
		return xpathToInjectionPoint;
132
	}
133

  
134
	public void setXpathToInjectionPoint(final String xpathToInjectionPoint) {
135
		this.xpathToInjectionPoint = xpathToInjectionPoint;
136
	}
137

  
138
	public String getXpathToInjectedElements() {
139
		return xpathToInjectedElements;
140
	}
141

  
142
	public void setXpathToInjectedElements(final String xpathToInjectedElements) {
143
		this.xpathToInjectedElements = xpathToInjectedElements;
144
	}
145

  
146
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/index/PrepareCreateIndexJobNode.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.index;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5

  
6
import com.googlecode.sarasvati.NodeToken;
7

  
8
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
9

  
10
public class PrepareCreateIndexJobNode extends SimpleJobNode {
11

  
12
	private static final Log log = LogFactory.getLog(PrepareCreateIndexJobNode.class);
13

  
14
	private String layout;
15
	private String format;
16
	private String interpretation;
17

  
18
	@Override
19
	protected String execute(final NodeToken token) throws Exception {
20
		log.info("Preparing env for CreateIndexJobNode");
21
		token.getEnv().setAttribute("layout", layout);
22
		token.getEnv().setAttribute("format", format);
23
		token.getEnv().setAttribute("interpretation", interpretation);
24
		return null;
25
	}
26

  
27
	public String getLayout() {
28
		return layout;
29
	}
30

  
31
	public void setLayout(final String layout) {
32
		this.layout = layout;
33
	}
34

  
35
	public String getFormat() {
36
		return format;
37
	}
38

  
39
	public void setFormat(final String format) {
40
		this.format = format;
41
	}
42

  
43
	public String getInterpretation() {
44
		return interpretation;
45
	}
46

  
47
	public void setInterpretation(final String interpretation) {
48
		this.interpretation = interpretation;
49
	}
50

  
51
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/transform/tmid/TrismegistosInjectionJobNode.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.transform.tmid;
2

  
3
import javax.xml.ws.wsaddressing.W3CEndpointReference;
4

  
5
import com.googlecode.sarasvati.Arc;
6
import com.googlecode.sarasvati.NodeToken;
7

  
8
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
9
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
10
import eu.dnetlib.msro.rmi.MSROException;
11
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
12

  
13
public class TrismegistosInjectionJobNode extends SimpleJobNode {
14
	private String inputEprParam;
15
	private String outputEprParam;
16
	private String tmIdListPath;
17

  
18
	private MappedResultSetFactory mappedResultSetFactory;
19
	
20
	@Override
21
	protected String execute(NodeToken token) throws Exception {
22
		final String inputEpr = token.getEnv().getAttribute(inputEprParam);
23
		if ((inputEpr == null) || inputEpr.isEmpty()) throw new MSROException("InputEprParam (" + inputEprParam + ") not found in ENV");
24
		final W3CEndpointReference epr = getMappedResultSetFactory().createMappedResultSet(new EPRUtils().getEpr(inputEpr), 
25
				new TrismegistosInjectionUnaryFunction(tmIdListPath));
26

  
27
		token.getEnv().setAttribute(outputEprParam, epr.toString());
28

  
29
		return Arc.DEFAULT_ARC;
30
	}
31
	
32
	public String getInputEprParam() {
33
		return inputEprParam;
34
	}
35

  
36
	public void setInputEprParam(String inputEprParam) {
37
		this.inputEprParam = inputEprParam;
38
	}
39

  
40
	public String getOutputEprParam() {
41
		return outputEprParam;
42
	}
43

  
44
	public void setOutputEprParam(String outputEprParam) {
45
		this.outputEprParam = outputEprParam;
46
	}
47

  
48
	public String getTmIdListPath() {
49
		return tmIdListPath;
50
	}
51

  
52
	public void setTmIdListPath(String tmIdListPath) {
53
		this.tmIdListPath = tmIdListPath;
54
	}
55

  
56
	public MappedResultSetFactory getMappedResultSetFactory() {
57
		return mappedResultSetFactory;
58
	}
59

  
60
	public void setMappedResultSetFactory(MappedResultSetFactory mappedResultSetFactory) {
61
		this.mappedResultSetFactory = mappedResultSetFactory;
62
	}
63

  
64
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/transform/tmid/TrismegistosInjectionUnaryFunction.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.transform.tmid;
2

  
3
import java.io.IOException;
4
import java.io.StringReader;
5
import java.util.ArrayList;
6
import java.util.HashMap;
7
import java.util.List;
8
import java.util.Map;
9

  
10
import javax.xml.stream.XMLInputFactory;
11
import javax.xml.stream.XMLStreamConstants;
12
import javax.xml.stream.XMLStreamException;
13
import javax.xml.stream.XMLStreamReader;
14

  
15
import org.apache.commons.logging.Log;
16
import org.apache.commons.logging.LogFactory;
17
import org.dom4j.Document;
18
import org.dom4j.DocumentHelper;
19
import org.dom4j.Element;
20
import org.dom4j.Node;
21
import org.dom4j.io.SAXReader;
22
import org.springframework.core.io.FileSystemResource;
23

  
24
import eu.dnetlib.miscutils.functional.UnaryFunction;
25
import eu.dnetlib.miscutils.functional.hash.Hashing;
26

  
27
public class TrismegistosInjectionUnaryFunction implements UnaryFunction<String, String> {
28

  
29
	private static final Log log = LogFactory.getLog(TrismegistosInjectionUnaryFunction.class);
30

  
31
	private SAXReader reader;
32

  
33
	private static Map<String, List<String>> tmIdToLocalIdsMap;
34
	private static Map<String, String> localIdToTmIdMap;
35

  
36
	public TrismegistosInjectionUnaryFunction(final String tmIdListPath) throws XMLStreamException, IOException {
37
		this.reader = new SAXReader();
38
		tmIdToLocalIdsMap = new HashMap<String, List<String>>();
39
		localIdToTmIdMap = new HashMap<String, String>();
40

  
41
		FileSystemResource tmIds = new FileSystemResource(tmIdListPath);
42
		XMLInputFactory factory = XMLInputFactory.newInstance();
43
		XMLStreamReader reader = factory.createXMLStreamReader(tmIds.getInputStream());
44

  
45
		boolean isSafeToGetNextXmlElement = true;
46
		while (isSafeToGetNextXmlElement) {
47
			if (reader.getEventType() == XMLStreamConstants.START_ELEMENT) {
48
				if ("tmid".equals(reader.getLocalName())) {
49
					parseTmEvent(reader);
50
				}
51
			}
52
			if (reader.hasNext()) {
53
				reader.next();
54
			} else {
55
				isSafeToGetNextXmlElement = false;
56
				break;
57
			}
58
		}
59
	}
60

  
61
	private void parseTmEvent(final XMLStreamReader reader) throws XMLStreamException {
62
		String tmId = reader.getAttributeValue(null, "id");
63
		List<String> localIds = new ArrayList<String>();
64

  
65
		boolean isSafeToGetNextXmlElement = true;
66
		while (isSafeToGetNextXmlElement) {
67
			if (reader.getEventType() == XMLStreamConstants.END_ELEMENT) {
68
				break;
69
			} else {
70
				if (reader.getEventType() == XMLStreamConstants.START_ELEMENT) {
71
					if ("link".equals(reader.getLocalName())) {
72
						String localId = parseLinkEvent(reader);
73
						localIdToTmIdMap.put(localId, tmId);
74
						localIds.add(localId);
75
					}
76
				}
77
			}
78
			if (reader.hasNext()) {
79
				reader.next();
80
			} else {
81
				isSafeToGetNextXmlElement = false;
82
				break;
83
			}
84
		}
85
		tmIdToLocalIdsMap.put(tmId, localIds);
86
	}
87

  
88
	private String parseLinkEvent(final XMLStreamReader reader) throws XMLStreamException {
89
		String cp = reader.getAttributeValue(null, "cp");
90
		String val = "";
91
		boolean isSafeToGetNextXmlElement = true;
92
		while (isSafeToGetNextXmlElement) {
93
			if (reader.getEventType() == XMLStreamConstants.END_ELEMENT) {
94
				break;
95
			} else if (reader.getEventType() == XMLStreamConstants.CHARACTERS) {
96
				val = reader.getText();
97
			}
98
			if (reader.hasNext()) {
99
				reader.next();
100
			} else {
101
				isSafeToGetNextXmlElement = false;
102
				break;
103
			}
104
		}
105
		return cp + "::" + val;
106
	}
107

  
108
	@Override
109
	public String evaluate(final String inputXml) {
110
		try {
111
			Document doc = reader.read(new StringReader(inputXml));
112
			Node entityType = doc.selectSingleNode("//*[local-name()='entityType']");
113
			if (entityType == null) return inputXml;
114

  
115
			// TMid injection on Artifacts/Inscriptions
116
			if ("artifact".equals(entityType.getText())) {
117
				Node recordSourceInfo = doc.selectSingleNode("//*[local-name()='eagleObject']/*[local-name()='recordSourceInfo']");
118
				if (recordSourceInfo == null) return inputXml;
119
				String localId =
120
						recordSourceInfo.selectSingleNode("@providerAcronym").getText() + "::" + recordSourceInfo.selectSingleNode("./text()").getText();
121
				String tmId = localIdToTmIdMap.get(localId);
122
				if (tmId != null) {
123
					// TMid found! Prepare for injection..
124
					List<String> alternateIds = tmIdToLocalIdsMap.get(tmId);
125
					Element inscription = (Element) doc.selectSingleNode("//*[local-name()='inscription']");
126
					Element injectedHasTmId = prepareInjectedElement(tmId, alternateIds, localId);
127
					replaceElement(inscription, (Element) inscription.selectSingleNode("./*[local-name()='hasTmId']"), injectedHasTmId);
128
					return doc.asXML();
129
				} else // There is no TM ID for this localID. Just return the input XML here..
130
					return inputXml;
131
			}
132

  
133
			// TMid injection on other items with rel to Artifacts
134
			if ("visual".equals(entityType.getText()) || "documental".equals(entityType.getText())) {
135
				Node recordSourceInfo = doc.selectSingleNode("//*[local-name()='hasArtifact']/*[local-name()='recordSourceInfo']");
136
				if (recordSourceInfo == null) return inputXml;
137
				String localId =
138
						recordSourceInfo.selectSingleNode("@providerAcronym").getText() + "::" + recordSourceInfo.selectSingleNode("./text()").getText();
139
				String tmId = localIdToTmIdMap.get(localId);
140
				if (tmId != null) {
141
					// TMid found! Prepare for injection..
142
					List<String> alternateIds = tmIdToLocalIdsMap.get(tmId);
143
					Element inscription = (Element) doc.selectSingleNode("//*[local-name()='hasArtifact']");
144
					Element injectedHasTmId = prepareInjectedElement(tmId, alternateIds, localId);
145
					replaceElement(inscription, (Element) inscription.selectSingleNode("./*[local-name()='hasTmId']"), injectedHasTmId);
146
					return doc.asXML();
147
				} else // There is no TM ID for this localID. Just return the input string here..
148
					return inputXml;
149
			}
150
		} catch (Exception e) {
151
			log.error(inputXml);
152
			throw new IllegalStateException("Problems with Trismegistos injection", e);
153
		}
154

  
155
		return inputXml;
156
	}
157

  
158
	private Element prepareInjectedElement(final String tmId, final List<String> alternateIds, final String localId) {
159
		Document fragment = DocumentHelper.createDocument();
160
		Element hasTmId = fragment.addElement("hasTmId");
161
		hasTmId.addElement("tmId").addText(tmId);
162
		for (String alternateId : alternateIds) {
163
			if (!localId.equals(alternateId)) {
164
				String[] tokens = alternateId.split("::");
165
				hasTmId.addElement("alternateId").addAttribute("providerAcronym", tokens[0]).addAttribute("localId", tokens[1])
166
						.setText(tokens[0] + "::" + Hashing.md5(tokens[1]) + "::artifact");
167
			}
168
		}
169
		return (Element) fragment.selectSingleNode("./*[local-name()='hasTmId']");
170
	}
171

  
172
	private void replaceElement(final Element parent, final Element oldElement, final Element newElement) {
173
		List parentContent = parent.content();
174
		int index = parentContent.indexOf(oldElement);
175
		parentContent.set(index, newElement);
176
	}
177

  
178
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/transform/editions/EditionXsltUnaryFunction.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.transform.editions;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.StringReader;
6
import java.util.Map;
7

  
8
import javax.xml.transform.stream.StreamSource;
9

  
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.dom4j.Document;
13
import org.dom4j.DocumentException;
14
import org.dom4j.Element;
15
import org.dom4j.Node;
16
import org.dom4j.io.SAXReader;
17

  
18
import eu.dnetlib.miscutils.functional.UnaryFunction;
19
import eu.dnetlib.miscutils.functional.xml.ApplyXslt;
20

  
21
public class EditionXsltUnaryFunction implements UnaryFunction<String, String> {
22
	private static final Log log = LogFactory.getLog(EditionXsltUnaryFunction.class);
23
	
24
	private ApplyXslt xslt;
25
	private SAXReader reader;
26
	
27
	public EditionXsltUnaryFunction(final String xsltPath, Map<String, String> xsltParams) throws IOException {
28
		
29
		this.xslt = new ApplyXslt(new StreamSource(new File(xsltPath)), xsltPath, xsltParams);
30
		this.reader = new SAXReader();
31
	}
32
	
33
	/**
34
	 * xPath the <metadata> node and applies xslt 2.0 transformation
35
	 */
36
	@Override
37
	public String evaluate(String input) {
38
		try {
39
			Document doc = reader.read(new StringReader(input));
40
			Node result = doc.selectSingleNode("//*[local-name()='metadata']/*");
41
			String edition = xslt.evaluate(result.asXML());
42
			result.detach();
43
			((Element) doc.selectSingleNode("//*[local-name()='metadata']")).add(reader.read(new StringReader(edition)).getRootElement());
44
			return doc.asXML();
45
		} catch (DocumentException e) {
46
			log.error("Problem creating dom4j document from \n" + input, e);
47
			throw new RuntimeException(e);
48
		}
49

  
50
	}
51
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/transform/editions/ApplyEditionXsltJobNode.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.transform.editions;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5

  
6
import javax.xml.ws.wsaddressing.W3CEndpointReference;
7

  
8
import org.springframework.beans.factory.annotation.Required;
9

  
10
import com.googlecode.sarasvati.Arc;
11
import com.googlecode.sarasvati.NodeToken;
12

  
13
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
14
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
15
import eu.dnetlib.msro.rmi.MSROException;
16
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
17

  
18
/**
19
 * This class is intended to process records coming in MDStore compliant form (with header) and apply the xslt located at
20
 * http://sourceforge.net/projects/epidoc/files/Example%20Stylesheets/ to the <metadata> blocks of each record.
21
 *
22
 * @author Andrea Mannocci
23
 *
24
 */
25
public class ApplyEditionXsltJobNode extends SimpleJobNode {
26

  
27
	private String inputEprParam;
28
	private String outputEprParam;
29
	private String xsltPath;
30

  
31
	private MappedResultSetFactory mappedResultSetFactory;
32
	private final Map<String, String> xsltParams = new HashMap<String, String>();
33

  
34
	@Override
35
	protected String execute(final NodeToken token) throws Exception {
36
		final String inputEpr = token.getEnv().getAttribute(inputEprParam);
37
		if ((inputEpr == null) || inputEpr.isEmpty()) throw new MSROException("InputEprParam (" + inputEprParam + ") not found in ENV");
38

  
39
		for (String name : token.getFullEnv().getAttributeNames()) {
40
			xsltParams.put(name, token.getFullEnv().getAttribute(name));
41
		}
42
		for (String name : token.getEnv().getAttributeNames()) {
43
			xsltParams.put(name, token.getEnv().getAttribute(name));
44
		}
45

  
46
		xsltParams.putAll(parseJsonParameters(token));
47

  
48
		final W3CEndpointReference epr = mappedResultSetFactory.createMappedResultSet(new EPRUtils().getEpr(inputEpr),
49
				new EditionXsltUnaryFunction(xsltPath, xsltParams));
50

  
51
		token.getEnv().setAttribute(outputEprParam, epr.toString());
52

  
53
		return Arc.DEFAULT_ARC;
54
	}
55

  
56
	public String getInputEprParam() {
57
		return inputEprParam;
58
	}
59

  
60
	public void setInputEprParam(final String inputEprParam) {
61
		this.inputEprParam = inputEprParam;
62
	}
63

  
64
	public String getOutputEprParam() {
65
		return outputEprParam;
66
	}
67

  
68
	public void setOutputEprParam(final String outputEprParam) {
69
		this.outputEprParam = outputEprParam;
70
	}
71

  
72
	public String getXsltPath() {
73
		return xsltPath;
74
	}
75

  
76
	public void setXsltPath(final String xsltPath) {
77
		this.xsltPath = xsltPath;
78
	}
79

  
80
	public MappedResultSetFactory getMappedResultSetFactory() {
81
		return mappedResultSetFactory;
82
	}
83

  
84
	@Required
85
	public void setMappedResultSetFactory(
86
			final MappedResultSetFactory mappedResultSetFactory) {
87
		this.mappedResultSetFactory = mappedResultSetFactory;
88
	}
89

  
90
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/backlink/translations/TranslationBacklinkUnaryFunction.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.backlink.translations;
2

  
3
import java.io.StringReader;
4
import java.util.Iterator;
5
import java.util.List;
6
import java.util.Map;
7

  
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.dom4j.Document;
11
import org.dom4j.DocumentException;
12
import org.dom4j.Element;
13
import org.dom4j.Node;
14
import org.dom4j.io.SAXReader;
15

  
16
import com.google.common.collect.Lists;
17
import com.google.common.collect.Maps;
18

  
19
import eu.dnetlib.miscutils.functional.UnaryFunction;
20

  
21
public class TranslationBacklinkUnaryFunction implements UnaryFunction<String, String> {
22

  
23
	private static final Log log = LogFactory.getLog(TranslationBacklinkUnaryFunction.class);
24

  
25
	final private SAXReader reader;
26
	final private Map<String, List<TranslationInfo>> eagleIdToTranslationsInfo;
27

  
28
	public TranslationBacklinkUnaryFunction(final Iterator<String> translationIterator) {
29
		reader = new SAXReader();
30
		eagleIdToTranslationsInfo = Maps.newHashMap();
31

  
32
		while (translationIterator.hasNext()) {
33
			String translationRecord = translationIterator.next();
34
			try {
35
				processTranslationRecord(translationRecord, eagleIdToTranslationsInfo);
36
			} catch (DocumentException e) {
37
				log.error("Error during the pre-processing of MediaWiki translations", e);
38
				throw new RuntimeException(e);
39
			}
40
		}
41
	}
42

  
43
	private void processTranslationRecord(final String translationRecord, final Map<String, List<TranslationInfo>> eagleIdToTranslations)
44
			throws DocumentException {
45
		Document doc = reader.read(new StringReader(translationRecord));
46
		List<Node> eagleIdList = doc.selectNodes("//*[local-name()='hasArtifact']/*[local-name()='dnetResourceIdentifier']");
47
		List<TranslationInfo> translationInfoList;
48
		for (Node e : eagleIdList) {
49
			String[] tokens = e.getText().split("::");
50
			String eagleId = tokens[0] + "::" + tokens[1];
51
			// Fetch or create from scratch che List of EagleId
52
			if (eagleIdToTranslations.containsKey(eagleId)) {
53
				translationInfoList = eagleIdToTranslations.get(eagleId);
54
			} else {
55
				translationInfoList = Lists.newArrayList();
56
			}
57
			// Prepare TranslationInfo to append
58
			TranslationInfo info = new TranslationInfo();
59
			info.setDnetResourceIdentifier(doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='dnetResourceIdentifier']"));
60
			info.setProviderAcronym(doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='recordSourceInfo']/@providerAcronym"));
61
			info.setProviderName(doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='recordSourceInfo']/@providerName"));
62
			info.setLandingPage(doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='recordSourceInfo']/@landingPage"));
63
			info.setLocalId(doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='recordSourceInfo']"));
64
			info.setText(doc.valueOf("//*[local-name()='translation']/*[local-name()='text']"));
65
			info.setLang(doc.valueOf("//*[local-name()='translation']/*[local-name()='text']/@lang"));
66
			translationInfoList.add(info);
67
			eagleIdToTranslations.put(eagleId, translationInfoList);
68
		}
69
	}
70

  
71
	@Override
72
	public String evaluate(final String record) {
73
		try {
74
			Document doc = reader.read(new StringReader(record));
75
			String eagleObjId = doc.valueOf("//*[local-name()='eagleObject']/*[local-name()='dnetResourceIdentifier']");
76
			String[] tokens = eagleObjId.split("::");
77
			eagleObjId = tokens[0] + "::" + tokens[1];
78
			if (!eagleIdToTranslationsInfo.containsKey(eagleObjId)) return record;
79

  
80
			Element injectionPoint = (Element) doc.selectSingleNode("//*[local-name()='inscription']");
81
			if (injectionPoint == null) {
82
				injectionPoint = (Element) doc.selectSingleNode("//*[local-name()='transcription']");
83
				if (injectionPoint == null) {
84
					injectionPoint = (Element) doc.selectSingleNode("//*[local-name()='visualRepresentation']");
85
				}
86
			}
87

  
88
			List<TranslationInfo> translationsInfoList = eagleIdToTranslationsInfo.get(eagleObjId);
89
			for (TranslationInfo translationInfo : translationsInfoList) {
90
				String fragment = HasTranslationFragment.generateFragment(translationInfo);
91
				Document fragmentDoc = new SAXReader().read(new StringReader(fragment));
92
				injectionPoint.add(fragmentDoc.getRootElement());
93
			}
94
			return doc.asXML();
95
		} catch (DocumentException e) {
96
			throw new RuntimeException(e);
97
		}
98
	}
99

  
100
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/backlink/translations/TranslationInfo.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.backlink.translations;
2

  
3
/**
 * Plain mutable holder for the values describing one translation. Every property is a
 * {@code String}, is {@code null} until set, and is stored and returned as is.
 */
public class TranslationInfo {

	private String dnetResourceIdentifier;
	private String providerName;
	private String providerAcronym;
	private String landingPage;
	private String localId;
	private String text;
	private String lang;

	// ---- identification ----

	/** @return the dnetResourceIdentifier */
	public String getDnetResourceIdentifier() {
		return this.dnetResourceIdentifier;
	}

	/** @param dnetResourceIdentifier the dnetResourceIdentifier to set */
	public void setDnetResourceIdentifier(final String dnetResourceIdentifier) {
		this.dnetResourceIdentifier = dnetResourceIdentifier;
	}

	/** @return the localId */
	public String getLocalId() {
		return this.localId;
	}

	/** @param localId the localId to set */
	public void setLocalId(final String localId) {
		this.localId = localId;
	}

	// ---- provider ----

	/** @return the providerName */
	public String getProviderName() {
		return this.providerName;
	}

	/** @param providerName the providerName to set */
	public void setProviderName(final String providerName) {
		this.providerName = providerName;
	}

	/** @return the providerAcronym */
	public String getProviderAcronym() {
		return this.providerAcronym;
	}

	/** @param providerAcronym the providerAcronym to set */
	public void setProviderAcronym(final String providerAcronym) {
		this.providerAcronym = providerAcronym;
	}

	/** @return the landingPage */
	public String getLandingPage() {
		return this.landingPage;
	}

	/** @param landingPage the landingPage to set */
	public void setLandingPage(final String landingPage) {
		this.landingPage = landingPage;
	}

	// ---- translation content ----

	/** @return the text */
	public String getText() {
		return this.text;
	}

	/** @param text the text to set */
	public void setText(final String text) {
		this.text = text;
	}

	/** @return the lang */
	public String getLang() {
		return this.lang;
	}

	/** @param lang the lang to set */
	public void setLang(final String lang) {
		this.lang = lang;
	}
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/backlink/translations/HasTranslationFragment.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.backlink.translations;
2

  
3
import java.io.IOException;
4

  
5
import org.antlr.stringtemplate.StringTemplate;
6
import org.apache.commons.io.IOUtils;
7
import org.apache.commons.lang.StringEscapeUtils;
8

  
9
public class HasTranslationFragment {
10

  
11
	static public String generateFragment(final TranslationInfo translationInfo) {
12
		String fragmentTemplate;
13
		try {
14
			fragmentTemplate =
15
					IOUtils.toString(HasTranslationFragment.class
16
							.getResourceAsStream("/eu/dnetlib/msro/eagle/workflows/backlink/translations/hasTranslation.xml.st"));
17
			StringTemplate st = new StringTemplate(fragmentTemplate);
18
			st.setAttribute("dnetResourceIdentifier", StringEscapeUtils.escapeXml(translationInfo.getDnetResourceIdentifier()));
19
			st.setAttribute("providerName", StringEscapeUtils.escapeXml(translationInfo.getProviderName()));
20
			st.setAttribute("providerAcronym", StringEscapeUtils.escapeXml(translationInfo.getProviderAcronym()));
21
			st.setAttribute("landingPage", StringEscapeUtils.escapeXml(translationInfo.getLandingPage()));
22
			st.setAttribute("localId", StringEscapeUtils.escapeXml(translationInfo.getLocalId()));
23
			st.setAttribute("text", StringEscapeUtils.escapeXml(translationInfo.getText()));
24
			st.setAttribute("lang", StringEscapeUtils.escapeXml(translationInfo.getLang()));
25
			return st.toString();
26
		} catch (IOException e) {
27
			throw new RuntimeException("HasTranslation template not found!", e);
28
		}
29

  
30
	}
31

  
32
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/msro/eagle/workflows/nodes/backlink/translations/TranslationBacklinkJobNode.java
1
package eu.dnetlib.msro.eagle.workflows.nodes.backlink.translations;
2

  
3
import java.util.Iterator;
4

  
5
import javax.xml.ws.wsaddressing.W3CEndpointReference;
6

  
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.springframework.beans.factory.annotation.Autowired;
10
import org.springframework.beans.factory.annotation.Required;
11

  
12
import com.googlecode.sarasvati.Arc;
13
import com.googlecode.sarasvati.NodeToken;
14

  
15
import eu.dnetlib.data.mdstore.MDStoreService;
16
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
17
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
18
import eu.dnetlib.enabling.resultset.MappedResultSetFactory;
19
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
20
import eu.dnetlib.enabling.resultset.client.utils.EPRUtils;
21
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
22

  
23
public class TranslationBacklinkJobNode extends SimpleJobNode {
24

  
25
	private static final Log log = LogFactory.getLog(TranslationBacklinkJobNode.class);
26

  
27
	private String inputEprParam;
28
	private String outputEprParam;
29

  
30
	private MappedResultSetFactory mappedResultSetFactory;
31
	private ResultSetClientFactory resultSetClientFactory;
32

  
33
	@Autowired
34
	private UniqueServiceLocator serviceLocator;
35

  
36
	@Override
37
	protected String execute(final NodeToken token) throws Exception {
38
		final W3CEndpointReference inputEpr = new EPRUtils().getEpr(token.getEnv().getAttribute(inputEprParam));
39

  
40
		// retrieve Translation mdStore
41
		String xQueryTranslation = "for $x in collection('/db/DRIVER/WorkflowDSResources/WorkflowDSResourceType') "
42
				+ "where $x//WORKFLOW_NAME = 'index' and $x//PARAM[./@name = 'providerName']/text() = 'EAGLE MediaWiki' "
43
				+ "return $x//PARAM[./@name='mdId']/text()";
44
		String translationStoreId = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQueryTranslation).get(0);
45
		final W3CEndpointReference translationsEpr = serviceLocator.getService(MDStoreService.class).deliverMDRecords(translationStoreId, "", "", "");
46

  
47
		final Iterator<String> translationIterator = resultSetClientFactory.getClient(translationsEpr).iterator();
48
		final W3CEndpointReference epr = mappedResultSetFactory.createMappedResultSet(inputEpr, new TranslationBacklinkUnaryFunction(translationIterator));
49

  
50
		token.getEnv().setAttribute(outputEprParam, epr.toString());
51

  
52
		return Arc.DEFAULT_ARC;
53
	}
54

  
55
	public String getInputEprParam() {
56
		return inputEprParam;
57
	}
58

  
59
	public void setInputEprParam(final String inputEprParam) {
60
		this.inputEprParam = inputEprParam;
61
	}
62

  
63
	public String getOutputEprParam() {
64
		return outputEprParam;
65
	}
66

  
67
	public void setOutputEprParam(final String outputEprParam) {
68
		this.outputEprParam = outputEprParam;
69
	}
70

  
71
	public MappedResultSetFactory getMappedResultSetFactory() {
72
		return mappedResultSetFactory;
73
	}
74

  
75
	@Required
76
	public void setMappedResultSetFactory(final MappedResultSetFactory mappedResultSetFactory) {
77
		this.mappedResultSetFactory = mappedResultSetFactory;
78
	}
79

  
80
	public ResultSetClientFactory getResultSetClientFactory() {
81
		return resultSetClientFactory;
82
	}
83

  
84
	@Required
85
	public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) {
86
		this.resultSetClientFactory = resultSetClientFactory;
87
	}
88

  
89
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/collector/plugins/http/UbiEratLupaHttpCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.http;
2

  
3
import java.util.Iterator;
4

  
5
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
6
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
8

  
9
public class UbiEratLupaHttpCollectorPlugin extends AbstractCollectorPlugin {
10

  
11
	@Override
12
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
13
			throws CollectorServiceException {
14
		final String baseUrl = interfaceDescriptor.getBaseUrl();
15

  
16
		if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
17

  
18
		return new Iterable<String>() {
19

  
20
			@Override
21
			public Iterator<String> iterator() {
22
				return new UbiEratLupaIterator(baseUrl);
23
			}
24
		};
25
	}
26

  
27
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/collector/plugins/http/UbiEratLupaIterator.java
1
package eu.dnetlib.data.collector.plugins.http;
2

  
3
import java.io.IOException;
4
import java.net.MalformedURLException;
5
import java.net.URL;
6
import java.util.Iterator;
7
import java.util.Queue;
8
import java.util.concurrent.PriorityBlockingQueue;
9

  
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.dom4j.Document;
13
import org.dom4j.DocumentException;
14
import org.dom4j.Node;
15
import org.dom4j.io.SAXReader;
16

  
17
public class UbiEratLupaIterator implements Iterator<String> {
18
	private static final Log log = LogFactory.getLog(UbiEratLupaIterator.class);
19
	
20
	private String baseUrl;
21
	private int offset = 0;
22
	private static final int LIMIT = 500;
23
	private boolean onLastPage;
24
	
25
	private SAXReader reader = new SAXReader();
26
	private Queue<String> queue = new PriorityBlockingQueue<String>();
27
	
28
	public UbiEratLupaIterator() { }
29
	
30
	public UbiEratLupaIterator(String baseurl) {
31
		this.baseUrl = baseurl;
32
		this.onLastPage = false;
33
	}
34

  
35
	@Override
36
	public boolean hasNext() {
37
		if (queue.size() == 0) {
38
			if(onLastPage) {
39
				return false;
40
			} else {
41
				refillQueue();
42
				if (queue.size() == 0)
43
					return false;
44
				else
45
					return true;
46
			}
47
		} else {
48
			return true;
49
		}
50
	}
51

  
52
	private void refillQueue() {
53
		String url = baseUrl + "?limit=" + LIMIT + "&offset=" + offset;
54
		log.info("HTTP GET: " + url);
55
		URL request;
56
		try {
57
			request = new URL(url);
58
			Document doc = reader.read(request.openStream());
59
			
60
			for (Object o : doc.selectNodes("//monument")) {
61
				queue.add(((Node) o).asXML());
62
			}
63
			
64
			if (queue.size() < LIMIT) {
65
				onLastPage = true;
66
			}
67
			offset = offset + LIMIT;
68
		} catch (MalformedURLException e) {
69
			throw new RuntimeException("Malformed URL ->  " + url, e);
70
		} catch (DocumentException e) {
71
			throw new RuntimeException("An error occurred while parsing the page fetched from " + url, e);
72
		} catch (IOException e) {
73
			throw new RuntimeException("An error occurred getting page " + url, e);
74
		}
75
		
76
	}
77

  
78
	@Override
79
	public String next() {
80
		return queue.remove();
81
	}
82

  
83
	@Override
84
	public void remove() { }
85

  
86
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/collector/plugins/mediawiki/MediawikiCollectorPlugin.java
1
package eu.dnetlib.data.collector.plugins.mediawiki;
2

  
3
import java.util.Iterator;
4

  
5
import eu.dnetlib.data.collector.plugin.AbstractCollectorPlugin;
6
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
7
import eu.dnetlib.data.collector.rmi.InterfaceDescriptor;
8

  
9
public class MediawikiCollectorPlugin extends AbstractCollectorPlugin {
10

  
11
	@Override
12
	public Iterable<String> collect(final InterfaceDescriptor interfaceDescriptor, final String fromDate, final String untilDate)
13
			throws CollectorServiceException {
14
		final String baseUrl = interfaceDescriptor.getBaseUrl();
15
		final String apnamespace = interfaceDescriptor.getParams().get("apnamespace");
16

  
17
		if (baseUrl == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'baseurl' is null or empty"); }
18

  
19
		if (apnamespace == null || baseUrl.isEmpty()) { throw new CollectorServiceException("Param 'apnamespace' is null or empty"); }
20

  
21
		return new Iterable<String>() {
22

  
23
			@Override
24
			public Iterator<String> iterator() {
25
				return new MediawikiIterator(baseUrl, apnamespace);
26
			}
27
		};
28
	}
29

  
30
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/collector/plugins/mediawiki/MediawikiIterator.java
1
package eu.dnetlib.data.collector.plugins.mediawiki;
2

  
3
import java.net.URL;
4
import java.util.Iterator;
5
import java.util.Queue;
6
import java.util.concurrent.PriorityBlockingQueue;
7

  
8
import org.apache.commons.logging.Log;
9
import org.apache.commons.logging.LogFactory;
10
import org.dom4j.Document;
11
import org.dom4j.Element;
12
import org.dom4j.Node;
13
import org.dom4j.io.SAXReader;
14

  
15
public class MediawikiIterator implements Iterator<String> {
16
	
17
	private static final Log log = LogFactory.getLog(MediawikiIterator.class);
18
	
19
	private SAXReader reader = new SAXReader();
20
	
21
	private Queue<String> queue = new PriorityBlockingQueue<String>();
22
	
23
	private String baseUrl;
24
	private String apnamespace;
25
	private String apcontinue;
26
	private boolean started;
27

  
28
	//	These are the call templates used by this class
29
	//	http://www.eagle-network.eu/wiki/api.php?action=query&list=allpages&apnamespace=120&aplimit=10&format=xml
30
	//	http://www.eagle-network.eu/wiki/api.php?action=wbgetentities&ids=Q100|Q1000|Q1001|Q1002|Q1003|Q1004|Q1005|Q1006|Q1007|Q1008
31
	
32
	public MediawikiIterator() { }
33
	
34
	public MediawikiIterator(final String baseUrl, final String apnamespace) {
35
		this.baseUrl = baseUrl;
36
		this.apnamespace = apnamespace;
37
		this.started = false;
38
	}
39
	
40
	private void verifyStarted() {
41
		if (!this.started) {
42
			this.apcontinue = firstPage();
43
			this.started = true;
44
		}
45
	}
46
	
47
	@Override
48
	public boolean hasNext() {
49
		synchronized (queue) {
50
			verifyStarted();
51
			return !queue.isEmpty();
52
		}
53
	}
54

  
55
	@Override
56
	public String next() {
57
		synchronized (queue) {
58
			verifyStarted();
59
			final String res = queue.poll();
60
			while (queue.isEmpty() && (apcontinue != null) && !apcontinue.isEmpty()) {
61
				apcontinue = otherPages(apcontinue);
62
			}
63
			return res;
64
		}
65
	}
66
	
67
	@Override
68
	public void remove() { }
69
	
70
	private String firstPage() {
71
		String url = baseUrl + "?action=query&list=allpages&apnamespace=" + apnamespace + "&aplimit=50&format=xml";
72
		log.info("Downloading first page using url: " + url);
73
		
74
		return downloadPage(url);
75
	}
76
	
77
	private String otherPages(String apcontinue) {
78
		return downloadPage(baseUrl + "?action=query&list=allpages&apnamespace=" + apnamespace + "&aplimit=50&format=xml&apcontinue=" + apcontinue);
79
	}
80

  
81
	private String downloadPage(String url) {
82
		try {
83
			log.info("HTTP GET: " + url);
84
			URL request = new URL(url);
85
			Document doc = reader.read(request.openStream());
86
			
87
			String nextApContinue = doc.valueOf("//*[local-name()='query-continue']//*[local-name()='allpages']/@apcontinue");
88

  
89
			String pagesUrl = "?action=wbgetentities&format=xml&ids=";
90
			for (Object o : doc.selectNodes("//*[local-name()='p']")) {
91
				String[] titleTokens = ((Element) o).valueOf("@title").split(":");
92
				pagesUrl += titleTokens[1] + "|";
93
			}
94
			pagesUrl = pagesUrl.substring(0, pagesUrl.length()-1); //remove last pipe char
95
			
96
			URL pages = new URL(baseUrl + pagesUrl);
97
			doc = reader.read(pages.openStream());
98
			
99
			// extract single mediawiki entities and enqueue each one separately
100
			for (Object o : doc.selectNodes("//*[local-name()='entity']")) {
101
				queue.add(((Node) o).asXML());
102
			}
103
			
104
			System.out.println("Done");
105
			return nextApContinue;
106
		} catch (Exception e) {
107
			throw new RuntimeException("Error processing data from: " + url, e);
108
		}
109
	}
110
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/download/plugins/EagleDownloadPlugin.java
1
package eu.dnetlib.data.download.plugins;
2

  
3
import java.util.ArrayList;
4
import java.util.List;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.gson.Gson;
9

  
10
import eu.dnetlib.data.download.rmi.DownloadItem;
11
import eu.dnetlib.data.download.rmi.DownloadPlugin;
12

  
13
public class EagleDownloadPlugin implements DownloadPlugin {
14

  
15
	@Override
16
	public DownloadItem retrieveUrl(final DownloadItem item) {
17
		String url = item.getUrl();
18

  
19
		if ((url == null) || (url.trim().length() == 0)) return null;
20
		@SuppressWarnings("unchecked")
21
		List<String> urls = new Gson().fromJson(url, ArrayList.class);
22
		if ((urls == null) || (urls.size() == 0)) {
23
			item.setOriginalUrl(null);
24
			item.setUrl(null);
25
			item.setFileName(item.getIdItemMetadata());
26
			return item;
27
		}
28
		item.setOriginalUrl(urls.get(0));
29
		item.setUrl(urls.get(0));
30
		item.setFileName(item.getIdItemMetadata());
31
		return item;
32
	}
33

  
34
	@Override
35
	public Iterable<DownloadItem> retireveUrls(final Iterable<DownloadItem> items) {
36

  
37
		return Iterables.transform(items, new Function<DownloadItem, DownloadItem>() {
38

  
39
			@Override
40
			public DownloadItem apply(final DownloadItem input) {
41
				return retrieveUrl(input);
42
			}
43
		});
44
	}
45

  
46
	@Override
47
	public String getPluginName() {
48
		return "EagleDownloadPlugin";
49
	}
50

  
51
	@Override
52
	public void setBasePath(final String arg0) {
53
		// TODO Auto-generated method stub
54
	}
55

  
56
	/**
57
	 * {@inheritDoc}
58
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#getRegularExpression()
59
	 */
60
	@Override
61
	public List<String> getRegularExpression() {
62
		// TODO Auto-generated method stub
63
		return null;
64
	}
65

  
66
	/**
67
	 * {@inheritDoc}
68
	 * @see eu.dnetlib.data.download.rmi.DownloadPlugin#setRegularExpression(java.util.List)
69
	 */
70
	@Override
71
	public void setRegularExpression(final List<String> regularExpression) {
72
		// TODO Auto-generated method stub
73

  
74
	}
75

  
76
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/provision/epidoc/MdIdResolver.java
1
package eu.dnetlib.data.provision.epidoc;
2

  
3
import org.apache.commons.logging.Log;
4
import org.apache.commons.logging.LogFactory;
5
import org.springframework.beans.factory.annotation.Autowired;
6
import org.springframework.cache.annotation.Cacheable;
7

  
8
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
9
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
10
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
11

  
12
public class MdIdResolver {
13

  
14
	private static final Log log = LogFactory.getLog(MdIdResolver.class);
15

  
16
	@Autowired
17
	private UniqueServiceLocator serviceLocator;
18

  
19
	@Cacheable(value = "mdIds", key = "#acronym")
20
	public String resolveMdId(final String acronym) throws ISLookUpException {
21
		log.info("Resolving mdID for " + acronym + ". Cache not used.");
22
		String xQuery =
23
				String.format(
24
						"let $apiId:=/RESOURCE_PROFILE[.//DATASOURCE_ORIGINAL_ID='%s']//INTERFACE[@compliance='eagleMetadata']/@id/string() "
25
								+ "return /RESOURCE_PROFILE[.//WORKFLOW_NAME='collect' and .//PARAM[@name='api']=$apiId]//NODE[@name='STORE']//PARAM[@name='mdId']/string()",
26
								acronym);
27

  
28
		String mdId = serviceLocator.getService(ISLookUpService.class).quickSearchProfile(xQuery).get(0);
29
		return mdId;
30
	}
31

  
32
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/java/eu/dnetlib/data/provision/epidoc/EpidocRecordProvider.java
1
package eu.dnetlib.data.provision.epidoc;
2

  
3
import java.io.IOException;
4
import java.io.StringReader;
5

  
6
import javax.servlet.http.HttpServletRequest;
7
import javax.servlet.http.HttpServletResponse;
8

  
9
import org.apache.commons.logging.Log;
10
import org.apache.commons.logging.LogFactory;
11
import org.dom4j.Document;
12
import org.dom4j.DocumentException;
13
import org.dom4j.Node;
14
import org.dom4j.io.SAXReader;
15
import org.springframework.beans.factory.annotation.Autowired;
16
import org.springframework.stereotype.Controller;
17
import org.springframework.web.bind.annotation.PathVariable;
18
import org.springframework.web.bind.annotation.RequestMapping;
19

  
20
import eu.dnetlib.data.mdstore.DocumentNotFoundException;
21
import eu.dnetlib.data.mdstore.MDStoreService;
22
import eu.dnetlib.data.mdstore.MDStoreServiceException;
23
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
24
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
25

  
26
@Controller
27
public class EpidocRecordProvider {
28

  
29
	private static final Log log = LogFactory.getLog(EpidocRecordProvider.class);
30

  
31
	@Autowired
32
	private UniqueServiceLocator serviceLocator;
33

  
34
	@Autowired
35
	private MdIdResolver resolver;
36

  
37
	@RequestMapping("/epidoc/{objIdentfier}")
38
	public void provideEpidoc(final HttpServletRequest request, final HttpServletResponse response, @PathVariable final String objIdentfier)
39
			throws IOException, DocumentException, ISLookUpException, MDStoreServiceException {
40
		log.info("Retrieve original Epidoc record id = " + objIdentfier);
41

  
42
		String repoAcronym = objIdentfier.split("::")[0];
43
		String record;
44
		try {
45
			record = serviceLocator.getService(MDStoreService.class).deliverRecord(resolver.resolveMdId(repoAcronym), objIdentfier);
46
		} catch (DocumentNotFoundException e) {
47
			response.getWriter().write("The record identifier is invalid or not present");
48
			return;
49
		} catch (IndexOutOfBoundsException e) {
50
			response.getWriter().write("The record identifier is invalid");
51
			return;
52
		}
53

  
54
		Document doc = new SAXReader().read(new StringReader(record));
55
		Node teiNode = doc.selectSingleNode("//*[local-name()='TEI']");
56
		if (teiNode != null) {
57
			response.setHeader("Content-Type", "text/xml; charset=UTF-8");
58
			response.getWriter().write(doc.selectSingleNode("//*[local-name()='TEI']").asXML());
59
		} else {
60
			response.getWriter().write("EpiDoc representation not available for this record");
61
		}
62
	}
63

  
64
}
modules/dnet-eagle-workflows/tags/dnet-eagle-workflows-3.0.3/src/main/resources/eu/dnetlib/test/profiles/eagle/workflows/repo-hi-epidoc.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
			value="dd5882b3-1a92-4e59-929c-f36b73854e57_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl" />
6
		<RESOURCE_TYPE value="WorkflowDSResourceType" />
7
		<RESOURCE_KIND value="WorkflowDSResources" />
8
		<RESOURCE_URI value="value3" />
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z" />
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>EpiDoc collection and transformation</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>REPO_HI</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>20</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
17
				<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM name="expectedInterfaceTypologyPrefixes" managedBy="system" required="false" type="string">epidoc</PARAM>
20
					<PARAM name="expectedCompliancePrefixes" managedBy="system" required="false" type="string">eagleMetadata</PARAM>
21
				</PARAMETERS>
22
				<ARCS>
23
					<ARC to="createMetaWf"/>
24
					<ARC to="validateDs" name="validateDs"/>
25
				</ARCS>
26
			</NODE>
27
		
28
			<NODE name="validateDs" type="ValidateDatasource">
29
				<DESCRIPTION>Validate DS</DESCRIPTION>
30
				<PARAMETERS/>
31
				<ARCS>
32
					<ARC to="createMetaWf"/>
33
				</ARCS>
34
			</NODE>
35
			
36
			<NODE name="createMetaWf" type="RegisterMetaWf">
37
				<DESCRIPTION>Create MetaWorkflow</DESCRIPTION>
38
				<PARAMETERS>
39
					<PARAM name="wfName" managedBy="system" required="true" type="string">EpiDoc collection and transformation</PARAM>
40
				</PARAMETERS>
41
				<ARCS>
42
					<ARC to="createNative"/>
43
					<ARC to="createTransformed"/>
44
					<ARC to="createCleaned"/>
45
					<ARC to="prepareCreateIndex"/>
46
				</ARCS>
47
			</NODE>
48
			
49
			<NODE name="createNative" type="CreateMDStore">
50
				<DESCRIPTION>Create native store</DESCRIPTION>
51
				<PARAMETERS>
52
					<PARAM name="format" managedBy="system" required="true" type="string">GMF</PARAM>
53
					<PARAM name="interpretation" managedBy="system" required="true" type="string">native</PARAM>
54
					<PARAM name="layout" managedBy="system" required="true" type="string">store</PARAM>
55
					<PARAM name="outputPrefix" managedBy="system" required="true" type="string">coll_</PARAM>
56
				</PARAMETERS>
57
				<ARCS>
58
					<ARC to="updateMetaWf" />
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff