Project

General

Profile

« Previous | Next » 

Revision 34593

added indexing field

View differences:

modules/cnr-mongo-mdstore/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/mongodb/MongoMDStore.java
21 21
import com.mongodb.QueryBuilder;
22 22

  
23 23
import eu.dnetlib.data.mdstore.DocumentNotFoundException;
24
import eu.dnetlib.data.mdstore.modular.MDFormatDescription;
24 25
import eu.dnetlib.data.mdstore.modular.RecordParser;
25 26
import eu.dnetlib.data.mdstore.modular.connector.MDStore;
26 27
import eu.dnetlib.data.mdstore.modular.mongodb.utils.MDStoreUtils;
......
264 265
		this.discardedCollection = discardedCollection;
265 266
	}
266 267

  
268
	@Override
269
	public void feed(final Iterable<String> records, final boolean incremental, final List<MDFormatDescription> mdformats) {
270
		// TODO Auto-generated method stub
271

  
272
	}
273

  
267 274
}
modules/cnr-modular-mdstore-service/branches/indexFields/src/test/java/eu/dnetlib/enabling/tools/blackboard/MDStoreUtilTest.java
1
package eu.dnetlib.enabling.tools.blackboard;
2

  
3
import java.io.ByteArrayInputStream;
4

  
5
import javax.xml.parsers.DocumentBuilder;
6
import javax.xml.parsers.DocumentBuilderFactory;
7
import javax.xml.xpath.XPath;
8
import javax.xml.xpath.XPathConstants;
9
import javax.xml.xpath.XPathExpression;
10
import javax.xml.xpath.XPathFactory;
11

  
12
import org.junit.Test;
13
import org.w3c.dom.Document;
14
import org.w3c.dom.NamedNodeMap;
15
import org.w3c.dom.Node;
16
import org.w3c.dom.NodeList;
17

  
18
public class MDStoreUtilTest {
19

  
20
	@Test
21
	public void test() throws Exception {
22

  
23
		String result = "<FIELDS> <FIELD indexable=\"false\" name=\"doi\" result=\"false\" stat=\"true\" xpath=\"//*[local-name()='resource']/*[local-name()='identifier' and ./@identifierType='DOI']\"/> </FIELDS>";
24
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
25
		DocumentBuilder builder;
26
		builder = factory.newDocumentBuilder();
27
		Document doc = builder.parse(new ByteArrayInputStream(result.getBytes()));
28
		XPathFactory xPathFactory = XPathFactory.newInstance();
29
		XPath myXpath = xPathFactory.newXPath();
30
		XPathExpression expression = myXpath.compile("//FIELD");
31
		Object values = expression.evaluate(doc, XPathConstants.NODESET);
32
		NodeList v = (NodeList) values;
33
		for (int i = 0; i < v.getLength(); i++) {
34
			Node currentItem = v.item(i);
35
			NamedNodeMap attributes = currentItem.getAttributes();
36
			String name = null;
37
			String xpath = null;
38
			if (attributes.getNamedItem("name") != null) {
39
				name = attributes.getNamedItem("name").getNodeValue();
40
			}
41
			if (attributes.getNamedItem("xpath") != null) {
42
				xpath = attributes.getNamedItem("xpath").getNodeValue();
43
			}
44
			if (attributes.getNamedItem("value") != null) {
45
				xpath = attributes.getNamedItem("value").getNodeValue();
46
			}
47
		}
48

  
49
	}
50
}
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/FeedAction.java
1 1
package eu.dnetlib.data.mdstore.modular;
2 2

  
3
import java.util.List;
4

  
3 5
import org.apache.commons.logging.Log;
4 6
import org.apache.commons.logging.LogFactory;
7
import org.springframework.beans.factory.annotation.Autowired;
5 8
import org.springframework.beans.factory.annotation.Required;
6 9

  
7 10
import eu.dnetlib.data.mdstore.MDStoreServiceException;
......
14 17

  
15 18
	private MDStoreFeeder feeder;
16 19

  
20
	@Autowired
21
	private MDStoreUtils mdstoreUtils;
22

  
17 23
	@Override
18 24
	public void executeAsync(final BlackboardServerHandler handler, final BlackboardJob job) throws MDStoreServiceException {
19 25

  
......
28 34
			storingType = "REFRESH";
29 35
		}
30 36

  
31
		feeder.feed(mdId, epr, storingType, true, new FeedDoneCallback() {
37
		final String layoutIndex = job.getParameters().get("layoutIndex");
38
		String format = feeder.getDao().getMDStore(mdId).getFormat();
32 39

  
40
		List<MDFormatDescription> mdformats = mdstoreUtils.getField(format, layoutIndex);
41
		if (mdformats != null) {
42
			for (MDFormatDescription desc : mdformats) {
43
				log.info("name: " + desc.getName());
44
				log.info("xpath: " + desc.getXpath());
45
			}
46
		}
47

  
48
		feeder.feed(mdId, epr, storingType, true, mdformats, new FeedDoneCallback() {
49

  
33 50
			@Override
34 51
			public void call(final int size) {
35 52
				job.getParameters().put("total", "" + size);
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/FieldsExtractor.java
1
package eu.dnetlib.data.mdstore.modular;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.util.List;
5
import java.util.Map;
6

  
7
import javax.xml.parsers.DocumentBuilder;
8
import javax.xml.parsers.DocumentBuilderFactory;
9
import javax.xml.parsers.ParserConfigurationException;
10
import javax.xml.xpath.XPath;
11
import javax.xml.xpath.XPathConstants;
12
import javax.xml.xpath.XPathExpression;
13
import javax.xml.xpath.XPathFactory;
14

  
15
import org.w3c.dom.Document;
16
import org.w3c.dom.Node;
17
import org.w3c.dom.NodeList;
18

  
19
import com.google.common.collect.Lists;
20
import com.google.common.collect.Maps;
21

  
22
public class FieldsExtractor {
23

  
24
	private final DocumentBuilderFactory factory;
25
	private DocumentBuilder builder;
26
	private XPath xpath;
27

  
28
	public FieldsExtractor() throws ParserConfigurationException {
29
		factory = DocumentBuilderFactory.newInstance();
30
		this.builder = factory.newDocumentBuilder();
31
		XPathFactory xPathFactory = XPathFactory.newInstance();
32
		this.xpath = xPathFactory.newXPath();
33
	}
34

  
35
	public Map<String, List<String>> getData(final String xml, final List<MDFormatDescription> mdref) {
36

  
37
		try {
38
			Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
39

  
40
			Map<String, List<String>> result = Maps.newHashMap();
41
			for (MDFormatDescription info : mdref) {
42
				XPathExpression expression = xpath.compile(info.getXpath());
43
				NodeList values = (NodeList) expression.evaluate(doc, XPathConstants.NODESET);
44
				for (int i = 0; i < values.getLength(); i++) {
45
					Node currentItem = values.item(i);
46
					NodeList fields = currentItem.getChildNodes();
47
					if (fields != null) {
48
						List<String> currentValues = Lists.newArrayList();
49
						for (int k = 0; k < fields.getLength(); k++) {
50
							Node field = fields.item(k);
51
							while (field.hasChildNodes()) {
52
								field = field.getFirstChild();
53
							}
54
							currentValues.add(field.getNodeValue());
55

  
56
						}
57
						result.put(info.getName(), currentValues);
58
					}
59
				}
60
			}
61
			return result;
62
		} catch (Exception e) {
63
			return Maps.newHashMap();
64
		}
65
	}
66

  
67
}
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/StreamingRecordParser.java
11 11
import javax.xml.stream.XMLStreamReader;
12 12

  
13 13
/**
14
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the
15
 * header.
16
 * 
14
 * This method outperforms SimpleRecordParser by a vast amount, especially since we are just getting stuff in the header.
15
 *
17 16
 * @author marko
18
 * 
17
 *
19 18
 */
20 19
public class StreamingRecordParser implements RecordParser {
21 20

  
22 21
	@Override
23
	public Map<String, String> parseRecord(String record) {
22
	public Map<String, String> parseRecord(final String record) {
24 23

  
25 24
		try {
26 25
			XMLInputFactory factory = XMLInputFactory.newInstance();
......
33 32

  
34 33
			while (parser.hasNext()) {
35 34
				int event = parser.next();
36

  
37 35
				if (event == XMLStreamConstants.END_ELEMENT) {
38 36
					elementStack.pop();
39 37
				} else if (event == XMLStreamConstants.START_ELEMENT) {
......
48 46
					} else if ("identifier".equals(localName) && "efgEntity".equals(grandParent(elementStack))) {
49 47
						if (!res.containsKey("originalId")) {
50 48
							parser.next();
51
//							log.info("ZZZZZZ OK: found identifier at right depth " + elementStack);
49
							// log.info("ZZZZZZ OK: found identifier at right depth " + elementStack);
52 50
							res.put("originalId", parser.getText().trim());
53 51
						}
54 52
					}
55 53

  
56 54
					else if ("identifier".equals(localName)) {
57 55

  
58
//						log.info("ZZZZZZ: found identifier not at right depth " + elementStack + " grand parent " + grandParent(elementStack));
56
						// log.info("ZZZZZZ: found identifier not at right depth " + elementStack + " grand parent " +
57
						// grandParent(elementStack));
59 58
					}
60 59

  
61
					if (res.containsKey("id") && res.containsKey("originalId"))
62
						return res;
60
					if (res.containsKey("id") && res.containsKey("originalId")) { return res; }
63 61
				}
64 62
			}
65 63
			return res;
......
69 67

  
70 68
	}
71 69

  
72
	private String grandParent(Stack<String> elementStack) {
73
		if (elementStack.size() <= 3)
74
			return "";
70
	private String grandParent(final Stack<String> elementStack) {
71
		if (elementStack.size() <= 3) { return ""; }
75 72
		return elementStack.get(elementStack.size() - 3);
76 73
	}
77 74

  
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/connector/MDStore.java
1 1
package eu.dnetlib.data.mdstore.modular.connector;
2 2

  
3
import java.util.List;
4

  
3 5
import eu.dnetlib.data.mdstore.DocumentNotFoundException;
6
import eu.dnetlib.data.mdstore.modular.MDFormatDescription;
4 7
import eu.dnetlib.enabling.resultset.ResultSetListener;
5 8

  
6 9
public interface MDStore {
......
17 20

  
18 21
	void feed(Iterable<String> records, boolean incremental);
19 22

  
23
	void feed(Iterable<String> records, boolean incremental, List<MDFormatDescription> mdformats);
24

  
20 25
	ResultSetListener deliver(String from, String until, String recordFilter);
21 26

  
22 27
	ResultSetListener deliverIds(String from, String until, String recordFilter);
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/MDFormatDescription.java
1
package eu.dnetlib.data.mdstore.modular;
2

  
3
/**
 * Pairs the name of a metadata field with an XPath expression.
 */
public class MDFormatDescription {

	/** The field name. */
	private String name;

	/** The XPath expression associated with the field. */
	private String xpath;

	/**
	 * Creates a description whose name and xpath are both unset.
	 */
	public MDFormatDescription() {}

	/**
	 * Creates a description with the given name and xpath.
	 *
	 * @param name
	 *            the field name
	 * @param xpath
	 *            the XPath expression associated with the field
	 */
	public MDFormatDescription(final String name, final String xpath) {
		this.name = name;
		this.xpath = xpath;
	}

	/**
	 * Gets the name.
	 *
	 * @return the name, or null if it has not been set
	 */
	public String getName() {
		return name;
	}

	/**
	 * Sets the name.
	 *
	 * @param name
	 *            the name to set
	 */
	public void setName(final String name) {
		this.name = name;
	}

	/**
	 * Gets the xpath.
	 *
	 * @return the xpath, or null if it has not been set
	 */
	public String getXpath() {
		return xpath;
	}

	/**
	 * Sets the xpath.
	 *
	 * @param xpath
	 *            the xpath to set
	 */
	public void setXpath(final String xpath) {
		this.xpath = xpath;
	}

	/**
	 * Renders both properties, which is convenient when logging a description.
	 *
	 * @return a text of the form {@code MDFormatDescription[name=..., xpath=...]}
	 */
	@Override
	public String toString() {
		return "MDFormatDescription[name=" + name + ", xpath=" + xpath + "]";
	}

}
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/MDStoreFeeder.java
1 1
package eu.dnetlib.data.mdstore.modular;
2 2

  
3
import java.util.List;
4

  
3 5
import org.apache.commons.logging.Log;
4 6
import org.apache.commons.logging.LogFactory;
5 7
import org.springframework.beans.factory.annotation.Required;
......
28 30
			final String rsEpr,
29 31
			final String storingType,
30 32
			final boolean sync,
33
			final List<MDFormatDescription> mdformats,
31 34
			final FeedDoneCallback doneCallback,
32 35
			final FeedFailedCallback failCallback) throws MDStoreServiceException {
33 36
		log.info("Start feeding mdstore " + mdId + " with epr " + rsEpr);
......
43 46
				mdstore.truncate();
44 47
			}
45 48

  
46
			mdstore.feed(records, refresh);
49
			if (mdformats == null) {
50
				mdstore.feed(records, refresh);
51
			} else {
52
				mdstore.feed(records, refresh, mdformats);
53
			}
47 54

  
48 55
			dao.commit(mdstore.getId(), mdId);
49 56

  
......
69 76
		try {
70 77
			final String now = DateUtils.now_ISO8601();
71 78

  
72
			final String mdstoreXUpdate =
73
					"for $x in //RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '" + mdId + "']" + "return update value $x//LAST_STORAGE_DATE with '" + now
74
					+ "'";
79
			final String mdstoreXUpdate = "for $x in //RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '" + mdId + "']"
80
					+ "return update value $x//LAST_STORAGE_DATE with '" + now + "'";
75 81

  
76 82
			serviceLocator.getService(ISRegistryService.class).executeXUpdate(mdstoreXUpdate);
77 83

  
......
83 89

  
84 90
	public void touchSize(final String mdId, final int size) {
85 91
		try {
86
			final String mdstoreNumberXUpdate =
87
					"for $x in //RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '" + mdId + "']" + "return update value $x//NUMBER_OF_RECORDS with '" + size
88
					+ "'";
92
			final String mdstoreNumberXUpdate = "for $x in //RESOURCE_PROFILE[.//RESOURCE_IDENTIFIER/@value = '" + mdId + "']"
93
					+ "return update value $x//NUMBER_OF_RECORDS with '" + size + "'";
89 94

  
90 95
			serviceLocator.getService(ISRegistryService.class).executeXUpdate(mdstoreNumberXUpdate);
91 96
		} catch (final Exception e) {
......
122 127
	/**
	 * Gets the service locator.
	 *
	 * @return the value of the serviceLocator field
	 */
	public UniqueServiceLocator getServiceLocator() {
123 128
		return serviceLocator;
124 129
	}
125
	
130

  
126 131
	@Required
127
	public void setServiceLocator(UniqueServiceLocator serviceLocator) {
132
	public void setServiceLocator(final UniqueServiceLocator serviceLocator) {
128 133
		this.serviceLocator = serviceLocator;
129 134
	}
130 135

  
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/java/eu/dnetlib/data/mdstore/modular/MDStoreUtils.java
1
package eu.dnetlib.data.mdstore.modular;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.util.List;
5

  
6
import javax.xml.parsers.DocumentBuilder;
7
import javax.xml.parsers.DocumentBuilderFactory;
8
import javax.xml.xpath.XPath;
9
import javax.xml.xpath.XPathConstants;
10
import javax.xml.xpath.XPathExpression;
11
import javax.xml.xpath.XPathFactory;
12

  
13
import org.springframework.beans.factory.annotation.Autowired;
14
import org.w3c.dom.Document;
15
import org.w3c.dom.NamedNodeMap;
16
import org.w3c.dom.Node;
17
import org.w3c.dom.NodeList;
18

  
19
import com.google.common.collect.Lists;
20

  
21
import eu.dnetlib.data.mdstore.MDStoreServiceException;
22
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpDocumentNotFoundException;
23
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
24
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
25
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
26

  
27
public class MDStoreUtils {
28

  
29
	/**
30
	 * service locator.
31
	 */
32
	@Autowired
33
	private UniqueServiceLocator serviceLocator;
34

  
35
	public List<MDFormatDescription> getField(final String format, final String layout) throws MDStoreServiceException {
36

  
37
		String xquery = "for $x in collection('')/RESOURCE_PROFILE/BODY[CONFIGURATION/NAME='" + format + "'] return $x/STATUS/LAYOUTS/LAYOUT[@name='" + layout
38
				+ "']/FIELDS";
39

  
40
		try {
41
			String result = serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(xquery);
42
			final List<MDFormatDescription> mdformat = Lists.newArrayList();
43
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
44
			DocumentBuilder builder;
45
			builder = factory.newDocumentBuilder();
46
			Document doc = builder.parse(new ByteArrayInputStream(result.getBytes()));
47
			XPathFactory xPathFactory = XPathFactory.newInstance();
48
			XPath myXpath = xPathFactory.newXPath();
49
			XPathExpression expression = myXpath.compile("//FIELD");
50
			Object values = expression.evaluate(doc, XPathConstants.NODESET);
51
			NodeList v = (NodeList) values;
52
			for (int i = 0; i < v.getLength(); i++) {
53
				Node currentItem = v.item(i);
54
				NamedNodeMap attributes = currentItem.getAttributes();
55
				String name = null;
56
				String xpath = null;
57
				if (attributes.getNamedItem("name") != null) {
58
					name = attributes.getNamedItem("name").getNodeValue();
59
				}
60
				if (attributes.getNamedItem("xpath") != null) {
61
					xpath = attributes.getNamedItem("xpath").getNodeValue();
62
				}
63
				if (attributes.getNamedItem("value") != null) {
64
					xpath = attributes.getNamedItem("value").getNodeValue();
65
				}
66

  
67
				MDFormatDescription currentMdFormat = new MDFormatDescription();
68
				currentMdFormat.setName(name);
69
				currentMdFormat.setXpath(xpath);
70
				mdformat.add(currentMdFormat);
71
			}
72
			return mdformat;
73

  
74
		} catch (ISLookUpDocumentNotFoundException e1) {
75
			return null;
76
		} catch (ISLookUpException e1) {
77
			return null;
78
		} catch (Exception e) {
79
			throw new MDStoreServiceException("Error on retrieving field from mdformat", e);
80
		}
81

  
82
	}
83
}
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/resources/eu/dnetlib/data/mdstore/modular/applicationContext-modular-mdstore.properties
4 4
services.mdstore.recordParser=eu.dnetlib.data.mdstore.modular.StreamingRecordParser
5 5
services.mdstore.rsfactory.pagesize=20
6 6
services.mdstore.discardrecords=true
7
services.mdstore.transaction=mongoMdStoreTransaction
7
services.mdstore.transaction=mongoMdStoreTransaction
modules/cnr-modular-mdstore-service/branches/indexFields/src/main/resources/eu/dnetlib/data/mdstore/modular/applicationContext-modular-mdstore.xml
16 16
		init-method="start" destroy-method="stop" p:notificationHandler-ref="mdstoreNotificationHandler"
17 17
		p:iterableResultSetFactory-ref="iterableResultSetFactory"
18 18
		p:feeder-ref="mdstoreFeeder" p:retriever-ref="mdstoreRetriever" />
19
		
20
	<bean id="mdStoreUtils" class="eu.dnetlib.data.mdstore.modular.MDStoreUtils"/>
19 21

  
20 22
	<bean id="mdstoreNotificationHandler"
21 23
		class="eu.dnetlib.enabling.tools.blackboard.BlackboardServerExecutorNotificationHandler"
......
70 72
		/> -->
71 73
	<bean id="mdstoreRecordParser" 
72 74
		factory-bean="recordParserFactory" factory-method="newInstance"/>
75
		
76
	<bean id="fieldExtractor" class="eu.dnetlib.data.mdstore.modular.FieldsExtractor"/>
73 77
	
74 78
	<bean id="recordParserFactory" class="eu.dnetlib.data.mdstore.modular.RecordParserFactory"
75 79
		p:parserType="${services.mdstore.recordParser}" />

Also available in: Unified diff