Project

General

Profile

1
package eu.dnetlib.oai.parser;
2

    
3
import java.io.StringReader;
4
import java.util.List;
5
import java.util.Map.Entry;
6

    
7
import com.google.common.collect.ArrayListMultimap;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Multimap;
10
import eu.dnetlib.oai.PublisherField;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13
import org.dom4j.Document;
14
import org.dom4j.DocumentException;
15
import org.dom4j.Node;
16
import org.dom4j.io.SAXReader;
17

    
18
/**
19
 * An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store.
20
 *
21
 * @author alessia
22
 */
23
public class PublisherRecordParser {
24

    
25
	private static final Log log = LogFactory.getLog(PublisherRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
26

    
27
	/**
28
	 * List of the indices of the target store.
29
	 */
30
	private List<PublisherField> storeIndices;
31

    
32
	private final SAXReader saxReader = new SAXReader();
33

    
34
	/**
35
	 * Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specificed
36
	 * in this.storeIndices.
37
	 *
38
	 * @param record
39
	 *            the XML string to parse.
40
	 * @return a Multimap describing the values to be indexed for this record.
41
	 */
42
	@SuppressWarnings({ "unchecked", "rawtypes" })
43
	public Multimap<String, String> parseRecord(final String record) {
44
		Multimap<String, String> recordProps = ArrayListMultimap.create();
45
		try {
46
			Document doc = this.saxReader.read(new StringReader(record));
47
			for (PublisherField field : this.storeIndices) {
48
				for (Entry<String, String> indexEntry : field.getSources().entries()) {
49
					// each xpath can return a list of nodes or strings, depending on the xpath
50
					List xPathResult = doc.selectNodes(indexEntry.getValue());
51
					if ((xPathResult != null) && !xPathResult.isEmpty()) {
52
						if (containsStrings(xPathResult)) {
53
							recordProps.putAll(field.getFieldName(), xPathResult);
54
						} else {
55
							if (containsNodes(xPathResult)) {
56
								recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, obj -> {
57
									if (obj == null) return "";
58
									Node node = (Node) obj;
59
									return node.getText();
60
								}));
61
							}
62
						}
63
					}
64
				}
65
			}
66

    
67
		} catch (DocumentException e) {
68
			log.fatal("Can't parse record");
69
			recordProps = null;
70
		}
71
		return recordProps;
72

    
73
	}
74

    
75
	@SuppressWarnings("rawtypes")
76
	private boolean containsStrings(final List objects) {
77
		Object first = objects.get(0);
78
		return first instanceof String;
79
	}
80

    
81
	@SuppressWarnings("rawtypes")
82
	private boolean containsNodes(final List objects) {
83
		Object first = objects.get(0);
84
		return first instanceof Node;
85
	}
86

    
87
	public List<PublisherField> getStoreIndices() {
88
		return storeIndices;
89
	}
90

    
91
	public void setStoreIndices(final List<PublisherField> storeIndices) {
92
		this.storeIndices = storeIndices;
93
	}
94

    
95
	public SAXReader getSaxReader() {
96
		return saxReader;
97
	}
98

    
99
	public PublisherRecordParser(final List<PublisherField> storeIndices) {
100
		super();
101
		this.storeIndices = storeIndices;
102
	}
103

    
104
	public PublisherRecordParser() {
105
		super();
106
		// TODO Auto-generated constructor stub
107
	}
108

    
109

    
110
}
    (1-1/1)