Project

General

Profile

1
package eu.dnetlib.oai.parser;
2

    
3
import java.io.StringReader;
4
import java.util.List;
5
import java.util.Map.Entry;
6

    
7
import com.google.common.collect.ArrayListMultimap;
8
import com.google.common.collect.Iterables;
9
import com.google.common.collect.Multimap;
10
import eu.dnetlib.oai.PublisherField;
11
import eu.dnetlib.oai.conf.OAIConfigurationExistReader;
12
import org.apache.commons.lang3.StringUtils;
13
import org.apache.commons.logging.Log;
14
import org.apache.commons.logging.LogFactory;
15
import org.dom4j.Document;
16
import org.dom4j.DocumentException;
17
import org.dom4j.Node;
18
import org.dom4j.io.SAXReader;
19

    
20
/**
21
 * An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store.
22
 *
23
 * @author alessia
24
 */
25
public class PublisherRecordParser {
26

    
27
	private static final Log log = LogFactory.getLog(PublisherRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
28

    
29
	/**
30
	 * List of the indices of the target store.
31
	 */
32
	private List<PublisherField> storeIndices;
33

    
34
	private final SAXReader saxReader = new SAXReader();
35

    
36
	/**
37
	 * Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specificed
38
	 * in this.storeIndices.
39
	 *
40
	 * @param record
41
	 *            the XML string to parse.
42
	 * @param source
43
	 *             String identifying the source of the record. Can be null.
44
	 * @return a Multimap describing the values to be indexed for this record.
45
	 */
46
	@SuppressWarnings({ "unchecked", "rawtypes" })
47
	public Multimap<String, String> parseRecord(final String record, final String source) {
48
		Multimap<String, String> recordProps = ArrayListMultimap.create();
49
		try {
50
			Document doc = this.saxReader.read(new StringReader(record));
51
			if(StringUtils.isNotBlank(source)) recordProps.put(OAIConfigurationExistReader.SET_FIELD, source);
52
			for (PublisherField field : this.storeIndices) {
53
				for (Entry<String, String> indexEntry : field.getSources().entries()) {
54
					// each xpath can return a list of nodes or strings, depending on the xpath
55
					List xPathResult = doc.selectNodes(indexEntry.getValue());
56
					if ((xPathResult != null) && !xPathResult.isEmpty()) {
57
						if (containsStrings(xPathResult)) {
58
							recordProps.putAll(field.getFieldName(), xPathResult);
59
						} else {
60
							if (containsNodes(xPathResult)) {
61

    
62
								recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, obj -> {
63
									if (obj == null) return "";
64
									Node node = (Node) obj;
65
									return node.getText();
66
								}));
67
							}
68
						}
69
					}
70
				}
71
			}
72

    
73
		} catch (DocumentException e) {
74
			log.fatal("Can't parse record");
75
			recordProps = null;
76
		}
77
		return recordProps;
78
	}
79

    
80
	@SuppressWarnings("rawtypes")
81
	private boolean containsStrings(final List objects) {
82
		Object first = objects.get(0);
83
		return first instanceof String;
84
	}
85

    
86
	@SuppressWarnings("rawtypes")
87
	private boolean containsNodes(final List objects) {
88
		Object first = objects.get(0);
89
		return first instanceof Node;
90
	}
91

    
92
	public List<PublisherField> getStoreIndices() {
93
		return storeIndices;
94
	}
95

    
96
	public void setStoreIndices(final List<PublisherField> storeIndices) {
97
		this.storeIndices = storeIndices;
98
	}
99

    
100
	public SAXReader getSaxReader() {
101
		return saxReader;
102
	}
103

    
104
	public PublisherRecordParser(final List<PublisherField> storeIndices) {
105
		this.storeIndices = storeIndices;
106
	}
107

    
108
	public PublisherRecordParser() {
109
		super();
110
		// TODO Auto-generated constructor stub
111
	}
112

    
113

    
114
}
    (1-1/1)