Project

General

Profile

1
package eu.dnetlib.data.oai.store.parser;
2

    
3
import java.io.StringReader;
4
import java.util.List;
5
import java.util.Map.Entry;
6

    
7
import com.google.common.base.Function;
8
import com.google.common.collect.ArrayListMultimap;
9
import com.google.common.collect.Iterables;
10
import com.google.common.collect.Multimap;
11
import eu.dnetlib.data.information.oai.publisher.PublisherField;
12
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationExistReader;
13
import org.apache.commons.lang3.StringUtils;
14
import org.apache.commons.logging.Log;
15
import org.apache.commons.logging.LogFactory;
16
import org.dom4j.Document;
17
import org.dom4j.DocumentException;
18
import org.dom4j.Node;
19
import org.dom4j.io.SAXReader;
20

    
21
/**
22
 * An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store.
23
 * 
24
 * 
25
 * @author alessia
26
 * 
27
 */
28
public class PublisherRecordParser {
29

    
30
	private static final Log log = LogFactory.getLog(PublisherRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM
31

    
32
	/**
33
	 * List of the indices of the target store.
34
	 */
35
	private List<PublisherField> storeIndices;
36

    
37
	private final SAXReader saxReader = new SAXReader();
38

    
39
	/**
40
	 * Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specificed
41
	 * in this.storeIndices.
42
	 * 
43
	 * @param record
44
	 *            the XML string to parse.
45
	 * @return a Multimap describing the values to be indexed for this record.
46
	 */
47
	@SuppressWarnings({ "unchecked", "rawtypes" })
48
	public Multimap<String, String> parseRecord(final String record, final String source) {
49
		Multimap<String, String> recordProps = ArrayListMultimap.create();
50
		try {
51
			Document doc = this.saxReader.read(new StringReader(record));
52
			if(StringUtils.isNotBlank(source)) recordProps.put(OAIConfigurationExistReader.SET_FIELD, source);
53
			for (PublisherField field : this.storeIndices) {
54
				for (Entry<String, String> indexEntry : field.getSources().entries()) {
55
					// each xpath can return a list of nodes or strings, depending on the xpath
56
					List xPathResult = doc.selectNodes(indexEntry.getValue());
57
					if ((xPathResult != null) && !xPathResult.isEmpty()) {
58
						if (containsStrings(xPathResult)) {
59
							recordProps.putAll(field.getFieldName(), xPathResult);
60
						} else {
61
							if (containsNodes(xPathResult)) {
62
								recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, new Function<Object, String>() {
63

    
64
									@Override
65
									public String apply(final Object obj) {
66
										if (obj == null) return "";
67
										Node node = (Node) obj;
68
										return node.getText();
69
									}
70
								}));
71
							}
72
						}
73
					}
74
				}
75
			}
76

    
77
		} catch (DocumentException e) {
78
			log.fatal("Can't parse record");
79
			recordProps = null;
80
		}
81
		return recordProps;
82

    
83
	}
84

    
85
	@SuppressWarnings("rawtypes")
86
	private boolean containsStrings(final List objects) {
87
		Object first = objects.get(0);
88
		return first instanceof String;
89
	}
90

    
91
	@SuppressWarnings("rawtypes")
92
	private boolean containsNodes(final List objects) {
93
		Object first = objects.get(0);
94
		return first instanceof Node;
95
	}
96

    
97
	public List<PublisherField> getStoreIndices() {
98
		return storeIndices;
99
	}
100

    
101
	public void setStoreIndices(final List<PublisherField> storeIndices) {
102
		this.storeIndices = storeIndices;
103
	}
104

    
105
	public SAXReader getSaxReader() {
106
		return saxReader;
107
	}
108

    
109
	public PublisherRecordParser(final List<PublisherField> storeIndices) {
110
		super();
111
		this.storeIndices = storeIndices;
112
	}
113

    
114
	public PublisherRecordParser() {
115
		super();
116
		// TODO Auto-generated constructor stub
117
	}
118

    
119
}
    (1-1/1)