Revision 42184
Added by Michele Artini about 8 years ago
PublisherRecordParser.java | ||
---|---|---|
4 | 4 |
import java.util.List; |
5 | 5 |
import java.util.Map.Entry; |
6 | 6 |
|
7 |
import com.google.common.base.Function; |
|
8 |
import com.google.common.collect.ArrayListMultimap; |
|
9 |
import com.google.common.collect.Iterables; |
|
10 |
import com.google.common.collect.Multimap; |
|
11 |
import eu.dnetlib.data.information.oai.publisher.PublisherField; |
|
12 | 7 |
import org.apache.commons.logging.Log; |
13 | 8 |
import org.apache.commons.logging.LogFactory; |
14 | 9 |
import org.dom4j.Document; |
... | ... | |
16 | 11 |
import org.dom4j.Node; |
17 | 12 |
import org.dom4j.io.SAXReader; |
18 | 13 |
|
14 |
import com.google.common.base.Function; |
|
15 |
import com.google.common.collect.ArrayListMultimap; |
|
16 |
import com.google.common.collect.Iterables; |
|
17 |
import com.google.common.collect.Multimap; |
|
18 |
|
|
19 |
import eu.dnetlib.oai.PublisherField; |
|
20 |
|
|
19 | 21 |
/** |
20 | 22 |
* An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store. |
21 | 23 |
* |
... | ... | |
44 | 46 |
* Parses the record and returns a map where a key is the name of an index, the value is the value in the record at the xpath specified |
45 | 47 |
* in this.storeIndices. |
46 | 48 |
* |
47 |
* @param record the XML string to parse. |
|
49 |
* @param record |
|
50 |
* the XML string to parse. |
|
48 | 51 |
* @return a Multimap describing the values to be indexed for this record. |
49 | 52 |
*/ |
50 | 53 |
@SuppressWarnings({ "unchecked", "rawtypes" }) |
51 | 54 |
public Multimap<String, String> parseRecord(final String record) { |
52 | 55 |
Multimap<String, String> recordProps = ArrayListMultimap.create(); |
53 | 56 |
try { |
54 |
Document doc = this.saxReader.read(new StringReader(record)); |
|
55 |
for (PublisherField field : this.storeIndices) { |
|
56 |
for (Entry<String, String> indexEntry : field.getSources().entries()) { |
|
57 |
final Document doc = this.saxReader.read(new StringReader(record));
|
|
58 |
for (final PublisherField field : this.storeIndices) {
|
|
59 |
for (final Entry<String, String> indexEntry : field.getSources().entries()) {
|
|
57 | 60 |
// each xpath can return a list of nodes or strings, depending on the xpath |
58 |
List xPathResult = doc.selectNodes(indexEntry.getValue()); |
|
61 |
final List xPathResult = doc.selectNodes(indexEntry.getValue());
|
|
59 | 62 |
if ((xPathResult != null) && !xPathResult.isEmpty()) { |
60 | 63 |
if (containsStrings(xPathResult)) { |
61 | 64 |
recordProps.putAll(field.getFieldName(), xPathResult); |
... | ... | |
65 | 68 |
|
66 | 69 |
@Override |
67 | 70 |
public String apply(final Object obj) { |
68 |
if (obj == null) return "";
|
|
69 |
Node node = (Node) obj; |
|
71 |
if (obj == null) { return ""; }
|
|
72 |
final Node node = (Node) obj;
|
|
70 | 73 |
return node.getText(); |
71 | 74 |
} |
72 | 75 |
})); |
... | ... | |
76 | 79 |
} |
77 | 80 |
} |
78 | 81 |
|
79 |
} catch (DocumentException e) { |
|
82 |
} catch (final DocumentException e) {
|
|
80 | 83 |
log.fatal("Can't parse record"); |
81 | 84 |
recordProps = null; |
82 | 85 |
} |
... | ... | |
86 | 89 |
|
87 | 90 |
@SuppressWarnings("rawtypes") |
88 | 91 |
private boolean containsStrings(final List objects) { |
89 |
Object first = objects.get(0); |
|
92 |
final Object first = objects.get(0);
|
|
90 | 93 |
return first instanceof String; |
91 | 94 |
} |
92 | 95 |
|
93 | 96 |
@SuppressWarnings("rawtypes") |
94 | 97 |
private boolean containsNodes(final List objects) { |
95 |
Object first = objects.get(0); |
|
98 |
final Object first = objects.get(0);
|
|
96 | 99 |
return first instanceof Node; |
97 | 100 |
} |
98 | 101 |
|
99 | 102 |
public List<PublisherField> getStoreIndices() { |
100 |
return storeIndices; |
|
103 |
return this.storeIndices;
|
|
101 | 104 |
} |
102 | 105 |
|
103 | 106 |
public void setStoreIndices(final List<PublisherField> storeIndices) { |
... | ... | |
105 | 108 |
} |
106 | 109 |
|
107 | 110 |
public SAXReader getSaxReader() { |
108 |
return saxReader; |
|
111 |
return this.saxReader;
|
|
109 | 112 |
} |
110 | 113 |
|
111 | 114 |
} |
Also available in: Unified diff
oai import