Revision 45455
Added by Alessia Bardi about 7 years ago
PublisherRecordParser.java | ||
---|---|---|
4 | 4 |
import java.util.List; |
5 | 5 |
import java.util.Map.Entry; |
6 | 6 |
|
7 |
import com.google.common.collect.ArrayListMultimap; |
|
8 |
import com.google.common.collect.Iterables; |
|
9 |
import com.google.common.collect.Multimap; |
|
10 |
import eu.dnetlib.oai.PublisherField; |
|
7 | 11 |
import org.apache.commons.logging.Log; |
8 | 12 |
import org.apache.commons.logging.LogFactory; |
9 | 13 |
import org.dom4j.Document; |
... | ... | |
11 | 15 |
import org.dom4j.Node; |
12 | 16 |
import org.dom4j.io.SAXReader; |
13 | 17 |
|
14 |
import com.google.common.base.Function; |
|
15 |
import com.google.common.collect.ArrayListMultimap; |
|
16 |
import com.google.common.collect.Iterables; |
|
17 |
import com.google.common.collect.Multimap; |
|
18 |
|
|
19 |
import eu.dnetlib.oai.PublisherField; |
|
20 |
|
|
21 | 18 |
/** |
22 | 19 |
* An instance of this class can parse an XML record and extract the information needed to store the record in a publisher store. |
23 | 20 |
* |
... | ... | |
26 | 23 |
public class PublisherRecordParser { |
27 | 24 |
|
28 | 25 |
private static final Log log = LogFactory.getLog(PublisherRecordParser.class); // NOPMD by marko on 11/24/08 5:02 PM |
29 |
private final SAXReader saxReader = new SAXReader(); |
|
26 |
|
|
30 | 27 |
/** |
31 | 28 |
* List of the indices of the target store. |
32 | 29 |
*/ |
33 | 30 |
private List<PublisherField> storeIndices; |
34 | 31 |
|
35 |
public PublisherRecordParser(final List<PublisherField> storeIndices) { |
|
36 |
super(); |
|
37 |
this.storeIndices = storeIndices; |
|
38 |
} |
|
32 |
private final SAXReader saxReader = new SAXReader(); |
|
39 | 33 |
|
40 |
public PublisherRecordParser() { |
|
41 |
super(); |
|
42 |
// TODO Auto-generated constructor stub |
|
43 |
} |
|
44 |
|
|
45 | 34 |
/** |
46 | 35 |
* Parses the record and returns a map where each key is the name of an index and each value is the value in the record at the xpath specified |
47 | 36 |
* in this.storeIndices. |
... | ... | |
54 | 43 |
public Multimap<String, String> parseRecord(final String record) { |
55 | 44 |
Multimap<String, String> recordProps = ArrayListMultimap.create(); |
56 | 45 |
try { |
57 |
final Document doc = this.saxReader.read(new StringReader(record));
|
|
58 |
for (final PublisherField field : this.storeIndices) {
|
|
59 |
for (final Entry<String, String> indexEntry : field.getSources().entries()) {
|
|
46 |
Document doc = this.saxReader.read(new StringReader(record)); |
|
47 |
for (PublisherField field : this.storeIndices) { |
|
48 |
for (Entry<String, String> indexEntry : field.getSources().entries()) { |
|
60 | 49 |
// each xpath can return a list of nodes or strings, depending on the xpath |
61 |
final List xPathResult = doc.selectNodes(indexEntry.getValue());
|
|
50 |
List xPathResult = doc.selectNodes(indexEntry.getValue()); |
|
62 | 51 |
if ((xPathResult != null) && !xPathResult.isEmpty()) { |
63 | 52 |
if (containsStrings(xPathResult)) { |
64 | 53 |
recordProps.putAll(field.getFieldName(), xPathResult); |
65 | 54 |
} else { |
66 | 55 |
if (containsNodes(xPathResult)) { |
67 |
recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, new Function<Object, String>() { |
|
68 |
|
|
69 |
@Override |
|
70 |
public String apply(final Object obj) { |
|
71 |
if (obj == null) { return ""; } |
|
72 |
final Node node = (Node) obj; |
|
73 |
return node.getText(); |
|
74 |
} |
|
56 |
recordProps.putAll(field.getFieldName(), Iterables.transform(xPathResult, obj -> { |
|
57 |
if (obj == null) return ""; |
|
58 |
Node node = (Node) obj; |
|
59 |
return node.getText(); |
|
75 | 60 |
})); |
76 | 61 |
} |
77 | 62 |
} |
... | ... | |
79 | 64 |
} |
80 | 65 |
} |
81 | 66 |
|
82 |
} catch (final DocumentException e) {
|
|
67 |
} catch (DocumentException e) { |
|
83 | 68 |
log.fatal("Can't parse record"); |
84 | 69 |
recordProps = null; |
85 | 70 |
} |
... | ... | |
89 | 74 |
|
90 | 75 |
@SuppressWarnings("rawtypes") |
91 | 76 |
private boolean containsStrings(final List objects) { |
92 |
final Object first = objects.get(0);
|
|
77 |
Object first = objects.get(0); |
|
93 | 78 |
return first instanceof String; |
94 | 79 |
} |
95 | 80 |
|
96 | 81 |
@SuppressWarnings("rawtypes") |
97 | 82 |
private boolean containsNodes(final List objects) { |
98 |
final Object first = objects.get(0);
|
|
83 |
Object first = objects.get(0); |
|
99 | 84 |
return first instanceof Node; |
100 | 85 |
} |
101 | 86 |
|
102 | 87 |
public List<PublisherField> getStoreIndices() { |
103 |
return this.storeIndices;
|
|
88 |
return storeIndices; |
|
104 | 89 |
} |
105 | 90 |
|
106 | 91 |
public void setStoreIndices(final List<PublisherField> storeIndices) { |
... | ... | |
108 | 93 |
} |
109 | 94 |
|
110 | 95 |
public SAXReader getSaxReader() { |
111 |
return this.saxReader;
|
|
96 |
return saxReader; |
|
112 | 97 |
} |
113 | 98 |
|
99 |
public PublisherRecordParser(final List<PublisherField> storeIndices) { |
|
100 |
super(); |
|
101 |
this.storeIndices = storeIndices; |
|
102 |
} |
|
103 |
|
|
104 |
public PublisherRecordParser() { |
|
105 |
super(); |
|
106 |
// TODO Auto-generated constructor stub |
|
107 |
} |
|
108 |
|
|
109 |
|
|
114 | 110 |
} |
Also available in: Unified diff
Included the changes to the CQL translation, as in the most recent version of cnr-cql-utils of DNet40, plus a refactoring of the OAI parameters