Project

General

Profile

1
package eu.dnetlib.data.transform;
2

    
3
import java.io.StringReader;
4

    
5
import org.apache.commons.codec.binary.Base64;
6
import org.apache.commons.lang3.StringUtils;
7
import org.apache.solr.common.SolrInputDocument;
8
import org.dom4j.Document;
9
import org.dom4j.DocumentException;
10
import org.dom4j.Element;
11
import org.dom4j.io.SAXReader;
12

    
13
import com.google.common.base.Splitter;
14
import com.google.common.collect.Lists;
15
import com.google.protobuf.GeneratedMessage;
16

    
17
import eu.dnetlib.pace.config.Type;
18

    
19
/**
20
 * The Class ProtoDocumentMapper.
21
 */
22
public class SolrProtoMapper extends AbstractProtoMapper {
23

    
24
	private static final String ID_SEPARATOR = "::";
25

    
26
	/** The fields. */
27
	private Document fields;
28

    
29
	/**
30
	 * Instantiates a new proto document mapper.
31
	 *
32
	 * @param fields
33
	 *            the fields
34
	 * @throws DocumentException
35
	 *             the document exception
36
	 */
37
	public SolrProtoMapper(final String fields) throws DocumentException {
38
		this.fields = parse(fields);
39

    
40
		if (StringUtils.isBlank(this.fields.valueOf("//FIELD[@name = 'objIdentifier']/@name")))
41
			throw new IllegalArgumentException("field objIdentifier is mandatory");
42
	}
43

    
44
	/**
45
	 * Map.
46
	 *
47
	 * @param proto
48
	 *            the proto
49
	 * @param version
50
	 *            the version
51
	 * @param dsId
52
	 *            the ds id
53
	 * @return the solr input document
54
	 * @throws DocumentException
55
	 *             the document exception
56
	 */
57
	public SolrInputDocument map(final GeneratedMessage proto, final String version, final String dsId, final String actionSetId) throws DocumentException {
58

    
59
		final SolrInputDocument doc = new SolrInputDocument();
60

    
61
		for (final Object o : fields.selectNodes("//FIELD[string(@path)]")) {
62
			final Element e = (Element) o;
63

    
64
			final String name = e.attribute("name").getValue().toLowerCase().trim();
65
			final String path = e.attribute("path").getValue();
66

    
67
			doc.setField(name, processMultiPath(proto, Lists.newLinkedList(Splitter.on("|").trimResults().split(path)), Type.String));
68
		}
69

    
70
		final String objIdentifier = patchId((String) doc.getFieldValue("objidentifier"));
71
		doc.setField("objidentifier", objIdentifier);
72
		doc.setField("__indexrecordidentifier", getRecordId(objIdentifier, actionSetId));
73
		doc.setField("__dsid", dsId);
74
		doc.setField("__dsversion", version);
75
		doc.setField("__result", Base64.encodeBase64String(proto.toByteArray()));
76
		doc.setField("actionset", actionSetId);
77

    
78
		return doc;
79
	}
80

    
81
	public String getRecordId(final String objIdentifier, final String actionSetId) {
82
		return objIdentifier + ID_SEPARATOR + actionSetId;
83
	}
84

    
85
	/**
86
	 * Patch the objidentifier: when it comes from HBase, i.e. contains the separator '|' returns the string that follows.
87
	 *
88
	 * @param objidentifier
89
	 *            the objidentifier
90
	 * @return the string
91
	 */
92
	private String patchId(final String objidentifier) {
93
		return objidentifier.contains("|") ? StringUtils.substringAfter(objidentifier, "|") : objidentifier;
94
	}
95

    
96
	/**
97
	 * Parses the.
98
	 *
99
	 * @param s
100
	 *            the s
101
	 * @return the document
102
	 * @throws DocumentException
103
	 *             the document exception
104
	 */
105
	private Document parse(final String s) throws DocumentException {
106
		return new SAXReader().read(new StringReader(s));
107
	}
108

    
109
}
(10-10/13)