Project

General

Profile

1
package eu.dnetlib.data.cleaner;
2

    
3
import java.util.HashMap;
4
import java.util.HashSet;
5
import java.util.Map;
6
import java.util.Set;
7

    
8
import com.google.common.base.Joiner;
9
import eu.dnetlib.rmi.data.CleanerException;
10
import eu.dnetlib.rmi.enabling.ISLookUpService;
11
import org.apache.commons.logging.Log;
12
import org.apache.commons.logging.LogFactory;
13

    
14
/**
15
 * @author michele
16
 *
17
 *         Vocabulary rules must be declared in a CleanerDS profile, for each vocabulary must be present the relative VocabularyDS profile:
18
 *
19
 *         <RULE xpath="..." vocabularies="VOC1" /> <RULE xpath="..." vocabularies="VOC1, VOC2, VOC3" />
20
 */
21

    
22
public class VocabularyRule extends XPATHCleaningRule {
23

    
24
	private static final Log log = LogFactory.getLog(VocabularyRule.class); // NOPMD by marko on 11/24/08 5:02 PM
25
	private final Set<String> vocabularies;
26
	private final Map<String, String> synonyms = new HashMap<>();
27
	private final Set<String> validTerms = new HashSet<>();
28

    
29
	public VocabularyRule(final Set<String> vocabularies, final ISLookUpService lookup) throws CleanerException {
30
		this.vocabularies = vocabularies;
31
		loadSynonymsAndTerms(lookup);
32
	}
33

    
34
	@Override
35
	protected String calculateNewValue(final String oldValue) throws CleanerException {
36
		log.debug("calculating new value for: " + oldValue);
37

    
38
		if (this.synonyms.isEmpty()) {
39
			log.debug("Vocabulary terms is void, vocabularies: " + this.vocabularies);
40
		}
41

    
42
		String newValue = null;
43

    
44
		if (this.synonyms.containsKey(oldValue.toLowerCase())) {
45
			newValue = this.synonyms.get(oldValue.toLowerCase());
46
		}
47

    
48
		if (newValue == null) {
49
			log.debug("Synonym " + oldValue + " not found in vocabulary");
50
			return oldValue;
51
		}
52

    
53
		return newValue;
54
	}
55

    
56
	private void loadSynonymsAndTerms(final ISLookUpService lookup) throws CleanerException {
57

    
58
		for (final String vocabulary : this.vocabularies) {
59
			try {
60
				final String query = "for $x in collection('/db/DRIVER/VocabularyDSResources/VocabularyDSResourceType')"
61
						+ "//RESOURCE_PROFILE[.//VOCABULARY_NAME/@code='" + vocabulary + "']//TERM return "
62
						+ "( concat($x/@code,'|-:-|', $x/@code), concat($x/@english_name,'|-:-|', $x/@code), concat($x/@native_name,'|-:-|', $x/@code), "
63
						+ "for $y in $x//SYNONYM return concat($y/@term,'|-:-|', $x/@code) )";
64

    
65
				for (final String s : lookup.quickSearchProfile(query)) {
66
					log.debug("SYNONYM : " + s);
67
					final String[] arr = s.split("\\|-:-\\|");
68
					if (arr[0] == null || arr[0].isEmpty()) {
69
						continue;
70
					}
71
					this.synonyms.put(arr[0].toLowerCase(), arr[1]);
72
					this.validTerms.add(arr[1].toLowerCase());
73
				}
74

    
75
				log.debug("VOCABULARY " + vocabulary.trim() + " - terms size " + this.synonyms.size());
76
			} catch (final Exception e) {
77
				throw new CleanerException("Error obtaining vocabulary " + vocabulary, e);
78
			}
79
		}
80

    
81
	}
82

    
83
	@Override
84
	protected Map<String, String> verifyValue(final String value) throws CleanerException {
85
		if (this.synonyms.isEmpty()) {
86
			log.debug("Vocabulary terms is void, vocabularies: " + this.vocabularies);
87
		}
88

    
89
		if (this.validTerms.contains(value.toLowerCase())) { return null; }
90

    
91
		final Map<String, String> error = new HashMap<String, String>();
92
		error.put("term", value);
93
		error.put("vocabularies", this.vocabularies.toString().replaceAll("\\[", "").replaceAll("\\]", ""));
94
		error.put("xpath", this.getXpath());
95
		return error;
96
	}
97

    
98
	public Map<String, String> getVocabularyTerms() {
99
		return this.synonyms;
100
	}
101

    
102
	@Override
103
	public String toString() {
104
		return "VOCABULARIES: [" + Joiner.on(", ").join(this.vocabularies) + "]";
105
	}
106

    
107
}
(5-5/6)