Project

General

Profile

1
/**
2
 * 
3
 */
4
package eu.dnetlib.data.collective.transformation.engine.functions;
5

    
6
import java.util.LinkedList;
7
import java.util.List;
8
import java.util.Map;
9
import java.util.TreeMap;
10

    
11
import org.dom4j.Node;
12

    
13
import eu.dnetlib.common.profile.Resource;
14
import eu.dnetlib.common.utils.XMLUtils;
15

    
16
/**
17
 * @author jochen
18
 *
19
 */
20
public class Vocabulary implements IVocabulary{
21

    
22
	private List<Term> terms;
23
	private Map<String, String> encodingMap;
24
	private Resource resource;
25
	private boolean isCaseSensitive = true;
26
	private String delimiter = null;
27
	private String name = null;
28
		
29
	/**
30
	 * @return the terms
31
	 */
32
	public List<Term> getTerms() {
33
		return terms;
34
	}
35

    
36
	/**
37
	 * @param terms the terms to set
38
	 */
39
	public void setTerms(List<Term> terms) {
40
		this.terms = terms;
41
	}
42

    
43
	/**
44
	 * @return the name
45
	 */
46
	public String getName() {
47
		return name;
48
	}
49

    
50
	/**
51
	 * @param name the name to set
52
	 */
53
	public void setName(String name) {
54
		this.name = name;
55
	}
56

    
57
	public String getVocabularyName(){
58
		return resource.getValue("//VOCABULARY_NAME");
59
	}
60
	
61
	/**
62
	 * returns the normalized, encoded String for a given key if found, otherwise a special value -depending on the vocabulary- is returned indicating that it couldn't be normalized 
63
	 * @param key a list of Strings to encode
64
	 * @return a normalized, encoded String
65
	 */
66
	@Override
67
	public String encoding(List<String> keys)throws ProcessingException{
68
		// take the first best
69
		for (String key: keys){
70
			key = key.trim();
71
			if (!isCaseSensitive) 
72
				key = key.toLowerCase();
73
			if (encodingMap.containsKey(key))
74
				return encodingMap.get(key);
75
		}
76
		if (encodingMap.containsKey("Unknown") || encodingMap.containsKey("unknown")){
77
			if (isCaseSensitive) return encodingMap.get("Unknown");
78
			else  return encodingMap.get("unknown");
79
		}else{
80
			if (isCaseSensitive) return encodingMap.get("Undetermined");
81
			else  return encodingMap.get("undetermined");
82
		}
83
	}
84
	
85
	class Term{
86
		String code;
87
		String name;
88
		List<String> synonyms = new LinkedList<String>();
89
		
90
		void addSynonym(String synonym){
91
			synonyms.add(synonym);
92
		}
93
		
94
		List<String> getSynonyms(){
95
			return synonyms;
96
		}
97
	}
98

    
99
	
100
	/**
101
	 * init the encoding with the given list of term parameters
102
	 * @param termList list of parameters with expected key:value pairs 'name':string, 'encoding':string, 'synonyms':list<string>
103
	 */
104
	@SuppressWarnings("unchecked")
105
	public void setResource(List<Map<String, ?>> aTermList){
106
		terms = new LinkedList<Term>();
107
		for (Map<String, ?> termMap : aTermList){
108
			Term t = new Term();
109
			terms.add(t);
110
			t.name = (String)termMap.get("name");
111
			t.code = (String)termMap.get("code");
112
			for (String synonym: (List<String>)termMap.get("synonyms"))
113
				t.addSynonym(synonym);
114
		}
115
		setCode();
116
	}
117
	
118
	/**
119
	 * init the encoding with term parameters from a vocabulary resource profile
120
	 * @param resource
121
	 */
122
	public void setResource(Resource resource) {
123
		this.resource = resource;
124
		terms = new LinkedList<Term>();
125
		List<Node> nodes = resource.getNodeList("//TERMS/*");
126
		int index = 1;
127
		for (Node n: nodes){
128
			Term t = new Term();
129
			terms.add(t);
130
			try {
131
				t.name = XMLUtils.getNode(n, "//TERM[" + index + "]/@english_name").getText();
132
				t.code = XMLUtils.getNode(n, "//TERM[" + index + "]/@code").getText();
133
				List<Node> nsynonyms = XMLUtils.getNodes(n, "//TERM[" + index + "]/SYNONYMS/*");
134
				int indexSynonyms = 1;
135
				for (Node nsynonym: nsynonyms){
136
					String synonymTerm = XMLUtils.getNode(nsynonym, "//TERM[" + index + "]//SYNONYM[" + indexSynonyms + "]/@term").getText();
137
					t.addSynonym(synonymTerm);
138
					indexSynonyms++;
139
				}
140
			} catch (Exception e) {
141
				throw new IllegalStateException(e);
142
			}
143
			index++;
144
		}
145
		setCode();
146
	}
147
	
148
	private void setCode(){
149
		encodingMap = new TreeMap<String, String>();
150
		for (Term t: terms){
151
			if (isCaseSensitive){ 
152
				encodingMap.put(t.name, t.code);
153
				encodingMap.put(t.code, t.code);
154
			}else{
155
				encodingMap.put(t.name.toLowerCase(), t.code);
156
				encodingMap.put(t.code.toLowerCase(), t.code);
157
			}
158
			if (this.delimiter != null){
159
				String[] splittedEncodings = t.code.split(this.delimiter);
160
				for (String encoding: splittedEncodings){
161
					if (isCaseSensitive){
162
						encodingMap.put(encoding, t.code);						
163
					}else{
164
						encodingMap.put(encoding.toLowerCase(), t.code);						
165
					}
166
				}
167
			}
168

    
169
			for (String synonym : t.synonyms){
170
				if (isCaseSensitive) encodingMap.put(synonym, t.code);
171
				else encodingMap.put(synonym.toLowerCase(), t.code);
172
			}
173
		}
174
	}
175

    
176
	public Resource getResource() {
177
		return resource;
178
	}
179

    
180
	public void setCaseSensitive(boolean isCaseSensitive) {
181
		this.isCaseSensitive = isCaseSensitive;
182
	}
183

    
184
	public boolean isCaseSensitive() {
185
		return isCaseSensitive;
186
	}
187

    
188
	/**
189
	 * @param delimiter the delimiter to set
190
	 */
191
	public void setDelimiter(String delimiter) {
192
		this.delimiter = delimiter;
193
	}
194

    
195
	/**
196
	 * @return the delimiter
197
	 */
198
	public String getDelimiter() {
199
		return delimiter;
200
	}
201

    
202
	@Override
203
	public List<String> encoding(List<String> aKeys, String aDefaultPattern,
204
			String aFilterFunction) throws ProcessingException {
205
		throw new ProcessingException("no implementation of filtered encoding.");
206
	}
207
	
208

    
209
}
(17-17/17)