Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.oai.utils;
2

    
3
import java.text.Normalizer;
4
import java.util.List;
5

    
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.mongodb.BasicDBObject;
9
import com.mongodb.BasicDBObjectBuilder;
10
import com.mongodb.DBObject;
11
import com.mongodb.MongoClient;
12
import com.mongodb.client.FindIterable;
13
import com.mongodb.client.MongoCollection;
14
import com.mongodb.client.model.Filters;
15
import com.mongodb.client.model.FindOneAndReplaceOptions;
16
import com.mongodb.client.model.FindOneAndUpdateOptions;
17
import com.mongodb.client.model.IndexOptions;
18
import org.apache.commons.lang.StringEscapeUtils;
19
import org.apache.commons.lang.StringUtils;
20
import org.bson.conversions.Bson;
21

    
22
public class MongoSetCollection implements SetCollection {
23

    
24
	public static String DEFAULT_SET = "OTHER";
25

    
26
	private MongoClient publisherMongoClient;
27
	private String setCollection = "sets";
28
	private String setCountCollection = "setsCount";
29

    
30
	public MongoSetCollection(MongoClient c){
31
		this.publisherMongoClient = c;
32
	}
33

    
34
	public void ensureIndexes(final String dbName) {
35
		this.ensureIndexesOnSets(dbName);
36
		this.ensureIndexesOnCount(dbName);
37
	}
38

    
39
	@Override
40
	public List<SetInfo> getAllSets(final boolean enabledOnly, final String dbName) {
41
		FindIterable<DBObject> iter = null;
42
		if (!enabledOnly) {
43
			iter = this.getSetsCollection(dbName).find();
44
		} else {
45
			Bson where = Filters.eq("enabled", true);
46
			iter = this.getSetsCollection(dbName).find(where);
47
		}
48
		return Lists.newArrayList(Iterables.transform(iter, new com.google.common.base.Function<DBObject, SetInfo>() {
49

    
50
			@Override
51
			public SetInfo apply(final DBObject dbObject) {
52
				return getSetFromDBObject(dbObject);
53
			}
54
		}));
55
	}
56

    
57
	@Override
58
	public boolean containSet(final String set, final String dbName) {
59
		Bson query = Filters.eq("spec", set);
60
		return this.getSetsCollection(dbName).count(query) != 0;
61
	}
62

    
63
	@Override
64
	public boolean containEnabledSet(final String set, final String publisherDBName) {
65
		Bson query = Filters.and(Filters.eq("spec", set), Filters.eq("enabled", true));
66
		return this.getSetsCollection(publisherDBName).count(query) != 0;
67
	}
68

    
69
	@Override
70
	public String getSetQuery(final String set, final String dbName) {
71
		Bson query = Filters.eq("spec", set);
72
		BasicDBObject returnField = new BasicDBObject("query", 1);
73
		DBObject obj = this.getSetsCollection(dbName).find(query).projection(returnField).first();
74
		return (String) obj.get("query");
75
	}
76

    
77
	@Override
78
	public int count(final String setSpec, final String mdPrefix, final String dbName) {
79
		Bson query = Filters.and(Filters.eq("spec", setSpec), Filters.eq("mdPrefix", mdPrefix));
80
		BasicDBObject returnField = new BasicDBObject("count", 1);
81
		DBObject obj = this.getSetsCountCollection(dbName).find(query).projection(returnField).first();
82
		if (obj == null) return 0;
83
		return (Integer) obj.get("count");
84
	}
85

    
86
	public void updateCounts(final String setSpec, final String mdPrefix, final int count, final String dbName) {
87
		BasicDBObject countUpdate = new BasicDBObject("$set", new BasicDBObject("count", count));
88
		Bson query = Filters.and(Filters.eq("spec", setSpec), Filters.eq("mdPrefix", mdPrefix));
89
		this.getSetsCountCollection(dbName).findOneAndUpdate(query, countUpdate, new FindOneAndUpdateOptions().upsert(true));
90
	}
91

    
92
	public void upsertSet(final SetInfo setInfo, final boolean fromConfiguration, final String dbName) {
93
		DBObject obj = this.getObjectFromSet(setInfo);
94
		obj.put("fromConfiguration", fromConfiguration);
95
		//this.getSetsCollection(dbName).update(new BasicDBObject("spec", setInfo.getSetSpec()), obj, true, false);
96
		this.getSetsCollection(dbName).findOneAndReplace(Filters.eq("spec", setInfo.getSetSpec()), obj, new FindOneAndReplaceOptions().upsert(true));
97
	}
98

    
99
	public String normalizeSetSpec(final String setName) {
100
		String s = StringEscapeUtils.unescapeXml(setName);
101
		s = Normalizer.normalize(s, Normalizer.Form.NFD);
102
		// replace spaces with underscores
103
		s = s.replaceAll(" ", "_");
104
		// remove tilde, dots... over letters
105
		s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}&&[^-_]]", "");
106
		// change punctuation into an underscore
107
		s = s.replaceAll("[\\p{Punct}&&[^-_]]", "_");
108
		// remove all non-word characters
109
		s = s.replaceAll("[\\W&&[^-_]]", "");
110
		// Avoiding set '___' generated when we have "strange" set names such as those in cyrillic/ukrain
111
		// strips _ from the beginning and the end
112
		String stripped = StringUtils.strip(s, "_ ");
113
		if (StringUtils.isBlank(stripped)) {
114
			stripped = DEFAULT_SET;
115
		}
116
		return stripped;
117
	}
118

    
119
	public List<SetInfo> getConfiguredSets(final String dbName) {
120
		Bson query = Filters.eq("fromConfiguration", true);
121
		return this.findSets(query, dbName);
122
	}
123

    
124
	public List<SetInfo> getSetsFromData(final String dbName) {
125
		Bson query = Filters.eq("fromConfiguration", false);
126
		return this.findSets(query, dbName);
127
	}
128

    
129
	public void dropOAISets(final String dbName) {
130
		this.getSetsCountCollection(dbName).drop();
131
		this.getSetsCollection(dbName).drop();
132
	}
133

    
134
	public void dropSet(final String dbName, final String setSpec) {
135
		Bson query = Filters.eq("spec", setSpec);
136
		this.getSetsCollection(dbName).deleteMany(query);
137
		this.getSetsCountCollection(dbName).deleteMany(query);
138
	}
139

    
140
	public void dropConfigurationSets(final String dbName) {
141
		this.getSetsCollection(dbName).deleteMany(Filters.eq("fromConfiguration", true));
142
	}
143

    
144
	protected List<SetInfo> findSets(final Bson query, final String dbName) {
145
		final FindIterable<DBObject> sets = this.getSetsCollection(dbName).find(query);
146
		List<SetInfo> res = Lists.newArrayList();
147
		for (DBObject obj : sets) {
148
			res.add(this.getSetFromDBObject(obj));
149
		}
150
		return res;
151
	}
152

    
153
	private SetInfo getSetFromDBObject(final DBObject obj) {
154
		SetInfo setInfo = new SetInfo();
155
		setInfo.setEnabled((Boolean) obj.get("enabled"));
156
		setInfo.setQuery((String) obj.get("query"));
157
		setInfo.setSetDescription((String) obj.get("description"));
158
		setInfo.setSetName((String) obj.get("name"));
159
		setInfo.setSetSpec((String) obj.get("spec"));
160
		return setInfo;
161
	}
162

    
163
	private DBObject getObjectFromSet(final SetInfo s) {
164
		DBObject obj = BasicDBObjectBuilder.start("spec", s.getSetSpec()).add("name", s.getSetName()).add("description", s.getSetDescription())
165
				.add("query", s.getQuery()).add("enabled", s.isEnabled()).get();
166
		return obj;
167
	}
168

    
169
	private void ensureIndexesOnSets(final String dbName) {
170
		this.getSetsCollection(dbName).createIndex(new BasicDBObject("spec", 1), new IndexOptions().background(true));
171
		this.getSetsCollection(dbName).createIndex(new BasicDBObject("fromConfiguration", 1), new IndexOptions().background(true));
172
	}
173

    
174
	private void ensureIndexesOnCount(final String dbName) {
175
		BasicDBObject index = (BasicDBObject) BasicDBObjectBuilder.start("spec", 1).add("mdPrefix", 1).get();
176
		this.getSetsCountCollection(dbName).createIndex(index, new IndexOptions().background(true));
177
	}
178

    
179
	public MongoCollection<DBObject> getSetsCollection(final String dbName) {
180
		return this.getCollection(this.setCollection, dbName);
181
	}
182

    
183
	public MongoCollection<DBObject> getSetsCountCollection(final String dbName) {
184
		return this.getCollection(this.setCountCollection, dbName);
185
	}
186

    
187
	private MongoCollection<DBObject> getCollection(final String collectionName, final String dbName) {
188
		return publisherMongoClient.getDatabase(dbName).getCollection(collectionName, DBObject.class);
189
	}
190

    
191
	public String getSetCollection() {
192
		return setCollection;
193
	}
194

    
195
	public void setSetCollection(final String setCollection) {
196
		this.setCollection = setCollection;
197
	}
198

    
199
	public String getSetCountCollection() {
200
		return setCountCollection;
201
	}
202

    
203
	public void setSetCountCollection(final String setCountCollection) {
204
		this.setCountCollection = setCountCollection;
205
	}
206

    
207
	public MongoClient getPublisherMongoClient() {
208
		return publisherMongoClient;
209
	}
210

    
211
	public void setPublisherMongoClient(final MongoClient publisherMongoClient) {
212
		this.publisherMongoClient = publisherMongoClient;
213
	}
214

    
215
}
(2-2/7)