Project

General

Profile

1
package eu.dnetlib.data.mdstore.plugins;
2

    
3
import java.io.StringReader;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.InvalidXPathException;
import org.dom4j.XPath;
import org.dom4j.io.SAXReader;

import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;

import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
import eu.dnetlib.rmi.data.MDStoreServiceException;
18

    
19
public class FilterMdRecordsPlugin extends AbstractIstiMDStorePlugin {
20

    
21
	private static final Log log = LogFactory.getLog(FilterMdRecordsPlugin.class);
22

    
23
	@Override
24
	public final void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
25

    
26
		final String xpath = params.get("xpath");
27

    
28
		final MongoCollection<DBObject> collPubs = store.getCollection();
29

    
30
		long valid = 0;
31
		long skipped = 0;
32

    
33
		final SAXReader reader = new SAXReader();
34
		for (final DBObject obj : collPubs.find()) {
35
			try {
36
				final String recordId = obj.get("id").toString();
37
				final Document doc = reader.read(new StringReader(obj.get("body").toString()));
38
				if (doc.selectNodes(xpath).isEmpty()) {
39
					collPubs.deleteOne(new BasicDBObject("id", recordId));
40
					skipped++;
41
				} else {
42
					valid++;
43
				}
44
			} catch (final DocumentException e) {
45
				log.warn("Problem parsing a mdstore record", e);
46
			}
47
		}
48

    
49
		log.info("***** Filtering records *****");
50
		log.info("*     xpath: " + xpath);
51
		log.info("*     valid: " + valid);
52
		log.info("*   skipped: " + skipped);
53
		log.info("*****************************");
54

    
55
		touch(store);
56
	}
57

    
58
}
(9-9/12)