Project

General

Profile

« Previous | Next » 

Revision 61714

filter plugin

View differences:

modules/dnet-isti/trunk/src/main/java/eu/dnetlib/data/mdstore/plugins/FilterMdRecordsPlugin.java
1
package eu.dnetlib.data.mdstore.plugins;
2

  
3
import java.io.StringReader;
4
import java.util.Map;
5

  
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.dom4j.Document;
9
import org.dom4j.DocumentException;
10
import org.dom4j.io.SAXReader;
11

  
12
import com.mongodb.BasicDBObject;
13
import com.mongodb.DBObject;
14
import com.mongodb.client.MongoCollection;
15

  
16
import eu.dnetlib.data.mdstore.modular.mongodb.MongoMDStore;
17
import eu.dnetlib.rmi.data.MDStoreServiceException;
18

  
19
public class FilterMdRecordsPlugin extends AbstractIstiMDStorePlugin {
20

  
21
	private static final Log log = LogFactory.getLog(FilterMdRecordsPlugin.class);
22

  
23
	@Override
24
	public final void process(final MongoMDStore store, final Map<String, String> params) throws MDStoreServiceException {
25

  
26
		final String xpath = params.get("xpath");
27

  
28
		final MongoCollection<DBObject> collPubs = store.getCollection();
29

  
30
		long valid = 0;
31
		long skipped = 0;
32

  
33
		final SAXReader reader = new SAXReader();
34
		for (final DBObject obj : collPubs.find()) {
35
			try {
36
				final String recordId = obj.get("id").toString();
37
				final Document doc = reader.read(new StringReader(obj.get("body").toString()));
38
				if (doc.selectNodes(xpath).isEmpty()) {
39
					collPubs.deleteOne(new BasicDBObject("id", recordId));
40
					skipped++;
41
				} else {
42
					valid++;
43
				}
44
			} catch (final DocumentException e) {
45
				log.warn("Problem parsing a mdstore record", e);
46
			}
47
		}
48

  
49
		log.info("***** Filtering records *****");
50
		log.info("*     xpath: " + xpath);
51
		log.info("*     valid: " + valid);
52
		log.info("*   skipped: " + skipped);
53
		log.info("*****************************");
54

  
55
		touch(store);
56
	}
57

  
58
}
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/common/filterRecords.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="bc0e925e-2beb-44c4-9a91-e81b45392361_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
4
		<RESOURCE_TYPE value="WorkflowTemplateDSResourceType" />
5
		<RESOURCE_KIND value="WorkflowTemplateDSResources" />
6
		<RESOURCE_URI value="" />
7
		<DATE_OF_CREATION value="2021-10-25T11:50:45+02:00" />
8
	</HEADER>
9
	<BODY>
10
		<CONFIGURATION>
11
			<PARAMETERS>
12
				<PARAM description="Datasource Id" name="dsId" required="true" type="string"/>
13
				<PARAM description="Datasource Interface" name="interface" required="true" type="string"/>
14
				<PARAM description="MDStoreId" name="mdId" required="true" type="string" />
15
				<PARAM description="xpath" name="xpath" required="true" type="string" />
16
			</PARAMETERS>
17
			<WORKFLOW>
18
				<NODE isStart="true" name="filter_records" type="RunMDStorePlugin">
19
					<DESCRIPTION>Filter records by XPATH</DESCRIPTION>
20
					<PARAMETERS>
21
						<PARAM name="pluginName" value="filterMDstorePlugin" />
22
						<PARAM name="parameters">
23
							<MAP>
24
								<ENTRY key="mdId" ref="mdId" />
25
								<ENTRY key="xpath" ref="xpath" />
26
							</MAP>
27
						</PARAM>
28
					</PARAMETERS>
29
					<ARCS>
30
						<ARC to="UPDATE_INFO" />
31
					</ARCS>
32
				</NODE>
33
				
34
				<NODE name="UPDATE_INFO" type="MDStoreToApiExtraField">
35
					<DESCRIPTION>Update datasource API extra fields</DESCRIPTION>
36
					<PARAMETERS>
37
						<PARAM name="mdId" ref="mdId"/>
38
						<PARAM name="datasourceId" ref="dsId"/>
39
						<PARAM name="datasourceInterface" ref="interface"/>
40
						<PARAM name="extraFieldForTotal" value="last_aggregation_total"/>
41
						<PARAM name="extraFieldForDate" value="last_aggregation_date"/>
42
						<PARAM name="extraFieldForMdId" value="last_aggregation_mdId"/>
43
					</PARAMETERS>
44
					<ARCS>
45
						<ARC to="success"/>
46
					</ARCS>
47
				</NODE>
48
			</WORKFLOW>
49
		</CONFIGURATION>
50
	</BODY>
51
</RESOURCE_PROFILE>
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/isti/applicationContext-isti.xml
24 24
	
25 25
	<bean id="enrichLocalLinksPlugin" class="eu.dnetlib.data.mdstore.plugins.EnrichLocalLinksPlugin" />
26 26

  
27
	<bean id="filterMDstorePlugin" class="eu.dnetlib.data.mdstore.plugins.FilterMdRecordsPlugin" />
28

  
27 29
	<!-- Affiliations DAO -->
28 30
	<bean id="affiliationsDao" class="eu.dnetlib.data.db.AffiliationsDao">
29 31
		<property name="jdbcTemplate">

Also available in: Unified diff