Project

General

Profile

« Previous | Next » 

Revision 51251

[maven-release-plugin] copy for tag dnet-oai-store-service-8.0.2

View differences:

modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/deploy.info
1
{"type_source": "SVN", "goal": "package -U source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-oai-store-service/trunk", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-oai-store-service"}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/java/eu/dnetlib/data/oai/store/mongo/RecordInfoGeneratorTest.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.io.IOException;
4
import java.util.zip.ZipEntry;
5
import java.util.zip.ZipOutputStream;
6

  
7
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
8
import org.apache.commons.io.IOUtils;
9
import org.apache.commons.io.output.ByteArrayOutputStream;
10
import org.bson.types.Binary;
11
import org.junit.Before;
12
import org.junit.Test;
13
import org.springframework.core.io.ClassPathResource;
14

  
15
/**
16
 * RecordInfoGenerator Tester.
17
 *
18
 * @author <Authors name>
19
 * @version 1.0
20
 * @since <pre>Apr 6, 2016</pre>
21
 */
22
public class RecordInfoGeneratorTest {
23

  
24
	private RecordInfoGenerator gen = new RecordInfoGenerator();
25
	private String filePath = "/eu/dnetlib/data/oai/store/mongo/testRecord.xml";
26
	private Binary binaryXML;
27

  
28
	@Before
29
	public void before() throws Exception {
30
		String testRecord = IOUtils.toString(new ClassPathResource(filePath).getInputStream());
31
		binaryXML = createCompressRecord(testRecord);
32
	}
33

  
34
	/**
35
	 * Method: decompressRecord(final byte[] input)
36
	 */
37
	@Test
38
	public void testDecompressRecord() throws Exception {
39
		long timeStart = System.currentTimeMillis();
40
		String record = gen.decompressRecord(binaryXML.getData());
41
		long timeEnd = System.currentTimeMillis();
42
		System.out.println("Decompressed record in ms " + (timeEnd - timeStart));
43
		System.out.println(record);
44
	}
45

  
46
	private Binary createCompressRecord(final String record) throws IOException {
47
		ByteArrayOutputStream os = new ByteArrayOutputStream();
48
		ZipOutputStream zos = new ZipOutputStream(os);
49
		ZipEntry entry = new ZipEntry(OAIConfigurationReader.BODY_FIELD);
50
		zos.putNextEntry(entry);
51
		zos.write(record.getBytes());
52
		zos.closeEntry();
53
		//zos.flush();
54
		zos.close();
55
		return new Binary(os.toByteArray());
56
	}
57

  
58
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/java/eu/dnetlib/data/oai/store/mongo/MongoPublisherStoreTest.java
1
/**
2
 *
3
 */
4
package eu.dnetlib.data.oai.store.mongo;
5

  
6
import java.io.IOException;
7

  
8
import org.bson.types.Binary;
9
import org.junit.Assert;
10
import org.junit.Test;
11

  
12

  
13
/**
14
 * @author sandro
15
 *
16
 */
17
public class MongoPublisherStoreTest {
18

  
19
	/**
20
	 * Test method for {@link eu.dnetlib.data.oai.store.mongo.MongoPublisherStore#createCompressRecord(java.lang.String)}.
21
	 * @throws IOException
22
	 */
23
	@Test
24
	public void testCreateCompressRecord() throws IOException {
25

  
26
		MongoPublisherStore store = new MongoPublisherStore();
27
		RecordInfoGenerator gen = new RecordInfoGenerator();
28
		StringBuffer buffer = new StringBuffer();
29
		String  input =" CIAO MONDO DA SANDRO!";
30

  
31
		for (int i=0; i< 10000; i++) {
32
			buffer.append(input);
33
		}
34
		Binary data = store.createCompressRecord(buffer.toString());
35
		Assert.assertEquals(buffer.toString(),gen.decompressRecord(data.getData()));
36

  
37

  
38
	}
39

  
40
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/java/eu/dnetlib/data/oai/store/mongo/MetadataExtractorTest.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.io.StringReader;
4

  
5
import org.apache.commons.io.IOUtils;
6
import org.dom4j.Document;
7
import org.dom4j.io.SAXReader;
8
import org.junit.After;
9
import org.junit.Before;
10
import org.junit.Test;
11
import org.springframework.core.io.ClassPathResource;
12

  
13
/**
14
 * MetadataExtractor Tester.
15
 *
16
 * @author alessia
17
 * @version 1.0
18
 * @since <pre>Apr 6, 2016</pre>
19
 */
20
public class MetadataExtractorTest {
21

  
22
	final MetadataExtractor extractor = new MetadataExtractor();
23
	final SAXReader reader = new SAXReader();
24
	private Document doc;
25
	private String filePath = "/eu/dnetlib/data/oai/store/mongo/testRecord.xml";
26

  
27
	@Before
28
	public void before() throws Exception {
29
		String testRecord = IOUtils.toString(new ClassPathResource(filePath).getInputStream());
30
		doc = reader.read(new StringReader(testRecord));
31
	}
32

  
33
	@After
34
	public void after() throws Exception {
35
	}
36

  
37
	/**
38
	 * Method: evaluate(final Document xmlDoc)
39
	 */
40
	@Test
41
	public void testEvaluate() throws Exception {
42
		long timeStart = System.currentTimeMillis();
43
		String metadata = extractor.evaluate(doc);
44
		long timeEnd = System.currentTimeMillis();
45
		System.out.println("Got metadata in ms " + (timeEnd - timeStart));
46
		System.out.println(metadata);
47
	}
48

  
49
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/java/eu/dnetlib/data/oai/store/mongo/ProvenanceExtractorTest.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.io.StringReader;
4

  
5
import org.apache.commons.io.IOUtils;
6
import org.dom4j.Document;
7
import org.dom4j.io.SAXReader;
8
import org.junit.After;
9
import org.junit.Before;
10
import org.junit.Test;
11
import org.springframework.core.io.ClassPathResource;
12

  
13
/**
14
 * ProvenanceExtractor Tester.
15
 *
16
 * @author <Authors name>
17
 * @version 1.0
18
 * @since <pre>Apr 6, 2016</pre>
19
 */
20
public class ProvenanceExtractorTest {
21

  
22
	final ProvenanceExtractor extractor = new ProvenanceExtractor();
23
	final SAXReader reader = new SAXReader();
24
	private Document doc;
25
	private String filePath = "/eu/dnetlib/data/oai/store/mongo/testRecord.xml";
26

  
27
	@Before
28
	public void before() throws Exception {
29
		String testRecord = IOUtils.toString(new ClassPathResource(filePath).getInputStream());
30
		doc = reader.read(new StringReader(testRecord));
31
	}
32

  
33
	@After
34
	public void after() throws Exception {
35
	}
36

  
37
	/**
38
	 * Method: evaluate(final Document xmlDoc)
39
	 */
40
	@Test
41
	public void testEvaluate() throws Exception {
42
		long timeStart = System.currentTimeMillis();
43
		String prov = extractor.evaluate(doc);
44
		long timeEnd = System.currentTimeMillis();
45
		System.out.println("Got provenance in ms " + (timeEnd - timeStart));
46
		System.out.println(prov);
47
	}
48

  
49
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/java/eu/dnetlib/data/oai/store/sets/MongoSetCollectionTest.java
1
package eu.dnetlib.data.oai.store.sets;
2

  
3
import static org.junit.Assert.assertEquals;
4

  
5
import org.junit.Before;
6
import org.junit.Test;
7

  
8
public class MongoSetCollectionTest {
9

  
10
	private MongoSetCollection mongoSetCollection;
11
	private String strangeSet = "Наукові журнали Національного Авіаційного Університету";
12

  
13
	@Before
14
	public void setup() {
15
		this.mongoSetCollection = new MongoSetCollection();
16
	}
17

  
18
	@Test
19
	public void test() {
20
		String normalised = this.mongoSetCollection.normalizeSetSpec(strangeSet);
21
		assertEquals(MongoSetCollection.DEFAULT_SET, normalised);
22
	}
23

  
24
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/resources/eu/dnetlib/test/profiles/OAIPublisherConfigurationDSResources/OAIPublisherConfigurationDSResourceType/OAIPublisherConfiguration-1.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="OAIPublisherConfiguration" />
4
		<RESOURCE_TYPE value="OAIPublisherConfigurationDSResourceType" />
5
		<RESOURCE_KIND value="OAIPublisherConfigurationDSResources" />
6
		<RESOURCE_URI value="" />
7
		<DATE_OF_CREATION value="2001-12-31T12:00:00" />
8
	</HEADER>
9
	<BODY>
10
		    <CONFIGURATION>
11
            <OAISETS>
12
                <OAISET enabled="true">
13
                    <spec>OpenAccess</spec>
14
                    <name>Set of Open Access articles</name>
15
                    <description>Set of records having 'OPEN' license</description>
16
                    <query>(license = "OPEN")</query>
17
                </OAISET>
18
                <OAISET enabled="true">
19
                    <spec>ArticlesInNature</spec>
20
                    <name>Articles published by Nature</name>
21
                    <description>Set of articles published by Nature Publishing Group</description>
22
                    <query>(publisher = "Nature Publishing Group")</query>
23
                </OAISET>
24
                <OAISET enabled="true">
25
                    <spec>publications</spec>
26
                    <name>Publications</name>
27
                    <description>Set of all Publications</description>
28
                    <query>resulttypeid="publication"</query>
29
                </OAISET>
30
            </OAISETS>
31
            <METADATAFORMATS>
32
                <METADATAFORMAT exportable="true" metadataPrefix="oaf">
33
                    <NAMESPACE>http://namespace.openaire.eu/oaf</NAMESPACE>
34
                    <SCHEMA>http://www.openaire.eu/schema/0.1/oaf-0.1.xsd</SCHEMA>
35
                    <SOURCE_METADATA_FORMAT interpretation="openaire" layout="index" name="oaf"/>
36
                    <TRANSFORMATION_RULE/>
37
                    <BASE_QUERY>*</BASE_QUERY>
38
                </METADATAFORMAT>
39
                <METADATAFORMAT metadataPrefix="oai_dc" exportable="false">
40
                    <NAMESPACE>http://www.openarchives.org/OAI/2.0/oai_dc/</NAMESPACE>
41
                    <SCHEMA>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</SCHEMA>
42
                    <SOURCE_METADATA_FORMAT interpretation="openaire" layout="index" name="oaf"/>
43
                    <TRANSFORMATION_RULE>oaf2dc_VHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZXMvVHJhbnNmb3JtYXRpb25SdWxlRFNSZXNvdXJjZVR5cGU=</TRANSFORMATION_RULE>
44
                    <BASE_QUERY>oaftype="result"</BASE_QUERY>
45
                </METADATAFORMAT>
46
            </METADATAFORMATS>
47
            <INDICES>
48
                <INDEX name="objIdentifier" repeatable="false">
49
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="//*[local-name() ='objIdentifier']"/>
50
                </INDEX>
51
                <INDEX name="set" repeatable="true">
52
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="//collectedfrom/@name"/>
53
                </INDEX>
54
                <INDEX name="publisher" repeatable="true">
55
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="//publisher"/>
56
                </INDEX>
57
                <INDEX name="license" repeatable="false">
58
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="//bestlicense/@classid"/>
59
                </INDEX>
60
                <INDEX name="oaftype" repeatable="false">
61
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="local-name(//*[local-name()='entity']/*)"/>
62
                </INDEX>
63
                <INDEX name="resulttypeid" repeatable="false">
64
                    <SOURCE name="oaf" layout="index" interpretation="openaire" path="//*[local-name()='entity']/*[local-name()='result']/resulttype/@classid"/>
65
                </INDEX>
66
            </INDICES>
67
        </CONFIGURATION>
68
		<STATUS>
69
			<LAST_UPDATE value="2001-12-31T12:00:00" />
70
		</STATUS>
71
		<SECURITY_PARAMETERS>SECURITY_PARAMETERS</SECURITY_PARAMETERS>
72
	</BODY>
73
</RESOURCE_PROFILE>
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/resources/eu/dnetlib/data/oai/store/mongo/testRecord.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:dc="http://purl.org/dc/elements/1.1/"
3
        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
        xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
        xmlns:oaf="http://namespace.openaire.eu/oaf"
6
        xmlns:oai="http://www.openarchives.org/OAI/2.0/"
7
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
8
	<header xmlns="http://namespace.openaire.eu/">
9
		<dri:objIdentifier>od_______263::06235d139088d4b2ca45ace37c458a01</dri:objIdentifier>
10
		<dri:recordIdentifier>oai:DiVA.org:su-91808</dri:recordIdentifier>
11
		<dri:dateOfCollection/>
12
		<dri:mdFormat/>
13
		<dri:mdFormatInterpretation/>
14
		<dri:repositoryId>866c86e1-0e6a-4e12-b5ed-b7e0eb956381_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
15
		<dr:objectIdentifier/>
16
		<dr:dateOfCollection>2016-04-05T06:52:25.98Z</dr:dateOfCollection>
17
		<dr:dateOfTransformation>2016-04-05T06:53:01.394Z</dr:dateOfTransformation>
18
		<oaf:datasourceprefix>od_______263</oaf:datasourceprefix>
19
	</header>
20
	<metadata xmlns="http://namespace.openaire.eu/">
21
		<oaf:entity>
22
			<dc:title>Evaluation and Tuning of Model Trajectoriesand Spreading Rates in the Baltic Sea Using Surface Drifter Observations</dc:title>
23
			<dc:creator>Kjellsson, Joakim</dc:creator>
24
			<dc:creator>Döös, Kristofer</dc:creator>
25
			<dc:creator>Soomere, Tarmo</dc:creator>
26
			<dc:date>2013</dc:date>
27
			<dc:description>Results from experiments with surface drifters in the Baltic Sea in 2010–2011 are presented and discussed. In a first experiment, 12 SVP-B (Surface Velocity Program, with Barometer) drifters with a drogue at 12–18 m depth were deployed in the Baltic Sea. In a second experiment, shallow drifters extending to a depth of 1.5 m were deployed in the Gulf of Finland. Results from the SVP-B drifter experiment are compared to results from a regional ocean model and a trajectory code. Differences between the observed SVP-B drifters and simulated drifters are found for absolute dispersion (i.e., squared displacement from initial position) and relative dispersion (i.e., squared distance between two initially paired drifters). The former is somewhat underestimated since the simulated currents are neither as fast nor as variable as those observed. The latter is underestimated both due to the above-mentioned reasons and due to the resolution of the ocean model. For the shallower drifters, spreading in the upper 1–2 m of the Gulf of Finland is investigated. The spreading rate is about 200 m/day for separations &lt;0.5 km, 500 m/day for separations below 1 km and in the range of 0.5–3 km/day for separations in the range of 1–4 km. The spreading rate does not follow Richardson’s law. The initial spreading, up to a distance of about d=100–150 m, is governed by the power law d∼t 0.27 whereas for larger separations the distance increases as d∼t2.5.</dc:description>
28
			<dc:description>BalticWay</dc:description>
29
			<dc:format>application/pdf</dc:format>
30
			<dc:identifier>http://urn.kb.se/resolve?urn=urn:nbn:se:su:diva-91808</dc:identifier>
31
			<dc:language>eng</dc:language>
32
			<dc:publisher>Stockholms universitet, Meteorologiska institutionen (MISU)</dc:publisher>
33
			<dc:publisher>Stockholms universitet, Meteorologiska institutionen (MISU)</dc:publisher>
34
			<dc:publisher>Institute of Cybernetics, Tallinn University of Technology</dc:publisher>
35
			<dc:relation>Preventive Methods for Coastal Protection : Towards the Use of Ocean Dynamics for Pollution Control, p. 251-281</dc:relation>
36
			<dc:relation>info:eu-repo/grantAgreement/EC/FP7/217246</dc:relation>
37
			<dc:subject>surface drifters; rco; ocean model; trajectory;</dc:subject>
38
			<dc:type>Chapter in book</dc:type>
39
			<dc:type>info:eu-repo/semantics/bookPart</dc:type>
40
			<dc:type>text</dc:type>
41
			<dr:CobjCategory>0013</dr:CobjCategory>
42
			<dr:CobjIdentifier>urn:isbn:978-3-319-00440-2</dr:CobjIdentifier>
43
			<dr:CobjIdentifier>urn:isbn:978-3-319-00439-6</dr:CobjIdentifier>
44
			<dr:CobjIdentifier>doi:10.1007/978-3-319-00440-2_8</dr:CobjIdentifier>
45
			<oaf:dateAccepted>2013-01-01</oaf:dateAccepted>
46
			<oaf:projectid>corda_______::217246</oaf:projectid>
47
			<oaf:collectedDatasourceid>opendoar____::263</oaf:collectedDatasourceid>
48
			<oaf:accessrights>OPEN</oaf:accessrights>
49
			<oaf:hostedBy id="opendoar____::263" name="Publikationer från Stockholms universitet"/>
50
			<oaf:collectedFrom id="opendoar____::263" name="Publikationer från Stockholms universitet"/>
51
			<oaf:identifier identifierType="doi">10.1007/978-3-319-00440-2_8</oaf:identifier>
52
		</oaf:entity>
53
	</metadata>
54
	<about xmlns="http://namespace.openaire.eu/">
55
		<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
56
			<originDescription altered="true" harvestDate="2016-04-05T06:52:25.98Z">
57
				<baseURL>http://su.diva-portal.org/dice/oai</baseURL>
58
				<identifier>oai:DiVA.org:su-91808</identifier>
59
				<datestamp>2013-07-05T10:47:00Z</datestamp>
60
				<metadataNamespace>http://www.openarchives.org/OAI/2.0/oai_dc/</metadataNamespace>
61
			</originDescription>
62
		</provenance>
63
		<oaf:datainfo>
64
			<oaf:inferred>false</oaf:inferred>
65
			<oaf:deletedbyinference>false</oaf:deletedbyinference>
66
			<oaf:trust>0.9</oaf:trust>
67
			<oaf:inferenceprovenance/>
68
			<oaf:provenanceaction classid="sysimport:crosswalk:repository"
69
			                      classname="sysimport:crosswalk:repository"
70
			                      schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
71
		</oaf:datainfo>
72
	</about>
73
</record>
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/test/resources/log4j.properties
1
org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger
2

  
3
log4j.rootLogger=WARN, CONSOLE
4
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
5
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
6
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
7

  
8
log4j.logger.eu.dnetlib=INFO
9
log4j.logger.eu.dnetlib.data=DEBUG
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/main/java/eu/dnetlib/data/oai/store/mongo/DNetOAIMongoCursor.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.util.Iterator;
4

  
5
import com.google.common.collect.Lists;
6
import com.mongodb.DBObject;
7
import com.mongodb.client.MongoCursor;
8
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo;
9
import eu.dnetlib.data.oai.store.Cursor;
10
import eu.dnetlib.miscutils.functional.UnaryFunction;
11

  
12
public class DNetOAIMongoCursor implements Cursor {
13

  
14
	/**
15
	 * Underlying mongo cursor.
16
	 */
17
	private MongoCursor<DBObject> dbCursor;
18
	private int size = 0;
19
	/**
20
	 * Function to apply to records before delivering.
21
	 */
22
	private UnaryFunction<String, String> function;
23

  
24
	/**
25
	 * true if the RecordInfo returned by this Cursor must include the record body, false otherwise.
26
	 */
27
	private boolean bodyIncluded;
28

  
29
	private RecordInfoGenerator recordInfoGenerator;
30
	private MetadataExtractor metadataExtractor;
31
	private ProvenanceExtractor provenanceExtractor;
32

  
33
	public DNetOAIMongoCursor() {
34
		super();
35
	}
36

  
37
	public DNetOAIMongoCursor(final MongoCursor<DBObject> dbCursor, final boolean bodyIncluded, final RecordInfoGenerator recordInfoGenerator,
38
			final MetadataExtractor metadataExtractor) {
39
		this(dbCursor, null, bodyIncluded, recordInfoGenerator, metadataExtractor);
40
	}
41

  
42
	public DNetOAIMongoCursor(final MongoCursor<DBObject> dbCursor, final UnaryFunction<String, String> function, final boolean bodyIncluded,
43
			final RecordInfoGenerator recordInfoGenerator, final MetadataExtractor metadataExtractor) {
44
		super();
45
		this.dbCursor = dbCursor;
46
		this.size = 0;
47
		this.function = function;
48
		this.bodyIncluded = bodyIncluded;
49
		this.recordInfoGenerator = recordInfoGenerator;
50
		this.metadataExtractor = metadataExtractor;
51
	}
52

  
53
	/**
54
	 *
55
	 * {@inheritDoc}
56
	 */
57
	@Override
58
	public int count() {
59
		//I can do it because MongoCursor are always created from queries with "limit", so I do not expect the creation of the list to explode
60
		//to not exagerate, I'll get the size only if the current size is 0
61
		if (size == 0)
62
			size = Lists.newArrayList(dbCursor).size();
63
		return size;
64
	}
65

  
66
	/**
67
	 *
68
	 * {@inheritDoc}
69
	 *
70
	 * @see java.lang.Iterable#iterator()
71
	 */
72
	@Override
73
	public Iterator<RecordInfo> iterator() {
74

  
75
		return new Iterator<RecordInfo>() {
76

  
77
			@Override
78
			public boolean hasNext() {
79
				return dbCursor.hasNext();
80
			}
81

  
82
			@Override
83
			public RecordInfo next() {
84
				DBObject res = dbCursor.next();
85
				RecordInfo info = recordInfoGenerator.transformDBObject(res, bodyIncluded);
86
				if ((function != null) && bodyIncluded && (info != null)) {
87
					info.setMetadata(function.evaluate(info.getMetadata()));
88
				}
89
				return info;
90
			}
91

  
92
			@Override
93
			public void remove() {
94
				throw new UnsupportedOperationException();
95
			}
96

  
97
		};
98
	}
99

  
100
	public UnaryFunction<String, String> getFunction() {
101
		return function;
102
	}
103

  
104
	public void setFunction(final UnaryFunction<String, String> function) {
105
		this.function = function;
106
	}
107

  
108
	public MongoCursor<DBObject> getDbCursor() {
109
		return dbCursor;
110
	}
111

  
112
	public void setDbCursor(final MongoCursor<DBObject> dbCursor) {
113
		this.dbCursor = dbCursor;
114
	}
115

  
116
	@Override
117
	public boolean isBodyIncluded() {
118
		return this.bodyIncluded;
119
	}
120

  
121
	@Override
122
	public void setBodyIncluded(final boolean bodyIncluded) {
123
		this.bodyIncluded = bodyIncluded;
124
	}
125

  
126
	public RecordInfoGenerator getRecordInfoGenerator() {
127
		return recordInfoGenerator;
128
	}
129

  
130
	public void setRecordInfoGenerator(final RecordInfoGenerator recordInfoGenerator) {
131
		this.recordInfoGenerator = recordInfoGenerator;
132
	}
133

  
134
	public MetadataExtractor getMetadataExtractor() {
135
		return metadataExtractor;
136
	}
137

  
138
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
139
		this.metadataExtractor = metadataExtractor;
140
	}
141

  
142
	public ProvenanceExtractor getProvenanceExtractor() {
143
		return provenanceExtractor;
144
	}
145

  
146
	public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
147
		this.provenanceExtractor = provenanceExtractor;
148
	}
149

  
150
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/main/java/eu/dnetlib/data/oai/store/mongo/MongoPublisherCacheHelper.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.util.List;
4
import javax.annotation.Resource;
5

  
6
import com.google.common.base.Function;
7
import com.google.common.collect.Iterables;
8
import com.google.common.collect.Lists;
9
import com.mongodb.DBObject;
10
import com.mongodb.MongoClient;
11
import com.mongodb.WriteConcern;
12
import com.mongodb.client.FindIterable;
13
import com.mongodb.client.MongoDatabase;
14
import com.mongodb.client.model.Filters;
15
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
16
import eu.dnetlib.data.oai.store.RecordChangeDetector;
17
import eu.dnetlib.data.oai.store.sets.MongoSetCollection;
18
import eu.dnetlib.functionality.cql.mongo.MongoCqlTranslator;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
import org.springframework.beans.factory.annotation.Autowired;
22
import org.springframework.cache.annotation.CacheEvict;
23
import org.springframework.cache.annotation.Cacheable;
24

  
25
/**
26
 * Created by alessia on 19/07/16.
27
 */
28
public class MongoPublisherCacheHelper {
29

  
30
	private static final Log log = LogFactory.getLog(MongoPublisherCacheHelper.class);
31

  
32
	@Autowired
33
	private MongoClient publisherMongoClient;
34
	@Resource(name = "oaiConfigurationExistReader")
35
	private OAIConfigurationReader configuration;
36
	@Resource
37
	private MetadataExtractor metadataExtractor;
38
	@Resource
39
	private RecordInfoGenerator recordInfoGenerator;
40
	@Resource
41
	private RecordChangeDetector recordChangeDetector;
42

  
43

  
44
	public  MongoDatabase getDB(final String dbName) {
45
		return this.publisherMongoClient.getDatabase(dbName).withWriteConcern(WriteConcern.JOURNALED);
46
	}
47

  
48
	@Cacheable(value="oaistores", key="#dbname")
49
	public List<MongoPublisherStore> listPublisherStores(final String dbName, final String metadataCollectionName, final boolean alwaysNewRecord, final MongoSetCollection mongoSetCollection) {
50
		log.info("Not using cache for listPublisherStores on "+dbName);
51
		final MongoDatabase db = getDB(dbName);
52
		final FindIterable<DBObject> stores = db.getCollection(metadataCollectionName, DBObject.class).find();
53
		return Lists.newArrayList(
54
				Iterables.transform(stores, new Function<DBObject, MongoPublisherStore>() {
55
					@Override
56
					public MongoPublisherStore apply(final DBObject storeInfo) {
57
						return createFromDBObject(storeInfo, db, alwaysNewRecord, mongoSetCollection);
58
					}
59
				})
60
		);
61
	}
62

  
63
	@Cacheable(value="oaistoresById", key="#storeId + #dbName", unless="#result == null")
64
	public MongoPublisherStore getStoreById(final String storeId, final String dbName, final String metadataCollectionName, final boolean alwaysNewRecord, final MongoSetCollection mongoSetCollection) {
65
		log.info(String.format("Not using cache for getStoreById: %s", storeId));
66
		DBObject storeInfo = getDB(dbName).getCollection(metadataCollectionName, DBObject.class).find(Filters.eq("id", storeId)).first();
67
		log.info("Got DBObject from mongo "+dbName+"."+metadataCollectionName+", id "+storeId+" is : "+storeInfo );
68
		return this.createFromDBObject(storeInfo, getDB(dbName), alwaysNewRecord, mongoSetCollection);
69
	}
70

  
71
	@CacheEvict(value="oaistoresById", key = "#storeId + #dbName")
72
	public void deleteFromCache(String storeId, String dbName){
73
		log.info("Evicting "+storeId+" for db "+dbName+ " from the cache");
74
	}
75

  
76

  
77
	private MongoPublisherStore createFromDBObject(final DBObject storeInfo, final MongoDatabase db, final boolean alwaysNewRecord, final MongoSetCollection mongoSetCollection) {
78
		if (storeInfo == null){
79
			log.error("cannot create MongoPublisherStore from null DBObject");
80
			return null;
81
		}
82
		log.debug("Creating MongoPublisherStore from DBObject "+storeInfo.toString());
83
		String storeId = (String) storeInfo.get("id");
84
		String mdFormat = (String) storeInfo.get("metadataFormat");
85
		String mdInterpretation = (String) storeInfo.get("interpretation");
86
		String mdLayout = (String) storeInfo.get("layout");
87
		MongoPublisherStore store = new MongoPublisherStore(storeId, mdFormat, mdInterpretation, mdLayout, db.getCollection(storeId, DBObject.class),
88
				this.configuration.getFields(mdFormat, mdInterpretation, mdLayout), recordInfoGenerator, this.configuration.getIdScheme(),
89
				this.configuration.getIdNamespace(), this.metadataExtractor, this.recordChangeDetector, alwaysNewRecord, db);
90
		store.setMongoSetCollection(mongoSetCollection);
91
		return store;
92
	}
93
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/main/java/eu/dnetlib/data/oai/store/mongo/RecordInfoGenerator.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.io.ByteArrayInputStream;
4
import java.io.IOException;
5
import java.io.StringReader;
6
import java.time.LocalDateTime;
7
import java.time.ZoneId;
8
import java.time.format.DateTimeFormatter;
9
import java.util.Date;
10
import java.util.List;
11
import java.util.zip.ZipEntry;
12
import java.util.zip.ZipInputStream;
13
import javax.annotation.Resource;
14

  
15
import com.google.common.collect.Sets;
16
import com.mongodb.DBObject;
17
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException;
18
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
19
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo;
20
import org.apache.commons.io.IOUtils;
21
import org.apache.commons.lang3.StringEscapeUtils;
22
import org.apache.commons.logging.Log;
23
import org.apache.commons.logging.LogFactory;
24
import org.dom4j.Document;
25
import org.dom4j.DocumentException;
26
import org.dom4j.io.SAXReader;
27

  
28
/**
29
 * Helper class to generate a RecordInfo object from a Mongo DBObject.
30
 *
31
 * @author alessia
32
 *
33
 */
34
public class RecordInfoGenerator {
35

  
36
	private static final Log log = LogFactory.getLog(RecordInfoGenerator.class);
37
	@Resource
38
	private MetadataExtractor metadataExtractor;
39
	@Resource
40
	private ProvenanceExtractor provenanceExtractor;
41

  
42
	@SuppressWarnings("unchecked")
43
	public RecordInfo transformDBObject(final DBObject object, final boolean includeBody) {
44
		if ((object == null) || object.keySet().isEmpty()) return null;
45
		String id = (String) object.get(OAIConfigurationReader.ID_FIELD);
46
		// need to escape the identifier, otherwise the XML breaks
47
		id = StringEscapeUtils.escapeXml11(id);
48
		boolean deleted = (Boolean) object.get("deleted");
49
		RecordInfo record = new RecordInfo();
50
		record.setIdentifier(id);
51
		record.setInternalId(object.get("_id").toString());
52
		Object datestampObject = object.get(OAIConfigurationReader.DATESTAMP_FIELD);
53
		if(datestampObject instanceof Date){
54
			record.setDatestamp((Date) datestampObject);
55
		}
56
		else {
57
			//assuming we have a string in the form 2017-12-05T14:24:48.61Z to convert to Date
58
			log.debug("Why the hell there are string instead of Dates in datestamp???");
59
			try {
60
				LocalDateTime d = LocalDateTime.parse(datestampObject.toString(), DateTimeFormatter.ISO_INSTANT);
61
				Date utilDate = Date.from(d.atZone(ZoneId.systemDefault()).toInstant());
62
				record.setDatestamp(utilDate);
63
			}catch(Exception dateException){
64
				log.warn("Error setting date from a string datestamp, using current date instead: "+dateException.getMessage());
65
				record.setDatestamp(new Date());
66
			}
67
		}
68
		record.setDeleted(deleted);
69
		List<String> sets = (List<String>) object.get(OAIConfigurationReader.SET_FIELD);
70
		if (sets != null) {
71
			record.setSetspecs(Sets.newHashSet(sets));
72
		}
73
		if (includeBody && !deleted) {
74
			String body = decompressRecord((byte[]) object.get(OAIConfigurationReader.BODY_FIELD));
75
			final SAXReader reader = new SAXReader();
76
			Document doc;
77
			try {
78
				doc = reader.read(new StringReader(body));
79
				record.setMetadata(this.metadataExtractor.evaluate(doc));
80
				record.setProvenance(this.provenanceExtractor.evaluate(doc));
81
			} catch (DocumentException e) {
82
				throw new OaiPublisherRuntimeException(e);
83
			}
84
		}
85
		return record;
86

  
87
	}
88

  
89
	public  String decompressRecord(final byte[] input)  {
90

  
91
		try {
92
			ByteArrayInputStream bis = new ByteArrayInputStream(input);
93
			ZipInputStream zis = new ZipInputStream(bis);
94
			ZipEntry ze;
95
			ze = zis.getNextEntry();
96
			if (ze== null)
97
				throw new OaiPublisherRuntimeException("cannot decompress null zip entry ");
98
			if (!ze.getName().equals(OAIConfigurationReader.BODY_FIELD))
99
				throw new OaiPublisherRuntimeException("cannot decompress zip entry name :"+ze.getName());
100
			return IOUtils.toString(zis);
101
		} catch (IOException e) {
102
			throw new OaiPublisherRuntimeException(e);
103
		}
104

  
105

  
106
	}
107

  
108
	public MetadataExtractor getMetadataExtractor() {
109
		return metadataExtractor;
110
	}
111

  
112
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
113
		this.metadataExtractor = metadataExtractor;
114
	}
115

  
116
	public ProvenanceExtractor getProvenanceExtractor() {
117
		return provenanceExtractor;
118
	}
119

  
120
	public void setProvenanceExtractor(final ProvenanceExtractor provenanceExtractor) {
121
		this.provenanceExtractor = provenanceExtractor;
122
	}
123
}
modules/dnet-oai-store-service/tags/dnet-oai-store-service-8.0.2/src/main/java/eu/dnetlib/data/oai/store/mongo/MongoPublisherStore.java
1
package eu.dnetlib.data.oai.store.mongo;
2

  
3
import java.io.IOException;
4
import java.util.Collection;
5
import java.util.Date;
6
import java.util.List;
7
import java.util.concurrent.ArrayBlockingQueue;
8
import java.util.concurrent.BlockingQueue;
9
import java.util.concurrent.TimeUnit;
10
import java.util.zip.ZipEntry;
11
import java.util.zip.ZipOutputStream;
12

  
13
import com.google.common.base.Function;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Stopwatch;
16
import com.google.common.collect.Iterables;
17
import com.google.common.collect.Lists;
18
import com.google.common.collect.Multimap;
19
import com.mongodb.BasicDBObject;
20
import com.mongodb.BasicDBObjectBuilder;
21
import com.mongodb.DBObject;
22
import com.mongodb.WriteConcern;
23
import com.mongodb.client.FindIterable;
24
import com.mongodb.client.ListIndexesIterable;
25
import com.mongodb.client.MongoCollection;
26
import com.mongodb.client.MongoDatabase;
27
import com.mongodb.client.model.Filters;
28
import com.mongodb.client.model.IndexOptions;
29
import com.mongodb.client.model.Sorts;
30
import com.mongodb.client.model.UpdateOptions;
31
import com.mongodb.client.result.DeleteResult;
32
import com.mongodb.client.result.UpdateResult;
33
import eu.dnetlib.data.information.oai.publisher.OaiPublisherRuntimeException;
34
import eu.dnetlib.data.information.oai.publisher.PublisherField;
35
import eu.dnetlib.data.information.oai.publisher.conf.OAIConfigurationReader;
36
import eu.dnetlib.data.information.oai.publisher.info.RecordInfo;
37
import eu.dnetlib.data.information.oai.publisher.info.SetInfo;
38
import eu.dnetlib.data.oai.store.PublisherStore;
39
import eu.dnetlib.data.oai.store.RecordChangeDetector;
40
import eu.dnetlib.data.oai.store.parser.PublisherRecordParser;
41
import eu.dnetlib.data.oai.store.sets.MongoSetCollection;
42
import eu.dnetlib.functionality.cql.mongo.MongoCqlTranslator;
43
import eu.dnetlib.miscutils.functional.UnaryFunction;
44
import org.apache.commons.io.output.ByteArrayOutputStream;
45
import org.apache.commons.lang3.StringUtils;
46
import org.apache.commons.logging.Log;
47
import org.apache.commons.logging.LogFactory;
48
import org.bson.conversions.Bson;
49
import org.bson.types.Binary;
50

  
51
public class MongoPublisherStore implements PublisherStore<DNetOAIMongoCursor> {
52

  
53
	private static final Log log = LogFactory.getLog(MongoPublisherStore.class); // NOPMD by marko on 11/24/08 5:02 PM
54

  
55
	private String id, metadataFormat, interpretation, layout;
56
	/** Keeps information about the fields to be created in mongo. **/
57
	private List<PublisherField> mongoFields;
58

  
59
	private MongoCollection<DBObject> collection;
60
	private MongoCollection<DBObject> discardedCollection;
61

  
62
	private RecordInfoGenerator recordInfoGenerator;
63
	private MetadataExtractor metadataExtractor;
64

  
65
	private RecordChangeDetector recordChangeDetector;
66

  
67
	private MongoSetCollection mongoSetCollection;
68

  
69
	/**
70
	 * Used to generate the OAI identifiers compliant to the protocol. See
71
	 * http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier.
72
	 */
73
	private String idScheme;
74
	/**
75
	 * Used to generate the OAI identifiers compliant to the protocol. See
76
	 * http://www.openarchives.org/OAI/openarchivesprotocol.html#UniqueIdentifier.
77
	 */
78
	private String idNamespace;
79

  
80
	private boolean alwaysNewRecord;
81

  
82
	public MongoPublisherStore() {
83
		super();
84
	}
85

  
86
	public MongoPublisherStore(final String id,
87
			final String metadataFormat,
88
			final String interpretation,
89
			final String layout,
90
			final MongoCollection<DBObject> collection,
91
			final List<PublisherField> mongoFields,
92
			final RecordInfoGenerator recordInfoGenerator,
93
			final String idScheme,
94
			final String idNamespace,
95
			final MetadataExtractor metadataExtractor,
96
			final RecordChangeDetector recordChangeDetector,
97
			final boolean alwaysNewRecord,
98
			final MongoDatabase mongodb) {
99
		super();
100
		this.id = id;
101
		this.metadataFormat = metadataFormat;
102
		this.interpretation = interpretation;
103
		this.layout = layout;
104
		this.collection = collection;
105
		this.discardedCollection = mongodb.getCollection("discarded-" + collection.getNamespace().getCollectionName(), DBObject.class);
106
		this.mongoFields = mongoFields;
107
		this.recordInfoGenerator = recordInfoGenerator;
108
		this.idScheme = idScheme;
109
		this.idNamespace = idNamespace;
110
		this.recordChangeDetector = recordChangeDetector;
111
		this.alwaysNewRecord = alwaysNewRecord;
112
	}
113

  
114
	@Override
115
	public RecordInfo getRecord(final String recordId) {
116
		Bson query = Filters.eq(OAIConfigurationReader.ID_FIELD, recordId);
117
		DBObject result = this.collection.find(query).first();
118
		log.debug(result);
119
		return this.recordInfoGenerator.transformDBObject(result, true);
120
	}
121

  
122
	@Override
123
	public RecordInfo getRecord(final String recordId, final UnaryFunction<String, String> unaryFunction) {
124
		RecordInfo result = this.getRecord(recordId);
125
		if (result != null) {
126
			String transformedBody = unaryFunction.evaluate(result.getMetadata());
127
			result.setMetadata(transformedBody);
128
		}
129
		return result;
130
	}
131

  
132
	@Override
133
	public DNetOAIMongoCursor getRecords(final String queryString, final boolean bodyIncluded, final int limit) {
134
		FindIterable<DBObject> iter = loggedFindByQuery(queryString, limit);
135
		return new DNetOAIMongoCursor(iter.iterator(), bodyIncluded, this.recordInfoGenerator, this.metadataExtractor);
136
	}
137

  
138
	@Override
139
	public DNetOAIMongoCursor getRecords(final String queryString,
140
			final UnaryFunction<String, String> unaryFunction,
141
			final boolean bodyIncluded,
142
			final int limit) {
143
		FindIterable<DBObject> iter = loggedFindByQuery(queryString, limit);
144
		return new DNetOAIMongoCursor(iter.iterator(), unaryFunction, bodyIncluded, this.recordInfoGenerator, this.metadataExtractor);
145
	}
146

  
147
	private FindIterable<DBObject> loggedFindByQuery(final String queryString, final int limit) {
148
		final Bson query = parseQuery(queryString);
149
		long start = System.currentTimeMillis();
150
		Bson sortByIdAsc = Sorts.orderBy(Sorts.ascending("_id"));
151
		FindIterable<DBObject> iter = this.collection.find(query).sort(sortByIdAsc).limit(limit);
152
		long end = System.currentTimeMillis();
153
		log.debug("Query:" + query + "\ntime to get mongo iterable (ms): " + (end - start));
154
		return iter;
155
	}
156

  
157
	private Bson parseQuery(final String query) {
158
		try {
159
			return MongoCqlTranslator.toMongo(query);
160
		} catch(Exception e ) {
161
			throw new OaiPublisherRuntimeException(e);
162
		}
163
	}
164

  
165
	@Override
166
	public List<PublisherField> getIndices() {
167
		return this.mongoFields;
168
	}
169

  
170
	/**
171
	 * <p>
172
	 * Ensure indices on the configuration-defined fields and on the system fields DATESTAMP_FIELD and LAST_COLLECTION_DATE_FIELD.
173
	 * <p>
174
	 * <p>
175
	 * Note that by default ID_FIELD, SET_FIELD, DELETED_FIELD, BODY_FIELD, UPDATED_FIELD are not indexed. If you want an index on those,
176
	 * then you have to specify it in the configuration file of the OAI Publisher: <br>
177
	 * <INDEX name="deleted">
178
	 * </p>
179
	 *
180
	 * {@inheritDoc}
181
	 *
182
	 */
183
	@Override
184
	public void ensureIndices() {
185
		final ListIndexesIterable<BasicDBObject> indexesIterable = this.collection.listIndexes(BasicDBObject.class);
186
		final IndexOptions indexOptions = new IndexOptions().background(true);
187
		Stopwatch sw = Stopwatch.createUnstarted();
188
		sw.start();
189
		// I want to keep the composite indexes that might have been defined manually
190
		// I DO NOT NEED TO DO THIS. If the indexes are there, they are there.
191
		/*log.debug("Ensuring currently defined composite indexes on store "+id+": ");
192
		for (BasicDBObject o : indexesIterable) {
193
			BasicDBObject fieldIndexed = (BasicDBObject) o.get("key");
194
			if (fieldIndexed.keySet().size() > 1) {
195
				log.debug(o);
196
				this.collection.createIndex(fieldIndexed, indexOptions);
197
			}
198
		}*/
199
		// Indexes on single fields.
200
		for (PublisherField field : this.mongoFields) {
201
			BasicDBObject mongoIdx = new BasicDBObject(field.getFieldName(), 1);
202
			log.debug("Creating index on store "+id+" : " + mongoIdx);
203
			this.collection.createIndex(mongoIdx, indexOptions);
204
		}
205
		log.debug("Creating index over : " + OAIConfigurationReader.DATESTAMP_FIELD);
206
		this.collection.createIndex(new BasicDBObject(OAIConfigurationReader.DATESTAMP_FIELD, 1), indexOptions);
207
		log.debug("Creating index over : " + OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD);
208
		this.collection.createIndex(new BasicDBObject(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, 1), indexOptions);
209
		sw.stop();
210
		log.info("All indexes have been updated in " + sw.elapsed(TimeUnit.MILLISECONDS) + " milliseconds");
211
	}
212

  
213
	/**
214
	 * Creates a compound index over the specified fields on the given store.
215
	 * <p>
216
	 * The creation is performed on the background
217
	 * </p>
218
	 *
219
	 * @param fieldNames
220
	 *            List of fields to be included in the compound index
221
	 * @theStore MongoPublisherStore where to create the index
222
	 */
223
	public void createCompoundIndex(final List<String> fieldNames) {
224
		if ((fieldNames == null) || fieldNames.isEmpty()) {
225
			log.fatal("No fields specified for the creation of the compound index");
226
		}
227
		BasicDBObjectBuilder theIndexBuilder = BasicDBObjectBuilder.start();
228
		for (String f : fieldNames) {
229
			theIndexBuilder.add(f, 1);
230
		}
231
		BasicDBObject theIndex = (BasicDBObject) theIndexBuilder.get();
232
		log.info("Creating index " + theIndex + " on " + this.getId());
233
		this.getCollection().createIndex(theIndex, new IndexOptions().background(true));
234
	}
235

  
236
	private void dropDiscarded(final String source) {
237
		if (StringUtils.isBlank(source)) {
238
			log.debug("Dropping discarded records from publisherStore " + id);
239
			discardedCollection.drop();
240
		} else {
241
			log.debug("Dropping discarded records for source " + source + " from publisherStore " + id);
242
			discardedCollection.deleteMany(Filters.eq(OAIConfigurationReader.SET_FIELD, source));
243
		}
244
	}
245

  
246
	@Override
247
	public int feed(final Iterable<String> records, final String source) {
248
		final BlockingQueue<Object> queue = new ArrayBlockingQueue<Object>(80);
249
		final Object sentinel = new Object();
250
		this.dropDiscarded(source);
251
		final Date feedDate = new Date();
252
		Thread background = new Thread(new Runnable() {
253

  
254
			@Override
255
			public void run() {
256
				//For fast feeding we want to use a collection with unack write concern
257
				final MongoCollection<DBObject> unackCollection = collection.withWriteConcern(WriteConcern.UNACKNOWLEDGED);
258
				while (true) {
259
					try {
260
						Object record = queue.take();
261
						if (record == sentinel) {
262
							break;
263
						}
264
						safeFeedRecord((String) record, source, feedDate, unackCollection);
265
					} catch (InterruptedException e) {
266
						log.fatal("got exception in background thread", e);
267
						throw new IllegalStateException(e);
268
					}
269
				}
270
			}
271
		});
272
		background.start();
273
		long startFeed = feedDate.getTime();
274
		try {
275
			log.info("feeding publisherStore " + id);
276
			for (final String record : records) {
277
				queue.put(record);
278
			}
279
			queue.put(sentinel);
280
			log.info("finished feeding publisherStore " + id);
281

  
282
			background.join();
283
		} catch (InterruptedException e) {
284
			throw new IllegalStateException(e);
285
		}
286
		long endFeed = System.currentTimeMillis();
287
		log.fatal("OAI STORE " + id + " FEEDING COMPLETED IN " + (endFeed - startFeed) + "ms");
288
		this.setDeletedFlags(feedDate, source);
289
		return this.count();
290
	}
291

  
292
	/**
293
	 * Launches the thread that flags the records to be considered as 'deleted'.
294
	 * <p>
295
	 * The datestamp of the deleted records must be updated as well, according to the OAI specs available at
296
	 * http://www.openarchives.org/OAI/openarchivesprotocol.html#DeletedRecords: if a repository does keep track of deletions then the
297
	 * datestamp of the deleted record must be the date and time that it was deleted.
298
	 * </p>
299
	 *
300
	 * @param feedDate
301
	 * @param source
302
	 */
303
	private void setDeletedFlags(final Date feedDate, final String source) {
304
		//get the collection with ACKNOWLEDGE Write concern
305
		final MongoCollection<DBObject> ackCollection = collection.withWriteConcern(WriteConcern.ACKNOWLEDGED);
306
		Thread deletedSetter = new Thread(new Runnable() {
307

  
308
			@Override
309
			public void run() {
310
				Bson filter = Filters.and(Filters.eq(OAIConfigurationReader.DELETED_FIELD, false),
311
						Filters.lt(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate));
312
				if (!StringUtils.isBlank(source)) {
313
					filter = Filters.and(filter, Filters.eq(OAIConfigurationReader.SET_FIELD, source));
314
				}
315
				log.debug("Delete flag query: " + filter);
316
				BasicDBObject update = new BasicDBObject("$set",
317
						BasicDBObjectBuilder.start(OAIConfigurationReader.DELETED_FIELD, true).append(OAIConfigurationReader.DATESTAMP_FIELD, feedDate)
318
								.append(OAIConfigurationReader.UPDATED_FIELD, true).get());
319
				log.debug("Updating as: " + update.toString());
320
				final UpdateResult updateResult = ackCollection.updateMany(filter, update, new UpdateOptions().upsert(false));
321
				log.debug("Deleted flags set for source: " + source + " #records = " + updateResult.getModifiedCount());
322
			}
323
		});
324

  
325
		deletedSetter.start();
326
		try {
327
			deletedSetter.join();
328
		} catch (InterruptedException e) {
329
			throw new IllegalStateException(e);
330
		}
331
	}
332

  
333
	@Override
334
	public void drop() {
335
		this.collection.drop();
336
	}
337

  
338
	@Override
339
	public void drop(final String queryString) {
340
		Bson query = parseQuery(queryString);
341
		final DeleteResult deleteResult = this.collection.deleteMany(query);
342
		log.debug("Deleted by query: " + queryString + " #deleted: " + deleteResult.getDeletedCount());
343

  
344
	}
345

  
346
	@Override
347
	public int count() {
348
		return (int) this.collection.count();
349
	}
350

  
351
	@Override
352
	public int count(final String queryString) {
353
		if (StringUtils.isBlank(queryString)) return (int) this.collection.count();
354
		Bson query = parseQuery(queryString);
355
		return (int) this.collection.count(query);
356
	}
357

  
358
	public List<String> getDistinctSetNamesFromRecords() {
359
		log.info("Going to ask for all distinct sets in the oaistore " + id + ": this may take a long time...");
360
		return Lists.newArrayList(this.collection.distinct(OAIConfigurationReader.SET_FIELD, String.class));
361
	}
362

  
363
	// ***********************************************************************************************//
364
	// Feed utilities
365
	// ***********************************************************************************************//
366
	private boolean safeFeedRecord(final String record, final String source, final Date feedDate, final MongoCollection<DBObject> unackCollection) {
367
		try {
368
			if (!record.isEmpty()) return feedRecord(record, source, feedDate, unackCollection);
369
		} catch (final Throwable e) {
370
			log.error("Got unhandled exception while parsing record", e);
371
			discardedCollection.insertOne(new BasicDBObject(OAIConfigurationReader.SET_FIELD, source).append(OAIConfigurationReader.BODY_FIELD, record));
372
		}
373
		return false;
374
	}
375

  
376
	/**
377
	 * Feed the record to the store.
378
	 *
379
	 * @return true if the record is new, false otherwise
380
	 */
381
	private boolean feedRecord(final String record, final String source, final Date feedDate, final MongoCollection<DBObject> unackCollection) {
382
		PublisherRecordParser parser = new PublisherRecordParser(this.mongoFields);
383
		final Multimap<String, String> recordProperties = parser.parseRecord(record, source);
384
		String id = "";
385
		String oaiID = "";
386
		if (recordProperties.containsKey(OAIConfigurationReader.ID_FIELD)) {
387
			id = recordProperties.get(OAIConfigurationReader.ID_FIELD).iterator().next();
388
			oaiID = getOAIIdentifier(id);
389
			if (isNewRecord(oaiID)) {
390
				feedNew(oaiID, record, recordProperties, feedDate, unackCollection);
391
				return true;
392
			} else {
393
				if (isChanged(oaiID, record)) {
394
					updateRecord(oaiID, record, recordProperties, feedDate, unackCollection);
395
				} else {
396
					// it is not changed, I only have to update the last collection date
397
					handleRecord(oaiID, feedDate, unackCollection);
398
				}
399
			}
400
		} else {
401
			log.error("parsed record seems invalid -- no identifier property with name: " + OAIConfigurationReader.ID_FIELD);
402
			discardedCollection.insertOne(new BasicDBObject(OAIConfigurationReader.SET_FIELD, source).append(OAIConfigurationReader.BODY_FIELD, record).append(
403
					OAIConfigurationReader.DATESTAMP_FIELD, feedDate));
404
		}
405
		return false;
406
	}
407

  
408
	private BasicDBObject createBasicObject(final String oaiID, final String record, final Multimap<String, String> recordProperties) {
409
		BasicDBObject obj = new BasicDBObject();
410
		for (final String key : recordProperties.keySet()) {
411
			if (key.equals(OAIConfigurationReader.ID_FIELD)) {
412
				obj.put(key, oaiID);
413
			} else {
414
				Collection<String> values = recordProperties.get(key);
415
				if (key.equals(OAIConfigurationReader.SET_FIELD)) {
416

  
417
					Iterable<String> setSpecs = Iterables.transform(values, new Function<String, String>() {
418

  
419
						@Override
420
						public String apply(final String s) {
421
							return mongoSetCollection.normalizeSetSpec(s);
422
						}
423

  
424
					});
425
					obj.put(key, setSpecs);
426
				} else {
427
					// let's check if the key is the name of a repeatable field or not
428
					PublisherField keyField = Iterables.find(this.mongoFields, new Predicate<PublisherField>() {
429

  
430
						@Override
431
						public boolean apply(final PublisherField field) {
432
							return field.getFieldName().equals(key);
433
						}
434
					}, null);
435
					if (keyField == null) {
436
						log.warn("Expected field to index: " + key + " could not be found, but we keep going...");
437
					}
438
					if ((keyField != null) && !keyField.isRepeatable()) {
439
						if ((values != null) && !values.isEmpty()) {
440
							obj.put(key, values.iterator().next());
441
						}
442
					} else {
443
						obj.put(key, values);
444
					}
445
				}
446
			}
447
		}
448
		try {
449
			obj.put(OAIConfigurationReader.BODY_FIELD, createCompressRecord(record));
450
			obj.put(OAIConfigurationReader.DELETED_FIELD, false);
451
			return obj;
452
		} catch (IOException e) {
453
			throw new OaiPublisherRuntimeException(e);
454
		}
455
	}
456

  
457
	/**
458
	 * @param record
459
	 * @throws IOException
460
	 */
461
	public Binary createCompressRecord(final String record) throws IOException {
462
		ByteArrayOutputStream os = new ByteArrayOutputStream();
463
		ZipOutputStream zos = new ZipOutputStream(os);
464
		ZipEntry entry = new ZipEntry(OAIConfigurationReader.BODY_FIELD);
465
		zos.putNextEntry(entry);
466
		zos.write(record.getBytes());
467
		zos.closeEntry();
468
		zos.flush();
469
		zos.close();
470
		return new Binary(os.toByteArray());
471
	}
472

  
473
	private void feedNew(final String oaiID,
474
			final String record,
475
			final Multimap<String, String> recordProperties,
476
			final Date feedDate,
477
			final MongoCollection<DBObject> unackCollection) {
478
		log.debug("New record received. Assigned oai id: " + oaiID);
479
		DBObject obj = this.createBasicObject(oaiID, record, recordProperties);
480
		obj.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate);
481
		obj.put(OAIConfigurationReader.DATESTAMP_FIELD, feedDate);
482
		obj.put(OAIConfigurationReader.UPDATED_FIELD, false);
483
		unackCollection.insertOne(obj);
484
		this.upsertSets(recordProperties.get(OAIConfigurationReader.SET_FIELD));
485
	}
486

  
487
	private void updateRecord(final String oaiID,
488
			final String record,
489
			final Multimap<String, String> recordProperties,
490
			final Date feedDate,
491
			final MongoCollection<DBObject> unackCollection) {
492
		log.debug("updating record " + oaiID);
493
		BasicDBObject obj = this.createBasicObject(oaiID, record, recordProperties);
494
		obj.put(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, feedDate);
495
		obj.put(OAIConfigurationReader.DATESTAMP_FIELD, feedDate);
496
		obj.put(OAIConfigurationReader.UPDATED_FIELD, true);
497
		Bson oldObj = Filters.eq(OAIConfigurationReader.ID_FIELD, oaiID);
498
		unackCollection.replaceOne(oldObj, obj, new UpdateOptions().upsert(true));
499
		this.upsertSets(recordProperties.get(OAIConfigurationReader.SET_FIELD));
500
	}
501

  
502
	public void upsertSets(final Iterable<String> setNames) {
503
		// feed the list of sets, if any
504
		if (setNames != null) {
505
			for (String setName : setNames) {
506
				if (StringUtils.isNotBlank(setName)) {
507
					final SetInfo set = new SetInfo();
508
					String setSpec = this.mongoSetCollection.normalizeSetSpec(setName);
509
					set.setSetSpec(setSpec);
510
					set.setSetName(setName);
511
					set.setSetDescription("This set contains metadata records whose provenance is " + setName);
512
					set.setEnabled(true);
513
					String query = "(" + OAIConfigurationReader.SET_FIELD + " = \"" + setSpec + "\") ";
514
					set.setQuery(query);
515
					this.mongoSetCollection.upsertSet(set, false, getCollection().getNamespace().getDatabaseName());
516
				}
517
			}
518
		}
519
	}
520

  
521
	private void handleRecord(final String oaiID, final Date lastCollectionDate, final MongoCollection<DBObject> unackCollection) {
522
		log.debug("handling unchanged record " + oaiID);
523
		Bson oldObj = Filters.eq(OAIConfigurationReader.ID_FIELD, oaiID);
524
		BasicDBObject update = new BasicDBObject("$set", new BasicDBObject(OAIConfigurationReader.LAST_COLLECTION_DATE_FIELD, lastCollectionDate));
525
		unackCollection.updateOne(oldObj, update, new UpdateOptions().upsert(true));
526
	}
527

  
528
	private boolean isNewRecord(final String oaiIdentifier) {
529
		if (alwaysNewRecord || (collection.count() == 0)) return true;
530
		return this.collection.find(Filters.eq(OAIConfigurationReader.ID_FIELD, oaiIdentifier)).first() == null;
531
	}
532

  
533
	// ***********************************************************************************************//
534
	// Setters / Getters / Basic utilities
535
	// ***********************************************************************************************//
536

  
537
	private boolean isChanged(final String oaiID, final String record) {
538
		RecordInfo oldRecord = getRecord(oaiID);
539
		if (oldRecord == null) return StringUtils.isBlank(record);
540
		return this.recordChangeDetector.differs(oldRecord.getMetadata(), record);
541
	}
542

  
543
	private String getOAIIdentifier(final String id) {
544
		return this.idScheme + ":" + this.idNamespace + ":" + id;
545
	}
546

  
547
	@Override
548
	public String toString() {
549
		return "MongoPublisherStore{" +
550
				"id='" + id + '\'' +
551
				", metadataFormat='" + metadataFormat + '\'' +
552
				", interpretation='" + interpretation + '\'' +
553
				", layout='" + layout + '\'' +
554
				", idScheme='" + idScheme + '\'' +
555
				", idNamespace='" + idNamespace + '\'' +
556
				", alwaysNewRecord=" + alwaysNewRecord +
557
				'}';
558
	}
559

  
560
	@Override
561
	public int hashCode() {
562
		final int prime = 31;
563
		int result = 1;
564
		result = (prime * result) + ((collection == null) ? 0 : collection.hashCode());
565
		result = (prime * result) + ((id == null) ? 0 : id.hashCode());
566
		result = (prime * result) + ((interpretation == null) ? 0 : interpretation.hashCode());
567
		result = (prime * result) + ((layout == null) ? 0 : layout.hashCode());
568
		result = (prime * result) + ((metadataFormat == null) ? 0 : metadataFormat.hashCode());
569
		return result;
570
	}
571

  
572
	@Override
573
	public boolean equals(final Object obj) {
574
		if (this == obj) return true;
575
		if (obj == null) return false;
576
		if (!(obj instanceof MongoPublisherStore)) return false;
577
		MongoPublisherStore other = (MongoPublisherStore) obj;
578
		if (collection == null) {
579
			if (other.collection != null) return false;
580
		} else if (!collection.equals(other.collection)) return false;
581
		if (id == null) {
582
			if (other.id != null) return false;
583
		} else if (!id.equals(other.id)) return false;
584
		if (interpretation == null) {
585
			if (other.interpretation != null) return false;
586
		} else if (!interpretation.equals(other.interpretation)) return false;
587
		if (layout == null) {
588
			if (other.layout != null) return false;
589
		} else if (!layout.equals(other.layout)) return false;
590
		if (metadataFormat == null) {
591
			if (other.metadataFormat != null) return false;
592
		} else if (!metadataFormat.equals(other.metadataFormat)) return false;
593
		return true;
594
	}
595

  
596
	public MongoCollection<DBObject> getCollection() {
597
		return collection;
598
	}
599

  
600
	public void setCollection(final MongoCollection<DBObject> collection) {
601
		this.collection = collection;
602
	}
603

  
604
	public MongoCollection<DBObject> getDiscardedCollection() {
605
		return discardedCollection;
606
	}
607

  
608
	public void setDiscardedCollection(final MongoCollection<DBObject> discardedCollection) {
609
		this.discardedCollection = discardedCollection;
610
	}
611

  
612
	public String getIdScheme() {
613
		return idScheme;
614
	}
615

  
616
	public void setIdScheme(final String idScheme) {
617
		this.idScheme = idScheme;
618
	}
619

  
620
	public String getIdNamespace() {
621
		return idNamespace;
622
	}
623

  
624
	public void setIdNamespace(final String idNamespace) {
625
		this.idNamespace = idNamespace;
626
	}
627

  
628
	public RecordInfoGenerator getRecordInfoGenerator() {
629
		return recordInfoGenerator;
630
	}
631

  
632
	public void setRecordInfoGenerator(final RecordInfoGenerator recordInfoGenerator) {
633
		this.recordInfoGenerator = recordInfoGenerator;
634
	}
635

  
636
	public MetadataExtractor getMetadataExtractor() {
637
		return metadataExtractor;
638
	}
639

  
640
	public void setMetadataExtractor(final MetadataExtractor metadataExtractor) {
641
		this.metadataExtractor = metadataExtractor;
642
	}
643

  
644
	public RecordChangeDetector getRecordChangeDetector() {
645
		return recordChangeDetector;
646
	}
647

  
648
	public void setRecordChangeDetector(final RecordChangeDetector recordChangeDetector) {
649
		this.recordChangeDetector = recordChangeDetector;
650
	}
651

  
652
	@Override
653
	public String getId() {
654
		return this.id;
655
	}
656

  
657
	public void setId(final String id) {
658
		this.id = id;
659
	}
660

  
661
	@Override
662
	public String getMetadataFormat() {
663
		return this.metadataFormat;
664
	}
665

  
666
	public void setMetadataFormat(final String metadataFormat) {
667
		this.metadataFormat = metadataFormat;
668
	}
669

  
670
	@Override
671
	public String getInterpretation() {
672
		return this.interpretation;
673
	}
674

  
675
	public void setInterpretation(final String interpretation) {
676
		this.interpretation = interpretation;
677
	}
678

  
679
	@Override
680
	public String getLayout() {
681
		return this.layout;
682
	}
683

  
684
	public void setLayout(final String layout) {
685
		this.layout = layout;
686
	}
687

  
688
	public MongoSetCollection getMongoSetCollection() {
689
		return mongoSetCollection;
690
	}
691

  
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff