Revision 52200
Added by Claudio Atzori almost 6 years ago
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U source:jar", |
|
2 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/branches/2.2.x", |
|
3 |
"deploy_repository": "dnet4-snapshots", "version": "4", |
|
4 |
"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", |
|
5 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", |
|
6 |
"name": "dnet-openaireplus-mapping-utils-BRANCH-2.2.x"} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/pace/result.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
strictconditions { |
|
3 |
doiExactMatch { fields = [pid] } |
|
4 |
}, |
|
5 |
conditions { |
|
6 |
yearMatch { fields = [dateofacceptance] }, |
|
7 |
titleVersionMatch { fields = [title] } |
|
8 |
}, |
|
9 |
model { |
|
10 |
pid { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = pid/value }, |
|
11 |
title { algo = JaroWinkler, type = String, weight = 1.0, ignoreMissing = false, path = result/metadata/title/value }, |
|
12 |
dateofacceptance { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = result/metadata/dateofacceptance/value } |
|
13 |
} |
|
14 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/pace/result.full.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
clustering { |
|
3 |
acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} }, |
|
4 |
ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} }, |
|
5 |
suffixprefix { fields = [title], params = { max = 1, len = 3 } } |
|
6 |
}, |
|
7 |
strictconditions { |
|
8 |
doiExactMatch { fields = [pid] } |
|
9 |
}, |
|
10 |
conditions { |
|
11 |
yearMatch { fields = [dateofacceptance] }, |
|
12 |
titleVersionMatch { fields = [title] }, |
|
13 |
sizeMatch { fields = [authors] } |
|
14 |
}, |
|
15 |
model { |
|
16 |
pid { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = pid/value, overrideMatch = true }, |
|
17 |
title { algo = JaroWinkler, type = String, weight = 0.75, ignoreMissing = false, path = result/metadata/title/value }, |
|
18 |
dateofacceptance { algo = Null, type = String, weight = 0.0, ignoreMissing = true, path = result/metadata/dateofacceptance/value }, |
|
19 |
authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.25, ignoreMissing = true, path = result/author/metadata/fullname/value } |
|
20 |
}, |
|
21 |
blacklists = { |
|
22 |
title = [ |
|
23 |
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$", |
|
24 |
"^(Kiri Karl Morgensternile).*$", |
|
25 |
"^(\\[Eksliibris Aleksandr).*\\]$", |
|
26 |
"^(\\[Eksliibris Aleksandr).*$", |
|
27 |
"^(Eksliibris Aleksandr).*$", |
|
28 |
"^(Kiri A\\. de Vignolles).*$", |
|
29 |
"^(2 kirja Karl Morgensternile).*$", |
|
30 |
"^(Pirita kloostri idaosa arheoloogilised).*$", |
|
31 |
"^(Kiri tundmatule).*$", |
|
32 |
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", |
|
33 |
"^(Eksliibris Nikolai Birukovile).*$", |
|
34 |
"^(Eksliibris Nikolai Issakovile).*$", |
|
35 |
"^(WHP Cruise Summary Information of section).*$", |
|
36 |
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", |
|
37 |
"^(Measurement of the spin\\-dependent structure function).*" |
|
38 |
] } |
|
39 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/pace/result.authors.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
clustering { |
|
3 |
acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} }, |
|
4 |
ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} }, |
|
5 |
suffixprefix { fields = [title], params = { max = 1, len = 3 } } |
|
6 |
}, |
|
7 |
conditions { |
|
8 |
titleVersionMatch { fields = [title] }, |
|
9 |
sizeMatch { fields = [authors] } |
|
10 |
}, |
|
11 |
model { |
|
12 |
title { algo = JaroWinkler, type = String, weight = 0.5, ignoreMissing = false, path = result/metadata/title/value }, |
|
13 |
authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.5, ignoreMissing = true, path = result/author/metadata/fullname/value } |
|
14 |
}, |
|
15 |
blacklists = { |
|
16 |
title = [ |
|
17 |
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$", |
|
18 |
"^(Kiri Karl Morgensternile).*$", |
|
19 |
"^(\\[Eksliibris Aleksandr).*\\]$", |
|
20 |
"^(\\[Eksliibris Aleksandr).*$", |
|
21 |
"^(Eksliibris Aleksandr).*$", |
|
22 |
"^(Kiri A\\. de Vignolles).*$", |
|
23 |
"^(2 kirja Karl Morgensternile).*$", |
|
24 |
"^(Pirita kloostri idaosa arheoloogilised).*$", |
|
25 |
"^(Kiri tundmatule).*$", |
|
26 |
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", |
|
27 |
"^(Eksliibris Nikolai Birukovile).*$", |
|
28 |
"^(Eksliibris Nikolai Issakovile).*$", |
|
29 |
"^(WHP Cruise Summary Information of section).*$", |
|
30 |
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", |
|
31 |
"^(Measurement of the spin\\-dependent structure function).*" |
|
32 |
] } |
|
33 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/pace/organization.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
conditions { }, |
|
3 |
model { |
|
4 |
legalname { algo = JaroWinkler, type = String, weight = 0.6, ignoreMissing = false, path = organization/metadata/legalname/value }, |
|
5 |
legalshortname { algo = JaroWinkler, type = String, weight = 0.4, ignoreMissing = true, path = organization/metadata/legalshortname/value } |
|
6 |
} |
|
7 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/pace/result.simple.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
conditions { }, |
|
3 |
model { |
|
4 |
title { algo = JaroWinkler, type = String, weight = 0.5, ignoreMissing = false, path = result/metadata/title/value } |
|
5 |
} |
|
6 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/data/transform/fields.xml | ||
---|---|---|
1 |
<FIELDS> |
|
2 |
<FIELD name="oaftype" path="entity/type" tokenizable="false" indexable="true" stat="false" result="false"/> |
|
3 |
<FIELD name="objIdentifier" path="entity/id" tokenizable="false" indexable="true" stat="false" result="false"/> |
|
4 |
<FIELD name="collectedfrom" path="entity/collectedfrom/value" tokenizable="false" indexable="true" result="false" stat="false" /> |
|
5 |
<FIELD name="pid" path="entity/pid/value" tokenizable="false" stat="false" result="false" indexable="true"/> |
|
6 |
<FIELD name="deletedbyinference" path="dataInfo/deletedbyinference" tokenizable="false" stat="false" result="false" indexable="true"/> |
|
7 |
<FIELD name="inferred" path="dataInfo/inferred" tokenizable="false" stat="false" result="false" indexable="true"/> |
|
8 |
|
|
9 |
<!-- ORGANIZATION FIELDS --> |
|
10 |
<FIELD name="organizationlegalname" path="entity/organization/metadata/legalname/value" type="ngramtext" stat="false" indexable="true" result="false"/> |
|
11 |
<FIELD name="organizationlegalshortname" path="entity/organization/metadata/legalshortname/value" type="ngramtext" stat="false" indexable="true" result="false"/> |
|
12 |
<FIELD name="organizationwebsiteurl" path="entity/organization/metadata/websiteurl/value" stat="false" indexable="true" result="false"/> |
|
13 |
|
|
14 |
<!-- PERSON FIELDS --> |
|
15 |
<FIELD name="personfirstname" path="entity/person/metadata/firstname/value" stat="false" indexable="true" result="false"/> |
|
16 |
<FIELD name="personsecondnames" path="entity/person/metadata/secondnames/value" stat="false" indexable="true" result="false"/> |
|
17 |
<FIELD name="personfullname" path="entity/person/metadata/fullname/value" stat="false" indexable="true" result="false"/> |
|
18 |
|
|
19 |
<!-- RESULT FIELDS --> |
|
20 |
<FIELD name="resulttitle" path="entity/result/metadata/title/value | entity/children/result/metadata/title/value" stat="false" result="false" indexable="true"/> |
|
21 |
<FIELD name="resultdescription" path="entity/result/metadata/description/value" result="false" indexable="true" stat="false"/> |
|
22 |
|
|
23 |
</FIELDS> |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/data/transform/simpleRecord.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<record> |
|
3 |
<metadata> |
|
4 |
<a>A text value</a> |
|
5 |
<b attr="attribute value"/> |
|
6 |
</metadata> |
|
7 |
</record> |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/resources/eu/dnetlib/data/transform/record.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oaf="http://namespace.openaire.eu/oaf"> |
|
3 |
<header xmlns="http://namespace.openaire.eu/"> |
|
4 |
<dri:objIdentifier>od______1064::fe947e59cf7db2f039b4c8cc25693fb0</dri:objIdentifier> |
|
5 |
<dri:recordIdentifier>95168db1-d57e-4b99-855b-993cf91d1283_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:ora.ouls.ox.ac.uk:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dri:recordIdentifier> |
|
6 |
<dri:dateOfCollection/> |
|
7 |
<dri:mdFormat/> |
|
8 |
<dri:mdFormatInterpretation/> |
|
9 |
<dri:repositoryId>2a02b271-0756-453c-b2f0-8c472a8806a5_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
10 |
<dr:objectIdentifier/> |
|
11 |
<dr:dateOfCollection>2013-05-10T16:04:02Z</dr:dateOfCollection> |
|
12 |
<oaf:datasourceprefix>od______1064</oaf:datasourceprefix> |
|
13 |
</header> |
|
14 |
<metadata xmlns="http://namespace.openaire.eu/"> |
|
15 |
<dc:creator>Uphoff, S</dc:creator> |
|
16 |
<dc:creator>Holden, SJ</dc:creator> |
|
17 |
<dc:dateAccepted>2011-01-01</dc:dateAccepted> |
|
18 |
<dc:description>The analysis of structure and dynamics of biomolecules is important for understanding their function. Toward this aim, we introduce a method called 'switchable FRET', which combines single-molecule fluorescence resonance energy transfer (FRET) with reversible photoswitching of fluorophores. Typically, single-molecule FRET is measured within a single donor-acceptor pair and reports on only one distance. Although multipair FRET approaches that monitor multiple distances have been developed, they are technically challenging and difficult to extend, mainly because of their reliance on spectrally distinct acceptors. In contrast, switchable FRET sequentially probes FRET between a single donor and spectrally identical photoswitchable acceptors, dramatically reducing the experimental and analytical complexity and enabling direct monitoring of multiple distances. Our experiments on DNA molecules, a protein-DNA complex and dynamic Holliday junctions demonstrate the potential of switchable FRET for studying dynamic, multicomponent biomolecules. </dc:description> |
|
19 |
<dc:identifier>http://pub.uni-bielefeld.de/publication/2303387</dc:identifier> |
|
20 |
<dc:language>eng</dc:language> |
|
21 |
<dc:title>Monitoring multiple distances within a single molecule using switchable FRET.</dc:title> |
|
22 |
<dc:source>Symplectic Elements at Oxford</dc:source> |
|
23 |
<dc:source>PubMed (http://www.ncbi.nlm.nih.gov/pubmed/)</dc:source> |
|
24 |
<dc:source>Web of Science (Lite) (http://apps.webofknowledge.com/summary.do)</dc:source> |
|
25 |
<dc:subject>Biotinylation</dc:subject> |
|
26 |
<dc:subject>Computer Simulation</dc:subject> |
|
27 |
<dr:CobjCategory>0001</dr:CobjCategory> |
|
28 |
<dr:CobjIdentifier>urn:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dr:CobjIdentifier> |
|
29 |
<dr:CobjIdentifier>pii:nmeth.1502</dr:CobjIdentifier> |
|
30 |
<dr:CobjIdentifier>local:71163</dr:CobjIdentifier> |
|
31 |
<dr:CobjIdentifier>eissn:1548-7105</dr:CobjIdentifier> |
|
32 |
<dr:CobjIdentifier>doi:10.1038/nmeth.1502</dr:CobjIdentifier> |
|
33 |
<dr:CobjIdentifier>issn:1548-7091</dr:CobjIdentifier> |
|
34 |
<oaf:accessrights>EMBARGO</oaf:accessrights> |
|
35 |
<oaf:collectedDatasourceid>issn____::12345678</oaf:collectedDatasourceid> |
|
36 |
<oaf:hostedBy name="DOAJ" id="doaj____::1234"/> |
|
37 |
<oaf:collectedFrom name="My favourite journal" id="issn____::12345678"/> |
|
38 |
<oaf:fulltext>http://xyz</oaf:fulltext> |
|
39 |
<oaf:journal issn="12345678" eissn="e1234567" lissn="l1234567">My favourite journal</oaf:journal> |
|
40 |
<oaf:journal issn="12345678" eissn="e1234567">My second favourite journal</oaf:journal> |
|
41 |
<oaf:identifier identifierType="doi">10.1038/nmeth.1502</oaf:identifier> |
|
42 |
<oaf:reference identifier="IPR004915" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMC155133/databaseLinks/INTERPRO/1/json" source="INTERPRO" title="Nonstructural protein NS-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/IPR004915</oaf:reference> |
|
43 |
<oaf:reference identifier="IPRXXXXXX" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMCYYYYYY/databaseLinks/INTERPRO/2/json" source="INTERPRO" title="Nonstructural protein XY-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/TTTTTTTTT</oaf:reference> |
|
44 |
</metadata> |
|
45 |
</record> |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/pace/clustering/BlacklistAwareClusteringCombinerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
7 |
import eu.dnetlib.pace.config.Config; |
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
import eu.dnetlib.pace.model.MapDocument; |
|
12 |
|
|
13 |
public class BlacklistAwareClusteringCombinerTest extends AbstractProtoPaceTest { |
|
14 |
|
|
15 |
private Config config; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() { |
|
19 |
config = getResultFullConf(); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testCombine() { |
|
24 |
MapDocument result = result(config, "A", "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission", "2013"); |
|
25 |
FieldListImpl fl = new FieldListImpl(); |
|
26 |
fl.add(new FieldValueImpl(Type.String, "desc", "hello world description pipeline")); |
|
27 |
|
|
28 |
result.getFieldMap().put("desc", fl); |
|
29 |
|
|
30 |
fl.clear(); |
|
31 |
fl.add(new FieldValueImpl(Type.String, "title", "lorem ipsum cabalie qwerty")); |
|
32 |
result.getFieldMap().get("title").add(fl); |
|
33 |
|
|
34 |
System.out.println(BlacklistAwareClusteringCombiner.filterAndCombine(result, config, config.blacklists())); |
|
35 |
} |
|
36 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
7 |
import eu.dnetlib.pace.config.Config; |
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
import eu.dnetlib.pace.model.MapDocument; |
|
12 |
|
|
13 |
public class ClusteringCombinerTest extends AbstractProtoPaceTest { |
|
14 |
|
|
15 |
private Config config; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() { |
|
19 |
config = getResultFullConf(); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testCombine() { |
|
24 |
String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission"; |
|
25 |
MapDocument result = result(config, "A", title, "2013"); |
|
26 |
|
|
27 |
FieldListImpl fl = new FieldListImpl(); |
|
28 |
fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty")); |
|
29 |
|
|
30 |
result.getFieldMap().put("desc", fl); |
|
31 |
System.out.println(title); |
|
32 |
System.out.println(ClusteringCombiner.combine(result, config)); |
|
33 |
} |
|
34 |
|
|
35 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import org.apache.commons.io.IOUtils; |
|
8 |
import org.apache.commons.lang.RandomStringUtils; |
|
9 |
import org.apache.commons.lang.StringUtils; |
|
10 |
|
|
11 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
12 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
13 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
15 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
16 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
17 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
18 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
19 |
import eu.dnetlib.pace.config.Config; |
|
20 |
import eu.dnetlib.pace.config.DynConf; |
|
21 |
import eu.dnetlib.pace.config.Type; |
|
22 |
import eu.dnetlib.pace.model.Field; |
|
23 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
24 |
import eu.dnetlib.pace.model.MapDocument; |
|
25 |
import eu.dnetlib.pace.model.ProtoDocumentBuilder; |
|
26 |
|
|
27 |
public abstract class AbstractProtoPaceTest extends OafTest { |
|
28 |
|
|
29 |
protected Config getOrganizationSimpleConf() { |
|
30 |
return DynConf.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf")); |
|
31 |
} |
|
32 |
|
|
33 |
protected Config getResultConf() { |
|
34 |
return DynConf.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf")); |
|
35 |
} |
|
36 |
|
|
37 |
protected Config getResultFullConf() { |
|
38 |
return DynConf.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf")); |
|
39 |
} |
|
40 |
|
|
41 |
protected Config getResultAuthorsConf() { |
|
42 |
return DynConf.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf")); |
|
43 |
} |
|
44 |
|
|
45 |
protected Config getResultSimpleConf() { |
|
46 |
return DynConf.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf")); |
|
47 |
} |
|
48 |
|
|
49 |
private String readFromClasspath(final String filename) { |
|
50 |
StringWriter sw = new StringWriter(); |
|
51 |
try { |
|
52 |
IOUtils.copy(getClass().getResourceAsStream(filename), sw); |
|
53 |
return sw.toString(); |
|
54 |
} catch (IOException e) { |
|
55 |
throw new RuntimeException("cannot load resource from classpath: " + filename); |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
protected MapDocument result(final Config config, final String id, final String title) { |
|
60 |
return result(config, id, title, null, null, null); |
|
61 |
} |
|
62 |
|
|
63 |
protected MapDocument result(final Config config, final String id, final String title, final String date) { |
|
64 |
return result(config, id, title, date, null, null); |
|
65 |
} |
|
66 |
|
|
67 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) { |
|
68 |
return result(config, id, title, date, pid, null); |
|
69 |
} |
|
70 |
|
|
71 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) { |
|
72 |
Result.Metadata.Builder metadata = Result.Metadata.newBuilder(); |
|
73 |
if (!StringUtils.isBlank(title)) { |
|
74 |
metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles"))); |
|
75 |
metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles"))); |
|
76 |
} |
|
77 |
if (!StringUtils.isBlank(date)) { |
|
78 |
metadata.setDateofacceptance(sf(date)); |
|
79 |
} |
|
80 |
|
|
81 |
OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result); |
|
82 |
Result.Builder result = Result.newBuilder().setMetadata(metadata); |
|
83 |
|
|
84 |
if (authors != null) { |
|
85 |
for (String author : authors) { |
|
86 |
result.addAuthor(person(author)); |
|
87 |
} |
|
88 |
} |
|
89 |
|
|
90 |
entity.setResult(result); |
|
91 |
|
|
92 |
if (!StringUtils.isBlank(pid)) { |
|
93 |
entity.addPid(sp(pid, "doi")); |
|
94 |
entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai")); |
|
95 |
} |
|
96 |
|
|
97 |
OafEntity build = entity.build(); |
|
98 |
return ProtoDocumentBuilder.newInstance(id, build, config.fields()); |
|
99 |
} |
|
100 |
|
|
101 |
private Person.Builder person(final String author) { |
|
102 |
Person.Builder person = Person.newBuilder(); |
|
103 |
|
|
104 |
eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false); |
|
105 |
Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
106 |
if (p.isAccurate()) { |
|
107 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
108 |
metadata.addSecondnames(sf(p.getNormalisedSurname())); |
|
109 |
metadata.setFullname(sf(p.getNormalisedFullname())); |
|
110 |
} else { |
|
111 |
metadata.setFullname(sf(p.getOriginal())); |
|
112 |
} |
|
113 |
|
|
114 |
return person.setMetadata(metadata); |
|
115 |
} |
|
116 |
|
|
117 |
private OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) { |
|
118 |
OafEntity.Builder entity = OafEntity.newBuilder().setId(id).setType(type); |
|
119 |
return entity; |
|
120 |
} |
|
121 |
|
|
122 |
protected MapDocument organization(final Config config, final String id, final String legalName) { |
|
123 |
return organization(config, id, legalName, null); |
|
124 |
} |
|
125 |
|
|
126 |
protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) { |
|
127 |
Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder(); |
|
128 |
if (legalName != null) { |
|
129 |
metadata.setLegalname(sf(legalName)); |
|
130 |
} |
|
131 |
if (legalShortName != null) { |
|
132 |
metadata.setLegalshortname(sf(legalShortName)); |
|
133 |
} |
|
134 |
|
|
135 |
OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result); |
|
136 |
entity.setOrganization(Organization.newBuilder().setMetadata(metadata)); |
|
137 |
|
|
138 |
return ProtoDocumentBuilder.newInstance(id, entity.build(), config.fields()); |
|
139 |
} |
|
140 |
|
|
141 |
private StructuredProperty sp(final String pid, final String type) { |
|
142 |
Builder pidSp = |
|
143 |
StructuredProperty.newBuilder().setValue(pid) |
|
144 |
.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types")); |
|
145 |
return pidSp.build(); |
|
146 |
} |
|
147 |
|
|
148 |
protected Field title(final String s) { |
|
149 |
return new FieldValueImpl(Type.String, "title", s); |
|
150 |
} |
|
151 |
|
|
152 |
protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) { |
|
153 |
return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier); |
|
154 |
} |
|
155 |
|
|
156 |
/* |
|
157 |
* protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); } |
|
158 |
* |
|
159 |
* protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return |
|
160 |
* Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); } |
|
161 |
*/ |
|
162 |
|
|
163 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/pace/model/ProtoDocumentBuilderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
6 |
import eu.dnetlib.pace.config.Config; |
|
7 |
|
|
8 |
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest { |
|
9 |
|
|
10 |
@Test |
|
11 |
public void test1() { |
|
12 |
|
|
13 |
String id = "12345"; |
|
14 |
|
|
15 |
Config config = getResultConf(); |
|
16 |
|
|
17 |
MapDocument document = ProtoDocumentBuilder.newInstance(id, getResult(id), config.fields()); |
|
18 |
|
|
19 |
System.out.println(document); |
|
20 |
|
|
21 |
String stringDoc = MapDocumentSerializer.toString(document); |
|
22 |
|
|
23 |
System.out.println(stringDoc); |
|
24 |
|
|
25 |
MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes()); |
|
26 |
|
|
27 |
System.out.println(decoded); |
|
28 |
} |
|
29 |
|
|
30 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.distance; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import com.google.common.collect.Lists; |
|
10 |
|
|
11 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
12 |
import eu.dnetlib.pace.config.Config; |
|
13 |
import eu.dnetlib.pace.model.MapDocument; |
|
14 |
|
|
15 |
public class DetectorTest extends AbstractProtoPaceTest { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testDistanceResultSimple() { |
|
19 |
Config config = getResultSimpleConf(); |
|
20 |
|
|
21 |
MapDocument resA = result(config, "A", "Recent results from CDF"); |
|
22 |
MapDocument resB = result(config, "B", "Recent results from CDF"); |
|
23 |
|
|
24 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
25 |
System.out.println(String.format(" d ---> %s", d)); |
|
26 |
|
|
27 |
assertTrue(d == 1.0); |
|
28 |
} |
|
29 |
|
|
30 |
@Test |
|
31 |
public void testDistanceResultSimpleMissingDates() { |
|
32 |
Config config = getResultSimpleConf(); |
|
33 |
|
|
34 |
MapDocument resA = result(config, "A", "Recent results from BES"); |
|
35 |
MapDocument resB = result(config, "A", "Recent results from CES"); |
|
36 |
|
|
37 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
38 |
System.out.println(String.format(" d ---> %s", d)); |
|
39 |
|
|
40 |
assertTrue(d > 0.97); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testDistanceResultInvalidDate() { |
|
45 |
Config config = getResultConf(); |
|
46 |
|
|
47 |
MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05"); |
|
48 |
MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty"); |
|
49 |
|
|
50 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
51 |
System.out.println(String.format(" d ---> %s", d)); |
|
52 |
|
|
53 |
assertTrue(d == 1.0); |
|
54 |
} |
|
55 |
|
|
56 |
@Test |
|
57 |
public void testDistanceResultMissingOneDate() { |
|
58 |
Config config = getResultConf(); |
|
59 |
|
|
60 |
MapDocument resA = result(config, "A", "title title title 6BESR", null); |
|
61 |
MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02"); |
|
62 |
|
|
63 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
64 |
System.out.println(String.format(" d ---> %s", d)); |
|
65 |
|
|
66 |
assertTrue(d > 0.9 && d < 1.0); |
|
67 |
} |
|
68 |
|
|
69 |
@Test |
|
70 |
public void testDistanceResult() { |
|
71 |
Config config = getResultConf(); |
|
72 |
|
|
73 |
MapDocument resA = result(config, "A", "title title title BES", ""); |
|
74 |
MapDocument resB = result(config, "B", "title title title CLEO"); |
|
75 |
|
|
76 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
77 |
System.out.println(String.format(" d ---> %s", d)); |
|
78 |
|
|
79 |
// assertTrue(d > 0.9 && d < 1.0); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
public void testDistanceResultMissingTwoDate() { |
|
84 |
Config config = getResultConf(); |
|
85 |
|
|
86 |
MapDocument resA = result(config, "A", "title title title 6BESR"); |
|
87 |
MapDocument resB = result(config, "B", "title title title 6CLER"); |
|
88 |
|
|
89 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
90 |
|
|
91 |
System.out.println(String.format(" d ---> %s", d)); |
|
92 |
|
|
93 |
assertTrue(d > 0.9 && d < 1.0); |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void testDistanceOrganizationIgnoreMissing() { |
|
98 |
|
|
99 |
Config config = getOrganizationSimpleConf(); |
|
100 |
|
|
101 |
MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE"); |
|
102 |
MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR"); |
|
103 |
|
|
104 |
double d = new PaceDocumentDistance().between(orgA, orgB, config); |
|
105 |
System.out.println(String.format(" d ---> %s", d)); |
|
106 |
|
|
107 |
assertTrue(d == 1.0); |
|
108 |
} |
|
109 |
|
|
110 |
@Test |
|
111 |
public void testDistanceResultCase1() { |
|
112 |
|
|
113 |
Config config = getResultConf(); |
|
114 |
|
|
115 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003"); |
|
116 |
MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003"); |
|
117 |
|
|
118 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
119 |
System.out.println(String.format(" d ---> %s", d)); |
|
120 |
|
|
121 |
assertTrue(d > 0.9 && d < 1.0); |
|
122 |
} |
|
123 |
|
|
124 |
@Test |
|
125 |
public void testDistanceResultCaseDoiMatch1() { |
|
126 |
Config config = getResultConf(); |
|
127 |
|
|
128 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855"); |
|
129 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855"); |
|
130 |
|
|
131 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
132 |
System.out.println(String.format(" d ---> %s", d)); |
|
133 |
|
|
134 |
assertTrue("exact DOIs will produce an exact match", d == 1.0); |
|
135 |
} |
|
136 |
|
|
137 |
@Test |
|
138 |
public void testDistanceResultCaseDoiMatch2() { |
|
139 |
Config config = getResultConf(); |
|
140 |
|
|
141 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855"); |
|
142 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "doi:10.1594/PANGAEA.726855"); |
|
143 |
|
|
144 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
145 |
System.out.println(String.format(" d ---> %s", d)); |
|
146 |
|
|
147 |
assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0); |
|
148 |
} |
|
149 |
|
|
150 |
@Test |
|
151 |
public void testDistanceResultCaseDoiMatch3() { |
|
152 |
Config config = getResultConf(); |
|
153 |
|
|
154 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
155 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003"); |
|
156 |
|
|
157 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
158 |
System.out.println(String.format(" d ---> %s", d)); |
|
159 |
|
|
160 |
assertTrue("a missing DOI will casue the comparsion to continue with the following conditions", d == 1.0); |
|
161 |
} |
|
162 |
|
|
163 |
@Test |
|
164 |
public void testDistanceResultCaseDoiMatch4() { |
|
165 |
Config config = getResultConf(); |
|
166 |
|
|
167 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
168 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005"); |
|
169 |
|
|
170 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
171 |
System.out.println(String.format(" d ---> %s", d)); |
|
172 |
|
|
173 |
assertTrue("a missing DOI, comparsion continues with the following conditions, different publication years will drop the score to 0", d == 0.0); |
|
174 |
} |
|
175 |
|
|
176 |
@Test |
|
177 |
public void testDistanceResultCaseDoiMatch5() { |
|
178 |
|
|
179 |
Config config = getResultConf(); |
|
180 |
|
|
181 |
MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020"); |
|
182 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003"); |
|
183 |
|
|
184 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
185 |
System.out.println(String.format(" d ---> %s", d)); |
|
186 |
|
|
187 |
assertTrue("a missing DOI, comparsion continues with the following conditions", d > 0.9 && d < 1.0); |
|
188 |
} |
|
189 |
|
|
190 |
@Test |
|
191 |
public void testDistanceResultCaseDoiMatch6() { |
|
192 |
Config config = getResultConf(); |
|
193 |
|
|
194 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
195 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI"); |
|
196 |
|
|
197 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
198 |
System.out.println(String.format(" d ---> %s", d)); |
|
199 |
|
|
200 |
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0); |
|
201 |
} |
|
202 |
|
|
203 |
// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855 |
|
204 |
|
|
205 |
@Test |
|
206 |
public void testDistanceResultCaseAuthor1() { |
|
207 |
|
|
208 |
Config config = getResultAuthorsConf(); |
|
209 |
|
|
210 |
List<String> authorsA = Lists.newArrayList("a", "b", "c", "d"); |
|
211 |
List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
212 |
|
|
213 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
214 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
215 |
|
|
216 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
217 |
System.out.println(String.format(" d ---> %s", d)); |
|
218 |
|
|
219 |
assertTrue(d == 0.0); |
|
220 |
} |
|
221 |
|
|
222 |
@Test |
|
223 |
public void testDistanceResultCaseAuthor2() { |
|
224 |
|
|
225 |
Config config = getResultAuthorsConf(); |
|
226 |
|
|
227 |
List<String> authorsA = Lists.newArrayList("a", "b", "c"); |
|
228 |
List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
229 |
|
|
230 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
231 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
232 |
|
|
233 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
234 |
System.out.println(String.format(" d ---> %s", d)); |
|
235 |
|
|
236 |
assertTrue(d == 1.0); |
|
237 |
} |
|
238 |
|
|
239 |
@Test |
|
240 |
public void testDistanceResultCaseAuthor3() { |
|
241 |
|
|
242 |
Config config = getResultAuthorsConf(); |
|
243 |
|
|
244 |
List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M."); |
|
245 |
List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
246 |
|
|
247 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
248 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
249 |
|
|
250 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
251 |
System.out.println(String.format(" d ---> %s", d)); |
|
252 |
|
|
253 |
assertTrue(d > 0.9 && d < 1.0); |
|
254 |
} |
|
255 |
|
|
256 |
@Test |
|
257 |
public void testDistanceResultCaseAuthor4() { |
|
258 |
|
|
259 |
Config config = getResultAuthorsConf(); |
|
260 |
|
|
261 |
List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a"); |
|
262 |
List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
263 |
|
|
264 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
265 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
266 |
|
|
267 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
268 |
System.out.println(String.format(" d ---> %s", d)); |
|
269 |
|
|
270 |
// assertTrue(d == 0.0); |
|
271 |
} |
|
272 |
|
|
273 |
@Test |
|
274 |
public void testDistanceResultFullConf() { |
|
275 |
|
|
276 |
Config config = getResultFullConf(); |
|
277 |
|
|
278 |
List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva"); |
|
279 |
List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie"); |
|
280 |
|
|
281 |
MapDocument resA = |
|
282 |
result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", |
|
283 |
"10.1186/1752-1947-4-299", authorsA); |
|
284 |
MapDocument resB = |
|
285 |
result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", null, |
|
286 |
authorsB); |
|
287 |
|
|
288 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
289 |
System.out.println(String.format(" d ---> %s", d)); |
|
290 |
|
|
291 |
// assertTrue(d == 0.0); |
|
292 |
} |
|
293 |
|
|
294 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
6 |
|
|
7 |
public class OafDecoderTest { |
|
8 |
|
|
9 |
@Test |
|
10 |
public void test() { |
|
11 |
|
|
12 |
OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity); |
|
13 |
|
|
14 |
System.out.println(decoder.asXml()); |
|
15 |
} |
|
16 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/data/mapreduce/util/OafRowKeyDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
public class OafRowKeyDecoderTest { |
|
6 |
|
|
7 |
@Test |
|
8 |
public void test() { |
|
9 |
|
|
10 |
String id1 = "50|acnbad______::0a454baf9c61e63d42fb83ab549f8062"; |
|
11 |
|
|
12 |
OafRowKeyDecoder d = OafRowKeyDecoder.decode(id1); |
|
13 |
|
|
14 |
System.out.println(d.getId()); |
|
15 |
} |
|
16 |
|
|
17 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import org.junit.Before; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
10 |
|
|
11 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
12 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
13 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
14 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
15 |
|
|
16 |
public class OafRelDecoderTest { |
|
17 |
|
|
18 |
private OafRel oafRel; |
|
19 |
|
|
20 |
@Before |
|
21 |
public void setUp() { |
|
22 |
oafRel = OafTest.getPersonResult("ID_1", "ID_2", "1", "isAuthor"); |
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void testSetClass() { |
|
27 |
|
|
28 |
OafRelDecoder d1 = OafRelDecoder.decode(oafRel); |
|
29 |
|
|
30 |
assertNotNull(d1); |
|
31 |
assertEquals("isAuthor", d1.getRelClass()); |
|
32 |
|
|
33 |
OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("hasAuthor").build()); |
|
34 |
|
|
35 |
assertEquals("hasAuthor", d2.getRelClass()); |
|
36 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassid()); |
|
37 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassname()); |
|
38 |
|
|
39 |
FieldDescriptor fd = Authorship.getDescriptor().findFieldByName("ranking"); |
|
40 |
assertEquals(d1.getSubRel().getField(fd), d2.getSubRel().getField(fd)); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testGetCF() { |
|
45 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, Authorship.RelName.isAuthorOf)); |
|
46 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, "isAuthorOf")); |
|
47 |
} |
|
48 |
|
|
49 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/test/java/eu/dnetlib/data/transform/ProtoDocumentMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import java.io.StringWriter; |
|
8 |
|
|
9 |
import org.apache.commons.codec.binary.Base64; |
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
import org.apache.solr.common.SolrInputDocument; |
|
14 |
import org.apache.solr.common.SolrInputField; |
|
15 |
import org.dom4j.DocumentException; |
|
16 |
import org.junit.Before; |
|
17 |
import org.junit.Test; |
|
18 |
|
|
19 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
20 |
import com.googlecode.protobuf.format.JsonFormat; |
|
21 |
|
|
22 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
23 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
24 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
25 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
26 |
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory; |
|
27 |
|
|
28 |
public class ProtoDocumentMapperTest { |
|
29 |
|
|
30 |
private static final Log log = LogFactory.getLog(ProtoDocumentMapperTest.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
31 |
private String fields; |
|
32 |
|
|
33 |
@Before |
|
34 |
public void setUp() throws IOException { |
|
35 |
final StringWriter sw = new StringWriter(); |
|
36 |
IOUtils.copy(getClass().getResourceAsStream("fields.xml"), sw); |
|
37 |
fields = sw.toString(); |
|
38 |
assertNotNull(fields); |
|
39 |
assertFalse(fields.isEmpty()); |
|
40 |
|
|
41 |
log.info(fields); |
|
42 |
} |
|
43 |
|
|
44 |
@Test |
|
45 |
public void testProto2SolrDocument() throws DocumentException, InvalidProtocolBufferException { |
|
46 |
final ProtoDocumentMapper mapper = new ProtoDocumentMapper(fields); |
|
47 |
|
|
48 |
assertNotNull(mapper); |
|
49 |
|
|
50 |
final OafEntity.Builder entity = OafTest.getResultBuilder("01"); |
|
51 |
entity.addChildren(OafTest.getResultBuilder("01_children")); |
|
52 |
|
|
53 |
final Oaf oaf = OafTest.embed(entity.build(), Kind.entity).getOaf(); |
|
54 |
|
|
55 |
assertNotNull(oaf.getEntity().getChildrenList()); |
|
56 |
assertFalse(oaf.getEntity().getChildrenList().isEmpty()); |
|
57 |
|
|
58 |
log.info("byte[] size: " + oaf.toByteArray().length); |
|
59 |
|
|
60 |
log.info("json size: " + JsonFormat.printToString(oaf).length()); |
|
61 |
|
|
62 |
log.info("base64 size: " + Base64.encodeBase64String(oaf.toByteArray()).length()); |
|
63 |
|
|
64 |
final byte[] decodeBase64 = Base64.decodeBase64(Base64.encodeBase64String(oaf.toByteArray())); |
|
65 |
|
|
66 |
log.info("decoded: " + JsonFormat.printToString(Oaf.parseFrom(decodeBase64))); |
|
67 |
|
|
68 |
final SolrInputDocument doc = mapper.map(oaf, InputDocumentFactory.getParsedDateField("2015-02-15"), "asd"); |
|
69 |
|
|
70 |
assertNotNull(doc); |
|
71 |
|
|
72 |
for (final SolrInputField f : doc.values()) { |
|
73 |
log.info(f); |
|
74 |
} |
|
75 |
} |
|
76 |
} |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/actionmanager/xslt/datacite2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:exslt="http://exslt.org/common"
	xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DataciteInfoPackageToHbaseXsltFunctions"
	xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils"
	xmlns:oaf="http://namespace.openaire.eu/oaf"
	xmlns:dr="http://www.driver-repository.eu/namespace/dr"
	xmlns:dri="http://www.driver-repository.eu/namespace/dri"
	extension-element-prefixes="exslt"
	exclude-result-prefixes="xsl dnet exslt oaf dr dri date">

	<!--
		Turns a DataCite record into an ACTIONS document holding one ACTION that targets
		the "result" column family, in a column named "update_" followed by date:now().
		A record without a metadata/resource element produces an empty ACTIONS element.
	-->

	<xsl:output omit-xml-declaration="yes" indent="yes"/>

	<!-- Defaults, overridable by the caller. -->
	<xsl:param name="trust" select="string('0.9')"/>
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
	<xsl:param name="namespaceprefix" select="string('datacite____')"/>

	<xsl:template match="/*">

		<xsl:variable name="dataInfo"
			select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
		<xsl:variable name="metadata"
			select="exslt:node-set(/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource'])"/>

		<!-- Falls back to the record's datasource prefix when no namespace prefix is given.
		     Not referenced further down in this stylesheet. -->
		<xsl:variable name="rightNSPrefix">
			<xsl:choose>
				<xsl:when test="not($namespaceprefix)">
					<xsl:value-of select="//oaf:datasourceprefix"/>
				</xsl:when>
				<xsl:otherwise>
					<xsl:value-of select="$namespaceprefix"/>
				</xsl:otherwise>
			</xsl:choose>
		</xsl:variable>

		<xsl:choose>
			<xsl:when test="count($metadata) != 0">

				<!-- Identifiers -->
				<xsl:variable name="originalId" select="//*[local-name() = 'identifier' and @identifierType = 'DOI']"/>
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>

				<!-- DataCite payload, matched by local name only -->
				<xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
				<xsl:variable name="titles" select="//*[local-name() = 'title']"/>
				<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
				<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
				<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
				<xsl:variable name="dates" select="//*[local-name() = 'date']"/>
				<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
				<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
				<xsl:variable name="formats" select="//*[local-name() = 'format']"/>
				<xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
				<xsl:variable name="rights" select="//oaf:accessrights"/>
				<xsl:variable name="version" select="//*[local-name() = 'version']"/>
				<xsl:variable name="instanceURI"
					select="concat('http://dx.doi.org', '/', //*[local-name() = 'resource']/*[local-name() = 'identifier'])"/>

				<!-- Provenance of the record -->
				<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
				<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
				<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
				<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
				<xsl:variable name="dateOfCollection" select="//dr:dateOfCollection"/>
				<xsl:variable name="language" select="//oaf:language"/>
				<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>

				<ACTIONS>
					<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:now())}">
						<xsl:value-of select="dnet:oafDataCiteResultFromInfoPackage($resultId, $dataInfo, $metadata, $titles,
							$subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType,
							$formats, $sizes, $language, $cobjcategory, $rights, $version, $provenance, $trust, $hostedbyid, $hostedbyname,
							$collectedfromid, $collectedfromname, $originalId, $instanceURI, $dateOfCollection)"/>
					</ACTION>
				</ACTIONS>
			</xsl:when>
			<xsl:otherwise>
				<!-- Nothing to act upon -->
				<ACTIONS/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>
</xsl:stylesheet>
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/actionmanager/xslt/datacite2insertActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:exslt="http://exslt.org/common"
	xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DataciteInfoPackageToHbaseXsltFunctions"
	xmlns:oaf="http://namespace.openaire.eu/oaf"
	xmlns:dr="http://www.driver-repository.eu/namespace/dr"
	xmlns:dri="http://www.driver-repository.eu/namespace/dri"
	extension-element-prefixes="exslt"
	exclude-result-prefixes="xsl dnet exslt oaf dr dri">

	<!--
		Turns a DataCite record into an ACTIONS document containing:
		  - one ACTION writing the result body,
		  - per creator: the person body plus the two authorship relations
		    (isAuthorOf / hasAuthor),
		  - per relatedPublication: the two isRelatedTo relations.
		A record without a metadata/resource element produces an empty ACTIONS element.
	-->

	<xsl:output omit-xml-declaration="yes" indent="yes"/>

	<!-- Defaults, overridable by the caller. -->
	<xsl:param name="trust" select="string('0.9')"/>
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
	<xsl:param name="namespaceprefix" select="string('datacite____')"/>

	<xsl:template match="/*">

		<xsl:variable name="dataInfo"
			select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
		<xsl:variable name="metadata"
			select="exslt:node-set(/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource'])"/>

		<!-- Falls back to the record's datasource prefix when no namespace prefix is given.
		     Not referenced further down in this stylesheet. -->
		<xsl:variable name="rightNSPrefix">
			<xsl:choose>
				<xsl:when test="not($namespaceprefix)">
					<xsl:value-of select="//oaf:datasourceprefix"/>
				</xsl:when>
				<xsl:otherwise>
					<xsl:value-of select="$namespaceprefix"/>
				</xsl:otherwise>
			</xsl:choose>
		</xsl:variable>

		<xsl:choose>
			<xsl:when test="count($metadata) != 0">

				<!-- NOTE(review): this path only matches an identifier that is a direct child of an
				     un-namespaced /record/metadata, whereas $metadata and $instanceURI look for it under
				     the "resource" element. Verify that $originalId is not always empty here, since it
				     is part of every generated person id. -->
				<xsl:variable name="originalId" select="/record/metadata/*[local-name() = 'identifier' and @identifierType = 'DOI']"/>
				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>

				<!-- DataCite payload, matched by local name only -->
				<xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
				<xsl:variable name="titles" select="//*[local-name() = 'title']"/>
				<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
				<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
				<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
				<xsl:variable name="dates" select="//*[local-name() = 'date']"/>
				<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
				<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
				<xsl:variable name="formats" select="//*[local-name() = 'format']"/>
				<xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
				<xsl:variable name="rights" select="//oaf:accessrights"/>
				<xsl:variable name="version" select="//*[local-name() = 'version']"/>
				<xsl:variable name="instanceURI"
					select="concat('http://dx.doi.org', '/', //*[local-name() = 'resource']/*[local-name() = 'identifier'])"/>

				<!-- Provenance of the record -->
				<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
				<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
				<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
				<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
				<xsl:variable name="dateOfCollection" select="//dr:dateOfCollection"/>
				<xsl:variable name="language" select="//oaf:language"/>
				<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>

				<ACTIONS>

					<!-- 1. The result itself -->
					<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="body">
						<xsl:value-of select="dnet:oafDataCiteResultFromInfoPackage($resultId, $dataInfo, $metadata, $titles,
							$subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType,
							$formats, $sizes, $language, $cobjcategory, $rights, $version, $provenance, $trust, $hostedbyid, $hostedbyname,
							$collectedfromid, $collectedfromname, $originalId, $instanceURI, $dateOfCollection)"/>
					</ACTION>

					<!-- 2. One person plus both authorship directions for every creator -->
					<xsl:for-each select="//*[local-name() = 'creator']">

						<!-- Local part of the person id: the nameIdentifier when present, otherwise the
						     creatorName; spaces, dots and commas become underscores. -->
						<xsl:variable name="personIdTemp">
							<xsl:choose>
								<xsl:when test="string-length(*[local-name() = 'nameIdentifier']) &gt; 0">
									<xsl:value-of select="translate(normalize-space(*[local-name() = 'nameIdentifier']),' .,','___')"/>
								</xsl:when>
								<xsl:otherwise>
									<xsl:value-of select="translate(normalize-space(*[local-name() = 'creatorName']),' .,','___')"/>
								</xsl:otherwise>
							</xsl:choose>
						</xsl:variable>

						<xsl:variable name="personId"
							select="dnet:oafId('person', $namespaceprefix, concat($originalId, '::', normalize-space($personIdTemp)))"/>
						<xsl:variable name="originalPersonId" select="*[local-name() = 'nameIdentifier']"/>

						<xsl:if test="string-length($personId) &gt; 0">
							<ACTION targetKey="{$personId}" targetColumnFamily="person" targetColumn="body">
								<xsl:value-of select="dnet:oafDataCitePersonFromInfoPackage($personId, $dataInfo, normalize-space(./*[local-name() = 'creatorName']), $provenance, $trust, $collectedfromid, $collectedfromname,$originalPersonId, $dateOfCollection)"/>
							</ACTION>
							<!-- position() is the creator's rank within the for-each -->
							<ACTION targetKey="{$personId}" targetColumnFamily="personResult_authorship_isAuthorOf" targetColumn="{$resultId}">
								<xsl:value-of select="dnet:oafDataCitePersonResultFromInfoPackage($personId, $resultId, position(), 'isAuthorOf', $provenance, $trust, $dataInfo)"/>
							</ACTION>
							<ACTION targetKey="{$resultId}" targetColumnFamily="personResult_authorship_hasAuthor" targetColumn="{$personId}">
								<xsl:value-of select="dnet:oafDataCitePersonResultFromInfoPackage($resultId, $personId, position(), 'hasAuthor', $provenance, $trust, $dataInfo)"/>
							</ACTION>
						</xsl:if>
					</xsl:for-each>

					<!-- 3. Symmetric isRelatedTo relations for every related publication -->
					<xsl:for-each select="//*[local-name()='relatedPublication']">

						<!-- relatedDataset ids must be in the openaire format -->
						<xsl:variable name="publicationId" select="dnet:oafSimpleId('result', normalize-space(@id))"/>

						<xsl:if test="string-length($publicationId) &gt; 0">

							<xsl:variable name="resultDataset"
								select="dnet:oafDataCitePublicationDatasetFromInfoPackage($resultId, $publicationId, 'isRelatedTo', $provenance, $trust, $dataInfo)"/>
							<xsl:variable name="datasetResult"
								select="dnet:oafDataCitePublicationDatasetFromInfoPackage($publicationId, $resultId, 'isRelatedTo', $provenance, $trust, $dataInfo)"/>

							<ACTION targetKey="{$resultId}" targetColumnFamily="resultResult_publicationDataset_isRelatedTo" targetColumn="{$publicationId}">
								<xsl:value-of select="$resultDataset"/>
							</ACTION>
							<ACTION targetKey="{$publicationId}" targetColumnFamily="resultResult_publicationDataset_isRelatedTo" targetColumn="{$resultId}">
								<xsl:value-of select="$datasetResult"/>
							</ACTION>
						</xsl:if>
					</xsl:for-each>
				</ACTIONS>
			</xsl:when>
			<xsl:otherwise>
				<!-- Nothing to act upon -->
				<ACTIONS/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>
</xsl:stylesheet>
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/actionmanager/xslt/rels2actions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:exslt="http://exslt.org/common"
	xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:dr="http://www.driver-repository.eu/namespace/dr"
	xmlns:dri="http://www.driver-repository.eu/namespace/dri"
	xmlns:oaa="http://namespace.openaire.eu/oaa"
	xmlns:oaf="http://namespace.openaire.eu/oaf"
	xmlns:action="http://namespace.openaire.eu/action"
	extension-element-prefixes="exslt"
	exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt">

	<!--
		Turns every RELATION element of type "resultProject" into a pair of ACTIONs,
		one per direction (isProducedBy / produces). Input without any RELATION element
		produces an empty ACTIONS element.
	-->

	<xsl:output omit-xml-declaration="yes" indent="yes"/>

	<!-- Defaults, overridable by the caller. -->
	<xsl:param name="trust" select="string('0.9')"/>
	<xsl:param name="provenance" select="string('UNKNOWN')"/>
	<xsl:param name="namespaceprefix" select="string('unknown_____')"/>

	<xsl:template match="/*">

		<xsl:choose>
			<xsl:when test="count(//RELATION) != 0">
				<ACTIONS>
					<xsl:for-each select="//RELATION">
						<xsl:choose>
							<xsl:when test="@type = 'resultProject'">
								<xsl:variable name="resultId" select="@source"/>

								<!-- Targets starting with "40|" are used as they are;
								     anything else goes through dnet:oafSplitId. -->
								<xsl:variable name="projectId">
									<xsl:choose>
										<xsl:when test="starts-with(@target, '40|')">
											<xsl:value-of select="@target"/>
										</xsl:when>
										<xsl:otherwise>
											<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))"/>
										</xsl:otherwise>
									</xsl:choose>
								</xsl:variable>

								<ACTION targetKey="{$resultId}" targetColumnFamily="resultProject_outcome_isProducedBy" targetColumn="{$projectId}">
									<xsl:value-of select="dnet:oafResultProjectFromInfoPackage($resultId, $projectId, 'isProducedBy', $provenance, $trust)"/>
								</ACTION>
								<ACTION targetKey="{$projectId}" targetColumnFamily="resultProject_outcome_produces" targetColumn="{$resultId}">
									<xsl:value-of select="dnet:oafResultProjectFromInfoPackage($projectId, $resultId, 'produces', $provenance, $trust)"/>
								</ACTION>
							</xsl:when>

							<!-- TODO: check this block, it caused problems:
								Cannot convert argument/return type in call to method 'eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions.oafPersonResultFromInfoPackage(node-set, node-set, result-tree, reference, reference)'
								FATAL ERROR: 'Could not compile stylesheet'

							<xsl:when test="./@type = 'personResult'">
								<xsl:variable name="personId" select="./@source" />
								<xsl:variable name="resultId" select="./@target" />

								<xsl:variable name="pos">
									<xsl:choose>
										<xsl:when test="@position"><xsl:value-of select="@position"/></xsl:when>
										<xsl:otherwise>1000</xsl:otherwise>
									</xsl:choose>
								</xsl:variable>

								<ACTION targetKey="{$personId}" targetColumnFamily="personResult" targetColumn="{$resultId}">
									<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($personId, $resultId, $pos, $provenance, $trust)" />
								</ACTION>
								<ACTION targetKey="{$resultId}" targetColumnFamily="personResult" targetColumn="{$personId}">
									<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($resultId, $personId, $pos, $provenance, $trust)" />
								</ACTION>
							</xsl:when>
							-->
						</xsl:choose>
					</xsl:for-each>
				</ACTIONS>
			</xsl:when>
			<xsl:otherwise>
				<!-- Nothing to act upon -->
				<ACTIONS/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>

</xsl:stylesheet>
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0"
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
	xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions"
	xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils"
	xmlns:exslt="http://exslt.org/common" xmlns:action="http://namespace.openaire.eu/action"
	extension-element-prefixes="exslt"
	exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date">

	<!--
		Turns a DMF record into an ACTIONS document holding one ACTION that targets the
		"result" column family, in a column named "update_" followed by date:now().
		A record whose metadata element has no children produces an empty ACTIONS element.
	-->

	<xsl:output omit-xml-declaration="yes" indent="yes" />

	<!-- Defaults, overridable by the caller. -->
	<xsl:param name="trust" select="string('0.9')" />
	<xsl:param name="provenance" select="string('UNKNOWN')" />
	<xsl:param name="namespaceprefix" select="string('unknown_____')" />

	<xsl:template match="/*">
		<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
		<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/>

		<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)" />

		<!-- Id of the datasource the record was collected from, or the literal UNKNOWN.
		     Not referenced further down in this stylesheet. -->
		<xsl:variable name="collectedDatasourceid">
			<xsl:choose>
				<xsl:when test="string-length(//oaf:collectedDatasourceid) > 0">
					<xsl:value-of select="//oaf:collectedDatasourceid" />
				</xsl:when>
				<xsl:otherwise>
					<!-- Was <xsl:value-of select="UNKNOWN"/>: as an XPath expression that selects
					     child elements named UNKNOWN, which yields an empty string here. -->
					<xsl:text>UNKNOWN</xsl:text>
				</xsl:otherwise>
			</xsl:choose>
		</xsl:variable>

		<xsl:choose>
			<xsl:when test="count($metadata) = 0">
				<!-- Nothing to act upon -->
				<ACTIONS />
			</xsl:when>
			<xsl:otherwise>

				<xsl:variable name="objidentifier" select="/record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']" />

				<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)" />

				<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" />
				<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name" />

				<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" />
				<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name" />

				<!-- <xsl:variable name="country" select="substring(//dr:repositoryCountry, 1, 200)" /> -->
				<!-- <xsl:variable name="accessmode" select="substring(//oaf:accessrights, 1, 200)" /> -->

				<!-- Serialised result produced by the Java extension function -->
				<xsl:variable name="result" select="dnet:oafResultFromInfoPackage($resultId, $dataInfo, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection, $metadata)" />

				<ACTIONS>
					<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:now())}">
						<xsl:value-of select="$result" />
					</ACTION>
				</ACTIONS>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>

</xsl:stylesheet>
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2insertActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions" |
|
7 |
xmlns:exslt="http://exslt.org/common" xmlns:action="http://namespace.openaire.eu/action" |
|
8 |
extension-element-prefixes="exslt" |
|
9 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
10 |
|
|
11 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
12 |
|
|
13 |
<xsl:param name="trust" select="string('0.9')" /> |
|
14 |
<xsl:param name="provenance" select="string('UNKNOWN')" /> |
|
15 |
<xsl:param name="namespaceprefix" select="string('unknown_____')" /> |
|
16 |
|
|
17 |
<xsl:template match="/*"> |
|
18 |
<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/> |
|
19 |
<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/> |
|
20 |
|
|
21 |
<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)" /> |
|
22 |
<xsl:variable name="collectedDatasourceid"> <!-- text of oaf:collectedDatasourceid when non-empty, otherwise the literal string UNKNOWN --> |
|
23 |
<xsl:choose> |
|
24 |
<xsl:when test="string-length(//oaf:collectedDatasourceid) > 0"> |
|
25 |
<xsl:value-of select="//oaf:collectedDatasourceid" /> |
|
26 |
</xsl:when> |
|
27 |
<xsl:otherwise> |
|
28 |
<xsl:value-of select="'UNKNOWN'" /> <!-- quoted on purpose: a bare UNKNOWN is evaluated as a child-element path, not as a string --> |
|
29 |
</xsl:otherwise> |
|
30 |
</xsl:choose> |
|
31 |
</xsl:variable> |
|
32 |
|
|
33 |
|
|
34 |
<xsl:choose> |
|
35 |
<xsl:when test="count($metadata) = 0"> |
|
36 |
<ACTIONS /> |
|
37 |
</xsl:when> |
|
38 |
<xsl:otherwise> |
|
39 |
|
|
40 |
<xsl:variable name="objidentifier" select="/record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']" /> |
|
41 |
|
|
42 |
<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)" /> |
|
43 |
|
|
44 |
<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" /> |
|
45 |
<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name" /> |
|
46 |
|
|
47 |
<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" /> |
|
48 |
<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name" /> |
|
49 |
|
|
50 |
<!-- <xsl:variable name="country" select="substring(//dr:repositoryCountry, 1, 200)" /> --> |
|
51 |
<!-- <xsl:variable name="accessmode" select="substring(//oaf:accessrights, 1, 200)" /> --> |
|
52 |
|
|
53 |
<xsl:variable name="result" select="dnet:oafResultFromInfoPackage($resultId, $dataInfo, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection, $metadata)" /> |
|
54 |
|
|
55 |
<ACTIONS> |
|
56 |
<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="body"> |
|
57 |
<xsl:value-of select="$result" /> |
|
58 |
</ACTION> |
|
59 |
|
|
60 |
<!-- <xsl:variable name="instance" select="dnet:oafInstanceFromInfoPackage($resultId, $hostedbyid, $hostedbyname, $provenance, $trust, $metadata)" /> --> |
|
61 |
|
|
62 |
<!-- <ACTION targetKey="{$resultId}" targetColumnFamily="instance" targetColumn="{$resultId}"> --> |
|
63 |
<!-- <xsl:value-of select="$instance" /> --> |
|
64 |
<!-- </ACTION> --> |
|
65 |
|
|
66 |
<xsl:for-each select="//*[local-name()='projectid']"> <!-- for every projectid element: emit the result-to-project relation and its inverse --> |
|
67 |
|
|
68 |
<xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/> |
|
69 |
|
|
70 |
<xsl:if test="string-length($projectId) > 0"> <!-- guard first, so the relation payloads are only built for a non-empty project id --> |
|
71 |
<xsl:variable name="resultproject" select="dnet:oafResultProjectFromInfoPackage($resultId, $projectId, 'isProducedBy', $provenance, $trust, $dataInfo)" /> |
|
72 |
<xsl:variable name="projectresult" select="dnet:oafResultProjectFromInfoPackage($projectId, $resultId, 'produces', $provenance, $trust, $dataInfo)" /> |
|
73 |
|
|
74 |
<ACTION targetKey="{$resultId}" targetColumnFamily="resultProject_outcome_isProducedBy" targetColumn="{$projectId}"> |
|
75 |
<xsl:value-of select="$resultproject" /> |
|
76 |
</ACTION> |
|
77 |
<ACTION targetKey="{$projectId}" targetColumnFamily="resultProject_outcome_produces" targetColumn="{$resultId}"> |
|
78 |
<xsl:value-of select="$projectresult" /> |
|
79 |
</ACTION> |
|
80 |
</xsl:if> |
|
81 |
</xsl:for-each> |
|
82 |
|
|
83 |
<xsl:for-each select="//dc:creator"> <!-- for every dc:creator: emit a person body plus the two authorship relations, ranked by position() --> |
|
84 |
|
|
85 |
<xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, concat($objidentifier, '::', .))" /> |
|
86 |
|
|
87 |
<xsl:if test="string-length($personId) > 0"> <!-- guard first, so the person and relation payloads are only built for a non-empty person id --> |
|
88 |
<xsl:variable name="person" select="dnet:oafPersonFromInfoPackage($personId, $dataInfo, ., $provenance, $trust, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection)" /> |
|
89 |
<xsl:variable name="personresult" select="dnet:oafPersonResultFromInfoPackage($personId, $resultId, position(), 'isAuthorOf', $provenance, $trust, $dataInfo)" /> |
|
90 |
<xsl:variable name="resultperson" select="dnet:oafPersonResultFromInfoPackage($resultId, $personId, position(), 'hasAuthor', $provenance, $trust, $dataInfo)" /> |
|
91 |
<ACTION targetKey="{$personId}" targetColumnFamily="person" targetColumn="body"> |
|
92 |
<xsl:value-of select="$person" /> |
|
93 |
</ACTION> |
|
94 |
<ACTION targetKey="{$personId}" targetColumnFamily="personResult_authorship_isAuthorOf" targetColumn="{$resultId}"> |
|
95 |
<xsl:value-of select="$personresult" /> |
|
96 |
</ACTION> |
|
97 |
<ACTION targetKey="{$resultId}" targetColumnFamily="personResult_authorship_hasAuthor" targetColumn="{$personId}"> |
|
98 |
<xsl:value-of select="$resultperson" /> |
|
99 |
</ACTION> |
|
100 |
</xsl:if> |
|
101 |
</xsl:for-each> |
|
102 |
|
|
103 |
<xsl:for-each select="//*[local-name()='relatedPublication']"> |
|
104 |
|
|
105 |
<!-- For each relatedPublication, emit an isRelatedTo relation in both directions between this result and the publication; relatedPublication ids must be in the openaire format --> |
|
106 |
<xsl:variable name="publicationId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/> |
|
107 |
|
|
108 |
<xsl:if test="string-length($publicationId) > 0"> |
|
109 |
|
|
110 |
<xsl:variable name="resultDataset" select="dnet:oafResultResult_PublicationDataset_FromMDStore($resultId, $publicationId, 'isRelatedTo', $dataInfo)"/> |
|
111 |
<xsl:variable name="datasetResult" select="dnet:oafResultResult_PublicationDataset_FromMDStore($publicationId, $resultId, 'isRelatedTo', $dataInfo)"/> |
|
112 |
|
|
113 |
<ACTION targetKey="{$resultId}" targetColumnFamily="resultResult_publicationDataset_isRelatedTo" |
|
114 |
targetColumn="{$publicationId}"><xsl:value-of select="$resultDataset"/> |
|
115 |
</ACTION> |
|
116 |
<ACTION targetKey="{$publicationId}" targetColumnFamily="resultResult_publicationDataset_isRelatedTo" |
|
117 |
targetColumn="{$resultId}"><xsl:value-of select="$datasetResult"/> |
|
118 |
</ACTION> |
|
119 |
</xsl:if> |
|
120 |
</xsl:for-each> |
|
121 |
|
|
122 |
</ACTIONS> |
|
123 |
</xsl:otherwise> |
|
124 |
</xsl:choose> |
|
125 |
</xsl:template> |
|
126 |
|
|
127 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/branches/2.2.x/src/main/resources/eu/dnetlib/data/transform/datacite_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
Also available in: Unified diff
cleanup