Project

General

Profile

« Previous | Next » 

Revision 40063

Tests load the XSLT from the TDSRule profiles in dnet-openaireplus-profiles

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/dmf_2_hbase.xsl
1
<xsl:stylesheet xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dc="http://purl.org/dc/elements/1.1/"
2
                xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:exslt="http://exslt.org/common"
3
                xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreToHbaseXsltFunctions" xmlns:oaf="http://namespace.openaire.eu/oaf"
4
                xmlns:dr="http://www.driver-repository.eu/namespace/dr" exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"
5
                extension-element-prefixes="exslt" version="1.0">
6
    <xsl:output indent="yes" omit-xml-declaration="yes"/>
7
    <xsl:param name="mergeIdForHomonyms" select="true()"/>
8
    <xsl:param name="writeCoAuthors" select="false()"/>
9
    <xsl:template match="/*">
10
        <xsl:variable name="about" select="/*[local-name() = 'record']/*[local-name() = 'about']"/>
11
        <xsl:variable name="dateofcollection" select="//dri:dateOfCollection"/> <!-- dri prefix, as used for dateOfCollection in odf_2_hbase.xsl and for objIdentifier in this stylesheet -->
12
        <xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)"/>
13
        <xsl:variable name="namespaceprefix">
14
            <xsl:choose>
15

  
16
                <!-- TODO check namespaceprefix length is 12. The fallback below is a quoted XPath string literal (12 characters, same token as odf_2_hbase.xsl); left unquoted it is a child-element name test and yields an empty prefix. -->
17
                <xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
18
                    <xsl:value-of select="//oaf:datasourceprefix"/>
19
                </xsl:when>
20
                <xsl:otherwise>
21
                    <xsl:value-of select="'unknown_____'"/>
22
                </xsl:otherwise>
23
            </xsl:choose>
24
        </xsl:variable>
25
        <xsl:choose>
26
            <!-- 			<xsl:when test="count($metadata) = 0 or string-length($namespaceprefix) = 0"> -->
27
            <xsl:when test="count($metadata) = 0">
28
                <ROWS/>
29
            </xsl:when>
30
            <xsl:otherwise>
31
                <xsl:variable name="objIdentifier" select="//dri:objIdentifier"/>
32
                <xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>
33
                <xsl:if test="string-length($resultId) &gt; 0">
34
                    <xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
35
                    <xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
36
                    <xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
37
                    <xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
38
                    <xsl:variable name="originalidTest" select="/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'recordIdentifier']"/> <!-- root matched by local name, as for $about, so a namespaced record root still resolves -->
39
                    <xsl:variable name="originalid">
40
                        <xsl:choose>
41
                            <xsl:when test="contains($originalidTest, '::')">
42
                                <xsl:value-of select="substring-after($originalidTest, '::')"/>
43
                            </xsl:when>
44
                            <xsl:otherwise>
45
                                <xsl:value-of select="$originalidTest"/>
46
                            </xsl:otherwise>
47
                        </xsl:choose>
48
                    </xsl:variable>
49
                    <xsl:variable name="result"
50
                                  select="dnet:oafResultFromMDStore($resultId, $about, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, $metadata)"/>
51
                    <ROWS>
52
                        <ROW columnFamily="result" key="{$resultId}">
53
                            <QUALIFIER name="body" type="base64">
54
                                <xsl:value-of select="$result"/>
55
                            </QUALIFIER>
56
                        </ROW>
57
                        <xsl:for-each select="//*[local-name()='projectid']">
58
                            <xsl:variable name="projectId" select="dnet:oafSplitId('project', normalize-space(.))"/>
59
                            <xsl:if test="string-length($projectId) &gt; 0">
60
                                <xsl:variable name="resultproject"
61
                                              select="dnet:oafResultProject_Outcome_FromMDStore($resultId, $projectId, 'isProducedBy', $about)"/>
62
                                <xsl:variable name="projectresult"
63
                                              select="dnet:oafResultProject_Outcome_FromMDStore($projectId, $resultId, 'produces', $about)"/>
64
                                <ROW columnFamily="resultProject_outcome_isProducedBy" key="{$resultId}">
65
                                    <QUALIFIER name="{$projectId}" type="base64">
66
                                        <xsl:value-of select="$resultproject"/>
67
                                    </QUALIFIER>
68
                                </ROW>
69
                                <ROW columnFamily="resultProject_outcome_produces" key="{$projectId}">
70
                                    <QUALIFIER name="{$resultId}" type="base64">
71
                                        <xsl:value-of select="$projectresult"/>
72
                                    </QUALIFIER>
73
                                </ROW>
74
                            </xsl:if>
75
                        </xsl:for-each>
76
                        <xsl:for-each select="//*[local-name()='relatedDataset']">
77

  
78
                            <!-- relatedDataset ids must be in the openaire format  -->
79
                            <xsl:variable name="datasetId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/>
80
                            <xsl:if test="string-length($datasetId) &gt; 0">
81
                                <xsl:variable name="resultDataset"
82
                                              select="dnet:oafResultResult_PublicationDataset_FromMDStore($resultId, $datasetId, 'isRelatedTo', $about)"/>
83
                                <xsl:variable name="datasetResult"
84
                                              select="dnet:oafResultResult_PublicationDataset_FromMDStore($datasetId, $resultId, 'isRelatedTo', $about)"/>
85
                                <ROW columnFamily="resultResult_publicationDataset_isRelatedTo" key="{$resultId}">
86
                                    <QUALIFIER name="{$datasetId}" type="base64">
87
                                        <xsl:value-of select="$resultDataset"/>
88
                                    </QUALIFIER>
89
                                </ROW>
90
                                <ROW columnFamily="resultResult_publicationDataset_isRelatedTo" key="{$datasetId}">
91
                                    <QUALIFIER name="{$resultId}" type="base64">
92
                                        <xsl:value-of select="$datasetResult"/>
93
                                    </QUALIFIER>
94
                                </ROW>
95
                            </xsl:if>
96
                        </xsl:for-each>
97
                        <xsl:for-each select="//*[local-name()='creator']">
98
                            <xsl:if test="string-length(normalize-space(.)) &gt; 0">
99
                                <xsl:variable name="personIdPart">
100
                                    <xsl:call-template name="personIdTmpl">
101
                                        <xsl:with-param name="objIdentifier" select="$objIdentifier"/>
102
                                    </xsl:call-template>
103
                                </xsl:variable>
104
                                <xsl:variable name="position" select="position()"/>
105
                                <xsl:variable name="personId" select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdPart))"/>
106
                                <xsl:variable name="person"
107
                                              select="dnet:oafPersonFromMDStore($personId, $about, $collectedfromid, $collectedfromname, $originalid, $dateofcollection, .)"/>
108
                                <xsl:variable name="personresult"
109
                                              select="dnet:oafPersonResult_Authorship_FromMDStore($personId, $resultId, $position, 'isAuthorOf', $about)"/>
110
                                <xsl:variable name="resultperson"
111
                                              select="dnet:oafPersonResult_Authorship_FromMDStore($resultId, $personId, $position, 'hasAuthor', $about)"/>
112
                                <xsl:if test="string-length($personId) &gt; 0">
113
                                    <ROW columnFamily="person" key="{$personId}">
114
                                        <QUALIFIER name="body" type="base64">
115
                                            <xsl:value-of select="$person"/>
116
                                        </QUALIFIER>
117
                                    </ROW>
118
                                    <ROW columnFamily="personResult_authorship_isAuthorOf" key="{$personId}">
119
                                        <QUALIFIER name="{$resultId}" type="base64">
120
                                            <xsl:value-of select="$personresult"/>
121
                                        </QUALIFIER>
122
                                    </ROW>
123
                                    <ROW columnFamily="personResult_authorship_hasAuthor" key="{$resultId}">
124
                                        <QUALIFIER name="{$personId}" type="base64">
125
                                            <xsl:value-of select="$resultperson"/>
126
                                        </QUALIFIER>
127
                                    </ROW>
128
                                    <ROW columnFamily="result" key="{$personId}">
129
                                        <QUALIFIER name="{$resultId}" type="base64">
130
                                            <xsl:value-of select="$result"/>
131
                                        </QUALIFIER>
132
                                    </ROW>
133
                                </xsl:if>
134

  
135
                                <!-- COAUTHORS -->
136
                                <xsl:if test="$writeCoAuthors = true()">
137
                                    <xsl:for-each select="../dc:creator">
138
                                        <xsl:if test="$position != position()">
139
                                            <xsl:variable name="coauthorIdPart">
140
                                                <xsl:call-template name="personIdTmpl">
141
                                                    <xsl:with-param name="objIdentifier" select="$objIdentifier"/>
142
                                                </xsl:call-template>
143
                                            </xsl:variable>
144
                                            <xsl:variable name="coauthorId" select="dnet:oafId('person', $namespaceprefix, normalize-space($coauthorIdPart))"/>
145
                                            <xsl:variable name="personperson"
146
                                                          select="dnet:oafPersonPerson_CoAuthorship_FromDMF($personId, $coauthorId, 'isCoAuthorOf', $about)"/>
147
                                            <ROW columnFamily="personPerson_coAuthorship_isCoAuthorOf" key="{$personId}">
148
                                                <QUALIFIER name="{$coauthorId}" type="base64">
149
                                                    <xsl:value-of select="$personperson"/>
150
                                                </QUALIFIER>
151
                                            </ROW>
152
                                        </xsl:if>
153
                                    </xsl:for-each>
154
                                </xsl:if>
155
                                <!-- / COAUTHORS -->
156
                            </xsl:if>
157
                        </xsl:for-each>
158
                    </ROWS>
159
                </xsl:if>
160
            </xsl:otherwise>
161
        </xsl:choose>
162
    </xsl:template>
163
    <xsl:template name="personIdTmpl">
164
        <xsl:param name="objIdentifier"/>
165
        <xsl:choose>
166
            <xsl:when test="$mergeIdForHomonyms = false()">
167
                <xsl:value-of select="concat($objIdentifier, '::', normalize-space(.))"/>
168
            </xsl:when>
169
            <xsl:otherwise>
170
                <xsl:value-of select="normalize-space(.)"/>
171
            </xsl:otherwise>
172
        </xsl:choose>
173
    </xsl:template>
174
</xsl:stylesheet>
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/odf_2_hbase.xsl
1
<?xml version="1.0" encoding="UTF-8"?>
2
<xsl:stylesheet version="1.0"
3
	xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/"
4
	xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
	xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf"
6
	xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreDataCiteToHbaseXsltFunctions"
7
	xmlns:exslt="http://exslt.org/common" extension-element-prefixes="exslt"
8
	exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt">
9

  
10
	<xsl:output omit-xml-declaration="yes" indent="yes" />
11
	<xsl:template match="/*">
12
		<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
13
		<xsl:variable name="dateofcollection" select="//dri:dateOfCollection" />
14
		<xsl:variable name="metadata"
15
			select="exslt:node-set(//*[local-name()='metadata']/*)" />
16
		<xsl:variable name="namespaceprefix">
17
			<xsl:choose>
18
				<!-- TODO check namespaceprefix length is 12. The fallback below must stay a quoted XPath string literal; unquoted it is a child-element name test and yields an empty prefix. -->
19
				<xsl:when test="string-length(//oaf:datasourceprefix) &gt; 0">
20
					<xsl:value-of select="//oaf:datasourceprefix" />
21
				</xsl:when>
22
				<xsl:otherwise>
23
					<xsl:value-of select="'unknown_____'" />
24
				</xsl:otherwise>
25
			</xsl:choose>
26
		</xsl:variable>
27

  
28
		<xsl:choose>
29
			<xsl:when
30
				test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'">
31
				<ROWS />
32
			</xsl:when>
33
			<xsl:otherwise>
34

  
35
				<xsl:variable name="resultId"
36
					select="dnet:oafSimpleId('result', //dri:objIdentifier)" />
37

  
38
				<xsl:if test="string-length($resultId) &gt; 0">
39
					<xsl:variable name="originalid"
40
						select="concat('',  //*[local-name() = 'resource']/*[local-name()='identifier'])" />
41
					<xsl:variable name="creators" select="//*[local-name() = 'creator']" />
42
					<xsl:variable name="titles" select="//*[local-name() = 'title']" />
43
					<xsl:variable name="subjects" select="//*[local-name() = 'subject']" />
44
					<xsl:variable name="publisher" select="//*[local-name() = 'publisher']" />
45
					<xsl:variable name="descriptions" select="//*[local-name() = 'description']" />
46
					<xsl:variable name="dates" select="//*[local-name() = 'date']" />
47
					<xsl:variable name="dateaccepted" select="//oaf:dateAccepted" />
48
					<xsl:variable name="resourceType"
49
						select="//*[local-name() = 'resourceType']" />
50
					<xsl:variable name="formats" select="//*[local-name() = 'format']" />
51
					<xsl:variable name="sizes" select="//*[local-name() = 'size']" />
52
					<xsl:variable name="rights" select="//oaf:accessrights" />
53
					<xsl:variable name="version" select="//*[local-name() = 'version']" />
54
					<xsl:variable name="language" select="//oaf:language" />
55
					<xsl:variable name="cobjcategory" select="//dr:CobjCategory" />
56

  
57
					<xsl:variable name="instanceURI">
58
						<xsl:choose>
59
							<xsl:when
60
								test="string-length( //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']) &gt; 0">
61
								<xsl:value-of
62
									select="concat('http://dx.doi.org','/', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']/text())" />
63
							</xsl:when>
64
							<xsl:otherwise>
65
								<xsl:value-of
66
									select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='URL'])" />
67
							</xsl:otherwise>
68
						</xsl:choose>
69
					</xsl:variable>
70

  
71
					<xsl:variable name="hostedbyid"
72
						select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" />
73
					<xsl:variable name="hostedbyname" select="concat('', //oaf:hostedBy/@name)" />
74
					<xsl:variable name="collectedfromid"
75
						select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" />
76
					<xsl:variable name="collectedfromname"
77
						select="concat('', //oaf:collectedFrom/@name)" />
78
					<xsl:variable name="dateOfCollection"
79
						select="concat('', //dri:dateOfCollection)" />
80

  
81
					<xsl:variable name="result"
82
						select="dnet:oafResult_FromDatacite($resultId, $dataInfo, $metadata, $titles, 
83
                        $subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType, 
84
                        $formats, $sizes, $language, $cobjcategory, $rights, $version, $hostedbyid, $hostedbyname,
85
                        $collectedfromid, $collectedfromname, $originalid, $instanceURI, $dateOfCollection)" />
86

  
87
					<ROWS>
88
						<ROW key="{$resultId}" columnFamily="result">
89
							<QUALIFIER name="body" type="base64">
90
								<xsl:value-of select="$result" />
91
							</QUALIFIER>
92
						</ROW>
93
						<xsl:for-each select="//*[local-name() = 'creator']">
94
							<xsl:variable name="personIdTemp">
95
								<xsl:choose>
96
									<xsl:when
97
										test="string-length(./*[local-name() = 'nameIdentifier']) &gt; 0">
98
										<xsl:value-of
99
											select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')" />
100
									</xsl:when>
101
									<xsl:otherwise>
102
										<xsl:value-of
103
											select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')" />
104
									</xsl:otherwise>
105
								</xsl:choose>
106
							</xsl:variable>
107
							<xsl:variable name="personId"
108
								select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))" />
109

  
110
							<xsl:variable name="originalPersonId"
111
								select="./*[local-name() = 'nameIdentifier']" />
112
							<xsl:variable name="position" select="position()" />
113
							<xsl:if test="string-length($personId) &gt; 0">
114
								<xsl:variable name="person"
115
									select="dnet:oafPerson_FromDatacite($personId, $dataInfo, $collectedfromid, $collectedfromname,$originalPersonId, $dateOfCollection ,normalize-space(./*[local-name() = 'creatorName']))" />
116

  
117
								<xsl:variable name="personresult"
118
									select="dnet:oafPersonResult_Authorship_FromDatacite($personId, $resultId, $position, 'isAuthorOf', $dataInfo)" />
119
								<xsl:variable name="resultperson"
120
									select="dnet:oafPersonResult_Authorship_FromDatacite($resultId, $personId, $position, 'hasAuthor', $dataInfo)" />
121
								<ROW key="{$personId}" columnFamily="person">
122
									<QUALIFIER name="body" type="base64">
123
										<xsl:value-of select="$person" />
124
									</QUALIFIER>
125
								</ROW>
126
								<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf">
127
									<QUALIFIER name="{$resultId}" type="base64">
128
										<xsl:value-of select="$personresult" />
129
									</QUALIFIER>
130
								</ROW>
131
								<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor">
132
									<QUALIFIER name="{$personId}" type="base64">
133
										<xsl:value-of select="$resultperson" />
134
									</QUALIFIER>
135
								</ROW>
136
							</xsl:if>
137
						</xsl:for-each>
138

  
139
						<xsl:for-each select="//*[local-name()='projectid']">
140
							<!-- For every projectid with a resolvable project id, emit the isProducedBy row keyed by the result and the produces row keyed by the project. -->
141
							<xsl:variable name="projectId"
142
								select="dnet:oafSplitId('project', normalize-space(.))" />
143

  
144
							<xsl:if test="string-length($projectId) &gt; 0">
145
								<xsl:variable name="resultproject"
146
									select="dnet:oafResultProject_Outcome_FromDatacite($resultId, $projectId, 'isProducedBy', $dataInfo)" />
147
								<xsl:variable name="projectresult"
148
									select="dnet:oafResultProject_Outcome_FromDatacite($projectId, $resultId, 'produces', $dataInfo)" />
149
								<!-- The relation payloads are built inside the guard so the extension functions are never called with an empty project id. -->
150
								<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy">
151
									<QUALIFIER name="{$projectId}" type="base64">
152
										<xsl:value-of select="$resultproject" />
153
									</QUALIFIER>
154
								</ROW>
155
								<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces">
156
									<QUALIFIER name="{$resultId}" type="base64">
157
										<xsl:value-of select="$projectresult" />
158
									</QUALIFIER>
159
								</ROW>
160
							</xsl:if>
161
						</xsl:for-each>
162

  
163
						<xsl:for-each select="//*[local-name()='relatedPublication']">
164

  
165
							<!-- relatedPublication ids must be in the openaire format; each non-empty id yields two isRelatedTo rows, one keyed by this result and one keyed by the publication -->
166
							<xsl:variable name="publicationId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/>
167

  
168
							<xsl:if test="string-length($publicationId) &gt; 0">
169

  
170
								<xsl:variable name="resultDataset" select="dnet:oafResultResult_PublicationDataset_FromDatacite($resultId, $publicationId, 'isRelatedTo', $dataInfo)"/>
171
								<xsl:variable name="datasetResult" select="dnet:oafResultResult_PublicationDataset_FromDatacite($publicationId, $resultId, 'isRelatedTo', $dataInfo)"/>
172

  
173
								<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
174
									<QUALIFIER name="{$publicationId}" type="base64"><xsl:value-of select="$resultDataset"/></QUALIFIER>
175
								</ROW>
176
								<ROW key="{$publicationId}" columnFamily="resultResult_publicationDataset_isRelatedTo">
177
									<QUALIFIER name="{$resultId}" type="base64"><xsl:value-of select="$datasetResult"/></QUALIFIER>
178
								</ROW>
179
							</xsl:if>
180
						</xsl:for-each>
181
					</ROWS>
182
				</xsl:if>
183
			</xsl:otherwise>
184
		</xsl:choose>
185
	</xsl:template>
186
</xsl:stylesheet>
modules/dnet-mapreduce-jobs/trunk/install.sh
1 1
#!/bin/bash
2 2

  
3
mvn clean install -DskipTests=true;
3
mvn clean install;
4 4
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly;
5 5
mvn assembly:assembly -DskipTests=true && mvn install:install-file -Dfile=target/dnet-mapreduce-jobs-0.0.8.4-SNAPSHOT-jar-with-dependencies.jar -DgroupId=eu.dnetlib -DartifactId=dnet-mapreduce-jobs-assembly -Dversion=0.0.8.4-SNAPSHOT -Dpackaging=jar
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java
21 21
import eu.dnetlib.miscutils.functional.xml.IndentXmlString;
22 22
import org.apache.commons.io.IOUtils;
23 23
import org.apache.commons.lang.StringUtils;
24
import org.apache.commons.logging.Log;
25
import org.apache.commons.logging.LogFactory;
24 26
import org.dom4j.Document;
25 27
import org.dom4j.DocumentException;
26 28
import org.dom4j.io.SAXReader;
......
32 34

  
33 35
public class XsltRowTransformerFactoryTest {
34 36

  
37
	private static final Log log = LogFactory.getLog(XsltRowTransformerFactoryTest.class);
35 38
	private XsltRowTransformerFactory factory;
36 39

  
37 40
	private EntityConfigTable entityConfigTable;
41
	private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/";
38 42

  
39 43
	@Before
40 44
	public void setUp() throws Exception {
......
55 59

  
56 60
		xslParams.put("mergeIdForHomonymsMap", m);
57 61

  
58
		final List<Row> rows = asRows(load("dmf_2_hbase_person.xsl"), xslParams, load("recordManyAuthors.xml"));
62
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordManyAuthors.xml"));
59 63
		int authorCount = 0;
60 64
		for (final Row row : rows) {
61 65
			for (final Column<String, byte[]> col : row.getColumns()) {
......
64 68
				final OafDecoder d = OafDecoder.decode(col.getValue());
65 69
				assertNotNull(d);
66 70

  
67
				System.out.println(d.getEntity().getPerson().getCoauthorList().size());
71
				log.debug(d.getEntity().getPerson().getCoauthorList().size());
68 72
			}
69 73
		}
70
		System.out.println("---> " + authorCount);
74
		log.info("authors' count: ---> " + authorCount);
71 75

  
72 76
	}
73 77

  
......
84 88

  
85 89
		xslParams.put("mergeIdForHomonymsMap", m);
86 90

  
87
		final List<Row> rows = asRows(load("dmf_2_hbase_person.xsl"), xslParams, load("recordArxiv.xml"));
91
		final List<Row> rows = asRows(loadFromTransformationProfile("oaf_person2hbase.xml"), xslParams, load("recordArxiv.xml"));
88 92

  
89 93
		for (final Row row : rows) {
90 94
			for (final Column<String, byte[]> col : row.getColumns()) {
91 95

  
92 96
				final OafDecoder d = OafDecoder.decode(col.getValue());
93
				System.out.println(d.getOaf());
97
				log.debug(d.getOaf());
94 98
			}
95 99
		}
96 100
	}
......
98 102
	@Test
99 103
	public void testParseClaimUpdate() throws Exception {
100 104

  
101
		doTest(load("odf_2_hbase.xsl"), load("recordClaimUpdate.xml"));
105
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaimUpdate.xml"));
102 106
	}
103 107

  
104 108
	@Test
105 109
	public void testParseDatasetPUB() throws Exception {
106 110

  
107
		doTest(load("odf_2_hbase.xsl"), load("recordDatasetPUB.xml"));
111
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatasetPUB.xml"));
108 112
	}
109 113

  
110 114
	@Test
111 115
	public void testParseClaim() throws Exception {
112 116

  
113
		doTest(load("dmf_2_hbase.xsl"), load("recordClaim.xml"));
117
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordClaim.xml"));
114 118
	}
115 119

  
116 120
	@Test
117 121
	public void testParseACM() throws Exception {
118 122

  
119
		doTest(load("dmf_2_hbase.xsl"), load("recordACM.xml"));
123
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordACM.xml"));
120 124
	}
121 125

  
122 126
	@Test
123 127
	public void testParseASB() throws Exception {
124 128

  
125
		doTest(load("dmf_2_hbase.xsl"), load("recordASB.xml"));
129
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("recordASB.xml"));
126 130
	}
127 131

  
128 132
	@Test
......
140 144
	@Test
141 145
	public void testParseDmf() throws Exception {
142 146

  
143
		doTest(load("dmf_2_hbase.xsl"), load("record.xml"));
147
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("record.xml"));
144 148
	}
145 149

  
146 150
	@Test
147 151
	public void testParseDatacite() throws Exception {
148 152

  
149
		doTest(load("datacite_2_hbase.xsl"), load("recordDatacite.xml"));
153
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite.xml"));
150 154
	}
151 155

  
152 156
	@Test
153 157
	public void testParseDatacite2() throws Exception {
154 158

  
155
		doTest(load("datacite_2_hbase.xsl"), load("recordDatacite2.xml"));
159
		doTest(loadFromTransformationProfile("odf2hbase.xml"), load("recordDatacite2.xml"));
156 160
	}
157 161

  
158 162
	@Test
159 163
	public void testLinkPangaea() throws Exception {
160 164

  
161 165
		final List<Row> rows = Lists.newArrayList();
162
		rows.addAll(asRows(load("datacite_2_hbase.xsl"), load("pangaeODF.xml")));
163
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("pangaeOAF.xml")));
166
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("pangaeODF.xml")));
167
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("pangaeOAF.xml")));
164 168
		rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
165 169

  
166 170
		printAll(mapAll(buildTable(rows)));
......
171 175

  
172 176
		final List<Row> rows = Lists.newArrayList();
173 177
		rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
174
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordCorda.xml")));
178
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordCorda.xml")));
175 179

  
176 180
		printAll(mapAll(buildTable(rows)));
177 181
	}
......
181 185

  
182 186
		final List<Row> rows = Lists.newArrayList();
183 187
		rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordFCT.xml")));
184
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordFCT.xml")));
188
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordFCT.xml")));
185 189

  
186 190
		printAll(mapAll(buildTable(rows)));
187 191
	}
......
191 195

  
192 196
		final List<Row> rows = Lists.newArrayList();
193 197
		rows.addAll(asRows(load("projects_2_hbase.xsl"), load("projectRecordWT.xml")));
194
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), load("recordWT.xml")));
198
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordWT.xml")));
195 199

  
196 200
		printAll(mapAll(buildTable(rows)));
197 201
	}
......
221 225
		xslParams.put("mergeIdForHomonymsMap", m);
222 226

  
223 227
		final List<Row> rows = Lists.newArrayList();
224
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordArxiv.xml")));
225
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordArxiv2.xml")));
226
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordPuma1.xml")));
227
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordPuma2.xml")));
228
		rows.addAll(asRows(load("dmf_2_hbase.xsl"), xslParams, load("recordUNIBI.xml")));
228
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv.xml")));
229
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordArxiv2.xml")));
230
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma1.xml")));
231
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordPuma2.xml")));
232
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), xslParams, load("recordUNIBI.xml")));
229 233

  
230 234
		printPersonFullnames(mapAll(buildTable(rows)));
231 235
	}
......
233 237
	@Test
234 238
	public void testParseDoajOAF() throws Exception {
235 239

  
236
		doTest(load("dmf_2_hbase.xsl"), load("doajUniversityRecord.xml"));
240
		doTest(loadFromTransformationProfile("oaf2hbase.xml"), load("doajUniversityRecord.xml"));
237 241
	}
238 242

  
239 243
	@Test
......
244 248

  
245 249
	// @Test
246 250
	public void testFromMongodbCompressedDump() throws Exception {
247
		doTestJsonGz(load("dmf_2_hbase.xsl"), load("mdstore_cleaned.json.gz"));
251
		doTestJsonGz(loadFromTransformationProfile("oaf2hbase.xml"), load("mdstore_cleaned.json.gz"));
248 252
	}
249 253

  
250 254
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
251 255
		try {
252 256
			final List<Row> rows = asRows(xsltStream, recordStream);
253 257

  
254
			System.out.println(rows);
258
			log.info(rows);
255 259

  
256 260
			final Map<String, Map<String, Map<String, byte[]>>> table = buildTable(rows);
257 261

  
......
539 543
				if ((xpaths != null) && !xpaths.isEmpty() && (xpaths.get(kd.getType()) != null)) {
540 544
					final Document doc = r.read(new StringReader(val));
541 545

  
542
					System.out.println("\n" + e.getKey());
546
					log.debug("\n" + e.getKey());
543 547
					for (final String xpath : xpaths.get(kd.getType())) {
544
						System.out.println(doc.valueOf(xpath));
548
						log.debug(doc.valueOf(xpath));
545 549
					}
546 550
				} else {
547 551

  
548
					System.out.println(val);
552
					log.info(val);
549 553
				}
550 554
			}
551 555
		}
......
554 558
	private void printNoIndent(final Map<String, XmlRecordFactory> builders) {
555 559
		for (final Entry<String, XmlRecordFactory> e : builders.entrySet()) {
556 560
			if (e.getValue().isValid()) {
557
				System.out.println(e.getValue().build());
561
				log.debug(e.getValue().build());
558 562
			} else {
559
				System.out.println("invalid builder: " + e.getKey());
563
				log.debug("invalid builder: " + e.getKey());
560 564
			}
561 565
		}
562 566
	}
......
576 580
		return getClass().getResourceAsStream(fileName);
577 581
	}
578 582

  
583
	private InputStream loadFromTransformationProfile(final String profilePath) {
584
		// Reads the profile at basePathProfiles + profilePath from the classpath and returns the stylesheet element found under //SCRIPT/CODE as a stream.
585
		final String resource = basePathProfiles + profilePath;
586
		log.info("Loading xslt from: " + resource);
587
		final InputStream profile = getClass().getResourceAsStream(resource);
588
		if (profile == null) throw new IllegalArgumentException("Transformation profile not found on classpath: " + resource);
589
		try {
590
			final org.dom4j.Node stylesheet = new SAXReader().read(profile).selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']");
591
			if (stylesheet == null) throw new IllegalStateException("No stylesheet element under //SCRIPT/CODE in profile: " + resource);
592
			return IOUtils.toInputStream(stylesheet.asXML());
593
		} catch (DocumentException e) {
594
			throw new RuntimeException("Unable to parse transformation profile: " + resource, e); // cause is preserved, so no separate printStackTrace()
595
		}
596
	}
597

  
579 598
	@Test
599
	public void testLoadFromTransformationProfile() throws IOException {
600
		InputStream in = loadFromTransformationProfile("oaf2hbase.xml");
601
		log.info(IOUtils.toString(in));
602
	}
603

  
604
	@Test
580 605
	public void test_template() throws Exception {
581
		final String xslt = IOUtils.toString(load("dmf_2_hbase.xsl"));
606
		final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml"));
582 607
		final XsltRowTransformer transformer = factory.getTransformer(xslt);
583 608
		assertNotNull(transformer);
584 609

  
modules/dnet-mapreduce-jobs/trunk/src/test/resources/log4j.properties
1
### Root Level ###
2
log4j.rootLogger=WARN, CONSOLE
3

  
4
### Configuration for the CONSOLE appender ###
5
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
6
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
7
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n
8

  
9
org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger
10

  
11
### Application Level ###
12
log4j.logger.eu.dnetlib=INFO
13
log4j.logger.eu.dnetlib.data.transform.xml.AbstractDNetOafXsltFunctions=DEBUG
14
log4j.logger.eu.dnetlib.data.transform.XsltRowTransformerFactoryTest=INFO
modules/dnet-mapreduce-jobs/trunk/pom.xml
4 4
	<parent>
5 5
		<groupId>eu.dnetlib</groupId>
6 6
		<artifactId>dnet-hadoop-parent</artifactId>
7
		<version>1.0.0</version>
7
		<version>1.0.0-SNAPSHOT</version>
8 8
		<relativePath/>
9 9
	</parent>
10 10
	<modelVersion>4.0.0</modelVersion>
......
225 225
			<groupId>org.mockito</groupId>
226 226
			<artifactId>mockito-all</artifactId>
227 227
			<version>1.8.5</version>
228
			<scope>test</scope>
228 229
		</dependency>
230
		<dependency>
231
			<groupId>eu.dnetlib</groupId>
232
			<artifactId>dnet-openaireplus-profiles</artifactId>
233
			<version>[1.0.9-SNAPSHOT]</version>
234
			<scope>test</scope>
235
		</dependency>
229 236

  
230 237
	</dependencies>
231 238
</project>

Also available in: Unified diff