Project

General

Profile

« Previous | Next » 

Revision 33596

[maven-release-plugin] copy for tag icm-iis-statistics-1.0.0

View differences:

modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
    <parent>
4
        <groupId>eu.dnetlib</groupId>
5
        <artifactId>icm-iis-parent-container</artifactId>
6
        <version>1.0.0</version>
7
    </parent>
8
    <modelVersion>4.0.0</modelVersion>
9
    <artifactId>icm-iis-statistics</artifactId>
10
    <packaging>jar</packaging>
11
    <version>1.0.0</version>
12

  
13
    <scm>
14
      <developerConnection>
15
        scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0
16
      </developerConnection>
17
    </scm>
18
    
19
    <properties>
20
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
21
    </properties>
22
    <dependencies>
23
        <dependency>
24
            <groupId>junit</groupId>
25
            <artifactId>junit</artifactId>
26
            <version>4.10</version>
27
            <scope>test</scope>
28
        </dependency>    
29
            
30
        <dependency>
31
			<groupId>eu.dnetlib</groupId>
32
			<artifactId>icm-iis-core</artifactId>
33
			<version>1.0.0</version>
34
		</dependency>
35
		<dependency>
36
			<groupId>eu.dnetlib</groupId>
37
			<artifactId>icm-iis-core</artifactId>
38
			<version>1.0.0</version>
39
			<type>test-jar</type>
40
			<scope>test</scope>
41
		</dependency>
42
        <dependency>
43
            <groupId>eu.dnetlib</groupId>
44
            <artifactId>icm-iis-schemas</artifactId>
45
            <version>1.0.0</version>
46
        </dependency>
47
        <!-- required after introducing 'provided' scope for hadoop libs -->
48
        <dependency>
49
			<groupId>org.apache.hadoop</groupId>
50
			<artifactId>hadoop-common</artifactId>
51
			<version>${iis.hadoop.common.version}</version>
52
			<scope>provided</scope>
53
		</dependency>
54
		<!-- Needed by Oozie tests { -->
55
		<!-- required after introducing 'provided' scope for hadoop dependencies -->
56
		<dependency>
57
			<groupId>org.apache.oozie</groupId>
58
			<artifactId>oozie-core</artifactId>
59
			<version>${iis.oozie.version}</version>
60
			<scope>test</scope>
61
		</dependency>
62
		<dependency>
63
			<groupId>org.apache.hadoop</groupId>
64
			<artifactId>hadoop-hdfs</artifactId>
65
			<version>${iis.hadoop.hdfs.version}</version>
66
			<scope>test</scope>
67
		</dependency>
68
		<!-- end of required after introducing 'provided' scope for hadoop dependencies -->
69
		<dependency>
70
			<groupId>org.apache.oozie</groupId>
71
			<artifactId>oozie-core</artifactId>
72
			<version>${iis.oozie.version}</version>
73
			<type>test-jar</type>
74
			<scope>test</scope>
75
		</dependency>
76
		<dependency>
77
			<groupId>org.apache.hadoop</groupId>
78
			<artifactId>hadoop-hdfs</artifactId>
79
			<version>${iis.hadoop.hdfs.version}</version>
80
			<type>test-jar</type>
81
			<scope>test</scope>
82
		</dependency>
83
		<dependency>
84
			<groupId>org.apache.hadoop</groupId>
85
			<artifactId>hadoop-test</artifactId>
86
			<version>${iis.hadoop.test.version}</version>
87
			<scope>test</scope>
88
		</dependency>
89
		<dependency>
90
			<groupId>org.apache.hadoop</groupId>
91
			<artifactId>hadoop-common</artifactId>
92
			<version>${iis.hadoop.common.version}</version>
93
			<type>test-jar</type>
94
			<scope>test</scope>
95
        </dependency>
96

  
97
        <dependency>
98
            <groupId>org.apache.hive</groupId>
99
            <artifactId>hive-exec</artifactId>
100
            <version>${iis.hive.version}</version>
101
        </dependency>
102
        <dependency>
103
            <groupId>org.apache.hive</groupId>
104
            <artifactId>hive-cli</artifactId>
105
            <version>${iis.hive.version}</version>
106
        </dependency>
107
        <dependency>
108
            <groupId>org.apache.hive</groupId>
109
            <artifactId>hive-builtins</artifactId>
110
            <version>${iis.hive.version}</version>
111
        </dependency>
112
    </dependencies>
113
    <repositories>
114
	    <!-- This repository contains our patched 
115
	    version of "avro" and "avro-mapred" modules (see the dependencies section)
116
	    This entry might be removed when the patch to these modules becomes 
117
	    a part of the official Avro release.-->
118
	    <repository>
119
			<id>dnet-deps</id>
120
			<name>dnet dependencies</name>
121
			<url>http://maven.research-infrastructures.eu/nexus/content/repositories/dnet-deps</url>
122
			<releases>
123
				<enabled>true</enabled>
124
			</releases>
125
			<snapshots>
126
				<enabled>false</enabled>
127
			</snapshots>
128
			<layout>default</layout>
129
		</repository>
130
    </repositories>
131
</project>
0 132

  
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/main/resources/eu/dnetlib/iis/statistics/main/oozie_app/lib/scripts/generator.q
1
CREATE EXTERNAL TABLE document
2
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
3
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
4
STORED AS
5
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
6
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
7
LOCATION '${input_document_authors_citations}'
8
TBLPROPERTIES ('avro.schema.literal'='${schema_input_document_authors_citations}');
9

  
10
CREATE EXTERNAL TABLE projectId
11
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
12
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
13
STORED AS
14
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
15
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
16
LOCATION '${input_project_id}'
17
TBLPROPERTIES ('avro.schema.literal'='${schema_input_project_id}');
18

  
19
CREATE EXTERNAL TABLE personId
20
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
21
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
22
STORED AS
23
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
24
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
25
LOCATION '${input_person_id}'
26
TBLPROPERTIES ('avro.schema.literal'='${schema_input_person_id}');
27

  
28

  
29
CREATE TABLE document_statistics
30
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
31
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
32
STORED AS
33
INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
34
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
35
LOCATION '${output_document_statistics}'
36
TBLPROPERTIES ('avro.schema.literal'='${schema_output_document_statistics}');
37

  
38
CREATE TABLE author_statistics
39
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
40
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
41
STORED AS
42
INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
43
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
44
LOCATION '${output_author_statistics}'
45
TBLPROPERTIES ('avro.schema.literal'='${schema_output_author_statistics}');
46

  
47
CREATE TABLE project_statistics
48
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
49
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
50
STORED AS
51
INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
52
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
53
LOCATION '${output_project_statistics}'
54
TBLPROPERTIES ('avro.schema.literal'='${schema_output_project_statistics}');
55

  
56
CREATE TABLE global_statistics
57
COMMENT "A table backed by Avro data with the Avro schema stored in HDFS"
58
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
59
STORED AS
60
INPUTFORMAT  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
61
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
62
LOCATION '${output_global_statistics}'
63
TBLPROPERTIES ('avro.schema.literal'='${schema_output_global_statistics}');
64

  
65
-- UDFs
66

  
67
-- Session-scoped aliases for the custom Hive functions used by the queries below.
-- Five of them bind to classes in the eu.dnetlib.iis.core.hive package and
-- gen_coauthors binds to this module's eu.dnetlib.iis.statistics.hive.GenerateCoauthorsUDF.
-- NOTE(review): count_array and list_to_map bind to two similarly named classes
-- (CountArrayElementsUDF vs CountArrayElementsUDAF) - the first is used on an
-- already collected array, the second directly as an aggregate in a GROUP BY query.
CREATE TEMPORARY FUNCTION collect_all AS 'eu.dnetlib.iis.core.hive.CollectAllUDAF';
68
CREATE TEMPORARY FUNCTION count_array AS 'eu.dnetlib.iis.core.hive.CountArrayElementsUDF';
69
CREATE TEMPORARY FUNCTION empty_array AS 'eu.dnetlib.iis.core.hive.CreateEmptyArrayUDF';
70
CREATE TEMPORARY FUNCTION merge_maps AS 'eu.dnetlib.iis.core.hive.MergeStringIntMapsUDAF';
71
CREATE TEMPORARY FUNCTION list_to_map AS 'eu.dnetlib.iis.core.hive.CountArrayElementsUDAF';
72
CREATE TEMPORARY FUNCTION gen_coauthors AS 'eu.dnetlib.iis.statistics.hive.GenerateCoauthorsUDF';
73

  
74
-- common tables
75

  
76
-- citations: one row per reference. Each document row is repeated once for every
-- element of its referencedDocumentsIds array, exposed as refDocId.
-- documentId, year and isPublished describe the citing document.
-- The lateral view is not OUTER, so documents whose array yields no elements
-- contribute no rows here.
create table citations
77
location '${workingDir}/citations'
78
as
79
select documentId, year, refDocId, isPublished
80
from document lateral view explode(referencedDocumentsIds) ids AS refDocId;
81

  
82
-- citationsWithNulls: citations seen from the cited side (targetId).
-- The left outer join starts from document, so every document appears at least once.
-- For a document that nobody cites, sourceId, sourcePublished and year are NULL.
-- year is taken from citations, i.e. it is the year of the citing document.
-- References to ids that are not present in document are dropped by the join.
create table citationsWithNulls
83
location '${workingDir}/citationsWithNulls'
84
as
85
select citations.documentId as sourceId, citations.isPublished as sourcePublished, citations.year, document.documentId as targetId, document.isPublished as targetPublished from
86
document left outer join citations
87
on document.documentId = citations.refDocId;
88

  
89
-- publishedSourceCitations: rows of citationsWithNulls whose citing document is published.
-- The sourceId-is-null branch additionally keeps the placeholder rows of uncited documents.
-- A document cited only by unpublished documents has no row here at all.
create table publishedSourceCitations
90
location '${workingDir}/publishedSourceCitations'
91
as
92
select * from citationsWithNulls where sourcePublished or sourceId is null;
93

  
94
-- docStats: citation statistics per cited document (one row per targetId),
-- counting citations from all citing documents.
--   published: first element of collect_all(targetPublished). Every row of a group
--     comes from the same target document row, so the values should all be equal
--     (assumes documentId is unique in document - TODO confirm).
--   numberOfCitations: count(sourceId) skips NULLs, so an uncited document gets 0.
--   numberOfCitationsPerYear: built by the custom count_array/collect_all functions
--     from the citing documents' years. Presumably a map year -> count - confirm
--     against the UDF implementations.
--   numberOfPapersCitedAtLeastXTimes: a 0/1 indicator per threshold (1, 10, 50, 100,
--     250, 500). Downstream queries pass these maps to merge_maps, presumably to
--     sum them into paper counts - confirm.
create table docStats
95
location '${workingDir}/docStats'
96
as
97
select
98
    targetId as docId,
99
    collect_all(targetPublished)[0] as published,
100
    cast(count(sourceId) as INT) as numberOfCitations,
101
    count_array(collect_all(year)) as numberOfCitationsPerYear,
102
    map("1", if(count(sourceId) >= 1, 1, 0),
103
        "10", if(count(sourceId) >= 10, 1, 0),
104
        "50", if(count(sourceId) >= 50, 1, 0),
105
        "100", if(count(sourceId) >= 100, 1, 0),
106
        "250", if(count(sourceId) >= 250, 1, 0),
107
        "500", if(count(sourceId) >= 500, 1, 0)) as numberOfPapersCitedAtLeastXTimes
108
from citationsWithNulls group by targetId;
109

  
110
-- publishedSourceDocStats: same per-document aggregation as docStats, but computed
-- over publishedSourceCitations, i.e. counting only citations whose citing document
-- is published. Documents cited exclusively by unpublished documents have no row
-- in the input and therefore no row in this table.
create table publishedSourceDocStats
111
location '${workingDir}/publishedSourceDocStats'
112
as
113
select
114
    targetId as docId,
115
    collect_all(targetPublished)[0] as published,
116
    cast(count(sourceId) as INT) as numberOfCitations,
117
    count_array(collect_all(year)) as numberOfCitationsPerYear,
118
    map("1", if(count(sourceId) >= 1, 1, 0),
119
        "10", if(count(sourceId) >= 10, 1, 0),
120
        "50", if(count(sourceId) >= 50, 1, 0),
121
        "100", if(count(sourceId) >= 100, 1, 0),
122
        "250", if(count(sourceId) >= 250, 1, 0),
123
        "500", if(count(sourceId) >= 500, 1, 0)) as numberOfPapersCitedAtLeastXTimes
124
from publishedSourceCitations group by targetId;
125

  
126
-- allDocStatistics: one row per document combining the statistics over all citing
-- documents (docStats) with the statistics over published citing documents
-- (publishedSourceDocStats).
-- The left outer join keeps documents that have no row in publishedSourceDocStats.
-- For those the coalesce calls supply defaults: 0 citations, a per-year map holding
-- the single entry unknown -> 0, and a threshold map with every threshold set to 0.
create table allDocStatistics
127
location '${workingDir}/allDocStatistics'
128
as
129
select
130
    docStats.docId,
131
    docStats.published,
132
    docStats.numberOfCitations,
133
    docStats.numberOfCitationsPerYear,
134
    docStats.numberOfPapersCitedAtLeastXTimes,
135
    coalesce(publishedSourceDocStats.numberOfCitations, 0) as numberOfPublishedCitations,
136
    coalesce(publishedSourceDocStats.numberOfCitationsPerYear, map('unknown', 0)) as numberOfPublishedCitationsPerYear,
137
    coalesce(publishedSourceDocStats.numberOfPapersCitedAtLeastXTimes, map("1", 0, "10", 0, "50", 0, "100", 0, "250", 0, "500", 0)) as numberOfPapersCitedAtLeastXTimesByPublished
138
from docStats left outer join publishedSourceDocStats
139
on docStats.docId = publishedSourceDocStats.docId;
140

  
141
-- publishedDocStatistics: the rows of allDocStatistics whose published flag is true.
-- Used below as the input for every publishedPapers aggregate.
create table publishedDocStatistics
142
location '${workingDir}/publishedDocStatistics'
143
as
144
select * from allDocStatistics
145
where published;
146

  
147

  
148
-- document statistics
149

  
150
-- Fills the Avro-backed output table document_statistics: one row per document
-- holding its citation count and per-year breakdown, once over all citing papers
-- and once over published citing papers. Reads allDocStatistics, so unpublished
-- documents are included as well.
insert overwrite table document_statistics
151
select
152
    docId as documentId,
153
    named_struct(
154
        "citationsFromAllPapers", named_struct(
155
            "numberOfCitations", numberOfCitations,
156
            "numberOfCitationsPerYear", numberOfCitationsPerYear),
157
        "citationsFromPublishedPapers", named_struct(
158
            "numberOfCitations", numberOfPublishedCitations,
159
            "numberOfCitationsPerYear", numberOfPublishedCitationsPerYear)
160
    ) as statistic
161
from allDocStatistics;
162

  
163

  
164
-- global statistics
165

  
166
create table globalAll
167
location '${workingDir}/globalAll'
168
as
169
select
170
    named_struct(
171
        'numberOfPapers', cast(count(docId) as INT),
172
        'citationsFromAllPapers',
173
        named_struct(
174
            'basic',
175
            named_struct(
176
                'numberOfCitations', cast(sum(numberOfCitations) as INT),
177
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
178
            ),
179
            'averageNumberOfCitationsPerPaper', cast(avg(numberOfCitations) as FLOAT),
180
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
181
        ),
182
        'citationsFromPublishedPapers',
183
        named_struct(
184
            'basic',
185
            named_struct(
186
                'numberOfCitations', cast(sum(numberOfPublishedCitations) as INT),
187
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
188
            ),
189
            'averageNumberOfCitationsPerPaper', cast(avg(numberOfPublishedCitations) as FLOAT),
190
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
191
        )
192
    ) as allPapers
193
from allDocStatistics;
194

  
195
create table globalPublished
196
location '${workingDir}/globalPublished'
197
as
198
select
199
    named_struct(
200
        'numberOfPapers', cast(count(docId) as INT),
201
        'citationsFromAllPapers',
202
        named_struct(
203
            'basic',
204
            named_struct(
205
                'numberOfCitations', cast(sum(numberOfCitations) as INT),
206
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
207
            ),
208
            'averageNumberOfCitationsPerPaper', cast(avg(numberOfCitations) as FLOAT),
209
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
210
        ),
211
        'citationsFromPublishedPapers',
212
        named_struct(
213
            'basic',
214
            named_struct(
215
                'numberOfCitations', cast(sum(numberOfPublishedCitations) as INT),
216
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
217
            ),
218
            'averageNumberOfCitationsPerPaper', cast(avg(numberOfPublishedCitations) as FLOAT),
219
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
220
        )
221
    ) as publishedPapers
222
from publishedDocStatistics;
223

  
224
-- Fills the output table global_statistics.
-- globalAll and globalPublished are each produced by an aggregate query without
-- GROUP BY, so each holds a single row. The join deliberately has no ON clause
-- and pairs those two rows into one output row.
insert overwrite table global_statistics
225
select globalAll.allPapers, globalPublished.publishedPapers
226
from globalAll join globalPublished;
227

  
228

  
229
-- project statistics
230

  
231
-- projectDocument: one row per (project, document) link.
-- The subquery explodes the projectIds array of each document. The left outer join
-- starts from the projectId table, so a project without any document is kept with
-- documentId NULL, while links to ids missing from projectId are dropped.
create table projectDocument
232
location '${workingDir}/projectDocument'
233
as
234
select projectId.id as projId, projDoc.documentId from
235
projectId left outer join
236
(select documentId, projectId
237
from document lateral view explode(projectIds) ids as projectId) projDoc
238
on projectId.id = projDoc.projectId;
239

  
240
create table projectAll
241
location '${workingDir}/projectAll'
242
as
243
select
244
projId,
245
    named_struct(
246
        'numberOfPapers', cast(count(docId) as INT),
247
        'citationsFromAllPapers',
248
        named_struct(
249
            'basic',
250
            named_struct(
251
                'numberOfCitations', coalesce(cast(sum(numberOfCitations) as INT), 0),
252
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
253
            ),
254
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfCitations) as FLOAT), 0),
255
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
256
        ),
257
        'citationsFromPublishedPapers',
258
        named_struct(
259
            'basic',
260
            named_struct(
261
                'numberOfCitations', coalesce(cast(sum(numberOfPublishedCitations) as INT), 0),
262
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
263
            ),
264
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfPublishedCitations) as FLOAT), 0),
265
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
266
        )
267
    ) as allPapers
268
from
269
(select * from
270
projectDocument left outer join allDocStatistics
271
on projectDocument.documentId = allDocStatistics.docId) merged
272
group by projId;
273

  
274
create table projectPublished
275
location '${workingDir}/projectPublished'
276
as
277
select
278
projId,
279
named_struct(
280
        'numberOfPapers', cast(count(docId) as INT),
281
        'citationsFromAllPapers',
282
        named_struct(
283
            'basic',
284
            named_struct(
285
                'numberOfCitations', coalesce(cast(sum(numberOfCitations) as INT), 0),
286
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
287
            ),
288
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfCitations) as FLOAT), 0),
289
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
290
        ),
291
        'citationsFromPublishedPapers',
292
        named_struct(
293
            'basic',
294
            named_struct(
295
                'numberOfCitations', coalesce(cast(sum(numberOfPublishedCitations) as INT), 0),
296
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
297
            ),
298
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfPublishedCitations) as FLOAT), 0),
299
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
300
        )
301
    ) as publishedPapers
302
from
303
(select * from
304
projectDocument left outer join publishedDocStatistics
305
on projectDocument.documentId = publishedDocStatistics.docId) merged
306
group by projId;
307

  
308
-- Fills the output table project_statistics: per project, the statistics over all
-- of its papers (projectAll) next to the statistics over its published papers
-- (projectPublished).
-- Both inputs are grouped by projId over the same projectDocument rows, so the
-- inner join is expected to find a match for every project.
insert overwrite table project_statistics
309
select
310
    projectAll.projId as projectId,
311
    named_struct(
312
        'allPapers', projectAll.allPapers,
313
        'publishedPapers', projectPublished.publishedPapers
314
    ) as statistic
315
from projectAll join projectPublished
316
on projectAll.projId = projectPublished.projId;
317

  
318

  
319
-- author stats
320

  
321
-- authorDocument: one row per (author, document) link.
-- The subquery explodes the authorIds array of each document. The left outer join
-- starts from the personId table, so a person without any document is kept with
-- documentId NULL, while author ids missing from personId are dropped.
create table authorDocument
322
location '${workingDir}/authorDocument'
323
as
324
select personId.id as authorId, authDoc.documentId from
325
personId left outer join
326
(select documentId, authorId
327
from document lateral view explode(authorIds) ids as authorId) authDoc
328
on personId.id = authDoc.authorId;
329

  
330
create table authorAll
331
location '${workingDir}/authorAll'
332
as
333
select
334
authorId,
335
named_struct(
336
        'numberOfPapers', cast(count(docId) as INT),
337
        'citationsFromAllPapers',
338
        named_struct(
339
            'basic',
340
            named_struct(
341
                'numberOfCitations', coalesce(cast(sum(numberOfCitations) as INT), 0),
342
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
343
            ),
344
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfCitations) as FLOAT), 0),
345
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
346
        ),
347
        'citationsFromPublishedPapers',
348
        named_struct(
349
            'basic',
350
            named_struct(
351
                'numberOfCitations', coalesce(cast(sum(numberOfPublishedCitations) as INT), 0),
352
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
353
            ),
354
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfPublishedCitations) as FLOAT), 0),
355
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
356
        )
357
    ) as allPapers
358
from
359
(select * from
360
authorDocument left outer join allDocStatistics
361
on authorDocument.documentId = allDocStatistics.docId) merged
362
group by authorId;
363

  
364
create table authorPublished
365
location '${workingDir}/authorPublished'
366
as
367
select
368
authorId,
369
named_struct(
370
        'numberOfPapers', cast(count(docId) as INT),
371
        'citationsFromAllPapers',
372
        named_struct(
373
            'basic',
374
            named_struct(
375
                'numberOfCitations', coalesce(cast(sum(numberOfCitations) as INT), 0),
376
                'numberOfCitationsPerYear', merge_maps(numberOfCitationsPerYear)
377
            ),
378
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfCitations) as FLOAT), 0),
379
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimes)
380
        ),
381
        'citationsFromPublishedPapers',
382
        named_struct(
383
            'basic',
384
            named_struct(
385
                'numberOfCitations', coalesce(cast(sum(numberOfPublishedCitations) as INT), 0),
386
                'numberOfCitationsPerYear', merge_maps(numberOfPublishedCitationsPerYear)
387
            ),
388
            'averageNumberOfCitationsPerPaper', coalesce(cast(avg(numberOfPublishedCitations) as FLOAT), 0),
389
            'numberOfPapersCitedAtLeastXTimes', merge_maps(numberOfPapersCitedAtLeastXTimesByPublished)
390
        )
391
    ) as publishedPapers
392
from
393
(select * from
394
authorDocument left outer join publishedDocStatistics
395
on authorDocument.documentId = publishedDocStatistics.docId) merged
396
group by authorId;
397

  
398
-- docCoauthors: one row per (document, author) pair. Each row also carries the
-- complete authorIds array of the document, which the next step aggregates per author.
create table docCoauthors
399
location '${workingDir}/docCoauthors'
400
as
401
select documentId, authorId, authorIds
402
from document lateral view explode(authorIds) ids as authorId;
403

  
404
-- coAuthorsMap: per author, aggregates the author lists of all of their documents
-- with list_to_map (CountArrayElementsUDAF). The result is fed to gen_coauthors,
-- which requires a map<string, int>. Presumably it maps each author id to the
-- number of documents shared with this author - confirm against the UDAF.
-- NOTE(review): the storage directory is coauthorstmp, not a directory named
-- after the table as for the other working tables.
create table coAuthorsMap
405
location '${workingDir}/coauthorstmp'
406
as
407
select authorId, list_to_map(authorIds) as coAuthorsMap
408
from docCoauthors
409
group by authorId;
410

  
411
-- coauthors: converts each author's map into the list form produced by
-- gen_coauthors (GenerateCoauthorsUDF), a list of structs with the fields
-- id and coauthoredPapersCount.
-- The author id is passed in as well, presumably so that the UDF can leave the
-- author out of their own co-author list - confirm against the UDF.
create table coauthors
412
location '${workingDir}/coauthors'
413
as
414
select authorId, gen_coauthors(authorId, coAuthorsMap) as coAuthors
415
from coAuthorsMap;
416

  
417

  
418
-- coauthorsFull: guarantees a coAuthor value for every person in personId.
-- People without a row in coauthors get the result of empty_array instead of NULL.
-- The named_struct argument presumably only tells empty_array which element type
-- to produce and its field values are not meant to appear in the output -
-- confirm against CreateEmptyArrayUDF.
create table coauthorsFull
419
location '${workingDir}/coauthorsFull'
420
as
421
select personId.id as authorId, coalesce(coauthors.coAuthors, empty_array(named_struct("id", "id", "coauthoredPapersCount", 0))) as coAuthor from
422
personId left outer join coauthors
423
on personId.id = coauthors.authorId;
424

  
425
-- Fills the output table author_statistics: per author, the core statistics over
-- all papers and over published papers, plus the co-author list.
-- authorAll, authorPublished and coauthorsFull are all derived from the personId
-- table through left outer joins, so the inner joins are expected to match every
-- person.
insert overwrite table author_statistics
426
select
427
    authorAll.authorId, named_struct(
428
        "core", named_struct(
429
            'allPapers', authorAll.allPapers,
430
            'publishedPapers', authorPublished.publishedPapers),
431
        "coAuthors", coauthorsFull.coAuthor
432
    ) as statistic
433
from authorAll join authorPublished
434
on authorAll.authorId = authorPublished.authorId
435
join coauthorsFull on authorPublished.authorId = coauthorsFull.authorId;
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/main/resources/eu/dnetlib/iis/statistics/main/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.4" name="statistics_main">
2
    	
3
	<parameters>
4
		<property>
5
			<name>input_document_authors_citations</name>
6
			<description>input document with authors and citation</description>
7
		</property>
8
		<property>
9
			<name>input_person_id</name>
10
			<description>input person id</description>
11
		</property>
12
        <property>
13
			<name>input_project_id</name>
14
			<description>input project id</description>
15
		</property>
16
        <property>
17
			<name>output_document_statistics</name>
18
			<description>output document statistics</description>
19
		</property>
20
        <property>
21
			<name>output_author_statistics</name>
22
			<description>output author statistics</description>
23
		</property>
24
        <property>
25
			<name>output_project_statistics</name>
26
			<description>output project statistics</description>
27
		</property>
28
        <property>
29
			<name>output_global_statistics</name>
30
			<description>output global statistics</description>
31
		</property>
32
	</parameters>
33
    
34
    <global>
35
        <job-tracker>${jobTracker}</job-tracker>
36
        <name-node>${nameNode}</name-node>
37
        <configuration>
38
            <property>
39
                <name>mapred.job.queue.name</name>
40
                <value>${queueName}</value>
41
            </property>
42
		</configuration>
43
	</global>
44
    
45
    <start to="generate-schema"/>
46
    
47
    <action name="generate-schema">
48
		<java>
49
			<main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
50
			<arg>eu.dnetlib.iis.statistics.schemas.DocumentWithAuthorsAndCitations</arg>
51
			<arg>eu.dnetlib.iis.statistics.schemas.PersonId</arg>
52
			<arg>eu.dnetlib.iis.statistics.schemas.ProjectId</arg>
53
			<arg>eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics</arg>
54
			<arg>eu.dnetlib.iis.statistics.schemas.AuthorToAuthorStatistics</arg>
55
			<arg>eu.dnetlib.iis.statistics.schemas.ProjectToProjectStatistics</arg>
56
			<arg>eu.dnetlib.iis.statistics.schemas.CommonCoreStatistics</arg>
57
			<capture-output />
58
		</java>
59
		<ok to="generator" />
60
		<error to="fail" />
61
	</action>
62
    
63
    <action name="generator">
64
        <hive xmlns="uri:oozie:hive-action:0.2">
65
            <job-tracker>${jobTracker}</job-tracker>
66
            <name-node>${nameNode}</name-node>
67
            <prepare>
68
                <delete path="${nameNode}${workingDir}/generator" />
69
				<mkdir path="${nameNode}${workingDir}/generator" />
70
                <mkdir path="${nameNode}${workingDir}/generator/working_dir" />
71
            </prepare>
72
            <configuration>
73
                <property>
74
                    <name>mapred.job.queue.name</name>
75
                    <value>${queueName}</value>
76
                </property>
77
                <property>
78
                    <name>oozie.hive.defaults</name>
79
                    <value>hive-site.xml</value>
80
                </property>
81
                 <property>
82
                    <name>hive.exec.scratchdir</name>
83
                    <value>/tmp/hive-${wf:user()}</value>
84
                </property>
85
                <property>
86
                    <name>mapred.reduce.tasks</name>
87
                    <value>50</value>
88
                </property>
89
                <property>
90
                    <name>mapred.child.java.opts</name>
91
                    <value>-Xmx2048M</value>
92
                </property>
93
            </configuration>
94
            <script>lib/scripts/generator.q</script>
95
            <!-- The working directory of the workflow node. -->
96
            <param>workingDir=${workingDir}/generator/working_dir</param>
97
    
98
            <param>input_document_authors_citations=${input_document_authors_citations}</param>
99
            <param>schema_input_document_authors_citations=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.DocumentWithAuthorsAndCitations']}</param>
100
            
101
            <param>input_person_id=${input_person_id}</param>
102
            <param>schema_input_person_id=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.PersonId']}</param>
103
            
104
            <param>input_project_id=${input_project_id}</param>
105
            <param>schema_input_project_id=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.ProjectId']}</param>
106
            
107
            <param>output_document_statistics=${output_document_statistics}</param>
108
            <param>schema_output_document_statistics=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics']}</param>
109
            
110
            <param>output_author_statistics=${output_author_statistics}</param>
111
            <param>schema_output_author_statistics=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.AuthorToAuthorStatistics']}</param>
112
            
113
            <param>output_project_statistics=${output_project_statistics}</param>
114
            <param>schema_output_project_statistics=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.ProjectToProjectStatistics']}</param>
115
            
116
            <param>output_global_statistics=${output_global_statistics}</param>
117
            <param>schema_output_global_statistics=${wf:actionData('generate-schema')['eu.dnetlib.iis.statistics.schemas.CommonCoreStatistics']}</param>
118
        </hive>   
119
        <ok to="end"/>
120
        <error to="fail"/>
121
    </action>
122
    <kill name="fail">
123
		<message>Unfortunately, the workflow failed -- error message:
124
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
125
    </kill>
126
    <end name="end"/>
127
</workflow-app>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/main/resources/eu/dnetlib/iis/statistics/main/job.properties
1
input_document_authors_citations=/share/transformers/statistics/document_authors_citations/2014-01-21
2
input_person_id=/share/transformers/statistics/person_id/2014-01-21
3
input_project_id=/share/transformers/statistics/project_id/2014-01-21
4
output_document_statistics=${workingDir}/document_statistics
5
output_author_statistics=${workingDir}/author_statistics
6
output_project_statistics=${workingDir}/project_statistics
7
output_global_statistics=${workingDir}/global_statistics
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/main/java/eu/dnetlib/iis/statistics/hive/GenerateCoauthorsUDF.java
1
package eu.dnetlib.iis.statistics.hive;
2

  
3
import java.util.ArrayList;
4
import java.util.Arrays;
5
import java.util.List;
6
import java.util.Map;
7
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
8
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
9
import org.apache.hadoop.hive.ql.metadata.HiveException;
10
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
11
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
12
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
13
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
14
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
15
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
16
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
17
import org.apache.hadoop.io.IntWritable;
18
import org.apache.hadoop.io.Text;
19

  
20
/**
21
 *
22
 * @author Dominika Tkaczyk
23
 */
24
public class GenerateCoauthorsUDF extends GenericUDF {
25

  
26
    private StringObjectInspector authorIdOI;
27
    private MapObjectInspector mapOI;
28
    private StringObjectInspector mapKeyOI;
29
    private IntObjectInspector mapValueOI;
30
    
31
    @Override
32
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
33
        if (arguments.length != 2) {
34
            throw new UDFArgumentLengthException("GenerateCoauthorsUDF takes 2 argument: string, map<string, int>");
35
        }
36
       
37
        if (!(arguments[0] instanceof StringObjectInspector)) {
38
            throw new UDFArgumentException("The argument must be a list");
39
        }
40
        
41
        authorIdOI = (StringObjectInspector) arguments[0];
42
        
43
        if (!(arguments[1] instanceof MapObjectInspector)) {
44
            throw new UDFArgumentException("The argument must be a list");
45
        }
46
        
47
        mapOI = (MapObjectInspector) arguments[1];
48
        
49
        if (!(mapOI.getMapKeyObjectInspector() instanceof StringObjectInspector)) {
50
            throw new UDFArgumentException("The argument must be a list");
51
        }
52
        
53
        mapKeyOI = (StringObjectInspector) mapOI.getMapKeyObjectInspector();
54
        
55
        if (!(mapOI.getMapValueObjectInspector() instanceof IntObjectInspector)) {
56
            throw new UDFArgumentException("The argument must be a list");
57
        }
58
        
59
        mapValueOI = (IntObjectInspector) mapOI.getMapValueObjectInspector();
60
        
61
        List names = Arrays.asList("id", "coauthoredPapersCount");
62
        List ois = Arrays.asList(
63
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
64
                PrimitiveObjectInspectorFactory.writableIntObjectInspector);
65
        
66
        return ObjectInspectorFactory.getStandardListObjectInspector(
67
                ObjectInspectorFactory.getStandardStructObjectInspector(names, ois));
68
    }
69

  
70
    @Override
71
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
72
        List coauthors = new ArrayList();
73
        String key = authorIdOI.getPrimitiveJavaObject(arguments[0].get());
74
        Map map = mapOI.getMap(arguments[1].get());
75
        if (!map.isEmpty()) {
76
            for (Object entry : map.entrySet()) {
77
                Map.Entry mapEntry = (Map.Entry<Object, Object>) entry;
78
                String id = mapKeyOI.getPrimitiveJavaObject(mapEntry.getKey());
79
                if (!id.equals(key)) {
80
                    int count = mapValueOI.get(mapEntry.getValue());
81
                    Object[] coauthor = new Object[2];
82
                    coauthor[0] = new Text(id);
83
                    coauthor[1] = new IntWritable(count);
84
                    coauthors.add(coauthor);
85
                }
86
            }
87
        }
88
             
89
        return coauthors;
90
    }
91

  
92
    @Override
93
    public String getDisplayString(String[] strings) {
94
        return "GenerateCoauthors()";
95
    }
96
    
97
}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/author_to_author_statistics.json
1
{"authorId": "id-1", "statistics": {"core": {"allPapers": {"numberOfPapers": 2, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 0.5, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}, "coAuthors": [{"id": "id-3", "coauthoredPapersCount": 1}, {"id": "id-123", "coauthoredPapersCount": 2}, {"id": "id-2", "coauthoredPapersCount": 1}, {"id": "id-800", "coauthoredPapersCount": 1}]}}
2
{"authorId": "id-123", "statistics": {"core": {"allPapers": {"numberOfPapers": 2, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 0.5, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}, "coAuthors": [{"id": "id-3", "coauthoredPapersCount": 1}, {"id": "id-2", "coauthoredPapersCount": 1}, {"id": "id-1", "coauthoredPapersCount": 2}, {"id": "id-800", "coauthoredPapersCount": 1}]}}
3
{"authorId": "id-2", "statistics": {"core": {"allPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}, "coAuthors": [{"id": "id-123", "coauthoredPapersCount": 1}, {"id": "id-1", "coauthoredPapersCount": 1}, {"id": "id-800", "coauthoredPapersCount": 1}]}}
4
{"authorId": "id-3", "statistics": {"core": {"allPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 2.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}}, "coAuthors": [{"id": "id-123", "coauthoredPapersCount": 1}, {"id": "id-1", "coauthoredPapersCount": 1}]}}
5
{"authorId": "id-345", "statistics": {"core": {"allPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 2.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 2.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}}, "coAuthors": []}}
6
{"authorId": "id-590", "statistics": {"core": {"allPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}, "publishedPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}}, "coAuthors": []}}
7
{"authorId": "id-800", "statistics": {"core": {"allPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}, "coAuthors": [{"id": "id-123", "coauthoredPapersCount": 1}, {"id": "id-2", "coauthoredPapersCount": 1}, {"id": "id-1", "coauthoredPapersCount": 1}]}}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/document_to_document_statistics.json
1
{"documentId": "id-1", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "citationsFromPublishedPapers": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}}}
2
{"documentId": "id-2", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}}
3
{"documentId": "id-3", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}}}
4
{"documentId": "id-4", "statistics": {"citationsFromAllPapers": {"numberOfCitations": 3, "numberOfCitationsPerYear": {"2010": 1, "2001": 2}}, "citationsFromPublishedPapers": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2010": 1, "2001": 1}}}}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/project_to_project_statistics.json
1
{"projectId": "1", "statistics": {"allPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}, "publishedPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}}}
2
{"projectId": "2", "statistics": {"allPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}}
3
{"projectId": "3", "statistics": {"allPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}, "publishedPapers": {"numberOfPapers": 0, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {}}}}}
4
{"projectId": "4", "statistics": {"allPapers": {"numberOfPapers": 2, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 0.5, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}}
5
{"projectId": "7", "statistics": {"allPapers": {"numberOfPapers": 3, "citationsFromAllPapers": {"basic": {"numberOfCitations": 5, "numberOfCitationsPerYear": {"2010": 1, "2001": 4}}, "averageNumberOfCitationsPerPaper": 1.6666666, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 2, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 3, "numberOfCitationsPerYear": {"2010": 1, "2001": 2}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 2, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 1, "citationsFromAllPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 0, "numberOfCitationsPerYear": {}}, "averageNumberOfCitationsPerPaper": 0.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 0, "100": 0, "500": 0, "50": 0}}}}}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/global_statistics.json
1
{"allPapers": {"numberOfPapers": 4, "citationsFromAllPapers": {"basic": {"numberOfCitations": 7, "numberOfCitationsPerYear": {"2010": 1, "2001": 6}}, "averageNumberOfCitationsPerPaper": 1.75, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 3, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 4, "numberOfCitationsPerYear": {"2010": 1, "2001": 3}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 3, "100": 0, "500": 0, "50": 0}}}, "publishedPapers": {"numberOfPapers": 2, "citationsFromAllPapers": {"basic": {"numberOfCitations": 2, "numberOfCitationsPerYear": {"2001": 2}}, "averageNumberOfCitationsPerPaper": 1.0, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}, "citationsFromPublishedPapers": {"basic": {"numberOfCitations": 1, "numberOfCitationsPerYear": {"2001": 1}}, "averageNumberOfCitationsPerPaper": 0.5, "numberOfPapersCitedAtLeastXTimes": {"250": 0, "10": 0, "1": 1, "100": 0, "500": 0, "50": 0}}}}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/document_authors_citations.json
1
{"projectIds": ["2", "4", "7"], "referencedDocumentsIds": ["id-2", "id-4", "id-3"], "authorIds": ["id-1", "id-2", "id-123", "id-800"], "isPublished": true, "year":"2001", "documentId": "id-1"}
2
{"projectIds": ["7", "4"], "referencedDocumentsIds": [], "authorIds": ["id-1", "id-3", "id-123"], "isPublished": false, "year":"2011", "documentId": "id-2"}
3
{"projectIds": [], "referencedDocumentsIds": ["id-4"], "authorIds": ["id-345"], "isPublished": true, "year":"2010", "documentId": "id-3"}
4
{"projectIds": ["7"], "referencedDocumentsIds": ["id-3", "id-2", "id-4"], "authorIds": [], "isPublished": false, "year":"2001", "documentId": "id-4"}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/person_id.json
1
{"id": "id-1"}
2
{"id": "id-2"}
3
{"id": "id-3"}
4
{"id": "id-345"}
5
{"id": "id-800"}
6
{"id": "id-590"}
7
{"id": "id-123"}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/data/project_id.json
1
{"id":"1"}
2
{"id":"2"}
3
{"id":"3"}
4
{"id":"4"}
5
{"id":"7"}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/oozie_app/import.txt
1
## This is a classpath-based import file (this header is required)
2
statistics_main classpath eu/dnetlib/iis/statistics/main/oozie_app
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/src/test/resources/eu/dnetlib/iis/statistics/main/sampledataproducer/oozie_app/workflow.xml
1
<workflow-app xmlns="uri:oozie:workflow:0.2" name="test-statistics_main">
2
    <start to="producer"/>
3
    <action name="producer">
4
        <java>
5
            <job-tracker>${jobTracker}</job-tracker>
6
            <name-node>${nameNode}</name-node>
7
			<!-- The data generated by this node is deleted in this section -->
8
			<prepare>
9
				<delete path="${nameNode}${workingDir}/producer" />
10
				<mkdir path="${nameNode}${workingDir}/producer" />
11
			</prepare>
12
            <configuration>
13
                <property>
14
                    <name>mapred.job.queue.name</name>
15
                    <value>${queueName}</value>
16
                </property>
17
            </configuration>
18
            <!-- This is a simple wrapper for the Java code -->
19
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
20
			<!-- The business Java code that gets to be executed -->
21
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
22
            <!-- Specification of the output ports -->
23
            <arg>-C{document_authors_citations,
24
				eu.dnetlib.iis.statistics.schemas.DocumentWithAuthorsAndCitations,
25
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/document_authors_citations.json}</arg>
26
            <arg>-C{person_id,
27
				eu.dnetlib.iis.statistics.schemas.PersonId,
28
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/person_id.json}</arg>
29
            <arg>-C{project_id,
30
				eu.dnetlib.iis.statistics.schemas.ProjectId,
31
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/project_id.json}</arg>
32
			<!-- All input and output ports have to be bound to paths in HDFS, working 
33
				directory has to be specified as well -->
34
			<arg>-SworkingDir=${workingDir}/producer/working_dir</arg>
35
			<arg>-Odocument_authors_citations=${workingDir}/producer/document_authors_citations</arg>
36
            <arg>-Operson_id=${workingDir}/producer/person_id</arg>
37
            <arg>-Oproject_id=${workingDir}/producer/project_id</arg>
38
            
39
        </java>
40
        <ok to="statistics_main"/>
41
        <error to="fail"/>
42
    </action>
43
    <action name="statistics_main">
44
        <sub-workflow>
45
            <app-path>${wf:appPath()}/statistics_main</app-path>
46
            <configuration>
47
                <property>
48
                    <name>jobTracker</name>
49
                    <value>${jobTracker}</value>
50
                </property>
51
                <property>
52
                    <name>nameNode</name>
53
                    <value>${nameNode}</value>
54
                </property>
55
                <property>
56
                    <name>queueName</name>
57
                    <value>${queueName}</value>
58
                </property>
59
                <!-- Working directory of the subworkflow -->
60
                <property>
61
                    <name>workingDir</name>
62
                    <value>${workingDir}/statistics_main/working_dir</value>
63
                </property>
64
                <!-- Input ports. -->
65
                <property>
66
                    <name>input_document_authors_citations</name>
67
                    <value>${workingDir}/producer/document_authors_citations</value>
68
                </property>
69
                <property>
70
                    <name>input_person_id</name>
71
                    <value>${workingDir}/producer/person_id</value>
72
                </property>
73
                <property>
74
                    <name>input_project_id</name>
75
                    <value>${workingDir}/producer/project_id</value>
76
                </property>
77
                <!-- Output port bound to given path -->
78
                <property>
79
                    <name>output_document_statistics</name>
80
                    <value>${workingDir}/statistics_main/document_statistics</value>
81
                </property>
82
                <property>
83
                    <name>output_author_statistics</name>
84
                    <value>${workingDir}/statistics_main/author_statistics</value>
85
                </property>
86
                <property>
87
                    <name>output_project_statistics</name>
88
                    <value>${workingDir}/statistics_main/project_statistics</value>
89
                </property>
90
                <property>
91
                    <name>output_global_statistics</name>
92
                    <value>${workingDir}/statistics_main/global_statistics</value>
93
                </property>
94
            </configuration>
95
        </sub-workflow>
96
        <ok to="consumer"/>
97
        <error to="fail"/>
98
    </action>
99
    <action name="consumer">
100
		<java>
101
			<job-tracker>${jobTracker}</job-tracker>
102
			<name-node>${nameNode}</name-node>
103
			<!-- The data generated by this node is deleted in this section -->
104
			<prepare>
105
				<delete path="${nameNode}${workingDir}/consumer" />
106
				<mkdir path="${nameNode}${workingDir}/consumer" />
107
			</prepare>
108
			<configuration>
109
				<property>
110
					<name>mapred.job.queue.name</name>
111
					<value>${queueName}</value>
112
				</property>
113
			</configuration>
114
			<!-- This is a simple wrapper for the Java code -->
115
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
116
			<!-- The business Java code that gets to be executed -->
117
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.TestingConsumer</arg>
118
			<!-- Specification of the input ports -->
119
			<arg>-C{document_statistics,
120
				eu.dnetlib.iis.statistics.schemas.DocumentToDocumentStatistics,
121
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/document_to_document_statistics.json}</arg>
122
            <arg>-C{author_statistics,
123
				eu.dnetlib.iis.statistics.schemas.AuthorToAuthorStatistics,
124
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/author_to_author_statistics.json}</arg>
125
            <arg>-C{project_statistics,
126
				eu.dnetlib.iis.statistics.schemas.ProjectToProjectStatistics,
127
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/project_to_project_statistics.json}</arg>
128
            <arg>-C{global_statistics,
129
				eu.dnetlib.iis.statistics.schemas.CommonCoreStatistics,
130
				eu/dnetlib/iis/statistics/main/sampledataproducer/data/global_statistics.json}</arg>
131
			<!-- All input and output ports have to be bound to paths in HDFS, working 
132
				directory has to be specified as well -->
133
			<arg>-SworkingDir=${workingDir}/consumer/working_dir</arg>
134
			<arg>-Idocument_statistics=${workingDir}/statistics_main/document_statistics</arg>
135
            <arg>-Iauthor_statistics=${workingDir}/statistics_main/author_statistics</arg>
136
            <arg>-Iproject_statistics=${workingDir}/statistics_main/project_statistics</arg>
137
            <arg>-Iglobal_statistics=${workingDir}/statistics_main/global_statistics</arg>
138
		</java>
139
		<ok to="end" />
140
		<error to="fail" />
141
	</action>
142
    <kill name="fail">
143
		<message>Unfortunately, the workflow failed -- error message:
144
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
145
    </kill>
146
    <end name="end"/>
147
</workflow-app>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/deploy.info
1
{
2
  "type_source": "SVN", 
3
  "goal": "package -U -T 4C source:jar", 
4
  "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/icm-iis-statistics/trunk/", 
5
  "deploy_repository": "dnet4-snapshots", 
6
  "version": "4",
7
  "mail": "m.horst@icm.edu.pl,d.tkaczyk@icm.edu.pl",
8
  "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", 
9
  "name": "icm-iis-statistics"
10
}
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/test-custom-log4j.properties
1
#
2
# Licensed to the Apache Software Foundation (ASF) under one
3
# or more contributor license agreements.  See the NOTICE file
4
# distributed with this work for additional information
5
# regarding copyright ownership.  The ASF licenses this file
6
# to you under the Apache License, Version 2.0 (the
7
# "License"); you may not use this file except in compliance
8
# with the License.  You may obtain a copy of the License at
9
# 
10
#      http://www.apache.org/licenses/LICENSE-2.0
11
# 
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
#
18

  
19
#    http://www.apache.org/licenses/LICENSE-2.0
20
#
21
# Unless required by applicable law or agreed to in writing, software
22
# distributed under the License is distributed on an "AS IS" BASIS,
23
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24
# See the License for the specific language governing permissions and
25
# limitations under the License. See accompanying LICENSE file.
26

  
27
#
28

  
29
log4j.appender.oozie=org.apache.log4j.ConsoleAppender
30
log4j.appender.oozie.Target=System.out
31
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
32
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
33

  
34
log4j.appender.null=org.apache.log4j.varia.NullAppender
35

  
36
log4j.logger.org.apache=INFO, oozie
37
log4j.logger.org.mortbay=WARN, oozie
38
log4j.logger.org.hsqldb=WARN, oozie
39

  
40
log4j.logger.opslog=NONE, null
41
log4j.logger.applog=NONE, null
42
log4j.logger.instrument=NONE, null
43

  
44
log4j.logger.a=NONE, null
45

  
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/hsqldb-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
    <property>
19
        <name>oozie.service.JPAService.jdbc.driver</name>
20
        <value>org.hsqldb.jdbcDriver</value>
21
    </property>
22
    <property>
23
        <name>oozie.service.JPAService.jdbc.url</name>
24
        <value>jdbc:hsqldb:mem:oozie-db;create=true</value>
25
    </property>
26
</configuration>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/mysql-oozie-site.xml
1
<?xml version="1.0"?>
2
<!--
3
  Licensed to the Apache Software Foundation (ASF) under one
4
  or more contributor license agreements.  See the NOTICE file
5
  distributed with this work for additional information
6
  regarding copyright ownership.  The ASF licenses this file
7
  to you under the Apache License, Version 2.0 (the
8
  "License"); you may not use this file except in compliance
9
  with the License.  You may obtain a copy of the License at
10

  
11
       http://www.apache.org/licenses/LICENSE-2.0
12

  
13
  Unless required by applicable law or agreed to in writing, software
14
  distributed under the License is distributed on an "AS IS" BASIS,
15
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
  See the License for the specific language governing permissions and
17
  limitations under the License.
18
-->
19
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
20
<configuration>
21
    <property>
22
      <name>oozie.service.JPAService.jdbc.driver</name>
23
        <value>com.mysql.jdbc.Driver</value>
24
        <description>JDBC driver class.</description>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.port</name>
28
        <value>3306</value>
29
    </property>
30
    <property>
31
      <name>oozie.service.JPAService.jdbc.url</name>
32
        <value>jdbc:mysql://${oozie.test.db.host}:${oozie.test.db.port}/oozie</value>
33
        <description>JDBC URL.</description>
34
    </property>
35
    <property>
36
        <name>oozie.service.JPAService.jdbc.username</name>
37
        <value>oozie</value>
38
        <description>DB user name.</description>
39
    </property>
40
    <property>
41
        <name>oozie.service.JPAService.jdbc.password</name>
42
        <value>oozie</value>
43
        <description>
44
            DB user password. IMPORTANT: if password is empty leave a 1 space string, the service trims the
45
            value, if empty Configuration assumes it is NULL.
46
        </description>
47
    </property>
48
</configuration>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/oracle-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
    <property>
19
        <name>oozie.service.JPAService.jdbc.driver</name>
20
        <value>oracle.jdbc.driver.OracleDriver</value>
21
    </property>
22
    <property>
23
        <name>oozie.test.db.port</name>
24
        <value>1521</value>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.name</name>
28
        <value>xe</value>
29
    </property>
30
    <property>
31
        <name>oozie.service.JPAService.jdbc.url</name>
32
        <value>jdbc:oracle:thin:@//${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
33
    </property>
34
    <property>
35
        <name>oozie.service.JPAService.jdbc.username</name>
36
        <value>oozie</value>
37
    </property>
38
    <property>
39
        <name>oozie.service.JPAService.jdbc.password</name>
40
        <value>oozie</value>
41
    </property>
42
</configuration>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/postgres-oozie-site.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Copyright (c) 2010 Yahoo! Inc. All rights reserved.
5
  Licensed under the Apache License, Version 2.0 (the "License");
6
  you may not use this file except in compliance with the License.
7
  You may obtain a copy of the License at
8

  
9
    http://www.apache.org/licenses/LICENSE-2.0
10

  
11
  Unless required by applicable law or agreed to in writing, software
12
  distributed under the License is distributed on an "AS IS" BASIS,
13
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  See the License for the specific language governing permissions and
15
  limitations under the License. See accompanying LICENSE file.
16
-->
17
<configuration>
18
    <property>
19
        <name>oozie.service.JPAService.jdbc.driver</name>
20
        <value>org.postgresql.Driver</value>
21
    </property>
22
    <property>
23
        <name>oozie.test.db.port</name>
24
        <value>5432</value>
25
    </property>
26
    <property>
27
        <name>oozie.test.db.name</name>
28
        <value>oozie</value>
29
    </property>
30
    <property>
31
        <name>oozie.service.JPAService.jdbc.url</name>
32
        <value>jdbc:postgresql://${oozie.test.db.host}:${oozie.test.db.port}/${oozie.test.db.name}</value>
33
    </property>
34
    <property>
35
        <name>oozie.service.JPAService.jdbc.username</name>
36
        <value>oozie</value>
37
    </property>
38
    <property>
39
        <name>oozie.service.JPAService.jdbc.password</name>
40
        <value>oozie</value>
41
    </property>
42
</configuration>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/test-oozie-log4j.properties
1
#
2
# Licensed to the Apache Software Foundation (ASF) under one
3
# or more contributor license agreements.  See the NOTICE file
4
# distributed with this work for additional information
5
# regarding copyright ownership.  The ASF licenses this file
6
# to you under the Apache License, Version 2.0 (the
7
# "License"); you may not use this file except in compliance
8
# with the License.  You may obtain a copy of the License at
9
# 
10
#      http://www.apache.org/licenses/LICENSE-2.0
11
# 
12
# Unless required by applicable law or agreed to in writing, software
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
# See the License for the specific language governing permissions and
16
# limitations under the License.
17
#
18

  
19
#    http://www.apache.org/licenses/LICENSE-2.0
20
#
21
# Unless required by applicable law or agreed to in writing, software
22
# distributed under the License is distributed on an "AS IS" BASIS,
23
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24
# See the License for the specific language governing permissions and
25
# limitations under the License. See accompanying LICENSE file.
26

  
27
#
28

  
29
log4j.appender.oozie=org.apache.log4j.ConsoleAppender
30
log4j.appender.oozie.Target=System.out
31
log4j.appender.oozie.layout=org.apache.log4j.PatternLayout
32
log4j.appender.oozie.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
33

  
34
log4j.appender.null=org.apache.log4j.varia.NullAppender
35

  
36
log4j.logger.org.apache=INFO, oozie
37
log4j.logger.org.mortbay=WARN, oozie
38
log4j.logger.org.hsqldb=WARN, oozie
39

  
40
log4j.logger.opslog=NONE, null
41
log4j.logger.applog=NONE, null
42
log4j.logger.instrument=NONE, null
43

  
44
log4j.logger.a=ALL, null
45

  
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/src/test/resources/hadoop-config.xml
1
<?xml version="1.0"?>
2
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3
<!--
4
  Licensed to the Apache Software Foundation (ASF) under one
5
  or more contributor license agreements.  See the NOTICE file
6
  distributed with this work for additional information
7
  regarding copyright ownership.  The ASF licenses this file
8
  to you under the Apache License, Version 2.0 (the
9
  "License"); you may not use this file except in compliance
10
  with the License.  You may obtain a copy of the License at
11

  
12
       http://www.apache.org/licenses/LICENSE-2.0
13

  
14
  Unless required by applicable law or agreed to in writing, software
15
  distributed under the License is distributed on an "AS IS" BASIS,
16
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
  See the License for the specific language governing permissions and
18
  limitations under the License.
19
-->
20
<configuration>
21

  
22
    <property>
23
        <name>mapreduce.jobtracker.kerberos.principal</name>
24
        <value>mapred/_HOST@LOCALREALM</value>
25
    </property>
26

  
27
    <property>
28
        <name>dfs.namenode.kerberos.principal</name>
29
        <value>hdfs/_HOST@LOCALREALM</value>
30
    </property>
31

  
32
    <property>
33
        <name>mapreduce.framework.name</name>
34
        <value>yarn</value>
35
    </property>
36

  
37
</configuration>
modules/icm-iis-statistics/tags/icm-iis-statistics-1.0.0/core/README.md
1
This directory and its subdirectories and files are here as a hack to make the Oozie unit tests work. 
2

  
3
Details
4
-------
5
Oozie tests assume that they are placed inside the directory tree of the Oozie source code -- see the source code of the `XTestCase` class, which is an ancestor of the `MiniOozieTestCase` class, which in turn should be extended by your test case class.
6

  
7
How to get the source code of the `XTestCase` class:
8

  
9
- download the source code of Ubuntu's `oozie` package prepared by Cloudera (`apt-get source oozie`). It is version 3.1.3+155 of this package.
10
- open the file `oozie-3.1.3+155/src/core/src/test/java/org/apache/oozie/test/XTestCase.java` and look at lines 93-105.

Also available in: Unified diff