Revision 39164
Added by Marek Horst over 8 years ago
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/citation_pmc.json | ||
---|---|---|
1 |
{ |
|
2 |
"sourceDocumentId": "id-3", |
|
3 |
"rawText": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176", |
|
4 |
"destinationDocumentId": "pmc-id", |
|
5 |
"externalDestinationDocumentIds": { |
|
6 |
"pmid": "1234567" |
|
7 |
} |
|
8 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/person/1/person:body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: person |
|
5 |
id: "30|0314fe20-be3c-4bc3-adee-6bbc2cde3cb7_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:DiVA.org:uu-127423_Oliveira__Paulo_UNKNOWN" |
|
6 |
person: { |
|
7 |
metadata: { |
|
8 |
firstname: { |
|
9 |
value: "Oliveira, Paulo" |
|
10 |
} |
|
11 |
fullname: { |
|
12 |
value: "Oliveira, Paulo" |
|
13 |
} |
|
14 |
} |
|
15 |
} |
|
16 |
} |
|
17 |
dataInfo: { |
|
18 |
inferred: false |
|
19 |
deletedbyinference: false |
|
20 |
trust: "0.9" |
|
21 |
provenanceaction: { |
|
22 |
classid: "UNKNOWN" |
|
23 |
classname: "UNKNOWN" |
|
24 |
schemeid: "dnet:provenanceActions" |
|
25 |
schemename: "dnet:provenanceActions" |
|
26 |
} |
|
27 |
} |
|
28 |
timestamp: 1360248058786 |
|
29 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/project/1/project:body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: project |
|
5 |
id: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43" |
|
6 |
project: { |
|
7 |
metadata: { |
|
8 |
code: { |
|
9 |
value: "241544" |
|
10 |
} |
|
11 |
acronym: { |
|
12 |
value: "SYSKID" |
|
13 |
} |
|
14 |
title: { |
|
15 |
value: "Systems biology towards novel chronic kidney disease diagnosis and treatment" |
|
16 |
} |
|
17 |
startdate: { |
|
18 |
value: "2010-01-01" |
|
19 |
} |
|
20 |
enddate: { |
|
21 |
value: "2014-12-31" |
|
22 |
} |
|
23 |
callidentifier: { |
|
24 |
value: "FP7-HEALTH-2009-single-stage" |
|
25 |
} |
|
26 |
ecsc39: { |
|
27 |
value: "true" |
|
28 |
} |
|
29 |
contracttype: { |
|
30 |
classid: "CP" |
|
31 |
classname: "Collaborative project" |
|
32 |
schemeid: "ec:FP7contractTypes" |
|
33 |
schemename: "ec:FP7contractTypes" |
|
34 |
} |
|
35 |
fundingtree: { |
|
36 |
value: "{\"funding_level_2\":{\"class\":\"ec:program\", \"id\":\"corda_______::FP7::SP1::HEALTH\", \"description\":\"Health\", \"name\":\"HEALTH\", \"parent\":{\"funding_level_1\":{\"class\":\"ec:specificprogram\", \"id\":\"corda_______::FP7::SP1\", \"description\":\"SP1-Cooperation\", \"name\":\"SP1\", \"parent\":{\"funding_level_0\":{\"class\":\"ec:frameworkprogram\", \"id\":\"corda_______::FP7\", \"description\":\"SEVENTH FRAMEWORK PROGRAMME\", \"name\":\"FP7\", \"parent\":{}}}}}}}" |
|
37 |
} |
|
38 |
} |
|
39 |
} |
|
40 |
originalId: "corda_______::241544" |
|
41 |
collectedfrom: { |
|
42 |
key: "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f" |
|
43 |
value: "CORDA - COmmon Research DAta Warehouse" |
|
44 |
} |
|
45 |
} |
|
46 |
dataInfo: { |
|
47 |
inferred: false |
|
48 |
deletedbyinference: false |
|
49 |
trust: "0.9" |
|
50 |
provenanceaction: { |
|
51 |
classid: "sysimport:crosswalk:entityregistry" |
|
52 |
classname: "sysimport:crosswalk:entityregistry" |
|
53 |
schemeid: "dnet:provenanceActions" |
|
54 |
schemename: "dnet:provenanceActions" |
|
55 |
} |
|
56 |
} |
|
57 |
timestamp: 1372675091270 |
|
58 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dedup/result:body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: result |
|
5 |
id: "50|dedup_wf_001::0154f41ff22d1ac2ef656d2931ac828c" |
|
6 |
result: { |
|
7 |
metadata: { |
|
8 |
title: { |
|
9 |
value: "Soldadura entre gr\\303\\241nulos" |
|
10 |
qualifier: { |
|
11 |
classid: "main title" |
|
12 |
classname: "main title" |
|
13 |
schemeid: "dnet:dataCite_title" |
|
14 |
schemename: "dnet:dataCite_title" |
|
15 |
} |
|
16 |
} |
|
17 |
dateofacceptance: { |
|
18 |
value: "2011-01-01" |
|
19 |
} |
|
20 |
resulttype: { |
|
21 |
classid: "publication" |
|
22 |
classname: "publication" |
|
23 |
schemeid: "dnet:result_typologies" |
|
24 |
schemename: "dnet:result_typologies" |
|
25 |
} |
|
26 |
storagedate: { |
|
27 |
value: "2012-10-18" |
|
28 |
} |
|
29 |
language: { |
|
30 |
classid: "und" |
|
31 |
classname: "Undetermined" |
|
32 |
schemeid: "dnet:languages" |
|
33 |
schemename: "dnet:languages" |
|
34 |
} |
|
35 |
} |
|
36 |
|
|
37 |
instance: { |
|
38 |
licence: { |
|
39 |
classid: "OPEN" |
|
40 |
classname: "Open Access" |
|
41 |
schemeid: "dnet:access_modes" |
|
42 |
schemename: "dnet:access_modes" |
|
43 |
} |
|
44 |
instancetype: { |
|
45 |
classid: "0001" |
|
46 |
classname: "Article" |
|
47 |
schemeid: "dnet:publication_resource" |
|
48 |
schemename: "dnet:publication_resource" |
|
49 |
} |
|
50 |
hostedby: { |
|
51 |
key: "10|opendoar____::9766527f2b5d3e95d4a733fcfb77bd7e" |
|
52 |
value: "INRIA a CCSD electronic archive server" |
|
53 |
} |
|
54 |
url: "http://hal.archives-ouvertes.fr/hal-00613125" |
|
55 |
} |
|
56 |
|
|
57 |
} |
|
58 |
originalId: "crossref____::003b0746ef72ff253ef1e465c758c961" |
|
59 |
collectedfrom: { |
|
60 |
key: "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" |
|
61 |
value: "Crossref" |
|
62 |
} |
|
63 |
pid: { |
|
64 |
value: "doi:10.1002/prca.201000155" |
|
65 |
qualifier: { |
|
66 |
classid: "doi" |
|
67 |
classname: "doi" |
|
68 |
schemeid: "dnet:pid_types" |
|
69 |
schemename: "dnet:pid_types" |
|
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
dataInfo: { |
|
74 |
inferred: false |
|
75 |
deletedbyinference: false |
|
76 |
trust: "NEUTRAL" |
|
77 |
provenanceaction: { |
|
78 |
classid: "user:claim:doi" |
|
79 |
classname: "user:claim:doi" |
|
80 |
schemeid: "dnet:provenanceActions" |
|
81 |
schemename: "dnet:provenanceActions" |
|
82 |
} |
|
83 |
} |
|
84 |
timestamp: 1372359095914 |
|
85 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dedup/resultResult_dedup_merges:50|od______1106::8477827a014277dfebb0d5882496a797 | ||
---|---|---|
1 |
{ |
|
2 |
kind: relation |
|
3 |
rel: { |
|
4 |
relType: resultResult |
|
5 |
source: "50|dedup_wf_001::0154f41ff22d1ac2ef656d2931ac828c" |
|
6 |
target: "50|od______1106::8477827a014277dfebb0d5882496a797" |
|
7 |
child: false |
|
8 |
resultResult: { |
|
9 |
dedup: { |
|
10 |
relMetadata: { |
|
11 |
semantics: { |
|
12 |
classid: "merges" |
|
13 |
classname: "merges" |
|
14 |
schemeid: "dnet:result_result_relations" |
|
15 |
schemename: "dnet:result_result_relations" |
|
16 |
} |
|
17 |
} |
|
18 |
} |
|
19 |
} |
|
20 |
subRelType: dedup |
|
21 |
relClass: "merges" |
|
22 |
} |
|
23 |
dataInfo: { |
|
24 |
inferred: false |
|
25 |
deletedbyinference: false |
|
26 |
trust: "0.8" |
|
27 |
provenanceaction: { |
|
28 |
classid: "" |
|
29 |
classname: "" |
|
30 |
schemeid: "dnet:provenanceActions" |
|
31 |
schemename: "dnet:provenanceActions" |
|
32 |
} |
|
33 |
} |
|
34 |
timestamp: 1423870324129 |
|
35 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/result:body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: result |
|
5 |
id: "50|crossref____::003b0746ef72ff253ef1e465c758c961" |
|
6 |
result: { |
|
7 |
metadata: { |
|
8 |
title: { |
|
9 |
value: "Human urinary peptide database for multiple disease biomarker discovery" |
|
10 |
qualifier: { |
|
11 |
classid: "main title" |
|
12 |
classname: "main title" |
|
13 |
schemeid: "dnet:dataCite_title" |
|
14 |
schemename: "dnet:dataCite_title" |
|
15 |
} |
|
16 |
} |
|
17 |
dateofacceptance: { |
|
18 |
value: "2011-01-01" |
|
19 |
} |
|
20 |
resulttype: { |
|
21 |
classid: "publication" |
|
22 |
classname: "publication" |
|
23 |
schemeid: "dnet:result_typologies" |
|
24 |
schemename: "dnet:result_typologies" |
|
25 |
} |
|
26 |
storagedate: { |
|
27 |
value: "2012-10-18" |
|
28 |
} |
|
29 |
language: { |
|
30 |
classid: "und" |
|
31 |
classname: "Undetermined" |
|
32 |
schemeid: "dnet:languages" |
|
33 |
schemename: "dnet:languages" |
|
34 |
} |
|
35 |
} |
|
36 |
|
|
37 |
instance: { |
|
38 |
licence: { |
|
39 |
classid: "OPEN" |
|
40 |
classname: "Open Access" |
|
41 |
schemeid: "dnet:access_modes" |
|
42 |
schemename: "dnet:access_modes" |
|
43 |
} |
|
44 |
instancetype: { |
|
45 |
classid: "0001" |
|
46 |
classname: "Article" |
|
47 |
schemeid: "dnet:publication_resource" |
|
48 |
schemename: "dnet:publication_resource" |
|
49 |
} |
|
50 |
hostedby: { |
|
51 |
key: "10|opendoar____::9766527f2b5d3e95d4a733fcfb77bd7e" |
|
52 |
value: "INRIA a CCSD electronic archive server" |
|
53 |
} |
|
54 |
url: "http://hal.archives-ouvertes.fr/hal-00613125" |
|
55 |
} |
|
56 |
|
|
57 |
} |
|
58 |
originalId: "crossref____::003b0746ef72ff253ef1e465c758c961" |
|
59 |
collectedfrom: { |
|
60 |
key: "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" |
|
61 |
value: "Crossref" |
|
62 |
} |
|
63 |
pid: { |
|
64 |
value: "doi:10.1002/prca.201000155" |
|
65 |
qualifier: { |
|
66 |
classid: "doi" |
|
67 |
classname: "doi" |
|
68 |
schemeid: "dnet:pid_types" |
|
69 |
schemename: "dnet:pid_types" |
|
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
dataInfo: { |
|
74 |
inferred: false |
|
75 |
deletedbyinference: false |
|
76 |
trust: "NEUTRAL" |
|
77 |
provenanceaction: { |
|
78 |
classid: "user:claim:doi" |
|
79 |
classname: "user:claim:doi" |
|
80 |
schemeid: "dnet:provenanceActions" |
|
81 |
schemename: "dnet:provenanceActions" |
|
82 |
} |
|
83 |
} |
|
84 |
timestamp: 1372359095914 |
|
85 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/personResult_authorship_hasAuthor:30|crossref____::7c844171342140b6133fb0536dba9843 | ||
---|---|---|
1 |
{ |
|
2 |
kind: relation |
|
3 |
rel: { |
|
4 |
relType: personResult |
|
5 |
source: "50|crossref____::003b0746ef72ff253ef1e465c758c961" |
|
6 |
target: "30|crossref____::7c844171342140b6133fb0536dba9843" |
|
7 |
child: false |
|
8 |
personResult: { |
|
9 |
authorship: { |
|
10 |
relMetadata: { |
|
11 |
semantics: { |
|
12 |
classid: "hasAuthor" |
|
13 |
classname: "hasAuthor" |
|
14 |
schemeid: "dnet:personroles" |
|
15 |
schemename: "dnet:personroles" |
|
16 |
} |
|
17 |
} |
|
18 |
ranking: "4" |
|
19 |
} |
|
20 |
} |
|
21 |
subRelType: authorship |
|
22 |
relClass: "hasAuthor" |
|
23 |
} |
|
24 |
dataInfo: { |
|
25 |
inferred: false |
|
26 |
deletedbyinference: false |
|
27 |
trust: "NEUTRAL" |
|
28 |
provenanceaction: { |
|
29 |
classid: "user:claim:doi" |
|
30 |
classname: "user:claim:doi" |
|
31 |
schemeid: "dnet:provenanceActions" |
|
32 |
schemename: "dnet:provenanceActions" |
|
33 |
} |
|
34 |
} |
|
35 |
timestamp: 1372359095915 |
|
36 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/resultProject_outcome_isProducedBy:40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43 | ||
---|---|---|
1 |
{ |
|
2 |
kind: relation |
|
3 |
rel: { |
|
4 |
relType: resultProject |
|
5 |
source: "50|crossref____::003b0746ef72ff253ef1e465c758c961" |
|
6 |
target: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43" |
|
7 |
child: false |
|
8 |
resultProject: { |
|
9 |
outcome: { |
|
10 |
relMetadata: { |
|
11 |
semantics: { |
|
12 |
classid: "isProducedBy" |
|
13 |
classname: "isProducedBy" |
|
14 |
schemeid: "dnet:result_project_relations" |
|
15 |
schemename: "dnet:result_project_relations" |
|
16 |
} |
|
17 |
} |
|
18 |
} |
|
19 |
} |
|
20 |
subRelType: outcome |
|
21 |
relClass: "isProducedBy" |
|
22 |
} |
|
23 |
dataInfo: { |
|
24 |
inferred: false |
|
25 |
deletedbyinference: false |
|
26 |
trust: "0.8" |
|
27 |
provenanceaction: { |
|
28 |
classid: "" |
|
29 |
classname: "" |
|
30 |
schemeid: "dnet:provenanceActions" |
|
31 |
schemename: "dnet:provenanceActions" |
|
32 |
} |
|
33 |
} |
|
34 |
timestamp: 1372359095915 |
|
35 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/personResult_authorship_hasAuthor:30|crossref____::f6813047fd08021c87acdee2061c20f4 | ||
---|---|---|
1 |
{ |
|
2 |
kind: relation |
|
3 |
rel: { |
|
4 |
relType: personResult |
|
5 |
source: "50|crossref____::003b0746ef72ff253ef1e465c758c961" |
|
6 |
target: "30|crossref____::f6813047fd08021c87acdee2061c20f4" |
|
7 |
child: false |
|
8 |
personResult: { |
|
9 |
authorship: { |
|
10 |
relMetadata: { |
|
11 |
semantics: { |
|
12 |
classid: "hasAuthor" |
|
13 |
classname: "hasAuthor" |
|
14 |
schemeid: "dnet:personroles" |
|
15 |
schemename: "dnet:personroles" |
|
16 |
} |
|
17 |
} |
|
18 |
ranking: "5" |
|
19 |
} |
|
20 |
} |
|
21 |
subRelType: authorship |
|
22 |
relClass: "hasAuthor" |
|
23 |
} |
|
24 |
dataInfo: { |
|
25 |
inferred: false |
|
26 |
deletedbyinference: false |
|
27 |
trust: "NEUTRAL" |
|
28 |
provenanceaction: { |
|
29 |
classid: "user:claim:doi" |
|
30 |
classname: "user:claim:doi" |
|
31 |
schemeid: "dnet:provenanceActions" |
|
32 |
schemename: "dnet:provenanceActions" |
|
33 |
} |
|
34 |
} |
|
35 |
timestamp: 1372359095915 |
|
36 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dataset/result:body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: result |
|
5 |
id: "50|od______1106::138537e0568c27c402756042b15e7858" |
|
6 |
result: { |
|
7 |
metadata: { |
|
8 |
title: { |
|
9 |
value: "Santa Rosa [ciruelo]" |
|
10 |
qualifier: { |
|
11 |
classid: "main title" |
|
12 |
classname: "main title" |
|
13 |
schemeid: "dnet:dataCite_title" |
|
14 |
schemename: "dnet:dataCite_title" |
|
15 |
} |
|
16 |
} |
|
17 |
dateofacceptance: { |
|
18 |
value: "1964-12-01" |
|
19 |
} |
|
20 |
resulttype: { |
|
21 |
classid: "publication" |
|
22 |
classname: "publication" |
|
23 |
schemeid: "dnet:result_typologies" |
|
24 |
schemename: "dnet:result_typologies" |
|
25 |
} |
|
26 |
storagedate: { |
|
27 |
value: "2012-10-18" |
|
28 |
} |
|
29 |
language: { |
|
30 |
classid: "und" |
|
31 |
classname: "Undetermined" |
|
32 |
schemeid: "dnet:languages" |
|
33 |
schemename: "dnet:languages" |
|
34 |
} |
|
35 |
} |
|
36 |
|
|
37 |
instance: { |
|
38 |
licence: { |
|
39 |
classid: "OPEN" |
|
40 |
classname: "Open Access" |
|
41 |
schemeid: "dnet:access_modes" |
|
42 |
schemename: "dnet:access_modes" |
|
43 |
} |
|
44 |
instancetype: { |
|
45 |
classid: "0021" |
|
46 |
classname: "Dataset" |
|
47 |
schemeid: "dnet:publication_resource" |
|
48 |
schemename: "dnet:publication_resource" |
|
49 |
} |
|
50 |
hostedby: { |
|
51 |
key: "10|opendoar____::c9f95a0a5af052bffce5c89917335f67" |
|
52 |
value: "Digital.CSIC" |
|
53 |
} |
|
54 |
url: "http://hdl.handle.net/10261/86687" |
|
55 |
} |
|
56 |
|
|
57 |
} |
|
58 |
originalId: "oai:digital.csic.es:10261/86687" |
|
59 |
collectedfrom: { |
|
60 |
key: "10|opendoar____::c9f95a0a5af052bffce5c89917335f67" |
|
61 |
value: "Digital.CSIC" |
|
62 |
} |
|
63 |
pid: { |
|
64 |
value: "oai:digital.csic.es:10261/86687" |
|
65 |
qualifier: { |
|
66 |
classid: "oai" |
|
67 |
classname: "Open Archives Initiative" |
|
68 |
schemeid: "dnet:pid_types" |
|
69 |
schemename: "dnet:pid_types" |
|
70 |
} |
|
71 |
} |
|
72 |
} |
|
73 |
dataInfo: { |
|
74 |
inferred: false |
|
75 |
deletedbyinference: false |
|
76 |
trust: "0.9" |
|
77 |
provenanceaction: { |
|
78 |
classid: "sysimport:crosswalk:repository" |
|
79 |
classname: "sysimport:crosswalk:repository" |
|
80 |
schemeid: "dnet:provenanceActions" |
|
81 |
schemename: "dnet:provenanceActions" |
|
82 |
} |
|
83 |
} |
|
84 |
timestamp: 1423856239322 |
|
85 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/primary/main/oozie_app/workflow.xml | ||
---|---|---|
288 | 288 |
<value>$UNDEFINED$</value> |
289 | 289 |
<description>external hbase zookeeper client port, required only when zookeeper quorum property is set</description> |
290 | 290 |
</property> |
291 |
<!-- trust level threshold section --> |
|
291 | 292 |
<property> |
293 |
<name>export_trust_level_threshold</name> |
|
294 |
<value>$UNDEFINED$</value> |
|
295 |
<description>default trust level threshold of exported data</description> |
|
296 |
</property> |
|
297 |
<property> |
|
298 |
<name>export_trust_level_threshold_document_classes</name> |
|
299 |
<value>$UNDEFINED$</value> |
|
300 |
<description>document_classes trust level threshold</description> |
|
301 |
</property> |
|
302 |
<property> |
|
303 |
<name>export_trust_level_threshold_document_referencedProjects</name> |
|
304 |
<value>$UNDEFINED$</value> |
|
305 |
<description>document_referencedProjects trust level threshold</description> |
|
306 |
</property> |
|
307 |
<property> |
|
308 |
<name>export_trust_level_threshold_document_referencedDatasets</name> |
|
309 |
<value>$UNDEFINED$</value> |
|
310 |
<description>document_referencedDatasets trust level threshold</description> |
|
311 |
</property> |
|
312 |
<property> |
|
313 |
<name>export_trust_level_threshold_document_pdb</name> |
|
314 |
<value>$UNDEFINED$</value> |
|
315 |
<description>document to protein databank trust level threshold</description> |
|
316 |
</property> |
|
317 |
<!-- --> |
|
318 |
<property> |
|
292 | 319 |
<name>export_documentssimilarity_threshold</name> |
293 | 320 |
<value>$UNDEFINED$</value> |
294 | 321 |
<description>documents similarity threshold value below which similarity export is omitted</description> |
... | ... | |
444 | 471 |
<name>content_read_timeout</name> |
445 | 472 |
<value>${import_content_read_timeout}</value> |
446 | 473 |
</property> |
447 |
<!-- metadata extraction related --> |
|
448 |
<property> |
|
449 |
<name>metadataextraction_excluded_checksums</name> |
|
450 |
<value>${metadataextraction_excluded_checksums}</value> |
|
451 |
</property> |
|
452 |
<property> |
|
453 |
<name>metadataextraction_max_file_size_mb</name> |
|
454 |
<value>${metadataextraction_max_file_size_mb}</value> |
|
455 |
</property> |
|
456 |
<property> |
|
457 |
<name>metadataextraction_default_cache_location</name> |
|
458 |
<value>${metadataextraction_default_cache_location}</value> |
|
459 |
</property> |
|
474 |
<!-- metadata extraction related properties are automatically propagated --> |
|
460 | 475 |
<!-- metadatainput and metadataextraction output subdirectory names --> |
461 | 476 |
<property> |
462 | 477 |
<name>metadataimport_output_name_document_meta</name> |
... | ... | |
496 | 511 |
<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value> |
497 | 512 |
</property> |
498 | 513 |
<property> |
499 |
<name>output_citation_pmc</name> |
|
500 |
<value>${workingDir}/mainworkflows_common_import/citation_pmc</value> |
|
501 |
</property> |
|
502 |
<property> |
|
503 | 514 |
<name>output_document_text</name> |
504 | 515 |
<value>${workingDir}/mainworkflows_common_import/document-text</value> |
505 | 516 |
</property> |
... | ... | |
589 | 600 |
<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value> |
590 | 601 |
</property> |
591 | 602 |
<property> |
592 |
<name>input_citation_pmc</name> |
|
593 |
<value>${workingDir}/mainworkflows_common_import/citation_pmc</value> |
|
594 |
</property> |
|
595 |
<property> |
|
596 | 603 |
<name>input_project_concept</name> |
597 | 604 |
<value>${workingDir}/mainworkflows_common_import/project-concept</value> |
598 | 605 |
</property> |
... | ... | |
737 | 744 |
<action name="transformers_export_documentmetadata"> |
738 | 745 |
<sub-workflow> |
739 | 746 |
<app-path>${wf:appPath()}/transformers_export_documentmetadata</app-path> |
747 |
<propagate-configuration/> |
|
740 | 748 |
<configuration> |
741 | 749 |
<property> |
742 |
<name>jobTracker</name> |
|
743 |
<value>${jobTracker}</value> |
|
744 |
</property> |
|
745 |
<property> |
|
746 |
<name>nameNode</name> |
|
747 |
<value>${nameNode}</value> |
|
748 |
</property> |
|
749 |
<property> |
|
750 |
<name>queueName</name> |
|
751 |
<value>${queueName}</value> |
|
752 |
</property> |
|
753 |
<!-- Working directory of the subworkflow --> |
|
754 |
<property> |
|
755 | 750 |
<name>workingDir</name> |
756 | 751 |
<value>${workingDir}/transformers_export_documentmetadata/working_dir</value> |
757 | 752 |
</property> |
... | ... | |
944 | 939 |
<name>action_set_id_entity_dataset</name> |
945 | 940 |
<value>${export_action_set_id_entity_dataset}</value> |
946 | 941 |
</property> |
942 |
|
|
947 | 943 |
<property> |
944 |
<name>trust_level_threshold</name> |
|
945 |
<value>${export_trust_level_threshold}</value> |
|
946 |
</property> |
|
947 |
<property> |
|
948 |
<name>trust_level_threshold_document_referencedProjects</name> |
|
949 |
<value>${export_trust_level_threshold_document_referencedProjects}</value> |
|
950 |
</property> |
|
951 |
<property> |
|
952 |
<name>trust_level_threshold_document_referencedDatasets</name> |
|
953 |
<value>${export_trust_level_threshold_document_referencedDatasets}</value> |
|
954 |
</property> |
|
955 |
<property> |
|
956 |
<name>trust_level_threshold_document_classes</name> |
|
957 |
<value>${export_trust_level_threshold_document_classes}</value> |
|
958 |
</property> |
|
959 |
<property> |
|
960 |
<name>trust_level_threshold_document_pdb</name> |
|
961 |
<value>${export_trust_level_threshold_document_pdb}</value> |
|
962 |
</property> |
|
963 |
<property> |
|
948 | 964 |
<name>action_hbase_remote_zookeeper_quorum</name> |
949 | 965 |
<value>${export_action_hbase_remote_zookeeper_quorum}</value> |
950 | 966 |
</property> |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/mainworkflows/importer/mapred/PredefinedTableInputFormat.java | ||
---|---|---|
41 | 41 |
|
42 | 42 |
public static final String DEFAULT_CHARSET = "utf8"; |
43 | 43 |
|
44 |
public static final String PART_SEPARATOR = ":";
|
|
44 |
public static final String PART_SEPARATOR = "#";
|
|
45 | 45 |
|
46 | 46 |
public static class FakeSplit extends InputSplit implements Writable { |
47 | 47 |
public void write(DataOutput out) throws IOException { } |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/primary/processing/oozie_app/workflow.xml | ||
---|---|---|
85 | 85 |
<description>input extracted document metadata directory</description> |
86 | 86 |
</property> |
87 | 87 |
<property> |
88 |
<name>input_citation_pmc</name> |
|
89 |
<description>input directory holding citations extracted from PMC</description> |
|
90 |
</property> |
|
91 |
<property> |
|
92 | 88 |
<name>input_project_concept</name> |
93 | 89 |
<description>input project concept directory</description> |
94 | 90 |
</property> |
... | ... | |
159 | 155 |
</property> |
160 | 156 |
<property> |
161 | 157 |
<name>output_citation</name> |
162 |
<description>output containing grouped citations coming from citation matching and pmc ingestion</description>
|
|
158 |
<description>output containing grouped citations coming from citation fuzzy and direct matching modules</description>
|
|
163 | 159 |
</property> |
164 | 160 |
<property> |
165 | 161 |
<name>output_document_similarity</name> |
... | ... | |
693 | 689 |
<!-- citation matching part --> |
694 | 690 |
<decision name="decision-citationmatching"> |
695 | 691 |
<switch> |
696 |
<case to="transformers_citationmatching">${active_citationmatching eq "true"}</case> |
|
692 |
<case to="transformers_citationmatching_direct">${active_citationmatching eq "true"}</case>
|
|
697 | 693 |
<default to="skip-citationmatching"/> |
698 | 694 |
</switch> |
699 | 695 |
</decision> |
700 | 696 |
|
697 |
<!-- preparing citation matching input --> |
|
698 |
<action name="transformers_citationmatching_direct"> |
|
699 |
<sub-workflow> |
|
700 |
<app-path>${wf:appPath()}/transformers_citationmatching_direct</app-path> |
|
701 |
<propagate-configuration/> |
|
702 |
<configuration> |
|
703 |
<property> |
|
704 |
<name>workingDir</name> |
|
705 |
<value>${workingDir}/transformers_citationmatching_direct/working_dir</value> |
|
706 |
</property> |
|
707 |
<property> |
|
708 |
<name>input</name> |
|
709 |
<value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value> |
|
710 |
</property> |
|
711 |
<property> |
|
712 |
<name>output</name> |
|
713 |
<value>${workingDir}/transformers_citationmatching_direct/output_citation_metadata</value> |
|
714 |
</property> |
|
715 |
</configuration> |
|
716 |
</sub-workflow> |
|
717 |
<ok to="citationmatching_direct"/> |
|
718 |
<error to="fail" /> |
|
719 |
</action> |
|
720 |
|
|
721 |
<action name="citationmatching_direct"> |
|
722 |
<sub-workflow> |
|
723 |
<app-path>${wf:appPath()}/citationmatching_direct</app-path> |
|
724 |
<propagate-configuration/> |
|
725 |
<configuration> |
|
726 |
<property> |
|
727 |
<name>workingDir</name> |
|
728 |
<value>${workingDir}/citationmatching_direct/working_dir</value> |
|
729 |
</property> |
|
730 |
<property> |
|
731 |
<name>input</name> |
|
732 |
<value>${workingDir}/transformers_citationmatching_direct/output_citation_metadata</value> |
|
733 |
</property> |
|
734 |
<property> |
|
735 |
<name>output</name> |
|
736 |
<value>${workingDir}/citationmatching_direct/output</value> |
|
737 |
</property> |
|
738 |
</configuration> |
|
739 |
</sub-workflow> |
|
740 |
<ok to="transformers_citationmatching" /> |
|
741 |
<error to="fail" /> |
|
742 |
</action> |
|
743 |
|
|
744 |
<!-- preparing citation matching input --> |
|
701 | 745 |
<action name="transformers_citationmatching"> |
702 | 746 |
<sub-workflow> |
703 | 747 |
<app-path>${wf:appPath()}/transformers_citationmatching</app-path> |
... | ... | |
748 | 792 |
</property> |
749 | 793 |
</configuration> |
750 | 794 |
</sub-workflow> |
795 |
<ok to="transformers_citations_from_matching"/> |
|
796 |
<error to="fail" /> |
|
797 |
</action> |
|
798 |
|
|
799 |
<!-- normalize and group citations part --> |
|
800 |
<action name="transformers_citations_from_matching"> |
|
801 |
<sub-workflow> |
|
802 |
<app-path>${wf:appPath()}/transformers_citations_from_matching</app-path> |
|
803 |
<propagate-configuration/> |
|
804 |
<configuration> |
|
805 |
<property> |
|
806 |
<name>workingDir</name> |
|
807 |
<value>${workingDir}/transformers_citations_from_matching/working_dir</value> |
|
808 |
</property> |
|
809 |
<property> |
|
810 |
<name>input</name> |
|
811 |
<value>${workingDir}/citationmatching_chain/output</value> |
|
812 |
</property> |
|
813 |
<property> |
|
814 |
<name>output</name> |
|
815 |
<value>${workingDir}/transformers_citations_from_matching/output</value> |
|
816 |
</property> |
|
817 |
</configuration> |
|
818 |
</sub-workflow> |
|
819 |
<ok to="transformers_citations_from_ingestpmc"/> |
|
820 |
<error to="fail" /> |
|
821 |
</action> |
|
822 |
|
|
823 |
<action name="transformers_citations_from_ingestpmc"> |
|
824 |
<sub-workflow> |
|
825 |
<app-path>${wf:appPath()}/transformers_citations_from_ingestpmc</app-path> |
|
826 |
<propagate-configuration/> |
|
827 |
<configuration> |
|
828 |
<property> |
|
829 |
<name>workingDir</name> |
|
830 |
<value>${workingDir}/transformers_citations_from_ingestpmc/working_dir</value> |
|
831 |
</property> |
|
832 |
<property> |
|
833 |
<name>input</name> |
|
834 |
<value>${workingDir}/citationmatching_direct/output</value> |
|
835 |
</property> |
|
836 |
<property> |
|
837 |
<name>output</name> |
|
838 |
<value>${workingDir}/transformers_citations_from_ingestpmc/output</value> |
|
839 |
</property> |
|
840 |
</configuration> |
|
841 |
</sub-workflow> |
|
842 |
<ok to="transformers_citations_from_referencemetadata"/> |
|
843 |
<error to="fail" /> |
|
844 |
</action> |
|
845 |
|
|
846 |
<action name="transformers_citations_from_referencemetadata"> |
|
847 |
<sub-workflow> |
|
848 |
<app-path>${wf:appPath()}/transformers_citations_from_referencemetadata</app-path> |
|
849 |
<propagate-configuration/> |
|
850 |
<configuration> |
|
851 |
<property> |
|
852 |
<name>workingDir</name> |
|
853 |
<value>${workingDir}/transformers_citations_from_referencemetadata/working_dir</value> |
|
854 |
</property> |
|
855 |
<property> |
|
856 |
<name>input</name> |
|
857 |
<value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value> |
|
858 |
</property> |
|
859 |
<property> |
|
860 |
<name>output</name> |
|
861 |
<value>${workingDir}/transformers_citations_from_referencemetadata/output</value> |
|
862 |
</property> |
|
863 |
</configuration> |
|
864 |
</sub-workflow> |
|
865 |
<ok to="citations_union"/> |
|
866 |
<error to="fail" /> |
|
867 |
</action> |
|
868 |
|
|
869 |
<action name="citations_union"> |
|
870 |
<sub-workflow> |
|
871 |
<app-path>${wf:appPath()}/transformers_common_union3</app-path> |
|
872 |
<propagate-configuration/> |
|
873 |
<configuration> |
|
874 |
<property> |
|
875 |
<name>workingDir</name> |
|
876 |
<value>${workingDir}/citations_union/working_dir</value> |
|
877 |
</property> |
|
878 |
<property> |
|
879 |
<name>input_a</name> |
|
880 |
<value>${workingDir}/transformers_citations_from_ingestpmc/output</value> |
|
881 |
</property> |
|
882 |
<property> |
|
883 |
<name>input_b</name> |
|
884 |
<value>${workingDir}/transformers_citations_from_matching/output</value> |
|
885 |
</property> |
|
886 |
<property> |
|
887 |
<name>input_c</name> |
|
888 |
<value>${workingDir}/transformers_citations_from_referencemetadata/output</value> |
|
889 |
</property> |
|
890 |
<property> |
|
891 |
<name>output</name> |
|
892 |
<value>${workingDir}/citations_union/output</value> |
|
893 |
</property> |
|
894 |
<property> |
|
895 |
<name>schema</name> |
|
896 |
<value>eu.dnetlib.iis.common.citations.schemas.Citation</value> |
|
897 |
</property> |
|
898 |
</configuration> |
|
899 |
</sub-workflow> |
|
900 |
<ok to="citations_collapser"/> |
|
901 |
<error to="fail" /> |
|
902 |
</action> |
|
903 |
|
|
904 |
<action name="citations_collapser"> |
|
905 |
<sub-workflow> |
|
906 |
<app-path>${wf:appPath()}/collapsers_basic_collapser</app-path> |
|
907 |
<propagate-configuration/> |
|
908 |
<configuration> |
|
909 |
<property> |
|
910 |
<name>workingDir</name> |
|
911 |
<value>${workingDir}/citations_collapser/working_dir</value> |
|
912 |
</property> |
|
913 |
<property> |
|
914 |
<name>blocking_field</name> |
|
915 |
<value>sourceDocumentId</value> |
|
916 |
</property> |
|
917 |
<property> |
|
918 |
<name>record_collapser</name> |
|
919 |
<value>eu.dnetlib.iis.collapsers.basic.GenericCitationCollapser</value> |
|
920 |
</property> |
|
921 |
<property> |
|
922 |
<name>schema</name> |
|
923 |
<value>eu.dnetlib.iis.common.citations.schemas.Citation</value> |
|
924 |
</property> |
|
925 |
<!-- Input ports. --> |
|
926 |
<property> |
|
927 |
<name>input</name> |
|
928 |
<value>${workingDir}/citations_union/output</value> |
|
929 |
</property> |
|
930 |
<!-- Output port bound to given path --> |
|
931 |
<property> |
|
932 |
<name>output</name> |
|
933 |
<value>${workingDir}/citations_collapser/output</value> |
|
934 |
</property> |
|
935 |
</configuration> |
|
936 |
</sub-workflow> |
|
937 |
<ok to="transformers_export_citations"/> |
|
938 |
<error to="fail"/> |
|
939 |
</action> |
|
940 |
|
|
941 |
<action name="transformers_export_citations"> |
|
942 |
<sub-workflow> |
|
943 |
<app-path>${wf:appPath()}/transformers_export_citations</app-path> |
|
944 |
<propagate-configuration/> |
|
945 |
<configuration> |
|
946 |
<property> |
|
947 |
<name>workingDir</name> |
|
948 |
<value>${workingDir}/transformers_export_citations/working_dir</value> |
|
949 |
</property> |
|
950 |
<property> |
|
951 |
<name>input</name> |
|
952 |
<value>${workingDir}/citations_collapser/output</value> |
|
953 |
</property> |
|
954 |
<property> |
|
955 |
<name>output</name> |
|
956 |
<value>${output_citation}</value> |
|
957 |
</property> |
|
958 |
</configuration> |
|
959 |
</sub-workflow> |
|
751 | 960 |
<ok to="decision-documentssimilarity"/> |
752 | 961 |
<error to="fail" /> |
753 | 962 |
</action> |
754 |
|
|
963 |
|
|
964 |
<!-- end of normalize and group citations part --> |
|
755 | 965 |
<action name="skip-citationmatching"> |
756 | 966 |
<java> |
757 | 967 |
<prepare> |
758 | 968 |
<!-- notice: directory has to be aligned with the skipped action's output --> |
759 |
<delete path="${nameNode}${workingDir}/citationmatching_chain" />
|
|
760 |
<mkdir path="${nameNode}${workingDir}/citationmatching_chain" />
|
|
969 |
<delete path="${nameNode}${workingDir}/transformers_export_citations" />
|
|
970 |
<mkdir path="${nameNode}${workingDir}/transformers_export_citations" />
|
|
761 | 971 |
</prepare> |
762 | 972 |
<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class> |
763 | 973 |
<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg> |
764 |
<arg>-C{citation, |
|
765 |
eu.dnetlib.iis.citationmatching.schemas.Citation,
|
|
974 |
<arg>-C{citations,
|
|
975 |
eu.dnetlib.iis.export.schemas.Citations,
|
|
766 | 976 |
eu/dnetlib/iis/mainworkflows/data/empty.json}</arg> |
767 | 977 |
<!-- notice: directory has to be aligned with the skipped action's output --> |
768 |
<arg>-Ocitation=${workingDir}/citationmatching_chain/output</arg>
|
|
978 |
<arg>-Ocitations=${output_citation}</arg>
|
|
769 | 979 |
</java> |
770 | 980 |
<ok to="decision-documentssimilarity"/> |
771 | 981 |
<error to="fail"/> |
... | ... | |
979 | 1189 |
</property> |
980 | 1190 |
</configuration> |
981 | 1191 |
</sub-workflow> |
982 |
<ok to="transformers_citations_from_matching"/>
|
|
1192 |
<ok to="end"/>
|
|
983 | 1193 |
<error to="fail" /> |
984 | 1194 |
</action> |
985 | 1195 |
|
... | ... | |
1022 | 1232 |
<arg>-Oglobal_statistics=${workingDir}/statistics/output_global_statistics</arg> |
1023 | 1233 |
--> |
1024 | 1234 |
</java> |
1025 |
<ok to="transformers_citations_from_matching"/>
|
|
1235 |
<ok to="end"/>
|
|
1026 | 1236 |
<error to="fail"/> |
1027 | 1237 |
</action> |
1028 | 1238 |
<!-- end of statistics part --> |
1239 |
|
|
1029 | 1240 |
|
1030 |
<!-- normalize and group citations part --> |
|
1031 |
<action name="transformers_citations_from_matching"> |
|
1032 |
<sub-workflow> |
|
1033 |
<app-path>${wf:appPath()}/transformers_citations_from_matching</app-path> |
|
1034 |
<propagate-configuration/> |
|
1035 |
<configuration> |
|
1036 |
<property> |
|
1037 |
<name>workingDir</name> |
|
1038 |
<value>${workingDir}/transformers_citations_from_matching/working_dir</value> |
|
1039 |
</property> |
|
1040 |
<property> |
|
1041 |
<name>input</name> |
|
1042 |
<value>${workingDir}/citationmatching_chain/output</value> |
|
1043 |
</property> |
|
1044 |
<property> |
|
1045 |
<name>output</name> |
|
1046 |
<value>${workingDir}/transformers_citations_from_matching/output</value> |
|
1047 |
</property> |
|
1048 |
</configuration> |
|
1049 |
</sub-workflow> |
|
1050 |
<ok to="transformers_citations_from_ingestpmc"/> |
|
1051 |
<error to="fail" /> |
|
1052 |
</action> |
|
1053 |
|
|
1054 |
<action name="transformers_citations_from_ingestpmc"> |
|
1055 |
<sub-workflow> |
|
1056 |
<app-path>${wf:appPath()}/transformers_citations_from_ingestpmc</app-path> |
|
1057 |
<propagate-configuration/> |
|
1058 |
<configuration> |
|
1059 |
<property> |
|
1060 |
<name>workingDir</name> |
|
1061 |
<value>${workingDir}/transformers_citations_from_ingestpmc/working_dir</value> |
|
1062 |
</property> |
|
1063 |
<property> |
|
1064 |
<name>input</name> |
|
1065 |
<value>${input_citation_pmc}</value> |
|
1066 |
</property> |
|
1067 |
<property> |
|
1068 |
<name>output</name> |
|
1069 |
<value>${workingDir}/transformers_citations_from_ingestpmc/output</value> |
|
1070 |
</property> |
|
1071 |
</configuration> |
|
1072 |
</sub-workflow> |
|
1073 |
<ok to="citations_collapser"/> |
|
1074 |
<error to="fail" /> |
|
1075 |
</action> |
|
1076 |
|
|
1077 |
<action name="citations_collapser"> |
|
1078 |
<sub-workflow> |
|
1079 |
<app-path>${wf:appPath()}/collapsers_multiple_input_collapser</app-path> |
|
1080 |
<propagate-configuration/> |
|
1081 |
<configuration> |
|
1082 |
<property> |
|
1083 |
<name>workingDir</name> |
|
1084 |
<value>${workingDir}/citations_collapser/working_dir</value> |
|
1085 |
</property> |
|
1086 |
<!-- Input ports & parameters. --> |
|
1087 |
<property> |
|
1088 |
<name>origin_1</name> |
|
1089 |
<value>ingested</value> |
|
1090 |
</property> |
|
1091 |
<property> |
|
1092 |
<name>input_1</name> |
|
1093 |
<value>${workingDir}/transformers_citations_from_ingestpmc/output</value> |
|
1094 |
</property> |
|
1095 |
<property> |
|
1096 |
<name>origin_2</name> |
|
1097 |
<value>matched</value> |
|
1098 |
</property> |
|
1099 |
<property> |
|
1100 |
<name>input_2</name> |
|
1101 |
<value>${workingDir}/transformers_citations_from_matching/output</value> |
|
1102 |
</property> |
|
1103 |
<property> |
|
1104 |
<name>blocking_field</name> |
|
1105 |
<value>sourceDocumentId</value> |
|
1106 |
</property> |
|
1107 |
<property> |
|
1108 |
<name>schema_input</name> |
|
1109 |
<value>eu.dnetlib.iis.common.citations.schemas.Citation</value> |
|
1110 |
</property> |
|
1111 |
<property> |
|
1112 |
<name>output</name> |
|
1113 |
<value>${workingDir}/citations_collapser/output</value> |
|
1114 |
</property> |
|
1115 |
<property> |
|
1116 |
<name>schema_input_envelope</name> |
|
1117 |
<value>eu.dnetlib.iis.common.citations.schemas.CitationEnvelope</value> |
|
1118 |
</property> |
|
1119 |
<property> |
|
1120 |
<name>record_collapser</name> |
|
1121 |
<value>eu.dnetlib.iis.collapsers.origins.PMCCitationCollapser</value> |
|
1122 |
</property> |
|
1123 |
</configuration> |
|
1124 |
</sub-workflow> |
|
1125 |
<ok to="transformers_export_citations"/> |
|
1126 |
<error to="fail" /> |
|
1127 |
</action> |
|
1128 |
|
|
1129 |
<action name="transformers_export_citations"> |
|
1130 |
<sub-workflow> |
|
1131 |
<app-path>${wf:appPath()}/transformers_export_citations</app-path> |
|
1132 |
<propagate-configuration/> |
|
1133 |
<configuration> |
|
1134 |
<property> |
|
1135 |
<name>workingDir</name> |
|
1136 |
<value>${workingDir}/transformers_export_citations/working_dir</value> |
|
1137 |
</property> |
|
1138 |
<property> |
|
1139 |
<name>input</name> |
|
1140 |
<value>${workingDir}/citations_collapser/output</value> |
|
1141 |
</property> |
|
1142 |
<property> |
|
1143 |
<name>output</name> |
|
1144 |
<value>${output_citation}</value> |
|
1145 |
</property> |
|
1146 |
</configuration> |
|
1147 |
</sub-workflow> |
|
1148 |
<ok to="end"/> |
|
1149 |
<error to="fail" /> |
|
1150 |
</action> |
|
1151 |
|
|
1152 |
<!-- end of normalize and group citations part --> |
|
1153 |
|
|
1154 | 1241 |
<kill name="fail"> |
1155 | 1242 |
<message>Unfortunately, the process failed -- error message: |
1156 | 1243 |
[${wf:errorMessage(wf:lastErrorNode())}]</message> |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/common/import/oozie_app/import.txt | ||
---|---|---|
8 | 8 |
ingest_html_plaintext classpath eu/dnetlib/iis/ingest/html/plaintext/oozie_app |
9 | 9 |
ingest_pmc_plaintext classpath eu/dnetlib/iis/ingest/pmc/plaintext/oozie_app |
10 | 10 |
ingest_pmc_metadata classpath eu/dnetlib/iis/ingest/pmc/metadata/oozie_app |
11 |
ingest_pmc_citations classpath eu/dnetlib/iis/transformers/ingest/pmc/citations/oozie_app |
|
12 |
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app |
|
13 | 11 |
multiple_input_collapser classpath eu/dnetlib/iis/collapsers/multiple_input_collapser/oozie_app |
14 | 12 |
metadataextraction classpath eu/dnetlib/iis/metadataextraction/oozie_app |
15 | 13 |
metadataextraction_cached classpath eu/dnetlib/iis/mainworkflows/metadataextraction/cached_by_checksum/oozie_app |
16 | 14 |
transformers_common_union4 classpath eu/dnetlib/iis/transformers/common/union4/oozie_app |
17 | 15 |
transformers_idextractor classpath eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/oozie_app |
18 |
transformers_externalidtooaid classpath eu/dnetlib/iis/transformers/importer/documentmetadata/externalidtooaid/oozie_app |
|
19 |
transformers_ingest_pmc_metadata classpath eu/dnetlib/iis/transformers/ingest/pmc/metadata/oozie_app |
|
20 |
ingest_pmc_idmapping_pmidtooaid classpath eu/dnetlib/iis/ingest/pmc/idmapping/pmidtooaid/oozie_app |
|
16 |
transformers_ingest_pmc_metadata classpath eu/dnetlib/iis/transformers/ingest/pmc/metadata/oozie_app |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/metadataextraction/extracted_document_metadata.json | ||
---|---|---|
4 | 4 |
"abstract": "The aim of this paper is to present video quality prediction models for objective non-intrusive, prediction of H.264 encoded video for all content types combining parameters both in the physical and application layer over Universal Mobile Telecommunication Systems (UMTS) networks. In order to characterize the Quality of Service (QoS) level, a learning model based on Adaptive Neural Fuzzy Inference System (ANFIS) and a second model based on non-linear regression analysis is proposed to predict the video quality in terms of the Mean Opinion Score (MOS). The objective of the paper is two-fold. First, to find the impact of QoS parameters on end-to-end video quality for H.264 encoded video. Second, to develop learning models based on ANFIS and nonlinear regression analysis to predict video quality over UMTS networks by considering the impact of radio link loss models. The loss models considered are 2-state Markov models. Both the models are trained with a combination of physical and application layer parameters and validated with unseen dataset. Preliminary results show that good prediction accuracy was obtained from both the models. The work should help in the development of a reference-free video prediction model and QoS control methods for video over UMTS networks.", |
5 | 5 |
"language": "eng", |
6 | 6 |
"keywords": null, |
7 |
"externalIdentifiers": null, |
|
7 |
"externalIdentifiers": { |
|
8 |
"pmid": "1234567" |
|
9 |
}, |
|
8 | 10 |
"journal": "International Journal of Digital Multimedia Broadcasting", |
9 | 11 |
"year": null, |
10 | 12 |
"publisher": null, |
... | ... | |
13 | 15 |
"affiliations": null, |
14 | 16 |
"volume": "19", |
15 | 17 |
"issue": "2", |
16 |
"pages": {"start": "121", "end": "132"} |
|
18 |
"pages": {"start": "121", "end": "132"}, |
|
19 |
"publicationTypeName": null |
|
17 | 20 |
} |
18 | 21 |
{ |
19 | 22 |
"id": "id-2", |
... | ... | |
30 | 33 |
"affiliations": null, |
31 | 34 |
"volume": "3", |
32 | 35 |
"issue": null, |
33 |
"pages": {"start": "4428", "end": "4445"} |
|
36 |
"pages": {"start": "4428", "end": "4445"}, |
|
37 |
"publicationTypeName": null |
|
34 | 38 |
} |
35 | 39 |
{ |
36 | 40 |
"id": "id-3", |
... | ... | |
60 | 64 |
"start": "803", |
61 | 65 |
"end": "826" |
62 | 66 |
}, |
63 |
"location": null |
|
67 |
"location": null, |
|
68 |
"externalIds": null |
|
64 | 69 |
}, |
65 | 70 |
"text": "J. Abernethy, F. Bach, T. Evgeniou, and J.-P. Vert. A new approach to collaborative filtering: Operator estimation with spectral regularization. Journal of Machine Learning Research, 10:803-826, 2009." |
66 | 71 |
}, |
67 | 72 |
{ |
73 |
"position": 3, |
|
74 |
"basicMetadata": { |
|
75 |
"publisher": null, |
|
76 |
"title": "Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes.", |
|
77 |
"url": null, |
|
78 |
"series": null, |
|
79 |
"authors": ["Kovach, M.E.", "Elzer, P.H.", "Hill, D.S.", "Robertson, G.T.", "Farris, M.A.", "Roop, R.M.", "Peterson, K.M."], |
|
80 |
"volume": null, |
|
81 |
"edition": null, |
|
82 |
"source": null, |
|
83 |
"year": null, |
|
84 |
"issue": null, |
|
85 |
"pages": null, |
|
86 |
"location": null, |
|
87 |
"externalIds": { |
|
88 |
"pmid": "1234567" |
|
89 |
} |
|
90 |
}, |
|
91 |
"text": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176" |
|
92 |
}, |
|
93 |
{ |
|
68 | 94 |
"position": 9, |
69 | 95 |
"basicMetadata": { |
70 | 96 |
"publisher": null, |
... | ... | |
78 | 104 |
"year": "2008", |
79 | 105 |
"issue": null, |
80 | 106 |
"pages": null, |
81 |
"location": null |
|
107 |
"location": null, |
|
108 |
"externalIds": null |
|
82 | 109 |
}, |
83 | 110 |
"text": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008." |
84 | 111 |
} |
... | ... | |
87 | 114 |
"affiliations": null, |
88 | 115 |
"volume": null, |
89 | 116 |
"issue": "0", |
90 |
"pages": {"start": "69", "end": "110"} |
|
117 |
"pages": {"start": "69", "end": "110"}, |
|
118 |
"publicationTypeName": null |
|
91 | 119 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/project.json | ||
---|---|---|
1 |
{"id":"WT::08616","projectAcronym":"","projectGrantId":"08616","fundingClass":"WT"} |
|
2 |
{"id":"ec::226639","projectAcronym":"INTIF","projectGrantId":"226639","fundingClass":"FP7"} |
|
1 |
{"id":"WT::08616","projectAcronym":"","projectGrantId":"08616","fundingClass":"WT::WT"} |
|
2 |
{"id":"ec::226639","projectAcronym":"INTIF","projectGrantId":"226639","fundingClass":"EC::FP7"} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/output/document_classes.json | ||
---|---|---|
76 | 76 |
"Arts \u0026 recreation", |
77 | 77 |
"Sports, games \u0026 entertainment" |
78 | 78 |
], |
79 |
"confidenceLevel": 0.613
|
|
79 |
"confidenceLevel": 0.595
|
|
80 | 80 |
}, |
81 | 81 |
{ |
82 | 82 |
"classLabels": [ |
... | ... | |
97 | 97 |
"Arts \u0026 recreation", |
98 | 98 |
"Music" |
99 | 99 |
], |
100 |
"confidenceLevel": 0.525
|
|
100 |
"confidenceLevel": 0.508
|
|
101 | 101 |
}, |
102 | 102 |
{ |
103 | 103 |
"classLabels": [ |
104 | 104 |
"Arts \u0026 recreation", |
105 | 105 |
"Arts" |
106 | 106 |
], |
107 |
"confidenceLevel": 0.472
|
|
107 |
"confidenceLevel": 0.455
|
|
108 | 108 |
} |
109 | 109 |
], |
110 | 110 |
"meshEuroPMCClasses": [ |
... | ... | |
265 | 265 |
"Science", |
266 | 266 |
"Physics" |
267 | 267 |
], |
268 |
"confidenceLevel": 0.951
|
|
268 |
"confidenceLevel": 0.945
|
|
269 | 269 |
}, |
270 | 270 |
{ |
271 | 271 |
"classLabels": [ |
272 | 272 |
"Technology", |
273 | 273 |
"Technology" |
274 | 274 |
], |
275 |
"confidenceLevel": 0.93
|
|
275 |
"confidenceLevel": 0.927
|
|
276 | 276 |
}, |
277 | 277 |
{ |
278 | 278 |
"classLabels": [ |
279 | 279 |
"Technology", |
280 | 280 |
"Chemical engineering" |
281 | 281 |
], |
282 |
"confidenceLevel": 0.79
|
|
282 |
"confidenceLevel": 0.78
|
|
283 | 283 |
} |
284 | 284 |
], |
285 | 285 |
"meshEuroPMCClasses": [ |
... | ... | |
426 | 426 |
"Science", |
427 | 427 |
"Mathematics" |
428 | 428 |
], |
429 |
"confidenceLevel": 0.957
|
|
429 |
"confidenceLevel": 0.954
|
|
430 | 430 |
}, |
431 | 431 |
{ |
432 | 432 |
"classLabels": [ |
433 | 433 |
"Computer science, information \u0026 general works", |
434 | 434 |
"Computer science, knowledge \u0026 systems" |
435 | 435 |
], |
436 |
"confidenceLevel": 0.948
|
|
436 |
"confidenceLevel": 0.942
|
|
437 | 437 |
}, |
438 | 438 |
{ |
439 | 439 |
"classLabels": [ |
440 | 440 |
"Social sciences", |
441 | 441 |
"Statistics" |
442 | 442 |
], |
443 |
"confidenceLevel": 0.939
|
|
443 |
"confidenceLevel": 0.936
|
|
444 | 444 |
}, |
445 | 445 |
{ |
446 | 446 |
"classLabels": [ |
447 | 447 |
"Social sciences", |
448 | 448 |
"Law" |
449 | 449 |
], |
450 |
"confidenceLevel": 0.56
|
|
450 |
"confidenceLevel": 0.542
|
|
451 | 451 |
}, |
452 | 452 |
{ |
453 | 453 |
"classLabels": [ |
454 | 454 |
"Science", |
455 | 455 |
"Chemistry" |
456 | 456 |
], |
457 |
"confidenceLevel": 0.35
|
|
457 |
"confidenceLevel": 0.333
|
|
458 | 458 |
} |
459 | 459 |
], |
460 | 460 |
"meshEuroPMCClasses": [ |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/output/citations.json | ||
---|---|---|
2 | 2 |
"documentId": "id-3", |
3 | 3 |
"citations": [ |
4 | 4 |
{ |
5 |
"rawText": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008.", |
|
6 |
"destinationDocumentId": "id-4", |
|
7 |
"confidenceLevel": 0.7486356, |
|
8 |
"externalDestinationDocumentIds": {} |
|
9 |
}, |
|
10 |
{ |
|
5 |
"position": 1, |
|
11 | 6 |
"rawText": "J. Abernethy, F. Bach, T. Evgeniou, and J.-P. Vert. A new approach to collaborative filtering: Operator estimation with spectral regularization. Journal of Machine Learning Research, 10:803-826, 2009.", |
12 | 7 |
"destinationDocumentId": null, |
13 | 8 |
"confidenceLevel": null, |
14 | 9 |
"externalDestinationDocumentIds": {} |
15 | 10 |
}, |
16 |
{ |
|
11 |
{ |
|
12 |
"position": 3, |
|
17 | 13 |
"rawText": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176", |
18 |
"destinationDocumentId": "pmc-id",
|
|
14 |
"destinationDocumentId": "id-1",
|
|
19 | 15 |
"confidenceLevel": 1.0, |
20 | 16 |
"externalDestinationDocumentIds": { |
21 |
"pmid": "1234567"
|
|
17 |
"pmid": "1234567" |
|
22 | 18 |
} |
23 |
} |
|
19 |
}, |
|
20 |
{ |
|
21 |
"position": 9, |
|
22 |
"rawText": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008.", |
|
23 |
"destinationDocumentId": "id-4", |
|
24 |
"confidenceLevel": 0.7486356, |
|
25 |
"externalDestinationDocumentIds": {} |
|
26 |
} |
|
24 | 27 |
] |
25 | 28 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/statistics/oozie_app/workflow.xml | ||
---|---|---|
283 | 283 |
<value>${workingDir}/mainworkflows_common_import/metadataimport</value> |
284 | 284 |
</property> |
285 | 285 |
<property> |
286 |
<name>output_citation_pmc</name> |
|
287 |
<value>${workingDir}/mainworkflows_common_import/citation_pmc</value> |
|
288 |
</property> |
|
289 |
<property> |
|
290 | 286 |
<name>output_dataset</name> |
291 | 287 |
<value>${workingDir}/mainworkflows_common_import/dataset</value> |
292 | 288 |
</property> |
... | ... | |
295 | 291 |
<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value> |
296 | 292 |
</property> |
297 | 293 |
<property> |
298 |
<name>output_citation_pmc</name> |
|
299 |
<value>${workingDir}/mainworkflows_common_import/citation_pmc</value> |
|
300 |
</property> |
|
301 |
<property> |
|
302 | 294 |
<name>output_document_text</name> |
303 | 295 |
<value>${workingDir}/mainworkflows_common_import/document-text</value> |
304 | 296 |
</property> |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/oozie_app/workflow.xml | ||
---|---|---|
62 | 62 |
<arg>-C{document_text_wos, |
63 | 63 |
eu.dnetlib.iis.metadataextraction.schemas.DocumentText, |
64 | 64 |
eu/dnetlib/iis/mainworkflows/data/empty.json}</arg> |
65 |
<arg>-C{citation, |
|
66 |
eu.dnetlib.iis.ingest.pmc.citations.schemas.Citation, |
|
67 |
eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/citation_pmc.json}</arg> |
|
68 | 65 |
<arg>-C{document_text_classpath, |
69 | 66 |
eu.dnetlib.iis.mainworkflows.schemas.DocumentContentClasspath, |
70 | 67 |
eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/metadataextraction/document_text_classpath.json}</arg> |
... | ... | |
77 | 74 |
<arg>-Odataset=${workingDir}/producer/dataset</arg> |
78 | 75 |
<arg>-Oextracted_document_metadata=${workingDir}/producer/extracted_document_metadata</arg> |
79 | 76 |
<arg>-Odocument_text_wos=${workingDir}/producer/document_text_wos</arg> |
80 |
<arg>-Ocitation=${workingDir}/producer/citation</arg> |
|
81 | 77 |
<arg>-Odocument_text_classpath=${workingDir}/producer/document_text_classpath</arg> |
82 | 78 |
</java> |
83 | 79 |
<ok to="document_text_producer"/> |
... | ... | |
263 | 259 |
<name>input_extracted_document_metadata</name> |
264 | 260 |
<value>${workingDir}/producer/extracted_document_metadata</value> |
265 | 261 |
</property> |
266 |
<property> |
|
267 |
<name>input_citation_pmc</name> |
|
268 |
<value>${workingDir}/producer/citation</value> |
|
269 |
</property> |
|
270 | 262 |
<property> |
271 | 263 |
<name>output_document_to_project</name> |
272 | 264 |
<value>${workingDir}/exported/document_to_project</value> |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/person/1/person#body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: person |
|
5 |
id: "30|0314fe20-be3c-4bc3-adee-6bbc2cde3cb7_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:DiVA.org:uu-127423_Oliveira__Paulo_UNKNOWN" |
|
6 |
person: { |
|
7 |
metadata: { |
|
8 |
firstname: { |
|
9 |
value: "Oliveira, Paulo" |
|
10 |
} |
|
11 |
fullname: { |
|
12 |
value: "Oliveira, Paulo" |
|
13 |
} |
|
14 |
} |
|
15 |
} |
|
16 |
} |
|
17 |
dataInfo: { |
|
18 |
inferred: false |
|
19 |
deletedbyinference: false |
|
20 |
trust: "0.9" |
|
21 |
provenanceaction: { |
|
22 |
classid: "UNKNOWN" |
|
23 |
classname: "UNKNOWN" |
|
24 |
schemeid: "dnet:provenanceActions" |
|
25 |
schemename: "dnet:provenanceActions" |
|
26 |
} |
|
27 |
} |
|
28 |
timestamp: 1360248058786 |
|
29 |
} |
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/project/1/project#body | ||
---|---|---|
1 |
{ |
|
2 |
kind: entity |
|
3 |
entity: { |
|
4 |
type: project |
|
5 |
id: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43" |
|
6 |
project: { |
|
7 |
metadata: { |
Also available in: Unified diff
merging trunk changes with IIS-CDH-5.3.0 branch