Project

General

Profile

« Previous | Next » 

Revision 39164

Added by Marek Horst over 8 years ago

merging trunk changes with IIS-CDH-5.3.0 branch

View differences:

modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/citation_pmc.json
1
{
2
  "sourceDocumentId": "id-3",
3
  "rawText": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176",
4
  "destinationDocumentId": "pmc-id",
5
  "externalDestinationDocumentIds": {
6
    "pmid": "1234567"
7
  }
8
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/person/1/person:body
1
{
2
	kind: entity
3
  	entity: {
4
    type: person
5
    id: "30|0314fe20-be3c-4bc3-adee-6bbc2cde3cb7_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:DiVA.org:uu-127423_Oliveira__Paulo_UNKNOWN"
6
    person: {
7
      metadata: {
8
        firstname: {
9
        	value: "Oliveira, Paulo"
10
        }
11
        fullname: {
12
        	value: "Oliveira, Paulo"
13
        }
14
      }
15
    }
16
  }
17
  dataInfo: {
18
    inferred: false
19
    deletedbyinference: false
20
    trust: "0.9"
21
    provenanceaction: {
22
      classid: "UNKNOWN"
23
      classname: "UNKNOWN"
24
      schemeid: "dnet:provenanceActions"
25
      schemename: "dnet:provenanceActions"
26
    }
27
  }
28
  timestamp: 1360248058786
29
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/project/1/project:body
1
{
2
  kind: entity
3
  entity: {
4
    type: project
5
    id: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43"
6
    project: {
7
      metadata: {
8
        code: {
9
        	value: "241544"
10
        }
11
        acronym: {
12
        	value: "SYSKID"
13
        }
14
        title: {
15
        	value: "Systems biology towards novel chronic kidney disease diagnosis and treatment"
16
        }
17
        startdate: {
18
        	value: "2010-01-01"
19
        }
20
        enddate: {
21
        	value: "2014-12-31"
22
        }
23
        callidentifier: {
24
        	value: "FP7-HEALTH-2009-single-stage"
25
        }
26
        ecsc39: {
27
        	value: "true"
28
        }
29
        contracttype: {
30
          classid: "CP"
31
          classname: "Collaborative project"
32
          schemeid: "ec:FP7contractTypes"
33
          schemename: "ec:FP7contractTypes"
34
        }
35
        fundingtree: {
36
        	value: "{\"funding_level_2\":{\"class\":\"ec:program\", \"id\":\"corda_______::FP7::SP1::HEALTH\", \"description\":\"Health\", \"name\":\"HEALTH\", \"parent\":{\"funding_level_1\":{\"class\":\"ec:specificprogram\", \"id\":\"corda_______::FP7::SP1\", \"description\":\"SP1-Cooperation\", \"name\":\"SP1\", \"parent\":{\"funding_level_0\":{\"class\":\"ec:frameworkprogram\", \"id\":\"corda_______::FP7\", \"description\":\"SEVENTH FRAMEWORK PROGRAMME\", \"name\":\"FP7\", \"parent\":{}}}}}}}"
37
        }
38
      }
39
    }
40
    originalId: "corda_______::241544"
41
    collectedfrom: {
42
      key: "10|openaire____::b30dac7baac631f3da7c2bb18dd9891f"
43
      value: "CORDA - COmmon Research DAta Warehouse"
44
    }
45
  }
46
  dataInfo: {
47
    inferred: false
48
    deletedbyinference: false
49
    trust: "0.9"
50
    provenanceaction: {
51
      classid: "sysimport:crosswalk:entityregistry"
52
      classname: "sysimport:crosswalk:entityregistry"
53
      schemeid: "dnet:provenanceActions"
54
      schemename: "dnet:provenanceActions"
55
    }
56
  }
57
  timestamp: 1372675091270
58
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dedup/result:body
1
{
2
  kind: entity
3
  entity: {
4
    type: result
5
    id: "50|dedup_wf_001::0154f41ff22d1ac2ef656d2931ac828c"
6
    result: {
7
      metadata: {
8
        title: {
9
          value: "Soldadura entre gr\\303\\241nulos"
10
          qualifier: {
11
            classid: "main title"
12
            classname: "main title"
13
            schemeid: "dnet:dataCite_title"
14
            schemename: "dnet:dataCite_title"
15
          }
16
        }
17
        dateofacceptance: {
18
          value: "2011-01-01"
19
        }
20
        resulttype: {
21
          classid: "publication"
22
          classname: "publication"
23
          schemeid: "dnet:result_typologies"
24
          schemename: "dnet:result_typologies"
25
        }
26
        storagedate: { 
27
        	value: "2012-10-18"
28
        }
29
        language: {
30
          classid: "und"
31
          classname: "Undetermined"
32
          schemeid: "dnet:languages"
33
          schemename: "dnet:languages"
34
        }
35
      }
36
      
37
      instance: {
38
	    licence: {
39
	      classid: "OPEN"
40
	      classname: "Open Access"
41
	      schemeid: "dnet:access_modes"
42
	      schemename: "dnet:access_modes"
43
	    }
44
	    instancetype: {
45
	      classid: "0001"
46
	      classname: "Article"
47
	      schemeid: "dnet:publication_resource"
48
	      schemename: "dnet:publication_resource"
49
	    }
50
	    hostedby: {
51
	      key: "10|opendoar____::9766527f2b5d3e95d4a733fcfb77bd7e"
52
	      value: "INRIA a CCSD electronic archive server"
53
	    }
54
	    url: "http://hal.archives-ouvertes.fr/hal-00613125"
55
	  }
56

  
57
    }
58
    originalId: "crossref____::003b0746ef72ff253ef1e465c758c961"
59
    collectedfrom: {
60
      key: "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"
61
      value: "Crossref"
62
    }
63
    pid: {
64
      value: "doi:10.1002/prca.201000155"
65
      qualifier: {
66
        classid: "doi"
67
        classname: "doi"
68
        schemeid: "dnet:pid_types"
69
        schemename: "dnet:pid_types"
70
      }
71
    }
72
  }
73
  dataInfo: {
74
    inferred: false
75
    deletedbyinference: false
76
    trust: "NEUTRAL"
77
    provenanceaction: {
78
      classid: "user:claim:doi"
79
      classname: "user:claim:doi"
80
      schemeid: "dnet:provenanceActions"
81
      schemename: "dnet:provenanceActions"
82
    }
83
  }
84
  timestamp: 1372359095914
85
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dedup/resultResult_dedup_merges:50|od______1106::8477827a014277dfebb0d5882496a797
1
{
2
  kind: relation
3
  rel: {
4
    relType: resultResult
5
    source: "50|dedup_wf_001::0154f41ff22d1ac2ef656d2931ac828c"
6
    target: "50|od______1106::8477827a014277dfebb0d5882496a797"
7
    child: false
8
    resultResult: {
9
      dedup: {
10
	      relMetadata: {
11
	        semantics: {
12
	          classid: "merges"
13
	          classname: "merges"
14
	          schemeid: "dnet:result_result_relations"
15
	          schemename: "dnet:result_result_relations"
16
	        }
17
	      }
18
	   }
19
    }
20
    subRelType: dedup
21
    relClass: "merges"
22
  }
23
  dataInfo: {
24
    inferred: false
25
    deletedbyinference: false
26
    trust: "0.8"
27
    provenanceaction: {
28
      classid: ""
29
      classname: ""
30
      schemeid: "dnet:provenanceActions"
31
      schemename: "dnet:provenanceActions"
32
    }
33
  }
34
  timestamp: 1423870324129
35
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/result:body
1
{
2
  kind: entity
3
  entity: {
4
    type: result
5
    id: "50|crossref____::003b0746ef72ff253ef1e465c758c961"
6
    result: {
7
      metadata: {
8
        title: {
9
          value: "Human urinary peptide database for multiple disease biomarker discovery"
10
          qualifier: {
11
            classid: "main title"
12
            classname: "main title"
13
            schemeid: "dnet:dataCite_title"
14
            schemename: "dnet:dataCite_title"
15
          }
16
        }
17
        dateofacceptance: {
18
          value: "2011-01-01"
19
        }
20
        resulttype: {
21
          classid: "publication"
22
          classname: "publication"
23
          schemeid: "dnet:result_typologies"
24
          schemename: "dnet:result_typologies"
25
        }
26
        storagedate: { 
27
        	value: "2012-10-18"
28
        }
29
        language: {
30
          classid: "und"
31
          classname: "Undetermined"
32
          schemeid: "dnet:languages"
33
          schemename: "dnet:languages"
34
        }
35
      }
36
      
37
      instance: {
38
	    licence: {
39
	      classid: "OPEN"
40
	      classname: "Open Access"
41
	      schemeid: "dnet:access_modes"
42
	      schemename: "dnet:access_modes"
43
	    }
44
	    instancetype: {
45
	      classid: "0001"
46
	      classname: "Article"
47
	      schemeid: "dnet:publication_resource"
48
	      schemename: "dnet:publication_resource"
49
	    }
50
	    hostedby: {
51
	      key: "10|opendoar____::9766527f2b5d3e95d4a733fcfb77bd7e"
52
	      value: "INRIA a CCSD electronic archive server"
53
	    }
54
	    url: "http://hal.archives-ouvertes.fr/hal-00613125"
55
	  }
56

  
57
    }
58
    originalId: "crossref____::003b0746ef72ff253ef1e465c758c961"
59
    collectedfrom: {
60
      key: "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2"
61
      value: "Crossref"
62
    }
63
    pid: {
64
      value: "doi:10.1002/prca.201000155"
65
      qualifier: {
66
        classid: "doi"
67
        classname: "doi"
68
        schemeid: "dnet:pid_types"
69
        schemename: "dnet:pid_types"
70
      }
71
    }
72
  }
73
  dataInfo: {
74
    inferred: false
75
    deletedbyinference: false
76
    trust: "NEUTRAL"
77
    provenanceaction: {
78
      classid: "user:claim:doi"
79
      classname: "user:claim:doi"
80
      schemeid: "dnet:provenanceActions"
81
      schemename: "dnet:provenanceActions"
82
    }
83
  }
84
  timestamp: 1372359095914
85
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/personResult_authorship_hasAuthor:30|crossref____::7c844171342140b6133fb0536dba9843
1
{
2
  kind: relation
3
  rel: {
4
    relType: personResult
5
    source: "50|crossref____::003b0746ef72ff253ef1e465c758c961"
6
    target: "30|crossref____::7c844171342140b6133fb0536dba9843"
7
    child: false
8
    personResult: {
9
      authorship: {
10
      		relMetadata: {
11
        		semantics: {
12
          			classid: "hasAuthor"
13
          			classname: "hasAuthor"
14
          			schemeid: "dnet:personroles"
15
          			schemename: "dnet:personroles"
16
        		}
17
      		}
18
      		ranking: "4"
19
    	}
20
  	}
21
  	subRelType: authorship
22
  	relClass: "hasAuthor"
23
  }
24
  dataInfo: {
25
    inferred: false
26
    deletedbyinference: false
27
    trust: "NEUTRAL"
28
    provenanceaction: {
29
      classid: "user:claim:doi"
30
      classname: "user:claim:doi"
31
      schemeid: "dnet:provenanceActions"
32
      schemename: "dnet:provenanceActions"
33
    }
34
  }
35
  timestamp: 1372359095915
36
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/resultProject_outcome_isProducedBy:40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43
1
{
2
  kind: relation
3
  rel: {
4
    relType: resultProject
5
    source: "50|crossref____::003b0746ef72ff253ef1e465c758c961"
6
    target: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43"
7
    child: false
8
    resultProject: {
9
      outcome: {
10
	      relMetadata: {
11
	        semantics: {
12
	          classid: "isProducedBy"
13
	          classname: "isProducedBy"
14
	          schemeid: "dnet:result_project_relations"
15
	          schemename: "dnet:result_project_relations"
16
	        }
17
	      }
18
	   }
19
    }
20
    subRelType: outcome
21
    relClass: "isProducedBy"
22
  }
23
  dataInfo: {
24
    inferred: false
25
    deletedbyinference: false
26
    trust: "0.8"
27
    provenanceaction: {
28
      classid: ""
29
      classname: ""
30
      schemeid: "dnet:provenanceActions"
31
      schemename: "dnet:provenanceActions"
32
    }
33
  }
34
  timestamp: 1372359095915
35
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/article/personResult_authorship_hasAuthor:30|crossref____::f6813047fd08021c87acdee2061c20f4
1
{
2
  kind: relation
3
  rel: {
4
    relType: personResult
5
    source: "50|crossref____::003b0746ef72ff253ef1e465c758c961"
6
    target: "30|crossref____::f6813047fd08021c87acdee2061c20f4"
7
    child: false
8
    personResult: {
9
      authorship: {
10
      		relMetadata: {
11
        		semantics: {
12
          			classid: "hasAuthor"
13
          			classname: "hasAuthor"
14
          			schemeid: "dnet:personroles"
15
          			schemename: "dnet:personroles"
16
        		}
17
      		}
18
      		ranking: "5"
19
    	}
20
    }
21
    subRelType: authorship
22
  	relClass: "hasAuthor"
23
  }
24
  dataInfo: {
25
    inferred: false
26
    deletedbyinference: false
27
    trust: "NEUTRAL"
28
    provenanceaction: {
29
      classid: "user:claim:doi"
30
      classname: "user:claim:doi"
31
      schemeid: "dnet:provenanceActions"
32
      schemename: "dnet:provenanceActions"
33
    }
34
  }
35
  timestamp: 1372359095915
36
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/result/dataset/result:body
1
{
2
  kind: entity
3
  entity: {
4
    type: result
5
    id: "50|od______1106::138537e0568c27c402756042b15e7858"
6
    result: {
7
      metadata: {
8
        title: {
9
          value: "Santa Rosa [ciruelo]"
10
          qualifier: {
11
            classid: "main title"
12
            classname: "main title"
13
            schemeid: "dnet:dataCite_title"
14
            schemename: "dnet:dataCite_title"
15
          }
16
        }
17
        dateofacceptance: {
18
          value: "1964-12-01"
19
        }
20
        resulttype: {
21
          classid: "publication"
22
          classname: "publication"
23
          schemeid: "dnet:result_typologies"
24
          schemename: "dnet:result_typologies"
25
        }
26
        storagedate: { 
27
        	value: "2012-10-18"
28
        }
29
        language: {
30
          classid: "und"
31
          classname: "Undetermined"
32
          schemeid: "dnet:languages"
33
          schemename: "dnet:languages"
34
        }
35
      }
36
      
37
      instance: {
38
	    licence: {
39
	      classid: "OPEN"
40
	      classname: "Open Access"
41
	      schemeid: "dnet:access_modes"
42
	      schemename: "dnet:access_modes"
43
	    }
44
	    instancetype: {
45
	      classid: "0021"
46
	      classname: "Dataset"
47
	      schemeid: "dnet:publication_resource"
48
	      schemename: "dnet:publication_resource"
49
	    }
50
	    hostedby: {
51
	      key: "10|opendoar____::c9f95a0a5af052bffce5c89917335f67"
52
	      value: "Digital.CSIC"
53
	    }
54
	    url: "http://hdl.handle.net/10261/86687"
55
	  }
56

  
57
    }
58
    originalId: "oai:digital.csic.es:10261/86687"
59
    collectedfrom: {
60
      key: "10|opendoar____::c9f95a0a5af052bffce5c89917335f67"
61
      value: "Digital.CSIC"
62
    }
63
    pid: {
64
      value: "oai:digital.csic.es:10261/86687"
65
      qualifier: {
66
        classid: "oai"
67
        classname: "Open Archives Initiative"
68
        schemeid: "dnet:pid_types"
69
        schemename: "dnet:pid_types"
70
      }
71
    }
72
  }
73
  dataInfo: {
74
    inferred: false
75
    deletedbyinference: false
76
    trust: "0.9"
77
    provenanceaction: {
78
      classid: "sysimport:crosswalk:repository"
79
      classname: "sysimport:crosswalk:repository"
80
      schemeid: "dnet:provenanceActions"
81
      schemename: "dnet:provenanceActions"
82
    }
83
  }
84
  timestamp: 1423856239322
85
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/primary/main/oozie_app/workflow.xml
288 288
			<value>$UNDEFINED$</value>
289 289
			<description>external hbase zookeeper client port, required only when zookeeper quorum property is set</description>
290 290
		</property>
291
		<!-- trust level threshold section -->
291 292
		<property>
293
			<name>export_trust_level_threshold</name>
294
			<value>$UNDEFINED$</value>
295
			<description>default trust level threshold of exported data</description>
296
		</property>
297
		<property>
298
			<name>export_trust_level_threshold_document_classes</name>
299
			<value>$UNDEFINED$</value>
300
			<description>document_classes trust level threshold</description>
301
		</property>
302
		<property>
303
			<name>export_trust_level_threshold_document_referencedProjects</name>
304
			<value>$UNDEFINED$</value>
305
			<description>document_referencedProjects trust level threshold</description>
306
		</property>
307
		<property>
308
			<name>export_trust_level_threshold_document_referencedDatasets</name>
309
			<value>$UNDEFINED$</value>
310
			<description>document_referencedDatasets trust level threshold</description>
311
		</property>
312
		<property>
313
			<name>export_trust_level_threshold_document_pdb</name>
314
			<value>$UNDEFINED$</value>
315
			<description>document to protein databank trust level threshold</description>
316
		</property>
317
		<!--  -->
318
		<property>
292 319
			<name>export_documentssimilarity_threshold</name>
293 320
			<value>$UNDEFINED$</value>
294 321
			<description>documents similarity threshold value below which similarity export is omitted</description>
......
444 471
					<name>content_read_timeout</name>
445 472
					<value>${import_content_read_timeout}</value>
446 473
				</property>
447
				<!-- metadata extraction related -->
448
				<property>
449
					<name>metadataextraction_excluded_checksums</name>
450
					<value>${metadataextraction_excluded_checksums}</value>
451
				</property>
452
				<property>
453
					<name>metadataextraction_max_file_size_mb</name>
454
					<value>${metadataextraction_max_file_size_mb}</value>
455
				</property>
456
				<property>
457
					<name>metadataextraction_default_cache_location</name>
458
					<value>${metadataextraction_default_cache_location}</value>
459
				</property>
474
				<!-- metadata extraction related properties are automatically propagated -->
460 475
				<!-- metadatainput and metadataextraction output subdirectory names -->
461 476
				<property>
462 477
					<name>metadataimport_output_name_document_meta</name>
......
496 511
					<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
497 512
				</property>
498 513
				<property>
499
					<name>output_citation_pmc</name>
500
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
501
				</property>
502
				<property>
503 514
					<name>output_document_text</name>
504 515
					<value>${workingDir}/mainworkflows_common_import/document-text</value>
505 516
				</property>
......
589 600
					<value>${workingDir}/mainworkflows_common_import/extracted_document_metadata</value>
590 601
				</property>
591 602
				<property>
592
					<name>input_citation_pmc</name>
593
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
594
				</property>
595
				<property>
596 603
					<name>input_project_concept</name>
597 604
					<value>${workingDir}/mainworkflows_common_import/project-concept</value>
598 605
				</property>
......
737 744
    <action name="transformers_export_documentmetadata">
738 745
        <sub-workflow>
739 746
            <app-path>${wf:appPath()}/transformers_export_documentmetadata</app-path>
747
            <propagate-configuration/>
740 748
            <configuration>
741 749
                <property>
742
                    <name>jobTracker</name>
743
                    <value>${jobTracker}</value>
744
                </property>
745
                <property>
746
                    <name>nameNode</name>
747
                    <value>${nameNode}</value>
748
                </property>
749
                <property>
750
                    <name>queueName</name>
751
                    <value>${queueName}</value>
752
                </property>
753
                <!-- Working directory of the subworkflow -->
754
                <property>
755 750
                    <name>workingDir</name>
756 751
                    <value>${workingDir}/transformers_export_documentmetadata/working_dir</value>
757 752
                </property>
......
944 939
					<name>action_set_id_entity_dataset</name>
945 940
					<value>${export_action_set_id_entity_dataset}</value>
946 941
				</property>
942
				
947 943
				<property>
944
					<name>trust_level_threshold</name>
945
					<value>${export_trust_level_threshold}</value>
946
				</property>
947
				<property>
948
					<name>trust_level_threshold_document_referencedProjects</name>
949
					<value>${export_trust_level_threshold_document_referencedProjects}</value>
950
				</property>
951
				<property>
952
					<name>trust_level_threshold_document_referencedDatasets</name>
953
					<value>${export_trust_level_threshold_document_referencedDatasets}</value>
954
				</property>
955
				<property>
956
					<name>trust_level_threshold_document_classes</name>
957
					<value>${export_trust_level_threshold_document_classes}</value>
958
				</property>
959
				<property>
960
					<name>trust_level_threshold_document_pdb</name>
961
					<value>${export_trust_level_threshold_document_pdb}</value>
962
				</property>
963
				<property>
948 964
					<name>action_hbase_remote_zookeeper_quorum</name>
949 965
					<value>${export_action_hbase_remote_zookeeper_quorum}</value>
950 966
				</property>
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/java/eu/dnetlib/iis/mainworkflows/importer/mapred/PredefinedTableInputFormat.java
41 41
	
42 42
	public static final String DEFAULT_CHARSET = "utf8";
43 43
	
44
	public static final String PART_SEPARATOR = ":";
44
	public static final String PART_SEPARATOR = "#";
45 45
	
46 46
	public static class FakeSplit extends InputSplit implements Writable {
47 47
	    public void write(DataOutput out) throws IOException { }
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/primary/processing/oozie_app/workflow.xml
85 85
			<description>input extracted document metadata directory</description>
86 86
		</property>
87 87
		<property>
88
			<name>input_citation_pmc</name>
89
			<description>input directory holding citations extracted from PMC</description>
90
		</property>
91
		<property>
92 88
			<name>input_project_concept</name>
93 89
			<description>input project concept directory</description>
94 90
		</property>
......
159 155
		</property>
160 156
		<property>
161 157
			<name>output_citation</name>
162
			<description>output containing grouped citations coming from citation matching and pmc ingestion</description>			
158
			<description>output containing grouped citations coming from citation fuzzy and direct matching modules</description>			
163 159
		</property>
164 160
		<property>
165 161
			<name>output_document_similarity</name>
......
693 689
    <!-- citation matching part -->
694 690
    <decision name="decision-citationmatching">
695 691
        <switch>
696
            <case to="transformers_citationmatching">${active_citationmatching eq "true"}</case>
692
            <case to="transformers_citationmatching_direct">${active_citationmatching eq "true"}</case>
697 693
            <default to="skip-citationmatching"/>
698 694
        </switch>
699 695
    </decision>
700 696
    
697
    <!-- preparing citation matching input -->
698
    <action name="transformers_citationmatching_direct">
699
	    <sub-workflow>
700
            <app-path>${wf:appPath()}/transformers_citationmatching_direct</app-path>
701
            <propagate-configuration/>
702
            <configuration>
703
            	<property>
704
                    <name>workingDir</name>
705
                    <value>${workingDir}/transformers_citationmatching_direct/working_dir</value>
706
                </property>
707
            	<property>
708
					<name>input</name>
709
					<value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
710
				</property>
711
				<property>
712
					<name>output</name>
713
					<value>${workingDir}/transformers_citationmatching_direct/output_citation_metadata</value>
714
				</property>
715
            </configuration>
716
        </sub-workflow>
717
		<ok to="citationmatching_direct"/>
718
		<error to="fail" />
719
    </action>
720
    
721
    <action name="citationmatching_direct">
722
		<sub-workflow>
723
            <app-path>${wf:appPath()}/citationmatching_direct</app-path>
724
            <propagate-configuration/>
725
            <configuration>
726
            	<property>
727
                    <name>workingDir</name>
728
                    <value>${workingDir}/citationmatching_direct/working_dir</value>
729
                </property>
730
                <property>
731
					<name>input</name>
732
					<value>${workingDir}/transformers_citationmatching_direct/output_citation_metadata</value>
733
				</property>
734
            	<property>
735
					<name>output</name>
736
					<value>${workingDir}/citationmatching_direct/output</value>
737
				</property>
738
			</configuration>
739
        </sub-workflow>
740
		<ok to="transformers_citationmatching" />
741
		<error to="fail" />
742
	</action>
743
    
744
    <!-- preparing citation matching input -->
701 745
    <action name="transformers_citationmatching">
702 746
	    <sub-workflow>
703 747
            <app-path>${wf:appPath()}/transformers_citationmatching</app-path>
......
748 792
		        </property>
749 793
            </configuration>
750 794
        </sub-workflow>
795
		<ok to="transformers_citations_from_matching"/>
796
		<error to="fail" />
797
    </action>
798
        
799
    <!-- normalize and group citations part -->
800
    <action name="transformers_citations_from_matching">
801
	    <sub-workflow>
802
            <app-path>${wf:appPath()}/transformers_citations_from_matching</app-path>
803
            <propagate-configuration/>
804
            <configuration>
805
            	<property>
806
                    <name>workingDir</name>
807
                    <value>${workingDir}/transformers_citations_from_matching/working_dir</value>
808
                </property>
809
            	<property>
810
					<name>input</name>
811
					<value>${workingDir}/citationmatching_chain/output</value>
812
				</property>
813
				<property>
814
					<name>output</name>
815
					<value>${workingDir}/transformers_citations_from_matching/output</value>
816
				</property>
817
            </configuration>
818
        </sub-workflow>
819
		<ok to="transformers_citations_from_ingestpmc"/>
820
		<error to="fail" />
821
    </action>
822
    
823
    <action name="transformers_citations_from_ingestpmc">
824
	    <sub-workflow>
825
            <app-path>${wf:appPath()}/transformers_citations_from_ingestpmc</app-path>
826
            <propagate-configuration/>
827
            <configuration>
828
            	<property>
829
                    <name>workingDir</name>
830
                    <value>${workingDir}/transformers_citations_from_ingestpmc/working_dir</value>
831
                </property>
832
            	<property>
833
					<name>input</name>
834
					<value>${workingDir}/citationmatching_direct/output</value>
835
				</property>
836
				<property>
837
					<name>output</name>
838
					<value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
839
				</property>
840
            </configuration>
841
        </sub-workflow>
842
		<ok to="transformers_citations_from_referencemetadata"/>
843
		<error to="fail" />
844
    </action>
845
    
846
    <action name="transformers_citations_from_referencemetadata">
847
	    <sub-workflow>
848
            <app-path>${wf:appPath()}/transformers_citations_from_referencemetadata</app-path>
849
            <propagate-configuration/>
850
            <configuration>
851
            	<property>
852
                    <name>workingDir</name>
853
                    <value>${workingDir}/transformers_citations_from_referencemetadata/working_dir</value>
854
                </property>
855
            	<property>
856
					<name>input</name>
857
					<value>${workingDir}/transformers_metadatamerger/output_merged_metadata</value>
858
				</property>
859
				<property>
860
					<name>output</name>
861
					<value>${workingDir}/transformers_citations_from_referencemetadata/output</value>
862
				</property>
863
            </configuration>
864
        </sub-workflow>
865
		<ok to="citations_union"/>
866
		<error to="fail" />
867
    </action>
868
    
869
    <action name="citations_union">
870
	    <sub-workflow>
871
            <app-path>${wf:appPath()}/transformers_common_union3</app-path>
872
            <propagate-configuration/>
873
            <configuration>
874
            	<property>
875
                    <name>workingDir</name>
876
                    <value>${workingDir}/citations_union/working_dir</value>
877
                </property>
878
            	<property>
879
					<name>input_a</name>
880
					<value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
881
				</property>
882
				<property>
883
					<name>input_b</name>
884
					<value>${workingDir}/transformers_citations_from_matching/output</value>
885
				</property>
886
				<property>
887
					<name>input_c</name>
888
					<value>${workingDir}/transformers_citations_from_referencemetadata/output</value>
889
				</property>
890
				<property>
891
					<name>output</name>
892
					<value>${workingDir}/citations_union/output</value>
893
				</property>
894
				<property>
895
					<name>schema</name>
896
					<value>eu.dnetlib.iis.common.citations.schemas.Citation</value>
897
				</property>
898
            </configuration>
899
        </sub-workflow>
900
        <ok to="citations_collapser"/>
901
		<error to="fail" />
902
    </action>
903
    
904
    <action name="citations_collapser">
905
        <sub-workflow>
906
            <app-path>${wf:appPath()}/collapsers_basic_collapser</app-path>
907
            <propagate-configuration/>
908
            <configuration>
909
                <property>
910
                    <name>workingDir</name>
911
                    <value>${workingDir}/citations_collapser/working_dir</value>
912
                </property>
913
                <property>
914
                    <name>blocking_field</name>
915
                    <value>sourceDocumentId</value>
916
                </property>
917
                <property>
918
                    <name>record_collapser</name>
919
                    <value>eu.dnetlib.iis.collapsers.basic.GenericCitationCollapser</value>
920
        		</property>
921
                <property>
922
                    <name>schema</name>
923
                    <value>eu.dnetlib.iis.common.citations.schemas.Citation</value>
924
                </property>
925
                <!-- Input ports. -->
926
                <property>
927
                    <name>input</name>
928
                    <value>${workingDir}/citations_union/output</value>
929
                </property>
930
                <!-- Output port bound to given path -->
931
                <property>
932
                    <name>output</name>
933
                    <value>${workingDir}/citations_collapser/output</value>
934
                </property>
935
            </configuration>
936
        </sub-workflow>
937
        <ok to="transformers_export_citations"/>
938
        <error to="fail"/>
939
    </action>
940
    
941
    <action name="transformers_export_citations">
942
	    <sub-workflow>
943
            <app-path>${wf:appPath()}/transformers_export_citations</app-path>
944
            <propagate-configuration/>
945
            <configuration>
946
            	<property>
947
                    <name>workingDir</name>
948
                    <value>${workingDir}/transformers_export_citations/working_dir</value>
949
                </property>
950
            	<property>
951
					<name>input</name>
952
					<value>${workingDir}/citations_collapser/output</value>
953
				</property>
954
				<property>
955
					<name>output</name>
956
					<value>${output_citation}</value>
957
				</property>
958
            </configuration>
959
        </sub-workflow>
751 960
		<ok to="decision-documentssimilarity"/>
752 961
		<error to="fail" />
753 962
    </action>
754
        
963
    
964
    <!-- end of normalize and group citations part -->
755 965
    <action name="skip-citationmatching">
756 966
        <java>
757 967
			<prepare>
758 968
				<!-- notice: directory have to aligned with skipped action output -->
759
				<delete path="${nameNode}${workingDir}/citationmatching_chain" />
760
				<mkdir path="${nameNode}${workingDir}/citationmatching_chain" />
969
				<delete path="${nameNode}${workingDir}/transformers_export_citations" />
970
				<mkdir path="${nameNode}${workingDir}/transformers_export_citations" />
761 971
			</prepare>
762 972
			<main-class>eu.dnetlib.iis.core.java.ProcessWrapper</main-class>
763 973
			<arg>eu.dnetlib.iis.core.java.jsonworkflownodes.Producer</arg>
764
            <arg>-C{citation,
765
				eu.dnetlib.iis.citationmatching.schemas.Citation,
974
            <arg>-C{citations,
975
				eu.dnetlib.iis.export.schemas.Citations,
766 976
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
767 977
            <!-- notice: directory have to aligned with skipped action output -->
768
            <arg>-Ocitation=${workingDir}/citationmatching_chain/output</arg>
978
            <arg>-Ocitations=${output_citation}</arg>
769 979
        </java>
770 980
        <ok to="decision-documentssimilarity"/>
771 981
        <error to="fail"/>
......
979 1189
				</property>
980 1190
            </configuration>
981 1191
        </sub-workflow>
982
		<ok to="transformers_citations_from_matching"/>
1192
		<ok to="end"/>
983 1193
		<error to="fail" />
984 1194
    </action>
985 1195
    
......
1022 1232
            <arg>-Oglobal_statistics=${workingDir}/statistics/output_global_statistics</arg>
1023 1233
             -->
1024 1234
        </java>
1025
        <ok to="transformers_citations_from_matching"/>
1235
        <ok to="end"/>
1026 1236
        <error to="fail"/>
1027 1237
    </action>
1028 1238
    <!-- end of statistics part -->
1239

  
1029 1240
    
1030
    <!-- normalize and group citations part -->
1031
    <action name="transformers_citations_from_matching">
1032
	    <sub-workflow>
1033
            <app-path>${wf:appPath()}/transformers_citations_from_matching</app-path>
1034
            <propagate-configuration/>
1035
            <configuration>
1036
            	<property>
1037
                    <name>workingDir</name>
1038
                    <value>${workingDir}/transformers_citations_from_matching/working_dir</value>
1039
                </property>
1040
            	<property>
1041
					<name>input</name>
1042
					<value>${workingDir}/citationmatching_chain/output</value>
1043
				</property>
1044
				<property>
1045
					<name>output</name>
1046
					<value>${workingDir}/transformers_citations_from_matching/output</value>
1047
				</property>
1048
            </configuration>
1049
        </sub-workflow>
1050
		<ok to="transformers_citations_from_ingestpmc"/>
1051
		<error to="fail" />
1052
    </action>
1053
    
1054
    <action name="transformers_citations_from_ingestpmc">
1055
	    <sub-workflow>
1056
            <app-path>${wf:appPath()}/transformers_citations_from_ingestpmc</app-path>
1057
            <propagate-configuration/>
1058
            <configuration>
1059
            	<property>
1060
                    <name>workingDir</name>
1061
                    <value>${workingDir}/transformers_citations_from_ingestpmc/working_dir</value>
1062
                </property>
1063
            	<property>
1064
					<name>input</name>
1065
					<value>${input_citation_pmc}</value>
1066
				</property>
1067
				<property>
1068
					<name>output</name>
1069
					<value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
1070
				</property>
1071
            </configuration>
1072
        </sub-workflow>
1073
		<ok to="citations_collapser"/>
1074
		<error to="fail" />
1075
    </action>
1076
    
1077
    <action name="citations_collapser">
1078
		<sub-workflow>
1079
            <app-path>${wf:appPath()}/collapsers_multiple_input_collapser</app-path>
1080
            <propagate-configuration/>
1081
            <configuration>
1082
                <property>
1083
                    <name>workingDir</name>
1084
                    <value>${workingDir}/citations_collapser/working_dir</value>
1085
                </property>
1086
                <!-- Input ports & parameters. -->
1087
                <property>
1088
                    <name>origin_1</name>
1089
                    <value>ingested</value>
1090
                </property>
1091
                <property>
1092
                    <name>input_1</name>
1093
                    <value>${workingDir}/transformers_citations_from_ingestpmc/output</value>
1094
                </property>
1095
                <property>
1096
                    <name>origin_2</name>
1097
                    <value>matched</value>
1098
                </property>
1099
                <property>
1100
                    <name>input_2</name>
1101
                    <value>${workingDir}/transformers_citations_from_matching/output</value>
1102
                </property>
1103
                <property>
1104
                    <name>blocking_field</name>
1105
                    <value>sourceDocumentId</value>
1106
                </property>
1107
                <property>
1108
                    <name>schema_input</name>
1109
                    <value>eu.dnetlib.iis.common.citations.schemas.Citation</value>
1110
                </property>
1111
                <property>
1112
                    <name>output</name>
1113
                    <value>${workingDir}/citations_collapser/output</value>
1114
                </property>
1115
                <property>
1116
                    <name>schema_input_envelope</name>
1117
                    <value>eu.dnetlib.iis.common.citations.schemas.CitationEnvelope</value>
1118
                </property>
1119
                <property>
1120
                    <name>record_collapser</name>
1121
                    <value>eu.dnetlib.iis.collapsers.origins.PMCCitationCollapser</value>
1122
        		</property>
1123
            </configuration>
1124
        </sub-workflow>
1125
		<ok to="transformers_export_citations"/>
1126
		<error to="fail" />
1127
    </action>
1128
    
1129
    <action name="transformers_export_citations">
1130
	    <sub-workflow>
1131
            <app-path>${wf:appPath()}/transformers_export_citations</app-path>
1132
            <propagate-configuration/>
1133
            <configuration>
1134
            	<property>
1135
                    <name>workingDir</name>
1136
                    <value>${workingDir}/transformers_export_citations/working_dir</value>
1137
                </property>
1138
            	<property>
1139
					<name>input</name>
1140
					<value>${workingDir}/citations_collapser/output</value>
1141
				</property>
1142
				<property>
1143
					<name>output</name>
1144
					<value>${output_citation}</value>
1145
				</property>
1146
            </configuration>
1147
        </sub-workflow>
1148
		<ok to="end"/>
1149
		<error to="fail" />
1150
    </action>
1151
    
1152
    <!-- end of normalize and group citations part -->
1153
    
1154 1241
	<kill name="fail">
1155 1242
		<message>Unfortunately, the process failed -- error message:
1156 1243
			[${wf:errorMessage(wf:lastErrorNode())}]</message>
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/common/import/oozie_app/import.txt
8 8
ingest_html_plaintext classpath eu/dnetlib/iis/ingest/html/plaintext/oozie_app
9 9
ingest_pmc_plaintext classpath eu/dnetlib/iis/ingest/pmc/plaintext/oozie_app
10 10
ingest_pmc_metadata classpath eu/dnetlib/iis/ingest/pmc/metadata/oozie_app
11
ingest_pmc_citations classpath eu/dnetlib/iis/transformers/ingest/pmc/citations/oozie_app
12
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app
13 11
multiple_input_collapser classpath eu/dnetlib/iis/collapsers/multiple_input_collapser/oozie_app
14 12
metadataextraction classpath eu/dnetlib/iis/metadataextraction/oozie_app
15 13
metadataextraction_cached classpath eu/dnetlib/iis/mainworkflows/metadataextraction/cached_by_checksum/oozie_app
16 14
transformers_common_union4 classpath eu/dnetlib/iis/transformers/common/union4/oozie_app
17 15
transformers_idextractor classpath eu/dnetlib/iis/transformers/importer/documentmetadata/idextractor/oozie_app
18
transformers_externalidtooaid classpath eu/dnetlib/iis/transformers/importer/documentmetadata/externalidtooaid/oozie_app
19
transformers_ingest_pmc_metadata classpath eu/dnetlib/iis/transformers/ingest/pmc/metadata/oozie_app
20
ingest_pmc_idmapping_pmidtooaid classpath eu/dnetlib/iis/ingest/pmc/idmapping/pmidtooaid/oozie_app
16
transformers_ingest_pmc_metadata classpath eu/dnetlib/iis/transformers/ingest/pmc/metadata/oozie_app
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/metadataextraction/extracted_document_metadata.json
4 4
	"abstract": "The aim of this paper is to present video quality prediction models for objective non-intrusive, prediction of H.264 encoded video for all content types combining parameters both in the physical and application layer over Universal Mobile Telecommunication Systems (UMTS) networks. In order to characterize the Quality of Service (QoS) level, a learning model based on Adaptive Neural Fuzzy Inference System (ANFIS) and a second model based on non-linear regression analysis is proposed to predict the video quality in terms of the Mean Opinion Score (MOS). The objective of the paper is two-fold. First, to find the impact of QoS parameters on end-to-end video quality for H.264 encoded video. Second, to develop learning models based on ANFIS and nonlinear regression analysis to predict video quality over UMTS networks by considering the impact of radio link loss models. The loss models considered are 2-state Markov models. Both the models are trained with a combination of physical and application layer parameters and validated with unseen dataset. Preliminary results show that good prediction accuracy was obtained from both the models. The work should help in the development of a reference-free video prediction model and QoS control methods for video over UMTS networks.",
5 5
	"language": "eng",
6 6
	"keywords": null,
7
	"externalIdentifiers": null, 
7
	"externalIdentifiers": {
8
            "pmid": "1234567"
9
    }, 
8 10
	"journal": "International Journal of Digital Multimedia Broadcasting", 
9 11
	"year": null, 	
10 12
	"publisher": null,
......
13 15
 	"affiliations": null,
14 16
	"volume": "19", 
15 17
	"issue": "2",
16
	"pages": {"start": "121", "end": "132"}
18
	"pages": {"start": "121", "end": "132"},
19
	"publicationTypeName": null
17 20
}
18 21
{	
19 22
	"id": "id-2", 
......
30 33
 	"affiliations": null,
31 34
	"volume": "3", 
32 35
	"issue": null,
33
	"pages": {"start": "4428", "end": "4445"}
36
	"pages": {"start": "4428", "end": "4445"},
37
	"publicationTypeName": null
34 38
}
35 39
{	
36 40
	"id": "id-3", 
......
60 64
					"start": "803", 
61 65
					"end": "826"
62 66
				}, 
63
				"location": null
67
				"location": null,
68
				"externalIds": null
64 69
			}, 
65 70
			"text": "J. Abernethy, F. Bach, T. Evgeniou, and J.-P. Vert. A new approach to collaborative filtering: Operator estimation with spectral regularization. Journal of Machine Learning Research, 10:803-826, 2009."
66 71
		},
67 72
		{
73
			"position": 3, 
74
			"basicMetadata": {
75
				"publisher": null, 
76
				"title": "Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes.", 
77
				"url": null, 
78
				"series": null, 
79
				"authors": ["Kovach, M.E.", "Elzer, P.H.", "Hill, D.S.", "Robertson, G.T.", "Farris, M.A.", "Roop, R.M.", "Peterson, K.M."], 
80
				"volume": null, 
81
				"edition": null, 
82
				"source": null, 
83
				"year": null, 
84
				"issue": null, 
85
				"pages": null, 
86
				"location": null,
87
				"externalIds": {
88
					"pmid": "1234567"
89
			    }
90
			}, 
91
			"text": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176"
92
		},
93
		{
68 94
			"position": 9, 
69 95
			"basicMetadata": {
70 96
				"publisher": null, 
......
78 104
				"year": "2008", 
79 105
				"issue": null, 
80 106
				"pages": null, 
81
				"location": null
107
				"location": null,
108
				"externalIds": null
82 109
			}, 
83 110
			"text": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008."
84 111
		}
......
87 114
 	"affiliations": null,
88 115
	"volume": null, 
89 116
	"issue": "0",
90
	"pages": {"start": "69", "end": "110"}
117
	"pages": {"start": "69", "end": "110"},
118
	"publicationTypeName": null
91 119
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/project.json
1
{"id":"WT::08616","projectAcronym":"","projectGrantId":"08616","fundingClass":"WT"}
2
{"id":"ec::226639","projectAcronym":"INTIF","projectGrantId":"226639","fundingClass":"FP7"}
1
{"id":"WT::08616","projectAcronym":"","projectGrantId":"08616","fundingClass":"WT::WT"}
2
{"id":"ec::226639","projectAcronym":"INTIF","projectGrantId":"226639","fundingClass":"EC::FP7"}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/output/document_classes.json
76 76
          "Arts \u0026 recreation",
77 77
          "Sports, games \u0026 entertainment"
78 78
        ],
79
        "confidenceLevel": 0.613
79
        "confidenceLevel": 0.595
80 80
      },
81 81
      {
82 82
        "classLabels": [
......
97 97
          "Arts \u0026 recreation",
98 98
          "Music"
99 99
        ],
100
        "confidenceLevel": 0.525
100
        "confidenceLevel": 0.508
101 101
      },
102 102
      {
103 103
        "classLabels": [
104 104
          "Arts \u0026 recreation",
105 105
          "Arts"
106 106
        ],
107
        "confidenceLevel": 0.472
107
        "confidenceLevel": 0.455
108 108
      }
109 109
    ],
110 110
    "meshEuroPMCClasses": [
......
265 265
          "Science",
266 266
          "Physics"
267 267
        ],
268
        "confidenceLevel": 0.951
268
        "confidenceLevel": 0.945
269 269
      },
270 270
      {
271 271
        "classLabels": [
272 272
          "Technology",
273 273
          "Technology"
274 274
        ],
275
        "confidenceLevel": 0.93
275
        "confidenceLevel": 0.927
276 276
      },
277 277
      {
278 278
        "classLabels": [
279 279
          "Technology",
280 280
          "Chemical engineering"
281 281
        ],
282
        "confidenceLevel": 0.79
282
        "confidenceLevel": 0.78
283 283
      }
284 284
    ],
285 285
    "meshEuroPMCClasses": [
......
426 426
          "Science",
427 427
          "Mathematics"
428 428
        ],
429
        "confidenceLevel": 0.957
429
        "confidenceLevel": 0.954
430 430
      },
431 431
      {
432 432
        "classLabels": [
433 433
          "Computer science, information \u0026 general works",
434 434
          "Computer science, knowledge \u0026 systems"
435 435
        ],
436
        "confidenceLevel": 0.948
436
        "confidenceLevel": 0.942
437 437
      },
438 438
      {
439 439
        "classLabels": [
440 440
          "Social sciences",
441 441
          "Statistics"
442 442
        ],
443
        "confidenceLevel": 0.939
443
        "confidenceLevel": 0.936
444 444
      },
445 445
      {
446 446
        "classLabels": [
447 447
          "Social sciences",
448 448
          "Law"
449 449
        ],
450
        "confidenceLevel": 0.56
450
        "confidenceLevel": 0.542
451 451
      },
452 452
      {
453 453
        "classLabels": [
454 454
          "Science",
455 455
          "Chemistry"
456 456
        ],
457
        "confidenceLevel": 0.35
457
        "confidenceLevel": 0.333
458 458
      }
459 459
    ],
460 460
    "meshEuroPMCClasses": [
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/output/citations.json
2 2
  "documentId": "id-3",
3 3
  "citations": [
4 4
    {
5
      "rawText": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008.",
6
      "destinationDocumentId": "id-4",
7
      "confidenceLevel": 0.7486356,
8
      "externalDestinationDocumentIds": {}
9
    },
10
    {
5
      "position": 1,
11 6
      "rawText": "J. Abernethy, F. Bach, T. Evgeniou, and J.-P. Vert. A new approach to collaborative filtering: Operator estimation with spectral regularization. Journal of Machine Learning Research, 10:803-826, 2009.",
12 7
      "destinationDocumentId": null,
13 8
      "confidenceLevel": null,
14 9
      "externalDestinationDocumentIds": {}
15 10
    },
16
	{
11
    {
12
      "position": 3,
17 13
      "rawText": "Kovach, M.E., Elzer, P.H., Hill, D.S., Robertson, G.T., Farris, M.A., Roop, R.M., Peterson, K.M.. Four new derivatives of the broad-host range cloning vector pBBR1MCS, carrying different antibiotic resistance cassettes. Gene. 1995; 166: 175-176",
18
      "destinationDocumentId": "pmc-id",
14
      "destinationDocumentId": "id-1",
19 15
      "confidenceLevel": 1.0,
20 16
      "externalDestinationDocumentIds": {
21
            "pmid": "1234567"
17
        "pmid": "1234567"
22 18
      }
23
	}  
19
    },
20
    {
21
      "position": 9,
22
      "rawText": "F. Bach. Exploring large feature spaces with hierarchical multiple kernel learning. In Advances in Neural Information Processing Systems (NIPS), 2008.",
23
      "destinationDocumentId": "id-4",
24
      "confidenceLevel": 0.7486356,
25
      "externalDestinationDocumentIds": {}
26
    }
24 27
  ]
25 28
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/main/resources/eu/dnetlib/iis/mainworkflows/statistics/oozie_app/workflow.xml
283 283
					<value>${workingDir}/mainworkflows_common_import/metadataimport</value>
284 284
				</property>
285 285
				<property>
286
					<name>output_citation_pmc</name>
287
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
288
				</property>
289
				<property>
290 286
					<name>output_dataset</name>
291 287
					<value>${workingDir}/mainworkflows_common_import/dataset</value>
292 288
				</property>
......
295 291
					<value>${workingDir}/mainworkflows_common_import/dataset_to_mdstore</value>
296 292
				</property>
297 293
				<property>
298
					<name>output_citation_pmc</name>
299
					<value>${workingDir}/mainworkflows_common_import/citation_pmc</value>
300
				</property>
301
				<property>
302 294
					<name>output_document_text</name>
303 295
					<value>${workingDir}/mainworkflows_common_import/document-text</value>
304 296
				</property>
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/primary/processing/oozie_app/workflow.xml
62 62
			<arg>-C{document_text_wos,
63 63
				eu.dnetlib.iis.metadataextraction.schemas.DocumentText,
64 64
				eu/dnetlib/iis/mainworkflows/data/empty.json}</arg>
65
			<arg>-C{citation,
66
				eu.dnetlib.iis.ingest.pmc.citations.schemas.Citation,
67
				eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/meta/citation_pmc.json}</arg>
68 65
			<arg>-C{document_text_classpath,
69 66
				eu.dnetlib.iis.mainworkflows.schemas.DocumentContentClasspath,
70 67
				eu/dnetlib/iis/mainworkflows/integration/primary/processing/input/metadataextraction/document_text_classpath.json}</arg>
......
77 74
            <arg>-Odataset=${workingDir}/producer/dataset</arg>
78 75
            <arg>-Oextracted_document_metadata=${workingDir}/producer/extracted_document_metadata</arg>
79 76
            <arg>-Odocument_text_wos=${workingDir}/producer/document_text_wos</arg>
80
            <arg>-Ocitation=${workingDir}/producer/citation</arg>
81 77
            <arg>-Odocument_text_classpath=${workingDir}/producer/document_text_classpath</arg>           
82 78
        </java>
83 79
        <ok to="document_text_producer"/>
......
263 259
					<name>input_extracted_document_metadata</name>
264 260
					<value>${workingDir}/producer/extracted_document_metadata</value>
265 261
				</property>
266
				<property>
267
					<name>input_citation_pmc</name>
268
					<value>${workingDir}/producer/citation</value>
269
				</property>
270 262
                <property>
271 263
                    <name>output_document_to_project</name>
272 264
                    <value>${workingDir}/exported/document_to_project</value>
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/person/1/person#body
1
{
2
	kind: entity
3
  	entity: {
4
    type: person
5
    id: "30|0314fe20-be3c-4bc3-adee-6bbc2cde3cb7_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=::oai:DiVA.org:uu-127423_Oliveira__Paulo_UNKNOWN"
6
    person: {
7
      metadata: {
8
        firstname: {
9
        	value: "Oliveira, Paulo"
10
        }
11
        fullname: {
12
        	value: "Oliveira, Paulo"
13
        }
14
      }
15
    }
16
  }
17
  dataInfo: {
18
    inferred: false
19
    deletedbyinference: false
20
    trust: "0.9"
21
    provenanceaction: {
22
      classid: "UNKNOWN"
23
      classname: "UNKNOWN"
24
      schemeid: "dnet:provenanceActions"
25
      schemename: "dnet:provenanceActions"
26
    }
27
  }
28
  timestamp: 1360248058786
29
}
modules/icm-iis-mainworkflows/branches/IIS-CDH-5.3.0/src/test/resources/eu/dnetlib/iis/mainworkflows/integration/import/input/meta/project/1/project#body
1
{
2
  kind: entity
3
  entity: {
4
    type: project
5
    id: "40|corda_______::ced15df040f56f2ff3d011e9f0b4bc43"
6
    project: {
7
      metadata: {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff