Project

General

Profile

« Previous | Next » 

Revision 35145

Claims search: search OpenAIRE/Crossref/DataCite with a list of DOIs (up to 10 DOIs for Crossref and DataCite). Datasources query: add (datasourcecompatibilityid <> hostedBy).

View differences:

claim.php
672 672
	// return a result (object) containing publications and total
673 673
	private function _searchDOI($doi, $page, $size) {
674 674
		try {
675
			$request = self :: CROSSREF_API_DOI . urlencode( trim($doi));
676
			JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
677
			$time = microtime(TRUE);
678
			$response = $this -> http -> get($request);
679
			JLog :: add('Received response in ' . (microtime(TRUE)  - $time) . ' s', JLog :: INFO, self :: LOG);
680
			if ($response == NULL)
681
				throw new Exception('no HTTP response');
682
			if ($response -> code != self :: HTTP_OK)
683
				throw new Exception('HTTP response code ' . $response -> code);
684
			$res = array();
685
			$res = json_decode(str_replace("-", "_", $response -> body));
686
			$result = new JObject();
687
			$result -> publications = array();
688
			$result -> totalPublications = 0;
689
			$result -> totalDatasets = 0;			
690
			if(isset($res -> status) && $res -> status === "ok" && isset($res -> message)){
691
				
692
					$publication = $this -> _parseJsonDOI($res -> message);
693
					if($publication!=null){
694
						$result -> publications[] = $publication;
695
						$result -> totalPublications = 1;
696
					} 
697
			}
675
                        $result = new JObject();
676
                        $result -> publications = array();
677
                        $result -> totalPublications = 0;
678
                        $result -> totalDatasets = 0;	
679
                        $dois = explode(" ", preg_replace('/\s+/', ' ',$doi));                  
680
                        $unique_dois =array_slice( array_unique($dois),0,10);
681
                        $pattern1 ='#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b#';
682
                        $pattern2 ='#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])[[:graph:]])+)\b#';                        
683
                         if (preg_match($pattern1,$unique_dois[0])||preg_match($pattern2,$unique_dois[0])){
684
                              if(count($dois)>10){
685
                                $result = new JObject();
686
                                $result->publications = array();
687
                                $result->totalPublications = '-';
688
                                $result->totalDatasets = 0;
689
                                $result->maxDoisExceeded = true;
690
                                return $result;
691
                             }
692
                            JLog :: add('It;s a DOI!!!!!!!!! '.$dois[0] , JLog :: INFO, self :: LOG);
693
                            JLog :: add('Searching for DOIs ' , JLog :: INFO, self :: LOG);                                              
694
                            foreach($unique_dois as $doi_){
695
                                JLog :: add('DOI::: '.$doi_ , JLog :: INFO, self :: LOG);
696
                                $publication = $this->_searchSingleDOI($doi_);
697
                                if ($publication != null) {
698
                                    $result->publications[] = $publication;
699
                                }
700
                            }
701
                         }else{
702
                             JLog :: add('It;s NOOOOT  a DOI!!!!!!!!! '.$dois[0] , JLog :: INFO, self :: LOG);
703
                         }
704
                      
705
                        $result -> totalPublications = count( $result -> publications);
698 706
			if($result -> totalPublications ===0){
699 707
				//.'&rows='.$size.'&offset='.$page
700 708
				$request = "http://api.crossref.org/works?query=" . urlencode( trim($doi))."&rows=".$size."&offset=".($page-1)*$size;
......
732 740
				return $result;
733 741
		}
734 742
	}
743
        /**
         * Resolve a single DOI, going through the cache when caching is enabled.
         *
         * On a cache miss (the cache returns FALSE) the lookup is delegated to
         * _searchSingleDOI() and a non-NULL outcome is stored under
         * SEARCH_DOI_CACHE_ID . '.' . $doi in CACHE_GROUP.
         *
         * @param string $doi the DOI to resolve
         * @return mixed the cached entry, or whatever _searchSingleDOI() returns
         *               (NULL when the lookup produced nothing)
         */
        public function searchSingleDOI($doi) {
                // Caching switched off: perform the lookup directly.
                if (!$this -> cache -> getCaching()) {
                        return $this -> _searchSingleDOI($doi);
                }

                $key = self :: SEARCH_DOI_CACHE_ID . '.' . $doi;
                $cached = $this -> cache -> get($key, self :: CACHE_GROUP);
                if ($cached !== FALSE) {
                        // Cache hit: hand back the stored entry untouched.
                        return $cached;
                }

                $fresh = $this -> _searchSingleDOI($doi);
                if ($fresh !== NULL) {
                        // Only successful lookups are cached, so failures are retried next time.
                        $this -> cache -> store($fresh, $key, self :: CACHE_GROUP);
                }
                return $fresh;
        }
756
        /**
         * Look up a single DOI through the Crossref API, without caching.
         *
         * The request URL is self::CROSSREF_API_DOI followed by the trimmed,
         * URL-encoded DOI. When the decoded JSON envelope has status "ok" and a
         * message, the message is handed to _parseJsonDOI().
         *
         * @param string $doi the DOI to resolve
         * @return mixed the value returned by _parseJsonDOI() for the record, or NULL
         *               when there is no HTTP response, the HTTP code is not
         *               self::HTTP_OK, or the envelope has no usable message
         */
        private function _searchSingleDOI($doi) {
                try {
                        $request = self :: CROSSREF_API_DOI . urlencode(trim($doi));
                        JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
                        $time = microtime(TRUE);
                        $response = $this -> http -> get($request);
                        JLog :: add('Received response in ' . (microtime(TRUE)  - $time) . ' s', JLog :: INFO, self :: LOG);
                        if ($response == NULL) {
                                throw new Exception('no HTTP response');
                        }
                        if ($response -> code != self :: HTTP_OK) {
                                throw new Exception('HTTP response code ' . $response -> code);
                        }
                        // Hyphens are turned into underscores so hyphenated JSON keys become
                        // plain PHP property names.
                        // NOTE(review): the replacement runs over the whole body, so hyphens
                        // inside values are rewritten too - confirm _parseJsonDOI() expects that.
                        $res = json_decode(str_replace("-", "_", $response -> body));
                        if (isset($res -> status) && $res -> status === "ok" && isset($res -> message)) {
                                return $this -> _parseJsonDOI($res -> message);
                        }
                        // Undecodable body, non-"ok" status or missing message: nothing to return.
                        return NULL;
                } catch (Exception $e) {
                        JLog :: add('Error performing DOI search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG);
                        JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
                        return NULL;
                }
        }
735 784
	private function _parseJsonDOI($message) {
736 785
		$publication = new JObject();
737 786
		$publication -> id = isset($message -> DOI)?$message -> DOI:NULL;
......
765 814
		return NULL; 
766 815
		
767 816
	}
768
	/**
	 * Legacy DOI lookup against the Crossref XML endpoint.
	 *
	 * Requests self::CROSSREF_URL with the DOI (prefixed by self::DOI_SCHEME),
	 * parses the XML answer and dispatches on the first journal, book or
	 * conference element found under /doi_records/doi_record/crossref.
	 * Other record kinds (dissertation, report-paper, standard, sa_component,
	 * database) are not handled.
	 *
	 * @param string $doi  the DOI to resolve
	 * @param mixed  $page not used by this method
	 * @param mixed  $size not used by this method
	 * @return object the result of parseJournal()/parseBook()/parseConference() with
	 *                totalPublications = 1 and totalDatasets = 0, or an empty result
	 *                (no publications, both totals 0) on an error record or any failure
	 */
	private function _searchDOIOld($doi, $page, $size) {
		try {
			$url = self :: CROSSREF_URL . urlencode(self :: DOI_SCHEME . trim($doi));
			JLog :: add('Requesting ' . $url, JLog :: INFO, self :: LOG);
			$started = microtime(TRUE);
			$reply = $this -> http -> get($url);
			JLog :: add('Received response in ' . (microtime(TRUE)  - $started) . ' s', JLog :: INFO, self :: LOG);
			if ($reply == NULL) {
				throw new Exception('no HTTP response');
			}
			if ($reply -> code != self :: HTTP_OK) {
				throw new Exception('HTTP response code ' . $reply -> code);
			}

			$dom = new DOMDocument();
			$dom -> recover = TRUE;
			if ($dom -> loadXML(trim($reply -> body)) == FALSE) {
				throw new Exception('invalid XML response');
			}
			$xp = new DOMXPath($dom);

			// Locate the crossref element that holds the actual record.
			$roots = $xp -> query('/doi_records/doi_record/crossref');
			if ($roots == FALSE) {
				throw new Exception('error parsing DOI record');
			}
			$root = $roots -> item(0);
			if ($root == NULL) {
				throw new Exception('error parsing DOI record');
			}

			// Supported record kinds, in priority order: XPath, parser method, log prefix.
			$kinds = array(
				array('./journal', 'parseJournal', 'Retrieved journal in '),
				array('./book', 'parseBook', 'Retrieved book in '),
				array('./conference', 'parseConference', 'Retrieved conference in '),
			);

			// Run every query up front; a failed query aborts the whole lookup.
			$matches = array();
			foreach ($kinds as $index => $kind) {
				$matches[$index] = $xp -> query($kind[0], $root);
				if ($matches[$index] == FALSE) {
					throw new Exception('error parsing DOI record');
				}
			}

			$errors = $xp -> query('./error', $root);
			if ($errors == FALSE) {
				// A failed error query is reported as an empty result, not an exception.
				$result = new JObject();
				$result -> publications = array();
				$result -> totalPublications = 0;
				$result -> totalDatasets = 0;
				JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
				return $result;
			}

			// First kind with a node wins.
			foreach ($kinds as $index => $kind) {
				$node = $matches[$index] -> item(0);
				if ($node == NULL) {
					continue;
				}
				$parser = $kind[1];
				$result = $this -> $parser($doi, $xp, $node);
				JLog :: add($kind[2] . (microtime(TRUE) - $started) . ' s', JLog :: INFO, self :: LOG);
				$result -> totalPublications = 1;
				$result -> totalDatasets = 0;
				return $result;
			}

			// No supported record: anything other than an explicit error element is a parsing failure.
			if ($errors -> item(0) == NULL) {
				throw new Exception('error parsing DOI record');
			}
			$result = new JObject();
			$result -> publications = array();
			$result -> totalPublications = 0;
			$result -> totalDatasets = 0;
			JLog :: add('Retrieved 0 DOI records in ' . (microtime(TRUE) - $started) . ' s', JLog :: INFO, self :: LOG);
			return $result;
		} catch (Exception $e) {
			JLog :: add('Error performing DOI search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG);
			$result = new JObject();
			$result -> publications = array();
			$result -> totalPublications = 0;
			$result -> totalDatasets = 0;
			JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
			return $result;
		}
	}
861
	
817
                
862 818
	// Search for a dataset DOI using caching if enabled.
863 819
	// $doi the dataset DOI to search for
864 820
	// return a result (object) containing datasets and total
865
	/*private function _searchDataCite($doi) {
821
	private function _searchDataCiteWithDoi($doi) {
866 822
		try {
867 823
			$request = self :: DATACITE_URL . urlencode(trim($doi));
868 824
			JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
......
871 827
			JLog :: add('Received response in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG);
872 828
			if ($response== NULL)
873 829
				throw new Exception('no HTTP response');
874
			if ($response -> code == self :: HTTP_NOT_FOUND) { // no dataset found; just return empty result
830
			/*if ($response -> code == self :: HTTP_NOT_FOUND) { // no dataset found; just return empty result
875 831
				$result = new JObject();
876 832
				$result -> datasets = array();
877 833
				$result -> totalDatasets = 0;
878 834
				JLog :: add('Retrieved 0 DataCite records in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG);
879 835
				return $result;
880
			}
836
			}*/
881 837
			if ($response -> code != self :: HTTP_OK)
882 838
				throw new Exception('HTTP response code ' . $response -> code);
883 839
			$document = new DOMDocument();
......
920 876
				if ($author -> fullName != NULL)
921 877
					$dataset -> authors[] = $author;
922 878
			}
923
			$result = new JObject();
879
			/*$result = new JObject();
924 880
			$result -> datasets = array();
925 881
			$result -> totalDatasets = 0;
926 882
			if (($dataset -> id != NULL) || ($dataset -> url != NULL) || ($dataset -> title != NULL) || ($dataset -> authors != NULL) || ($dataset -> year != NULL)) {
927 883
				$result -> datasets[] = $dataset;
928 884
				$result -> totalDatasets = 1;
929
			}
885
			}*/
930 886
			JLog :: add('Retrieved DataCite record in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG);
931
			return $result;
887
			return $dataset;
932 888
		} catch (Exception $e) {
933 889
			JLog :: add('Error performing DataCite search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG);
934 890
			return NULL;
935 891
		}
936
	}*/
892
	}
937 893
	private function _searchDataCite($doi, $page, $size) {
938
		try {
894
            $result = new JObject();
895
            $result->datasets = array();
896
            $result->totalPublications = 0;
897
            $result->totalDatasets = 0;
898
            try {
899
                $dois = explode(" ", preg_replace('/\s+/', ' ',$doi));   
900
               
901
                $unique_dois =array_slice( array_unique($dois),0,10);
902
                $pattern1 = '#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b#';
903
                $pattern2 = '#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])[[:graph:]])+)\b#';
904
                if (preg_match($pattern1, $unique_dois[0]) || preg_match($pattern2, $unique_dois[0])) {
905
                    if(count($dois)>10){
906
                       $result = new JObject();
907
                       $result->datasets = array();
908
                       $result->totalPublications = 0;
909
                       $result->totalDatasets = '-';
910
                       $result->maxDoisExceeded = true;
911
                       return $result;
912
                    }
913
                    JLog :: add('It;s a dataset  DOI!!!!!!!!! ' . $dois[0], JLog :: INFO, self :: LOG);
914
                    JLog :: add('Searching for DOIs in Datacite', JLog :: INFO, self :: LOG);
915
                    foreach ($unique_dois as $doi_) {
916
                        JLog :: add('DOI::: ' . $doi_, JLog :: INFO, self :: LOG);
917
                        $dataset = $this->_searchDataCiteWithDoi($doi_);
918
                        if ($dataset != null) {
919
                            $result->datasets[] = $dataset;
920
                        }
921
                    }
922
                }else{
923
                    
924
                    JLog :: add('It;sNOOOOOOOT a dataset  DOI!!!!!!!!! ' . $dois[0], JLog :: INFO, self :: LOG);
925
                }
926
                $result->totalDatasets = count($result->datasets);
927
                if($result->totalDatasets!==0){
928
                    return $result;
929
                }
939 930
			$request = self :: DATACITE_URL_KEYWORD. urlencode(trim($doi)).self :: DATACITE_URL_KEYWORD_REST."&rows=".$size."&start=".($page-1)*$size;
940 931
			JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
941 932
			$time = microtime(TRUE);
......
962 953
				throw new Exception('error parsing DataCite record');
963 954
			$numFound=((($numFoundNode)) == NULL) ? NULL : trim($numFoundNode -> nodeValue);
964 955
  			if ((($resultNodes = $xpath -> query('/response/result/doc')) == FALSE) || (($resultNode = $resultNodes -> item(0)) == NULL))
965
				throw new Exception('error parsing DataCite record');
966
			$result = new JObject();
967
			$result -> datasets = array();
968
			$result -> totalDatasets = 0;
956
				throw new Exception('error parsing DataCite record');                
969 957
			foreach ($resultNodes as $resultNode) {				 
970 958
				if (($idNodes = $xpath -> query('./str[@name = "doi"]/text()', $resultNode)) == FALSE)
971 959
					throw new Exception('error parsing DataCite record');
......
1415 1403
				$publication -> description = (($shortDescriptionNode = $shortDescriptionNodes -> item(0)) == NULL) ? NULL : trim($shortDescriptionNode -> nodeValue);
1416 1404
				if (($author -> lastName != NULL) || ($author -> firstName != NULL))
1417 1405
					$publication -> authors[] = $author;
1418
				if ((($doiNode = $doiNodes -> item(0)) != NULL) && (($doiResult = $this -> searchDOI(trim($doiNode -> nodeValue),10,1)) != NULL) && ($doiResult -> totalPublications > 0)) { // resolve via DOI
1406
				if ((($doiNode = $doiNodes -> item(0)) != NULL) && (($doiResult = $this -> searchSingleDOI(trim($doiNode -> nodeValue))) != NULL)) { // resolve via DOI
1419 1407
					if ($publication -> url == NULL) // set URL if missing
1420
						$publication -> url = $doiResult -> publications[0] -> url;
1408
						$publication -> url = $doiResult -> url;
1421 1409
					if ($publication -> title == NULL) // set title if missing
1422
						$publication -> title = $doiResult -> publications[0] -> title;
1410
						$publication -> title = $doiResult -> title;
1423 1411
					
1424
					foreach ($doiResult -> publications[0] -> authors as $doiAuthor) { // merge authors
1412
					foreach ($doiResult -> authors as $doiAuthor) { // merge authors
1425 1413
						if ((count($publication -> authors) == 0) || ($doiAuthor -> id != $publication -> authors[0] -> id) || ($doiAuthor -> lastName != $publication -> authors[0] -> lastName) || ($doiAuthor -> firstName != $publication -> authors[0] -> firstName) || ($doiAuthor -> fullName != $publication -> authors[0] -> fullName))
1426 1414
							$publication -> authors[] = $doiAuthor;
1427 1415
					}
1428 1416
					if ($publication -> year == NULL) // set year if missing
1429
						$publication -> year = $doiResult -> publications[0] -> year;
1417
						$publication -> year = $doiResult  -> year;
1430 1418
				}				
1431 1419
				if (($publication -> id != NULL) || ($publication -> title != NULL) || ($publication -> authors != NULL) || ($publication -> year != NULL) || ($publication -> description != NULL))
1432 1420
					$result -> publications[] = $publication;

Also available in: Unified diff