Revision 35145
Added by Argiro Kokogiannaki over 9 years ago
claim.php | ||
---|---|---|
672 | 672 |
// return a result (object) containing publications and total |
673 | 673 |
private function _searchDOI($doi, $page, $size) { |
674 | 674 |
try { |
675 |
$request = self :: CROSSREF_API_DOI . urlencode( trim($doi)); |
|
676 |
JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG); |
|
677 |
$time = microtime(TRUE); |
|
678 |
$response = $this -> http -> get($request); |
|
679 |
JLog :: add('Received response in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG); |
|
680 |
if ($response == NULL) |
|
681 |
throw new Exception('no HTTP response'); |
|
682 |
if ($response -> code != self :: HTTP_OK) |
|
683 |
throw new Exception('HTTP response code ' . $response -> code); |
|
684 |
$res = array(); |
|
685 |
$res = json_decode(str_replace("-", "_", $response -> body)); |
|
686 |
$result = new JObject(); |
|
687 |
$result -> publications = array(); |
|
688 |
$result -> totalPublications = 0; |
|
689 |
$result -> totalDatasets = 0; |
|
690 |
if(isset($res -> status) && $res -> status === "ok" && isset($res -> message)){ |
|
691 |
|
|
692 |
$publication = $this -> _parseJsonDOI($res -> message); |
|
693 |
if($publication!=null){ |
|
694 |
$result -> publications[] = $publication; |
|
695 |
$result -> totalPublications = 1; |
|
696 |
} |
|
697 |
} |
|
675 |
$result = new JObject(); |
|
676 |
$result -> publications = array(); |
|
677 |
$result -> totalPublications = 0; |
|
678 |
$result -> totalDatasets = 0; |
|
679 |
$dois = explode(" ", preg_replace('/\s+/', ' ',$doi)); |
|
680 |
$unique_dois =array_slice( array_unique($dois),0,10); |
|
681 |
$pattern1 ='#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b#'; |
|
682 |
$pattern2 ='#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])[[:graph:]])+)\b#'; |
|
683 |
if (preg_match($pattern1,$unique_dois[0])||preg_match($pattern2,$unique_dois[0])){ |
|
684 |
if(count($dois)>10){ |
|
685 |
$result = new JObject(); |
|
686 |
$result->publications = array(); |
|
687 |
$result->totalPublications = '-'; |
|
688 |
$result->totalDatasets = 0; |
|
689 |
$result->maxDoisExceeded = true; |
|
690 |
return $result; |
|
691 |
} |
|
692 |
JLog :: add('It;s a DOI!!!!!!!!! '.$dois[0] , JLog :: INFO, self :: LOG); |
|
693 |
JLog :: add('Searching for DOIs ' , JLog :: INFO, self :: LOG); |
|
694 |
foreach($unique_dois as $doi_){ |
|
695 |
JLog :: add('DOI::: '.$doi_ , JLog :: INFO, self :: LOG); |
|
696 |
$publication = $this->_searchSingleDOI($doi_); |
|
697 |
if ($publication != null) { |
|
698 |
$result->publications[] = $publication; |
|
699 |
} |
|
700 |
} |
|
701 |
}else{ |
|
702 |
JLog :: add('It;s NOOOOT a DOI!!!!!!!!! '.$dois[0] , JLog :: INFO, self :: LOG); |
|
703 |
} |
|
704 |
|
|
705 |
$result -> totalPublications = count( $result -> publications); |
|
698 | 706 |
if($result -> totalPublications ===0){ |
699 | 707 |
//.'&rows='.$size.'&offset='.$page |
700 | 708 |
$request = "http://api.crossref.org/works?query=" . urlencode( trim($doi))."&rows=".$size."&offset=".($page-1)*$size; |
... | ... | |
732 | 740 |
return $result; |
733 | 741 |
} |
734 | 742 |
} |
743 |
// Look up a single DOI, going through the cache when caching is enabled.
// $doi the DOI to search for
// return whatever _searchSingleDOI() yields for that DOI (NULL when nothing was
// retrieved); a cached value is returned as-is
public function searchSingleDOI($doi) {
    // Caching disabled: go straight to the lookup.
    if (!$this -> cache -> getCaching()) {
        return $this -> _searchSingleDOI($doi);
    }

    $cacheKey = self :: SEARCH_DOI_CACHE_ID . '.' . $doi;
    $cached = $this -> cache -> get($cacheKey, self :: CACHE_GROUP);

    // Anything other than FALSE is treated as a cache hit.
    if ($cached !== FALSE) {
        return $cached;
    }

    $publication = $this -> _searchSingleDOI($doi);

    // NULL results are not stored, so a failed lookup is retried next time.
    if ($publication !== NULL) {
        $this -> cache -> store($publication, $cacheKey, self :: CACHE_GROUP);
    }

    return $publication;
}
|
756 |
// Resolve a single DOI by requesting self::CROSSREF_API_DOI followed by the
// URL-encoded, trimmed DOI.
// $doi the DOI to resolve
// return the value produced by _parseJsonDOI() for the response message, or
// NULL when the response status is not "ok", there is no message, or any
// error occurs (errors are logged, never propagated to the caller)
private function _searchSingleDOI($doi) {
    try {
        $request = self :: CROSSREF_API_DOI . urlencode(trim($doi));
        JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
        $time = microtime(TRUE);
        $response = $this -> http -> get($request);
        JLog :: add('Received response in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG);

        if ($response == NULL) {
            throw new Exception('no HTTP response');
        }
        if ($response -> code != self :: HTTP_OK) {
            throw new Exception('HTTP response code ' . $response -> code);
        }

        // Hyphens are turned into underscores so that hyphenated JSON keys can
        // be read with the -> property syntax.
        // NOTE(review): this rewrites the whole body, so hyphens inside values
        // (DOIs, URLs, titles, ...) are replaced as well - confirm that
        // _parseJsonDOI() does not need the original values.
        $res = json_decode(str_replace("-", "_", $response -> body));

        if (isset($res -> status) && $res -> status === "ok" && isset($res -> message)) {
            return $this -> _parseJsonDOI($res -> message);
        }

        // Unexpected or unsuccessful payload: explicitly report "nothing found".
        return NULL;
    } catch (Exception $e) {
        JLog :: add('Error performing DOI search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG);
        JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
        return NULL;
    }
}
|
735 | 784 |
private function _parseJsonDOI($message) { |
736 | 785 |
$publication = new JObject(); |
737 | 786 |
$publication -> id = isset($message -> DOI)?$message -> DOI:NULL; |
... | ... | |
765 | 814 |
return NULL; |
766 | 815 |
|
767 | 816 |
} |
768 |
// Legacy DOI lookup: requests self::CROSSREF_URL with the URL-encoded
// self::DOI_SCHEME + trimmed DOI and parses the XML answer.
// $doi the DOI to resolve ($page and $size are accepted but not used here)
// return a result (object): the object built by parseJournal(), parseBook() or
// parseConference() with totalPublications = 1 and totalDatasets = 0, or an
// empty result (no publications, totals 0) for an error record or any failure.
// Only journal, book and conference records are handled; handling for
// dissertation, report-paper, standard, sa_component and database records
// was commented out in the original code.
private function _searchDOIOld($doi, $page, $size) {
    try {
        $request = self :: CROSSREF_URL . urlencode(self :: DOI_SCHEME . trim($doi));
        JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG);
        $startedAt = microtime(TRUE);
        $response = $this -> http -> get($request);
        JLog :: add('Received response in ' . (microtime(TRUE) - $startedAt) . ' s', JLog :: INFO, self :: LOG);

        if ($response == NULL) {
            throw new Exception('no HTTP response');
        }
        if ($response -> code != self :: HTTP_OK) {
            throw new Exception('HTTP response code ' . $response -> code);
        }

        $dom = new DOMDocument();
        $dom -> recover = TRUE;
        if ($dom -> loadXML(trim($response -> body)) == FALSE) {
            throw new Exception('invalid XML response');
        }
        $xpath = new DOMXPath($dom);

        // Locate the crossref element that wraps the actual record.
        $crossrefList = $xpath -> query('/doi_records/doi_record/crossref');
        if ($crossrefList == FALSE) {
            throw new Exception('error parsing DOI record');
        }
        $crossrefNode = $crossrefList -> item(0);
        if ($crossrefNode == NULL) {
            throw new Exception('error parsing DOI record');
        }

        // Run the per-type queries first; a failed query aborts the lookup.
        $journalList = $xpath -> query('./journal', $crossrefNode);
        if ($journalList == FALSE) {
            throw new Exception('error parsing DOI record');
        }
        $bookList = $xpath -> query('./book', $crossrefNode);
        if ($bookList == FALSE) {
            throw new Exception('error parsing DOI record');
        }
        $conferenceList = $xpath -> query('./conference', $crossrefNode);
        if ($conferenceList == FALSE) {
            throw new Exception('error parsing DOI record');
        }

        // A failed error-node query is not thrown: it yields an empty result directly.
        $errorList = $xpath -> query('./error', $crossrefNode);
        if ($errorList == FALSE) {
            $empty = new JObject();
            $empty -> publications = array();
            $empty -> totalPublications = 0;
            $empty -> totalDatasets = 0;
            JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
            return $empty;
        }

        // The first record type present wins: journal, then book, then conference.
        $journalNode = $journalList -> item(0);
        if ($journalNode != NULL) {
            $found = $this -> parseJournal($doi, $xpath, $journalNode);
            JLog :: add('Retrieved journal in ' . (microtime(TRUE) - $startedAt) . ' s', JLog :: INFO, self :: LOG);
            $found -> totalPublications = 1;
            $found -> totalDatasets = 0;
            return $found;
        }

        $bookNode = $bookList -> item(0);
        if ($bookNode != NULL) {
            $found = $this -> parseBook($doi, $xpath, $bookNode);
            JLog :: add('Retrieved book in ' . (microtime(TRUE) - $startedAt) . ' s', JLog :: INFO, self :: LOG);
            $found -> totalPublications = 1;
            $found -> totalDatasets = 0;
            return $found;
        }

        $conferenceNode = $conferenceList -> item(0);
        if ($conferenceNode != NULL) {
            $found = $this -> parseConference($doi, $xpath, $conferenceNode);
            JLog :: add('Retrieved conference in ' . (microtime(TRUE) - $startedAt) . ' s', JLog :: INFO, self :: LOG);
            $found -> totalPublications = 1;
            $found -> totalDatasets = 0;
            return $found;
        }

        // No supported record type and no error element either: give up.
        $errorNode = $errorList -> item(0);
        if ($errorNode == NULL) {
            throw new Exception('error parsing DOI record');
        }

        // An error element is present: report zero records.
        $empty = new JObject();
        $empty -> publications = array();
        $empty -> totalPublications = 0;
        $empty -> totalDatasets = 0;
        JLog :: add('Retrieved 0 DOI records in ' . (microtime(TRUE) - $startedAt) . ' s', JLog :: INFO, self :: LOG);
        return $empty;
    } catch (Exception $e) {
        JLog :: add('Error performing DOI search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG);
        $empty = new JObject();
        $empty -> publications = array();
        $empty -> totalPublications = 0;
        $empty -> totalDatasets = 0;
        JLog :: add('error parsing DOI record', JLog :: INFO, self :: LOG);
        return $empty;
    }
}
|
861 |
|
|
817 |
|
|
862 | 818 |
// Search for a dataset DOI using caching if enabled. |
863 | 819 |
// $doi the dataset DOI to search for |
864 | 820 |
// return a result (object) containing datasets and total |
865 |
/*private function _searchDataCite($doi) {
|
|
821 |
private function _searchDataCiteWithDoi($doi) {
|
|
866 | 822 |
try { |
867 | 823 |
$request = self :: DATACITE_URL . urlencode(trim($doi)); |
868 | 824 |
JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG); |
... | ... | |
871 | 827 |
JLog :: add('Received response in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG); |
872 | 828 |
if ($response== NULL) |
873 | 829 |
throw new Exception('no HTTP response'); |
874 |
if ($response -> code == self :: HTTP_NOT_FOUND) { // no dataset found; just return empty result |
|
830 |
/*if ($response -> code == self :: HTTP_NOT_FOUND) { // no dataset found; just return empty result
|
|
875 | 831 |
$result = new JObject(); |
876 | 832 |
$result -> datasets = array(); |
877 | 833 |
$result -> totalDatasets = 0; |
878 | 834 |
JLog :: add('Retrieved 0 DataCite records in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG); |
879 | 835 |
return $result; |
880 |
} |
|
836 |
}*/
|
|
881 | 837 |
if ($response -> code != self :: HTTP_OK) |
882 | 838 |
throw new Exception('HTTP response code ' . $response -> code); |
883 | 839 |
$document = new DOMDocument(); |
... | ... | |
920 | 876 |
if ($author -> fullName != NULL) |
921 | 877 |
$dataset -> authors[] = $author; |
922 | 878 |
} |
923 |
$result = new JObject(); |
|
879 |
/*$result = new JObject();
|
|
924 | 880 |
$result -> datasets = array(); |
925 | 881 |
$result -> totalDatasets = 0; |
926 | 882 |
if (($dataset -> id != NULL) || ($dataset -> url != NULL) || ($dataset -> title != NULL) || ($dataset -> authors != NULL) || ($dataset -> year != NULL)) { |
927 | 883 |
$result -> datasets[] = $dataset; |
928 | 884 |
$result -> totalDatasets = 1; |
929 |
} |
|
885 |
}*/
|
|
930 | 886 |
JLog :: add('Retrieved DataCite record in ' . (microtime(TRUE) - $time) . ' s', JLog :: INFO, self :: LOG); |
931 |
return $result;
|
|
887 |
return $dataset;
|
|
932 | 888 |
} catch (Exception $e) { |
933 | 889 |
JLog :: add('Error performing DataCite search (doi: ' . $doi . '): ' . $e -> getMessage(), JLog :: ERROR, self :: LOG); |
934 | 890 |
return NULL; |
935 | 891 |
} |
936 |
}*/
|
|
892 |
} |
|
937 | 893 |
private function _searchDataCite($doi, $page, $size) { |
938 |
try { |
|
894 |
$result = new JObject(); |
|
895 |
$result->datasets = array(); |
|
896 |
$result->totalPublications = 0; |
|
897 |
$result->totalDatasets = 0; |
|
898 |
try { |
|
899 |
$dois = explode(" ", preg_replace('/\s+/', ' ',$doi)); |
|
900 |
|
|
901 |
$unique_dois =array_slice( array_unique($dois),0,10); |
|
902 |
$pattern1 = '#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])\S)+)\b#'; |
|
903 |
$pattern2 = '#\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?!["&\'<>])[[:graph:]])+)\b#'; |
|
904 |
if (preg_match($pattern1, $unique_dois[0]) || preg_match($pattern2, $unique_dois[0])) { |
|
905 |
if(count($dois)>10){ |
|
906 |
$result = new JObject(); |
|
907 |
$result->datasets = array(); |
|
908 |
$result->totalPublications = 0; |
|
909 |
$result->totalDatasets = '-'; |
|
910 |
$result->maxDoisExceeded = true; |
|
911 |
return $result; |
|
912 |
} |
|
913 |
JLog :: add('It;s a dataset DOI!!!!!!!!! ' . $dois[0], JLog :: INFO, self :: LOG); |
|
914 |
JLog :: add('Searching for DOIs in Datacite', JLog :: INFO, self :: LOG); |
|
915 |
foreach ($unique_dois as $doi_) { |
|
916 |
JLog :: add('DOI::: ' . $doi_, JLog :: INFO, self :: LOG); |
|
917 |
$dataset = $this->_searchDataCiteWithDoi($doi_); |
|
918 |
if ($dataset != null) { |
|
919 |
$result->datasets[] = $dataset; |
|
920 |
} |
|
921 |
} |
|
922 |
}else{ |
|
923 |
|
|
924 |
JLog :: add('It;sNOOOOOOOT a dataset DOI!!!!!!!!! ' . $dois[0], JLog :: INFO, self :: LOG); |
|
925 |
} |
|
926 |
$result->totalDatasets = count($result->datasets); |
|
927 |
if($result->totalDatasets!==0){ |
|
928 |
return $result; |
|
929 |
} |
|
939 | 930 |
$request = self :: DATACITE_URL_KEYWORD. urlencode(trim($doi)).self :: DATACITE_URL_KEYWORD_REST."&rows=".$size."&start=".($page-1)*$size; |
940 | 931 |
JLog :: add('Requesting ' . $request, JLog :: INFO, self :: LOG); |
941 | 932 |
$time = microtime(TRUE); |
... | ... | |
962 | 953 |
throw new Exception('error parsing DataCite record'); |
963 | 954 |
$numFound=((($numFoundNode)) == NULL) ? NULL : trim($numFoundNode -> nodeValue); |
964 | 955 |
if ((($resultNodes = $xpath -> query('/response/result/doc')) == FALSE) || (($resultNode = $resultNodes -> item(0)) == NULL)) |
965 |
throw new Exception('error parsing DataCite record'); |
|
966 |
$result = new JObject(); |
|
967 |
$result -> datasets = array(); |
|
968 |
$result -> totalDatasets = 0; |
|
956 |
throw new Exception('error parsing DataCite record'); |
|
969 | 957 |
foreach ($resultNodes as $resultNode) { |
970 | 958 |
if (($idNodes = $xpath -> query('./str[@name = "doi"]/text()', $resultNode)) == FALSE) |
971 | 959 |
throw new Exception('error parsing DataCite record'); |
... | ... | |
1415 | 1403 |
$publication -> description = (($shortDescriptionNode = $shortDescriptionNodes -> item(0)) == NULL) ? NULL : trim($shortDescriptionNode -> nodeValue); |
1416 | 1404 |
if (($author -> lastName != NULL) || ($author -> firstName != NULL)) |
1417 | 1405 |
$publication -> authors[] = $author; |
1418 |
if ((($doiNode = $doiNodes -> item(0)) != NULL) && (($doiResult = $this -> searchDOI(trim($doiNode -> nodeValue),10,1)) != NULL) && ($doiResult -> totalPublications > 0)) { // resolve via DOI
|
|
1406 |
if ((($doiNode = $doiNodes -> item(0)) != NULL) && (($doiResult = $this -> searchSingleDOI(trim($doiNode -> nodeValue))) != NULL)) { // resolve via DOI
|
|
1419 | 1407 |
if ($publication -> url == NULL) // set URL if missing |
1420 |
$publication -> url = $doiResult -> publications[0] -> url;
|
|
1408 |
$publication -> url = $doiResult -> url; |
|
1421 | 1409 |
if ($publication -> title == NULL) // set title if missing |
1422 |
$publication -> title = $doiResult -> publications[0] -> title;
|
|
1410 |
$publication -> title = $doiResult -> title; |
|
1423 | 1411 |
|
1424 |
foreach ($doiResult -> publications[0] -> authors as $doiAuthor) { // merge authors
|
|
1412 |
foreach ($doiResult -> authors as $doiAuthor) { // merge authors |
|
1425 | 1413 |
if ((count($publication -> authors) == 0) || ($doiAuthor -> id != $publication -> authors[0] -> id) || ($doiAuthor -> lastName != $publication -> authors[0] -> lastName) || ($doiAuthor -> firstName != $publication -> authors[0] -> firstName) || ($doiAuthor -> fullName != $publication -> authors[0] -> fullName)) |
1426 | 1414 |
$publication -> authors[] = $doiAuthor; |
1427 | 1415 |
} |
1428 | 1416 |
if ($publication -> year == NULL) // set year if missing |
1429 |
$publication -> year = $doiResult -> publications[0] -> year;
|
|
1417 |
$publication -> year = $doiResult -> year; |
|
1430 | 1418 |
} |
1431 | 1419 |
if (($publication -> id != NULL) || ($publication -> title != NULL) || ($publication -> authors != NULL) || ($publication -> year != NULL) || ($publication -> description != NULL)) |
1432 | 1420 |
$result -> publications[] = $publication; |
Also available in: Unified diff
Claims search: search in OpenAIRE/CrossRef/DataCite with a list of DOIs (up to 10 DOIs for CrossRef and DataCite). Datasources query: add (datasourcecompatibilityid <> hostedBy).