From 9b0b6df2fe8902bf329ec9d9fb634216d870c893 Mon Sep 17 00:00:00 2001 From: Pfauenauge Date: Thu, 18 May 2017 22:37:31 +0200 Subject: [PATCH] GoogleAPI startRow/paging support (use Config: const downloadGoogleMaxPages = 10;) and Data capture loop import multiple dates from CLI (use Config: const maxDaysBatchImport = 99;) --- .../apis/Google/Service/Webmasters.php | 9 +++ organic-search-analytics/data-capture-run.php | 65 +++++++++++++++---- .../inc/code/dataCapture.php | 39 ++++++++++- 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/organic-search-analytics/apis/Google/Service/Webmasters.php b/organic-search-analytics/apis/Google/Service/Webmasters.php index 8f332ca..92f452f 100644 --- a/organic-search-analytics/apis/Google/Service/Webmasters.php +++ b/organic-search-analytics/apis/Google/Service/Webmasters.php @@ -738,6 +738,7 @@ class Google_Service_Webmasters_SearchAnalyticsQueryRequest extends Google_Colle public $dimensions; public $endDate; public $rowLimit; + public $startRow; public $searchType; public $startDate; @@ -782,6 +783,14 @@ public function getRowLimit() { return $this->rowLimit; } + public function setStartRow($startRow) + { + $this->startRow = $startRow; + } + public function getStartRow() + { + return $this->startRow; + } public function setSearchType($searchType) { $this->searchType = $searchType; diff --git a/organic-search-analytics/data-capture-run.php b/organic-search-analytics/data-capture-run.php index 9d7976d..023fe42 100644 --- a/organic-search-analytics/data-capture-run.php +++ b/organic-search-analytics/data-capture-run.php @@ -5,7 +5,7 @@ if( isset( $_GET ) && is_array( $_GET ) && count( $_GET ) > 0 ) { $params = $_GET; -} elseif( isset( $argv ) && is_array( $argv ) && count( $argv ) >= 4 ) { +} elseif( isset( $argv ) && is_array( $argv ) && count( $argv ) >= 3 ) { $params = array(); $params['type'] = $argv[1]; $params['domain'] = $argv[2]; @@ -28,17 +28,20 @@ if( isset( $argv[8] ) ) { $params['search_type'] = $argv[8]; } + if( isset( $argv[9] ) ) { + $params['start_row'] = $argv[9]; + } } - -if( isset($params) && isset($params['type']) && isset($params['domain']) && isset($params['date']) && preg_match("/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/",$params['date']) ) { +if( isset($params) && isset($params['type']) && isset($params['domain']) /*&& isset($params['date'])*/ && (preg_match("/^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])$/",$params['date']) || $params['date'] == null) ) { /* Set the max allowed execution time for the page to allow for longer procesing times. */ ini_set('max_execution_time', 600); //300 seconds = 5 minutes - + /* Set overrides from URL paramters */ $overrideSettings = array(); if( isset( $params['mode'] ) ) { $overrideSettings['mode'] = $params['mode']; } if( isset( $params['row_limit'] ) ) { $overrideSettings['row_limit'] = $params['row_limit']; } + if( isset( $params['start_row'] ) ) { $overrideSettings['start_row'] = $params['start_row']; } if( isset( $params['dimensions'] ) ) { $overrideSettings['dimensions'] = explode( ',', $params['dimensions'] ); } if( isset( $params['search_type'] ) ) { $overrideSettings['search_type'] = explode( ',', $params['search_type'] ); } if( isset( $params['filters'] ) ) { @@ -56,16 +59,50 @@ switch( $params['type'] ) { case 'googleSearchAnalytics': - $recordsImported = $dataCapture->downloadGoogleSearchAnalytics( $params['domain'],$params['date'], $overrideSettings ); - if( !isset( $params['mode'] ) || $params['mode'] != 'return' ) { - switch( $recordsImported ) { - case -1: - echo "There was an error in authorizing your API connection."; - break; - default: - echo number_format( $recordsImported ) . " records succesfully imported to the database for " . $params['domain'] . " for date: " . $params['date'] . "."; - } - } + if( $params['date'] == '' ) { + $maxDaysBatchImport = 1; + if( defined( 'config::maxDaysBatchImport' ) ) { + $maxDaysBatchImport = min(max(config::maxDaysBatchImport, 1), 999); + } + $googleSearchAnalyticsDates = $dataCapture->checkNeededDataGoogleSearchAnalytics($params['domain']); + if( count( $googleSearchAnalyticsDates['datesWithNoData'] ) > 0 ) { + $i=0; + foreach( $googleSearchAnalyticsDates['datesWithNoData'] as $date ) { + if( $i < $maxDaysBatchImport ) { + echo "\n".$date.': '; + $recordsImported = $dataCapture->downloadGoogleSearchAnalyticsPaged( $params['domain'], $date, $overrideSettings ); + if( !isset( $params['mode'] ) || $params['mode'] != 'return' ) { + switch( $recordsImported ) { + case -1: + echo "There was an error in authorizing your API connection."; + break; + default: + echo number_format( $recordsImported ) . " records succesfully imported to the database for " . $params['domain'] . " for date: " . $params['date'] . "."; + } + } + else { + break; + } + sleep(1); //make sure Google limits are not hit + } + $i++; + } + } + } + else { + $recordsImported = $dataCapture->downloadGoogleSearchAnalyticsPaged( $params['domain'],$params['date'], $overrideSettings ); + if( !isset( $params['mode'] ) || $params['mode'] != 'return' ) { + switch( $recordsImported ) { + case -1: + echo "There was an error in authorizing your API connection."; + break; + default: + echo number_format( $recordsImported ) . " records succesfully imported to the database for " . $params['domain'] . " for date: " . $params['date'] . "."; + } + } + } + + break; case 'bingSearchKeywords': $recordsImported = $dataCapture->downloadBingSearchKeywords($params['domain'],$params['date'], $overrideSettings); diff --git a/organic-search-analytics/inc/code/dataCapture.php b/organic-search-analytics/inc/code/dataCapture.php index aac1ec2..699c620 100644 --- a/organic-search-analytics/inc/code/dataCapture.php +++ b/organic-search-analytics/inc/code/dataCapture.php @@ -64,7 +64,8 @@ private function defaultGoogleSearchAnalyticsSettings() { return array( 'mode' => 'import', /* What to do with the data. Valid options: import, return */ 'dimensions' => $this->getDimensions(), - 'row_limit' => 5000 /* Number of rows to capture from Google. Valid options: 1-5000 */ + 'row_limit' => 5000, /* Number of rows to capture from Google. Valid options: 1-5000 */ + 'start_row' => 0 /* Start rows to capture from Google. Valid options: >=0 */ ); } @@ -204,6 +205,36 @@ public function checkNeededDataGoogleSearchAnalytics($website) { return $returnArray; } + /** + * Request Google Search Analytics API and loop through (max. 20) pages + * + * @param $website String Website URL that is enabled in Google Search Console + * @param $date Date (YYYY-MM-DD) Date for which to request data + * @param $overrides Array Values to override default settings for request + * + * @returns Integer,array Number of records found or var_dump of returned data from Google depending on mode + */ + public function downloadGoogleSearchAnalyticsPaged( $website, $date, $overrides = array() ) { + $params = array_merge( $this->defaultGoogleSearchAnalyticsSettings(), $overrides ); + + $recordsImported = null; + $recordsImportedTotal = 0; + $maxPages = 1; + if( defined( 'config::downloadGoogleMaxPages' ) ) { + $maxPages = max(config::downloadGoogleMaxPages, 1); + } + for ($page = 0; (is_null($recordsImported) || $recordsImported > 0) && $page < $maxPages; $page++) { + $recordsImported = $this->downloadGoogleSearchAnalytics( $website, $date, $overrides ); + if( $recordsImported > 0 ) { + $recordsImportedTotal = $recordsImportedTotal + $recordsImported; + $overrides['start_row'] = $params['row_limit']*($page+1); + } + else { + break; + } + } + return $recordsImportedTotal; + } /** * Request Google Search Analytics API @@ -239,11 +270,13 @@ public function downloadGoogleSearchAnalytics( $website, $date, $overrides = arr /* Build Search Analytics Request */ $searchAnalyticsRequest->setDimensions( $params['dimensions'] ); $searchAnalyticsRequest->setRowLimit( $params['row_limit'] ); /* Valid options: 1-5000 */ + $searchAnalyticsRequest->setStartRow( $params['start_row'] ); /* Valid options: >=0 */ + /* Set date for Search Analytics Request */ $searchAnalyticsRequest->setStartDate( $date ); $searchAnalyticsRequest->setEndDate( $date ); - + if( isset( $params['filters'] ) || isset( $params['groups'] ) ) { $searchAnalyticsDimensionFilterGroup = new Google_Service_Webmasters_ApiDimensionFilterGroup; } @@ -260,7 +293,7 @@ public function downloadGoogleSearchAnalytics( $website, $date, $overrides = arr $searchAnalyticsDimensionFilterGroup->setFilters( $filters ); $searchAnalyticsRequest->setDimensionFilterGroups( array( $searchAnalyticsDimensionFilterGroup ) ); } - + if( isset( $params['groups'] ) ) { /* TODO */ // $dimensionFilterGroups['groups'] = $params['groups'];