Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 110 additions & 40 deletions jw-shortcode-scraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,34 @@ class Shortcode_Scraper extends WP_CLI_Command {
private $assoc_args = [];
private $progress_bar;

/**
* List of shortcodes to be ignored.
*
* @var array
*/
private $ignore_codes = [];

/**
* List of shortcodes found.
*
* @var array
*/
private $codes_found = [];

/**
* Scrapes post content and provides a list of shortcodes that are in use.
*
* # OPTIONS
*
* [--ignore=<comma-separated-shortcodes>]
* : Ignores specific shortcodes.
*
* [--min-name=<minimum-length-of-shortcode>]
* : Minimum number of characters that a shortcode name should have.
*
* [--unique-list]
* : Get a unique list of shortcodes.
*
* [--export]
* : Exports the results to a CSV file.
*
Expand All @@ -31,16 +54,22 @@ public function scrape( $args, $assoc_args ) {
$this->args = $args;
$this->assoc_args = $assoc_args;

if ( ! empty( $this->assoc_args['ignore'] ) ) {
$this->ignore_codes = explode( ',', $this->assoc_args['ignore'] );
}

// Whether or not to export the results.
$export = isset( $assoc_args['export'] );

$this->site = isset( $assoc_args['site'] ) ? $assoc_args['site'] : false;
if ( $this->site ) {
$blog_details = get_blog_details( $this->site );
if ( ! $blog_details ) {
WP_CLI::error( sprintf( 'Getting blog details for %s failed', $this->site ) );
if ( is_multisite() ) {
$this->site = isset( $assoc_args['site'] ) ? $assoc_args['site'] : false;
if ( $this->site ) {
$blog_details = get_blog_details( $this->site );
if ( ! $blog_details ) {
WP_CLI::error( sprintf( 'Getting blog details for %s failed', $this->site ) );
}
switch_to_blog( $blog_details->blog_id );
}
switch_to_blog( $blog_details->blog_id );
}

// Use these to store some arrays.
Expand All @@ -49,23 +78,31 @@ public function scrape( $args, $assoc_args ) {
// Process the post results.
$multiplier = 0;

while ( $posts = $this->query_posts( $multiplier ) ) {
// Start progress bar and get total posts.
$this->progress_bar( $this->query_posts( 0, true ), 'posts', 'Processing' );

$this->progress_bar( count( $posts ), 'Post Objects', 'Processing' );
while ( $posts = $this->query_posts( $multiplier ) ) {

$this->process_posts( $posts, $results );

$this->progress_bar( 'finish' );

$multiplier ++;
};

WP_CLI\Utils\format_items( 'table', $results, array(
'post_id',
'post_name',
'shortcode',
'parameters_raw',
) );
$this->progress_bar( 'finish' );

if ( $this->assoc_args['unique-list'] ) {
WP_CLI\Utils\format_items( 'table', $this->codes_found, array(
'shortcode',
'count',
) );
} else {
WP_CLI\Utils\format_items( 'table', $results, array(
'post_id',
'post_name',
'shortcode',
'parameters_raw',
) );
}

if ( $export ) {
try {
Expand All @@ -90,38 +127,57 @@ public function scrape( $args, $assoc_args ) {
/**
 * Queries published posts and pages in batches of 100, or counts them.
 *
 * Batches are ordered by ID so that consecutive offsets walk the table
 * deterministically; without an explicit order the database is free to
 * return rows in any order, which can repeat or skip rows between batches.
 *
 * @param int  $multiplier Zero-based batch index; the row offset is $multiplier * 100.
 * @param bool $get_total  When true, return the total number of matching rows instead of a batch.
 *
 * @return array|false|int Array of row objects (ID, post_name, post_content) for the
 *                         requested batch, or false when the batch is empty. When
 *                         $get_total is true, the total row count as an integer.
 */
private function query_posts( $multiplier = 0, $get_total = false ) {
	global $wpdb;

	if ( true === $get_total ) {
		$sql_query = "
			SELECT COUNT(*)
			FROM {$wpdb->posts}
			WHERE post_status = %s
			AND post_type IN ( %s, %s )
		";

		$sql_query = $wpdb->prepare( $sql_query, 'publish', 'post', 'page' ); // @codingStandardsIgnoreLine Code is provided above.

		// get_var() yields null when the query fails; the cast turns that into 0.
		return (int) $wpdb->get_var( $sql_query ); // @codingStandardsIgnoreLine Code is sanitized above.
	}

	$sql_query = "
		SELECT ID,post_name,post_content
		FROM {$wpdb->posts}
		WHERE post_status = %s
		AND post_type IN ( %s, %s )
		ORDER BY ID ASC
		LIMIT 100 OFFSET %d
	";

	// Guard against a negative batch index producing an invalid OFFSET.
	$offset = max( 0, (int) $multiplier ) * 100;

	$sql_query  = $wpdb->prepare( $sql_query, 'publish', 'post', 'page', $offset ); // @codingStandardsIgnoreLine Code is provided above.
	$result_set = $wpdb->get_results( $sql_query ); // @codingStandardsIgnoreLine Code is sanitized above.

	// get_results() may return null on failure, so avoid count() on it.
	return empty( $result_set ) ? false : $result_set;
}

/**
* Processes posts queried by the database.
*
* @param array $posts Array of post objects
* @param array $posts Array of post objects.
* @param array $results Passing by reference, an array of results to append to/modify.
*
* @return void
*/
private function process_posts( $posts, &$results ) {

$regex = '/\[([a-zA-Z0-9_-]+) ?([^\]]+)?/';
$shortcode_length = '+'; // any length.
if ( ! empty( $this->assoc_args['min-name'] ) ) {
$shortcode_length = '{' . (int) $this->assoc_args['min-name'] . ',}';
}
$regex = '/\[([a-zA-Z0-9_-]' . $shortcode_length . ') ?([^\]]+)?/';

foreach ( $posts as $post_data ) {

Expand Down Expand Up @@ -164,9 +220,23 @@ private function process_posts( $posts, &$results ) {
unset( $shortcode_strings[ $fake ], $shortcode_parameters[ $fake ] );
}

// Combine them
// Combine them.
$codes = [];

foreach ( $shortcode_strings as $key => $shortcode_string ) {
// check if current code should be ignored.
if ( ! empty( $this->ignore_codes ) && in_array( $shortcode_string, $this->ignore_codes, true ) ) {
continue;
}
// add code to the codes found list.
if ( ! array_key_exists( $shortcode_string, $this->codes_found ) ) {
$this->codes_found[ $shortcode_string ] = array(
'shortcode' => $shortcode_string,
'count' => 1,
);
} else {
$this->codes_found[ $shortcode_string ]['count']++;
}
$codes[] = [
'shortcode' => $shortcode_string,
'values' => $shortcode_parameters[ $key ],
Expand All @@ -180,16 +250,16 @@ private function process_posts( $posts, &$results ) {
'shortcode' => $code['shortcode'],
'post_name' => $post_data->post_name,
'parameters' => $this->format_params( $code['values'] ),
'parameters_raw' => json_encode( $code['values'] ),
'parameters_raw' => wp_json_encode( $code['values'] ),
);
}
} // End foreach().
}
}

/**
* Formats parameters into a readable output for the CSV.
*
* @param string $parameters
* @param string $parameters Formats parameter.
*
* @return string
*/
Expand All @@ -214,18 +284,18 @@ private function format_params( $parameters = '' ) {
* Wrapper function for WP_CLI Progress bar
*
* @param int|string $param If integer, start progress bar, if string, should be tick or finish.
* @param string $object_type Type of object being traversed
* @param string $action Action being performed
* @param string $object_type Type of object being traversed.
* @param string $action Action being performed.
*
* @return bool|object False on failure, WP_CLI progress bar object otherwise.
*/
private function progress_bar( $param, $object_type = '', $action = 'Migrating' ) {

if ( $param && is_numeric( $param ) ) {
$this->progress_bar = \WP_CLI\Utils\make_progress_bar( "$action $param $object_type.", $param );
} elseif ( ( $this->progress_bar && 'tick' == $param ) && method_exists( $this->progress_bar, 'tick' ) ) {
} elseif ( ( $this->progress_bar && 'tick' === $param ) && method_exists( $this->progress_bar, 'tick' ) ) {
$this->progress_bar->tick();
} elseif ( ( $this->progress_bar && 'finish' == $param ) && method_exists( $this->progress_bar, 'finish' ) ) {
} elseif ( ( $this->progress_bar && 'finish' === $param ) && method_exists( $this->progress_bar, 'finish' ) ) {
$this->progress_bar->finish();
}

Expand Down