diff --git a/conf/default.php b/conf/default.php index 4ea93b8..32a742b 100644 --- a/conf/default.php +++ b/conf/default.php @@ -1,13 +1,11 @@ */ -$conf['minimum_word_length'] = 2; // Minimum world length of words inside the cloud +$conf['minimum_word_length'] = 2; // Minimum word length of words inside the cloud $conf['search_blacklist'] = ''; // Specify search words which shouldn't appear on pages $conf['word_blacklist'] = ''; // Specify words which shouldn't appear on pages $conf['tag_blacklist'] = ''; // Specify tags which shouldn't appear on pages -$conf['list_tags_of_subns'] = 0; // list also tags in subnamespaces of a specified namespace - -//Setup VIM: ex: et ts=2 : +$conf['list_tags_of_subns'] = 1; // list also tags in subnamespaces of a specified namespace diff --git a/conf/metadata.php b/conf/metadata.php index ec69ee9..d29bf2f 100644 --- a/conf/metadata.php +++ b/conf/metadata.php @@ -12,4 +12,4 @@ $meta['tag_blacklist'] = array('string'); $meta['list_tags_of_subns'] = array('onoff'); -//Setup VIM: ex: et ts=2 : +//Setup VIM: ex: et ts=4 enc=utf-8 : diff --git a/syntax.php b/syntax.php index ea63f0c..72cf6c2 100644 --- a/syntax.php +++ b/syntax.php @@ -1,164 +1,168 @@ */ - -use dokuwiki\File\PageResolver; - -class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin { - protected $knownFlags = array('showCount'); +class syntax_plugin_cloud extends DokuWiki_Syntax_Plugin +{ protected $stopwords = null; - /** - * Constructor. Loads stopwords. - */ - public function __construct() { - $this->stopwords = $this->_getStopwords(); + public function getType() + { + return 'substition'; } - function getType() { return 'substition'; } - function getPType() { return 'block'; } - function getSort() { return 98; } + public function getPType() + { + return 'block'; + } + + public function getSort() + { + return 98; + } - function connectTo($mode) { + /** + * Connect pattern to lexer + */ + public function connectTo($mode) + { $this->Lexer->addSpecialPattern('~~\w*?CLOUD.*?~~', $mode, 'plugin_cloud'); } - function handle($match, $state, $pos, Doku_Handler $handler) { + /** + * Handle the match + */ + public function handle($match, $state, $pos, Doku_Handler $handler) + { $match = substr($match, 2, -2); // strip markup - if (substr($match, 0, 3) == 'TAG') { + list($prefix, $params) = explode('CLOUD', $match, 2); + if ($prefix === '') { + $type = 'word'; + } elseif ($prefix === 'TAG') { $type = 'tag'; - } elseif (substr($match, 0, 6) == 'SEARCH') { + } elseif ($prefix === 'SEARCH') { $type = 'search'; } else { - $type = 'word'; + return false; } - // Ensure we always have 2 entries in the exploded array - list($num, $ns) = array_pad(explode('>', $match, 2), 2, ''); - list($junk, $num) = array_pad(explode(':', $num, 2), 2, ''); - - $flags = [ - 'showCount' => false, - ]; - if (preg_match('/\[.*]/', $junk, $matches) === 1) { - $matches = trim($matches[0], '[]'); - $found = explode(',', $matches); - $flags = array(); - foreach ($found as $flag) { - if (in_array($flag, $this->knownFlags)) { - // Actually we just set flags as present - // Later we might add values to flags like key=value pairs - $flags [$flag] = true; - } - } - } + list($params, $ns) = explode('>', $params, 2); + $namespaces = isset($ns) ? array_map('trim', explode('|', $ns)) : []; - if (!is_numeric($num)) $num = 50; - if(!is_null($ns)) $namespaces = explode('|', $ns); - else $namespaces = null; + list($options, $num) = explode(':', $params, 2); + $num = (isset($num) && is_numeric($num)) ? ($num + 0) : 50; - return array($type, $num, $namespaces, $flags); + $flags = []; + $found = array_map('trim', explode(',', substr($options, 1, -1))); + foreach ($found as $flag) { + // Actually we just set flags as present + // Later we might add values to flags like key=value pairs + $flags[$flag] = true; + } + + return [$type, $num, $namespaces, $flags]; } - function render($format, Doku_Renderer $renderer, $data) { + /** + * Create output + */ + public function render($format, Doku_Renderer $renderer, $data) + { global $conf; - list($type, $num, $namespaces, $flags) = $data; - if ($format == 'xhtml') { + if ($format != 'xhtml') return false; - if ($type == 'tag') { // we need the tag helper plugin - /** @var helper_plugin_tag $tag */ - if (plugin_isdisabled('tag') || (!$tag = plugin_load('helper', 'tag'))) { + list($type, $num, $namespaces, $flags) = $data; + switch ($type) { + case 'tag': // require tag plugin + $cloud = $this->getTagCloud($num, $min, $max, $namespaces); + if ($cloud === false) { msg('The Tag Plugin must be installed to display tag clouds.', -1); return false; } - $cloud = $this->_getTagCloud($num, $min, $max, $namespaces, $tag); - } elseif($type == 'search') { - /** @var helper_plugin_searchstats $helper */ - $helper = plugin_load('helper', 'searchstats'); - if($helper) { - $cloud = $helper->getSearchWordArray($num); - $this->_filterCloud($cloud, 'search_blacklist'); - // calculate min/max values - $min = PHP_INT_MAX; - $max = 0; - foreach ($cloud as $size) { - $min = min($size, $min); - $max = max($size, $max); - } - } else { + break; + case 'search': // require searchstats plugin + $cloud = $this->getSearchCloud($num, $min, $max); + if ($cloud === false) { msg('You have to install the searchstats plugin to use this feature.', -1); return false; } - } else { - $cloud = $this->_getWordCloud($num, $min, $max); - } - if (!is_array($cloud) || empty($cloud)) return false; - $delta = ($max-$min)/16; - - // prevent caching to ensure the included pages are always fresh - $renderer->nocache(); - - // and render the cloud - $renderer->doc .= '
'.DOKU_LF; - foreach ($cloud as $word => $size) { - if ($size < $min+round($delta)) $class = 'cloud1'; - elseif ($size < $min+round(2*$delta)) $class = 'cloud2'; - elseif ($size < $min+round(4*$delta)) $class = 'cloud3'; - elseif ($size < $min+round(8*$delta)) $class = 'cloud4'; - else $class = 'cloud5'; - - $name = $word; - if ($type == 'tag' && isset($tag)) { - if (class_exists('dokuwiki\File\PageResolver')) { - // Compatibility with tag plugin < 2022-09-30 - $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace; - $resolver = new PageResolver($ns . ':'); - $page = $resolver->resolveId($word); - $exists = page_exists($page); - } else { - // Compatibility with Hogfather and older - $page = $word; - resolve_pageid($tag->namespace, $page, $exists); - } - if($exists) { - $link = wl($page); - if($conf['useheading']) { - $name = p_get_first_heading($page, false); - if (empty($name)) { - $name = $word; - } + break; + default: + $cloud = $this->getWordCloud($num, $min, $max); + } + if (!is_array($cloud) || empty($cloud)) return false; + + // prevent caching to ensure the included pages are always fresh + $renderer->nocache(); + + // and render the cloud + $renderer->doc .= '
'; + $delta = ($max - $min) / 16; + foreach ($cloud as $word => $size) { + if ($size < $min + round($delta)) $class = 'cloud1'; + elseif ($size < $min + round(2 * $delta)) $class = 'cloud2'; + elseif ($size < $min + round(4 * $delta)) $class = 'cloud3'; + elseif ($size < $min + round(8 * $delta)) $class = 'cloud4'; + else $class = 'cloud5'; + + $name = $word; + if ($type == 'tag') { + /** @var helper_plugin_tag $tag */ + isset($tag) || $tag = $this->loadHelper('tag', true); + + $ns = method_exists($tag, 'getNamespace') ? $tag->getNamespace() : $tag->namespace; + if (class_exists('dokuwiki\File\PageResolver')) { + // Compatibility with tag plugin < 2022-09-30 + $resolver = new PageResolver($ns . ':'); + $id = $resolver->resolveId($word); + $exists = page_exists($id); + } else { + // Compatibility with Hogfather and older + $id = $word; + $exists = false; + resolve_pageID($ns, $id, $exists); + } + + if ($exists) { + $link = wl($id); + if ($conf['useheading']) { + $name = p_get_first_heading($id, false); + if (blank($name)) { + $name = $word; } - $class .= '_tag1'; - } else { - $link = wl($word, array('do'=>'showtag', 'tag'=>$word)); - $class .= '_tag2'; } - $title = $word; } else { - if($conf['userewrite'] == 2) { - $link = wl($word, array('do'=>'search', 'id'=>$word)); - } else { - $link = wl($word, 'do=search'); - } - $title = $size; + $link = wl($id, ['do' => 'showtag', 'tag' => $word]); } - - if ($flags['showCount']) { - $name .= '('.$size.')'; + $title = $word; + $class .= ($exists ? '_tag1' : '_tag2'); + } else { + if ($conf['userewrite'] == 2) { + $link = wl($word, ['do' => 'search', 'id' => $word]); + } else { + $link = wl($word, 'do=search'); } - $renderer->doc .= DOKU_TAB . '' . hsc($name) . '' . DOKU_LF; + $title = $size; } - $renderer->doc .= '
' . DOKU_LF; - return true; + + if (array_key_exists('showCount', $flags) && $flags['showCount'] === true) { + $name .= '(' . $size . ')'; + } + $renderer->doc .= '' . hsc($name) . ' '; } - return false; + $renderer->doc .= '
'; + return true; } /** @@ -169,131 +173,199 @@ function render($format, Doku_Renderer $renderer, $data) { * - conf/stopwords.txt * * If both files exists, then both files are used - the content is merged. + * + * @return array list of stop words */ - protected function _getStopwords() { - global $conf; - // load stopwords - $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt'; - if (@file_exists($swfile)) $stopwords = file($swfile, FILE_IGNORE_NEW_LINES); - else $stopwords = array(); - - // load extra local stopwords - $swfile = DOKU_CONF.'stopwords.txt'; - if (@file_exists($swfile)) $stopwords = array_merge($stopwords, file($swfile, FILE_IGNORE_NEW_LINES)); + protected function getStopwords() + { + if ($this->stopwords === null) { + // load DokuWiki stopwords + if (is_callable('dokuwiki\Search\Tokenizer::getInstance')) { + $this->stopwords = dokuwiki\Search\Tokenizer::getInstance()->getStopwords(); + } else { + $this->stopwords = idx_get_stopwords(); + } - if (count($stopwords) == 0) { - return null; + // load extra local stopwords + $swfile = DOKU_CONF . 'stopwords.txt'; + if (file_exists($swfile)) { + $this->stopwords = array_merge( + $this->stopwords, + file($swfile, FILE_IGNORE_NEW_LINES) + ); + } } - - return $stopwords; + return $this->stopwords; } /** * Applies filters on the cloud: * - removes all short words, see config option 'minimum_word_length' - * - removes all words in configured blacklist $balcklistName from $cloud array + * - removes all words in configured blacklist $blacklistName from $cloud array + * + * @param array $cloud array(word=>count) + * @param string $blacklistName config setting name */ - function _filterCloud(&$cloud, $balcklistName) { - // Remove to short words + protected function filterCloud(&$cloud, $blacklistName) + { + // Remove short words $min = $this->getConf('minimum_word_length'); - foreach ($cloud as $key => $count) { - if (iconv_strlen($key) < $min) - unset($cloud[$key]); + if (is_callable('dokuwiki\Utf8\PhpString::strlen')) { + foreach ($cloud as $key => $count) { + if (Utf8\PhpString::strlen($key) < $min) { + unset($cloud[$key]); + } + } + } else { + foreach ($cloud as $key => $count) { + if (utf8_strlen($key) < $min) { + unset($cloud[$key]); + } + } } // Remove stopwords - if ($this->stopwords != null) - { - foreach ($this->stopwords as $word) { - if (isset($cloud[$word])) - unset($cloud[$word]); + foreach ($this->getStopwords() as $word) { + if (isset($cloud[$word])) { + unset($cloud[$word]); } } // Remove word which are on the blacklist - $blacklist = $this->getConf($balcklistName); - if(!empty($blacklist)) { - $blacklist = explode(',', $blacklist); - $blacklist = str_replace(' ', '', $blacklist); // remove spaces - + $blacklist = $this->getConf($blacklistName); + if (!empty($blacklist)) { + $blacklist = array_map('trim', explode(',', $blacklist)); foreach ($blacklist as $word) { - if (isset($cloud[$word])) + if (isset($cloud[$word])) { unset($cloud[$word]); + } } } } /** * Returns the sorted word cloud array + * + * @param int $num number of words shown in cloud + * @param int $min lowest shown count + * @param int $max highest shown count + * @return array(word=>count) */ - function _getWordCloud($num, &$min, &$max) { - global $conf; - - $cloud = array(); - - if (@file_exists($conf['indexdir'].'/page.idx')) { // new word-length based index - require_once(DOKU_INC.'inc/indexer.php'); - + protected function getWordCloud($num, &$min, &$max) + { + $cloud = []; + + if (is_callable('dokuwiki\Search\FulltextIndex::getInstance')) { + $FulltextIndex = dokuwiki\Search\FulltextIndex::getInstance(); + $lengths = $FulltextIndex->getIndexLengths(0); + $funcGetIndex = [$FulltextIndex, 'getIndex']; + } else { $lengths = idx_indexLengths(0); - foreach ($lengths as $len) { - $idx = idx_getIndex('i', $len); - $word_idx = idx_getIndex('w', $len); - - $this->_addWordsToCloud($cloud, $idx, $word_idx); - } - - } else { // old index - $idx = file($conf['cachedir'].'/index.idx'); - $word_idx = file($conf['cachedir'].'/word.idx'); + $funcGetIndex = 'idx_getIndex'; + } - $this->_addWordsToCloud($cloud, $idx, $word_idx); + foreach ($lengths as $len) { + $this->addWordsToCloud($cloud, $funcGetIndex('i', $len), $funcGetIndex('w', $len)); } - $this->_filterCloud($cloud, 'word_blacklist'); + $this->filterCloud($cloud, 'word_blacklist'); - return $this->_sortCloud($cloud, $num, $min, $max); + return $this->sortCloud($cloud, $num, $min, $max); } /** * Adds all words in given index as $word => $freq to $cloud array + * + * @param array $cloud array(word=>count) + * @param array $idx list with per page the frequency of each word in $word_idx + * @param array $word_idx list with words of same length */ - function _addWordsToCloud(&$cloud, $idx, $word_idx) { + protected function addWordsToCloud(&$cloud, $idx, $word_idx) + { $wcount = count($word_idx); // collect the frequency of the words for ($i = 0; $i < $wcount; $i++) { $key = trim($word_idx[$i]); - $value = explode(':', $idx[$i]); - if (!trim($value[0])) continue; - $cloud[$key] = count($value); + $pages = explode(':', $idx[$i]); + if (!trim($pages[0])) continue; + $cloud[$key] = count($pages); } } /** * Returns the sorted tag cloud array + * + * @param int $num number of words shown in the cloud + * @param int $min lowest shown count + * @param int $max highest shown count + * @param array $namespaces array of namespaces where to count the tags + * @return false|array(word=>count) */ - function _getTagCloud($num, &$min, &$max, $namespaces, helper_plugin_tag $tag) { - $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns')); + protected function getTagCloud($num, &$min, &$max, $namespaces) + { + if (!plugin_isdisabled('tag')) { + /** @var helper_plugin_tag $tag */ + $tag = $this->loadHelper('tag', true); + $cloud = $tag->tagOccurrences([], $namespaces, true, $this->getConf('list_tags_of_subns')); + $this->filterCloud($cloud, 'tag_blacklist'); + } else { + return false; + } + return $this->sortCloud($cloud, $num, $min, $max); + } - $this->_filterCloud($cloud, 'tag_blacklist'); + /** + * Returns the search cloud array + * + * @param int $num number of words shown in the cloud + * @param int $min lowest shown count + * @param int $max highest shown count + * @return array|false + */ + protected function getSearchCloud($num, &$min, &$max) + { + if (!plugin_isdisabled('searchstats')) { + /** @var helper_plugin_searchstats $helper */ + $helper = $this->loadHelper('searchstats', true); + $cloud = $helper->getSearchWordArray($num); + $this->filterCloud($cloud, 'search_blacklist'); + } else { + return false; + } - return $this->_sortCloud($cloud, $num, $min, $max); + // calculate min/max values + $min = PHP_INT_MAX; + $max = 0; + foreach ($cloud as $size) { + $min = min($size, $min); + $max = max($size, $max); + } + return $cloud; } /** * Sorts and slices the cloud + * + * @param array $cloud array(word=>count) + * @param int $num number of words shown in the cloud + * @param int $min lowest shown count + * @param int $max highest shown count + * @return array(word=>count) */ - function _sortCloud($cloud, $num, &$min, &$max) { - if(empty($cloud)) return $cloud; + protected function sortCloud($cloud, $num, &$min, &$max) + { + if (empty($cloud)) { + return $cloud; + } // sort by frequency, then alphabetically - arsort($cloud); + arsort($cloud, SORT_NUMERIC); $cloud = array_chunk($cloud, $num, true); $max = current($cloud[0]); $min = end($cloud[0]); - ksort($cloud[0]); + Sort::ksort($cloud[0]); return $cloud[0]; } } -// vim:ts=4:sw=4:et: