Your IP : 216.73.216.54


Current Path : /var/www/html/mediawiki-1.43.1/extensions/SmiteSpam/includes/
Upload File :
Current File : /var/www/html/mediawiki-1.43.1/extensions/SmiteSpam/includes/SmiteSpamAnalyzer.php

<?php
/**
 * The class performing the evaluation for all wiki pages. The run() method is
 * called to check and return a list of probable spam pages.
 */
class SmiteSpamAnalyzer {
	/**
	 * Settings captured at construction time. Keys: 'checkers' (checker name
	 * => weight), 'threshold', 'ignorePagesWithNoExternalLinks',
	 * 'ignoreSmallPages' and 'sort'.
	 *
	 * @var array
	 */
	protected $config;

	/**
	 * Snapshots the SmiteSpam globals into $this->config.
	 *
	 * @param bool $sort Whether run() should order its result by descending
	 *  spam probability
	 */
	public function __construct( $sort = true ) {
		global $wgSmiteSpamCheckers, $wgSmiteSpamThreshold;
		global $wgSmiteSpamIgnorePagesWithNoExternalLinks;
		global $wgSmiteSpamIgnoreSmallPages;
		$this->config = [
			'checkers' => $wgSmiteSpamCheckers,
			'threshold' => $wgSmiteSpamThreshold,
			'ignorePagesWithNoExternalLinks' => $wgSmiteSpamIgnorePagesWithNoExternalLinks,
			'ignoreSmallPages' => $wgSmiteSpamIgnoreSmallPages,
			'sort' => $sort,
		];
	}

	/**
	 * Retrieves a window of non-redirect pages (highest page_id first, selected
	 * by offset and limit) and runs every configured checker on each of them.
	 * Pages whose weighted average score reaches the configured threshold are
	 * returned.
	 *
	 * A page is skipped when it does not exist, is not wikitext, has no
	 * readable text content, was created by a trusted user, is excluded by the
	 * "ignore" settings, or when no checker produced a score for it.
	 *
	 * @todo Perform DB queries in batches, else prone to timeouts
	 *
	 * @param int $offset Number of pages to skip
	 * @param int $limit Maximum number of pages to examine
	 * @return array List of SmiteSpamWikiPage objects with spamProbability set
	 */
	public function run( $offset = 0, $limit = 500 ) {
		$dbr = SmiteSpamUtils::getReadDB();

		$usersResult = $dbr->select(
			[ 'smitespam_trusted_user' ],
			'trusted_user_id',
			[],
			__METHOD__
		);

		// Keyed by integer user id so membership is an exact O(1) lookup
		// instead of a loose in_array() scan for every page.
		$trustedUsers = [];
		foreach ( $usersResult as $row ) {
			$trustedUsers[(int)$row->trusted_user_id] = true;
		}

		$result = $dbr->select(
			[ 'page' ],
			'page_id',
			[
				'page_is_redirect = 0',
			],
			__METHOD__,
			[
				"ORDER BY" => "page_id DESC",
				"OFFSET" => $offset,
				"LIMIT" => $limit,
			]
		);

		$checkers = $this->config['checkers'] ?? [];
		$skipPagesWithoutLinks = $this->config['ignorePagesWithNoExternalLinks'];
		$skipSmallPages = $this->config['ignoreSmallPages'];

		$spamPages = [];

		foreach ( $result as $row ) {
			$page = new SmiteSpamWikiPage( $row->page_id );

			// Check existence before touching the content.
			if ( !$page->exists() ) {
				continue;
			}

			if ( $page->getTitle()->getContentModel() !== CONTENT_MODEL_WIKITEXT ) {
				// Page does not contain regular wikitext
				continue;
			}

			/** @var TextContent $text */
			$text = $page->getContent();
			if ( !$text || !method_exists( $text, 'getText' ) ) {
				// Cannot get content, or it is not text based
				continue;
			}

			// NOTE(review): getCreator() is presumably inherited from WikiPage,
			// where it may return null; such pages are treated as untrusted.
			$creator = $page->getCreator();
			if ( $creator && isset( $trustedUsers[(int)$creator->getId()] ) ) {
				continue;
			}

			if ( $skipPagesWithoutLinks || $skipSmallPages ) {
				$hasNoExternalLinks = count( $page->getMetadata( 'externalLinks' ) ) === 0;
				if ( $hasNoExternalLinks ) {
					if ( $skipPagesWithoutLinks ) {
						continue;
					}
					if ( strlen( $page->getMetadata( 'content' ) ) < 500 ) {
						// Ignore small pages with no external links
						continue;
					}
				}
			}

			$value = 0;
			$checkersUsed = 0;
			foreach ( $checkers as $checker => $weight ) {
				$checkerClass = 'SmiteSpam' . $checker . 'Checker';
				$check = new $checkerClass;
				$checkValue = $check->getValue( $page );
				// A checker returns false when it has no verdict for this page.
				if ( $checkValue !== false ) {
					$value += $checkValue * $weight;
					$checkersUsed++;
				}
			}

			if ( $checkersUsed === 0 ) {
				// No checker produced a score: there is nothing to average
				// (and dividing by zero would throw), so skip the page.
				continue;
			}

			$page->spamProbability = $value / $checkersUsed;
			if ( $page->spamProbability >= $this->config['threshold'] ) {
				$spamPages[] = $page;
			}
		}

		if ( $this->config['sort'] ) {
			// Most probable spam first. The comparator must return an int,
			// so use the spaceship operator rather than a boolean "<".
			usort(
				$spamPages,
				static function ( $pageA, $pageB ) {
					return $pageB->spamProbability <=> $pageA->spamProbability;
				}
			);
		}
		return $spamPages;
	}
}