* https://www.mediawiki.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\MediaWikiServices;
use MediaWiki\Session\Session;
use MediaWiki\Session\SessionId;
use MediaWiki\Session\SessionManager;
use Wikimedia\IPUtils;

// The point of this class is to be a wrapper around super globals
// phpcs:disable MediaWiki.Usage.SuperGlobalsUsage.SuperGlobals

/**
 * The WebRequest class encapsulates getting at data passed in the
 * URL or via a POSTed form stripping illegal input characters and
 * normalizing Unicode sequences.
 *
 * @ingroup HTTP
 */
class WebRequest {
	/**
	 * The parameters from $_GET, $_POST and the path router
	 * @var array
	 */
	protected $data;

	/**
	 * The parameters from $_GET. The parameters from the path router are
	 * added by interpolateTitle() during Setup.php.
	 * @var array
	 */
	protected $queryAndPathParams;

	/**
	 * The parameters from $_GET only.
	 * @var array
	 */
	protected $queryParams;

	/**
	 * Lazy-initialized request headers indexed by upper-case header name
	 * @var array
	 */
	protected $headers = [];

	/**
	 * Flag to make WebRequest::getHeader return an array of values.
	 * @since 1.26
	 */
	public const GETHEADER_LIST = 1;

	/**
	 * The unique request ID.
* @var string
	 */
	private static $reqId;

	/**
	 * Lazy-init response object
	 * @var WebResponse
	 */
	private $response;

	/**
	 * Cached client IP address
	 * @var string
	 */
	private $ip;

	/**
	 * The timestamp of the start of the request, with microsecond precision.
	 * @var float
	 */
	protected $requestTime;

	/**
	 * Cached URL protocol
	 * @var string
	 */
	protected $protocol;

	/**
	 * @var SessionId|null Session ID to use for this
	 *  request. We can't save the session directly due to reference cycles not
	 *  working too well (slow GC).
	 *
	 * TODO: Investigate whether this GC slowness concern (added in a73c5b7395 with regard to
	 * PHP 5.6) still applies in PHP 7.2+.
	 */
	protected $sessionId = null;

	/** @var bool Whether this HTTP request is "safe" (even if it is an HTTP post) */
	protected $markedAsSafe = false;

	/**
	 * Capture the request start time and the GET/POST parameters from the
	 * PHP superglobals.
	 *
	 * @codeCoverageIgnore
	 */
	public function __construct() {
		$this->requestTime = $_SERVER['REQUEST_TIME_FLOAT'];

		// POST overrides GET data: the array union operator keeps the value
		// from the left-hand operand when a key exists on both sides.
		// We don't use $_REQUEST here to avoid interference from cookies...
		$this->data = $_POST + $_GET;

		// Both start out as a copy of $_GET; interpolateTitle() later adds the
		// path router matches to queryAndPathParams only.
		$this->queryAndPathParams = $this->queryParams = $_GET;
	}

	/**
	 * Extract relevant query arguments from the http request uri's path
	 * to be merged with the normal php provided query arguments.
	 * Tries to use the REQUEST_URI data if available and parses it
	 * according to the wiki's configuration looking for any known pattern.
	 *
	 * If the REQUEST_URI is not provided we'll fall back on the PATH_INFO
	 * provided by the server if any and use that to set a 'title' parameter.
	 *
	 * @internal This has many odd special cases and so should only be used by
	 *   interpolateTitle() for index.php. Instead try getRequestPathSuffix().
	 *
	 * @param string $want If this is not 'all', then the function
	 * will return an empty array if it determines that the URL is
	 * inside a rewrite path.
	 *
	 * @return array Any query arguments found in path matches.
* @throws FatalError If invalid routes are configured (T48998)
	 */
	public static function getPathInfo( $want = 'all' ) {
		// PATH_INFO is mangled due to https://bugs.php.net/bug.php?id=31892
		// And also by Apache 2.x, double slashes are converted to single slashes.
		// So we will use REQUEST_URI if possible.
		if ( isset( $_SERVER['REQUEST_URI'] ) ) {
			// Slurp out the path portion to examine...
			$url = $_SERVER['REQUEST_URI'];
			if ( !preg_match( '!^https?://!', $url ) ) {
				// Prefix a dummy scheme and host so the value is parsed as a full URL
				$url = 'http://unused' . $url;
			}
			$a = parse_url( $url );
			if ( !$a ) {
				return [];
			}
			$path = $a['path'] ?? '';

			global $wgScript;
			if ( $path == $wgScript && $want !== 'all' ) {
				// Script inside a rewrite path?
				// Abort to keep from breaking...
				return [];
			}

			$router = new PathRouter;

			// Raw PATH_INFO style
			$router->add( "$wgScript/$1" );

			global $wgArticlePath;
			if ( $wgArticlePath ) {
				$router->validateRoute( $wgArticlePath, 'wgArticlePath' );
				$router->add( $wgArticlePath );
			}

			global $wgActionPaths;
			$articlePaths = PathRouter::getActionPaths( $wgActionPaths, $wgArticlePath );
			if ( $articlePaths ) {
				// '$key' is substituted by the router with the matching array key
				$router->add( $articlePaths, [ 'action' => '$key' ] );
			}

			global $wgVariantArticlePath;
			if ( $wgVariantArticlePath ) {
				$router->validateRoute( $wgVariantArticlePath, 'wgVariantArticlePath' );
				// Restrict $2 to the variants known to the content language
				$router->add( $wgVariantArticlePath,
					[ 'variant' => '$2' ],
					[ '$2' => MediaWikiServices::getInstance()->getContentLanguage()->
						getVariants() ]
				);
			}

			// Let extensions register additional routes before parsing
			Hooks::runner()->onWebRequestPathInfoRouter( $router );

			$matches = $router->parse( $path );
		} else {
			global $wgUsePathInfo;
			$matches = [];
			if ( $wgUsePathInfo ) {
				if ( !empty( $_SERVER['ORIG_PATH_INFO'] ) ) {
					// Mangled PATH_INFO
					// https://bugs.php.net/bug.php?id=31892
					// Also reported when ini_get('cgi.fix_pathinfo')==false
					// substr( ..., 1 ) drops the first character of the path info
					$matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 );
				} elseif ( !empty( $_SERVER['PATH_INFO'] ) ) {
					// Regular old PATH_INFO yay
					$matches['title'] = substr( $_SERVER['PATH_INFO'], 1 );
				}
			}
		}

		return $matches;
	}

	/**
	 * If the request URL matches a given base
path, extract the path part of
	 * the request URL after that base, and decode escape sequences in it.
	 *
	 * If the request URL does not match, false is returned.
	 *
	 * @since 1.35
	 * @param string $basePath The base URL path. Trailing slashes will be
	 *   stripped.
	 * @return string|false
	 */
	public static function getRequestPathSuffix( $basePath ) {
		// Normalise to exactly one trailing slash before comparing prefixes
		$basePath = rtrim( $basePath, '/' ) . '/';
		$requestUrl = self::getGlobalRequestURL();

		// Only the part before the query string is compared
		$qpos = strpos( $requestUrl, '?' );
		if ( $qpos !== false ) {
			$requestPath = substr( $requestUrl, 0, $qpos );
		} else {
			$requestPath = $requestUrl;
		}

		if ( substr( $requestPath, 0, strlen( $basePath ) ) !== $basePath ) {
			return false;
		}
		return rawurldecode( substr( $requestPath, strlen( $basePath ) ) );
	}

	/**
	 * Work out an appropriate URL prefix containing scheme and host, based on
	 * information detected from $_SERVER
	 *
	 * The host is taken from the first of HTTP_HOST, SERVER_NAME, HOSTNAME and
	 * SERVER_ADDR that is set and can be split into host and port; if none
	 * qualifies, 'localhost' on the protocol's standard port is used.
	 *
	 * @return string
	 */
	public static function detectServer() {
		global $wgAssumeProxiesUseDefaultProtocolPorts;

		$proto = self::detectProtocol();
		$stdPort = $proto === 'https' ? 443 : 80;

		$varNames = [ 'HTTP_HOST', 'SERVER_NAME', 'HOSTNAME', 'SERVER_ADDR' ];
		$host = 'localhost';
		$port = $stdPort;
		foreach ( $varNames as $varName ) {
			if ( !isset( $_SERVER[$varName] ) ) {
				continue;
			}

			$parts = IPUtils::splitHostAndPort( $_SERVER[$varName] );
			if ( !$parts ) {
				// Invalid, do not use
				continue;
			}

			$host = $parts[0];
			if ( $wgAssumeProxiesUseDefaultProtocolPorts && isset( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) ) {
				// T72021: Assume that upstream proxy is running on the default
				// port based on the protocol. We have no reliable way to determine
				// the actual port in use upstream.
				$port = $stdPort;
			} elseif ( $parts[1] === false ) {
				if ( isset( $_SERVER['SERVER_PORT'] ) ) {
					$port = $_SERVER['SERVER_PORT'];
				} // else leave it as $stdPort
			} else {
				$port = $parts[1];
			}
			// The first usable variable wins
			break;
		}

		return $proto . '://' . IPUtils::combineHostAndPort( $host, $port, $stdPort );
	}

	/**
	 * Detect the protocol from $_SERVER.
* This is for use prior to Setup.php, when no WebRequest object is available.
	 * At other times, use the non-static function getProtocol().
	 *
	 * Returns 'https' when $_SERVER['HTTPS'] is non-empty and not 'off', or when
	 * the X-Forwarded-Proto header is exactly 'https'; otherwise 'http'.
	 *
	 * @return string
	 */
	public static function detectProtocol() {
		if ( ( !empty( $_SERVER['HTTPS'] ) && $_SERVER['HTTPS'] !== 'off' ) ||
			( isset( $_SERVER['HTTP_X_FORWARDED_PROTO'] ) &&
			$_SERVER['HTTP_X_FORWARDED_PROTO'] === 'https' ) ) {
			return 'https';
		} else {
			return 'http';
		}
	}

	/**
	 * Get the number of seconds to have elapsed since request start,
	 * in fractional seconds, with microsecond resolution.
	 *
	 * @return float
	 * @since 1.25
	 */
	public function getElapsedTime() {
		return microtime( true ) - $this->requestTime;
	}

	/**
	 * Get the unique request ID.
	 * This is either the value of the UNIQUE_ID envvar (if present) or a
	 * randomly-generated 24-character string.
	 *
	 * When $wgAllowExternalReqID is enabled, a non-empty X-Request-Id request
	 * header takes precedence over both. The result is cached statically.
	 *
	 * @return string
	 * @since 1.27
	 */
	public static function getRequestId() {
		// This method is called from various error handlers and should be kept simple.

		if ( !self::$reqId ) {
			global $wgAllowExternalReqID;
			$id = $wgAllowExternalReqID
				? RequestContext::getMain()->getRequest()->getHeader( 'X-Request-Id' )
				: null;
			if ( !$id ) {
				$id = $_SERVER['UNIQUE_ID'] ?? wfRandomString( 24 );
			}
			self::$reqId = $id;
		}

		return self::$reqId;
	}

	/**
	 * Override the unique request ID. This is for sub-requests, such as jobs,
	 * that wish to use the same id but are not part of the same execution context.
	 *
	 * @param string $id
	 * @since 1.27
	 */
	public static function overrideRequestId( $id ) {
		self::$reqId = $id;
	}

	/**
	 * Get the current URL protocol (http or https)
	 *
	 * The value is detected once via detectProtocol() and then cached.
	 *
	 * @return string
	 */
	public function getProtocol() {
		if ( $this->protocol === null ) {
			$this->protocol = self::detectProtocol();
		}
		return $this->protocol;
	}

	/**
	 * Check for title, action, and/or variant data in the URL
	 * and interpolate it into the GET variables.
	 * This should only be run after the content language is available,
	 * as we may need the list of language variants to determine
	 * available variant URLs.
*/
	public function interpolateTitle() {
		// T18019: title interpolation on API queries is useless and sometimes harmful
		if ( defined( 'MW_API' ) ) {
			return;
		}

		$matches = self::getPathInfo( 'title' );
		foreach ( $matches as $key => $val ) {
			// Path matches go into the merged data and the query+path set,
			// but not into queryParams
			$this->data[$key] = $this->queryAndPathParams[$key] = $val;
		}
	}

	/**
	 * URL rewriting function; tries to extract page title and,
	 * optionally, one other fixed parameter value from a URL path.
	 *
	 * The first base that is a prefix of $path and leaves a non-empty
	 * remainder wins; the remainder is URL-decoded and returned as 'title'.
	 *
	 * @param string $path The URL path given from the client
	 * @param array $bases One or more URLs, optionally with $1 at the end
	 * @param string|bool $key If provided, the matching key in $bases will be
	 *    passed on as the value of this URL parameter
	 * @return array Array of URL variables to interpolate; empty if no match
	 */
	public static function extractTitle( $path, $bases, $key = false ) {
		foreach ( (array)$bases as $keyValue => $base ) {
			// Find the part after $wgArticlePath
			$base = str_replace( '$1', '', $base );
			$baseLen = strlen( $base );
			// Strict comparison: with == two numeric strings are compared as
			// numbers, so e.g. the prefix "1e2" would match the base "100".
			if ( substr( $path, 0, $baseLen ) === $base ) {
				$raw = substr( $path, $baseLen );
				if ( $raw !== '' ) {
					$matches = [ 'title' => rawurldecode( $raw ) ];
					if ( $key ) {
						$matches[$key] = $keyValue;
					}
					return $matches;
				}
			}
		}
		return [];
	}

	/**
	 * Recursively normalizes UTF-8 strings in the given array.
	 *
	 * Array keys are kept as they are; only the values are normalized, using
	 * the content language's normalize() method.
	 *
	 * @param string|array $data
	 * @return array|string Cleaned-up version of the given
	 * @internal
	 */
	public function normalizeUnicode( $data ) {
		if ( is_array( $data ) ) {
			foreach ( $data as $key => $val ) {
				$data[$key] = $this->normalizeUnicode( $val );
			}
		} else {
			$contLang = MediaWikiServices::getInstance()->getContentLanguage();
			$data = $contLang->normalize( $data );
		}
		return $data;
	}

	/**
	 * Fetch a value from the given array or return $default if it's not set.
*
	 * Non-ASCII strings and arrays are Unicode-normalized before being returned.
	 *
	 * @param array $arr
	 * @param string $name
	 * @param mixed $default
	 * @return mixed
	 */
	private function getGPCVal( $arr, $name, $default ) {
		// PHP is so nice to not touch input data, except sometimes:
		// https://www.php.net/variables.external#language.variables.external.dot-in-names
		// Work around PHP *feature* to avoid *bugs* elsewhere.
		$name = strtr( $name, '.', '_' );

		if ( !isset( $arr[$name] ) ) {
			return $default;
		}

		$data = $arr[$name];
		// Optimisation: Skip UTF-8 normalization and legacy transcoding for simple ASCII strings.
		$isAsciiStr = ( is_string( $data ) && preg_match( '/[^\x20-\x7E]/', $data ) === 0 );
		if ( !$isAsciiStr ) {
			if ( isset( $_GET[$name] ) && is_string( $data ) ) {
				// Check for alternate/legacy character encoding.
				// Only applied when the parameter name is present in $_GET.
				$data = MediaWikiServices::getInstance()
					->getContentLanguage()
					->checkTitleEncoding( $data );
			}
			$data = $this->normalizeUnicode( $data );
		}

		return $data;
	}

	/**
	 * Fetch a scalar from the input without normalization, or return $default
	 * if it's not set.
	 *
	 * Unlike self::getVal(), this does not perform any normalization on the
	 * input value.
	 *
	 * Array values are treated as if the parameter was not set. Any non-null
	 * result, including a non-null $default, is cast to string.
	 *
	 * @since 1.28
	 * @param string $name
	 * @param string|null $default
	 * @return string|null
	 */
	public function getRawVal( $name, $default = null ) {
		$name = strtr( $name, '.', '_' ); // See comment in self::getGPCVal()
		if ( isset( $this->data[$name] ) && !is_array( $this->data[$name] ) ) {
			$val = $this->data[$name];
		} else {
			$val = $default;
		}
		if ( $val === null ) {
			return $val;
		} else {
			return (string)$val;
		}
	}

	/**
	 * Fetch a scalar from the input or return $default if it's not set.
	 * Returns a string. Arrays are discarded. Useful for
	 * non-freeform text inputs (e.g. predefined internal text keys
	 * selected by a drop-down menu). For freeform input, see getText().
*
	 * @param string $name
	 * @param string|null $default Optional default (or null)
	 * @return string|null
	 */
	public function getVal( $name, $default = null ) {
		$val = $this->getGPCVal( $this->data, $name, $default );
		if ( is_array( $val ) ) {
			// Arrays are discarded in favour of the default
			$val = $default;
		}
		if ( $val === null ) {
			return $val;
		} else {
			return (string)$val;
		}
	}

	/**
	 * Set an arbitrary value into our get/post data.
	 *
	 * @param string $key Key name to use
	 * @param mixed $value Value to set
	 * @return mixed Old value if one was present, null otherwise
	 */
	public function setVal( $key, $value ) {
		$ret = $this->data[$key] ?? null;
		$this->data[$key] = $value;
		return $ret;
	}

	/**
	 * Unset an arbitrary value from our get/post data.
	 *
	 * @param string $key Key name to use
	 * @return mixed Old value if one was present, null otherwise
	 */
	public function unsetVal( $key ) {
		$ret = $this->data[$key] ?? null;
		// Unset unconditionally: guarding with isset() would leave a key whose
		// value is null in place. unset() on a missing key is a no-op.
		unset( $this->data[$key] );
		return $ret;
	}

	/**
	 * Fetch an array from the input or return $default if it's not set.
	 * If source was scalar, will return an array with a single element.
	 * If no source and no default, returns null.
	 *
	 * @param string $name
	 * @param array|null $default Optional default (or null)
	 * @return array|null
	 */
	public function getArray( $name, $default = null ) {
		$val = $this->getGPCVal( $this->data, $name, $default );
		if ( $val === null ) {
			return null;
		} else {
			return (array)$val;
		}
	}

	/**
	 * Fetch an array of integers, or return $default if it's not set.
	 * If source was scalar, will return an array with a single element.
	 * If no source and no default, returns null.
	 * If an array is returned, contents are guaranteed to be integers.
*
	 * @param string $name
	 * @param array|null $default Optional default (or null)
	 * @return int[]|null
	 */
	public function getIntArray( $name, $default = null ) {
		$values = $this->getArray( $name, $default );

		return is_array( $values ) ? array_map( 'intval', $values ) : $values;
	}

	/**
	 * Read a parameter as an integer, falling back to $default when it is
	 * not set. The result is always an integer; non-numeric input will
	 * typically come back as 0.
	 *
	 * @param string $name
	 * @param int $default
	 * @return int
	 */
	public function getInt( $name, $default = 0 ) {
		$raw = $this->getRawVal( $name, $default );

		return intval( $raw );
	}

	/**
	 * Read a parameter as an integer, or get null when it is empty.
	 * The result is always an integer or null; non-numeric input will
	 * typically come back as null.
	 *
	 * @param string $name
	 * @return int|null
	 */
	public function getIntOrNull( $name ) {
		$raw = $this->getRawVal( $name );
		if ( !is_numeric( $raw ) ) {
			return null;
		}

		return intval( $raw );
	}

	/**
	 * Read a parameter as a floating point number, falling back to $default
	 * when it is not set. The result is always a float; non-numeric input
	 * will typically come back as 0.
	 *
	 * @since 1.23
	 * @param string $name
	 * @param float $default
	 * @return float
	 */
	public function getFloat( $name, $default = 0.0 ) {
		$raw = $this->getRawVal( $name, $default );

		return floatval( $raw );
	}

	/**
	 * Read a parameter as a boolean, falling back to $default when it is
	 * not set. The result is always true or false, following normal PHP
	 * semantics for the boolean interpretation of strings.
	 *
	 * @param string $name
	 * @param bool $default
	 * @return bool
	 */
	public function getBool( $name, $default = false ) {
		$raw = $this->getRawVal( $name, $default );

		return (bool)$raw;
	}

	/**
	 * Fetch a boolean value from the input or return $default if not set.
	 * Unlike getBool, the string "false" will result in boolean false, which is
	 * useful when interpreting information sent from JavaScript.
*
	 * @param string $name
	 * @param bool $default
	 * @return bool
	 */
	public function getFuzzyBool( $name, $default = false ) {
		$raw = $this->getRawVal( $name );
		if ( $raw === null ) {
			// Parameter absent (or an array): only the default matters. Returning
			// here avoids passing null to strcasecmp(), which is deprecated
			// since PHP 8.1.
			return $this->getBool( $name, $default );
		}

		return (bool)$raw && strcasecmp( $raw, 'false' ) !== 0;
	}

	/**
	 * Return true if the named value is set in the input, whatever that
	 * value is (even "0"). Return false if the named value is not set.
	 * Example use is checking for the presence of check boxes in forms.
	 *
	 * @param string $name
	 * @return bool
	 */
	public function getCheck( $name ) {
		// Checkboxes and buttons are only present when clicked
		// Presence connotes truth, absence false
		return $this->getRawVal( $name, null ) !== null;
	}

	/**
	 * Fetch a text string from the given array or return $default if it's not
	 * set. Carriage returns are stripped from the text. This should generally
	 * be used for form "