<?php

use MediaWiki\MediaWikiServices;

/**
 * Custom job to perform updates on tables in busier environments
 *
 * Job parameters include:
 * - table : DB table to update
 * - column : The *_user_text column to update
 * - oldname : The old user name
 * - newname : The new user name
 * - count : The expected number of rows to update in this batch
 * - logId : The ID of the logging table row expected to exist if the rename was committed
 *
 * Additionally, one of the following groups of parameters must be set:
 * a) The timestamp based rename parameters:
 * - timestampColumn : The *_timestamp column
 * - minTimestamp : The minimum bound of the timestamp column range for this batch
 * - maxTimestamp : The maximum bound of the timestamp column range for this batch
 * - uniqueKey : A column that is unique (preferably the PRIMARY KEY) [optional]
 * b) The unique key based rename parameters:
 * - uniqueKey : A column that is unique (preferably the PRIMARY KEY)
 * - keyId : A list of values for this column to determine rows to update for this batch
 *
 * To avoid some race conditions, the following parameters should be set:
 * - userID : The ID of the user to update
 * - uidColumn : The *_user_id column
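 *
 * An illustrative enqueue call; the parameter values below are hypothetical
 * examples, not taken from this extension's actual callers:
 * @code
 *   $job = new RenameUserJob( Title::newMainPage(), [
 *       'table' => 'revision',
 *       'column' => 'rev_user_text',
 *       'oldname' => 'Old name',
 *       'newname' => 'New name',
 *       'count' => 500,
 *       'logId' => 12345,
 *       'timestampColumn' => 'rev_timestamp',
 *       'minTimestamp' => '20230101000000',
 *       'maxTimestamp' => '20230131235959',
 *       'uniqueKey' => 'rev_id',
 *       'userID' => 42,
 *       'uidColumn' => 'rev_user',
 *   ] );
 *   JobQueueGroup::singleton()->push( $job );
 * @endcode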
 */
class RenameUserJob extends Job {
	/** @var array Core tables+columns that are being migrated to the `actor` table */
	private static $actorMigratedColumns = [
		'revision.rev_user_text',
		'archive.ar_user_text',
		'ipblocks.ipb_by_text',
		'image.img_user_text',
		'oldimage.oi_user_text',
		'filearchive.fa_user_text',
		'recentchanges.rc_user_text',
		'logging.log_user_text',
	];

	public function __construct( Title $title, $params = [] ) {
		parent::__construct( 'renameUser', $title, $params );
	}

	public function run() {
		global $wgUpdateRowsPerQuery;

		$dbw = wfGetDB( DB_MASTER );
		$table = $this->params['table'];
		$column = $this->params['column'];

		// Skip core tables that were migrated to the actor table, even if the
		// field still exists in the database.
		if ( in_array( "$table.$column", self::$actorMigratedColumns, true ) ) {
			if ( !RenameuserSQL::actorMigrationWriteOld() ) {
				wfDebugLog( 'Renameuser',
					"Ignoring job {$this->toString()}, column $table.$column " .
						"actor migration stage lacks WRITE_OLD\n"
				);
				return true;
			}
		}

		// It's not worth a hook to let extensions add themselves to that list.
		// Just check whether the table and column still exist instead.
		if ( !$dbw->tableExists( $table, __METHOD__ ) ) {
			wfDebugLog( 'Renameuser',
				"Ignoring job {$this->toString()}, table $table does not exist\n" );
			return true;
		} elseif ( !$dbw->fieldExists( $table, $column, __METHOD__ ) ) {
			wfDebugLog( 'Renameuser',
				"Ignoring job {$this->toString()}, column $table.$column does not exist\n" );
			return true;
		}

		$oldname = $this->params['oldname'];
		$newname = $this->params['newname'];
		$count = $this->params['count'];
		if ( isset( $this->params['userID'] ) ) {
			$userID = $this->params['userID'];
			$uidColumn = $this->params['uidColumn'];
		} else {
			$userID = null;
			$uidColumn = null;
		}
		if ( isset( $this->params['timestampColumn'] ) ) {
			$timestampColumn = $this->params['timestampColumn'];
			$minTimestamp = $this->params['minTimestamp'];
			$maxTimestamp = $this->params['maxTimestamp'];
		} else {
			$timestampColumn = null;
			$minTimestamp = null;
			$maxTimestamp = null;
		}
		$uniqueKey = $this->params['uniqueKey'] ?? null;
		$keyId = $this->params['keyId'] ?? null;
		$logId = $this->params['logId'] ?? null;

		if ( $logId ) {
			# Block until the transaction that inserted this job commits.
			# The atomic section is for sanity as FOR UPDATE does not lock in auto-commit mode
			# per http://dev.mysql.com/doc/refman/5.7/en/innodb-locking-reads.html.
			$dbw->startAtomic( __METHOD__ );
			$committed = $dbw->selectField( 'logging',
				'1',
				[ 'log_id' => $logId ],
				__METHOD__,
				[ 'FOR UPDATE' ]
			);
			$dbw->endAtomic( __METHOD__ );
			# If the transaction inserting this job was rolled back, detect that
			if ( $committed === false ) { // rollback happened?
				throw new LogicException( 'Cannot run job if the account rename failed.' );
			}
		}

		# Flush any state snapshot data (and release the lock above)
		$dbw->commit( __METHOD__, 'flush' );

		# Conditions like "*_user_text = 'x'"
		$conds = [ $column => $oldname ];
		# If user ID given, add that to condition to avoid rename collisions
		if ( $userID !== null ) {
			$conds[$uidColumn] = $userID;
		}
		# Bound by timestamp if given
		if ( $timestampColumn !== null ) {
			$conds[] = "$timestampColumn >= " . $dbw->addQuotes( $minTimestamp );
			$conds[] = "$timestampColumn <= " . $dbw->addQuotes( $maxTimestamp );
		# Bound by unique key if given (B/C)
		} elseif ( $uniqueKey !== null && $keyId !== null ) {
			$conds[$uniqueKey] = $keyId;
		} else {
			throw new InvalidArgumentException( 'Expected ID batch or time range' );
		}

		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		$affectedCount = 0;
		# Actually update the rows for this job...
		if ( $uniqueKey !== null ) {
			# Select the rows to update by PRIMARY KEY
			$ids = $dbw->selectFieldValues( $table, $uniqueKey, $conds, __METHOD__ );
			# Update these rows by PRIMARY KEY to avoid replica lag
			foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
				$dbw->commit( __METHOD__, 'flush' );
				$lbFactory->waitForReplication();

				$dbw->update( $table,
					[ $column => $newname ],
					[ $column => $oldname, $uniqueKey => $batch ],
					__METHOD__
				);
				$affectedCount += $dbw->affectedRows();
			}
		} else {
			# Update the chunk of rows directly
			$dbw->update( $table,
				[ $column => $newname ],
				$conds,
				__METHOD__
			);
			$affectedCount += $dbw->affectedRows();
		}

		# Special case: revisions may be deleted while renaming...
		if ( $affectedCount < $count && $table === 'revision' && $timestampColumn !== null ) {
			# If some revisions were not renamed, they may have been deleted.
			# Do a pass on the archive table to get these stragglers...
			$ids = $dbw->selectFieldValues( 'archive',
				'ar_id',
				[
					'ar_user_text' => $oldname,
					'ar_user' => $userID,
					// No user,rev_id index, so use timestamp to bound
					// the rows. This can use the user,timestamp index.
					"ar_timestamp >= '$minTimestamp'",
					"ar_timestamp <= '$maxTimestamp'"
				],
				__METHOD__
			);
			foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
				$dbw->commit( __METHOD__, 'flush' );
				$lbFactory->waitForReplication();

				$dbw->update( 'archive',
					[ 'ar_user_text' => $newname ],
					[ 'ar_user_text' => $oldname, 'ar_id' => $batch ],
					__METHOD__
				);
			}
		}
		# Special case: revisions may be restored while renaming...
		if ( $affectedCount < $count && $table === 'archive' && $timestampColumn !== null ) {
			# If some revisions were not renamed, they may have been restored.
			# Do a pass on the revision table to get these stragglers...
			$ids = $dbw->selectFieldValues( 'revision',
				'rev_id',
				[
					'rev_user_text' => $oldname,
					'rev_user' => $userID,
					// No user,rev_id index, so use timestamp to bound
					// the rows. This can use the user,timestamp index.
					"rev_timestamp >= '$minTimestamp'",
					"rev_timestamp <= '$maxTimestamp'"
				],
				__METHOD__
			);
			foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as $batch ) {
				$dbw->commit( __METHOD__, 'flush' );
				$lbFactory->waitForReplication();

				$dbw->update( 'revision',
					[ 'rev_user_text' => $newname ],
					[ 'rev_user_text' => $oldname, 'rev_id' => $batch ],
					__METHOD__
				);
			}
		}

		return true;
	}
}