Merge "Split CLDRPluralRuleEvaluator file"
This commit is contained in:
commit
d6b075d238
8 changed files with 639 additions and 573 deletions
|
|
@ -1125,13 +1125,13 @@ $wgAutoloadLocalClasses = array(
|
|||
'FakeConverter' => 'languages/Language.php',
|
||||
'Language' => 'languages/Language.php',
|
||||
'LanguageConverter' => 'languages/LanguageConverter.php',
|
||||
'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleConverter' => 'languages/utils/CLDRPluralRuleConverter.php',
|
||||
'CLDRPluralRuleConverter_Expression' => 'languages/utils/CLDRPluralRuleConverter_Expression.php',
|
||||
'CLDRPluralRuleConverter_Fragment' => 'languages/utils/CLDRPluralRuleConverter_Fragment.php',
|
||||
'CLDRPluralRuleConverter_Operator' => 'languages/utils/CLDRPluralRuleConverter_Operator.php',
|
||||
'CLDRPluralRuleEvaluator' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleEvaluator.php',
|
||||
'CLDRPluralRuleEvaluator_Range' => 'languages/utils/CLDRPluralRuleEvaluator_Range.php',
|
||||
'CLDRPluralRuleError' => 'languages/utils/CLDRPluralRuleError.php',
|
||||
|
||||
# maintenance
|
||||
'BackupDumper' => 'maintenance/backup.inc',
|
||||
|
|
|
|||
313
languages/utils/CLDRPluralRuleConverter.php
Normal file
313
languages/utils/CLDRPluralRuleConverter.php
Normal file
|
|
@ -0,0 +1,313 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The static convert() method is the only public entry point. It creates an
 * instance which tokenizes the rule string and combines the tokens using an
 * operator stack and an operand stack.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * @var array
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule cannot be parsed or validated
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created through convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule
	 *   (e.g. "n 10 mod 1 is" for the rule "n mod 10 is 1")
	 * @throws CLDRPluralRuleError On any syntax or type error in the rule
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverter_Fragment $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Apply every stacked operator whose precedence is equal to or
			// higher than the incoming one before pushing the new operator.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * @return CLDRPluralRuleConverter_Fragment|bool The next token, or false
	 *   when the end of the input (or the start of a sample section) is reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check is required:
		// strpos() alone would also match multi-letter substrings of
		// OPERAND_SYMBOLS (e.g. "ni" or "wft"), which are not operands and
		// must be reported as unrecognised words below.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverter_Operator $op
	 * @throws CLDRPluralRuleError If fewer than two operands are available
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The most recently pushed operand is the right-hand side
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverter_Expression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverter_Operator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}
|
||||
41
languages/utils/CLDRPluralRuleConverter_Expression.php
Normal file
41
languages/utils/CLDRPluralRuleConverter_Expression.php
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
 * Helper for CLDRPluralRuleConverter.
 * An expression fragment: a region of the input string (kept for error
 * messages) together with the RPN text that evaluates it and the type of
 * the value it produces, which is used for validation.
 */
class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
	/** @var string The result type of this expression */
	public $type;

	/** @var string The RPN text for this expression */
	public $rpn;

	/**
	 * @param object $parser The parser, passed through to the parent constructor
	 * @param string $type The result type of the expression
	 * @param string $rpn The RPN text of the expression
	 * @param int $pos Start offset of the region in the input string
	 * @param int $length Length of the region
	 */
	public function __construct( $parser, $type, $rpn, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->rpn = $rpn;
		$this->type = $type;
	}

	/**
	 * Check whether this expression can be used where $type is required.
	 *
	 * @param string $type The required type
	 * @return bool True on an exact type match, or when a range is required
	 *   and this expression is a number
	 */
	public function isType( $type ) {
		if ( $type === $this->type ) {
			return true;
		}

		// A number is also accepted wherever a range is required
		return $type === 'range' && $this->type === 'number';
	}
}
|
||||
35
languages/utils/CLDRPluralRuleConverter_Fragment.php
Normal file
35
languages/utils/CLDRPluralRuleConverter_Fragment.php
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
 * Helper for CLDRPluralRuleConverter.
 * The base class for operators and expressions, describing a region of the input string.
 */
class CLDRPluralRuleConverter_Fragment {
	/** @var object The parser; its "rule" property holds the input string */
	public $parser;

	/** @var int Start offset of the region */
	public $pos;

	/** @var int Length of the region */
	public $length;

	/** @var int Past-the-end offset of the region ($pos + $length) */
	public $end;

	/**
	 * @param object $parser The parser that owns the input string
	 * @param int $pos Start offset of the region
	 * @param int $length Length of the region
	 */
	public function __construct( $parser, $pos, $length ) {
		$this->parser = $parser;
		$this->pos = $pos;
		$this->length = $length;
		$this->end = $pos + $length;
	}

	/**
	 * Throw an error that reports the 1-based position and the text of this fragment.
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	public function error( $message ) {
		$position = $this->pos + 1;
		$text = $this->getText();

		throw new CLDRPluralRuleError( "$message at position " . $position . ": \"$text\"" );
	}

	/**
	 * Get the part of the parser's rule string covered by this fragment.
	 *
	 * @return string
	 */
	public function getText() {
		$rule = $this->parser->rule;

		return substr( $rule, $this->pos, $this->length );
	}
}
|
||||
114
languages/utils/CLDRPluralRuleConverter_Operator.php
Normal file
114
languages/utils/CLDRPluralRuleConverter_Operator.php
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
 * Helper for CLDRPluralRuleConverter.
 * An operator fragment: a region of the input string (kept for error
 * messages) plus the binary operator found at that location.
 */
class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
	/** @var string The operator name, after alias resolution */
	public $name;

	/**
	 * Type signature of each operator, as three characters: left operand
	 * type, right operand type and result type.
	 *
	 * b = boolean
	 * n = number
	 * r = range
	 *
	 * A number is a kind of range.
	 *
	 * @var array
	 */
	public static $opTypes = array(
		'or' => 'bbb',
		'and' => 'bbb',
		'is' => 'nnb',
		'is-not' => 'nnb',
		'in' => 'nrb',
		'not-in' => 'nrb',
		'within' => 'nrb',
		'not-within' => 'nrb',
		'mod' => 'nnn',
		',' => 'rrr',
		'..' => 'nnr',
	);

	/**
	 * Map converting from the abbreviation to the full form.
	 *
	 * @var array
	 */
	public static $typeSpecMap = array(
		'b' => 'boolean',
		'n' => 'number',
		'r' => 'range',
	);

	/**
	 * Map for converting the new operators introduced in Rev 33 to the old forms
	 *
	 * @var array
	 */
	public static $aliasMap = array(
		'%' => 'mod',
		'!=' => 'not-in',
		'=' => 'in'
	);

	/**
	 * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
	 *
	 * @param CLDRPluralRuleConverter $parser The parser
	 * @param string $name The operator name; aliases in $aliasMap are replaced
	 *   by the name they map to
	 * @param int $pos Start offset of the operator in the input string
	 * @param int $length Length of the operator text
	 */
	public function __construct( $parser, $name, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->name = isset( self::$aliasMap[$name] ) ? self::$aliasMap[$name] : $name;
	}

	/**
	 * Compute the operation
	 *
	 * @param CLDRPluralRuleConverter_Expression $left The left part of the expression
	 * @param CLDRPluralRuleConverter_Expression $right The right part of the expression
	 * @return CLDRPluralRuleConverter_Expression The result of the operation
	 */
	public function operate( $left, $right ) {
		$signature = self::$opTypes[$this->name];
		$leftType = self::$typeSpecMap[$signature[0]];
		$rightType = self::$typeSpecMap[$signature[1]];

		// The combined expression covers the operator and both operands
		$from = min( $this->pos, $left->pos, $right->pos );
		$to = max( $this->end, $left->end, $right->end );

		// Built before the type checks so that errors report the whole region
		$combined = new CLDRPluralRuleConverter_Expression(
			$this->parser,
			self::$typeSpecMap[$signature[2]],
			"{$left->rpn} {$right->rpn} {$this->name}",
			$from,
			$to - $from
		);

		if ( !$left->isType( $leftType ) ) {
			$combined->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
		}

		if ( !$right->isType( $rightType ) ) {
			$combined->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
		}

		return $combined;
	}
}
|
||||
21
languages/utils/CLDRPluralRuleError.php
Normal file
21
languages/utils/CLDRPluralRuleError.php
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
 * The exception class for CLDR plural rule errors. Every message is
 * prefixed with "CLDR plural rule error: " before being passed to the
 * parent exception.
 */
class CLDRPluralRuleError extends MWException {
	/**
	 * @param string $message Description of the error, without the prefix
	 */
	public function __construct( $message ) {
		$prefixed = 'CLDR plural rule error: ' . $message;
		parent::__construct( $prefixed );
	}
}
|
||||
|
|
@ -30,6 +30,7 @@
|
|||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
class CLDRPluralRuleEvaluator {
|
||||
/**
|
||||
* Evaluate a number against a set of plural rules. If a rule passes,
|
||||
|
|
@ -179,570 +180,3 @@ class CLDRPluralRuleEvaluator {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Evaluator helper class representing a range list.
 *
 * Each entry of the list is either a single value or a two-element
 * array( start, end ) describing an inclusive interval.
 */
class CLDRPluralRuleEvaluator_Range {
	/**
	 * The parts: single values and array( start, end ) pairs
	 *
	 * @var array
	 */
	public $parts = array();

	/**
	 * Initialize a new instance of CLDRPluralRuleEvaluator_Range
	 *
	 * @param int $start The start of the range, or the single value when $end is false
	 * @param int|bool $end The end of the range, or false to store $start as a single value
	 */
	public function __construct( $start, $end = false ) {
		$this->parts[] = ( $end === false ) ? $start : array( $start, $end );
	}

	/**
	 * Determine if the given number is inside the range.
	 *
	 * @param int $number The number to check
	 * @param bool $integerConstraint If true, a number only matches an interval
	 *   when it is also a whole number; single values are compared with ==
	 *   regardless of this flag.
	 * @return bool True if the number is inside the range; otherwise, false.
	 */
	public function isNumberIn( $number, $integerConstraint = true ) {
		foreach ( $this->parts as $part ) {
			if ( !is_array( $part ) ) {
				// Single value
				if ( $number == $part ) {
					return true;
				}
				continue;
			}

			// Interval: skip it when a whole number is required but not given
			if ( $integerConstraint && floor( $number ) !== (float)$number ) {
				continue;
			}

			list( $low, $high ) = $part;
			if ( $number >= $low && $number <= $high ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Readable alias for isNumberIn( $number, false ), and the implementation
	 * of the "within" operator.
	 *
	 * @param int $number The number to check
	 * @return bool True if the number is inside the range; otherwise, false.
	 */
	public function isNumberWithin( $number ) {
		return $this->isNumberIn( $number, false );
	}

	/**
	 * Add another part to this range.
	 *
	 * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either
	 *   a range object itself or a single number.
	 */
	public function add( $other ) {
		if ( !( $other instanceof self ) ) {
			$this->parts[] = $other;

			return;
		}

		$this->parts = array_merge( $this->parts, $other->parts );
	}

	/**
	 * Returns the string representation of the rule evaluator range.
	 * The purpose of this method is to help debugging.
	 *
	 * @return string The string representation of the rule evaluator range
	 */
	public function __toString() {
		$pieces = array();
		foreach ( $this->parts as $part ) {
			$pieces[] = is_array( $part ) ? $part[0] . '..' . $part[1] : $part;
		}

		return 'Range(' . implode( ', ', $pieces ) . ')';
	}
}
|
||||
|
||||
/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The static convert() method is the only public entry point. It creates an
 * instance which tokenizes the rule string and combines the tokens using an
 * operator stack and an operand stack.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * @var array
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule cannot be parsed or validated
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created through convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule
	 *   (e.g. "n 10 mod 1 is" for the rule "n mod 10 is 1")
	 * @throws CLDRPluralRuleError On any syntax or type error in the rule
	 */
	protected function doConvert() {
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverter_Fragment $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Apply every stacked operator whose precedence is equal to or
			// higher than the incoming one before pushing the new operator.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * @return CLDRPluralRuleConverter_Fragment|bool The next token, or false
	 *   when the end of the input (or the start of a sample section) is reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );

			return $token;
		}

		// The single-character operand symbols. The length check is required:
		// strpos() alone would also match multi-letter substrings of
		// OPERAND_SYMBOLS (e.g. "ni" or "wft"), which are not operands and
		// must be reported as unrecognised words below.
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverter_Operator $op
	 * @throws CLDRPluralRuleError If fewer than two operands are available
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The most recently pushed operand is the right-hand side
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverter_Expression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverter_Operator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}
|
||||
|
||||
/**
 * Helper for CLDRPluralRuleConverter.
 * The base class for operators and expressions, describing a region of the input string.
 */
class CLDRPluralRuleConverter_Fragment {
	/** @var object The parser; its "rule" property holds the input string */
	public $parser;

	/** @var int Start offset of the region */
	public $pos;

	/** @var int Length of the region */
	public $length;

	/** @var int Past-the-end offset of the region ($pos + $length) */
	public $end;

	/**
	 * @param object $parser The parser that owns the input string
	 * @param int $pos Start offset of the region
	 * @param int $length Length of the region
	 */
	public function __construct( $parser, $pos, $length ) {
		$this->parser = $parser;
		$this->pos = $pos;
		$this->length = $length;
		$this->end = $pos + $length;
	}

	/**
	 * Throw an error that reports the 1-based position and the text of this fragment.
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	public function error( $message ) {
		$position = $this->pos + 1;
		$text = $this->getText();

		throw new CLDRPluralRuleError( "$message at position " . $position . ": \"$text\"" );
	}

	/**
	 * Get the part of the parser's rule string covered by this fragment.
	 *
	 * @return string
	 */
	public function getText() {
		$rule = $this->parser->rule;

		return substr( $rule, $this->pos, $this->length );
	}
}
|
||||
|
||||
/**
 * Helper for CLDRPluralRuleConverter.
 * An expression fragment: a region of the input string (kept for error
 * messages) together with the RPN text that evaluates it and the type of
 * the value it produces, which is used for validation.
 */
class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
	/** @var string The result type of this expression */
	public $type;

	/** @var string The RPN text for this expression */
	public $rpn;

	/**
	 * @param object $parser The parser, passed through to the parent constructor
	 * @param string $type The result type of the expression
	 * @param string $rpn The RPN text of the expression
	 * @param int $pos Start offset of the region in the input string
	 * @param int $length Length of the region
	 */
	public function __construct( $parser, $type, $rpn, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );
		$this->rpn = $rpn;
		$this->type = $type;
	}

	/**
	 * Check whether this expression can be used where $type is required.
	 *
	 * @param string $type The required type
	 * @return bool True on an exact type match, or when a range is required
	 *   and this expression is a number
	 */
	public function isType( $type ) {
		if ( $type === $this->type ) {
			return true;
		}

		// A number is also accepted wherever a range is required
		return $type === 'range' && $this->type === 'number';
	}
}
|
||||
|
||||
/**
|
||||
* Helper for CLDRPluralRuleConverter.
|
||||
* An operator object, representing a region of the input string (for error
|
||||
* messages), and the binary operator at that location.
|
||||
*/
|
||||
class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
	/** @var string The operator name, after alias resolution */
	public $name;

	/**
	 * Type signature of every operator, as three characters: left operand
	 * type, right operand type and result type.
	 *
	 *   b = boolean
	 *   n = number
	 *   r = range
	 *
	 * A number is a kind of range.
	 *
	 * @var array
	 */
	public static $opTypes = array(
		'or' => 'bbb',
		'and' => 'bbb',
		'is' => 'nnb',
		'is-not' => 'nnb',
		'in' => 'nrb',
		'not-in' => 'nrb',
		'within' => 'nrb',
		'not-within' => 'nrb',
		'mod' => 'nnn',
		',' => 'rrr',
		'..' => 'nnr',
	);

	/**
	 * Map converting from the one-letter abbreviation to the full type name.
	 *
	 * @var array
	 */
	public static $typeSpecMap = array(
		'b' => 'boolean',
		'n' => 'number',
		'r' => 'range',
	);

	/**
	 * Map for converting the new operators introduced in Rev 33 to the old forms
	 *
	 * @var array
	 */
	public static $aliasMap = array(
		'%' => 'mod',
		'!=' => 'not-in',
		'=' => 'in'
	);

	/**
	 * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
	 *
	 * @param CLDRPluralRuleConverter $parser The parser
	 * @param string $name The operator name; aliases are replaced by their old form
	 * @param int $pos The position of the operator within the rule
	 * @param int $length The length of the operator
	 */
	public function __construct( $parser, $name, $pos, $length ) {
		parent::__construct( $parser, $pos, $length );

		$this->name = isset( self::$aliasMap[$name] ) ? self::$aliasMap[$name] : $name;
	}

	/**
	 * Compute the operation
	 *
	 * Builds an expression spanning both operands and the operator, whose RPN
	 * is "<left> <right> <operator name>", then validates the operand types.
	 *
	 * @param CLDRPluralRuleConverter_Expression $left The left part of the expression
	 * @param CLDRPluralRuleConverter_Expression $right The right part of the expression
	 * @return CLDRPluralRuleConverter_Expression The result of the operation
	 * @throws CLDRPluralRuleError If an operand has the wrong type
	 */
	public function operate( $left, $right ) {
		$signature = self::$opTypes[$this->name];
		$expectedLeft = self::$typeSpecMap[$signature[0]];
		$expectedRight = self::$typeSpecMap[$signature[1]];
		$producedType = self::$typeSpecMap[$signature[2]];

		// The combined region runs from the earliest start to the latest end
		// of the three fragments involved.
		$regionStart = min( $this->pos, $left->pos, $right->pos );
		$regionEnd = max( $this->end, $left->end, $right->end );

		// The result is built before validation so that type errors are
		// reported against the whole combined region.
		$combined = new CLDRPluralRuleConverter_Expression(
			$this->parser,
			$producedType,
			$left->rpn . ' ' . $right->rpn . ' ' . $this->name,
			$regionStart,
			$regionEnd - $regionStart
		);

		if ( !$left->isType( $expectedLeft ) ) {
			$combined->error( "invalid type for left operand: expected $expectedLeft, got {$left->type}" );
		}

		if ( !$right->isType( $expectedRight ) ) {
			$combined->error( "invalid type for right operand: expected $expectedRight, got {$right->type}" );
		}

		return $combined;
	}
}
|
||||
|
||||
/**
|
||||
* The exception class for all the classes in this file. This will be thrown
|
||||
* back to the caller if there is any validation error.
|
||||
*/
|
||||
class CLDRPluralRuleError extends MWException {
	/**
	 * @param string $message Description of the problem; the text
	 *   'CLDR plural rule error: ' is prepended before it is passed on to
	 *   the parent constructor.
	 */
	public function __construct( $message ) {
		$prefixed = 'CLDR plural rule error: ' . $message;

		parent::__construct( $prefixed );
	}
}
|
||||
|
|
|
|||
108
languages/utils/CLDRPluralRuleEvaluator_Range.php
Normal file
108
languages/utils/CLDRPluralRuleEvaluator_Range.php
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
<?php
|
||||
/**
|
||||
* @author Niklas Laxström, Tim Starling
|
||||
*
|
||||
* @copyright Copyright © 2010-2012, Niklas Laxström
|
||||
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
|
||||
*
|
||||
* @file
|
||||
* @since 1.20
|
||||
*/
|
||||
|
||||
/**
|
||||
* Evaluator helper class representing a range list.
|
||||
*/
|
||||
class CLDRPluralRuleEvaluator_Range {
	/**
	 * The parts of the range list. Each entry is either a single number or
	 * a two-element array( start, end ).
	 *
	 * @var array
	 */
	public $parts = array();

	/**
	 * Initialize a new instance of CLDRPluralRuleEvaluator_Range
	 *
	 * @param int $start The start of the range
	 * @param int|bool $end The end of the range, or false to store $start
	 *   as a single number rather than a start/end pair.
	 */
	public function __construct( $start, $end = false ) {
		$this->parts[] = ( $end === false ) ? $start : array( $start, $end );
	}

	/**
	 * Determine if the given number is inside the range.
	 *
	 * @param int $number The number to check
	 * @param bool $integerConstraint If true, a start/end pair only matches
	 *   whole numbers; otherwise the number simply has to lie between the
	 *   bounds. Single-number parts are always compared by loose equality.
	 * @return bool True if the number is inside the range; otherwise, false.
	 */
	public function isNumberIn( $number, $integerConstraint = true ) {
		foreach ( $this->parts as $part ) {
			if ( !is_array( $part ) ) {
				// Single value: loose comparison, so 1, 1.0 and '1' all match 1.
				if ( $number == $part ) {
					return true;
				}
				continue;
			}

			// A start/end pair rejects fractional numbers when constrained.
			if ( $integerConstraint && floor( $number ) !== (float)$number ) {
				continue;
			}

			// Both bounds are inclusive.
			if ( $number >= $part[0] && $number <= $part[1] ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Readable alias for isNumberIn( $number, false ), and the implementation
	 * of the "within" operator.
	 *
	 * @param int $number The number to check
	 * @return bool True if the number is inside the range; otherwise, false.
	 */
	public function isNumberWithin( $number ) {
		$integerConstraint = false;

		return $this->isNumberIn( $number, $integerConstraint );
	}

	/**
	 * Add another part to this range.
	 *
	 * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either
	 *   a range object itself or a single number.
	 */
	public function add( $other ) {
		if ( !( $other instanceof self ) ) {
			$this->parts[] = $other;

			return;
		}

		// Another range object contributes all of its parts.
		$this->parts = array_merge( $this->parts, $other->parts );
	}

	/**
	 * Returns the string representation of the rule evaluator range.
	 * The purpose of this method is to help debugging.
	 *
	 * @return string The string representation of the rule evaluator range
	 */
	public function __toString() {
		$body = '';
		foreach ( $this->parts as $index => $part ) {
			// Every part whose key is non-zero is preceded by a separator.
			$separator = $index ? ', ' : '';
			$rendered = is_array( $part ) ? $part[0] . '..' . $part[1] : $part;
			$body .= $separator . $rendered;
		}

		return 'Range(' . $body . ')';
	}
}
|
||||
Loading…
Reference in a new issue