Skip to content

Commit ceab210

Browse files
committed
PHP 8.2 | Tokenizer/PHP: add support for DNF types
This commit adds tokenizer support for DNF types as per the proposal outlined in #387.

This means that:
* Two new tokens are introduced, `T_TYPE_OPEN_PARENTHESIS` and `T_TYPE_CLOSE_PARENTHESIS`, for the parentheses used in DNF types. This allows for sniffs to specifically target those tokens and prevents sniffs which are looking for the "normal" open/close parenthesis tokens from acting on DNF parentheses.
* These new tokens, like other parentheses, will get the `parenthesis_opener` and `parenthesis_closer` token array indexes and the tokens between them will have the `nested_parenthesis` index.

Based on the currently added tests, the commit safeguards that:
* The `|` in types is still tokenized as `T_TYPE_UNION`, even in DNF types.
* The `&` in types is still tokenized as `T_TYPE_INTERSECTION`, even in DNF types.
* The `static` keyword for properties is still tokenized as `T_STATIC`, even when right before a DNF type (which could be confused for a function call).
* The arrow function retokenization to `T_FN` with a `T_FN_ARROW` scope opener is handled correctly, even when DNF types are involved and including when the arrow function is declared to return by reference.
* The keyword tokens, like `self`, `parent`, `static`, `true` or `false`, when used in DNF types are still tokenized to their own token and not tokenized as `T_STRING`.
* The `array` keyword when used in DNF types is still tokenized as `T_STRING` and not as `T_ARRAY`.
* A `?` intended as an (illegal) nullability operator in combination with a DNF type is still tokenized as `T_NULLABLE` and not as `T_INLINE_THEN`.
* A function declaration open parenthesis before a typed parameter isn't accidentally retokenized to `T_TYPE_OPEN_PARENTHESIS`.

Includes ample unit tests. Even so, strenuous testing of this PR is recommended: there are so many moving parts involved that it is very easy for something to have been overlooked.

Related to #105
Closes #387
Closes squizlabs/PHP_CodeSniffer#3731
1 parent 83f3859 commit ceab210

18 files changed

+1282
-63
lines changed

src/Tokenizers/PHP.php

Lines changed: 130 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,8 @@ class PHP extends Tokenizer
464464
T_CLOSE_SHORT_ARRAY => 1,
465465
T_TYPE_UNION => 1,
466466
T_TYPE_INTERSECTION => 1,
467+
T_TYPE_OPEN_PARENTHESIS => 1,
468+
T_TYPE_CLOSE_PARENTHESIS => 1,
467469
];
468470

469471
/**
@@ -747,6 +749,9 @@ protected function tokenize($string)
747749

748750
/*
749751
Special case for `static` used as a function name, i.e. `static()`.
752+
753+
Note: this may incorrectly change the static keyword directly before a DNF property type.
754+
If so, this will be caught and corrected for in the additional processing.
750755
*/
751756

752757
if ($tokenIsArray === true
@@ -2712,21 +2717,23 @@ protected function processAdditional()
27122717
if (isset($this->tokens[$x]) === true && $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
27132718
$ignore = Tokens::$emptyTokens;
27142719
$ignore += [
2715-
T_ARRAY => T_ARRAY,
2716-
T_CALLABLE => T_CALLABLE,
2717-
T_COLON => T_COLON,
2718-
T_NAMESPACE => T_NAMESPACE,
2719-
T_NS_SEPARATOR => T_NS_SEPARATOR,
2720-
T_NULL => T_NULL,
2721-
T_TRUE => T_TRUE,
2722-
T_FALSE => T_FALSE,
2723-
T_NULLABLE => T_NULLABLE,
2724-
T_PARENT => T_PARENT,
2725-
T_SELF => T_SELF,
2726-
T_STATIC => T_STATIC,
2727-
T_STRING => T_STRING,
2728-
T_TYPE_UNION => T_TYPE_UNION,
2729-
T_TYPE_INTERSECTION => T_TYPE_INTERSECTION,
2720+
T_ARRAY => T_ARRAY,
2721+
T_CALLABLE => T_CALLABLE,
2722+
T_COLON => T_COLON,
2723+
T_NAMESPACE => T_NAMESPACE,
2724+
T_NS_SEPARATOR => T_NS_SEPARATOR,
2725+
T_NULL => T_NULL,
2726+
T_TRUE => T_TRUE,
2727+
T_FALSE => T_FALSE,
2728+
T_NULLABLE => T_NULLABLE,
2729+
T_PARENT => T_PARENT,
2730+
T_SELF => T_SELF,
2731+
T_STATIC => T_STATIC,
2732+
T_STRING => T_STRING,
2733+
T_TYPE_UNION => T_TYPE_UNION,
2734+
T_TYPE_INTERSECTION => T_TYPE_INTERSECTION,
2735+
T_TYPE_OPEN_PARENTHESIS => T_TYPE_OPEN_PARENTHESIS,
2736+
T_TYPE_CLOSE_PARENTHESIS => T_TYPE_CLOSE_PARENTHESIS,
27302737
];
27312738

27322739
$closer = $this->tokens[$x]['parenthesis_closer'];
@@ -3029,10 +3036,15 @@ protected function processAdditional()
30293036
continue;
30303037
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
30313038
|| $this->tokens[$i]['code'] === T_BITWISE_AND
3039+
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
3040+
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
30323041
) {
30333042
/*
30343043
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
30353044
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
3045+
Convert "(" and ")" to T_TYPE_(OPEN|CLOSE)_PARENTHESIS or leave as T_(OPEN|CLOSE)_PARENTHESIS.
3046+
3047+
All type related tokens will be converted in one go as soon as this section is hit.
30363048
*/
30373049

30383050
$allowed = [
@@ -3048,20 +3060,22 @@ protected function processAdditional()
30483060
T_NS_SEPARATOR => T_NS_SEPARATOR,
30493061
];
30503062

3051-
$suspectedType = null;
3052-
$typeTokenCount = 0;
3063+
$suspectedType = null;
3064+
$typeTokenCountAfter = 0;
30533065

30543066
for ($x = ($i + 1); $x < $numTokens; $x++) {
30553067
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
30563068
continue;
30573069
}
30583070

30593071
if (isset($allowed[$this->tokens[$x]['code']]) === true) {
3060-
++$typeTokenCount;
3072+
++$typeTokenCountAfter;
30613073
continue;
30623074
}
30633075

3064-
if ($typeTokenCount > 0
3076+
if (($typeTokenCountAfter > 0
3077+
|| ($this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
3078+
&& isset($this->tokens[$i]['parenthesis_owner']) === false))
30653079
&& ($this->tokens[$x]['code'] === T_BITWISE_AND
30663080
|| $this->tokens[$x]['code'] === T_ELLIPSIS)
30673081
) {
@@ -3092,6 +3106,7 @@ protected function processAdditional()
30923106
&& $this->tokens[$this->tokens[$x]['scope_condition']]['code'] === T_FUNCTION
30933107
) {
30943108
$suspectedType = 'return';
3109+
break;
30953110
}
30963111

30973112
if ($this->tokens[$x]['code'] === T_EQUAL) {
@@ -3103,35 +3118,95 @@ protected function processAdditional()
31033118
break;
31043119
}//end for
31053120

3106-
if ($typeTokenCount === 0 || isset($suspectedType) === false) {
3107-
// Definitely not a union or intersection type, move on.
3121+
if (($typeTokenCountAfter === 0
3122+
&& ($this->tokens[$i]['code'] !== T_CLOSE_PARENTHESIS
3123+
|| isset($this->tokens[$i]['parenthesis_owner']) === true))
3124+
|| isset($suspectedType) === false
3125+
) {
3126+
// Definitely not a union, intersection or DNF type, move on.
31083127
continue;
31093128
}
31103129

31113130
if ($suspectedType === 'property or parameter') {
31123131
unset($allowed[T_STATIC]);
31133132
}
31143133

3115-
$typeTokenCount = 0;
3116-
$typeOperators = [$i];
3117-
$confirmed = false;
3134+
$typeTokenCountBefore = 0;
3135+
$typeOperators = [$i];
3136+
$confirmed = false;
3137+
$maybeNullable = null;
31183138

31193139
for ($x = ($i - 1); $x >= 0; $x--) {
31203140
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
31213141
continue;
31223142
}
31233143

3144+
if ($suspectedType === 'property or parameter'
3145+
&& $this->tokens[$x]['code'] === T_STRING
3146+
&& strtolower($this->tokens[$x]['content']) === 'static'
3147+
) {
3148+
// Static keyword followed directly by an open parenthesis for a DNF type.
3149+
// This token should be T_STATIC and was incorrectly identified as a function call before.
3150+
$this->tokens[$x]['code'] = T_STATIC;
3151+
$this->tokens[$x]['type'] = 'T_STATIC';
3152+
3153+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3154+
$line = $this->tokens[$x]['line'];
3155+
echo "\t* token $x on line $line changed back from T_STRING to T_STATIC".PHP_EOL;
3156+
}
3157+
}
3158+
3159+
if ($suspectedType === 'property or parameter'
3160+
&& $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3161+
) {
3162+
// We need to prevent the open parenthesis for a function/fn declaration from being retokenized
3163+
// to T_TYPE_OPEN_PARENTHESIS if this is the first parameter in the declaration.
3164+
if (isset($this->tokens[$x]['parenthesis_owner']) === true
3165+
&& $this->tokens[$this->tokens[$x]['parenthesis_owner']]['code'] === T_FUNCTION
3166+
) {
3167+
$confirmed = true;
3168+
break;
3169+
} else {
3170+
// This may still be an arrow function which hasn't been handled yet.
3171+
for ($y = ($x - 1); $y > 0; $y--) {
3172+
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
3173+
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
3174+
) {
3175+
// Non-whitespace content.
3176+
break;
3177+
}
3178+
}
3179+
3180+
if ($this->tokens[$y]['code'] === T_FN) {
3181+
$confirmed = true;
3182+
break;
3183+
}
3184+
}
3185+
}//end if
3186+
31243187
if (isset($allowed[$this->tokens[$x]['code']]) === true) {
3125-
++$typeTokenCount;
3188+
++$typeTokenCountBefore;
31263189
continue;
31273190
}
31283191

3129-
// Union and intersection types can't use the nullable operator, but be tolerant to parse errors.
3130-
if ($typeTokenCount > 0 && $this->tokens[$x]['code'] === T_NULLABLE) {
3192+
// Union, intersection and DNF types can't use the nullable operator, but be tolerant to parse errors.
3193+
if (($typeTokenCountBefore > 0
3194+
|| ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS && isset($this->tokens[$x]['parenthesis_owner']) === false))
3195+
&& ($this->tokens[$x]['code'] === T_NULLABLE
3196+
|| $this->tokens[$x]['code'] === T_INLINE_THEN)
3197+
) {
3198+
if ($this->tokens[$x]['code'] === T_INLINE_THEN) {
3199+
$maybeNullable = $x;
3200+
}
3201+
31313202
continue;
31323203
}
31333204

3134-
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
3205+
if ($this->tokens[$x]['code'] === T_BITWISE_OR
3206+
|| $this->tokens[$x]['code'] === T_BITWISE_AND
3207+
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3208+
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
3209+
) {
31353210
$typeOperators[] = $x;
31363211
continue;
31373212
}
@@ -3217,14 +3292,40 @@ protected function processAdditional()
32173292
$line = $this->tokens[$x]['line'];
32183293
echo "\t* token $x on line $line changed from T_BITWISE_OR to T_TYPE_UNION".PHP_EOL;
32193294
}
3220-
} else {
3295+
} else if ($this->tokens[$x]['code'] === T_BITWISE_AND) {
32213296
$this->tokens[$x]['code'] = T_TYPE_INTERSECTION;
32223297
$this->tokens[$x]['type'] = 'T_TYPE_INTERSECTION';
32233298

32243299
if (PHP_CODESNIFFER_VERBOSITY > 1) {
32253300
$line = $this->tokens[$x]['line'];
32263301
echo "\t* token $x on line $line changed from T_BITWISE_AND to T_TYPE_INTERSECTION".PHP_EOL;
32273302
}
3303+
} else if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS) {
3304+
$this->tokens[$x]['code'] = T_TYPE_OPEN_PARENTHESIS;
3305+
$this->tokens[$x]['type'] = 'T_TYPE_OPEN_PARENTHESIS';
3306+
3307+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3308+
$line = $this->tokens[$x]['line'];
3309+
echo "\t* token $x on line $line changed from T_OPEN_PARENTHESIS to T_TYPE_OPEN_PARENTHESIS".PHP_EOL;
3310+
}
3311+
} else if ($this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
3312+
$this->tokens[$x]['code'] = T_TYPE_CLOSE_PARENTHESIS;
3313+
$this->tokens[$x]['type'] = 'T_TYPE_CLOSE_PARENTHESIS';
3314+
3315+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3316+
$line = $this->tokens[$x]['line'];
3317+
echo "\t* token $x on line $line changed from T_CLOSE_PARENTHESIS to T_TYPE_CLOSE_PARENTHESIS".PHP_EOL;
3318+
}
3319+
}//end if
3320+
}//end foreach
3321+
3322+
if (isset($maybeNullable) === true) {
3323+
$this->tokens[$maybeNullable]['code'] = T_NULLABLE;
3324+
$this->tokens[$maybeNullable]['type'] = 'T_NULLABLE';
3325+
3326+
if (PHP_CODESNIFFER_VERBOSITY > 1) {
3327+
$line = $this->tokens[$maybeNullable]['line'];
3328+
echo "\t* token $maybeNullable on line $line changed from T_INLINE_THEN to T_NULLABLE".PHP_EOL;
32283329
}
32293330
}
32303331

src/Util/Tokens.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@
8282
define('T_ATTRIBUTE_END', 'PHPCS_T_ATTRIBUTE_END');
8383
define('T_ENUM_CASE', 'PHPCS_T_ENUM_CASE');
8484
define('T_TYPE_INTERSECTION', 'PHPCS_T_TYPE_INTERSECTION');
85+
define('T_TYPE_OPEN_PARENTHESIS', 'PHPCS_T_TYPE_OPEN_PARENTHESIS');
86+
define('T_TYPE_CLOSE_PARENTHESIS', 'PHPCS_T_TYPE_CLOSE_PARENTHESIS');
8587

8688
// Some PHP 5.5 tokens, replicated for lower versions.
8789
if (defined('T_FINALLY') === false) {

tests/Core/Tokenizer/ArrayKeywordTest.inc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,20 @@ class Bar {
3939
/* testOOPropertyType */
4040
protected array $property;
4141
}
42+
43+
class DNFTypes {
44+
/* testOOConstDNFType */
45+
const (A&B)|array|(C&D) NAME = [];
46+
47+
/* testOOPropertyDNFType */
48+
protected (A&B)|ARRAY|null $property;
49+
50+
/* testFunctionDeclarationParamDNFType */
51+
public function name(null|array|(A&B) $param) {
52+
/* testClosureDeclarationParamDNFType */
53+
$cl = function ( array|(A&B) $param) {};
54+
55+
/* testArrowDeclarationReturnDNFType */
56+
$arrow = fn($a): (A&B)|Array => new $a;
57+
}
58+
}

tests/Core/Tokenizer/ArrayKeywordTest.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,24 @@ public static function dataArrayType()
131131
'OO property type' => [
132132
'testMarker' => '/* testOOPropertyType */',
133133
],
134+
135+
'OO constant DNF type' => [
136+
'testMarker' => '/* testOOConstDNFType */',
137+
],
138+
'OO property DNF type' => [
139+
'testMarker' => '/* testOOPropertyDNFType */',
140+
'testContent' => 'ARRAY',
141+
],
142+
'function param DNF type' => [
143+
'testMarker' => '/* testFunctionDeclarationParamDNFType */',
144+
],
145+
'closure param DNF type' => [
146+
'testMarker' => '/* testClosureDeclarationParamDNFType */',
147+
],
148+
'arrow return DNF type' => [
149+
'testMarker' => '/* testArrowDeclarationReturnDNFType */',
150+
'testContent' => 'Array',
151+
],
134152
];
135153

136154
}//end dataArrayType()

tests/Core/Tokenizer/BackfillFnTokenTest.inc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,15 @@ $arrowWithUnionParam = fn(Traversable&Countable $param) : int => (new SomeClass(
119119
/* testIntersectionReturnType */
120120
$arrowWithUnionReturn = fn($param) : \MyFoo&SomeInterface => new SomeClass($param);
121121

122+
/* testDNFParamType */
123+
$arrowWithUnionParam = fn((Traversable&Countable)|null $param) : SomeClass => new SomeClass($param) ?? null;
124+
125+
/* testDNFReturnType */
126+
$arrowWithUnionReturn = fn($param) : false|(\MyFoo&SomeInterface) => new \MyFoo($param) ?? false;
127+
128+
/* testDNFParamTypeWithReturnByRef */
129+
$arrowWithParamReturnByRef = fn &((A&B)|null $param) => $param * 10;
130+
122131
/* testTernary */
123132
$fn = fn($a) => $a ? /* testTernaryThen */ fn() : string => 'a' : /* testTernaryElse */ fn() : string => 'b';
124133

tests/Core/Tokenizer/BackfillFnTokenTest.php

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,54 @@ public function testIntersectionReturnType()
547547
}//end testIntersectionReturnType()
548548

549549

550+
/**
551+
* Test arrow function with a DNF parameter type.
552+
*
553+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
554+
*
555+
* @return void
556+
*/
557+
public function testDNFParamType()
558+
{
559+
$token = $this->getTargetToken('/* testDNFParamType */', T_FN);
560+
$this->backfillHelper($token);
561+
$this->scopePositionTestHelper($token, 17, 29);
562+
563+
}//end testDNFParamType()
564+
565+
566+
/**
567+
* Test arrow function with a DNF return type.
568+
*
569+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
570+
*
571+
* @return void
572+
*/
573+
public function testDNFReturnType()
574+
{
575+
$token = $this->getTargetToken('/* testDNFReturnType */', T_FN);
576+
$this->backfillHelper($token);
577+
$this->scopePositionTestHelper($token, 16, 29);
578+
579+
}//end testDNFReturnType()
580+
581+
582+
/**
583+
* Test arrow function which returns by reference with a DNF parameter type.
584+
*
585+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
586+
*
587+
* @return void
588+
*/
589+
public function testDNFParamTypeWithReturnByRef()
590+
{
591+
$token = $this->getTargetToken('/* testDNFParamTypeWithReturnByRef */', T_FN);
592+
$this->backfillHelper($token);
593+
$this->scopePositionTestHelper($token, 15, 22);
594+
595+
}//end testDNFParamTypeWithReturnByRef()
596+
597+
550598
/**
551599
* Test arrow functions used in ternary operators.
552600
*

0 commit comments

Comments
 (0)