Skip to content

Commit cecc810

Browse files
committed
HTML API: Add class name utilities has_class() and class_list().
This patch adds two new public methods to the HTML Tag Processor:

- `has_class()` indicates if a matched tag contains a given CSS class name.
- `class_list()` returns a generator to iterate over all the class names in a matched tag.

Included in this patch is a refactoring of the internal logic when matching a tag to reuse the new `has_class()` function. Previously it was relying on optimized code in the `matches()` function which performed byte-for-byte class name comparison. With the change in this patch it will perform class name matching on the decoded value, which might differ if a class attribute contains character references.

These methods may be useful for running more complicated queries based on the presence or absence of CSS class names. The use of these methods avoids the need to manually decode the class attribute as reported by `$process->get_attribute( 'class' )`.

Props dmsnell.
Fixes #59209.

git-svn-id: https://develop.svn.wordpress.org/trunk@56703 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 086010a commit cecc810

2 files changed

Lines changed: 244 additions & 58 deletions

File tree

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 89 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,94 @@ public function next_tag( $query = null ) {
626626
}
627627

628628

629+
/**
630+
* Generator for a foreach loop to step through each class name for the matched tag.
631+
*
632+
* This generator function is designed to be used inside a "foreach" loop.
633+
*
634+
* Example:
635+
*
636+
* $p = new WP_HTML_Tag_Processor( "<div class='free &lt;egg&gt;\tlang-en'>" );
637+
* $p->next_tag();
638+
* foreach ( $p->class_list() as $class_name ) {
639+
* echo "{$class_name} ";
640+
* }
641+
* // Outputs: "free <egg> lang-en "
642+
*
643+
* @since 6.4.0
644+
*/
645+
public function class_list() {
646+
/** @var string $class contains the string value of the class attribute, with character references decoded. */
647+
$class = $this->get_attribute( 'class' );
648+
649+
if ( ! is_string( $class ) ) {
650+
return;
651+
}
652+
653+
$seen = array();
654+
655+
$at = 0;
656+
while ( $at < strlen( $class ) ) {
657+
// Skip past any initial boundary characters.
658+
$at += strspn( $class, " \t\f\r\n", $at );
659+
if ( $at >= strlen( $class ) ) {
660+
return;
661+
}
662+
663+
// Find the byte length until the next boundary.
664+
$length = strcspn( $class, " \t\f\r\n", $at );
665+
if ( 0 === $length ) {
666+
return;
667+
}
668+
669+
/*
670+
* CSS class names are case-insensitive in the ASCII range.
671+
*
672+
* @see https://www.w3.org/TR/CSS2/syndata.html#x1
673+
*/
674+
$name = strtolower( substr( $class, $at, $length ) );
675+
$at += $length;
676+
677+
/*
678+
* It's expected that the number of class names for a given tag is relatively small.
679+
* Given this, it is probably faster overall to scan an array for a value rather
680+
* than to use the class name as a key and check if it's a key of $seen.
681+
*/
682+
if ( in_array( $name, $seen, true ) ) {
683+
continue;
684+
}
685+
686+
$seen[] = $name;
687+
yield $name;
688+
}
689+
}
690+
691+
692+
/**
693+
* Returns whether the matched tag contains the given class name, compared ASCII case-insensitively.
694+
*
695+
* @since 6.4.0
696+
*
697+
* @param string $wanted_class Look for this CSS class name, ASCII case-insensitive.
698+
* @return bool|null Whether the matched tag contains the given class name, or null if not matched.
699+
*/
700+
public function has_class( $wanted_class ) {
701+
if ( ! $this->tag_name_starts_at ) {
702+
return null;
703+
}
704+
705+
$wanted_class = strtolower( $wanted_class );
706+
707+
foreach ( $this->class_list() as $class_name ) {
708+
if ( $class_name === $wanted_class ) {
709+
return true;
710+
}
711+
}
712+
713+
return false;
714+
}
715+
716+
629717
/**
630718
* Sets a bookmark in the HTML document.
631719
*
@@ -2347,64 +2435,7 @@ private function matches() {
23472435
}
23482436
}
23492437

2350-
$needs_class_name = null !== $this->sought_class_name;
2351-
2352-
if ( $needs_class_name && ! isset( $this->attributes['class'] ) ) {
2353-
return false;
2354-
}
2355-
2356-
/*
2357-
* Match byte-for-byte (case-sensitive and encoding-form-sensitive) on the class name.
2358-
*
2359-
* This will overlook certain classes that exist in other lexical variations
2360-
* than was supplied to the search query, but requires more complicated searching.
2361-
*/
2362-
if ( $needs_class_name ) {
2363-
$class_start = $this->attributes['class']->value_starts_at;
2364-
$class_end = $class_start + $this->attributes['class']->value_length;
2365-
$class_at = $class_start;
2366-
2367-
/*
2368-
* Ensure that boundaries surround the class name to avoid matching on
2369-
* substrings of a longer name. For example, the sequence "not-odd"
2370-
* should not match for the class "odd" even though "odd" is found
2371-
* within the class attribute text.
2372-
*
2373-
* See https://html.spec.whatwg.org/#attributes-3
2374-
* See https://html.spec.whatwg.org/#space-separated-tokens
2375-
*/
2376-
while (
2377-
// phpcs:ignore WordPress.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
2378-
false !== ( $class_at = strpos( $this->html, $this->sought_class_name, $class_at ) ) &&
2379-
$class_at < $class_end
2380-
) {
2381-
/*
2382-
* Verify this class starts at a boundary.
2383-
*/
2384-
if ( $class_at > $class_start ) {
2385-
$character = $this->html[ $class_at - 1 ];
2386-
2387-
if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
2388-
$class_at += strlen( $this->sought_class_name );
2389-
continue;
2390-
}
2391-
}
2392-
2393-
/*
2394-
* Verify this class ends at a boundary as well.
2395-
*/
2396-
if ( $class_at + strlen( $this->sought_class_name ) < $class_end ) {
2397-
$character = $this->html[ $class_at + strlen( $this->sought_class_name ) ];
2398-
2399-
if ( ' ' !== $character && "\t" !== $character && "\f" !== $character && "\r" !== $character && "\n" !== $character ) {
2400-
$class_at += strlen( $this->sought_class_name );
2401-
continue;
2402-
}
2403-
}
2404-
2405-
return true;
2406-
}
2407-
2438+
if ( null !== $this->sought_class_name && ! $this->has_class( $this->sought_class_name ) ) {
24082439
return false;
24092440
}
24102441

tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,17 @@ public function test_next_tag_should_return_false_for_a_non_existing_tag() {
498498
$this->assertFalse( $p->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
499499
}
500500

501+
/**
502+
* @ticket 59209
503+
*
504+
* @covers WP_HTML_Tag_Processor::next_tag
505+
*/
506+
public function test_next_tag_matches_decoded_class_names() {
507+
$p = new WP_HTML_Tag_Processor( '<div class="&lt;egg&gt;">' );
508+
509+
$this->assertTrue( $p->next_tag( array( 'class_name' => '<egg>' ) ), 'Failed to find tag with HTML-encoded class name.' );
510+
}
511+
501512
/**
502513
* @ticket 56299
503514
* @ticket 57852
@@ -1957,6 +1968,150 @@ public function data_next_tag_ignores_contents_of_rawtext_tags() {
19571968
);
19581969
}
19591970

1971+
/**
1972+
* @ticket 59209
1973+
*
1974+
* @covers WP_HTML_Tag_Processor::class_list
1975+
*/
1976+
public function test_class_list_empty_when_missing_class() {
1977+
$p = new WP_HTML_Tag_Processor( '<div>' );
1978+
$p->next_tag();
1979+
1980+
$found_classes = false;
1981+
foreach ( $p->class_list() as $class ) {
1982+
$found_classes = true;
1983+
}
1984+
1985+
$this->assertFalse( $found_classes, 'Found classes when none exist.' );
1986+
}
1987+
1988+
/**
1989+
* @ticket 59209
1990+
*
1991+
* @covers WP_HTML_Tag_Processor::class_list
1992+
*/
1993+
public function test_class_list_empty_when_class_is_boolean() {
1994+
$p = new WP_HTML_Tag_Processor( '<div class>' );
1995+
$p->next_tag();
1996+
1997+
$found_classes = false;
1998+
foreach ( $p->class_list() as $class ) {
1999+
$found_classes = true;
2000+
}
2001+
2002+
$this->assertFalse( $found_classes, 'Found classes when none exist.' );
2003+
}
2004+
2005+
/**
2006+
* @ticket 59209
2007+
*
2008+
* @covers WP_HTML_Tag_Processor::class_list
2009+
*/
2010+
public function test_class_list_empty_when_class_is_empty() {
2011+
$p = new WP_HTML_Tag_Processor( '<div class="">' );
2012+
$p->next_tag();
2013+
2014+
$found_classes = false;
2015+
foreach ( $p->class_list() as $class ) {
2016+
$found_classes = true;
2017+
}
2018+
2019+
$this->assertFalse( $found_classes, 'Found classes when none exist.' );
2020+
}
2021+
2022+
/**
2023+
* @ticket 59209
2024+
*
2025+
* @covers WP_HTML_Tag_Processor::class_list
2026+
*/
2027+
public function test_class_list_visits_each_class_in_order() {
2028+
$p = new WP_HTML_Tag_Processor( '<div class="one two three">' );
2029+
$p->next_tag();
2030+
2031+
$found_classes = array();
2032+
foreach ( $p->class_list() as $class ) {
2033+
$found_classes[] = $class;
2034+
}
2035+
2036+
$this->assertSame( array( 'one', 'two', 'three' ), $found_classes, 'Failed to visit the class names in their original order.' );
2037+
}
2038+
2039+
/**
2040+
* @ticket 59209
2041+
*
2042+
* @covers WP_HTML_Tag_Processor::class_list
2043+
*/
2044+
public function test_class_list_decodes_class_names() {
2045+
$p = new WP_HTML_Tag_Processor( '<div class="&notin;-class &lt;egg&gt; &#xff03;">' );
2046+
$p->next_tag();
2047+
2048+
$found_classes = array();
2049+
foreach ( $p->class_list() as $class ) {
2050+
$found_classes[] = $class;
2051+
}
2052+
2053+
$this->assertSame( array( '∉-class', '<egg>', "\u{ff03}" ), $found_classes, 'Failed to report class names in their decoded form.' );
2054+
}
2055+
2056+
/**
2057+
* @ticket 59209
2058+
*
2059+
* @covers WP_HTML_Tag_Processor::class_list
2060+
*/
2061+
public function test_class_list_visits_unique_class_names_only_once() {
2062+
$p = new WP_HTML_Tag_Processor( '<div class="one one &#x6f;ne">' );
2063+
$p->next_tag();
2064+
2065+
$found_classes = array();
2066+
foreach ( $p->class_list() as $class ) {
2067+
$found_classes[] = $class;
2068+
}
2069+
2070+
$this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' );
2071+
}
2072+
2073+
/**
2074+
* @ticket 59209
2075+
*
2076+
* @covers WP_HTML_Tag_Processor::has_class
2077+
*
2078+
* @dataProvider data_html_with_variations_of_class_values_and_sought_class_names
2079+
*
2080+
* @param string $html Contains a tag optionally containing a `class` attribute.
2081+
* @param string $sought_class Name of class to find in the input tag's `class`.
2082+
* @param bool $has_class Whether the sought class exists in the given HTML.
2083+
*/
2084+
public function test_has_class_handles_expected_class_name_variations( $html, $sought_class, $has_class ) {
2085+
$p = new WP_HTML_Tag_Processor( $html );
2086+
$p->next_tag();
2087+
2088+
if ( $has_class ) {
2089+
$this->assertTrue( $p->has_class( $sought_class ), "Failed to find expected class {$sought_class}." );
2090+
} else {
2091+
$this->assertFalse( $p->has_class( $sought_class ), "Found class {$sought_class} when it doesn't exist." );
2092+
}
2093+
}
2094+
2095+
/**
2096+
* Data provider.
2097+
*
2098+
* @return array[]
2099+
*/
2100+
public function data_html_with_variations_of_class_values_and_sought_class_names() {
2101+
return array(
2102+
'Tag without any classes' => array( '<div>', 'foo', false ),
2103+
'Tag with boolean class' => array( '<img class>', 'foo', false ),
2104+
'Tag with empty class' => array( '<p class="">', 'foo', false ),
2105+
'Tag with exact match' => array( '<button class="foo">', 'foo', true ),
2106+
'Tag with duplicate matches' => array( '<span class="foo bar foo">', 'foo', true ),
2107+
'Tag with non-initial match' => array( '<section class="bar foo">', 'foo', true ),
2108+
'Tag with encoded match' => array( '<main class="&hellip;">', '…', true ),
2109+
'Class with tab separator' => array( "<div class='one\ttwo'>", 'two', true ),
2110+
'Class with newline separator' => array( "<div class='one\ntwo\n'>", 'two', true ),
2111+
'False duplicate attribute' => array( '<img class=dog class=cat>', 'cat', false ),
2112+
);
2113+
}
2114+
19602115
/**
19612116
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
19622117
*

0 commit comments

Comments
 (0)