Code Coverage for projects/packages/block-delimiter/src/class-block-scanner.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	70.31% covered (warning)	70.31%	161 / 229	64.71% covered (warning)	64.71%	11 / 17	CRAP	0.00% covered (danger)	0.00%	0 / 1
Block_Scanner	70.31% covered (warning)	70.31%	161 / 229	64.71% covered (warning)	64.71%	11 / 17	285.18	0.00% covered (danger)	0.00%	0 / 1
create	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
next_delimiter	83.62% covered (warning)	83.62%	97 / 116	0.00% covered (danger)	0.00%	0 / 1	40.38
__construct	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
find_html_comment_end	66.67% covered (warning)	66.67%	12 / 18	0.00% covered (danger)	0.00%	0 / 1	12.00
get_last_error	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
get_last_json_error	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
get_delimiter_type	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
has_void_flag	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
is_block_type	88.89% covered (warning)	88.89%	16 / 18	0.00% covered (danger)	0.00%	0 / 1	10.14
opens_block	100.00% covered (success)	100.00%	8 / 8	100.00% covered (success)	100.00%	1 / 1	5
is_freeform	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
is_non_whitespace_freeform	66.67% covered (warning)	66.67%	2 / 3	0.00% covered (danger)	0.00%	0 / 1	2.15
get_block_type	83.33% covered (warning)	83.33%	5 / 6	0.00% covered (danger)	0.00%	0 / 1	3.04
get_attributes	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
allocate_and_return_parsed_attributes	100.00% covered (success)	100.00%	12 / 12	100.00% covered (success)	100.00%	1 / 1	5
get_span	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
debug_print_structure	0.00% covered (danger)	0.00%	0 / 39	0.00% covered (danger)	0.00%	0 / 1	90

1	<?php
2	/**
3	* Efficiently scan through block structure in document without parsing
4	* the entire block tree and all of its JSON attributes into memory.
5	*
6	* @package automattic/block-delimiter
7	*/
8
9	declare( strict_types = 1 );
10
11	namespace Automattic;
12
13	use Exception;
14	use WP_HTML_Span;
15
16	/**
17	* Class for efficiently scanning through block structure in a document
18	* without parsing the entire block tree and JSON attributes into memory.
19	*
20	* This class follows design values of the HTML API:
21	* - minimize allocations and strive for zero memory overhead
22	* - make costs explicit; pay only for what you need
23	* - follow a streaming, re-entrant design for pausing and aborting
24	*
25	* For usage, jump straight to {@see self::next_delimiter}.
26	*/
class Block_Scanner {
	/**
	 * Indicates if the last operation failed, otherwise
	 * will be `null` for success.
	 *
	 * @var string|null
	 */
	private $last_error = null;

	/**
	 * Indicates failures from decoding JSON attributes.
	 *
	 * @var int
	 */
	private $last_json_error = JSON_ERROR_NONE;

	/**
	 * Holds a reference to the original source text from which to
	 * extract the parsed spans of the delimiter.
	 *
	 * @var string
	 */
	private $source_text;

	/**
	 * Byte offset into source text where entire delimiter begins.
	 *
	 * @var int
	 */
	private $delimiter_at = 0;

	/**
	 * Byte length of full span of delimiter.
	 *
	 * @var int
	 */
	private $delimiter_length = 0;

	/**
	 * Byte offset where namespace span begins.
	 *
	 * @var int
	 */
	private $namespace_at = 0;

	/**
	 * Byte length of namespace span, or `0` if implicitly in the "core" namespace.
	 *
	 * @var int
	 */
	private $namespace_length = 0;

	/**
	 * Byte offset where block name span begins.
	 *
	 * @var int
	 */
	private $name_at = 0;

	/**
	 * Byte length of block name span.
	 *
	 * @var int
	 */
	private $name_length = 0;

	/**
	 * Whether the delimiter contains the block self-closing flag.
	 *
	 * This may be erroneous if present within a block closer,
	 * therefore the {@see self::has_void_flag} can be used by
	 * calling code to perform appropriate error-handling.
	 *
	 * @var bool
	 */
	private $has_void_flag = false;

	/**
	 * Byte offset where JSON attributes span begins.
	 *
	 * Starts at `0`, like the other offsets, so that reading it before
	 * any delimiter has been matched is well-defined.
	 *
	 * @var int
	 */
	private $json_at = 0;

	/**
	 * Byte length of JSON attributes span, or `0` if none are present.
	 *
	 * @var int
	 */
	private $json_length = 0;

	/**
	 * Indicates what kind of block comment delimiter this represents.
	 *
	 * One of:
	 *
	 *  - `static::OPENER` If the delimiter is opening a block.
	 *  - `static::CLOSER` If the delimiter is closing an open block.
	 *  - `static::VOID`   If the delimiter represents a void block with no inner content.
	 *
	 * If a parsed comment delimiter contains both the closing and the void
	 * flags then it will be interpreted as a void block to match the behavior
	 * of the official block parser, however, this is a mistake and probably
	 * the block ought to close an open block of the same name, if one is open.
	 *
	 * Remains `null` until the first delimiter has been matched.
	 *
	 * @var string|null
	 */
	private $type = null;

	/**
	 * Creates a new block scanner.
	 *
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter() ) {
	 *         if ( $scanner->opens_block( 'core/image' ) ) {
	 *             echo "Found an image!\n";
	 *         }
	 *     }
	 *
	 * This function is currently a stub so that future improvements can add configuration
	 * options and reject creation, which cannot occur directly inside class constructors.
	 *
	 * @see self::next_delimiter
	 *
	 * @param string $source_text Input document potentially containing block content.
	 * @return ?self Created block scanner, if successfully created.
	 */
	public static function create( string $source_text ): ?self {
		return new self( $source_text );
	}

	/**
	 * Scan to the next block delimiter in a document, indicating if one was found.
	 *
	 * Block comment delimiters must be valid HTML comments and may contain JSON.
	 * This search does not determine, however, if the JSON is valid.
	 *
	 * Example delimiters:
	 *
	 *     `<!-- wp:paragraph {"dropCap": true} -->`
	 *     `<!-- wp:separator /-->`
	 *     `<!-- /wp:paragraph -->`
	 *
	 * In the case that a block comment delimiter contains both the void indicator and
	 * also the closing indicator, it will be treated as a void block.
	 *
	 * Example:
	 *
	 *     // Find all image block opening delimiters.
	 *     $images  = array();
	 *     $scanner = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter() ) {
	 *         if ( $scanner->opens_block( 'core/image' ) ) {
	 *             $images[] = $scanner->get_span();
	 *         }
	 *     }
	 *
	 * Not all blocks have explicit delimiters. Non-block content at the top-level of
	 * a document (so-called “HTML soup”) forms implicit blocks containing neither a
	 * block name nor block attributes. Because this content often comprises only
	 * HTML whitespace and adds undue performance burden, it is skipped by default.
	 * To scan the implicit freeform blocks, pass the `$freeform_blocks` argument.
	 *
	 * Note: the `$freeform_blocks` argument is accepted but not yet acted upon by
	 * this implementation; only explicit delimiters are currently matched.
	 *
	 * Example:
	 *
	 *     $html   = "<!-- wp:void /-->\n<!-- wp:void /-->";
	 *     $blocks = [
	 *         [ 'blockName' => 'core/void' ],
	 *         [ 'blockName' => null ],
	 *         [ 'blockName' => 'core/void' ],
	 *     ];
	 *     $scanner = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter( freeform_blocks: 'visit' ) ) {
	 *         ...
	 *     }
	 *
	 * In some cases it may be useful to conditionally visit the implicit freeform
	 * blocks, such as when determining if a post contains freeform content that
	 * isn’t purely whitespace.
	 *
	 * Example:
	 *
	 *     $seen_block_types = [];
	 *     $freeform_blocks  = 'visit';
	 *     $scanner          = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter( freeform_blocks: $freeform_blocks ) ) {
	 *         if ( ! $scanner->opens_block() ) {
	 *             continue;
	 *         }
	 *
	 *         // Stop wasting time visiting freeform blocks after one has been found.
	 *         if ( 'visit' === $freeform_blocks ) {
	 *             if ( $scanner->is_non_whitespace_freeform() ) {
	 *                 $freeform_blocks                   = 'skip';
	 *                 $seen_block_types['core/freeform'] = true;
	 *             }
	 *             continue;
	 *         }
	 *
	 *         $seen_block_types[ $scanner->get_block_type() ] = true;
	 *     }
	 *
	 * @param string $freeform_blocks Optional. Pass `visit` to match freeform HTML content
	 *                                not surrounded by block delimiters. Defaults to `skip`.
	 * @return bool Whether a block delimiter was matched.
	 */
	public function next_delimiter( string $freeform_blocks = 'skip' ): bool { // phpcs:ignore VariableAnalysis.CodeAnalysis.VariableAnalysis.UnusedVariable
		// Once the scanner has failed it stays failed; there is nothing more to find.
		if ( $this->last_error ) {
			return false;
		}

		$text      = $this->source_text;
		$end       = strlen( $text );
		$at        = $this->delimiter_at + $this->delimiter_length;
		$found_one = false;

		while ( $at < $end ) {
			/*
			 * Find the next possible opening.
			 *
			 * This follows the behavior in the official block parser, which treats a post
			 * as a list of blocks with nested HTML. If HTML comment syntax appears within
			 * an HTML attribute value, SCRIPT or STYLE element, or in other select places,
			 * which it can do inside of HTML, then the block parsing may break.
			 *
			 * For a more robust parse scan through the document with the HTML API. In
			 * practice, this has not been a problem in the entire history of blocks.
			 */
			$comment_opening_at = strpos( $text, '<!--', $at );
			if ( false === $comment_opening_at ) {
				return false;
			}

			$opening_whitespace_at     = $comment_opening_at + 4;
			$opening_whitespace_length = strspn( $text, " \t\f\r\n", $opening_whitespace_at );
			if ( 0 === $opening_whitespace_length ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$wp_prefix_at = $opening_whitespace_at + $opening_whitespace_length;
			if ( $wp_prefix_at >= $end ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return false;
			}

			$has_closer = false;
			if ( '/' === $text[ $wp_prefix_at ] ) {
				$has_closer = true;
				++$wp_prefix_at;

				// The document ended right after the closer flag.
				if ( $wp_prefix_at >= $end ) {
					$this->last_error = self::INCOMPLETE_INPUT;
					return false;
				}
			}

			if ( 0 !== substr_compare( $text, 'wp:', $wp_prefix_at, 3 ) ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$namespace_at = $wp_prefix_at + 3;
			if ( $namespace_at >= $end ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return false;
			}

			$start_of_namespace = $text[ $namespace_at ];

			// The namespace must start with a-z.
			if ( 'a' > $start_of_namespace || 'z' < $start_of_namespace ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$namespace_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $namespace_at + 1 );
			$separator_at     = $namespace_at + $namespace_length;
			if ( $separator_at >= $end ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return false;
			}

			$has_separator = '/' === $text[ $separator_at ];
			if ( $has_separator ) {
				$name_at = $separator_at + 1;

				// The document ended right after the namespace separator.
				if ( $name_at >= $end ) {
					$this->last_error = self::INCOMPLETE_INPUT;
					return false;
				}

				// The block name must start with a-z, just like the namespace.
				$start_of_name = $text[ $name_at ];
				if ( 'a' > $start_of_name || 'z' < $start_of_name ) {
					$at = $this->find_html_comment_end( $comment_opening_at, $end );
					continue;
				}

				$name_length = 1 + strspn( $text, 'abcdefghijklmnopqrstuvwxyz0123456789-_', $name_at + 1 );
			} else {
				// Without a separator, what was scanned as the namespace is actually the name.
				$name_at          = $namespace_at;
				$name_length      = $namespace_length;
				$namespace_length = 0;
			}

			$after_name_whitespace_at     = $name_at + $name_length;
			$after_name_whitespace_length = strspn( $text, " \t\f\r\n", $after_name_whitespace_at );
			if ( 0 === $after_name_whitespace_length ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			$json_at = $after_name_whitespace_at + $after_name_whitespace_length;
			if ( $json_at >= $end ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return false;
			}
			$has_json    = '{' === $text[ $json_at ];
			$json_length = 0;

			/*
			 * For the final span of the delimiter it's most efficient to find the end
			 * of the HTML comment and work backwards. This prevents complicated parsing
			 * inside the JSON span, which cannot contain the HTML comment terminator.
			 *
			 * This also matches the behavior in the official block parser, though it
			 * allows for matching invalid JSON content.
			 */
			$comment_closing_at = strpos( $text, '-->', $json_at );
			if ( false === $comment_closing_at ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return false;
			}

			/*
			 * It looks like this logic leaves an error in here, when the position
			 * overlaps the JSON or block name. However, for neither of those is it
			 * possible to parse a valid block if that last overlapping character
			 * is the void flag. This, therefore, will be valid regardless of how
			 * the rest of the comment delimiter is written.
			 */
			if ( '/' === $text[ $comment_closing_at - 1 ] ) {
				$has_void_flag    = true;
				$void_flag_length = 1;
			} else {
				$has_void_flag    = false;
				$void_flag_length = 0;
			}

			/*
			 * If there's no JSON, then the span of text after the name
			 * until the comment closing must be completely whitespace.
			 */
			if ( ! $has_json ) {
				$max_whitespace_length = $comment_closing_at - $json_at - $void_flag_length;

				// This shouldn't be possible, but it can't be allowed regardless.
				if ( $max_whitespace_length < 0 ) {
					$at = $this->find_html_comment_end( $comment_opening_at, $end );
					continue;
				}

				/*
				 * Anything other than whitespace between the block name and the
				 * comment closer (or void flag) means this is an ordinary HTML
				 * comment, e.g. `<!-- wp:foo bar -->`, and not a block delimiter.
				 */
				$closing_whitespace_length = strspn( $text, " \t\f\r\n", $json_at, $max_whitespace_length );
				if ( $closing_whitespace_length !== $max_whitespace_length ) {
					$at = $this->find_html_comment_end( $comment_opening_at, $end );
					continue;
				}

				// This must be a block delimiter!
				$found_one = true;
				break;
			}

			// There's JSON, so scan backwards from the comment closer to find where it ends.
			$after_json_whitespace_length = 0;
			for ( $char_at = $comment_closing_at - $void_flag_length - 1; $char_at > $json_at; $char_at-- ) {
				$char = $text[ $char_at ];

				switch ( $char ) {
					case ' ':
					case "\t":
					case "\f":
					case "\r":
					case "\n":
						++$after_json_whitespace_length;
						continue 2;

					case '}':
						$json_length = $char_at - $json_at + 1;
						break 2;

					default:
						/*
						 * Something other than whitespace follows the JSON: this comment
						 * is not a delimiter. Resume the search just past its opening so
						 * that an opener appearing before the `-->` can still be found.
						 *
						 * Jumping directly avoids re-parsing this same comment once for
						 * every byte between the previous position and its opening.
						 */
						$at = $comment_opening_at + 1;
						continue 3;
				}
			}

			// The JSON must be closed and must be followed by whitespace.
			if ( 0 === $json_length || 0 === $after_json_whitespace_length ) {
				$at = $this->find_html_comment_end( $comment_opening_at, $end );
				continue;
			}

			// This must be a block delimiter!
			$found_one = true;
			break;
		}

		if ( ! $found_one ) {
			return false;
		}

		$this->delimiter_at     = $comment_opening_at;
		$this->delimiter_length = $comment_closing_at + 3 - $comment_opening_at;

		$this->namespace_at     = $namespace_at;
		$this->namespace_length = $namespace_length;

		$this->name_at     = $name_at;
		$this->name_length = $name_length;

		$this->json_at     = $json_at;
		$this->json_length = $json_length;

		$this->type = $has_closer
			? static::CLOSER
			: ( $has_void_flag ? static::VOID : static::OPENER );

		$this->has_void_flag = $has_void_flag;

		return true;
	}

	/**
	 * Constructor function.
	 *
	 * @param string $source_text Input document potentially containing block content.
	 */
	private function __construct( string $source_text ) {
		$this->source_text = $source_text;
	}

	/**
	 * Returns the byte-offset after the ending character of an HTML comment,
	 * assuming the proper starting byte offset.
	 *
	 * Sets the last error to {@see self::INCOMPLETE_INPUT} when the search
	 * runs out of `--` sequences before finding the end of the comment.
	 *
	 * @param int $comment_starting_at Where the HTML comment started, the leading `<`.
	 * @param int $search_end          Last offset in which to search, for limiting search span.
	 * @return int Offset after the current HTML comment ends, or `$search_end` if no end was found.
	 */
	private function find_html_comment_end( int $comment_starting_at, int $search_end ): int {
		$text = $this->source_text;

		// Find span-of-dashes comments which look like `<!----->`.
		$span_of_dashes  = strspn( $text, '-', $comment_starting_at + 2 );
		$after_dashes_at = $comment_starting_at + 2 + $span_of_dashes;
		if ( $after_dashes_at < $search_end && '>' === $text[ $after_dashes_at ] ) {
			return $after_dashes_at + 1;
		}

		// Otherwise, there are other characters inside the comment, find the first `-->` or `--!>`.
		$now_at = $comment_starting_at + 4;
		while ( $now_at < $search_end ) {
			$dashes_at = strpos( $text, '--', $now_at );
			if ( false === $dashes_at ) {
				$this->last_error = self::INCOMPLETE_INPUT;
				return $search_end;
			}

			$closer_must_be_at = $dashes_at + 2 + strspn( $text, '-', $dashes_at + 2 );
			if ( $closer_must_be_at < $search_end && '!' === $text[ $closer_must_be_at ] ) {
				++$closer_must_be_at;
			}

			if ( $closer_must_be_at < $search_end && '>' === $text[ $closer_must_be_at ] ) {
				return $closer_must_be_at + 1;
			}

			// Every search starting at or before these dashes finds them again, so step past them.
			$now_at = $dashes_at + 1;
		}

		return $search_end;
	}

	/**
	 * Indicates if the last attempt to parse a block comment delimiter
	 * failed, if set, otherwise `null` if the last attempt succeeded.
	 *
	 * @return string|null
	 */
	public function get_last_error() {
		return $this->last_error;
	}

	/**
	 * Indicates if the last attempt to parse a block’s JSON attributes failed.
	 *
	 * @see JSON_ERROR_NONE, JSON_ERROR_DEPTH, etc…
	 *
	 * @return int JSON_ERROR_ code from last attempt to parse block JSON attributes.
	 */
	public function get_last_json_error(): int {
		return $this->last_json_error;
	}

	/**
	 * Returns the type of the block comment delimiter.
	 *
	 * One of:
	 *
	 *  - `static::OPENER`
	 *  - `static::CLOSER`
	 *  - `static::VOID`
	 *
	 * No type is set until {@see self::next_delimiter} has matched a
	 * delimiter, so call this only after a successful match.
	 *
	 * @return string type of the block comment delimiter.
	 */
	public function get_delimiter_type(): string {
		return $this->type;
	}

	/**
	 * Returns whether the delimiter contains the void flag.
	 *
	 * This should be avoided except in cases of handling errors with
	 * block closers containing the void flag. For normative use,
	 * {@see self::get_delimiter_type}.
	 *
	 * @return bool
	 */
	public function has_void_flag(): bool {
		return $this->has_void_flag;
	}

	/**
	 * Indicates if the block delimiter represents a block of the given type.
	 *
	 * Since the "core" namespace may be implicit, it's allowable to pass
	 * either the fully-qualified block type with namespace and block name
	 * as well as the shorthand version only containing the block name, if
	 * the desired block is in the "core" namespace.
	 *
	 * Example:
	 *
	 *     $is_core_paragraph = $scanner->is_block_type( 'paragraph' );
	 *     $is_core_paragraph = $scanner->is_block_type( 'core/paragraph' );
	 *     $is_formula        = $scanner->is_block_type( 'math-blocks/formula' );
	 *
	 * @param string $block_type Block type name for the desired block.
	 *                           E.g. "paragraph", "core/paragraph", "math-blocks/formula".
	 * @return bool Whether this delimiter represents a block of the given type.
	 */
	public function is_block_type( string $block_type ): bool {
		// This is a core/freeform text block, it’s special.
		if ( 0 === $this->name_length ) {
			return 'core/freeform' === $block_type || 'freeform' === $block_type;
		}

		$slash_at = strpos( $block_type, '/' );
		if ( false === $slash_at ) {
			$namespace  = 'core';
			$block_name = $block_type;
		} else {
			// @todo Get lengths but avoid the allocation, use substr_compare below.
			$namespace  = substr( $block_type, 0, $slash_at );
			$block_name = substr( $block_type, $slash_at + 1 );
		}

		// Only the 'core' namespace is allowed to be omitted.
		if ( 0 === $this->namespace_length && 'core' !== $namespace ) {
			return false;
		}

		// If given an explicit namespace, they must match.
		if (
			0 !== $this->namespace_length && (
				strlen( $namespace ) !== $this->namespace_length ||
				0 !== substr_compare( $this->source_text, $namespace, $this->namespace_at, $this->namespace_length )
			)
		) {
			return false;
		}

		// The block name must match.
		return (
			strlen( $block_name ) === $this->name_length &&
			0 === substr_compare( $this->source_text, $block_name, $this->name_at, $this->name_length )
		);
	}

	/**
	 * Indicates if the matched delimiter is an opening or void delimiter
	 * (i.e. it opens the block) of the given type, if a type is provided.
	 *
	 * This is a helper method to ease handling of code inspecting where
	 * blocks start, and of checking if the blocks are of a given type.
	 * The function is variadic to allow for checking if the delimiter
	 * opens one of many possible block types.
	 *
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( $html );
	 *     while ( $scanner->next_delimiter() ) {
	 *         if ( $scanner->opens_block( 'core/code', 'syntaxhighlighter/code' ) ) {
	 *             echo "Found code!";
	 *             continue;
	 *         }
	 *
	 *         if ( $scanner->opens_block( 'core/image' ) ) {
	 *             echo "Found an image!";
	 *             continue;
	 *         }
	 *
	 *         if ( $scanner->opens_block() ) {
	 *             echo "Found a new block!";
	 *         }
	 *     }
	 *
	 * @see self::is_block_type
	 *
	 * @param string|null ...$block_type Optional. Is the matched block type one of these?
	 *                                   If none are provided, will not test block type.
	 * @return bool Whether the matched block delimiter opens a block, and whether it
	 *              opens a block of one of the given block types, if provided.
	 */
	public function opens_block( ...$block_type ): bool {
		if ( static::CLOSER === $this->type ) {
			return false;
		}

		if ( count( $block_type ) === 0 ) {
			return true;
		}

		foreach ( $block_type as $block ) {
			if ( $this->is_block_type( $block ) ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Indicates if the matched delimiter is implied due to top-level
	 * non-block content in the post.
	 *
	 * @see self::is_non_whitespace_freeform
	 *
	 * @return bool Whether or not the matched delimiter is implied as `core/freeform`.
	 */
	public function is_freeform(): bool {
		return 0 === $this->name_length;
	}

	/**
	 * Indicates if the matched delimiter is implicit and surrounding
	 * top-level non-block content that contains non-whitespace text.
	 *
	 * Many block serializers introduce newlines between block delimiters,
	 * so the presence of top-level non-block content does not imply that
	 * there are “real” freeform HTML blocks. Checking if there is content
	 * beyond whitespace is a more certain check, such as for determining
	 * whether to load CSS for the freeform or fallback block type.
	 *
	 * This method is not yet fully implemented and currently always
	 * returns `false`.
	 *
	 * @see self::is_freeform
	 *
	 * @return bool
	 */
	public function is_non_whitespace_freeform(): bool {
		if ( 0 !== $this->name_length ) {
			return false;
		}

		// For now, return false as this method is not yet fully implemented.
		// @todo Implement logic to check if freeform content contains non-whitespace text.
		return false;
	}

	/**
	 * Allocates a substring for the block type and returns the
	 * fully-qualified name, including the namespace.
	 *
	 * This function allocates a substring for the given block type. This
	 * allocation will be small and likely fine in most cases, but it's
	 * preferable to call {@link self::is_block_type} if only needing
	 * to know whether the delimiter is for a given block type, as that
	 * function is more efficient for this purpose and avoids the allocation.
	 *
	 * Example:
	 *
	 *     // Avoid.
	 *     'core/paragraph' === $scanner->get_block_type();
	 *
	 *     // Prefer.
	 *     $scanner->is_block_type( 'core/paragraph' );
	 *     $scanner->is_block_type( 'paragraph' );
	 *
	 * @return string Fully-qualified block namespace and type, e.g. "core/paragraph".
	 */
	public function get_block_type(): string {
		// This is a core/freeform text block, it’s special.
		if ( 0 === $this->name_length ) {
			return 'core/freeform';
		}

		// This is implicitly in the "core" namespace.
		if ( 0 === $this->namespace_length ) {
			$block_name = substr( $this->source_text, $this->name_at, $this->name_length );
			return "core/{$block_name}";
		}

		// The namespace, separator, and name are contiguous in the source text.
		return substr( $this->source_text, $this->namespace_at, $this->namespace_length + $this->name_length + 1 );
	}

	/**
	 * Returns a lazy wrapper around the block attributes, which can be used
	 * for efficiently interacting with the JSON attributes.
	 *
	 * @throws Exception This function is not yet implemented.
	 *
	 * @todo Create a lazy JSON wrapper so specific attributes can be
	 *       efficiently queried without parsing everything and loading
	 *       the entire object into memory.
	 * @todo After realistic benchmarking, see if JsonStreamingParser\Parser
	 *       could be used — it would need to be fast enough for the reduction
	 *       in memory use to be worth it, compared to {@see \json_decode}.
	 *
	 * @see \JsonStreamingParser\Parser
	 *
	 * @return never
	 */
	public function get_attributes(): void {
		throw new Exception( 'Lazy attribute parsing not yet supported' );
	}

	/**
	 * Attempts to parse and return the entire JSON attributes from the delimiter,
	 * allocating memory and processing the JSON span in the process.
	 *
	 * This does not return any parsed attributes for a closing block delimiter
	 * even if there is a span of JSON content; this JSON is a parsing error.
	 *
	 * Consider calling {@link self::get_attributes} instead if it's not
	 * necessary to read all the attributes at the same time, as that provides
	 * a more efficient mechanism for typical use cases.
	 *
	 * Since the JSON span inside the comment delimiter may not be valid JSON,
	 * this function will return `null` if it cannot parse the span and set the
	 * {@see self::get_last_json_error} to the appropriate JSON_ERROR_ constant.
	 *
	 * If the delimiter contains no JSON span, or if no delimiter has been matched
	 * yet, it will also return `null`, but the last error will be set to
	 * {@see JSON_ERROR_NONE}.
	 *
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:image {"url": "https://wordpress.org/favicon.ico"} -->' );
	 *     $scanner->next_delimiter();
	 *     $memory_hungry_and_slow_attributes = $scanner->allocate_and_return_parsed_attributes();
	 *     $memory_hungry_and_slow_attributes === array( 'url' => 'https://wordpress.org/favicon.ico' );
	 *
	 *     $scanner = Block_Scanner::create( '<!-- /wp:image {"url": "https://wordpress.org/favicon.ico"} -->' );
	 *     $scanner->next_delimiter();
	 *     null            === $scanner->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_NONE === $scanner->get_last_json_error();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:separator {} /-->' );
	 *     $scanner->next_delimiter();
	 *     array() === $scanner->allocate_and_return_parsed_attributes();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:separator /-->' );
	 *     $scanner->next_delimiter();
	 *     null === $scanner->allocate_and_return_parsed_attributes();
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:image {"url} -->' );
	 *     $scanner->next_delimiter();
	 *     null                 === $scanner->allocate_and_return_parsed_attributes();
	 *     JSON_ERROR_CTRL_CHAR === $scanner->get_last_json_error();
	 *
	 * @return array|null Parsed JSON attributes, if present and valid, otherwise `null`.
	 */
	public function allocate_and_return_parsed_attributes(): ?array {
		$this->last_json_error = JSON_ERROR_NONE;

		if ( static::CLOSER === $this->type ) {
			return null;
		}

		if ( 0 === $this->json_length ) {
			return null;
		}

		$json_span = substr( $this->source_text, $this->json_at, $this->json_length );
		$parsed    = json_decode( $json_span, null, 512, JSON_OBJECT_AS_ARRAY | JSON_INVALID_UTF8_SUBSTITUTE );

		$last_error            = json_last_error();
		$this->last_json_error = $last_error;

		return ( JSON_ERROR_NONE === $last_error && is_array( $parsed ) )
			? $parsed
			: null;
	}

	/**
	 * Returns the span representing the currently-matched delimiter,
	 * if matched, otherwise `null`.
	 *
	 * Note that for freeform blocks this will return a span of length
	 * zero, since there is no explicit block delimiter.
	 *
	 * Example:
	 *
	 *     $scanner = Block_Scanner::create( '<!-- wp:void /-->' );
	 *     null === $scanner->get_span();
	 *
	 *     $scanner->next_delimiter();
	 *     WP_HTML_Span( 0, 17 ) === $scanner->get_span();
	 *
	 * @return WP_HTML_Span|null Span of text in source text spanning matched delimiter.
	 */
	public function get_span(): ?WP_HTML_Span {
		// The delimiter type is only assigned once a delimiter has been matched.
		if ( null === $this->type ) {
			return null;
		}

		return new WP_HTML_Span( $this->delimiter_at, $this->delimiter_length );
	}

	// Debugging methods not meant for production use.

	/**
	 * Prints a debugging message showing the structure of the parsed delimiter.
	 *
	 * Each part of the delimiter is echoed in sequence, with whitespace bytes
	 * replaced by visible symbols. ANSI color codes are included unless STDOUT
	 * is defined and is not reported to be a terminal.
	 *
	 * This is not meant to be used in production!
	 *
	 * @access private
	 */
	public function debug_print_structure(): void {
		// Without the POSIX extension there is no way to confirm a terminal; print without color.
		$c = ( ! defined( 'STDOUT' ) || ( function_exists( 'posix_isatty' ) && posix_isatty( STDOUT ) ) )
			? function ( $color = null ) { return $color; } // phpcs:ignore
			: function ( $color ) { return ''; }; // phpcs:ignore

		if ( $this->is_block_type( 'core/freeform' ) ) {
			$closer = static::CLOSER === $this->type ? '/' : '';
			echo "{$c( "\e[90m" )}<!-- "; // phpcs:ignore
			echo "{$c( "\e[0;31m" )}{$closer}"; // phpcs:ignore
			echo "{$c("\e[90m" )}wp:"; // phpcs:ignore
			echo "{$c( "\e[0;34m" )}freeform"; // phpcs:ignore
			echo "{$c( "\e[0;36m" )} {$c("\e[90m")}-->\n"; // phpcs:ignore
			return;
		}

		$namespace  = substr( $this->source_text, $this->namespace_at, $this->namespace_length );
		$slash      = 0 === $this->namespace_length ? '' : '/';
		$block_name = substr( $this->source_text, $this->name_at, $this->name_length );
		$closer     = static::CLOSER === $this->type ? '/' : '';
		$json       = substr( $this->source_text, $this->json_at, $this->json_length );

		// Whitespace between `<!--` and the optional closer flag plus the `wp:` prefix.
		$opener_whitespace_at     = $this->delimiter_at + 4;
		$opener_whitespace_length = $this->namespace_at - 3 - $opener_whitespace_at - ( static::CLOSER === $this->type ? 1 : 0 );

		$after_name_whitespace_at     = $this->name_at + $this->name_length;
		$after_name_whitespace_length = $this->json_at - $after_name_whitespace_at;

		// Whitespace between the JSON span and the void flag or `-->`.
		$closing_whitespace_at     = $this->json_at + $this->json_length;
		$closing_whitespace_length = $this->delimiter_at + $this->delimiter_length - 3 - $closing_whitespace_at;

		if ( '/' === $this->source_text[ $this->delimiter_at + $this->delimiter_length - 4 ] ) {
			$void_flag = '/';
			--$closing_whitespace_length;
		} else {
			$void_flag = '';
		}

		// Renders whitespace bytes as visible control-picture characters.
		$w = function ( $whitespace ) use ( $c ) {
			return $c( "\e[2;90m" ) . str_replace( array( ' ', "\t", "\f", "\r", "\n" ), array( '␣', '␉', '␌', '␍', '␤' ), $whitespace );
		};

		echo "{$c( "\e[90m" )}<!--"; // phpcs:ignore
		echo $w( substr( $this->source_text, $opener_whitespace_at, $opener_whitespace_length ) ); // phpcs:ignore
		echo "{$c( "\e[0;31m" )}{$closer}"; // phpcs:ignore
		echo "{$c("\e[90m" )}wp:{$c( "\e[2;34m" )}{$namespace}"; // phpcs:ignore
		echo "{$c( "\e[2;90m" )}{$slash}"; // phpcs:ignore
		echo "{$c( "\e[0;34m" )}{$block_name}"; // phpcs:ignore
		echo $w( substr( $this->source_text, $after_name_whitespace_at, $after_name_whitespace_length ) ); // phpcs:ignore
		echo "{$c("\e[0;2;32m" )}{$json}"; // phpcs:ignore
		echo $w( substr( $this->source_text, $closing_whitespace_at, $closing_whitespace_length ) ); // phpcs:ignore
		echo "{$c( "\e[0;36m" )}{$void_flag}{$c("\e[90m")}-->\n"; // phpcs:ignore
	}

	// Constant declarations that would otherwise pollute the top of the class.

	/**
	 * Indicates that the block comment delimiter closes an open block.
	 */
	const CLOSER = 'closer';

	/**
	 * Indicates that the parser started parsing a block comment delimiter, but
	 * the input document ended before it could finish. The document was likely truncated.
	 */
	const INCOMPLETE_INPUT = 'incomplete-input';

	/**
	 * Indicates that the block comment delimiter opens a block.
	 */
	const OPENER = 'opener';

	/**
	 * Indicates that the parser has not yet attempted to parse a block comment delimiter.
	 */
	const UNINITIALIZED = 'uninitialized';

	/**
	 * Indicates that the block comment delimiter represents a void block
	 * with no inner content of any kind.
	 */
	const VOID = 'void';
}