← Index
NYTProf Performance Profile   « line view »
For webmerge/scripts/webmerge.pl
  Run on Mon Oct 7 02:42:42 2013
Reported on Mon Oct 7 03:03:26 2013

Filename/usr/lib64/perl5/5.16.0/Pod/Simple/BlackBox.pm
StatementsExecuted 17 statements in 50.8ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11186µs103µsPod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
11178µs99µsPod::Simple::BlackBox::::BEGIN@1151Pod::Simple::BlackBox::BEGIN@1151
11151µs212µsPod::Simple::BlackBox::::BEGIN@25Pod::Simple::BlackBox::BEGIN@25
11151µs137µsPod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
11134µs34µsPod::Simple::BlackBox::::BEGIN@28Pod::Simple::BlackBox::BEGIN@28
11124µs24µsPod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
0000s0sPod::Simple::BlackBox::::_closers_for_all_curr_openPod::Simple::BlackBox::_closers_for_all_curr_open
0000s0sPod::Simple::BlackBox::::_dump_curr_openPod::Simple::BlackBox::_dump_curr_open
0000s0sPod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0000s0sPod::Simple::BlackBox::::_handle_encoding_linePod::Simple::BlackBox::_handle_encoding_line
0000s0sPod::Simple::BlackBox::::_handle_encoding_second_levelPod::Simple::BlackBox::_handle_encoding_second_level
0000s0sPod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0000s0sPod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0000s0sPod::Simple::BlackBox::::_ponder_VerbatimPod::Simple::BlackBox::_ponder_Verbatim
0000s0sPod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0000s0sPod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0000s0sPod::Simple::BlackBox::::_ponder_doc_endPod::Simple::BlackBox::_ponder_doc_end
0000s0sPod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0000s0sPod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0000s0sPod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0000s0sPod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0000s0sPod::Simple::BlackBox::::_ponder_paragraph_bufferPod::Simple::BlackBox::_ponder_paragraph_buffer
0000s0sPod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0000s0sPod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0000s0sPod::Simple::BlackBox::::_traverse_treelet_bitPod::Simple::BlackBox::_traverse_treelet_bit
0000s0sPod::Simple::BlackBox::::_treelet_from_formatting_codesPod::Simple::BlackBox::_treelet_from_formatting_codes
0000s0sPod::Simple::BlackBox::::_verbatim_formatPod::Simple::BlackBox::_verbatim_format
0000s0sPod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0000s0sPod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0000s0sPod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0000s0sPod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0000s0sPod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0000s0sPod::Simple::BlackBox::::text_content_of_treeletPod::Simple::BlackBox::text_content_of_treelet
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1
2package Pod::Simple::BlackBox;
3#
4# "What's in the box?" "Pain."
5#
6###########################################################################
7#
8# This is where all the scary things happen: parsing lines into
9# paragraphs; and then into directives, verbatims, and then also
10# turning formatting sequences into treelets.
11#
12# Are you really sure you want to read this code?
13#
14#-----------------------------------------------------------------------------
15#
16# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
17# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
18# to call the proper callbacks on the treelets.
19#
20# Every node in a treelet is a ['name', {attrhash}, ...children...]
21
222149µs2120µs
# spent 103µs (86+17) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (86µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22
use integer; # vroom!
# spent 103µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 17µs making 1 call to integer::import
232141µs2223µs
# spent 137µs (51+86) within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (51µs+86µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23
use strict;
# spent 137µs making 1 call to Pod::Simple::BlackBox::BEGIN@23 # spent 86µs making 1 call to strict::import
242142µs124µs
# spent 24µs within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (24µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24
use Carp ();
# spent 24µs making 1 call to Pod::Simple::BlackBox::BEGIN@24
252295µs2372µs
# spent 212µs (51+160) within Pod::Simple::BlackBox::BEGIN@25 which was called: # once (51µs+160µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25
use vars qw($VERSION );
# spent 212µs making 1 call to Pod::Simple::BlackBox::BEGIN@25 # spent 160µs making 1 call to vars::import
2615µs$VERSION = '3.20';
27#use constant DEBUG => 7;
28
# spent 34µs within Pod::Simple::BlackBox::BEGIN@28 which was called: # once (34µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31
BEGIN {
2914µs require Pod::Simple;
30134µs *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
31128.4ms134µs}
# spent 34µs making 1 call to Pod::Simple::BlackBox::BEGIN@28
32
33#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
34
35sub parse_line { shift->parse_lines(@_) } # alias
36
37# - - - Turn back now! Run away! - - -
38
39sub parse_lines { # Usage: $parser->parse_lines(@lines)
40 # an undef means end-of-stream
41 my $self = shift;
42
43 my $code_handler = $self->{'code_handler'};
44 my $cut_handler = $self->{'cut_handler'};
45 my $wl_handler = $self->{'whiteline_handler'};
46 $self->{'line_count'} ||= 0;
47
48 my $scratch;
49
50 DEBUG > 4 and
51 print "# Parsing starting at line ", $self->{'line_count'}, ".\n";
52
53 DEBUG > 5 and
54 print "# About to parse lines: ",
55 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
56
57 my $paras = ($self->{'paras'} ||= []);
58 # paragraph buffer. Because we need to defer processing of =over
59 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
60 # to process this.
61
62 $self->{'pod_para_count'} ||= 0;
63
64 my $line;
65 foreach my $source_line (@_) {
66 if( $self->{'source_dead'} ) {
67 DEBUG > 4 and print "# Source is dead.\n";
68 last;
69 }
70
71 unless( defined $source_line ) {
72 DEBUG > 4 and print "# Undef-line seen.\n";
73
74 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
75 push @$paras, $paras->[-1], $paras->[-1];
76 # So that it definitely fills the buffer.
77 $self->{'source_dead'} = 1;
78 $self->_ponder_paragraph_buffer;
79 next;
80 }
81
82
83 if( $self->{'line_count'}++ ) {
84 ($line = $source_line) =~ tr/\n\r//d;
85 # If we don't have two vars, we'll end up with that there
86 # tr/// modding the (potentially read-only) original source line!
87
88 } else {
89 DEBUG > 2 and print "First line: [$source_line]\n";
90
91 if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) {
92 DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
93 $self->_handle_encoding_line( "=encoding utf8" );
94 $line =~ tr/\n\r//d;
95
96 } elsif( $line =~ s/^\xFE\xFF//s ) {
97 DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
98 $self->scream(
99 $self->{'line_count'},
100 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
101 );
102 splice @_;
103 push @_, undef;
104 next;
105
106 # TODO: implement somehow?
107
108 } elsif( $line =~ s/^\xFF\xFE//s ) {
109 DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
110 $self->scream(
111 $self->{'line_count'},
112 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
113 );
114 splice @_;
115 push @_, undef;
116 next;
117
118 # TODO: implement somehow?
119
120 } else {
121 DEBUG > 2 and print "First line is BOM-less.\n";
122 ($line = $source_line) =~ tr/\n\r//d;
123 }
124 }
125
126
127 DEBUG > 5 and print "# Parsing line: [$line]\n";
128
129 if(!$self->{'in_pod'}) {
130 if($line =~ m/^=([a-zA-Z]+)/s) {
131 if($1 eq 'cut') {
132 $self->scream(
133 $self->{'line_count'},
134 "=cut found outside a pod block. Skipping to next block."
135 );
136
137 ## Before there were errata sections in the world, it was
138 ## least-pessimal to abort processing the file. But now we can
139 ## just barrel on thru (but still not start a pod block).
140 #splice @_;
141 #push @_, undef;
142
143 next;
144 } else {
145 $self->{'in_pod'} = $self->{'start_of_pod_block'}
146 = $self->{'last_was_blank'} = 1;
147 # And fall thru to the pod-mode block further down
148 }
149 } else {
150 DEBUG > 5 and print "# It's a code-line.\n";
151 $code_handler->(map $_, $line, $self->{'line_count'}, $self)
152 if $code_handler;
153 # Note: this may cause code to be processed out of order relative
154 # to pods, but in order relative to cuts.
155
156 # Note also that we haven't yet applied the transcoding to $line
157 # by time we call $code_handler!
158
159 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
160 # That RE is from perlsyn, section "Plain Old Comments (Not!)",
161 #$fname = $2 if defined $2;
162 #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n";
163 DEBUG > 1 and print "# Setting nextline to $1\n";
164 $self->{'line_count'} = $1 - 1;
165 }
166
167 next;
168 }
169 }
170
171 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
172 # Else we're in pod mode:
173
174 # Apply any necessary transcoding:
175 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
176
177 # HERE WE CATCH =encoding EARLY!
178 if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
179 $line = $self->_handle_encoding_line( $line );
180 }
181
182 if($line =~ m/^=cut/s) {
183 # here ends the pod block, and therefore the previous pod para
184 DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n";
185 $self->{'in_pod'} = 0;
186 # ++$self->{'pod_para_count'};
187 $self->_ponder_paragraph_buffer();
188 # by now it's safe to consider the previous paragraph as done.
189 $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
190 if $cut_handler;
191
192 # TODO: add to docs: Note: this may cause cuts to be processed out
193 # of order relative to pods, but in order relative to code.
194
195 } elsif($line =~ m/^(\s*)$/s) { # it's a blank line
196 if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
197 $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
198 if $wl_handler;
199 }
200
201 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
202 DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n";
203 push @{$paras->[-1]}, $line;
204 } # otherwise it's not interesting
205
206 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
207 DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n";
208 }
209
210 $self->{'last_was_blank'} = 1;
211
212 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
213
214 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
215 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
216 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
217 # Note that in "=head1 foo", the WS is lost.
218 # Example: ['=head1', {'start_line' => 123}, ' foo']
219
220 ++$self->{'pod_para_count'};
221
222 $self->_ponder_paragraph_buffer();
223 # by now it's safe to consider the previous paragraph as done.
224
225 push @$paras, $new; # the new incipient paragraph
226 DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
227
228 } elsif($line =~ m/^\s/s) {
229
230 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
231 DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n";
232 push @{$paras->[-1]}, $line;
233 } else {
234 ++$self->{'pod_para_count'};
235 $self->_ponder_paragraph_buffer();
236 # by now it's safe to consider the previous paragraph as done.
237 DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n";
238 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
239 }
240 } else {
241 ++$self->{'pod_para_count'};
242 $self->_ponder_paragraph_buffer();
243 # by now it's safe to consider the previous paragraph as done.
244 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
245 DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n";
246 }
247 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
248
249 } else {
250 # It's a non-blank line /continuing/ the current para
251 if(@$paras) {
252 DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n";
253 push @{$paras->[-1]}, $line;
254 } else {
255 # Unexpected case!
256 die "Continuing a paragraph but \@\$paras is empty?";
257 }
258 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
259 }
260
261 } # ends the big while loop
262
263 DEBUG > 1 and print(pretty(@$paras), "\n");
264 return $self;
265}
266
267#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
268
269sub _handle_encoding_line {
270 my($self, $line) = @_;
271
272 # The point of this routine is to set $self->{'_transcoder'} as indicated.
273
274 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
275 DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n";
276
277 my $e = $1;
278 my $orig = $e;
279 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
280
281 my $enc_error;
282
283 # Cf. perldoc Encode and perldoc Encode::Supported
284
285 require Pod::Simple::Transcode;
286
287 if( $self->{'encoding'} ) {
288 my $norm_current = $self->{'encoding'};
289 my $norm_e = $e;
290 foreach my $that ($norm_current, $norm_e) {
291 $that = lc($that);
292 $that =~ s/[-_]//g;
293 }
294 if($norm_current eq $norm_e) {
295 DEBUG > 1 and print "The '=encoding $orig' line is ",
296 "redundant. ($norm_current eq $norm_e). Ignoring.\n";
297 $enc_error = '';
298 # But that doesn't necessarily mean that the earlier one went okay
299 } else {
300 $enc_error = "Encoding is already set to " . $self->{'encoding'};
301 DEBUG > 1 and print $enc_error;
302 }
303 } elsif (
304 # OK, let's turn on the encoding
305 do {
306 DEBUG > 1 and print " Setting encoding to $e\n";
307 $self->{'encoding'} = $e;
308 1;
309 }
310 and $e eq 'HACKRAW'
311 ) {
312 DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n";
313
314 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
315
316 die($enc_error = "WHAT? _transcoder is already set?!")
317 if $self->{'_transcoder'}; # should never happen
318 require Pod::Simple::Transcode;
319 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
320 eval {
321 my @x = ('', "abc", "123");
322 $self->{'_transcoder'}->(@x);
323 };
324 $@ && die( $enc_error =
325 "Really unexpected error setting up encoding $e: $@\nAborting"
326 );
327
328 } else {
329 my @supported = Pod::Simple::Transcode::->all_encodings;
330
331 # Note unsupported, and complain
332 DEBUG and print " Encoding [$e] is unsupported.",
333 "\nSupporteds: @supported\n";
334 my $suggestion = '';
335
336 # Look for a near match:
337 my $norm = lc($e);
338 $norm =~ tr[-_][]d;
339 my $n;
340 foreach my $enc (@supported) {
341 $n = lc($enc);
342 $n =~ tr[-_][]d;
343 next unless $n eq $norm;
344 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
345 last;
346 }
347 my $encmodver = Pod::Simple::Transcode::->encmodver;
348 $enc_error = join '' =>
349 "This document probably does not appear as it should, because its ",
350 "\"=encoding $e\" line calls for an unsupported encoding.",
351 $suggestion, " [$encmodver\'s supported encodings are: @supported]"
352 ;
353
354 $self->scream( $self->{'line_count'}, $enc_error );
355 }
356 push @{ $self->{'encoding_command_statuses'} }, $enc_error;
357
358 return '=encoding ALREADYDONE';
359}
360
361# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
362
363sub _handle_encoding_second_level {
364 # By time this is called, the encoding (if well formed) will already
365 # have been acted one.
366 my($self, $para) = @_;
367 my @x = @$para;
368 my $content = join ' ', splice @x, 2;
369 $content =~ s/^\s+//s;
370 $content =~ s/\s+$//s;
371
372 DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n";
373
374 if($content eq 'ALREADYDONE') {
375 # It's already been handled. Check for errors.
376 if(! $self->{'encoding_command_statuses'} ) {
377 DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n";
378 } elsif( $self->{'encoding_command_statuses'}[-1] ) {
379 $self->whine( $para->[1]{'start_line'},
380 sprintf "Couldn't do %s: %s",
381 $self->{'encoding_command_reqs' }[-1],
382 $self->{'encoding_command_statuses'}[-1],
383 );
384 } else {
385 DEBUG > 2 and print " (Yup, it was successfully handled already.)\n";
386 }
387
388 } else {
389 # Otherwise it's a syntax error
390 $self->whine( $para->[1]{'start_line'},
391 "Invalid =encoding syntax: $content"
392 );
393 }
394
395 return;
396}
397
398#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
399
400{
40127µsmy $m = -321; # magic line number
402
403sub _gen_errata {
404 my $self = $_[0];
405 # Return 0 or more fake-o paragraphs explaining the accumulated
406 # errors on this document.
407
408 return() unless $self->{'errata'} and keys %{$self->{'errata'}};
409
410 my @out;
411
412 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
413 push @out,
414 ['=item', {'start_line' => $m}, "Around line $line:"],
415 map( ['~Para', {'start_line' => $m, '~cooked' => 1},
416 #['~Top', {'start_line' => $m},
417 $_
418 #]
419 ],
420 @{$self->{'errata'}{$line}}
421 )
422 ;
423 }
424
425 # TODO: report of unknown entities? unrenderable characters?
426
427 unshift @out,
428 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
429 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
430 "Hey! ",
431 ['B', {},
432 'The above document had some coding errors, which are explained below:'
433 ]
434 ],
435 ['=over', {'start_line' => $m, 'errata' => 1}, ''],
436 ;
437
438 push @out,
439 ['=back', {'start_line' => $m, 'errata' => 1}, ''],
440 ;
441
442 DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n";
443
444 return @out;
445}
446
447}
448
449#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
450
451##############################################################################
452##
453## stop reading now stop reading now stop reading now stop reading now stop
454##
455## HERE IT BECOMES REALLY SCARY
456##
457## stop reading now stop reading now stop reading now stop reading now stop
458##
459##############################################################################
460
461sub _ponder_paragraph_buffer {
462
463 # Para-token types as found in the buffer.
464 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
465 # =over, =back, =item
466 # and the null =pod (to be complained about if over one line)
467 #
468 # "~data" paragraphs are something we generate at this level, depending on
469 # a currently open =over region
470
471 # Events fired: Begin and end for:
472 # directivename (like head1 .. head4), item, extend,
473 # for (from =begin...=end, =for),
474 # over-bullet, over-number, over-text, over-block,
475 # item-bullet, item-number, item-text,
476 # Document,
477 # Data, Para, Verbatim
478 # B, C, longdirname (TODO -- wha?), etc. for all directives
479 #
480
481 my $self = $_[0];
482 my $paras;
483 return unless @{$paras = $self->{'paras'}};
484 my $curr_open = ($self->{'curr_open'} ||= []);
485
486 my $scratch;
487
488 DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n";
489
490 # We have something in our buffer. So apparently the document has started.
491 unless($self->{'doc_has_started'}) {
492 $self->{'doc_has_started'} = 1;
493
494 my $starting_contentless;
495 $starting_contentless =
496 (
497 !@$curr_open
498 and @$paras and ! grep $_->[0] ne '~end', @$paras
499 # i.e., if the paras is all ~ends
500 )
501 ;
502 DEBUG and print "# Starting ",
503 $starting_contentless ? 'contentless' : 'contentful',
504 " document\n"
505 ;
506
507 $self->_handle_element_start(
508 ($scratch = 'Document'),
509 {
510 'start_line' => $paras->[0][1]{'start_line'},
511 $starting_contentless ? ( 'contentless' => 1 ) : (),
512 },
513 );
514 }
515
516 my($para, $para_type);
517 while(@$paras) {
518 last if @$paras == 1 and
519 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
520 or $paras->[0][0] eq '=item' )
521 ;
522 # Those're the three kinds of paragraphs that require lookahead.
523 # Actually, an "=item Foo" inside an <over type=text> region
524 # and any =item inside an <over type=block> region (rare)
525 # don't require any lookahead, but all others (bullets
526 # and numbers) do.
527
528# TODO: whinge about many kinds of directives in non-resolving =for regions?
529# TODO: many? like what? =head1 etc?
530
531 $para = shift @$paras;
532 $para_type = $para->[0];
533
534 DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (",
535 $self->_dump_curr_open(), ")\n";
536
537 if($para_type eq '=for') {
538 next if $self->_ponder_for($para,$curr_open,$paras);
539
540 } elsif($para_type eq '=begin') {
541 next if $self->_ponder_begin($para,$curr_open,$paras);
542
543 } elsif($para_type eq '=end') {
544 next if $self->_ponder_end($para,$curr_open,$paras);
545
546 } elsif($para_type eq '~end') { # The virtual end-document signal
547 next if $self->_ponder_doc_end($para,$curr_open,$paras);
548 }
549
550
551 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
552 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
553 if(grep $_->[1]{'~ignore'}, @$curr_open) {
554 DEBUG > 1 and
555 print "Skipping $para_type paragraph because in ignore mode.\n";
556 next;
557 }
558 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
559 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
560
561 if($para_type eq '=pod') {
562 $self->_ponder_pod($para,$curr_open,$paras);
563
564 } elsif($para_type eq '=over') {
565 next if $self->_ponder_over($para,$curr_open,$paras);
566
567 } elsif($para_type eq '=back') {
568 next if $self->_ponder_back($para,$curr_open,$paras);
569
570 } else {
571
572 # All non-magical codes!!!
573
574 # Here we start using $para_type for our own twisted purposes, to
575 # mean how it should get treated, not as what the element name
576 # should be.
577
578 DEBUG > 1 and print "Pondering non-magical $para_type\n";
579
580 my $i;
581
582 # Enforce some =headN discipline
583 if($para_type =~ m/^=head\d$/s
584 and ! $self->{'accept_heads_anywhere'}
585 and @$curr_open
586 and $curr_open->[-1][0] eq '=over'
587 ) {
588 DEBUG > 2 and print "'=$para_type' inside an '=over'!\n";
589 $self->whine(
590 $para->[1]{'start_line'},
591 "You forgot a '=back' before '$para_type'"
592 );
593 unshift @$paras, ['=back', {}, ''], $para; # close the =over
594 next;
595 }
596
597
598 if($para_type eq '=item') {
599
600 my $over;
601 unless(@$curr_open and
602 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
603 $self->whine(
604 $para->[1]{'start_line'},
605 "'=item' outside of any '=over'"
606 );
607 unshift @$paras,
608 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
609 $para
610 ;
611 next;
612 }
613
614
615 my $over_type = $over->[1]{'~type'};
616
617 if(!$over_type) {
618 # Shouldn't happen1
619 die "Typeless over in stack, starting at line "
620 . $over->[1]{'start_line'};
621
622 } elsif($over_type eq 'block') {
623 unless($curr_open->[-1][1]{'~bitched_about'}) {
624 $curr_open->[-1][1]{'~bitched_about'} = 1;
625 $self->whine(
626 $curr_open->[-1][1]{'start_line'},
627 "You can't have =items (as at line "
628 . $para->[1]{'start_line'}
629 . ") unless the first thing after the =over is an =item"
630 );
631 }
632 # Just turn it into a paragraph and reconsider it
633 $para->[0] = '~Para';
634 unshift @$paras, $para;
635 next;
636
637 } elsif($over_type eq 'text') {
638 my $item_type = $self->_get_item_type($para);
639 # That kills the content of the item if it's a number or bullet.
640 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
641
642 if($item_type eq 'text') {
643 # Nothing special needs doing for 'text'
644 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
645 die "Unknown item type $item_type"
646 unless $item_type eq 'number' or $item_type eq 'bullet';
647 # Undo our clobbering:
648 push @$para, $para->[1]{'~orig_content'};
649 delete $para->[1]{'number'};
650 # Only a PROPER item-number element is allowed
651 # to have a number attribute.
652 } else {
653 die "Unhandled item type $item_type"; # should never happen
654 }
655
656 # =item-text thingies don't need any assimilation, it seems.
657
658 } elsif($over_type eq 'number') {
659 my $item_type = $self->_get_item_type($para);
660 # That kills the content of the item if it's a number or bullet.
661 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
662
663 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
664
665 if($item_type eq 'bullet') {
666 # Hm, it's not numeric. Correct for this.
667 $para->[1]{'number'} = $expected_value;
668 $self->whine(
669 $para->[1]{'start_line'},
670 "Expected '=item $expected_value'"
671 );
672 push @$para, $para->[1]{'~orig_content'};
673 # restore the bullet, blocking the assimilation of next para
674
675 } elsif($item_type eq 'text') {
676 # Hm, it's not numeric. Correct for this.
677 $para->[1]{'number'} = $expected_value;
678 $self->whine(
679 $para->[1]{'start_line'},
680 "Expected '=item $expected_value'"
681 );
682 # Text content will still be there and will block next ~Para
683
684 } elsif($item_type ne 'number') {
685 die "Unknown item type $item_type"; # should never happen
686
687 } elsif($expected_value == $para->[1]{'number'}) {
688 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
689
690 } else {
691 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
692 " instead of the expected value of $expected_value\n";
693 $self->whine(
694 $para->[1]{'start_line'},
695 "You have '=item " . $para->[1]{'number'} .
696 "' instead of the expected '=item $expected_value'"
697 );
698 $para->[1]{'number'} = $expected_value; # correcting!!
699 }
700
701 if(@$para == 2) {
702 # For the cases where we /didn't/ push to @$para
703 if($paras->[0][0] eq '~Para') {
704 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
705 push @$para, splice @{shift @$paras},2;
706 } else {
707 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
708 push @$para, ''; # Just so it's not contentless
709 }
710 }
711
712
713 } elsif($over_type eq 'bullet') {
714 my $item_type = $self->_get_item_type($para);
715 # That kills the content of the item if it's a number or bullet.
716 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
717
718 if($item_type eq 'bullet') {
719 # as expected!
720
721 if( $para->[1]{'~_freaky_para_hack'} ) {
722 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
723 push @$para, delete $para->[1]{'~_freaky_para_hack'};
724 }
725
726 } elsif($item_type eq 'number') {
727 $self->whine(
728 $para->[1]{'start_line'},
729 "Expected '=item *'"
730 );
731 push @$para, $para->[1]{'~orig_content'};
732 # and block assimilation of the next paragraph
733 delete $para->[1]{'number'};
734 # Only a PROPER item-number element is allowed
735 # to have a number attribute.
736 } elsif($item_type eq 'text') {
737 $self->whine(
738 $para->[1]{'start_line'},
739 "Expected '=item *'"
740 );
741 # But doesn't need processing. But it'll block assimilation
742 # of the next para.
743 } else {
744 die "Unhandled item type $item_type"; # should never happen
745 }
746
747 if(@$para == 2) {
748 # For the cases where we /didn't/ push to @$para
749 if($paras->[0][0] eq '~Para') {
750 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
751 push @$para, splice @{shift @$paras},2;
752 } else {
753 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
754 push @$para, ''; # Just so it's not contentless
755 }
756 }
757
758 } else {
759 die "Unhandled =over type \"$over_type\"?";
760 # Shouldn't happen!
761 }
762
763 $para_type = 'Plain';
764 $para->[0] .= '-' . $over_type;
765 # Whew. Now fall thru and process it.
766
767
768 } elsif($para_type eq '=extend') {
769 # Well, might as well implement it here.
770 $self->_ponder_extend($para);
771 next; # and skip
772 } elsif($para_type eq '=encoding') {
773 # Not actually acted on here, but we catch errors here.
774 $self->_handle_encoding_second_level($para);
775
776 next; # and skip
777 } elsif($para_type eq '~Verbatim') {
778 $para->[0] = 'Verbatim';
779 $para_type = '?Verbatim';
780 } elsif($para_type eq '~Para') {
781 $para->[0] = 'Para';
782 $para_type = '?Plain';
783 } elsif($para_type eq 'Data') {
784 $para->[0] = 'Data';
785 $para_type = '?Data';
786 } elsif( $para_type =~ s/^=//s
787 and defined( $para_type = $self->{'accept_directives'}{$para_type} )
788 ) {
789 DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n";
790 } else {
791 # An unknown directive!
792 DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n",
793 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
794 ;
795 $self->whine(
796 $para->[1]{'start_line'},
797 "Unknown directive: $para->[0]"
798 );
799
800 # And maybe treat it as text instead of just letting it go?
801 next;
802 }
803
804 if($para_type =~ s/^\?//s) {
805 if(! @$curr_open) { # usual case
806 DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n";
807 } else {
808 my @fors = grep $_->[0] eq '=for', @$curr_open;
809 DEBUG > 1 and print "Containing fors: ",
810 join(',', map $_->[1]{'target'}, @fors), "\n";
811
812 if(! @fors) {
813 DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n";
814
815 #} elsif(grep $_->[1]{'~resolve'}, @fors) {
816 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
817 } elsif( $fors[-1][1]{'~resolve'} ) {
818 # Look to the immediately containing for
819
820 if($para_type eq 'Data') {
821 DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
822 $para->[0] = 'Para';
823 $para_type = 'Plain';
824 } else {
825 DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
826 }
827 } else {
828 DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
829 $para->[0] = $para_type = 'Data';
830 }
831 }
832 }
833
834 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
835 if($para_type eq 'Plain') {
836 $self->_ponder_Plain($para);
837 } elsif($para_type eq 'Verbatim') {
838 $self->_ponder_Verbatim($para);
839 } elsif($para_type eq 'Data') {
840 $self->_ponder_Data($para);
841 } else {
842 die "\$para type is $para_type -- how did that happen?";
843 # Shouldn't happen.
844 }
845
846 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
847 $para->[0] =~ s/^[~=]//s;
848
849 DEBUG and print "\n", pretty($para), "\n";
850
851 # traverse the treelet (which might well be just one string scalar)
852 $self->{'content_seen'} ||= 1;
853 $self->_traverse_treelet_bit(@$para);
854 }
855 }
856
857 return;
858}
859
860###########################################################################
861# The sub-ponderers...
862
- -
865sub _ponder_for {
866 my ($self,$para,$curr_open,$paras) = @_;
867
868 # Fake it out as a begin/end
869 my $target;
870
871 if(grep $_->[1]{'~ignore'}, @$curr_open) {
872 DEBUG > 1 and print "Ignoring ignorable =for\n";
873 return 1;
874 }
875
876 for(my $i = 2; $i < @$para; ++$i) {
877 if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
878 $target = $1;
879 last;
880 }
881 }
882 unless(defined $target) {
883 $self->whine(
884 $para->[1]{'start_line'},
885 "=for without a target?"
886 );
887 return 1;
888 }
889 DEBUG > 1 and
890 print "Faking out a =for $target as a =begin $target / =end $target\n";
891
892 $para->[0] = 'Data';
893
894 unshift @$paras,
895 ['=begin',
896 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
897 $target,
898 ],
899 $para,
900 ['=end',
901 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
902 $target,
903 ],
904 ;
905
906 return 1;
907}
908
909sub _ponder_begin {
910 my ($self,$para,$curr_open,$paras) = @_;
911 my $content = join ' ', splice @$para, 2;
912 $content =~ s/^\s+//s;
913 $content =~ s/\s+$//s;
914 unless(length($content)) {
915 $self->whine(
916 $para->[1]{'start_line'},
917 "=begin without a target?"
918 );
919 DEBUG and print "Ignoring targetless =begin\n";
920 return 1;
921 }
922
923 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
924 $para->[1]{'title'} = $title if ($title);
925 $para->[1]{'target'} = $target; # without any ':'
926 $content = $target; # strip off the title
927
928 $content =~ s/^:!/!:/s;
929 my $neg; # whether this is a negation-match
930 $neg = 1 if $content =~ s/^!//s;
931 my $to_resolve; # whether to process formatting codes
932 $to_resolve = 1 if $content =~ s/^://s;
933
934 my $dont_ignore; # whether this target matches us
935
936 foreach my $target_name (
937 split(',', $content, -1),
938 $neg ? () : '*'
939 ) {
940 DEBUG > 2 and
941 print " Considering whether =begin $content matches $target_name\n";
942 next unless $self->{'accept_targets'}{$target_name};
943
944 DEBUG > 2 and
945 print " It DOES match the acceptable target $target_name!\n";
946 $to_resolve = 1
947 if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
948 $dont_ignore = 1;
949 $para->[1]{'target_matching'} = $target_name;
950 last; # stop looking at other target names
951 }
952
953 if($neg) {
954 if( $dont_ignore ) {
955 $dont_ignore = '';
956 delete $para->[1]{'target_matching'};
957 DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n";
958 } else {
959 $dont_ignore = 1;
960 $para->[1]{'target_matching'} = '!';
961 DEBUG > 2 and print " But the leading ! means that this IS a match!\n";
962 }
963 }
964
965 $para->[0] = '=for'; # Just what we happen to call these, internally
966 $para->[1]{'~really'} ||= '=begin';
967 $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
968 $para->[1]{'~resolve'} = $to_resolve || 0;
969
970 DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '',
971 "ignore contents of this region\n";
972 DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ",
973 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
974 DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n";
975
976 push @$curr_open, $para;
977 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
978 DEBUG > 1 and print "Ignoring ignorable =begin\n";
979 } else {
980 $self->{'content_seen'} ||= 1;
981 $self->_handle_element_start((my $scratch='for'), $para->[1]);
982 }
983
984 return 1;
985}
986
987sub _ponder_end {
988 my ($self,$para,$curr_open,$paras) = @_;
989 my $content = join ' ', splice @$para, 2;
990 $content =~ s/^\s+//s;
991 $content =~ s/\s+$//s;
992 DEBUG and print "Ogling '=end $content' directive\n";
993
994 unless(length($content)) {
995 $self->whine(
996 $para->[1]{'start_line'},
997 "'=end' without a target?" . (
998 ( @$curr_open and $curr_open->[-1][0] eq '=for' )
999 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1000 : ''
1001 )
1002 );
1003 DEBUG and print "Ignoring targetless =end\n";
1004 return 1;
1005 }
1006
1007 unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1008 $self->whine(
1009 $para->[1]{'start_line'},
1010 "'=end $content' is invalid. (Stack: "
1011 . $self->_dump_curr_open() . ')'
1012 );
1013 DEBUG and print "Ignoring mistargetted =end $content\n";
1014 return 1;
1015 }
1016
1017 unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1018 $self->whine(
1019 $para->[1]{'start_line'},
1020 "=end $content without matching =begin. (Stack: "
1021 . $self->_dump_curr_open() . ')'
1022 );
1023 DEBUG and print "Ignoring mistargetted =end $content\n";
1024 return 1;
1025 }
1026
1027 unless($content eq $curr_open->[-1][1]{'target'}) {
1028 $self->whine(
1029 $para->[1]{'start_line'},
1030 "=end $content doesn't match =begin "
1031 . $curr_open->[-1][1]{'target'}
1032 . ". (Stack: "
1033 . $self->_dump_curr_open() . ')'
1034 );
1035 DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1036 return 1;
1037 }
1038
1039 # Else it's okay to close...
1040 if(grep $_->[1]{'~ignore'}, @$curr_open) {
1041 DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n";
1042 # And that may be because of this to-be-closed =for region, or some
1043 # other one, but it doesn't matter.
1044 } else {
1045 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1046 # what's that for?
1047
1048 $self->{'content_seen'} ||= 1;
1049 $self->_handle_element_end( my $scratch = 'for', $para->[1]);
1050 }
1051 DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1052 pop @$curr_open;
1053
1054 return 1;
1055}
1056
1057sub _ponder_doc_end {
1058 my ($self,$para,$curr_open,$paras) = @_;
1059 if(@$curr_open) { # Deal with things left open
1060 DEBUG and print "Stack is nonempty at end-document: (",
1061 $self->_dump_curr_open(), ")\n";
1062
1063 DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n";
1064 unshift @$paras, $self->_closers_for_all_curr_open;
1065 # Make sure there is exactly one ~end in the parastack, at the end:
1066 @$paras = grep $_->[0] ne '~end', @$paras;
1067 push @$paras, $para, $para;
1068 # We need two -- once for the next cycle where we
1069 # generate errata, and then another to be at the end
1070 # when that loop back around to process the errata.
1071 return 1;
1072
1073 } else {
1074 DEBUG and print "Okay, stack is empty now.\n";
1075 }
1076
1077 # Try generating errata section, if applicable
1078 unless($self->{'~tried_gen_errata'}) {
1079 $self->{'~tried_gen_errata'} = 1;
1080 my @extras = $self->_gen_errata();
1081 if(@extras) {
1082 unshift @$paras, @extras;
1083 DEBUG and print "Generated errata... relooping...\n";
1084 return 1; # I.e., loop around again to process these fake-o paragraphs
1085 }
1086 }
1087
1088 splice @$paras; # Well, that's that for this paragraph buffer.
1089 DEBUG and print "Throwing end-document event.\n";
1090
1091 $self->_handle_element_end( my $scratch = 'Document' );
1092 return 1; # Hasta la byebye
1093}
1094
1095sub _ponder_pod {
1096 my ($self,$para,$curr_open,$paras) = @_;
1097 $self->whine(
1098 $para->[1]{'start_line'},
1099 "=pod directives shouldn't be over one line long! Ignoring all "
1100 . (@$para - 2) . " lines of content"
1101 ) if @$para > 3;
1102
1103 # Content ignored unless 'pod_handler' is set
1104 if (my $pod_handler = $self->{'pod_handler'}) {
1105 my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1106 $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1107 $pod_handler->($line, $line_num, $self);
1108 }
1109
1110 # The surrounding methods set content_seen, so let us remain consistent.
1111 # I do not know why it was not here before -- should it not be here?
1112 # $self->{'content_seen'} ||= 1;
1113
1114 return;
1115}
1116
1117sub _ponder_over {
1118 my ($self,$para,$curr_open,$paras) = @_;
1119 return 1 unless @$paras;
1120 my $list_type;
1121
1122 if($paras->[0][0] eq '=item') { # most common case
1123 $list_type = $self->_get_initial_item_type($paras->[0]);
1124
1125 } elsif($paras->[0][0] eq '=back') {
1126 # Ignore empty lists by default
1127 if ($self->{'parse_empty_lists'}) {
1128 $list_type = 'empty';
1129 } else {
1130 shift @$paras;
1131 return 1;
1132 }
1133 } elsif($paras->[0][0] eq '~end') {
1134 $self->whine(
1135 $para->[1]{'start_line'},
1136 "=over is the last thing in the document?!"
1137 );
1138 return 1; # But feh, ignore it.
1139 } else {
1140 $list_type = 'block';
1141 }
1142 $para->[1]{'~type'} = $list_type;
1143 push @$curr_open, $para;
1144 # yes, we reuse the paragraph as a stack item
1145
1146 my $content = join ' ', splice @$para, 2;
1147 my $overness;
1148 if($content =~ m/^\s*$/s) {
1149 $para->[1]{'indent'} = 4;
1150 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
1151221.6ms2120µs
# spent 99µs (78+21) within Pod::Simple::BlackBox::BEGIN@1151 which was called: # once (78µs+21µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1151
no integer;
# spent 99µs making 1 call to Pod::Simple::BlackBox::BEGIN@1151 # spent 21µs making 1 call to integer::unimport
1152 $para->[1]{'indent'} = $1;
1153 if($1 == 0) {
1154 $self->whine(
1155 $para->[1]{'start_line'},
1156 "Can't have a 0 in =over $content"
1157 );
1158 $para->[1]{'indent'} = 4;
1159 }
1160 } else {
1161 $self->whine(
1162 $para->[1]{'start_line'},
1163 "=over should be: '=over' or '=over positive_number'"
1164 );
1165 $para->[1]{'indent'} = 4;
1166 }
1167 DEBUG > 1 and print "=over found of type $list_type\n";
1168
1169 $self->{'content_seen'} ||= 1;
1170 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1171
1172 return;
1173}
1174
1175sub _ponder_back {
1176 my ($self,$para,$curr_open,$paras) = @_;
1177 # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1178
1179 my $content = join ' ', splice @$para, 2;
1180 if($content =~ m/\S/) {
1181 $self->whine(
1182 $para->[1]{'start_line'},
1183 "=back doesn't take any parameters, but you said =back $content"
1184 );
1185 }
1186
1187 if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1188 DEBUG > 1 and print "=back happily closes matching =over\n";
1189 # Expected case: we're closing the most recently opened thing
1190 #my $over = pop @$curr_open;
1191 $self->{'content_seen'} ||= 1;
1192 $self->_handle_element_end( my $scratch =
1193 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1194 );
1195 } else {
1196 DEBUG > 1 and print "=back found without a matching =over. Stack: (",
1197 join(', ', map $_->[0], @$curr_open), ").\n";
1198 $self->whine(
1199 $para->[1]{'start_line'},
1200 '=back without =over'
1201 );
1202 return 1; # and ignore it
1203 }
1204}
1205
1206sub _ponder_item {
1207 my ($self,$para,$curr_open,$paras) = @_;
1208 my $over;
1209 unless(@$curr_open and
1210 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1211 $self->whine(
1212 $para->[1]{'start_line'},
1213 "'=item' outside of any '=over'"
1214 );
1215 unshift @$paras,
1216 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1217 $para
1218 ;
1219 return 1;
1220 }
1221
1222
1223 my $over_type = $over->[1]{'~type'};
1224
1225 if(!$over_type) {
1226 # Shouldn't happen1
1227 die "Typeless over in stack, starting at line "
1228 . $over->[1]{'start_line'};
1229
1230 } elsif($over_type eq 'block') {
1231 unless($curr_open->[-1][1]{'~bitched_about'}) {
1232 $curr_open->[-1][1]{'~bitched_about'} = 1;
1233 $self->whine(
1234 $curr_open->[-1][1]{'start_line'},
1235 "You can't have =items (as at line "
1236 . $para->[1]{'start_line'}
1237 . ") unless the first thing after the =over is an =item"
1238 );
1239 }
1240 # Just turn it into a paragraph and reconsider it
1241 $para->[0] = '~Para';
1242 unshift @$paras, $para;
1243 return 1;
1244
1245 } elsif($over_type eq 'text') {
1246 my $item_type = $self->_get_item_type($para);
1247 # That kills the content of the item if it's a number or bullet.
1248 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1249
1250 if($item_type eq 'text') {
1251 # Nothing special needs doing for 'text'
1252 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
1253 die "Unknown item type $item_type"
1254 unless $item_type eq 'number' or $item_type eq 'bullet';
1255 # Undo our clobbering:
1256 push @$para, $para->[1]{'~orig_content'};
1257 delete $para->[1]{'number'};
1258 # Only a PROPER item-number element is allowed
1259 # to have a number attribute.
1260 } else {
1261 die "Unhandled item type $item_type"; # should never happen
1262 }
1263
1264 # =item-text thingies don't need any assimilation, it seems.
1265
1266 } elsif($over_type eq 'number') {
1267 my $item_type = $self->_get_item_type($para);
1268 # That kills the content of the item if it's a number or bullet.
1269 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1270
1271 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1272
1273 if($item_type eq 'bullet') {
1274 # Hm, it's not numeric. Correct for this.
1275 $para->[1]{'number'} = $expected_value;
1276 $self->whine(
1277 $para->[1]{'start_line'},
1278 "Expected '=item $expected_value'"
1279 );
1280 push @$para, $para->[1]{'~orig_content'};
1281 # restore the bullet, blocking the assimilation of next para
1282
1283 } elsif($item_type eq 'text') {
1284 # Hm, it's not numeric. Correct for this.
1285 $para->[1]{'number'} = $expected_value;
1286 $self->whine(
1287 $para->[1]{'start_line'},
1288 "Expected '=item $expected_value'"
1289 );
1290 # Text content will still be there and will block next ~Para
1291
1292 } elsif($item_type ne 'number') {
1293 die "Unknown item type $item_type"; # should never happen
1294
1295 } elsif($expected_value == $para->[1]{'number'}) {
1296 DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n";
1297
1298 } else {
1299 DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'},
1300 " instead of the expected value of $expected_value\n";
1301 $self->whine(
1302 $para->[1]{'start_line'},
1303 "You have '=item " . $para->[1]{'number'} .
1304 "' instead of the expected '=item $expected_value'"
1305 );
1306 $para->[1]{'number'} = $expected_value; # correcting!!
1307 }
1308
1309 if(@$para == 2) {
1310 # For the cases where we /didn't/ push to @$para
1311 if($paras->[0][0] eq '~Para') {
1312 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1313 push @$para, splice @{shift @$paras},2;
1314 } else {
1315 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1316 push @$para, ''; # Just so it's not contentless
1317 }
1318 }
1319
1320
1321 } elsif($over_type eq 'bullet') {
1322 my $item_type = $self->_get_item_type($para);
1323 # That kills the content of the item if it's a number or bullet.
1324 DEBUG and print " Item is of type ", $para->[0], " under $over_type\n";
1325
1326 if($item_type eq 'bullet') {
1327 # as expected!
1328
1329 if( $para->[1]{'~_freaky_para_hack'} ) {
1330 DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n";
1331 push @$para, delete $para->[1]{'~_freaky_para_hack'};
1332 }
1333
1334 } elsif($item_type eq 'number') {
1335 $self->whine(
1336 $para->[1]{'start_line'},
1337 "Expected '=item *'"
1338 );
1339 push @$para, $para->[1]{'~orig_content'};
1340 # and block assimilation of the next paragraph
1341 delete $para->[1]{'number'};
1342 # Only a PROPER item-number element is allowed
1343 # to have a number attribute.
1344 } elsif($item_type eq 'text') {
1345 $self->whine(
1346 $para->[1]{'start_line'},
1347 "Expected '=item *'"
1348 );
1349 # But doesn't need processing. But it'll block assimilation
1350 # of the next para.
1351 } else {
1352 die "Unhandled item type $item_type"; # should never happen
1353 }
1354
1355 if(@$para == 2) {
1356 # For the cases where we /didn't/ push to @$para
1357 if($paras->[0][0] eq '~Para') {
1358 DEBUG and print "Assimilating following ~Para content into $over_type item\n";
1359 push @$para, splice @{shift @$paras},2;
1360 } else {
1361 DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n";
1362 push @$para, ''; # Just so it's not contentless
1363 }
1364 }
1365
1366 } else {
1367 die "Unhandled =over type \"$over_type\"?";
1368 # Shouldn't happen!
1369 }
1370 $para->[0] .= '-' . $over_type;
1371
1372 return;
1373}
1374
1375sub _ponder_Plain {
1376 my ($self,$para) = @_;
1377 DEBUG and print " giving plain treatment...\n";
1378 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1379 or $para->[1]{'~cooked'}
1380 ) {
1381 push @$para,
1382 @{$self->_make_treelet(
1383 join("\n", splice(@$para, 2)),
1384 $para->[1]{'start_line'}
1385 )};
1386 }
1387 # Empty paragraphs don't need a treelet for any reason I can see.
1388 # And precooked paragraphs already have a treelet.
1389 return;
1390}
1391
1392sub _ponder_Verbatim {
1393 my ($self,$para) = @_;
1394 DEBUG and print " giving verbatim treatment...\n";
1395
1396 $para->[1]{'xml:space'} = 'preserve';
1397
1398 my $indent = $self->strip_verbatim_indent;
1399 if ($indent && ref $indent eq 'CODE') {
1400 my @shifted = (shift @{$para}, shift @{$para});
1401 $indent = $indent->($para);
1402 unshift @{$para}, @shifted;
1403 }
1404
1405 for(my $i = 2; $i < @$para; $i++) {
1406 foreach my $line ($para->[$i]) { # just for aliasing
1407 # Strip indentation.
1408 $line =~ s/^\Q$indent// if $indent
1409 && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1410 while( $line =~
1411 # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1412 # tabs are at every EIGHTH column. For portability, it has to be
1413 # one setting everywhere, and 8th wins.
1414 s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1415 ) {}
1416
1417 # TODO: whinge about (or otherwise treat) unindented or overlong lines
1418
1419 }
1420 }
1421
1422 # Now the VerbatimFormatted hoodoo...
1423 if( $self->{'accept_codes'} and
1424 $self->{'accept_codes'}{'VerbatimFormatted'}
1425 ) {
1426 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1427 # Kill any number of terminal newlines
1428 $self->_verbatim_format($para);
1429 } elsif ($self->{'codes_in_verbatim'}) {
1430 push @$para,
1431 @{$self->_make_treelet(
1432 join("\n", splice(@$para, 2)),
1433 $para->[1]{'start_line'}, $para->[1]{'xml:space'}
1434 )};
1435 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1436 } else {
1437 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1438 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1439 }
1440 return;
1441}
1442
1443sub _ponder_Data {
1444 my ($self,$para) = @_;
1445 DEBUG and print " giving data treatment...\n";
1446 $para->[1]{'xml:space'} = 'preserve';
1447 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1448 return;
1449}
1450
- -
1454###########################################################################
1455
1456sub _traverse_treelet_bit { # for use only by the routine above
1457 my($self, $name) = splice @_,0,2;
1458
1459 my $scratch;
1460 $self->_handle_element_start(($scratch=$name), shift @_);
1461
1462 foreach my $x (@_) {
1463 if(ref($x)) {
1464 &_traverse_treelet_bit($self, @$x);
1465 } else {
1466 $self->_handle_text($x);
1467 }
1468 }
1469
1470 $self->_handle_element_end($scratch=$name);
1471 return;
1472}
1473
1474#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1475
1476sub _closers_for_all_curr_open {
1477 my $self = $_[0];
1478 my @closers;
1479 foreach my $still_open (@{ $self->{'curr_open'} || return }) {
1480 my @copy = @$still_open;
1481 $copy[1] = {%{ $copy[1] }};
1482 #$copy[1]{'start_line'} = -1;
1483 if($copy[0] eq '=for') {
1484 $copy[0] = '=end';
1485 } elsif($copy[0] eq '=over') {
1486 $copy[0] = '=back';
1487 } else {
1488 die "I don't know how to auto-close an open $copy[0] region";
1489 }
1490
1491 unless( @copy > 2 ) {
1492 push @copy, $copy[1]{'target'};
1493 $copy[-1] = '' unless defined $copy[-1];
1494 # since =over's don't have targets
1495 }
1496
1497 $copy[1]{'fake-closer'} = 1;
1498
1499 DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n";
1500 unshift @closers, \@copy;
1501 }
1502 return @closers;
1503}
1504
1505#--------------------------------------------------------------------------
1506
1507sub _verbatim_format {
1508 my($it, $p) = @_;
1509
1510 my $formatting;
1511
1512 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1513 DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n";
1514 $p->[$i] .= "\n";
1515 # Unlike with simple Verbatim blocks, we don't end up just doing
1516 # a join("\n", ...) on the contents, so we have to append a
1517 # newline to ever line, and then nix the last one later.
1518 }
1519
1520 if( DEBUG > 4 ) {
1521 print "<<\n";
1522 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1523 print "_verbatim_format $i: $p->[$i]";
1524 }
1525 print ">>\n";
1526 }
1527
1528 for(my $i = $#$p; $i > 2; $i--) {
1529 # work backwards over the lines, except the first (#2)
1530
1531 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1532 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1533 # look at a formatty line preceding a nonformatty one
1534 DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n";
1535 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1536 DEBUG > 5 and print " It's a formatty line. ",
1537 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1538
1539 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1540 DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n";
1541 next;
1542 } else {
1543 DEBUG > 5 and print " Previous line is non-formatty! Yay!\n";
1544 }
1545 } else {
1546 DEBUG > 5 and print " It's not a formatty line. Ignoring\n";
1547 next;
1548 }
1549
1550 # A formatty line has to have #: in the first two columns, and uses
1551 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1552 # Example:
1553 # What do you want? i like pie. [or whatever]
1554 # #:^^^^^^^^^^^^^^^^^ /////////////
1555
1556
1557 DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1558
1559 $formatting = ' ' . $1;
1560 $formatting =~ s/\s+$//s; # nix trailing whitespace
1561 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1562 splice @$p,$i,1; # remove this line
1563 $i--; # don't consider next line
1564 next;
1565 }
1566
1567 if( length($formatting) >= length($p->[$i-1]) ) {
1568 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1569 } else {
1570 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1571 }
1572 # Make $formatting and the previous line be exactly the same length,
1573 # with $formatting having a " " as the last character.
1574
1575 DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1576
1577
1578 my @new_line;
1579 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1580 #print "Format matches $1\n";
1581
1582 if($2) {
1583 #print "SKIPPING <$2>\n";
1584 push @new_line,
1585 substr($p->[$i-1], pos($formatting)-length($1), length($1));
1586 } else {
1587 #print "SNARING $+\n";
1588 push @new_line, [
1589 (
1590 $3 ? 'VerbatimB' :
1591 $4 ? 'VerbatimI' :
1592 $5 ? 'VerbatimBI' : die("Should never get called")
1593 ), {},
1594 substr($p->[$i-1], pos($formatting)-length($1), length($1))
1595 ];
1596 #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1597 }
1598 }
1599 my @nixed =
1600 splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1601 DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n";
1602
1603 DEBUG > 6 and print "New version of the above line is these tokens (",
1604 scalar(@new_line), "):",
1605 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1606 $i--; # So the next line we scrutinize is the line before the one
1607 # that we just went and formatted
1608 }
1609
1610 $p->[0] = 'VerbatimFormatted';
1611
1612 # Collapse adjacent text nodes, just for kicks.
1613 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1614 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1615 DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1616 $p->[$i] .= splice @$p, $i+1, 1; # merge
1617 --$i; # and back up
1618 }
1619 }
1620
1621 # Now look for the last text token, and remove the terminal newline
1622 for( my $i = $#$p; $i >= 2; $i-- ) {
1623 # work backwards over the tokens, even the first
1624 if( !ref($p->[$i]) ) {
1625 if($p->[$i] =~ s/\n$//s) {
1626 DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1627 } else {
1628 DEBUG > 5 and print
1629 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1630 }
1631 last; # we only want the next one
1632 }
1633 }
1634
1635 return;
1636}
1637
1638
1639#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1640
1641
1642sub _treelet_from_formatting_codes {
1643 # Given a paragraph, returns a treelet. Full of scary tokenizing code.
1644 # Like [ '~Top', {'start_line' => $start_line},
1645 # "I like ",
1646 # [ 'B', {}, "pie" ],
1647 # "!"
1648 # ]
1649
1650 my($self, $para, $start_line, $preserve_space) = @_;
1651
1652 my $treelet = ['~Top', {'start_line' => $start_line},];
1653
1654 unless ($preserve_space || $self->{'preserve_whitespace'}) {
1655 $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1656 $para =~ s/ $//;
1657 $para =~ s/^ //;
1658 }
1659
1660 # Only apparent problem the above code is that N<< >> turns into
1661 # N<< >>. But then, word wrapping does that too! So don't do that!
1662
1663 my @stack;
1664 my @lineage = ($treelet);
1665 my $raw = ''; # raw content of L<> fcode before splitting/processing
1666 # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1667 # into just 1 ' '. Is this the regex's doing or 'raw's?
1668 my $inL = 0;
1669
1670 DEBUG > 4 and print "Paragraph:\n$para\n\n";
1671
1672 # Here begins our frightening tokenizer RE. The following regex matches
1673 # text in four main parts:
1674 #
1675 # * Start-codes. The first alternative matches C< or C<<, the latter
1676 # followed by some whitespace. $1 will hold the entire start code
1677 # (including any space following a multiple-angle-bracket delimiter),
1678 # and $2 will hold only the additional brackets past the first in a
1679 # multiple-bracket delimiter. length($2) + 1 will be the number of
1680 # closing brackets we have to find.
1681 #
1682 # * Closing brackets. Match some amount of whitespace followed by
1683 # multiple close brackets. The logic to see if this closes anything
1684 # is down below. Note that in order to parse C<< >> correctly, we
1685 # have to use look-behind (?<=\s\s), since the match of the starting
1686 # code will have consumed the whitespace.
1687 #
1688 # * A single closing bracket, to close a simple code like C<>.
1689 #
1690 # * Something that isn't a start or end code. We have to be careful
1691 # about accepting whitespace, since perlpodspec says that any whitespace
1692 # before a multiple-bracket closing delimiter should be ignored.
1693 #
1694 while($para =~
1695 m/\G
1696 (?:
1697 # Match starting codes, including the whitespace following a
1698 # multiple-delimiter start code. $1 gets the whole start code and
1699 # $2 gets all but one of the <s in the multiple-bracket case.
1700 ([A-Z]<(?:(<+)\s+)?)
1701 |
1702 # Match multiple-bracket end codes. $3 gets the whitespace that
1703 # should be discarded before an end bracket but kept in other cases
1704 # and $4 gets the end brackets themselves.
1705 (\s+|(?<=\s\s))(>{2,})
1706 |
1707 (\s?>) # $5: simple end-codes
1708 |
1709 ( # $6: stuff containing no start-codes or end-codes
1710 (?:
1711 [^A-Z\s>]
1712 |
1713 (?:
1714 [A-Z](?!<)
1715 )
1716 |
1717 # whitespace is ok, but we don't want to eat the whitespace before
1718 # a multiple-bracket end code.
1719 # NOTE: we may still have problems with e.g. S<< >>
1720 (?:
1721 \s(?!\s*>{2,})
1722 )
1723 )+
1724 )
1725 )
1726 /xgo
1727 ) {
1728 DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n";
1729 if(defined $1) {
1730 if(defined $2) {
1731 DEBUG > 3 and print "Found complex start-text code \"$1\"\n";
1732 push @stack, length($2) + 1;
1733 # length of the necessary complex end-code string
1734 } else {
1735 DEBUG > 3 and print "Found simple start-text code \"$1\"\n";
1736 push @stack, 0; # signal that we're looking for simple
1737 }
1738 push @lineage, [ substr($1,0,1), {}, ]; # new node object
1739 push @{ $lineage[-2] }, $lineage[-1];
1740 if ('L' eq substr($1,0,1)) {
1741 $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1742 $inL = 1;
1743 } else {
1744 $raw .= $1 if $inL;
1745 }
1746
1747 } elsif(defined $4) {
1748 DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n";
1749 # This is where it gets messy...
1750 if(! @stack) {
1751 # We saw " >>>>" but needed nothing. This is ALL just stuff then.
1752 DEBUG > 4 and print " But it's really just stuff.\n";
1753 push @{ $lineage[-1] }, $3, $4;
1754 next;
1755 } elsif(!$stack[-1]) {
1756 # We saw " >>>>" but needed only ">". Back pos up.
1757 DEBUG > 4 and print " And that's more than we needed to close simple.\n";
1758 push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1759 pos($para) = pos($para) - length($4) + 1;
1760 } elsif($stack[-1] == length($4)) {
1761 # We found " >>>>", and it was exactly what we needed. Commonest case.
1762 DEBUG > 4 and print " And that's exactly what we needed to close complex.\n";
1763 } elsif($stack[-1] < length($4)) {
1764 # We saw " >>>>" but needed only " >>". Back pos up.
1765 DEBUG > 4 and print " And that's more than we needed to close complex.\n";
1766 pos($para) = pos($para) - length($4) + $stack[-1];
1767 } else {
1768 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1769 DEBUG > 4 and print " But it's really just stuff, because we needed more.\n";
1770 push @{ $lineage[-1] }, $3, $4;
1771 next;
1772 }
1773 #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1774
1775 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1776 # Keep the element from being childless
1777
1778 pop @stack;
1779 pop @lineage;
1780
1781 unless (@stack) { # not in an L if there are no open fcodes
1782 $inL = 0;
1783 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1784 $lineage[-1][-1][1]{'raw'} = $raw
1785 }
1786 }
1787 $raw .= $3.$4 if $inL;
1788
1789 } elsif(defined $5) {
1790 DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n";
1791
1792 if(@stack and ! $stack[-1]) {
1793 # We're indeed expecting a simple end-code
1794 DEBUG > 4 and print " It's indeed an end-code.\n";
1795
1796 if(length($5) == 2) { # There was a space there: " >"
1797 push @{ $lineage[-1] }, ' ';
1798 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1799 push @{ $lineage[-1] }, ''; # keep it from being really childless
1800 }
1801
1802 pop @stack;
1803 pop @lineage;
1804 } else {
1805 DEBUG > 4 and print " It's just stuff.\n";
1806 push @{ $lineage[-1] }, $5;
1807 }
1808
1809 unless (@stack) { # not in an L if there are no open fcodes
1810 $inL = 0;
1811 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1812 $lineage[-1][-1][1]{'raw'} = $raw
1813 }
1814 }
1815 $raw .= $5 if $inL;
1816
1817 } elsif(defined $6) {
1818 DEBUG > 3 and print "Found stuff \"$6\"\n";
1819 push @{ $lineage[-1] }, $6;
1820 $raw .= $6 if $inL;
1821 # XXX does not capture multiplace whitespaces -- 'raw' ends up with
1822 # at most 1 leading/trailing whitespace, why not all of it?
1823
1824 } else {
1825 # should never ever ever ever happen
1826 DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n";
1827 die "SPORK 512512!";
1828 }
1829 }
1830
1831 if(@stack) { # Uhoh, some sequences weren't closed.
1832 my $x= "...";
1833 while(@stack) {
1834 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1835 # Hmmmmm!
1836
1837 my $code = (pop @lineage)->[0];
1838 my $ender_length = pop @stack;
1839 if($ender_length) {
1840 --$ender_length;
1841 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1842 } else {
1843 $x = $code . "<$x>";
1844 }
1845 }
1846 DEBUG > 1 and print "Unterminated $x sequence\n";
1847 $self->whine($start_line,
1848 "Unterminated $x sequence",
1849 );
1850 }
1851
1852 return $treelet;
1853}
1854
1855#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1856
1857sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1858 return stringify_lol($_[1]);
1859}
1860
1861sub stringify_lol { # function: stringify_lol($lol)
1862 my $string_form = '';
1863 _stringify_lol( $_[0] => \$string_form );
1864 return $string_form;
1865}
1866
1867sub _stringify_lol { # the real recursor
1868 my($lol, $to) = @_;
1869 for(my $i = 2; $i < @$lol; ++$i) {
1870 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
1871 _stringify_lol( $lol->[$i], $to); # recurse!
1872 } else {
1873 $$to .= $lol->[$i];
1874 }
1875 }
1876 return;
1877}
1878
1879#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1880
1881sub _dump_curr_open { # return a string representation of the stack
1882 my $curr_open = $_[0]{'curr_open'};
1883
1884 return '[empty]' unless @$curr_open;
1885 return join '; ',
1886 map {;
1887 ($_->[0] eq '=for')
1888 ? ( ($_->[1]{'~really'} || '=over')
1889 . ' ' . $_->[1]{'target'})
1890 : $_->[0]
1891 }
1892 @$curr_open
1893 ;
1894}
1895
1896###########################################################################
1897134µsmy %pretty_form = (
1898 "\a" => '\a', # ding!
1899 "\b" => '\b', # BS
1900 "\e" => '\e', # ESC
1901 "\f" => '\f', # FF
1902 "\t" => '\t', # tab
1903 "\cm" => '\cm',
1904 "\cj" => '\cj',
1905 "\n" => '\n', # probably overrides one of either \cm or \cj
1906 '"' => '\"',
1907 '\\' => '\\\\',
1908 '$' => '\\$',
1909 '@' => '\\@',
1910 '%' => '\\%',
1911 '#' => '\\#',
1912);
1913
1914sub pretty { # adopted from Class::Classless
1915 # Not the most brilliant routine, but passable.
1916 # Don't give it a cyclic data structure!
1917 my @stuff = @_; # copy
1918 my $x;
1919 my $out =
1920 # join ",\n" .
1921 join ", ",
1922 map {;
1923 if(!defined($_)) {
1924 "undef";
1925 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
1926 $x = "[ " . pretty(@$_) . " ]" ;
1927 $x;
1928 } elsif(ref($_) eq 'SCALAR') {
1929 $x = "\\" . pretty($$_) ;
1930 $x;
1931 } elsif(ref($_) eq 'HASH') {
1932 my $hr = $_;
1933 $x = "{" . join(", ",
1934 map(pretty($_) . '=>' . pretty($hr->{$_}),
1935 sort keys %$hr ) ) . "}" ;
1936 $x;
1937 } elsif(!length($_)) { q{''} # empty string
1938 } elsif(
1939 $_ eq '0' # very common case
1940 or(
1941 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
1942 and $_ ne '-0' # the strange case that that RE lets thru
1943 )
1944 ) { $_;
1945 } else {
1946 if( chr(65) eq 'A' ) {
1947 s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
1948 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1949 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1950 } else {
1951 # We're in some crazy non-ASCII world!
1952 s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])>
1953 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
1954 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
1955 }
1956 qq{"$_"};
1957 }
1958 } @stuff;
1959 # $out =~ s/\n */ /g if length($out) < 75;
1960 return $out;
1961}
1962
1963#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1964
1965# A rather unsubtle method of blowing away all the state information
1966# from a parser object so it can be reused. Provided as a utility for
1967# backward compatibility in Pod::Man, etc. but not recommended for
1968# general use.
1969
1970sub reinit {
1971 my $self = shift;
1972 foreach (qw(source_dead source_filename doc_has_started
1973start_of_pod_block content_seen last_was_blank paras curr_open
1974line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen
1975Title)) {
1976
1977 delete $self->{$_};
1978 }
1979}
1980
1981#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1982128µs1;
1983