Filename | /usr/lib64/perl5/5.16.0/Pod/Simple/BlackBox.pm |
Statements | Executed 17 statements in 50.8ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 86µs | 103µs | BEGIN@22 | Pod::Simple::BlackBox::
1 | 1 | 1 | 78µs | 99µs | BEGIN@1151 | Pod::Simple::BlackBox::
1 | 1 | 1 | 51µs | 212µs | BEGIN@25 | Pod::Simple::BlackBox::
1 | 1 | 1 | 51µs | 137µs | BEGIN@23 | Pod::Simple::BlackBox::
1 | 1 | 1 | 34µs | 34µs | BEGIN@28 | Pod::Simple::BlackBox::
1 | 1 | 1 | 24µs | 24µs | BEGIN@24 | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _closers_for_all_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _dump_curr_open | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _gen_errata | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _handle_encoding_second_level | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Data | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Plain | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_Verbatim | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_back | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_begin | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_doc_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_end | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_for | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_item | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_over | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_paragraph_buffer | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _ponder_pod | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _traverse_treelet_bit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _treelet_from_formatting_codes | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | _verbatim_format | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_line | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | parse_lines | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | pretty | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | reinit | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | stringify_lol | Pod::Simple::BlackBox::
0 | 0 | 0 | 0s | 0s | text_content_of_treelet | Pod::Simple::BlackBox::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | |||||
2 | package Pod::Simple::BlackBox; | ||||
3 | # | ||||
4 | # "What's in the box?" "Pain." | ||||
5 | # | ||||
6 | ########################################################################### | ||||
7 | # | ||||
8 | # This is where all the scary things happen: parsing lines into | ||||
9 | # paragraphs; and then into directives, verbatims, and then also | ||||
10 | # turning formatting sequences into treelets. | ||||
11 | # | ||||
12 | # Are you really sure you want to read this code? | ||||
13 | # | ||||
14 | #----------------------------------------------------------------------------- | ||||
15 | # | ||||
16 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | ||||
17 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | ||||
18 | # to call the proper callbacks on the treelets. | ||||
19 | # | ||||
20 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | ||||
21 | |||||
22 | 2 | 149µs | 2 | 120µs | # spent 103µs (86+17) within Pod::Simple::BlackBox::BEGIN@22 which was called:
# once (86µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 # spent 103µs making 1 call to Pod::Simple::BlackBox::BEGIN@22
# spent 17µs making 1 call to integer::import |
23 | 2 | 141µs | 2 | 223µs | # spent 137µs (51+86) within Pod::Simple::BlackBox::BEGIN@23 which was called:
# once (51µs+86µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23 # spent 137µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
# spent 86µs making 1 call to strict::import |
24 | 2 | 142µs | 1 | 24µs | # spent 24µs within Pod::Simple::BlackBox::BEGIN@24 which was called:
# once (24µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24 # spent 24µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 |
25 | 2 | 295µs | 2 | 372µs | # spent 212µs (51+160) within Pod::Simple::BlackBox::BEGIN@25 which was called:
# once (51µs+160µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25 # spent 212µs making 1 call to Pod::Simple::BlackBox::BEGIN@25
# spent 160µs making 1 call to vars::import |
26 | 1 | 5µs | $VERSION = '3.20'; | ||
27 | #use constant DEBUG => 7; | ||||
28 | # spent 34µs within Pod::Simple::BlackBox::BEGIN@28 which was called:
# once (34µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31 | ||||
29 | 1 | 4µs | require Pod::Simple; | ||
30 | 1 | 34µs | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | ||
31 | 1 | 28.4ms | 1 | 34µs | } # spent 34µs making 1 call to Pod::Simple::BlackBox::BEGIN@28 |
32 | |||||
33 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
34 | |||||
35 | sub parse_line { shift->parse_lines(@_) } # alias | ||||
36 | |||||
37 | # - - - Turn back now! Run away! - - - | ||||
38 | |||||
39 | sub parse_lines { # Usage: $parser->parse_lines(@lines) | ||||
40 | # an undef means end-of-stream | ||||
41 | my $self = shift; | ||||
42 | |||||
43 | my $code_handler = $self->{'code_handler'}; | ||||
44 | my $cut_handler = $self->{'cut_handler'}; | ||||
45 | my $wl_handler = $self->{'whiteline_handler'}; | ||||
46 | $self->{'line_count'} ||= 0; | ||||
47 | |||||
48 | my $scratch; | ||||
49 | |||||
50 | DEBUG > 4 and | ||||
51 | print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; | ||||
52 | |||||
53 | DEBUG > 5 and | ||||
54 | print "# About to parse lines: ", | ||||
55 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; | ||||
56 | |||||
57 | my $paras = ($self->{'paras'} ||= []); | ||||
58 | # paragraph buffer. Because we need to defer processing of =over | ||||
59 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer | ||||
60 | # to process this. | ||||
61 | |||||
62 | $self->{'pod_para_count'} ||= 0; | ||||
63 | |||||
64 | my $line; | ||||
65 | foreach my $source_line (@_) { | ||||
66 | if( $self->{'source_dead'} ) { | ||||
67 | DEBUG > 4 and print "# Source is dead.\n"; | ||||
68 | last; | ||||
69 | } | ||||
70 | |||||
71 | unless( defined $source_line ) { | ||||
72 | DEBUG > 4 and print "# Undef-line seen.\n"; | ||||
73 | |||||
74 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; | ||||
75 | push @$paras, $paras->[-1], $paras->[-1]; | ||||
76 | # So that it definitely fills the buffer. | ||||
77 | $self->{'source_dead'} = 1; | ||||
78 | $self->_ponder_paragraph_buffer; | ||||
79 | next; | ||||
80 | } | ||||
81 | |||||
82 | |||||
83 | if( $self->{'line_count'}++ ) { | ||||
84 | ($line = $source_line) =~ tr/\n\r//d; | ||||
85 | # If we don't have two vars, we'll end up with that there | ||||
86 | # tr/// modding the (potentially read-only) original source line! | ||||
87 | |||||
88 | } else { | ||||
89 | DEBUG > 2 and print "First line: [$source_line]\n"; | ||||
90 | |||||
91 | if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { | ||||
92 | DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; | ||||
93 | $self->_handle_encoding_line( "=encoding utf8" ); | ||||
94 | $line =~ tr/\n\r//d; | ||||
95 | |||||
96 | } elsif( $line =~ s/^\xFE\xFF//s ) { | ||||
97 | DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
98 | $self->scream( | ||||
99 | $self->{'line_count'}, | ||||
100 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
101 | ); | ||||
102 | splice @_; | ||||
103 | push @_, undef; | ||||
104 | next; | ||||
105 | |||||
106 | # TODO: implement somehow? | ||||
107 | |||||
108 | } elsif( $line =~ s/^\xFF\xFE//s ) { | ||||
109 | DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
110 | $self->scream( | ||||
111 | $self->{'line_count'}, | ||||
112 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
113 | ); | ||||
114 | splice @_; | ||||
115 | push @_, undef; | ||||
116 | next; | ||||
117 | |||||
118 | # TODO: implement somehow? | ||||
119 | |||||
120 | } else { | ||||
121 | DEBUG > 2 and print "First line is BOM-less.\n"; | ||||
122 | ($line = $source_line) =~ tr/\n\r//d; | ||||
123 | } | ||||
124 | } | ||||
125 | |||||
126 | |||||
127 | DEBUG > 5 and print "# Parsing line: [$line]\n"; | ||||
128 | |||||
129 | if(!$self->{'in_pod'}) { | ||||
130 | if($line =~ m/^=([a-zA-Z]+)/s) { | ||||
131 | if($1 eq 'cut') { | ||||
132 | $self->scream( | ||||
133 | $self->{'line_count'}, | ||||
134 | "=cut found outside a pod block. Skipping to next block." | ||||
135 | ); | ||||
136 | |||||
137 | ## Before there were errata sections in the world, it was | ||||
138 | ## least-pessimal to abort processing the file. But now we can | ||||
139 | ## just barrel on thru (but still not start a pod block). | ||||
140 | #splice @_; | ||||
141 | #push @_, undef; | ||||
142 | |||||
143 | next; | ||||
144 | } else { | ||||
145 | $self->{'in_pod'} = $self->{'start_of_pod_block'} | ||||
146 | = $self->{'last_was_blank'} = 1; | ||||
147 | # And fall thru to the pod-mode block further down | ||||
148 | } | ||||
149 | } else { | ||||
150 | DEBUG > 5 and print "# It's a code-line.\n"; | ||||
151 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
152 | if $code_handler; | ||||
153 | # Note: this may cause code to be processed out of order relative | ||||
154 | # to pods, but in order relative to cuts. | ||||
155 | |||||
156 | # Note also that we haven't yet applied the transcoding to $line | ||||
157 | # by time we call $code_handler! | ||||
158 | |||||
159 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { | ||||
160 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", | ||||
161 | #$fname = $2 if defined $2; | ||||
162 | #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; | ||||
163 | DEBUG > 1 and print "# Setting nextline to $1\n"; | ||||
164 | $self->{'line_count'} = $1 - 1; | ||||
165 | } | ||||
166 | |||||
167 | next; | ||||
168 | } | ||||
169 | } | ||||
170 | |||||
171 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||||
172 | # Else we're in pod mode: | ||||
173 | |||||
174 | # Apply any necessary transcoding: | ||||
175 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
176 | |||||
177 | # HERE WE CATCH =encoding EARLY! | ||||
178 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { | ||||
179 | $line = $self->_handle_encoding_line( $line ); | ||||
180 | } | ||||
181 | |||||
182 | if($line =~ m/^=cut/s) { | ||||
183 | # here ends the pod block, and therefore the previous pod para | ||||
184 | DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; | ||||
185 | $self->{'in_pod'} = 0; | ||||
186 | # ++$self->{'pod_para_count'}; | ||||
187 | $self->_ponder_paragraph_buffer(); | ||||
188 | # by now it's safe to consider the previous paragraph as done. | ||||
189 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
190 | if $cut_handler; | ||||
191 | |||||
192 | # TODO: add to docs: Note: this may cause cuts to be processed out | ||||
193 | # of order relative to pods, but in order relative to code. | ||||
194 | |||||
195 | } elsif($line =~ m/^(\s*)$/s) { # it's a blank line | ||||
196 | if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line | ||||
197 | $wl_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
198 | if $wl_handler; | ||||
199 | } | ||||
200 | |||||
201 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
202 | DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; | ||||
203 | push @{$paras->[-1]}, $line; | ||||
204 | } # otherwise it's not interesting | ||||
205 | |||||
206 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { | ||||
207 | DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; | ||||
208 | } | ||||
209 | |||||
210 | $self->{'last_was_blank'} = 1; | ||||
211 | |||||
212 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... | ||||
213 | |||||
214 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { | ||||
215 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS | ||||
216 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; | ||||
217 | # Note that in "=head1 foo", the WS is lost. | ||||
218 | # Example: ['=head1', {'start_line' => 123}, ' foo'] | ||||
219 | |||||
220 | ++$self->{'pod_para_count'}; | ||||
221 | |||||
222 | $self->_ponder_paragraph_buffer(); | ||||
223 | # by now it's safe to consider the previous paragraph as done. | ||||
224 | |||||
225 | push @$paras, $new; # the new incipient paragraph | ||||
226 | DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; | ||||
227 | |||||
228 | } elsif($line =~ m/^\s/s) { | ||||
229 | |||||
230 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
231 | DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; | ||||
232 | push @{$paras->[-1]}, $line; | ||||
233 | } else { | ||||
234 | ++$self->{'pod_para_count'}; | ||||
235 | $self->_ponder_paragraph_buffer(); | ||||
236 | # by now it's safe to consider the previous paragraph as done. | ||||
237 | DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; | ||||
238 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; | ||||
239 | } | ||||
240 | } else { | ||||
241 | ++$self->{'pod_para_count'}; | ||||
242 | $self->_ponder_paragraph_buffer(); | ||||
243 | # by now it's safe to consider the previous paragraph as done. | ||||
244 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; | ||||
245 | DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; | ||||
246 | } | ||||
247 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
248 | |||||
249 | } else { | ||||
250 | # It's a non-blank line /continuing/ the current para | ||||
251 | if(@$paras) { | ||||
252 | DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; | ||||
253 | push @{$paras->[-1]}, $line; | ||||
254 | } else { | ||||
255 | # Unexpected case! | ||||
256 | die "Continuing a paragraph but \@\$paras is empty?"; | ||||
257 | } | ||||
258 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
259 | } | ||||
260 | |||||
261 | } # ends the big while loop | ||||
262 | |||||
263 | DEBUG > 1 and print(pretty(@$paras), "\n"); | ||||
264 | return $self; | ||||
265 | } | ||||
266 | |||||
267 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
268 | |||||
269 | sub _handle_encoding_line { | ||||
270 | my($self, $line) = @_; | ||||
271 | |||||
272 | # The point of this routine is to set $self->{'_transcoder'} as indicated. | ||||
273 | |||||
274 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; | ||||
275 | DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; | ||||
276 | |||||
277 | my $e = $1; | ||||
278 | my $orig = $e; | ||||
279 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; | ||||
280 | |||||
281 | my $enc_error; | ||||
282 | |||||
283 | # Cf. perldoc Encode and perldoc Encode::Supported | ||||
284 | |||||
285 | require Pod::Simple::Transcode; | ||||
286 | |||||
287 | if( $self->{'encoding'} ) { | ||||
288 | my $norm_current = $self->{'encoding'}; | ||||
289 | my $norm_e = $e; | ||||
290 | foreach my $that ($norm_current, $norm_e) { | ||||
291 | $that = lc($that); | ||||
292 | $that =~ s/[-_]//g; | ||||
293 | } | ||||
294 | if($norm_current eq $norm_e) { | ||||
295 | DEBUG > 1 and print "The '=encoding $orig' line is ", | ||||
296 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; | ||||
297 | $enc_error = ''; | ||||
298 | # But that doesn't necessarily mean that the earlier one went okay | ||||
299 | } else { | ||||
300 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; | ||||
301 | DEBUG > 1 and print $enc_error; | ||||
302 | } | ||||
303 | } elsif ( | ||||
304 | # OK, let's turn on the encoding | ||||
305 | do { | ||||
306 | DEBUG > 1 and print " Setting encoding to $e\n"; | ||||
307 | $self->{'encoding'} = $e; | ||||
308 | 1; | ||||
309 | } | ||||
310 | and $e eq 'HACKRAW' | ||||
311 | ) { | ||||
312 | DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; | ||||
313 | |||||
314 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { | ||||
315 | |||||
316 | die($enc_error = "WHAT? _transcoder is already set?!") | ||||
317 | if $self->{'_transcoder'}; # should never happen | ||||
318 | require Pod::Simple::Transcode; | ||||
319 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); | ||||
320 | eval { | ||||
321 | my @x = ('', "abc", "123"); | ||||
322 | $self->{'_transcoder'}->(@x); | ||||
323 | }; | ||||
324 | $@ && die( $enc_error = | ||||
325 | "Really unexpected error setting up encoding $e: $@\nAborting" | ||||
326 | ); | ||||
327 | |||||
328 | } else { | ||||
329 | my @supported = Pod::Simple::Transcode::->all_encodings; | ||||
330 | |||||
331 | # Note unsupported, and complain | ||||
332 | DEBUG and print " Encoding [$e] is unsupported.", | ||||
333 | "\nSupporteds: @supported\n"; | ||||
334 | my $suggestion = ''; | ||||
335 | |||||
336 | # Look for a near match: | ||||
337 | my $norm = lc($e); | ||||
338 | $norm =~ tr[-_][]d; | ||||
339 | my $n; | ||||
340 | foreach my $enc (@supported) { | ||||
341 | $n = lc($enc); | ||||
342 | $n =~ tr[-_][]d; | ||||
343 | next unless $n eq $norm; | ||||
344 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; | ||||
345 | last; | ||||
346 | } | ||||
347 | my $encmodver = Pod::Simple::Transcode::->encmodver; | ||||
348 | $enc_error = join '' => | ||||
349 | "This document probably does not appear as it should, because its ", | ||||
350 | "\"=encoding $e\" line calls for an unsupported encoding.", | ||||
351 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" | ||||
352 | ; | ||||
353 | |||||
354 | $self->scream( $self->{'line_count'}, $enc_error ); | ||||
355 | } | ||||
356 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; | ||||
357 | |||||
358 | return '=encoding ALREADYDONE'; | ||||
359 | } | ||||
360 | |||||
361 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
362 | |||||
363 | sub _handle_encoding_second_level { | ||||
364 | # By time this is called, the encoding (if well formed) will already | ||||
365 | # have been acted one. | ||||
366 | my($self, $para) = @_; | ||||
367 | my @x = @$para; | ||||
368 | my $content = join ' ', splice @x, 2; | ||||
369 | $content =~ s/^\s+//s; | ||||
370 | $content =~ s/\s+$//s; | ||||
371 | |||||
372 | DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; | ||||
373 | |||||
374 | if($content eq 'ALREADYDONE') { | ||||
375 | # It's already been handled. Check for errors. | ||||
376 | if(! $self->{'encoding_command_statuses'} ) { | ||||
377 | DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; | ||||
378 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { | ||||
379 | $self->whine( $para->[1]{'start_line'}, | ||||
380 | sprintf "Couldn't do %s: %s", | ||||
381 | $self->{'encoding_command_reqs' }[-1], | ||||
382 | $self->{'encoding_command_statuses'}[-1], | ||||
383 | ); | ||||
384 | } else { | ||||
385 | DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; | ||||
386 | } | ||||
387 | |||||
388 | } else { | ||||
389 | # Otherwise it's a syntax error | ||||
390 | $self->whine( $para->[1]{'start_line'}, | ||||
391 | "Invalid =encoding syntax: $content" | ||||
392 | ); | ||||
393 | } | ||||
394 | |||||
395 | return; | ||||
396 | } | ||||
397 | |||||
398 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` | ||||
399 | |||||
400 | { | ||||
401 | 2 | 7µs | my $m = -321; # magic line number | ||
402 | |||||
403 | sub _gen_errata { | ||||
404 | my $self = $_[0]; | ||||
405 | # Return 0 or more fake-o paragraphs explaining the accumulated | ||||
406 | # errors on this document. | ||||
407 | |||||
408 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; | ||||
409 | |||||
410 | my @out; | ||||
411 | |||||
412 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { | ||||
413 | push @out, | ||||
414 | ['=item', {'start_line' => $m}, "Around line $line:"], | ||||
415 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, | ||||
416 | #['~Top', {'start_line' => $m}, | ||||
417 | $_ | ||||
418 | #] | ||||
419 | ], | ||||
420 | @{$self->{'errata'}{$line}} | ||||
421 | ) | ||||
422 | ; | ||||
423 | } | ||||
424 | |||||
425 | # TODO: report of unknown entities? unrenderable characters? | ||||
426 | |||||
427 | unshift @out, | ||||
428 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], | ||||
429 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, | ||||
430 | "Hey! ", | ||||
431 | ['B', {}, | ||||
432 | 'The above document had some coding errors, which are explained below:' | ||||
433 | ] | ||||
434 | ], | ||||
435 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], | ||||
436 | ; | ||||
437 | |||||
438 | push @out, | ||||
439 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], | ||||
440 | ; | ||||
441 | |||||
442 | DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; | ||||
443 | |||||
444 | return @out; | ||||
445 | } | ||||
446 | |||||
447 | } | ||||
448 | |||||
449 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
450 | |||||
451 | ############################################################################## | ||||
452 | ## | ||||
453 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
454 | ## | ||||
455 | ## HERE IT BECOMES REALLY SCARY | ||||
456 | ## | ||||
457 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
458 | ## | ||||
459 | ############################################################################## | ||||
460 | |||||
461 | sub _ponder_paragraph_buffer { | ||||
462 | |||||
463 | # Para-token types as found in the buffer. | ||||
464 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, | ||||
465 | # =over, =back, =item | ||||
466 | # and the null =pod (to be complained about if over one line) | ||||
467 | # | ||||
468 | # "~data" paragraphs are something we generate at this level, depending on | ||||
469 | # a currently open =over region | ||||
470 | |||||
471 | # Events fired: Begin and end for: | ||||
472 | # directivename (like head1 .. head4), item, extend, | ||||
473 | # for (from =begin...=end, =for), | ||||
474 | # over-bullet, over-number, over-text, over-block, | ||||
475 | # item-bullet, item-number, item-text, | ||||
476 | # Document, | ||||
477 | # Data, Para, Verbatim | ||||
478 | # B, C, longdirname (TODO -- wha?), etc. for all directives | ||||
479 | # | ||||
480 | |||||
481 | my $self = $_[0]; | ||||
482 | my $paras; | ||||
483 | return unless @{$paras = $self->{'paras'}}; | ||||
484 | my $curr_open = ($self->{'curr_open'} ||= []); | ||||
485 | |||||
486 | my $scratch; | ||||
487 | |||||
488 | DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; | ||||
489 | |||||
490 | # We have something in our buffer. So apparently the document has started. | ||||
491 | unless($self->{'doc_has_started'}) { | ||||
492 | $self->{'doc_has_started'} = 1; | ||||
493 | |||||
494 | my $starting_contentless; | ||||
495 | $starting_contentless = | ||||
496 | ( | ||||
497 | !@$curr_open | ||||
498 | and @$paras and ! grep $_->[0] ne '~end', @$paras | ||||
499 | # i.e., if the paras is all ~ends | ||||
500 | ) | ||||
501 | ; | ||||
502 | DEBUG and print "# Starting ", | ||||
503 | $starting_contentless ? 'contentless' : 'contentful', | ||||
504 | " document\n" | ||||
505 | ; | ||||
506 | |||||
507 | $self->_handle_element_start( | ||||
508 | ($scratch = 'Document'), | ||||
509 | { | ||||
510 | 'start_line' => $paras->[0][1]{'start_line'}, | ||||
511 | $starting_contentless ? ( 'contentless' => 1 ) : (), | ||||
512 | }, | ||||
513 | ); | ||||
514 | } | ||||
515 | |||||
516 | my($para, $para_type); | ||||
517 | while(@$paras) { | ||||
518 | last if @$paras == 1 and | ||||
519 | ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' | ||||
520 | or $paras->[0][0] eq '=item' ) | ||||
521 | ; | ||||
522 | # Those're the three kinds of paragraphs that require lookahead. | ||||
523 | # Actually, an "=item Foo" inside an <over type=text> region | ||||
524 | # and any =item inside an <over type=block> region (rare) | ||||
525 | # don't require any lookahead, but all others (bullets | ||||
526 | # and numbers) do. | ||||
527 | |||||
528 | # TODO: whinge about many kinds of directives in non-resolving =for regions? | ||||
529 | # TODO: many? like what? =head1 etc? | ||||
530 | |||||
531 | $para = shift @$paras; | ||||
532 | $para_type = $para->[0]; | ||||
533 | |||||
534 | DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", | ||||
535 | $self->_dump_curr_open(), ")\n"; | ||||
536 | |||||
537 | if($para_type eq '=for') { | ||||
538 | next if $self->_ponder_for($para,$curr_open,$paras); | ||||
539 | |||||
540 | } elsif($para_type eq '=begin') { | ||||
541 | next if $self->_ponder_begin($para,$curr_open,$paras); | ||||
542 | |||||
543 | } elsif($para_type eq '=end') { | ||||
544 | next if $self->_ponder_end($para,$curr_open,$paras); | ||||
545 | |||||
546 | } elsif($para_type eq '~end') { # The virtual end-document signal | ||||
547 | next if $self->_ponder_doc_end($para,$curr_open,$paras); | ||||
548 | } | ||||
549 | |||||
550 | |||||
551 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
552 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
553 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
554 | DEBUG > 1 and | ||||
555 | print "Skipping $para_type paragraph because in ignore mode.\n"; | ||||
556 | next; | ||||
557 | } | ||||
558 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
559 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
560 | |||||
561 | if($para_type eq '=pod') { | ||||
562 | $self->_ponder_pod($para,$curr_open,$paras); | ||||
563 | |||||
564 | } elsif($para_type eq '=over') { | ||||
565 | next if $self->_ponder_over($para,$curr_open,$paras); | ||||
566 | |||||
567 | } elsif($para_type eq '=back') { | ||||
568 | next if $self->_ponder_back($para,$curr_open,$paras); | ||||
569 | |||||
570 | } else { | ||||
571 | |||||
572 | # All non-magical codes!!! | ||||
573 | |||||
574 | # Here we start using $para_type for our own twisted purposes, to | ||||
575 | # mean how it should get treated, not as what the element name | ||||
576 | # should be. | ||||
577 | |||||
578 | DEBUG > 1 and print "Pondering non-magical $para_type\n"; | ||||
579 | |||||
580 | my $i; | ||||
581 | |||||
582 | # Enforce some =headN discipline | ||||
583 | if($para_type =~ m/^=head\d$/s | ||||
584 | and ! $self->{'accept_heads_anywhere'} | ||||
585 | and @$curr_open | ||||
586 | and $curr_open->[-1][0] eq '=over' | ||||
587 | ) { | ||||
588 | DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; | ||||
589 | $self->whine( | ||||
590 | $para->[1]{'start_line'}, | ||||
591 | "You forgot a '=back' before '$para_type'" | ||||
592 | ); | ||||
593 | unshift @$paras, ['=back', {}, ''], $para; # close the =over | ||||
594 | next; | ||||
595 | } | ||||
596 | |||||
597 | |||||
598 | if($para_type eq '=item') { | ||||
599 | |||||
600 | my $over; | ||||
601 | unless(@$curr_open and | ||||
602 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
603 | $self->whine( | ||||
604 | $para->[1]{'start_line'}, | ||||
605 | "'=item' outside of any '=over'" | ||||
606 | ); | ||||
607 | unshift @$paras, | ||||
608 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
609 | $para | ||||
610 | ; | ||||
611 | next; | ||||
612 | } | ||||
613 | |||||
614 | |||||
615 | my $over_type = $over->[1]{'~type'}; | ||||
616 | |||||
617 | if(!$over_type) { | ||||
618 | # Shouldn't happen1 | ||||
619 | die "Typeless over in stack, starting at line " | ||||
620 | . $over->[1]{'start_line'}; | ||||
621 | |||||
622 | } elsif($over_type eq 'block') { | ||||
623 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
624 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
625 | $self->whine( | ||||
626 | $curr_open->[-1][1]{'start_line'}, | ||||
627 | "You can't have =items (as at line " | ||||
628 | . $para->[1]{'start_line'} | ||||
629 | . ") unless the first thing after the =over is an =item" | ||||
630 | ); | ||||
631 | } | ||||
632 | # Just turn it into a paragraph and reconsider it | ||||
633 | $para->[0] = '~Para'; | ||||
634 | unshift @$paras, $para; | ||||
635 | next; | ||||
636 | |||||
637 | } elsif($over_type eq 'text') { | ||||
638 | my $item_type = $self->_get_item_type($para); | ||||
639 | # That kills the content of the item if it's a number or bullet. | ||||
640 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
641 | |||||
642 | if($item_type eq 'text') { | ||||
643 | # Nothing special needs doing for 'text' | ||||
644 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
645 | die "Unknown item type $item_type" | ||||
646 | unless $item_type eq 'number' or $item_type eq 'bullet'; | ||||
647 | # Undo our clobbering: | ||||
648 | push @$para, $para->[1]{'~orig_content'}; | ||||
649 | delete $para->[1]{'number'}; | ||||
650 | # Only a PROPER item-number element is allowed | ||||
651 | # to have a number attribute. | ||||
652 | } else { | ||||
653 | die "Unhandled item type $item_type"; # should never happen | ||||
654 | } | ||||
655 | |||||
656 | # =item-text thingies don't need any assimilation, it seems. | ||||
657 | |||||
658 | } elsif($over_type eq 'number') { | ||||
659 | my $item_type = $self->_get_item_type($para); | ||||
660 | # That kills the content of the item if it's a number or bullet. | ||||
661 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
662 | |||||
663 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
664 | |||||
665 | if($item_type eq 'bullet') { | ||||
666 | # Hm, it's not numeric. Correct for this. | ||||
667 | $para->[1]{'number'} = $expected_value; | ||||
668 | $self->whine( | ||||
669 | $para->[1]{'start_line'}, | ||||
670 | "Expected '=item $expected_value'" | ||||
671 | ); | ||||
672 | push @$para, $para->[1]{'~orig_content'}; | ||||
673 | # restore the bullet, blocking the assimilation of next para | ||||
674 | |||||
675 | } elsif($item_type eq 'text') { | ||||
676 | # Hm, it's not numeric. Correct for this. | ||||
677 | $para->[1]{'number'} = $expected_value; | ||||
678 | $self->whine( | ||||
679 | $para->[1]{'start_line'}, | ||||
680 | "Expected '=item $expected_value'" | ||||
681 | ); | ||||
682 | # Text content will still be there and will block next ~Para | ||||
683 | |||||
684 | } elsif($item_type ne 'number') { | ||||
685 | die "Unknown item type $item_type"; # should never happen | ||||
686 | |||||
687 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
688 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
689 | |||||
690 | } else { | ||||
691 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
692 | " instead of the expected value of $expected_value\n"; | ||||
693 | $self->whine( | ||||
694 | $para->[1]{'start_line'}, | ||||
695 | "You have '=item " . $para->[1]{'number'} . | ||||
696 | "' instead of the expected '=item $expected_value'" | ||||
697 | ); | ||||
698 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
699 | } | ||||
700 | |||||
701 | if(@$para == 2) { | ||||
702 | # For the cases where we /didn't/ push to @$para | ||||
703 | if($paras->[0][0] eq '~Para') { | ||||
704 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
705 | push @$para, splice @{shift @$paras},2; | ||||
706 | } else { | ||||
707 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
708 | push @$para, ''; # Just so it's not contentless | ||||
709 | } | ||||
710 | } | ||||
711 | |||||
712 | |||||
713 | } elsif($over_type eq 'bullet') { | ||||
714 | my $item_type = $self->_get_item_type($para); | ||||
715 | # That kills the content of the item if it's a number or bullet. | ||||
716 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
717 | |||||
718 | if($item_type eq 'bullet') { | ||||
719 | # as expected! | ||||
720 | |||||
721 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
722 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
723 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
724 | } | ||||
725 | |||||
726 | } elsif($item_type eq 'number') { | ||||
727 | $self->whine( | ||||
728 | $para->[1]{'start_line'}, | ||||
729 | "Expected '=item *'" | ||||
730 | ); | ||||
731 | push @$para, $para->[1]{'~orig_content'}; | ||||
732 | # and block assimilation of the next paragraph | ||||
733 | delete $para->[1]{'number'}; | ||||
734 | # Only a PROPER item-number element is allowed | ||||
735 | # to have a number attribute. | ||||
736 | } elsif($item_type eq 'text') { | ||||
737 | $self->whine( | ||||
738 | $para->[1]{'start_line'}, | ||||
739 | "Expected '=item *'" | ||||
740 | ); | ||||
741 | # But doesn't need processing. But it'll block assimilation | ||||
742 | # of the next para. | ||||
743 | } else { | ||||
744 | die "Unhandled item type $item_type"; # should never happen | ||||
745 | } | ||||
746 | |||||
747 | if(@$para == 2) { | ||||
748 | # For the cases where we /didn't/ push to @$para | ||||
749 | if($paras->[0][0] eq '~Para') { | ||||
750 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
751 | push @$para, splice @{shift @$paras},2; | ||||
752 | } else { | ||||
753 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
754 | push @$para, ''; # Just so it's not contentless | ||||
755 | } | ||||
756 | } | ||||
757 | |||||
758 | } else { | ||||
759 | die "Unhandled =over type \"$over_type\"?"; | ||||
760 | # Shouldn't happen! | ||||
761 | } | ||||
762 | |||||
763 | $para_type = 'Plain'; | ||||
764 | $para->[0] .= '-' . $over_type; | ||||
765 | # Whew. Now fall thru and process it. | ||||
766 | |||||
767 | |||||
768 | } elsif($para_type eq '=extend') { | ||||
769 | # Well, might as well implement it here. | ||||
770 | $self->_ponder_extend($para); | ||||
771 | next; # and skip | ||||
772 | } elsif($para_type eq '=encoding') { | ||||
773 | # Not actually acted on here, but we catch errors here. | ||||
774 | $self->_handle_encoding_second_level($para); | ||||
775 | |||||
776 | next; # and skip | ||||
777 | } elsif($para_type eq '~Verbatim') { | ||||
778 | $para->[0] = 'Verbatim'; | ||||
779 | $para_type = '?Verbatim'; | ||||
780 | } elsif($para_type eq '~Para') { | ||||
781 | $para->[0] = 'Para'; | ||||
782 | $para_type = '?Plain'; | ||||
783 | } elsif($para_type eq 'Data') { | ||||
784 | $para->[0] = 'Data'; | ||||
785 | $para_type = '?Data'; | ||||
786 | } elsif( $para_type =~ s/^=//s | ||||
787 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) | ||||
788 | ) { | ||||
789 | DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; | ||||
790 | } else { | ||||
791 | # An unknown directive! | ||||
792 | DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", | ||||
793 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) | ||||
794 | ; | ||||
795 | $self->whine( | ||||
796 | $para->[1]{'start_line'}, | ||||
797 | "Unknown directive: $para->[0]" | ||||
798 | ); | ||||
799 | |||||
800 | # And maybe treat it as text instead of just letting it go? | ||||
801 | next; | ||||
802 | } | ||||
803 | |||||
804 | if($para_type =~ s/^\?//s) { | ||||
805 | if(! @$curr_open) { # usual case | ||||
806 | DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; | ||||
807 | } else { | ||||
808 | my @fors = grep $_->[0] eq '=for', @$curr_open; | ||||
809 | DEBUG > 1 and print "Containing fors: ", | ||||
810 | join(',', map $_->[1]{'target'}, @fors), "\n"; | ||||
811 | |||||
812 | if(! @fors) { | ||||
813 | DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; | ||||
814 | |||||
815 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { | ||||
816 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { | ||||
817 | } elsif( $fors[-1][1]{'~resolve'} ) { | ||||
818 | # Look to the immediately containing for | ||||
819 | |||||
820 | if($para_type eq 'Data') { | ||||
821 | DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
822 | $para->[0] = 'Para'; | ||||
823 | $para_type = 'Plain'; | ||||
824 | } else { | ||||
825 | DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
826 | } | ||||
827 | } else { | ||||
828 | DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; | ||||
829 | $para->[0] = $para_type = 'Data'; | ||||
830 | } | ||||
831 | } | ||||
832 | } | ||||
833 | |||||
834 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
835 | if($para_type eq 'Plain') { | ||||
836 | $self->_ponder_Plain($para); | ||||
837 | } elsif($para_type eq 'Verbatim') { | ||||
838 | $self->_ponder_Verbatim($para); | ||||
839 | } elsif($para_type eq 'Data') { | ||||
840 | $self->_ponder_Data($para); | ||||
841 | } else { | ||||
842 | die "\$para type is $para_type -- how did that happen?"; | ||||
843 | # Shouldn't happen. | ||||
844 | } | ||||
845 | |||||
846 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
847 | $para->[0] =~ s/^[~=]//s; | ||||
848 | |||||
849 | DEBUG and print "\n", pretty($para), "\n"; | ||||
850 | |||||
851 | # traverse the treelet (which might well be just one string scalar) | ||||
852 | $self->{'content_seen'} ||= 1; | ||||
853 | $self->_traverse_treelet_bit(@$para); | ||||
854 | } | ||||
855 | } | ||||
856 | |||||
857 | return; | ||||
858 | } | ||||
859 | |||||
860 | ########################################################################### | ||||
861 | # The sub-ponderers... | ||||
862 | |||||
- - | |||||
865 | sub _ponder_for { | ||||
866 | my ($self,$para,$curr_open,$paras) = @_; | ||||
867 | |||||
868 | # Fake it out as a begin/end | ||||
869 | my $target; | ||||
870 | |||||
871 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
872 | DEBUG > 1 and print "Ignoring ignorable =for\n"; | ||||
873 | return 1; | ||||
874 | } | ||||
875 | |||||
876 | for(my $i = 2; $i < @$para; ++$i) { | ||||
877 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { | ||||
878 | $target = $1; | ||||
879 | last; | ||||
880 | } | ||||
881 | } | ||||
882 | unless(defined $target) { | ||||
883 | $self->whine( | ||||
884 | $para->[1]{'start_line'}, | ||||
885 | "=for without a target?" | ||||
886 | ); | ||||
887 | return 1; | ||||
888 | } | ||||
889 | DEBUG > 1 and | ||||
890 | print "Faking out a =for $target as a =begin $target / =end $target\n"; | ||||
891 | |||||
892 | $para->[0] = 'Data'; | ||||
893 | |||||
894 | unshift @$paras, | ||||
895 | ['=begin', | ||||
896 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
897 | $target, | ||||
898 | ], | ||||
899 | $para, | ||||
900 | ['=end', | ||||
901 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
902 | $target, | ||||
903 | ], | ||||
904 | ; | ||||
905 | |||||
906 | return 1; | ||||
907 | } | ||||
908 | |||||
909 | sub _ponder_begin { | ||||
910 | my ($self,$para,$curr_open,$paras) = @_; | ||||
911 | my $content = join ' ', splice @$para, 2; | ||||
912 | $content =~ s/^\s+//s; | ||||
913 | $content =~ s/\s+$//s; | ||||
914 | unless(length($content)) { | ||||
915 | $self->whine( | ||||
916 | $para->[1]{'start_line'}, | ||||
917 | "=begin without a target?" | ||||
918 | ); | ||||
919 | DEBUG and print "Ignoring targetless =begin\n"; | ||||
920 | return 1; | ||||
921 | } | ||||
922 | |||||
923 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; | ||||
924 | $para->[1]{'title'} = $title if ($title); | ||||
925 | $para->[1]{'target'} = $target; # without any ':' | ||||
926 | $content = $target; # strip off the title | ||||
927 | |||||
928 | $content =~ s/^:!/!:/s; | ||||
929 | my $neg; # whether this is a negation-match | ||||
930 | $neg = 1 if $content =~ s/^!//s; | ||||
931 | my $to_resolve; # whether to process formatting codes | ||||
932 | $to_resolve = 1 if $content =~ s/^://s; | ||||
933 | |||||
934 | my $dont_ignore; # whether this target matches us | ||||
935 | |||||
936 | foreach my $target_name ( | ||||
937 | split(',', $content, -1), | ||||
938 | $neg ? () : '*' | ||||
939 | ) { | ||||
940 | DEBUG > 2 and | ||||
941 | print " Considering whether =begin $content matches $target_name\n"; | ||||
942 | next unless $self->{'accept_targets'}{$target_name}; | ||||
943 | |||||
944 | DEBUG > 2 and | ||||
945 | print " It DOES match the acceptable target $target_name!\n"; | ||||
946 | $to_resolve = 1 | ||||
947 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; | ||||
948 | $dont_ignore = 1; | ||||
949 | $para->[1]{'target_matching'} = $target_name; | ||||
950 | last; # stop looking at other target names | ||||
951 | } | ||||
952 | |||||
953 | if($neg) { | ||||
954 | if( $dont_ignore ) { | ||||
955 | $dont_ignore = ''; | ||||
956 | delete $para->[1]{'target_matching'}; | ||||
957 | DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; | ||||
958 | } else { | ||||
959 | $dont_ignore = 1; | ||||
960 | $para->[1]{'target_matching'} = '!'; | ||||
961 | DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; | ||||
962 | } | ||||
963 | } | ||||
964 | |||||
965 | $para->[0] = '=for'; # Just what we happen to call these, internally | ||||
966 | $para->[1]{'~really'} ||= '=begin'; | ||||
967 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; | ||||
968 | $para->[1]{'~resolve'} = $to_resolve || 0; | ||||
969 | |||||
970 | DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', | ||||
971 | "ignore contents of this region\n"; | ||||
972 | DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", | ||||
973 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; | ||||
974 | DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; | ||||
975 | |||||
976 | push @$curr_open, $para; | ||||
977 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
978 | DEBUG > 1 and print "Ignoring ignorable =begin\n"; | ||||
979 | } else { | ||||
980 | $self->{'content_seen'} ||= 1; | ||||
981 | $self->_handle_element_start((my $scratch='for'), $para->[1]); | ||||
982 | } | ||||
983 | |||||
984 | return 1; | ||||
985 | } | ||||
986 | |||||
987 | sub _ponder_end { | ||||
988 | my ($self,$para,$curr_open,$paras) = @_; | ||||
989 | my $content = join ' ', splice @$para, 2; | ||||
990 | $content =~ s/^\s+//s; | ||||
991 | $content =~ s/\s+$//s; | ||||
992 | DEBUG and print "Ogling '=end $content' directive\n"; | ||||
993 | |||||
994 | unless(length($content)) { | ||||
995 | $self->whine( | ||||
996 | $para->[1]{'start_line'}, | ||||
997 | "'=end' without a target?" . ( | ||||
998 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) | ||||
999 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) | ||||
1000 | : '' | ||||
1001 | ) | ||||
1002 | ); | ||||
1003 | DEBUG and print "Ignoring targetless =end\n"; | ||||
1004 | return 1; | ||||
1005 | } | ||||
1006 | |||||
1007 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word | ||||
1008 | $self->whine( | ||||
1009 | $para->[1]{'start_line'}, | ||||
1010 | "'=end $content' is invalid. (Stack: " | ||||
1011 | . $self->_dump_curr_open() . ')' | ||||
1012 | ); | ||||
1013 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
1014 | return 1; | ||||
1015 | } | ||||
1016 | |||||
1017 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { | ||||
1018 | $self->whine( | ||||
1019 | $para->[1]{'start_line'}, | ||||
1020 | "=end $content without matching =begin. (Stack: " | ||||
1021 | . $self->_dump_curr_open() . ')' | ||||
1022 | ); | ||||
1023 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
1024 | return 1; | ||||
1025 | } | ||||
1026 | |||||
1027 | unless($content eq $curr_open->[-1][1]{'target'}) { | ||||
1028 | $self->whine( | ||||
1029 | $para->[1]{'start_line'}, | ||||
1030 | "=end $content doesn't match =begin " | ||||
1031 | . $curr_open->[-1][1]{'target'} | ||||
1032 | . ". (Stack: " | ||||
1033 | . $self->_dump_curr_open() . ')' | ||||
1034 | ); | ||||
1035 | DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; | ||||
1036 | return 1; | ||||
1037 | } | ||||
1038 | |||||
1039 | # Else it's okay to close... | ||||
1040 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
1041 | DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; | ||||
1042 | # And that may be because of this to-be-closed =for region, or some | ||||
1043 | # other one, but it doesn't matter. | ||||
1044 | } else { | ||||
1045 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; | ||||
1046 | # what's that for? | ||||
1047 | |||||
1048 | $self->{'content_seen'} ||= 1; | ||||
1049 | $self->_handle_element_end( my $scratch = 'for', $para->[1]); | ||||
1050 | } | ||||
1051 | DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; | ||||
1052 | pop @$curr_open; | ||||
1053 | |||||
1054 | return 1; | ||||
1055 | } | ||||
1056 | |||||
1057 | sub _ponder_doc_end { | ||||
1058 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1059 | if(@$curr_open) { # Deal with things left open | ||||
1060 | DEBUG and print "Stack is nonempty at end-document: (", | ||||
1061 | $self->_dump_curr_open(), ")\n"; | ||||
1062 | |||||
1063 | DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; | ||||
1064 | unshift @$paras, $self->_closers_for_all_curr_open; | ||||
1065 | # Make sure there is exactly one ~end in the parastack, at the end: | ||||
1066 | @$paras = grep $_->[0] ne '~end', @$paras; | ||||
1067 | push @$paras, $para, $para; | ||||
1068 | # We need two -- once for the next cycle where we | ||||
1069 | # generate errata, and then another to be at the end | ||||
1070 | # when that loop back around to process the errata. | ||||
1071 | return 1; | ||||
1072 | |||||
1073 | } else { | ||||
1074 | DEBUG and print "Okay, stack is empty now.\n"; | ||||
1075 | } | ||||
1076 | |||||
1077 | # Try generating errata section, if applicable | ||||
1078 | unless($self->{'~tried_gen_errata'}) { | ||||
1079 | $self->{'~tried_gen_errata'} = 1; | ||||
1080 | my @extras = $self->_gen_errata(); | ||||
1081 | if(@extras) { | ||||
1082 | unshift @$paras, @extras; | ||||
1083 | DEBUG and print "Generated errata... relooping...\n"; | ||||
1084 | return 1; # I.e., loop around again to process these fake-o paragraphs | ||||
1085 | } | ||||
1086 | } | ||||
1087 | |||||
1088 | splice @$paras; # Well, that's that for this paragraph buffer. | ||||
1089 | DEBUG and print "Throwing end-document event.\n"; | ||||
1090 | |||||
1091 | $self->_handle_element_end( my $scratch = 'Document' ); | ||||
1092 | return 1; # Hasta la byebye | ||||
1093 | } | ||||
1094 | |||||
1095 | sub _ponder_pod { | ||||
1096 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1097 | $self->whine( | ||||
1098 | $para->[1]{'start_line'}, | ||||
1099 | "=pod directives shouldn't be over one line long! Ignoring all " | ||||
1100 | . (@$para - 2) . " lines of content" | ||||
1101 | ) if @$para > 3; | ||||
1102 | |||||
1103 | # Content ignored unless 'pod_handler' is set | ||||
1104 | if (my $pod_handler = $self->{'pod_handler'}) { | ||||
1105 | my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; | ||||
1106 | $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output | ||||
1107 | $pod_handler->($line, $line_num, $self); | ||||
1108 | } | ||||
1109 | |||||
1110 | # The surrounding methods set content_seen, so let us remain consistent. | ||||
1111 | # I do not know why it was not here before -- should it not be here? | ||||
1112 | # $self->{'content_seen'} ||= 1; | ||||
1113 | |||||
1114 | return; | ||||
1115 | } | ||||
1116 | |||||
1117 | sub _ponder_over { | ||||
1118 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1119 | return 1 unless @$paras; | ||||
1120 | my $list_type; | ||||
1121 | |||||
1122 | if($paras->[0][0] eq '=item') { # most common case | ||||
1123 | $list_type = $self->_get_initial_item_type($paras->[0]); | ||||
1124 | |||||
1125 | } elsif($paras->[0][0] eq '=back') { | ||||
1126 | # Ignore empty lists by default | ||||
1127 | if ($self->{'parse_empty_lists'}) { | ||||
1128 | $list_type = 'empty'; | ||||
1129 | } else { | ||||
1130 | shift @$paras; | ||||
1131 | return 1; | ||||
1132 | } | ||||
1133 | } elsif($paras->[0][0] eq '~end') { | ||||
1134 | $self->whine( | ||||
1135 | $para->[1]{'start_line'}, | ||||
1136 | "=over is the last thing in the document?!" | ||||
1137 | ); | ||||
1138 | return 1; # But feh, ignore it. | ||||
1139 | } else { | ||||
1140 | $list_type = 'block'; | ||||
1141 | } | ||||
1142 | $para->[1]{'~type'} = $list_type; | ||||
1143 | push @$curr_open, $para; | ||||
1144 | # yes, we reuse the paragraph as a stack item | ||||
1145 | |||||
1146 | my $content = join ' ', splice @$para, 2; | ||||
1147 | my $overness; | ||||
1148 | if($content =~ m/^\s*$/s) { | ||||
1149 | $para->[1]{'indent'} = 4; | ||||
1150 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { | ||||
1151 | 2 | 21.6ms | 2 | 120µs | # spent 99µs (78+21) within Pod::Simple::BlackBox::BEGIN@1151 which was called:
# once (78µs+21µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1151 # spent 99µs making 1 call to Pod::Simple::BlackBox::BEGIN@1151
# spent 21µs making 1 call to integer::unimport |
1152 | $para->[1]{'indent'} = $1; | ||||
1153 | if($1 == 0) { | ||||
1154 | $self->whine( | ||||
1155 | $para->[1]{'start_line'}, | ||||
1156 | "Can't have a 0 in =over $content" | ||||
1157 | ); | ||||
1158 | $para->[1]{'indent'} = 4; | ||||
1159 | } | ||||
1160 | } else { | ||||
1161 | $self->whine( | ||||
1162 | $para->[1]{'start_line'}, | ||||
1163 | "=over should be: '=over' or '=over positive_number'" | ||||
1164 | ); | ||||
1165 | $para->[1]{'indent'} = 4; | ||||
1166 | } | ||||
1167 | DEBUG > 1 and print "=over found of type $list_type\n"; | ||||
1168 | |||||
1169 | $self->{'content_seen'} ||= 1; | ||||
1170 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); | ||||
1171 | |||||
1172 | return; | ||||
1173 | } | ||||
1174 | |||||
1175 | sub _ponder_back { | ||||
1176 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1177 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? | ||||
1178 | |||||
1179 | my $content = join ' ', splice @$para, 2; | ||||
1180 | if($content =~ m/\S/) { | ||||
1181 | $self->whine( | ||||
1182 | $para->[1]{'start_line'}, | ||||
1183 | "=back doesn't take any parameters, but you said =back $content" | ||||
1184 | ); | ||||
1185 | } | ||||
1186 | |||||
1187 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { | ||||
1188 | DEBUG > 1 and print "=back happily closes matching =over\n"; | ||||
1189 | # Expected case: we're closing the most recently opened thing | ||||
1190 | #my $over = pop @$curr_open; | ||||
1191 | $self->{'content_seen'} ||= 1; | ||||
1192 | $self->_handle_element_end( my $scratch = | ||||
1193 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] | ||||
1194 | ); | ||||
1195 | } else { | ||||
1196 | DEBUG > 1 and print "=back found without a matching =over. Stack: (", | ||||
1197 | join(', ', map $_->[0], @$curr_open), ").\n"; | ||||
1198 | $self->whine( | ||||
1199 | $para->[1]{'start_line'}, | ||||
1200 | '=back without =over' | ||||
1201 | ); | ||||
1202 | return 1; # and ignore it | ||||
1203 | } | ||||
1204 | } | ||||
1205 | |||||
1206 | sub _ponder_item { | ||||
1207 | my ($self,$para,$curr_open,$paras) = @_; | ||||
1208 | my $over; | ||||
1209 | unless(@$curr_open and | ||||
1210 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
1211 | $self->whine( | ||||
1212 | $para->[1]{'start_line'}, | ||||
1213 | "'=item' outside of any '=over'" | ||||
1214 | ); | ||||
1215 | unshift @$paras, | ||||
1216 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
1217 | $para | ||||
1218 | ; | ||||
1219 | return 1; | ||||
1220 | } | ||||
1221 | |||||
1222 | |||||
1223 | my $over_type = $over->[1]{'~type'}; | ||||
1224 | |||||
1225 | if(!$over_type) { | ||||
1226 | # Shouldn't happen1 | ||||
1227 | die "Typeless over in stack, starting at line " | ||||
1228 | . $over->[1]{'start_line'}; | ||||
1229 | |||||
1230 | } elsif($over_type eq 'block') { | ||||
1231 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
1232 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
1233 | $self->whine( | ||||
1234 | $curr_open->[-1][1]{'start_line'}, | ||||
1235 | "You can't have =items (as at line " | ||||
1236 | . $para->[1]{'start_line'} | ||||
1237 | . ") unless the first thing after the =over is an =item" | ||||
1238 | ); | ||||
1239 | } | ||||
1240 | # Just turn it into a paragraph and reconsider it | ||||
1241 | $para->[0] = '~Para'; | ||||
1242 | unshift @$paras, $para; | ||||
1243 | return 1; | ||||
1244 | |||||
1245 | } elsif($over_type eq 'text') { | ||||
1246 | my $item_type = $self->_get_item_type($para); | ||||
1247 | # That kills the content of the item if it's a number or bullet. | ||||
1248 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1249 | |||||
1250 | if($item_type eq 'text') { | ||||
1251 | # Nothing special needs doing for 'text' | ||||
1252 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
1253 | die "Unknown item type $item_type" | ||||
1254 | unless $item_type eq 'number' or $item_type eq 'bullet'; | ||||
1255 | # Undo our clobbering: | ||||
1256 | push @$para, $para->[1]{'~orig_content'}; | ||||
1257 | delete $para->[1]{'number'}; | ||||
1258 | # Only a PROPER item-number element is allowed | ||||
1259 | # to have a number attribute. | ||||
1260 | } else { | ||||
1261 | die "Unhandled item type $item_type"; # should never happen | ||||
1262 | } | ||||
1263 | |||||
1264 | # =item-text thingies don't need any assimilation, it seems. | ||||
1265 | |||||
1266 | } elsif($over_type eq 'number') { | ||||
1267 | my $item_type = $self->_get_item_type($para); | ||||
1268 | # That kills the content of the item if it's a number or bullet. | ||||
1269 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1270 | |||||
1271 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
1272 | |||||
1273 | if($item_type eq 'bullet') { | ||||
1274 | # Hm, it's not numeric. Correct for this. | ||||
1275 | $para->[1]{'number'} = $expected_value; | ||||
1276 | $self->whine( | ||||
1277 | $para->[1]{'start_line'}, | ||||
1278 | "Expected '=item $expected_value'" | ||||
1279 | ); | ||||
1280 | push @$para, $para->[1]{'~orig_content'}; | ||||
1281 | # restore the bullet, blocking the assimilation of next para | ||||
1282 | |||||
1283 | } elsif($item_type eq 'text') { | ||||
1284 | # Hm, it's not numeric. Correct for this. | ||||
1285 | $para->[1]{'number'} = $expected_value; | ||||
1286 | $self->whine( | ||||
1287 | $para->[1]{'start_line'}, | ||||
1288 | "Expected '=item $expected_value'" | ||||
1289 | ); | ||||
1290 | # Text content will still be there and will block next ~Para | ||||
1291 | |||||
1292 | } elsif($item_type ne 'number') { | ||||
1293 | die "Unknown item type $item_type"; # should never happen | ||||
1294 | |||||
1295 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
1296 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
1297 | |||||
1298 | } else { | ||||
1299 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
1300 | " instead of the expected value of $expected_value\n"; | ||||
1301 | $self->whine( | ||||
1302 | $para->[1]{'start_line'}, | ||||
1303 | "You have '=item " . $para->[1]{'number'} . | ||||
1304 | "' instead of the expected '=item $expected_value'" | ||||
1305 | ); | ||||
1306 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
1307 | } | ||||
1308 | |||||
1309 | if(@$para == 2) { | ||||
1310 | # For the cases where we /didn't/ push to @$para | ||||
1311 | if($paras->[0][0] eq '~Para') { | ||||
1312 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
1313 | push @$para, splice @{shift @$paras},2; | ||||
1314 | } else { | ||||
1315 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1316 | push @$para, ''; # Just so it's not contentless | ||||
1317 | } | ||||
1318 | } | ||||
1319 | |||||
1320 | |||||
1321 | } elsif($over_type eq 'bullet') { | ||||
1322 | my $item_type = $self->_get_item_type($para); | ||||
1323 | # That kills the content of the item if it's a number or bullet. | ||||
1324 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
1325 | |||||
1326 | if($item_type eq 'bullet') { | ||||
1327 | # as expected! | ||||
1328 | |||||
1329 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
1330 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
1331 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
1332 | } | ||||
1333 | |||||
1334 | } elsif($item_type eq 'number') { | ||||
1335 | $self->whine( | ||||
1336 | $para->[1]{'start_line'}, | ||||
1337 | "Expected '=item *'" | ||||
1338 | ); | ||||
1339 | push @$para, $para->[1]{'~orig_content'}; | ||||
1340 | # and block assimilation of the next paragraph | ||||
1341 | delete $para->[1]{'number'}; | ||||
1342 | # Only a PROPER item-number element is allowed | ||||
1343 | # to have a number attribute. | ||||
1344 | } elsif($item_type eq 'text') { | ||||
1345 | $self->whine( | ||||
1346 | $para->[1]{'start_line'}, | ||||
1347 | "Expected '=item *'" | ||||
1348 | ); | ||||
1349 | # But doesn't need processing. But it'll block assimilation | ||||
1350 | # of the next para. | ||||
1351 | } else { | ||||
1352 | die "Unhandled item type $item_type"; # should never happen | ||||
1353 | } | ||||
1354 | |||||
1355 | if(@$para == 2) { | ||||
1356 | # For the cases where we /didn't/ push to @$para | ||||
1357 | if($paras->[0][0] eq '~Para') { | ||||
1358 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
1359 | push @$para, splice @{shift @$paras},2; | ||||
1360 | } else { | ||||
1361 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
1362 | push @$para, ''; # Just so it's not contentless | ||||
1363 | } | ||||
1364 | } | ||||
1365 | |||||
1366 | } else { | ||||
1367 | die "Unhandled =over type \"$over_type\"?"; | ||||
1368 | # Shouldn't happen! | ||||
1369 | } | ||||
1370 | $para->[0] .= '-' . $over_type; | ||||
1371 | |||||
1372 | return; | ||||
1373 | } | ||||
1374 | |||||
1375 | sub _ponder_Plain { | ||||
1376 | my ($self,$para) = @_; | ||||
1377 | DEBUG and print " giving plain treatment...\n"; | ||||
1378 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) | ||||
1379 | or $para->[1]{'~cooked'} | ||||
1380 | ) { | ||||
1381 | push @$para, | ||||
1382 | @{$self->_make_treelet( | ||||
1383 | join("\n", splice(@$para, 2)), | ||||
1384 | $para->[1]{'start_line'} | ||||
1385 | )}; | ||||
1386 | } | ||||
1387 | # Empty paragraphs don't need a treelet for any reason I can see. | ||||
1388 | # And precooked paragraphs already have a treelet. | ||||
1389 | return; | ||||
1390 | } | ||||
1391 | |||||
1392 | sub _ponder_Verbatim { | ||||
1393 | my ($self,$para) = @_; | ||||
1394 | DEBUG and print " giving verbatim treatment...\n"; | ||||
1395 | |||||
1396 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1397 | |||||
1398 | my $indent = $self->strip_verbatim_indent; | ||||
1399 | if ($indent && ref $indent eq 'CODE') { | ||||
1400 | my @shifted = (shift @{$para}, shift @{$para}); | ||||
1401 | $indent = $indent->($para); | ||||
1402 | unshift @{$para}, @shifted; | ||||
1403 | } | ||||
1404 | |||||
1405 | for(my $i = 2; $i < @$para; $i++) { | ||||
1406 | foreach my $line ($para->[$i]) { # just for aliasing | ||||
1407 | # Strip indentation. | ||||
1408 | $line =~ s/^\Q$indent// if $indent | ||||
1409 | && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); | ||||
1410 | while( $line =~ | ||||
1411 | # Sort of adapted from Text::Tabs -- yes, it's hardwired in that | ||||
1412 | # tabs are at every EIGHTH column. For portability, it has to be | ||||
1413 | # one setting everywhere, and 8th wins. | ||||
1414 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e | ||||
1415 | ) {} | ||||
1416 | |||||
1417 | # TODO: whinge about (or otherwise treat) unindented or overlong lines | ||||
1418 | |||||
1419 | } | ||||
1420 | } | ||||
1421 | |||||
1422 | # Now the VerbatimFormatted hoodoo... | ||||
1423 | if( $self->{'accept_codes'} and | ||||
1424 | $self->{'accept_codes'}{'VerbatimFormatted'} | ||||
1425 | ) { | ||||
1426 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } | ||||
1427 | # Kill any number of terminal newlines | ||||
1428 | $self->_verbatim_format($para); | ||||
1429 | } elsif ($self->{'codes_in_verbatim'}) { | ||||
1430 | push @$para, | ||||
1431 | @{$self->_make_treelet( | ||||
1432 | join("\n", splice(@$para, 2)), | ||||
1433 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} | ||||
1434 | )}; | ||||
1435 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1436 | } else { | ||||
1437 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1438 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
1439 | } | ||||
1440 | return; | ||||
1441 | } | ||||
1442 | |||||
1443 | sub _ponder_Data { | ||||
1444 | my ($self,$para) = @_; | ||||
1445 | DEBUG and print " giving data treatment...\n"; | ||||
1446 | $para->[1]{'xml:space'} = 'preserve'; | ||||
1447 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
1448 | return; | ||||
1449 | } | ||||
1450 | |||||
- - | |||||
1454 | ########################################################################### | ||||
1455 | |||||
1456 | sub _traverse_treelet_bit { # for use only by the routine above | ||||
1457 | my($self, $name) = splice @_,0,2; | ||||
1458 | |||||
1459 | my $scratch; | ||||
1460 | $self->_handle_element_start(($scratch=$name), shift @_); | ||||
1461 | |||||
1462 | foreach my $x (@_) { | ||||
1463 | if(ref($x)) { | ||||
1464 | &_traverse_treelet_bit($self, @$x); | ||||
1465 | } else { | ||||
1466 | $self->_handle_text($x); | ||||
1467 | } | ||||
1468 | } | ||||
1469 | |||||
1470 | $self->_handle_element_end($scratch=$name); | ||||
1471 | return; | ||||
1472 | } | ||||
1473 | |||||
1474 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1475 | |||||
1476 | sub _closers_for_all_curr_open { | ||||
1477 | my $self = $_[0]; | ||||
1478 | my @closers; | ||||
1479 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { | ||||
1480 | my @copy = @$still_open; | ||||
1481 | $copy[1] = {%{ $copy[1] }}; | ||||
1482 | #$copy[1]{'start_line'} = -1; | ||||
1483 | if($copy[0] eq '=for') { | ||||
1484 | $copy[0] = '=end'; | ||||
1485 | } elsif($copy[0] eq '=over') { | ||||
1486 | $copy[0] = '=back'; | ||||
1487 | } else { | ||||
1488 | die "I don't know how to auto-close an open $copy[0] region"; | ||||
1489 | } | ||||
1490 | |||||
1491 | unless( @copy > 2 ) { | ||||
1492 | push @copy, $copy[1]{'target'}; | ||||
1493 | $copy[-1] = '' unless defined $copy[-1]; | ||||
1494 | # since =over's don't have targets | ||||
1495 | } | ||||
1496 | |||||
1497 | $copy[1]{'fake-closer'} = 1; | ||||
1498 | |||||
1499 | DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; | ||||
1500 | unshift @closers, \@copy; | ||||
1501 | } | ||||
1502 | return @closers; | ||||
1503 | } | ||||
1504 | |||||
1505 | #-------------------------------------------------------------------------- | ||||
1506 | |||||
1507 | sub _verbatim_format { | ||||
1508 | my($it, $p) = @_; | ||||
1509 | |||||
1510 | my $formatting; | ||||
1511 | |||||
1512 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines | ||||
1513 | DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; | ||||
1514 | $p->[$i] .= "\n"; | ||||
1515 | # Unlike with simple Verbatim blocks, we don't end up just doing | ||||
1516 | # a join("\n", ...) on the contents, so we have to append a | ||||
1517 | # newline to ever line, and then nix the last one later. | ||||
1518 | } | ||||
1519 | |||||
1520 | if( DEBUG > 4 ) { | ||||
1521 | print "<<\n"; | ||||
1522 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines | ||||
1523 | print "_verbatim_format $i: $p->[$i]"; | ||||
1524 | } | ||||
1525 | print ">>\n"; | ||||
1526 | } | ||||
1527 | |||||
1528 | for(my $i = $#$p; $i > 2; $i--) { | ||||
1529 | # work backwards over the lines, except the first (#2) | ||||
1530 | |||||
1531 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s | ||||
1532 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; | ||||
1533 | # look at a formatty line preceding a nonformatty one | ||||
1534 | DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; | ||||
1535 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { | ||||
1536 | DEBUG > 5 and print " It's a formatty line. ", | ||||
1537 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; | ||||
1538 | |||||
1539 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { | ||||
1540 | DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; | ||||
1541 | next; | ||||
1542 | } else { | ||||
1543 | DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; | ||||
1544 | } | ||||
1545 | } else { | ||||
1546 | DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; | ||||
1547 | next; | ||||
1548 | } | ||||
1549 | |||||
1550 | # A formatty line has to have #: in the first two columns, and uses | ||||
1551 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. | ||||
1552 | # Example: | ||||
1553 | # What do you want? i like pie. [or whatever] | ||||
1554 | # #:^^^^^^^^^^^^^^^^^ ///////////// | ||||
1555 | |||||
1556 | |||||
1557 | DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; | ||||
1558 | |||||
1559 | $formatting = ' ' . $1; | ||||
1560 | $formatting =~ s/\s+$//s; # nix trailing whitespace | ||||
1561 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op | ||||
1562 | splice @$p,$i,1; # remove this line | ||||
1563 | $i--; # don't consider next line | ||||
1564 | next; | ||||
1565 | } | ||||
1566 | |||||
1567 | if( length($formatting) >= length($p->[$i-1]) ) { | ||||
1568 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; | ||||
1569 | } else { | ||||
1570 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); | ||||
1571 | } | ||||
1572 | # Make $formatting and the previous line be exactly the same length, | ||||
1573 | # with $formatting having a " " as the last character. | ||||
1574 | |||||
1575 | DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; | ||||
1576 | |||||
1577 | |||||
1578 | my @new_line; | ||||
1579 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { | ||||
1580 | #print "Format matches $1\n"; | ||||
1581 | |||||
1582 | if($2) { | ||||
1583 | #print "SKIPPING <$2>\n"; | ||||
1584 | push @new_line, | ||||
1585 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); | ||||
1586 | } else { | ||||
1587 | #print "SNARING $+\n"; | ||||
1588 | push @new_line, [ | ||||
1589 | ( | ||||
1590 | $3 ? 'VerbatimB' : | ||||
1591 | $4 ? 'VerbatimI' : | ||||
1592 | $5 ? 'VerbatimBI' : die("Should never get called") | ||||
1593 | ), {}, | ||||
1594 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) | ||||
1595 | ]; | ||||
1596 | #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; | ||||
1597 | } | ||||
1598 | } | ||||
1599 | my @nixed = | ||||
1600 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line | ||||
1601 | DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; | ||||
1602 | |||||
1603 | DEBUG > 6 and print "New version of the above line is these tokens (", | ||||
1604 | scalar(@new_line), "):", | ||||
1605 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; | ||||
1606 | $i--; # So the next line we scrutinize is the line before the one | ||||
1607 | # that we just went and formatted | ||||
1608 | } | ||||
1609 | |||||
1610 | $p->[0] = 'VerbatimFormatted'; | ||||
1611 | |||||
1612 | # Collapse adjacent text nodes, just for kicks. | ||||
1613 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last | ||||
1614 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { | ||||
1615 | DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; | ||||
1616 | $p->[$i] .= splice @$p, $i+1, 1; # merge | ||||
1617 | --$i; # and back up | ||||
1618 | } | ||||
1619 | } | ||||
1620 | |||||
1621 | # Now look for the last text token, and remove the terminal newline | ||||
1622 | for( my $i = $#$p; $i >= 2; $i-- ) { | ||||
1623 | # work backwards over the tokens, even the first | ||||
1624 | if( !ref($p->[$i]) ) { | ||||
1625 | if($p->[$i] =~ s/\n$//s) { | ||||
1626 | DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; | ||||
1627 | } else { | ||||
1628 | DEBUG > 5 and print | ||||
1629 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; | ||||
1630 | } | ||||
1631 | last; # we only want the next one | ||||
1632 | } | ||||
1633 | } | ||||
1634 | |||||
1635 | return; | ||||
1636 | } | ||||
1637 | |||||
1638 | |||||
1639 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1640 | |||||
1641 | |||||
1642 | sub _treelet_from_formatting_codes { | ||||
1643 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. | ||||
1644 | # Like [ '~Top', {'start_line' => $start_line}, | ||||
1645 | # "I like ", | ||||
1646 | # [ 'B', {}, "pie" ], | ||||
1647 | # "!" | ||||
1648 | # ] | ||||
1649 | |||||
1650 | my($self, $para, $start_line, $preserve_space) = @_; | ||||
1651 | |||||
1652 | my $treelet = ['~Top', {'start_line' => $start_line},]; | ||||
1653 | |||||
1654 | unless ($preserve_space || $self->{'preserve_whitespace'}) { | ||||
1655 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. | ||||
1656 | $para =~ s/ $//; | ||||
1657 | $para =~ s/^ //; | ||||
1658 | } | ||||
1659 | |||||
1660 | # Only apparent problem the above code is that N<< >> turns into | ||||
1661 | # N<< >>. But then, word wrapping does that too! So don't do that! | ||||
1662 | |||||
1663 | my @stack; | ||||
1664 | my @lineage = ($treelet); | ||||
1665 | my $raw = ''; # raw content of L<> fcode before splitting/processing | ||||
1666 | # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed | ||||
1667 | # into just 1 ' '. Is this the regex's doing or 'raw's? | ||||
1668 | my $inL = 0; | ||||
1669 | |||||
1670 | DEBUG > 4 and print "Paragraph:\n$para\n\n"; | ||||
1671 | |||||
1672 | # Here begins our frightening tokenizer RE. The following regex matches | ||||
1673 | # text in four main parts: | ||||
1674 | # | ||||
1675 | # * Start-codes. The first alternative matches C< or C<<, the latter | ||||
1676 | # followed by some whitespace. $1 will hold the entire start code | ||||
1677 | # (including any space following a multiple-angle-bracket delimiter), | ||||
1678 | # and $2 will hold only the additional brackets past the first in a | ||||
1679 | # multiple-bracket delimiter. length($2) + 1 will be the number of | ||||
1680 | # closing brackets we have to find. | ||||
1681 | # | ||||
1682 | # * Closing brackets. Match some amount of whitespace followed by | ||||
1683 | # multiple close brackets. The logic to see if this closes anything | ||||
1684 | # is down below. Note that in order to parse C<< >> correctly, we | ||||
1685 | # have to use look-behind (?<=\s\s), since the match of the starting | ||||
1686 | # code will have consumed the whitespace. | ||||
1687 | # | ||||
1688 | # * A single closing bracket, to close a simple code like C<>. | ||||
1689 | # | ||||
1690 | # * Something that isn't a start or end code. We have to be careful | ||||
1691 | # about accepting whitespace, since perlpodspec says that any whitespace | ||||
1692 | # before a multiple-bracket closing delimiter should be ignored. | ||||
1693 | # | ||||
1694 | while($para =~ | ||||
1695 | m/\G | ||||
1696 | (?: | ||||
1697 | # Match starting codes, including the whitespace following a | ||||
1698 | # multiple-delimiter start code. $1 gets the whole start code and | ||||
1699 | # $2 gets all but one of the <s in the multiple-bracket case. | ||||
1700 | ([A-Z]<(?:(<+)\s+)?) | ||||
1701 | | | ||||
1702 | # Match multiple-bracket end codes. $3 gets the whitespace that | ||||
1703 | # should be discarded before an end bracket but kept in other cases | ||||
1704 | # and $4 gets the end brackets themselves. | ||||
1705 | (\s+|(?<=\s\s))(>{2,}) | ||||
1706 | | | ||||
1707 | (\s?>) # $5: simple end-codes | ||||
1708 | | | ||||
1709 | ( # $6: stuff containing no start-codes or end-codes | ||||
1710 | (?: | ||||
1711 | [^A-Z\s>] | ||||
1712 | | | ||||
1713 | (?: | ||||
1714 | [A-Z](?!<) | ||||
1715 | ) | ||||
1716 | | | ||||
1717 | # whitespace is ok, but we don't want to eat the whitespace before | ||||
1718 | # a multiple-bracket end code. | ||||
1719 | # NOTE: we may still have problems with e.g. S<< >> | ||||
1720 | (?: | ||||
1721 | \s(?!\s*>{2,}) | ||||
1722 | ) | ||||
1723 | )+ | ||||
1724 | ) | ||||
1725 | ) | ||||
1726 | /xgo | ||||
1727 | ) { | ||||
1728 | DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; | ||||
1729 | if(defined $1) { | ||||
1730 | if(defined $2) { | ||||
1731 | DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; | ||||
1732 | push @stack, length($2) + 1; | ||||
1733 | # length of the necessary complex end-code string | ||||
1734 | } else { | ||||
1735 | DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; | ||||
1736 | push @stack, 0; # signal that we're looking for simple | ||||
1737 | } | ||||
1738 | push @lineage, [ substr($1,0,1), {}, ]; # new node object | ||||
1739 | push @{ $lineage[-2] }, $lineage[-1]; | ||||
1740 | if ('L' eq substr($1,0,1)) { | ||||
1741 | $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator | ||||
1742 | $inL = 1; | ||||
1743 | } else { | ||||
1744 | $raw .= $1 if $inL; | ||||
1745 | } | ||||
1746 | |||||
1747 | } elsif(defined $4) { | ||||
1748 | DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; | ||||
1749 | # This is where it gets messy... | ||||
1750 | if(! @stack) { | ||||
1751 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. | ||||
1752 | DEBUG > 4 and print " But it's really just stuff.\n"; | ||||
1753 | push @{ $lineage[-1] }, $3, $4; | ||||
1754 | next; | ||||
1755 | } elsif(!$stack[-1]) { | ||||
1756 | # We saw " >>>>" but needed only ">". Back pos up. | ||||
1757 | DEBUG > 4 and print " And that's more than we needed to close simple.\n"; | ||||
1758 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. | ||||
1759 | pos($para) = pos($para) - length($4) + 1; | ||||
1760 | } elsif($stack[-1] == length($4)) { | ||||
1761 | # We found " >>>>", and it was exactly what we needed. Commonest case. | ||||
1762 | DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; | ||||
1763 | } elsif($stack[-1] < length($4)) { | ||||
1764 | # We saw " >>>>" but needed only " >>". Back pos up. | ||||
1765 | DEBUG > 4 and print " And that's more than we needed to close complex.\n"; | ||||
1766 | pos($para) = pos($para) - length($4) + $stack[-1]; | ||||
1767 | } else { | ||||
1768 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! | ||||
1769 | DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; | ||||
1770 | push @{ $lineage[-1] }, $3, $4; | ||||
1771 | next; | ||||
1772 | } | ||||
1773 | #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; | ||||
1774 | |||||
1775 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1776 | # Keep the element from being childless | ||||
1777 | |||||
1778 | pop @stack; | ||||
1779 | pop @lineage; | ||||
1780 | |||||
1781 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1782 | $inL = 0; | ||||
1783 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1784 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1785 | } | ||||
1786 | } | ||||
1787 | $raw .= $3.$4 if $inL; | ||||
1788 | |||||
1789 | } elsif(defined $5) { | ||||
1790 | DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n"; | ||||
1791 | |||||
1792 | if(@stack and ! $stack[-1]) { | ||||
1793 | # We're indeed expecting a simple end-code | ||||
1794 | DEBUG > 4 and print " It's indeed an end-code.\n"; | ||||
1795 | |||||
1796 | if(length($5) == 2) { # There was a space there: " >" | ||||
1797 | push @{ $lineage[-1] }, ' '; | ||||
1798 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element | ||||
1799 | push @{ $lineage[-1] }, ''; # keep it from being really childless | ||||
1800 | } | ||||
1801 | |||||
1802 | pop @stack; | ||||
1803 | pop @lineage; | ||||
1804 | } else { | ||||
1805 | DEBUG > 4 and print " It's just stuff.\n"; | ||||
1806 | push @{ $lineage[-1] }, $5; | ||||
1807 | } | ||||
1808 | |||||
1809 | unless (@stack) { # not in an L if there are no open fcodes | ||||
1810 | $inL = 0; | ||||
1811 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
1812 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
1813 | } | ||||
1814 | } | ||||
1815 | $raw .= $5 if $inL; | ||||
1816 | |||||
1817 | } elsif(defined $6) { | ||||
1818 | DEBUG > 3 and print "Found stuff \"$6\"\n"; | ||||
1819 | push @{ $lineage[-1] }, $6; | ||||
1820 | $raw .= $6 if $inL; | ||||
1821 | # XXX does not capture multiplace whitespaces -- 'raw' ends up with | ||||
1822 | # at most 1 leading/trailing whitespace, why not all of it? | ||||
1823 | |||||
1824 | } else { | ||||
1825 | # should never ever ever ever happen | ||||
1826 | DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; | ||||
1827 | die "SPORK 512512!"; | ||||
1828 | } | ||||
1829 | } | ||||
1830 | |||||
1831 | if(@stack) { # Uhoh, some sequences weren't closed. | ||||
1832 | my $x= "..."; | ||||
1833 | while(@stack) { | ||||
1834 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
1835 | # Hmmmmm! | ||||
1836 | |||||
1837 | my $code = (pop @lineage)->[0]; | ||||
1838 | my $ender_length = pop @stack; | ||||
1839 | if($ender_length) { | ||||
1840 | --$ender_length; | ||||
1841 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); | ||||
1842 | } else { | ||||
1843 | $x = $code . "<$x>"; | ||||
1844 | } | ||||
1845 | } | ||||
1846 | DEBUG > 1 and print "Unterminated $x sequence\n"; | ||||
1847 | $self->whine($start_line, | ||||
1848 | "Unterminated $x sequence", | ||||
1849 | ); | ||||
1850 | } | ||||
1851 | |||||
1852 | return $treelet; | ||||
1853 | } | ||||
1854 | |||||
1855 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1856 | |||||
1857 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) | ||||
1858 | return stringify_lol($_[1]); | ||||
1859 | } | ||||
1860 | |||||
1861 | sub stringify_lol { # function: stringify_lol($lol) | ||||
1862 | my $string_form = ''; | ||||
1863 | _stringify_lol( $_[0] => \$string_form ); | ||||
1864 | return $string_form; | ||||
1865 | } | ||||
1866 | |||||
1867 | sub _stringify_lol { # the real recursor | ||||
1868 | my($lol, $to) = @_; | ||||
1869 | for(my $i = 2; $i < @$lol; ++$i) { | ||||
1870 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { | ||||
1871 | _stringify_lol( $lol->[$i], $to); # recurse! | ||||
1872 | } else { | ||||
1873 | $$to .= $lol->[$i]; | ||||
1874 | } | ||||
1875 | } | ||||
1876 | return; | ||||
1877 | } | ||||
1878 | |||||
1879 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1880 | |||||
1881 | sub _dump_curr_open { # return a string representation of the stack | ||||
1882 | my $curr_open = $_[0]{'curr_open'}; | ||||
1883 | |||||
1884 | return '[empty]' unless @$curr_open; | ||||
1885 | return join '; ', | ||||
1886 | map {; | ||||
1887 | ($_->[0] eq '=for') | ||||
1888 | ? ( ($_->[1]{'~really'} || '=over') | ||||
1889 | . ' ' . $_->[1]{'target'}) | ||||
1890 | : $_->[0] | ||||
1891 | } | ||||
1892 | @$curr_open | ||||
1893 | ; | ||||
1894 | } | ||||
1895 | |||||
1896 | ########################################################################### | ||||
1897 | 1 | 34µs | my %pretty_form = ( | ||
1898 | "\a" => '\a', # ding! | ||||
1899 | "\b" => '\b', # BS | ||||
1900 | "\e" => '\e', # ESC | ||||
1901 | "\f" => '\f', # FF | ||||
1902 | "\t" => '\t', # tab | ||||
1903 | "\cm" => '\cm', | ||||
1904 | "\cj" => '\cj', | ||||
1905 | "\n" => '\n', # probably overrides one of either \cm or \cj | ||||
1906 | '"' => '\"', | ||||
1907 | '\\' => '\\\\', | ||||
1908 | '$' => '\\$', | ||||
1909 | '@' => '\\@', | ||||
1910 | '%' => '\\%', | ||||
1911 | '#' => '\\#', | ||||
1912 | ); | ||||
1913 | |||||
1914 | sub pretty { # adopted from Class::Classless | ||||
1915 | # Not the most brilliant routine, but passable. | ||||
1916 | # Don't give it a cyclic data structure! | ||||
1917 | my @stuff = @_; # copy | ||||
1918 | my $x; | ||||
1919 | my $out = | ||||
1920 | # join ",\n" . | ||||
1921 | join ", ", | ||||
1922 | map {; | ||||
1923 | if(!defined($_)) { | ||||
1924 | "undef"; | ||||
1925 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { | ||||
1926 | $x = "[ " . pretty(@$_) . " ]" ; | ||||
1927 | $x; | ||||
1928 | } elsif(ref($_) eq 'SCALAR') { | ||||
1929 | $x = "\\" . pretty($$_) ; | ||||
1930 | $x; | ||||
1931 | } elsif(ref($_) eq 'HASH') { | ||||
1932 | my $hr = $_; | ||||
1933 | $x = "{" . join(", ", | ||||
1934 | map(pretty($_) . '=>' . pretty($hr->{$_}), | ||||
1935 | sort keys %$hr ) ) . "}" ; | ||||
1936 | $x; | ||||
1937 | } elsif(!length($_)) { q{''} # empty string | ||||
1938 | } elsif( | ||||
1939 | $_ eq '0' # very common case | ||||
1940 | or( | ||||
1941 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s | ||||
1942 | and $_ ne '-0' # the strange case that that RE lets thru | ||||
1943 | ) | ||||
1944 | ) { $_; | ||||
1945 | } else { | ||||
1946 | if( chr(65) eq 'A' ) { | ||||
1947 | s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> | ||||
1948 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
1949 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
1950 | } else { | ||||
1951 | # We're in some crazy non-ASCII world! | ||||
1952 | s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> | ||||
1953 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
1954 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
1955 | } | ||||
1956 | qq{"$_"}; | ||||
1957 | } | ||||
1958 | } @stuff; | ||||
1959 | # $out =~ s/\n */ /g if length($out) < 75; | ||||
1960 | return $out; | ||||
1961 | } | ||||
1962 | |||||
1963 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1964 | |||||
1965 | # A rather unsubtle method of blowing away all the state information | ||||
1966 | # from a parser object so it can be reused. Provided as a utility for | ||||
1967 | # backward compatibility in Pod::Man, etc. but not recommended for | ||||
1968 | # general use. | ||||
1969 | |||||
1970 | sub reinit { | ||||
1971 | my $self = shift; | ||||
1972 | foreach (qw(source_dead source_filename doc_has_started | ||||
1973 | start_of_pod_block content_seen last_was_blank paras curr_open | ||||
1974 | line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen | ||||
1975 | Title)) { | ||||
1976 | |||||
1977 | delete $self->{$_}; | ||||
1978 | } | ||||
1979 | } | ||||
1980 | |||||
1981 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
1982 | 1 | 28µs | 1; | ||
1983 |