| Filename | /usr/lib64/perl5/5.16.0/Pod/Simple/BlackBox.pm |
| Statements | Executed 17 statements in 50.8ms |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 86µs | 103µs | Pod::Simple::BlackBox::BEGIN@22 |
| 1 | 1 | 1 | 78µs | 99µs | Pod::Simple::BlackBox::BEGIN@1151 |
| 1 | 1 | 1 | 51µs | 212µs | Pod::Simple::BlackBox::BEGIN@25 |
| 1 | 1 | 1 | 51µs | 137µs | Pod::Simple::BlackBox::BEGIN@23 |
| 1 | 1 | 1 | 34µs | 34µs | Pod::Simple::BlackBox::BEGIN@28 |
| 1 | 1 | 1 | 24µs | 24µs | Pod::Simple::BlackBox::BEGIN@24 |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_closers_for_all_curr_open |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_dump_curr_open |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_gen_errata |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_handle_encoding_line |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_handle_encoding_second_level |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Data |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Plain |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_Verbatim |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_back |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_begin |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_doc_end |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_end |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_for |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_item |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_over |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_paragraph_buffer |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_ponder_pod |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_stringify_lol |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_traverse_treelet_bit |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_treelet_from_formatting_codes |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::_verbatim_format |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::parse_line |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::parse_lines |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::pretty |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::reinit |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::stringify_lol |
| 0 | 0 | 0 | 0s | 0s | Pod::Simple::BlackBox::text_content_of_treelet |
| Line | State ments |
Time on line |
Calls | Time in subs |
Code |
|---|---|---|---|---|---|
| 1 | |||||
| 2 | package Pod::Simple::BlackBox; | ||||
| 3 | # | ||||
| 4 | # "What's in the box?" "Pain." | ||||
| 5 | # | ||||
| 6 | ########################################################################### | ||||
| 7 | # | ||||
| 8 | # This is where all the scary things happen: parsing lines into | ||||
| 9 | # paragraphs; and then into directives, verbatims, and then also | ||||
| 10 | # turning formatting sequences into treelets. | ||||
| 11 | # | ||||
| 12 | # Are you really sure you want to read this code? | ||||
| 13 | # | ||||
| 14 | #----------------------------------------------------------------------------- | ||||
| 15 | # | ||||
| 16 | # The basic work of this module Pod::Simple::BlackBox is doing the dirty work | ||||
| 17 | # of parsing Pod into treelets (generally one per non-verbatim paragraph), and | ||||
| 18 | # to call the proper callbacks on the treelets. | ||||
| 19 | # | ||||
| 20 | # Every node in a treelet is a ['name', {attrhash}, ...children...] | ||||
| 21 | |||||
| 22 | 2 | 149µs | 2 | 120µs | # spent 103µs (86+17) within Pod::Simple::BlackBox::BEGIN@22 which was called:
# once (86µs+17µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 # spent 103µs making 1 call to Pod::Simple::BlackBox::BEGIN@22
# spent 17µs making 1 call to integer::import |
| 23 | 2 | 141µs | 2 | 223µs | # spent 137µs (51+86) within Pod::Simple::BlackBox::BEGIN@23 which was called:
# once (51µs+86µs) by Pod::Simple::LinkSection::BEGIN@9 at line 23 # spent 137µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
# spent 86µs making 1 call to strict::import |
| 24 | 2 | 142µs | 1 | 24µs | # spent 24µs within Pod::Simple::BlackBox::BEGIN@24 which was called:
# once (24µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 24 # spent 24µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 |
| 25 | 2 | 295µs | 2 | 372µs | # spent 212µs (51+160) within Pod::Simple::BlackBox::BEGIN@25 which was called:
# once (51µs+160µs) by Pod::Simple::LinkSection::BEGIN@9 at line 25 # spent 212µs making 1 call to Pod::Simple::BlackBox::BEGIN@25
# spent 160µs making 1 call to vars::import |
| 26 | 1 | 5µs | $VERSION = '3.20'; | ||
| 27 | #use constant DEBUG => 7; | ||||
| 28 | # spent 34µs within Pod::Simple::BlackBox::BEGIN@28 which was called:
# once (34µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 31 | ||||
| 29 | 1 | 4µs | require Pod::Simple; | ||
| 30 | 1 | 34µs | *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG | ||
| 31 | 1 | 28.4ms | 1 | 34µs | } # spent 34µs making 1 call to Pod::Simple::BlackBox::BEGIN@28 |
| 32 | |||||
| 33 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 34 | |||||
| 35 | sub parse_line { shift->parse_lines(@_) } # alias | ||||
| 36 | |||||
| 37 | # - - - Turn back now! Run away! - - - | ||||
| 38 | |||||
| 39 | sub parse_lines { # Usage: $parser->parse_lines(@lines) | ||||
| 40 | # an undef means end-of-stream | ||||
| 41 | my $self = shift; | ||||
| 42 | |||||
| 43 | my $code_handler = $self->{'code_handler'}; | ||||
| 44 | my $cut_handler = $self->{'cut_handler'}; | ||||
| 45 | my $wl_handler = $self->{'whiteline_handler'}; | ||||
| 46 | $self->{'line_count'} ||= 0; | ||||
| 47 | |||||
| 48 | my $scratch; | ||||
| 49 | |||||
| 50 | DEBUG > 4 and | ||||
| 51 | print "# Parsing starting at line ", $self->{'line_count'}, ".\n"; | ||||
| 52 | |||||
| 53 | DEBUG > 5 and | ||||
| 54 | print "# About to parse lines: ", | ||||
| 55 | join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n"; | ||||
| 56 | |||||
| 57 | my $paras = ($self->{'paras'} ||= []); | ||||
| 58 | # paragraph buffer. Because we need to defer processing of =over | ||||
| 59 | # directives and verbatim paragraphs. We call _ponder_paragraph_buffer | ||||
| 60 | # to process this. | ||||
| 61 | |||||
| 62 | $self->{'pod_para_count'} ||= 0; | ||||
| 63 | |||||
| 64 | my $line; | ||||
| 65 | foreach my $source_line (@_) { | ||||
| 66 | if( $self->{'source_dead'} ) { | ||||
| 67 | DEBUG > 4 and print "# Source is dead.\n"; | ||||
| 68 | last; | ||||
| 69 | } | ||||
| 70 | |||||
| 71 | unless( defined $source_line ) { | ||||
| 72 | DEBUG > 4 and print "# Undef-line seen.\n"; | ||||
| 73 | |||||
| 74 | push @$paras, ['~end', {'start_line' => $self->{'line_count'}}]; | ||||
| 75 | push @$paras, $paras->[-1], $paras->[-1]; | ||||
| 76 | # So that it definitely fills the buffer. | ||||
| 77 | $self->{'source_dead'} = 1; | ||||
| 78 | $self->_ponder_paragraph_buffer; | ||||
| 79 | next; | ||||
| 80 | } | ||||
| 81 | |||||
| 82 | |||||
| 83 | if( $self->{'line_count'}++ ) { | ||||
| 84 | ($line = $source_line) =~ tr/\n\r//d; | ||||
| 85 | # If we don't have two vars, we'll end up with that there | ||||
| 86 | # tr/// modding the (potentially read-only) original source line! | ||||
| 87 | |||||
| 88 | } else { | ||||
| 89 | DEBUG > 2 and print "First line: [$source_line]\n"; | ||||
| 90 | |||||
| 91 | if( ($line = $source_line) =~ s/^\xEF\xBB\xBF//s ) { | ||||
| 92 | DEBUG and print "UTF-8 BOM seen. Faking a '=encoding utf8'.\n"; | ||||
| 93 | $self->_handle_encoding_line( "=encoding utf8" ); | ||||
| 94 | $line =~ tr/\n\r//d; | ||||
| 95 | |||||
| 96 | } elsif( $line =~ s/^\xFE\xFF//s ) { | ||||
| 97 | DEBUG and print "Big-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
| 98 | $self->scream( | ||||
| 99 | $self->{'line_count'}, | ||||
| 100 | "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
| 101 | ); | ||||
| 102 | splice @_; | ||||
| 103 | push @_, undef; | ||||
| 104 | next; | ||||
| 105 | |||||
| 106 | # TODO: implement somehow? | ||||
| 107 | |||||
| 108 | } elsif( $line =~ s/^\xFF\xFE//s ) { | ||||
| 109 | DEBUG and print "Little-endian UTF-16 BOM seen. Aborting parsing.\n"; | ||||
| 110 | $self->scream( | ||||
| 111 | $self->{'line_count'}, | ||||
| 112 | "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet." | ||||
| 113 | ); | ||||
| 114 | splice @_; | ||||
| 115 | push @_, undef; | ||||
| 116 | next; | ||||
| 117 | |||||
| 118 | # TODO: implement somehow? | ||||
| 119 | |||||
| 120 | } else { | ||||
| 121 | DEBUG > 2 and print "First line is BOM-less.\n"; | ||||
| 122 | ($line = $source_line) =~ tr/\n\r//d; | ||||
| 123 | } | ||||
| 124 | } | ||||
| 125 | |||||
| 126 | |||||
| 127 | DEBUG > 5 and print "# Parsing line: [$line]\n"; | ||||
| 128 | |||||
| 129 | if(!$self->{'in_pod'}) { | ||||
| 130 | if($line =~ m/^=([a-zA-Z]+)/s) { | ||||
| 131 | if($1 eq 'cut') { | ||||
| 132 | $self->scream( | ||||
| 133 | $self->{'line_count'}, | ||||
| 134 | "=cut found outside a pod block. Skipping to next block." | ||||
| 135 | ); | ||||
| 136 | |||||
| 137 | ## Before there were errata sections in the world, it was | ||||
| 138 | ## least-pessimal to abort processing the file. But now we can | ||||
| 139 | ## just barrel on thru (but still not start a pod block). | ||||
| 140 | #splice @_; | ||||
| 141 | #push @_, undef; | ||||
| 142 | |||||
| 143 | next; | ||||
| 144 | } else { | ||||
| 145 | $self->{'in_pod'} = $self->{'start_of_pod_block'} | ||||
| 146 | = $self->{'last_was_blank'} = 1; | ||||
| 147 | # And fall thru to the pod-mode block further down | ||||
| 148 | } | ||||
| 149 | } else { | ||||
| 150 | DEBUG > 5 and print "# It's a code-line.\n"; | ||||
| 151 | $code_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 152 | if $code_handler; | ||||
| 153 | # Note: this may cause code to be processed out of order relative | ||||
| 154 | # to pods, but in order relative to cuts. | ||||
| 155 | |||||
| 156 | # Note also that we haven't yet applied the transcoding to $line | ||||
| 157 | # by time we call $code_handler! | ||||
| 158 | |||||
| 159 | if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) { | ||||
| 160 | # That RE is from perlsyn, section "Plain Old Comments (Not!)", | ||||
| 161 | #$fname = $2 if defined $2; | ||||
| 162 | #DEBUG > 1 and defined $2 and print "# Setting fname to \"$fname\"\n"; | ||||
| 163 | DEBUG > 1 and print "# Setting nextline to $1\n"; | ||||
| 164 | $self->{'line_count'} = $1 - 1; | ||||
| 165 | } | ||||
| 166 | |||||
| 167 | next; | ||||
| 168 | } | ||||
| 169 | } | ||||
| 170 | |||||
| 171 | # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . | ||||
| 172 | # Else we're in pod mode: | ||||
| 173 | |||||
| 174 | # Apply any necessary transcoding: | ||||
| 175 | $self->{'_transcoder'} && $self->{'_transcoder'}->($line); | ||||
| 176 | |||||
| 177 | # HERE WE CATCH =encoding EARLY! | ||||
| 178 | if( $line =~ m/^=encoding\s+\S+\s*$/s ) { | ||||
| 179 | $line = $self->_handle_encoding_line( $line ); | ||||
| 180 | } | ||||
| 181 | |||||
| 182 | if($line =~ m/^=cut/s) { | ||||
| 183 | # here ends the pod block, and therefore the previous pod para | ||||
| 184 | DEBUG > 1 and print "Noting =cut at line ${$self}{'line_count'}\n"; | ||||
| 185 | $self->{'in_pod'} = 0; | ||||
| 186 | # ++$self->{'pod_para_count'}; | ||||
| 187 | $self->_ponder_paragraph_buffer(); | ||||
| 188 | # by now it's safe to consider the previous paragraph as done. | ||||
| 189 | $cut_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 190 | if $cut_handler; | ||||
| 191 | |||||
| 192 | # TODO: add to docs: Note: this may cause cuts to be processed out | ||||
| 193 | # of order relative to pods, but in order relative to code. | ||||
| 194 | |||||
| 195 | } elsif($line =~ m/^(\s*)$/s) { # it's a blank line | ||||
| 196 | if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line | ||||
| 197 | $wl_handler->(map $_, $line, $self->{'line_count'}, $self) | ||||
| 198 | if $wl_handler; | ||||
| 199 | } | ||||
| 200 | |||||
| 201 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
| 202 | DEBUG > 1 and print "Saving blank line at line ${$self}{'line_count'}\n"; | ||||
| 203 | push @{$paras->[-1]}, $line; | ||||
| 204 | } # otherwise it's not interesting | ||||
| 205 | |||||
| 206 | if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) { | ||||
| 207 | DEBUG > 1 and print "Noting para ends with blank line at ${$self}{'line_count'}\n"; | ||||
| 208 | } | ||||
| 209 | |||||
| 210 | $self->{'last_was_blank'} = 1; | ||||
| 211 | |||||
| 212 | } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para... | ||||
| 213 | |||||
| 214 | if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) { | ||||
| 215 | # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS | ||||
| 216 | my $new = [$1, {'start_line' => $self->{'line_count'}}, $2]; | ||||
| 217 | # Note that in "=head1 foo", the WS is lost. | ||||
| 218 | # Example: ['=head1', {'start_line' => 123}, ' foo'] | ||||
| 219 | |||||
| 220 | ++$self->{'pod_para_count'}; | ||||
| 221 | |||||
| 222 | $self->_ponder_paragraph_buffer(); | ||||
| 223 | # by now it's safe to consider the previous paragraph as done. | ||||
| 224 | |||||
| 225 | push @$paras, $new; # the new incipient paragraph | ||||
| 226 | DEBUG > 1 and print "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n"; | ||||
| 227 | |||||
| 228 | } elsif($line =~ m/^\s/s) { | ||||
| 229 | |||||
| 230 | if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') { | ||||
| 231 | DEBUG > 1 and print "Resuming verbatim para at line ${$self}{'line_count'}\n"; | ||||
| 232 | push @{$paras->[-1]}, $line; | ||||
| 233 | } else { | ||||
| 234 | ++$self->{'pod_para_count'}; | ||||
| 235 | $self->_ponder_paragraph_buffer(); | ||||
| 236 | # by now it's safe to consider the previous paragraph as done. | ||||
| 237 | DEBUG > 1 and print "Starting verbatim para at line ${$self}{'line_count'}\n"; | ||||
| 238 | push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line]; | ||||
| 239 | } | ||||
| 240 | } else { | ||||
| 241 | ++$self->{'pod_para_count'}; | ||||
| 242 | $self->_ponder_paragraph_buffer(); | ||||
| 243 | # by now it's safe to consider the previous paragraph as done. | ||||
| 244 | push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line]; | ||||
| 245 | DEBUG > 1 and print "Starting plain para at line ${$self}{'line_count'}\n"; | ||||
| 246 | } | ||||
| 247 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
| 248 | |||||
| 249 | } else { | ||||
| 250 | # It's a non-blank line /continuing/ the current para | ||||
| 251 | if(@$paras) { | ||||
| 252 | DEBUG > 2 and print "Line ${$self}{'line_count'} continues current paragraph\n"; | ||||
| 253 | push @{$paras->[-1]}, $line; | ||||
| 254 | } else { | ||||
| 255 | # Unexpected case! | ||||
| 256 | die "Continuing a paragraph but \@\$paras is empty?"; | ||||
| 257 | } | ||||
| 258 | $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0; | ||||
| 259 | } | ||||
| 260 | |||||
| 261 | } # ends the big while loop | ||||
| 262 | |||||
| 263 | DEBUG > 1 and print(pretty(@$paras), "\n"); | ||||
| 264 | return $self; | ||||
| 265 | } | ||||
| 266 | |||||
| 267 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 268 | |||||
| 269 | sub _handle_encoding_line { | ||||
| 270 | my($self, $line) = @_; | ||||
| 271 | |||||
| 272 | # The point of this routine is to set $self->{'_transcoder'} as indicated. | ||||
| 273 | |||||
| 274 | return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s; | ||||
| 275 | DEBUG > 1 and print "Found an encoding line \"=encoding $1\"\n"; | ||||
| 276 | |||||
| 277 | my $e = $1; | ||||
| 278 | my $orig = $e; | ||||
| 279 | push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig"; | ||||
| 280 | |||||
| 281 | my $enc_error; | ||||
| 282 | |||||
| 283 | # Cf. perldoc Encode and perldoc Encode::Supported | ||||
| 284 | |||||
| 285 | require Pod::Simple::Transcode; | ||||
| 286 | |||||
| 287 | if( $self->{'encoding'} ) { | ||||
| 288 | my $norm_current = $self->{'encoding'}; | ||||
| 289 | my $norm_e = $e; | ||||
| 290 | foreach my $that ($norm_current, $norm_e) { | ||||
| 291 | $that = lc($that); | ||||
| 292 | $that =~ s/[-_]//g; | ||||
| 293 | } | ||||
| 294 | if($norm_current eq $norm_e) { | ||||
| 295 | DEBUG > 1 and print "The '=encoding $orig' line is ", | ||||
| 296 | "redundant. ($norm_current eq $norm_e). Ignoring.\n"; | ||||
| 297 | $enc_error = ''; | ||||
| 298 | # But that doesn't necessarily mean that the earlier one went okay | ||||
| 299 | } else { | ||||
| 300 | $enc_error = "Encoding is already set to " . $self->{'encoding'}; | ||||
| 301 | DEBUG > 1 and print $enc_error; | ||||
| 302 | } | ||||
| 303 | } elsif ( | ||||
| 304 | # OK, let's turn on the encoding | ||||
| 305 | do { | ||||
| 306 | DEBUG > 1 and print " Setting encoding to $e\n"; | ||||
| 307 | $self->{'encoding'} = $e; | ||||
| 308 | 1; | ||||
| 309 | } | ||||
| 310 | and $e eq 'HACKRAW' | ||||
| 311 | ) { | ||||
| 312 | DEBUG and print " Putting in HACKRAW (no-op) encoding mode.\n"; | ||||
| 313 | |||||
| 314 | } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) { | ||||
| 315 | |||||
| 316 | die($enc_error = "WHAT? _transcoder is already set?!") | ||||
| 317 | if $self->{'_transcoder'}; # should never happen | ||||
| 318 | require Pod::Simple::Transcode; | ||||
| 319 | $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e); | ||||
| 320 | eval { | ||||
| 321 | my @x = ('', "abc", "123"); | ||||
| 322 | $self->{'_transcoder'}->(@x); | ||||
| 323 | }; | ||||
| 324 | $@ && die( $enc_error = | ||||
| 325 | "Really unexpected error setting up encoding $e: $@\nAborting" | ||||
| 326 | ); | ||||
| 327 | |||||
| 328 | } else { | ||||
| 329 | my @supported = Pod::Simple::Transcode::->all_encodings; | ||||
| 330 | |||||
| 331 | # Note unsupported, and complain | ||||
| 332 | DEBUG and print " Encoding [$e] is unsupported.", | ||||
| 333 | "\nSupporteds: @supported\n"; | ||||
| 334 | my $suggestion = ''; | ||||
| 335 | |||||
| 336 | # Look for a near match: | ||||
| 337 | my $norm = lc($e); | ||||
| 338 | $norm =~ tr[-_][]d; | ||||
| 339 | my $n; | ||||
| 340 | foreach my $enc (@supported) { | ||||
| 341 | $n = lc($enc); | ||||
| 342 | $n =~ tr[-_][]d; | ||||
| 343 | next unless $n eq $norm; | ||||
| 344 | $suggestion = " (Maybe \"$e\" should be \"$enc\"?)"; | ||||
| 345 | last; | ||||
| 346 | } | ||||
| 347 | my $encmodver = Pod::Simple::Transcode::->encmodver; | ||||
| 348 | $enc_error = join '' => | ||||
| 349 | "This document probably does not appear as it should, because its ", | ||||
| 350 | "\"=encoding $e\" line calls for an unsupported encoding.", | ||||
| 351 | $suggestion, " [$encmodver\'s supported encodings are: @supported]" | ||||
| 352 | ; | ||||
| 353 | |||||
| 354 | $self->scream( $self->{'line_count'}, $enc_error ); | ||||
| 355 | } | ||||
| 356 | push @{ $self->{'encoding_command_statuses'} }, $enc_error; | ||||
| 357 | |||||
| 358 | return '=encoding ALREADYDONE'; | ||||
| 359 | } | ||||
| 360 | |||||
| 361 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||||
| 362 | |||||
| 363 | sub _handle_encoding_second_level { | ||||
| 364 | # By time this is called, the encoding (if well formed) will already | ||||
| 365 | # have been acted one. | ||||
| 366 | my($self, $para) = @_; | ||||
| 367 | my @x = @$para; | ||||
| 368 | my $content = join ' ', splice @x, 2; | ||||
| 369 | $content =~ s/^\s+//s; | ||||
| 370 | $content =~ s/\s+$//s; | ||||
| 371 | |||||
| 372 | DEBUG > 2 and print "Ogling encoding directive: =encoding $content\n"; | ||||
| 373 | |||||
| 374 | if($content eq 'ALREADYDONE') { | ||||
| 375 | # It's already been handled. Check for errors. | ||||
| 376 | if(! $self->{'encoding_command_statuses'} ) { | ||||
| 377 | DEBUG > 2 and print " CRAZY ERROR: It wasn't really handled?!\n"; | ||||
| 378 | } elsif( $self->{'encoding_command_statuses'}[-1] ) { | ||||
| 379 | $self->whine( $para->[1]{'start_line'}, | ||||
| 380 | sprintf "Couldn't do %s: %s", | ||||
| 381 | $self->{'encoding_command_reqs' }[-1], | ||||
| 382 | $self->{'encoding_command_statuses'}[-1], | ||||
| 383 | ); | ||||
| 384 | } else { | ||||
| 385 | DEBUG > 2 and print " (Yup, it was successfully handled already.)\n"; | ||||
| 386 | } | ||||
| 387 | |||||
| 388 | } else { | ||||
| 389 | # Otherwise it's a syntax error | ||||
| 390 | $self->whine( $para->[1]{'start_line'}, | ||||
| 391 | "Invalid =encoding syntax: $content" | ||||
| 392 | ); | ||||
| 393 | } | ||||
| 394 | |||||
| 395 | return; | ||||
| 396 | } | ||||
| 397 | |||||
| 398 | #~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~` | ||||
| 399 | |||||
| 400 | { | ||||
| 401 | 2 | 7µs | my $m = -321; # magic line number | ||
| 402 | |||||
| 403 | sub _gen_errata { | ||||
| 404 | my $self = $_[0]; | ||||
| 405 | # Return 0 or more fake-o paragraphs explaining the accumulated | ||||
| 406 | # errors on this document. | ||||
| 407 | |||||
| 408 | return() unless $self->{'errata'} and keys %{$self->{'errata'}}; | ||||
| 409 | |||||
| 410 | my @out; | ||||
| 411 | |||||
| 412 | foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) { | ||||
| 413 | push @out, | ||||
| 414 | ['=item', {'start_line' => $m}, "Around line $line:"], | ||||
| 415 | map( ['~Para', {'start_line' => $m, '~cooked' => 1}, | ||||
| 416 | #['~Top', {'start_line' => $m}, | ||||
| 417 | $_ | ||||
| 418 | #] | ||||
| 419 | ], | ||||
| 420 | @{$self->{'errata'}{$line}} | ||||
| 421 | ) | ||||
| 422 | ; | ||||
| 423 | } | ||||
| 424 | |||||
| 425 | # TODO: report of unknown entities? unrenderable characters? | ||||
| 426 | |||||
| 427 | unshift @out, | ||||
| 428 | ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'], | ||||
| 429 | ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1}, | ||||
| 430 | "Hey! ", | ||||
| 431 | ['B', {}, | ||||
| 432 | 'The above document had some coding errors, which are explained below:' | ||||
| 433 | ] | ||||
| 434 | ], | ||||
| 435 | ['=over', {'start_line' => $m, 'errata' => 1}, ''], | ||||
| 436 | ; | ||||
| 437 | |||||
| 438 | push @out, | ||||
| 439 | ['=back', {'start_line' => $m, 'errata' => 1}, ''], | ||||
| 440 | ; | ||||
| 441 | |||||
| 442 | DEBUG and print "\n<<\n", pretty(\@out), "\n>>\n\n"; | ||||
| 443 | |||||
| 444 | return @out; | ||||
| 445 | } | ||||
| 446 | |||||
| 447 | } | ||||
| 448 | |||||
| 449 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 450 | |||||
| 451 | ############################################################################## | ||||
| 452 | ## | ||||
| 453 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
| 454 | ## | ||||
| 455 | ## HERE IT BECOMES REALLY SCARY | ||||
| 456 | ## | ||||
| 457 | ## stop reading now stop reading now stop reading now stop reading now stop | ||||
| 458 | ## | ||||
| 459 | ############################################################################## | ||||
| 460 | |||||
| 461 | sub _ponder_paragraph_buffer { | ||||
| 462 | |||||
| 463 | # Para-token types as found in the buffer. | ||||
| 464 | # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end, | ||||
| 465 | # =over, =back, =item | ||||
| 466 | # and the null =pod (to be complained about if over one line) | ||||
| 467 | # | ||||
| 468 | # "~data" paragraphs are something we generate at this level, depending on | ||||
| 469 | # a currently open =over region | ||||
| 470 | |||||
| 471 | # Events fired: Begin and end for: | ||||
| 472 | # directivename (like head1 .. head4), item, extend, | ||||
| 473 | # for (from =begin...=end, =for), | ||||
| 474 | # over-bullet, over-number, over-text, over-block, | ||||
| 475 | # item-bullet, item-number, item-text, | ||||
| 476 | # Document, | ||||
| 477 | # Data, Para, Verbatim | ||||
| 478 | # B, C, longdirname (TODO -- wha?), etc. for all directives | ||||
| 479 | # | ||||
| 480 | |||||
| 481 | my $self = $_[0]; | ||||
| 482 | my $paras; | ||||
| 483 | return unless @{$paras = $self->{'paras'}}; | ||||
| 484 | my $curr_open = ($self->{'curr_open'} ||= []); | ||||
| 485 | |||||
| 486 | my $scratch; | ||||
| 487 | |||||
| 488 | DEBUG > 10 and print "# Paragraph buffer: <<", pretty($paras), ">>\n"; | ||||
| 489 | |||||
| 490 | # We have something in our buffer. So apparently the document has started. | ||||
| 491 | unless($self->{'doc_has_started'}) { | ||||
| 492 | $self->{'doc_has_started'} = 1; | ||||
| 493 | |||||
| 494 | my $starting_contentless; | ||||
| 495 | $starting_contentless = | ||||
| 496 | ( | ||||
| 497 | !@$curr_open | ||||
| 498 | and @$paras and ! grep $_->[0] ne '~end', @$paras | ||||
| 499 | # i.e., if the paras is all ~ends | ||||
| 500 | ) | ||||
| 501 | ; | ||||
| 502 | DEBUG and print "# Starting ", | ||||
| 503 | $starting_contentless ? 'contentless' : 'contentful', | ||||
| 504 | " document\n" | ||||
| 505 | ; | ||||
| 506 | |||||
| 507 | $self->_handle_element_start( | ||||
| 508 | ($scratch = 'Document'), | ||||
| 509 | { | ||||
| 510 | 'start_line' => $paras->[0][1]{'start_line'}, | ||||
| 511 | $starting_contentless ? ( 'contentless' => 1 ) : (), | ||||
| 512 | }, | ||||
| 513 | ); | ||||
| 514 | } | ||||
| 515 | |||||
| 516 | my($para, $para_type); | ||||
| 517 | while(@$paras) { | ||||
| 518 | last if @$paras == 1 and | ||||
| 519 | ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim' | ||||
| 520 | or $paras->[0][0] eq '=item' ) | ||||
| 521 | ; | ||||
| 522 | # Those're the three kinds of paragraphs that require lookahead. | ||||
| 523 | # Actually, an "=item Foo" inside an <over type=text> region | ||||
| 524 | # and any =item inside an <over type=block> region (rare) | ||||
| 525 | # don't require any lookahead, but all others (bullets | ||||
| 526 | # and numbers) do. | ||||
| 527 | |||||
| 528 | # TODO: whinge about many kinds of directives in non-resolving =for regions? | ||||
| 529 | # TODO: many? like what? =head1 etc? | ||||
| 530 | |||||
| 531 | $para = shift @$paras; | ||||
| 532 | $para_type = $para->[0]; | ||||
| 533 | |||||
| 534 | DEBUG > 1 and print "Pondering a $para_type paragraph, given the stack: (", | ||||
| 535 | $self->_dump_curr_open(), ")\n"; | ||||
| 536 | |||||
| 537 | if($para_type eq '=for') { | ||||
| 538 | next if $self->_ponder_for($para,$curr_open,$paras); | ||||
| 539 | |||||
| 540 | } elsif($para_type eq '=begin') { | ||||
| 541 | next if $self->_ponder_begin($para,$curr_open,$paras); | ||||
| 542 | |||||
| 543 | } elsif($para_type eq '=end') { | ||||
| 544 | next if $self->_ponder_end($para,$curr_open,$paras); | ||||
| 545 | |||||
| 546 | } elsif($para_type eq '~end') { # The virtual end-document signal | ||||
| 547 | next if $self->_ponder_doc_end($para,$curr_open,$paras); | ||||
| 548 | } | ||||
| 549 | |||||
| 550 | |||||
| 551 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 552 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 553 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 554 | DEBUG > 1 and | ||||
| 555 | print "Skipping $para_type paragraph because in ignore mode.\n"; | ||||
| 556 | next; | ||||
| 557 | } | ||||
| 558 | #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 559 | # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ | ||||
| 560 | |||||
| 561 | if($para_type eq '=pod') { | ||||
| 562 | $self->_ponder_pod($para,$curr_open,$paras); | ||||
| 563 | |||||
| 564 | } elsif($para_type eq '=over') { | ||||
| 565 | next if $self->_ponder_over($para,$curr_open,$paras); | ||||
| 566 | |||||
| 567 | } elsif($para_type eq '=back') { | ||||
| 568 | next if $self->_ponder_back($para,$curr_open,$paras); | ||||
| 569 | |||||
| 570 | } else { | ||||
| 571 | |||||
| 572 | # All non-magical codes!!! | ||||
| 573 | |||||
| 574 | # Here we start using $para_type for our own twisted purposes, to | ||||
| 575 | # mean how it should get treated, not as what the element name | ||||
| 576 | # should be. | ||||
| 577 | |||||
| 578 | DEBUG > 1 and print "Pondering non-magical $para_type\n"; | ||||
| 579 | |||||
| 580 | my $i; | ||||
| 581 | |||||
| 582 | # Enforce some =headN discipline | ||||
| 583 | if($para_type =~ m/^=head\d$/s | ||||
| 584 | and ! $self->{'accept_heads_anywhere'} | ||||
| 585 | and @$curr_open | ||||
| 586 | and $curr_open->[-1][0] eq '=over' | ||||
| 587 | ) { | ||||
| 588 | DEBUG > 2 and print "'=$para_type' inside an '=over'!\n"; | ||||
| 589 | $self->whine( | ||||
| 590 | $para->[1]{'start_line'}, | ||||
| 591 | "You forgot a '=back' before '$para_type'" | ||||
| 592 | ); | ||||
| 593 | unshift @$paras, ['=back', {}, ''], $para; # close the =over | ||||
| 594 | next; | ||||
| 595 | } | ||||
| 596 | |||||
| 597 | |||||
| 598 | if($para_type eq '=item') { | ||||
| 599 | |||||
| 600 | my $over; | ||||
| 601 | unless(@$curr_open and | ||||
| 602 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
| 603 | $self->whine( | ||||
| 604 | $para->[1]{'start_line'}, | ||||
| 605 | "'=item' outside of any '=over'" | ||||
| 606 | ); | ||||
| 607 | unshift @$paras, | ||||
| 608 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
| 609 | $para | ||||
| 610 | ; | ||||
| 611 | next; | ||||
| 612 | } | ||||
| 613 | |||||
| 614 | |||||
| 615 | my $over_type = $over->[1]{'~type'}; | ||||
| 616 | |||||
| 617 | if(!$over_type) { | ||||
| 618 | # Shouldn't happen1 | ||||
| 619 | die "Typeless over in stack, starting at line " | ||||
| 620 | . $over->[1]{'start_line'}; | ||||
| 621 | |||||
| 622 | } elsif($over_type eq 'block') { | ||||
| 623 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
| 624 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
| 625 | $self->whine( | ||||
| 626 | $curr_open->[-1][1]{'start_line'}, | ||||
| 627 | "You can't have =items (as at line " | ||||
| 628 | . $para->[1]{'start_line'} | ||||
| 629 | . ") unless the first thing after the =over is an =item" | ||||
| 630 | ); | ||||
| 631 | } | ||||
| 632 | # Just turn it into a paragraph and reconsider it | ||||
| 633 | $para->[0] = '~Para'; | ||||
| 634 | unshift @$paras, $para; | ||||
| 635 | next; | ||||
| 636 | |||||
| 637 | } elsif($over_type eq 'text') { | ||||
| 638 | my $item_type = $self->_get_item_type($para); | ||||
| 639 | # That kills the content of the item if it's a number or bullet. | ||||
| 640 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 641 | |||||
| 642 | if($item_type eq 'text') { | ||||
| 643 | # Nothing special needs doing for 'text' | ||||
| 644 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
| 645 | die "Unknown item type $item_type" | ||||
| 646 | unless $item_type eq 'number' or $item_type eq 'bullet'; | ||||
| 647 | # Undo our clobbering: | ||||
| 648 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 649 | delete $para->[1]{'number'}; | ||||
| 650 | # Only a PROPER item-number element is allowed | ||||
| 651 | # to have a number attribute. | ||||
| 652 | } else { | ||||
| 653 | die "Unhandled item type $item_type"; # should never happen | ||||
| 654 | } | ||||
| 655 | |||||
| 656 | # =item-text thingies don't need any assimilation, it seems. | ||||
| 657 | |||||
| 658 | } elsif($over_type eq 'number') { | ||||
| 659 | my $item_type = $self->_get_item_type($para); | ||||
| 660 | # That kills the content of the item if it's a number or bullet. | ||||
| 661 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 662 | |||||
| 663 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
| 664 | |||||
| 665 | if($item_type eq 'bullet') { | ||||
| 666 | # Hm, it's not numeric. Correct for this. | ||||
| 667 | $para->[1]{'number'} = $expected_value; | ||||
| 668 | $self->whine( | ||||
| 669 | $para->[1]{'start_line'}, | ||||
| 670 | "Expected '=item $expected_value'" | ||||
| 671 | ); | ||||
| 672 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 673 | # restore the bullet, blocking the assimilation of next para | ||||
| 674 | |||||
| 675 | } elsif($item_type eq 'text') { | ||||
| 676 | # Hm, it's not numeric. Correct for this. | ||||
| 677 | $para->[1]{'number'} = $expected_value; | ||||
| 678 | $self->whine( | ||||
| 679 | $para->[1]{'start_line'}, | ||||
| 680 | "Expected '=item $expected_value'" | ||||
| 681 | ); | ||||
| 682 | # Text content will still be there and will block next ~Para | ||||
| 683 | |||||
| 684 | } elsif($item_type ne 'number') { | ||||
| 685 | die "Unknown item type $item_type"; # should never happen | ||||
| 686 | |||||
| 687 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
| 688 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
| 689 | |||||
| 690 | } else { | ||||
| 691 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
| 692 | " instead of the expected value of $expected_value\n"; | ||||
| 693 | $self->whine( | ||||
| 694 | $para->[1]{'start_line'}, | ||||
| 695 | "You have '=item " . $para->[1]{'number'} . | ||||
| 696 | "' instead of the expected '=item $expected_value'" | ||||
| 697 | ); | ||||
| 698 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
| 699 | } | ||||
| 700 | |||||
| 701 | if(@$para == 2) { | ||||
| 702 | # For the cases where we /didn't/ push to @$para | ||||
| 703 | if($paras->[0][0] eq '~Para') { | ||||
| 704 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
| 705 | push @$para, splice @{shift @$paras},2; | ||||
| 706 | } else { | ||||
| 707 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 708 | push @$para, ''; # Just so it's not contentless | ||||
| 709 | } | ||||
| 710 | } | ||||
| 711 | |||||
| 712 | |||||
| 713 | } elsif($over_type eq 'bullet') { | ||||
| 714 | my $item_type = $self->_get_item_type($para); | ||||
| 715 | # That kills the content of the item if it's a number or bullet. | ||||
| 716 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 717 | |||||
| 718 | if($item_type eq 'bullet') { | ||||
| 719 | # as expected! | ||||
| 720 | |||||
| 721 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
| 722 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
| 723 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
| 724 | } | ||||
| 725 | |||||
| 726 | } elsif($item_type eq 'number') { | ||||
| 727 | $self->whine( | ||||
| 728 | $para->[1]{'start_line'}, | ||||
| 729 | "Expected '=item *'" | ||||
| 730 | ); | ||||
| 731 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 732 | # and block assimilation of the next paragraph | ||||
| 733 | delete $para->[1]{'number'}; | ||||
| 734 | # Only a PROPER item-number element is allowed | ||||
| 735 | # to have a number attribute. | ||||
| 736 | } elsif($item_type eq 'text') { | ||||
| 737 | $self->whine( | ||||
| 738 | $para->[1]{'start_line'}, | ||||
| 739 | "Expected '=item *'" | ||||
| 740 | ); | ||||
| 741 | # But doesn't need processing. But it'll block assimilation | ||||
| 742 | # of the next para. | ||||
| 743 | } else { | ||||
| 744 | die "Unhandled item type $item_type"; # should never happen | ||||
| 745 | } | ||||
| 746 | |||||
| 747 | if(@$para == 2) { | ||||
| 748 | # For the cases where we /didn't/ push to @$para | ||||
| 749 | if($paras->[0][0] eq '~Para') { | ||||
| 750 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
| 751 | push @$para, splice @{shift @$paras},2; | ||||
| 752 | } else { | ||||
| 753 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 754 | push @$para, ''; # Just so it's not contentless | ||||
| 755 | } | ||||
| 756 | } | ||||
| 757 | |||||
| 758 | } else { | ||||
| 759 | die "Unhandled =over type \"$over_type\"?"; | ||||
| 760 | # Shouldn't happen! | ||||
| 761 | } | ||||
| 762 | |||||
| 763 | $para_type = 'Plain'; | ||||
| 764 | $para->[0] .= '-' . $over_type; | ||||
| 765 | # Whew. Now fall thru and process it. | ||||
| 766 | |||||
| 767 | |||||
| 768 | } elsif($para_type eq '=extend') { | ||||
| 769 | # Well, might as well implement it here. | ||||
| 770 | $self->_ponder_extend($para); | ||||
| 771 | next; # and skip | ||||
| 772 | } elsif($para_type eq '=encoding') { | ||||
| 773 | # Not actually acted on here, but we catch errors here. | ||||
| 774 | $self->_handle_encoding_second_level($para); | ||||
| 775 | |||||
| 776 | next; # and skip | ||||
| 777 | } elsif($para_type eq '~Verbatim') { | ||||
| 778 | $para->[0] = 'Verbatim'; | ||||
| 779 | $para_type = '?Verbatim'; | ||||
| 780 | } elsif($para_type eq '~Para') { | ||||
| 781 | $para->[0] = 'Para'; | ||||
| 782 | $para_type = '?Plain'; | ||||
| 783 | } elsif($para_type eq 'Data') { | ||||
| 784 | $para->[0] = 'Data'; | ||||
| 785 | $para_type = '?Data'; | ||||
| 786 | } elsif( $para_type =~ s/^=//s | ||||
| 787 | and defined( $para_type = $self->{'accept_directives'}{$para_type} ) | ||||
| 788 | ) { | ||||
| 789 | DEBUG > 1 and print " Pondering known directive ${$para}[0] as $para_type\n"; | ||||
| 790 | } else { | ||||
| 791 | # An unknown directive! | ||||
| 792 | DEBUG > 1 and printf "Unhandled directive %s (Handled: %s)\n", | ||||
| 793 | $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} ) | ||||
| 794 | ; | ||||
| 795 | $self->whine( | ||||
| 796 | $para->[1]{'start_line'}, | ||||
| 797 | "Unknown directive: $para->[0]" | ||||
| 798 | ); | ||||
| 799 | |||||
| 800 | # And maybe treat it as text instead of just letting it go? | ||||
| 801 | next; | ||||
| 802 | } | ||||
| 803 | |||||
| 804 | if($para_type =~ s/^\?//s) { | ||||
| 805 | if(! @$curr_open) { # usual case | ||||
| 806 | DEBUG and print "Treating $para_type paragraph as such because stack is empty.\n"; | ||||
| 807 | } else { | ||||
| 808 | my @fors = grep $_->[0] eq '=for', @$curr_open; | ||||
| 809 | DEBUG > 1 and print "Containing fors: ", | ||||
| 810 | join(',', map $_->[1]{'target'}, @fors), "\n"; | ||||
| 811 | |||||
| 812 | if(! @fors) { | ||||
| 813 | DEBUG and print "Treating $para_type paragraph as such because stack has no =for's\n"; | ||||
| 814 | |||||
| 815 | #} elsif(grep $_->[1]{'~resolve'}, @fors) { | ||||
| 816 | #} elsif(not grep !$_->[1]{'~resolve'}, @fors) { | ||||
| 817 | } elsif( $fors[-1][1]{'~resolve'} ) { | ||||
| 818 | # Look to the immediately containing for | ||||
| 819 | |||||
| 820 | if($para_type eq 'Data') { | ||||
| 821 | DEBUG and print "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
| 822 | $para->[0] = 'Para'; | ||||
| 823 | $para_type = 'Plain'; | ||||
| 824 | } else { | ||||
| 825 | DEBUG and print "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n"; | ||||
| 826 | } | ||||
| 827 | } else { | ||||
| 828 | DEBUG and print "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n"; | ||||
| 829 | $para->[0] = $para_type = 'Data'; | ||||
| 830 | } | ||||
| 831 | } | ||||
| 832 | } | ||||
| 833 | |||||
| 834 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| 835 | if($para_type eq 'Plain') { | ||||
| 836 | $self->_ponder_Plain($para); | ||||
| 837 | } elsif($para_type eq 'Verbatim') { | ||||
| 838 | $self->_ponder_Verbatim($para); | ||||
| 839 | } elsif($para_type eq 'Data') { | ||||
| 840 | $self->_ponder_Data($para); | ||||
| 841 | } else { | ||||
| 842 | die "\$para type is $para_type -- how did that happen?"; | ||||
| 843 | # Shouldn't happen. | ||||
| 844 | } | ||||
| 845 | |||||
| 846 | #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
| 847 | $para->[0] =~ s/^[~=]//s; | ||||
| 848 | |||||
| 849 | DEBUG and print "\n", pretty($para), "\n"; | ||||
| 850 | |||||
| 851 | # traverse the treelet (which might well be just one string scalar) | ||||
| 852 | $self->{'content_seen'} ||= 1; | ||||
| 853 | $self->_traverse_treelet_bit(@$para); | ||||
| 854 | } | ||||
| 855 | } | ||||
| 856 | |||||
| 857 | return; | ||||
| 858 | } | ||||
| 859 | |||||
| 860 | ########################################################################### | ||||
| 861 | # The sub-ponderers... | ||||
| 862 | |||||
| - - | |||||
| 865 | sub _ponder_for { | ||||
| 866 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 867 | |||||
| 868 | # Fake it out as a begin/end | ||||
| 869 | my $target; | ||||
| 870 | |||||
| 871 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 872 | DEBUG > 1 and print "Ignoring ignorable =for\n"; | ||||
| 873 | return 1; | ||||
| 874 | } | ||||
| 875 | |||||
| 876 | for(my $i = 2; $i < @$para; ++$i) { | ||||
| 877 | if($para->[$i] =~ s/^\s*(\S+)\s*//s) { | ||||
| 878 | $target = $1; | ||||
| 879 | last; | ||||
| 880 | } | ||||
| 881 | } | ||||
| 882 | unless(defined $target) { | ||||
| 883 | $self->whine( | ||||
| 884 | $para->[1]{'start_line'}, | ||||
| 885 | "=for without a target?" | ||||
| 886 | ); | ||||
| 887 | return 1; | ||||
| 888 | } | ||||
| 889 | DEBUG > 1 and | ||||
| 890 | print "Faking out a =for $target as a =begin $target / =end $target\n"; | ||||
| 891 | |||||
| 892 | $para->[0] = 'Data'; | ||||
| 893 | |||||
| 894 | unshift @$paras, | ||||
| 895 | ['=begin', | ||||
| 896 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
| 897 | $target, | ||||
| 898 | ], | ||||
| 899 | $para, | ||||
| 900 | ['=end', | ||||
| 901 | {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'}, | ||||
| 902 | $target, | ||||
| 903 | ], | ||||
| 904 | ; | ||||
| 905 | |||||
| 906 | return 1; | ||||
| 907 | } | ||||
| 908 | |||||
| 909 | sub _ponder_begin { | ||||
| 910 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 911 | my $content = join ' ', splice @$para, 2; | ||||
| 912 | $content =~ s/^\s+//s; | ||||
| 913 | $content =~ s/\s+$//s; | ||||
| 914 | unless(length($content)) { | ||||
| 915 | $self->whine( | ||||
| 916 | $para->[1]{'start_line'}, | ||||
| 917 | "=begin without a target?" | ||||
| 918 | ); | ||||
| 919 | DEBUG and print "Ignoring targetless =begin\n"; | ||||
| 920 | return 1; | ||||
| 921 | } | ||||
| 922 | |||||
| 923 | my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/; | ||||
| 924 | $para->[1]{'title'} = $title if ($title); | ||||
| 925 | $para->[1]{'target'} = $target; # without any ':' | ||||
| 926 | $content = $target; # strip off the title | ||||
| 927 | |||||
| 928 | $content =~ s/^:!/!:/s; | ||||
| 929 | my $neg; # whether this is a negation-match | ||||
| 930 | $neg = 1 if $content =~ s/^!//s; | ||||
| 931 | my $to_resolve; # whether to process formatting codes | ||||
| 932 | $to_resolve = 1 if $content =~ s/^://s; | ||||
| 933 | |||||
| 934 | my $dont_ignore; # whether this target matches us | ||||
| 935 | |||||
| 936 | foreach my $target_name ( | ||||
| 937 | split(',', $content, -1), | ||||
| 938 | $neg ? () : '*' | ||||
| 939 | ) { | ||||
| 940 | DEBUG > 2 and | ||||
| 941 | print " Considering whether =begin $content matches $target_name\n"; | ||||
| 942 | next unless $self->{'accept_targets'}{$target_name}; | ||||
| 943 | |||||
| 944 | DEBUG > 2 and | ||||
| 945 | print " It DOES match the acceptable target $target_name!\n"; | ||||
| 946 | $to_resolve = 1 | ||||
| 947 | if $self->{'accept_targets'}{$target_name} eq 'force_resolve'; | ||||
| 948 | $dont_ignore = 1; | ||||
| 949 | $para->[1]{'target_matching'} = $target_name; | ||||
| 950 | last; # stop looking at other target names | ||||
| 951 | } | ||||
| 952 | |||||
| 953 | if($neg) { | ||||
| 954 | if( $dont_ignore ) { | ||||
| 955 | $dont_ignore = ''; | ||||
| 956 | delete $para->[1]{'target_matching'}; | ||||
| 957 | DEBUG > 2 and print " But the leading ! means that this is a NON-match!\n"; | ||||
| 958 | } else { | ||||
| 959 | $dont_ignore = 1; | ||||
| 960 | $para->[1]{'target_matching'} = '!'; | ||||
| 961 | DEBUG > 2 and print " But the leading ! means that this IS a match!\n"; | ||||
| 962 | } | ||||
| 963 | } | ||||
| 964 | |||||
| 965 | $para->[0] = '=for'; # Just what we happen to call these, internally | ||||
| 966 | $para->[1]{'~really'} ||= '=begin'; | ||||
| 967 | $para->[1]{'~ignore'} = (! $dont_ignore) || 0; | ||||
| 968 | $para->[1]{'~resolve'} = $to_resolve || 0; | ||||
| 969 | |||||
| 970 | DEBUG > 1 and print " Making note to ", $dont_ignore ? 'not ' : '', | ||||
| 971 | "ignore contents of this region\n"; | ||||
| 972 | DEBUG > 1 and $dont_ignore and print " Making note to treat contents as ", | ||||
| 973 | ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n"; | ||||
| 974 | DEBUG > 1 and print " (Stack now: ", $self->_dump_curr_open(), ")\n"; | ||||
| 975 | |||||
| 976 | push @$curr_open, $para; | ||||
| 977 | if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 978 | DEBUG > 1 and print "Ignoring ignorable =begin\n"; | ||||
| 979 | } else { | ||||
| 980 | $self->{'content_seen'} ||= 1; | ||||
| 981 | $self->_handle_element_start((my $scratch='for'), $para->[1]); | ||||
| 982 | } | ||||
| 983 | |||||
| 984 | return 1; | ||||
| 985 | } | ||||
| 986 | |||||
| 987 | sub _ponder_end { | ||||
| 988 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 989 | my $content = join ' ', splice @$para, 2; | ||||
| 990 | $content =~ s/^\s+//s; | ||||
| 991 | $content =~ s/\s+$//s; | ||||
| 992 | DEBUG and print "Ogling '=end $content' directive\n"; | ||||
| 993 | |||||
| 994 | unless(length($content)) { | ||||
| 995 | $self->whine( | ||||
| 996 | $para->[1]{'start_line'}, | ||||
| 997 | "'=end' without a target?" . ( | ||||
| 998 | ( @$curr_open and $curr_open->[-1][0] eq '=for' ) | ||||
| 999 | ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' ) | ||||
| 1000 | : '' | ||||
| 1001 | ) | ||||
| 1002 | ); | ||||
| 1003 | DEBUG and print "Ignoring targetless =end\n"; | ||||
| 1004 | return 1; | ||||
| 1005 | } | ||||
| 1006 | |||||
| 1007 | unless($content =~ m/^\S+$/) { # i.e., unless it's one word | ||||
| 1008 | $self->whine( | ||||
| 1009 | $para->[1]{'start_line'}, | ||||
| 1010 | "'=end $content' is invalid. (Stack: " | ||||
| 1011 | . $self->_dump_curr_open() . ')' | ||||
| 1012 | ); | ||||
| 1013 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
| 1014 | return 1; | ||||
| 1015 | } | ||||
| 1016 | |||||
| 1017 | unless(@$curr_open and $curr_open->[-1][0] eq '=for') { | ||||
| 1018 | $self->whine( | ||||
| 1019 | $para->[1]{'start_line'}, | ||||
| 1020 | "=end $content without matching =begin. (Stack: " | ||||
| 1021 | . $self->_dump_curr_open() . ')' | ||||
| 1022 | ); | ||||
| 1023 | DEBUG and print "Ignoring mistargetted =end $content\n"; | ||||
| 1024 | return 1; | ||||
| 1025 | } | ||||
| 1026 | |||||
| 1027 | unless($content eq $curr_open->[-1][1]{'target'}) { | ||||
| 1028 | $self->whine( | ||||
| 1029 | $para->[1]{'start_line'}, | ||||
| 1030 | "=end $content doesn't match =begin " | ||||
| 1031 | . $curr_open->[-1][1]{'target'} | ||||
| 1032 | . ". (Stack: " | ||||
| 1033 | . $self->_dump_curr_open() . ')' | ||||
| 1034 | ); | ||||
| 1035 | DEBUG and print "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n"; | ||||
| 1036 | return 1; | ||||
| 1037 | } | ||||
| 1038 | |||||
| 1039 | # Else it's okay to close... | ||||
| 1040 | if(grep $_->[1]{'~ignore'}, @$curr_open) { | ||||
| 1041 | DEBUG > 1 and print "Not firing any event for this =end $content because in an ignored region\n"; | ||||
| 1042 | # And that may be because of this to-be-closed =for region, or some | ||||
| 1043 | # other one, but it doesn't matter. | ||||
| 1044 | } else { | ||||
| 1045 | $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'}; | ||||
| 1046 | # what's that for? | ||||
| 1047 | |||||
| 1048 | $self->{'content_seen'} ||= 1; | ||||
| 1049 | $self->_handle_element_end( my $scratch = 'for', $para->[1]); | ||||
| 1050 | } | ||||
| 1051 | DEBUG > 1 and print "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n"; | ||||
| 1052 | pop @$curr_open; | ||||
| 1053 | |||||
| 1054 | return 1; | ||||
| 1055 | } | ||||
| 1056 | |||||
| 1057 | sub _ponder_doc_end { | ||||
| 1058 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1059 | if(@$curr_open) { # Deal with things left open | ||||
| 1060 | DEBUG and print "Stack is nonempty at end-document: (", | ||||
| 1061 | $self->_dump_curr_open(), ")\n"; | ||||
| 1062 | |||||
| 1063 | DEBUG > 9 and print "Stack: ", pretty($curr_open), "\n"; | ||||
| 1064 | unshift @$paras, $self->_closers_for_all_curr_open; | ||||
| 1065 | # Make sure there is exactly one ~end in the parastack, at the end: | ||||
| 1066 | @$paras = grep $_->[0] ne '~end', @$paras; | ||||
| 1067 | push @$paras, $para, $para; | ||||
| 1068 | # We need two -- once for the next cycle where we | ||||
| 1069 | # generate errata, and then another to be at the end | ||||
| 1070 | # when that loop back around to process the errata. | ||||
| 1071 | return 1; | ||||
| 1072 | |||||
| 1073 | } else { | ||||
| 1074 | DEBUG and print "Okay, stack is empty now.\n"; | ||||
| 1075 | } | ||||
| 1076 | |||||
| 1077 | # Try generating errata section, if applicable | ||||
| 1078 | unless($self->{'~tried_gen_errata'}) { | ||||
| 1079 | $self->{'~tried_gen_errata'} = 1; | ||||
| 1080 | my @extras = $self->_gen_errata(); | ||||
| 1081 | if(@extras) { | ||||
| 1082 | unshift @$paras, @extras; | ||||
| 1083 | DEBUG and print "Generated errata... relooping...\n"; | ||||
| 1084 | return 1; # I.e., loop around again to process these fake-o paragraphs | ||||
| 1085 | } | ||||
| 1086 | } | ||||
| 1087 | |||||
| 1088 | splice @$paras; # Well, that's that for this paragraph buffer. | ||||
| 1089 | DEBUG and print "Throwing end-document event.\n"; | ||||
| 1090 | |||||
| 1091 | $self->_handle_element_end( my $scratch = 'Document' ); | ||||
| 1092 | return 1; # Hasta la byebye | ||||
| 1093 | } | ||||
| 1094 | |||||
| 1095 | sub _ponder_pod { | ||||
| 1096 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1097 | $self->whine( | ||||
| 1098 | $para->[1]{'start_line'}, | ||||
| 1099 | "=pod directives shouldn't be over one line long! Ignoring all " | ||||
| 1100 | . (@$para - 2) . " lines of content" | ||||
| 1101 | ) if @$para > 3; | ||||
| 1102 | |||||
| 1103 | # Content ignored unless 'pod_handler' is set | ||||
| 1104 | if (my $pod_handler = $self->{'pod_handler'}) { | ||||
| 1105 | my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2]; | ||||
| 1106 | $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output | ||||
| 1107 | $pod_handler->($line, $line_num, $self); | ||||
| 1108 | } | ||||
| 1109 | |||||
| 1110 | # The surrounding methods set content_seen, so let us remain consistent. | ||||
| 1111 | # I do not know why it was not here before -- should it not be here? | ||||
| 1112 | # $self->{'content_seen'} ||= 1; | ||||
| 1113 | |||||
| 1114 | return; | ||||
| 1115 | } | ||||
| 1116 | |||||
| 1117 | sub _ponder_over { | ||||
| 1118 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1119 | return 1 unless @$paras; | ||||
| 1120 | my $list_type; | ||||
| 1121 | |||||
| 1122 | if($paras->[0][0] eq '=item') { # most common case | ||||
| 1123 | $list_type = $self->_get_initial_item_type($paras->[0]); | ||||
| 1124 | |||||
| 1125 | } elsif($paras->[0][0] eq '=back') { | ||||
| 1126 | # Ignore empty lists by default | ||||
| 1127 | if ($self->{'parse_empty_lists'}) { | ||||
| 1128 | $list_type = 'empty'; | ||||
| 1129 | } else { | ||||
| 1130 | shift @$paras; | ||||
| 1131 | return 1; | ||||
| 1132 | } | ||||
| 1133 | } elsif($paras->[0][0] eq '~end') { | ||||
| 1134 | $self->whine( | ||||
| 1135 | $para->[1]{'start_line'}, | ||||
| 1136 | "=over is the last thing in the document?!" | ||||
| 1137 | ); | ||||
| 1138 | return 1; # But feh, ignore it. | ||||
| 1139 | } else { | ||||
| 1140 | $list_type = 'block'; | ||||
| 1141 | } | ||||
| 1142 | $para->[1]{'~type'} = $list_type; | ||||
| 1143 | push @$curr_open, $para; | ||||
| 1144 | # yes, we reuse the paragraph as a stack item | ||||
| 1145 | |||||
| 1146 | my $content = join ' ', splice @$para, 2; | ||||
| 1147 | my $overness; | ||||
| 1148 | if($content =~ m/^\s*$/s) { | ||||
| 1149 | $para->[1]{'indent'} = 4; | ||||
| 1150 | } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) { | ||||
| 1151 | 2 | 21.6ms | 2 | 120µs | # spent 99µs (78+21) within Pod::Simple::BlackBox::BEGIN@1151 which was called:
# once (78µs+21µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1151 # spent 99µs making 1 call to Pod::Simple::BlackBox::BEGIN@1151
# spent 21µs making 1 call to integer::unimport |
| 1152 | $para->[1]{'indent'} = $1; | ||||
| 1153 | if($1 == 0) { | ||||
| 1154 | $self->whine( | ||||
| 1155 | $para->[1]{'start_line'}, | ||||
| 1156 | "Can't have a 0 in =over $content" | ||||
| 1157 | ); | ||||
| 1158 | $para->[1]{'indent'} = 4; | ||||
| 1159 | } | ||||
| 1160 | } else { | ||||
| 1161 | $self->whine( | ||||
| 1162 | $para->[1]{'start_line'}, | ||||
| 1163 | "=over should be: '=over' or '=over positive_number'" | ||||
| 1164 | ); | ||||
| 1165 | $para->[1]{'indent'} = 4; | ||||
| 1166 | } | ||||
| 1167 | DEBUG > 1 and print "=over found of type $list_type\n"; | ||||
| 1168 | |||||
| 1169 | $self->{'content_seen'} ||= 1; | ||||
| 1170 | $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]); | ||||
| 1171 | |||||
| 1172 | return; | ||||
| 1173 | } | ||||
| 1174 | |||||
| 1175 | sub _ponder_back { | ||||
| 1176 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1177 | # TODO: fire off </item-number> or </item-bullet> or </item-text> ?? | ||||
| 1178 | |||||
| 1179 | my $content = join ' ', splice @$para, 2; | ||||
| 1180 | if($content =~ m/\S/) { | ||||
| 1181 | $self->whine( | ||||
| 1182 | $para->[1]{'start_line'}, | ||||
| 1183 | "=back doesn't take any parameters, but you said =back $content" | ||||
| 1184 | ); | ||||
| 1185 | } | ||||
| 1186 | |||||
| 1187 | if(@$curr_open and $curr_open->[-1][0] eq '=over') { | ||||
| 1188 | DEBUG > 1 and print "=back happily closes matching =over\n"; | ||||
| 1189 | # Expected case: we're closing the most recently opened thing | ||||
| 1190 | #my $over = pop @$curr_open; | ||||
| 1191 | $self->{'content_seen'} ||= 1; | ||||
| 1192 | $self->_handle_element_end( my $scratch = | ||||
| 1193 | 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1] | ||||
| 1194 | ); | ||||
| 1195 | } else { | ||||
| 1196 | DEBUG > 1 and print "=back found without a matching =over. Stack: (", | ||||
| 1197 | join(', ', map $_->[0], @$curr_open), ").\n"; | ||||
| 1198 | $self->whine( | ||||
| 1199 | $para->[1]{'start_line'}, | ||||
| 1200 | '=back without =over' | ||||
| 1201 | ); | ||||
| 1202 | return 1; # and ignore it | ||||
| 1203 | } | ||||
| 1204 | } | ||||
| 1205 | |||||
| 1206 | sub _ponder_item { | ||||
| 1207 | my ($self,$para,$curr_open,$paras) = @_; | ||||
| 1208 | my $over; | ||||
| 1209 | unless(@$curr_open and | ||||
| 1210 | $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) { | ||||
| 1211 | $self->whine( | ||||
| 1212 | $para->[1]{'start_line'}, | ||||
| 1213 | "'=item' outside of any '=over'" | ||||
| 1214 | ); | ||||
| 1215 | unshift @$paras, | ||||
| 1216 | ['=over', {'start_line' => $para->[1]{'start_line'}}, ''], | ||||
| 1217 | $para | ||||
| 1218 | ; | ||||
| 1219 | return 1; | ||||
| 1220 | } | ||||
| 1221 | |||||
| 1222 | |||||
| 1223 | my $over_type = $over->[1]{'~type'}; | ||||
| 1224 | |||||
| 1225 | if(!$over_type) { | ||||
| 1226 | # Shouldn't happen1 | ||||
| 1227 | die "Typeless over in stack, starting at line " | ||||
| 1228 | . $over->[1]{'start_line'}; | ||||
| 1229 | |||||
| 1230 | } elsif($over_type eq 'block') { | ||||
| 1231 | unless($curr_open->[-1][1]{'~bitched_about'}) { | ||||
| 1232 | $curr_open->[-1][1]{'~bitched_about'} = 1; | ||||
| 1233 | $self->whine( | ||||
| 1234 | $curr_open->[-1][1]{'start_line'}, | ||||
| 1235 | "You can't have =items (as at line " | ||||
| 1236 | . $para->[1]{'start_line'} | ||||
| 1237 | . ") unless the first thing after the =over is an =item" | ||||
| 1238 | ); | ||||
| 1239 | } | ||||
| 1240 | # Just turn it into a paragraph and reconsider it | ||||
| 1241 | $para->[0] = '~Para'; | ||||
| 1242 | unshift @$paras, $para; | ||||
| 1243 | return 1; | ||||
| 1244 | |||||
| 1245 | } elsif($over_type eq 'text') { | ||||
| 1246 | my $item_type = $self->_get_item_type($para); | ||||
| 1247 | # That kills the content of the item if it's a number or bullet. | ||||
| 1248 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1249 | |||||
| 1250 | if($item_type eq 'text') { | ||||
| 1251 | # Nothing special needs doing for 'text' | ||||
| 1252 | } elsif($item_type eq 'number' or $item_type eq 'bullet') { | ||||
| 1253 | die "Unknown item type $item_type" | ||||
| 1254 | unless $item_type eq 'number' or $item_type eq 'bullet'; | ||||
| 1255 | # Undo our clobbering: | ||||
| 1256 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1257 | delete $para->[1]{'number'}; | ||||
| 1258 | # Only a PROPER item-number element is allowed | ||||
| 1259 | # to have a number attribute. | ||||
| 1260 | } else { | ||||
| 1261 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1262 | } | ||||
| 1263 | |||||
| 1264 | # =item-text thingies don't need any assimilation, it seems. | ||||
| 1265 | |||||
| 1266 | } elsif($over_type eq 'number') { | ||||
| 1267 | my $item_type = $self->_get_item_type($para); | ||||
| 1268 | # That kills the content of the item if it's a number or bullet. | ||||
| 1269 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1270 | |||||
| 1271 | my $expected_value = ++ $curr_open->[-1][1]{'~counter'}; | ||||
| 1272 | |||||
| 1273 | if($item_type eq 'bullet') { | ||||
| 1274 | # Hm, it's not numeric. Correct for this. | ||||
| 1275 | $para->[1]{'number'} = $expected_value; | ||||
| 1276 | $self->whine( | ||||
| 1277 | $para->[1]{'start_line'}, | ||||
| 1278 | "Expected '=item $expected_value'" | ||||
| 1279 | ); | ||||
| 1280 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1281 | # restore the bullet, blocking the assimilation of next para | ||||
| 1282 | |||||
| 1283 | } elsif($item_type eq 'text') { | ||||
| 1284 | # Hm, it's not numeric. Correct for this. | ||||
| 1285 | $para->[1]{'number'} = $expected_value; | ||||
| 1286 | $self->whine( | ||||
| 1287 | $para->[1]{'start_line'}, | ||||
| 1288 | "Expected '=item $expected_value'" | ||||
| 1289 | ); | ||||
| 1290 | # Text content will still be there and will block next ~Para | ||||
| 1291 | |||||
| 1292 | } elsif($item_type ne 'number') { | ||||
| 1293 | die "Unknown item type $item_type"; # should never happen | ||||
| 1294 | |||||
| 1295 | } elsif($expected_value == $para->[1]{'number'}) { | ||||
| 1296 | DEBUG > 1 and print " Numeric item has the expected value of $expected_value\n"; | ||||
| 1297 | |||||
| 1298 | } else { | ||||
| 1299 | DEBUG > 1 and print " Numeric item has ", $para->[1]{'number'}, | ||||
| 1300 | " instead of the expected value of $expected_value\n"; | ||||
| 1301 | $self->whine( | ||||
| 1302 | $para->[1]{'start_line'}, | ||||
| 1303 | "You have '=item " . $para->[1]{'number'} . | ||||
| 1304 | "' instead of the expected '=item $expected_value'" | ||||
| 1305 | ); | ||||
| 1306 | $para->[1]{'number'} = $expected_value; # correcting!! | ||||
| 1307 | } | ||||
| 1308 | |||||
| 1309 | if(@$para == 2) { | ||||
| 1310 | # For the cases where we /didn't/ push to @$para | ||||
| 1311 | if($paras->[0][0] eq '~Para') { | ||||
| 1312 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1313 | push @$para, splice @{shift @$paras},2; | ||||
| 1314 | } else { | ||||
| 1315 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1316 | push @$para, ''; # Just so it's not contentless | ||||
| 1317 | } | ||||
| 1318 | } | ||||
| 1319 | |||||
| 1320 | |||||
| 1321 | } elsif($over_type eq 'bullet') { | ||||
| 1322 | my $item_type = $self->_get_item_type($para); | ||||
| 1323 | # That kills the content of the item if it's a number or bullet. | ||||
| 1324 | DEBUG and print " Item is of type ", $para->[0], " under $over_type\n"; | ||||
| 1325 | |||||
| 1326 | if($item_type eq 'bullet') { | ||||
| 1327 | # as expected! | ||||
| 1328 | |||||
| 1329 | if( $para->[1]{'~_freaky_para_hack'} ) { | ||||
| 1330 | DEBUG and print "Accomodating '=item * Foo' tolerance hack.\n"; | ||||
| 1331 | push @$para, delete $para->[1]{'~_freaky_para_hack'}; | ||||
| 1332 | } | ||||
| 1333 | |||||
| 1334 | } elsif($item_type eq 'number') { | ||||
| 1335 | $self->whine( | ||||
| 1336 | $para->[1]{'start_line'}, | ||||
| 1337 | "Expected '=item *'" | ||||
| 1338 | ); | ||||
| 1339 | push @$para, $para->[1]{'~orig_content'}; | ||||
| 1340 | # and block assimilation of the next paragraph | ||||
| 1341 | delete $para->[1]{'number'}; | ||||
| 1342 | # Only a PROPER item-number element is allowed | ||||
| 1343 | # to have a number attribute. | ||||
| 1344 | } elsif($item_type eq 'text') { | ||||
| 1345 | $self->whine( | ||||
| 1346 | $para->[1]{'start_line'}, | ||||
| 1347 | "Expected '=item *'" | ||||
| 1348 | ); | ||||
| 1349 | # But doesn't need processing. But it'll block assimilation | ||||
| 1350 | # of the next para. | ||||
| 1351 | } else { | ||||
| 1352 | die "Unhandled item type $item_type"; # should never happen | ||||
| 1353 | } | ||||
| 1354 | |||||
| 1355 | if(@$para == 2) { | ||||
| 1356 | # For the cases where we /didn't/ push to @$para | ||||
| 1357 | if($paras->[0][0] eq '~Para') { | ||||
| 1358 | DEBUG and print "Assimilating following ~Para content into $over_type item\n"; | ||||
| 1359 | push @$para, splice @{shift @$paras},2; | ||||
| 1360 | } else { | ||||
| 1361 | DEBUG and print "Can't assimilate following ", $paras->[0][0], "\n"; | ||||
| 1362 | push @$para, ''; # Just so it's not contentless | ||||
| 1363 | } | ||||
| 1364 | } | ||||
| 1365 | |||||
| 1366 | } else { | ||||
| 1367 | die "Unhandled =over type \"$over_type\"?"; | ||||
| 1368 | # Shouldn't happen! | ||||
| 1369 | } | ||||
| 1370 | $para->[0] .= '-' . $over_type; | ||||
| 1371 | |||||
| 1372 | return; | ||||
| 1373 | } | ||||
| 1374 | |||||
| 1375 | sub _ponder_Plain { | ||||
| 1376 | my ($self,$para) = @_; | ||||
| 1377 | DEBUG and print " giving plain treatment...\n"; | ||||
| 1378 | unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' ) | ||||
| 1379 | or $para->[1]{'~cooked'} | ||||
| 1380 | ) { | ||||
| 1381 | push @$para, | ||||
| 1382 | @{$self->_make_treelet( | ||||
| 1383 | join("\n", splice(@$para, 2)), | ||||
| 1384 | $para->[1]{'start_line'} | ||||
| 1385 | )}; | ||||
| 1386 | } | ||||
| 1387 | # Empty paragraphs don't need a treelet for any reason I can see. | ||||
| 1388 | # And precooked paragraphs already have a treelet. | ||||
| 1389 | return; | ||||
| 1390 | } | ||||
| 1391 | |||||
| 1392 | sub _ponder_Verbatim { | ||||
| 1393 | my ($self,$para) = @_; | ||||
| 1394 | DEBUG and print " giving verbatim treatment...\n"; | ||||
| 1395 | |||||
| 1396 | $para->[1]{'xml:space'} = 'preserve'; | ||||
| 1397 | |||||
| 1398 | my $indent = $self->strip_verbatim_indent; | ||||
| 1399 | if ($indent && ref $indent eq 'CODE') { | ||||
| 1400 | my @shifted = (shift @{$para}, shift @{$para}); | ||||
| 1401 | $indent = $indent->($para); | ||||
| 1402 | unshift @{$para}, @shifted; | ||||
| 1403 | } | ||||
| 1404 | |||||
| 1405 | for(my $i = 2; $i < @$para; $i++) { | ||||
| 1406 | foreach my $line ($para->[$i]) { # just for aliasing | ||||
| 1407 | # Strip indentation. | ||||
| 1408 | $line =~ s/^\Q$indent// if $indent | ||||
| 1409 | && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted}); | ||||
| 1410 | while( $line =~ | ||||
| 1411 | # Sort of adapted from Text::Tabs -- yes, it's hardwired in that | ||||
| 1412 | # tabs are at every EIGHTH column. For portability, it has to be | ||||
| 1413 | # one setting everywhere, and 8th wins. | ||||
| 1414 | s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e | ||||
| 1415 | ) {} | ||||
| 1416 | |||||
| 1417 | # TODO: whinge about (or otherwise treat) unindented or overlong lines | ||||
| 1418 | |||||
| 1419 | } | ||||
| 1420 | } | ||||
| 1421 | |||||
| 1422 | # Now the VerbatimFormatted hoodoo... | ||||
| 1423 | if( $self->{'accept_codes'} and | ||||
| 1424 | $self->{'accept_codes'}{'VerbatimFormatted'} | ||||
| 1425 | ) { | ||||
| 1426 | while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para } | ||||
| 1427 | # Kill any number of terminal newlines | ||||
| 1428 | $self->_verbatim_format($para); | ||||
| 1429 | } elsif ($self->{'codes_in_verbatim'}) { | ||||
| 1430 | push @$para, | ||||
| 1431 | @{$self->_make_treelet( | ||||
| 1432 | join("\n", splice(@$para, 2)), | ||||
| 1433 | $para->[1]{'start_line'}, $para->[1]{'xml:space'} | ||||
| 1434 | )}; | ||||
| 1435 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
| 1436 | } else { | ||||
| 1437 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
| 1438 | $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines | ||||
| 1439 | } | ||||
| 1440 | return; | ||||
| 1441 | } | ||||
| 1442 | |||||
| 1443 | sub _ponder_Data { | ||||
| 1444 | my ($self,$para) = @_; | ||||
| 1445 | DEBUG and print " giving data treatment...\n"; | ||||
| 1446 | $para->[1]{'xml:space'} = 'preserve'; | ||||
| 1447 | push @$para, join "\n", splice(@$para, 2) if @$para > 3; | ||||
| 1448 | return; | ||||
| 1449 | } | ||||
| 1450 | |||||
| - - | |||||
| 1454 | ########################################################################### | ||||
| 1455 | |||||
| 1456 | sub _traverse_treelet_bit { # for use only by the routine above | ||||
| 1457 | my($self, $name) = splice @_,0,2; | ||||
| 1458 | |||||
| 1459 | my $scratch; | ||||
| 1460 | $self->_handle_element_start(($scratch=$name), shift @_); | ||||
| 1461 | |||||
| 1462 | foreach my $x (@_) { | ||||
| 1463 | if(ref($x)) { | ||||
| 1464 | &_traverse_treelet_bit($self, @$x); | ||||
| 1465 | } else { | ||||
| 1466 | $self->_handle_text($x); | ||||
| 1467 | } | ||||
| 1468 | } | ||||
| 1469 | |||||
| 1470 | $self->_handle_element_end($scratch=$name); | ||||
| 1471 | return; | ||||
| 1472 | } | ||||
| 1473 | |||||
| 1474 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1475 | |||||
| 1476 | sub _closers_for_all_curr_open { | ||||
| 1477 | my $self = $_[0]; | ||||
| 1478 | my @closers; | ||||
| 1479 | foreach my $still_open (@{ $self->{'curr_open'} || return }) { | ||||
| 1480 | my @copy = @$still_open; | ||||
| 1481 | $copy[1] = {%{ $copy[1] }}; | ||||
| 1482 | #$copy[1]{'start_line'} = -1; | ||||
| 1483 | if($copy[0] eq '=for') { | ||||
| 1484 | $copy[0] = '=end'; | ||||
| 1485 | } elsif($copy[0] eq '=over') { | ||||
| 1486 | $copy[0] = '=back'; | ||||
| 1487 | } else { | ||||
| 1488 | die "I don't know how to auto-close an open $copy[0] region"; | ||||
| 1489 | } | ||||
| 1490 | |||||
| 1491 | unless( @copy > 2 ) { | ||||
| 1492 | push @copy, $copy[1]{'target'}; | ||||
| 1493 | $copy[-1] = '' unless defined $copy[-1]; | ||||
| 1494 | # since =over's don't have targets | ||||
| 1495 | } | ||||
| 1496 | |||||
| 1497 | $copy[1]{'fake-closer'} = 1; | ||||
| 1498 | |||||
| 1499 | DEBUG and print "Queuing up fake-o event: ", pretty(\@copy), "\n"; | ||||
| 1500 | unshift @closers, \@copy; | ||||
| 1501 | } | ||||
| 1502 | return @closers; | ||||
| 1503 | } | ||||
| 1504 | |||||
| 1505 | #-------------------------------------------------------------------------- | ||||
| 1506 | |||||
| 1507 | sub _verbatim_format { | ||||
| 1508 | my($it, $p) = @_; | ||||
| 1509 | |||||
| 1510 | my $formatting; | ||||
| 1511 | |||||
| 1512 | for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines | ||||
| 1513 | DEBUG and print "_verbatim_format appends a newline to $i: $p->[$i]\n"; | ||||
| 1514 | $p->[$i] .= "\n"; | ||||
| 1515 | # Unlike with simple Verbatim blocks, we don't end up just doing | ||||
| 1516 | # a join("\n", ...) on the contents, so we have to append a | ||||
| 1517 | # newline to ever line, and then nix the last one later. | ||||
| 1518 | } | ||||
| 1519 | |||||
| 1520 | if( DEBUG > 4 ) { | ||||
| 1521 | print "<<\n"; | ||||
| 1522 | for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines | ||||
| 1523 | print "_verbatim_format $i: $p->[$i]"; | ||||
| 1524 | } | ||||
| 1525 | print ">>\n"; | ||||
| 1526 | } | ||||
| 1527 | |||||
| 1528 | for(my $i = $#$p; $i > 2; $i--) { | ||||
| 1529 | # work backwards over the lines, except the first (#2) | ||||
| 1530 | |||||
| 1531 | #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s | ||||
| 1532 | # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s; | ||||
| 1533 | # look at a formatty line preceding a nonformatty one | ||||
| 1534 | DEBUG > 5 and print "Scrutinizing line $i: $$p[$i]\n"; | ||||
| 1535 | if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) { | ||||
| 1536 | DEBUG > 5 and print " It's a formatty line. ", | ||||
| 1537 | "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n"; | ||||
| 1538 | |||||
| 1539 | if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) { | ||||
| 1540 | DEBUG > 5 and print " Previous line is formatty! Skipping this one.\n"; | ||||
| 1541 | next; | ||||
| 1542 | } else { | ||||
| 1543 | DEBUG > 5 and print " Previous line is non-formatty! Yay!\n"; | ||||
| 1544 | } | ||||
| 1545 | } else { | ||||
| 1546 | DEBUG > 5 and print " It's not a formatty line. Ignoring\n"; | ||||
| 1547 | next; | ||||
| 1548 | } | ||||
| 1549 | |||||
| 1550 | # A formatty line has to have #: in the first two columns, and uses | ||||
| 1551 | # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic. | ||||
| 1552 | # Example: | ||||
| 1553 | # What do you want? i like pie. [or whatever] | ||||
| 1554 | # #:^^^^^^^^^^^^^^^^^ ///////////// | ||||
| 1555 | |||||
| 1556 | |||||
| 1557 | DEBUG > 4 and print "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n"; | ||||
| 1558 | |||||
| 1559 | $formatting = ' ' . $1; | ||||
| 1560 | $formatting =~ s/\s+$//s; # nix trailing whitespace | ||||
| 1561 | unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op | ||||
| 1562 | splice @$p,$i,1; # remove this line | ||||
| 1563 | $i--; # don't consider next line | ||||
| 1564 | next; | ||||
| 1565 | } | ||||
| 1566 | |||||
| 1567 | if( length($formatting) >= length($p->[$i-1]) ) { | ||||
| 1568 | $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' '; | ||||
| 1569 | } else { | ||||
| 1570 | $formatting .= ' ' x (length($p->[$i-1]) - length($formatting)); | ||||
| 1571 | } | ||||
| 1572 | # Make $formatting and the previous line be exactly the same length, | ||||
| 1573 | # with $formatting having a " " as the last character. | ||||
| 1574 | |||||
| 1575 | DEBUG > 4 and print "Formatting <$formatting> on <", $p->[$i-1], ">\n"; | ||||
| 1576 | |||||
| 1577 | |||||
| 1578 | my @new_line; | ||||
| 1579 | while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) { | ||||
| 1580 | #print "Format matches $1\n"; | ||||
| 1581 | |||||
| 1582 | if($2) { | ||||
| 1583 | #print "SKIPPING <$2>\n"; | ||||
| 1584 | push @new_line, | ||||
| 1585 | substr($p->[$i-1], pos($formatting)-length($1), length($1)); | ||||
| 1586 | } else { | ||||
| 1587 | #print "SNARING $+\n"; | ||||
| 1588 | push @new_line, [ | ||||
| 1589 | ( | ||||
| 1590 | $3 ? 'VerbatimB' : | ||||
| 1591 | $4 ? 'VerbatimI' : | ||||
| 1592 | $5 ? 'VerbatimBI' : die("Should never get called") | ||||
| 1593 | ), {}, | ||||
| 1594 | substr($p->[$i-1], pos($formatting)-length($1), length($1)) | ||||
| 1595 | ]; | ||||
| 1596 | #print "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n"; | ||||
| 1597 | } | ||||
| 1598 | } | ||||
| 1599 | my @nixed = | ||||
| 1600 | splice @$p, $i-1, 2, @new_line; # replace myself and the next line | ||||
| 1601 | DEBUG > 10 and print "Nixed count: ", scalar(@nixed), "\n"; | ||||
| 1602 | |||||
| 1603 | DEBUG > 6 and print "New version of the above line is these tokens (", | ||||
| 1604 | scalar(@new_line), "):", | ||||
| 1605 | map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n"; | ||||
| 1606 | $i--; # So the next line we scrutinize is the line before the one | ||||
| 1607 | # that we just went and formatted | ||||
| 1608 | } | ||||
| 1609 | |||||
| 1610 | $p->[0] = 'VerbatimFormatted'; | ||||
| 1611 | |||||
| 1612 | # Collapse adjacent text nodes, just for kicks. | ||||
| 1613 | for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last | ||||
| 1614 | if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) { | ||||
| 1615 | DEBUG > 5 and print "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n"; | ||||
| 1616 | $p->[$i] .= splice @$p, $i+1, 1; # merge | ||||
| 1617 | --$i; # and back up | ||||
| 1618 | } | ||||
| 1619 | } | ||||
| 1620 | |||||
| 1621 | # Now look for the last text token, and remove the terminal newline | ||||
| 1622 | for( my $i = $#$p; $i >= 2; $i-- ) { | ||||
| 1623 | # work backwards over the tokens, even the first | ||||
| 1624 | if( !ref($p->[$i]) ) { | ||||
| 1625 | if($p->[$i] =~ s/\n$//s) { | ||||
| 1626 | DEBUG > 5 and print "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n"; | ||||
| 1627 | } else { | ||||
| 1628 | DEBUG > 5 and print | ||||
| 1629 | "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n"; | ||||
| 1630 | } | ||||
| 1631 | last; # we only want the next one | ||||
| 1632 | } | ||||
| 1633 | } | ||||
| 1634 | |||||
| 1635 | return; | ||||
| 1636 | } | ||||
| 1637 | |||||
| 1638 | |||||
| 1639 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1640 | |||||
| 1641 | |||||
| 1642 | sub _treelet_from_formatting_codes { | ||||
| 1643 | # Given a paragraph, returns a treelet. Full of scary tokenizing code. | ||||
| 1644 | # Like [ '~Top', {'start_line' => $start_line}, | ||||
| 1645 | # "I like ", | ||||
| 1646 | # [ 'B', {}, "pie" ], | ||||
| 1647 | # "!" | ||||
| 1648 | # ] | ||||
| 1649 | |||||
| 1650 | my($self, $para, $start_line, $preserve_space) = @_; | ||||
| 1651 | |||||
| 1652 | my $treelet = ['~Top', {'start_line' => $start_line},]; | ||||
| 1653 | |||||
| 1654 | unless ($preserve_space || $self->{'preserve_whitespace'}) { | ||||
| 1655 | $para =~ s/\s+/ /g; # collapse and trim all whitespace first. | ||||
| 1656 | $para =~ s/ $//; | ||||
| 1657 | $para =~ s/^ //; | ||||
| 1658 | } | ||||
| 1659 | |||||
| 1660 | # Only apparent problem the above code is that N<< >> turns into | ||||
| 1661 | # N<< >>. But then, word wrapping does that too! So don't do that! | ||||
| 1662 | |||||
| 1663 | my @stack; | ||||
| 1664 | my @lineage = ($treelet); | ||||
| 1665 | my $raw = ''; # raw content of L<> fcode before splitting/processing | ||||
| 1666 | # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed | ||||
| 1667 | # into just 1 ' '. Is this the regex's doing or 'raw's? | ||||
| 1668 | my $inL = 0; | ||||
| 1669 | |||||
| 1670 | DEBUG > 4 and print "Paragraph:\n$para\n\n"; | ||||
| 1671 | |||||
| 1672 | # Here begins our frightening tokenizer RE. The following regex matches | ||||
| 1673 | # text in four main parts: | ||||
| 1674 | # | ||||
| 1675 | # * Start-codes. The first alternative matches C< or C<<, the latter | ||||
| 1676 | # followed by some whitespace. $1 will hold the entire start code | ||||
| 1677 | # (including any space following a multiple-angle-bracket delimiter), | ||||
| 1678 | # and $2 will hold only the additional brackets past the first in a | ||||
| 1679 | # multiple-bracket delimiter. length($2) + 1 will be the number of | ||||
| 1680 | # closing brackets we have to find. | ||||
| 1681 | # | ||||
| 1682 | # * Closing brackets. Match some amount of whitespace followed by | ||||
| 1683 | # multiple close brackets. The logic to see if this closes anything | ||||
| 1684 | # is down below. Note that in order to parse C<< >> correctly, we | ||||
| 1685 | # have to use look-behind (?<=\s\s), since the match of the starting | ||||
| 1686 | # code will have consumed the whitespace. | ||||
| 1687 | # | ||||
| 1688 | # * A single closing bracket, to close a simple code like C<>. | ||||
| 1689 | # | ||||
| 1690 | # * Something that isn't a start or end code. We have to be careful | ||||
| 1691 | # about accepting whitespace, since perlpodspec says that any whitespace | ||||
| 1692 | # before a multiple-bracket closing delimiter should be ignored. | ||||
| 1693 | # | ||||
| 1694 | while($para =~ | ||||
| 1695 | m/\G | ||||
| 1696 | (?: | ||||
| 1697 | # Match starting codes, including the whitespace following a | ||||
| 1698 | # multiple-delimiter start code. $1 gets the whole start code and | ||||
| 1699 | # $2 gets all but one of the <s in the multiple-bracket case. | ||||
| 1700 | ([A-Z]<(?:(<+)\s+)?) | ||||
| 1701 | | | ||||
| 1702 | # Match multiple-bracket end codes. $3 gets the whitespace that | ||||
| 1703 | # should be discarded before an end bracket but kept in other cases | ||||
| 1704 | # and $4 gets the end brackets themselves. | ||||
| 1705 | (\s+|(?<=\s\s))(>{2,}) | ||||
| 1706 | | | ||||
| 1707 | (\s?>) # $5: simple end-codes | ||||
| 1708 | | | ||||
| 1709 | ( # $6: stuff containing no start-codes or end-codes | ||||
| 1710 | (?: | ||||
| 1711 | [^A-Z\s>] | ||||
| 1712 | | | ||||
| 1713 | (?: | ||||
| 1714 | [A-Z](?!<) | ||||
| 1715 | ) | ||||
| 1716 | | | ||||
| 1717 | # whitespace is ok, but we don't want to eat the whitespace before | ||||
| 1718 | # a multiple-bracket end code. | ||||
| 1719 | # NOTE: we may still have problems with e.g. S<< >> | ||||
| 1720 | (?: | ||||
| 1721 | \s(?!\s*>{2,}) | ||||
| 1722 | ) | ||||
| 1723 | )+ | ||||
| 1724 | ) | ||||
| 1725 | ) | ||||
| 1726 | /xgo | ||||
| 1727 | ) { | ||||
| 1728 | DEBUG > 4 and print "\nParagraphic tokenstack = (@stack)\n"; | ||||
| 1729 | if(defined $1) { | ||||
| 1730 | if(defined $2) { | ||||
| 1731 | DEBUG > 3 and print "Found complex start-text code \"$1\"\n"; | ||||
| 1732 | push @stack, length($2) + 1; | ||||
| 1733 | # length of the necessary complex end-code string | ||||
| 1734 | } else { | ||||
| 1735 | DEBUG > 3 and print "Found simple start-text code \"$1\"\n"; | ||||
| 1736 | push @stack, 0; # signal that we're looking for simple | ||||
| 1737 | } | ||||
| 1738 | push @lineage, [ substr($1,0,1), {}, ]; # new node object | ||||
| 1739 | push @{ $lineage[-2] }, $lineage[-1]; | ||||
| 1740 | if ('L' eq substr($1,0,1)) { | ||||
| 1741 | $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator | ||||
| 1742 | $inL = 1; | ||||
| 1743 | } else { | ||||
| 1744 | $raw .= $1 if $inL; | ||||
| 1745 | } | ||||
| 1746 | |||||
| 1747 | } elsif(defined $4) { | ||||
| 1748 | DEBUG > 3 and print "Found apparent complex end-text code \"$3$4\"\n"; | ||||
| 1749 | # This is where it gets messy... | ||||
| 1750 | if(! @stack) { | ||||
| 1751 | # We saw " >>>>" but needed nothing. This is ALL just stuff then. | ||||
| 1752 | DEBUG > 4 and print " But it's really just stuff.\n"; | ||||
| 1753 | push @{ $lineage[-1] }, $3, $4; | ||||
| 1754 | next; | ||||
| 1755 | } elsif(!$stack[-1]) { | ||||
| 1756 | # We saw " >>>>" but needed only ">". Back pos up. | ||||
| 1757 | DEBUG > 4 and print " And that's more than we needed to close simple.\n"; | ||||
| 1758 | push @{ $lineage[-1] }, $3; # That was a for-real space, too. | ||||
| 1759 | pos($para) = pos($para) - length($4) + 1; | ||||
| 1760 | } elsif($stack[-1] == length($4)) { | ||||
| 1761 | # We found " >>>>", and it was exactly what we needed. Commonest case. | ||||
| 1762 | DEBUG > 4 and print " And that's exactly what we needed to close complex.\n"; | ||||
| 1763 | } elsif($stack[-1] < length($4)) { | ||||
| 1764 | # We saw " >>>>" but needed only " >>". Back pos up. | ||||
| 1765 | DEBUG > 4 and print " And that's more than we needed to close complex.\n"; | ||||
| 1766 | pos($para) = pos($para) - length($4) + $stack[-1]; | ||||
| 1767 | } else { | ||||
| 1768 | # We saw " >>>>" but needed " >>>>>>". So this is all just stuff! | ||||
| 1769 | DEBUG > 4 and print " But it's really just stuff, because we needed more.\n"; | ||||
| 1770 | push @{ $lineage[-1] }, $3, $4; | ||||
| 1771 | next; | ||||
| 1772 | } | ||||
| 1773 | #print "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n"; | ||||
| 1774 | |||||
| 1775 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
| 1776 | # Keep the element from being childless | ||||
| 1777 | |||||
| 1778 | pop @stack; | ||||
| 1779 | pop @lineage; | ||||
| 1780 | |||||
| 1781 | unless (@stack) { # not in an L if there are no open fcodes | ||||
| 1782 | $inL = 0; | ||||
| 1783 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
| 1784 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
| 1785 | } | ||||
| 1786 | } | ||||
| 1787 | $raw .= $3.$4 if $inL; | ||||
| 1788 | |||||
| 1789 | } elsif(defined $5) { | ||||
| 1790 | DEBUG > 3 and print "Found apparent simple end-text code \"$5\"\n"; | ||||
| 1791 | |||||
| 1792 | if(@stack and ! $stack[-1]) { | ||||
| 1793 | # We're indeed expecting a simple end-code | ||||
| 1794 | DEBUG > 4 and print " It's indeed an end-code.\n"; | ||||
| 1795 | |||||
| 1796 | if(length($5) == 2) { # There was a space there: " >" | ||||
| 1797 | push @{ $lineage[-1] }, ' '; | ||||
| 1798 | } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element | ||||
| 1799 | push @{ $lineage[-1] }, ''; # keep it from being really childless | ||||
| 1800 | } | ||||
| 1801 | |||||
| 1802 | pop @stack; | ||||
| 1803 | pop @lineage; | ||||
| 1804 | } else { | ||||
| 1805 | DEBUG > 4 and print " It's just stuff.\n"; | ||||
| 1806 | push @{ $lineage[-1] }, $5; | ||||
| 1807 | } | ||||
| 1808 | |||||
| 1809 | unless (@stack) { # not in an L if there are no open fcodes | ||||
| 1810 | $inL = 0; | ||||
| 1811 | if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') { | ||||
| 1812 | $lineage[-1][-1][1]{'raw'} = $raw | ||||
| 1813 | } | ||||
| 1814 | } | ||||
| 1815 | $raw .= $5 if $inL; | ||||
| 1816 | |||||
| 1817 | } elsif(defined $6) { | ||||
| 1818 | DEBUG > 3 and print "Found stuff \"$6\"\n"; | ||||
| 1819 | push @{ $lineage[-1] }, $6; | ||||
| 1820 | $raw .= $6 if $inL; | ||||
| 1821 | # XXX does not capture multiplace whitespaces -- 'raw' ends up with | ||||
| 1822 | # at most 1 leading/trailing whitespace, why not all of it? | ||||
| 1823 | |||||
| 1824 | } else { | ||||
| 1825 | # should never ever ever ever happen | ||||
| 1826 | DEBUG and print "AYYAYAAAAA at line ", __LINE__, "\n"; | ||||
| 1827 | die "SPORK 512512!"; | ||||
| 1828 | } | ||||
| 1829 | } | ||||
| 1830 | |||||
| 1831 | if(@stack) { # Uhoh, some sequences weren't closed. | ||||
| 1832 | my $x= "..."; | ||||
| 1833 | while(@stack) { | ||||
| 1834 | push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] }; | ||||
| 1835 | # Hmmmmm! | ||||
| 1836 | |||||
| 1837 | my $code = (pop @lineage)->[0]; | ||||
| 1838 | my $ender_length = pop @stack; | ||||
| 1839 | if($ender_length) { | ||||
| 1840 | --$ender_length; | ||||
| 1841 | $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length); | ||||
| 1842 | } else { | ||||
| 1843 | $x = $code . "<$x>"; | ||||
| 1844 | } | ||||
| 1845 | } | ||||
| 1846 | DEBUG > 1 and print "Unterminated $x sequence\n"; | ||||
| 1847 | $self->whine($start_line, | ||||
| 1848 | "Unterminated $x sequence", | ||||
| 1849 | ); | ||||
| 1850 | } | ||||
| 1851 | |||||
| 1852 | return $treelet; | ||||
| 1853 | } | ||||
| 1854 | |||||
| 1855 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1856 | |||||
| 1857 | sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol) | ||||
| 1858 | return stringify_lol($_[1]); | ||||
| 1859 | } | ||||
| 1860 | |||||
| 1861 | sub stringify_lol { # function: stringify_lol($lol) | ||||
| 1862 | my $string_form = ''; | ||||
| 1863 | _stringify_lol( $_[0] => \$string_form ); | ||||
| 1864 | return $string_form; | ||||
| 1865 | } | ||||
| 1866 | |||||
| 1867 | sub _stringify_lol { # the real recursor | ||||
| 1868 | my($lol, $to) = @_; | ||||
| 1869 | for(my $i = 2; $i < @$lol; ++$i) { | ||||
| 1870 | if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) { | ||||
| 1871 | _stringify_lol( $lol->[$i], $to); # recurse! | ||||
| 1872 | } else { | ||||
| 1873 | $$to .= $lol->[$i]; | ||||
| 1874 | } | ||||
| 1875 | } | ||||
| 1876 | return; | ||||
| 1877 | } | ||||
| 1878 | |||||
| 1879 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1880 | |||||
| 1881 | sub _dump_curr_open { # return a string representation of the stack | ||||
| 1882 | my $curr_open = $_[0]{'curr_open'}; | ||||
| 1883 | |||||
| 1884 | return '[empty]' unless @$curr_open; | ||||
| 1885 | return join '; ', | ||||
| 1886 | map {; | ||||
| 1887 | ($_->[0] eq '=for') | ||||
| 1888 | ? ( ($_->[1]{'~really'} || '=over') | ||||
| 1889 | . ' ' . $_->[1]{'target'}) | ||||
| 1890 | : $_->[0] | ||||
| 1891 | } | ||||
| 1892 | @$curr_open | ||||
| 1893 | ; | ||||
| 1894 | } | ||||
| 1895 | |||||
| 1896 | ########################################################################### | ||||
| 1897 | 1 | 34µs | my %pretty_form = ( | ||
| 1898 | "\a" => '\a', # ding! | ||||
| 1899 | "\b" => '\b', # BS | ||||
| 1900 | "\e" => '\e', # ESC | ||||
| 1901 | "\f" => '\f', # FF | ||||
| 1902 | "\t" => '\t', # tab | ||||
| 1903 | "\cm" => '\cm', | ||||
| 1904 | "\cj" => '\cj', | ||||
| 1905 | "\n" => '\n', # probably overrides one of either \cm or \cj | ||||
| 1906 | '"' => '\"', | ||||
| 1907 | '\\' => '\\\\', | ||||
| 1908 | '$' => '\\$', | ||||
| 1909 | '@' => '\\@', | ||||
| 1910 | '%' => '\\%', | ||||
| 1911 | '#' => '\\#', | ||||
| 1912 | ); | ||||
| 1913 | |||||
| 1914 | sub pretty { # adopted from Class::Classless | ||||
| 1915 | # Not the most brilliant routine, but passable. | ||||
| 1916 | # Don't give it a cyclic data structure! | ||||
| 1917 | my @stuff = @_; # copy | ||||
| 1918 | my $x; | ||||
| 1919 | my $out = | ||||
| 1920 | # join ",\n" . | ||||
| 1921 | join ", ", | ||||
| 1922 | map {; | ||||
| 1923 | if(!defined($_)) { | ||||
| 1924 | "undef"; | ||||
| 1925 | } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') { | ||||
| 1926 | $x = "[ " . pretty(@$_) . " ]" ; | ||||
| 1927 | $x; | ||||
| 1928 | } elsif(ref($_) eq 'SCALAR') { | ||||
| 1929 | $x = "\\" . pretty($$_) ; | ||||
| 1930 | $x; | ||||
| 1931 | } elsif(ref($_) eq 'HASH') { | ||||
| 1932 | my $hr = $_; | ||||
| 1933 | $x = "{" . join(", ", | ||||
| 1934 | map(pretty($_) . '=>' . pretty($hr->{$_}), | ||||
| 1935 | sort keys %$hr ) ) . "}" ; | ||||
| 1936 | $x; | ||||
| 1937 | } elsif(!length($_)) { q{''} # empty string | ||||
| 1938 | } elsif( | ||||
| 1939 | $_ eq '0' # very common case | ||||
| 1940 | or( | ||||
| 1941 | m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s | ||||
| 1942 | and $_ ne '-0' # the strange case that that RE lets thru | ||||
| 1943 | ) | ||||
| 1944 | ) { $_; | ||||
| 1945 | } else { | ||||
| 1946 | if( chr(65) eq 'A' ) { | ||||
| 1947 | s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])> | ||||
| 1948 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
| 1949 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
| 1950 | } else { | ||||
| 1951 | # We're in some crazy non-ASCII world! | ||||
| 1952 | s<([^abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789])> | ||||
| 1953 | #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg; | ||||
| 1954 | <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg; | ||||
| 1955 | } | ||||
| 1956 | qq{"$_"}; | ||||
| 1957 | } | ||||
| 1958 | } @stuff; | ||||
| 1959 | # $out =~ s/\n */ /g if length($out) < 75; | ||||
| 1960 | return $out; | ||||
| 1961 | } | ||||
| 1962 | |||||
| 1963 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1964 | |||||
| 1965 | # A rather unsubtle method of blowing away all the state information | ||||
| 1966 | # from a parser object so it can be reused. Provided as a utility for | ||||
| 1967 | # backward compatibility in Pod::Man, etc. but not recommended for | ||||
| 1968 | # general use. | ||||
| 1969 | |||||
| 1970 | sub reinit { | ||||
| 1971 | my $self = shift; | ||||
| 1972 | foreach (qw(source_dead source_filename doc_has_started | ||||
| 1973 | start_of_pod_block content_seen last_was_blank paras curr_open | ||||
| 1974 | line_count pod_para_count in_pod ~tried_gen_errata errata errors_seen | ||||
| 1975 | Title)) { | ||||
| 1976 | |||||
| 1977 | delete $self->{$_}; | ||||
| 1978 | } | ||||
| 1979 | } | ||||
| 1980 | |||||
| 1981 | #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ | ||||
| 1982 | 1 | 28µs | 1; | ||
| 1983 |