2051 lines
78 KiB
Perl
2051 lines
78 KiB
Perl
# Pod::Man -- Convert POD data to formatted *roff input.
|
|
#
|
|
# This module translates POD documentation into *roff markup using the man
|
|
# macro set, and is intended for converting POD documents written as Unix
|
|
# manual pages to manual pages that can be read by the man(1) command. It is
|
|
# a replacement for the pod2man command distributed with versions of Perl
|
|
# prior to 5.6.
|
|
#
|
|
# Perl core hackers, please note that this module is also separately
|
|
# maintained outside of the Perl core as part of the podlators. Please send
|
|
# me any patches at the address above in addition to sending them to the
|
|
# standard Perl mailing lists.
|
|
#
|
|
# Written by Russ Allbery <rra@cpan.org>
|
|
# Substantial contributions by Sean Burke <sburke@cpan.org>
|
|
# Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
|
|
# 2010, 2012, 2013, 2014, 2015, 2016, 2017 Russ Allbery <rra@cpan.org>
|
|
#
|
|
# This program is free software; you may redistribute it and/or modify it
|
|
# under the same terms as Perl itself.
|
|
|
|
##############################################################################
|
|
# Modules and declarations
|
|
##############################################################################
|
|
|
|
package Pod::Man;
|
|
|
|
use 5.006;
|
|
use strict;
|
|
use warnings;
|
|
|
|
use subs qw(makespace);
|
|
use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
|
|
|
|
use Carp qw(carp croak);
|
|
use Pod::Simple ();
|
|
|
|
# Conditionally import Encode and set $HAS_ENCODE if it is available.
|
|
our $HAS_ENCODE;
|
|
BEGIN {
|
|
$HAS_ENCODE = eval { require Encode };
|
|
}
|
|
|
|
@ISA = qw(Pod::Simple);
|
|
|
|
$VERSION = '4.10';
|
|
|
|
# Set the debugging level. If someone has inserted a debug function into this
|
|
# class already, use that. Otherwise, use any Pod::Simple debug function
|
|
# that's defined, and failing that, define a debug level of 10.
|
|
BEGIN {
|
|
my $parent = defined (&Pod::Simple::DEBUG) ? \&Pod::Simple::DEBUG : undef;
|
|
unless (defined &DEBUG) {
|
|
*DEBUG = $parent || sub () { 10 };
|
|
}
|
|
}
|
|
|
|
# Import the ASCII constant from Pod::Simple. This is true iff we're in an
|
|
# ASCII-based universe (including such things as ISO 8859-1 and UTF-8), and is
|
|
# generally only false for EBCDIC.
|
|
BEGIN { *ASCII = \&Pod::Simple::ASCII }
|
|
|
|
# Pretty-print a data structure. Only used for debugging.
|
|
BEGIN { *pretty = \&Pod::Simple::pretty }
|
|
|
|
# Formatting instructions for various types of blocks. cleanup makes hyphens
|
|
# hard, adds spaces between consecutive underscores, and escapes backslashes.
|
|
# convert translates characters into escapes. guesswork means to apply the
|
|
# transformations done by the guesswork sub. literal says to protect literal
|
|
# quotes from being turned into UTF-8 quotes. By default, all transformations
|
|
# are on except literal, but some elements override.
|
|
#
|
|
# DEFAULT specifies the default settings. All other elements should list only
|
|
# those settings that they are overriding. Data indicates =for roff blocks,
|
|
# which should be passed along completely verbatim.
|
|
#
|
|
# Formatting inherits negatively, in the sense that if the parent has turned
|
|
# off guesswork, all child elements should leave it off.
|
|
my %FORMATTING = (
|
|
DEFAULT => { cleanup => 1, convert => 1, guesswork => 1, literal => 0 },
|
|
Data => { cleanup => 0, convert => 0, guesswork => 0, literal => 0 },
|
|
Verbatim => { guesswork => 0, literal => 1 },
|
|
C => { guesswork => 0, literal => 1 },
|
|
X => { cleanup => 0, guesswork => 0 },
|
|
);
|
|
|
|
##############################################################################
|
|
# Object initialization
|
|
##############################################################################
|
|
|
|
# Initialize the object and set various Pod::Simple options that we need.
|
|
# Here, we also process any additional options passed to the constructor or
|
|
# set up defaults if none were given. Note that all internal object keys are
|
|
# in all-caps, reserving all lower-case object keys for Pod::Simple and user
|
|
# arguments.
|
|
sub new {
|
|
my $class = shift;
|
|
my $self = $class->SUPER::new;
|
|
|
|
# Tell Pod::Simple not to handle S<> by automatically inserting .
|
|
$self->nbsp_for_S (1);
|
|
|
|
# Tell Pod::Simple to keep whitespace whenever possible.
|
|
if (my $preserve_whitespace = $self->can ('preserve_whitespace')) {
|
|
$self->$preserve_whitespace (1);
|
|
} else {
|
|
$self->fullstop_space_harden (1);
|
|
}
|
|
|
|
# The =for and =begin targets that we accept.
|
|
$self->accept_targets (qw/man MAN roff ROFF/);
|
|
|
|
# Ensure that contiguous blocks of code are merged together. Otherwise,
|
|
# some of the guesswork heuristics don't work right.
|
|
$self->merge_text (1);
|
|
|
|
# Pod::Simple doesn't do anything useful with our arguments, but we want
|
|
# to put them in our object as hash keys and values. This could cause
|
|
# problems if we ever clash with Pod::Simple's own internal class
|
|
# variables.
|
|
%$self = (%$self, @_);
|
|
|
|
# Send errors to stderr if requested.
|
|
if ($$self{stderr} and not $$self{errors}) {
|
|
$$self{errors} = 'stderr';
|
|
}
|
|
delete $$self{stderr};
|
|
|
|
# Validate the errors parameter and act on it.
|
|
if (not defined $$self{errors}) {
|
|
$$self{errors} = 'pod';
|
|
}
|
|
if ($$self{errors} eq 'stderr' || $$self{errors} eq 'die') {
|
|
$self->no_errata_section (1);
|
|
$self->complain_stderr (1);
|
|
if ($$self{errors} eq 'die') {
|
|
$$self{complain_die} = 1;
|
|
}
|
|
} elsif ($$self{errors} eq 'pod') {
|
|
$self->no_errata_section (0);
|
|
$self->complain_stderr (0);
|
|
} elsif ($$self{errors} eq 'none') {
|
|
$self->no_whining (1);
|
|
} else {
|
|
croak (qq(Invalid errors setting: "$$self{errors}"));
|
|
}
|
|
delete $$self{errors};
|
|
|
|
# Degrade back to non-utf8 if Encode is not available.
|
|
#
|
|
# Suppress the warning message when PERL_CORE is set, indicating this is
|
|
# running as part of the core Perl build. Perl builds podlators (and all
|
|
# pure Perl modules) before Encode and other XS modules, so Encode won't
|
|
# yet be available. Rely on the Perl core build to generate man pages
|
|
# later, after all the modules are available, so that UTF-8 handling will
|
|
# be correct.
|
|
if ($$self{utf8} and !$HAS_ENCODE) {
|
|
if (!$ENV{PERL_CORE}) {
|
|
carp ('utf8 mode requested but Encode module not available,'
|
|
. ' falling back to non-utf8');
|
|
}
|
|
delete $$self{utf8};
|
|
}
|
|
|
|
# Initialize various other internal constants based on our arguments.
|
|
$self->init_fonts;
|
|
$self->init_quotes;
|
|
$self->init_page;
|
|
|
|
# For right now, default to turning on all of the magic.
|
|
$$self{MAGIC_CPP} = 1;
|
|
$$self{MAGIC_EMDASH} = 1;
|
|
$$self{MAGIC_FUNC} = 1;
|
|
$$self{MAGIC_MANREF} = 1;
|
|
$$self{MAGIC_SMALLCAPS} = 1;
|
|
$$self{MAGIC_VARS} = 1;
|
|
|
|
return $self;
|
|
}
|
|
|
|
# Translate a font string into an escape.
|
|
sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] }
|
|
|
|
# Determine which fonts the user wishes to use and store them in the object.
|
|
# Regular, italic, bold, and bold-italic are constants, but the fixed width
|
|
# fonts may be set by the user. Sets the internal hash key FONTS which is
|
|
# used to map our internal font escapes to actual *roff sequences later.
|
|
sub init_fonts {
|
|
my ($self) = @_;
|
|
|
|
# Figure out the fixed-width font. If user-supplied, make sure that they
|
|
# are the right length.
|
|
for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
|
|
my $font = $$self{$_};
|
|
if (defined ($font) && (length ($font) < 1 || length ($font) > 2)) {
|
|
croak qq(roff font should be 1 or 2 chars, not "$font");
|
|
}
|
|
}
|
|
|
|
# Set the default fonts. We can't be sure portably across different
|
|
# implementations what fixed bold-italic may be called (if it's even
|
|
# available), so default to just bold.
|
|
$$self{fixed} ||= 'CW';
|
|
$$self{fixedbold} ||= 'CB';
|
|
$$self{fixeditalic} ||= 'CI';
|
|
$$self{fixedbolditalic} ||= 'CB';
|
|
|
|
# Set up a table of font escapes. First number is fixed-width, second is
|
|
# bold, third is italic.
|
|
$$self{FONTS} = { '000' => '\fR', '001' => '\fI',
|
|
'010' => '\fB', '011' => '\f(BI',
|
|
'100' => toescape ($$self{fixed}),
|
|
'101' => toescape ($$self{fixeditalic}),
|
|
'110' => toescape ($$self{fixedbold}),
|
|
'111' => toescape ($$self{fixedbolditalic}) };
|
|
}
|
|
|
|
# Initialize the quotes that we'll be using for C<> text. This requires some
|
|
# special handling, both to parse the user parameters if given and to make
|
|
# sure that the quotes will be safe against *roff. Sets the internal hash
|
|
# keys LQUOTE and RQUOTE.
|
|
sub init_quotes {
|
|
my ($self) = (@_);
|
|
|
|
# Handle the quotes option first, which sets both quotes at once.
|
|
$$self{quotes} ||= '"';
|
|
if ($$self{quotes} eq 'none') {
|
|
$$self{LQUOTE} = $$self{RQUOTE} = '';
|
|
} elsif (length ($$self{quotes}) == 1) {
|
|
$$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
|
|
} elsif (length ($$self{quotes}) % 2 == 0) {
|
|
my $length = length ($$self{quotes}) / 2;
|
|
$$self{LQUOTE} = substr ($$self{quotes}, 0, $length);
|
|
$$self{RQUOTE} = substr ($$self{quotes}, $length);
|
|
} else {
|
|
croak(qq(Invalid quote specification "$$self{quotes}"))
|
|
}
|
|
|
|
# Now handle the lquote and rquote options.
|
|
if (defined $$self{lquote}) {
|
|
$$self{LQUOTE} = $$self{lquote} eq 'none' ? q{} : $$self{lquote};
|
|
}
|
|
if (defined $$self{rquote}) {
|
|
$$self{RQUOTE} = $$self{rquote} eq 'none' ? q{} : $$self{rquote};
|
|
}
|
|
|
|
# Double the first quote; note that this should not be s///g as two double
|
|
# quotes is represented in *roff as three double quotes, not four. Weird,
|
|
# I know.
|
|
$$self{LQUOTE} =~ s/\"/\"\"/;
|
|
$$self{RQUOTE} =~ s/\"/\"\"/;
|
|
}
|
|
|
|
# Initialize the page title information and indentation from our arguments.
|
|
sub init_page {
|
|
my ($self) = @_;
|
|
|
|
# We used to try first to get the version number from a local binary, but
|
|
# we shouldn't need that any more. Get the version from the running Perl.
|
|
# Work a little magic to handle subversions correctly under both the
|
|
# pre-5.6 and the post-5.6 version numbering schemes.
|
|
my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
|
|
$version[2] ||= 0;
|
|
$version[2] *= 10 ** (3 - length $version[2]);
|
|
for (@version) { $_ += 0 }
|
|
my $version = join ('.', @version);
|
|
|
|
# Set the defaults for page titles and indentation if the user didn't
|
|
# override anything.
|
|
$$self{center} = 'User Contributed Perl Documentation'
|
|
unless defined $$self{center};
|
|
$$self{release} = 'perl v' . $version
|
|
unless defined $$self{release};
|
|
$$self{indent} = 4
|
|
unless defined $$self{indent};
|
|
|
|
# Double quotes in things that will be quoted.
|
|
for (qw/center release/) {
|
|
$$self{$_} =~ s/\"/\"\"/g if $$self{$_};
|
|
}
|
|
}
|
|
|
|
##############################################################################
|
|
# Core parsing
|
|
##############################################################################
|
|
|
|
# This is the glue that connects the code below with Pod::Simple itself. The
|
|
# goal is to convert the event stream coming from the POD parser into method
|
|
# calls to handlers once the complete content of a tag has been seen. Each
|
|
# paragraph or POD command will have textual content associated with it, and
|
|
# as soon as all of a paragraph or POD command has been seen, that content
|
|
# will be passed in to the corresponding method for handling that type of
|
|
# object. The exceptions are handlers for lists, which have opening tag
|
|
# handlers and closing tag handlers that will be called right away.
|
|
#
|
|
# The internal hash key PENDING is used to store the contents of a tag until
|
|
# all of it has been seen. It holds a stack of open tags, each one
|
|
# represented by a tuple of the attributes hash for the tag, formatting
|
|
# options for the tag (which are inherited), and the contents of the tag.
|
|
|
|
# Add a block of text to the contents of the current node, formatting it
|
|
# according to the current formatting instructions as we do.
|
|
sub _handle_text {
|
|
my ($self, $text) = @_;
|
|
DEBUG > 3 and print "== $text\n";
|
|
my $tag = $$self{PENDING}[-1];
|
|
$$tag[2] .= $self->format_text ($$tag[1], $text);
|
|
}
|
|
|
|
# Given an element name, get the corresponding method name.
|
|
sub method_for_element {
|
|
my ($self, $element) = @_;
|
|
$element =~ tr/A-Z-/a-z_/;
|
|
$element =~ tr/_a-z0-9//cd;
|
|
return $element;
|
|
}
|
|
|
|
# Handle the start of a new element. If cmd_element is defined, assume that
|
|
# we need to collect the entire tree for this element before passing it to the
|
|
# element method, and create a new tree into which we'll collect blocks of
|
|
# text and nested elements. Otherwise, if start_element is defined, call it.
|
|
sub _handle_element_start {
|
|
my ($self, $element, $attrs) = @_;
|
|
DEBUG > 3 and print "++ $element (<", join ('> <', %$attrs), ">)\n";
|
|
my $method = $self->method_for_element ($element);
|
|
|
|
# If we have a command handler, we need to accumulate the contents of the
|
|
# tag before calling it. Turn off IN_NAME for any command other than
|
|
# <Para> and the formatting codes so that IN_NAME isn't still set for the
|
|
# first heading after the NAME heading.
|
|
if ($self->can ("cmd_$method")) {
|
|
DEBUG > 2 and print "<$element> starts saving a tag\n";
|
|
$$self{IN_NAME} = 0 if ($element ne 'Para' && length ($element) > 1);
|
|
|
|
# How we're going to format embedded text blocks depends on the tag
|
|
# and also depends on our parent tags. Thankfully, inside tags that
|
|
# turn off guesswork and reformatting, nothing else can turn it back
|
|
# on, so this can be strictly inherited.
|
|
my $formatting = {
|
|
%{ $$self{PENDING}[-1][1] || $FORMATTING{DEFAULT} },
|
|
%{ $FORMATTING{$element} || {} },
|
|
};
|
|
push (@{ $$self{PENDING} }, [ $attrs, $formatting, '' ]);
|
|
DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n";
|
|
} elsif (my $start_method = $self->can ("start_$method")) {
|
|
$self->$start_method ($attrs, '');
|
|
} else {
|
|
DEBUG > 2 and print "No $method start method, skipping\n";
|
|
}
|
|
}
|
|
|
|
# Handle the end of an element. If we had a cmd_ method for this element,
|
|
# this is where we pass along the tree that we built. Otherwise, if we have
|
|
# an end_ method for the element, call that.
|
|
sub _handle_element_end {
|
|
my ($self, $element) = @_;
|
|
DEBUG > 3 and print "-- $element\n";
|
|
my $method = $self->method_for_element ($element);
|
|
|
|
# If we have a command handler, pull off the pending text and pass it to
|
|
# the handler along with the saved attribute hash.
|
|
if (my $cmd_method = $self->can ("cmd_$method")) {
|
|
DEBUG > 2 and print "</$element> stops saving a tag\n";
|
|
my $tag = pop @{ $$self{PENDING} };
|
|
DEBUG > 4 and print "Popped: [", pretty ($tag), "]\n";
|
|
DEBUG > 4 and print "Pending: [", pretty ($$self{PENDING}), "]\n";
|
|
my $text = $self->$cmd_method ($$tag[0], $$tag[2]);
|
|
if (defined $text) {
|
|
if (@{ $$self{PENDING} } > 1) {
|
|
$$self{PENDING}[-1][2] .= $text;
|
|
} else {
|
|
$self->output ($text);
|
|
}
|
|
}
|
|
} elsif (my $end_method = $self->can ("end_$method")) {
|
|
$self->$end_method ();
|
|
} else {
|
|
DEBUG > 2 and print "No $method end method, skipping\n";
|
|
}
|
|
}
|
|
|
|
##############################################################################
|
|
# General formatting
|
|
##############################################################################
|
|
|
|
# Format a text block. Takes a hash of formatting options and the text to
|
|
# format. Currently, the only formatting options are guesswork, cleanup, and
|
|
# convert, all of which are boolean.
|
|
sub format_text {
|
|
my ($self, $options, $text) = @_;
|
|
my $guesswork = $$options{guesswork} && !$$self{IN_NAME};
|
|
my $cleanup = $$options{cleanup};
|
|
my $convert = $$options{convert};
|
|
my $literal = $$options{literal};
|
|
|
|
# Cleanup just tidies up a few things, telling *roff that the hyphens are
|
|
# hard, putting a bit of space between consecutive underscores, and
|
|
# escaping backslashes. Be careful not to mangle our character
|
|
# translations by doing this before processing character translation.
|
|
if ($cleanup) {
|
|
$text =~ s/\\/\\e/g;
|
|
$text =~ s/-/\\-/g;
|
|
$text =~ s/_(?=_)/_\\|/g;
|
|
}
|
|
|
|
# Normally we do character translation, but we won't even do that in
|
|
# <Data> blocks or if UTF-8 output is desired.
|
|
if ($convert && !$$self{utf8} && ASCII) {
|
|
$text =~ s/([^\x00-\x7F])/$ESCAPES{ord ($1)} || "X"/eg;
|
|
}
|
|
|
|
# Ensure that *roff doesn't convert literal quotes to UTF-8 single quotes,
|
|
# but don't mess up our accept escapes.
|
|
if ($literal) {
|
|
$text =~ s/(?<!\\\*)\'/\\*\(Aq/g;
|
|
$text =~ s/(?<!\\\*)\`/\\\`/g;
|
|
}
|
|
|
|
# If guesswork is asked for, do that. This involves more substantial
|
|
# formatting based on various heuristics that may only be appropriate for
|
|
# particular documents.
|
|
if ($guesswork) {
|
|
$text = $self->guesswork ($text);
|
|
}
|
|
|
|
return $text;
|
|
}
|
|
|
|
# Handles C<> text, deciding whether to put \*C` around it or not. This is a
|
|
# whole bunch of messy heuristics to try to avoid overquoting, originally from
|
|
# Barrie Slaymaker. This largely duplicates similar code in Pod::Text.
|
|
sub quote_literal {
|
|
my $self = shift;
|
|
local $_ = shift;
|
|
|
|
# A regex that matches the portion of a variable reference that's the
|
|
# array or hash index, separated out just because we want to use it in
|
|
# several places in the following regex.
|
|
my $index = '(?: \[.*\] | \{.*\} )?';
|
|
|
|
# If in NAME section, just return an ASCII quoted string to avoid
|
|
# confusing tools like whatis.
|
|
return qq{"$_"} if $$self{IN_NAME};
|
|
|
|
# Check for things that we don't want to quote, and if we find any of
|
|
# them, return the string with just a font change and no quoting.
|
|
m{
|
|
^\s*
|
|
(?:
|
|
( [\'\`\"] ) .* \1 # already quoted
|
|
| \\\*\(Aq .* \\\*\(Aq # quoted and escaped
|
|
| \\?\` .* ( \' | \\\*\(Aq ) # `quoted'
|
|
| \$+ [\#^]? \S $index # special ($^Foo, $")
|
|
| [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func
|
|
| [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
|
|
| [-+]? ( \d[\d.]* | \.\d+ ) (?: [eE][-+]?\d+ )? # a number
|
|
| 0x [a-fA-F\d]+ # a hex constant
|
|
)
|
|
\s*\z
|
|
}xso and return '\f(FS' . $_ . '\f(FE';
|
|
|
|
# If we didn't return, go ahead and quote the text.
|
|
return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE";
|
|
}
|
|
|
|
# Takes a text block to perform guesswork on. Returns the text block with
|
|
# formatting codes added. This is the code that marks up various Perl
|
|
# constructs and things commonly used in man pages without requiring the user
|
|
# to add any explicit markup, and is applied to all non-literal text. We're
|
|
# guaranteed that the text we're applying guesswork to does not contain any
|
|
# *roff formatting codes. Note that the inserted font sequences must be
|
|
# treated later with mapfonts or textmapfonts.
|
|
#
|
|
# This method is very fragile, both in the regular expressions it uses and in
|
|
# the ordering of those modifications. Care and testing is required when
|
|
# modifying it.
|
|
sub guesswork {
|
|
my $self = shift;
|
|
local $_ = shift;
|
|
DEBUG > 5 and print " Guesswork called on [$_]\n";
|
|
|
|
# By the time we reach this point, all hyphens will be escaped by adding a
|
|
# backslash. We want to undo that escaping if they're part of regular
|
|
# words and there's only a single dash, since that's a real hyphen that
|
|
# *roff gets to consider a possible break point. Make sure that a dash
|
|
# after the first character of a word stays non-breaking, however.
|
|
#
|
|
# Note that this is not user-controllable; we pretty much have to do this
|
|
# transformation or *roff will mangle the output in unacceptable ways.
|
|
s{
|
|
( (?:\G|^|\s) [\(\"]* [a-zA-Z] ) ( \\- )?
|
|
( (?: [a-zA-Z\']+ \\-)+ )
|
|
( [a-zA-Z\']+ ) (?= [\)\".?!,;:]* (?:\s|\Z|\\\ ) )
|
|
\b
|
|
} {
|
|
my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
|
|
$hyphen ||= '';
|
|
$main =~ s/\\-/-/g;
|
|
$prefix . $hyphen . $main . $suffix;
|
|
}egx;
|
|
|
|
# Translate "--" into a real em-dash if it's used like one. This means
|
|
# that it's either surrounded by whitespace, it follows a regular word, or
|
|
# it occurs between two regular words.
|
|
if ($$self{MAGIC_EMDASH}) {
|
|
s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx;
|
|
s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;
|
|
}
|
|
|
|
# Make words in all-caps a little bit smaller; they look better that way.
|
|
# However, we don't want to change Perl code (like @ARGV), nor do we want
|
|
# to fix the MIME in MIME-Version since it looks weird with the
|
|
# full-height V.
|
|
#
|
|
# We change only a string of all caps (2) either at the beginning of the
|
|
# line or following regular punctuation (like quotes) or whitespace (1),
|
|
# and followed by either similar punctuation, an em-dash, or the end of
|
|
# the line (3).
|
|
#
|
|
# Allow the text we're changing to small caps to include double quotes,
|
|
# commas, newlines, and periods as long as it doesn't otherwise interrupt
|
|
# the string of small caps and still fits the criteria. This lets us turn
|
|
# entire warranty disclaimers in man page output into small caps.
|
|
if ($$self{MAGIC_SMALLCAPS}) {
|
|
s{
|
|
( ^ | [\s\(\"\'\`\[\{<>] | \\[ ] ) # (1)
|
|
( [A-Z] [A-Z] (?: \s? [/A-Z+:\d_\$&] | \\- | \s? [.,\"] )* ) # (2)
|
|
(?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | \\[ ] | $ ) # (3)
|
|
} {
|
|
$1 . '\s-1' . $2 . '\s0'
|
|
}egx;
|
|
}
|
|
|
|
# Note that from this point forward, we have to adjust for \s-1 and \s-0
|
|
# strings inserted around things that we've made small-caps if later
|
|
# transforms should work on those strings.
|
|
|
|
# Embolden functions in the form func(), including functions that are in
|
|
# all capitals, but don't embolden if there's anything between the parens.
|
|
# The function must start with an alphabetic character or underscore and
|
|
# then consist of word characters or colons.
|
|
if ($$self{MAGIC_FUNC}) {
|
|
s{
|
|
( \b | \\s-1 )
|
|
( [A-Za-z_] ([:\w] | \\s-?[01])+ \(\) )
|
|
} {
|
|
$1 . '\f(BS' . $2 . '\f(BE'
|
|
}egx;
|
|
}
|
|
|
|
# Change references to manual pages to put the page name in bold but
|
|
# the number in the regular font, with a thin space between the name and
|
|
# the number. Only recognize func(n) where func starts with an alphabetic
|
|
# character or underscore and contains only word characters, periods (for
|
|
# configuration file man pages), or colons, and n is a single digit,
|
|
# optionally followed by some number of lowercase letters. Note that this
|
|
# does not recognize man page references like perl(l) or socket(3SOCKET).
|
|
if ($$self{MAGIC_MANREF}) {
|
|
s{
|
|
( \b | \\s-1 )
|
|
(?<! \\ ) # rule out \s0(1)
|
|
( [A-Za-z_] (?:[.:\w] | \\- | \\s-?[01])+ )
|
|
( \( \d [a-z]* \) )
|
|
} {
|
|
$1 . '\f(BS' . $2 . '\f(BE\|' . $3
|
|
}egx;
|
|
}
|
|
|
|
# Convert simple Perl variable references to a fixed-width font. Be
|
|
# careful not to convert functions, though; there are too many subtleties
|
|
# with them to want to perform this transformation.
|
|
if ($$self{MAGIC_VARS}) {
|
|
s{
|
|
( ^ | \s+ )
|
|
( [\$\@%] [\w:]+ )
|
|
(?! \( )
|
|
} {
|
|
$1 . '\f(FS' . $2 . '\f(FE'
|
|
}egx;
|
|
}
|
|
|
|
# Fix up double quotes. Unfortunately, we miss this transformation if the
|
|
# quoted text contains any code with formatting codes and there's not much
|
|
# we can effectively do about that, which makes it somewhat unclear if
|
|
# this is really a good idea.
|
|
s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
|
|
|
|
# Make C++ into \*(C+, which is a squinched version.
|
|
if ($$self{MAGIC_CPP}) {
|
|
s{ \b C\+\+ } {\\*\(C+}gx;
|
|
}
|
|
|
|
# Done.
|
|
DEBUG > 5 and print " Guesswork returning [$_]\n";
|
|
return $_;
|
|
}
|
|
|
|
##############################################################################
|
|
# Output
|
|
##############################################################################
|
|
|
|
# When building up the *roff code, we don't use real *roff fonts. Instead, we
|
|
# embed font codes of the form \f(<font>[SE] where <font> is one of B, I, or
|
|
# F, S stands for start, and E stands for end. This method turns these into
|
|
# the right start and end codes.
|
|
#
|
|
# We add this level of complexity because the old pod2man didn't get code like
|
|
# B<someI<thing> else> right; after I<> it switched back to normal text rather
|
|
# than bold. We take care of this by using variables that state whether bold,
|
|
# italic, or fixed are turned on as a combined pointer to our current font
|
|
# sequence, and set each to the number of current nestings of start tags for
|
|
# that font.
|
|
#
|
|
# \fP changes to the previous font, but only one previous font is kept. We
|
|
# don't know what the outside level font is; normally it's R, but if we're
|
|
# inside a heading it could be something else. So arrange things so that the
|
|
# outside font is always the "previous" font and end with \fP instead of \fR.
|
|
# Idea from Zack Weinberg.
|
|
sub mapfonts {
|
|
my ($self, $text) = @_;
|
|
my ($fixed, $bold, $italic) = (0, 0, 0);
|
|
my %magic = (F => \$fixed, B => \$bold, I => \$italic);
|
|
my $last = '\fR';
|
|
$text =~ s<
|
|
\\f\((.)(.)
|
|
> <
|
|
my $sequence = '';
|
|
my $f;
|
|
if ($last ne '\fR') { $sequence = '\fP' }
|
|
${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
|
|
$f = $$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) };
|
|
if ($f eq $last) {
|
|
'';
|
|
} else {
|
|
if ($f ne '\fR') { $sequence .= $f }
|
|
$last = $f;
|
|
$sequence;
|
|
}
|
|
>gxe;
|
|
return $text;
|
|
}
|
|
|
|
# Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU
|
|
# groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather
|
|
# than R, presumably because \f(CW doesn't actually do a font change. To work
|
|
# around this, use a separate textmapfonts for text blocks where the default
|
|
# font is always R and only use the smart mapfonts for headings.
|
|
sub textmapfonts {
|
|
my ($self, $text) = @_;
|
|
my ($fixed, $bold, $italic) = (0, 0, 0);
|
|
my %magic = (F => \$fixed, B => \$bold, I => \$italic);
|
|
$text =~ s<
|
|
\\f\((.)(.)
|
|
> <
|
|
${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
|
|
$$self{FONTS}{ ($fixed && 1) . ($bold && 1) . ($italic && 1) };
|
|
>gxe;
|
|
return $text;
|
|
}
|
|
|
|
# Given a command and a single argument that may or may not contain double
|
|
# quotes, handle double-quote formatting for it. If there are no double
|
|
# quotes, just return the command followed by the argument in double quotes.
|
|
# If there are double quotes, use an if statement to test for nroff, and for
|
|
# nroff output the command followed by the argument in double quotes with
|
|
# embedded double quotes doubled. For other formatters, remap paired double
|
|
# quotes to LQUOTE and RQUOTE.
|
|
sub switchquotes {
|
|
my ($self, $command, $text, $extra) = @_;
|
|
$text =~ s/\\\*\([LR]\"/\"/g;
|
|
|
|
# We also have to deal with \*C` and \*C', which are used to add the
|
|
# quotes around C<> text, since they may expand to " and if they do this
|
|
# confuses the .SH macros and the like no end. Expand them ourselves.
|
|
# Also separate troff from nroff if there are any fixed-width fonts in use
|
|
# to work around problems with Solaris nroff.
|
|
my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);
|
|
my $fixedpat = join '|', @{ $$self{FONTS} }{'100', '101', '110', '111'};
|
|
$fixedpat =~ s/\\/\\\\/g;
|
|
$fixedpat =~ s/\(/\\\(/g;
|
|
if ($text =~ m/\"/ || $text =~ m/$fixedpat/) {
|
|
$text =~ s/\"/\"\"/g;
|
|
my $nroff = $text;
|
|
my $troff = $text;
|
|
$troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
|
|
if ($c_is_quote and $text =~ m/\\\*\(C[\'\`]/) {
|
|
$nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
|
|
$nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
|
|
$troff =~ s/\\\*\(C[\'\`]//g;
|
|
}
|
|
$nroff = qq("$nroff") . ($extra ? " $extra" : '');
|
|
$troff = qq("$troff") . ($extra ? " $extra" : '');
|
|
|
|
# Work around the Solaris nroff bug where \f(CW\fP leaves the font set
|
|
# to Roman rather than the actual previous font when used in headings.
|
|
# troff output may still be broken, but at least we can fix nroff by
|
|
# just switching the font changes to the non-fixed versions.
|
|
my $font_end = "(?:\\f[PR]|\Q$$self{FONTS}{100}\E)";
|
|
$nroff =~ s/\Q$$self{FONTS}{100}\E(.*?)\\f([PR])/$1/g;
|
|
$nroff =~ s/\Q$$self{FONTS}{101}\E(.*?)$font_end/\\fI$1\\fP/g;
|
|
$nroff =~ s/\Q$$self{FONTS}{110}\E(.*?)$font_end/\\fB$1\\fP/g;
|
|
$nroff =~ s/\Q$$self{FONTS}{111}\E(.*?)$font_end/\\f\(BI$1\\fP/g;
|
|
|
|
# Now finally output the command. Bother with .ie only if the nroff
|
|
# and troff output aren't the same.
|
|
if ($nroff ne $troff) {
|
|
return ".ie n $command $nroff\n.el $command $troff\n";
|
|
} else {
|
|
return "$command $nroff\n";
|
|
}
|
|
} else {
|
|
$text = qq("$text") . ($extra ? " $extra" : '');
|
|
return "$command $text\n";
|
|
}
|
|
}
|
|
|
|
# Protect leading quotes and periods against interpretation as commands. Also
|
|
# protect anything starting with a backslash, since it could expand or hide
|
|
# something that *roff would interpret as a command. This is overkill, but
|
|
# it's much simpler than trying to parse *roff here.
|
|
sub protect {
|
|
my ($self, $text) = @_;
|
|
$text =~ s/^([.\'\\])/\\&$1/mg;
|
|
return $text;
|
|
}
|
|
|
|
# Make vertical whitespace if NEEDSPACE is set, appropriate to the indentation
|
|
# level the situation. This function is needed since in *roff one has to
|
|
# create vertical whitespace after paragraphs and between some things, but
|
|
# other macros create their own whitespace. Also close out a sequence of
|
|
# repeated =items, since calling makespace means we're about to begin the item
|
|
# body.
|
|
sub makespace {
|
|
my ($self) = @_;
|
|
$self->output (".PD\n") if $$self{ITEMS} > 1;
|
|
$$self{ITEMS} = 0;
|
|
$self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n")
|
|
if $$self{NEEDSPACE};
|
|
}
|
|
|
|
# Output any pending index entries, and optionally an index entry given as an
|
|
# argument. Support multiple index entries in X<> separated by slashes, and
|
|
# strip special escapes from index entries.
|
|
sub outindex {
|
|
my ($self, $section, $index) = @_;
|
|
my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
|
|
return unless ($section || @entries);
|
|
|
|
# We're about to output all pending entries, so clear our pending queue.
|
|
$$self{INDEX} = [];
|
|
|
|
# Build the output. Regular index entries are marked Xref, and headings
|
|
# pass in their own section. Undo some *roff formatting on headings.
|
|
my @output;
|
|
if (@entries) {
|
|
push @output, [ 'Xref', join (' ', @entries) ];
|
|
}
|
|
if ($section) {
|
|
$index =~ s/\\-/-/g;
|
|
$index =~ s/\\(?:s-?\d|.\(..|.)//g;
|
|
push @output, [ $section, $index ];
|
|
}
|
|
|
|
# Print out the .IX commands.
|
|
for (@output) {
|
|
my ($type, $entry) = @$_;
|
|
$entry =~ s/\s+/ /g;
|
|
$entry =~ s/\"/\"\"/g;
|
|
$entry =~ s/\\/\\\\/g;
|
|
$self->output (".IX $type " . '"' . $entry . '"' . "\n");
|
|
}
|
|
}
|
|
|
|
# Output some text, without any additional changes.
|
|
sub output {
|
|
my ($self, @text) = @_;
|
|
if ($$self{ENCODE}) {
|
|
print { $$self{output_fh} } Encode::encode ('UTF-8', join ('', @text));
|
|
} else {
|
|
print { $$self{output_fh} } @text;
|
|
}
|
|
}
|
|
|
|
##############################################################################
|
|
# Document initialization
|
|
##############################################################################
|
|
|
|
# Handle the start of the document. Here we handle empty documents, as well
|
|
# as setting up our basic macros in a preamble and building the page title.
|
|
sub start_document {
|
|
my ($self, $attrs) = @_;
|
|
if ($$attrs{contentless} && !$$self{ALWAYS_EMIT_SOMETHING}) {
|
|
DEBUG and print "Document is contentless\n";
|
|
$$self{CONTENTLESS} = 1;
|
|
} else {
|
|
delete $$self{CONTENTLESS};
|
|
}
|
|
|
|
# When UTF-8 output is set, check whether our output file handle already
|
|
# has a PerlIO encoding layer set. If it does not, we'll need to encode
|
|
# our output before printing it (handled in the output() sub). Wrap the
|
|
# check in an eval to handle versions of Perl without PerlIO.
|
|
#
|
|
# PerlIO::get_layers still requires its argument be a glob, so coerce the
|
|
# file handle to a glob.
|
|
$$self{ENCODE} = 0;
|
|
if ($$self{utf8}) {
|
|
$$self{ENCODE} = 1;
|
|
eval {
|
|
my @options = (output => 1, details => 1);
|
|
my @layers = PerlIO::get_layers (*{$$self{output_fh}}, @options);
|
|
if ($layers[-1] & PerlIO::F_UTF8 ()) {
|
|
$$self{ENCODE} = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
# Determine information for the preamble and then output it unless the
|
|
# document was content-free.
|
|
if (!$$self{CONTENTLESS}) {
|
|
my ($name, $section);
|
|
if (defined $$self{name}) {
|
|
$name = $$self{name};
|
|
$section = $$self{section} || 1;
|
|
} else {
|
|
($name, $section) = $self->devise_title;
|
|
}
|
|
my $date = defined($$self{date}) ? $$self{date} : $self->devise_date;
|
|
$self->preamble ($name, $section, $date)
|
|
unless $self->bare_output or DEBUG > 9;
|
|
}
|
|
|
|
# Initialize a few per-document variables.
|
|
$$self{INDENT} = 0; # Current indentation level.
|
|
$$self{INDENTS} = []; # Stack of indentations.
|
|
$$self{INDEX} = []; # Index keys waiting to be printed.
|
|
$$self{IN_NAME} = 0; # Whether processing the NAME section.
|
|
$$self{ITEMS} = 0; # The number of consecutive =items.
|
|
$$self{ITEMTYPES} = []; # Stack of =item types, one per list.
|
|
$$self{SHIFTWAIT} = 0; # Whether there is a shift waiting.
|
|
$$self{SHIFTS} = []; # Stack of .RS shifts.
|
|
$$self{PENDING} = [[]]; # Pending output.
|
|
}
|
|
|
|
# Handle the end of the document. This handles dying on POD errors, since
|
|
# Pod::Parser currently doesn't. Otherwise, does nothing but print out a
|
|
# final comment at the end of the document under debugging.
|
|
sub end_document {
|
|
my ($self) = @_;
|
|
if ($$self{complain_die} && $self->errors_seen) {
|
|
croak ("POD document had syntax errors");
|
|
}
|
|
return if $self->bare_output;
|
|
return if ($$self{CONTENTLESS} && !$$self{ALWAYS_EMIT_SOMETHING});
|
|
$self->output (q(.\" [End document]) . "\n") if DEBUG;
|
|
}
|
|
|
|
# Try to figure out the name and section from the file name and return them as
|
|
# a list, returning an empty name and section 1 if we can't find any better
|
|
# information. Uses File::Basename and File::Spec as necessary.
|
|
sub devise_title {
|
|
my ($self) = @_;
|
|
my $name = $self->source_filename || '';
|
|
my $section = $$self{section} || 1;
|
|
$section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
|
|
$name =~ s/\.p(od|[lm])\z//i;
|
|
|
|
# If Pod::Parser gave us an IO::File reference as the source file name,
|
|
# convert that to the empty string as well. Then, if we don't have a
|
|
# valid name, convert it to STDIN.
|
|
#
|
|
# In podlators 4.00 through 4.07, this also produced a warning, but that
|
|
# was surprising to a lot of programs that had expected to be able to pipe
|
|
# POD through pod2man without specifying the name. In the name of
|
|
# backward compatibility, just quietly set STDIN as the page title.
|
|
if ($name =~ /^IO::File(?:=\w+)\(0x[\da-f]+\)$/i) {
|
|
$name = '';
|
|
}
|
|
if ($name eq '') {
|
|
$name = 'STDIN';
|
|
}
|
|
|
|
# If the section isn't 3, then the name defaults to just the basename of
|
|
# the file.
|
|
if ($section !~ /^3/) {
|
|
require File::Basename;
|
|
$name = uc File::Basename::basename ($name);
|
|
} else {
|
|
require File::Spec;
|
|
my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
|
|
|
|
# Otherwise, assume we're dealing with a module. We want to figure
|
|
# out the full module name from the path to the file, but we don't
|
|
# want to include too much of the path into the module name. Lose
|
|
# anything up to the first of:
|
|
#
|
|
# */lib/*perl*/ standard or site_perl module
|
|
# */*perl*/lib/ from -Dprefix=/opt/perl
|
|
# */*perl*/ random module hierarchy
|
|
#
|
|
# Also strip off a leading site, site_perl, or vendor_perl component,
|
|
# any OS-specific component, and any version number component, and
|
|
# strip off an initial component of "lib" or "blib/lib" since that's
|
|
# what ExtUtils::MakeMaker creates.
|
|
#
|
|
# splitdir requires at least File::Spec 0.8.
|
|
my @dirs = File::Spec->splitdir ($dirs);
|
|
if (@dirs) {
|
|
my $cut = 0;
|
|
my $i;
|
|
for ($i = 0; $i < @dirs; $i++) {
|
|
if ($dirs[$i] =~ /perl/) {
|
|
$cut = $i + 1;
|
|
$cut++ if ($dirs[$i + 1] && $dirs[$i + 1] eq 'lib');
|
|
last;
|
|
}
|
|
}
|
|
if ($cut > 0) {
|
|
splice (@dirs, 0, $cut);
|
|
shift @dirs if ($dirs[0] =~ /^(site|vendor)(_perl)?$/);
|
|
shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
|
|
shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
|
|
}
|
|
shift @dirs if $dirs[0] eq 'lib';
|
|
splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib');
|
|
}
|
|
|
|
# Remove empty directories when building the module name; they
|
|
# occur too easily on Unix by doubling slashes.
|
|
$name = join ('::', (grep { $_ ? $_ : () } @dirs), $file);
|
|
}
|
|
return ($name, $section);
|
|
}
|
|
|
|
# Determine the modification date and return that, properly formatted in ISO
|
|
# format.
|
|
#
|
|
# If POD_MAN_DATE is set, that overrides anything else. This can be used for
|
|
# reproducible generation of the same file even if the input file timestamps
|
|
# are unpredictable or the POD comes from standard input.
|
|
#
|
|
# Otherwise, if SOURCE_DATE_EPOCH is set and can be parsed as seconds since
|
|
# the UNIX epoch, base the timestamp on that. See
|
|
# <https://reproducible-builds.org/specs/source-date-epoch/>
|
|
#
|
|
# Otherwise, use the modification date of the input if we can stat it. Be
|
|
# aware that Pod::Simple returns the stringification of the file handle as
|
|
# source_filename for input from a file handle, so we'll stat some random ref
|
|
# string in that case. If that fails, instead use the current time.
|
|
#
|
|
# $self - Pod::Man object, used to get the source file
|
|
#
|
|
# Returns: YYYY-MM-DD date suitable for the left-hand footer
|
|
sub devise_date {
|
|
my ($self) = @_;
|
|
|
|
# If POD_MAN_DATE is set, always use it.
|
|
if (defined($ENV{POD_MAN_DATE})) {
|
|
return $ENV{POD_MAN_DATE};
|
|
}
|
|
|
|
# If SOURCE_DATE_EPOCH is set and can be parsed, use that.
|
|
my $time;
|
|
if (defined($ENV{SOURCE_DATE_EPOCH}) && $ENV{SOURCE_DATE_EPOCH} !~ /\D/) {
|
|
$time = $ENV{SOURCE_DATE_EPOCH};
|
|
}
|
|
|
|
# Otherwise, get the input filename and try to stat it. If that fails,
|
|
# use the current time.
|
|
if (!defined $time) {
|
|
my $input = $self->source_filename;
|
|
if ($input) {
|
|
$time = (stat($input))[9] || time();
|
|
} else {
|
|
$time = time();
|
|
}
|
|
}
|
|
|
|
# Can't use POSIX::strftime(), which uses Fcntl, because MakeMaker uses
|
|
# this and it has to work in the core which can't load dynamic libraries.
|
|
# Use gmtime instead of localtime so that the generated man page does not
|
|
# depend on the local time zone setting and is more reproducible
|
|
my ($year, $month, $day) = (gmtime($time))[5,4,3];
|
|
return sprintf("%04d-%02d-%02d", $year + 1900, $month + 1, $day);
|
|
}
|
|
|
|
# Print out the preamble and the title. The meaning of the arguments to .TH
|
|
# unfortunately vary by system; some systems consider the fourth argument to
|
|
# be a "source" and others use it as a version number. Generally it's just
|
|
# presented as the left-side footer, though, so it doesn't matter too much if
|
|
# a particular system gives it another interpretation.
|
|
#
|
|
# The order of date and release used to be reversed in older versions of this
|
|
# module, but this order is correct for both Solaris and Linux.
|
|
sub preamble {
|
|
my ($self, $name, $section, $date) = @_;
|
|
my $preamble = $self->preamble_template (!$$self{utf8});
|
|
|
|
# Build the index line and make sure that it will be syntactically valid.
|
|
my $index = "$name $section";
|
|
$index =~ s/\"/\"\"/g;
|
|
|
|
# If name or section contain spaces, quote them (section really never
|
|
# should, but we may as well be cautious).
|
|
for ($name, $section) {
|
|
if (/\s/) {
|
|
s/\"/\"\"/g;
|
|
$_ = '"' . $_ . '"';
|
|
}
|
|
}
|
|
|
|
# Double quotes in date, since it will be quoted.
|
|
$date =~ s/\"/\"\"/g;
|
|
|
|
# Substitute into the preamble the configuration options.
|
|
$preamble =~ s/\@CFONT\@/$$self{fixed}/;
|
|
$preamble =~ s/\@LQUOTE\@/$$self{LQUOTE}/;
|
|
$preamble =~ s/\@RQUOTE\@/$$self{RQUOTE}/;
|
|
chomp $preamble;
|
|
|
|
# Get the version information.
|
|
my $version = $self->version_report;
|
|
|
|
# Finally output everything.
|
|
$self->output (<<"----END OF HEADER----");
|
|
.\\" Automatically generated by $version
|
|
.\\"
|
|
.\\" Standard preamble:
|
|
.\\" ========================================================================
|
|
$preamble
|
|
.\\" ========================================================================
|
|
.\\"
|
|
.IX Title "$index"
|
|
.TH $name $section "$date" "$$self{release}" "$$self{center}"
|
|
.\\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
|
.\\" way too many mistakes in technical documents.
|
|
.if n .ad l
|
|
.nh
|
|
----END OF HEADER----
|
|
$self->output (".\\\" [End of preamble]\n") if DEBUG;
|
|
}
|
|
|
|
##############################################################################
|
|
# Text blocks
|
|
##############################################################################
|
|
|
|
# Handle a basic block of text. The only tricky part of this is if this is
|
|
# the first paragraph of text after an =over, in which case we have to change
|
|
# indentations for *roff.
|
|
sub cmd_para {
|
|
my ($self, $attrs, $text) = @_;
|
|
my $line = $$attrs{start_line};
|
|
|
|
# Output the paragraph. We also have to handle =over without =item. If
|
|
# there's an =over without =item, SHIFTWAIT will be set, and we need to
|
|
# handle creation of the indent here. Add the shift to SHIFTS so that it
|
|
# will be cleaned up on =back.
|
|
$self->makespace;
|
|
if ($$self{SHIFTWAIT}) {
|
|
$self->output (".RS $$self{INDENT}\n");
|
|
push (@{ $$self{SHIFTS} }, $$self{INDENT});
|
|
$$self{SHIFTWAIT} = 0;
|
|
}
|
|
|
|
# Add the line number for debugging, but not in the NAME section just in
|
|
# case the comment would confuse apropos.
|
|
$self->output (".\\\" [At source line $line]\n")
|
|
if defined ($line) && DEBUG && !$$self{IN_NAME};
|
|
|
|
# Force exactly one newline at the end and strip unwanted trailing
|
|
# whitespace at the end, but leave "\ " backslashed space from an S< > at
|
|
# the end of a line. Reverse the text first, to avoid having to scan the
|
|
# entire paragraph.
|
|
$text = reverse $text;
|
|
$text =~ s/\A\s*?(?= \\|\S|\z)/\n/;
|
|
$text = reverse $text;
|
|
|
|
# Output the paragraph.
|
|
$self->output ($self->protect ($self->textmapfonts ($text)));
|
|
$self->outindex;
|
|
$$self{NEEDSPACE} = 1;
|
|
return '';
|
|
}
|
|
|
|
# Handle a verbatim paragraph. Put a null token at the beginning of each line
|
|
# to protect against commands and wrap in .Vb/.Ve (which we define in our
|
|
# prelude).
|
|
sub cmd_verbatim {
|
|
my ($self, $attrs, $text) = @_;
|
|
|
|
# Ignore an empty verbatim paragraph.
|
|
return unless $text =~ /\S/;
|
|
|
|
# Force exactly one newline at the end and strip unwanted trailing
|
|
# whitespace at the end. Reverse the text first, to avoid having to scan
|
|
# the entire paragraph.
|
|
$text = reverse $text;
|
|
$text =~ s/\A\s*/\n/;
|
|
$text = reverse $text;
|
|
|
|
# Get a count of the number of lines before the first blank line, which
|
|
# we'll pass to .Vb as its parameter. This tells *roff to keep that many
|
|
# lines together. We don't want to tell *roff to keep huge blocks
|
|
# together.
|
|
my @lines = split (/\n/, $text);
|
|
my $unbroken = 0;
|
|
for (@lines) {
|
|
last if /^\s*$/;
|
|
$unbroken++;
|
|
}
|
|
$unbroken = 10 if ($unbroken > 12 && !$$self{MAGIC_VNOPAGEBREAK_LIMIT});
|
|
|
|
# Prepend a null token to each line.
|
|
$text =~ s/^/\\&/gm;
|
|
|
|
# Output the results.
|
|
$self->makespace;
|
|
$self->output (".Vb $unbroken\n$text.Ve\n");
|
|
$$self{NEEDSPACE} = 1;
|
|
return '';
|
|
}
|
|
|
|
# Handle literal text (produced by =for and similar constructs). Just output
|
|
# it with the minimum of changes.
|
|
sub cmd_data {
|
|
my ($self, $attrs, $text) = @_;
|
|
$text =~ s/^\n+//;
|
|
$text =~ s/\n{0,2}$/\n/;
|
|
$self->output ($text);
|
|
return '';
|
|
}
|
|
|
|
##############################################################################
|
|
# Headings
|
|
##############################################################################
|
|
|
|
# Common code for all headings. This is called before the actual heading is
|
|
# output. It returns the cleaned up heading text (putting the heading all on
|
|
# one line) and may do other things, like closing bad =item blocks.
|
|
sub heading_common {
|
|
my ($self, $text, $line) = @_;
|
|
$text =~ s/\s+$//;
|
|
$text =~ s/\s*\n\s*/ /g;
|
|
|
|
# This should never happen; it means that we have a heading after =item
|
|
# without an intervening =back. But just in case, handle it anyway.
|
|
if ($$self{ITEMS} > 1) {
|
|
$$self{ITEMS} = 0;
|
|
$self->output (".PD\n");
|
|
}
|
|
|
|
# Output the current source line.
|
|
$self->output ( ".\\\" [At source line $line]\n" )
|
|
if defined ($line) && DEBUG;
|
|
return $text;
|
|
}
|
|
|
|
# First level heading. We can't output .IX in the NAME section due to a bug
|
|
# in some versions of catman, so don't output a .IX for that section. .SH
|
|
# already uses small caps, so remove \s0 and \s-1. Maintain IN_NAME as
|
|
# appropriate.
|
|
sub cmd_head1 {
|
|
my ($self, $attrs, $text) = @_;
|
|
$text =~ s/\\s-?\d//g;
|
|
$text = $self->heading_common ($text, $$attrs{start_line});
|
|
my $isname = ($text eq 'NAME' || $text =~ /\(NAME\)/);
|
|
$self->output ($self->switchquotes ('.SH', $self->mapfonts ($text)));
|
|
$self->outindex ('Header', $text) unless $isname;
|
|
$$self{NEEDSPACE} = 0;
|
|
$$self{IN_NAME} = $isname;
|
|
return '';
|
|
}
|
|
|
|
# Second level heading.
|
|
sub cmd_head2 {
|
|
my ($self, $attrs, $text) = @_;
|
|
$text = $self->heading_common ($text, $$attrs{start_line});
|
|
$self->output ($self->switchquotes ('.SS', $self->mapfonts ($text)));
|
|
$self->outindex ('Subsection', $text);
|
|
$$self{NEEDSPACE} = 0;
|
|
return '';
|
|
}
|
|
|
|
# Third level heading. *roff doesn't have this concept, so just put the
|
|
# heading in italics as a normal paragraph.
|
|
sub cmd_head3 {
|
|
my ($self, $attrs, $text) = @_;
|
|
$text = $self->heading_common ($text, $$attrs{start_line});
|
|
$self->makespace;
|
|
$self->output ($self->textmapfonts ('\f(IS' . $text . '\f(IE') . "\n");
|
|
$self->outindex ('Subsection', $text);
|
|
$$self{NEEDSPACE} = 1;
|
|
return '';
|
|
}
|
|
|
|
# Fourth level heading. *roff doesn't have this concept, so just put the
|
|
# heading as a normal paragraph.
|
|
sub cmd_head4 {
|
|
my ($self, $attrs, $text) = @_;
|
|
$text = $self->heading_common ($text, $$attrs{start_line});
|
|
$self->makespace;
|
|
$self->output ($self->textmapfonts ($text) . "\n");
|
|
$self->outindex ('Subsection', $text);
|
|
$$self{NEEDSPACE} = 1;
|
|
return '';
|
|
}
|
|
|
|
##############################################################################
|
|
# Formatting codes
|
|
##############################################################################
|
|
|
|
# All of the formatting codes that aren't handled internally by the parser,
|
|
# other than L<> and X<>.
|
|
sub cmd_b { return $_[0]->{IN_NAME} ? $_[2] : '\f(BS' . $_[2] . '\f(BE' }
|
|
sub cmd_i { return $_[0]->{IN_NAME} ? $_[2] : '\f(IS' . $_[2] . '\f(IE' }
|
|
sub cmd_f { return $_[0]->{IN_NAME} ? $_[2] : '\f(IS' . $_[2] . '\f(IE' }
|
|
sub cmd_c { return $_[0]->quote_literal ($_[2]) }
|
|
|
|
# Index entries are just added to the pending entries.
|
|
sub cmd_x {
|
|
my ($self, $attrs, $text) = @_;
|
|
push (@{ $$self{INDEX} }, $text);
|
|
return '';
|
|
}
|
|
|
|
# Links reduce to the text that we're given, wrapped in angle brackets if it's
|
|
# a URL, followed by the URL. We take an option to suppress the URL if anchor
|
|
# text is given. We need to format the "to" value of the link before
|
|
# comparing it to the text since we may escape hyphens.
|
|
sub cmd_l {
|
|
my ($self, $attrs, $text) = @_;
|
|
if ($$attrs{type} eq 'url') {
|
|
my $to = $$attrs{to};
|
|
if (defined $to) {
|
|
my $tag = $$self{PENDING}[-1];
|
|
$to = $self->format_text ($$tag[1], $to);
|
|
}
|
|
if (not defined ($to) or $to eq $text) {
|
|
return "<$text>";
|
|
} elsif ($$self{nourls}) {
|
|
return $text;
|
|
} else {
|
|
return "$text <$$attrs{to}>";
|
|
}
|
|
} else {
|
|
return $text;
|
|
}
|
|
}
|
|
|
|
##############################################################################
|
|
# List handling
|
|
##############################################################################
|
|
|
|
# Handle the beginning of an =over block. Takes the type of the block as the
|
|
# first argument, and then the attr hash. This is called by the handlers for
|
|
# the four different types of lists (bullet, number, text, and block).
|
|
sub over_common_start {
|
|
my ($self, $type, $attrs) = @_;
|
|
my $line = $$attrs{start_line};
|
|
my $indent = $$attrs{indent};
|
|
DEBUG > 3 and print " Starting =over $type (line $line, indent ",
|
|
($indent || '?'), "\n";
|
|
|
|
# Find the indentation level.
|
|
unless (defined ($indent) && $indent =~ /^[-+]?\d{1,4}\s*$/) {
|
|
$indent = $$self{indent};
|
|
}
|
|
|
|
# If we've gotten multiple indentations in a row, we need to emit the
|
|
# pending indentation for the last level that we saw and haven't acted on
|
|
# yet. SHIFTS is the stack of indentations that we've actually emitted
|
|
# code for.
|
|
if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) {
|
|
$self->output (".RS $$self{INDENT}\n");
|
|
push (@{ $$self{SHIFTS} }, $$self{INDENT});
|
|
}
|
|
|
|
# Now, do record-keeping. INDENTS is a stack of indentations that we've
|
|
# seen so far, and INDENT is the current level of indentation. ITEMTYPES
|
|
# is a stack of list types that we've seen.
|
|
push (@{ $$self{INDENTS} }, $$self{INDENT});
|
|
push (@{ $$self{ITEMTYPES} }, $type);
|
|
$$self{INDENT} = $indent + 0;
|
|
$$self{SHIFTWAIT} = 1;
|
|
}
|
|
|
|
# End an =over block. Takes no options other than the class pointer.
|
|
# Normally, once we close a block and therefore remove something from INDENTS,
|
|
# INDENTS will now be longer than SHIFTS, indicating that we also need to emit
|
|
# *roff code to close the indent. This isn't *always* true, depending on the
|
|
# circumstance. If we're still inside an indentation, we need to emit another
|
|
# .RE and then a new .RS to unconfuse *roff.
|
|
sub over_common_end {
|
|
my ($self) = @_;
|
|
DEBUG > 3 and print " Ending =over\n";
|
|
$$self{INDENT} = pop @{ $$self{INDENTS} };
|
|
pop @{ $$self{ITEMTYPES} };
|
|
|
|
# If we emitted code for that indentation, end it.
|
|
if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) {
|
|
$self->output (".RE\n");
|
|
pop @{ $$self{SHIFTS} };
|
|
}
|
|
|
|
# If we're still in an indentation, *roff will have now lost track of the
|
|
# right depth of that indentation, so fix that.
|
|
if (@{ $$self{INDENTS} } > 0) {
|
|
$self->output (".RE\n");
|
|
$self->output (".RS $$self{INDENT}\n");
|
|
}
|
|
$$self{NEEDSPACE} = 1;
|
|
$$self{SHIFTWAIT} = 0;
|
|
}
|
|
|
|
# Dispatch the start and end calls as appropriate.
|
|
sub start_over_bullet { my $s = shift; $s->over_common_start ('bullet', @_) }
|
|
sub start_over_number { my $s = shift; $s->over_common_start ('number', @_) }
|
|
sub start_over_text { my $s = shift; $s->over_common_start ('text', @_) }
|
|
sub start_over_block { my $s = shift; $s->over_common_start ('block', @_) }
|
|
sub end_over_bullet { $_[0]->over_common_end }
|
|
sub end_over_number { $_[0]->over_common_end }
|
|
sub end_over_text { $_[0]->over_common_end }
|
|
sub end_over_block { $_[0]->over_common_end }
|
|
|
|
# The common handler for all item commands. Takes the type of the item, the
|
|
# attributes, and then the text of the item.
|
|
#
|
|
# Emit an index entry for anything that's interesting, but don't emit index
|
|
# entries for things like bullets and numbers. Newlines in an item title are
|
|
# turned into spaces since *roff can't handle them embedded.
|
|
sub item_common {
|
|
my ($self, $type, $attrs, $text) = @_;
|
|
my $line = $$attrs{start_line};
|
|
DEBUG > 3 and print " $type item (line $line): $text\n";
|
|
|
|
# Clean up the text. We want to end up with two variables, one ($text)
|
|
# which contains any body text after taking out the item portion, and
|
|
# another ($item) which contains the actual item text.
|
|
$text =~ s/\s+$//;
|
|
my ($item, $index);
|
|
if ($type eq 'bullet') {
|
|
$item = "\\\(bu";
|
|
$text =~ s/\n*$/\n/;
|
|
} elsif ($type eq 'number') {
|
|
$item = $$attrs{number} . '.';
|
|
} else {
|
|
$item = $text;
|
|
$item =~ s/\s*\n\s*/ /g;
|
|
$text = '';
|
|
$index = $item if ($item =~ /\w/);
|
|
}
|
|
|
|
# Take care of the indentation. If shifts and indents are equal, close
|
|
# the top shift, since we're about to create an indentation with .IP.
|
|
# Also output .PD 0 to turn off spacing between items if this item is
|
|
# directly following another one. We only have to do that once for a
|
|
# whole chain of items so do it for the second item in the change. Note
|
|
# that makespace is what undoes this.
|
|
if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
|
|
$self->output (".RE\n");
|
|
pop @{ $$self{SHIFTS} };
|
|
}
|
|
$self->output (".PD 0\n") if ($$self{ITEMS} == 1);
|
|
|
|
# Now, output the item tag itself.
|
|
$item = $self->textmapfonts ($item);
|
|
$self->output ($self->switchquotes ('.IP', $item, $$self{INDENT}));
|
|
$$self{NEEDSPACE} = 0;
|
|
$$self{ITEMS}++;
|
|
$$self{SHIFTWAIT} = 0;
|
|
|
|
# If body text for this item was included, go ahead and output that now.
|
|
if ($text) {
|
|
$text =~ s/\s*$/\n/;
|
|
$self->makespace;
|
|
$self->output ($self->protect ($self->textmapfonts ($text)));
|
|
$$self{NEEDSPACE} = 1;
|
|
}
|
|
$self->outindex ($index ? ('Item', $index) : ());
|
|
}
|
|
|
|
# Dispatch the item commands to the appropriate place.
|
|
sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
|
|
sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
|
|
sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) }
|
|
sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) }
|
|
|
|
##############################################################################
|
|
# Backward compatibility
|
|
##############################################################################
|
|
|
|
# Reset the underlying Pod::Simple object between calls to parse_from_file so
|
|
# that the same object can be reused to convert multiple pages.
|
|
sub parse_from_file {
|
|
my $self = shift;
|
|
$self->reinit;
|
|
|
|
# Fake the old cutting option to Pod::Parser. This fiddles with internal
|
|
# Pod::Simple state and is quite ugly; we need a better approach.
|
|
if (ref ($_[0]) eq 'HASH') {
|
|
my $opts = shift @_;
|
|
if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
|
|
$$self{in_pod} = 1;
|
|
$$self{last_was_blank} = 1;
|
|
}
|
|
}
|
|
|
|
# Do the work.
|
|
my $retval = $self->SUPER::parse_from_file (@_);
|
|
|
|
# Flush output, since Pod::Simple doesn't do this. Ideally we should also
|
|
# close the file descriptor if we had to open one, but we can't easily
|
|
# figure this out.
|
|
my $fh = $self->output_fh ();
|
|
my $oldfh = select $fh;
|
|
my $oldflush = $|;
|
|
$| = 1;
|
|
print $fh '';
|
|
$| = $oldflush;
|
|
select $oldfh;
|
|
return $retval;
|
|
}
|
|
|
|
# Pod::Simple failed to provide this backward compatibility function, so
|
|
# implement it ourselves. File handles are one of the inputs that
|
|
# parse_from_file supports.
|
|
sub parse_from_filehandle {
|
|
my $self = shift;
|
|
return $self->parse_from_file (@_);
|
|
}
|
|
|
|
# Pod::Simple's parse_file doesn't set output_fh. Wrap the call and do so
|
|
# ourself unless it was already set by the caller, since our documentation has
|
|
# always said that this should work.
|
|
sub parse_file {
|
|
my ($self, $in) = @_;
|
|
unless (defined $$self{output_fh}) {
|
|
$self->output_fh (\*STDOUT);
|
|
}
|
|
return $self->SUPER::parse_file ($in);
|
|
}
|
|
|
|
# Do the same for parse_lines, just to be polite. Pod::Simple's man page
|
|
# implies that the caller is responsible for setting this, but I don't see any
|
|
# reason not to set a default.
|
|
sub parse_lines {
|
|
my ($self, @lines) = @_;
|
|
unless (defined $$self{output_fh}) {
|
|
$self->output_fh (\*STDOUT);
|
|
}
|
|
return $self->SUPER::parse_lines (@lines);
|
|
}
|
|
|
|
# Likewise for parse_string_document.
|
|
sub parse_string_document {
|
|
my ($self, $doc) = @_;
|
|
unless (defined $$self{output_fh}) {
|
|
$self->output_fh (\*STDOUT);
|
|
}
|
|
return $self->SUPER::parse_string_document ($doc);
|
|
}
|
|
|
|
##############################################################################
|
|
# Translation tables
|
|
##############################################################################
|
|
|
|
# The following table is adapted from Tom Christiansen's pod2man. It assumes
|
|
# that the standard preamble has already been printed, since that's what
|
|
# defines all of the accent marks. We really want to do something better than
|
|
# this when *roff actually supports other character sets itself, since these
|
|
# results are pretty poor.
|
|
#
|
|
# This only works in an ASCII world. What to do in a non-ASCII world is very
|
|
# unclear -- hopefully we can assume UTF-8 and just leave well enough alone.
|
|
@ESCAPES{0xA0 .. 0xFF} = (
|
|
"\\ ", undef, undef, undef, undef, undef, undef, undef,
|
|
undef, undef, undef, undef, undef, "\\%", undef, undef,
|
|
|
|
undef, undef, undef, undef, undef, undef, undef, undef,
|
|
undef, undef, undef, undef, undef, undef, undef, undef,
|
|
|
|
"A\\*`", "A\\*'", "A\\*^", "A\\*~", "A\\*:", "A\\*o", "\\*(Ae", "C\\*,",
|
|
"E\\*`", "E\\*'", "E\\*^", "E\\*:", "I\\*`", "I\\*'", "I\\*^", "I\\*:",
|
|
|
|
"\\*(D-", "N\\*~", "O\\*`", "O\\*'", "O\\*^", "O\\*~", "O\\*:", undef,
|
|
"O\\*/", "U\\*`", "U\\*'", "U\\*^", "U\\*:", "Y\\*'", "\\*(Th", "\\*8",
|
|
|
|
"a\\*`", "a\\*'", "a\\*^", "a\\*~", "a\\*:", "a\\*o", "\\*(ae", "c\\*,",
|
|
"e\\*`", "e\\*'", "e\\*^", "e\\*:", "i\\*`", "i\\*'", "i\\*^", "i\\*:",
|
|
|
|
"\\*(d-", "n\\*~", "o\\*`", "o\\*'", "o\\*^", "o\\*~", "o\\*:", undef,
|
|
"o\\*/" , "u\\*`", "u\\*'", "u\\*^", "u\\*:", "y\\*'", "\\*(th", "y\\*:",
|
|
) if ASCII;
|
|
|
|
##############################################################################
|
|
# Premable
|
|
##############################################################################
|
|
|
|
# The following is the static preamble which starts all *roff output we
|
|
# generate. Most is static except for the font to use as a fixed-width font,
|
|
# which is designed by @CFONT@, and the left and right quotes to use for C<>
|
|
# text, designated by @LQOUTE@ and @RQUOTE@. However, the second part, which
|
|
# defines the accent marks, is only used if $escapes is set to true.
|
|
sub preamble_template {
|
|
my ($self, $accents) = @_;
|
|
my $preamble = <<'----END OF PREAMBLE----';
|
|
.de Sp \" Vertical space (when we can't use .PP)
|
|
.if t .sp .5v
|
|
.if n .sp
|
|
..
|
|
.de Vb \" Begin verbatim text
|
|
.ft @CFONT@
|
|
.nf
|
|
.ne \\$1
|
|
..
|
|
.de Ve \" End verbatim text
|
|
.ft R
|
|
.fi
|
|
..
|
|
.\" Set up some character translations and predefined strings. \*(-- will
|
|
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
|
|
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
|
|
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
|
|
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
|
|
.\" nothing in troff, for use with C<>.
|
|
.tr \(*W-
|
|
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
|
|
.ie n \{\
|
|
. ds -- \(*W-
|
|
. ds PI pi
|
|
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
|
|
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
|
|
. ds L" ""
|
|
. ds R" ""
|
|
. ds C` @LQUOTE@
|
|
. ds C' @RQUOTE@
|
|
'br\}
|
|
.el\{\
|
|
. ds -- \|\(em\|
|
|
. ds PI \(*p
|
|
. ds L" ``
|
|
. ds R" ''
|
|
. ds C`
|
|
. ds C'
|
|
'br\}
|
|
.\"
|
|
.\" Escape single quotes in literal strings from groff's Unicode transform.
|
|
.ie \n(.g .ds Aq \(aq
|
|
.el .ds Aq '
|
|
.\"
|
|
.\" If the F register is >0, we'll generate index entries on stderr for
|
|
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
|
|
.\" entries marked with X<> in POD. Of course, you'll have to process the
|
|
.\" output yourself in some meaningful fashion.
|
|
.\"
|
|
.\" Avoid warning from groff about undefined register 'F'.
|
|
.de IX
|
|
..
|
|
.nr rF 0
|
|
.if \n(.g .if rF .nr rF 1
|
|
.if (\n(rF:(\n(.g==0)) \{\
|
|
. if \nF \{\
|
|
. de IX
|
|
. tm Index:\\$1\t\\n%\t"\\$2"
|
|
..
|
|
. if !\nF==2 \{\
|
|
. nr % 0
|
|
. nr F 2
|
|
. \}
|
|
. \}
|
|
.\}
|
|
.rr rF
|
|
----END OF PREAMBLE----
|
|
#'# for cperl-mode
|
|
|
|
if ($accents) {
|
|
$preamble .= <<'----END OF PREAMBLE----'
|
|
.\"
|
|
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
|
|
.\" Fear. Run. Save yourself. No user-serviceable parts.
|
|
. \" fudge factors for nroff and troff
|
|
.if n \{\
|
|
. ds #H 0
|
|
. ds #V .8m
|
|
. ds #F .3m
|
|
. ds #[ \f1
|
|
. ds #] \fP
|
|
.\}
|
|
.if t \{\
|
|
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
|
|
. ds #V .6m
|
|
. ds #F 0
|
|
. ds #[ \&
|
|
. ds #] \&
|
|
.\}
|
|
. \" simple accents for nroff and troff
|
|
.if n \{\
|
|
. ds ' \&
|
|
. ds ` \&
|
|
. ds ^ \&
|
|
. ds , \&
|
|
. ds ~ ~
|
|
. ds /
|
|
.\}
|
|
.if t \{\
|
|
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
|
|
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
|
|
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
|
|
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
|
|
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
|
|
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
|
|
.\}
|
|
. \" troff and (daisy-wheel) nroff accents
|
|
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
|
|
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
|
|
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
|
|
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
|
|
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
|
|
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
|
|
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
|
|
.ds ae a\h'-(\w'a'u*4/10)'e
|
|
.ds Ae A\h'-(\w'A'u*4/10)'E
|
|
. \" corrections for vroff
|
|
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
|
|
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
|
|
. \" for low resolution devices (crt and lpr)
|
|
.if \n(.H>23 .if \n(.V>19 \
|
|
\{\
|
|
. ds : e
|
|
. ds 8 ss
|
|
. ds o a
|
|
. ds d- d\h'-1'\(ga
|
|
. ds D- D\h'-1'\(hy
|
|
. ds th \o'bp'
|
|
. ds Th \o'LP'
|
|
. ds ae ae
|
|
. ds Ae AE
|
|
.\}
|
|
.rm #[ #] #H #V #F C
|
|
----END OF PREAMBLE----
|
|
#`# for cperl-mode
|
|
}
|
|
return $preamble;
|
|
}
|
|
|
|
##############################################################################
|
|
# Module return value and documentation
|
|
##############################################################################
|
|
|
|
1;
|
|
__END__
|
|
|
|
=for stopwords
|
|
en em ALLCAPS teeny fixedbold fixeditalic fixedbolditalic stderr utf8
|
|
UTF-8 Allbery Sean Burke Ossanna Solaris formatters troff uppercased
|
|
Christiansen nourls parsers Kernighan lquote rquote
|
|
|
|
=head1 NAME
|
|
|
|
Pod::Man - Convert POD data to formatted *roff input
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Pod::Man;
|
|
my $parser = Pod::Man->new (release => $VERSION, section => 8);
|
|
|
|
# Read POD from STDIN and write to STDOUT.
|
|
$parser->parse_file (\*STDIN);
|
|
|
|
# Read POD from file.pod and write to file.1.
|
|
$parser->parse_from_file ('file.pod', 'file.1');
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Pod::Man is a module to convert documentation in the POD format (the
|
|
preferred language for documenting Perl) into *roff input using the man
|
|
macro set. The resulting *roff code is suitable for display on a terminal
|
|
using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
|
|
It is conventionally invoked using the driver script B<pod2man>, but it can
|
|
also be used directly.
|
|
|
|
As a derived class from Pod::Simple, Pod::Man supports the same methods and
|
|
interfaces. See L<Pod::Simple> for all the details.
|
|
|
|
new() can take options, in the form of key/value pairs that control the
|
|
behavior of the parser. See below for details.
|
|
|
|
If no options are given, Pod::Man uses the name of the input file with any
|
|
trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
|
|
section 1 unless the file ended in C<.pm> in which case it defaults to
|
|
section 3, to a centered title of "User Contributed Perl Documentation", to
|
|
a centered footer of the Perl version it is run with, and to a left-hand
|
|
footer of the modification date of its input (or the current date if given
|
|
C<STDIN> for input).
|
|
|
|
Pod::Man assumes that your *roff formatters have a fixed-width font named
|
|
C<CW>. If yours is called something else (like C<CR>), use the C<fixed>
|
|
option to specify it. This generally only matters for troff output for
|
|
printing. Similarly, you can set the fonts used for bold, italic, and
|
|
bold italic fixed-width output.
|
|
|
|
Besides the obvious pod conversions, Pod::Man also takes care of
|
|
formatting func(), func(3), and simple variable references like $foo or
|
|
@bar so you don't have to use code escapes for them; complex expressions
|
|
like C<$fred{'stuff'}> will still need to be escaped, though. It also
|
|
translates dashes that aren't used as hyphens into en dashes, makes long
|
|
dashes--like this--into proper em dashes, fixes "paired quotes," makes C++
|
|
look right, puts a little space between double underscores, makes ALLCAPS
|
|
a teeny bit smaller in B<troff>, and escapes stuff that *roff treats as
|
|
special so that you don't have to.
|
|
|
|
The recognized options to new() are as follows. All options take a single
|
|
argument.
|
|
|
|
=over 4
|
|
|
|
=item center
|
|
|
|
Sets the centered page header for the C<.TH> macro. The default, if this
|
|
option is not specified, is "User Contributed Perl Documentation".
|
|
|
|
=item date
|
|
|
|
Sets the left-hand footer for the C<.TH> macro. If this option is not set,
|
|
the contents of the environment variable POD_MAN_DATE, if set, will be used.
|
|
Failing that, the value of SOURCE_DATE_EPOCH, the modification date of the
|
|
input file, or the current time if stat() can't find that file (which will be
|
|
the case if the input is from C<STDIN>) will be used. If obtained from the
|
|
file modification date or the current time, the date will be formatted as
|
|
C<YYYY-MM-DD> and will be based on UTC (so that the output will be
|
|
reproducible regardless of local time zone).
|
|
|
|
=item errors
|
|
|
|
How to report errors. C<die> says to throw an exception on any POD
|
|
formatting error. C<stderr> says to report errors on standard error, but
|
|
not to throw an exception. C<pod> says to include a POD ERRORS section
|
|
in the resulting documentation summarizing the errors. C<none> ignores
|
|
POD errors entirely, as much as possible.
|
|
|
|
The default is C<pod>.
|
|
|
|
=item fixed
|
|
|
|
The fixed-width font to use for verbatim text and code. Defaults to
|
|
C<CW>. Some systems may want C<CR> instead. Only matters for B<troff>
|
|
output.
|
|
|
|
=item fixedbold
|
|
|
|
Bold version of the fixed-width font. Defaults to C<CB>. Only matters
|
|
for B<troff> output.
|
|
|
|
=item fixeditalic
|
|
|
|
Italic version of the fixed-width font (actually, something of a misnomer,
|
|
since most fixed-width fonts only have an oblique version, not an italic
|
|
version). Defaults to C<CI>. Only matters for B<troff> output.
|
|
|
|
=item fixedbolditalic
|
|
|
|
Bold italic (probably actually oblique) version of the fixed-width font.
|
|
Pod::Man doesn't assume you have this, and defaults to C<CB>. Some
|
|
systems (such as Solaris) have this font available as C<CX>. Only matters
|
|
for B<troff> output.
|
|
|
|
=item lquote
|
|
|
|
=item rquote
|
|
|
|
Sets the quote marks used to surround CE<lt>> text. C<lquote> sets the
|
|
left quote mark and C<rquote> sets the right quote mark. Either may also
|
|
be set to the special value C<none>, in which case no quote mark is added
|
|
on that side of CE<lt>> text (but the font is still changed for troff
|
|
output).
|
|
|
|
Also see the C<quotes> option, which can be used to set both quotes at once.
|
|
If both C<quotes> and one of the other options is set, C<lquote> or C<rquote>
|
|
overrides C<quotes>.
|
|
|
|
=item name
|
|
|
|
Set the name of the manual page for the C<.TH> macro. Without this
|
|
option, the manual name is set to the uppercased base name of the file
|
|
being converted unless the manual section is 3, in which case the path is
|
|
parsed to see if it is a Perl module path. If it is, a path like
|
|
C<.../lib/Pod/Man.pm> is converted into a name like C<Pod::Man>. This
|
|
option, if given, overrides any automatic determination of the name.
|
|
|
|
If generating a manual page from standard input, the name will be set to
|
|
C<STDIN> if this option is not provided. Providing this option is strongly
|
|
recommended to set a meaningful manual page name.
|
|
|
|
=item nourls
|
|
|
|
Normally, LZ<><> formatting codes with a URL but anchor text are formatted
|
|
to show both the anchor text and the URL. In other words:
|
|
|
|
L<foo|http://example.com/>
|
|
|
|
is formatted as:
|
|
|
|
foo <http://example.com/>
|
|
|
|
This option, if set to a true value, suppresses the URL when anchor text
|
|
is given, so this example would be formatted as just C<foo>. This can
|
|
produce less cluttered output in cases where the URLs are not particularly
|
|
important.
|
|
|
|
=item quotes
|
|
|
|
Sets the quote marks used to surround CE<lt>> text. If the value is a
|
|
single character, it is used as both the left and right quote. Otherwise,
|
|
it is split in half, and the first half of the string is used as the left
|
|
quote and the second is used as the right quote.
|
|
|
|
This may also be set to the special value C<none>, in which case no quote
|
|
marks are added around CE<lt>> text (but the font is still changed for troff
|
|
output).
|
|
|
|
Also see the C<lquote> and C<rquote> options, which can be used to set the
|
|
left and right quotes independently. If both C<quotes> and one of the other
|
|
options is set, C<lquote> or C<rquote> overrides C<quotes>.
|
|
|
|
=item release
|
|
|
|
Set the centered footer for the C<.TH> macro. By default, this is set to
|
|
the version of Perl you run Pod::Man under. Setting this to the empty
|
|
string will cause some *roff implementations to use the system default
|
|
value.
|
|
|
|
Note that some system C<an> macro sets assume that the centered footer
|
|
will be a modification date and will prepend something like "Last
|
|
modified: ". If this is the case for your target system, you may want to
|
|
set C<release> to the last modified date and C<date> to the version
|
|
number.
|
|
|
|
=item section
|
|
|
|
Set the section for the C<.TH> macro. The standard section numbering
|
|
convention is to use 1 for user commands, 2 for system calls, 3 for
|
|
functions, 4 for devices, 5 for file formats, 6 for games, 7 for
|
|
miscellaneous information, and 8 for administrator commands. There is a lot
|
|
of variation here, however; some systems (like Solaris) use 4 for file
|
|
formats, 5 for miscellaneous information, and 7 for devices. Still others
|
|
use 1m instead of 8, or some mix of both. About the only section numbers
|
|
that are reliably consistent are 1, 2, and 3.
|
|
|
|
By default, section 1 will be used unless the file ends in C<.pm> in which
|
|
case section 3 will be selected.
|
|
|
|
=item stderr
|
|
|
|
Send error messages about invalid POD to standard error instead of
|
|
appending a POD ERRORS section to the generated *roff output. This is
|
|
equivalent to setting C<errors> to C<stderr> if C<errors> is not already
|
|
set. It is supported for backward compatibility.
|
|
|
|
=item utf8
|
|
|
|
By default, Pod::Man produces the most conservative possible *roff output
|
|
to try to ensure that it will work with as many different *roff
|
|
implementations as possible. Many *roff implementations cannot handle
|
|
non-ASCII characters, so this means all non-ASCII characters are converted
|
|
either to a *roff escape sequence that tries to create a properly accented
|
|
character (at least for troff output) or to C<X>.
|
|
|
|
If this option is set, Pod::Man will instead output UTF-8. If your *roff
|
|
implementation can handle it, this is the best output format to use and
|
|
avoids corruption of documents containing non-ASCII characters. However,
|
|
be warned that *roff source with literal UTF-8 characters is not supported
|
|
by many implementations and may even result in segfaults and other bad
|
|
behavior.
|
|
|
|
Be aware that, when using this option, the input encoding of your POD
|
|
source should be properly declared unless it's US-ASCII. Pod::Simple will
|
|
attempt to guess the encoding and may be successful if it's Latin-1 or
|
|
UTF-8, but it will produce warnings. Use the C<=encoding> command to
|
|
declare the encoding. See L<perlpod(1)> for more information.
|
|
|
|
=back
|
|
|
|
The standard Pod::Simple method parse_file() takes one argument naming the
|
|
POD file to read from. By default, the output is sent to C<STDOUT>, but
|
|
this can be changed with the output_fh() method.
|
|
|
|
The standard Pod::Simple method parse_from_file() takes up to two
|
|
arguments, the first being the input file to read POD from and the second
|
|
being the file to write the formatted output to.
|
|
|
|
You can also call parse_lines() to parse an array of lines or
|
|
parse_string_document() to parse a document already in memory. As with
|
|
parse_file(), parse_lines() and parse_string_document() default to sending
|
|
their output to C<STDOUT> unless changed with the output_fh() method.
|
|
|
|
To put the output from any parse method into a string instead of a file
|
|
handle, call the output_string() method instead of output_fh().
|
|
|
|
See L<Pod::Simple> for more specific details on the methods available to
|
|
all derived parsers.
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item roff font should be 1 or 2 chars, not "%s"
|
|
|
|
(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
|
|
wasn't either one or two characters. Pod::Man doesn't support *roff fonts
|
|
longer than two characters, although some *roff extensions do (the
|
|
canonical versions of B<nroff> and B<troff> don't either).
|
|
|
|
=item Invalid errors setting "%s"
|
|
|
|
(F) The C<errors> parameter to the constructor was set to an unknown value.
|
|
|
|
=item Invalid quote specification "%s"
|
|
|
|
(F) The quote specification given (the C<quotes> option to the
|
|
constructor) was invalid. A quote specification must be either one
|
|
character long or an even number (greater than one) characters long.
|
|
|
|
=item POD document had syntax errors
|
|
|
|
(F) The POD document being formatted had syntax errors and the C<errors>
|
|
option was set to C<die>.
|
|
|
|
=back
|
|
|
|
=head1 ENVIRONMENT
|
|
|
|
=over 4
|
|
|
|
=item PERL_CORE
|
|
|
|
If set and Encode is not available, silently fall back to non-UTF-8 mode
|
|
without complaining to standard error. This environment variable is set
|
|
during Perl core builds, which build Encode after podlators. Encode is
|
|
expected to not (yet) be available in that case.
|
|
|
|
=item POD_MAN_DATE
|
|
|
|
If set, this will be used as the value of the left-hand footer unless the
|
|
C<date> option is explicitly set, overriding the timestamp of the input
|
|
file or the current time. This is primarily useful to ensure reproducible
|
|
builds of the same output file given the same source and Pod::Man version,
|
|
even when file timestamps may not be consistent.
|
|
|
|
=item SOURCE_DATE_EPOCH
|
|
|
|
If set, and POD_MAN_DATE and the C<date> options are not set, this will be
|
|
used as the modification time of the source file, overriding the timestamp of
|
|
the input file or the current time. It should be set to the desired time in
|
|
seconds since UNIX epoch. This is primarily useful to ensure reproducible
|
|
builds of the same output file given the same source and Pod::Man version,
|
|
even when file timestamps may not be consistent. See
|
|
L<https://reproducible-builds.org/specs/source-date-epoch/> for the full
|
|
specification.
|
|
|
|
(Arguably, according to the specification, this variable should be used only
|
|
if the timestamp of the input file is not available and Pod::Man uses the
|
|
current time. However, for reproducible builds in Debian, results were more
|
|
reliable if this variable overrode the timestamp of the input file.)
|
|
|
|
=back
|
|
|
|
=head1 BUGS
|
|
|
|
Encoding handling assumes that PerlIO is available and does not work
|
|
properly if it isn't. The C<utf8> option is therefore not supported
|
|
unless Perl is built with PerlIO support.
|
|
|
|
There is currently no way to turn off the guesswork that tries to format
|
|
unmarked text appropriately, and sometimes it isn't wanted (particularly
|
|
when using POD to document something other than Perl). Most of the work
|
|
toward fixing this has now been done, however, and all that's still needed
|
|
is a user interface.
|
|
|
|
The NAME section should be recognized specially and index entries emitted
|
|
for everything in that section. This would have to be deferred until the
|
|
next section, since extraneous things in NAME tends to confuse various man
|
|
page processors. Currently, no index entries are emitted for anything in
|
|
NAME.
|
|
|
|
Pod::Man doesn't handle font names longer than two characters. Neither do
|
|
most B<troff> implementations, but GNU troff does as an extension. It would
|
|
be nice to support as an option for those who want to use it.
|
|
|
|
The preamble added to each output file is rather verbose, and most of it
|
|
is only necessary in the presence of non-ASCII characters. It would
|
|
ideally be nice if all of those definitions were only output if needed,
|
|
perhaps on the fly as the characters are used.
|
|
|
|
Pod::Man is excessively slow.
|
|
|
|
=head1 CAVEATS
|
|
|
|
If Pod::Man is given the C<utf8> option, the encoding of its output file
|
|
handle will be forced to UTF-8 if possible, overriding any existing
|
|
encoding. This will be done even if the file handle is not created by
|
|
Pod::Man and was passed in from outside. This maintains consistency
|
|
regardless of PERL_UNICODE and other settings.
|
|
|
|
The handling of hyphens and em dashes is somewhat fragile, and one may get
|
|
the wrong one under some circumstances. This should only matter for
|
|
B<troff> output.
|
|
|
|
When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
|
|
necessarily get it right.
|
|
|
|
Converting neutral double quotes to properly matched double quotes doesn't
|
|
work unless there are no formatting codes between the quote marks. This
|
|
only matters for troff output.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Russ Allbery <rra@cpan.org>, based I<very> heavily on the original
|
|
B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>. The modifications to
|
|
work with Pod::Simple instead of Pod::Parser were originally contributed by
|
|
Sean Burke (but I've since hacked them beyond recognition and all bugs are
|
|
mine).
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
|
|
2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017 Russ Allbery <rra@cpan.org>
|
|
|
|
This program is free software; you may redistribute it and/or modify it
|
|
under the same terms as Perl itself.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Pod::Simple>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
|
|
L<man(1)>, L<man(7)>
|
|
|
|
Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
|
|
Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
|
|
the best documentation of standard B<nroff> and B<troff>. At the time of
|
|
this writing, it's available at L<http://www.troff.org/54.pdf>.
|
|
|
|
The man page documenting the man macro set may be L<man(5)> instead of
|
|
L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive
|
|
documentation on writing manual pages if you've not done it before and
|
|
aren't familiar with the conventions.
|
|
|
|
The current version of this module is always available from its web site at
|
|
L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the
|
|
Perl core distribution as of 5.6.0.
|
|
|
|
=cut
|