733 lines
18 KiB
Perl
733 lines
18 KiB
Perl
package Pod::Escapes;
|
|
use strict;
|
|
use warnings;
|
|
use 5.006;
|
|
|
|
use vars qw(
|
|
%Code2USASCII
|
|
%Name2character
|
|
%Name2character_number
|
|
%Latin1Code_to_fallback
|
|
%Latin1Char_to_fallback
|
|
$FAR_CHAR
|
|
$FAR_CHAR_NUMBER
|
|
$NOT_ASCII
|
|
@ISA $VERSION @EXPORT_OK %EXPORT_TAGS
|
|
);
|
|
|
|
require Exporter;
|
|
@ISA = ('Exporter');
|
|
$VERSION = '1.07';
|
|
@EXPORT_OK = qw(
|
|
%Code2USASCII
|
|
%Name2character
|
|
%Name2character_number
|
|
%Latin1Code_to_fallback
|
|
%Latin1Char_to_fallback
|
|
e2char
|
|
e2charnum
|
|
);
|
|
%EXPORT_TAGS = ('ALL' => \@EXPORT_OK);
|
|
|
|
#==========================================================================
|
|
|
|
$FAR_CHAR = "?" unless defined $FAR_CHAR;
|
|
$FAR_CHAR_NUMBER = ord($FAR_CHAR) unless defined $FAR_CHAR_NUMBER;
|
|
|
|
$NOT_ASCII = 'A' ne chr(65) unless defined $NOT_ASCII;
|
|
|
|
#--------------------------------------------------------------------------
|
|
sub e2char {
|
|
my $in = $_[0];
|
|
return undef unless defined $in and length $in;
|
|
|
|
# Convert to decimal:
|
|
if($in =~ m/^(0[0-7]*)$/s ) {
|
|
$in = oct $in;
|
|
} elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
|
|
$in = hex $1;
|
|
} # else it's decimal, or named
|
|
|
|
if($in =~ m/^\d+$/s) {
|
|
if($] < 5.007 and $in > 255) { # can't be trusted with Unicode
|
|
return $FAR_CHAR;
|
|
} elsif ($] >= 5.007003) {
|
|
return chr(utf8::unicode_to_native($in));
|
|
} elsif ($NOT_ASCII) {
|
|
return $Code2USASCII{$in} # so "65" => "A" everywhere
|
|
|| $Latin1Code_to_fallback{$in} # Fallback.
|
|
|| $FAR_CHAR; # Fall further back
|
|
} else {
|
|
return chr($in);
|
|
}
|
|
} else {
|
|
return $Name2character{$in}; # returns undef if unknown
|
|
}
|
|
}
|
|
|
|
#--------------------------------------------------------------------------
|
|
sub e2charnum {
|
|
my $in = $_[0];
|
|
return undef unless defined $in and length $in;
|
|
|
|
# Convert to decimal:
|
|
if($in =~ m/^(0[0-7]*)$/s ) {
|
|
$in = oct $in;
|
|
} elsif($in =~ m/^0?x([0-9a-fA-F]+)$/s ) {
|
|
$in = hex $1;
|
|
} # else it's decimal, or named
|
|
|
|
if($in =~ m/^[0-9]+$/s) {
|
|
return 0 + $in;
|
|
} else {
|
|
return $Name2character_number{$in}; # returns undef if unknown
|
|
}
|
|
}
|
|
|
|
#--------------------------------------------------------------------------
|
|
|
|
%Code2USASCII = (
|
|
# mostly generated by
|
|
# perl -e "printf qq{ \x25 3s, '\x25s',\n}, $_, chr($_) foreach (32 .. 126)"
|
|
32, ' ',
|
|
33, '!',
|
|
34, '"',
|
|
35, '#',
|
|
36, '$',
|
|
37, '%',
|
|
38, '&',
|
|
39, "'", #!
|
|
40, '(',
|
|
41, ')',
|
|
42, '*',
|
|
43, '+',
|
|
44, ',',
|
|
45, '-',
|
|
46, '.',
|
|
47, '/',
|
|
48, '0',
|
|
49, '1',
|
|
50, '2',
|
|
51, '3',
|
|
52, '4',
|
|
53, '5',
|
|
54, '6',
|
|
55, '7',
|
|
56, '8',
|
|
57, '9',
|
|
58, ':',
|
|
59, ';',
|
|
60, '<',
|
|
61, '=',
|
|
62, '>',
|
|
63, '?',
|
|
64, '@',
|
|
65, 'A',
|
|
66, 'B',
|
|
67, 'C',
|
|
68, 'D',
|
|
69, 'E',
|
|
70, 'F',
|
|
71, 'G',
|
|
72, 'H',
|
|
73, 'I',
|
|
74, 'J',
|
|
75, 'K',
|
|
76, 'L',
|
|
77, 'M',
|
|
78, 'N',
|
|
79, 'O',
|
|
80, 'P',
|
|
81, 'Q',
|
|
82, 'R',
|
|
83, 'S',
|
|
84, 'T',
|
|
85, 'U',
|
|
86, 'V',
|
|
87, 'W',
|
|
88, 'X',
|
|
89, 'Y',
|
|
90, 'Z',
|
|
91, '[',
|
|
92, "\\", #!
|
|
93, ']',
|
|
94, '^',
|
|
95, '_',
|
|
96, '`',
|
|
97, 'a',
|
|
98, 'b',
|
|
99, 'c',
|
|
100, 'd',
|
|
101, 'e',
|
|
102, 'f',
|
|
103, 'g',
|
|
104, 'h',
|
|
105, 'i',
|
|
106, 'j',
|
|
107, 'k',
|
|
108, 'l',
|
|
109, 'm',
|
|
110, 'n',
|
|
111, 'o',
|
|
112, 'p',
|
|
113, 'q',
|
|
114, 'r',
|
|
115, 's',
|
|
116, 't',
|
|
117, 'u',
|
|
118, 'v',
|
|
119, 'w',
|
|
120, 'x',
|
|
121, 'y',
|
|
122, 'z',
|
|
123, '{',
|
|
124, '|',
|
|
125, '}',
|
|
126, '~',
|
|
);
|
|
|
|
#--------------------------------------------------------------------------
|
|
|
|
%Latin1Code_to_fallback = ();
|
|
@Latin1Code_to_fallback{0xA0 .. 0xFF} = (
|
|
# Copied from Text/Unidecode/x00.pm:
|
|
|
|
' ', qq{!}, qq{C/}, 'PS', qq{\$?}, qq{Y=}, qq{|}, 'SS', qq{"}, qq{(c)}, 'a', qq{<<}, qq{!}, "", qq{(r)}, qq{-},
|
|
'deg', qq{+-}, '2', '3', qq{'}, 'u', 'P', qq{*}, qq{,}, '1', 'o', qq{>>}, qq{1/4}, qq{1/2}, qq{3/4}, qq{?},
|
|
'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
|
|
'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'U', 'Th', 'ss',
|
|
'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
|
|
'd', 'n', 'o', 'o', 'o', 'o', 'o', qq{/}, 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
|
|
|
|
);
|
|
|
|
{
|
|
# Now stuff %Latin1Char_to_fallback:
|
|
%Latin1Char_to_fallback = ();
|
|
my($k,$v);
|
|
while( ($k,$v) = each %Latin1Code_to_fallback) {
|
|
$Latin1Char_to_fallback{chr $k} = $v;
|
|
#print chr($k), ' => ', $v, "\n";
|
|
}
|
|
}
|
|
|
|
#--------------------------------------------------------------------------
|
|
|
|
%Name2character_number = (
|
|
# General XML/XHTML:
|
|
'lt' => 60,
|
|
'gt' => 62,
|
|
'quot' => 34,
|
|
'amp' => 38,
|
|
'apos' => 39,
|
|
|
|
# POD-specific:
|
|
'sol' => 47,
|
|
'verbar' => 124,
|
|
|
|
'lchevron' => 171, # legacy for laquo
|
|
'rchevron' => 187, # legacy for raquo
|
|
|
|
# Remember, grave looks like \ (as in virtu\)
|
|
# acute looks like / (as in re/sume/)
|
|
# circumflex looks like ^ (as in papier ma^che/)
|
|
# umlaut/dieresis looks like " (as in nai"ve, Chloe")
|
|
|
|
# From the XHTML 1 .ent files:
|
|
'nbsp' , 160,
|
|
'iexcl' , 161,
|
|
'cent' , 162,
|
|
'pound' , 163,
|
|
'curren' , 164,
|
|
'yen' , 165,
|
|
'brvbar' , 166,
|
|
'sect' , 167,
|
|
'uml' , 168,
|
|
'copy' , 169,
|
|
'ordf' , 170,
|
|
'laquo' , 171,
|
|
'not' , 172,
|
|
'shy' , 173,
|
|
'reg' , 174,
|
|
'macr' , 175,
|
|
'deg' , 176,
|
|
'plusmn' , 177,
|
|
'sup2' , 178,
|
|
'sup3' , 179,
|
|
'acute' , 180,
|
|
'micro' , 181,
|
|
'para' , 182,
|
|
'middot' , 183,
|
|
'cedil' , 184,
|
|
'sup1' , 185,
|
|
'ordm' , 186,
|
|
'raquo' , 187,
|
|
'frac14' , 188,
|
|
'frac12' , 189,
|
|
'frac34' , 190,
|
|
'iquest' , 191,
|
|
'Agrave' , 192,
|
|
'Aacute' , 193,
|
|
'Acirc' , 194,
|
|
'Atilde' , 195,
|
|
'Auml' , 196,
|
|
'Aring' , 197,
|
|
'AElig' , 198,
|
|
'Ccedil' , 199,
|
|
'Egrave' , 200,
|
|
'Eacute' , 201,
|
|
'Ecirc' , 202,
|
|
'Euml' , 203,
|
|
'Igrave' , 204,
|
|
'Iacute' , 205,
|
|
'Icirc' , 206,
|
|
'Iuml' , 207,
|
|
'ETH' , 208,
|
|
'Ntilde' , 209,
|
|
'Ograve' , 210,
|
|
'Oacute' , 211,
|
|
'Ocirc' , 212,
|
|
'Otilde' , 213,
|
|
'Ouml' , 214,
|
|
'times' , 215,
|
|
'Oslash' , 216,
|
|
'Ugrave' , 217,
|
|
'Uacute' , 218,
|
|
'Ucirc' , 219,
|
|
'Uuml' , 220,
|
|
'Yacute' , 221,
|
|
'THORN' , 222,
|
|
'szlig' , 223,
|
|
'agrave' , 224,
|
|
'aacute' , 225,
|
|
'acirc' , 226,
|
|
'atilde' , 227,
|
|
'auml' , 228,
|
|
'aring' , 229,
|
|
'aelig' , 230,
|
|
'ccedil' , 231,
|
|
'egrave' , 232,
|
|
'eacute' , 233,
|
|
'ecirc' , 234,
|
|
'euml' , 235,
|
|
'igrave' , 236,
|
|
'iacute' , 237,
|
|
'icirc' , 238,
|
|
'iuml' , 239,
|
|
'eth' , 240,
|
|
'ntilde' , 241,
|
|
'ograve' , 242,
|
|
'oacute' , 243,
|
|
'ocirc' , 244,
|
|
'otilde' , 245,
|
|
'ouml' , 246,
|
|
'divide' , 247,
|
|
'oslash' , 248,
|
|
'ugrave' , 249,
|
|
'uacute' , 250,
|
|
'ucirc' , 251,
|
|
'uuml' , 252,
|
|
'yacute' , 253,
|
|
'thorn' , 254,
|
|
'yuml' , 255,
|
|
|
|
'fnof' , 402,
|
|
'Alpha' , 913,
|
|
'Beta' , 914,
|
|
'Gamma' , 915,
|
|
'Delta' , 916,
|
|
'Epsilon' , 917,
|
|
'Zeta' , 918,
|
|
'Eta' , 919,
|
|
'Theta' , 920,
|
|
'Iota' , 921,
|
|
'Kappa' , 922,
|
|
'Lambda' , 923,
|
|
'Mu' , 924,
|
|
'Nu' , 925,
|
|
'Xi' , 926,
|
|
'Omicron' , 927,
|
|
'Pi' , 928,
|
|
'Rho' , 929,
|
|
'Sigma' , 931,
|
|
'Tau' , 932,
|
|
'Upsilon' , 933,
|
|
'Phi' , 934,
|
|
'Chi' , 935,
|
|
'Psi' , 936,
|
|
'Omega' , 937,
|
|
'alpha' , 945,
|
|
'beta' , 946,
|
|
'gamma' , 947,
|
|
'delta' , 948,
|
|
'epsilon' , 949,
|
|
'zeta' , 950,
|
|
'eta' , 951,
|
|
'theta' , 952,
|
|
'iota' , 953,
|
|
'kappa' , 954,
|
|
'lambda' , 955,
|
|
'mu' , 956,
|
|
'nu' , 957,
|
|
'xi' , 958,
|
|
'omicron' , 959,
|
|
'pi' , 960,
|
|
'rho' , 961,
|
|
'sigmaf' , 962,
|
|
'sigma' , 963,
|
|
'tau' , 964,
|
|
'upsilon' , 965,
|
|
'phi' , 966,
|
|
'chi' , 967,
|
|
'psi' , 968,
|
|
'omega' , 969,
|
|
'thetasym' , 977,
|
|
'upsih' , 978,
|
|
'piv' , 982,
|
|
'bull' , 8226,
|
|
'hellip' , 8230,
|
|
'prime' , 8242,
|
|
'Prime' , 8243,
|
|
'oline' , 8254,
|
|
'frasl' , 8260,
|
|
'weierp' , 8472,
|
|
'image' , 8465,
|
|
'real' , 8476,
|
|
'trade' , 8482,
|
|
'alefsym' , 8501,
|
|
'larr' , 8592,
|
|
'uarr' , 8593,
|
|
'rarr' , 8594,
|
|
'darr' , 8595,
|
|
'harr' , 8596,
|
|
'crarr' , 8629,
|
|
'lArr' , 8656,
|
|
'uArr' , 8657,
|
|
'rArr' , 8658,
|
|
'dArr' , 8659,
|
|
'hArr' , 8660,
|
|
'forall' , 8704,
|
|
'part' , 8706,
|
|
'exist' , 8707,
|
|
'empty' , 8709,
|
|
'nabla' , 8711,
|
|
'isin' , 8712,
|
|
'notin' , 8713,
|
|
'ni' , 8715,
|
|
'prod' , 8719,
|
|
'sum' , 8721,
|
|
'minus' , 8722,
|
|
'lowast' , 8727,
|
|
'radic' , 8730,
|
|
'prop' , 8733,
|
|
'infin' , 8734,
|
|
'ang' , 8736,
|
|
'and' , 8743,
|
|
'or' , 8744,
|
|
'cap' , 8745,
|
|
'cup' , 8746,
|
|
'int' , 8747,
|
|
'there4' , 8756,
|
|
'sim' , 8764,
|
|
'cong' , 8773,
|
|
'asymp' , 8776,
|
|
'ne' , 8800,
|
|
'equiv' , 8801,
|
|
'le' , 8804,
|
|
'ge' , 8805,
|
|
'sub' , 8834,
|
|
'sup' , 8835,
|
|
'nsub' , 8836,
|
|
'sube' , 8838,
|
|
'supe' , 8839,
|
|
'oplus' , 8853,
|
|
'otimes' , 8855,
|
|
'perp' , 8869,
|
|
'sdot' , 8901,
|
|
'lceil' , 8968,
|
|
'rceil' , 8969,
|
|
'lfloor' , 8970,
|
|
'rfloor' , 8971,
|
|
'lang' , 9001,
|
|
'rang' , 9002,
|
|
'loz' , 9674,
|
|
'spades' , 9824,
|
|
'clubs' , 9827,
|
|
'hearts' , 9829,
|
|
'diams' , 9830,
|
|
'OElig' , 338,
|
|
'oelig' , 339,
|
|
'Scaron' , 352,
|
|
'scaron' , 353,
|
|
'Yuml' , 376,
|
|
'circ' , 710,
|
|
'tilde' , 732,
|
|
'ensp' , 8194,
|
|
'emsp' , 8195,
|
|
'thinsp' , 8201,
|
|
'zwnj' , 8204,
|
|
'zwj' , 8205,
|
|
'lrm' , 8206,
|
|
'rlm' , 8207,
|
|
'ndash' , 8211,
|
|
'mdash' , 8212,
|
|
'lsquo' , 8216,
|
|
'rsquo' , 8217,
|
|
'sbquo' , 8218,
|
|
'ldquo' , 8220,
|
|
'rdquo' , 8221,
|
|
'bdquo' , 8222,
|
|
'dagger' , 8224,
|
|
'Dagger' , 8225,
|
|
'permil' , 8240,
|
|
'lsaquo' , 8249,
|
|
'rsaquo' , 8250,
|
|
'euro' , 8364,
|
|
);
|
|
|
|
|
|
# Fill out %Name2character...
|
|
{
|
|
%Name2character = ();
|
|
my($name, $number);
|
|
while( ($name, $number) = each %Name2character_number) {
|
|
if($] < 5.007 and $number > 255) {
|
|
$Name2character{$name} = $FAR_CHAR;
|
|
# substitute for Unicode characters, for perls
|
|
# that can't reliably handle them
|
|
} elsif ($] >= 5.007003) {
|
|
$Name2character{$name} = chr utf8::unicode_to_native($number);
|
|
# normal case for more recent Perls where we can translate from Unicode
|
|
# to the native character set.
|
|
}
|
|
elsif (exists $Code2USASCII{$number}) {
|
|
$Name2character{$name} = $Code2USASCII{$number};
|
|
# on older Perls, we can use the translations we have hard-coded in this
|
|
# file, but these don't include the non-ASCII-range characters
|
|
}
|
|
elsif ($NOT_ASCII && $number > 127 && $number < 256) {
|
|
# this range on old non-ASCII-platform perls is wrong
|
|
if (exists $Latin1Code_to_fallback{$number}) {
|
|
$Name2character{$name} = $Latin1Code_to_fallback{$number};
|
|
} else {
|
|
$Name2character{$name} = $FAR_CHAR;
|
|
}
|
|
} else {
|
|
$Name2character{$name} = chr $number;
|
|
}
|
|
}
|
|
}
|
|
|
|
#--------------------------------------------------------------------------
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Pod::Escapes - for resolving Pod EE<lt>...E<gt> sequences
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Pod::Escapes qw(e2char);
|
|
...la la la, parsing POD, la la la...
|
|
$text = e2char($e_node->label);
|
|
unless(defined $text) {
|
|
print "Unknown E sequence \"", $e_node->label, "\"!";
|
|
}
|
|
...else print/interpolate $text...
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module provides things that are useful in decoding
|
|
Pod EE<lt>...E<gt> sequences. Presumably, it should be used
|
|
only by Pod parsers and/or formatters.
|
|
|
|
By default, Pod::Escapes exports none of its symbols. But
|
|
you can request any of them to be exported.
|
|
Either request them individually, as with
|
|
C<use Pod::Escapes qw(symbolname symbolname2...);>,
|
|
or you can do C<use Pod::Escapes qw(:ALL);> to get all
|
|
exportable symbols.
|
|
|
|
=head1 GOODIES
|
|
|
|
=over
|
|
|
|
=item e2char($e_content)
|
|
|
|
Given a name or number that could appear in a
|
|
C<EE<lt>name_or_numE<gt>> sequence, this returns the string that
|
|
it stands for. For example, C<e2char('sol')>, C<e2char('47')>,
|
|
C<e2char('0x2F')>, and C<e2char('057')> all return "/",
|
|
because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
|
|
and C<EE<lt>057E<gt>>, all mean "/". If
|
|
the name has no known value (as with a name of "qacute") or is
|
|
syntactically invalid (as with a name of "1/4"), this returns undef.
|
|
|
|
=item e2charnum($e_content)
|
|
|
|
Given a name or number that could appear in a
|
|
C<EE<lt>name_or_numE<gt>> sequence, this returns the number of
|
|
the Unicode character that this stands for. For example,
|
|
C<e2char('sol')>, C<e2char('47')>,
|
|
C<e2char('0x2F')>, and C<e2char('057')> all return 47,
|
|
because C<EE<lt>solE<gt>>, C<EE<lt>47E<gt>>, C<EE<lt>0x2fE<gt>>,
|
|
and C<EE<lt>057E<gt>>, all mean "/", whose Unicode number is 47. If
|
|
the name has no known value (as with a name of "qacute") or is
|
|
syntactically invalid (as with a name of "1/4"), this returns undef.
|
|
|
|
=item $Name2character{I<name>}
|
|
|
|
Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
|
|
to the string that each stands for. Note that this does not
|
|
include numerics (like "64" or "x981c"). Under old Perl versions
|
|
(before 5.7) you get a "?" in place of characters whose Unicode
|
|
value is over 255.
|
|
|
|
=item $Name2character_number{I<name>}
|
|
|
|
Maps from names (as in C<EE<lt>I<name>E<gt>>) like "eacute" or "sol"
|
|
to the Unicode value that each stands for. For example,
|
|
C<$Name2character_number{'eacute'}> is 201, and
|
|
C<$Name2character_number{'eacute'}> is 8364. You get the correct
|
|
Unicode value, regardless of the version of Perl you're using --
|
|
which differs from C<%Name2character>'s behavior under pre-5.7 Perls.
|
|
|
|
Note that this hash does not
|
|
include numerics (like "64" or "x981c").
|
|
|
|
=item $Latin1Code_to_fallback{I<integer>}
|
|
|
|
For numbers in the range 160 (0x00A0) to 255 (0x00FF), this maps
|
|
from the character code for a Latin-1 character (like 233 for
|
|
lowercase e-acute) to the US-ASCII character that best aproximates
|
|
it (like "e"). You may find this useful if you are rendering
|
|
POD in a format that you think deals well only with US-ASCII
|
|
characters.
|
|
|
|
=item $Latin1Char_to_fallback{I<character>}
|
|
|
|
Just as above, but maps from characters (like "\xE9",
|
|
lowercase e-acute) to characters (like "e").
|
|
|
|
=item $Code2USASCII{I<integer>}
|
|
|
|
This maps from US-ASCII codes (like 32) to the corresponding
|
|
character (like space, for 32). Only characters 32 to 126 are
|
|
defined. This is meant for use by C<e2char($x)> when it senses
|
|
that it's running on a non-ASCII platform (where chr(32) doesn't
|
|
get you a space -- but $Code2USASCII{32} will). It's
|
|
documented here just in case you might find it useful.
|
|
|
|
=back
|
|
|
|
=head1 CAVEATS
|
|
|
|
On Perl versions before 5.7, Unicode characters with a value
|
|
over 255 (like lambda or emdash) can't be conveyed. This
|
|
module does work under such early Perl versions, but in the
|
|
place of each such character, you get a "?". Latin-1
|
|
characters (characters 160-255) are unaffected.
|
|
|
|
Under EBCDIC platforms, C<e2char($n)> may not always be the
|
|
same as C<chr(e2charnum($n))>, and ditto for
|
|
C<$Name2character{$name}> and
|
|
C<chr($Name2character_number{$name})>, because the strings are returned as
|
|
native, and the numbers are returned as Unicode.
|
|
However, for Perls starting with v5.8, C<e2char($n)> is the same as
|
|
C<chr(utf8::unicode_to_native(e2charnum($n)))>, and ditto for
|
|
C<$Name2character{$name}> and
|
|
C<chr(utf8::unicode_to_native($Name2character_number{$name}))>.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Pod::Browser> - a pod web server based on L<Catalyst>.
|
|
|
|
L<Pod::Checker> - check pod documents for syntax errors.
|
|
|
|
L<Pod::Coverage> - check if the documentation for a module is comprehensive.
|
|
|
|
L<perlpod> - description of pod format (for people documenting with pod).
|
|
|
|
L<perlpodspec> - specification of pod format (for people processing it).
|
|
|
|
L<Text::Unidecode> - ASCII transliteration of Unicode text.
|
|
|
|
=head1 REPOSITORY
|
|
|
|
L<https://github.com/neilbowers/Pod-Escapes>
|
|
|
|
=head1 COPYRIGHT AND DISCLAIMERS
|
|
|
|
Copyright (c) 2001-2004 Sean M. Burke. All rights reserved.
|
|
|
|
This library is free software; you can redistribute it and/or modify
|
|
it under the same terms as Perl itself.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
without any warranty; without even the implied warranty of
|
|
merchantability or fitness for a particular purpose.
|
|
|
|
Portions of the data tables in this module are derived from the
|
|
entity declarations in the W3C XHTML specification.
|
|
|
|
Currently (October 2001), that's these three:
|
|
|
|
http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent
|
|
http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent
|
|
http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent
|
|
|
|
=head1 AUTHOR
|
|
|
|
Sean M. Burke C<sburke@cpan.org>
|
|
|
|
Now being maintained by Neil Bowers E<lt>neilb@cpan.orgE<gt>
|
|
|
|
=cut
|
|
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
# What I used for reading the XHTML .ent files:
|
|
|
|
my(@norms, @good, @bad);
|
|
my $dir = 'c:/sgml/docbook/';
|
|
my %escapes;
|
|
foreach my $file (qw(
|
|
xhtml-symbol.ent
|
|
xhtml-lat1.ent
|
|
xhtml-special.ent
|
|
)) {
|
|
open(IN, "<$dir$file") or die "can't read-open $dir$file: $!";
|
|
print "Reading $file...\n";
|
|
while(<IN>) {
|
|
if(m/<!ENTITY\s+(\S+)\s+"&#([^;]+);">/) {
|
|
my($name, $value) = ($1,$2);
|
|
next if $name eq 'quot' or $name eq 'apos' or $name eq 'gt';
|
|
|
|
$value = hex $1 if $value =~ m/^x([a-fA-F0-9]+)$/s;
|
|
print "ILLEGAL VALUE $value" unless $value =~ m/^\d+$/s;
|
|
if($value > 255) {
|
|
push @good , sprintf " %-10s , chr(%s),\n", "'$name'", $value;
|
|
push @bad , sprintf " %-10s , \$bad,\n", "'$name'", $value;
|
|
} else {
|
|
push @norms, sprintf " %-10s , chr(%s),\n", "'$name'", $value;
|
|
}
|
|
} elsif(m/<!ENT/) {
|
|
print "# Skipping $_";
|
|
}
|
|
|
|
}
|
|
close(IN);
|
|
}
|
|
|
|
print @norms;
|
|
print "\n ( \$] .= 5.006001 ? (\n";
|
|
print @good;
|
|
print " ) : (\n";
|
|
print @bad;
|
|
print " )\n);\n";
|
|
|
|
__END__
|
|
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
|