653 lines
15 KiB
Perl
653 lines
15 KiB
Perl
# IO::Zlib.pm
|
|
#
|
|
# Copyright (c) 1998-2004 Tom Hughes <tom@compton.nu>.
|
|
# All rights reserved. This program is free software; you can redistribute
|
|
# it and/or modify it under the same terms as Perl itself.
|
|
|
|
package IO::Zlib;
|
|
|
|
$VERSION = "1.10";
|
|
|
|
=head1 NAME
|
|
|
|
IO::Zlib - IO:: style interface to L<Compress::Zlib>
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
With any version of Perl 5 you can use the basic OO interface:
|
|
|
|
use IO::Zlib;
|
|
|
|
$fh = new IO::Zlib;
|
|
if ($fh->open("file.gz", "rb")) {
|
|
print <$fh>;
|
|
$fh->close;
|
|
}
|
|
|
|
$fh = IO::Zlib->new("file.gz", "wb9");
|
|
if (defined $fh) {
|
|
print $fh "bar\n";
|
|
$fh->close;
|
|
}
|
|
|
|
$fh = IO::Zlib->new("file.gz", "rb");
|
|
if (defined $fh) {
|
|
print <$fh>;
|
|
undef $fh; # automatically closes the file
|
|
}
|
|
|
|
With Perl 5.004 you can also use the TIEHANDLE interface to access
|
|
compressed files just like ordinary files:
|
|
|
|
use IO::Zlib;
|
|
|
|
tie *FILE, 'IO::Zlib', "file.gz", "wb";
|
|
print FILE "line 1\nline2\n";
|
|
|
|
tie *FILE, 'IO::Zlib', "file.gz", "rb";
|
|
while (<FILE>) { print "LINE: ", $_ };
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
C<IO::Zlib> provides an IO:: style interface to L<Compress::Zlib> and
|
|
hence to gzip/zlib compressed files. It provides many of the same methods
|
|
as the L<IO::Handle> interface.
|
|
|
|
Starting from IO::Zlib version 1.02, IO::Zlib can also use an
|
|
external F<gzip> command. The default behaviour is to try to use
|
|
an external F<gzip> if no C<Compress::Zlib> can be loaded, unless
|
|
explicitly disabled by
|
|
|
|
use IO::Zlib qw(:gzip_external 0);
|
|
|
|
If explicitly enabled by
|
|
|
|
use IO::Zlib qw(:gzip_external 1);
|
|
|
|
then the external F<gzip> is used B<instead> of C<Compress::Zlib>.
|
|
|
|
=head1 CONSTRUCTOR
|
|
|
|
=over 4
|
|
|
|
=item new ( [ARGS] )
|
|
|
|
Creates an C<IO::Zlib> object. If it receives any parameters, they are
|
|
passed to the method C<open>; if the open fails, the object is destroyed.
|
|
Otherwise, it is returned to the caller.
|
|
|
|
=back
|
|
|
|
=head1 OBJECT METHODS
|
|
|
|
=over 4
|
|
|
|
=item open ( FILENAME, MODE )
|
|
|
|
C<open> takes two arguments. The first is the name of the file to open
|
|
and the second is the open mode. The mode can be anything acceptable to
|
|
L<Compress::Zlib> and by extension anything acceptable to I<zlib> (that
|
|
basically means POSIX fopen() style mode strings plus an optional number
|
|
to indicate the compression level).
|
|
|
|
=item opened
|
|
|
|
Returns true if the object currently refers to an opened file.
|
|
|
|
=item close
|
|
|
|
Close the file associated with the object and disassociate
|
|
the file from the handle.
|
|
Done automatically on destroy.
|
|
|
|
=item getc
|
|
|
|
Return the next character from the file, or undef if none remain.
|
|
|
|
=item getline
|
|
|
|
Return the next line from the file, or undef on end of file.
|
|
Can safely be called in an array context.
|
|
Currently ignores $/ ($INPUT_RECORD_SEPARATOR or $RS when L<English>
|
|
is in use) and treats lines as delimited by "\n".
|
|
|
|
=item getlines
|
|
|
|
Get all remaining lines from the file.
|
|
It will croak() if accidentally called in a scalar context.
|
|
|
|
=item print ( ARGS... )
|
|
|
|
Print ARGS to the file.
|
|
|
|
=item read ( BUF, NBYTES, [OFFSET] )
|
|
|
|
Read some bytes from the file.
|
|
Returns the number of bytes actually read, 0 on end-of-file, undef on error.
|
|
|
|
=item eof
|
|
|
|
Returns true if the handle is currently positioned at end of file.
|
|
|
|
=item seek ( OFFSET, WHENCE )
|
|
|
|
Seek to a given position in the stream.
|
|
Not yet supported.
|
|
|
|
=item tell
|
|
|
|
Return the current position in the stream, as a numeric offset.
|
|
Not yet supported.
|
|
|
|
=item setpos ( POS )
|
|
|
|
Set the current position, using the opaque value returned by C<getpos()>.
|
|
Not yet supported.
|
|
|
|
=item getpos ( POS )
|
|
|
|
Return the current position in the stream, as an opaque object.
|
|
Not yet supported.
|
|
|
|
=back
|
|
|
|
=head1 USING THE EXTERNAL GZIP
|
|
|
|
If the external F<gzip> is used, the following C<open>s are used:
|
|
|
|
open(FH, "gzip -dc $filename |") # for read opens
|
|
open(FH, " | gzip > $filename") # for write opens
|
|
|
|
You can modify the 'commands' for example to hardwire
|
|
an absolute path by e.g.
|
|
|
|
use IO::Zlib ':gzip_read_open' => '/some/where/gunzip -c %s |';
|
|
use IO::Zlib ':gzip_write_open' => '| /some/where/gzip.exe > %s';
|
|
|
|
The C<%s> is expanded to be the filename (C<sprintf> is used, so be
|
|
careful to escape any other C<%> signs). The 'commands' are checked
|
|
for sanity - they must contain the C<%s>, and the read open must end
|
|
with the pipe sign, and the write open must begin with the pipe sign.
|
|
|
|
=head1 CLASS METHODS
|
|
|
|
=over 4
|
|
|
|
=item has_Compress_Zlib
|
|
|
|
Returns true if C<Compress::Zlib> is available. Note that this does
|
|
not mean that C<Compress::Zlib> is being used: see L</gzip_external>
|
|
and L</gzip_used>.
|
|
|
|
=item gzip_external
|
|
|
|
Undef if an external F<gzip> B<can> be used if C<Compress::Zlib> is
|
|
not available (see L</has_Compress_Zlib>), true if an external F<gzip>
|
|
is explicitly used, false if an external F<gzip> must not be used.
|
|
See L</gzip_used>.
|
|
|
|
=item gzip_used
|
|
|
|
True if an external F<gzip> is being used, false if not.
|
|
|
|
=item gzip_read_open
|
|
|
|
Return the 'command' being used for opening a file for reading using an
|
|
external F<gzip>.
|
|
|
|
=item gzip_write_open
|
|
|
|
Return the 'command' being used for opening a file for writing using an
|
|
external F<gzip>.
|
|
|
|
=back
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item IO::Zlib::getlines: must be called in list context
|
|
|
|
If you want to read lines, you must read in list context.
|
|
|
|
=item IO::Zlib::gzopen_external: mode '...' is illegal
|
|
|
|
Use only modes 'rb' or 'wb' or /wb[1-9]/.
|
|
|
|
=item IO::Zlib::import: '...' is illegal
|
|
|
|
The known import symbols are the C<:gzip_external>, C<:gzip_read_open>,
|
|
and C<:gzip_write_open>. Anything else is not recognized.
|
|
|
|
=item IO::Zlib::import: ':gzip_external' requires an argument
|
|
|
|
The C<:gzip_external> requires one boolean argument.
|
|
|
|
=item IO::Zlib::import: ':gzip_read_open' requires an argument
|
|
|
|
The C<:gzip_read_open> requires one string argument.
|
|
|
|
=item IO::Zlib::import: ':gzip_read_open' '...' is illegal
|
|
|
|
The C<:gzip_read_open> argument must end with the pipe sign (|)
|
|
and have the C<%s> for the filename. See L</"USING THE EXTERNAL GZIP">.
|
|
|
|
=item IO::Zlib::import: ':gzip_write_open' requires an argument
|
|
|
|
The C<:gzip_write_open> requires one string argument.
|
|
|
|
=item IO::Zlib::import: ':gzip_write_open' '...' is illegal
|
|
|
|
The C<:gzip_write_open> argument must begin with the pipe sign (|)
|
|
and have the C<%s> for the filename. An output redirect (>) is also
|
|
often a good idea, depending on your operating system shell syntax.
|
|
See L</"USING THE EXTERNAL GZIP">.
|
|
|
|
=item IO::Zlib::import: no Compress::Zlib and no external gzip
|
|
|
|
Given that we failed to load C<Compress::Zlib> and that the use of
|
|
an external F<gzip> was disabled, IO::Zlib has not much chance of working.
|
|
|
|
=item IO::Zlib::open: needs a filename
|
|
|
|
No filename, no open.
|
|
|
|
=item IO::Zlib::READ: NBYTES must be specified
|
|
|
|
We must know how much to read.
|
|
|
|
=item IO::Zlib::WRITE: too long LENGTH
|
|
|
|
The LENGTH must be less than or equal to the buffer size.
|
|
|
|
=back
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<perlfunc>,
|
|
L<perlop/"I/O Operators">,
|
|
L<IO::Handle>,
|
|
L<Compress::Zlib>
|
|
|
|
=head1 HISTORY
|
|
|
|
Created by Tom Hughes E<lt>F<tom@compton.nu>E<gt>.
|
|
|
|
Support for external gzip added by Jarkko Hietaniemi E<lt>F<jhi@iki.fi>E<gt>.
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright (c) 1998-2004 Tom Hughes E<lt>F<tom@compton.nu>E<gt>.
|
|
All rights reserved. This program is free software; you can redistribute
|
|
it and/or modify it under the same terms as Perl itself.
|
|
|
|
=cut
|
|
|
|
require 5.006;
|
|
|
|
use strict;
|
|
use vars qw($VERSION $AUTOLOAD @ISA);
|
|
|
|
use Carp;
|
|
use Fcntl qw(SEEK_SET);
|
|
|
|
# File-scoped state: whether a usable Compress::Zlib was found at load
# time, and whether _alias() has already bound the gz* implementation.
my ($has_Compress_Zlib, $aliased);
|
|
|
|
# Class method: report the availability flag kept in $has_Compress_Zlib.
sub has_Compress_Zlib {
    return $has_Compress_Zlib;
}
|
|
|
|
# Probe for Compress::Zlib at compile time.  The module counts as
# available only when it loads without error and is version 2.000 or later.
BEGIN {
    eval { require Compress::Zlib };
    if ($@ || $Compress::Zlib::VERSION < 2.000) {
        $has_Compress_Zlib = 0;
    }
    else {
        $has_Compress_Zlib = 1;
    }
}
|
|
|
|
use Symbol;
|
|
use Tie::Handle;
|
|
|
|
# Default command templates for the external gzip; the %s is filled in
# with the file name via sprintf.  These might use some $^O logic.
my ($gzip_read_open, $gzip_write_open) = ("gzip -dc %s |", "| gzip > %s");

# $gzip_external holds the value given to ':gzip_external' (undef until
# one is given); $gzip_used is set once the external gzip routines have
# been installed.
my ($gzip_external, $gzip_used);
|
|
|
|
# Class method: the command template currently used for read opens
# through the external gzip.
sub gzip_read_open {
    return $gzip_read_open;
}
|
|
|
|
# Class method: the command template currently used for write opens
# through the external gzip.
sub gzip_write_open {
    return $gzip_write_open;
}
|
|
|
|
# Class method: the value supplied with ':gzip_external' on import,
# or undef when none has been supplied.
sub gzip_external {
    return $gzip_external;
}
|
|
|
|
# Class method: the flag that _alias() sets when it installs the
# external gzip routines.
sub gzip_used {
    return $gzip_used;
}
|
|
|
|
# Class method: true when either Compress::Zlib is available or the
# external gzip has been explicitly enabled.
sub can_gunzip {
    return $has_Compress_Zlib || $gzip_external;
}
|
|
|
|
# Consume the leading option/value pairs of an import list.
#
# Arguments: $import, the name used as the prefix of every diagnostic,
# followed by the arguments that were given to import().
#
# Recognized options are ':gzip_external', ':gzip_read_open' and
# ':gzip_write_open'; each must be followed by a value.  Parsing stops at
# the first argument that is not one of these.
#
# Returns the unconsumed arguments (an empty list when everything was
# understood).  Croaks when an option lacks its value or when a command
# template fails the sanity check.
sub _import {
    my $import = shift;

    while (@_) {
        my $option = $_[0];

        if ($option eq ':gzip_external') {
            shift;
            croak "$import: ':gzip_external' requires an argument"
                unless @_;
            $gzip_external = shift;
        }
        elsif ($option eq ':gzip_read_open') {
            shift;
            croak "$import: ':gzip_read_open' requires an argument"
                unless @_;
            my $command = shift;
            # Must contain %s and end with a pipe sign.  Validate before
            # storing so that a rejected template cannot replace the
            # working one.
            croak "$import: ':gzip_read_open' '$command' is illegal"
                unless $command =~ /^.+%s.+\|\s*$/;
            $gzip_read_open = $command;
        }
        elsif ($option eq ':gzip_write_open') {
            shift;
            croak "$import: ':gzip_write_open' requires an argument"
                unless @_;
            my $command = shift;
            # Must begin with a pipe sign and contain %s.  The diagnostic
            # reports the offending write template (it used to show the
            # read template by mistake).
            croak "$import: ':gzip_write_open' '$command' is illegal"
                unless $command =~ /^\s*\|.+%s.*$/;
            $gzip_write_open = $command;
        }
        else {
            last;
        }
    }

    return @_;
}
|
|
|
|
# Bind the gz* entry points to either the external gzip routines or to
# Compress::Zlib.
#
# Argument: $import, the name used as the prefix of the diagnostic.
#
# The external gzip is chosen when it was explicitly enabled, or when
# Compress::Zlib is unavailable and the external gzip was not explicitly
# disabled.  Otherwise Compress::Zlib is used; croaks if it is not
# available.  Sets $aliased on success.
sub _alias {
    my $import = shift;

    if ($gzip_external || (!$has_Compress_Zlib && !defined $gzip_external)) {
        # The undef *gzopen is really needed only during
        # testing where we eval several 'use IO::Zlib's.
        undef *gzopen;
        *gzopen                 = \&gzopen_external;
        # The handle returned by gzopen_external() is an IO::Handle, so
        # the per-handle operations are installed as IO::Handle methods.
        *IO::Handle::gzread     = \&gzread_external;
        *IO::Handle::gzwrite    = \&gzwrite_external;
        *IO::Handle::gzreadline = \&gzreadline_external;
        *IO::Handle::gzeof      = \&gzeof_external;
        *IO::Handle::gzclose    = \&gzclose_external;
        $gzip_used = 1;
    }
    else {
        croak "$import: no Compress::Zlib and no external gzip"
            unless $has_Compress_Zlib;
        *gzopen     = \&Compress::Zlib::gzopen;
        *gzread     = \&Compress::Zlib::gzread;
        *gzwrite    = \&Compress::Zlib::gzwrite;
        *gzreadline = \&Compress::Zlib::gzreadline;
        *gzeof      = \&Compress::Zlib::gzeof;
        # A previous import may have selected the external gzip; clear
        # the flag so that gzip_used() does not keep reporting it.
        $gzip_used = undef;
    }

    $aliased = 1;
}
|
|
|
|
# Handle "use IO::Zlib LIST": parse the options, reject the list if
# anything was left unparsed, then bind the gz* implementation.
sub import {
    my (undef, @requests) = @_;
    my $import = "IO::Zlib::import";

    # _import() returns whatever it could not consume.
    croak "$import: '@requests' is illegal"
        if @requests && _import($import, @requests);

    _alias($import);
}
|
|
|
|
# Inherit the default tied-handle methods from Tie::Handle.
@ISA = ('Tie::Handle');
|
|
|
|
# tie() constructor.  With no arguments an unopened object is returned;
# otherwise the arguments are handed to OPEN and its result is returned.
sub TIEHANDLE
{
    my ($class, @args) = @_;

    my $self = bless {}, $class;

    return $self unless @args;

    return $self->OPEN(@args);
}
|
|
|
|
# Intentionally empty.  Having an explicit DESTROY keeps object
# destruction from being routed through this package's AUTOLOAD.
sub DESTROY { }
|
|
|
|
# Open FILENAME with MODE through gzopen() and remember the resulting
# handle in $self->{'file'}.  Returns the object on success, undef when
# gzopen() returned undef.  Croaks if no filename is given.
sub OPEN
{
    my ($self, $filename, $mode) = @_;

    defined($filename)
        or croak "IO::Zlib::open: needs a filename";

    my $gz = gzopen($filename,$mode);
    $self->{'file'} = $gz;

    return undef unless defined($gz);

    return $self;
}
|
|
|
|
# Close the underlying gz handle and forget it.  Returns undef when
# nothing is open or when gzclose() reports a non-zero status, 1 otherwise.
sub CLOSE
{
    my ($self) = @_;

    if (!defined($self->{'file'})) {
        return undef;
    }

    my $status = $self->{'file'}->gzclose();

    # The handle is dropped whatever the status was.
    delete $self->{'file'};

    if ($status == 0) {
        return 1;
    }

    return undef;
}
|
|
|
|
# read()/sysread() implementation: READ(BUF, NBYTES, [OFFSET]).
# BUF is used through @_ so that the caller's variable is filled in.
# Returns the count reported by gzread(), or undef when that is negative.
# Croaks if NBYTES is missing.
sub READ
{
    my $self = shift;

    my ($nbytes, $offset) = @_[1, 2];

    croak "IO::Zlib::READ: NBYTES must be specified" unless defined($nbytes);

    $offset ||= 0;

    # Work on $_[0] itself: it aliases the caller's buffer.
    $_[0] = "" unless defined($_[0]);

    my $count = $self->{'file'}->gzread(substr($_[0],$offset),$nbytes);

    return $count < 0 ? undef : $count;
}
|
|
|
|
# readline implementation.  In scalar context returns the next line; in
# list context returns every remaining line.  Returns an empty list
# (undef in scalar context) when gzreadline() yields nothing.
sub READLINE
{
    my ($self) = @_;

    my $line;

    return () unless $self->{'file'}->gzreadline($line) > 0;

    return $line unless wantarray;

    # List context: keep reading until gzreadline() stops delivering.
    my @lines = ($line);

    push @lines, $line while $self->{'file'}->gzreadline($line) > 0;

    return @lines;
}
|
|
|
|
# write()/syswrite() implementation: WRITE(BUF, [LENGTH, [OFFSET]]).
#
# Passes LENGTH bytes of BUF, starting at OFFSET, to gzwrite() and returns
# its result.  OFFSET defaults to 0 and LENGTH defaults to everything from
# OFFSET to the end of BUF, so that a two-argument syswrite() writes the
# whole buffer instead of nothing.
#
# Croaks if OFFSET + LENGTH runs past the end of BUF.
sub WRITE
{
    my $self = shift;
    my $buf = shift;
    my $length = shift;
    my $offset = shift;

    $offset = 0 unless defined($offset);

    unless (defined($length))
    {
        # A negative OFFSET counts back from the end of the buffer.
        my $start = $offset < 0 ? length($buf) + $offset : $offset;

        $length = length($buf) - $start;
        $length = 0 if $length < 0;
    }

    croak "IO::Zlib::WRITE: too long LENGTH" unless $offset + $length <= length($buf);

    return $self->{'file'}->gzwrite(substr($buf,$offset,$length));
}
|
|
|
|
# eof implementation: forwards to gzeof() on the underlying handle.
sub EOF
{
    my ($self) = @_;

    my $gz = $self->{'file'};

    return $gz->gzeof();
}
|
|
|
|
# fileno implementation: always returns undef.
sub FILENO
{
    my $no_descriptor = undef;

    return $no_descriptor;
}
|
|
|
|
# Constructor.  Creates an anonymous glob, ties it to the class (which
# opens the file when arguments are given) and returns the glob blessed
# into the class, or undef if the tie did not succeed.
sub new
{
    my ($class, @args) = @_;

    # Some call new IO::Zlib directly, without import() having run...
    _alias("new", @args) unless $aliased;

    my $glob = gensym();

    tie *{$glob}, $class, @args;

    return undef unless tied *{$glob};

    return bless($glob, $class);
}
|
|
|
|
# Return the next line by calling READLINE on the tied object in scalar
# context.
sub getline
{
    my ($self) = @_;

    my $tied = tied(*{$self});

    my $line = $tied->READLINE();

    return $line;
}
|
|
|
|
# Return all remaining lines by calling READLINE on the tied object in
# list context.  Croaks when not called in list context.
sub getlines
{
    my ($self) = @_;

    wantarray
        or croak "IO::Zlib::getlines: must be called in list context";

    my $tied = tied(*{$self});

    return $tied->READLINE();
}
|
|
|
|
# True when the tied object currently holds an underlying handle in its
# 'file' slot.
sub opened
{
    my ($self) = @_;

    my $tied = tied(*{$self});

    return defined $tied->{'file'};
}
|
|
|
|
# Forward any other method call to the tied object: the method name is
# stripped of its package qualifier and upper-cased (close -> CLOSE, ...).
# The remaining arguments are passed on through @_ itself, so that they
# stay aliased to the caller's variables.
sub AUTOLOAD
{
    my $self = shift;

    for ($AUTOLOAD)
    {
        s/.*:://;
        tr/a-z/A-Z/;
    }

    my $tied = tied(*{$self});

    return $tied->$AUTOLOAD(@_);
}
|
|
|
|
# gzopen() replacement that works through an external gzip process.
#
# Arguments: the file name and a mode string.  A mode containing 'r'
# opens for reading, otherwise a mode containing 'w' opens for writing;
# anything else croaks.  Any compression-level digit in the mode is
# currently not applied to the external command.
#
# Returns an IO::Handle on success and undef when the file or the pipe
# cannot be opened.  Dies if the rewind of a non-gzipped file fails.
#
# NOTE(review): the file name is interpolated unquoted into a command
# that is run through a piped open, so names containing shell
# metacharacters are unsafe here.
sub gzopen_external {
    my ($filename, $mode) = @_;
    require IO::Handle;
    my $fh = IO::Handle->new();

    if ($mode =~ /r/) {
        # Because someone will try to read ungzipped files
        # with this we peek and verify the signature.  Yes,
        # this means that we open the file twice (if it is
        # gzipped).
        # Plenty of race conditions exist in this code, but
        # the alternative would be to capture the stderr of
        # gzip and parse it, which would be a portability nightmare.
        #
        # The peek uses three-argument open so that the name is always
        # taken as a plain file to read, never as a mode or a command.
        return undef unless -e $filename && open($fh, '<', $filename);

        binmode $fh;
        my $sig;
        my $rdb = read($fh, $sig, 2);

        if (defined $rdb && $rdb == 2 && $sig eq "\x1F\x8B") {
            # gzip signature found: reopen the handle on the gzip pipe.
            my $ropen = sprintf $gzip_read_open, $filename;
            return undef unless open($fh, $ropen);
            binmode $fh;
            return $fh;
        }

        # Not gzipped: rewind and hand back the plain file handle.
        seek($fh, 0, SEEK_SET) or
            die "IO::Zlib: open('$filename', 'r'): seek: $!";
        return $fh;
    }
    elsif ($mode =~ /w/) {
        # To maximize portability we would need to open
        # two filehandles here, one for "| gzip $level"
        # and another for "> $filename", and then when
        # writing copy bytes from the first to the second.
        # We are using IO::Handle objects for now, however,
        # and they can only contain one stream at a time.
        my $wopen = sprintf $gzip_write_open, $filename;
        return undef unless open($fh, $wopen);
        $fh->autoflush(1);
        binmode $fh;
        return $fh;
    }

    croak "IO::Zlib::gzopen_external: mode '$mode' is illegal";
}
|
|
|
|
# gzread() replacement: read up to $_[2] bytes (4096 when no size is
# given) from handle $_[0] into $_[1].  Returns the byte count, or -1
# when read() fails.
sub gzread_external {
    # Use read() instead of sysread() because people may
    # mix reads and readlines, and we don't want to mess
    # the stdio buffering.  See also gzreadline_external()
    # and gzwrite_external().
    my $wanted = @_ == 3 ? $_[2] : 4096;
    my $nread  = read($_[0], $_[1], $wanted);

    return -1 unless defined $nread;
    return $nread;
}
|
|
|
|
# gzwrite() replacement: write $_[1] to handle $_[0].  Returns the
# number of bytes written, or -1 when syswrite() fails.
sub gzwrite_external {
    # Using syswrite() is okay (cf. gzread_external())
    # since the bytes leave this process and buffering
    # is therefore not an issue.
    my $nwrote = syswrite($_[0], $_[1]);

    return -1 unless defined $nwrote;
    return $nwrote;
}
|
|
|
|
# gzreadline() replacement: read one line from handle $_[0] into $_[1].
# Returns the length of the line, or -1 when no line could be read.
sub gzreadline_external {
    # See the comment in gzread_external().
    $_[1] = readline($_[0]);

    if (defined $_[1]) {
        return length($_[1]);
    }

    return -1;
}
|
|
|
|
# gzeof() replacement: report eof() of handle $_[0].
sub gzeof_external {
    my $at_end = eof($_[0]);

    return $at_end;
}
|
|
|
|
# gzclose() replacement: close handle $_[0] and always report success
# (0), whatever close() returned.
sub gzclose_external {
    # I am not entirely certain why this is needed but it seems
    # the close() always fails (as if the stream would have
    # been already closed - something to do with using external
    # processes via pipes?)
    close($_[0]);

    return 0;
}
|
|
|
|
1;
|