- RFC 1950 [1] - Zlib Compressed Data Format
- RFC 1951 [2] - Deflate Compressed Data format
- RFC 1952 [3] - Gzip File Format
See GASP, tkArchive, TkZip,Also take a look at bzip2 and LZMA for better compression ratesAre there any Tcl bindings for zlib?18Aug04 PS Yes there is! tclkit has it by default and I created an extension from that. See the zlib page.gunzip a file with zlib and Tcl
proc gunzip { file {outfile ""} } {
package require zlib
# Gunzip the file
# See http://www.gzip.org/zlib/rfc-gzip.html for gzip file description
set in [open $file r]
fconfigure $in -translation binary -buffering none
set id [read $in 2]
if { ![string equal $id \x1f\x8b] } {
error "$file is not a gzip file."
}
set cm [read $in 1]
if { ![string equal $cm \x8] } {
error "$file: unknown compression method"
}
binary scan [read $in 1] b5 FLAGS
puts $FLAGS
foreach {FTEXT FHCRC FEXTRA FNAME FCOMMENT} [split $FLAGS ""] {}
binary scan [read $in 4] i MTIME
set XFL [read $in 1]
set OS [read $in 1]
if { $FEXTRA } {
binary scan [read $in 2] S XLEN
set ExtraData [read $in $XLEN]
}
set name ""
if { $FNAME } {
set XLEN 1
set name [read $in $XLEN]
set c [read $in 1]
while { $c != "\x0" } {
append name $c
set c [read $in 1]
}
}
set comment ""
if { $FCOMMENT } {
set c [read $in 1]
while { $c != "\x0" } {
append comment $c
set c [read $in 1]
}
}
set CRC16 ""
if { $FHCRC } {
set CRC16 [read $in 2]
}
set cdata [read $in]
close $in
binary scan [string range $cdata end-7 end] ii CRC32 ISIZE
set data [zlib inflate [string range $cdata 0 end-8]]
if { $CRC32 != [zlib crc32 $data] } {
error "gunzip Checksum mismatch."
}
if { $outfile == "" } {
set outfile $file
if { [string equal -nocase [file extension $file] ".gz"] } {
set outfile [file rootname $file]
}
}
if { [string equal $outfile $file] } {
error "Will not overwrite input file. sorry."
}
set out [open $outfile w]
fconfigure $out -translation binary -buffering none
puts -nonewline $out $data
close $out
file mtime $outfile $MTIME
}18Aug04 PSLES: Would someone tell me HOW this is better than [exec gzip filename]?PS: gzip might not be installed? And with a small tweak, you'd just get the file content - put that together with vfs::tar and you might be able to mount tar.gz files...SRIV Stock Windows installs don't have gzip. Think cross platform. "Better" is in the eye of the beholder. Nice work.
DKF: Here's a cheap way to invoke gzip on Windows. Note that just using exec gzip -c <<$d does not work because of translation issues.
proc gzip d {
set data [open foo.tmp w]
fconfigure $data -translation binary
puts -nonewline $data $d
close $data
set f [open "|gzip -c <foo.tmp" r]
fconfigure $f -translation binary
set d [read $f]
close $f
after 100 ;# Ugly hack to give gzip time to exit so we can kill foo.tmp on Windows
file delete foo.tmp
return $d
}14Dec06 gl : Here is a way to read and write gzip .gz files transparently and on the fly -- including channels like "stdin" -- with the help of Trf:
package require Trf
namespace eval gz {
variable CRC
# Attach to a channel for writing -- i.e., write .gz header, enable compression
proc attach_w {f} { l for writing -- i.e., write .gz header, enable compression
# Write header
puts -nonewline $f [binary format "H*iH*" "1f8b0800" [clock seconds] "0003"]
set CRC [binary format x4]
# Init/attach compression
zip -attach $f -mode compress -nowrap 1
fconfigure $f -translation binary -encoding binary
# Init/attach CRC
crc-zlib -attach $f -mode transparent -write-destination ::gz::CRC($f) -write-type variable
fconfigure $f -translation binary -encoding binary
fconfigure $f -translation binary -encoding binary
return $f
}
# Detach from a channel for writing -- i.e., write .gz footer incl. CRC
proc detach_w {f} {
variable CRC
set SIZE [tell $f]
unstack $f ; # CRC
unstack $f ; # gzip
puts -nonewline $f $CRC($f)
puts -nonewline $f [binary format "i" [expr $SIZE % 0x100000000]]
}
# Attach to a channel for reading -- i.e., read and check .gz header, enable decompression
proc attach_r {f} {
# (using code from [http://wiki.tcl.tk/6175])
set id [read $f 2]
if { ![string equal $id \x1f\x8b] } {
error "GZip channel $f: not a gzip file."
}
set cm [read $f 1]
if { ![string equal $cm \x8] } {
error "GZip channel $file: unknown compression method."
}
binary scan [read $f 1] b5 FLAGS
puts $FLAGS
foreach {FTEXT FHCRC FEXTRA FNAME FCOMMENT} [split $FLAGS ""] {}
binary scan [read $f 4] i MTIME
set XFL [read $f 1]
set OS [read $f 1]
if { $FEXTRA } {
binary scan [read $f 2] S XLEN
set ExtraData [read $f $XLEN]
}
set name ""
if { $FNAME } {
set c [read $f 1]
while { $c != "\x0" } {
append name $c
set c [read $f 1]
}
}
set comment ""
if { $FCOMMENT } {
set c [read $f 1]
while { $c != "\x0" } {
append comment $c
set c [read $f 1]
}
}
set CRC16 ""
if { $FHCRC } {
set CRC16 [read $f 2]
}
# Init/attach decompression
zip -attach $f -mode compress -nowrap 1
fconfigure $f -translation binary -encoding binary
# Init/attach CRC
crc-zlib -attach $f -mode transparent -read-destination ::gz::CRC($f) -read-type variable
fconfigure $f -translation binary -encoding binary
return $f
}
# Detach from a channel for reading -- i.e., check .gz footer incl. CRC
proc detach_r {f} { nel for reading -- i.e., check .gz footer incl. CRC
variable CRC
set cmpSize [expr [tell $f] % 0x100000000]
binary scan $CRC($f) i cmpCRC
unstack $f ; # CRC
unstack $f ; # gzip
binary scan [read $f 4] i gzCRC
binary scan [read $f 4] i gzSize
binary scan [read $f 4] i gzSize
if {$gzCRC != $cmpCRC} {
error "GZip channel $f: CRC mismatch."
}
if {$gzSize != $cmpSize} {
error "GZip channel $f: Size mismatch."
}
}
}
# Demo program:
# Decompress to stdout: gztest.tcl filename.gz
# Compress to stdout: gztest.tcl filename
if {[llength $argv] != 1} {
puts "Usage: gztest.tcl filename"
exit -2
}
set fn [lindex $argv 0]
set f [open $fn r]
fconfigure $f -translation binary
if {[string match "*.gz" $fn]} {
# Is .gz
::gz::attach_r $f
fcopy $f stdout
::gz::detach_r $f
} else {
# Is not .gz
::gz::attach_w stdout
fcopy $f stdout
::gz::detach_w stdout
}To save people headaches working out how to do this for HTTP streams:
set gzip [binary format "H*iH*" "1f8b0800" [clock seconds] "0003"]
append gzip [zlib deflate $content]
append gzip [binary format i [zlib crc32 $content]]
append gzip [binary format i [string length $content]]
set content-encoding gzip(CMcC modded this snipped 10Jul07 after much head-scratching and some experimentation)BAS I was able to send gzipped content by doing: fconfigure $chan -translation binary
set gzip [zlib gzip $content -header [list crc [zlib crc32 $content] time [clock seconds] os 3]]
set content-encoding gzipos 3 just means it was generated from Unix OS.
