CMcC 20050301 - jyl was trying to track down what seemed to be a leak in a Tcl program, which reminded me of a component of tclhttpd that tries to (approximately and imprecisely) measure the size of variables and procs in an interpreter.

I've packaged it up here; may it be of use.
namespace eval tclsize {}
 
# DataSize --
#
#        return the data size for the interpreter or for a particular namespace.
#
# Arguments:
#        ns        (optional) namespace to measure (defaults to "::")
#        interp    (optional) interpreter to measure (defaults to the current
#                  interpreter)
#
# Results:
#        Returns {size namecount varcount}
proc tclsize::DataSize {{ns "::"} {interp ""}} {
     set ng 0
     set nv 0
     set size 0
 
     foreach g [interp eval $interp info vars ${ns}::*] {
         incr ng
         if {[interp eval $interp array exists $g]} {
             foreach {name value} [interp eval $interp array get $g] {
                 incr size [string length $name]
                 incr size [string length $value]
                 incr nv
             }
         } elseif {[interp eval $interp info exists $g]} {
             # info vars also returns namespace variables that are declared
             # but not yet set; only count those that actually hold a value
             incr size [string length $g]
             incr size [string length [interp eval $interp set $g]]
             incr nv
         }
     }
 
     foreach child [interp eval $interp namespace children $ns] {
         foreach {csize cng cnv} [DataSize $child $interp] break
         incr size $csize
         incr ng $cng
         incr nv $cnv
     }
 
     return [list $size $ng $nv]
}

# CodeSize --
#
#        return the code size for the interpreter or for a particular namespace.
#
# Arguments:
#        ns        (optional) namespace to measure (defaults to "::")
#        interp    (optional) interpreter to measure (defaults to the current
#                  interpreter)
#
# Results:
#        Returns {size proc_count}

proc tclsize::CodeSize {{ns ::} {interp ""}} {
     set np 0
     set size 0
 
     foreach g [interp eval $interp info procs ${ns}::*] {
         incr np
         incr size [string length $g]
         incr size [string length [interp eval $interp info args $g]]
         incr size [string length [interp eval $interp info body $g]]
     }
     
     foreach child [interp eval $interp namespace children $ns] {
         foreach {csize cnp} [CodeSize $child $interp] break
         incr size $csize
         incr np $cnp
     }
     
     return [list $size $np]
}

# InterpSize --
#
#        return the data and code size for an interpreter
#
# Arguments:
#        interp    (optional) interpreter to measure (defaults to the current
#                  interpreter)
#        recurse   (optional) if true (the default), also include slave
#                  interpreters
#
# Results:
#        Returns {total datasize namecount varcount codesize proccount}
proc tclsize::InterpSize {{interp ""} {recurse 1}} {
     foreach {data names vars} [DataSize :: $interp] break
     foreach {code procs} [CodeSize :: $interp] break
 
     if {$recurse} {
         foreach child [interp slaves $interp] {
             # interp slaves returns names relative to $interp, so build the
             # full interpreter path before measuring the child
             set child [concat $interp [list $child]]
             foreach {cdata cnames cvars} [DataSize :: $child] break
             incr data $cdata
             incr names $cnames
             incr vars $cvars
 
             foreach {ccode cprocs} [CodeSize :: $child] break
             incr code $ccode
             incr procs $cprocs
         }
     }
 
     return [list [expr {$data + $code}] $data $names $vars $code $procs]
}

if {[info exists argv0] && [info script] eq $argv0} {
     foreach {total data names vars code procs} [tclsize::InterpSize] break
     puts [subst {
         Total accounted: $total bytes.
         Data size: $data bytes in $vars variables ($names names).
         Code size: $code bytes in $procs procs.
     }]
}
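
For example, after sourcing the script, the procs can be pointed at a single namespace or at a slave interpreter. This is only a minimal sketch; the namespace ::tcl and the slave name demo are arbitrary choices:

 # measure one namespace in the current interpreter
 foreach {size names vars} [tclsize::DataSize ::tcl] break
 puts "::tcl: $vars variables ($names names), roughly $size bytes of data"
 
 # measure a freshly created slave interpreter
 interp create demo
 foreach {total data names vars code procs} [tclsize::InterpSize demo] break
 puts "demo: $total bytes total, $code bytes of code in $procs procs"
 interp delete demo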

Lars H: I think string bytelength would be more appropriate than string length here. (The latter will also lead to a lot of shimmering if you've got a lot of data as lists.) Also, the above has the disadvantage that it only looks at the size of the string representation, which sometimes can be quite different from the size of the internal representation.

CMcC: would string bytelength avoid shimmering? - LH: Yes. It too forces generation of a string representation, but unlike string length it does not also junk every non-string internal representation. - Then it should certainly be that. I think these procs can be useful even though they will only ever be an approximation of true memory usage, for the reasons given and others: hash tables waste internal space, malloc has fragmentation, the size of open file descriptors isn't accounted for, ...
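
A minimal sketch of that substitution (the helper name tclsize::StrSize is made up here): funnel every measurement through one small proc, so the choice of measuring command lives in a single place.

 # One place to choose the measuring command.  string bytelength reports the
 # UTF-8 byte length and, per the discussion above, does not discard
 # non-string internal representations the way string length can.
 proc tclsize::StrSize {value} {
     return [string bytelength $value]
 }

Each measuring call of the form [string length ...] in the procs above would then become a call to this helper instead.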