package require ral
namespace import ::ral::*
package require ralutil
namespace import ::ralutil::*
# crosstab relValue crossAttr ?attr1 attr2 ...?
#
# Generate a cross tabulation of "rel" for the "crossAttr" against the
# variable number of attributes given. The "crossAttr" argument is the name of
# an attribute of "relValue". The idea is to create new relation that contains all
# the attributes in "args" plus a new attribute for each distinct value of
# "crossAttr". The value of the new attributes is count of tuples that have
# the corresponding value of "crossAttr". Relationally, the "summarize"
# command is used when computations are required across groups of tuples.
proc crosstab {relValue crossAttr args} {
# We start by projecting the attributes that will be retained
# in the resulting relation.
set subproj [relation project $relValue {*}$args]
# The strategy is to build up a summarize command on the fly, adding new
# attributes. So we start with the constant part of the command.
set sumCmd [list relation summarize $relValue $subproj r]
# By projecting on the "crossAttr" we get the unique set of values
# for that attribute since there are no duplicates in relations.
set crossproj [relation project $relValue $crossAttr]
# For each distince value of the "crossAttr" extend the relation with
# a new attribute by the same name as the value and whose value is
# the number of tuples which match the value.
foreach val [lsort [relation list $crossproj]] {
set sumexpr [format\
{[relation cardinality [relation restrictwith $r {$%s == "%s"}]]}\
$crossAttr $val]
lappend sumCmd $val int $sumexpr
}
# Finally we want the total for all the "crossAttr" matches.
lappend sumCmd Total int {[relation cardinality $r]}
set ctab [eval $sumCmd]
# At this point the relational algebra is over! The rest of this is just
# to format some output. First we want to add totals across the bottom of
# the tabular display. Technically, these totals are not part of the cross
# tabulated relation since they represent different facts than the other
# tuples in the relation. So we put the relation into a matrix and add in
# the totals there. The matrix also serves as a convenient means of
# formatting the output. TclRAL has support for moving relations into
# matrices.
set m [relation2matrix $ctab $args]
# Get rid of the "data type" row in the header. It's ugly here.
$m delete row 1
# Add the row where the totals will go
$m add rows 1
$m set cell 0 end Total
set colIndex [expr {[llength $args] - 1}]
# The totals are easy to come by. They are just the summarization
# of the original relation value over the "crossAttr".
# Add them into the matrix.
set totals [relation summarize $relValue $crossproj s\
Total int {[relation cardinality $s]}]
relation foreach t $totals -ascending $crossAttr {
$m set cell [incr colIndex] end [relation extract $t Total]
}
# The grand total is just the number of tuples we started with.
$m set cell end end [relation cardinality $relValue]
# The report package seems a little complicated to use, but TclRAL
# includes some support here too, since "relformat" uses the
# report package to do formatting. A pre-supplied style helps here.
::report::report r [relation degree $ctab]\
style ::ral::relationAsTable {} 1
# Put the totals line in the bottom caption of the report.
r botdata set [r topdata get]
r botcapsep set [r topcapsep get]
r botcapsep enable
r bcaption 1
# Finally some text.
set result [$m format 2string r]
# Add a text caption to the output.
set caption "Cross Tabulation of\
$crossAttr Against [join $args {, }]"
append result $caption \n [string repeat "-" [string length $caption]]
r destroy
$m destroy
return $result
}if 0 { The original data set actually had two records labeled:- Jane;F;tennis
set sportsData [relation create\
{Name string Sex string Sport string}\
{{Name Sex Sport}}\
{Name John Sex M Sport soccer}\
{Name Jane Sex F Sport tennis}\
{Name Tom Sex M Sport football}\
{Name Dick Sex M Sport soccer}\
{Name Harry Sex M Sport tennis}\
{Name Mary Sex F Sport baseball}\
{Name Jeff Sex M Sport baseball}\
{Name Alice Sex F Sport tennis}\
]
puts [relformat $sportsData "Sports Data"]if 0 {+======+======+========+ |Name |Sex |Sport | |string|string|string | +======+======+========+ |John |M |soccer | |Jane |F |tennis | |Tom |M |football| |Dick |M |soccer | |Harry |M |tennis | |Mary |F |baseball| |Jeff |M |baseball| |Alice |F |tennis | +======+======+========+ Sports Data -----------}
puts [crosstab $sportsData Sex Sport]if 0 {
+--------+-+-+-----+ |Sport |F|M|Total| +--------+-+-+-----+ |baseball|1|1|2 | |football|0|1|1 | |soccer |0|2|2 | |tennis |2|1|3 | +--------+-+-+-----+ |Total |3|5|8 | +--------+-+-+-----+ Cross Tabulation of Sex Against Sport -------------------------------------It's also interesting to look at the transposition. }
puts [crosstab $sportsData Sport Sex]if 0 {
+-----+--------+--------+------+------+-----+ |Sex |baseball|football|soccer|tennis|Total| +-----+--------+--------+------+------+-----+ |F |1 |0 |0 |2 |3 | |M |1 |1 |2 |1 |5 | +-----+--------+--------+------+------+-----+ |Total|2 |1 |2 |3 |8 | +-----+--------+--------+------+------+-----+ Cross Tabulation of Sport Against Sex -------------------------------------And just for comparison, this is just the straight summarization across the Sport and Sex attributes. The major difference here is that the "0" rows are missing. }
puts [pipe {
relation project $sportsData Sport Sex |
relation summarize $sportsData ~ r Total int {[relation cardinality $r]} |
relformat ~ "Totals over Sport and Sex" {Sport Sex}
}]if 0 {+========+======+-----+ |Sport |Sex |Total| |string |string|int | +========+======+-----+ |baseball|F |1 | |baseball|M |1 | |football|M |1 | |soccer |M |2 | |tennis |F |2 | |tennis |M |1 | +========+======+-----+ Totals over Sport and Sex -------------------------
See also crosstab and crosstab again.In Ratcl, there is an example called "pivot tables" [1]. }
Category Statistics

