Updated 2012-08-18 13:58:15 by RLE

Author Jackson McCann.
``` #=======================================================================
#
# Package: DGA.tcl
#
# Purpose: A tcl package that implements a number of algorithms on top of
# the basic Tcl ::struct::graph package.

package provide DGA 1.0

#=======================================================================
# NAMESPACE ::DGA::

namespace eval DGA {

#===================================================================
# Export the functions that the user should be calling
namespace export shortest_path min_span_tree predecessor_list
namespace export pl_node pl_arc pl_dist
namespace export topological_sort outonly_nodes is_DAG

# The graph package is needed for obvious reasons so we include
# the struct package that contains it
package require struct

# Define any variables used by the package
variable topo_sort {}

#-------------------------------------------------------------------
# update_node
#
# g - The graph
# n - The node
# u - Used start 0 Unused, 1 Found, 2 Completely used
# d - The distance from the starting node
# p - The predecessor node
proc update_node { g n d p u } {

# Set this nodes predecessor
\$g node set \$n -key predecessor \$p

# The distance to the source node
\$g node set \$n -key distance \$d

# Set the used flag
if { [expr \$u > -1] } {
\$g node set \$n -key "used" \$u
}
}

#-------------------------------------------------------------------
# init_graph
#    Algorithms such as Dijkstra'a and Prim's need to flag the nodes
#    and arcs of the graph and to record additional information
#    against the nodes and arcs.  This function initializes these
#    values for a graph
proc init_graph { g } {

# Setup.  Update each node with the keyed values that will hold
#         the following information for each node:
#         The distance from the source to the node - infinite
#         The predecessor of the node - nothing
#         The nodes status 0 - Not found, 1 - Found, 2 - Finished
foreach node [\$g nodes] {
# This node doesn't have a predecessor and the distance to the
# source node is undefined and the node is unused
update_node \$g \$node -1 {} 0
}

# Initialize the arcs in the graph.  Make sure they all
# have a weight associated with them.  Give a default
# weight of 1 if no weight is found.  Set a flag to show
# if the arc has been used in the algorithm
foreach arc [\$g arcs] {
\$g arc set \$arc -key used 0
if { [catch {
\$g arc get \$arc -key weight
}] } {
\$g arc set \$arc -key weight 1.0
}
}
}

#-------------------------------------------------------------------
# min_arc
#    Within the graph zero, one or many nodes will be in state 1,
#    i.e., the node has been found and still has outward arcs that
#    have not been used.  Find the arc that has the minimum weight.
#    As a side effect if a node has no unused outward arcs left
#    then mark the node as used.  If no arc can be found then
#    return an empty list.
# g - The graph to search
proc min_arc { g } {

# Initialise the working variables used by this proc
# The arc from the source node
set tArc {}
# The weight of the arc
set tArcMin -1

# Find the shortest arc out of the nodes that have been found.
# Use this arc if it provides a shorter route to the target or
# if it discovers a new target node
foreach node [\$g nodes -key used -value 1] {

# The hasArc counter is used to determine when a node has
# no unused arcs left.
set hasArc 0

# Look at each arc that goes out of the current node
foreach arc [\$g arcs -out \$node -key used -value 0] {
# Set hasArc as this node has at least one unused arc
set hasArc 1
# Get the weight of this arc.  If it is less than the
# current value of tArcMin then it will be used
set arcWeight [\$g arc get \$arc -key weight]
if { [expr \$tArcMin == -1] || [expr \$tArcMin > \$arcWeight] } {
# Store the source node and arc names
set tArc \$arc
set tArcMin \$arcWeight
}
}

# If a node doesn't have any unused arcs left then
# mark the node as completely used
if { [expr \$hasArc == 0] } {
\$g node set \$node -key used 2
}
}

# Now mark this arc as having been used
if { \$tArc != {} } {
\$g arc set \$tArc -key used 1
}

return \$tArc
}

#===================================================================
# predecessor_list
#    After an algorithm such as:
#       shortest_path
#       min_span_tree
#    has been run on the graph it will contain a predecessor list,
#    that is for each node that was found an arc will be held under
#    the key 'predecessor' that defines the arc and node that this
#    node was reached from.  This function returns this list as a
#    Tcl list with a structure.  The pl_ functions can be used to
#    access this list
# g - The graph
proc predecessor_list { g } {

set idxList {}
set detList {}
foreach node [\$g nodes] {

# The distance node is from the source
set nDist [\$g node get \$node -key distance]

# The arc that links node to its predecessor
set pArc  [\$g node get \$node -key predecessor]

# The node that precedes this node, if there is one
if { \$pArc == {} } {
set pNode {}
} else {
set pNode [\$g arc source \$pArc]
}

lappend idxList \$node
lappend detList [list \$node \$pNode \$pArc \$nDist]

}

return [list [lsort \$idxList] [lsort \$detList]]
}

#===================================================================
# pl_node
# pl_arc
# pl_dist
#    Three helper functions that make it easier to get
#    details about a node's predecessor.
# pl - The predecessor list as returned by predecessor_list
# n  - The node
proc pl_node { pl n } {
set idx [lsearch -sorted [lindex \$pl 0] \$n]
return [lindex [lindex [lindex \$pl 1] \$idx] 1]
}

proc pl_arc { pl n } {
set idx [lsearch -sorted [lindex \$pl 0] \$n]
return [lindex [lindex [lindex \$pl 1] \$idx] 2]
}

proc pl_dist { pl n } {
set idx [lsearch -sorted [lindex \$pl 0] \$n]
return [lindex [lindex [lindex \$pl 1] \$idx] 3]
}

#===================================================================
# shortest_path
#    Using Dijkstra's algorithm find a shortest path from the node
#    n to every other node that is reachable from n.
#
# g - The graph to be processed
# n - The starting node
proc shortest_path { g n } {

# Setup.  Update the nodes and arcs with the required
#         flags etc.
init_graph \$g

# Mark the starting node as being used and as having a distance
# of zero from itself.
update_node \$g \$n 0.0 {} 1

# Now start looking for nodes we don't know about by exploring out
# from the starting node.
while {1} {

# Find the arc with minimum length that goes from
# a found node
set tArc [min_arc \$g]

# Test for the end of the graph, we have no unused nodes
# or arcs left.  This does not imply that all of the nodes
# in the graph have been discovered, some nodes may not be
# reachable from the given starting node.  These can be
# identified as they have a distance of -1.
if { \$tArc == {} } { break }

# Get the distance from the first node of
# this node
set sDist [\$g node get [\$g arc source \$tArc] -key distance]

# Get the name of the target node
set tNode [\$g arc target \$tArc]

# Get the weight of the arc joining the source and
# target nodes.
set weight [\$g arc get \$tArc -key weight]

# Get the distance from the source node of the
# target node.  This will be -1 if we have never
# seen this node before.
set tDist [\$g node get \$tNode -key distance]

# Does this arc build a short path to the target node?
if { [expr \$tDist == -1] } {
# Yes - it must do, this is the first time
# the target node has been encountered.

# Save the distance from the source node and the
# arc that leads to the predecessor and mark the
# node as found, it will now be considered next time
# we look for the shortest arc.
update_node \$g \$tNode [expr \$sDist + \$weight] \$tArc 1

} else {
# Does this arc provide a cheaper way to get to
# the already discovered node?  Calculate the distance
# based on the new arc's weight and compare it with the stored
# distance for the node.
set ttDist [expr \$sDist + \$weight]
if { [expr \$ttDist < \$tDist] } {
# Update the node to use the new arc as its
# predecessor
update_node \$g \$tNode \$ttDist \$tArc -1
}
}
}
}

#===================================================================
# min_span_tree
#    Using Prims's algorithm find a minimum spanning tree starting
#    at the node n and reaching to every other node that is reachable
#    from n.
#
# g - The graph to be processed
# n - The starting node
proc min_span_tree { g n } {

# Setup.  Update the nodes and arcs with the required
#         flags etc.
init_graph \$g

# Mark the starting node as being used and as having a distance
# of zero from itself.
update_node \$g \$n 0.0 {} 1

# Now start looking for nodes we don't know about by exploring out
# from the starting node.
while {1} {

# Find the arc with minimum length that goes from
# a found node
set tArc [min_arc \$g]

# Test for the end of the graph, we have no unused nodes
# or arcs left.  This does not imply that all of the nodes
# in the graph have been discovered, some nodes may not be
# reachable from the given starting node.  These can be
# identified as they have a distance of -1.
if { \$tArc == {} } { break }

# Get the distance from the first node of
# this node
set sDist [\$g node get [\$g arc source \$tArc] -key distance]

# Get the name of the target node.
set tNode [\$g arc target \$tArc]

# Get the weight of the arc joining the source and
# target nodes.
set weight [\$g arc get \$tArc -key weight]

# Get the distance from the source node of the
# target node.  This will be -1 if we have never
# seen this node before.
set tDist [\$g node get \$tNode -key distance]

# Does this arc discover a new target node?
if { [expr \$tDist == -1] } {
# Yes it does

# Save the distance from the source node and the
# arc that leads to the predecessor and mark the
# node as found.  It will now be considered next time
# we look for the shortest arc.
update_node \$g \$tNode [expr \$sDist + \$weight] \$tArc 1
}
}
}

#===================================================================
# outonly_nodes
#    An 'out only' node is one that has no inward arcs.  That is the
#    node can have no predecessor as no no arc has it as its target
#    This function finds all such nodes with the graph.
#
# g - The graph
proc outonly_nodes { g } {
set result {}

foreach node [\$g nodes] {
if { [\$g node degree -in \$node] == 0 } {
lappend result \$node
}
}

return \$result
}

#-------------------------------------------------------------------
# make_toplevel_node
#    When checking for DAGness and/or performing a topological sort
#    a single top-level node to start the process from is required.
#    This function transforms the graph so that it has that single
#    top node.  The name of this new node is returned.
proc make_toplevel_node { g } {

# Find toplevel node(s)
set oonList [outonly_nodes \$g]

# If there are no out-only nodes then we can't sort the
# graph as it must have at least one cycle.
if { [llength \$oonList] == 0 } {
error "\$g is not a DAG.  Graph contains one or more cycles"
}

# If there are more than one out-only nodes a new parent node that
# provides a single starting point for the sort must be created above
# these nodes.  To make life simple we do this for one node as well.
set startNode [\$g node insert]

# Connect the startNode to the original out-only nodes
foreach node \$oonList {
\$g arc insert \$startNode \$node
}

return \$startNode
}

#-------------------------------------------------------------------
# topo_node
#    This routine is called by the topological_sort when a node has
#    been completely explored.  The routine has two functions.
proc topo_node { dir g n } {
variable topo_sort
lappend topo_sort \$n
}

#===================================================================
# topological_sort
#    Perform a topological sort on the graph.  This will return a
#    list of nodes giving an ordering on the nodes such that all
#    arcs go from left to right. Only an acyclic graph can have a
#    topological sort, because a directed cycle must eventually
#    return home to the source of the cycle. However, every DAG
#    has at least one topological sort.
#
# g - The graph to sort
proc topological_sort { g } {

variable topo_sort {}

# Make a toplevel (parentless) node to start the sort from
set startNode [make_toplevel_node \$g]

# Setup.  Update the nodes and arcs with the required
#         flags etc.
init_graph \$g

# Walk the graph, nodes are added to the topo_sort list
# in the order that they are marked as completely explored.
# The topological sort is the reverse of this order.
# NB - This code won't detect any cycles in the data so it's
#      up to the user to determine that the graph is a DAG
#      The is_DAG function can be used for this
\$g walk \$startNode -order post -type dfs -command topo_node

# Remove the start node we created from the graph, all of the
# arcs will be removed as well.
\$g node delete \$startNode

# Reverse the list and discard the node that
# was added to the graph by this routine.
set result {}
for { set i [expr [llength \$topo_sort] - 2] } {\$i >= 0} {incr i -1} {
lappend result [lindex \$topo_sort \$i]
}

return \$result

}

#===================================================================
# dag_dfs
#    Recursive Depth First Search of the graph.  If a node is
#    discovered that has the used flag set to 1, then it has been
#    found by a cycle through one of its children and the graph
#    is not a DAG.  A stack-based implementation of this function
#    would be better.  It would not risk blowing up if the recursion
#    level got too big.
#
# g - The graph
# n - The next node to check
proc dag_dfs { g n } {

# We have found a new node, mark it as such.
\$g node set \$n -key used 1

# Process each of the arcs out of the node.
foreach arc [\$g arcs -out \$n] {

# Find out the details of the target node.
set tNode [\$g arc target \$arc]
set used [\$g node get \$tNode -key used]

# If the node has been discovered but not completed then
# this is a back edge and the graph contains a cycle.
if { \$used == 1 } {
error "\$g is not a DAG.  Graph contains one or more cycles"
}

dag_dfs \$g \$tNode

}

# We have completely used this node.
\$g node set \$n -key used 2

}

#===================================================================
# is_DAG
#    Determine if the graph is a DAG, that is that it contains no
#    cycles.  If it isn't a DAG then an error is thrown!
#
# g - The graph
proc is_DAG { g } {

# Make a toplevel (parentless) node to start the sort from
set startNode [make_toplevel_node \$g]

# Setup.  Update the nodes and arcs with the required
#         flags etc.
init_graph \$g

# Use the simple recursive definition of depth-first search
# to search through the tree for back edges.
set result [dag_dfs \$g \$startNode]

# Remove the start node we created from the graph. All of the
# arcs will be removed as well.
\$g node delete \$startNode

return \$result
}
}```