EKB I wrote this long before I knew about this wiki. It converts tags in a text canvas to and from XML (so you can write up an XML file, then load it into a text widget). It needs some work (such as entities and handling nested tags), but can do a few tricks, so I thought it was worth sharing.
The main file,
xtt.tcl:
package require dom 2.0
package require struct
# xtt: The XML <--> Text Tag Translator
# ver 1.0
#
# Copyright (c) 2003 Eric Kemp-Benedict
# All Rights Reserved
#
# This code is freely distributable, but is provided as-is with
# no waranty expressed or implied.
#
# Send comments to [email protected]. If you make improvements,
# please send them to me. I will give you credit and distribute the
# improved code. Thanks!
# == Description ==
#
# xtt offers an interface between a tk text widget's tags and
# an XML document's tags.
#
# NOTE: It's pretty basic. In particular, it doesn't (yet) handle
# nested tags or entities.
#
# To use it, just "source" xtt.tcl in your tcl script. Note
# that xtt requires the dom and struct packages. Struct is
# part of tcllib.
#
# Example:
#
# Step 1: Associate XML codes with text widget tags
#
# set xtt::tagArray(i) italic ;# "i" is the XML tag, "italic" is the text widget tag
# set xtt::tagArray(b) bold
# set xtt::tagArray(bi) boldital
# set xtt::tagArray(sup) super
# set xtt::tagArray(sub) sub
#
# Step 2: Specify a tag for a paragraph element (defaults to p, so this is optional)
#
# set xtt::paraElem para
#
# Step 3: Start translating!
#
# From XML -> TextWidget
# xtt::XMLtoText .myTextWidget $parsedXMLdoc
#
# From Text Widget -> XML
# set XMLoutput [xtt::TextToXML .t]
#
# From Text Widget -> DOMnode
# set DOMnode [xtt::TextToDOM .t]
#
namespace eval xtt {
#############################################
##
## Interface
##
#############################################
variable tagArray
variable paraElem "p"
proc XMLtoText {w DOMnode} {
variable tagArray
variable paraElem
set paralist [dom::element getElementsByTagName $DOMnode $paraElem]
foreach p [set $paralist] {
xtt::expandNodes $w $p
$w insert end "\n"
}
}
proc TextToXML {w} {
# NOTE: The stack is for future flexibility. At the moment nested tags are not
# processed. In future versions I expect to process nested tags and that will
# be easier with a stack.
struct::stack tagStack
set dump [$w dump -tag -text 1.0 end]
set length [llength $dump]
set retval "<p>"
for {set i 0} {$i < $length} {incr i} {
switch [lindex $dump $i] {
text {
incr i
set retval $retval[lindex $dump $i]
}
tagon {
incr i
tagStack push [xtt::getTagCode [lindex $dump $i]]
set retval $retval<[tagStack peek]>
}
tagoff {
incr i
set retval $retval</[tagStack pop]>
}
}
}
set retval $retval</p>
# Replace all newlines with "</p><p>"
regsub -all -- "\\n" $retval "</p><p>" retval
# Strip multiple newlines at the end
regsub -- "(<p></p>)+$" $retval "" retval
tagStack destroy
return $retval
}
proc TextToDOM {w} {
# Wrap the XML in a fake "document"
set XMLtext "<document>[TextToXML $w]</document>"
# Return the first child (which is all the contents)
return [dom::node cget [dom::parse $XMLtext] -firstChild]
}
#############################################
##
## Supporting routines
##
#############################################
proc expandNodes {w paraNode} {
variable tagArray
variable paraElem
set childList [dom::node children $paraNode]
foreach child $childList {
if {[dom::node cget $child -nodeType] != "textNode"} {
# Recursively call expandNodes, to nest tags
# Nested tags follow formatting rules for Tk text widget tags
xtt::expandNodes $w $child
}
set type [dom::node cget [dom::node parent $child] -nodeName]
set val [stripNewlines [dom::node cget $child -nodeValue]]
if {$type == $paraElem} {
$w insert end $val
} else {
$w insert end $val $tagArray($type)
}
}
}
proc stripNewlines {text} {
# Loop through and remove any newlines from text. Replace with a space if adjacent characters are not spaces,
# or if not at beginning or end of string.
while {[string first "\n" $text] != -1} {
set newlinePos [string first "\n" $text]
set charBefore [expr $newlinePos - 1]
set charAfter [expr $newlinePos + 1]
set alreadySpace false
if {$newlinePos == 0 || $newlinePos == [expr [string length $text] - 1]} {set alreadySpace true}
if {$newlinePos != 0} {
if {[string range $text $charBefore $charBefore] == " "} {set alreadySpace true}
}
if {$newlinePos != [expr [string length $text] - 1]} {
if {[string range $text $charAfter $charAfter] == " "} {set alreadySpace true}
}
if {$alreadySpace} {
set replaceText ""
} else {
set replaceText " "
}
set text [string replace $text $newlinePos $newlinePos $replaceText]
}
return $text
}
proc getTagCode {code} {
variable tagArray
foreach name [array names tagArray] {
if {$tagArray($name) == $code} {return $name}
}
error "Tag code does not exist"
}
}
A demo script:
source "xtt.tcl"
##
## Set up the text widget
##
set font(normal) "Times 12"
set font(ital) "$font(normal) italic"
set font(bold) "$font(normal) bold"
set font(boldital) "$font(normal) bold italic"
set font(small) "Times 8"
text .t -font $font(normal) -wrap word -spacing3 18p -spacing2 6p -width 70 -height 10
# Add the ".proc" window to look at the processed XML
text .proc -font $font(normal) -wrap word -width 70 -height 10
pack .t -fill both -expand yes -side top
pack .proc -fill both -expand yes
.t tag config italic -font $font(ital)
.t tag config bold -font $font(bold)
.t tag config boldital -font $font(boldital)
.t tag config super -offset 6 -font $font(small)
.t tag config sub -offset -6 -font $font(small)
##
## Load the xml source
##
set xmlFile [open "TestDoc.xml" r]
set document [read $xmlFile]
close $xmlFile
##
## Process the xml source
##
# Move from "document" down to the main node
set parsedDoc [dom::node cget [dom::parse $document] -firstChild]
########################################################
##
## This is the interface between text widget and XML
##
########################################################
##
## Associate XML codes with text widget tags
##
set xtt::tagArray(i) italic
set xtt::tagArray(b) bold
set xtt::tagArray(bi) boldital
set xtt::tagArray(sup) super
set xtt::tagArray(sub) sub
xtt::XMLtoText .t $parsedDoc
.proc insert end [xtt::TextToXML .t]
########################################################
##
## End of interface
##
########################################################
The sample file,
TestDoc.xml used by the demo script:
<body>
<p>
This is text, <i>this is italicized</i>, this is normal. Here's a subscript: CO<sub>2</sub>. The rest of the paragraph is pretty long,
allowing it to be wrapped in the window. It just keeps going and going and there isn't much you can do about it. What would you do about it, anyway? Just make sure it wraps
properly and also that any newlines in the XML file are properly
stripped out before putting them in the text widget. Only text marked
off with paragraph tags should receive newlines.
</p>
<p>
This is another paragraph, with <b>bold</b> text in it. Later in this paragraph I will add some other
special text, but first I want a long enough run of text that there may be some wrapping. Otherwise,
I'm curious to see what a superscript<sup>1</sup> might look like.
</p>
<p>Unfortunately, xtt doesn't (yet) handle nested tags, so I have to
make up a new tag to do <bi>bold italic</bi>. To have some bold text inside
an italicized block, I have to do this: <i>This is </i><bi>so</bi> <i>italic!</i></p>
</body>
if you prefer working with tdom replace these rows:
in xtt.tcl
line 1: package require tdom
line 70: set paralist [$DOMnode getElementsByTagName $paraElem]
line 71: foreach p $paralist {
line 125: return [dom parse $XMLtext]
line 138: set childList [$paraNode childNodes]
line 140: if {[$child nodeType] != "TEXT_NODE"} {
line 145: set type [[$child parentNode] nodeName]
line 146: set val [stripNewlines [$child nodeValue]]
in test.tcl
set parsedDoc [dom parse $document]