Updated 2018-09-20 13:05:23 by jdc

Because of the enormous amount of duplication in XSD files, writing them by hand can be very tedious. There are tools to reduce the pain, but they are usually quite expensive and still require quite a bit of manual work.

MJ - I have written a small DSL in Tcl to automate the largest part of creating an XSD. I call it TSD, for Tcl Schema Definition.

The DSL is fairly straightforward. For example, this tsd file:
text {<?xml version="1.0" encoding="UTF-8"?>}
text {<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">}

define root item+
define item { part1!  part2?  part3 }
define part1 xsd:string
define part2 xsd:decimal {This is optional}
extension part3 xsd:normalizedString {{attr1 {this is attr1}}} { Complex stuff }

text "</xsd:schema>"

will be translated to:
<?xml version="1.0" encoding="UTF-8"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified">
  <xsd:element name="root" type="rootType"/>
  <xsd:complexType name="rootType">
    <xsd:sequence>
      <xsd:element minOccurs="1" maxOccurs="unbounded" ref="item"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="item" type="itemType"/>
  <xsd:complexType name="itemType">
    <xsd:sequence>
      <xsd:element minOccurs="1" maxOccurs="1" ref="part1"/>
      <xsd:element minOccurs="0" maxOccurs="1" ref="part2"/>
      <xsd:element ref="part3"/>
    </xsd:sequence>
  </xsd:complexType>
  <xsd:element name="part1" type="xsd:string"/>
  <xsd:element name="part2" type="xsd:decimal">
    <xsd:annotation>
      <xsd:documentation>This is optional</xsd:documentation>
    </xsd:annotation>
  </xsd:element>
  <xsd:element name="part3" type="part3Type"/>
  <xsd:complexType name="part3Type">
    <xsd:annotation>
      <xsd:documentation> Complex stuff </xsd:documentation>
    </xsd:annotation>
    <xsd:simpleContent>
      <xsd:extension base="xsd:normalizedString">
        <xsd:attribute name="attr1" use="optional">
          <xsd:annotation>
            <xsd:documentation>this is attr1</xsd:documentation>
          </xsd:annotation>
        </xsd:attribute>
      </xsd:extension>
    </xsd:simpleContent>
  </xsd:complexType>
</xsd:schema>

The script:
package require Tcl 8.5
package require tdom

proc define {element sequence {documentation {}}} {
    # Declare a named element.
    #
    # element       - name of the element being declared
    # sequence      - either a single word starting with "xsd:" (used
    #                 directly as the element's type) or a list of child
    #                 references that becomes the body of "<element>Type"
    # documentation - optional annotation text
    set isSchemaType [expr {
        [llength $sequence] == 1 && [string match "xsd:*" $sequence]
    }]
    if {!$isSchemaType} {
        # Anything else gets its own complex type named after the element;
        # the documentation is attached to the type, not to the element.
        set complexName "${element}Type"
        emitElement $element $complexName
        return [emitTypeDef $complexName $sequence $documentation]
    }
    emitElement $element $sequence $documentation
    return
}  

proc extension {element baseType attributes {documentation {}}} {
    # Declare an element whose type is a simpleContent extension of
    # baseType, carrying optional attributes.
    #
    # element       - name of the element; its type is named "<element>Type"
    # baseType      - type used as the extension base
    # attributes    - list of {name ?doc?} pairs, one per attribute; every
    #                 attribute is emitted with use="optional"
    # documentation - optional annotation text for the complex type
    #
    # Annotations are only written when there is text to put in them, in
    # line with what emitElement and emitTypeDef do.
    set typename ${element}Type
    emit "<xsd:element name=\"$element\" type=\"$typename\"/>\n"
    emit "<xsd:complexType name=\"$typename\">\n"
    if {$documentation ne {}} {
        emit "<xsd:annotation>\n"
        emit "<xsd:documentation>$documentation</xsd:documentation>\n"
        emit "</xsd:annotation>\n"
    }
    emit "<xsd:simpleContent>\n"
    emit "<xsd:extension base=\"$baseType\">\n"
    foreach attribute $attributes {
        # doc is the empty string when the pair has no second element
        lassign $attribute name doc
        emit "<xsd:attribute name=\"$name\" use=\"optional\">\n"
        if {$doc ne {}} {
            emit "<xsd:annotation>\n"
            emit "<xsd:documentation>$doc</xsd:documentation>\n"
            emit "</xsd:annotation>\n"
        }
        emit "</xsd:attribute>\n"
    }
    emit "</xsd:extension>\n"
    emit "</xsd:simpleContent>\n"
    emit "</xsd:complexType>\n"
}

proc text text {
    # Pass a literal chunk of output through unchanged, followed by a
    # newline.
    set line $text
    append line \n
    emit $line
}

proc include {filename} {
    # Emit the contents of filename (read as UTF-8) verbatim, with a
    # newline before and after.
    #
    # The channel is closed on every path: if configuring or reading it
    # fails, the original error is re-raised after the close instead of
    # leaking the open file.
    set f [open $filename]
    set failed [catch {
        fconfigure $f -encoding utf-8
        read $f
    } contents options]
    close $f
    if {$failed} {
        return -options $options $contents
    }
    emit \n$contents\n
}

proc emitElement {element type {documentation {}}} {
    # Emit one <xsd:element>.
    #
    # With a non-empty element name, a name/type declaration is written.
    # With an empty name, a ref to `type` is written instead, and a
    # trailing +, ? or ! on `type` is stripped and turned into
    # minOccurs/maxOccurs attributes.
    # A non-empty documentation string is wrapped in an annotation.
    if {$element ne {}} {
        emit "<xsd:element name=\"$element\" type=\"$type\">\n"
    } else {
        set occurrence {
            + {minOccurs="1" maxOccurs="unbounded"}
            ? {minOccurs="0" maxOccurs="1"}
            ! {minOccurs="1" maxOccurs="1"}
        }
        set suffix [string index $type end]
        set cardinality {}
        if {[dict exists $occurrence $suffix]} {
            set cardinality [dict get $occurrence $suffix]
            set type [string range $type 0 end-1]
        }
        emit "<xsd:element $cardinality ref=\"$type\">\n"
    }
    if {$documentation ne {}} {
        foreach chunk [list \
            "<xsd:annotation>\n" \
            "<xsd:documentation>$documentation</xsd:documentation>\n" \
            "</xsd:annotation>\n"] {
            emit $chunk
        }
    }
    emit "</xsd:element>\n"
}

proc emitSequence {sequence} {
    # Wrap one element reference per list item in an <xsd:sequence>.
    # Items are passed to emitElement with an empty element name, so each
    # one is written as a ref.
    set opening "<xsd:sequence>\n"
    set closing "</xsd:sequence>\n"
    emit $opening
    foreach child $sequence {
        emitElement {} $child
    }
    emit $closing
}

proc emitTypeDef {type sequence documentation} {
    # Emit a named <xsd:complexType> whose content is the given sequence,
    # preceded by an annotation when documentation is non-empty.
    set annotation {}
    if {$documentation ne {}} {
        set annotation [list \
            "<xsd:annotation>\n" \
            "<xsd:documentation>$documentation</xsd:documentation>\n" \
            "</xsd:annotation>\n"]
    }
    emit "<xsd:complexType name=\"$type\">\n"
    foreach chunk $annotation {
        emit $chunk
    }
    emitSequence $sequence
    emit "</xsd:complexType>\n"
}

proc emit text {
    # Placeholder output sink: every generated fragment is handed to this
    # proc. It deliberately fails until it is replaced with a real
    # implementation suited to the caller.
    error {You'll need to override emit}
}

proc beautify {xml} {
    # Re-indent an XML string (2 spaces per level) and return it with an
    # XML declaration in front, since the serialised tree does not carry
    # one.
    #
    # Raises whatever error tdom raises when xml is not well-formed.
    # The parsed document is deleted explicitly on every path; tdom
    # documents are not released automatically when the handle goes out
    # of scope.
    set doc [dom parse $xml]
    set failed [catch {$doc asXML -indent 2} body options]
    $doc delete
    if {$failed} {
        return -options $options $body
    }
    return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n$body"
}



# ================================

# redefine emit to do the right thing
proc emit {text} {
    # Collect every generated fragment in the global variable `result`.
    global result
    append result $text
}

# Command-line driver: tsd2xsd tsd-file ?xsd-file?
# Sources the TSD file, then writes the beautified schema to xsd-file, or
# to stdout when no output file is given.
if {$argc < 1 || $argc > 2} {
    puts stderr "Usage: tsd2xsd tsd-file ?xsd-file?"
    # A usage error is a failure; report it through the exit status.
    exit 1
}

lassign $argv tsd xsd

# Resolve both paths against the directory the tool was started from
# BEFORE changing directory; otherwise a relative path would be looked up
# in the wrong place once the working directory has moved.
set tsd [file normalize $tsd]
if {$xsd ne {}} {
    set xsd [file normalize $xsd]
}

# Evaluate the TSD from its own directory so that relative file names used
# inside it (e.g. by `include`) are resolved next to the TSD file.
set path [pwd]
cd [file dirname $tsd]
# Make sure the accumulator exists even if the TSD emits nothing.
set result {}
source -encoding utf-8 $tsd
cd $path

if {$xsd ne {} } {
    set f [open $xsd w]
    fconfigure $f -encoding utf-8
} else {
    set f stdout
}

# If the generated text cannot be parsed and re-indented, report why on
# stderr and fall back to writing the raw, unformatted text.
if {[catch {puts $f [beautify $result]} error]} {
    puts stderr $errorInfo
    puts $f $result
}
# Only close channels this script opened itself.
if {$f ne "stdout"} {
    close $f
}