Updated 2008-11-21 13:38:47 by dkf

Richard Suchenwirth 2000-04-10 - Chinlish ("Chinese from English (alphabet)") is a converter that translates a number of Chinese words written in Pinyin transcription (exception: use y for u-umlaut) to the corresponding Unicode characters. In contrast to the other members of The Lish family, this is (and can only be) a partial solution, because the 4000 to 6000 Chinese characters in common use cannot be mapped unambiguously, even in context, to the roughly 400 Pinyin syllables. So if you use Chinlish, two things might happen:

  • The word you wanted was not in the dictionary. The pinyin string will come back unchanged. Remedy: if you need it more than once, add it to the dictionary (and post it on the Wiki, for the rest of us ;-). For single occurrences, use the \u notation, which always works.
  • The word you wanted was not the one retrieved, e.g. you wanted a different shi than the most frequent copula. Again: edit the dictionary, if you feel the need, or write \u....

Simplified (PRC) and traditional character forms (Hong Kong, Taiwan) are supported. Default, and recommended for dictionary entries, is Simplified. If you call the main proc chinlish (or the short name cn) with the -fan switch, traditional codes are substituted where appropriate, i.e. as defined in the i18n_jian2fan list. If you add words with jian/fan characters to the dictionary, update the jian2fan map also. Maybe some database can be found that completely covers this mapping, but I was offline over the weekend ;-) - RS 2007-09-04: many years later, a more comprehensive jian/fan converter is finally available at fan2jian and jian2fan.
 # i18n_pin2u -- the Pinyin dictionary used by chinlish and cn:dic.
 # Keys are Pinyin words (case-sensitive), values are the simplified
 # Chinese characters written as \u escapes; the escapes are resolved
 # when "array set" parses the braced text as a list.
 # Do not put "#" comments inside the braces: they would become data.
 # When adding words whose characters have a distinct traditional form,
 # extend the i18n_jian2fan table as well.
 array set i18n_pin2u {
	ba \u628a
	Beijing \u5317\u4eac
	bu \u4e0d
	canguan \u53c2\u89c2
	chengxu \u7a0b\u5e8f
	da \u5927
	daxue \u5927\u5b66
	de \u7684
	erqie \u800c\u4e14
	feichang \u975e\u5e38
	ge \u4e2a
	gongzuo \u5de5\u4f5c
	hao \u597d
	he \u548c
	hen \u5f88
	huanying \u6b22\u8fce
	huida \u56de\u7b54
	jintian \u4eca\u5929
	jisuanji \u8ba1\u7b97\u673a
	kexue \u79d1\u5b66
	lao \u8001
	laoshi \u8001\u5e08
	le \u4e86
	Nanjing \u5357\u4eac
	neng \u80fd
	nenggou \u80fd\u591f
	ni \u4f60
	nimen \u4f60\u4eec
	pengyou \u670b\u53cb
	relie \u70ed\u70c8
	ren \u4eba
	Shanghai \u4e0a\u6d77
	shi \u662f
	suoyi \u6240\u4ee5
	ta \u4ed6
	tamen \u4ed6\u4eec
	Tianjin \u5929\u6d25
	wenti \u95ee\u9898
	wo \u6211
	women \u6211\u4eec
	Xianggang \u9999\u6e2f
	xiao \u5c0f
	xiaoxue \u5c0f\u5b66
	xuesheng \u5b66\u751f
	yanjiu \u7814\u7a76
	yi \u4e00
	yinwei \u56e0\u4e3a
	you \u6709
	yuanlai \u539f\u6765
	zai \u5728
	zhe \u8fd9
	zheyang \u8fd9\u6837
	zhongguo \u4e2d\u56fd
	zhongwen \u4e2d\u6587
	zhongxue \u4e2d\u5b66
	zhuanhuan \u8f6c\u6362
 } ;#--------------------- above: the dictionary - extend as required
 # chinlish -- convert Pinyin words to Chinese characters.
 #
 # Usage: chinlish ?-fan? ?text ...?
 #
 # All arguments are joined with spaces and split into whitespace-separated
 # words, so "chinlish ni hao" and "chinlish {ni hao}" behave the same.
 # Each word found in ::i18n_pin2u is replaced by its dictionary value;
 # unknown words are passed through unchanged. A period at the very end of
 # the text becomes the ideographic full stop (U+3002); runs of .,:;!? are
 # split off so the word before them can still be looked up.
 # The word "-fan" (as a word of its own, anywhere) is not translated; it
 # requests that the result be passed through jian2fan.
 # With no arguments a built-in demo sentence is converted.
 #
 # Returns the converted words concatenated without separators.
 proc chinlish {args} {
	if {[llength $args] == 0} {
		set args [list "huanying, zhe shi zhongwen zhuanhuan chengxu"]
	}
	# Flatten to plain words first; this also picks out the -fan switch
	# without touching words that merely contain "-fan".
	set fan 0
	set words [list]
	foreach word [split [join $args " "]] {
		if {![string compare $word "-fan"]} {
			set fan 1
		} elseif {[string length $word]} {
			lappend words $word
		}
	}
	set text [join $words " "]
	# A final period becomes the Chinese full stop.
	regsub {[.]$} $text " \u3002" text
	# Detach punctuation so "huanying," is looked up as "huanying".
	regsub -all {([.,:;!?]+)} $text { \1} text
	set res ""
	foreach word [split $text] {
		if {[info exists ::i18n_pin2u($word)]} {
			append res $::i18n_pin2u($word)
		} else {
			append res $word
		}
	}
	if {$fan} {set res [jian2fan $res]}
	return $res
 }
 # cn -- short name for chinlish.
 # The arguments are joined, split into whitespace-separated words and
 # handed to chinlish as a pure list, so characters such as [ ] $ ; in the
 # text are passed through literally instead of being evaluated as Tcl code.
 # Returns whatever chinlish returns.
 proc cn {args} {
	eval [linsert [split [join $args]] 0 chinlish]
 }
 # cn:dic -- list dictionary entries whose Pinyin key matches a glob pattern.
 # s: glob pattern matched against the keys of ::i18n_pin2u.
 # Returns a flat list "key value key value ...", ordered by lsort on the keys.
 proc cn:dic {s} {
	set pairs {}
	set keys [lsort [array names ::i18n_pin2u $s]]
	foreach key $keys {
		lappend pairs $key [set ::i18n_pin2u($key)]
	}
	return $pairs
 }
 # i18n_jian2fan -- flat list of "simplified traditional" character pairs
 # used by jian2fan and fan2jian. The \u escapes are resolved when the
 # text is parsed as a list. Do not put "#" comments inside the braces.
 # It should cover every character in i18n_pin2u that has a distinct
 # traditional form.
 set i18n_jian2fan {
	\u4e2a \u500b
	\u4e3a \u7232
	\u4eec \u5011
	\u53c2 \u53c3
	\u56fd \u570b
	\u591f \u5920
	\u5b66 \u5b78
	\u5e08 \u5e2b
	\u6362 \u63db
	\u673a \u6a5f
	\u6765 \u4f86
	\u6837 \u6a23
	\u6b22 \u6b61
	\u70ed \u71b1
	\u89c2 \u89c0
	\u8ba1 \u8a08
	\u8f6c \u8f49
	\u8fd9 \u9019
	\u95ee \u554f
	\u9898 \u984c
 }

#--------- simplified(jian) - traditional(fan) mapping (incomplete) - see fan2jian and jian2fan for better data
 # jian2fan -- replace simplified characters by their traditional forms.
 # s: text to convert.
 # Every first element of a pair in ::i18n_jian2fan is replaced by the
 # second one. The replacement is literal and done in one pass, so table
 # entries are never interpreted as regular expressions.
 # Returns the converted text; characters not in the table are unchanged.
 proc jian2fan s {
	return [string map $::i18n_jian2fan $s]
 }
 # fan2jian -- replace traditional characters by their simplified forms.
 # s: text to convert.
 # Uses the pairs of ::i18n_jian2fan in reverse direction. The replacement
 # is literal and done in one pass, so table entries are never interpreted
 # as regular expressions.
 # Returns the converted text; characters not in the table are unchanged.
 proc fan2jian s {
	set reversed [list]
	foreach {jian fan} $::i18n_jian2fan {
		lappend reversed $fan $jian
	}
	return [string map $reversed $s]
 }