#!/usr/bin/env python
"""  create test data for codecs testing

 this forces BOM on utf_16_[bl]e codecs

 to get BOM-less test data, convert from XXX as follows:
 dd skip=2 bs=1 if=XXX of=XXX_NOBOM
"""

import sys, codecs, re

# some test data: just the "latin-1 supplement" (code points 0xA0..0xFF)
try:
    _unichr = unichr    # Python 2: chr() only produces byte strings
except NameError:
    _unichr = chr       # Python 3: unichr is gone, chr() returns text
uni_testdata = "".join(map(_unichr, range(0xa0, 0x100)))

def output(encoding, perchar=False):
    """Write the module-level ``uni_testdata`` to stdout in ``encoding``.

    The text is pushed through a codec stream writer created with
    ``errors="replace"``, so characters the codec cannot represent are
    substituted instead of raising.

    When the encoding name looks like an explicit-endian 16-bit codec
    (it matches ``.+16[_-]?([bl]e)``, case-insensitively, e.g.
    ``utf_16_le`` or ``utf-16be``), the matching byte order mark
    (``codecs.BOM_LE`` / ``codecs.BOM_BE``) is written raw before any
    encoded text.

    ``sys.stdout`` itself is never rebound, so repeated calls do not stack
    one codec writer on top of another.

    Parameters:
        encoding: codec name understood by the ``codecs`` registry.
        perchar:  any true value selects one line per character in the form
                  ``index (0xCODEPOINT): char``; otherwise the whole string
                  is written as a single line.

    Raises:
        LookupError: if ``encoding`` is not a known codec.
    """
    stream = sys.stdout
    # A Python 3 text stream exposes its byte stream as ``buffer``; a
    # Python 2 stdout takes bytes directly, so fall back to the stream.
    raw = getattr(stream, "buffer", stream)
    if raw is not stream:
        # keep text already queued in the text layer ahead of our raw bytes
        stream.flush()
    writer = codecs.getwriter(encoding)(raw, errors="replace")

    m = re.match(r"^.+16[_-]?([bl]e)", encoding, re.IGNORECASE)
    if m:
        # group(1) is "le" or "be" in any case -> codecs.BOM_LE / BOM_BE
        bom = getattr(codecs, "BOM_" + m.group(1).upper())
        raw.write(bom)
        raw.flush()

    if perchar:
        # per-character format was just a prog dev. tool
        for ii, cc in enumerate(uni_testdata):
            writer.write("%d (0x%02X): %s\n" % (ii, 0xA0 + ii, cc))
    else:
        writer.write(uni_testdata + "\n")

if __name__ == "__main__":
    # usage: <script> [encoding [per-char-flag]]
    args = sys.argv[1:]
    enc = "u8"          # default when no encoding is given
    if args:
        enc = args[0]
    # any second argument, whatever its value, switches on per-char output
    per_ch = args[1:]
    output(enc, per_ch)

    
#===eof===
