include "TECManager_h.pxi"

import Enumerators
import MacOS
import codecs


# Translate an OSStatus-style result code into a Python exception.
#
# Returns 0 when _err is noErr.  Otherwise raises MacOS.Error with the text
# from Enumerators.TECErrors when the code is listed there, or falls back to
# PyMac_Error for any other code.  Declared "except -1" so callers propagate
# the exception automatically.
#
# NOTE: statuses such as kTECUsedFallbacksStatus are not special-cased here;
# any non-noErr value raises, so callers filter the statuses they tolerate
# before calling.
cdef int CheckError(int _err) except -1:
    if _err == noErr:
        return 0
    if _err in Enumerators.TECErrors:
        raise MacOS.Error(_err, Enumerators.TECErrors[_err])
    else:
        PyMac_Error(_err)
        raise
    return -1


def getTextEncoding(script=None, language=None, region=None, font=None):
    """
    getTextEncoding(
        script=None,    # defaults to "don't care", but you can specify an integer or constant from TECManager.ScriptCode (smRoman, smJapanese, etc..)
        language=None,  # defaults to "don't care", but you can specify an integer or constant from TECManager.LanguageCode(langEnglish, langMaltese, etc..)
        region=None,    # defaults to "don't care", but you can specify an integer or constant from TECManager.RegionCode (verUS, verArabic, etc..)
        font=None,      # defaults to "don't care", but you can specify the name of a system font as a python string
    ) -> int
    """
    cdef int _script
    cdef int _language
    cdef int _region
    cdef int _textEncoding
    cdef char _font[256]
    cdef char *_fontref
    cdef OSStatus err

    if script is None:
        _script = kTextScriptDontCare
    else:
        _script = script

    if language is None:
        _language = kTextLanguageDontCare
    else:
        _language = language

    if region is None:
        _region = kTextRegionDontCare
    else:
        # Fixed: this used to read "region = region", which left _region
        # uninitialised whenever a region was supplied.
        _region = region

    if font is None:
        _fontref = NULL
    else:
        # PyMac_GetStr255 fills _font from the Python object; a zero return
        # is treated as failure and propagated with a bare raise.
        if PyMac_GetStr255(font, _font) == 0:
            raise
        _fontref = _font

    err = UpgradeScriptInfoToTextEncoding(_script, _language, _region, _fontref, &_textEncoding)
    CheckError(err)
    return _textEncoding


# Text -> unicode converter that owns a TextToUnicodeInfo for one text
# encoding and keeps it between decode() calls until reset() is called.
cdef class StatefulDecoder:
    cdef readonly int textEncoding
    cdef TextToUnicodeInfo convInfo
    cdef readonly name
    cdef public errors

    def __new__(self, textEncoding, name, errors, *args, **kwargs):
        # Creates the converter for textEncoding; `name` is used when
        # building UnicodeDecodeError and `errors` is the default error
        # handler name for decode().
        cdef OSStatus err
        self.convInfo = NULL
        self.name = name
        self.errors = errors
        err = CreateTextToUnicodeInfoByEncoding(textEncoding, &self.convInfo)
        CheckError(err)
        self.textEncoding = textEncoding

    def stateless_decode(self, text, errors='strict', mask=()):
        """Reset the converter, then decode `text` with an empty default mask."""
        self.reset()
        return self.decode(text, errors=errors, mask=mask)

    def decode(self, text, errors=None, mask=(kUnicodeKeepInfoMask,kUnicodeStringUnterminatedMask,)):
        """
        Decode `text` (any read-buffer object) to unicode.

        errors: name of a codecs error handler; defaults to self.errors.
        mask:   a single integer or a sequence of integers OR-ed together
                into the conversion control flags.

        Returns (unicode, number_of_input_bytes_consumed).  Unmappable
        elements are routed through the codecs error handler; any other
        unexpected status raises via CheckError.
        """
        cdef int _mask
        cdef char* t_buffer
        cdef int t_length
        cdef int uni_length
        cdef int out_length
        cdef int conv_length
        cdef int conv_length_total
        cdef UniChar* oUnicodeStr

        if errors is None:
            errors = self.errors
        if isinstance(mask, (int, long)):
            mask = (mask,)
        _mask = 0
        for msk in mask:
            _mask = _mask | msk

        # NOTE(review): the return value of PyObject_AsReadBuffer is not
        # checked here; confirm how it is declared in TECManager_h.pxi.
        PyObject_AsReadBuffer(text, &t_buffer, &t_length)
        if t_length == 0:
            return (unicode(), 0)

        uni_length = t_length * 4
        oUnicodeStr = PyMem_Malloc(uni_length) # could be up to 6, this seems reasonable
        if oUnicodeStr == NULL:
            raise MemoryError()

        err = ConvertFromTextToUnicode(
            self.convInfo,
            t_length, t_buffer,
            _mask,
            0, NULL, NULL, NULL,
            uni_length, &conv_length, &out_length, oUnicodeStr
        )
        # kTECBufferBelowMinimumSizeErr
        # kTECArrayFullErr
        # kTECUnmappableElementErr
        # kTECIncompleteElementErr
        # kTECUsedFallbacksStatus
        # kTECOutputBufferFullStatus
        if err == noErr:
            # handle the common, everything worked case
            # out_length is shifted right by one to turn the value reported
            # by the converter into a UniChar count.
            res = (PyUnicode_FromUnicode(oUnicodeStr, out_length >> 1), conv_length)
            PyMem_Free(oUnicodeStr)
            return res
        elif err == kTECOutputBufferFullStatus:
            pass
        elif err != kTECUnmappableElementErr:
            # handle the not-something-we-can-handle case
            PyMem_Free(oUnicodeStr)
            CheckError(err)

        # Partial result: keep what was converted so far and continue from
        # the first unconverted byte.
        sofar = PyUnicode_FromUnicode(oUnicodeStr, out_length >> 1)
        decodelst = [sofar]
        conv_length_total = conv_length
        exc = UnicodeDecodeError(self.name, text, conv_length_total, conv_length_total+1, 'Unmappable Element')
        try:
            errs = codecs.lookup_error(errors)
            while True:
                if err == kTECUnmappableElementErr:
                    # Let the error handler supply a replacement and the
                    # input position to resume from.
                    exc.start = conv_length_total
                    exc.end = conv_length_total + 1
                    replacement, conv_length_total = errs(exc)
                    decodelst.append(replacement)
                if conv_length_total == t_length:
                    # handle the last-character-undecodable case
                    break
                err = ConvertFromTextToUnicode(
                    self.convInfo,
                    t_length - conv_length_total, t_buffer + conv_length_total,
                    _mask,
                    0, NULL, NULL, NULL,
                    uni_length, &conv_length, &out_length, oUnicodeStr
                )
                sofar = PyUnicode_FromUnicode(oUnicodeStr, out_length >> 1)
                decodelst.append(sofar)
                conv_length_total = conv_length_total + conv_length
                if err == noErr:
                    break
                elif err == kTECOutputBufferFullStatus:
                    pass
                elif err != kTECUnmappableElementErr:
                    CheckError(err)
        except:
            # Release the scratch buffer on any failure, then re-raise.
            PyMem_Free(oUnicodeStr)
            raise
        PyMem_Free(oUnicodeStr)
        return (unicode().join(decodelst), conv_length_total)

    def reset(self):
        """Reset the underlying converter via ResetTextToUnicodeInfo."""
        err = ResetTextToUnicodeInfo(self.convInfo)
        CheckError(err)

    def __dealloc__(self):
        cdef OSStatus err
        if self.convInfo != NULL:
            # The status is deliberately not checked during deallocation.
            err = DisposeTextToUnicodeInfo(&self.convInfo)
            #CheckError(err)


# Unicode -> text converter that owns a UnicodeToTextInfo for one text
# encoding and keeps it between encode() calls until reset() is called.
cdef class StatefulEncoder:
    cdef readonly int textEncoding
    cdef UnicodeToTextInfo convInfo
    cdef readonly name
    cdef public errors

    def __new__(self, textEncoding, name, errors, *args, **kwargs):
        # Creates the converter for textEncoding; `name` is used when
        # building UnicodeEncodeError and for encoding replacements, and
        # `errors` is the default error handler name for encode().
        cdef OSStatus err
        self.convInfo = NULL
        self.name = name
        self.errors = errors
        err = CreateUnicodeToTextInfoByEncoding(textEncoding, &self.convInfo)
        CheckError(err)
        self.textEncoding = textEncoding

    def stateless_encode(self, text, errors=None, mask=()):
        """Reset the converter, then encode `text` with an empty default mask."""
        self.reset()
        return self.encode(text, errors=errors, mask=mask)

    def encode(self, text, errors=None, mask=(kUnicodeKeepInfoMask,kUnicodeStringUnterminatedMask,)):
        """
        Encode `text` (coerced with unicode()) to a byte string.

        errors: name of a codecs error handler; defaults to self.errors.
        mask:   a single integer or a sequence of integers OR-ed together
                into the conversion control flags.

        Returns (str, amount_of_input_consumed), where the second item is
        counted in the units the converter reports (the unicode buffer
        length from PyObject_AsReadBuffer is used as the total).
        """
        cdef int _mask
        cdef UniChar *t_buffer
        cdef int t_length
        cdef int out_length
        cdef int conv_length
        cdef int conv_length_total
        cdef int str_length
        cdef char* oStr

        if errors is None:
            errors = self.errors
        if isinstance(mask, (int, long)):
            mask = (mask,)
        _mask = 0
        for msk in mask:
            _mask = _mask | msk

        text = unicode(text)
        # NOTE(review): the return value of PyObject_AsReadBuffer is not
        # checked here; confirm how it is declared in TECManager_h.pxi.
        PyObject_AsReadBuffer(text, &t_buffer, &t_length)
        if t_length == 0:
            return ('', 0)

        str_length = t_length + 16 # Let's guess that string reps aren't really any bigger than unicode reps
        oStr = PyMem_Malloc(str_length)
        if oStr == NULL:
            raise MemoryError()

        # Zeroed so the status check below never reads indeterminate values
        # if the converter fails without writing its outputs.
        conv_length = 0
        out_length = 0
        # NOTE(review): t_length (not str_length) is passed as the output
        # buffer size, so the 16 spare bytes are never offered to the
        # converter; kept as in the original.
        err = ConvertFromUnicodeToText(
            self.convInfo,
            t_length, t_buffer,
            _mask,
            0, NULL, NULL, NULL,
            t_length, &conv_length, &out_length, oStr
        )
        # kTECBufferBelowMinimumSizeErr
        # kTECArrayFullErr
        # kTECUnmappableElementErr
        # kTECIncompleteElementErr
        # kTECUsedFallbacksStatus
        # kTECOutputBufferFullStatus
        #
        # Fixed: this test used to read conv_length_total, which had not
        # been assigned yet at this point; conv_length is the value the
        # conversion call just produced.
        if err == noErr or conv_length == t_length:
            # handle the common, everything worked case
            res = (PyString_FromStringAndSize(oStr, out_length), conv_length)
            PyMem_Free(oStr)
            return res
        # NOTE(review): the comments above spell this status
        # kTECUsedFallbacksStatus; confirm kTECUsedFallbackStatus is the name
        # actually declared in TECManager_h.pxi.
        elif not (
                err == kTECOutputBufferFullStatus or
                err == kTECIncompleteElementErr or
                err == kTECUsedFallbackStatus or
                err == kTECUnmappableElementErr):
            # handle the not-something-we-can-handle case
            PyMem_Free(oStr)
            CheckError(err)

        # Partial result: keep what was converted so far and continue from
        # the first unconverted element.
        sofar = PyString_FromStringAndSize(oStr, out_length)
        decodelst = [sofar]
        conv_length_total = conv_length
        exc = UnicodeEncodeError(self.name, text, 0, 0, 'Unmappable Element')
        try:
            errs = codecs.lookup_error(errors)
            while True:
                if err == kTECUnmappableElementErr:
                    # conv_length_total is halved to get a character index
                    # for the error handler, and the index it returns is
                    # doubled again afterwards.
                    exc.start = (conv_length_total >> 1)
                    exc.end = (conv_length_total >> 1) + 1
                    replacement, conv_length_total = errs(exc)
                    replacement = replacement.encode(self.name)
                    conv_length_total = conv_length_total << 1
                    decodelst.append(replacement)
                if conv_length_total == t_length:
                    break
                err = ConvertFromUnicodeToText(
                    self.convInfo,
                    t_length - conv_length_total, t_buffer + (conv_length_total >> 1),
                    _mask,
                    0, NULL, NULL, NULL,
                    t_length, &conv_length, &out_length, oStr
                )
                sofar = PyString_FromStringAndSize(oStr, out_length)
                decodelst.append(sofar)
                conv_length_total = conv_length_total + conv_length
                if err == noErr:
                    break
                elif not (
                        err == kTECOutputBufferFullStatus or
                        err == kTECIncompleteElementErr or
                        err == kTECUsedFallbackStatus or
                        err == kTECUnmappableElementErr):
                    CheckError(err)
        except:
            # Release the scratch buffer on any failure, then re-raise.
            PyMem_Free(oStr)
            raise
        PyMem_Free(oStr)
        return (''.join(decodelst), conv_length_total)

    def reset(self):
        """Reset the underlying converter via ResetUnicodeToTextInfo."""
        err = ResetUnicodeToTextInfo(self.convInfo)
        CheckError(err)

    def __dealloc__(self):
        cdef OSStatus err
        if self.convInfo != NULL:
            # The status is deliberately not checked during deallocation.
            err = DisposeUnicodeToTextInfo(&self.convInfo)
            #CheckError(err)


def ConvertToUnicode(text, script=None, language=None, region=None, font=None, mask=(kUnicodeUseFallbacksMask,)): # (kUnicodeUseFallbacksMask,)
    """
    ConvertToUnicode(
        text,           # the input text, must be a python string
        script=None,    # defaults to "don't care", but you can specify an integer or constant from TECManager.ScriptCode (smRoman, smJapanese, etc..)
        language=None,  # defaults to "don't care", but you can specify an integer or constant from TECManager.LanguageCode(langEnglish, langMaltese, etc..)
        region=None,    # defaults to "don't care", but you can specify an integer or constant from TECManager.RegionCode (verUS, verArabic, etc..)
        font=None,      # defaults to "don't care", but you can specify the name of a system font as a python string
        mask=(kUnicodeUseFallbacksMask,)    # The conversion mask can be specified as a sequence of integers or as a single integer.  Use masks from TECManager.UnicodeControlFlags
    ) -> unicode
    """
    cdef int _mask
    cdef TextToUnicodeInfo _convInfo
    cdef OSStatus err, err2
    cdef char* _fontref
    cdef char _font[256]
    cdef char* t_buffer
    cdef int t_length
    cdef int out_length
    cdef int conv_length
    cdef UniChar* oUnicodeStr

    if isinstance(mask, (int, long)):
        mask = (mask,)
    _mask = 0
    for msk in mask:
        _mask = _mask | msk

    # Fixed: bind the str() result to a name so the object stays alive while
    # t_buffer points into it.  The original passed the temporary str(text)
    # straight to PyString_AsStringAndSize.  Done before the converter is
    # created so a failing str() cannot leak it.
    text = str(text)

    err = CreateTextToUnicodeInfoByEncoding(getTextEncoding(script=script, language=language, region=region, font=font), &_convInfo)
    CheckError(err)

    err = PyString_AsStringAndSize(text, &t_buffer, &t_length)

    oUnicodeStr = PyMem_Malloc(t_length * 4) # could be up to 6, this seems reasonable
    if oUnicodeStr == NULL:
        # Do not leak the converter when the scratch buffer cannot be allocated.
        DisposeTextToUnicodeInfo(&_convInfo)
        raise MemoryError()

    err = ConvertFromTextToUnicode(
        _convInfo,
        t_length, t_buffer,
        _mask,
        0, NULL, NULL, NULL,
        t_length * 4, &conv_length, &out_length, oUnicodeStr
    )
    # The converter is disposed before the status is examined so it is
    # released on both the success and the failure path.
    err2 = DisposeTextToUnicodeInfo(&_convInfo)

    # XXX - do strict checking here
    if err == -8783: # kTECUsedFallbacksStatus
        err = noErr

    if (err == noErr and err2 == noErr):
        rval = PyUnicode_FromUnicode(oUnicodeStr, out_length >> 1)
        PyMem_Free(oUnicodeStr)
        return rval
    PyMem_Free(oUnicodeStr)
    CheckError(err)
    CheckError(err2)