""" babel.messages.mofile ~~~~~~~~~~~~~~~~~~~~~ Writing of files in the ``gettext`` MO (machine object) format. :copyright: (c) 2013-2023 by the Babel Team. :license: BSD, see LICENSE for more details. """ from __future__ import annotations import array import struct from typing import TYPE_CHECKING from babel.messages.catalog import Catalog, Message if TYPE_CHECKING: from _typeshed import SupportsRead, SupportsWrite LE_MAGIC: int = 0x950412de BE_MAGIC: int = 0xde120495 def read_mo(fileobj: SupportsRead[bytes]) -> Catalog: """Read a binary MO file from the given file-like object and return a corresponding `Catalog` object. :param fileobj: the file-like object to read the MO file from :note: The implementation of this function is heavily based on the ``GNUTranslations._parse`` method of the ``gettext`` module in the standard library. """ catalog = Catalog() headers = {} filename = getattr(fileobj, 'name', '') buf = fileobj.read() buflen = len(buf) unpack = struct.unpack # Parse the .mo file header, which consists of 5 little endian 32 # bit words. magic = unpack('4I', buf[4:20]) ii = '>II' else: raise OSError(0, 'Bad magic number', filename) # Now put all messages from the .mo file buffer into the catalog # dictionary for _i in range(msgcount): mlen, moff = unpack(ii, buf[origidx:origidx + 8]) mend = moff + mlen tlen, toff = unpack(ii, buf[transidx:transidx + 8]) tend = toff + tlen if mend < buflen and tend < buflen: msg = buf[moff:mend] tmsg = buf[toff:tend] else: raise OSError(0, 'File is corrupt', filename) # See if we're looking at GNU .mo conventions for metadata if mlen == 0: # Catalog description lastkey = key = None for item in tmsg.splitlines(): item = item.strip() if not item: continue if b':' in item: key, value = item.split(b':', 1) lastkey = key = key.strip().lower() headers[key] = value.strip() elif lastkey: headers[lastkey] += b'\n' + item if b'\x04' in msg: # context ctxt, msg = msg.split(b'\x04') else: ctxt = None if b'\x00' in msg: # plural forms msg = msg.split(b'\x00') tmsg = tmsg.split(b'\x00') if catalog.charset: msg = [x.decode(catalog.charset) for x in msg] tmsg = [x.decode(catalog.charset) for x in tmsg] else: if catalog.charset: msg = msg.decode(catalog.charset) tmsg = tmsg.decode(catalog.charset) catalog[msg] = Message(msg, tmsg, context=ctxt) # advance to next entry in the seek tables origidx += 8 transidx += 8 catalog.mime_headers = headers.items() return catalog def write_mo(fileobj: SupportsWrite[bytes], catalog: Catalog, use_fuzzy: bool = False) -> None: """Write a catalog to the specified file-like object using the GNU MO file format. >>> import sys >>> from babel.messages import Catalog >>> from gettext import GNUTranslations >>> from io import BytesIO >>> catalog = Catalog(locale='en_US') >>> catalog.add('foo', 'Voh') >>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz')) >>> catalog.add('fuz', 'Futz', flags=['fuzzy']) >>> catalog.add('Fizz', '') >>> catalog.add(('Fuzz', 'Fuzzes'), ('', '')) >>> buf = BytesIO() >>> write_mo(buf, catalog) >>> x = buf.seek(0) >>> translations = GNUTranslations(fp=buf) >>> if sys.version_info[0] >= 3: ... translations.ugettext = translations.gettext ... translations.ungettext = translations.ngettext >>> translations.ugettext('foo') u'Voh' >>> translations.ungettext('bar', 'baz', 1) u'Bahr' >>> translations.ungettext('bar', 'baz', 2) u'Batz' >>> translations.ugettext('fuz') u'fuz' >>> translations.ugettext('Fizz') u'Fizz' >>> translations.ugettext('Fuzz') u'Fuzz' >>> translations.ugettext('Fuzzes') u'Fuzzes' :param fileobj: the file-like object to write to :param catalog: the `Catalog` instance :param use_fuzzy: whether translations marked as "fuzzy" should be included in the output """ messages = list(catalog) messages[1:] = [m for m in messages[1:] if m.string and (use_fuzzy or not m.fuzzy)] messages.sort() ids = strs = b'' offsets = [] for message in messages: # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. if message.pluralizable: msgid = b'\x00'.join([ msgid.encode(catalog.charset) for msgid in message.id ]) msgstrs = [] for idx, string in enumerate(message.string): if not string: msgstrs.append(message.id[min(int(idx), 1)]) else: msgstrs.append(string) msgstr = b'\x00'.join([ msgstr.encode(catalog.charset) for msgstr in msgstrs ]) else: msgid = message.id.encode(catalog.charset) msgstr = message.string.encode(catalog.charset) if message.context: msgid = b'\x04'.join([message.context.encode(catalog.charset), msgid]) offsets.append((len(ids), len(msgid), len(strs), len(msgstr))) ids += msgid + b'\x00' strs += msgstr + b'\x00' # The header is 7 32-bit unsigned integers. We don't use hash tables, so # the keys start right after the index tables. keystart = 7 * 4 + 16 * len(messages) valuestart = keystart + len(ids) # The string table first has the list of keys, then the list of values. # Each entry has first the size of the string, then the file offset. koffsets = [] voffsets = [] for o1, l1, o2, l2 in offsets: koffsets += [l1, o1 + keystart] voffsets += [l2, o2 + valuestart] offsets = koffsets + voffsets fileobj.write(struct.pack('Iiiiiii', LE_MAGIC, # magic 0, # version len(messages), # number of entries 7 * 4, # start of key index 7 * 4 + len(messages) * 8, # start of value index 0, 0 # size and offset of hash table ) + array.array.tobytes(array.array("i", offsets)) + ids + strs)