# :Id: $Id: latex2mathml.py 9338 2023-04-08 21:08:47Z milde $
# :Copyright: © 2005 Jens Jørgen Mortensen [1]_
#             © 2010, 2021 Günter Milde.
#
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
#
# .. [1] the original `rst2mathml.py` in `sandbox/jensj/latex_math`

"""Convert LaTeX maths code into presentational MathML.

This module is provisional:
the API is not settled and may change with any minor Docutils version.
"""

# Usage:
#
# >>> from latex2mathml import *

import re
import unicodedata

from docutils.utils.math import tex2unichar, toplevel_code


# Character data
# --------------

# LaTeX math macro to Unicode mappings.
# Character categories.

# identifiers -> <mi>

# NOTE: `letters` is an alias of (not a copy of) `tex2unichar.mathalpha`,
# so the assignment below also changes the mapping in that module.
letters = tex2unichar.mathalpha
letters['hbar'] = '\u210F'  # compatibility mapping to ℏ (\hslash).
# (ħ LATIN SMALL LETTER H WITH STROKE is upright)

# special case: Capital Greek letters: (upright in TeX style)
greek_capitals = {
    'Phi': '\u03a6', 'Xi': '\u039e', 'Sigma': '\u03a3',
    'Psi': '\u03a8', 'Delta': '\u0394', 'Theta': '\u0398',
    'Upsilon': '\u03d2', 'Pi': '\u03a0', 'Omega': '\u03a9',
    'Gamma': '\u0393', 'Lambda': '\u039b'}

# functions -> <mi>
functions = {
    # functions with a space in the name
    'liminf': 'lim\u202finf',
    'limsup': 'lim\u202fsup',
    'injlim': 'inj\u202flim',
    'projlim': 'proj\u202flim',
    # embellished function names (see handle_cmd() below)
    'varlimsup': 'lim',
    'varliminf': 'lim',
    'varprojlim': 'lim',
    'varinjlim': 'lim',
    # custom function name
    'operatorname': None,
}
functions.update((name, name) for name in
                 ('arccos', 'arcsin', 'arctan', 'arg', 'cos',
                  'cosh', 'cot', 'coth', 'csc', 'deg',
                  'det', 'dim', 'exp', 'gcd', 'hom',
                  'ker', 'lg', 'ln', 'log', 'Pr',
                  'sec', 'sin', 'sinh', 'tan', 'tanh'))
# Function with limits: 'lim', 'sup', 'inf', 'max', 'min':
# use <mo> to allow "movablelimits" attribute (see below).

# modulo operator/arithmetic
modulo_functions = {
    # cmdname: (binary, named, parentheses, padding)
    'bmod': (True, True, False, '0.278em'),  # a mod n
    'pmod': (False, True, True, '0.444em'),  # a (mod n)
    'mod': (False, True, False, '0.667em'),  # a mod n
    'pod': (False, False, True, '0.444em'),  # a (n)
}

# math font selection -> <mi mathvariant=...> or <mstyle mathvariant=...>
math_alphabets = {
    # 'cmdname': 'mathvariant value'  # package
    'boldsymbol': 'bold',
    'mathbf': 'bold',
    'mathit': 'italic',
    'mathtt': 'monospace',
    'mathrm': 'normal',
    'mathsf': 'sans-serif',
    'mathcal': 'script',
    'mathbfit': 'bold-italic',  # isomath
    'mathbb': 'double-struck',  # amssymb
    'mathfrak': 'fraktur',  # amssymb
    'mathsfit': 'sans-serif-italic',  # isomath
    'mathsfbfit': 'sans-serif-bold-italic',  # isomath
    'mathscr': 'script',  # mathrsfs
    # unsupported: bold-fraktur
    #              bold-script
    #              bold-sans-serif
}

# operator, fence, or separator -> <mo>

stretchables = {
    # extensible delimiters allowed in left/right cmds
    'backslash': '\\',
    'uparrow': '\u2191',  # ↑ UPWARDS ARROW
    'downarrow': '\u2193',  # ↓ DOWNWARDS ARROW
    'updownarrow': '\u2195',  # ↕ UP DOWN ARROW
    'Uparrow': '\u21d1',  # ⇑ UPWARDS DOUBLE ARROW
    'Downarrow': '\u21d3',  # ⇓ DOWNWARDS DOUBLE ARROW
    'Updownarrow': '\u21d5',  # ⇕ UP DOWN DOUBLE ARROW
    'lmoustache': '\u23b0',  # ⎰ … CURLY BRACKET SECTION
    'rmoustache': '\u23b1',  # ⎱ … LEFT CURLY BRACKET SECTION
    'arrowvert': '\u23d0',  # ⏐ VERTICAL LINE EXTENSION
    'bracevert': '\u23aa',  # ⎪ CURLY BRACKET EXTENSION
    'lvert': '|',  # left |
    'lVert': '\u2016',  # left ‖
    'rvert': '|',  # right |
    'rVert': '\u2016',  # right ‖
    'Arrowvert': '\u2016',  # ‖
}
stretchables.update(tex2unichar.mathfence)
stretchables.update(tex2unichar.mathopen)  # Braces
stretchables.update(tex2unichar.mathclose)  # Braces

# >>> print(' '.join(sorted(set(stretchables.values()))))
# [ \ ] { | } ‖ ↑ ↓ ↕ ⇑ ⇓ ⇕ ⌈ ⌉ ⌊ ⌋ ⌜ ⌝ ⌞ ⌟ ⎪ ⎰ ⎱ ⏐ ⟅ ⟆ ⟦ ⟧ ⟨ ⟩ ⟮ ⟯ ⦇ ⦈

operators = {
    # negated symbols without pre-composed Unicode character
    'nleqq': '\u2266\u0338',  # ≦̸
    'ngeqq': '\u2267\u0338',  # ≧̸
    'nleqslant': '\u2a7d\u0338',  # ⩽̸
    'ngeqslant': '\u2a7e\u0338',  # ⩾̸
    'ngtrless': '\u2277\u0338',  # txfonts
    'nlessgtr': '\u2276\u0338',  # txfonts
    'nsubseteqq': '\u2AC5\u0338',  # ⫅̸
    'nsupseteqq': '\u2AC6\u0338',  # ⫆̸
    # compatibility definitions:
    'centerdot': '\u2B1D',  # BLACK VERY SMALL SQUARE | mathbin
    'varnothing': '\u2300',  # ⌀ DIAMETER SIGN | empty set
    'varpropto': '\u221d',  # ∝ PROPORTIONAL TO | sans serif
    'triangle': '\u25B3',  # WHITE UP-POINTING TRIANGLE | mathord
    'triangledown': '\u25BD',  # WHITE DOWN-POINTING TRIANGLE | mathord
    # alias commands:
    'dotsb': '\u22ef',  # ⋯ with binary operators/relations
    'dotsc': '\u2026',  # … with commas
    'dotsi': '\u22ef',  # ⋯ with integrals
    'dotsm': '\u22ef',  # ⋯ multiplication dots
    'dotso': '\u2026',  # … other dots
    # functions with movable limits (requires <mo>)
    'lim': 'lim',
    'sup': 'sup',
    'inf': 'inf',
    'max': 'max',
    'min': 'min',
}
# Later updates override earlier entries with the same command name.
operators.update(tex2unichar.mathbin)  # Binary symbols
operators.update(tex2unichar.mathrel)  # Relation symbols, arrow symbols
operators.update(tex2unichar.mathord)  # Miscellaneous symbols
operators.update(tex2unichar.mathpunct)  # Punctuation
operators.update(tex2unichar.mathop)  # Variable-sized symbols
operators.update(stretchables)

# special cases

thick_operators = {
    # style='font-weight: bold;'
    'thicksim': '\u223C',  # ∼
    'thickapprox': '\u2248',  # ≈
}

small_operators = {
    # mathsize='75%'
    'shortmid': '\u2223',  # ∣
    'shortparallel': '\u2225',  # ∥
    'nshortmid': '\u2224',  # ∤
    'nshortparallel': '\u2226',  # ∦
    'smallfrown': '\u2322',  # ⌢ FROWN
    'smallsmile': '\u2323',  # ⌣ SMILE
    'smallint': '\u222b',  # ∫ INTEGRAL
}

# Operators and functions with limits above/below in display formulas
# and in index position inline (movablelimits=True)
movablelimits = ('bigcap', 'bigcup', 'bigodot', 'bigoplus', 'bigotimes',
                 'bigsqcup', 'biguplus', 'bigvee', 'bigwedge',
                 'coprod', 'intop', 'ointop', 'prod', 'sum',
                 'lim', 'max', 'min', 'sup', 'inf')
# Depending on settings, integrals may also be in this category.
# (e.g. if "amsmath" is loaded with option "intlimits", see
# http://mirror.ctan.org/macros/latex/required/amsmath/amsldoc.pdf)
# movablelimits.extend(('fint', 'iiiint', 'iiint', 'iint', 'int', 'oiint',
#                       'oint', 'ointctrclockwise', 'sqint',
#                       'varointclockwise',))

# horizontal space -> <mspace>

spaces = {'qquad': '2em',  # two \quad
          'quad': '1em',  # 18 mu
          'thickspace': '0.2778em',  # 5mu = 5/18em
          ';': '0.2778em',  # 5mu thickspace
          ' ': '0.25em',  # inter word space
          'medspace': '0.2222em',  # 4mu = 2/9em
          ':': '0.2222em',  # 4mu medspace
          'thinspace': '0.1667em',  # 3mu = 1/6em
          ',': '0.1667em',  # 3mu thinspace
          'negthinspace': '-0.1667em',  # -3mu = -1/6em
          '!': '-0.1667em',  # negthinspace
          'negmedspace': '-0.2222em',  # -4mu = -2/9em
          'negthickspace': '-0.2778em',  # -5mu = -5/18em
          }

# accents -> <mover>
accents = {
    # TeX: (spacing, combining)
    'acute': ('´', '\u0301'),
    'bar': ('ˉ', '\u0304'),
    'breve': ('˘', '\u0306'),
    'check': ('ˇ', '\u030C'),
    'dot': ('˙', '\u0307'),
    'ddot': ('¨', '\u0308'),
    'dddot': ('⋯', '\u20DB'),
    'grave': ('`', '\u0300'),
    'hat': ('ˆ', '\u0302'),
    'mathring': ('˚', '\u030A'),
    'tilde': ('˜', '\u0303'),  # tilde ~ or small tilde ˜?
    'vec': ('→', '\u20d7'),  # → too heavy, accents="false"
    # TODO: ddddot
}

# limits etc. -> <munder>, <mover> or <munderover>
over = {
    # TeX: (char, offset-correction/em)
    'overbrace': ('\u23DE', -0.2),  # DejaVu Math -0.6
    'overleftarrow': ('\u2190', -0.2),
    'overleftrightarrow': ('\u2194', -0.2),
    'overline': ('_', -0.2),  # \u2012 does not stretch
    'overrightarrow': ('\u2192', -0.2),
    'widehat': ('^', -0.5),
    'widetilde': ('~', -0.3),
}
under = {'underbrace': ('\u23DF', 0.1),  # DejaVu Math -0.7
         'underleftarrow': ('\u2190', -0.2),
         'underleftrightarrow': ('\u2194', -0.2),
         'underline': ('_', -0.8),
         'underrightarrow': ('\u2192', -0.2),
         }

# Character translations
# ----------------------

# characters with preferred alternative in mathematical use
# cf.
# https://www.w3.org/TR/MathML3/chapter7.html#chars.anomalous
anomalous_chars = {'-': '\u2212',  # HYPHEN-MINUS -> MINUS SIGN
                   ':': '\u2236',  # COLON -> RATIO
                   '~': '\u00a0',  # NO-BREAK SPACE
                   }

# blackboard bold
# (Greek characters not working with "mathvariant" (Firefox 78))
mathbb = {'Γ': '\u213E',  # ℾ
          'Π': '\u213F',  # ℿ
          'Σ': '\u2140',  # ⅀
          'γ': '\u213D',  # ℽ
          'π': '\u213C',  # ℼ
          }

# Matrix environments
matrices = {
    # name: fences
    'matrix': ('', ''),
    'smallmatrix': ('', ''),  # smaller, see begin_environment()!
    'pmatrix': ('(', ')'),
    'bmatrix': ('[', ']'),
    'Bmatrix': ('{', '}'),
    'vmatrix': ('|', '|'),
    'Vmatrix': ('\u2016', '\u2016'),  # ‖
    'cases': ('{', ''),
}

layout_styles = {
    'displaystyle': {'displaystyle': True, 'scriptlevel': 0},
    'textstyle': {'displaystyle': False, 'scriptlevel': 0},
    'scriptstyle': {'displaystyle': False, 'scriptlevel': 1},
    'scriptscriptstyle': {'displaystyle': False, 'scriptlevel': 2},
}
# See also https://www.w3.org/TR/MathML3/chapter3.html#presm.scriptlevel

fractions = {
    # name: style_attrs, frac_attrs
    'frac': ({}, {}),
    'cfrac': ({'displaystyle': True, 'scriptlevel': 0,
               'CLASS': 'cfrac'}, {}),  # in LaTeX with padding
    'dfrac': (layout_styles['displaystyle'], {}),
    'tfrac': (layout_styles['textstyle'], {}),
    'binom': ({}, {'linethickness': 0}),
    'dbinom': (layout_styles['displaystyle'], {'linethickness': 0}),
    'tbinom': (layout_styles['textstyle'], {'linethickness': 0}),
}

delimiter_sizes = ['', '1.2em', '1.623em', '2.047em', '2.470em']
bigdelimiters = {'left': 0,
                 'right': 0,
                 'bigl': 1,
                 'bigr': 1,
                 'Bigl': 2,
                 'Bigr': 2,
                 'biggl': 3,
                 'biggr': 3,
                 'Biggl': 4,
                 'Biggr': 4,
                 }


# MathML element classes
# ----------------------

class math:
    """Base class for MathML elements and root of MathML trees."""

    nchildren = None
    """Expected number of children or None"""
    # cf. https://www.w3.org/TR/MathML3/chapter3.html#id.3.1.3.2
    parent = None
    """Parent node in MathML DOM tree."""
    _level = 0  # indentation level (static class variable)
    xml_entities = {
        # for invalid and invisible characters
        ord('<'): '&lt;',
        ord('>'): '&gt;',
        ord('&'): '&amp;',
        0x2061: '&ApplyFunction;',
    }
    _boolstrings = {True: 'true', False: 'false'}
    """String representation of boolean MathML attribute values."""

    html_tagname = 'span'
    """Tag name for HTML representation."""

    def __init__(self, *children, **attributes):
        """Set up node with `children` and `attributes`.

        Attributes are downcased: Use CLASS to set "class" value.

        >>> math(mn(3), CLASS='test')
        math(mn(3), class='test')
        >>> math(CLASS='test').toprettyxml()
        '<math class="test">\\n</math>'

        """
        self.children = []
        self.extend(children)

        self.attributes = {}
        for key, value in attributes.items():
            # Use .lower() to allow argument `CLASS` for attribute `class`
            # (Python keyword). MathML uses only lowercase attributes.
            self.attributes[key.lower()] = value

    def __repr__(self):
        """Return a constructor-like representation of the node.

        Lists the children, the `data` attribute (if the instance has one),
        a ``switch=True`` marker for `MathSchema` instances with a true
        `switch` attribute, and all attributes that are not None.
        """
        content = [repr(item) for item in getattr(self, 'children', [])]
        if hasattr(self, 'data'):
            content.append(repr(self.data))
        if isinstance(self, MathSchema) and self.switch:
            content.append('switch=True')
        content += ["%s=%r" % (k, v) for k, v in self.attributes.items()
                    if v is not None]
        return self.__class__.__name__ + '(%s)' % ', '.join(content)

    def __len__(self):
        """Return the number of children."""
        return len(self.children)

    # emulate dictionary-like access to attributes
    # see `docutils.nodes.Element` for dict/list interface
    def __getitem__(self, key):
        return self.attributes[key]

    def __setitem__(self, key, item):
        self.attributes[key] = item

    def get(self, *args, **kwargs):
        return self.attributes.get(*args, **kwargs)

    def full(self):
        """Return True if no more children may be appended.

        A node with ``nchildren = None`` is never full.
        """
        return (self.nchildren is not None
                and len(self) >= self.nchildren)

    def append(self, child):
        """Append child and return self or first non-full parent.

        If self becomes full by appending `child`, go up the tree and
        return the first non-full node or `None`.

        Raise `SyntaxError` if self is already full before appending.
        """
        if self.full():
            raise SyntaxError('Node %s already full!' % self)
        self.children.append(child)
        child.parent = self
        if self.full():
            return self.close()
        return self

    def extend(self, children):
        """Append every item of `children` and return self."""
        for child in children:
            self.append(child)
        return self

    def close(self):
        """Close element and return first non-full parent or None."""
        parent = self.parent
        while parent is not None and parent.full():
            parent = parent.parent
        return parent

    def toprettyxml(self):
        """Return XML representation of self as string."""
        return ''.join(self._xml())

    def _xml(self, level=0):
        """Return list of XML fragments: start tag, body, and end tag."""
        return ([self.xml_starttag()]
                + self._xml_body(level)
                + ['</%s>' % self.__class__.__name__])

    def xml_starttag(self):
        """Return the start tag; the class name is used as tag name.

        Attributes with value None are skipped, the substrings "True"
        and "False" in attribute values are written in lowercase.
        """
        attrs = ('%s="%s"' % (k, str(v).replace('True', 'true')
                              .replace('False', 'false'))
                 for k, v in self.attributes.items()
                 if v is not None)
        return '<%s>' % ' '.join((self.__class__.__name__, *attrs))

    def _xml_body(self, level=0):
        """Return list of XML fragments for the children, one per line."""
        # NOTE(review): the indentation unit is assumed to be two spaces;
        # confirm against the rendered output expected elsewhere.
        xml = []
        for child in self.children:
            xml.extend(['\n', '  ' * (level+1)])
            xml.extend(child._xml(level+1))
        xml.extend(['\n', '  ' * level])
        return xml

    def is_block(self):
        """Return true, if `self` or a parent has ``display='block'``."""
        try:
            return self['display'] == 'block'
        except KeyError:
            # no "display" attribute here: ask the parent
            try:
                return self.parent.is_block()
            except AttributeError:
                # `self.parent` is None (root of the tree)
                return False


# >>> n2 = math(mn(2))
# >>> n2
# math(mn(2))
# >>> n2.toprettyxml()
# '<math>\n  <mn>2</mn>\n</math>'
# >>> len(n2)
# 1
# >>> eq3 = math(id='eq3', display='block')
# >>> eq3
# math(id='eq3', display='block')
# >>> eq3.toprettyxml()
# '<math id="eq3" display="block">\n</math>'
# >>> len(eq3)
# 0
# >>> math(CLASS='bold').xml_starttag()
# '<math class="bold">'
# >>> n2.is_block()
# False
# >>> node = n2.append(mrow())
# >>> node.is_block()
# False
# >>> eq3.is_block()
# True
# >>> node = eq3.append(mrow())
# >>> node.is_block()
# True


class mtable(math):
    """MathML ``<mtable>`` element; adds nothing to the `math` base class."""


# >>> mt = mtable(displaystyle=True)
# >>> mt
# mtable(displaystyle=True)
# >>> math(mt).toprettyxml()
# '<math>\n  <mtable displaystyle="true">\n  </mtable>\n</math>'

class mrow(math):
    """Horizontal row grouping several sub-expressions."""

    def close(self):
        """Close this row and return the first non-full ancestor or None.

        The ``<mrow>`` is removed from the tree in two situations:

        * It is the child of a `MathRowSchema` parent that expects exactly
          one child (the parent infers an mrow): the row's children are
          handed over to the parent and the parent is closed instead.
        * It holds exactly one child: this child takes the row's place
          in the parent. If the row cannot be located in a parent,
          the child itself is returned.
        """
        container = self.parent
        if isinstance(container, MathRowSchema) and container.nchildren == 1:
            # Hand the row's children over to the parent.
            container.nchildren = len(container.children)
            container.children = self.children
            for node in self.children:
                node.parent = container
            return container.close()
        if len(self) != 1:
            return super().close()
        # A row around a single element is redundant.
        only_child = self.children[0]
        try:
            position = container.children.index(self)
            container.children[position] = only_child
            only_child.parent = container
        except (AttributeError, ValueError):
            # no parent (None) or `self` is not among the parent's children
            return only_child
        return super().close()

# >>> mrow(displaystyle=False)
# mrow(displaystyle=False)


# The elements <msqrt>, <mstyle>, <merror>, <mpadded>, <mphantom>, <menclose>,