# $Id: misc.py 9358 2023-04-19 23:31:13Z milde $ # Authors: David Goodger ; Dethe Elza # Copyright: This module has been placed in the public domain. """Miscellaneous directives.""" __docformat__ = 'reStructuredText' from pathlib import Path import re import time from urllib.request import urlopen from urllib.error import URLError from docutils import io, nodes, statemachine, utils from docutils.parsers.rst import Directive, convert_directive_function from docutils.parsers.rst import directives, roles, states from docutils.parsers.rst.directives.body import CodeBlock, NumberLines from docutils.transforms import misc class Include(Directive): """ Include content read from a separate source file. Content may be parsed by the parser, or included as a literal block. The encoding of the included file can be specified. Only a part of the given file argument may be included by specifying start and end line or text to match before and/or after the text to be used. https://docutils.sourceforge.io/docs/ref/rst/directives.html#including-an-external-document-fragment """ required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True option_spec = {'literal': directives.flag, 'code': directives.unchanged, 'encoding': directives.encoding, 'parser': directives.parser_name, 'tab-width': int, 'start-line': int, 'end-line': int, 'start-after': directives.unchanged_required, 'end-before': directives.unchanged_required, # ignored except for 'literal' or 'code': 'number-lines': directives.unchanged, # integer or None 'class': directives.class_option, 'name': directives.unchanged} standard_include_path = Path(states.__file__).parent / 'include' def run(self): """Include a file as part of the content of this reST file. Depending on the options, the file (or a clipping) is converted to nodes and returned or inserted into the input stream. """ if not self.state.document.settings.file_insertion_enabled: raise self.warning('"%s" directive disabled.' % self.name) current_source = self.state.document.current_source path = directives.path(self.arguments[0]) if path.startswith('<') and path.endswith('>'): _base = self.standard_include_path path = path[1:-1] else: _base = Path(current_source).parent path = utils.relative_path(None, _base/path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) e_handler = self.state.document.settings.input_encoding_error_handler tab_width = self.options.get( 'tab-width', self.state.document.settings.tab_width) try: include_file = io.FileInput(source_path=path, encoding=encoding, error_handler=e_handler) except UnicodeEncodeError: raise self.severe(f'Problems with "{self.name}" directive path:\n' f'Cannot encode input file path "{path}" ' '(wrong locale?).') except OSError as error: raise self.severe(f'Problems with "{self.name}" directive ' f'path:\n{io.error_string(error)}.') else: self.state.document.settings.record_dependencies.add(path) # Get to-be-included content startline = self.options.get('start-line', None) endline = self.options.get('end-line', None) try: if startline or (endline is not None): lines = include_file.readlines() rawtext = ''.join(lines[startline:endline]) else: rawtext = include_file.read() except UnicodeError as error: raise self.severe(f'Problem with "{self.name}" directive:\n' + io.error_string(error)) # start-after/end-before: no restrictions on newlines in match-text, # and no restrictions on matching inside lines vs. line boundaries after_text = self.options.get('start-after', None) if after_text: # skip content in rawtext before *and incl.* a matching text after_index = rawtext.find(after_text) if after_index < 0: raise self.severe('Problem with "start-after" option of "%s" ' 'directive:\nText not found.' % self.name) rawtext = rawtext[after_index + len(after_text):] before_text = self.options.get('end-before', None) if before_text: # skip content in rawtext after *and incl.* a matching text before_index = rawtext.find(before_text) if before_index < 0: raise self.severe('Problem with "end-before" option of "%s" ' 'directive:\nText not found.' % self.name) rawtext = rawtext[:before_index] include_lines = statemachine.string2lines(rawtext, tab_width, convert_whitespace=True) for i, line in enumerate(include_lines): if len(line) > self.state.document.settings.line_length_limit: raise self.warning('"%s": line %d exceeds the' ' line-length-limit.' % (path, i+1)) if 'literal' in self.options: # Don't convert tabs to spaces, if `tab_width` is negative. if tab_width >= 0: text = rawtext.expandtabs(tab_width) else: text = rawtext literal_block = nodes.literal_block( rawtext, source=path, classes=self.options.get('class', [])) literal_block.line = 1 self.add_name(literal_block) if 'number-lines' in self.options: try: startline = int(self.options['number-lines'] or 1) except ValueError: raise self.error(':number-lines: with non-integer ' 'start value') endline = startline + len(include_lines) if text.endswith('\n'): text = text[:-1] tokens = NumberLines([([], text)], startline, endline) for classes, value in tokens: if classes: literal_block += nodes.inline(value, value, classes=classes) else: literal_block += nodes.Text(value) else: literal_block += nodes.Text(text) return [literal_block] if 'code' in self.options: self.options['source'] = path # Don't convert tabs to spaces, if `tab_width` is negative: if tab_width < 0: include_lines = rawtext.splitlines() codeblock = CodeBlock(self.name, [self.options.pop('code')], # arguments self.options, include_lines, # content self.lineno, self.content_offset, self.block_text, self.state, self.state_machine) return codeblock.run() # Prevent circular inclusion: clip_options = (startline, endline, before_text, after_text) include_log = self.state.document.include_log # log entries are tuples (, ) if not include_log: # new document, initialize with document source include_log.append((utils.relative_path(None, current_source), (None, None, None, None))) if (path, clip_options) in include_log: master_paths = (pth for (pth, opt) in reversed(include_log)) inclusion_chain = '\n> '.join((path, *master_paths)) raise self.warning('circular inclusion in "%s" directive:\n%s' % (self.name, inclusion_chain)) if 'parser' in self.options: # parse into a dummy document and return created nodes document = utils.new_document(path, self.state.document.settings) document.include_log = include_log + [(path, clip_options)] parser = self.options['parser']() parser.parse('\n'.join(include_lines), document) # clean up doctree and complete parsing document.transformer.populate_from_components((parser,)) document.transformer.apply_transforms() return document.children # Include as rST source: # # mark end (cf. parsers.rst.states.Body.comment()) include_lines += ['', '.. end of inclusion from "%s"' % path] self.state_machine.insert_input(include_lines, path) # update include-log include_log.append((path, clip_options)) return [] class Raw(Directive): """ Pass through content unchanged Content is included in output based on type argument Content may be included inline (content section of directive) or imported from a file or url. """ required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True option_spec = {'file': directives.path, 'url': directives.uri, 'encoding': directives.encoding, 'class': directives.class_option} has_content = True def run(self): if (not self.state.document.settings.raw_enabled or (not self.state.document.settings.file_insertion_enabled and ('file' in self.options or 'url' in self.options))): raise self.warning('"%s" directive disabled.' % self.name) attributes = {'format': ' '.join(self.arguments[0].lower().split())} encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) e_handler = self.state.document.settings.input_encoding_error_handler if self.content: if 'file' in self.options or 'url' in self.options: raise self.error( '"%s" directive may not both specify an external file ' 'and have content.' % self.name) text = '\n'.join(self.content) elif 'file' in self.options: if 'url' in self.options: raise self.error( 'The "file" and "url" options may not be simultaneously ' 'specified for the "%s" directive.' % self.name) path = self.options['file'] _base = Path(self.state.document.current_source).parent path = utils.relative_path(None, _base/path) try: raw_file = io.FileInput(source_path=path, encoding=encoding, error_handler=e_handler) except OSError as error: raise self.severe(f'Problems with "{self.name}" directive ' f'path:\n{io.error_string(error)}.') else: # TODO: currently, raw input files are recorded as # dependencies even if not used for the chosen output format. self.state.document.settings.record_dependencies.add(path) try: text = raw_file.read() except UnicodeError as error: raise self.severe(f'Problem with "{self.name}" directive:\n' + io.error_string(error)) attributes['source'] = path elif 'url' in self.options: source = self.options['url'] try: raw_text = urlopen(source).read() except (URLError, OSError) as error: raise self.severe(f'Problems with "{self.name}" directive URL ' f'"{self.options["url"]}":\n' f'{io.error_string(error)}.') raw_file = io.StringInput(source=raw_text, source_path=source, encoding=encoding, error_handler=e_handler) try: text = raw_file.read() except UnicodeError as error: raise self.severe(f'Problem with "{self.name}" directive:\n' + io.error_string(error)) attributes['source'] = source else: # This will always fail because there is no content. self.assert_has_content() raw_node = nodes.raw('', text, classes=self.options.get('class', []), **attributes) (raw_node.source, raw_node.line) = self.state_machine.get_source_and_line(self.lineno) return [raw_node] class Replace(Directive): has_content = True def run(self): if not isinstance(self.state, states.SubstitutionDef): raise self.error( 'Invalid context: the "%s" directive can only be used within ' 'a substitution definition.' % self.name) self.assert_has_content() text = '\n'.join(self.content) element = nodes.Element(text) self.state.nested_parse(self.content, self.content_offset, element) # element might contain [paragraph] + system_message(s) node = None messages = [] for elem in element: if not node and isinstance(elem, nodes.paragraph): node = elem elif isinstance(elem, nodes.system_message): elem['backrefs'] = [] messages.append(elem) else: return [ self.reporter.error( f'Error in "{self.name}" directive: may contain ' 'a single paragraph only.', line=self.lineno)] if node: return messages + node.children return messages class Unicode(Directive): r""" Convert Unicode character codes (numbers) to characters. Codes may be decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``, ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character entities (e.g. ``☮``). Text following ".." is a comment and is ignored. Spaces are ignored, and any other text remains as-is. """ required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True option_spec = {'trim': directives.flag, 'ltrim': directives.flag, 'rtrim': directives.flag} comment_pattern = re.compile(r'( |\n|^)\.\. ') def run(self): if not isinstance(self.state, states.SubstitutionDef): raise self.error( 'Invalid context: the "%s" directive can only be used within ' 'a substitution definition.' % self.name) substitution_definition = self.state_machine.node if 'trim' in self.options: substitution_definition.attributes['ltrim'] = 1 substitution_definition.attributes['rtrim'] = 1 if 'ltrim' in self.options: substitution_definition.attributes['ltrim'] = 1 if 'rtrim' in self.options: substitution_definition.attributes['rtrim'] = 1 codes = self.comment_pattern.split(self.arguments[0])[0].split() element = nodes.Element() for code in codes: try: decoded = directives.unicode_code(code) except ValueError as error: raise self.error('Invalid character code: %s\n%s' % (code, io.error_string(error))) element += nodes.Text(decoded) return element.children class Class(Directive): """ Set a "class" attribute on the directive content or the next element. When applied to the next element, a "pending" element is inserted, and a transform does the work later. """ required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True has_content = True def run(self): try: class_value = directives.class_option(self.arguments[0]) except ValueError: raise self.error( 'Invalid class attribute value for "%s" directive: "%s".' % (self.name, self.arguments[0])) node_list = [] if self.content: container = nodes.Element() self.state.nested_parse(self.content, self.content_offset, container) for node in container: node['classes'].extend(class_value) node_list.extend(container.children) else: pending = nodes.pending( misc.ClassAttribute, {'class': class_value, 'directive': self.name}, self.block_text) self.state_machine.document.note_pending(pending) node_list.append(pending) return node_list class Role(Directive): has_content = True argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$' % ((states.Inliner.simplename,) * 2)) def run(self): """Dynamically create and register a custom interpreted text role.""" if self.content_offset > self.lineno or not self.content: raise self.error('"%s" directive requires arguments on the first ' 'line.' % self.name) args = self.content[0] match = self.argument_pattern.match(args) if not match: raise self.error('"%s" directive arguments not valid role names: ' '"%s".' % (self.name, args)) new_role_name = match.group(1) base_role_name = match.group(3) messages = [] if base_role_name: base_role, messages = roles.role( base_role_name, self.state_machine.language, self.lineno, self.state.reporter) if base_role is None: error = self.state.reporter.error( 'Unknown interpreted text role "%s".' % base_role_name, nodes.literal_block(self.block_text, self.block_text), line=self.lineno) return messages + [error] else: base_role = roles.generic_custom_role assert not hasattr(base_role, 'arguments'), ( 'Supplemental directive arguments for "%s" directive not ' 'supported (specified by "%r" role).' % (self.name, base_role)) try: converted_role = convert_directive_function(base_role) (arguments, options, content, content_offset ) = self.state.parse_directive_block( self.content[1:], self.content_offset, converted_role, option_presets={}) except states.MarkupError as detail: error = self.reporter.error( 'Error in "%s" directive:\n%s.' % (self.name, detail), nodes.literal_block(self.block_text, self.block_text), line=self.lineno) return messages + [error] if 'class' not in options: try: options['class'] = directives.class_option(new_role_name) except ValueError as detail: error = self.reporter.error( 'Invalid argument for "%s" directive:\n%s.' % (self.name, detail), nodes.literal_block(self.block_text, self.block_text), line=self.lineno) return messages + [error] role = roles.CustomRole(new_role_name, base_role, options, content) roles.register_local_role(new_role_name, role) return messages class DefaultRole(Directive): """Set the default interpreted text role.""" optional_arguments = 1 final_argument_whitespace = False def run(self): if not self.arguments: if '' in roles._roles: # restore the "default" default role del roles._roles[''] return [] role_name = self.arguments[0] role, messages = roles.role(role_name, self.state_machine.language, self.lineno, self.state.reporter) if role is None: error = self.state.reporter.error( 'Unknown interpreted text role "%s".' % role_name, nodes.literal_block(self.block_text, self.block_text), line=self.lineno) return messages + [error] roles._roles[''] = role return messages class Title(Directive): required_arguments = 1 optional_arguments = 0 final_argument_whitespace = True def run(self): self.state_machine.document['title'] = self.arguments[0] return [] class MetaBody(states.SpecializedBody): def field_marker(self, match, context, next_state): """Meta element.""" node, blank_finish = self.parsemeta(match) self.parent += node return [], next_state, [] def parsemeta(self, match): name = self.parse_field_marker(match) name = nodes.unescape(utils.escape2null(name)) (indented, indent, line_offset, blank_finish ) = self.state_machine.get_first_known_indented(match.end()) node = nodes.meta() node['content'] = nodes.unescape(utils.escape2null( ' '.join(indented))) if not indented: line = self.state_machine.line msg = self.reporter.info( 'No content for meta tag "%s".' % name, nodes.literal_block(line, line)) return msg, blank_finish tokens = name.split() try: attname, val = utils.extract_name_value(tokens[0])[0] node[attname.lower()] = val except utils.NameValueError: node['name'] = tokens[0] for token in tokens[1:]: try: attname, val = utils.extract_name_value(token)[0] node[attname.lower()] = val except utils.NameValueError as detail: line = self.state_machine.line msg = self.reporter.error( 'Error parsing meta tag attribute "%s": %s.' % (token, detail), nodes.literal_block(line, line)) return msg, blank_finish return node, blank_finish class Meta(Directive): has_content = True SMkwargs = {'state_classes': (MetaBody,)} def run(self): self.assert_has_content() node = nodes.Element() new_line_offset, blank_finish = self.state.nested_list_parse( self.content, self.content_offset, node, initial_state='MetaBody', blank_finish=True, state_machine_kwargs=self.SMkwargs) if (new_line_offset - self.content_offset) != len(self.content): # incomplete parse of block? error = self.reporter.error( 'Invalid meta directive.', nodes.literal_block(self.block_text, self.block_text), line=self.lineno) node += error # insert at begin of document index = self.state.document.first_child_not_matching_class( (nodes.Titular, nodes.meta)) or 0 self.state.document[index:index] = node.children return [] class Date(Directive): has_content = True def run(self): if not isinstance(self.state, states.SubstitutionDef): raise self.error( 'Invalid context: the "%s" directive can only be used within ' 'a substitution definition.' % self.name) format_str = '\n'.join(self.content) or '%Y-%m-%d' # @@@ # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable? # Pro: Docutils-generated documentation # can easily be part of `reproducible software builds`__ # # __ https://reproducible-builds.org/ # # Con: Changes the specs, hard to predict behaviour, # # See also the discussion about \date \time \year in TeX # http://tug.org/pipermail/tex-k/2016-May/002704.html # source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH') # if (source_date_epoch): # text = time.strftime(format_str, # time.gmtime(int(source_date_epoch))) # else: text = time.strftime(format_str) return [nodes.Text(text)] class TestDirective(Directive): """This directive is useful only for testing purposes.""" optional_arguments = 1 final_argument_whitespace = True option_spec = {'option': directives.unchanged_required} has_content = True def run(self): if self.content: text = '\n'.join(self.content) info = self.reporter.info( 'Directive processed. Type="%s", arguments=%r, options=%r, ' 'content:' % (self.name, self.arguments, self.options), nodes.literal_block(text, text), line=self.lineno) else: info = self.reporter.info( 'Directive processed. Type="%s", arguments=%r, options=%r, ' 'content: None' % (self.name, self.arguments, self.options), line=self.lineno) return [info]