# Copyright © 2018–2019 Io Mintz
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# This file primarily consists of code vendored from the CPython standard library.
# It is used under the Python Software Foundation License Version 2.
# See LICENSE for details.

import collections
import io
import re
import string
import tokenize as tokenize_
from token import *  # TODO only import what we need

# re-export the public tokenize API (TokenInfo, token type constants, …) at module level
vars().update({k: v for k, v in vars(tokenize_).items() if not k.startswith('_')})

from .constants import *

tokenize_.TokenInfo.value = property(lambda self: self.string)

is_import = lambda token: token.type == tokenize_.ERRORTOKEN and token.string == IMPORT_OP

NEWLINES = {NEWLINE, tokenize_.NL}

def fix_syntax(s: str, filename=DEFAULT_FILENAME) -> str:
    """Replace each import-operator token in s with MARKER, reporting tokenizer errors as SyntaxError."""
    try:
        tokens = list(tokenize(s))
    except tokenize_.TokenError as ex:
        message, (lineno, offset) = ex.args
        try:
            source_line = s.splitlines()[lineno-2]
        except IndexError:
            source_line = None

        raise SyntaxError(message, (filename, lineno-1, offset, source_line)) from None

    return Untokenizer().untokenize(tokens)

# modified from Lib/tokenize.py at 3.6
class Untokenizer:
    def __init__(self):
        self.tokens = collections.deque()
        self.indents = collections.deque()
        self.prev_row = 1
        self.prev_col = 0
        self.startline = False
        self.encoding = None

    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError(
                "start ({},{}) precedes previous end ({},{})".format(row, col, self.prev_row, self.prev_col))
        col_offset = col - self.prev_col
        self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        indents = []
        startline = False
        for token in iterable:
            if token.type == tokenize_.ENCODING:
                self.encoding = token.value
                continue

            if token.type == tokenize_.ENDMARKER:
                break

            # XXX this abomination comes from tokenize.py
            # I tried to move it to a separate method but failed
            if token.type == tokenize_.INDENT:
                indents.append(token.value)
                continue
            elif token.type == tokenize_.DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = token.end
                continue
            elif token.type in NEWLINES:
                startline = True
            elif startline and indents:
                indent = indents[-1]
                start_row, start_col = token.start
                if start_col >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            # end abomination

            self.add_whitespace(token.start)
            if is_import(token):
                self.tokens.append(MARKER)
            else:
                self.tokens.append(token.value)

            self.prev_row, self.prev_col = token.end

            # don't ask me why this shouldn't be "in NEWLINES",
            # but ignoring tokenize_.NL here fixes #3
            if token.type == NEWLINE:
                self.prev_row += 1
                self.prev_col = 0

        return "".join(self.tokens)

def tokenize(string):
    return tokenize_.tokenize(io.BytesIO(string.encode('utf-8')).readline)
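
# Usage sketch (illustrative, not part of the vendored code): fix_syntax() takes
# source text that may contain the import operator defined by IMPORT_OP in
# .constants (which the tokenizer reports as an ERRORTOKEN) and returns the same
# source with each occurrence replaced by MARKER, preserving the original
# whitespace and line layout. Assuming IMPORT_OP is the '!' operator used by
# this package:
#
#     rewritten = fix_syntax('collections!.OrderedDict()')
#     # `rewritten` now contains MARKER wherever '!' appeared, so the rest of
#     # the package can parse it as ordinary Python.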