# Copyright © 2018–2019 Io Mintz <io@mintz.cc>

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# This file primarily consists of code vendored from the CPython standard library.
# It is used under the Python Software Foundation License Version 2.
# See LICENSE for details.

import collections
import io
import re
import string
import tokenize as tokenize_
from token import *
# TODO only import what we need
vars().update({k: v for k, v in vars(tokenize_).items() if not k.startswith('_')})
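# The line above copies every public name from the tokenize module into this
# module's namespace (roughly `from tokenize import *`, except that it is not
# limited to tokenize.__all__), so tokenize's helpers and token constants can
# be used unqualified below.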

from .constants import *

tokenize_.TokenInfo.value = property(lambda self: self.string)
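# The property above is only a convenience alias: for any TokenInfo instance,
# `.value` reads the same field as `.string`. A minimal sketch of what it
# enables (NAME comes from the `from token import *` above):
#
#     tok = tokenize_.TokenInfo(NAME, 'spam', (1, 0), (1, 4), 'spam\n')
#     assert tok.value == tok.string == 'spam'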

is_import = lambda token: token.type == tokenize_.ERRORTOKEN and token.string == IMPORT_OP

NEWLINES = {NEWLINE, tokenize_.NL}

def fix_syntax(s: str, filename=DEFAULT_FILENAME) -> str:
	try:
		tokens = list(tokenize(s))
	except tokenize_.TokenError as ex:
		message, (lineno, offset) = ex.args

		try:
			source_line = s.splitlines()[lineno-2]
		except IndexError:
			source_line = None

		raise SyntaxError(message, (filename, lineno-1, offset, source_line)) from None

	return Untokenizer().untokenize(tokens)
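
# Rough usage sketch of fix_syntax() (hedged: IMPORT_OP and MARKER come from
# .constants, and their concrete values are not shown in this file). The call
# re-tokenizes the source, replaces every IMPORT_OP token with MARKER, and
# round-trips everything else through Untokenizer unchanged, so the result is
# ordinary Python text, e.g.:
#
#     fixed = fix_syntax('foo!.bar\n')  # '!' is only an assumed IMPORT_OP here
#     tree = ast.parse(fixed)           # illustrative follow-up step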

# modified from Lib/tokenize.py at 3.6
class Untokenizer:
	def __init__(self):
		self.tokens = collections.deque()
		self.indents = collections.deque()
		self.prev_row = 1
		self.prev_col = 0
		self.startline = False
		self.encoding = None

	def add_whitespace(self, start):
		row, col = start
		if row < self.prev_row or row == self.prev_row and col < self.prev_col:
			raise ValueError(
				"start ({},{}) precedes previous end ({},{})".format(row, col, self.prev_row, self.prev_col))

		col_offset = col - self.prev_col
		self.tokens.append(" " * col_offset)

	def untokenize(self, iterable):
		indents = []
		startline = False
		for token in iterable:
			if token.type == tokenize_.ENCODING:
				self.encoding = token.value
				continue

			if token.type == tokenize_.ENDMARKER:
				break

			# XXX this abomination comes from tokenize.py
			# i tried to move it to a separate method but failed

			if token.type == tokenize_.INDENT:
				indents.append(token.value)
				continue
			elif token.type == tokenize_.DEDENT:
				indents.pop()
				self.prev_row, self.prev_col = token.end
				continue
			elif token.type in NEWLINES:
				startline = True
			elif startline and indents:
				indent = indents[-1]
				start_row, start_col = token.start
				if start_col >= len(indent):
					self.tokens.append(indent)
					self.prev_col = len(indent)
				startline = False

			# end abomination

			self.add_whitespace(token.start)

			if is_import(token):
				self.tokens.append(MARKER)
			else:
				self.tokens.append(token.value)

			self.prev_row, self.prev_col = token.end

			# don't ask me why this shouldn't be "in NEWLINES",
			# but ignoring tokenize_.NL here fixes #3
			if token.type == NEWLINE:
				self.prev_row += 1
				self.prev_col = 0

		return "".join(self.tokens)

def tokenize(string):
	return tokenize_.tokenize(io.BytesIO(string.encode('utf-8')).readline)
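
# Minimal sketch of how the helper above is used: it wraps tokenize_.tokenize()
# so callers can pass a str instead of a readline callable, e.g.:
#
#     for tok in tokenize('print(1)\n'):
#         print(tok.type, tok.value)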