tuxbot-bot/venv/lib/python3.7/site-packages/import_expression/_syntax.py

# Copyright © 2018-2019 Io Mintz <io@mintz.cc>
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# This file primarily consists of code vendored from the CPython standard library.
# It is used under the Python Software Foundation License Version 2.
# See LICENSE for details.
import collections
import io
import re
import string
import tokenize as tokenize_
from token import *
# TODO only import what we need
vars().update({k: v for k, v in vars(tokenize_).items() if not k.startswith('_')})
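# (the line above copies every public name from the tokenize module into this
# namespace, in lieu of an explicit import list)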
from .constants import *
tokenize_.TokenInfo.value = property(lambda self: self.string)
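# The import operator is not valid Python syntax, so the tokenizer reports it
# as an ERRORTOKEN whose text is IMPORT_OP (defined in .constants).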
is_import = lambda token: token.type == tokenize_.ERRORTOKEN and token.string == IMPORT_OP
NEWLINES = {NEWLINE, tokenize_.NL}
def fix_syntax(s: str, filename=DEFAULT_FILENAME) -> bytes:
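    """Tokenize *s* and rewrite it so every import-operator token is replaced
    by MARKER (both IMPORT_OP and MARKER come from .constants), returning the
    rewritten source. Tokenizer errors are re-raised as SyntaxError pointing
    at *filename*.
    """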
    try:
        tokens = list(tokenize(s))
    except tokenize_.TokenError as ex:
        message, (lineno, offset) = ex.args
        try:
            source_line = s.splitlines()[lineno-2]
        except IndexError:
            source_line = None
        raise SyntaxError(message, (filename, lineno-1, offset, source_line)) from None

    return Untokenizer().untokenize(tokens)
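
# A sketch of the intended behaviour, assuming IMPORT_OP is the "!" operator
# and MARKER is the placeholder name defined in .constants:
#     fix_syntax('collections!.deque')  ->  'collections<MARKER>.deque'
# i.e. the "!" token is rewritten to MARKER and every other token keeps its
# original text and spacing.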
# modified from Lib/tokenize.py at 3.6
class Untokenizer:
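    """A variant of the stdlib tokenize.Untokenizer (vendored from
    Lib/tokenize.py) that emits MARKER in place of import-operator tokens
    while preserving the layout of every other token.
    """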
    def __init__(self):
        self.tokens = collections.deque()
        self.indents = collections.deque()
        self.prev_row = 1
        self.prev_col = 0
        self.startline = False
        self.encoding = None
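
    # Emit enough spaces to move from the previous token's end column to
    # *start*'s column, so the rebuilt source keeps its original spacing.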
    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError(
                "start ({},{}) precedes previous end ({},{})".format(row, col, self.prev_row, self.prev_col))
        col_offset = col - self.prev_col
        self.tokens.append(" " * col_offset)
    def untokenize(self, iterable):
        indents = []
        startline = False
        for token in iterable:
            if token.type == tokenize_.ENCODING:
                self.encoding = token.value
                continue

            if token.type == tokenize_.ENDMARKER:
                break

            # XXX this abomination comes from tokenize.py
            # i tried to move it to a separate method but failed
            if token.type == tokenize_.INDENT:
                indents.append(token.value)
                continue
            elif token.type == tokenize_.DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = token.end
                continue
            elif token.type in NEWLINES:
                startline = True
            elif startline and indents:
                indent = indents[-1]
                start_row, start_col = token.start
                if start_col >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            # end abomination

            self.add_whitespace(token.start)
            if is_import(token):
                self.tokens.append(MARKER)
            else:
                self.tokens.append(token.value)

            self.prev_row, self.prev_col = token.end
            # don't ask me why this shouldn't be "in NEWLINES",
            # but ignoring tokenize_.NL here fixes #3
            if token.type == NEWLINE:
                self.prev_row += 1
                self.prev_col = 0

        return "".join(self.tokens)
def tokenize(string):
    return tokenize_.tokenize(io.BytesIO(string.encode('utf-8')).readline)