# Copyright © 2018–2019 Io Mintz
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# This file primarily consists of code vendored from the CPython standard library.
# It is used under the Python Software Foundation License Version 2.
# See LICENSE for details.

import collections
import io
import re
import string
import tokenize as tokenize_
from token import *  # TODO only import what we need

# re-export the public tokenize API (TokenInfo, token type constants, …) at module level
vars().update({k: v for k, v in vars(tokenize_).items() if not k.startswith('_')})

from .constants import *

tokenize_.TokenInfo.value = property(lambda self: self.string)

is_import = lambda token: token.type == tokenize_.ERRORTOKEN and token.string == IMPORT_OP

NEWLINES = {NEWLINE, tokenize_.NL}

def fix_syntax(s: str, filename=DEFAULT_FILENAME) -> str:
    """Replace each import-operator token in s with MARKER, reporting tokenizer errors as SyntaxError."""
    try:
        tokens = list(tokenize(s))
    except tokenize_.TokenError as ex:
        message, (lineno, offset) = ex.args
        try:
            source_line = s.splitlines()[lineno-2]
        except IndexError:
            source_line = None

        raise SyntaxError(message, (filename, lineno-1, offset, source_line)) from None

    return Untokenizer().untokenize(tokens)

# modified from Lib/tokenize.py at 3.6
class Untokenizer:
    def __init__(self):
        self.tokens = collections.deque()
        self.indents = collections.deque()
        self.prev_row = 1
        self.prev_col = 0
        self.startline = False
        self.encoding = None

    def add_whitespace(self, start):
        row, col = start
        if row < self.prev_row or row == self.prev_row and col < self.prev_col:
            raise ValueError(
                "start ({},{}) precedes previous end ({},{})".format(row, col, self.prev_row, self.prev_col))
        col_offset = col - self.prev_col
        self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        indents = []
        startline = False
        for token in iterable:
            if token.type == tokenize_.ENCODING:
                self.encoding = token.value
                continue

            if token.type == tokenize_.ENDMARKER:
                break

            # XXX this abomination comes from tokenize.py
            # I tried to move it to a separate method but failed
            if token.type == tokenize_.INDENT:
                indents.append(token.value)
                continue
            elif token.type == tokenize_.DEDENT:
                indents.pop()
                self.prev_row, self.prev_col = token.end
                continue
            elif token.type in NEWLINES:
                startline = True
            elif startline and indents:
                indent = indents[-1]
                start_row, start_col = token.start
                if start_col >= len(indent):
                    self.tokens.append(indent)
                    self.prev_col = len(indent)
                startline = False
            # end abomination

            self.add_whitespace(token.start)
            if is_import(token):
                self.tokens.append(MARKER)
            else:
                self.tokens.append(token.value)

            self.prev_row, self.prev_col = token.end

            # don't ask me why this shouldn't be "in NEWLINES",
            # but ignoring tokenize_.NL here fixes #3
            if token.type == NEWLINE:
                self.prev_row += 1
                self.prev_col = 0

        return "".join(self.tokens)

def tokenize(string):
    return tokenize_.tokenize(io.BytesIO(string.encode('utf-8')).readline)
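
# Usage sketch (illustrative, not part of the vendored code): fix_syntax() takes
# source text that may contain the import operator defined by IMPORT_OP in
# .constants (which the tokenizer reports as an ERRORTOKEN) and returns the same
# source with each occurrence replaced by MARKER, preserving the original
# whitespace and line layout. Assuming IMPORT_OP is the '!' operator used by
# this package:
#
#     rewritten = fix_syntax('collections!.OrderedDict()')
#     # `rewritten` now contains MARKER wherever '!' appeared, so the rest of
#     # the package can parse it as ordinary Python.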