398 lines
12 KiB
Python
398 lines
12 KiB
Python
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
|
|
#
|
|
# This module is part of GitDB and is released under
|
|
# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
|
|
import binascii
|
|
import os
|
|
import mmap
|
|
import sys
|
|
import time
|
|
import errno
|
|
|
|
from io import BytesIO
|
|
|
|
from smmap import (
|
|
StaticWindowMapManager,
|
|
SlidingWindowMapManager,
|
|
SlidingWindowMapBuffer
|
|
)
|
|
|
|
# initialize our global memory manager instance
|
|
# Use it to free cached (and unused) resources.
|
|
# sliding_ro_buffer() further down obtains its cursors from this instance.
mman = SlidingWindowMapManager()
|
|
# END handle mman
|
|
|
|
import hashlib
|
|
|
|
try:
    from struct import unpack_from
except ImportError:
    # struct offers no unpack_from here: emulate it with unpack on a slice
    from struct import unpack, calcsize
    __calcsize_cache = dict()

    def unpack_from(fmt, data, offset=0):
        """Unpack ``fmt`` from ``data``, starting at byte ``offset``.

        The byte size of every format seen is remembered in a module level
        cache so that ``calcsize`` runs only once per format string."""
        size = __calcsize_cache.get(fmt)
        if size is None:
            size = __calcsize_cache[fmt] = calcsize(fmt)
        # END handle cache miss
        end = offset + size
        return unpack(fmt, data[offset:end])
    # END own unpack_from implementation
|
|
|
|
|
|
#{ Aliases

# conversion between binary and hexadecimal representations
from binascii import a2b_hex as hex_to_bin
from binascii import b2a_hex as bin_to_hex

# errors
from errno import ENOENT

# os shortcuts
from os import chmod
from os import close
from os import fsync
from os import mkdir
from os import read
from os import rename
from os import write
from os.path import basename
from os.path import dirname
from os.path import exists
from os.path import isdir
from os.path import isfile
from os.path import join
|
|
|
|
|
|
def _retry(func, *args, **kwargs):
    """Call ``func(*args, **kwargs)`` and return its result.

    On every platform but "win32" this is a plain call. On "win32", where
    the OS or someone else may still hold a handle to a file, a failing call
    is repeated: up to ten attempts, each failure followed by a pause of 0.1
    seconds, and then one last attempt whose exception is not caught."""
    if sys.platform != "win32":
        return func(*args, **kwargs)
    # END handle well-behaved platforms

    attempts_left = 10
    while attempts_left > 0:
        try:
            return func(*args, **kwargs)
        except Exception:
            time.sleep(0.1)
        # END exception handling
        attempts_left -= 1
    # END for each attempt
    return func(*args, **kwargs)
|
|
|
|
|
|
def remove(*args, **kwargs):
    """Delete a file with ``os.remove``, routing the call through ``_retry``.

    All arguments are handed to ``os.remove`` unchanged."""
    deleter = os.remove
    return _retry(deleter, *args, **kwargs)
|
|
|
|
|
|
# Backwards compatibility imports
|
|
from gitdb.const import (
|
|
NULL_BIN_SHA,
|
|
NULL_HEX_SHA
|
|
)
|
|
|
|
#} END Aliases
|
|
|
|
#{ compatibility stuff ...
|
|
|
|
|
|
class _RandomAccessBytesIO(object):

    """Wrapper to provide required functionality in case memory maps cannot or may
    not be used. This is only really required in python 2.4

    It holds a ``BytesIO`` and adds ``len()``, indexing and slicing on top of
    it; every other attribute is looked up on the wrapped ``BytesIO``."""
    __slots__ = '_sio'

    def __init__(self, buf=b''):
        """Initialize the wrapper with the given initial contents

        :param buf: bytes to start with. The default is an empty *bytes*
            object, as ``BytesIO`` rejects a text string on Python 3"""
        self._sio = BytesIO(buf)

    def __getattr__(self, attr):
        """Forward every attribute we do not have ourselves to the wrapped BytesIO"""
        if attr == '_sio':
            # The slot was never filled (__init__ did not run). Looking it up
            # below would call this method again and recurse without end.
            raise AttributeError(attr)
        # END handle missing stream
        return getattr(self._sio, attr)

    def __len__(self):
        """:return: amount of bytes in the whole buffer, independent of the stream position"""
        return len(self.getvalue())

    def __getitem__(self, i):
        """:return: item or slice ``i`` of the whole buffer"""
        return self.getvalue()[i]

    def __getslice__(self, start, end):
        """:return: bytes in range [start, end) of the whole buffer"""
        return self.getvalue()[start:end]
|
|
|
|
|
|
def byte_ord(b):
    """
    Convert a single byte or character to its integer value.

    :return: ``ord(b)`` if ``ord`` accepts b. Anything ``ord`` rejects with a
        TypeError, like the integers obtained by indexing Python 3 bytes, is
        handed back unchanged
    """
    try:
        value = ord(b)
    except TypeError:
        value = b
    # END handle values ord cannot deal with
    return value
|
|
|
|
#} END compatibility stuff ...
|
|
|
|
#{ Routines
|
|
|
|
|
|
def make_sha(source=''.encode("ascii")):
    """Create a sha1 hash object which was fed with source already

    ``hashlib`` is used for that. Only if that name is not defined, the
    ``sha`` module is imported and used instead - a python2.4 workaround for
    the sha/hashlib module fiasco.

    **Note** From the dulwich project

    :param source: initial bytes to hash, empty by default
    :return: the hash object"""
    try:
        digest = hashlib.sha1(source)
    except NameError:
        import sha
        digest = sha.sha(source)
    # END handle missing hashlib
    return digest
|
|
|
|
|
|
def allocate_memory(size):
    """Provide a writable memory block of ``size`` bytes

    :param size: amount of bytes to allocate
    :return: a file-protocol accessible memory block of the given size. It is
        an anonymous read-write memory map, or a ``_RandomAccessBytesIO`` if
        size is 0 or if creating the map failed with an EnvironmentError"""
    if size != 0:
        try:
            # anonymous map, read-write by default
            return mmap.mmap(-1, size)
        except EnvironmentError:
            # Use real memory instead. This of course may fail if the amount
            # of memory is not available in one chunk - would only be the case
            # in python 2.4, being more likely on 32 bit systems.
            return _RandomAccessBytesIO(b"\0" * size)
        # END handle memory allocation
    # END handle non-empty chunks

    # empty chunks are handled gracefully without a map
    return _RandomAccessBytesIO(b'')
|
|
|
|
|
|
def file_contents_ro(fd, stream=False, allow_mmap=True):
    """:return: read-only contents of the file represented by the file descriptor fd

    :param fd: file descriptor opened for reading
    :param stream: if False, random access is provided, otherwise the stream interface
        is provided.
    :param allow_mmap: if True, its allowed to map the contents into memory, which
        allows large files to be handled and accessed efficiently. The file-descriptor
        will change its position if this is False

    **Note** whenever the file cannot be mapped, which includes empty files as
    these are rejected by mmap with a ValueError, the contents are read
    manually from the current position of the file descriptor"""
    if allow_mmap:
        # supports stream and random access
        try:
            try:
                return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
            except EnvironmentError:
                # python 2.4 issue, 0 wants to be the actual size
                return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
            # END handle python 2.4
        except (OSError, ValueError):
            # mapping is not possible - ValueError indicates an empty file
            pass
        # END exception handling
    # END handle memory map

    # Read manually. A single os.read call may return less than requested, so
    # keep reading until we have the full size or hit the end of the file.
    remaining = os.fstat(fd).st_size
    chunks = []
    while remaining > 0:
        chunk = os.read(fd, remaining)
        if not chunk:
            break
        # END handle end of file
        chunks.append(chunk)
        remaining -= len(chunk)
    # END while bytes are missing
    contents = b"".join(chunks)

    if stream:
        return _RandomAccessBytesIO(contents)
    return contents
|
|
|
|
|
|
def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
    """Get the file contents at filepath as fast as possible

    The file is opened read-only (and binary, where the os module provides
    O_BINARY), its contents are retrieved through ``file_contents_ro``, and
    the file descriptor is closed again in any case.

    :return: random access compatible memory of the given filepath
    :param stream: see ``file_contents_ro``
    :param allow_mmap: see ``file_contents_ro``
    :param flags: additional flags to pass to os.open
    :raise OSError: If the file could not be opened

    **Note** for now we don't try to use O_NOATIME directly as the right value needs to be
    shared per database in fact. It only makes a real difference for loose object
    databases anyway, and they use it with the help of the ``flags`` parameter"""
    open_flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags
    fd = os.open(filepath, open_flags)
    try:
        contents = file_contents_ro(fd, stream, allow_mmap)
    finally:
        close(fd)
    # END assure file is closed
    return contents
|
|
|
|
|
|
def sliding_ro_buffer(filepath, flags=0):
    """
    Create a buffer on top of a cursor of the module's memory manager ``mman``

    :param filepath: path handed to ``mman.make_cursor``
    :param flags: passed on to ``SlidingWindowMapBuffer`` as ``flags``
    :return: a buffer compatible object which uses our mapped memory manager internally
        ready to read the whole given filepath"""
    cursor = mman.make_cursor(filepath)
    return SlidingWindowMapBuffer(cursor, flags=flags)
|
|
|
|
|
|
def to_hex_sha(sha):
    """:return: hexified version of sha. A sha of length 40 is taken to be
        hexadecimal already and returned as is, everything else is converted
        with ``bin_to_hex``"""
    return sha if len(sha) == 40 else bin_to_hex(sha)
|
|
|
|
|
|
def to_bin_sha(sha):
    """:return: binary version of sha. A sha of length 20 is taken to be
        binary already and returned as is, everything else is converted
        with ``hex_to_bin``"""
    return sha if len(sha) == 20 else hex_to_bin(sha)
|
|
|
|
|
|
#} END routines
|
|
|
|
|
|
#{ Utilities
|
|
|
|
class LazyMixin(object):

    """
    Base for types which compute attribute values lazily, upon first access.

    Python calls ``__getattr__`` only for attributes it could not find in the
    usual way. We use that moment to let ``_set_cache_`` create the attribute,
    and read it afterwards. From then on the value is found as stored in the
    instance's dict or slot, without any further call to us. If slots are
    used, memory is only reserved once the attribute was retrieved the first
    time.
    """

    __slots__ = ()

    def __getattr__(self, attr):
        """
        Called for attributes we do not know yet. ``_set_cache_`` gets the
        chance to create and set it, then it is read from our dict/slots.

        :raise AttributeError: if ``_set_cache_`` did not create the attribute"""
        self._set_cache_(attr)
        # will raise in case the cache was not created
        value = object.__getattribute__(self, attr)
        return value

    def _set_cache_(self, attr):
        """
        Hook to be overridden in the derived class, it does nothing here.

        An implementation checks whether the attribute named by attr can be
        created and cached, and sets it on the instance if so. Do nothing if
        you do not know the attribute, or call your subclass.

        The derived class may create as many additional attributes as it deems
        necessary in case a git command returns more information than
        represented in the single attribute."""
        return None
|
|
|
|
|
|
class LockedFD(object):

    """
    Provides safe read and write operations to a file on disk, guarded by a
    lock file.

    Writing to 'file' creates the lock file 'file.lock' and hands out a
    descriptor to that one instead. Once the changes are committed, the lock
    file is renamed to overwrite the original file.

    Reading creates the lock file as well, but only to keep writers from
    succeeding while the file is being read.

    A consistent state is assured on destruction: an instance which still
    holds its descriptor at that point is rolled back.

    **note** with this setup, parallel reading is not possible"""
    __slots__ = ("_filepath", '_fd', '_write')

    def __init__(self, filepath):
        """Initialize an instance with the given filepath. Nothing is locked
        or opened before ``open`` is called"""
        self._filepath = filepath
        self._fd = None
        # None until open() was called, afterwards the ``write`` flag given to it
        self._write = None

    def __del__(self):
        # nothing to do if the file descriptor was closed (or never opened)
        if self._fd is None:
            return
        self.rollback()

    def _lockfilepath(self):
        """:return: path of the lock file, which is our filepath with '.lock' appended"""
        return "%s.lock" % self._filepath

    def open(self, write=False, stream=False):
        """
        Obtain the lock and open the file descriptor for reading or writing,
        both in binary mode.

        :param write: if True, the file descriptor will be opened for writing
            and refers to the lock file. Otherwise the actual file is opened
            read-only.
        :param stream: if True, the file descriptor will be wrapped into a simple stream
            object which supports only reading or writing
        :return: fd to read from or write to. It is still maintained by this instance
            and must not be closed directly
        :raise IOError: if the lock could not be retrieved
        :raise OSError: If the actual file could not be opened for reading

        **note** must only be called once"""
        if self._write is not None:
            raise AssertionError("Called %s multiple times" % self.open)

        self._write = write

        # Creating the lock file exclusively fails if it exists already, which
        # is what makes it a lock.
        o_binary = getattr(os, 'O_BINARY', 0)
        lock_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | o_binary
        try:
            lock_fd = os.open(self._lockfilepath(), lock_flags, 0o600)
            if write:
                # writers put their data into the lock file
                self._fd = lock_fd
            else:
                # readers just need the lock file to exist
                os.close(lock_fd)
            # END handle file descriptor
        except OSError:
            raise IOError("Lock at %r could not be obtained" % self._lockfilepath())
        # END handle lock retrieval

        # readers have no descriptor yet and get one for the actual file
        if self._fd is None:
            # we could specify exclusive here, as we obtained the lock anyway
            try:
                self._fd = os.open(self._filepath, os.O_RDONLY | o_binary)
            except:
                # Whatever went wrong, release our lockfile before passing
                # the error on to the caller.
                remove(self._lockfilepath())
                raise
            # END handle lockfile
        # END open descriptor for reading

        if not stream:
            return self._fd
        # END handle plain descriptor

        # need delayed import
        from gitdb.stream import FDStream
        return FDStream(self._fd)

    def commit(self):
        """Call this function when done writing to put your changes into the
        actual file.
        The file descriptor will be closed, and the lockfile handled.

        **Note** can be called multiple times"""
        self._end_writing(successful=True)

    def rollback(self):
        """Abort the operation without any changes. The file descriptor will be
        closed, and the lock released.

        **Note** can be called multiple times"""
        self._end_writing(successful=False)

    def _end_writing(self, successful=True):
        """Close the descriptor and handle the lock according to the write mode

        :param successful: if True and we are writing, the lock file replaces
            the actual file. In all other cases the lock file is deleted.
        :raise AssertionError: if ``open`` was not called before"""
        if self._write is None:
            raise AssertionError("Cannot end operation if it wasn't started yet")

        if self._fd is None:
            # ended already
            return

        os.close(self._fd)
        self._fd = None

        lockfile = self._lockfilepath()
        if not (self._write and successful):
            # we were reading, or writing failed: the lock file just goes away
            remove(lockfile)
            return
        # END handle lock release

        # on windows, rename does not silently overwrite the existing one
        if sys.platform == "win32" and isfile(self._filepath):
            remove(self._filepath)
        # END win32 special handling
        os.rename(lockfile, self._filepath)

        # assure others can at least read the file - the tmpfile left it at rw--
        # We may also write that file, on windows that boils down to a remove-
        # protection as well
        chmod(self._filepath, 0o644)
|
|
|
|
#} END utilities
|