-rw-r--r--  .gitmodules | 7
-rw-r--r--  doc/source/changes.rst | 29
-rw-r--r--  git/__init__.py | 8
-rw-r--r--  git/base.py | 311
-rw-r--r--  git/cmd.py | 29
-rw-r--r--  git/config.py | 13
-rw-r--r--  git/db.py | 61
-rw-r--r--  git/db/__init__.py | 6
-rw-r--r--  git/db/cmd/__init__.py | 4
-rw-r--r--  git/db/cmd/base.py | 821
-rw-r--r--  git/db/cmd/complex.py | 16
-rw-r--r--  git/db/compat.py | 31
-rw-r--r--  git/db/complex.py | 28
-rw-r--r--  git/db/interface.py | 838
-rw-r--r--  git/db/py/__init__.py | 4
-rw-r--r--  git/db/py/base.py | 474
-rw-r--r--  git/db/py/complex.py | 128
-rw-r--r--  git/db/py/loose.py | 263
-rw-r--r--  git/db/py/mem.py | 112
-rw-r--r--  git/db/py/pack.py | 212
-rw-r--r--  git/db/py/ref.py | 77
-rw-r--r--  git/db/py/resolve.py (renamed from git/repo/fun.py) | 135
-rw-r--r--  git/db/py/submodule.py | 33
-rw-r--r--  git/db/py/transport.py | 58
-rw-r--r--  git/diff.py | 2
-rw-r--r--  git/exc.py | 49
m---------  git/ext/async | 0
m---------  git/ext/gitdb | 0
-rw-r--r--  git/fun.py | 674
-rw-r--r--  git/index/base.py | 13
-rw-r--r--  git/index/fun.py | 4
-rw-r--r--  git/objects/base.py | 68
-rw-r--r--  git/objects/blob.py | 8
-rw-r--r--  git/objects/commit.py | 267
-rw-r--r--  git/objects/fun.py | 2
-rw-r--r--  git/objects/submodule/__init__.py | 4
-rw-r--r--  git/objects/submodule/base.py | 41
-rw-r--r--  git/objects/submodule/root.py | 5
-rw-r--r--  git/objects/submodule/util.py | 4
-rw-r--r--  git/objects/tag.py | 26
-rw-r--r--  git/objects/tree.py | 25
-rw-r--r--  git/objects/util.py | 1
-rw-r--r--  git/odict.py | 5
-rw-r--r--  git/pack.py | 1005
-rw-r--r--  git/refs/__init__.py | 11
-rw-r--r--  git/refs/head.py | 169
-rw-r--r--  git/refs/headref.py | 170
-rw-r--r--  git/refs/log.py | 5
-rw-r--r--  git/refs/reference.py | 18
-rw-r--r--  git/refs/remote.py | 22
-rw-r--r--  git/refs/symbolic.py | 158
-rw-r--r--  git/refs/tag.py | 7
-rw-r--r--  git/remote.py | 360
-rw-r--r--  git/repo.py | 45
-rw-r--r--  git/repo/__init__.py | 3
-rw-r--r--  git/repo/base.py | 753
-rw-r--r--  git/stream.py | 694
-rw-r--r--  git/test/__init__.py | 9
-rw-r--r--  git/test/db/__init__.py | 4
-rw-r--r--  git/test/db/base.py (renamed from git/test/test_repo.py) | 67
-rw-r--r--  git/test/db/cmd/__init__.py | 4
-rw-r--r--  git/test/db/cmd/test_base.py | 32
-rw-r--r--  git/test/db/lib.py | 246
-rw-r--r--  git/test/db/py/__init__.py | 4
-rw-r--r--  git/test/db/py/test_base.py | 16
-rw-r--r--  git/test/db/py/test_git.py | 51
-rw-r--r--  git/test/db/py/test_loose.py | 36
-rw-r--r--  git/test/db/py/test_mem.py | 30
-rw-r--r--  git/test/db/py/test_pack.py | 76
-rw-r--r--  git/test/db/py/test_ref.py | 62
-rw-r--r--  git/test/db/test_base.py | 20
-rw-r--r--  git/test/fixtures/git_config | 16
-rw-r--r--  git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b | bin 0 -> 446 bytes
-rw-r--r--  git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx | bin 0 -> 1912 bytes
-rw-r--r--  git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack | bin 0 -> 51875 bytes
-rw-r--r--  git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx | bin 0 -> 2248 bytes
-rw-r--r--  git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack | bin 0 -> 3732 bytes
-rw-r--r--  git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx | bin 0 -> 2672 bytes
-rw-r--r--  git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack | bin 0 -> 49113 bytes
-rw-r--r--  git/test/lib/__init__.py | 5
-rw-r--r--  git/test/lib/base.py | 200
-rw-r--r--  git/test/lib/helper.py | 75
-rw-r--r--  git/test/objects/__init__.py | 1
-rw-r--r--  git/test/objects/lib.py | 14
-rw-r--r--  git/test/objects/test_blob.py (renamed from git/test/test_blob.py) | 8
-rw-r--r--  git/test/objects/test_commit.py (renamed from git/test/test_commit.py) | 14
-rw-r--r--  git/test/objects/test_submodule.py (renamed from git/test/test_submodule.py) | 63
-rw-r--r--  git/test/objects/test_tree.py (renamed from git/test/test_tree.py) | 10
-rw-r--r--  git/test/performance/__init__.py | 2
-rw-r--r--  git/test/performance/db/__init__.py | 1
-rw-r--r--  git/test/performance/db/looseodb_impl.py | 132
-rw-r--r--  git/test/performance/db/odb_impl.py | 72
-rw-r--r--  git/test/performance/db/packedodb_impl.py | 107
-rw-r--r--  git/test/performance/db/test_looseodb_cmd.py | 11
-rw-r--r--  git/test/performance/db/test_looseodb_pure.py | 6
-rw-r--r--  git/test/performance/db/test_odb_cmd.py | 6
-rw-r--r--  git/test/performance/db/test_odb_pure.py | 6
-rw-r--r--  git/test/performance/db/test_packedodb_pure.py | 90
-rw-r--r--  git/test/performance/lib.py | 38
-rw-r--r--  git/test/performance/objects/__init__.py | 1
-rw-r--r--  git/test/performance/objects/test_commit.py (renamed from git/test/performance/test_commit.py) | 21
-rw-r--r--  git/test/performance/test_odb.py | 70
-rw-r--r--  git/test/performance/test_streams.py | 131
-rw-r--r--  git/test/test_actor.py | 36
-rw-r--r--  git/test/test_base.py | 122
-rw-r--r--  git/test/test_config.py | 12
-rw-r--r--  git/test/test_db.py | 25
-rw-r--r--  git/test/test_diff.py | 11
-rw-r--r--  git/test/test_example.py | 64
-rw-r--r--  git/test/test_fun.py | 8
-rw-r--r--  git/test/test_git.py | 148
-rw-r--r--  git/test/test_import.py | 58
-rw-r--r--  git/test/test_index.py | 8
-rw-r--r--  git/test/test_pack.py | 247
-rw-r--r--  git/test/test_reflog.py | 2
-rw-r--r--  git/test/test_refs.py | 76
-rw-r--r--  git/test/test_remote.py | 857
-rw-r--r--  git/test/test_stats.py | 8
-rw-r--r--  git/test/test_stream.py | 155
-rw-r--r--  git/test/test_util.py | 129
-rw-r--r--  git/typ.py | 27
-rw-r--r--  git/util.py | 904
122 files changed, 10035 insertions, 2935 deletions
diff --git a/.gitmodules b/.gitmodules
index 83a5207e..57b06fc8 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,3 @@
-[submodule "gitdb"]
- path = git/ext/gitdb
- url = git://github.com/gitpython-developers/gitdb.git
- branch = master
+[submodule "git/ext/async"]
+ path = git/ext/async
+ url = git://github.com/gitpython-developers/async.git
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index 2433d00e..5564cfd7 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -4,8 +4,33 @@ Changelog
NEXT
====
-* Blob Type
- * Added mode constants to ease the manual creation of blobs
+* ### Class Renames ###
+
+ * Renamed **GitCmdObjectDB** to **CmdGitDB** (analogous to **PureCmdDB**)
+
+* ### Interface Changes ###
+
+ * **SymbolicReference**
+
+ * object_binsha property added
+
+ * **Blob** Type
+
+ * Added mode constants to ease the manual creation of blobs
+
+ * **Repo** (i.e. **HighLevelRepository**) now allows a progress instance to be provided for its ``clone()`` and ``clone_from()`` methods.
+
+* ### Module Changes ###
+
+ * Removed rev_parse function from git.repo.fun - the respective functionality is available only through the repository's rev_parse method, which might in turn translate to any implementation.
+
+* ### Git Cmd ###
+
+ * Added ``version_info`` property to git command, returning a tuple of version numbers.
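+
+   A usage sketch (hypothetical output - the actual tuple depends on the installed git binary)::
+
+       g = Git()
+       g.version_info # e.g. (1, 7, 0, 4)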
+
+* ### Exceptions ###
+
+ There is a new common base for all exceptions git-python will throw, namely ``GitPythonError``.
0.3.1 Beta 2
============
diff --git a/git/__init__.py b/git/__init__.py
index 0658c330..4a4200cc 100644
--- a/git/__init__.py
+++ b/git/__init__.py
@@ -14,12 +14,12 @@ __version__ = 'git'
#{ Initialization
def _init_externals():
"""Initialize external projects by putting them into the path"""
- sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb'))
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'async'))
try:
- import gitdb
+ import async
except ImportError:
- raise ImportError("'gitdb' could not be found in your PYTHONPATH")
+ raise ImportError("'async' could not be found in your PYTHONPATH")
#END verify import
#} END initialization
@@ -37,9 +37,9 @@ from git.diff import *
from git.exc import *
from git.db import *
from git.cmd import Git
-from git.repo import Repo
from git.remote import *
from git.index import *
+from git.repo import Repo
from git.util import (
LockFile,
BlockingLockFile,
diff --git a/git/base.py b/git/base.py
new file mode 100644
index 00000000..ff1062bf
--- /dev/null
+++ b/git/base.py
@@ -0,0 +1,311 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module with basic data structures - they are designed to be lightweight and fast"""
+from util import (
+ bin_to_hex,
+ zlib
+ )
+
+from fun import (
+ type_id_to_type_map,
+ type_to_type_id_map
+ )
+
+__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo',
+ 'OStream', 'OPackStream', 'ODeltaPackStream',
+ 'IStream', 'InvalidOInfo', 'InvalidOStream' )
+
+#{ ODB Bases
+
+class OInfo(tuple):
+ """Carries information about an object in an ODB, provding information
+ about the binary sha of the object, the type_string as well as the uncompressed size
+ in bytes.
+
+ It can be accessed using tuple notation and using attribute access notation::
+
+ assert dbi[0] == dbi.binsha
+ assert dbi[1] == dbi.type
+ assert dbi[2] == dbi.size
+
+ The type is designed to be as lightweight as possible."""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size):
+ return tuple.__new__(cls, (sha, type, size))
+
+ def __init__(self, *args):
+ tuple.__init__(self)
+
+ #{ Interface
+ @property
+ def binsha(self):
+ """:return: our sha as binary, 20 bytes"""
+ return self[0]
+
+ @property
+ def hexsha(self):
+ """:return: our sha, hex encoded, 40 bytes"""
+ return bin_to_hex(self[0])
+
+ @property
+ def type(self):
+ return self[1]
+
+ @property
+ def type_id(self):
+ return type_to_type_id_map[self[1]]
+
+ @property
+ def size(self):
+ return self[2]
+ #} END interface
+
+
+class OPackInfo(tuple):
+ """As OInfo, but provides a type_id property to retrieve the numerical type id, and
+ does not include a sha.
+
+ Additionally, the pack_offset is the absolute offset into the packfile at which
+ all object information is located. The data_offset property points to the absolute
+ location in the pack at which the actual data stream can be found."""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size):
+ return tuple.__new__(cls, (packoffset, type, size))
+
+ def __init__(self, *args):
+ tuple.__init__(self)
+
+ #{ Interface
+
+ @property
+ def pack_offset(self):
+ return self[0]
+
+ @property
+ def type(self):
+ return type_id_to_type_map[self[1]]
+
+ @property
+ def type_id(self):
+ return self[1]
+
+ @property
+ def size(self):
+ return self[2]
+
+ #} END interface
+
+
+class ODeltaPackInfo(OPackInfo):
+ """Adds delta specific information,
+ Either the 20 byte sha which points to some object in the database,
+ or the negative offset from the pack_offset, so that pack_offset - delta_info yields
+ the pack offset of the base object"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, delta_info):
+ return tuple.__new__(cls, (packoffset, type, size, delta_info))
+
+ #{ Interface
+ @property
+ def delta_info(self):
+ return self[3]
+ #} END interface
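+
+ # For offset deltas, the location of the base object can be computed from any
+ # ODeltaPackInfo instance ``info`` (a sketch, following the docstring above):
+ #
+ #   base_offset = info.pack_offset - info.delta_info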
+
+
+class OStream(OInfo):
+ """Base for object streams retrieved from the database, providing additional
+ information about the stream.
+ Generally, ODB streams are read-only as objects are immutable"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size, stream, *args, **kwargs):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (sha, type, size, stream))
+
+
+ def __init__(self, *args, **kwargs):
+ tuple.__init__(self)
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ return self[3].read(size)
+
+ @property
+ def stream(self):
+ return self[3]
+
+ #} END stream reader interface
+
+
+class ODeltaStream(OStream):
+ """Uses size info of its stream, delaying reads"""
+
+ def __new__(cls, sha, type, size, stream, *args, **kwargs):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (sha, type, size, stream))
+
+ #{ Stream Reader Interface
+
+ @property
+ def size(self):
+ return self[3].size
+
+ #} END stream reader interface
+
+
+class OPackStream(OPackInfo):
+ """Next to pack object information, a stream outputting an undeltified base object
+ is provided"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, stream, *args):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (packoffset, type, size, stream))
+
+ #{ Stream Reader Interface
+ def read(self, size=-1):
+ return self[3].read(size)
+
+ @property
+ def stream(self):
+ return self[3]
+ #} END stream reader interface
+
+
+class ODeltaPackStream(ODeltaPackInfo):
+ """Provides a stream outputting the uncompressed offset delta information"""
+ __slots__ = tuple()
+
+ def __new__(cls, packoffset, type, size, delta_info, stream):
+ return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
+
+
+ #{ Stream Reader Interface
+ def read(self, size=-1):
+ return self[4].read(size)
+
+ @property
+ def stream(self):
+ return self[4]
+ #} END stream reader interface
+
+
+class IStream(list):
+ """Represents an input content stream to be fed into the ODB. It is mutable to allow
+ the ODB to record information about the operation's outcome right in this instance.
+
+ It provides interfaces for the OStream and a StreamReader to allow the instance
+ to blend in without prior conversion.
+
+ The only method your content stream must support is 'read'"""
+ __slots__ = tuple()
+
+ def __new__(cls, type, size, stream, sha=None):
+ return list.__new__(cls, (sha, type, size, stream, None))
+
+ def __init__(self, type, size, stream, sha=None):
+ list.__init__(self, (sha, type, size, stream, None))
+
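+ # A minimal usage sketch (``db`` stands for any ObjectDBW compatible database,
+ # ``data`` for a string - both are assumptions for illustration):
+ #
+ #   istream = IStream('blob', len(data), StringIO(data))
+ #   db.store(istream)
+ #   assert istream.binsha is not None # the database filled in the sha
+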
+ #{ Interface
+ @property
+ def hexsha(self):
+ """:return: our sha, hex encoded, 40 bytes"""
+ return bin_to_hex(self[0])
+
+ def _error(self):
+ """:return: the error that occurred when processing the stream, or None"""
+ return self[4]
+
+ def _set_error(self, exc):
+ """Set this input stream to the given exc, may be None to reset the error"""
+ self[4] = exc
+
+ error = property(_error, _set_error)
+
+ #} END interface
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ """Implements a simple stream reader interface, passing the read call on
+ to our internal stream"""
+ return self[3].read(size)
+
+ #} END stream reader interface
+
+ #{ interface
+
+ def _set_binsha(self, binsha):
+ self[0] = binsha
+
+ def _binsha(self):
+ return self[0]
+
+ binsha = property(_binsha, _set_binsha)
+
+
+ def _type(self):
+ return self[1]
+
+ def _set_type(self, type):
+ self[1] = type
+
+ type = property(_type, _set_type)
+
+ def _size(self):
+ return self[2]
+
+ def _set_size(self, size):
+ self[2] = size
+
+ size = property(_size, _set_size)
+
+ def _stream(self):
+ return self[3]
+
+ def _set_stream(self, stream):
+ self[3] = stream
+
+ stream = property(_stream, _set_stream)
+
+ #} END odb info interface
+
+
+class InvalidOInfo(tuple):
+ """Carries information about a sha identifying an object which is invalid in
+ the queried database. The exception attribute provides more information about
+ the cause of the issue"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, exc):
+ return tuple.__new__(cls, (sha, exc))
+
+ def __init__(self, sha, exc):
+ tuple.__init__(self, (sha, exc))
+
+ @property
+ def binsha(self):
+ return self[0]
+
+ @property
+ def hexsha(self):
+ return bin_to_hex(self[0])
+
+ @property
+ def error(self):
+ """:return: exception instance explaining the failure"""
+ return self[1]
+
+
+class InvalidOStream(InvalidOInfo):
+ """Carries information about an invalid ODB stream"""
+ __slots__ = tuple()
+
+#} END ODB Bases
+
diff --git a/git/cmd.py b/git/cmd.py
index 60887f5d..29d942ae 100644
--- a/git/cmd.py
+++ b/git/cmd.py
@@ -5,7 +5,10 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os, sys
-from util import *
+from util import (
+ LazyMixin,
+ stream_copy
+ )
from exc import GitCommandError
from subprocess import (
@@ -26,7 +29,7 @@ __all__ = ('Git', )
def dashify(string):
return string.replace('_', '-')
-class Git(object):
+class Git(LazyMixin):
"""
The Git class manages communication with the Git binary.
@@ -41,7 +44,7 @@ class Git(object):
of the command to stdout.
Set its value to 'full' to see details about the returned values.
"""
- __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
+ __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info")
# CONFIGURATION
# The size in bytes read from stdout when copying git's output to another stream
@@ -214,14 +217,30 @@ class Git(object):
"""A convenience method as it allows to call the command as if it was
an object.
:return: Callable object that will execute call _call_process with your arguments."""
- if name[:1] == '_':
- raise AttributeError(name)
+ if name[0] == '_':
+ return LazyMixin.__getattr__(self, name)
return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
+ def _set_cache_(self, attr):
+ if attr == '_version_info':
+ version_numbers = self._call_process('version').rpartition(' ')[2]
+ self._version_info = tuple(int(n) for n in version_numbers.split('.'))
+ else:
+ super(Git, self)._set_cache_(attr)
+ #END handle version info
+
+
@property
def working_dir(self):
""":return: Git directory we are working on"""
return self._working_dir
+
+ @property
+ def version_info(self):
+ """:return: tuple(int, ...) tuple with integers representing the major, minor
+ and additional version numbers as parsed from git version.
+ This value is generated on demand and is cached"""
+ return self._version_info
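+
+ # A usage sketch (hypothetical guard, not part of this class): the tuple form
+ # allows lexicographic comparison against a required minimum version:
+ #
+ #   g = Git()
+ #   if g.version_info >= (1, 7, 0):
+ #       pass # rely on behaviour introduced in git 1.7.0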
def execute(self, command,
istream=None,
diff --git a/git/config.py b/git/config.py
index f1a8832e..c71bb8ca 100644
--- a/git/config.py
+++ b/git/config.py
@@ -120,11 +120,12 @@ class GitConfigParser(cp.RawConfigParser, object):
# They must be compatible to the LockFile interface.
# A suitable alternative would be the BlockingLockFile
t_lock = LockFile
+ re_comment = re.compile('^\s*[#;]')
#} END configuration
OPTCRE = re.compile(
- r'\s?(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace
+ r'\s*(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace
r'\s*(?P<vi>[:=])\s*' # any number of space/tab,
# followed by separator
# (either : or =), followed
@@ -211,16 +212,16 @@ class GitConfigParser(cp.RawConfigParser, object):
break
lineno = lineno + 1
# comment or blank line?
- if line.strip() == '' or line[0] in '#;':
+ if line.strip() == '' or self.re_comment.match(line):
continue
if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
# no leading whitespace
continue
else:
# is it a section header?
- mo = self.SECTCRE.match(line)
+ mo = self.SECTCRE.match(line.strip())
if mo:
- sectname = mo.group('header')
+ sectname = mo.group('header').strip()
if sectname in self._sections:
cursect = self._sections[sectname]
elif sectname == cp.DEFAULTSECT:
@@ -332,6 +333,10 @@ class GitConfigParser(cp.RawConfigParser, object):
close_fp = True
else:
fp.seek(0)
+ # truncate the file, so that stale data from previous, longer content cannot remain
+ if hasattr(fp, 'truncate'):
+ fp.truncate()
+ #END
# END handle stream or file
# WRITE DATA
diff --git a/git/db.py b/git/db.py
deleted file mode 100644
index b1c65377..00000000
--- a/git/db.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Module with our own gitdb implementation - it uses the git command"""
-from exc import (
- GitCommandError,
- BadObject
- )
-
-from gitdb.base import (
- OInfo,
- OStream
- )
-
-from gitdb.util import (
- bin_to_hex,
- hex_to_bin
- )
-from gitdb.db import GitDB
-from gitdb.db import LooseObjectDB
-
-
-__all__ = ('GitCmdObjectDB', 'GitDB' )
-
-#class GitCmdObjectDB(CompoundDB, ObjectDBW):
-class GitCmdObjectDB(LooseObjectDB):
- """A database representing the default git object store, which includes loose
- objects, pack files and an alternates file
-
- It will create objects only in the loose object database.
- :note: for now, we use the git command to do all the lookup, just until he
- have packs and the other implementations
- """
- def __init__(self, root_path, git):
- """Initialize this instance with the root and a git command"""
- super(GitCmdObjectDB, self).__init__(root_path)
- self._git = git
-
- def info(self, sha):
- hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
- return OInfo(hex_to_bin(hexsha), typename, size)
-
- def stream(self, sha):
- """For now, all lookup is done by git itself"""
- hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
- return OStream(hex_to_bin(hexsha), typename, size, stream)
-
-
- # { Interface
-
- def partial_to_complete_sha_hex(self, partial_hexsha):
- """:return: Full binary 20 byte sha from the given partial hexsha
- :raise AmbiguousObjectName:
- :raise BadObject:
- :note: currently we only raise BadObject as git does not communicate
- AmbiguousObjects separately"""
- try:
- hexsha, typename, size = self._git.get_object_header(partial_hexsha)
- return hex_to_bin(hexsha)
- except (GitCommandError, ValueError):
- raise BadObject(partial_hexsha)
- # END handle exceptions
-
- #} END interface
diff --git a/git/db/__init__.py b/git/db/__init__.py
new file mode 100644
index 00000000..25948326
--- /dev/null
+++ b/git/db/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from interface import *
diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
new file mode 100644
index 00000000..ef22c931
--- /dev/null
+++ b/git/db/cmd/base.py
@@ -0,0 +1,821 @@
+"""module with git command implementations of the basic interfaces
+:note: we could add all implementations of the basic interfaces, it's more efficient though
+ to obtain them from the pure implementation"""
+from git.exc import (
+ GitCommandError,
+ BadObject
+ )
+
+from git.base import (
+ OInfo,
+ OStream
+ )
+
+from git.util import (
+ bin_to_hex,
+ hex_to_bin,
+ isfile,
+ join_path,
+ join,
+ Actor,
+ IterableList,
+ )
+from git.db.interface import (
+ FetchInfo,
+ PushInfo,
+ HighLevelRepository,
+ TransportDB,
+ RemoteProgress
+ )
+from git.cmd import Git
+from git.refs import (
+ Reference,
+ RemoteReference,
+ SymbolicReference,
+ TagReference
+ )
+from git.objects.commit import Commit
+from cStringIO import StringIO
+import re
+import os
+import sys
+
+
+__all__ = ('CmdTransportMixin', 'GitCommandMixin', 'CmdPushInfo', 'CmdFetchInfo',
+ 'CmdRemoteProgress', 'CmdObjectDBRMixin', 'CmdHighLevelRepository')
+
+
+#{ Utilities
+
+def touch(filename):
+ fp = open(filename, "a")
+ fp.close()
+
+
+def digest_process_messages(fh, progress):
+ """Read progress messages from file-like object fh, supplying the respective
+ progress messages to the progress instance.
+
+ :return: list(line, ...) list of lines without linebreaks that did
+ not contain progress information"""
+ line_so_far = ''
+ dropped_lines = list()
+ while True:
+ char = fh.read(1)
+ if not char:
+ break
+
+ if char in ('\r', '\n'):
+ dropped_lines.extend(progress._parse_progress_line(line_so_far))
+ line_so_far = ''
+ else:
+ line_so_far += char
+ # END process parsed line
+ # END while file is not done reading
+ return dropped_lines
+
+def finalize_process(proc):
+ """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
+ try:
+ proc.wait()
+ except GitCommandError,e:
+ # if a push has rejected items, the command has non-zero return status
+ # a return status of 128 indicates a connection error - reraise the previous one
+ if proc.poll() == 128:
+ raise
+ pass
+ # END exception handling
+
+
+def get_fetch_info_from_stderr(repo, proc, progress):
+ # skip first line as it is some remote info we are not interested in
+ output = IterableList('name')
+
+
+ # lines which are no progress are fetch info lines
+ # this also waits for the command to finish
+ # Skip some progress lines that don't provide relevant information
+ fetch_info_lines = list()
+ for line in digest_process_messages(proc.stderr, progress):
+ if line.startswith('From') or line.startswith('remote: Total'):
+ continue
+ elif line.startswith('warning:'):
+ print >> sys.stderr, line
+ continue
+ elif line.startswith('fatal:'):
+ raise GitCommandError(("Error when fetching: %s" % line,), 2)
+ # END handle special messages
+ fetch_info_lines.append(line)
+ # END for each line
+
+ # read head information
+ fp = open(join(repo.git_dir, 'FETCH_HEAD'),'r')
+ fetch_head_info = fp.readlines()
+ fp.close()
+
+ assert len(fetch_info_lines) == len(fetch_head_info)
+
+ output.extend(CmdFetchInfo._from_line(repo, err_line, fetch_line)
+ for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info))
+
+ finalize_process(proc)
+ return output
+
+def get_push_info(repo, remotename_or_url, proc, progress):
+ # read progress information from stderr
+ # we hope stdout can hold all the data, it should ...
+ # read the lines manually as it will use carriage returns between the messages
+ # to override the previous one. This is why we read the bytes manually
+ digest_process_messages(proc.stderr, progress)
+
+ output = IterableList('name')
+ for line in proc.stdout.readlines():
+ try:
+ output.append(CmdPushInfo._from_line(repo, remotename_or_url, line))
+ except ValueError:
+ # if an error happens, additional info is given which we cannot parse
+ pass
+ # END exception handling
+ # END for each line
+
+ finalize_process(proc)
+ return output
+
+def add_progress(kwargs, git, progress):
+ """Add the --progress flag to the given kwargs dict if supported by the
+ git command. If the given progress instance does not wrap an actual user
+ provided progress, we do not request any progress output
+ :return: possibly altered kwargs"""
+ if progress._progress is not None:
+ v = git.version_info
+ if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3:
+ kwargs['progress'] = True
+ #END handle --progress
+ #END handle progress
+ return kwargs
+
+#} END utilities
+
+class CmdRemoteProgress(RemoteProgress):
+ """
+ A RemoteProgress implementation taking a user derived progress instance and
+ calling the respective methods on it.
+ """
+ __slots__ = ("_seen_ops", '_progress')
+ re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
+ re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
+
+ def __init__(self, progress_instance = None):
+ self._seen_ops = list()
+ if progress_instance is None:
+ progress_instance = RemoteProgress()
+ #END assure proper instance
+ self._progress = progress_instance
+
+ def _parse_progress_line(self, line):
+ """Parse progress information from the given line as retrieved by git-push
+ or git-fetch
+
+ Call the contained progress instance's update(), __call__() and line_dropped()
+ methods according to the parsed result.
+
+ :return: list(line, ...) list of lines that could not be processed"""
+ # handle
+ # Counting objects: 4, done.
+ # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
+ sub_lines = line.split('\r')
+ failed_lines = list()
+ for sline in sub_lines:
+ # find escape characters and cut them away - regex will not work with
+ # them as they are non-ascii. As git might expect a tty, it will send them
+ last_valid_index = None
+ for i,c in enumerate(reversed(sline)):
+ if ord(c) < 32:
+ # it's a slice index
+ last_valid_index = -i-1
+ # END character was non-ascii
+ # END for each character in sline
+ if last_valid_index is not None:
+ sline = sline[:last_valid_index]
+ # END cut away invalid part
+ sline = sline.rstrip()
+
+ cur_count, max_count = None, None
+ match = self.re_op_relative.match(sline)
+ if match is None:
+ match = self.re_op_absolute.match(sline)
+
+ if not match:
+ self._progress.line_dropped(sline)
+ failed_lines.append(sline)
+ continue
+ # END could not get match
+
+ op_code = 0
+ remote, op_name, percent, cur_count, max_count, message = match.groups()
+
+ # get operation id
+ if op_name == "Counting objects":
+ op_code |= self.COUNTING
+ elif op_name == "Compressing objects":
+ op_code |= self.COMPRESSING
+ elif op_name == "Writing objects":
+ op_code |= self.WRITING
+ elif op_name == "Receiving objects":
+ op_code |= self.RECEIVING
+ elif op_name == "Resolving deltas":
+ op_code |= self.RESOLVING
+ else:
+ raise ValueError("Operation name %r unknown" % op_name)
+
+ # figure out stage
+ if op_code not in self._seen_ops:
+ self._seen_ops.append(op_code)
+ op_code |= self.BEGIN
+ # END begin opcode
+
+ if message is None:
+ message = ''
+ # END message handling
+
+ message = message.strip()
+ done_token = ', done.'
+ if message.endswith(done_token):
+ op_code |= self.END
+ message = message[:-len(done_token)]
+ # END end message handling
+
+ self._progress.update(op_code, cur_count, max_count, message, line)
+ self._progress(message, line)
+ # END for each sub line
+ return failed_lines
+
+
+class CmdPushInfo(PushInfo):
+ """
+ Pure Python implementation of a PushInfo interface
+ """
+ __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha',
+ '_remotename_or_url', 'repo', 'summary')
+
+ _flag_map = { 'X' : PushInfo.NO_MATCH,
+ '-' : PushInfo.DELETED, '*' : 0,
+ '+' : PushInfo.FORCED_UPDATE,
+ ' ' : PushInfo.FAST_FORWARD,
+ '=' : PushInfo.UP_TO_DATE,
+ '!' : PushInfo.ERROR }
+
+ def __init__(self, flags, local_ref, remote_ref_string, repo, remotename_or_url, old_commit_binsha=None,
+ summary=''):
+ """ Initialize a new instance """
+ self.flags = flags
+ self.local_ref = local_ref
+ self.repo = repo
+ self.remote_ref_string = remote_ref_string
+ self._remotename_or_url = remotename_or_url
+ self.old_commit_binsha = old_commit_binsha
+ self.summary = summary
+
+ @property
+ def remote_ref(self):
+ """
+ :return:
+ Remote Reference or TagReference in the local repository corresponding
+ to the remote_ref_string kept in this instance."""
+ # translate heads to a local remote, tags stay as they are
+ if self.remote_ref_string.startswith("refs/tags"):
+ return TagReference(self.repo, self.remote_ref_string)
+ elif self.remote_ref_string.startswith("refs/heads"):
+ remote_ref = Reference(self.repo, self.remote_ref_string)
+ if '/' in self._remotename_or_url:
+ sys.stderr.write("Cannot provide RemoteReference instance if it was created from a url instead of of a remote name: %s. Returning Reference instance instead" % sefl._remotename_or_url)
+ return remote_ref
+ #END assert correct input
+ return RemoteReference(self.repo, "refs/remotes/%s/%s" % (str(self._remotename_or_url), remote_ref.name))
+ else:
+ raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
+ # END
+
+ @classmethod
+ def _from_line(cls, repo, remotename_or_url, line):
+ """Create a new PushInfo instance as parsed from line which is expected to be like
+ refs/heads/master:refs/heads/master 05d2687..1d0568e"""
+ control_character, from_to, summary = line.split('\t', 3)
+ flags = 0
+
+ # control character handling
+ try:
+ flags |= cls._flag_map[ control_character ]
+ except KeyError:
+ raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
+ # END handle control character
+
+ # from_to handling
+ from_ref_string, to_ref_string = from_to.split(':')
+ if flags & cls.DELETED:
+ from_ref = None
+ else:
+ from_ref = Reference.from_path(repo, from_ref_string)
+
+ # commit handling, could be message or commit info
+ old_commit_binsha = None
+ if summary.startswith('['):
+ if "[rejected]" in summary:
+ flags |= cls.REJECTED
+ elif "[remote rejected]" in summary:
+ flags |= cls.REMOTE_REJECTED
+ elif "[remote failure]" in summary:
+ flags |= cls.REMOTE_FAILURE
+ elif "[no match]" in summary:
+ flags |= cls.ERROR
+ elif "[new tag]" in summary:
+ flags |= cls.NEW_TAG
+ elif "[new branch]" in summary:
+ flags |= cls.NEW_HEAD
+ # uptodate encoded in control character
+ else:
+ # fast-forward or forced update - was encoded in control character,
+ # but we parse the old and new commit
+ split_token = "..."
+ if control_character == " ":
+ split_token = ".."
+ old_sha, new_sha = summary.split(' ')[0].split(split_token)
+ old_commit_binsha = repo.resolve(old_sha)
+ # END message handling
+
+ return cls(flags, from_ref, to_ref_string, repo, remotename_or_url, old_commit_binsha, summary)
+
+
+class CmdFetchInfo(FetchInfo):
+ """
+ Pure python implementation of a FetchInfo interface
+ """
+ __slots__ = ('ref','old_commit_binsha', 'flags', 'note')
+
+ # %c %-*s %-*s -> %s (%s)
+ re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+
+ _flag_map = { '!' : FetchInfo.ERROR,
+ '+' : FetchInfo.FORCED_UPDATE,
+ '-' : FetchInfo.TAG_UPDATE,
+ '*' : 0,
+ '=' : FetchInfo.HEAD_UPTODATE,
+ ' ' : FetchInfo.FAST_FORWARD }
+
+ def __init__(self, ref, flags, note = '', old_commit_binsha = None):
+ """
+ Initialize a new instance
+ """
+ self.ref = ref
+ self.flags = flags
+ self.note = note
+ self.old_commit_binsha = old_commit_binsha
+
+ def __str__(self):
+ return self.name
+
+ @property
+ def name(self):
+ """:return: Name of our remote ref"""
+ return self.ref.name
+
+ @property
+ def commit(self):
+ """:return: Commit of our remote ref"""
+ return self.ref.commit
+
+ @classmethod
+ def _from_line(cls, repo, line, fetch_line):
+ """Parse information from the given line as returned by git-fetch -v
+ and return a new CmdFetchInfo object representing this information.
+
+ We can handle a line as follows
+ "%c %-*s %-*s -> %s%s"
+
+ Where c is either ' ', !, +, -, *, or =
+ ! means error
+ + means success forcing update
+ - means a tag was updated
+ * means birth of new branch or tag
+ = means the head was up to date ( and not moved )
+ ' ' means a fast-forward
+
+ fetch line is the corresponding line from FETCH_HEAD, like
+ acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
+ match = cls.re_fetch_result.match(line)
+ if match is None:
+ raise ValueError("Failed to parse line: %r" % line)
+
+ # parse lines
+ control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
+ try:
+ new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
+ ref_type_name, fetch_note = fetch_note.split(' ', 1)
+ except ValueError: # unpack error
+ raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line)
+
+ # handle FETCH_HEAD and figure out ref type
+ # If we do not specify a target branch like master:refs/remotes/origin/master,
+ # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
+ # have. In that case we use a symbolic reference which is detached
+ ref_type = None
+ if remote_local_ref == "FETCH_HEAD":
+ ref_type = SymbolicReference
+ elif ref_type_name == "branch":
+ ref_type = RemoteReference
+ elif ref_type_name == "tag":
+ ref_type = TagReference
+ else:
+ raise TypeError("Cannot handle reference type: %r" % ref_type_name)
+
+ # create ref instance
+ if ref_type is SymbolicReference:
+ remote_local_ref = ref_type(repo, "FETCH_HEAD")
+ else:
+ remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip()))
+ # END create ref instance
+
+ note = ( note and note.strip() ) or ''
+
+ # parse flags from control_character
+ flags = 0
+ try:
+ flags |= cls._flag_map[control_character]
+ except KeyError:
+ raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
+ # END control char exception handling
+
+ # parse operation string for more info - makes no sense for symbolic refs
+ old_commit_binsha = None
+ if isinstance(remote_local_ref, Reference):
+ if 'rejected' in operation:
+ flags |= cls.REJECTED
+ if 'new tag' in operation:
+ flags |= cls.NEW_TAG
+ if 'new branch' in operation:
+ flags |= cls.NEW_HEAD
+ if '...' in operation or '..' in operation:
+ split_token = '...'
+ if control_character == ' ':
+ split_token = split_token[:-1]
+ old_commit_binsha = repo.resolve(operation.split(split_token)[0])
+ # END handle refspec
+ # END reference flag handling
+
+ return cls(remote_local_ref, flags, note, old_commit_binsha)
+
+
+class GitCommandMixin(object):
+ """A mixin to provide the git command object through the git property"""
+
+ def __init__(self, *args, **kwargs):
+ """Initialize this instance with the root and a git command"""
+ super(GitCommandMixin, self).__init__(*args, **kwargs)
+ self._git = Git(self.working_dir)
+
+ @property
+ def git(self):
+ return self._git
+
+
+class CmdObjectDBRMixin(object):
+ """A mixing implementing object reading through a git command
+ It will create objects only in the loose object database.
+ :note: for now, we use the git command to do all the lookup, just until he
+ have packs and the other implementations
+ """
+ #{ ODB Interface
+ # overrides from PureOdb Implementation, which is responsible only for writing
+ # objects
+ def info(self, sha):
+ hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
+ return OInfo(hex_to_bin(hexsha), typename, size)
+
+ def stream(self, sha):
+ """For now, all lookup is done by git itself
+ :note: As we don't know when the stream is actually read (and if it is
+ stored for later use) we read the data right away and cache it.
+ This has HUGE performance implications, both for memory and for
+ reading/deserializing objects, but we have no other choice in order
+ to make the database behaviour consistent with other implementations!"""
+
+ hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+ return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: Full binary 20 byte sha from the given partial hexsha
+ :raise AmbiguousObjectName:
+ :raise BadObject:
+ :note: currently we only raise BadObject as git does not communicate
+ AmbiguousObjects separately"""
+ try:
+ hexsha, typename, size = self._git.get_object_header(partial_hexsha)
+ return hex_to_bin(hexsha)
+ except (GitCommandError, ValueError):
+ raise BadObject(partial_hexsha)
+ # END handle exceptions
+
+ #} END odb interface
+
+
+class CmdTransportMixin(TransportDB):
+ """A mixin requiring the .git property as well as repository paths
+
+ It will create objects only in the loose object database.
+ :note: for now, we use the git command to do all the lookup, just until we
+ have packs and the other implementations
+ """
+
+ #{ Transport DB interface
+
+ def push(self, url, refspecs=None, progress=None, **kwargs):
+ """Push given refspecs using the git default implementation
+ :param url: may be a remote name or a url
+ :param refspecs: single string, RefSpec instance or list of such or None.
+ :param progress: RemoteProgress derived instance or None
+ :param **kwargs: Additional arguments to be passed to the git-push process"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git, progress))
+ return get_push_info(self, url, proc, progress)
+
+ def pull(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch and merge the given refspecs.
+ If no refspecs are given, the merge will only work properly if you
+ have set up upstream (tracking) branches.
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ def fetch(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch the latest changes
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ #} end transport db interface
+
+
+class CmdHighLevelRepository(HighLevelRepository):
+ """An intermediate interface carrying advanced git functionality that can be used
+ in other compound repositories which do not implement this functionality themselves.
+
+ The mixin must be used with repositories compatible to the GitCommandMixin.
+
+ :note: at some point, methods provided here are supposed to be provided by custom interfaces"""
+ DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
+
+ # precompiled regex
+ re_whitespace = re.compile(r'\s+')
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+ re_author_committer_start = re.compile(r'^(author|committer)')
+ re_tab_full_line = re.compile(r'^\t(.*)$')
+
+ #{ Configuration
+ CommitCls = Commit
+ GitCls = Git
+ #} END configuration
+
+ def daemon_export():
+ def _get_daemon_export(self):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ return os.path.exists(filename)
+
+ def _set_daemon_export(self, value):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ fileexists = os.path.exists(filename)
+ if value and not fileexists:
+ touch(filename)
+ elif not value and fileexists:
+ os.unlink(filename)
+
+ return property(_get_daemon_export, _set_daemon_export,
+ doc="If True, git-daemon may export this repository")
+
+ daemon_export = daemon_export()
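+
+ # note: the function above runs once at class creation time and is immediately
+ # replaced by the property object it returns - an idiom which keeps the getter
+ # and setter functions out of the class namespace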
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ if self._bare:
+ # Bare repositories with no associated working directory are
+ # always considered to be clean.
+ return False
+
+ # start from the one which is fastest to evaluate
+ default_args = ('--abbrev=40', '--full-index', '--raw')
+ if index:
+ # diff index against HEAD
+ if isfile(self.index.path) and self.head.is_valid() and \
+ len(self.git.diff('HEAD', '--cached', *default_args)):
+ return True
+ # END index handling
+ if working_tree:
+ # diff index against working tree
+ if len(self.git.diff(*default_args)):
+ return True
+ # END working tree handling
+ if untracked_files:
+ if len(self.untracked_files):
+ return True
+ # END untracked files
+ return False
+
+ @property
+ def untracked_files(self):
+ # make sure we get all files, not only untracked directories
+ proc = self.git.status(untracked_files=True, as_process=True)
+ stream = iter(proc.stdout)
+ untracked_files = list()
+ for line in stream:
+ if not line.startswith("# Untracked files:"):
+ continue
+ # skip two lines
+ stream.next()
+ stream.next()
+
+ for untracked_info in stream:
+ if not untracked_info.startswith("#\t"):
+ break
+ untracked_files.append(untracked_info.replace("#\t", "").rstrip())
+ # END for each untracked info line
+ # END for each line
+ return untracked_files
+
+ def blame(self, rev, file):
+ data = self.git.blame(rev, '--', file, p=True)
+ commits = dict()
+ blames = list()
+ info = None
+
+ for line in data.splitlines(False):
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if self.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ # END blame data initialization
+ else:
+ m = self.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = int(parts[-1])
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ # <and rest>
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = self.CommitCls( self, hex_to_bin(sha),
+ author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit objects needs initial creation
+ m = self.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append( text )
+ info = None
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
+ return blames
+
+ @classmethod
+ def init(cls, path=None, mkdir=True, **kwargs):
+ """
+ :param kwargs:
+ keyword arguments serving as additional options to the git-init command
+
+ For more information, see the respective docs of HighLevelRepository"""
+
+ if mkdir and path and not os.path.exists(path):
+ os.makedirs(path, 0755)
+
+ # git command automatically chdir into the directory
+ git = cls.GitCls(path)
+ output = git.init(**kwargs)
+ return cls(path)
+
+ @classmethod
+ def _clone(cls, git, url, path, progress, **kwargs):
+ # special handling for windows for path at which the clone should be
+ # created.
+ # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
+ # we at least give a proper error instead of letting git fail
+ prev_cwd = None
+ prev_path = None
+ if os.name == 'nt':
+ if '~' in path:
+ raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
+
+ # on windows, git will think paths like c: are relative and prepend the
+ # current working dir ( before it fails ). We temporarily adjust the working
+ # dir to make this actually work
+ match = re.match("(\w:[/\\\])(.*)", path)
+ if match:
+ prev_cwd = os.getcwd()
+ prev_path = path
+ drive, rest_of_path = match.groups()
+ os.chdir(drive)
+ path = rest_of_path
+ kwargs['with_keep_cwd'] = True
+ # END cwd preparation
+ # END windows handling
+
+ try:
+ proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress))
+ if progress is not None:
+ digest_process_messages(proc.stderr, progress)
+ #END digest progress messages
+ finalize_process(proc)
+ finally:
+ if prev_cwd is not None:
+ os.chdir(prev_cwd)
+ path = prev_path
+ # END reset previous working dir
+ # END bad windows handling
+
+ # our git command could have a different working dir than our actual
+ # environment, hence we prepend its working dir if required
+ if not os.path.isabs(path) and git.working_dir:
+ path = join(git._working_dir, path)
+
+ # adjust remotes - there may be operating systems which use backslashes.
+ # These might be given as initial paths, but when handling the config file
+ # that contains the remote from which we were cloned, git stops liking it
+ # as it will escape the backslashes. Hence we undo the escaping just to be
+ # sure
+ repo = cls(os.path.abspath(path))
+ if repo.remotes:
+ repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
+ # END handle remote repo
+ return repo
+
+ def clone(self, path, progress = None, **kwargs):
+ """
+ :param kwargs:
+ All remaining keyword arguments are given to the git-clone command
+
+ For more information, see the respective method in HighLevelRepository"""
+ return self._clone(self.git, self.git_dir, path, CmdRemoteProgress(progress), **kwargs)
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress = None, **kwargs):
+ """
+ :param kwargs: see the ``clone`` method
+ For more information, see the respective method in the HighLevelRepository"""
+ return cls._clone(cls.GitCls(os.getcwd()), url, to_path, CmdRemoteProgress(progress), **kwargs)
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """For all args see HighLevelRepository interface
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+
+ :raise GitCommandError: in case something went wrong"""
+ if treeish is None:
+ treeish = self.head.commit
+ if prefix and 'prefix' not in kwargs:
+ kwargs['prefix'] = prefix
+ kwargs['output_stream'] = ostream
+
+ self.git.archive(treeish, **kwargs)
+ return self
diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py
new file mode 100644
index 00000000..49e8c590
--- /dev/null
+++ b/git/db/cmd/complex.py
@@ -0,0 +1,16 @@
+"""Module with our own git implementation - it uses the git command"""
+
+from git.db.compat import RepoCompatibilityInterface
+from base import *
+
+
+__all__ = ['CmdPartialGitDB']
+
+
+class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin,
+ CmdHighLevelRepository ):
+ """Utility repository which only partially implements all required methods.
+ It cannot be reliably used alone, but is provided to allow mixing it with other
+ implementations"""
+ pass
+
diff --git a/git/db/compat.py b/git/db/compat.py
new file mode 100644
index 00000000..767ab5e0
--- /dev/null
+++ b/git/db/compat.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module providing adaptors to maintain backwards compatability"""
+
+class RepoCompatibilityInterface(object):
+ """Interface to install backwards compatability of the new complex repository
+ types with the previous, all in one, repository."""
+
+ @property
+ def bare(self):
+ return self.is_bare
+
+ def rev_parse(self, *args, **kwargs):
+ return self.resolve_object(*args, **kwargs)
+
+ @property
+ def odb(self):
+ """The odb is now an integrated part of each repository"""
+ return self
+
+ @property
+ def active_branch(self):
+ """The name of the currently active branch.
+
+ :return: Head to the active branch"""
+ return self.head.reference
+
+ def __repr__(self):
+ return '<git.Repo "%s">' % self.git_dir
diff --git a/git/db/complex.py b/git/db/complex.py
new file mode 100644
index 00000000..31b047a0
--- /dev/null
+++ b/git/db/complex.py
@@ -0,0 +1,28 @@
+"""Module with many useful complex databases with different useful combinations of primary implementations"""
+
+from py.complex import PurePartialGitDB
+from cmd.complex import CmdPartialGitDB
+from compat import RepoCompatibilityInterface
+
+__all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityGitDB']
+
+class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
+ """A database which uses primarily the git command implementation, but falls back
+ to pure python where it is more feasible
+ :note: To assure consistent behaviour across implementations, when calling the
+ ``stream()`` method a cache is created. This makes this implementation a bad
+ choice when reading big files as these are streamed from memory in all cases."""
+
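+# Python's method resolution order is what makes these combinations work: the
+# first base class providing a method wins. CmdGitDB thus prefers the git command
+# code paths, while PureGitDB below reverses the base class order to prefer the
+# pure python implementation.
+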
+class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
+ """A database which fills in its missing implementation using the pure python
+ implementation"""
+ pass
+
+class PureGitDB(PurePartialGitDB, CmdPartialGitDB):
+ """A repository which uses the pure implementation primarily, but falls back
+ on using the git command for high-level functionality"""
+
+class PureCompatibilityGitDB(RepoCompatibilityInterface, PureGitDB):
+ """Repository which uses the pure implementation primarily, but falls back
+ to the git command implementation. Please note that the CmdGitDB does it
+ the opposite way around."""
diff --git a/git/db/interface.py b/git/db/interface.py
new file mode 100644
index 00000000..a4c05265
--- /dev/null
+++ b/git/db/interface.py
@@ -0,0 +1,838 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains interfaces for basic database building blocks"""
+
+__all__ = ( 'ObjectDBR', 'ObjectDBW', 'RootPathDB', 'CompoundDB', 'CachingDB',
+ 'TransportDB', 'ConfigurationMixin', 'RepositoryPathsMixin',
+ 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin', 'SubmoduleDB',
+ 'IndexDB', 'HighLevelRepository')
+
+
+class ObjectDBR(object):
+ """Defines an interface for object database lookup.
+ Objects are identified by their 20 byte binary sha"""
+
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 20 bytes
+ binary sha is contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_object_async(self, reader):
+ """Return a reader yielding information about the membership of objects
+ as identified by shas
+ :param reader: Reader yielding 20 byte shas.
+ :return: async.Reader yielding (sha, bool) pairs which indicate
+ whether the given sha exists in the database or not"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, reader):
+ """Retrieve information of a multitude of objects asynchronously
+ :param reader: Channel yielding the sha's of the objects of interest
+ :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, reader):
+ """Retrieve the OStream of multiple objects
+ :param reader: see ``info``
+ :param max_threads: see ``ObjectDBW.store``
+ :return: async.Reader yielding OStream|InvalidOStream instances in any order
+ :note: depending on the system configuration, it might not be possible to
+ read all OStreams at once. Instead, read them individually using reader.read(x)
+ where x is small enough."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def size(self):
+ """:return: amount of objects in this database"""
+ raise NotImplementedError()
+
+ def sha_iter(self):
+ """Return iterator yielding 20 byte shas for all objects in this data base"""
+ raise NotImplementedError()
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :param partial_hexsha: hexsha with less than 40 characters
+ :raise AmbiguousObjectName: If multiple objects would match the given sha
+ :raise BadObject: If the object was not found"""
+ raise NotImplementedError()
+
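+ # A usage sketch (``db`` stands for any ObjectDBR implementation, the hexsha
+ # prefix is made up):
+ #
+ #   binsha = db.partial_to_complete_sha_hex('0fa2')
+ #   info = db.info(binsha) # regular queries expect the full binary sha
+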
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical (hexadecimal) representation.
+ It is required as binary shas cannot convey whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+ #} END query interface
+
+
+class ObjectDBW(object):
+ """Defines an interface to create objects in the database"""
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """
+ Adjusts the stream to which all data should be sent when storing new objects
+
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def ostream(self):
+ """
+ :return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store(self, istream):
+ """
+ Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in the our database format,
+ in which case the input stream is expected to be in object format ( header + contents ).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, reader):
+ """
+ Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ Each IStream's sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+
+ :param reader: async.Reader yielding IStream instances.
+ The same instances will be used in the output channel as were received
+ in by the Reader.
+
+ :note: As some ODB implementations implement this operation atomically, they might
+ abort the whole operation if one item could not be processed. Hence check how
+ many items have actually been produced."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END edit interface
+
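+# Sketch of storing a new blob through the edit interface (illustrative;
+# 'db' is any ObjectDBW implementation, IStream is assumed to come from
+# git.base):
+#
+#   from cStringIO import StringIO
+#   data = "my data"
+#   istream = db.store(IStream("blob", len(data), StringIO(data)))
+#   print istream.hexsha    # the sha computed while storing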
+
+class RootPathDB(object):
+ """Provides basic facilities to retrieve files of interest"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ :note: The base will not perform any accessablity checking as the base
+ might not yet be accessible, but become accessible before the first
+ access."""
+ super(RootPathDB, self).__init__(root_path)
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ raise NotImplementedError()
+
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ one to potentially access data files"""
+ raise NotImplementedError()
+ #} END interface
+
+
+class CachingDB(object):
+ """A database which uses caches to speed-up access"""
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Call this method if the underlying data changed to trigger an update
+ of the internal caching structures.
+
+ :param force: if True, the update must be performed. Otherwise the implementation
+ may decide not to perform an update if it thinks nothing has changed.
+ :return: True if an update was performed because something had indeed changed"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class CompoundDB(object):
+ """A database which delegates calls to sub-databases.
+ They should usually be cached and lazy-loaded"""
+
+ #{ Interface
+
+ def databases(self):
+ """:return: tuple of database instances we use for lookups"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class IndexDB(object):
+ """A database which provides a flattened index to all objects in its currently
+ active tree."""
+ @property
+ def index(self):
+ """:return: IndexFile compatible instance"""
+ raise NotImplementedError()
+
+
+class RefSpec(object):
+ """A refspec is a simple container which provides information about the way
+ something should be fetched or pushed. It requires symbols to describe
+ the actual objects, which is done using reference names (or respective instances
+ which resolve to actual reference names)."""
+ __slots__ = ('source', 'destination', 'force')
+
+ def __init__(self, source, destination, force=False):
+ """initalize the instance with the required values
+ :param source: reference name or instance. If None, the Destination
+ is supposed to be deleted."""
+ self.source = source
+ self.destination = destination
+ self.force = force
+ if self.destination is None:
+ raise ValueError("Destination must be set")
+
+ def __str__(self):
+ """:return: a git-style refspec"""
+ s = str(self.source)
+ if self.source is None:
+ s = ''
+ #END handle source
+ d = str(self.destination)
+ p = ''
+ if self.force:
+ p = '+'
+ #END handle force
+ res = "%s%s:%s" % (p, s, d)
+
+ def delete_destination(self):
+ return self.source is None
+
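+# Illustrative RefSpec usage (the reference names are hypothetical):
+#
+#   spec = RefSpec('refs/heads/master', 'refs/remotes/origin/master', force=True)
+#   str(spec)    # '+refs/heads/master:refs/remotes/origin/master'
+#   RefSpec(None, 'refs/heads/old').delete_destination()    # True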
+
+class RemoteProgress(object):
+ """
+ Handler providing an interface to parse progress information emitted by git-push
+ and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+
+ Subclasses should derive from this type.
+ """
+ _num_op_codes = 7
+ BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)]
+ STAGE_MASK = BEGIN|END
+ OP_MASK = ~STAGE_MASK
+
+ #{ Subclass Interface
+
+ def line_dropped(self, line):
+ """Called whenever a line could not be understood and was therefore dropped."""
+ pass
+
+ def update(self, op_code, cur_count, max_count=None, message='', input=''):
+ """Called whenever the progress changes
+
+ :param op_code:
+ Integer allowing to be compared against Operation IDs and stage IDs.
+
+ Stage IDs are BEGIN and END. BEGIN and END will each be set only once
+ per Operation ID. They may both be set at once in case only
+ one progress message was emitted due to the speed of the operation.
+ Between BEGIN and END, neither of these flags will be set
+
+ Operation IDs are all held within the OP_MASK. Only one Operation ID will
+ be active per call.
+ :param cur_count: Current absolute count of items
+
+ :param max_count:
+ The maximum count of items we expect. It may be None in case there is
+ no maximum number of items or if it is (yet) unknown.
+
+ :param message:
+ In case of the 'WRITING' operation, it contains the amount of bytes
+ transferred. It may possibly be used for other purposes as well.
+
+ :param input:
+ The actual input string that was used to parse the information from.
+ This is usually a line from the output of git-fetch, but really
+ depends on the implementation
+
+ You may read the contents of the current line in self._cur_line"""
+ pass
+
+ def __call__(self, message, input=''):
+ """Same as update, but with a simpler interface which only provides the
+ message of the operation.
+ :note: This method will be called in addition to the update method. It is
+ up to you which one you implement"""
+ pass
+ #} END subclass interface
+
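+# Sketch of a handler built on the subclass interface above (illustrative
+# only; the output formatting is an assumption):
+#
+#   class PrintProgress(RemoteProgress):
+#       def update(self, op_code, cur_count, max_count=None, message='', input=''):
+#           op_id = op_code & self.OP_MASK
+#           if op_code & self.BEGIN:
+#               print "starting operation %i" % op_id
+#           print "%s / %s %s" % (cur_count, max_count, message)
+#       # END update callback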
+
+class PushInfo(object):
+ """A type presenting information about the result of a push operation for exactly
+ one refspec
+
+ flags # bitflags providing more information about the result
+ local_ref # Reference pointing to the local reference that was pushed
+ # It is None if the ref was deleted.
+ remote_ref_string # path to the remote reference located on the remote side
+ remote_ref # Remote Reference on the local side corresponding to
+ # the remote_ref_string. It can be a TagReference as well.
+ old_commit_binsha # binary sha of the commit at which the remote_ref was standing before we pushed
+ # it to local_ref.commit. Will be None if an error was indicated
+ summary # summary line providing human readable english text about the push
+ """
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
+ FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
+
+
+class FetchInfo(object):
+ """A type presenting information about the fetch operation on exactly one refspec
+
+ The following members are defined:
+ ref # name of the reference to the changed
+ # remote head or FETCH_HEAD. Implementations can provide
+ # actual class instances which convert to a respective string
+ flags # additional flags to be & with enumeration members,
+ # i.e. info.flags & info.REJECTED
+ # is 0 if ref is FETCH_HEAD
+ note # additional notes given by the fetch-pack implementation intended for the user
+ old_commit_binsha# if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
+ # field is set to the previous location of ref as binary sha or None"""
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
+ FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
+
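+# Flag checks work alike for PushInfo and FetchInfo (illustrative; 'info'
+# here is a FetchInfo instance as returned by TransportDB.fetch below):
+#
+#   if info.flags & info.ERROR:
+#       print "fetch failed for %s" % info.ref
+#   elif info.flags & (info.FORCED_UPDATE | info.FAST_FORWARD):
+#       print "ref moved away from %r" % (info.old_commit_binsha, )
+#   # END flag handling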
+
+class TransportDB(object):
+ """A database which allows to transport objects from and to different locations
+ which are specified by urls (location) and refspecs (what to transport,
+ see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+ At the beginning of a transport operation, it will be determined which objects
+ have to be sent (either by this or by the other side).
+
+ Afterwards a pack with the required objects is sent (or received). If there is
+ nothing to send, the pack will be empty.
+
+ As refspecs involve symbolic names for references to be handled, we require
+ RefParse functionality. How this is done is up to the actual implementation."""
+ # The following variables need to be set by the derived class
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ """Fetch the objects defined by the given refspec from the given url.
+ :param url: url identifying the source of the objects. It may also be
+ a symbol from which the respective url can be resolved, like the
+ name of the remote. The implementation should allow objects as input
+ as well; these are assumed to resolve to a meaningful string though.
+ :param refspecs: iterable of reference specifiers or RefSpec instances,
+ identifying the references to be fetched from the remote.
+ :param progress: RemoteProgress derived instance which receives progress messages for user consumption or None
+ :param kwargs: may be used for additional parameters that the actual implementation could
+ find useful.
+ :return: List of FetchInfo compatible instances which provide information about what
+ was fetched, in the order of the input refspecs.
+ :note: even if the operation did not fail as a whole, individual FetchInfo instances
+ may still report errors or failures for part of the refspecs.
+ :raise: if any issue occurs during the transport or if the url is not
+ supported by the protocol.
+ """
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ """Transport the objects identified by the given refspec to the remote
+ at the given url.
+ :param url: Describes the location which is to receive the objects
+ see fetch() for more details
+ :param refspecs: iterable of refspecs strings or RefSpec instances
+ to identify the objects to push
+ :param progress: see fetch()
+ :param kwargs: additional arguments which may be provided by the caller
+ as they may be useful to the actual implementation
+ :todo: what to return ?
+ :raise: if any issue arises during transport or if the url cannot be handled"""
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ """:return: An IterableList of Remote objects allowing to access and manipulate remotes
+ :note: Remote objects can also be used for the actual push or fetch operation"""
+ raise NotImplementedError()
+
+ def remote(self, name='origin'):
+ """:return: Remote object with the given name
+ :note: it does not necessarily exist, hence this is just a more convenient way
+ to construct Remote objects"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+ #{ Utility Methods
+
+ def create_remote(self, name, url, **kwargs):
+ """Create a new remote with the given name pointing to the given url
+ :return: Remote instance, compatible to the Remote interface"""
+ return Remote.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ """Delete the given remote.
+ :param remote: a Remote instance"""
+ return Remote.remove(self, remote)
+
+ #} END utility methods
+
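+# How a transport round-trip might look (illustrative; 'db' is a TransportDB
+# implementation, the url and refspec values are hypothetical):
+#
+#   for info in db.fetch('origin', ('refs/heads/master:refs/remotes/origin/master', )):
+#       print info.note
+#   # END handle fetch results
+#   db.push('origin', (RefSpec('refs/heads/master', 'refs/heads/master'), ))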
+
+class ReferencesMixin(object):
+ """Database providing reference objects which in turn point to database objects
+ like Commits or Tag(Object)s.
+
+ The returned types are compatible to the interfaces of the pure python
+ reference implementation in GitDB.ref"""
+
+ def resolve(self, name):
+ """Resolve the given name into a binary sha. Valid names are as defined
+ in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+ :return: binary sha matching the name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+
+ def resolve_object(self, name):
+ """As ``resolve()``, but returns the Objecft instance pointed to by the
+ resolved binary sha
+ :return: Object instance of the correct type, e.g. shas pointing to commits
+ will be represented by a Commit object"""
+ raise NotImplementedError()
+
+ @property
+ def references(self):
+ """:return: iterable list of all Reference objects representing tags, heads
+ and remote references. This is the most general method to obtain any
+ references."""
+ raise NotImplementedError()
+
+ @property
+ def heads(self):
+ """:return: IterableList with HeadReference objects pointing to all
+ heads in the repository."""
+ raise NotImplementedError()
+
+ @property
+ def head(self):
+ """:return: HEAD Object pointing to the current head reference"""
+ raise NotImplementedError()
+
+ @property
+ def tags(self):
+ """:return: An IterableList of TagReferences or compatible items that
+ are available in this repo"""
+ raise NotImplementedError()
+
+ #{ Utility Methods
+
+ def tag(self, name):
+ """:return: Tag with the given name
+ :note: It does not necessarily exist, hence this is just a more convenient
+ way to construct TagReference objects"""
+ raise NotImplementedError()
+
+
+ def commit(self, rev=None):
+ """The Commit object for the specified revision
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return: Commit compatible object"""
+ raise NotImplementedError()
+
+ def iter_trees(self, *args, **kwargs):
+ """:return: Iterator yielding Tree compatible objects
+ :note: Takes all arguments known to iter_commits method"""
+ raise NotImplementedError()
+
+ def tree(self, rev=None):
+ """The Tree (compatible) object for the given treeish revision
+ Examples::
+
+ repo.tree(repo.heads[0])
+
+ :param rev: is a revision pointing to a Treeish ( being a commit or tree )
+ :return: ``git.Tree``
+
+ :note:
+ If you need a non-root level tree, find it by iterating the root tree. Otherwise
+ it cannot know about its path relative to the repository root and subsequent
+ operations might have unexpected results."""
+ raise NotImplementedError()
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ """A list of Commit objects representing the history of a given ref/commit
+
+ :param rev:
+ revision specifier, see git-rev-parse for viable options.
+ If None, the active branch will be used.
+
+ :param paths:
+ an optional path or a list of paths to limit the returned commits to;
+ commits that do not contain the path or paths will not be returned.
+
+ :param kwargs:
+ Arguments to be passed to git-rev-list - common ones are
+ max_count and skip
+
+ :note: to receive only commits between two named revisions, use the
+ "revA..revB" revision specifier
+
+ :return: iterator yielding Commit compatible instances"""
+ raise NotImplementedError()
+
+
+ #} END utility methods
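+ # The query methods above compose naturally (illustrative; 'db' is a
+ # ReferencesMixin implementation and the revision strings are hypothetical):
+ #
+ #   binsha = db.resolve('HEAD~1')
+ #   tree = db.tree('v0.3.0')
+ #   for c in db.iter_commits('master', max_count=10):
+ #       print c.hexsha
+ #   # END iterate commits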
+
+ #{ Edit Methods
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None):
+ """Create a new head within the repository.
+ :param commit: a resolvable name to the commit or a Commit or Reference instance the new head should point to
+ :param force: if True, a head will be created even though it already exists.
+ Otherwise an exception will be raised.
+ :param logmsg: message to append to the reference log. If None, a default message
+ will be used
+ :return: newly created Head instances"""
+ raise NotImplementedError()
+
+ def delete_head(self, *heads):
+ """Delete the given heads
+ :param heads: list of Head references that are to be deleted"""
+ raise NotImplementedError()
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False):
+ """Create a new tag reference.
+ :param path: name or path of the new tag.
+ :param ref: resolvable name of the reference or commit, or Commit or Reference
+ instance describing the commit the tag should point to.
+ :param message: message to be attached to the tag reference. This will
+ create an actual Tag object carrying the message. Otherwise a TagReference
+ will be generated.
+ :param force: if True, the Tag will be created even if another tag already
+ exists at the given path. Otherwise an exception will be thrown
+ :return: TagReference object """
+ raise NotImplementedError()
+
+ def delete_tag(self, *tags):
+ """Delete the given tag references
+ :param tags: TagReferences to delete"""
+ raise NotImplementedError()
+
+ #} END edit methods
+
+ #{ Backward Compatibility
+ # These aliases need to be provided by the implementing interface as well
+ refs = references
+ branches = heads
+ #} END backward compatibility
+
+
+
+
+class RepositoryPathsMixin(object):
+ """Represents basic functionality of a full git repository. This involves an
+ optional working tree, a git directory with references and an object directory.
+
+ This type collects the respective paths and verifies the provided base path
+ truly is a git repository.
+
+ If the underlying type provides the config_reader() method, we can properly determine
+ whether this is a bare repository as well. Otherwise it will make an educated guess
+ based on the path name."""
+ #{ Subclass Interface
+ def _initialize(self, path):
+ """initialize this instance with the given path. It may point to
+ any location within the repositories own data, as well as the working tree.
+
+ The implementation will move up and search for traces of a git repository,
+ which is indicated by a child directory ending with .git or the
+ current path portion ending with .git.
+
+ The paths made available for query are suitable for full git repositories
+ only. Plain object databases need to be fed the "objects" directory path.
+
+ :param path: the path to initialize the repository with
+ It is a path to either the root git directory or the bare git repo::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+ repo = Repo("~/Development/git-python.git")
+ repo = Repo("$REPOSITORIES/Development/git-python.git")
+
+ :raise InvalidDBRoot:
+ """
+ raise NotImplementedError()
+ #} END subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ raise NotImplementedError()
+
+ def __ne__(self, rhs):
+ raise NotImplementedError()
+
+ def __hash__(self):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ raise NotImplementedError()
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ """:return: True if this is a bare repository
+ :note: this value is cached upon initialization"""
+ raise NotImplementedError()
+
+ @property
+ def git_dir(self):
+ """:return: path to directory containing this actual git repository (which
+ in turn provides access to objects and references)"""
+ raise NotImplementedError()
+
+ @property
+ def working_tree_dir(self):
+ """:return: path to directory containing the working tree checkout of our
+ git repository.
+ :raise AssertionError: If this is a bare repository"""
+ raise NotImplementedError()
+
+ @property
+ def objects_dir(self):
+ """:return: path to the repository's objects directory"""
+ raise NotImplementedError()
+
+ @property
+ def working_dir(self):
+ """:return: working directory of the git process or related tools, being
+ either the working_tree_dir if available or the git_dir"""
+ raise NotImplementedError()
+
+ @property
+ def description(self):
+ """:return: description text associated with this repository or set the
+ description."""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class ConfigurationMixin(object):
+ """Interface providing configuration handler instances, which provide locked access
+ to a single git-style configuration file (ini-like format, using tabs to improve readability).
+
+ Configuration readers can be initialized with multiple files at once, whose information is concatenated
+ when reading. Lower-level files overwrite values from higher-level files, i.e. a repository configuration file
+ overwrites information coming from a system configuration file.
+
+ :note: for the 'repository' config level, a git_path() compatible type is required"""
+ config_level = ("system", "global", "repository")
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ """
+ :return:
+ GitConfigParser allowing to read the full git configuration, but not to write it
+
+ The configuration will include values from the system, user and repository
+ configuration files.
+
+ :param config_level:
+ For possible values, see config_writer method
+ If None, all applicable levels will be used. Specify a level in case
+ you know which exact file you wish to read, to prevent reading multiple files for
+ instance
+ :note: On Windows, system configuration cannot currently be read as the path is
+ unknown, instead the global path will be used."""
+ raise NotImplementedError()
+
+ def config_writer(self, config_level="repository"):
+ """
+ :return:
+ GitConfigParser allowing to write values of the specified configuration file level.
+ Config writers should be retrieved, used to change the configuration, and written
+ right away as they will lock the configuration file in question and prevent others
+ from writing it.
+
+ :param config_level:
+ One of the following values
+ system = system-wide configuration file
+ global = user level configuration file
+ repository = configuration file for this repository only"""
+ raise NotImplementedError()
+
+
+ #} END interface
+
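+# Reading and writing configuration values (illustrative; assumes the
+# 'core' section exists, as it does in standard repository configs):
+#
+#   reader = db.config_reader()                 # all levels merged
+#   is_bare = reader.getboolean('core', 'bare')
+#   writer = db.config_writer('repository')     # locks the file until released
+#   writer.set('core', 'bare', 'false')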
+
+class SubmoduleDB(object):
+ """Interface providing access to git repository submodules.
+ The actual implementation is found in the Submodule object type, which is
+ currently only available in one implementation."""
+
+ @property
+ def submodules(self):
+ """
+ :return: git.IterableList(Submodule, ...) of direct submodules
+ available from the current head"""
+ raise NotImplementedError()
+
+ def submodule(self, name):
+ """ :return: Submodule with the given name
+ :raise ValueError: If no such submodule exists"""
+ raise NotImplementedError()
+
+ def create_submodule(self, *args, **kwargs):
+ """Create a new submodule
+
+ :note: See the documentation of Submodule.add for a description of the
+ applicable parameters
+ :return: created submodules"""
+ raise NotImplementedError()
+
+ def iter_submodules(self, *args, **kwargs):
+ """An iterator yielding Submodule instances, see Traversable interface
+ for a description of args and kwargs
+ :return: Iterator"""
+ raise NotImplementedError()
+
+ def submodule_update(self, *args, **kwargs):
+ """Update the submodules, keeping the repository consistent as it will
+ take the previous state into consideration. For more information, please
+ see the documentation of RootModule.update"""
+ raise NotImplementedError()
+
+
+class HighLevelRepository(object):
+ """An interface combining several high-level repository functionality and properties"""
+
+ @property
+ def daemon_export(self):
+ """:return: True if the repository may be published by the git-daemon"""
+ raise NotImplementedError()
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ """
+ :return:
+ ``True`` if the repository is considered dirty. By default it will react
+ like a git-status without untracked files, hence it is dirty if the
+ index or the working copy have changes."""
+ raise NotImplementedError()
+
+ @property
+ def untracked_files(self):
+ """
+ :return:
+ list(str,...)
+
+ :note:
+ ignored files will not appear here, i.e. files mentioned in .gitignore.
+ Bare repositories never have untracked files"""
+ raise NotImplementedError()
+
+ def blame(self, rev, file):
+ """The blame information for the given file at the given revision.
+
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return:
+ list: [Commit, list: [<line>]]
+ A list of tuples associating a Commit object with a list of lines that
+ changed within the given commit. The Commit objects will be given in order
+ of appearance."""
+ raise NotImplementedError()
+
+ @classmethod
+ def init(cls, path=None, mkdir=True):
+ """Initialize a git repository at the given path if specified
+
+ :param path:
+ is the full path to the repo (traditionally ends with /<name>.git)
+ or None in which case the repository will be created in the current
+ working directory
+
+ :param mkdir:
+ if specified will create the repository directory if it doesn't
+ already exist. Creates the directory with a mode=0755.
+ Only effective if a path is explicitly given
+
+ :return: Instance pointing to the newly created repository with similar capabilities
+ of this class"""
+ raise NotImplementedError()
+
+ def clone(self, path, progress = None):
+ """Create a clone from this repository.
+ :param path:
+ is the full path of the new repo (traditionally ends with ./<name>.git).
+
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+
+ :return: ``git.Repo`` (the newly cloned repo)"""
+ raise NotImplementedError()
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress = None):
+ """Create a clone from the given URL
+ :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
+ :param to_path: Path to which the repository should be cloned
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+ :return: instance pointing to the cloned directory with similar capabilities as this class"""
+ raise NotImplementedError()
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """Archive the tree at the given revision.
+ :param ostream: file compatible stream object to which the archive will be written
+ :param treeish: is the treeish name/id, defaults to active branch
+ :param prefix: is the optional prefix to prepend to each filename in the archive
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+ :return: self"""
+ raise NotImplementedError()
+
+
diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/db/py/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/py/base.py b/git/db/py/base.py
new file mode 100644
index 00000000..2fdbd202
--- /dev/null
+++ b/git/db/py/base.py
@@ -0,0 +1,474 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic implementations for the interface building blocks"""
+from git.db.interface import *
+
+from git.util import (
+ pool,
+ join,
+ isfile,
+ normpath,
+ abspath,
+ dirname,
+ LazyMixin,
+ hex_to_bin,
+ bin_to_hex,
+ expandvars,
+ expanduser,
+ exists,
+ is_git_dir,
+ )
+
+from git.index import IndexFile
+from git.config import GitConfigParser
+from git.exc import (
+ BadObject,
+ AmbiguousObjectName,
+ InvalidGitRepositoryError,
+ NoSuchPathError
+ )
+
+from async import ChannelThreadTask
+
+from itertools import chain
+import sys
+import os
+
+
+__all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB',
+ 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin',
+ 'PureIndexDB')
+
+
+class PureObjectDBR(ObjectDBR):
+
+ #{ Query Interface
+
+ def has_object_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
+ return pool.add_task(task)
+
+ def info_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.info_async), self.info)
+ return pool.add_task(task)
+
+ def stream_async(self, reader):
+ # base implementation just uses the stream method repeatedly
+ task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+ return pool.add_task(task)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
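+ # an odd-length hexsha cannot be converted to binary directly - pad it
+ # with one nibble and pass the original length along so implementations
+ # can disambiguate the trailing half-byte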
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+ return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha))
+
+ #} END query interface
+
+
+class PureObjectDBW(ObjectDBW):
+
+ def __init__(self, *args, **kwargs):
+ super(PureObjectDBW, self).__init__(*args, **kwargs)
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ return self._ostream
+
+ def store_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.store_async), self.store)
+ return pool.add_task(task)
+
+ #} END edit interface
+
+
+class PureRootPathDB(RootPathDB):
+
+ def __init__(self, root_path):
+ self._root_path = root_path
+ super(PureRootPathDB, self).__init__(root_path)
+
+
+
+ #{ Interface
+ def root_path(self):
+ return self._root_path
+
+ def db_path(self, rela_path):
+ return join(self._root_path, rela_path)
+ #} END interface
+
+
+def _databases_recursive(database, output):
+ """Fill output list with database from db, in order. Deals with Loose, Packed
+ and compound databases."""
+ if isinstance(database, CompoundDB):
+ compounds = list()
+ dbs = database.databases()
+ output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+ for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+ _databases_recursive(cdb, output)
+ else:
+ output.append(database)
+ # END handle database type
+
+
+class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB):
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ elif attr == '_obj_cache':
+ self._obj_cache = dict()
+ else:
+ super(PureCompoundDB, self)._set_cache_(attr)
+
+ def _db_query(self, sha):
+ """:return: database containing the given 20 byte sha
+ :raise BadObject:"""
+ # most databases use binary representations, prevent converting
+ # it every time a database is being queried
+ try:
+ return self._obj_cache[sha]
+ except KeyError:
+ pass
+ # END first level cache
+
+ for db in self._dbs:
+ if db.has_object(sha):
+ self._obj_cache[sha] = db
+ return db
+ # END for each database
+ raise BadObject(sha)
+
+ #{ PureObjectDBR interface
+
+ def has_object(self, sha):
+ try:
+ self._db_query(sha)
+ return True
+ except BadObject:
+ return False
+ # END handle exceptions
+
+ def info(self, sha):
+ return self._db_query(sha).info(sha)
+
+ def stream(self, sha):
+ return self._db_query(sha).stream(sha)
+
+ def size(self):
+ return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+
+ def sha_iter(self):
+ return chain(*(db.sha_iter() for db in self._dbs))
+
+ #} END object DBR Interface
+
+ #{ Interface
+
+ def databases(self):
+ return tuple(self._dbs)
+
+ def update_cache(self, force=False):
+ # something might have changed, clear everything
+ self._obj_cache.clear()
+ stat = False
+ for db in self._dbs:
+ if isinstance(db, CachingDB):
+ stat |= db.update_cache(force)
+ # END if is caching db
+ # END for each database to update
+ return stat
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+
+ candidate = None
+ for db in self._dbs:
+ full_bin_sha = None
+ try:
+ if hasattr(db, 'partial_to_complete_sha_hex'):
+ full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+ else:
+ full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+ # END handle database type
+ except BadObject:
+ continue
+ # END ignore bad objects
+ if full_bin_sha:
+ if candidate and candidate != full_bin_sha:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = full_bin_sha
+ # END handle candidate
+ # END for each db
+ if not candidate:
+ raise BadObject(partial_binsha)
+ return candidate
+
+ def partial_to_complete_sha(self, partial_binsha, hex_len):
+ """Simple adaptor to feed into our implementation"""
+ return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
+ #} END interface
+
+
+class PureRepositoryPathsMixin(RepositoryPathsMixin):
+ # slots has no effect here, it's just to keep track of used attrs
+ __slots__ = ("_git_path", '_bare')
+
+ #{ Configuration
+ repo_dir = '.git'
+ objs_dir = 'objects'
+ #} END configuration
+
+ #{ Subclass Interface
+ def _initialize(self, path):
+ epath = abspath(expandvars(expanduser(path or os.getcwd())))
+
+ if not exists(epath):
+ raise NoSuchPathError(epath)
+ #END check file
+
+ self._working_tree_dir = None
+ self._git_path = None
+ curpath = epath
+
+ # walk up the path to find the .git dir
+ while curpath:
+ if is_git_dir(curpath):
+ self._git_path = curpath
+ self._working_tree_dir = os.path.dirname(curpath)
+ break
+ gitpath = join(curpath, self.repo_dir)
+ if is_git_dir(gitpath):
+ self._git_path = gitpath
+ self._working_tree_dir = curpath
+ break
+ curpath, dummy = os.path.split(curpath)
+ if not dummy:
+ break
+ # END while curpath
+
+ if self._git_path is None:
+ raise InvalidGitRepositoryError(epath)
+ # END path not found
+
+ # educated guess: assume bare if the git dir is not literally named '.git'
+ self._bare = os.path.basename(self._git_path) != self.repo_dir
+ if hasattr(self, 'config_reader'):
+ try:
+ self._bare = self.config_reader("repository").getboolean('core','bare')
+ except Exception:
+ # let's not assume the option exists, although it should
+ pass
+ #END check bare flag
+
+ #} end subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ if hasattr(rhs, 'git_dir'):
+ return self.git_dir == rhs.git_dir
+ return False
+
+ def __ne__(self, rhs):
+ return not self.__eq__(rhs)
+
+ def __hash__(self):
+ return hash(self.git_dir)
+
+ def __repr__(self):
+ return "%s(%r)" % (type(self).__name__, self.git_dir)
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ return self._bare
+
+ @property
+ def git_dir(self):
+ return self._git_path
+
+ @property
+ def working_tree_dir(self):
+ if self.is_bare:
+ raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir)
+ #END assertion
+ return dirname(self.git_dir)
+
+ @property
+ def objects_dir(self):
+ return join(self.git_dir, self.objs_dir)
+
+ @property
+ def working_dir(self):
+ if self.is_bare:
+ return self.git_dir
+ else:
+ return self.working_tree_dir
+ #END handle bare state
+
+ def _mk_description():
+ def _get_description(self):
+ filename = join(self.git_dir, 'description')
+ return file(filename).read().rstrip()
+
+ def _set_description(self, descr):
+ filename = join(self.git_dir, 'description')
+ file(filename, 'w').write(descr+'\n')
+
+ return property(_get_description, _set_description, "Descriptive text for the content of the repository")
+
+ description = _mk_description()
+ del(_mk_description)
+
+ #} END interface
+
+
+class PureConfigurationMixin(ConfigurationMixin):
+
+ #{ Configuration
+ system_config_file_name = "gitconfig"
+ repo_config_file_name = "config"
+ #} END
+
+ def __init__(self, *args, **kwargs):
+ """Verify prereqs"""
+ super(PureConfigurationMixin, self).__init__(*args, **kwargs)
+ assert hasattr(self, 'git_dir')
+
+ def _path_at_level(self, level ):
+ # we do not support an absolute path of the gitconfig on Windows,
+ # use the global config instead
+ if sys.platform == "win32" and level == "system":
+ level = "global"
+ #END handle windows
+
+ if level == "system":
+ return "/etc/%s" % self.system_config_file_name
+ elif level == "global":
+ return normpath(expanduser("~/.%s" % self.system_config_file_name))
+ elif level == "repository":
+ return join(self.git_dir, self.repo_config_file_name)
+ #END handle level
+
+ raise ValueError("Invalid configuration level: %r" % level)
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ files = None
+ if config_level is None:
+ files = [ self._path_at_level(f) for f in self.config_level ]
+ else:
+ files = [ self._path_at_level(config_level) ]
+ #END handle level
+ return GitConfigParser(files, read_only=True)
+
+ def config_writer(self, config_level="repository"):
+ return GitConfigParser(self._path_at_level(config_level), read_only=False)
+
+
+ #} END interface
+
+
+class PureIndexDB(IndexDB):
+ #{ Configuration
+ IndexCls = IndexFile
+ #} END configuration
+
+ @property
+ def index(self):
+ return self.IndexCls(self)
+
+
+class PureAlternatesFileMixin(object):
+ """Utility able to read and write an alternates file through the alternates property
+ It needs to be part of a type with the git_dir or db_path property.
+
+ The file by default is assumed to be located at the default location as imposed
+ by the standard git repository layout"""
+
+ #{ Configuration
+ alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file
+
+ #} END configuration
+
+ def __init__(self, *args, **kwargs):
+ super(PureAlternatesFileMixin, self).__init__(*args, **kwargs)
+ self._alternates_path() # throws on incompatible type
+
+ #{ Interface
+
+ def _alternates_path(self):
+ if hasattr(self, 'git_dir'):
+ return join(self.git_dir, 'objects', self.alternates_filepath)
+ elif hasattr(self, 'db_path'):
+ return self.db_path(self.alternates_filepath)
+ else:
+ raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method")
+ #END handle path
+
+ def _get_alternates(self):
+ """The list of alternates for this repo from which objects can be retrieved
+
+ :return: list of strings being pathnames of alternates"""
+ alternates_path = self._alternates_path()
+
+ if os.path.exists(alternates_path):
+ # open the file before the try block, otherwise a failed open would
+ # leave 'f' undefined in the finally clause
+ f = open(alternates_path)
+ try:
+ alts = f.read()
+ finally:
+ f.close()
+ return alts.strip().splitlines()
+ else:
+ return list()
+ # END handle path exists
+
+ def _set_alternates(self, alts):
+ """Sets the alternates
+
+ :param alts:
+ is the array of string paths representing the alternates at which
+ git should look for objects, i.e. /home/user/repo/.git/objects
+
+ :raise NoSuchPathError:
+ :note:
+ The method does not check for the existence of the paths in alts
+ as the caller is responsible."""
+ alternates_path = self._alternates_path()
+ if not alts:
+ if isfile(alternates_path):
+ os.remove(alternates_path)
+ else:
+ f = open(alternates_path, 'w')
+ try:
+ f.write("\n".join(alts))
+ finally:
+ f.close()
+ # END file handling
+ # END alts handling
+
+ alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list of paths to be used as alternates")
+
+ #} END interface
+
diff --git a/git/db/py/complex.py b/git/db/py/complex.py
new file mode 100644
index 00000000..d5c185f3
--- /dev/null
+++ b/git/db/py/complex.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of PurePartialGitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.db.interface import HighLevelRepository
+from base import (
+ PureCompoundDB,
+ PureObjectDBW,
+ PureRootPathDB,
+ PureRepositoryPathsMixin,
+ PureConfigurationMixin,
+ PureAlternatesFileMixin,
+ PureIndexDB,
+ )
+from transport import PureTransportDB
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+from submodule import PureSubmoduleDB
+
+from git.db.compat import RepoCompatibilityInterface
+
+from git.util import (
+ LazyMixin,
+ normpath,
+ join,
+ dirname
+ )
+from git.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+import os
+
+__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin):
+ """A git-style object-only database, which contains all objects in the 'objects'
+ subdirectory.
+ :note: The type needs to be initialized on the ./objects directory to function,
+ as it deals solely with object lookup. Use a PurePartialGitDB type if you need
+ reference and push support."""
+ # Configuration
+ PackDBCls = PurePackedODB
+ LooseDBCls = PureLooseObjectODB
+ PureReferenceDBCls = PureReferenceDB
+
+ # Directories
+ packs_dir = 'pack'
+ loose_dir = ''
+
+
+ def __init__(self, root_path):
+ """Initialize ourselves on a git ./objects directory"""
+ super(PureGitODB, self).__init__(root_path)
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs' or attr == '_loose_db':
+ self._dbs = list()
+ loose_db = None
+ for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_filepath, self.PureReferenceDBCls)):
+ path = self.db_path(subpath)
+ if os.path.exists(path):
+ self._dbs.append(dbcls(path))
+ if dbcls is self.LooseDBCls:
+ loose_db = self._dbs[-1]
+ # END remember loose db
+ # END check path exists
+ # END for each db type
+
+ # should have at least one subdb
+ if not self._dbs:
+ raise InvalidDBRoot(self.root_path())
+ # END handle error
+
+ # the first one should have the store method
+ assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality"
+
+ # finally set the value
+ self._loose_db = loose_db
+ else:
+ super(PureGitODB, self)._set_cache_(attr)
+ # END handle attrs
+
+ #{ PureObjectDBW interface
+
+ def store(self, istream):
+ return self._loose_db.store(istream)
+
+ def ostream(self):
+ return self._loose_db.ostream()
+
+ def set_ostream(self, ostream):
+ return self._loose_db.set_ostream(ostream)
+
+ #} END objectdbw interface
+
+
+
+class PurePartialGitDB(PureGitODB,
+ PureRepositoryPathsMixin, PureConfigurationMixin,
+ PureReferencesMixin, PureSubmoduleDB,
+ PureIndexDB, PureTransportDB
+ # HighLevelRepository Currently not implemented !
+ ):
+ """Git like database with support for object lookup as well as reference resolution.
+ Our rootpath is set to the actual .git directory (bare on unbare).
+
+ The root_path will be the git objects directory. Use git_path() to obtain the actual top-level
+ git directory."""
+ #directories
+
+ def __init__(self, root_path):
+ """Initialize ourselves on the .git directory, or the .git/objects directory."""
+ PureRepositoryPathsMixin._initialize(self, root_path)
+ super(PurePartialGitDB, self).__init__(self.objects_dir)
+
+
+
+class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface):
+ """Pure git database with a compatability layer required by 0.3x code"""
+
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..6e72aff0
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,263 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+ PureRootPathDB,
+ PureObjectDBR,
+ PureObjectDBW
+ )
+
+
+from git.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+
+from git.stream import (
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
+
+from git.base import (
+ OStream,
+ OInfo
+ )
+
+from git.util import (
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
+
+from git.fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ( 'PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(PureLooseObjectODB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
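+ # the first two hexsha characters name the fan-out directory, the
+ # remaining 38 characters the loose object file itself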
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # the open failed - likely due to our additional O_NOATIME flag,
+ # hence try again without it
+ try:
+ contents = file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # END retry without noatime
+ # the flag didn't work on this mount - disable it for future calls
+ self._fd_open_flags = 0
+ return contents
+ else:
+ raise BadObject(sha)
+ # END handle error
+ # END exception handling
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(PureLooseObjectODB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existence
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param partial_hexsha: hexadecimal partial name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # it can't ever find the same object twice
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win32
+ rename(tmp_path, obj_path)
+
+ # make sure it's readable for all! It started out as a rw------- tmp file
+ # but needs to be rw-r--r--
+ chmod(obj_path, self.new_objects_mode)
+ # END handle dry_run
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
+
diff --git a/git/db/py/mem.py b/git/db/py/mem.py
new file mode 100644
index 00000000..da02dbdd
--- /dev/null
+++ b/git/db/py/mem.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains the MemoryDatabase implementation"""
+from base import (
+ PureObjectDBR,
+ PureObjectDBW
+ )
+from loose import PureLooseObjectODB
+from git.base import (
+ OStream,
+ IStream,
+ )
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation
+ )
+from git.stream import (
+ ZippedStoreShaWriter,
+ DecompressMemMapReader,
+ )
+
+from cStringIO import StringIO
+
+__all__ = ("PureMemoryDB", )
+
+class PureMemoryDB(PureObjectDBR, PureObjectDBW):
+ """A memory database stores everything to memory, providing fast IO and object
+ retrieval. It should be used to buffer results and obtain SHAs before writing
+ them to the actual physical storage, as it allows querying whether an object already
+ exists in the target storage before introducing actual IO
+
+ :note: memory is currently not threadsafe, hence the async methods cannot be used
+ for storing"""
+
+ def __init__(self):
+ super(PureMemoryDB, self).__init__()
+ self._db = PureLooseObjectODB("path/doesnt/matter")
+
+ # maps 20 byte shas to their OStream objects
+ self._cache = dict()
+
+ def set_ostream(self, stream):
+ raise UnsupportedOperation("PureMemoryDB's always stream into memory")
+
+ def store(self, istream):
+ zstream = ZippedStoreShaWriter()
+ self._db.set_ostream(zstream)
+
+ istream = self._db.store(istream)
+ zstream.close() # close to flush
+ zstream.seek(0)
+
+ # don't provide a size, the stream is written in object format, hence the
+ # header needs decompression
+ decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+ self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+ return istream
+
+ def store_async(self, reader):
+ raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access")
+
+ def has_object(self, sha):
+ return sha in self._cache
+
+ def info(self, sha):
+ # we always return streams, which are infos as well
+ return self.stream(sha)
+
+ def stream(self, sha):
+ try:
+ ostream = self._cache[sha]
+ # rewind stream for the next one to read
+ ostream.stream.seek(0)
+ return ostream
+ except KeyError:
+ raise BadObject(sha)
+ # END exception handling
+
+ def size(self):
+ return len(self._cache)
+
+ def sha_iter(self):
+ return self._cache.iterkeys()
+
+
+ #{ Interface
+ def stream_copy(self, sha_iter, odb):
+ """Copy the streams as identified by sha's yielded by sha_iter into the given odb
+ The streams will be copied directly
+ :note: the object will only be written if it did not exist in the target db
+ :return: number of streams actually copied into odb. If smaller than the number
+ of input shas, one or more objects already existed in odb"""
+ count = 0
+ for sha in sha_iter:
+ if odb.has_object(sha):
+ continue
+ # END check object existance
+
+ ostream = self.stream(sha)
+ # compressed data including header
+ sio = StringIO(ostream.stream.data())
+ istream = IStream(ostream.type, ostream.size, sio, sha)
+
+ odb.store(istream)
+ count += 1
+ # END for each sha
+ return count
+ #} END interface
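+
+# Typical buffering pattern (illustrative; 'target_db' is any ObjectDBW
+# compatible database, IStream and StringIO as imported above, and the
+# payload is hypothetical):
+#
+#   mdb = PureMemoryDB()
+#   data = "my data"
+#   istream = mdb.store(IStream("blob", len(data), StringIO(data)))
+#   if not target_db.has_object(istream.binsha):
+#       mdb.stream_copy(iter([istream.binsha]), target_db)
+#   # END copy object if missing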
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
new file mode 100644
index 00000000..75b75468
--- /dev/null
+++ b/git/db/py/pack.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing a database to deal with packs"""
+from git.db import CachingDB
+from base import (
+ PureRootPathDB,
+ PureObjectDBR
+ )
+
+from git.util import LazyMixin
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
+
+from git.pack import PackEntity
+
+import os
+import glob
+
+__all__ = ('PurePackedODB', )
+
+#{ Utilities
+
+
+class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
+ """A database operating on a set of object packs"""
+
+ # the type to use when instantiating a pack entity
+ PackEntityCls = PackEntity
+
+ # sort the priority list every N queries
+ # Higher values are better, performance tests don't show this has
+ # any effect, but it should have one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PurePackedODB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazy loaded list
+ self._hit_count = 0 # amount of hits
+ self._st_mtime = 0 # last modification data of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ :note: This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst thing that can happen though is a counter that
+ was not incremented, or the list being in the wrong order. So we save
+ the time for locking here; let's see how that goes"""
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+ # NOTE: considering packs don't change very often, we save this call
+ # and leave it to the super-caller to trigger that
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for i in xrange(index.size()):
+ yield sha_by_index(i)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return reduce(lambda x,y: x+y, sizes, 0)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add PureObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Update our cache with the actually existing packs on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git-convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+ # init the hit-counter/priority with the size, a good measure for hit-
+ # probability. It's implemented so that only 12 bytes will be read
+ entity = self.PackEntityCls(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+ # reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical representation.
+ It is required as binary shas cannot convey whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
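
The hit-counting scheme above keeps each pack as a [hits, entity, sha_to_index]
list and periodically re-sorts it so the busiest packs are probed first. A
minimal standalone sketch of that priority loop, with Entity standing in for
the real PackEntity:

    class Entity(object):
        """Stand-in for PackEntity: only offers an index lookup table"""
        def __init__(self, name, shas):
            self.name = name
            self._shas = dict((s, i) for i, s in enumerate(shas))
        def sha_to_index(self, sha):
            return self._shas.get(sha)          # None if sha is not contained

    entities = [[0, e, e.sha_to_index] for e in (Entity('a', ('s1',)), Entity('b', ('s2',)))]

    def lookup(sha):
        for item in entities:
            index = item[2](sha)                # direct cache query
            if index is not None:
                item[0] += 1                    # one hit for this pack
                return item[1], index
        raise KeyError(sha)                     # stands in for BadObject

    for sha in ('s2', 's2', 's1'):
        lookup(sha)
    entities.sort(key=lambda l: l[0], reverse=True)          # busiest pack first
    print([(item[1].name, item[0]) for item in entities])    # [('b', 2), ('a', 1)]
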
diff --git a/git/db/py/ref.py b/git/db/py/ref.py
new file mode 100644
index 00000000..d2c77a3a
--- /dev/null
+++ b/git/db/py/ref.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import PureCompoundDB
+
+import os
+__all__ = ('PureReferenceDB', )
+
+class PureReferenceDB(PureCompoundDB):
+ """A database consisting of databases referred to in a file"""
+
+ # Configuration
+ # Specifies the object database to use for the paths found in the alternates
+ # file. If None, it defaults to the PureGitODB
+ ObjectDBCls = None
+
+ def __init__(self, ref_file):
+ super(PureReferenceDB, self).__init__()
+ self._ref_file = ref_file
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ self._update_dbs_from_ref_file()
+ else:
+ super(PureReferenceDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ def _update_dbs_from_ref_file(self):
+ dbcls = self.ObjectDBCls
+ if dbcls is None:
+ # late import
+ import complex
+ dbcls = complex.PureGitODB
+ # END get db type
+
+ # try to get as many as possible, don't fail if some are unavailable
+ ref_paths = list()
+ try:
+ ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+ except (OSError, IOError):
+ pass
+ # END handle alternates
+
+ ref_paths_set = set(ref_paths)
+ cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+ # remove existing
+ for path in (cur_ref_paths_set - ref_paths_set):
+ for i, db in enumerate(self._dbs):
+ if db.root_path() == path:
+ del(self._dbs[i])
+ break
+ # END del matching db
+ # END for each path to remove
+
+ # add new
+ # sort them to maintain order
+ added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+ for path in added_paths:
+ try:
+ db = dbcls(path)
+ # force an update to verify path
+ if isinstance(db, PureCompoundDB):
+ db.databases()
+ # END verification
+ self._dbs.append(db)
+ except Exception:
+ # ignore invalid paths or issues
+ pass
+ # END for each path to add
+
+ def update_cache(self, force=False):
+ # re-read alternates and update databases
+ self._update_dbs_from_ref_file()
+ return super(PureReferenceDB, self).update_cache(force)
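
The update logic above reduces to set arithmetic over database root paths read
from the reference file, one path per line as in .git/objects/info/alternates.
A toy illustration with made-up paths:

    current = set(['/repo/a/objects', '/repo/b/objects'])   # roots of self._dbs
    ref_paths = ['/repo/b/objects', '/repo/c/objects']      # lines of the ref file

    removed = current - set(ref_paths)                      # databases to drop
    added = sorted(set(ref_paths) - current, key=lambda p: ref_paths.index(p))
    print(sorted(removed))   # ['/repo/a/objects']
    print(added)             # ['/repo/c/objects']
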
diff --git a/git/repo/fun.py b/git/db/py/resolve.py
index 03d55716..7bea779e 100644
--- a/git/repo/fun.py
+++ b/git/db/py/resolve.py
@@ -1,35 +1,36 @@
-"""Package with general repository related functions"""
-import os
-from gitdb.exc import BadObject
-from git.refs import SymbolicReference
-from git.objects import Object
-from gitdb.util import (
+"""Module with an implementation for refspec parsing. It is the pure-python
+version assuming compatible interface for reference and object types"""
+
+from git.db.interface import ReferencesMixin
+from git.exc import BadObject
+from git.refs import (
+ SymbolicReference,
+ Reference,
+ HEAD,
+ Head,
+ TagReference
+ )
+
+from git.objects.base import Object
+from git.objects.commit import Commit
+from git.util import (
join,
isdir,
isfile,
hex_to_bin,
- bin_to_hex
+ bin_to_hex,
+ is_git_dir
)
from string import digits
+import os
+import re
-__all__ = ('rev_parse', 'is_git_dir', 'touch')
-
-def touch(filename):
- fp = open(filename, "a")
- fp.close()
-
-def is_git_dir(d):
- """ This is taken from the git setup.c:is_git_directory
- function."""
- if isdir(d) and \
- isdir(join(d, 'objects')) and \
- isdir(join(d, 'refs')):
- headref = join(d, 'HEAD')
- return isfile(headref) or \
- (os.path.islink(headref) and
- os.readlink(headref).startswith('refs'))
- return False
+__all__ = ["PureReferencesMixin"]
+#{ Utilities
def short_to_long(odb, hexsha):
""":return: long hexadecimal sha1 from the given less-than-40 byte hexsha
@@ -89,7 +90,7 @@ def name_to_object(repo, name, return_ref=False):
return Object.new_from_sha(repo, hex_to_bin(hexsha))
def deref_tag(tag):
- """Recursively dereerence a tag and return the resulting object"""
+ """Recursively dereference a tag and return the resulting object"""
while True:
try:
tag = tag.object
@@ -185,7 +186,7 @@ def rev_parse(repo, rev):
# END handle tag
elif token == '@':
# try single int
- assert ref is not None, "Requre Reference to access reflog"
+ assert ref is not None, "Require Reference to access reflog"
revlog_index = None
try:
# transform reversed index into the format of our revlog
@@ -282,3 +283,85 @@ def rev_parse(repo, rev):
raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
return obj
+
+#} END utilities
+
+class PureReferencesMixin(ReferencesMixin):
+ """Pure-Python refparse implementation"""
+
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+
+ #{ Configuration
+ # Types to use when instantiating references
+ TagReferenceCls = TagReference
+ HeadCls = Head
+ ReferenceCls = Reference
+ HEADCls = HEAD
+ CommitCls = Commit
+ #} END configuration
+
+ def resolve(self, name):
+ return self.resolve_object(name).binsha
+
+ def resolve_object(self, name):
+ return rev_parse(self, name)
+
+ @property
+ def references(self):
+ return self.ReferenceCls.list_items(self)
+
+ @property
+ def heads(self):
+ return self.HeadCls.list_items(self)
+
+ @property
+ def tags(self):
+ return self.TagReferenceCls.list_items(self)
+
+ def tag(self, name):
+ return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name))
+
+ def commit(self, rev=None):
+ if rev is None:
+ return self.head.commit
+ else:
+ return self.resolve_object(str(rev)+"^0")
+ #END handle revision
+
+ def iter_trees(self, *args, **kwargs):
+ return ( c.tree for c in self.iter_commits(*args, **kwargs) )
+
+ def tree(self, rev=None):
+ if rev is None:
+ return self.head.commit.tree
+ else:
+ return self.resolve_object(str(rev)+"^{tree}")
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ if rev is None:
+ rev = self.head.commit
+
+ return self.CommitCls.iter_items(self, rev, paths, **kwargs)
+
+
+ @property
+ def head(self):
+ return self.HEADCls(self, 'HEAD')
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None):
+ return self.HeadCls.create(self, path, commit, force, logmsg)
+
+ def delete_head(self, *heads, **kwargs):
+ return self.HeadCls.delete(self, *heads, **kwargs)
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
+ return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs)
+
+ def delete_tag(self, *tags):
+ return self.TagReferenceCls.delete(self, *tags)
+
+
+ # compat
+ branches = heads
+ refs = references
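
Hypothetical usage of the mixin, assuming PureGitODB includes
PureReferencesMixin and that '/path/to/repo/.git' points at an existing
repository (both are assumptions for illustration):

    from git.db.py.complex import PureGitODB

    repo = PureGitODB('/path/to/repo/.git')
    print(repo.resolve('HEAD'))              # 20 byte binary sha of HEAD
    print(repo.commit('HEAD~1').summary)     # parent commit, peeled via 'HEAD~1^0'
    print(repo.tree('HEAD').binsha)          # tree object, peeled via 'HEAD^{tree}'
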
diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py
new file mode 100644
index 00000000..735f90b1
--- /dev/null
+++ b/git/db/py/submodule.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.objects.submodule.base import Submodule
+from git.objects.submodule.root import RootModule
+from git.db.interface import SubmoduleDB
+
+__all__ = ["PureSubmoduleDB"]
+
+class PureSubmoduleDB(SubmoduleDB):
+ """Pure python implementation of submodule functionality"""
+
+ @property
+ def submodules(self):
+ return Submodule.list_items(self)
+
+ def submodule(self, name):
+ try:
+ return self.submodules[name]
+ except IndexError:
+ raise ValueError("Didn't find submodule named %r" % name)
+ # END exception handling
+
+ def create_submodule(self, *args, **kwargs):
+ return Submodule.add(self, *args, **kwargs)
+
+ def iter_submodules(self, *args, **kwargs):
+ return RootModule(self).traverse(*args, **kwargs)
+
+ def submodule_update(self, *args, **kwargs):
+ return RootModule(self).update(*args, **kwargs)
+
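
The only real logic in PureSubmoduleDB.submodule is the exception translation;
list_items returns a name-indexable list that raises IndexError for unknown
names. A toy equivalent, with NamedList standing in for git.util.IterableList:

    class NamedList(list):
        """Stand-in for IterableList: index by name, IndexError if missing"""
        def __getitem__(self, name):
            for item in self:
                if item == name:
                    return item
            raise IndexError(name)

    def submodule(mods, name):
        try:
            return mods[name]
        except IndexError:
            raise ValueError("Didn't find submodule named %r" % name)

    print(submodule(NamedList(['ext/async']), 'ext/async'))   # ext/async
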
diff --git a/git/db/py/transport.py b/git/db/py/transport.py
new file mode 100644
index 00000000..00d222b0
--- /dev/null
+++ b/git/db/py/transport.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Implement a transport compatible database which sends objects using the git protocol"""
+
+from git.db.interface import ( TransportDB,
+ PushInfo,
+ FetchInfo,
+ RefSpec )
+
+from git.refs.remote import RemoteReference
+from git.remote import Remote
+
+
+__all__ = ["PureTransportDB"]
+
+class PurePushInfo(PushInfo):
+ """TODO: Implementation"""
+ __slots__ = tuple()
+
+
+
+class PureFetchInfo(FetchInfo):
+ """TODO"""
+ __slots__ = tuple()
+
+
+class PureTransportDB(TransportDB):
+ # The following variables need to be set by the derived class
+ #{Configuration
+ protocol = None
+ RemoteCls = Remote
+ #} END configuration
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ return self.RemoteCls.list_items(self)
+
+ def remote(self, name='origin'):
+ return self.remotes[name]
+
+ def create_remote(self, name, url, **kwargs):
+ return self.RemoteCls.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ return self.RemoteCls.remove(self, remote)
+
+ #} END interface
+
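
PureTransportDB leaves fetch and push to derived classes; a minimal subclass
sketch, where the protocol value and the no-op bodies are placeholders rather
than part of any real implementation:

    from git.db.py.transport import PureTransportDB

    class DummyTransportDB(PureTransportDB):
        protocol = 'file'                   # hypothetical protocol tag
        def fetch(self, url, refspecs, progress=None, **kwargs):
            return list()                   # would return PureFetchInfo items
        def push(self, url, refspecs, progress=None, **kwargs):
            return list()                   # would return PurePushInfo items
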
diff --git a/git/diff.py b/git/diff.py
index 7b3bf6b5..d1c6c0ac 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -9,7 +9,7 @@ from objects.blob import Blob
from objects.util import mode_str_to_int
from exc import GitCommandError
-from gitdb.util import hex_to_bin
+from git.util import hex_to_bin
__all__ = ('Diffable', 'DiffIndex', 'Diff')
diff --git a/git/exc.py b/git/exc.py
index d2cb8d7e..e14fb7f1 100644
--- a/git/exc.py
+++ b/git/exc.py
@@ -5,17 +5,53 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all exceptions thrown througout the git package, """
-from gitdb.exc import *
+from util import to_hex_sha
-class InvalidGitRepositoryError(Exception):
+class GitPythonError(Exception):
+ """Base exception for all git-python related errors"""
+
+class ODBError(GitPythonError):
+ """All errors thrown by the object database"""
+
+
+class InvalidDBRoot(ODBError):
+ """Thrown if an object database cannot be initialized at the given path"""
+
+
+class BadObject(ODBError):
+ """The object with the given SHA does not exist. Instantiate with the
+ failed sha"""
+
+ def __str__(self):
+ return "BadObject: %s" % to_hex_sha(self.args[0])
+
+
+class ParseError(ODBError):
+ """Thrown if the parsing of a file failed due to an invalid format"""
+
+
+class AmbiguousObjectName(ODBError):
+ """Thrown if a possibly shortened name does not uniquely represent a single object
+ in the database"""
+
+
+class BadObjectType(ODBError):
+ """The object had an unsupported type"""
+
+
+class UnsupportedOperation(ODBError):
+ """Thrown if the given operation cannot be supported by the object database"""
+
+
+class InvalidGitRepositoryError(InvalidDBRoot):
""" Thrown if the given repository appears to have an invalid format. """
-class NoSuchPathError(OSError):
+class NoSuchPathError(InvalidDBRoot):
""" Thrown if a path could not be access by the system. """
-class GitCommandError(Exception):
+class GitCommandError(GitPythonError):
""" Thrown if execution of the git command fails with non-zero status code. """
def __init__(self, command, status, stderr=None):
self.stderr = stderr
@@ -27,7 +63,7 @@ class GitCommandError(Exception):
(' '.join(str(i) for i in self.command), self.status, self.stderr))
-class CheckoutError( Exception ):
+class CheckoutError(GitPythonError):
"""Thrown if a file could not be checked out from the index as it contained
changes.
@@ -50,9 +86,10 @@ class CheckoutError( Exception ):
return Exception.__str__(self) + ":%s" % self.failed_files
-class CacheError(Exception):
+class CacheError(GitPythonError):
"""Base for all errors related to the git index, which is called cache internally"""
+
class UnmergedEntriesError(CacheError):
"""Thrown if an operation cannot proceed as there are still unmerged
entries in the cache"""
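
With the new hierarchy every database failure derives from ODBError, which in
turn derives from GitPythonError, so callers can pick their granularity. A
small sketch using a dummy sha value:

    from git.exc import BadObject, ODBError

    try:
        raise BadObject('\x00' * 20)
    except ODBError as err:                 # caught as GitPythonError as well
        print(err)                          # BadObject: followed by 40 zeros
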
diff --git a/git/ext/async b/git/ext/async
new file mode 160000
+Subproject 10310824c001deab8fea85b88ebda0696f964b3
diff --git a/git/ext/gitdb b/git/ext/gitdb
deleted file mode 160000
-Subproject 17d9d1395fb6d18d553e085150138463b5827a2
diff --git a/git/fun.py b/git/fun.py
new file mode 100644
index 00000000..5bbe8efc
--- /dev/null
+++ b/git/fun.py
@@ -0,0 +1,674 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic c-functions which usually contain performance critical code
+Keeping this code separate from the beginning makes it easier to out-source
+it into c later, if required"""
+
+from exc import (
+ BadObjectType
+ )
+
+from util import zlib
+decompressobj = zlib.decompressobj
+
+import mmap
+from itertools import islice, izip
+
+from cStringIO import StringIO
+
+# INVARIANTS
+OFS_DELTA = 6
+REF_DELTA = 7
+delta_types = (OFS_DELTA, REF_DELTA)
+
+type_id_to_type_map = {
+ 0 : "", # EXT 1
+ 1 : "commit",
+ 2 : "tree",
+ 3 : "blob",
+ 4 : "tag",
+ 5 : "", # EXT 2
+ OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
+ REF_DELTA : "REF_DELTA" # REFERENCE DELTA
+ }
+
+type_to_type_id_map = dict(
+ commit=1,
+ tree=2,
+ blob=3,
+ tag=4,
+ OFS_DELTA=OFS_DELTA,
+ REF_DELTA=REF_DELTA
+ )
+
+# used when dealing with larger streams
+chunk_size = 1000*mmap.PAGESIZE
+
+__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
+ 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
+ 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
+
+
+#{ Structures
+
+def _set_delta_rbound(d, size):
+ """Truncate the given delta to the given size
+ :param size: size relative to our target offset, may not be 0, must be smaller than or equal
+ to our size
+ :return: d"""
+ d.ts = size
+
+ # NOTE: data is truncated automatically when applying the delta
+ # MUST NOT DO THIS HERE
+ return d
+
+def _move_delta_lbound(d, bytes):
+ """Move the delta by the given amount of bytes, reducing its size so that its
+ right bound stays static
+ :param bytes: amount of bytes to move, must be smaller than delta size
+ :return: d"""
+ if bytes == 0:
+ return
+
+ d.to += bytes
+ d.so += bytes
+ d.ts -= bytes
+ if d.data is not None:
+ d.data = d.data[bytes:]
+ # END handle data
+
+ return d
+
+def delta_duplicate(src):
+ return DeltaChunk(src.to, src.ts, src.so, src.data)
+
+def delta_chunk_apply(dc, bbuf, write):
+ """Apply own data to the target buffer
+ :param bbuf: buffer providing source bytes for copy operations
+ :param write: write method to call with data to write"""
+ if dc.data is None:
+ # COPY DATA FROM SOURCE
+ write(buffer(bbuf, dc.so, dc.ts))
+ else:
+ # APPEND DATA
+ # what's faster: if + 4 function calls or just a write with a slice ?
+ # Considering data can be larger than 127 bytes now, it should be worth it
+ if dc.ts < len(dc.data):
+ write(dc.data[:dc.ts])
+ else:
+ write(dc.data)
+ # END handle truncation
+ # END handle chunk mode
+
+
+class DeltaChunk(object):
+ """Represents a piece of a delta, it can either add new data, or copy existing
+ one from a source buffer"""
+ __slots__ = (
+ 'to', # start offset in the target buffer in bytes
+ 'ts', # size of this chunk in the target buffer in bytes
+ 'so', # start offset in the source buffer in bytes or None
+ 'data', # chunk of bytes to be added to the target buffer,
+ # DeltaChunkList to use as base, or None
+ )
+
+ def __init__(self, to, ts, so, data):
+ self.to = to
+ self.ts = ts
+ self.so = so
+ self.data = data
+
+ def __repr__(self):
+ return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
+
+ #{ Interface
+
+ def rbound(self):
+ return self.to + self.ts
+
+ def has_data(self):
+ """:return: True if the instance has data to add to the target stream"""
+ return self.data is not None
+
+ #} END interface
+
+def _closest_index(dcl, absofs):
+ """:return: index at which the given absofs should be inserted. The index points
+ to the DeltaChunk with a target buffer absofs that equals or is greater than
+ absofs.
+ :note: global method for performance only, it belongs to DeltaChunkList"""
+ lo = 0
+ hi = len(dcl)
+ while lo < hi:
+ mid = (lo + hi) / 2
+ dc = dcl[mid]
+ if dc.to > absofs:
+ hi = mid
+ elif dc.rbound() > absofs or dc.to == absofs:
+ return mid
+ else:
+ lo = mid + 1
+ # END handle bound
+ # END for each delta absofs
+ return len(dcl)-1
+
+def delta_list_apply(dcl, bbuf, write):
+ """Apply the chain's changes and write the final result using the passed
+ write function.
+ :param bbuf: base buffer containing the base of all deltas contained in this
+ list. It will only be used if the chunk in question does not have a base
+ chain.
+ :param write: function taking a string of bytes to write to the output"""
+ for dc in dcl:
+ delta_chunk_apply(dc, bbuf, write)
+ # END for each dc
+
+def delta_list_slice(dcl, absofs, size, ndcl):
+ """Append the subsection of this list at the given absolute offset, with the
+ given size in bytes, to ndcl
+ :return: None"""
+ cdi = _closest_index(dcl, absofs) # delta start index
+ cd = dcl[cdi]
+ slen = len(dcl)
+ lappend = ndcl.append
+
+ if cd.to != absofs:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ _move_delta_lbound(tcd, absofs - cd.to)
+ tcd.ts = min(tcd.ts, size)
+ lappend(tcd)
+ size -= tcd.ts
+ cdi += 1
+ # END lbound overlap handling
+
+ while cdi < slen and size:
+ # are we larger than the current block
+ cd = dcl[cdi]
+ if cd.ts <= size:
+ lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data))
+ size -= cd.ts
+ else:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ tcd.ts = size
+ lappend(tcd)
+ size -= tcd.ts
+ break
+ # END handle size
+ cdi += 1
+ # END for each chunk
+
+
+class DeltaChunkList(list):
+ """List with special functionality to deal with DeltaChunks.
+ There are two types of lists we represent. One was created bottom-up, working
+ towards the latest delta, the other kind was created top-down, working from the
+ latest delta down to the earliest ancestor. This attribute is queryable
+ after all processing with is_reversed."""
+
+ __slots__ = tuple()
+
+ def rbound(self):
+ """:return: rightmost extent in bytes, absolute"""
+ if len(self) == 0:
+ return 0
+ return self[-1].rbound()
+
+ def lbound(self):
+ """:return: leftmost byte at which this chunklist starts"""
+ if len(self) == 0:
+ return 0
+ return self[0].to
+
+ def size(self):
+ """:return: size of bytes as measured by our delta chunks"""
+ return self.rbound() - self.lbound()
+
+ def apply(self, bbuf, write):
+ """Only used by public clients, internally we only use the global routines
+ for performance"""
+ return delta_list_apply(self, bbuf, write)
+
+ def compress(self):
+ """Alter the list to reduce the amount of nodes. Currently we concatenate
+ add-chunks
+ :return: self"""
+ slen = len(self)
+ if slen < 2:
+ return self
+ i = 0
+ slen_orig = slen
+
+ first_data_index = None
+ while i < slen:
+ dc = self[i]
+ i += 1
+ if dc.data is None:
+ if first_data_index is not None and i-2-first_data_index > 1:
+ #if first_data_index is not None:
+ nd = StringIO() # new data
+ so = self[first_data_index].to # start offset in target buffer
+ for x in xrange(first_data_index, i-1):
+ xdc = self[x]
+ nd.write(xdc.data[:xdc.ts])
+ # END collect data
+
+ del(self[first_data_index:i-1])
+ buf = nd.getvalue()
+ self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
+
+ slen = len(self)
+ i = first_data_index + 1
+
+ # END concatenate data
+ first_data_index = None
+ continue
+ # END skip non-data chunks
+
+ if first_data_index is None:
+ first_data_index = i-1
+ # END iterate list
+
+ #if slen_orig != len(self):
+ # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
+ return self
+
+ def check_integrity(self, target_size=-1):
+ """Verify the list has non-overlapping chunks only, and the total size matches
+ target_size
+ :param target_size: if not -1, the total size of the chain must be target_size
+ :raise AssertionError: if the size doesn't match"""
+ if target_size > -1:
+ assert self[-1].rbound() == target_size
+ assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
+ # END target size verification
+
+ if len(self) < 2:
+ return
+
+ # check data
+ for dc in self:
+ assert dc.ts > 0
+ if dc.has_data():
+ assert len(dc.data) >= dc.ts
+ # END for each dc
+
+ left = islice(self, 0, len(self)-1)
+ right = iter(self)
+ right.next()
+ # this is very pythonic - we might have just used index-based access here,
+ # but this could actually be faster
+ for lft,rgt in izip(left, right):
+ assert lft.rbound() == rgt.to
+ assert lft.to + lft.ts == rgt.to
+ # END for each pair
+
+
+class TopdownDeltaChunkList(DeltaChunkList):
+ """Represents a list which is generated by feeding its ancestor streams one by
+ one"""
+ __slots__ = tuple()
+
+ def connect_with_next_base(self, bdcl):
+ """Connect this chain with the next level of our base delta chunklist.
+ The goal in this game is to mark as many of our chunks as possible rigid, so
+ they cannot be changed by any of the upcoming bases anymore. Once all our
+ chunks are marked like that, we can stop all processing
+ :param bdcl: data chunk list being one of our bases. They must be fed in
+ consecutively and in order, towards the earliest ancestor delta
+ :return: True if processing was done. Use it to abort processing of
+ remaining streams if False is returned"""
+ nfc = 0 # number of frozen chunks
+ dci = 0 # delta chunk index
+ slen = len(self) # len of self
+ ccl = list() # temporary list
+ while dci < slen:
+ dc = self[dci]
+ dci += 1
+
+ # all add-chunks which are already topmost don't need additional processing
+ if dc.data is not None:
+ nfc += 1
+ continue
+ # END skip add chunks
+
+ # copy chunks
+ # integrate the portion of the base list into ourselves. Lists
+ # don't support efficient insertion ( just one at a time ), but for now
+ # we live with it. Internally, it's all just a 32/64bit pointer, and
+ # the portions of moved memory should be smallish. Maybe we just rebuild
+ # ourselves in order to reduce the amount of insertions ...
+ del(ccl[:])
+ delta_list_slice(bdcl, dc.so, dc.ts, ccl)
+
+ # move the target bounds into place to match with our chunk
+ ofs = dc.to - dc.so
+ for cdc in ccl:
+ cdc.to += ofs
+ # END update target bounds
+
+ if len(ccl) == 1:
+ self[dci-1] = ccl[0]
+ else:
+ # maybe try to compute the expenses here, and pick the right algorithm
+ # It would normally be faster than copying everything physically though
+ # TODO: Use a deque here, and decide by the index whether to extend
+ # or extend left !
+ post_dci = self[dci:]
+ del(self[dci-1:]) # include deletion of dc
+ self.extend(ccl)
+ self.extend(post_dci)
+
+ slen = len(self)
+ dci += len(ccl)-1 # deleted dc, added rest
+
+ # END handle chunk replacement
+ # END for each chunk
+
+ if nfc == slen:
+ return False
+ # END handle completeness
+ return True
+
+
+#} END structures
+
+#{ Routines
+
+def is_loose_object(m):
+ """
+ :return: True if the file contained in memory map m appears to be a loose object.
+ Only the first two bytes are needed"""
+ b0, b1 = map(ord, m[:2])
+ word = (b0 << 8) + b1
+ return b0 == 0x78 and (word % 31) == 0
+
+def loose_object_header_info(m):
+ """
+ :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
+ object as well as its uncompressed size in bytes.
+ :param m: memory map from which to read the compressed object data"""
+ decompress_size = 8192 # is used in cgit as well
+ hdr = decompressobj().decompress(m, decompress_size)
+ type_name, size = hdr[:hdr.find("\0")].split(" ")
+ return type_name, int(size)
+
+def pack_object_header_info(data):
+ """
+ :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
+ The type_id should be interpreted according to the ``type_id_to_type_map`` map
+ The byte-offset specifies the start of the actual zlib compressed datastream
+ :param data: random-access memory, like a string or memory map"""
+ c = ord(data[0]) # first byte
+ i = 1 # next char to read
+ type_id = (c >> 4) & 7 # numeric type
+ size = c & 15 # starting size
+ s = 4 # starting bit-shift size
+ while c & 0x80:
+ c = ord(data[i])
+ i += 1
+ size += (c & 0x7f) << s
+ s += 7
+ # END character loop
+ return (type_id, size, i)
+
+def create_pack_object_header(obj_type, obj_size):
+ """:return: string defining the pack header comprised of the object type
+ and its uncompressed size in bytes
+ :param obj_type: pack type_id of the object
+ :param obj_size: uncompressed size in bytes of the following object stream"""
+ c = 0 # 1 byte
+ hdr = str() # output string
+
+ c = (obj_type << 4) | (obj_size & 0xf)
+ obj_size >>= 4
+ while obj_size:
+ hdr += chr(c | 0x80)
+ c = obj_size & 0x7f
+ obj_size >>= 7
+ #END until size is consumed
+ hdr += chr(c)
+ return hdr
+
+def msb_size(data, offset=0):
+ """
+ :return: tuple(end_offset, size) where end_offset is the offset of the first
+ byte after the parsed size, read from the given random access data
+ starting at the given byte offset"""
+ size = 0
+ i = 0
+ l = len(data)
+ hit_msb = False
+ while i < l:
+ c = ord(data[i+offset])
+ size |= (c & 0x7f) << i*7
+ i += 1
+ if not c & 0x80:
+ hit_msb = True
+ break
+ # END check msb bit
+ # END while in range
+ if not hit_msb:
+ raise AssertionError("Could not find terminating MSB byte in data stream")
+ return i+offset, size
+
+def loose_object_header(type, size):
+ """
+ :return: string representing the loose object header, which is immediately
+ followed by the content stream of size 'size'"""
+ return "%s %i\0" % (type, size)
+
+def write_object(type, size, read, write, chunk_size=chunk_size):
+ """
+ Write the object as identified by type and size, reading its content via read
+ and writing it via write
+
+ :param type: type string of the object
+ :param size: amount of bytes to write from the content source
+ :param read: read method of a stream providing the content data
+ :param write: write method of the output stream
+ :return: The actual amount of bytes written to the stream, which includes the header"""
+ tbw = 0 # total num bytes written
+
+ # WRITE HEADER: type SP size NULL
+ tbw += write(loose_object_header(type, size))
+ tbw += stream_copy(read, write, size, chunk_size)
+
+ return tbw
+
+def stream_copy(read, write, size, chunk_size):
+ """
+ Copy a stream up to size bytes using the provided read and write methods,
+ in chunks of chunk_size
+
+ :note: it's much like the stream_copy utility, but operates just using methods"""
+ dbw = 0 # num data bytes written
+
+ # WRITE ALL DATA UP TO SIZE
+ while True:
+ cs = min(chunk_size, size-dbw)
+ # NOTE: not all write methods return the amount of written bytes, like
+ # mmap.write. It's bad, but we just deal with it ... perhaps it's not
+ # even less efficient
+ # data_len = write(read(cs))
+ # dbw += data_len
+ data = read(cs)
+ data_len = len(data)
+ dbw += data_len
+ write(data)
+ if data_len < cs or dbw == size:
+ break
+ # END check for stream end
+ # END duplicate data
+ return dbw
+
+def connect_deltas(dstreams):
+ """
+ Read the condensed delta chunk information from each of the given dstreams and
+ merge it into a list of existing delta chunks
+
+ :param dstreams: iterable of delta stream objects, the delta to be applied last
+ comes first, then all its ancestors in order
+ :return: DeltaChunkList, containing all operations to apply"""
+ # topmost dcl
+ dcl = tdcl = TopdownDeltaChunkList()
+ for dsi, ds in enumerate(dstreams):
+ # print "Stream", dsi
+ db = ds.read()
+ delta_buf_size = ds.size
+
+ # read header
+ i, base_size = msb_size(db)
+ i, target_size = msb_size(db, i)
+
+ # interpret opcodes
+ tbw = 0 # amount of target bytes written
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > base_size):
+ break
+
+ dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
+ tbw += cp_size
+ elif c:
+ # NOTE: in C, the data chunks should probably be concatenated here.
+ # In python, we do it as a post-process
+ dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c]))
+ i += c
+ tbw += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+ dcl.compress()
+
+ # merge the lists !
+ if dsi > 0:
+ if not tdcl.connect_with_next_base(dcl):
+ break
+ # END handle merge
+
+ # prepare next base
+ dcl = DeltaChunkList()
+ # END for each delta stream
+
+ return tdcl
+
+def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
+ """
+ Apply data from a delta buffer using a source buffer to the target file
+
+ :param src_buf: random access data from which the delta was created
+ :param src_buf_size: size of the source buffer in bytes
+ :param delta_buf_size: size of the delta buffer in bytes
+ :param delta_buf: random access delta data
+ :param write: write method taking a chunk of bytes
+ :note: transcribed to python from the similar routine in patch-delta.c"""
+ i = 0
+ db = delta_buf
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > src_buf_size):
+ break
+ write(buffer(src_buf, cp_off, cp_size))
+ elif c:
+ write(db[i:i+c])
+ i += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+ # yes, lets use the exact same error message that git uses :)
+ assert i == delta_buf_size, "delta replay has gone wild"
+
+
+def is_equal_canonical_sha(canonical_length, match, sha1):
+ """
+ :return: True if the given partial match sha equals the given 20 byte sha1.
+ The comparison will take the canonical_length of the match sha into account,
+ hence for uneven canonical representations only the high 4 bits of the last
+ byte are compared
+ :param match: less than 20 byte sha
+ :param sha1: 20 byte sha"""
+ binary_length = canonical_length/2
+ if match[:binary_length] != sha1[:binary_length]:
+ return False
+
+ if canonical_length - binary_length and \
+ (ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
+ return False
+ # END handle uneven canonical length
+ return True
+
+#} END routines
+
+
+try:
+ # raise ImportError; # DEBUG
+ from _perf import connect_deltas
+except ImportError:
+ pass
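
create_pack_object_header and pack_object_header_info are inverses of each
other; a quick round-trip check (type id 1 is 'commit' per type_id_to_type_map):

    from git.fun import create_pack_object_header, pack_object_header_info

    hdr = create_pack_object_header(1, 3000)        # commit of 3000 bytes
    type_id, size, offset = pack_object_header_info(hdr)
    assert (type_id, size, offset) == (1, 3000, len(hdr))
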
diff --git a/git/index/base.py b/git/index/base.py
index 88410e20..12097922 100644
--- a/git/index/base.py
+++ b/git/index/base.py
@@ -62,9 +62,8 @@ from fun import (
S_IFGITLINK
)
-from gitdb.base import IStream
-from gitdb.db import MemoryDB
-from gitdb.util import to_bin_sha
+from git.base import IStream
+from git.util import to_bin_sha
from itertools import izip
__all__ = ( 'IndexFile', 'CheckoutError' )
@@ -512,7 +511,9 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
:raise UnmergedEntriesError: """
# we obtain no lock as we just flush our contents to disk as tree
# If we are a new index, the entries access will load our data accordingly
- mdb = MemoryDB()
+ # Needs delayed import as db.py imports IndexFile as well
+ import git.db.py.mem
+ mdb = git.db.py.mem.PureMemoryDB()
entries = self._entries_sorted()
binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
@@ -959,12 +960,16 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "):
is_a_dir = " is a directory"
unlink_issue = "unable to unlink old '"
+ already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...)
if line.endswith(is_a_dir):
failed_files.append(line[:-len(is_a_dir)])
failed_reasons.append(is_a_dir)
elif line.startswith(unlink_issue):
failed_files.append(line[len(unlink_issue):line.rfind("'")])
failed_reasons.append(unlink_issue)
+ elif line.endswith(already_exists_issue):
+ failed_files.append(line[:-len(already_exists_issue)])
+ failed_reasons.append(already_exists_issue)
else:
unknown_lines.append(line)
continue
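
The new branch above classifies checkout errors by the fixed message suffix
emitted by git's entry.c; the file name is whatever precedes it. A sketch of
the suffix stripping with a made-up path:

    line = "path/to/file already exists, no checkout"
    already_exists_issue = ' already exists, no checkout'
    if line.endswith(already_exists_issue):
        print(line[:-len(already_exists_issue)])    # path/to/file
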
diff --git a/git/index/fun.py b/git/index/fun.py
index 9b35bf04..e2813c0b 100644
--- a/git/index/fun.py
+++ b/git/index/fun.py
@@ -36,8 +36,8 @@ from util import (
unpack
)
-from gitdb.base import IStream
-from gitdb.typ import str_tree_type
+from git.base import IStream
+from git.typ import str_tree_type
__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key',
'stat_mode_to_index_mode', 'S_IFGITLINK')
diff --git a/git/objects/base.py b/git/objects/base.py
index 5f2f7809..61b3e674 100644
--- a/git/objects/base.py
+++ b/git/objects/base.py
@@ -3,15 +3,20 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import LazyMixin, join_path_native, stream_copy
+
from util import get_object_type_by_name
-from gitdb.util import (
+from git.util import (
hex_to_bin,
bin_to_hex,
- basename
+ dirname,
+ basename,
+ LazyMixin,
+ join_path_native,
+ stream_copy
)
-
-import gitdb.typ as dbtyp
+from git.db.interface import RepositoryPathsMixin
+from git.exc import UnsupportedOperation
+from git.typ import ObjectType
_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r"
@@ -22,24 +27,26 @@ class Object(LazyMixin):
NULL_HEX_SHA = '0'*40
NULL_BIN_SHA = '\0'*20
- TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
- __slots__ = ("repo", "binsha", "size" )
+ TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag)
+ __slots__ = ("odb", "binsha", "size" )
+
type = None # to be set by subclass
+ type_id = None # to be set by subclass
- def __init__(self, repo, binsha):
+ def __init__(self, odb, binsha):
"""Initialize an object by identifying it by its binary sha.
All keyword arguments will be set on demand if None.
- :param repo: repository this object is located in
+ :param odb: repository this object is located in
:param binsha: 20 byte SHA1"""
super(Object,self).__init__()
- self.repo = repo
+ self.odb = odb
self.binsha = binsha
assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
@classmethod
- def new(cls, repo, id):
+ def new(cls, odb, id):
"""
:return: New Object instance of a type appropriate to the object type behind
id. The id of the newly created object will be a binsha even though
@@ -49,27 +56,27 @@ class Object(LazyMixin):
:note: This cannot be a __new__ method as it would always call __init__
with the input id which is not necessarily a binsha."""
- return repo.rev_parse(str(id))
+ return odb.rev_parse(str(id))
@classmethod
- def new_from_sha(cls, repo, sha1):
+ def new_from_sha(cls, odb, sha1):
"""
:return: new object instance of a type appropriate to represent the given
binary sha1
:param sha1: 20 byte binary sha1"""
if sha1 == cls.NULL_BIN_SHA:
# the NULL binsha is always the root commit
- return get_object_type_by_name('commit')(repo, sha1)
+ return get_object_type_by_name('commit')(odb, sha1)
#END handle special case
- oinfo = repo.odb.info(sha1)
- inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
+ oinfo = odb.info(sha1)
+ inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha)
inst.size = oinfo.size
return inst
def _set_cache_(self, attr):
"""Retrieve object information"""
if attr == "size":
- oinfo = self.repo.odb.info(self.binsha)
+ oinfo = self.odb.info(self.binsha)
self.size = oinfo.size
# assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
else:
@@ -77,10 +84,14 @@ class Object(LazyMixin):
def __eq__(self, other):
""":return: True if the objects have the same SHA1"""
+ if not hasattr(other, 'binsha'):
+ return False
return self.binsha == other.binsha
def __ne__(self, other):
""":return: True if the objects do not have the same SHA1 """
+ if not hasattr(other, 'binsha'):
+ return True
return self.binsha != other.binsha
def __hash__(self):
@@ -104,13 +115,13 @@ class Object(LazyMixin):
def data_stream(self):
""" :return: File Object compatible stream to the uncompressed raw data of the object
:note: returned streams must be read in order"""
- return self.repo.odb.stream(self.binsha)
+ return self.odb.stream(self.binsha)
def stream_data(self, ostream):
"""Writes our data directly to the given output stream
:param ostream: File object compatible stream object.
:return: self"""
- istream = self.repo.odb.stream(self.binsha)
+ istream = self.odb.stream(self.binsha)
stream_copy(istream, ostream)
return self
@@ -123,9 +134,9 @@ class IndexObject(Object):
# for compatibility with iterable lists
_id_attribute_ = 'path'
- def __init__(self, repo, binsha, mode=None, path=None):
+ def __init__(self, odb, binsha, mode=None, path=None):
"""Initialize a newly instanced IndexObject
- :param repo: is the Repo we are located in
+ :param odb: is the object database we are located in
:param binsha: 20 byte sha1
:param mode: is the stat compatible file mode as int, use the stat module
to evaluate the information
@@ -135,7 +146,7 @@ class IndexObject(Object):
:note:
Path may not be set if the index object has been created directly, as it cannot
be retrieved without knowing the parent tree."""
- super(IndexObject, self).__init__(repo, binsha)
+ super(IndexObject, self).__init__(odb, binsha)
if mode is not None:
self.mode = mode
if path is not None:
@@ -167,6 +178,15 @@ class IndexObject(Object):
Absolute path to this index object in the file system ( as opposed to the
.path field which is a path relative to the git repository ).
- The returned path will be native to the system and contains '\' on windows. """
- return join_path_native(self.repo.working_tree_dir, self.path)
+ The returned path will be native to the system and contains '\' on windows.
+ :raise UnsupportedOperation: if underlying odb does not support the required method to obtain a working dir"""
+ # TODO: Here we suddenly need something better than a plain object database
+ # which indicates our odb should better be named repo !
+ root = ''
+ if isinstance(self.odb, RepositoryPathsMixin):
+ root = self.odb.working_tree_dir
+ else:
+ raise UnsupportedOperation("Cannot provide absolute path from a database without Repository path support")
+ #END handle odb type
+ return join_path_native(root, self.path)
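
The hasattr guard added to __eq__ and __ne__ makes comparisons against
arbitrary values well defined. A toy check, with Obj mirroring just the
comparison logic of Object:

    class Obj(object):
        def __init__(self, binsha):
            self.binsha = binsha
        def __eq__(self, other):
            if not hasattr(other, 'binsha'):
                return False
            return self.binsha == other.binsha

    print(Obj('\x01' * 20) == 'not an object')   # False instead of AttributeError
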
diff --git a/git/objects/blob.py b/git/objects/blob.py
index f52d1a53..9c51f99f 100644
--- a/git/objects/blob.py
+++ b/git/objects/blob.py
@@ -4,15 +4,19 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.util import RepoAliasMixin
from mimetypes import guess_type
+from git.typ import ObjectType
+
import base
__all__ = ('Blob', )
-class Blob(base.IndexObject):
+class Blob(base.IndexObject, RepoAliasMixin):
"""A Blob encapsulates a git blob object"""
DEFAULT_MIME_TYPE = "text/plain"
- type = "blob"
+ type = ObjectType.blob
+ type_id = ObjectType.blob_id
# valid blob modes
executable_mode = 0100755
diff --git a/git/objects/commit.py b/git/objects/commit.py
index fd4187b0..c32bbf1a 100644
--- a/git/objects/commit.py
+++ b/git/objects/commit.py
@@ -3,42 +3,45 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import base
-from git.util import (
- Actor,
- Iterable,
- Stats,
- )
-from git.diff import Diffable
+from git.typ import ObjectType
from tree import Tree
-from gitdb import IStream
from cStringIO import StringIO
-import base
-from gitdb.util import (
- hex_to_bin
+from git.util import (
+ hex_to_bin,
+ Actor,
+ RepoAliasMixin,
+ Iterable,
+ Stats
)
+
from util import (
- Traversable,
- Serializable,
- parse_date,
- altz_to_utctz_str,
- parse_actor_and_date
- )
-from time import (
- time,
- altzone
+ Traversable,
+ Serializable,
+ altz_to_utctz_str,
+ parse_actor_and_date
)
+from git.diff import Diffable
+from git.base import IStream
+
+from util import parse_date
+from time import altzone, time
+
import os
import sys
__all__ = ('Commit', )
-class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
+class Commit(Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable):
"""Wraps a git Commit object.
This class will act lazily on some of its attributes and will query the
value on demand only if it involves calling the git binary."""
# ENVIRONMENT VARIABLES
# read when creating new commits
@@ -53,92 +56,16 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
# object configuration
- type = "commit"
+ type = ObjectType.commit
+ type_id = ObjectType.commit_id
+
__slots__ = ("tree",
"author", "authored_date", "author_tz_offset",
"committer", "committed_date", "committer_tz_offset",
"message", "parents", "encoding")
_id_attribute_ = "binsha"
- def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None,
- message=None, parents=None, encoding=None):
- """Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set on first query.
-
- :param binsha: 20 byte sha1
- :param parents: tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
- :param tree: Tree
- Tree object
- :param author: Actor
- is the author string ( will be implicitly converted into an Actor object )
- :param authored_date: int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
- :param author_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param committer: Actor
- is the committer string
- :param committed_date: int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
- :param committer_tz_offset: int_seconds_west_of_utc
- is the timezone that the authored_date is in
- :param message: string
- is the commit message
- :param encoding: string
- encoding of the message, defaults to UTF-8
- :param parents:
- List or tuple of Commit objects which are our parent(s) in the commit
- dependency graph
- :return: git.Commit
-
- :note: Timezone information is in the same format and in the same sign
- as what time.altzone returns. The sign is inverted compared to git's
- UTC timezone."""
- super(Commit,self).__init__(repo, binsha)
- if tree is not None:
- assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
- if tree is not None:
- self.tree = tree
- if author is not None:
- self.author = author
- if authored_date is not None:
- self.authored_date = authored_date
- if author_tz_offset is not None:
- self.author_tz_offset = author_tz_offset
- if committer is not None:
- self.committer = committer
- if committed_date is not None:
- self.committed_date = committed_date
- if committer_tz_offset is not None:
- self.committer_tz_offset = committer_tz_offset
- if message is not None:
- self.message = message
- if parents is not None:
- self.parents = parents
- if encoding is not None:
- self.encoding = encoding
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- if attr in Commit.__slots__:
- # read the data in a chunk, its faster - then provide a file wrapper
- binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha)
- self._deserialize(StringIO(stream.read()))
- else:
- super(Commit, self)._set_cache_(attr)
- # END handle attrs
-
- @property
- def summary(self):
- """:return: First line of the commit message"""
- return self.message.split('\n', 1)[0]
-
+
def count(self, paths='', **kwargs):
"""Count the number of commits reachable from this commit
@@ -225,33 +152,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True)
return Stats._list_from_string(self.repo, text)
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream):
- """Parse out commit information into a list of Commit objects
- We expect one-line per commit, and parse the actual commit information directly
- from our lighting fast object database
-
- :param proc: git-rev-list process instance - one sha per line
- :return: iterator returning Commit objects"""
- stream = proc_or_stream
- if not hasattr(stream,'readline'):
- stream = proc_or_stream.stdout
-
- readline = stream.readline
- while True:
- line = readline()
- if not line:
- break
- hexsha = line.strip()
- if len(hexsha) > 40:
- # split additional information, as returned by bisect for instance
- hexsha, rest = line.split(None, 1)
- # END handle extra info
-
- assert len(hexsha) == 40, "Invalid line: %s" % hexsha
- yield Commit(repo, hex_to_bin(hexsha))
- # END for each line in stream
-
@classmethod
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
@@ -361,6 +261,112 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
# END advance head handling
return new_commit
+
+ def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set on first query.
+
+ :param binsha: 20 byte sha1
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+ :param tree: Tree
+ Tree object
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :param parents:
+ List or tuple of Commit objects which are our parent(s) in the commit
+ dependency graph
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone."""
+ super(Commit,self).__init__(odb, binsha)
+ if tree is not None:
+ assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
+ if tree is not None:
+ self.tree = tree
+ if author is not None:
+ self.author = author
+ if authored_date is not None:
+ self.authored_date = authored_date
+ if author_tz_offset is not None:
+ self.author_tz_offset = author_tz_offset
+ if committer is not None:
+ self.committer = committer
+ if committed_date is not None:
+ self.committed_date = committed_date
+ if committer_tz_offset is not None:
+ self.committer_tz_offset = committer_tz_offset
+ if message is not None:
+ self.message = message
+ if parents is not None:
+ self.parents = parents
+ if encoding is not None:
+ self.encoding = encoding
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ if attr in Commit.__slots__:
+ # read the data in a chunk, its faster - then provide a file wrapper
+ binsha, typename, self.size, stream = self.odb.stream(self.binsha)
+ self._deserialize(StringIO(stream.read()))
+ else:
+ super(Commit, self)._set_cache_(attr)
+ # END handle attrs
+
+ @property
+ def summary(self):
+ """:return: First line of the commit message"""
+ return self.message.split('\n', 1)[0]
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, odb, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+ We expect one line per commit, and parse the actual commit information directly
+ from our lightning fast object database
+
+ :param proc: git-rev-list process instance - one sha per line
+ :return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ hexsha = line.strip()
+ if len(hexsha) > 40:
+ # split additional information, as returned by bisect for instance
+ hexsha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(hexsha) == 40, "Invalid line: %s" % hexsha
+ yield cls(odb, hex_to_bin(hexsha))
+ # END for each line in stream
#{ Serializable Implementation
@@ -408,7 +414,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
readline = stream.readline
- self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
+ self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
self.parents = list()
next_line = None
@@ -418,7 +424,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
next_line = parent_line
break
# END abort reading parents
- self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1])))
+ self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
# END for each parent line
self.parents = tuple(self.parents)
@@ -461,5 +467,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable):
print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
# END exception handling
return self
-
+
#} END serializable implementation
+
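
_iter_from_process_or_stream only requires a readline() method, so a plain
stream of hexshas can be replayed through it just like a live git-rev-list
process. A hedged sketch, assuming odb is a database able to serve the listed
shas (the values below are dummies):

    from cStringIO import StringIO

    hexshas = '1' * 40 + '\n' + '2' * 40 + '\n'
    stream = StringIO(hexshas)
    # commits = list(Commit._iter_from_process_or_stream(odb, stream))
    # would yield cls(odb, hex_to_bin(sha)) for each of the two lines
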
diff --git a/git/objects/fun.py b/git/objects/fun.py
index 9b0a377c..6f2eaaad 100644
--- a/git/objects/fun.py
+++ b/git/objects/fun.py
@@ -1,4 +1,5 @@
"""Module with functions which are supposed to be as fast as possible"""
+
from stat import S_ISDIR
__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
@@ -197,3 +198,4 @@ def traverse_tree_recursive(odb, tree_sha, path_prefix):
# END for each item
return entries
+
diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py
index 82df59b0..c8bf2d49 100644
--- a/git/objects/submodule/__init__.py
+++ b/git/objects/submodule/__init__.py
@@ -1,2 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
# NOTE: Cannot import anything here as the top-level _init_ has to handle
# our dependencies
diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py
index 2160299b..0fdb121d 100644
--- a/git/objects/submodule/base.py
+++ b/git/objects/submodule/base.py
@@ -1,3 +1,8 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.util import RepoAliasMixin
import util
from util import (
mkhead,
@@ -13,9 +18,10 @@ from git.util import (
Iterable,
join_path_native,
to_native_path_linux,
- RemoteProgress
)
+from git.db.interface import RemoteProgress
+
from git.config import SectionConstraint
from git.exc import (
InvalidGitRepositoryError,
@@ -23,7 +29,7 @@ from git.exc import (
)
import stat
-import git
+import git # we use some types indirectly to prevent cyclic imports !
import os
import sys
@@ -53,7 +59,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE
# IndexObject comes via the util module, it's a 'hacky' fix thanks to python's import
# mechanism which causes plenty of trouble if the only reason for packages and
# modules is refactoring - subpackages shouldn't depend on parent packages
-class Submodule(util.IndexObject, Iterable, Traversable):
+class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin):
"""Implements access to a git submodule. They are special in that their sha
represents a commit in the submodule's repository which is to be checked out
at the path of this instance.
@@ -71,6 +77,9 @@ class Submodule(util.IndexObject, Iterable, Traversable):
# this is a bogus type for base class compatibility
type = 'submodule'
+ # this type doesn't really have a type id
+ type_id = 0
+
__slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__')
_cache_attrs = ('path', '_url', '_branch_path')
@@ -195,7 +204,7 @@ class Submodule(util.IndexObject, Iterable, Traversable):
#{ Edit Interface
@classmethod
- def add(cls, repo, name, path, url=None, branch=None, no_checkout=False):
+ def add(cls, repo, name, path, url=None, branch=None, no_checkout=False, repoType=None):
"""Add a new submodule to the given repository. This will alter the index
as well as the .gitmodules file, but will not create a new commit.
If the submodule already exists, no matter if the configuration differs
@@ -220,6 +229,8 @@ class Submodule(util.IndexObject, Iterable, Traversable):
Examples are 'master' or 'feature/new'
:param no_checkout: if True, and if the repository has to be cloned manually,
no checkout will be performed
+ :param repoType: The repository type to use. It must provide the clone_from method.
+ If None, the default implementation is used.
:return: The newly created submodule instance
:note: works atomically, such that no change will be done if the repository
update fails for instance"""
@@ -227,6 +238,8 @@ class Submodule(util.IndexObject, Iterable, Traversable):
raise InvalidGitRepositoryError("Cannot add submodules to bare repositories")
# END handle bare repos
+ repoType = repoType or git.Repo
+
path = to_native_path_linux(path)
if path.endswith('/'):
path = path[:-1]
@@ -280,7 +293,7 @@ class Submodule(util.IndexObject, Iterable, Traversable):
if not branch_is_default:
kwargs['b'] = br.name
# END setup checkout-branch
- mrepo = git.Repo.clone_from(url, path, **kwargs)
+ mrepo = repoType.clone_from(url, path, **kwargs)
# END verify url
# update configuration and index
@@ -306,7 +319,7 @@ class Submodule(util.IndexObject, Iterable, Traversable):
return sm
def update(self, recursive=False, init=True, to_latest_revision=False, progress=None,
- dry_run=False):
+		dry_run=False):
"""Update the repository of this submodule to point to the checkout
we point at with the binsha of this instance.
@@ -368,7 +381,6 @@ class Submodule(util.IndexObject, Iterable, Traversable):
if not init:
return self
# END early abort if init is not allowed
- import git
# there is no git-repository yet - but delete empty paths
module_path = join_path_native(self.repo.working_tree_dir, self.path)
@@ -384,7 +396,7 @@ class Submodule(util.IndexObject, Iterable, Traversable):
# branch according to the remote-HEAD if possible
progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name))
if not dry_run:
- mrepo = git.Repo.clone_from(self.url, module_path, n=True)
+ mrepo = type(self.repo).clone_from(self.url, module_path, n=True)
#END handle dry-run
progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path)
@@ -760,14 +772,19 @@ class Submodule(util.IndexObject, Iterable, Traversable):
#{ Query Interface
@unbare_repo
- def module(self):
- """:return: Repo instance initialized from the repository at our submodule path
+ def module(self, repoType=None):
+ """:return: Repository instance initialized from the repository at our submodule path
+		:param repoType: The type of repository to be created. It must be possible to instantiate it
+ from a single repository path.
+ If None, a default repository type will be used
:raise InvalidGitRepositoryError: if a repository was not available. This could
also mean that it was not yet initialized"""
# late import to workaround circular dependencies
- module_path = self.abspath
+ module_path = self.abspath
+ repoType = repoType or git.Repo
+
try:
- repo = git.Repo(module_path)
+ repo = repoType(module_path)
if repo != self.repo:
return repo
# END handle repo uninitialized
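
The new repoType parameter makes the repository implementation pluggable: any class that can be instantiated from a single path and that provides a clone_from classmethod can stand in for git.Repo. A minimal sketch, assuming a hypothetical MyRepo subclass and an existing checkout at /path/to/repo::

    import git
    from git.objects.submodule.base import Submodule

    class MyRepo(git.Repo):
        """Hypothetical repository type providing clone_from and path construction"""

    parent_repo = git.Repo('/path/to/repo')     # assumed existing, non-bare
    sm = Submodule.add(parent_repo, 'lib', 'lib',
                       url='git://example.com/lib.git',
                       repoType=MyRepo)         # clone runs via MyRepo.clone_from
    mrepo = sm.module(repoType=MyRepo)          # submodule repo instantiated as MyRepo
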
diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py
index 132604f6..5e4cad2d 100644
--- a/git/objects/submodule/root.py
+++ b/git/objects/submodule/root.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
from base import Submodule, UpdateProgress
from util import (
find_first_remote_branch
@@ -24,6 +28,7 @@ BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE
URLCHANGE = RootUpdateProgress.URLCHANGE
PATHCHANGE = RootUpdateProgress.PATHCHANGE
+
class RootModule(Submodule):
"""A (virtual) Root of all submodules in the given repository. It can be used
to more easily traverse all submodules of the master repository"""
diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py
index 9b32807a..2c5f6bc1 100644
--- a/git/objects/submodule/util.py
+++ b/git/objects/submodule/util.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
import git
from git.exc import InvalidGitRepositoryError
from git.config import GitConfigParser
diff --git a/git/objects/tag.py b/git/objects/tag.py
index c7d02abe..5dcd9bf9 100644
--- a/git/objects/tag.py
+++ b/git/objects/tag.py
@@ -5,24 +5,28 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
""" Module containing all object based types. """
import base
-from gitdb.util import hex_to_bin
+from git.util import RepoAliasMixin
+from git.util import hex_to_bin
from util import (
- get_object_type_by_name,
- parse_actor_and_date
- )
+ get_object_type_by_name,
+ parse_actor_and_date
+ )
+from git.typ import ObjectType
__all__ = ("TagObject", )
-class TagObject(base.Object):
+class TagObject(base.Object, RepoAliasMixin):
"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
- type = "tag"
+ type = ObjectType.tag
+ type_id = ObjectType.tag_id
+
__slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
- def __init__(self, repo, binsha, object=None, tag=None,
+ def __init__(self, odb, binsha, object=None, tag=None,
tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
"""Initialize a tag object with additional data
- :param repo: repository this object is located in
+	:param odb: the object database containing this object
:param binsha: 20 byte SHA1
:param object: Object instance of object we are pointing to
:param tag: name of this tag
@@ -32,7 +36,7 @@ class TagObject(base.Object):
it into a different format
	:param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
	tagged_date is in, in a format similar to time.altzone"""
- super(TagObject, self).__init__(repo, binsha )
+ super(TagObject, self).__init__(odb, binsha )
if object is not None:
self.object = object
if tag is not None:
@@ -49,12 +53,12 @@ class TagObject(base.Object):
def _set_cache_(self, attr):
"""Cache all our attributes at once"""
if attr in TagObject.__slots__:
- ostream = self.repo.odb.stream(self.binsha)
+ ostream = self.odb.stream(self.binsha)
lines = ostream.read().splitlines()
obj, hexsha = lines[0].split(" ") # object <hexsha>
type_token, type_name = lines[1].split(" ") # type <type_name>
- self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
+ self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))
self.tag = lines[2][4:] # tag <tag name>
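
The parsing above relies on the fixed layout of a tag object's payload: an 'object' line, a 'type' line, a 'tag' line, a 'tagger' line, then the message. A sketch with an invented payload showing how the first lines decompose::

    payload = ("object 1c09f116cbc2cb4100fb6935bb162daa4723f455\n"
               "type commit\n"
               "tag v1.0\n"
               "tagger A U Thor <author@example.com> 1300000000 +0100\n"
               "\n"
               "release message\n")
    lines = payload.splitlines()
    obj, hexsha = lines[0].split(" ")               # 'object', '<hexsha>'
    type_token, type_name = lines[1].split(" ")     # 'type', 'commit'
    tag_name = lines[2][4:]                         # 'v1.0'
    assert (type_name, tag_name) == ('commit', 'v1.0')
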
diff --git a/git/objects/tree.py b/git/objects/tree.py
index 67431686..31f2602d 100644
--- a/git/objects/tree.py
+++ b/git/objects/tree.py
@@ -3,21 +3,23 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
+from git.util import RepoAliasMixin
+import git.diff as diff
+from git.typ import ObjectType
from base import IndexObject
-from git.util import join_path
from blob import Blob
from submodule.base import Submodule
-import git.diff as diff
from fun import (
tree_entries_from_data,
tree_to_stream
)
-from gitdb.util import (
- to_bin_sha,
+from git.util import (
+ to_bin_sha,
+ join_path
)
+import util
__all__ = ("TreeModifier", "Tree")
@@ -100,7 +102,7 @@ class TreeModifier(object):
#} END mutators
-class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
+class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin):
"""Tree objects represent an ordered list of Blobs and other Trees.
``Tree as a list``::
@@ -112,7 +114,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
blob = tree[0]
"""
- type = "tree"
+ type = ObjectType.tree
+ type_id = ObjectType.tree_id
+
__slots__ = "_cache"
# actual integer ids for comparison
@@ -121,6 +125,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
symlink_id = 012
tree_id = 004
+ #{ Configuration
+
+ # override in subclass if you would like your own types to be instantiated instead
_map_id_to_type = {
commit_id : Submodule,
blob_id : Blob,
@@ -128,6 +135,8 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
# tree id added once Tree is defined
}
+	#} END configuration
+
def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
super(Tree, self).__init__(repo, binsha, mode, path)
@@ -141,7 +150,7 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
def _set_cache_(self, attr):
if attr == "_cache":
# Set the data when we need it
- ostream = self.repo.odb.stream(self.binsha)
+ ostream = self.odb.stream(self.binsha)
self._cache = tree_entries_from_data(ostream.read())
else:
super(Tree, self)._set_cache_(attr)
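
The new Configuration section allows subclasses to substitute their own object types during tree iteration by overriding _map_id_to_type. A sketch with a hypothetical Blob subclass::

    from git.objects.tree import Tree
    from git.objects.blob import Blob

    class MyBlob(Blob):
        """Hypothetical Blob subclass with additional behaviour"""

    class MyTree(Tree):
        # copy the mapping so the base class stays untouched; the tree_id
        # entry is already present, as it is added right after Tree's definition
        _map_id_to_type = dict(Tree._map_id_to_type)
        _map_id_to_type[Tree.blob_id] = MyBlob
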
diff --git a/git/objects/util.py b/git/objects/util.py
index 4c9323b8..8ac590f2 100644
--- a/git/objects/util.py
+++ b/git/objects/util.py
@@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date',
'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
'verify_utctz', 'Actor')
+
#{ Functions
def mode_str_to_int(modestr):
diff --git a/git/odict.py b/git/odict.py
index 2c8391d7..80f6965f 100644
--- a/git/odict.py
+++ b/git/odict.py
@@ -16,16 +16,11 @@
"""A dict that keeps keys in insertion order"""
from __future__ import generators
-
__author__ = ('Nicola Larosa <nico-NoSp@m-tekNico.net>,'
'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>')
-
__docformat__ = "restructuredtext en"
-
__revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $'
-
__version__ = '0.2.2'
-
__all__ = ['OrderedDict', 'SequenceOrderedDict']
import sys
diff --git a/git/pack.py b/git/pack.py
new file mode 100644
index 00000000..62e9ae03
--- /dev/null
+++ b/git/pack.py
@@ -0,0 +1,1005 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains PackIndexFile and PackFile implementations"""
+from git.exc import (
+	BadObject,
+	AmbiguousObjectName,
+	UnsupportedOperation,
+	ParseError
+	)
+from util import (
+ zlib,
+ LazyMixin,
+ unpack_from,
+ bin_to_hex,
+ file_contents_ro_filepath,
+ )
+
+from fun import (
+ create_pack_object_header,
+ pack_object_header_info,
+ is_equal_canonical_sha,
+ type_id_to_type_map,
+ write_object,
+ stream_copy,
+ chunk_size,
+ delta_types,
+ OFS_DELTA,
+ REF_DELTA,
+ msb_size
+ )
+
+try:
+ from _perf import PackIndexFile_sha_to_index
+except ImportError:
+ pass
+# END try c module
+
+from base import ( # Amazing !
+ OInfo,
+ OStream,
+ OPackInfo,
+ OPackStream,
+ ODeltaStream,
+ ODeltaPackInfo,
+ ODeltaPackStream,
+ )
+from stream import (
+ DecompressMemMapReader,
+ DeltaApplyReader,
+ Sha1Writer,
+ NullStream,
+ FlexibleSha1Writer
+ )
+
+from struct import (
+ pack,
+ unpack,
+ )
+
+from binascii import crc32
+
+from itertools import izip
+import tempfile
+import array
+import os
+import sys
+
+__all__ = ('PackIndexFile', 'PackFile', 'PackEntity')
+
+
+
+
+#{ Utilities
+
+def pack_object_at(data, offset, as_stream):
+ """
+ :return: Tuple(abs_data_offset, PackInfo|PackStream)
+ an object of the correct type according to the type_id of the object.
+ If as_stream is True, the object will contain a stream, allowing the
+ data to be read decompressed.
+	:param data: random access data containing all required information
+	:param offset: offset into the data at which the object information is located
+ :param as_stream: if True, a stream object will be returned that can read
+ the data, otherwise you receive an info object only"""
+ data = buffer(data, offset)
+ type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
+ total_rela_offset = None # set later, actual offset until data stream begins
+ delta_info = None
+
+ # OFFSET DELTA
+ if type_id == OFS_DELTA:
+ i = data_rela_offset
+ c = ord(data[i])
+ i += 1
+ delta_offset = c & 0x7f
+ while c & 0x80:
+ c = ord(data[i])
+ i += 1
+ delta_offset += 1
+ delta_offset = (delta_offset << 7) + (c & 0x7f)
+ # END character loop
+ delta_info = delta_offset
+ total_rela_offset = i
+ # REF DELTA
+ elif type_id == REF_DELTA:
+ total_rela_offset = data_rela_offset+20
+ delta_info = data[data_rela_offset:total_rela_offset]
+ # BASE OBJECT
+ else:
+ # assume its a base object
+ total_rela_offset = data_rela_offset
+ # END handle type id
+
+ abs_data_offset = offset + total_rela_offset
+ if as_stream:
+ stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
+ if delta_info is None:
+ return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
+ else:
+ return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream)
+ else:
+ if delta_info is None:
+ return abs_data_offset, OPackInfo(offset, type_id, uncomp_size)
+ else:
+ return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size, delta_info)
+ # END handle info
+ # END handle stream
+
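The OFS_DELTA branch above decodes git's variable-length offset encoding: each byte contributes 7 bits, the high bit marks continuation, and each continuation byte adds one to avoid redundant encodings. A standalone sketch of the decoder with a matching encoder for a round-trip check (the encoder is illustrative only)::

    def encode_ofs(value):
        """Encode a backward offset the way git stores it for OFS_DELTA objects"""
        out = [value & 0x7f]
        value >>= 7
        while value:
            value -= 1
            out.append(0x80 | (value & 0x7f))
            value >>= 7
        out.reverse()
        return ''.join(chr(b) for b in out)

    def decode_ofs(data):
        """Decode a backward offset; mirrors the loop in pack_object_at"""
        i = 0
        c = ord(data[i]); i += 1
        offset = c & 0x7f
        while c & 0x80:
            c = ord(data[i]); i += 1
            offset += 1
            offset = (offset << 7) + (c & 0x7f)
        return offset

    for v in (0, 127, 128, 20000, 2**31):
        assert decode_ofs(encode_ofs(v)) == v
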
+def write_stream_to_pack(read, write, zstream, base_crc=None):
+	"""Copy a stream, as read from the read function, compress it with zlib and write
+	the result using the write function. Count the number of written bytes and return it
+	:param base_crc: if not None, the crc will be the base for all compressed data
+	we consecutively write and generate a crc32 from. If None, no crc will be generated
+	:return: tuple(bytes read, bytes written, crc32); the crc is 0 if base_crc
+	was None"""
+ br = 0 # bytes read
+ bw = 0 # bytes written
+ want_crc = base_crc is not None
+ crc = 0
+ if want_crc:
+ crc = base_crc
+ #END initialize crc
+
+ while True:
+ chunk = read(chunk_size)
+ br += len(chunk)
+ compressed = zstream.compress(chunk)
+ bw += len(compressed)
+ write(compressed) # cannot assume return value
+
+ if want_crc:
+ crc = crc32(compressed, crc)
+ #END handle crc
+
+ if len(chunk) != chunk_size:
+ break
+ #END copy loop
+
+ compressed = zstream.flush()
+ bw += len(compressed)
+ write(compressed)
+ if want_crc:
+ crc = crc32(compressed, crc)
+ #END handle crc
+
+ return (br, bw, crc)
+
+
+#} END utilities
+
+
+class IndexWriter(object):
+	"""Utility to cache index information, allowing all information to be written
+	later in one go to the given stream
+ :note: currently only writes v2 indices"""
+ __slots__ = '_objs'
+
+ def __init__(self):
+ self._objs = list()
+
+ def append(self, binsha, crc, offset):
+ """Append one piece of object information"""
+ self._objs.append((binsha, crc, offset))
+
+ def write(self, pack_sha, write):
+ """Write the index file using the given write method
+ :param pack_sha: binary sha over the whole pack that we index
+ :return: sha1 binary sha over all index file contents"""
+ # sort for sha1 hash
+ self._objs.sort(key=lambda o: o[0])
+
+ sha_writer = FlexibleSha1Writer(write)
+ sha_write = sha_writer.write
+ sha_write(PackIndexFile.index_v2_signature)
+ sha_write(pack(">L", PackIndexFile.index_version_default))
+
+ # fanout
+ tmplist = list((0,)*256) # fanout or list with 64 bit offsets
+ for t in self._objs:
+ tmplist[ord(t[0][0])] += 1
+ #END prepare fanout
+ for i in xrange(255):
+ v = tmplist[i]
+ sha_write(pack('>L', v))
+ tmplist[i+1] += v
+ #END write each fanout entry
+ sha_write(pack('>L', tmplist[255]))
+
+ # sha1 ordered
+ # save calls, that is push them into c
+ sha_write(''.join(t[0] for t in self._objs))
+
+ # crc32
+ for t in self._objs:
+ sha_write(pack('>L', t[1]&0xffffffff))
+ #END for each crc
+
+ tmplist = list()
+ # offset 32
+ for t in self._objs:
+ ofs = t[2]
+ if ofs > 0x7fffffff:
+ tmplist.append(ofs)
+ ofs = 0x80000000 + len(tmplist)-1
+				#END handle 64 bit offsets
+ sha_write(pack('>L', ofs&0xffffffff))
+ #END for each offset
+
+ # offset 64
+ for ofs in tmplist:
+ sha_write(pack(">Q", ofs))
+ #END for each offset
+
+ # trailer
+ assert(len(pack_sha) == 20)
+ sha_write(pack_sha)
+ sha = sha_writer.sha(as_hex=False)
+ write(sha)
+ return sha
+
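The fanout table written above is the standard 256-entry cumulative histogram over the first byte of each sha: entry N holds the number of objects whose first byte is <= N, so entry 255 equals the total object count and every lookup can be narrowed to a small sha range. The same computation as a small sketch::

    def build_fanout(binshas):
        """:return: list of 256 cumulative counts for the given 20-byte binary shas"""
        counts = [0] * 256
        for sha in binshas:
            counts[ord(sha[0])] += 1
        fanout = []
        total = 0
        for c in counts:
            total += c
            fanout.append(total)
        return fanout       # fanout[255] == len(binshas)
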
+
+
+class PackIndexFile(LazyMixin):
+	"""A pack index provides offsets into the corresponding pack, allowing object
+	locations to be looked up quickly by sha."""
+
+	# Don't use slots as we dynamically bind functions for each version, need a dict for this
+ # The slots you see here are just to keep track of our instance variables
+ # __slots__ = ('_indexpath', '_fanout_table', '_data', '_version',
+ # '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset')
+
+ # used in v2 indices
+ _sha_list_offset = 8 + 1024
+ index_v2_signature = '\377tOc'
+ index_version_default = 2
+
+ def __init__(self, indexpath):
+ super(PackIndexFile, self).__init__()
+ self._indexpath = indexpath
+
+ def _set_cache_(self, attr):
+ if attr == "_packfile_checksum":
+ self._packfile_checksum = self._data[-40:-20]
+		elif attr == "_indexfile_checksum":
+			self._indexfile_checksum = self._data[-20:]
+ elif attr == "_data":
+ # Note: We don't lock the file when reading as we cannot be sure
+ # that we can actually write to the location - it could be a read-only
+ # alternate for instance
+ self._data = file_contents_ro_filepath(self._indexpath)
+ else:
+			# now it's time to initialize everything - if we are here, someone wants
+ # to access the fanout table or related properties
+
+ # CHECK VERSION
+ self._version = (self._data[:4] == self.index_v2_signature and 2) or 1
+ if self._version == 2:
+ version_id = unpack_from(">L", self._data, 4)[0]
+ assert version_id == self._version, "Unsupported index version: %i" % version_id
+ # END assert version
+
+ # SETUP FUNCTIONS
+ # setup our functions according to the actual version
+ for fname in ('entry', 'offset', 'sha', 'crc'):
+ setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version)))
+ # END for each function to initialize
+
+
+ # INITIALIZE DATA
+ # byte offset is 8 if version is 2, 0 otherwise
+ self._initialize()
+ # END handle attributes
+
+
+ #{ Access V1
+
+ def _entry_v1(self, i):
+ """:return: tuple(offset, binsha, 0)"""
+ return unpack_from(">L20s", self._data, 1024 + i*24) + (0, )
+
+ def _offset_v1(self, i):
+ """see ``_offset_v2``"""
+ return unpack_from(">L", self._data, 1024 + i*24)[0]
+
+ def _sha_v1(self, i):
+ """see ``_sha_v2``"""
+ base = 1024 + (i*24)+4
+ return self._data[base:base+20]
+
+ def _crc_v1(self, i):
+ """unsupported"""
+ return 0
+
+ #} END access V1
+
+ #{ Access V2
+ def _entry_v2(self, i):
+ """:return: tuple(offset, binsha, crc)"""
+ return (self._offset_v2(i), self._sha_v2(i), self._crc_v2(i))
+
+ def _offset_v2(self, i):
+		""":return: 32 or 64 bit offset into pack files. 64 bit offsets will only
+		be returned if the pack is larger than 4 GiB, or 2^32 bytes"""
+ offset = unpack_from(">L", self._data, self._pack_offset + i * 4)[0]
+
+ # if the high-bit is set, this indicates that we have to lookup the offset
+ # in the 64 bit region of the file. The current offset ( lower 31 bits )
+ # are the index into it
+ if offset & 0x80000000:
+ offset = unpack_from(">Q", self._data, self._pack_64_offset + (offset & ~0x80000000) * 8)[0]
+ # END handle 64 bit offset
+
+ return offset
+
+ def _sha_v2(self, i):
+ """:return: sha at the given index of this file index instance"""
+ base = self._sha_list_offset + i * 20
+ return self._data[base:base+20]
+
+ def _crc_v2(self, i):
+ """:return: 4 bytes crc for the object at index i"""
+ return unpack_from(">L", self._data, self._crc_list_offset + i * 4)[0]
+
+ #} END access V2
+
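_offset_v2 above implements the v2 'large offset' scheme: a 32 bit entry is the pack offset itself unless its high bit is set, in which case the remaining 31 bits index a trailing table of 64 bit offsets. A worked example with invented values::

    from struct import pack, unpack_from

    # plain entry: the value is the pack offset directly
    assert unpack_from(">L", pack(">L", 300))[0] == 300

    # high bit set: the lower 31 bits (here 1) index the 64 bit table
    entry = unpack_from(">L", pack(">L", 0x80000000 | 1))[0]
    table = pack(">QQ", 2**32 + 10, 2**32 + 20)     # two 64 bit offsets
    assert entry & 0x80000000
    assert unpack_from(">Q", table, (entry & 0x7fffffff) * 8)[0] == 2**32 + 20
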
+ #{ Initialization
+
+ def _initialize(self):
+ """initialize base data"""
+ self._fanout_table = self._read_fanout((self._version == 2) * 8)
+
+ if self._version == 2:
+ self._crc_list_offset = self._sha_list_offset + self.size() * 20
+ self._pack_offset = self._crc_list_offset + self.size() * 4
+ self._pack_64_offset = self._pack_offset + self.size() * 4
+ # END setup base
+
+ def _read_fanout(self, byte_offset):
+ """Generate a fanout table from our data"""
+ d = self._data
+ out = list()
+ append = out.append
+ for i in range(256):
+ append(unpack_from('>L', d, byte_offset + i*4)[0])
+ # END for each entry
+ return out
+
+ #} END initialization
+
+ #{ Properties
+ def version(self):
+ return self._version
+
+ def size(self):
+		""":return: number of objects referred to by this index"""
+ return self._fanout_table[255]
+
+ def path(self):
+ """:return: path to the packindexfile"""
+ return self._indexpath
+
+ def packfile_checksum(self):
+ """:return: 20 byte sha representing the sha1 hash of the pack file"""
+ return self._data[-40:-20]
+
+ def indexfile_checksum(self):
+ """:return: 20 byte sha representing the sha1 hash of this index file"""
+ return self._data[-20:]
+
+ def offsets(self):
+ """:return: sequence of all offsets in the order in which they were written
+		:note: return value can be randomly accessed, but may be immutable"""
+ if self._version == 2:
+ # read stream to array, convert to tuple
+			a = array.array('I')	# 'I': 4 byte unsigned int; 'L' would be 8 bytes on 64 bit platforms
+ a.fromstring(buffer(self._data, self._pack_offset, self._pack_64_offset - self._pack_offset))
+
+ # networkbyteorder to something array likes more
+ if sys.byteorder == 'little':
+ a.byteswap()
+ return a
+ else:
+ return tuple(self.offset(index) for index in xrange(self.size()))
+ # END handle version
+
+ def sha_to_index(self, sha):
+ """
+ :return: index usable with the ``offset`` or ``entry`` method, or None
+ if the sha was not found in this pack index
+ :param sha: 20 byte sha to lookup"""
+ first_byte = ord(sha[0])
+ get_sha = self.sha
+ lo = 0 # lower index, the left bound of the bisection
+ if first_byte != 0:
+ lo = self._fanout_table[first_byte-1]
+ hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
+
+ # bisect until we have the sha
+ while lo < hi:
+ mid = (lo + hi) / 2
+ c = cmp(sha, get_sha(mid))
+ if c < 0:
+ hi = mid
+ elif not c:
+ return mid
+ else:
+ lo = mid + 1
+ # END handle midpoint
+ # END bisect
+ return None
+
+ def partial_sha_to_index(self, partial_bin_sha, canonical_length):
+ """
+ :return: index as in `sha_to_index` or None if the sha was not found in this
+ index file
+		:param partial_bin_sha: at least two bytes of a partial binary sha
+		:param canonical_length: length of the original hexadecimal representation of the
+ given partial binary sha
+ :raise AmbiguousObjectName:"""
+ if len(partial_bin_sha) < 2:
+ raise ValueError("Require at least 2 bytes of partial sha")
+
+ first_byte = ord(partial_bin_sha[0])
+ get_sha = self.sha
+ lo = 0 # lower index, the left bound of the bisection
+ if first_byte != 0:
+ lo = self._fanout_table[first_byte-1]
+ hi = self._fanout_table[first_byte] # the upper, right bound of the bisection
+
+ # fill the partial to full 20 bytes
+ filled_sha = partial_bin_sha + '\0'*(20 - len(partial_bin_sha))
+
+ # find lowest
+ while lo < hi:
+ mid = (lo + hi) / 2
+ c = cmp(filled_sha, get_sha(mid))
+ if c < 0:
+ hi = mid
+ elif not c:
+ # perfect match
+ lo = mid
+ break
+ else:
+ lo = mid + 1
+ # END handle midpoint
+ # END bisect
+
+ if lo < self.size():
+ cur_sha = get_sha(lo)
+ if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha):
+ next_sha = None
+ if lo+1 < self.size():
+ next_sha = get_sha(lo+1)
+ if next_sha and next_sha == cur_sha:
+ raise AmbiguousObjectName(partial_bin_sha)
+ return lo
+ # END if we have a match
+ # END if we found something
+ return None
+
+ if 'PackIndexFile_sha_to_index' in globals():
+		# NOTE: It's just about 25% faster, the major bottleneck might be the attr
+ # accesses
+ def sha_to_index(self, sha):
+ return PackIndexFile_sha_to_index(self, sha)
+ # END redefine heavy-hitter with c version
+
+ #} END properties
+
+
+class PackFile(LazyMixin):
+	"""A pack is a file written according to Version 2 of the git pack format
+
+	As we currently use memory maps, the maximum size of packs we can handle is
+	therefore limited by the 32 bit address space on 32 bit systems. On 64 bit
+	systems, this should be fine though.
+
+ :note: at some point, this might be implemented using streams as well, or
+ streams are an alternate path in the case memory maps cannot be created
+ for some reason - one clearly doesn't want to read 10GB at once in that
+ case"""
+
+ __slots__ = ('_packpath', '_data', '_size', '_version')
+ pack_signature = 0x5041434b # 'PACK'
+ pack_version_default = 2
+
+ # offset into our data at which the first object starts
+ first_object_offset = 3*4 # header bytes
+ footer_size = 20 # final sha
+
+ def __init__(self, packpath):
+ self._packpath = packpath
+
+ def _set_cache_(self, attr):
+ if attr == '_data':
+ self._data = file_contents_ro_filepath(self._packpath)
+
+ # read the header information
+ type_id, self._version, self._size = unpack_from(">LLL", self._data, 0)
+
+ # TODO: figure out whether we should better keep the lock, or maybe
+ # add a .keep file instead ?
+ else: # must be '_size' or '_version'
+ # read header info - we do that just with a file stream
+ type_id, self._version, self._size = unpack(">LLL", open(self._packpath).read(12))
+ # END handle header
+
+ if type_id != self.pack_signature:
+ raise ParseError("Invalid pack signature: %i" % type_id)
+ #END assert type id
+
+ def _iter_objects(self, start_offset, as_stream=True):
+ """Handle the actual iteration of objects within this pack"""
+ data = self._data
+ content_size = len(data) - self.footer_size
+ cur_offset = start_offset or self.first_object_offset
+
+ null = NullStream()
+ while cur_offset < content_size:
+ data_offset, ostream = pack_object_at(data, cur_offset, True)
+ # scrub the stream to the end - this decompresses the object, but yields
+ # the amount of compressed bytes we need to get to the next offset
+
+ stream_copy(ostream.read, null.write, ostream.size, chunk_size)
+ cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read()
+
+
+ # if a stream is requested, reset it beforehand
+ # Otherwise return the Stream object directly, its derived from the
+ # info object
+ if as_stream:
+ ostream.stream.seek(0)
+ yield ostream
+ # END until we have read everything
+
+ #{ Pack Information
+
+ def size(self):
+		""":return: The number of objects stored in this pack"""
+ return self._size
+
+ def version(self):
+ """:return: the version of this pack"""
+ return self._version
+
+ def data(self):
+ """
+ :return: read-only data of this pack. It provides random access and usually
+ is a memory map"""
+ return self._data
+
+ def checksum(self):
+		""":return: 20 byte sha1 checksum over the entire contents of this pack file, as stored in its trailer"""
+ return self._data[-20:]
+
+ def path(self):
+ """:return: path to the packfile"""
+ return self._packpath
+ #} END pack information
+
+ #{ Pack Specific
+
+ def collect_streams(self, offset):
+ """
+ :return: list of pack streams which are required to build the object
+		at the given offset. The first entry of the list is the object at offset,
+		the last one is either a full object, or a REF_DELTA stream. The latter
+		type needs its reference object to be looked up in an ODB to form a valid
+		delta chain.
+		If the object at offset is not a delta, the size of the list is 1.
+ :param offset: specifies the first byte of the object within this pack"""
+ out = list()
+ while True:
+ ostream = pack_object_at(self._data, offset, True)[1]
+ out.append(ostream)
+ if ostream.type_id == OFS_DELTA:
+ offset = ostream.pack_offset - ostream.delta_info
+ else:
+ # the only thing we can lookup are OFFSET deltas. Everything
+ # else is either an object, or a ref delta, in the latter
+ # case someone else has to find it
+ break
+ # END handle type
+ # END while chaining streams
+ return out
+
+ #} END pack specific
+
+ #{ Read-Database like Interface
+
+ def info(self, offset):
+ """Retrieve information about the object at the given file-absolute offset
+
+ :param offset: byte offset
+ :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
+ return pack_object_at(self._data, offset or self.first_object_offset, False)[1]
+
+ def stream(self, offset):
+		"""Retrieve an object at the given file-absolute offset as stream along with its information
+
+ :param offset: byte offset
+ :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
+ return pack_object_at(self._data, offset or self.first_object_offset, True)[1]
+
+ def stream_iter(self, start_offset=0):
+ """
+		:return: iterator yielding OPackStream compatible instances, allowing
+		direct access to the data in the pack.
+ :param start_offset: offset to the first object to iterate. If 0, iteration
+ starts at the very first object in the pack.
+ :note: Iterating a pack directly is costly as the datastream has to be decompressed
+ to determine the bounds between the objects"""
+ return self._iter_objects(start_offset, as_stream=True)
+
+ #} END Read-Database like Interface
+
+
+class PackEntity(LazyMixin):
+ """Combines the PackIndexFile and the PackFile into one, allowing the
+ actual objects to be resolved and iterated"""
+
+ __slots__ = ( '_index', # our index file
+ '_pack', # our pack file
+ '_offset_map' # on demand dict mapping one offset to the next consecutive one
+ )
+
+ IndexFileCls = PackIndexFile
+ PackFileCls = PackFile
+
+ def __init__(self, pack_or_index_path):
+ """Initialize ourselves with the path to the respective pack or index file"""
+ basename, ext = os.path.splitext(pack_or_index_path)
+ self._index = self.IndexFileCls("%s.idx" % basename) # PackIndexFile instance
+ self._pack = self.PackFileCls("%s.pack" % basename) # corresponding PackFile instance
+
+ def _set_cache_(self, attr):
+ # currently this can only be _offset_map
+ # TODO: make this a simple sorted offset array which can be bisected
+ # to find the respective entry, from which we can take a +1 easily
+ # This might be slower, but should also be much lighter in memory !
+ offsets_sorted = sorted(self._index.offsets())
+ last_offset = len(self._pack.data()) - self._pack.footer_size
+ assert offsets_sorted, "Cannot handle empty indices"
+
+ offset_map = None
+ if len(offsets_sorted) == 1:
+ offset_map = { offsets_sorted[0] : last_offset }
+ else:
+ iter_offsets = iter(offsets_sorted)
+ iter_offsets_plus_one = iter(offsets_sorted)
+ iter_offsets_plus_one.next()
+ consecutive = izip(iter_offsets, iter_offsets_plus_one)
+
+ offset_map = dict(consecutive)
+
+ # the last offset is not yet set
+ offset_map[offsets_sorted[-1]] = last_offset
+ # END handle offset amount
+ self._offset_map = offset_map
+
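The offset map built above pairs every sorted offset with its successor, so the compressed size of any object is next_offset minus offset; the last object ends where the 20 byte pack trailer begins. The consecutive-pairs idiom in isolation, with invented offsets::

    from itertools import izip

    offsets = [12, 40, 95, 210]
    iter_a = iter(offsets)
    iter_b = iter(offsets)
    iter_b.next()                           # shift the second iterator by one
    omap = dict(izip(iter_a, iter_b))       # {12: 40, 40: 95, 95: 210}
    omap[offsets[-1]] = 500                 # hypothetical pack size minus footer
    assert omap[40] == 95
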
+ def _sha_to_index(self, sha):
+ """:return: index for the given sha, or raise"""
+ index = self._index.sha_to_index(sha)
+ if index is None:
+ raise BadObject(sha)
+ return index
+
+ def _iter_objects(self, as_stream):
+		"""Iterate over all objects in our index and yield their OInfo or OStream instances"""
+ _sha = self._index.sha
+ _object = self._object
+ for index in xrange(self._index.size()):
+ yield _object(_sha(index), as_stream, index)
+ # END for each index
+
+ def _object(self, sha, as_stream, index=-1):
+ """:return: OInfo or OStream object providing information about the given sha
+		:param index: if not -1, it's assumed to be the sha's index in the IndexFile"""
+		# it's a little bit redundant here, but it needs to be efficient
+ if index < 0:
+ index = self._sha_to_index(sha)
+ if sha is None:
+ sha = self._index.sha(index)
+ # END assure sha is present ( in output )
+ offset = self._index.offset(index)
+ type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._data, offset))
+ if as_stream:
+ if type_id not in delta_types:
+ packstream = self._pack.stream(offset)
+ return OStream(sha, packstream.type, packstream.size, packstream.stream)
+ # END handle non-deltas
+
+ # produce a delta stream containing all info
+ # To prevent it from applying the deltas when querying the size,
+ # we extract it from the delta stream ourselves
+ streams = self.collect_streams_at_offset(offset)
+ dstream = DeltaApplyReader.new(streams)
+
+ return ODeltaStream(sha, dstream.type, None, dstream)
+ else:
+ if type_id not in delta_types:
+ return OInfo(sha, type_id_to_type_map[type_id], uncomp_size)
+ # END handle non-deltas
+
+ # deltas are a little tougher - unpack the first bytes to obtain
+ # the actual target size, as opposed to the size of the delta data
+ streams = self.collect_streams_at_offset(offset)
+ buf = streams[0].read(512)
+ offset, src_size = msb_size(buf)
+ offset, target_size = msb_size(buf, offset)
+
+ # collect the streams to obtain the actual object type
+ if streams[-1].type_id in delta_types:
+ raise BadObject(sha, "Could not resolve delta object")
+ return OInfo(sha, streams[-1].type, target_size)
+ # END handle stream
+
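For deltas, the sizes read above come from the delta data's own header, which stores the source and target sizes as little-endian base-128 varints ahead of the delta opcodes. A sketch of that decoding, under the assumption that this is the format msb_size parses::

    def parse_delta_sizes(buf):
        """:return: (source_size, target_size, header_length)"""
        def varint(buf, i):
            value = 0
            shift = 0
            while True:
                c = ord(buf[i])
                i += 1
                value |= (c & 0x7f) << shift
                shift += 7
                if not c & 0x80:
                    return value, i
        src_size, i = varint(buf, 0)
        target_size, i = varint(buf, i)
        return src_size, target_size, i

    # 0x92 0x01 encodes 0x12 | (1 << 7) == 146; 0x05 encodes 5
    assert parse_delta_sizes('\x92\x01\x05') == (146, 5, 3)
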
+ #{ Read-Database like Interface
+
+ def info(self, sha):
+ """Retrieve information about the object identified by the given sha
+
+ :param sha: 20 byte sha1
+ :raise BadObject:
+ :return: OInfo instance, with 20 byte sha"""
+ return self._object(sha, False)
+
+ def stream(self, sha):
+ """Retrieve an object stream along with its information as identified by the given sha
+
+ :param sha: 20 byte sha1
+ :raise BadObject:
+ :return: OStream instance, with 20 byte sha"""
+ return self._object(sha, True)
+
+ def info_at_index(self, index):
+ """As ``info``, but uses a PackIndexFile compatible index to refer to the object"""
+ return self._object(None, False, index)
+
+ def stream_at_index(self, index):
+ """As ``stream``, but uses a PackIndexFile compatible index to refer to the
+ object"""
+ return self._object(None, True, index)
+
+ #} END Read-Database like Interface
+
+ #{ Interface
+
+ def pack(self):
+ """:return: the underlying pack file instance"""
+ return self._pack
+
+ def index(self):
+ """:return: the underlying pack index file instance"""
+ return self._index
+
+ def is_valid_stream(self, sha, use_crc=False):
+ """
+ Verify that the stream at the given sha is valid.
+
+		:param use_crc: if True, the index' crc is run over the compressed stream of
+			the object, which is much faster than checking the sha1. It is also
+			more prone to unnoticed corruption or manipulation. If the object is
+			a delta, this only verifies that the delta's data is valid, not the
+			data of the actual undeltified object, as it depends on more than
+			just this stream.
+			If False, the object will be decompressed and the sha generated. It must
+			match the given sha
+		:param sha: 20 byte sha1 of the object whose stream to verify
+
+ :return: True if the stream is valid
+ :raise UnsupportedOperation: If the index is version 1 only
+ :raise BadObject: sha was not found"""
+ if use_crc:
+ if self._index.version() < 2:
+ raise UnsupportedOperation("Version 1 indices do not contain crc's, verify by sha instead")
+ # END handle index version
+
+ index = self._sha_to_index(sha)
+ offset = self._index.offset(index)
+ next_offset = self._offset_map[offset]
+ crc_value = self._index.crc(index)
+
+ # create the current crc value, on the compressed object data
+ # Read it in chunks, without copying the data
+ crc_update = zlib.crc32
+ pack_data = self._pack.data()
+ cur_pos = offset
+ this_crc_value = 0
+ while cur_pos < next_offset:
+ rbound = min(cur_pos + chunk_size, next_offset)
+ size = rbound - cur_pos
+ this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
+ cur_pos += size
+ # END window size loop
+
+ # crc returns signed 32 bit numbers, the AND op forces it into unsigned
+ # mode ... wow, sneaky, from dulwich.
+ return (this_crc_value & 0xffffffff) == crc_value
+ else:
+ shawriter = Sha1Writer()
+ stream = self._object(sha, as_stream=True)
+ # write a loose object, which is the basis for the sha
+ write_object(stream.type, stream.size, stream.read, shawriter.write)
+
+			return shawriter.sha(as_hex=False) == sha
+		# END handle crc/sha verification
+
+ def info_iter(self):
+ """
+ :return: Iterator over all objects in this pack. The iterator yields
+ OInfo instances"""
+ return self._iter_objects(as_stream=False)
+
+ def stream_iter(self):
+ """
+ :return: iterator over all objects in this pack. The iterator yields
+ OStream instances"""
+ return self._iter_objects(as_stream=True)
+
+ def collect_streams_at_offset(self, offset):
+ """
+ As the version in the PackFile, but can resolve REF deltas within this pack
+ For more info, see ``collect_streams``
+
+ :param offset: offset into the pack file at which the object can be found"""
+ streams = self._pack.collect_streams(offset)
+
+ # try to resolve the last one if needed. It is assumed to be either
+ # a REF delta, or a base object, as OFFSET deltas are resolved by the pack
+ if streams[-1].type_id == REF_DELTA:
+ stream = streams[-1]
+ while stream.type_id in delta_types:
+ if stream.type_id == REF_DELTA:
+ sindex = self._index.sha_to_index(stream.delta_info)
+ if sindex is None:
+ break
+ stream = self._pack.stream(self._index.offset(sindex))
+ streams.append(stream)
+ else:
+ # must be another OFS DELTA - this could happen if a REF
+					# delta we resolved previously points to an OFS delta. Who
+					# would do that ;) ? We can handle it though
+					stream = self._pack.stream(stream.pack_offset - stream.delta_info)
+ streams.append(stream)
+ # END handle ref delta
+ # END resolve ref streams
+ # END resolve streams
+
+ return streams
+
+ def collect_streams(self, sha):
+ """
+ As ``PackFile.collect_streams``, but takes a sha instead of an offset.
+ Additionally, ref_delta streams will be resolved within this pack.
+		If this is not possible, the stream will be left alone, hence it is advised
+ to check for unresolved ref-deltas and resolve them before attempting to
+ construct a delta stream.
+
+ :param sha: 20 byte sha1 specifying the object whose related streams you want to collect
+ :return: list of streams, first being the actual object delta, the last being
+ a possibly unresolved base object.
+ :raise BadObject:"""
+ return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha)))
+
+
+ @classmethod
+ def write_pack(cls, object_iter, pack_write, index_write=None,
+ object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ """
+ Create a new pack by putting all objects obtained by the object_iterator
+ into a pack which is written using the pack_write method.
+		The respective index is produced as well if index_write is not None.
+
+ :param object_iter: iterator yielding odb output objects
+ :param pack_write: function to receive strings to write into the pack stream
+		:param index_write: if not None, the function writes the index file corresponding
+ to the pack.
+		:param object_count: if you can provide the number of objects in your iteration,
+ this would be the place to put it. Otherwise we have to pre-iterate and store
+ all items into a list to get the number, which uses more memory than necessary.
+ :param zlib_compression: the zlib compression level to use
+ :return: tuple(pack_sha, index_binsha) binary sha over all the contents of the pack
+ and over all contents of the index. If index_write was None, index_binsha will be None
+ :note: The destination of the write functions is up to the user. It could
+ be a socket, or a file for instance
+ :note: writes only undeltified objects"""
+ objs = object_iter
+ if not object_count:
+ if not isinstance(object_iter, (tuple, list)):
+ objs = list(object_iter)
+ #END handle list type
+ object_count = len(objs)
+ #END handle object
+
+ pack_writer = FlexibleSha1Writer(pack_write)
+ pwrite = pack_writer.write
+ ofs = 0 # current offset into the pack file
+ index = None
+ wants_index = index_write is not None
+
+ # write header
+ pwrite(pack('>LLL', PackFile.pack_signature, PackFile.pack_version_default, object_count))
+ ofs += 12
+
+ if wants_index:
+ index = IndexWriter()
+ #END handle index header
+
+ actual_count = 0
+ for obj in objs:
+ actual_count += 1
+ crc = 0
+
+ # object header
+ hdr = create_pack_object_header(obj.type_id, obj.size)
+			if wants_index:
+ crc = crc32(hdr)
+ else:
+ crc = None
+ #END handle crc
+ pwrite(hdr)
+
+ # data stream
+ zstream = zlib.compressobj(zlib_compression)
+ ostream = obj.stream
+ br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc)
+ assert(br == obj.size)
+ if wants_index:
+ index.append(obj.binsha, crc, ofs)
+ #END handle index
+
+ ofs += len(hdr) + bw
+ if actual_count == object_count:
+ break
+ #END abort once we are done
+ #END for each object
+
+ if actual_count != object_count:
+ raise ValueError("Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count))
+ #END count assertion
+
+ # write footer
+ pack_sha = pack_writer.sha(as_hex = False)
+ assert len(pack_sha) == 20
+ pack_write(pack_sha)
+ ofs += len(pack_sha) # just for completeness ;)
+
+ index_sha = None
+ if wants_index:
+ index_sha = index.write(pack_sha, index_write)
+ #END handle index
+
+ return pack_sha, index_sha
+
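A usage sketch for write_pack, assuming 'streams' is a list of odb output objects, each providing the type_id, size, binsha and stream attributes the loop above requires::

    pack_fp = open('/tmp/new.pack', 'wb')
    index_fp = open('/tmp/new.idx', 'wb')
    try:
        pack_sha, index_sha = PackEntity.write_pack(streams,
                                pack_fp.write, index_fp.write,
                                object_count=len(streams))
    finally:
        pack_fp.close()
        index_fp.close()
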
+ @classmethod
+ def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ """Create a new on-disk entity comprised of a properly named pack file and a properly named
+ and corresponding index file. The pack contains all OStream objects contained in object iter.
+ :param base_dir: directory which is to contain the files
+ :return: PackEntity instance initialized with the new pack
+ :note: for more information on the other parameters see the write_pack method"""
+ pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
+ index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
+ pack_write = lambda d: os.write(pack_fd, d)
+ index_write = lambda d: os.write(index_fd, d)
+
+ pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression)
+ os.close(pack_fd)
+ os.close(index_fd)
+
+ fmt = "pack-%s.%s"
+ new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack'))
+ new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx'))
+ os.rename(pack_path, new_pack_path)
+ os.rename(index_path, new_index_path)
+
+ return cls(new_pack_path)
+
+
+ #} END interface
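
Once an entity exists, every contained object can be verified cheaply through the index crcs (v2 indices only). A sketch, assuming a pack pair at the given path and that OInfo exposes the sha as binsha::

    entity = PackEntity('/path/to/pack-abc.idx')    # the .pack path works as well
    for info in entity.info_iter():
        if not entity.is_valid_stream(info.binsha, use_crc=True):
            print("corrupt object: %s" % bin_to_hex(info.binsha))
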
diff --git a/git/refs/__init__.py b/git/refs/__init__.py
index fc8ce644..35b69fca 100644
--- a/git/refs/__init__.py
+++ b/git/refs/__init__.py
@@ -2,19 +2,20 @@
# import all modules in order, fix the names they require
from symbolic import *
from reference import *
+from headref import *
from head import *
from tag import *
from remote import *
# name fixes
-import head
-head.RemoteReference = RemoteReference
-del(head)
+import headref
+headref.Head.RemoteReferenceCls = RemoteReference
+del(headref)
import symbolic
-for item in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference):
- setattr(symbolic, item.__name__, item)
+for item in (HEAD, Head, RemoteReference, TagReference, Reference):
+ setattr(symbolic.SymbolicReference, item.__name__+'Cls', item)
del(symbolic)
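
The initializer above wires concrete classes into SymbolicReference as *Cls attributes once all modules are loaded, so the modules themselves never import each other. The pattern in isolation, with hypothetical names::

    class Base(object):
        HeadCls = None                      # bound later by the package initializer

        def head(self, path):
            return self.HeadCls(self, path) # resolved at call time, not import time

    class Head(object):
        def __init__(self, repo, path):
            self.repo, self.path = repo, path

    Base.HeadCls = Head                     # wired once the cycle-free imports finished
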
diff --git a/git/refs/head.py b/git/refs/head.py
index d8729434..4345528b 100644
--- a/git/refs/head.py
+++ b/git/refs/head.py
@@ -1,19 +1,14 @@
-from symbolic import SymbolicReference
-from reference import Reference
-
-from git.config import SectionConstraint
-
-from git.util import join_path
+from symbolic import SymbolicReference
from git.exc import GitCommandError
-__all__ = ["HEAD", "Head"]
-
+__all__ = ["HEAD"]
class HEAD(SymbolicReference):
- """Special case of a Symbolic Reference as it represents the repository's
- HEAD reference."""
+ """Provides additional functionality using the git command"""
+
_HEAD_NAME = 'HEAD'
_ORIG_HEAD_NAME = 'ORIG_HEAD'
__slots__ = tuple()
@@ -90,157 +85,3 @@ class HEAD(SymbolicReference):
return self
-
-class Head(Reference):
- """A Head is a named reference to a Commit. Every Head instance contains a name
- and a Commit object.
-
- Examples::
-
- >>> repo = Repo("/path/to/repo")
- >>> head = repo.heads[0]
-
- >>> head.name
- 'master'
-
- >>> head.commit
- <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">
-
- >>> head.commit.hexsha
- '1c09f116cbc2cb4100fb6935bb162daa4723f455'"""
- _common_path_default = "refs/heads"
- k_config_remote = "remote"
- k_config_remote_ref = "merge" # branch to merge from remote
-
- @classmethod
- def delete(cls, repo, *heads, **kwargs):
- """Delete the given heads
- :param force:
- If True, the heads will be deleted even if they are not yet merged into
- the main development stream.
- Default False"""
- force = kwargs.get("force", False)
- flag = "-d"
- if force:
- flag = "-D"
- repo.git.branch(flag, *heads)
-
- def set_tracking_branch(self, remote_reference):
- """
- Configure this branch to track the given remote reference. This will alter
- this branch's configuration accordingly.
-
- :param remote_reference: The remote reference to track or None to untrack
- any references
- :return: self"""
- if remote_reference is not None and not isinstance(remote_reference, RemoteReference):
- raise ValueError("Incorrect parameter type: %r" % remote_reference)
- # END handle type
-
- writer = self.config_writer()
- if remote_reference is None:
- writer.remove_option(self.k_config_remote)
- writer.remove_option(self.k_config_remote_ref)
- if len(writer.options()) == 0:
- writer.remove_section()
- # END handle remove section
- else:
- writer.set_value(self.k_config_remote, remote_reference.remote_name)
- writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head))
- # END handle ref value
-
- return self
-
-
- def tracking_branch(self):
- """
- :return: The remote_reference we are tracking, or None if we are
- not a tracking branch"""
- reader = self.config_reader()
- if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref):
- ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref)))
- remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
- return RemoteReference(self.repo, remote_refpath)
- # END handle have tracking branch
-
- # we are not a tracking branch
- return None
-
- def rename(self, new_path, force=False):
- """Rename self to a new path
-
- :param new_path:
- Either a simple name or a path, i.e. new_name or features/new_name.
- The prefix refs/heads is implied
-
- :param force:
- If True, the rename will succeed even if a head with the target name
- already exists.
-
- :return: self
- :note: respects the ref log as git commands are used"""
- flag = "-m"
- if force:
- flag = "-M"
-
- self.repo.git.branch(flag, self, new_path)
- self.path = "%s/%s" % (self._common_path_default, new_path)
- return self
-
- def checkout(self, force=False, **kwargs):
- """Checkout this head by setting the HEAD to this reference, by updating the index
- to reflect the tree we point to and by updating the working tree to reflect
- the latest index.
-
- The command will fail if changed working tree files would be overwritten.
-
- :param force:
- If True, changes to the index and the working tree will be discarded.
- If False, GitCommandError will be raised in that situation.
-
- :param kwargs:
- Additional keyword arguments to be passed to git checkout, i.e.
- b='new_branch' to create a new branch at the given spot.
-
- :return:
- The active branch after the checkout operation, usually self unless
- a new branch has been created.
-
- :note:
- By default it is only allowed to checkout heads - everything else
- will leave the HEAD detached which is allowed and possible, but remains
- a special state that some tools might not be able to handle."""
- args = list()
- kwargs['f'] = force
- if kwargs['f'] == False:
- kwargs.pop('f')
-
- self.repo.git.checkout(self, **kwargs)
- return self.repo.active_branch
-
- #{ Configruation
-
- def _config_parser(self, read_only):
- if read_only:
- parser = self.repo.config_reader()
- else:
- parser = self.repo.config_writer()
- # END handle parser instance
-
- return SectionConstraint(parser, 'branch "%s"' % self.name)
-
- def config_reader(self):
- """
- :return: A configuration parser instance constrained to only read
- this instance's values"""
- return self._config_parser(read_only=True)
-
- def config_writer(self):
- """
- :return: A configuration writer instance with read-and write acccess
- to options of this head"""
- return self._config_parser(read_only=False)
-
- #} END configuration
-
-
diff --git a/git/refs/headref.py b/git/refs/headref.py
new file mode 100644
index 00000000..67117e96
--- /dev/null
+++ b/git/refs/headref.py
@@ -0,0 +1,170 @@
+from reference import Reference
+from git.config import SectionConstraint
+from git.util import join_path
+
+__all__ = ["Head"]
+
+class Head(Reference):
+	"""The GitPython Head implementation provides more git-command based features
+
+ A Head is a named reference to a Commit. Every Head instance contains a name
+ and a Commit object.
+
+ Examples::
+
+ >>> repo = Repo("/path/to/repo")
+ >>> head = repo.heads[0]
+
+ >>> head.name
+ 'master'
+
+ >>> head.commit
+ <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">
+
+ >>> head.commit.hexsha
+ '1c09f116cbc2cb4100fb6935bb162daa4723f455'"""
+ __slots__ = tuple()
+
+ _common_path_default = "refs/heads"
+ k_config_remote = "remote"
+ k_config_remote_ref = "merge" # branch to merge from remote
+
+ # will be set by init method !
+ RemoteReferenceCls = None
+
+ #{ Configuration
+
+ def set_tracking_branch(self, remote_reference):
+ """
+ Configure this branch to track the given remote reference. This will alter
+ this branch's configuration accordingly.
+
+ :param remote_reference: The remote reference to track or None to untrack
+ any references
+ :return: self"""
+ if remote_reference is not None and not isinstance(remote_reference, self.RemoteReferenceCls):
+ raise ValueError("Incorrect parameter type: %r" % remote_reference)
+ # END handle type
+
+ writer = self.config_writer()
+ if remote_reference is None:
+ writer.remove_option(self.k_config_remote)
+ writer.remove_option(self.k_config_remote_ref)
+ if len(writer.options()) == 0:
+ writer.remove_section()
+ # END handle remove section
+ else:
+ writer.set_value(self.k_config_remote, remote_reference.remote_name)
+ writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head))
+ # END handle ref value
+
+ return self
+
+ def tracking_branch(self):
+ """
+ :return: The remote_reference we are tracking, or None if we are
+ not a tracking branch"""
+ reader = self.config_reader()
+ if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref):
+ ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref)))
+ remote_refpath = self.RemoteReferenceCls.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
+ return self.RemoteReferenceCls(self.repo, remote_refpath)
+ # END handle have tracking branch
+
+ # we are not a tracking branch
+ return None
+
+
+ def _config_parser(self, read_only):
+ if read_only:
+ parser = self.repo.config_reader()
+ else:
+ parser = self.repo.config_writer()
+ # END handle parser instance
+
+ return SectionConstraint(parser, 'branch "%s"' % self.name)
+
+ def config_reader(self):
+ """
+ :return: A configuration parser instance constrained to only read
+ this instance's values"""
+ return self._config_parser(read_only=True)
+
+ def config_writer(self):
+ """
+		:return: A configuration writer instance with read- and write access
+ to options of this head"""
+ return self._config_parser(read_only=False)
+
+ #} END configuration
+
+ @classmethod
+ def delete(cls, repo, *heads, **kwargs):
+ """Delete the given heads
+ :param force:
+ If True, the heads will be deleted even if they are not yet merged into
+ the main development stream.
+ Default False"""
+ force = kwargs.get("force", False)
+ flag = "-d"
+ if force:
+ flag = "-D"
+ repo.git.branch(flag, *heads)
+
+
+ def rename(self, new_path, force=False):
+ """Rename self to a new path
+
+ :param new_path:
+ Either a simple name or a path, i.e. new_name or features/new_name.
+ The prefix refs/heads is implied
+
+ :param force:
+ If True, the rename will succeed even if a head with the target name
+ already exists.
+
+ :return: self
+ :note: respects the ref log as git commands are used"""
+ flag = "-m"
+ if force:
+ flag = "-M"
+
+ self.repo.git.branch(flag, self, new_path)
+ self.path = "%s/%s" % (self._common_path_default, new_path)
+ return self
+
+ def checkout(self, force=False, **kwargs):
+ """Checkout this head by setting the HEAD to this reference, by updating the index
+ to reflect the tree we point to and by updating the working tree to reflect
+ the latest index.
+
+ The command will fail if changed working tree files would be overwritten.
+
+ :param force:
+ If True, changes to the index and the working tree will be discarded.
+ If False, GitCommandError will be raised in that situation.
+
+ :param kwargs:
+ Additional keyword arguments to be passed to git checkout, i.e.
+ b='new_branch' to create a new branch at the given spot.
+
+ :return:
+ The active branch after the checkout operation, usually self unless
+ a new branch has been created.
+
+ :note:
+ By default it is only allowed to checkout heads - everything else
+ will leave the HEAD detached which is allowed and possible, but remains
+ a special state that some tools might not be able to handle."""
+		kwargs['f'] = force
+ if kwargs['f'] == False:
+ kwargs.pop('f')
+
+ self.repo.git.checkout(self, **kwargs)
+ return self.repo.active_branch
+
+
+
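A usage sketch for the tracking configuration, assuming an existing clone with a fetched remote named 'origin'::

    import git

    repo = git.Repo('/path/to/repo')        # assumed existing clone
    head = repo.heads.master
    remote_ref = repo.remotes.origin.refs.master
    head.set_tracking_branch(remote_ref)    # writes branch.master.remote/.merge
    assert head.tracking_branch() == remote_ref
    head.set_tracking_branch(None)          # untrack again
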
diff --git a/git/refs/log.py b/git/refs/log.py
index f49c07fd..3b9d8514 100644
--- a/git/refs/log.py
+++ b/git/refs/log.py
@@ -5,12 +5,9 @@ from git.util import (
LockFile,
assure_directory_exists,
to_native_path,
- )
-
-from gitdb.util import (
bin_to_hex,
join,
- file_contents_ro_filepath,
+ file_contents_ro_filepath
)
from git.objects.util import (
diff --git a/git/refs/reference.py b/git/refs/reference.py
index 1a745ee9..5cff74bb 100644
--- a/git/refs/reference.py
+++ b/git/refs/reference.py
@@ -1,12 +1,10 @@
-from symbolic import SymbolicReference
import os
-from git.objects import Object
-from git.util import (
- LazyMixin,
- Iterable,
- )
-from gitdb.util import (
+from symbolic import SymbolicReference
+from head import HEAD
+from git.util import (
+ LazyMixin,
+ Iterable,
isfile,
hex_to_bin
)
@@ -30,7 +28,7 @@ class Reference(SymbolicReference, LazyMixin, Iterable):
Path relative to the .git/ directory pointing to the ref in question, i.e.
refs/heads/master"""
if not path.startswith(self._common_path_default+'/'):
- raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path ))
+ raise ValueError("Cannot instantiate %r from path %s, maybe use %s.to_full_path(name) to safely generate a valid full path from a name" % ( self.__class__.__name__, path, type(self).__name__))
super(Reference, self).__init__(repo, path)
@@ -40,8 +38,8 @@ class Reference(SymbolicReference, LazyMixin, Iterable):
def set_object(self, object, logmsg = None):
"""Special version which checks if the head-log needs an update as well"""
oldbinsha = None
+ head = HEAD(self.repo)
if logmsg is not None:
- head = self.repo.head
if not head.is_detached and head.ref == self:
oldbinsha = self.commit.binsha
#END handle commit retrieval
@@ -62,7 +60,7 @@ class Reference(SymbolicReference, LazyMixin, Iterable):
# * check with HEAD only which should cover 99% of all usage
# * scenarios (even 100% of the default ones).
# */
- self.repo.head.log_append(oldbinsha, logmsg)
+ head.log_append(oldbinsha, logmsg)
#END check if the head
	# NOTE: Don't have to overwrite properties, as they will only work without the log
diff --git a/git/refs/remote.py b/git/refs/remote.py
index b7b07d4b..f2dc72ee 100644
--- a/git/refs/remote.py
+++ b/git/refs/remote.py
@@ -1,15 +1,17 @@
-from head import Head
-from git.util import join_path
-from gitdb.util import join
-
import os
-
+from headref import Head
+from git.util import (
+ join,
+ join_path
+ )
__all__ = ["RemoteReference"]
class RemoteReference(Head):
"""Represents a reference pointing to a remote head."""
+ __slots__ = tuple()
+
_common_path_default = "refs/remotes"
@@ -41,6 +43,11 @@ class RemoteReference(Head):
return '/'.join(tokens[3:])
@classmethod
+ def create(cls, *args, **kwargs):
+ """Used to disable this method"""
+ raise TypeError("Cannot explicitly create remote references")
+
+ @classmethod
def delete(cls, repo, *refs, **kwargs):
"""Delete the given remote references.
:note:
@@ -56,8 +63,3 @@ class RemoteReference(Head):
except OSError:
pass
# END for each ref
-
- @classmethod
- def create(cls, *args, **kwargs):
- """Used to disable this method"""
- raise TypeError("Cannot explicitly create remote references")
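The create override moved above turns explicit construction of remote references into a hard error; they only come into existence through fetch and push. A hedged sketch, with path and ref name illustrative:

    from git import Repo
    from git.refs import RemoteReference

    repo = Repo("/path/to/repo")
    try:
        RemoteReference.create(repo, 'origin/feature')
    except TypeError:
        pass                                # expected - remote refs are created implicitly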
diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py
index aec68750..ddee3809 100644
--- a/git/refs/symbolic.py
+++ b/git/refs/symbolic.py
@@ -1,24 +1,26 @@
import os
-from git.objects import Object, Commit
+import re
+
+from git.objects import (
+ Object,
+ Commit
+ )
from git.util import (
join_path,
join_path_native,
to_native_path_linux,
- assure_directory_exists
+ assure_directory_exists,
+ join,
+ dirname,
+ isdir,
+ exists,
+ isfile,
+ rename,
+ hex_to_bin,
+ LockedFD
)
-from gitdb.exc import BadObject
-from gitdb.util import (
- join,
- dirname,
- isdir,
- exists,
- isfile,
- rename,
- hex_to_bin,
- LockedFD
- )
-
+from git.exc import BadObject
from log import RefLog
__all__ = ["SymbolicReference"]
@@ -30,11 +32,27 @@ class SymbolicReference(object):
A typical example for a symbolic reference is HEAD."""
__slots__ = ("repo", "path")
+
_resolve_ref_on_create = False
_points_to_commits_only = True
_common_path_default = ""
_id_attribute_ = "name"
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+
+ #{ Configuration
+ # Object class to be used when instantiating objects
+ ObjectCls = Object
+ CommitCls = Commit
+
+ # all of the following are set by the package initializer
+ HEADCls = None
+ HeadCls = None
+ RemoteReferenceCls = None
+ TagReferenceCls = None
+ ReferenceCls = None
+ #}END configuration
+
def __init__(self, repo, path):
self.repo = repo
self.path = path
@@ -143,20 +161,53 @@ class SymbolicReference(object):
return (None, tokens[1])
# its a commit
- if repo.re_hexsha_only.match(tokens[0]):
+ if cls.re_hexsha_only.match(tokens[0]):
return (tokens[0], None)
raise ValueError("Failed to parse reference information from %r" % ref_path)
- def _get_object(self):
+ def _get_object_sha(self):
"""
:return:
- The object our ref currently refers to. Refs can be cached, they will
+ The binary sha of the object our ref currently refers to. Refs can be cached; they will
always point to the actual object as it gets re-created on each query"""
+ return hex_to_bin(self.dereference_recursive(self.repo, self.path))
+
+ def _get_object(self):
+ """
+ :return:
+ The object our ref currently refers to."""
# have to be dynamic here as we may be a tag which can point to anything
# Our path will be resolved to the hexsha which will be used accordingly
- return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
+ return self.ObjectCls.new_from_sha(self.repo, self._get_object_sha())
+ def set_object(self, object_id, logmsg = None):
+ """Set the object we point to, possibly dereference our symbolic reference first.
+ If the reference does not exist, it will be created
+
+ :param object_id: a reference specifier string, a SymbolicReference or an object hex sha.
+ SymbolicReferences will be dereferenced beforehand to obtain the object they point to
+ :param logmsg: If not None, the message will be used in the reflog entry to be
+ written. Otherwise the reflog is not altered
+ :note: plain SymbolicReferences may not actually point to objects by convention
+ :return: self"""
+ if isinstance(object_id, SymbolicReference):
+ object_id = object_id.object
+ #END resolve references
+
+ is_detached = True
+ try:
+ is_detached = self.is_detached
+ except ValueError:
+ pass
+ # END handle non-existing ones
+
+ if is_detached:
+ return self.set_reference(object_id, logmsg)
+
+ # set the commit on our reference
+ return self._get_reference().set_object(object_id, logmsg)
+
def _get_commit(self):
"""
:return:
@@ -167,7 +218,7 @@ class SymbolicReference(object):
obj = obj.object
#END dereference tag
- if obj.type != Commit.type:
+ if obj.type != self.CommitCls.type:
raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj)
#END handle type
return obj
@@ -179,20 +230,20 @@ class SymbolicReference(object):
a commit
:return: self"""
# check the type - assume the best if it is a base-string
- invalid_type = False
- if isinstance(commit, Object):
- invalid_type = commit.type != Commit.type
+ is_invalid_type = False
+ if isinstance(commit, self.ObjectCls):
+ is_invalid_type = commit.type != self.CommitCls.type
elif isinstance(commit, SymbolicReference):
- invalid_type = commit.object.type != Commit.type
+ is_invalid_type = commit.object.type != self.CommitCls.type
else:
try:
- invalid_type = self.repo.rev_parse(commit).type != Commit.type
+ is_invalid_type = self.repo.resolve_object(commit).type != self.CommitCls.type
except BadObject:
raise ValueError("Invalid object: %s" % commit)
#END handle exception
# END verify type
- if invalid_type:
+ if is_invalid_type:
raise ValueError("Need commit, got %r" % commit)
#END handle raise
@@ -202,35 +253,9 @@ class SymbolicReference(object):
return self
- def set_object(self, object, logmsg = None):
- """Set the object we point to, possibly dereference our symbolic reference first.
- If the reference does not exist, it will be created
-
- :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences
- will be dereferenced beforehand to obtain the object they point to
- :param logmsg: If not None, the message will be used in the reflog entry to be
- written. Otherwise the reflog is not altered
- :note: plain SymbolicReferences may not actually point to objects by convention
- :return: self"""
- if isinstance(object, SymbolicReference):
- object = object.object
- #END resolve references
-
- is_detached = True
- try:
- is_detached = self.is_detached
- except ValueError:
- pass
- # END handle non-existing ones
-
- if is_detached:
- return self.set_reference(object, logmsg)
-
- # set the commit on our reference
- return self._get_reference().set_object(object, logmsg)
-
commit = property(_get_commit, set_commit, doc="Query or set commits directly")
object = property(_get_object, set_object, doc="Return the object our ref currently refers to")
+ object_binsha = property(_get_object_sha, set_object, doc="Return the binary sha of the object our ref currently refers to")
def _get_reference(self):
""":return: Reference Object we point to
@@ -247,7 +272,7 @@ class SymbolicReference(object):
will be set which effectively detaches the reference if it was a purely
symbolic one.
- :param ref: SymbolicReference instance, Object instance or refspec string
+ :param ref: SymbolicReference instance, hexadecimal sha string or refspec string
Only if the ref is a SymbolicRef instance, we will point to it. Everything
else is dereferenced to obtain the actual object.
:param logmsg: If set to a string, the message will be used in the reflog.
@@ -263,12 +288,12 @@ class SymbolicReference(object):
obj = None
if isinstance(ref, SymbolicReference):
write_value = "ref: %s" % ref.path
- elif isinstance(ref, Object):
+ elif isinstance(ref, self.ObjectCls):
obj = ref
write_value = ref.hexsha
elif isinstance(ref, basestring):
try:
- obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags
+ obj = self.repo.resolve_object(ref+"^{}") # optionally deref tags
write_value = obj.hexsha
except BadObject:
raise ValueError("Could not extract object from %s" % ref)
@@ -318,7 +343,7 @@ class SymbolicReference(object):
a valid object or reference."""
try:
self.object
- except (OSError, ValueError):
+ except (OSError, ValueError, BadObject):
return False
else:
return True
@@ -449,7 +474,16 @@ class SymbolicReference(object):
# figure out target data
target = reference
if resolve:
- target = repo.rev_parse(str(reference))
+ # could just use the resolve method, but it could be expensive
+ # so we handle most common cases ourselves
+ if isinstance(reference, cls.ObjectCls):
+ target = reference.hexsha
+ elif isinstance(reference, SymbolicReference):
+ target = reference.object.hexsha
+ else:
+ target = repo.resolve_object(str(reference))
+ #END handle resolution
+ #END need resolution
if not force and isfile(abs_ref_path):
target_data = str(target)
@@ -579,7 +613,7 @@ class SymbolicReference(object):
def iter_items(cls, repo, common_path = None):
"""Find all refs in the repository
- :param repo: is the Repo
+ :param repo: is the repo
:param common_path:
Optional keyword argument to the path which is to be shared by all
@@ -588,12 +622,12 @@ class SymbolicReference(object):
refs suitable for the actual class are returned.
:return:
- git.SymbolicReference[], each of them is guaranteed to be a symbolic
- ref which is not detached.
+ git.SymbolicReference[], each of them is guaranteed to be *only* a symbolic
+ ref, or a derived class which is not detached
List is lexicographically sorted
The returned objects represent actual subclasses, such as Head or TagReference"""
- return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached )
+ return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == cls or not r.is_detached )
@classmethod
def from_path(cls, repo, path):
@@ -606,7 +640,7 @@ class SymbolicReference(object):
if not path:
raise ValueError("Cannot create Reference from %r" % path)
- for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference):
+ for ref_type in (cls.HEADCls, cls.HeadCls, cls.RemoteReferenceCls, cls.TagReferenceCls, cls.ReferenceCls, cls):
try:
instance = ref_type(repo, path)
if instance.__class__ == SymbolicReference and instance.is_detached:
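The new *Cls configuration slots decouple SymbolicReference from the concrete reference types, which the package initializer injects. A hedged sketch of the resolution order from_path now uses; the printed class name assumes the initializer has run:

    from git import Repo
    from git.refs.symbolic import SymbolicReference

    repo = Repo("/path/to/repo")            # illustrative path
    # from_path tries HEADCls, HeadCls, RemoteReferenceCls, TagReferenceCls,
    # ReferenceCls and finally the calling class itself, in that order
    ref = SymbolicReference.from_path(repo, 'refs/heads/master')
    print(type(ref).__name__)               # expected: Head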
diff --git a/git/refs/tag.py b/git/refs/tag.py
index c09d814d..3a1433be 100644
--- a/git/refs/tag.py
+++ b/git/refs/tag.py
@@ -2,8 +2,6 @@ from reference import Reference
__all__ = ["TagReference", "Tag"]
-
-
class TagReference(Reference):
"""Class representing a lightweight tag reference which either points to a commit
,a tag object or any other object. In the latter case additional information,
@@ -16,7 +14,6 @@ class TagReference(Reference):
print tagref.commit.message
if tagref.tag is not None:
print tagref.tag.message"""
-
__slots__ = tuple()
_common_path_default = "refs/tags"
@@ -45,7 +42,7 @@ class TagReference(Reference):
# make object read-only
# It should be reasonably hard to adjust an existing tag
object = property(Reference._get_object)
-
+
@classmethod
def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs):
"""Create a new tag reference.
@@ -85,7 +82,5 @@ class TagReference(Reference):
"""Delete the given existing tag or tags"""
repo.git.tag("-d", *tags)
-
-
# provide an alias
Tag = TagReference
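A hedged round-trip sketch for the tag API touched above, assuming the compatibility Repo keeps its create_tag/delete_tag helpers; path and tag name are illustrative:

    from git import Repo

    repo = Repo("/path/to/repo")
    tag = repo.create_tag('0.1.5', message='release 0.1.5')   # annotated tag
    print(tag.commit.hexsha)                # tag refs always resolve to a commit here
    if tag.tag is not None:
        print(tag.tag.message)              # message of the underlying tag object
    repo.delete_tag(tag)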
diff --git a/git/remote.py b/git/remote.py
index d3639f7b..47adedbf 100644
--- a/git/remote.py
+++ b/git/remote.py
@@ -13,255 +13,27 @@ from config import SectionConstraint
from git.util import (
LazyMixin,
Iterable,
- IterableList,
- RemoteProgress
+ IterableList
)
+from git.db.interface import TransportDB
+from refs import RemoteReference
-from refs import (
- Reference,
- RemoteReference,
- SymbolicReference,
- TagReference
- )
-
-from git.util import join_path
-from gitdb.util import join
-
-import re
import os
-import sys
-__all__ = ('RemoteProgress', 'PushInfo', 'FetchInfo', 'Remote')
+__all__ = ['Remote']
-
class PushInfo(object):
- """
- Carries information about the result of a push operation of a single head::
-
- info = remote.push()[0]
- info.flags # bitflags providing more information about the result
- info.local_ref # Reference pointing to the local reference that was pushed
- # It is None if the ref was deleted.
- info.remote_ref_string # path to the remote reference located on the remote side
- info.remote_ref # Remote Reference on the local side corresponding to
- # the remote_ref_string. It can be a TagReference as well.
- info.old_commit # commit at which the remote_ref was standing before we pushed
- # it to local_ref.commit. Will be None if an error was indicated
- info.summary # summary line providing human readable english text about the push
- """
- __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary')
-
- NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
- FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
-
- _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0,
- '+' : FORCED_UPDATE, ' ' : FAST_FORWARD,
- '=' : UP_TO_DATE, '!' : ERROR }
-
- def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None,
- summary=''):
- """ Initialize a new instance """
- self.flags = flags
- self.local_ref = local_ref
- self.remote_ref_string = remote_ref_string
- self._remote = remote
- self.old_commit = old_commit
- self.summary = summary
-
- @property
- def remote_ref(self):
- """
- :return:
- Remote Reference or TagReference in the local repository corresponding
- to the remote_ref_string kept in this instance."""
- # translate heads to a local remote, tags stay as they are
- if self.remote_ref_string.startswith("refs/tags"):
- return TagReference(self._remote.repo, self.remote_ref_string)
- elif self.remote_ref_string.startswith("refs/heads"):
- remote_ref = Reference(self._remote.repo, self.remote_ref_string)
- return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name))
- else:
- raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
- # END
-
- @classmethod
- def _from_line(cls, remote, line):
- """Create a new PushInfo instance as parsed from line which is expected to be like
- refs/heads/master:refs/heads/master 05d2687..1d0568e"""
- control_character, from_to, summary = line.split('\t', 3)
- flags = 0
-
- # control character handling
- try:
- flags |= cls._flag_map[ control_character ]
- except KeyError:
- raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
- # END handle control character
-
- # from_to handling
- from_ref_string, to_ref_string = from_to.split(':')
- if flags & cls.DELETED:
- from_ref = None
- else:
- from_ref = Reference.from_path(remote.repo, from_ref_string)
-
- # commit handling, could be message or commit info
- old_commit = None
- if summary.startswith('['):
- if "[rejected]" in summary:
- flags |= cls.REJECTED
- elif "[remote rejected]" in summary:
- flags |= cls.REMOTE_REJECTED
- elif "[remote failure]" in summary:
- flags |= cls.REMOTE_FAILURE
- elif "[no match]" in summary:
- flags |= cls.ERROR
- elif "[new tag]" in summary:
- flags |= cls.NEW_TAG
- elif "[new branch]" in summary:
- flags |= cls.NEW_HEAD
- # uptodate encoded in control character
- else:
- # fast-forward or forced update - was encoded in control character,
- # but we parse the old and new commit
- split_token = "..."
- if control_character == " ":
- split_token = ".."
- old_sha, new_sha = summary.split(' ')[0].split(split_token)
- # have to use constructor here as the sha usually is abbreviated
- old_commit = remote.repo.commit(old_sha)
- # END message handling
-
- return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary)
-
-
-class FetchInfo(object):
- """
- Carries information about the results of a fetch operation of a single head::
-
- info = remote.fetch()[0]
- info.ref # Symbolic Reference or RemoteReference to the changed
- # remote head or FETCH_HEAD
- info.flags # additional flags to be & with enumeration members,
- # i.e. info.flags & info.REJECTED
- # is 0 if ref is SymbolicReference
- info.note # additional notes given by git-fetch intended for the user
- info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
- # field is set to the previous location of ref, otherwise None
- """
- __slots__ = ('ref','old_commit', 'flags', 'note')
-
- NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
- FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
+ """Wrapper for basic PushInfo to provide the previous interface which includes
+ resolved objects instead of plain shas
- # %c %-*s %-*s -> %s (%s)
- re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+ old_commit # object for the corresponding old_commit_sha"""
- _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0,
- '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD }
- def __init__(self, ref, flags, note = '', old_commit = None):
- """
- Initialize a new instance
- """
- self.ref = ref
- self.flags = flags
- self.note = note
- self.old_commit = old_commit
-
- def __str__(self):
- return self.name
-
- @property
- def name(self):
- """:return: Name of our remote ref"""
- return self.ref.name
-
- @property
- def commit(self):
- """:return: Commit of our remote ref"""
- return self.ref.commit
-
- @classmethod
- def _from_line(cls, repo, line, fetch_line):
- """Parse information from the given line as returned by git-fetch -v
- and return a new FetchInfo object representing this information.
-
- We can handle a line as follows
- "%c %-*s %-*s -> %s%s"
-
- Where c is either ' ', !, +, -, *, or =
- ! means error
- + means success forcing update
- - means a tag was updated
- * means birth of new branch or tag
- = means the head was up to date ( and not moved )
- ' ' means a fast-forward
-
- fetch line is the corresponding line from FETCH_HEAD, like
- acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
- match = cls.re_fetch_result.match(line)
- if match is None:
- raise ValueError("Failed to parse line: %r" % line)
-
- # parse lines
- control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
- try:
- new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
- ref_type_name, fetch_note = fetch_note.split(' ', 1)
- except ValueError: # unpack error
- raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line)
-
- # handle FETCH_HEAD and figure out ref type
- # If we do not specify a target branch like master:refs/remotes/origin/master,
- # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
- # have. In that case we use a symbolic reference which is detached
- ref_type = None
- if remote_local_ref == "FETCH_HEAD":
- ref_type = SymbolicReference
- elif ref_type_name == "branch":
- ref_type = RemoteReference
- elif ref_type_name == "tag":
- ref_type = TagReference
- else:
- raise TypeError("Cannot handle reference type: %r" % ref_type_name)
-
- # create ref instance
- if ref_type is SymbolicReference:
- remote_local_ref = ref_type(repo, "FETCH_HEAD")
- else:
- remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip()))
- # END create ref instance
-
- note = ( note and note.strip() ) or ''
-
- # parse flags from control_character
- flags = 0
- try:
- flags |= cls._flag_map[control_character]
- except KeyError:
- raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
- # END control char exception hanlding
-
- # parse operation string for more info - makes no sense for symbolic refs
- old_commit = None
- if isinstance(remote_local_ref, Reference):
- if 'rejected' in operation:
- flags |= cls.REJECTED
- if 'new tag' in operation:
- flags |= cls.NEW_TAG
- if 'new branch' in operation:
- flags |= cls.NEW_HEAD
- if '...' in operation or '..' in operation:
- split_token = '...'
- if control_character == ' ':
- split_token = split_token[:-1]
- old_commit = repo.rev_parse(operation.split(split_token)[0])
- # END handle refspec
- # END reference flag handling
-
- return cls(remote_local_ref, flags, note, old_commit)
+class FetchInfo(object):
+ """Wrapper to restore the previous interface, resolving objects and wrapping
+ references"""
+
class Remote(LazyMixin, Iterable):
"""Provides easy read and write access to a git remote.
@@ -280,6 +52,16 @@ class Remote(LazyMixin, Iterable):
:param repo: The repository we are a remote of
:param name: the name of the remote, i.e. 'origin'"""
+ if not hasattr(repo, 'git'):
+ # note: at some point we could just create a git command instance ourselves
+ # but let's just be lazy for now
+ raise AssertionError("Currently the repository is required to provide a git command instance")
+ #END assert git cmd
+
+ if not isinstance(repo, TransportDB):
+ raise AssertionError("Require TransportDB interface implementation")
+ #END verify interface
+
self.repo = repo
self.name = name
@@ -432,97 +214,6 @@ class Remote(LazyMixin, Iterable):
self.repo.git.remote("update", self.name)
return self
- def _digest_process_messages(self, fh, progress):
- """Read progress messages from file-like object fh, supplying the respective
- progress messages to the progress instance.
-
- :return: list(line, ...) list of lines without linebreaks that did
- not contain progress information"""
- line_so_far = ''
- dropped_lines = list()
- while True:
- char = fh.read(1)
- if not char:
- break
-
- if char in ('\r', '\n'):
- dropped_lines.extend(progress._parse_progress_line(line_so_far))
- line_so_far = ''
- else:
- line_so_far += char
- # END process parsed line
- # END while file is not done reading
- return dropped_lines
-
-
- def _finalize_proc(self, proc):
- """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
- try:
- proc.wait()
- except GitCommandError,e:
- # if a push has rejected items, the command has non-zero return status
- # a return status of 128 indicates a connection error - reraise the previous one
- if proc.poll() == 128:
- raise
- pass
- # END exception handling
-
-
- def _get_fetch_info_from_stderr(self, proc, progress):
- # skip first line as it is some remote info we are not interested in
- output = IterableList('name')
-
-
- # lines which are no progress are fetch info lines
- # this also waits for the command to finish
- # Skip some progress lines that don't provide relevant information
- fetch_info_lines = list()
- for line in self._digest_process_messages(proc.stderr, progress):
- if line.startswith('From') or line.startswith('remote: Total'):
- continue
- elif line.startswith('warning:'):
- print >> sys.stderr, line
- continue
- elif line.startswith('fatal:'):
- raise GitCommandError(("Error when fetching: %s" % line,), 2)
- # END handle special messages
- fetch_info_lines.append(line)
- # END for each line
-
- # read head information
- fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r')
- fetch_head_info = fp.readlines()
- fp.close()
-
- assert len(fetch_info_lines) == len(fetch_head_info)
-
- output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line)
- for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info))
-
- self._finalize_proc(proc)
- return output
-
- def _get_push_info(self, proc, progress):
- # read progress information from stderr
- # we hope stdout can hold all the data, it should ...
- # read the lines manually as it will use carriage returns between the messages
- # to override the previous one. This is why we read the bytes manually
- self._digest_process_messages(proc.stderr, progress)
-
- output = IterableList('name')
- for line in proc.stdout.readlines():
- try:
- output.append(PushInfo._from_line(self, line))
- except ValueError:
- # if an error happens, additional info is given which we cannot parse
- pass
- # END exception handling
- # END for each line
-
- self._finalize_proc(proc)
- return output
-
-
def fetch(self, refspec=None, progress=None, **kwargs):
"""Fetch the latest changes for this remote
@@ -546,8 +237,7 @@ class Remote(LazyMixin, Iterable):
:note:
As fetch does not provide progress information to non-ttys, we cannot make
it available here unfortunately as in the 'push' method."""
- proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
- return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
+ return self.repo.fetch(self.name, refspec, progress, **kwargs)
def pull(self, refspec=None, progress=None, **kwargs):
"""Pull changes from the given branch, being the same as a fetch followed
@@ -557,8 +247,7 @@ class Remote(LazyMixin, Iterable):
:param progress: see 'push' method
:param kwargs: Additional arguments to be passed to git-pull
:return: Please see 'fetch' method """
- proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
- return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
+ return self.repo.pull(self.name, refspec, progress, **kwargs)
def push(self, refspec=None, progress=None, **kwargs):
"""Push changes from source branch in refspec to target branch in refspec.
@@ -578,8 +267,7 @@ class Remote(LazyMixin, Iterable):
in their flags.
If the operation fails completely, the length of the returned IterableList will
be null."""
- proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs)
- return self._get_push_info(proc, progress or RemoteProgress())
+ return self.repo.push(self.name, refspec, progress, **kwargs)
@property
def config_reader(self):
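With parsing moved behind the TransportDB interface, the remote operations above reduce to one-line delegates. A hedged sketch of the resulting call path, assuming the wrappers restore the previous FetchInfo attributes; remote name and refspec are illustrative:

    from git import Repo

    repo = Repo("/path/to/repo")
    origin = repo.remote('origin')
    # fetch/pull/push now forward to repo.fetch/pull/push on the database
    for info in origin.fetch('master'):
        print(info.name, info.flags)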
diff --git a/git/repo.py b/git/repo.py
new file mode 100644
index 00000000..8d5c4021
--- /dev/null
+++ b/git/repo.py
@@ -0,0 +1,45 @@
+# repo.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""This module is just to maintain compatibility to git-python 0.3x"""
+
+from git.db.complex import CmdCompatibilityGitDB
+
+
+import warnings
+
+__all__ = ('Repo', )
+
+
+class Repo(CmdCompatibilityGitDB):
+ """Represents a git repository and allows you to query references,
+ gather commit information, generate diffs, create and clone repositories query
+ the log.
+
+ The following attributes are worth using:
+
+ 'working_dir' is the working directory of the git command, which is the working tree
+ directory if available or the .git directory in case of bare repositories
+
+ 'working_tree_dir' is the working tree directory, but will raise AssertionError
+ if we are a bare repository.
+
+ 'git_dir' is the .git repository directory, which is always set."""
+
+ def __init__(self, path=None, odbt = None):
+ """Create a new Repo instance
+
+ :param path: is the path to either the root git directory or the bare git repo::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+ repo = Repo("~/Development/git-python.git")
+ repo = Repo("$REPOSITORIES/Development/git-python.git")
+ :raise InvalidDBRoot:
+ :return: git.Repo """
+ if odbt is not None:
+ warnings.warn("deprecated use of odbt", DeprecationWarning)
+ #END handle old parameter
+ super(Repo, self).__init__(path)
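A hedged sketch of the compatibility behaviour above: passing the retired odbt parameter now only emits a DeprecationWarning instead of switching database backends; the path is illustrative:

    import warnings
    from git.repo import Repo

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        repo = Repo("/path/to/repo", odbt=object)   # odbt is accepted but ignored
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)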
diff --git a/git/repo/__init__.py b/git/repo/__init__.py
deleted file mode 100644
index 8902a254..00000000
--- a/git/repo/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Initialize the Repo package"""
-
-from base import * \ No newline at end of file
diff --git a/git/repo/base.py b/git/repo/base.py
deleted file mode 100644
index 0405a5f9..00000000
--- a/git/repo/base.py
+++ /dev/null
@@ -1,753 +0,0 @@
-# repo.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from git.exc import InvalidGitRepositoryError, NoSuchPathError
-from git.cmd import Git
-from git.util import Actor
-from git.refs import *
-from git.index import IndexFile
-from git.objects import *
-from git.config import GitConfigParser
-from git.remote import Remote
-from git.db import (
- GitCmdObjectDB,
- GitDB
- )
-
-
-from gitdb.util import (
- join,
- isfile,
- hex_to_bin
- )
-
-from fun import (
- rev_parse,
- is_git_dir,
- touch
- )
-
-import os
-import sys
-import re
-
-DefaultDBType = GitDB
-if sys.version_info[1] < 5: # python 2.4 compatiblity
- DefaultDBType = GitCmdObjectDB
-# END handle python 2.4
-
-
-__all__ = ('Repo', )
-
-
-class Repo(object):
- """Represents a git repository and allows you to query references,
- gather commit information, generate diffs, create and clone repositories query
- the log.
-
- The following attributes are worth using:
-
- 'working_dir' is the working directory of the git command, wich is the working tree
- directory if available or the .git directory in case of bare repositories
-
- 'working_tree_dir' is the working tree directory, but will raise AssertionError
- if we are a bare repository.
-
- 'git_dir' is the .git repository directoy, which is always set."""
- DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
- __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" )
-
- # precompiled regex
- re_whitespace = re.compile(r'\s+')
- re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
- re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
- re_author_committer_start = re.compile(r'^(author|committer)')
- re_tab_full_line = re.compile(r'^\t(.*)$')
-
- # invariants
- # represents the configuration level of a configuration file
- config_level = ("system", "global", "repository")
-
- def __init__(self, path=None, odbt = DefaultDBType):
- """Create a new Repo instance
-
- :param path: is the path to either the root git directory or the bare git repo::
-
- repo = Repo("/Users/mtrier/Development/git-python")
- repo = Repo("/Users/mtrier/Development/git-python.git")
- repo = Repo("~/Development/git-python.git")
- repo = Repo("$REPOSITORIES/Development/git-python.git")
-
- :param odbt: Object DataBase type - a type which is constructed by providing
- the directory containing the database objects, i.e. .git/objects. It will
- be used to access all object data
- :raise InvalidGitRepositoryError:
- :raise NoSuchPathError:
- :return: git.Repo """
- epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))
-
- if not os.path.exists(epath):
- raise NoSuchPathError(epath)
-
- self.working_dir = None
- self._working_tree_dir = None
- self.git_dir = None
- curpath = epath
-
- # walk up the path to find the .git dir
- while curpath:
- if is_git_dir(curpath):
- self.git_dir = curpath
- self._working_tree_dir = os.path.dirname(curpath)
- break
- gitpath = join(curpath, '.git')
- if is_git_dir(gitpath):
- self.git_dir = gitpath
- self._working_tree_dir = curpath
- break
- curpath, dummy = os.path.split(curpath)
- if not dummy:
- break
- # END while curpath
-
- if self.git_dir is None:
- raise InvalidGitRepositoryError(epath)
-
- self._bare = False
- try:
- self._bare = self.config_reader("repository").getboolean('core','bare')
- except Exception:
- # lets not assume the option exists, although it should
- pass
-
- # adjust the wd in case we are actually bare - we didn't know that
- # in the first place
- if self._bare:
- self._working_tree_dir = None
- # END working dir handling
-
- self.working_dir = self._working_tree_dir or self.git_dir
- self.git = Git(self.working_dir)
-
- # special handling, in special times
- args = [join(self.git_dir, 'objects')]
- if issubclass(odbt, GitCmdObjectDB):
- args.append(self.git)
- self.odb = odbt(*args)
-
- def __eq__(self, rhs):
- if isinstance(rhs, Repo):
- return self.git_dir == rhs.git_dir
- return False
-
- def __ne__(self, rhs):
- return not self.__eq__(rhs)
-
- def __hash__(self):
- return hash(self.git_dir)
-
- def __repr__(self):
- return "%s(%r)" % (type(self).__name__, self.git_dir)
-
- # Description property
- def _get_description(self):
- filename = join(self.git_dir, 'description')
- return file(filename).read().rstrip()
-
- def _set_description(self, descr):
- filename = join(self.git_dir, 'description')
- file(filename, 'w').write(descr+'\n')
-
- description = property(_get_description, _set_description,
- doc="the project's description")
- del _get_description
- del _set_description
-
-
-
- @property
- def working_tree_dir(self):
- """:return: The working tree directory of our git repository
- :raise AssertionError: If we are a bare repository"""
- if self._working_tree_dir is None:
- raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir )
- return self._working_tree_dir
-
- @property
- def bare(self):
- """:return: True if the repository is bare"""
- return self._bare
-
- @property
- def heads(self):
- """A list of ``Head`` objects representing the branch heads in
- this repo
-
- :return: ``git.IterableList(Head, ...)``"""
- return Head.list_items(self)
-
- @property
- def references(self):
- """A list of Reference objects representing tags, heads and remote references.
-
- :return: IterableList(Reference, ...)"""
- return Reference.list_items(self)
-
- # alias for references
- refs = references
-
- # alias for heads
- branches = heads
-
- @property
- def index(self):
- """:return: IndexFile representing this repository's index."""
- return IndexFile(self)
-
- @property
- def head(self):
- """:return: HEAD Object pointing to the current head reference"""
- return HEAD(self,'HEAD')
-
- @property
- def remotes(self):
- """A list of Remote objects allowing to access and manipulate remotes
- :return: ``git.IterableList(Remote, ...)``"""
- return Remote.list_items(self)
-
- def remote(self, name='origin'):
- """:return: Remote with the specified name
- :raise ValueError: if no remote with such a name exists"""
- return Remote(self, name)
-
- #{ Submodules
-
- @property
- def submodules(self):
- """
- :return: git.IterableList(Submodule, ...) of direct submodules
- available from the current head"""
- return Submodule.list_items(self)
-
- def submodule(self, name):
- """ :return: Submodule with the given name
- :raise ValueError: If no such submodule exists"""
- try:
- return self.submodules[name]
- except IndexError:
- raise ValueError("Didn't find submodule named %r" % name)
- # END exception handling
-
- def create_submodule(self, *args, **kwargs):
- """Create a new submodule
-
- :note: See the documentation of Submodule.add for a description of the
- applicable parameters
- :return: created submodules"""
- return Submodule.add(self, *args, **kwargs)
-
- def iter_submodules(self, *args, **kwargs):
- """An iterator yielding Submodule instances, see Traversable interface
- for a description of args and kwargs
- :return: Iterator"""
- return RootModule(self).traverse(*args, **kwargs)
-
- def submodule_update(self, *args, **kwargs):
- """Update the submodules, keeping the repository consistent as it will
- take the previous state into consideration. For more information, please
- see the documentation of RootModule.update"""
- return RootModule(self).update(*args, **kwargs)
-
- #}END submodules
-
- @property
- def tags(self):
- """A list of ``Tag`` objects that are available in this repo
- :return: ``git.IterableList(TagReference, ...)`` """
- return TagReference.list_items(self)
-
- def tag(self,path):
- """:return: TagReference Object, reference pointing to a Commit or Tag
- :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """
- return TagReference(self, path)
-
- def create_head(self, path, commit='HEAD', force=False, logmsg=None ):
- """Create a new head within the repository.
- For more documentation, please see the Head.create method.
-
- :return: newly created Head Reference"""
- return Head.create(self, path, commit, force, logmsg)
-
- def delete_head(self, *heads, **kwargs):
- """Delete the given heads
-
- :param kwargs: Additional keyword arguments to be passed to git-branch"""
- return Head.delete(self, *heads, **kwargs)
-
- def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
- """Create a new tag reference.
- For more documentation, please see the TagReference.create method.
-
- :return: TagReference object """
- return TagReference.create(self, path, ref, message, force, **kwargs)
-
- def delete_tag(self, *tags):
- """Delete the given tag references"""
- return TagReference.delete(self, *tags)
-
- def create_remote(self, name, url, **kwargs):
- """Create a new remote.
-
- For more information, please see the documentation of the Remote.create
- methods
-
- :return: Remote reference"""
- return Remote.create(self, name, url, **kwargs)
-
- def delete_remote(self, remote):
- """Delete the given remote."""
- return Remote.remove(self, remote)
-
- def _get_config_path(self, config_level ):
- # we do not support an absolute path of the gitconfig on windows ,
- # use the global config instead
- if sys.platform == "win32" and config_level == "system":
- config_level = "global"
-
- if config_level == "system":
- return "/etc/gitconfig"
- elif config_level == "global":
- return os.path.normpath(os.path.expanduser("~/.gitconfig"))
- elif config_level == "repository":
- return join(self.git_dir, "config")
-
- raise ValueError( "Invalid configuration level: %r" % config_level )
-
- def config_reader(self, config_level=None):
- """
- :return:
- GitConfigParser allowing to read the full git configuration, but not to write it
-
- The configuration will include values from the system, user and repository
- configuration files.
-
- :param config_level:
- For possible values, see config_writer method
- If None, all applicable levels will be used. Specify a level in case
- you know which exact file you whish to read to prevent reading multiple files for
- instance
- :note: On windows, system configuration cannot currently be read as the path is
- unknown, instead the global path will be used."""
- files = None
- if config_level is None:
- files = [ self._get_config_path(f) for f in self.config_level ]
- else:
- files = [ self._get_config_path(config_level) ]
- return GitConfigParser(files, read_only=True)
-
- def config_writer(self, config_level="repository"):
- """
- :return:
- GitConfigParser allowing to write values of the specified configuration file level.
- Config writers should be retrieved, used to change the configuration ,and written
- right away as they will lock the configuration file in question and prevent other's
- to write it.
-
- :param config_level:
- One of the following values
- system = sytem wide configuration file
- global = user level configuration file
- repository = configuration file for this repostory only"""
- return GitConfigParser(self._get_config_path(config_level), read_only = False)
-
- def commit(self, rev=None):
- """The Commit object for the specified revision
- :param rev: revision specifier, see git-rev-parse for viable options.
- :return: ``git.Commit``"""
- if rev is None:
- return self.head.commit
- else:
- return self.rev_parse(str(rev)+"^0")
-
- def iter_trees(self, *args, **kwargs):
- """:return: Iterator yielding Tree objects
- :note: Takes all arguments known to iter_commits method"""
- return ( c.tree for c in self.iter_commits(*args, **kwargs) )
-
- def tree(self, rev=None):
- """The Tree object for the given treeish revision
- Examples::
-
- repo.tree(repo.heads[0])
-
- :param rev: is a revision pointing to a Treeish ( being a commit or tree )
- :return: ``git.Tree``
-
- :note:
- If you need a non-root level tree, find it by iterating the root tree. Otherwise
- it cannot know about its path relative to the repository root and subsequent
- operations might have unexpected results."""
- if rev is None:
- return self.head.commit.tree
- else:
- return self.rev_parse(str(rev)+"^{tree}")
-
- def iter_commits(self, rev=None, paths='', **kwargs):
- """A list of Commit objects representing the history of a given ref/commit
-
- :parm rev:
- revision specifier, see git-rev-parse for viable options.
- If None, the active branch will be used.
-
- :parm paths:
- is an optional path or a list of paths to limit the returned commits to
- Commits that do not contain that path or the paths will not be returned.
-
- :parm kwargs:
- Arguments to be passed to git-rev-list - common ones are
- max_count and skip
-
- :note: to receive only commits between two named revisions, use the
- "revA..revB" revision specifier
-
- :return ``git.Commit[]``"""
- if rev is None:
- rev = self.head.commit
-
- return Commit.iter_items(self, rev, paths, **kwargs)
-
- def _get_daemon_export(self):
- filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
- return os.path.exists(filename)
-
- def _set_daemon_export(self, value):
- filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
- fileexists = os.path.exists(filename)
- if value and not fileexists:
- touch(filename)
- elif not value and fileexists:
- os.unlink(filename)
-
- daemon_export = property(_get_daemon_export, _set_daemon_export,
- doc="If True, git-daemon may export this repository")
- del _get_daemon_export
- del _set_daemon_export
-
- def _get_alternates(self):
- """The list of alternates for this repo from which objects can be retrieved
-
- :return: list of strings being pathnames of alternates"""
- alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
-
- if os.path.exists(alternates_path):
- try:
- f = open(alternates_path)
- alts = f.read()
- finally:
- f.close()
- return alts.strip().splitlines()
- else:
- return list()
-
- def _set_alternates(self, alts):
- """Sets the alternates
-
- :parm alts:
- is the array of string paths representing the alternates at which
- git should look for objects, i.e. /home/user/repo/.git/objects
-
- :raise NoSuchPathError:
- :note:
- The method does not check for the existance of the paths in alts
- as the caller is responsible."""
- alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
- if not alts:
- if isfile(alternates_path):
- os.remove(alternates_path)
- else:
- try:
- f = open(alternates_path, 'w')
- f.write("\n".join(alts))
- finally:
- f.close()
- # END file handling
- # END alts handling
-
- alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates")
-
- def is_dirty(self, index=True, working_tree=True, untracked_files=False):
- """
- :return:
- ``True``, the repository is considered dirty. By default it will react
- like a git-status without untracked files, hence it is dirty if the
- index or the working copy have changes."""
- if self._bare:
- # Bare repositories with no associated working directory are
- # always consired to be clean.
- return False
-
- # start from the one which is fastest to evaluate
- default_args = ('--abbrev=40', '--full-index', '--raw')
- if index:
- # diff index against HEAD
- if isfile(self.index.path) and self.head.is_valid() and \
- len(self.git.diff('HEAD', '--cached', *default_args)):
- return True
- # END index handling
- if working_tree:
- # diff index against working tree
- if len(self.git.diff(*default_args)):
- return True
- # END working tree handling
- if untracked_files:
- if len(self.untracked_files):
- return True
- # END untracked files
- return False
-
- @property
- def untracked_files(self):
- """
- :return:
- list(str,...)
-
- Files currently untracked as they have not been staged yet. Paths
- are relative to the current working directory of the git command.
-
- :note:
- ignored files will not appear here, i.e. files mentioned in .gitignore"""
- # make sure we get all files, no only untracked directores
- proc = self.git.status(untracked_files=True, as_process=True)
- stream = iter(proc.stdout)
- untracked_files = list()
- for line in stream:
- if not line.startswith("# Untracked files:"):
- continue
- # skip two lines
- stream.next()
- stream.next()
-
- for untracked_info in stream:
- if not untracked_info.startswith("#\t"):
- break
- untracked_files.append(untracked_info.replace("#\t", "").rstrip())
- # END for each utracked info line
- # END for each line
- return untracked_files
-
- @property
- def active_branch(self):
- """The name of the currently active branch.
-
- :return: Head to the active branch"""
- return self.head.reference
-
- def blame(self, rev, file):
- """The blame information for the given file at the given revision.
-
- :parm rev: revision specifier, see git-rev-parse for viable options.
- :return:
- list: [git.Commit, list: [<line>]]
- A list of tuples associating a Commit object with a list of lines that
- changed within the given commit. The Commit objects will be given in order
- of appearance."""
- data = self.git.blame(rev, '--', file, p=True)
- commits = dict()
- blames = list()
- info = None
-
- for line in data.splitlines(False):
- parts = self.re_whitespace.split(line, 1)
- firstpart = parts[0]
- if self.re_hexsha_only.search(firstpart):
- # handles
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
- # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
- digits = parts[-1].split(" ")
- if len(digits) == 3:
- info = {'id': firstpart}
- blames.append([None, []])
- # END blame data initialization
- else:
- m = self.re_author_committer_start.search(firstpart)
- if m:
- # handles:
- # author Tom Preston-Werner
- # author-mail <tom@mojombo.com>
- # author-time 1192271832
- # author-tz -0700
- # committer Tom Preston-Werner
- # committer-mail <tom@mojombo.com>
- # committer-time 1192271832
- # committer-tz -0700 - IGNORED BY US
- role = m.group(0)
- if firstpart.endswith('-mail'):
- info["%s_email" % role] = parts[-1]
- elif firstpart.endswith('-time'):
- info["%s_date" % role] = int(parts[-1])
- elif role == firstpart:
- info[role] = parts[-1]
- # END distinguish mail,time,name
- else:
- # handle
- # filename lib/grit.rb
- # summary add Blob
- # <and rest>
- if firstpart.startswith('filename'):
- info['filename'] = parts[-1]
- elif firstpart.startswith('summary'):
- info['summary'] = parts[-1]
- elif firstpart == '':
- if info:
- sha = info['id']
- c = commits.get(sha)
- if c is None:
- c = Commit( self, hex_to_bin(sha),
- author=Actor._from_string(info['author'] + ' ' + info['author_email']),
- authored_date=info['author_date'],
- committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
- committed_date=info['committer_date'],
- message=info['summary'])
- commits[sha] = c
- # END if commit objects needs initial creation
- m = self.re_tab_full_line.search(line)
- text, = m.groups()
- blames[-1][0] = c
- blames[-1][1].append( text )
- info = None
- # END if we collected commit info
- # END distinguish filename,summary,rest
- # END distinguish author|committer vs filename,summary,rest
- # END distinguish hexsha vs other information
- return blames
-
- @classmethod
- def init(cls, path=None, mkdir=True, **kwargs):
- """Initialize a git repository at the given path if specified
-
- :param path:
- is the full path to the repo (traditionally ends with /<name>.git)
- or None in which case the repository will be created in the current
- working directory
-
- :parm mkdir:
- if specified will create the repository directory if it doesn't
- already exists. Creates the directory with a mode=0755.
- Only effective if a path is explicitly given
-
- :parm kwargs:
- keyword arguments serving as additional options to the git-init command
-
- :return: ``git.Repo`` (the newly created repo)"""
-
- if mkdir and path and not os.path.exists(path):
- os.makedirs(path, 0755)
-
- # git command automatically chdir into the directory
- git = Git(path)
- output = git.init(**kwargs)
- return Repo(path)
-
- @classmethod
- def _clone(cls, git, url, path, odb_default_type, **kwargs):
- # special handling for windows for path at which the clone should be
- # created.
- # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence
- # we at least give a proper error instead of letting git fail
- prev_cwd = None
- prev_path = None
- odbt = kwargs.pop('odbt', odb_default_type)
- if os.name == 'nt':
- if '~' in path:
- raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
-
- # on windows, git will think paths like c: are relative and prepend the
- # current working dir ( before it fails ). We temporarily adjust the working
- # dir to make this actually work
- match = re.match("(\w:[/\\\])(.*)", path)
- if match:
- prev_cwd = os.getcwd()
- prev_path = path
- drive, rest_of_path = match.groups()
- os.chdir(drive)
- path = rest_of_path
- kwargs['with_keep_cwd'] = True
- # END cwd preparation
- # END windows handling
-
- try:
- git.clone(url, path, **kwargs)
- finally:
- if prev_cwd is not None:
- os.chdir(prev_cwd)
- path = prev_path
- # END reset previous working dir
- # END bad windows handling
-
- # our git command could have a different working dir than our actual
- # environment, hence we prepend its working dir if required
- if not os.path.isabs(path) and git.working_dir:
- path = join(git._working_dir, path)
-
- # adjust remotes - there may be operating systems which use backslashes,
- # These might be given as initial paths, but when handling the config file
- # that contains the remote from which we were clones, git stops liking it
- # as it will escape the backslashes. Hence we undo the escaping just to be
- # sure
- repo = cls(os.path.abspath(path), odbt = odbt)
- if repo.remotes:
- repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
- # END handle remote repo
- return repo
-
- def clone(self, path, **kwargs):
- """Create a clone from this repository.
- :param path:
- is the full path of the new repo (traditionally ends with ./<name>.git).
-
- :param kwargs:
- odbt = ObjectDatabase Type, allowing to determine the object database
- implementation used by the returned Repo instance
-
- All remaining keyword arguments are given to the git-clone command
-
- :return: ``git.Repo`` (the newly cloned repo)"""
- return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs)
-
- @classmethod
- def clone_from(cls, url, to_path, **kwargs):
- """Create a clone from the given URL
- :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
- :param to_path: Path to which the repository should be cloned to
- :param kwargs: see the ``clone`` method
- :return: Repo instance pointing to the cloned directory"""
- return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, **kwargs)
-
- def archive(self, ostream, treeish=None, prefix=None, **kwargs):
- """Archive the tree at the given revision.
- :parm ostream: file compatible stream object to which the archive will be written
- :parm treeish: is the treeish name/id, defaults to active branch
- :parm prefix: is the optional prefix to prepend to each filename in the archive
- :parm kwargs:
- Additional arguments passed to git-archive
- NOTE: Use the 'format' argument to define the kind of format. Use
- specialized ostreams to write any format supported by python
-
- :raise GitCommandError: in case something went wrong
- :return: self"""
- if treeish is None:
- treeish = self.head.commit
- if prefix and 'prefix' not in kwargs:
- kwargs['prefix'] = prefix
- kwargs['output_stream'] = ostream
-
- self.git.archive(treeish, **kwargs)
- return self
-
- rev_parse = rev_parse
-
- def __repr__(self):
- return '<git.Repo "%s">' % self.git_dir
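The deleted Repo bound rev_parse as a method; in the new layout revision resolution lives on the database, surfacing as resolve_object (see the symbolic.py hunks above, where rev_parse calls were rewritten). A hedged sketch of the replacement call:

    from git import Repo

    repo = Repo("/path/to/repo")            # illustrative path
    # formerly: repo.rev_parse('HEAD~1')
    commit = repo.resolve_object('HEAD~1')
    print(commit.hexsha)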
diff --git a/git/stream.py b/git/stream.py
new file mode 100644
index 00000000..8010a055
--- /dev/null
+++ b/git/stream.py
@@ -0,0 +1,694 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from cStringIO import StringIO
+import errno
+import mmap
+import os
+
+from fun import (
+ msb_size,
+ stream_copy,
+ apply_delta_data,
+ connect_deltas,
+ DeltaChunkList,
+ delta_types
+ )
+
+from util import (
+ allocate_memory,
+ LazyMixin,
+ make_sha,
+ write,
+ close,
+ zlib
+ )
+
+has_perf_mod = False
+try:
+ from _perf import apply_delta as c_apply_delta
+ has_perf_mod = True
+except ImportError:
+ pass
+
+ __all__ = ( 'DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader',
+ 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter',
+ 'FDStream', 'NullStream')
+
+
+#{ RO Streams
+
+class DecompressMemMapReader(LazyMixin):
+ """Reads data in chunks from a memory map and decompresses it. The client sees
+ only the uncompressed data, respective file-like read calls are handling on-demand
+ buffered decompression accordingly
+
+ A constraint on the total size of bytes is activated, simulating
+ a logical file within a possibly larger physical memory area
+
+ To read efficiently, you clearly don't want to read individual bytes; instead,
+ read at least a few kilobytes.
+
+ :note: The chunk-size should be carefully selected as it will involve quite a bit
+ of string copying due to the way the zlib is implemented. It's very wasteful,
+ hence we try to find a good tradeoff between allocation time and the number of
+ times we actually allocate. Our own zlib implementation would be good here
+ to better support streamed reading - it would only need to keep the mmap
+ and decompress it into chunks, that's all ... """
+ __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close',
+ '_cbr', '_phi')
+
+ max_read_size = 512*1024 # currently unused
+
+ def __init__(self, m, close_on_deletion, size=None):
+ """Initialize with mmap for stream reading
+ :param m: must be content data - use new if you have object data and no size"""
+ self._m = m
+ self._zip = zlib.decompressobj()
+ self._buf = None # buffer of decompressed bytes
+ self._buflen = 0 # length of bytes in buffer
+ if size is not None:
+ self._s = size # size of uncompressed data to read in total
+ self._br = 0 # num uncompressed bytes read
+ self._cws = 0 # start byte of compression window
+ self._cwe = 0 # end byte of compression window
+ self._cbr = 0 # number of compressed bytes read
+ self._phi = False # is True if we parsed the header info
+ self._close = close_on_deletion # close the memmap on deletion ?
+
+ def _set_cache_(self, attr):
+ assert attr == '_s'
+ # only happens for size, which is a marker to indicate we still
+ # have to parse the header from the stream
+ self._parse_header_info()
+
+ def __del__(self):
+ if self._close:
+ self._m.close()
+ # END handle resource freeing
+
+ def _parse_header_info(self):
+ """If this stream contains object data, parse the header info and skip the
+ stream to a point where each read will yield object content
+
+ :return: parsed type_string, size"""
+ # read header
+ maxb = 512 # should really be enough, cgit uses 8192 I believe
+ self._s = maxb
+ hdr = self.read(maxb)
+ hdrend = hdr.find("\0")
+ type, size = hdr[:hdrend].split(" ")
+ size = int(size)
+ self._s = size
+
+ # adjust internal state to match actual header length that we ignore
+ # The buffer will be depleted first on future reads
+ self._br = 0
+ hdrend += 1 # count terminating \0
+ self._buf = StringIO(hdr[hdrend:])
+ self._buflen = len(hdr) - hdrend
+
+ self._phi = True
+
+ return type, size
+
+ #{ Interface
+
+ @classmethod
+ def new(cls, m, close_on_deletion=False):
+ """Create a new DecompressMemMapReader instance for acting as a read-only stream
+ This method parses the object header from m and returns the parsed
+ type and size, as well as the created stream instance.
+
+ :param m: memory map on which to operate. It must be object data ( header + contents )
+ :param close_on_deletion: if True, the memory map will be closed once we are
+ being deleted"""
+ inst = cls(m, close_on_deletion, 0)
+ type, size = inst._parse_header_info()
+ return type, size, inst
+
+ def data(self):
+ """:return: random access compatible data we are working on"""
+ return self._m
+
+ def compressed_bytes_read(self):
+ """
+ :return: number of compressed bytes read. This includes the bytes it
+ took to decompress the header ( if there was one )"""
+ # ABSTRACT: When decompressing a byte stream, it can be that the first
+ # x bytes which were requested match the first x bytes in the loosely
+ # compressed datastream. This is the worst-case assumption that the reader
+ # does, it assumes that it will get at least X bytes from X compressed bytes
+ # in all cases.
+ # The caveat is that the object, according to our known uncompressed size,
+ # is already complete, but there are still some bytes left in the compressed
+ # stream that contribute to the amount of compressed bytes.
+ # How can we know that we are truly done, and have read all bytes we need
+ # to read ?
+ # Without help, we cannot know, as we need to obtain the status of the
+ # decompression. If it is not finished, we need to decompress more data
+ # until it is finished, to yield the actual number of compressed bytes
+ # belonging to the decompressed object
+ # We are using a custom zlib module for this; if it's not present,
+ # we try to put in additional bytes up for decompression if feasible
+ # and check for the unused_data.
+
+ # Only scrub the stream forward if we are officially done with the
+ # bytes we were to have.
+ if self._br == self._s and not self._zip.unused_data:
+ # manipulate the bytes-read to allow our own read method to continue
+ # but keep the window at its current position
+ self._br = 0
+ if hasattr(self._zip, 'status'):
+ while self._zip.status == zlib.Z_OK:
+ self.read(mmap.PAGESIZE)
+ # END scrub-loop custom zlib
+ else:
+ # pass in additional pages, until we have unused data
+ while not self._zip.unused_data and self._cbr != len(self._m):
+ self.read(mmap.PAGESIZE)
+ # END scrub-loop default zlib
+ # END handle stream scrubbing
+
+ # reset bytes read, just to be sure
+ self._br = self._s
+ # END handle stream scrubbing
+
+ # unused data ends up in the unconsumed tail, which was removed
+ # from the count already
+ return self._cbr
+
+ #} END interface
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+ """Allows to reset the stream to restart reading
+ :raise ValueError: If offset and whence are not 0"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+
+ self._zip = zlib.decompressobj()
+ self._br = self._cws = self._cwe = self._cbr = 0
+ if self._phi:
+ self._phi = False
+ del(self._s) # trigger header parsing on first access
+ # END skip header
+
+ def read(self, size=-1):
+ if size < 1:
+ size = self._s - self._br
+ else:
+ size = min(size, self._s - self._br)
+ # END clamp size
+
+ if size == 0:
+ return str()
+ # END handle depletion
+
+
+ # deplete the buffer, then just continue using the decompress object
+ # which has an own buffer. We just need this to transparently parse the
+ # header from the zlib stream
+ dat = str()
+ if self._buf:
+ if self._buflen >= size:
+ # have enough data
+ dat = self._buf.read(size)
+ self._buflen -= size
+ self._br += size
+ return dat
+ else:
+ dat = self._buf.read() # ouch, duplicates data
+ size -= self._buflen
+ self._br += self._buflen
+
+ self._buflen = 0
+ self._buf = None
+ # END handle buffer len
+ # END handle buffer
+
+ # decompress some data
+		# Abstract: zlib needs to operate on chunks of our memory map ( which may
+		# be large ), as it will otherwise always fill in the 'unconsumed_tail'
+		# attribute, which possibly reads our whole map to the end, forcing
+		# everything to be read from disk even though just a portion was requested.
+		# As this would be a no-go, we work around it by passing only chunks of data,
+		# moving the window into the memory map along as we decompress, which keeps
+		# the tail smaller than our chunk-size. This causes 'only' the chunk to be
+		# copied once, and another copy of a part of it when it creates the unconsumed
+		# tail. We have to use it to hand in the appropriate amount of bytes during
+		# the next read.
+ tail = self._zip.unconsumed_tail
+ if tail:
+			# move the window, make it as large as size demands. For code-clarity,
+			# we just take the chunk from our map again instead of reusing the unconsumed
+			# tail. The latter would save some memory copying, but we could end up
+			# not getting enough data uncompressed, which we would have to sort out as well.
+			# Now we just assume the worst case, hence that the data is incompressible and
+			# the window needs to be as large as the uncompressed bytes we want to read.
+ self._cws = self._cwe - len(tail)
+ self._cwe = self._cws + size
+ else:
+ cws = self._cws
+ self._cws = self._cwe
+ self._cwe = cws + size
+ # END handle tail
+
+
+ # if window is too small, make it larger so zip can decompress something
+ if self._cwe - self._cws < 8:
+ self._cwe = self._cws + 8
+ # END adjust winsize
+
+ # takes a slice, but doesn't copy the data, it says ...
+ indata = buffer(self._m, self._cws, self._cwe - self._cws)
+
+ # get the actual window end to be sure we don't use it for computations
+ self._cwe = self._cws + len(indata)
+ dcompdat = self._zip.decompress(indata, size)
+ # update the amount of compressed bytes read
+ # We feed possibly overlapping chunks, which is why the unconsumed tail
+ # has to be taken into consideration, as well as the unused data
+ # if we hit the end of the stream
+ self._cbr += len(indata) - len(self._zip.unconsumed_tail)
+ self._br += len(dcompdat)
+
+ if dat:
+ dcompdat = dat + dcompdat
+ # END prepend our cached data
+
+ # it can happen, depending on the compression, that we get less bytes
+ # than ordered as it needs the final portion of the data as well.
+ # Recursively resolve that.
+ # Note: dcompdat can be empty even though we still appear to have bytes
+ # to read, if we are called by compressed_bytes_read - it manipulates
+ # us to empty the stream
+ if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s:
+ dcompdat += self.read(size-len(dcompdat))
+ # END handle special case
+ return dcompdat
+
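+
+# A minimal usage sketch (illustrative only, the helper name is hypothetical):
+# decompress a loose object from a memory map and read its full payload.
+def _decompress_loose_object_sketch(m):
+	type_string, size, reader = DecompressMemMapReader.new(m)
+	payload = reader.read()
+	assert len(payload) == size
+	return type_string, size, payload
+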
+
+class DeltaApplyReader(LazyMixin):
+ """A reader which dynamically applies pack deltas to a base object, keeping the
+ memory demands to a minimum.
+
+ The size of the final object is only obtainable once all deltas have been
+ applied, unless it is retrieved from a pack index.
+
+ The uncompressed Delta has the following layout (MSB being a most significant
+ bit encoded dynamic size):
+
+ * MSB Source Size - the size of the base against which the delta was created
+ * MSB Target Size - the size of the resulting data after the delta was applied
+ * A list of one byte commands (cmd) which are followed by a specific protocol:
+
+ * cmd & 0x80 - copy delta_data[offset:offset+size]
+
+ * Followed by an encoded offset into the delta data
+ * Followed by an encoded size of the chunk to copy
+
+ * cmd & 0x7f - insert
+
+ * insert cmd bytes from the delta buffer into the output stream
+
+ * cmd == 0 - invalid operation ( or error in delta stream )
+ """
+ __slots__ = (
+ "_bstream", # base stream to which to apply the deltas
+ "_dstreams", # tuple of delta stream readers
+ "_mm_target", # memory map of the delta-applied data
+ "_size", # actual number of bytes in _mm_target
+ "_br" # number of bytes read
+ )
+
+ #{ Configuration
+ k_max_memory_move = 250*1000*1000
+ #} END configuration
+
+ def __init__(self, stream_list):
+ """Initialize this instance with a list of streams, the first stream being
+ the delta to apply on top of all following deltas, the last stream being the
+ base object onto which to apply the deltas"""
+ assert len(stream_list) > 1, "Need at least one delta and one base stream"
+
+ self._bstream = stream_list[-1]
+ self._dstreams = tuple(stream_list[:-1])
+ self._br = 0
+
+ def _set_cache_too_slow_without_c(self, attr):
+ # the direct algorithm is fastest and most direct if there is only one
+ # delta. Also, the extra overhead might not be worth it for items smaller
+ # than X - definitely the case in python, every function call costs
+ # huge amounts of time
+ # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move:
+ if len(self._dstreams) == 1:
+ return self._set_cache_brute_(attr)
+
+ # Aggregate all deltas into one delta in reverse order. Hence we take
+ # the last delta, and reverse-merge its ancestor delta, until we receive
+ # the final delta data stream.
+ # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
+ dcl = connect_deltas(self._dstreams)
+
+		# use rbound() directly, as the (optional) c version doesn't implement
+		# the sequence protocol
+ if dcl.rbound() == 0:
+ self._size = 0
+ self._mm_target = allocate_memory(0)
+ return
+ # END handle empty list
+
+ self._size = dcl.rbound()
+ self._mm_target = allocate_memory(self._size)
+
+ bbuf = allocate_memory(self._bstream.size)
+ stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)
+
+ # APPLY CHUNKS
+ write = self._mm_target.write
+ dcl.apply(bbuf, write)
+
+ self._mm_target.seek(0)
+
+ def _set_cache_brute_(self, attr):
+ """If we are here, we apply the actual deltas"""
+
+		# TODO: There should be a special case if there is only one stream.
+		# Then the default git algorithm should perform a tad faster, as the
+		# delta is not peeked into, causing less overhead.
+ buffer_info_list = list()
+ max_target_size = 0
+ for dstream in self._dstreams:
+ buf = dstream.read(512) # read the header information + X
+ offset, src_size = msb_size(buf)
+ offset, target_size = msb_size(buf, offset)
+ buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
+ max_target_size = max(max_target_size, target_size)
+ # END for each delta stream
+
+ # sanity check - the first delta to apply should have the same source
+ # size as our actual base stream
+ base_size = self._bstream.size
+ target_size = max_target_size
+
+ # if we have more than 1 delta to apply, we will swap buffers, hence we must
+ # assure that all buffers we use are large enough to hold all the results
+ if len(self._dstreams) > 1:
+ base_size = target_size = max(base_size, max_target_size)
+ # END adjust buffer sizes
+
+
+ # Allocate private memory map big enough to hold the first base buffer
+ # We need random access to it
+ bbuf = allocate_memory(base_size)
+ stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)
+
+ # allocate memory map large enough for the largest (intermediate) target
+ # We will use it as scratch space for all delta ops. If the final
+ # target buffer is smaller than our allocated space, we just use parts
+ # of it upon return.
+ tbuf = allocate_memory(target_size)
+
+ # for each delta to apply, memory map the decompressed delta and
+ # work on the op-codes to reconstruct everything.
+ # For the actual copying, we use a seek and write pattern of buffer
+ # slices.
+ final_target_size = None
+ for (dbuf, offset, src_size, target_size), dstream in reversed(zip(buffer_info_list, self._dstreams)):
+ # allocate a buffer to hold all delta data - fill in the data for
+ # fast access. We do this as we know that reading individual bytes
+ # from our stream would be slower than necessary ( although possible )
+ # The dbuf buffer contains commands after the first two MSB sizes, the
+ # offset specifies the amount of bytes read to get the sizes.
+ ddata = allocate_memory(dstream.size - offset)
+ ddata.write(dbuf)
+ # read the rest from the stream. The size we give is larger than necessary
+ stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE)
+
+ #######################################################################
+ if 'c_apply_delta' in globals():
+				c_apply_delta(bbuf, ddata, tbuf)
+ else:
+ apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
+ #######################################################################
+
+ # finally, swap out source and target buffers. The target is now the
+ # base for the next delta to apply
+ bbuf, tbuf = tbuf, bbuf
+ bbuf.seek(0)
+ tbuf.seek(0)
+ final_target_size = target_size
+ # END for each delta to apply
+
+		# it is already seeked to 0, constrain it to the actual size
+		# NOTE: at the end of the loop the buffers are swapped, hence our target
+		# buffer is not tbuf, but bbuf !
+ self._mm_target = bbuf
+ self._size = final_target_size
+
+
+ #{ Configuration
+ if not has_perf_mod:
+ _set_cache_ = _set_cache_brute_
+ else:
+ _set_cache_ = _set_cache_too_slow_without_c
+
+ #} END configuration
+
+ def read(self, count=0):
+ bl = self._size - self._br # bytes left
+ if count < 1 or count > bl:
+ count = bl
+ # NOTE: we could check for certain size limits, and possibly
+ # return buffers instead of strings to prevent byte copying
+ data = self._mm_target.read(count)
+ self._br += len(data)
+ return data
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+ """Allows to reset the stream to restart reading
+
+ :raise ValueError: If offset and whence are not 0"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+ self._br = 0
+ self._mm_target.seek(0)
+
+ #{ Interface
+
+ @classmethod
+ def new(cls, stream_list):
+ """
+ Convert the given list of streams into a stream which resolves deltas
+ when reading from it.
+
+ :param stream_list: two or more stream objects, first stream is a Delta
+ to the object that you want to resolve, followed by N additional delta
+ streams. The list's last stream must be a non-delta stream.
+
+ :return: Non-Delta OPackStream object whose stream can be used to obtain
+ the decompressed resolved data
+ :raise ValueError: if the stream list cannot be handled"""
+ if len(stream_list) < 2:
+ raise ValueError("Need at least two streams")
+ # END single object special handling
+
+ if stream_list[-1].type_id in delta_types:
+ raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type)
+ # END check stream
+
+ return cls(stream_list)
+
+ #} END interface
+
+
+ #{ OInfo like Interface
+
+ @property
+ def type(self):
+ return self._bstream.type
+
+ @property
+ def type_id(self):
+ return self._bstream.type_id
+
+ @property
+ def size(self):
+ """:return: number of uncompressed bytes in the stream"""
+ return self._size
+
+ #} END oinfo like interface
+
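+
+# Illustrative reference (hypothetical helper - the module actually uses
+# msb_size() from git.fun): decode one of the MSB dynamic sizes described in
+# DeltaApplyReader's docstring. Seven payload bits per byte, least significant
+# group first; a set high bit means another byte follows.
+def _msb_size_sketch(data, offset=0):
+	size = 0
+	shift = 0
+	while True:
+		c = ord(data[offset])
+		offset += 1
+		size |= (c & 0x7f) << shift
+		shift += 7
+		if not c & 0x80:
+			break
+	# END decode loop
+	return offset, size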
+
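+
+# Illustrative reference (hypothetical - the real work is done by
+# apply_delta_data() or its C variant): interpret the copy/insert command
+# stream described in DeltaApplyReader's docstring. 'bbuf' is the base buffer,
+# 'dbuf' the delta command bytes with both MSB sizes already stripped, and
+# 'write' the target write function.
+def _apply_delta_sketch(bbuf, dbuf, write):
+	i = 0
+	while i < len(dbuf):
+		cmd = ord(dbuf[i])
+		i += 1
+		if cmd & 0x80:
+			# copy command: optional offset/size bytes follow, flagged bit-wise
+			cp_off = cp_size = 0
+			for j, flag in enumerate((0x01, 0x02, 0x04, 0x08)):
+				if cmd & flag:
+					cp_off |= ord(dbuf[i]) << (j * 8)
+					i += 1
+			# END gather offset
+			for j, flag in enumerate((0x10, 0x20, 0x40)):
+				if cmd & flag:
+					cp_size |= ord(dbuf[i]) << (j * 8)
+					i += 1
+			# END gather size
+			if cp_size == 0:
+				cp_size = 0x10000
+			write(bbuf[cp_off:cp_off + cp_size])
+		elif cmd:
+			# insert command: the next 'cmd' bytes come verbatim from the delta
+			write(dbuf[i:i + cmd])
+			i += cmd
+		else:
+			raise ValueError("Invalid delta opcode 0")
+		# END handle command
+	# END while commands remain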
+#} END RO streams
+
+
+#{ W Streams
+
+class Sha1Writer(object):
+	"""Simple stream writer which can produce a sha whenever you like, as it
+	digests everything it is supposed to write"""
+ __slots__ = "sha1"
+
+ def __init__(self):
+ self.sha1 = make_sha()
+
+ #{ Stream Interface
+
+ def write(self, data):
+		""":return: length of incoming data"""
+ self.sha1.update(data)
+ return len(data)
+
+	#} END stream interface
+
+ #{ Interface
+
+ def sha(self, as_hex = False):
+ """:return: sha so far
+ :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
+ if as_hex:
+ return self.sha1.hexdigest()
+ return self.sha1.digest()
+
+ #} END interface
+
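+
+# A short usage sketch (helper name hypothetical): digest chunks and obtain
+# the sha of everything written so far.
+def _sha1_writer_sketch(chunks):
+	writer = Sha1Writer()
+	for chunk in chunks:
+		writer.write(chunk)
+	# END for each chunk
+	return writer.sha(as_hex=True)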
+
+class FlexibleSha1Writer(Sha1Writer):
+ """Writer producing a sha1 while passing on the written bytes to the given
+ write function"""
+ __slots__ = 'writer'
+
+ def __init__(self, writer):
+ Sha1Writer.__init__(self)
+ self.writer = writer
+
+ def write(self, data):
+		alen = Sha1Writer.write(self, data)
+		self.writer(data)
+		return alen
+
+
+class ZippedStoreShaWriter(Sha1Writer):
+	"""Remembers everything someone writes to it, compresses it on the fly, and generates a sha"""
+ __slots__ = ('buf', 'zip')
+ def __init__(self):
+ Sha1Writer.__init__(self)
+ self.buf = StringIO()
+ self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
+
+ def __getattr__(self, attr):
+ return getattr(self.buf, attr)
+
+ def write(self, data):
+ alen = Sha1Writer.write(self, data)
+ self.buf.write(self.zip.compress(data))
+ return alen
+
+ def close(self):
+ self.buf.write(self.zip.flush())
+
+ def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)):
+		"""Seeking currently only supports rewinding to the beginning of the
+		written data. Multiple writes are not supported"""
+ if offset != 0 or whence != getattr(os, 'SEEK_SET', 0):
+ raise ValueError("Can only seek to position 0")
+ # END handle offset
+ self.buf.seek(0)
+
+ def getvalue(self):
+ """:return: string value from the current stream position to the end"""
+ return self.buf.getvalue()
+
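+
+# Illustrative round-trip (hypothetical helper): compress data in memory while
+# producing its sha, then verify the buffer decompresses back to the input.
+def _zipped_store_sketch(data):
+	writer = ZippedStoreShaWriter()
+	writer.write(data)
+	writer.close()	# flush the compressor
+	assert zlib.decompress(writer.getvalue()) == data
+	return writer.sha(as_hex=True), writer.getvalue()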
+
+class FDCompressedSha1Writer(Sha1Writer):
+	"""Digests data written to it, making the sha available, then compresses the
+	data and writes it to the file descriptor
+
+ :note: operates on raw file descriptors
+ :note: for this to work, you have to use the close-method of this instance"""
+ __slots__ = ("fd", "sha1", "zip")
+
+ # default exception
+	exc = IOError("Failed to write all bytes to file descriptor")
+
+ def __init__(self, fd):
+ super(FDCompressedSha1Writer, self).__init__()
+ self.fd = fd
+ self.zip = zlib.compressobj(zlib.Z_BEST_SPEED)
+
+ #{ Stream Interface
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+		:return: length of incoming data"""
+ self.sha1.update(data)
+ cdata = self.zip.compress(data)
+ bytes_written = write(self.fd, cdata)
+ if bytes_written != len(cdata):
+ raise self.exc
+ return len(data)
+
+ def close(self):
+ remainder = self.zip.flush()
+ if write(self.fd, remainder) != len(remainder):
+ raise self.exc
+ return close(self.fd)
+
+ #} END stream interface
+
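+
+# Illustrative sketch (hypothetical helper, assumes 'path' is writable):
+# stream compressed data to a raw file descriptor while hashing it.
+def _fd_compressed_write_sketch(path, data):
+	fd = os.open(path, os.O_WRONLY | os.O_CREAT, 0644)
+	writer = FDCompressedSha1Writer(fd)
+	writer.write(data)
+	writer.close()	# flushes the compressor and closes the descriptor
+	return writer.sha(as_hex=True)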
+
+class FDStream(object):
+ """A simple wrapper providing the most basic functions on a file descriptor
+ with the fileobject interface. Cannot use os.fdopen as the resulting stream
+ takes ownership"""
+ __slots__ = ("_fd", '_pos')
+ def __init__(self, fd):
+ self._fd = fd
+ self._pos = 0
+
+ def write(self, data):
+ self._pos += len(data)
+ os.write(self._fd, data)
+
+ def read(self, count=0):
+ if count == 0:
+			# 'self._filepath' does not exist on this type - obtain the size
+			# from the descriptor instead
+			count = os.fstat(self._fd).st_size
+ # END handle read everything
+
+ bytes = os.read(self._fd, count)
+ self._pos += len(bytes)
+ return bytes
+
+ def fileno(self):
+ return self._fd
+
+ def tell(self):
+ return self._pos
+
+ def close(self):
+ close(self._fd)
+
+
+class NullStream(object):
+	"""A stream that does nothing but provide a stream interface.
+ Use it like /dev/null"""
+ __slots__ = tuple()
+
+ def read(self, size=0):
+ return ''
+
+ def close(self):
+ pass
+
+ def write(self, data):
+ return len(data)
+
+
+#} END W streams
+
+
diff --git a/git/test/__init__.py b/git/test/__init__.py
index 757cbad1..a29d96a7 100644
--- a/git/test/__init__.py
+++ b/git/test/__init__.py
@@ -3,3 +3,12 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import git.util
+
+def _init_pool():
+ """Assure the pool is actually threaded"""
+ size = 2
+ print "Setting ThreadPool to %i" % size
+ git.util.pool.set_size(size)
+
diff --git a/git/test/db/__init__.py b/git/test/db/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/test/db/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/test_repo.py b/git/test/db/base.py
index deadbe9a..5291ba03 100644
--- a/git/test/test_repo.py
+++ b/git/test/db/base.py
@@ -3,30 +3,41 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import TestDBBase
from git.test.lib import *
-from git import *
+from git.cmd import Git
+from git.objects import *
+from git.exc import *
+from git.index import *
+from git.refs import *
from git.util import join_path_native
from git.exc import BadObject
-from gitdb.util import hex_to_bin, bin_to_hex
+from git.util import hex_to_bin, bin_to_hex
import os, sys
import tempfile
import shutil
from cStringIO import StringIO
+from git.db.compat import RepoCompatibilityInterface
-class TestRepo(TestBase):
+
+class RepoGlobalsItemDeletorMetaCls(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'RepoBase'
+
+
+class RepoBase(TestDBBase):
+ """Basic test for everything a fully implemented repository should support"""
+ __metaclass__ = RepoGlobalsItemDeletorMetaCls
- @raises(InvalidGitRepositoryError)
def test_new_should_raise_on_invalid_repo_location(self):
- Repo(tempfile.gettempdir())
+ self.failUnlessRaises(InvalidGitRepositoryError, self.RepoCls, tempfile.gettempdir())
- @raises(NoSuchPathError)
def test_new_should_raise_on_non_existant_path(self):
- Repo("repos/foobar")
+ self.failUnlessRaises(NoSuchPathError, self.RepoCls, "repos/foobar")
def test_repo_creation_from_different_paths(self):
- r_from_gitdir = Repo(self.rorepo.git_dir)
+ r_from_gitdir = self.RepoCls(self.rorepo.git_dir)
assert r_from_gitdir.git_dir == self.rorepo.git_dir
assert r_from_gitdir.git_dir.endswith('.git')
assert not self.rorepo.git.working_dir.endswith('.git')
@@ -133,8 +144,8 @@ class TestRepo(TestBase):
try:
# with specific path
for path in (git_dir_rela, git_dir_abs):
- r = Repo.init(path=path, bare=True)
- assert isinstance(r, Repo)
+ r = self.RepoCls.init(path=path, bare=True)
+ assert isinstance(r, self.RepoCls)
assert r.bare == True
assert os.path.isdir(r.git_dir)
@@ -155,7 +166,7 @@ class TestRepo(TestBase):
# END exception handling
# try again, this time with the absolute version
- rc = Repo.clone_from(r.git_dir, clone_path)
+ rc = self.RepoCls.clone_from(r.git_dir, clone_path)
self._assert_empty_repo(rc)
shutil.rmtree(git_dir_abs)
@@ -171,7 +182,7 @@ class TestRepo(TestBase):
os.makedirs(git_dir_rela)
os.chdir(git_dir_rela)
- r = Repo.init(bare=False)
+ r = self.RepoCls.init(bare=False)
		assert r.bare == False
self._assert_empty_repo(r)
@@ -184,7 +195,10 @@ class TestRepo(TestBase):
# END restore previous state
def test_bare_property(self):
- self.rorepo.bare
+ if isinstance(self.rorepo, RepoCompatibilityInterface):
+ self.rorepo.bare
+			#END handle compatibility
+ self.rorepo.is_bare
def test_daemon_export(self):
orig_val = self.rorepo.daemon_export
@@ -204,8 +218,7 @@ class TestRepo(TestBase):
self.rorepo.alternates = cur_alternates
def test_repr(self):
- path = os.path.join(os.path.abspath(GIT_REPO), '.git')
- assert_equal('<git.Repo "%s">' % path, repr(self.rorepo))
+ assert_equal('<git.Repo "%s">' % rorepo_dir(), repr(self.rorepo))
def test_is_dirty_with_bare_repository(self):
orig_value = self.rorepo._bare
@@ -235,6 +248,7 @@ class TestRepo(TestBase):
assert isinstance(index, IndexFile)
def test_tag(self):
+ assert self.rorepo.tag('0.1.5').commit
assert self.rorepo.tag('refs/tags/0.1.5').commit
def test_archive(self):
@@ -570,8 +584,15 @@ class TestRepo(TestBase):
assert rev_parse(refspec+":CHANGES").type == 'blob'
#END operate on non-detached head
- # the last position
- assert rev_parse('@{1}') != head.commit
+ # the most recent previous position of the currently checked out branch
+
+ try:
+ assert rev_parse('@{1}') != head.commit
+ except IndexError:
+ # on new checkouts, there isn't even a single past branch position
+ # in the log
+ pass
+ #END handle fresh checkouts
# position doesn't exist
self.failUnlessRaises(IndexError, rev_parse, '@{10000}')
@@ -579,17 +600,13 @@ class TestRepo(TestBase):
# currently, nothing more is supported
self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")
- def test_repo_odbtype(self):
- target_type = GitDB
- if sys.version_info[1] < 5:
- target_type = GitCmdObjectDB
- assert isinstance(self.rorepo.odb, target_type)
-
def test_submodules(self):
assert len(self.rorepo.submodules) == 1 # non-recursive
- assert len(list(self.rorepo.iter_submodules())) == 2
+ # in previous configurations, we had recursive repositories so this would compare to 2
+ # now there is only one left, as gitdb was merged
+ assert len(list(self.rorepo.iter_submodules())) == 1
- assert isinstance(self.rorepo.submodule("gitdb"), Submodule)
+ assert isinstance(self.rorepo.submodule("git/ext/async"), Submodule)
self.failUnlessRaises(ValueError, self.rorepo.submodule, "doesn't exist")
@with_rw_repo('HEAD', bare=False)
diff --git a/git/test/db/cmd/__init__.py b/git/test/db/cmd/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/test/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/cmd/test_base.py b/git/test/db/cmd/test_base.py
new file mode 100644
index 00000000..959be16b
--- /dev/null
+++ b/git/test/db/cmd/test_base.py
@@ -0,0 +1,32 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.base import RepoBase
+
+from git.util import bin_to_hex
+from git.exc import BadObject
+
+from git.db.complex import CmdCompatibilityGitDB
+from git.db.cmd.base import *
+
+class TestBase(RepoBase):
+ RepoCls = CmdCompatibilityGitDB
+
+ def test_basics(self):
+ gdb = self.rorepo
+
+ # partial to complete - works with everything
+ hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
+ assert len(hexsha) == 40
+
+ assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha
+
+ # fails with BadObject
+ for invalid_rev in ("0000", "bad/ref", "super bad"):
+ self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
+
+ def test_fetch_info(self):
+ self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "nonsense", '')
+ self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '')
diff --git a/git/test/db/lib.py b/git/test/db/lib.py
new file mode 100644
index 00000000..499ca252
--- /dev/null
+++ b/git/test/db/lib.py
@@ -0,0 +1,246 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Base classes for object db testing"""
+from git.test.lib import (
+ with_rw_directory,
+ with_packs_rw,
+ ZippedStoreShaWriter,
+ fixture_path,
+ TestBase,
+ rorepo_dir,
+ )
+
+from git.stream import Sha1Writer
+from git.base import (
+ IStream,
+ OStream,
+ OInfo
+ )
+
+from git.exc import BadObject
+from git.typ import str_blob_type
+
+from async import IteratorReader
+from cStringIO import StringIO
+from struct import pack
+
+
+__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path')
+
+class TestDBBase(TestBase):
+ """Base Class providing default functionality to all tests such as:
+
+	- Utility functions provided by the unittest.TestCase base, such as::
+ self.fail("todo")
+ self.failUnlessRaises(...)
+
+ - Class level repository which is considered read-only as it is shared among
+ all test cases in your type.
+ Access it using::
+ self.rorepo # 'ro' stands for read-only
+
+ The rorepo is in fact your current project's git repo. If you refer to specific
+ shas for your objects, be sure you choose some that are part of the immutable portion
+ of the project history ( to assure tests don't fail for others ).
+
+	Derived types can override the default repository type to create a different
+	read-only repo, allowing them to test their specific type
+ """
+
+ # data
+ two_lines = "1234\nhello world"
+ all_data = (two_lines, )
+
+ #{ Configuration
+ # The repository type to instantiate. It takes at least a path to operate upon
+ # during instantiation.
+ RepoCls = None
+
+ # if True, a read-only repo will be provided and RepoCls must be set.
+ # Otherwise it may remain unset
+ needs_ro_repo = True
+ #} END configuration
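+
+	# An illustrative subclass configuration (class names hypothetical)::
+	#
+	#	class TestMyDB(TestDBBase):
+	#		RepoCls = MyRepoType		# repository type under test
+	#		needs_ro_repo = False	# no shared read-only repo required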
+
+ @classmethod
+ def setUpAll(cls):
+ """
+ Dynamically add a read-only repository to our actual type. This way
+ each test type has its own repository
+ """
+ if cls.needs_ro_repo:
+ assert cls.RepoCls is not None, "RepoCls class member must be set"
+ cls.rorepo = cls.RepoCls(rorepo_dir())
+ #END handle rorepo
+
+ def _assert_object_writing_simple(self, db):
+ # write a bunch of objects and query their streams and info
+ null_objs = db.size()
+ ni = 250
+ for i in xrange(ni):
+ data = pack(">L", i)
+ istream = IStream(str_blob_type, len(data), StringIO(data))
+ new_istream = db.store(istream)
+ assert new_istream is istream
+ assert db.has_object(istream.binsha)
+
+ info = db.info(istream.binsha)
+ assert isinstance(info, OInfo)
+ assert info.type == istream.type and info.size == istream.size
+
+ stream = db.stream(istream.binsha)
+ assert isinstance(stream, OStream)
+ assert stream.binsha == info.binsha and stream.type == info.type
+ assert stream.read() == data
+ # END for each item
+
+ assert db.size() == null_objs + ni
+ shas = list(db.sha_iter())
+ assert len(shas) == db.size()
+ assert len(shas[0]) == 20
+
+
+ def _assert_object_writing(self, db):
+		"""General tests to verify object writing, compatible with ObjectDBW
+ :note: requires write access to the database"""
+ # start in 'dry-run' mode, using a simple sha1 writer
+ ostreams = (ZippedStoreShaWriter, None)
+ for ostreamcls in ostreams:
+ for data in self.all_data:
+ dry_run = ostreamcls is not None
+ ostream = None
+ if ostreamcls is not None:
+ ostream = ostreamcls()
+ assert isinstance(ostream, Sha1Writer)
+ # END create ostream
+
+ prev_ostream = db.set_ostream(ostream)
+ assert type(prev_ostream) in ostreams or prev_ostream in ostreams
+
+ istream = IStream(str_blob_type, len(data), StringIO(data))
+
+ # store returns same istream instance, with new sha set
+ my_istream = db.store(istream)
+ sha = istream.binsha
+ assert my_istream is istream
+ assert db.has_object(sha) != dry_run
+ assert len(sha) == 20
+
+ # verify data - the slow way, we want to run code
+ if not dry_run:
+ info = db.info(sha)
+ assert str_blob_type == info.type
+ assert info.size == len(data)
+
+ ostream = db.stream(sha)
+ assert ostream.read() == data
+ assert ostream.type == str_blob_type
+ assert ostream.size == len(data)
+ else:
+ self.failUnlessRaises(BadObject, db.info, sha)
+ self.failUnlessRaises(BadObject, db.stream, sha)
+
+				# DIRECT STREAM COPY
+				# our data has been written in object format to the StringIO
+				# we passed as output stream. No physical database representation
+				# was created.
+ # Test direct stream copy of object streams, the result must be
+ # identical to what we fed in
+ ostream.seek(0)
+ istream.stream = ostream
+ assert istream.binsha is not None
+ prev_sha = istream.binsha
+
+ db.set_ostream(ZippedStoreShaWriter())
+ db.store(istream)
+ assert istream.binsha == prev_sha
+ new_ostream = db.ostream()
+
+				# note: only works as long as our store write uses the same compression
+				# level, which is zip_best
+ assert ostream.getvalue() == new_ostream.getvalue()
+ # END for each data set
+ # END for each dry_run mode
+
+ def _assert_object_writing_async(self, db):
+ """Test generic object writing using asynchronous access"""
+ ni = 5000
+ def istream_generator(offset=0, ni=ni):
+ for data_src in xrange(ni):
+ data = str(data_src + offset)
+ yield IStream(str_blob_type, len(data), StringIO(data))
+ # END for each item
+ # END generator utility
+
+		# for now, we are very trusting here, as we expect it to work if it worked
+		# in the single-stream case
+
+ # write objects
+ reader = IteratorReader(istream_generator())
+ istream_reader = db.store_async(reader)
+ istreams = istream_reader.read() # read all
+ assert istream_reader.task().error() is None
+ assert len(istreams) == ni
+
+ for stream in istreams:
+ assert stream.error is None
+ assert len(stream.binsha) == 20
+ assert isinstance(stream, IStream)
+ # END assert each stream
+
+ # test has-object-async - we must have all previously added ones
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ hasobject_reader = db.has_object_async(reader)
+ count = 0
+ for sha, has_object in hasobject_reader:
+ assert has_object
+ count += 1
+ # END for each sha
+ assert count == ni
+
+ # read the objects we have just written
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ ostream_reader = db.stream_async(reader)
+
+ # read items individually to prevent hitting possible sys-limits
+ count = 0
+ for ostream in ostream_reader:
+ assert isinstance(ostream, OStream)
+ count += 1
+ # END for each ostream
+ assert ostream_reader.task().error() is None
+ assert count == ni
+
+ # get info about our items
+ reader = IteratorReader( istream.binsha for istream in istreams )
+ info_reader = db.info_async(reader)
+
+ count = 0
+ for oinfo in info_reader:
+ assert isinstance(oinfo, OInfo)
+ count += 1
+ # END for each oinfo instance
+ assert count == ni
+
+
+ # combined read-write using a converter
+ # add 2500 items, and obtain their output streams
+ nni = 2500
+ reader = IteratorReader(istream_generator(offset=ni, ni=nni))
+ istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ]
+
+ istream_reader = db.store_async(reader)
+ istream_reader.set_post_cb(istream_to_sha)
+
+ ostream_reader = db.stream_async(istream_reader)
+
+ count = 0
+ # read it individually, otherwise we might run into the ulimit
+ for ostream in ostream_reader:
+ assert isinstance(ostream, OStream)
+ count += 1
+ # END for each ostream
+ assert count == nni
+
+
diff --git a/git/test/db/py/__init__.py b/git/test/db/py/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/test/db/py/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/test/db/py/test_base.py b/git/test/db/py/test_base.py
new file mode 100644
index 00000000..6b06bbe9
--- /dev/null
+++ b/git/test/db/py/test_base.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.base import RepoBase
+
+from git.db.complex import PureCompatibilityGitDB
+
+class TestPyDBBase(RepoBase):
+
+ RepoCls = PureCompatibilityGitDB
+
+ def test_basics(self):
+ pass
+
diff --git a/git/test/db/py/test_git.py b/git/test/db/py/test_git.py
new file mode 100644
index 00000000..ecaa5c8f
--- /dev/null
+++ b/git/test/db/py/test_git.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.lib import rorepo_dir
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.exc import BadObject
+from git.db.py.complex import PureGitODB
+from git.base import OStream, OInfo
+from git.util import hex_to_bin, bin_to_hex
+
+import os
+
+class TestGitDB(TestDBBase):
+ needs_ro_repo = False
+
+ def test_reading(self):
+ gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects'))
+
+ # we have packs and loose objects, alternates doesn't necessarily exist
+ assert 1 < len(gdb.databases()) < 4
+
+ # access should be possible
+ git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+ assert isinstance(gdb.info(git_sha), OInfo)
+ assert isinstance(gdb.stream(git_sha), OStream)
+ assert gdb.size() > 200
+ sha_list = list(gdb.sha_iter())
+ assert len(sha_list) == gdb.size()
+
+
+ # This is actually a test for compound functionality, but it doesn't
+ # have a separate test module
+ # test partial shas
+		# this one is uneven and quite short
+ assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+
+ # mix even/uneven hexshas
+ for i, binsha in enumerate(sha_list[:50]):
+ assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
+ # END for each sha
+
+ self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
+
+ @with_rw_directory
+ def test_writing(self, path):
+ gdb = PureGitODB(path)
+
+ # its possible to write objects
+ self._assert_object_writing(gdb)
+ self._assert_object_writing_async(gdb)
diff --git a/git/test/db/py/test_loose.py b/git/test/db/py/test_loose.py
new file mode 100644
index 00000000..0c9b4831
--- /dev/null
+++ b/git/test/db/py/test_loose.py
@@ -0,0 +1,36 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.db.py.loose import PureLooseObjectODB
+from git.exc import BadObject
+from git.util import bin_to_hex
+
+class TestLooseDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_basics(self, path):
+ ldb = PureLooseObjectODB(path)
+
+ # write data
+ self._assert_object_writing(ldb)
+ self._assert_object_writing_async(ldb)
+
+ # verify sha iteration and size
+ shas = list(ldb.sha_iter())
+ assert shas and len(shas[0]) == 20
+
+ assert len(shas) == ldb.size()
+
+ # verify find short object
+ long_sha = bin_to_hex(shas[-1])
+ for short_sha in (long_sha[:20], long_sha[:5]):
+ assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
+ # END for each sha
+
+ self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
+		# raises if no object could be found
+
diff --git a/git/test/db/py/test_mem.py b/git/test/db/py/test_mem.py
new file mode 100644
index 00000000..bc98dc56
--- /dev/null
+++ b/git/test/db/py/test_mem.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.db.py.mem import PureMemoryDB
+from git.db.py.loose import PureLooseObjectODB
+
+class TestPureMemoryDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_writing(self, path):
+ mdb = PureMemoryDB()
+
+ # write data
+ self._assert_object_writing_simple(mdb)
+
+ # test stream copy
+ ldb = PureLooseObjectODB(path)
+ assert ldb.size() == 0
+ num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb)
+ assert num_streams_copied == mdb.size()
+
+ assert ldb.size() == mdb.size()
+ for sha in mdb.sha_iter():
+ assert ldb.has_object(sha)
+ assert ldb.stream(sha).read() == mdb.stream(sha).read()
+ # END verify objects where copied and are equal
diff --git a/git/test/db/py/test_pack.py b/git/test/db/py/test_pack.py
new file mode 100644
index 00000000..5043f446
--- /dev/null
+++ b/git/test/db/py/test_pack.py
@@ -0,0 +1,76 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_packs_rw
+
+from git.db.py.pack import PurePackedODB
+from git.test.lib import fixture_path
+
+from git.exc import BadObject, AmbiguousObjectName
+
+import os
+import random
+
+class TestPackDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_packs_rw
+ def test_writing(self, path):
+ pdb = PurePackedODB(path)
+
+ # on demand, we init our pack cache
+ num_packs = len(pdb.entities())
+ assert num_packs
+ assert pdb._st_mtime != 0
+
+ # test pack directory changed:
+ # packs removed - rename a file, should affect the glob
+ pack_path = pdb.entities()[0].pack().path()
+ new_pack_path = pack_path + "renamed"
+ os.rename(pack_path, new_pack_path)
+
+ pdb.update_cache(force=True)
+ assert len(pdb.entities()) == num_packs - 1
+
+ # packs added
+ os.rename(new_pack_path, pack_path)
+ pdb.update_cache(force=True)
+ assert len(pdb.entities()) == num_packs
+
+ # bang on the cache
+ # access the Entities directly, as there is no iteration interface
+ # yet ( or required for now )
+ sha_list = list(pdb.sha_iter())
+ assert len(sha_list) == pdb.size()
+
+ # hit all packs in random order
+ random.shuffle(sha_list)
+
+ for sha in sha_list:
+ info = pdb.info(sha)
+ stream = pdb.stream(sha)
+ # END for each sha to query
+
+
+ # test short finding - be a bit more brutal here
+ max_bytes = 19
+ min_bytes = 2
+ num_ambiguous = 0
+ for i, sha in enumerate(sha_list):
+ short_sha = sha[:max((i % max_bytes), min_bytes)]
+ try:
+ assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
+ except AmbiguousObjectName:
+ num_ambiguous += 1
+ pass # valid, we can have short objects
+ # END exception handling
+ # END for each sha to find
+
+		# we should have at least one ambiguous sha, considering the small sizes,
+		# but in our particular pack there is no ambiguous one ...
+		# assert num_ambiguous
+
+ # non-existing
+ self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4)
diff --git a/git/test/db/py/test_ref.py b/git/test/db/py/test_ref.py
new file mode 100644
index 00000000..c5374dc9
--- /dev/null
+++ b/git/test/db/py/test_ref.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import *
+from git.db.py.ref import PureReferenceDB
+
+from git.util import (
+ NULL_BIN_SHA,
+ hex_to_bin
+ )
+
+import os
+
+class TestPureReferenceDB(TestDBBase):
+
+ needs_ro_repo = False
+
+ def make_alt_file(self, alt_path, alt_list):
+ """Create an alternates file which contains the given alternates.
+ The list can be empty"""
+ alt_file = open(alt_path, "wb")
+ for alt in alt_list:
+ alt_file.write(alt + "\n")
+ alt_file.close()
+
+ @with_rw_directory
+ def test_writing(self, path):
+ NULL_BIN_SHA = '\0' * 20
+
+ alt_path = os.path.join(path, 'alternates')
+ rdb = PureReferenceDB(alt_path)
+ assert len(rdb.databases()) == 0
+ assert rdb.size() == 0
+ assert len(list(rdb.sha_iter())) == 0
+
+ # try empty, non-existing
+ assert not rdb.has_object(NULL_BIN_SHA)
+
+
+ # setup alternate file
+ # add two, one is invalid
+ own_repo_path = fixture_path('../../../.git/objects') # use own repo
+ self.make_alt_file(alt_path, [own_repo_path, "invalid/path"])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 1
+
+ # we should now find a default revision of ours
+ git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655")
+ assert rdb.has_object(git_sha)
+
+ # remove valid
+ self.make_alt_file(alt_path, ["just/one/invalid/path"])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 0
+
+ # add valid
+ self.make_alt_file(alt_path, [own_repo_path])
+ rdb.update_cache()
+ assert len(rdb.databases()) == 1
+
+
diff --git a/git/test/db/test_base.py b/git/test/db/test_base.py
new file mode 100644
index 00000000..2a882d0a
--- /dev/null
+++ b/git/test/db/test_base.py
@@ -0,0 +1,20 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import *
+from git.db import RefSpec
+
+class TestBase(TestDBBase):
+
+ needs_ro_repo = False
+
+ @with_rw_directory
+ def test_basics(self, path):
+ self.failUnlessRaises(ValueError, RefSpec, None, None)
+ rs = RefSpec(None, "something")
+ assert rs.force == False
+ assert rs.delete_destination()
+ assert rs.source is None
+ assert rs.destination == "something"
+
diff --git a/git/test/fixtures/git_config b/git/test/fixtures/git_config
index 3c91985f..ff8e7114 100644
--- a/git/test/fixtures/git_config
+++ b/git/test/fixtures/git_config
@@ -1,22 +1,28 @@
[core]
repositoryformatversion = 0
filemode = true
- bare = false
- logallrefupdates = true
+ bare = false
+ logallrefupdates = true
[remote "origin"]
fetch = +refs/heads/*:refs/remotes/origin/*
url = git://gitorious.org/~byron/git-python/byrons-clone.git
pushurl = git@gitorious.org:~byron/git-python/byrons-clone.git
-[branch "master"]
+# a tab indented section header
+ [branch "master"]
remote = origin
merge = refs/heads/master
-[remote "mainline"]
+# a space indented section header
+ [remote "mainline"]
+ # space indented comment
url = git://gitorious.org/git-python/mainline.git
fetch = +refs/heads/*:refs/remotes/mainline/*
+
[remote "MartinMarcher"]
+ # tab indented comment
url = git://gitorious.org/~martin.marcher/git-python/serverhorror.git
fetch = +refs/heads/*:refs/remotes/MartinMarcher/*
-[gui]
+ # can handle comments - the section name is supposed to be stripped
+[ gui ]
geometry = 1316x820+219+243 207 192
[branch "mainline_performance"]
remote = mainline
diff --git a/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b
new file mode 100644
index 00000000..021c2db3
--- /dev/null
+++ b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b
Binary files differ
diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx
new file mode 100644
index 00000000..fda5969b
--- /dev/null
+++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx
Binary files differ
diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack
new file mode 100644
index 00000000..a3209d2b
--- /dev/null
+++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack
Binary files differ
diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx
new file mode 100644
index 00000000..a7d6c717
--- /dev/null
+++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx
Binary files differ
diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack
new file mode 100644
index 00000000..955c424c
--- /dev/null
+++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack
Binary files differ
diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx
new file mode 100644
index 00000000..87c635f4
--- /dev/null
+++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx
Binary files differ
diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack
new file mode 100644
index 00000000..a69b28ac
--- /dev/null
+++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack
Binary files differ
diff --git a/git/test/lib/__init__.py b/git/test/lib/__init__.py
index 77512794..a0656438 100644
--- a/git/test/lib/__init__.py
+++ b/git/test/lib/__init__.py
@@ -5,9 +5,14 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import inspect
+# TODO: Separate names - they do repeat unfortunately. Also deduplicate it,
+# redesign decorators to support multiple database types in succession.
+from base import *
+
from mock import *
from asserts import *
from helper import *
+
__all__ = [ name for name, obj in locals().items()
if not (name.startswith('_') or inspect.ismodule(obj)) ]
diff --git a/git/test/lib/base.py b/git/test/lib/base.py
new file mode 100644
index 00000000..bc160783
--- /dev/null
+++ b/git/test/lib/base.py
@@ -0,0 +1,200 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Utilities used in ODB testing"""
+from git.base import OStream
+from git.stream import (
+ Sha1Writer,
+ ZippedStoreShaWriter
+ )
+
+from git.util import (
+ zlib,
+ dirname
+ )
+
+import sys
+import random
+from array import array
+from cStringIO import StringIO
+
+import glob
+import unittest
+import tempfile
+import shutil
+import os
+import gc
+
+
+#{ Decorators
+
+def with_rw_directory(func):
+	"""Create a temporary directory which can be written to, remove it if the
+	test succeeds, but leave it otherwise to aid additional debugging"""
+ def wrapper(self):
+ path = maketemp(prefix=func.__name__)
+ os.mkdir(path)
+ keep = False
+ try:
+ try:
+ return func(self, path)
+ except Exception:
+ print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path)
+ keep = True
+ raise
+ finally:
+ # Need to collect here to be sure all handles have been closed. It appears
+ # a windows-only issue. In fact things should be deleted, as well as
+ # memory maps closed, once objects go out of scope. For some reason
+ # though this is not the case here unless we collect explicitly.
+ if not keep:
+ gc.collect()
+ shutil.rmtree(path)
+ # END handle exception
+ # END wrapper
+
+ wrapper.__name__ = func.__name__
+ return wrapper
+
+
+def with_rw_repo(func):
+	"""Create a copy of our repository and put it into a writable location. It will
+	be removed if the test doesn't result in an error.
+	As we can currently only copy the full working tree, tests must not rely on
+	being on a certain branch or on anything really, except for the default tags
+	that should exist.
+	The wrapped function obtains a git repository"""
+ def wrapper(self, path):
+ src_dir = dirname(dirname(dirname(__file__)))
+ assert(os.path.isdir(path))
+ os.rmdir(path) # created by wrapper, but must not exist for copy operation
+ shutil.copytree(src_dir, path)
+ target_gitdir = os.path.join(path, '.git')
+ assert os.path.isdir(target_gitdir)
+ return func(self, self.RepoCls(target_gitdir))
+ #END wrapper
+ wrapper.__name__ = func.__name__
+ return with_rw_directory(wrapper)
+
+
+
+def with_packs_rw(func):
+ """Function that provides a path into which the packs for testing should be
+ copied. Will pass on the path to the actual function afterwards
+
+ :note: needs with_rw_directory wrapped around it"""
+ def wrapper(self, path):
+ src_pack_glob = fixture_path('packs/*')
+ print src_pack_glob
+ copy_files_globbed(src_pack_glob, path, hard_link_ok=True)
+ return func(self, path)
+ # END wrapper
+
+ wrapper.__name__ = func.__name__
+ return with_rw_directory(wrapper)
+
+#} END decorators
+
+#{ Routines
+
+def rorepo_dir():
+ """:return: path to our own repository, being our own .git directory.
+ :note: doesn't work in bare repositories"""
+ base = os.path.join(dirname(dirname(dirname(dirname(__file__)))), '.git')
+ assert os.path.isdir(base)
+ return base
+
+def maketemp(*args, **kwargs):
+ """Wrapper around default tempfile.mktemp to fix an osx issue"""
+ tdir = tempfile.mktemp(*args, **kwargs)
+ if sys.platform == 'darwin':
+ tdir = '/private' + tdir
+ return tdir
+
+def fixture_path(relapath=''):
+ """:return: absolute path into the fixture directory
+ :param relapath: relative path into the fixtures directory, or ''
+ to obtain the fixture directory itself"""
+ test_dir = os.path.dirname(os.path.dirname(__file__))
+ return os.path.join(test_dir, "fixtures", relapath)
+
+def fixture(name):
+ return open(fixture_path(name), 'rb').read()
+
+def absolute_project_path():
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+
+def copy_files_globbed(source_glob, target_dir, hard_link_ok=False):
+ """Copy all files found according to the given source glob into the target directory
+ :param hard_link_ok: if True, hard links will be created if possible. Otherwise
+ the files will be copied"""
+ for src_file in glob.glob(source_glob):
+ if hard_link_ok and hasattr(os, 'link'):
+ target = os.path.join(target_dir, os.path.basename(src_file))
+ try:
+ os.link(src_file, target)
+ except OSError:
+ shutil.copy(src_file, target_dir)
+ # END handle cross device links ( and resulting failure )
+ else:
+ shutil.copy(src_file, target_dir)
+ # END try hard link
+ # END for each file to copy
+
+
+def make_bytes(size_in_bytes, randomize=False):
+ """:return: string with given size in bytes
+ :param randomize: try to produce a very random stream"""
+ actual_size = size_in_bytes / 4
+ producer = xrange(actual_size)
+ if randomize:
+ producer = list(producer)
+ random.shuffle(producer)
+ # END randomize
+ a = array('i', producer)
+ return a.tostring()
+
+def make_object(type, data):
+	""":return: bytes resembling an uncompressed object of the given type"""
+	odata = "%s %i\0" % (type, len(data))
+	return odata + data
+
+def make_memory_file(size_in_bytes, randomize=False):
+ """:return: tuple(size_of_stream, stream)
+ :param randomize: try to produce a very random stream"""
+ d = make_bytes(size_in_bytes, randomize)
+ return len(d), StringIO(d)
+
+#} END routines
+
+#{ Stream Utilities
+
+class DummyStream(object):
+ def __init__(self):
+ self.was_read = False
+ self.bytes = 0
+ self.closed = False
+
+ def read(self, size):
+ self.was_read = True
+ self.bytes = size
+
+ def close(self):
+ self.closed = True
+
+ def _assert(self):
+ assert self.was_read
+
+
+class DeriveTest(OStream):
+ def __init__(self, sha, type, size, stream, *args, **kwargs):
+ self.myarg = kwargs.pop('myarg')
+ self.args = args
+
+ def _assert(self):
+ assert self.args
+ assert self.myarg
+
+#} END stream utilities
+
diff --git a/git/test/lib/helper.py b/git/test/lib/helper.py
index 76aaaa38..5776f526 100644
--- a/git/test/lib/helper.py
+++ b/git/test/lib/helper.py
@@ -12,26 +12,18 @@ import tempfile
import shutil
import cStringIO
-GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+from base import (
+ maketemp,
+ rorepo_dir
+ )
-__all__ = (
- 'fixture_path', 'fixture', 'absolute_project_path', 'StringProcessAdapter',
- 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'GIT_REPO'
- )
-
-#{ Routines
-
-def fixture_path(name):
- test_dir = os.path.dirname(os.path.dirname(__file__))
- return os.path.join(test_dir, "fixtures", name)
-def fixture(name):
- return open(fixture_path(name), 'rb').read()
+__all__ = (
+ 'StringProcessAdapter', 'GlobalsItemDeletorMetaCls',
+ 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase',
+ )
-def absolute_project_path():
- return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
-#} END routines
#{ Adapters
@@ -52,13 +44,6 @@ class StringProcessAdapter(object):
#{ Decorators
-def _mktemp(*args):
- """Wrapper around default tempfile.mktemp to fix an osx issue"""
- tdir = tempfile.mktemp(*args)
- if sys.platform == 'darwin':
- tdir = '/private' + tdir
- return tdir
-
def _rmtree_onerror(osremove, fullpath, exec_info):
"""
Handle the case on windows that read-only files cannot be deleted by
@@ -87,7 +72,7 @@ def with_rw_repo(working_tree_ref, bare=False):
if bare:
prefix = ''
#END handle prefix
- repo_dir = _mktemp("%sbare_%s" % (prefix, func.__name__))
+ repo_dir = maketemp("%sbare_%s" % (prefix, func.__name__))
rw_repo = self.rorepo.clone(repo_dir, shared=True, bare=bare, n=True)
rw_repo.head.commit = rw_repo.commit(working_tree_ref)
@@ -143,8 +128,8 @@ def with_rw_and_rw_remote_repo(working_tree_ref):
assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout"
def argument_passer(func):
def remote_repo_creator(self):
- remote_repo_dir = _mktemp("remote_repo_%s" % func.__name__)
- repo_dir = _mktemp("remote_clone_non_bare_repo")
+ remote_repo_dir = maketemp("remote_repo_%s" % func.__name__)
+ repo_dir = maketemp("remote_clone_non_bare_repo")
rw_remote_repo = self.rorepo.clone(remote_repo_dir, shared=True, bare=True)
rw_repo = rw_remote_repo.clone(repo_dir, shared=True, bare=False, n=True) # recursive alternates info ?
@@ -205,32 +190,40 @@ def with_rw_and_rw_remote_repo(working_tree_ref):
return argument_passer
#} END decorators
+
+#{ Meta Classes
+class GlobalsItemDeletorMetaCls(type):
+	"""Utility to prevent the RepoBase from being picked up by nose, as this
+	metaclass deletes the base type from the module's globals"""
+ #{ Configuration
+ # Set this to a string name of the module to delete
+	# Set this to the name of the type to delete from its module's globals
+ #} END configuration
+
+ def __new__(metacls, name, bases, clsdict):
+ assert metacls.ModuleToDelete is not None, "Invalid metaclass configuration"
+ new_type = super(GlobalsItemDeletorMetaCls, metacls).__new__(metacls, name, bases, clsdict)
+ if name != metacls.ModuleToDelete:
+ mod = __import__(new_type.__module__, globals(), locals(), new_type.__module__)
+ delattr(mod, metacls.ModuleToDelete)
+ #END handle deletion
+ return new_type
+
+#} END meta classes
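+
+# Illustrative usage of the metaclass above (names hypothetical)::
+#
+#	class _MyDeletorMetaCls(GlobalsItemDeletorMetaCls):
+#		ModuleToDelete = 'MyTestBase'
+#
+#	class MyTestBase(TestCase):
+#		__metaclass__ = _MyDeletorMetaCls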
class TestBase(TestCase):
"""
Base Class providing default functionality to all tests such as:
-
- Utility functions provided by the TestCase base of the unittest method such as::
self.fail("todo")
self.failUnlessRaises(...)
-
- - Class level repository which is considered read-only as it is shared among
- all test cases in your type.
- Access it using::
- self.rorepo # 'ro' stands for read-only
-
- The rorepo is in fact your current project's git repo. If you refer to specific
- shas for your objects, be sure you choose some that are part of the immutable portion
- of the project history ( to assure tests don't fail for others ).
"""
@classmethod
def setUpAll(cls):
- """
- Dynamically add a read-only repository to our actual type. This way
- each test type has its own repository
- """
- cls.rorepo = Repo(GIT_REPO)
+ """This method is only called to provide the most basic functionality
+ Subclasses may just override it or implement it differently"""
+ cls.rorepo = Repo(rorepo_dir())
def _make_file(self, rela_path, data, repo=None):
"""
diff --git a/git/test/objects/__init__.py b/git/test/objects/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/git/test/objects/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/objects/lib.py b/git/test/objects/lib.py
new file mode 100644
index 00000000..fe1d9f9d
--- /dev/null
+++ b/git/test/objects/lib.py
@@ -0,0 +1,14 @@
+"""Provide customized obhject testing facilities"""
+
+from git.test.lib import (
+ rorepo_dir,
+ TestBase,
+ assert_equal,
+ assert_not_equal,
+ with_rw_repo,
+ StringProcessAdapter,
+ )
+
+class TestObjectBase(TestBase):
+ """Provides a default read-only repository in the rorepo member"""
+ pass
diff --git a/git/test/test_blob.py b/git/test/objects/test_blob.py
index 661c0501..58ac25b7 100644
--- a/git/test/test_blob.py
+++ b/git/test/objects/test_blob.py
@@ -4,11 +4,11 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
-from gitdb.util import hex_to_bin
+from lib import *
+from git.objects.blob import *
+from git.util import hex_to_bin
-class TestBlob(TestBase):
+class TestBlob(TestObjectBase):
def test_mime_type_should_return_mime_type_for_known_types(self):
blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'})
diff --git a/git/test/test_commit.py b/git/test/objects/test_commit.py
index 4a8d8b87..80326fe9 100644
--- a/git/test/test_commit.py
+++ b/git/test/objects/test_commit.py
@@ -5,10 +5,14 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
-from gitdb import IStream
-from gitdb.util import hex_to_bin
+from lib import *
+from git.objects.commit import *
+from git.base import IStream
+
+from git.util import (
+ hex_to_bin,
+ Actor,
+ )
from cStringIO import StringIO
import time
@@ -65,7 +69,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False)
# END handle performance info
-class TestCommit(TestBase):
+class TestCommit(TestObjectBase):
def test_bake(self):
diff --git a/git/test/test_submodule.py b/git/test/objects/test_submodule.py
index adb4fb82..14cb074c 100644
--- a/git/test/test_submodule.py
+++ b/git/test/objects/test_submodule.py
@@ -1,29 +1,51 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
+from lib import *
from git.exc import *
from git.objects.submodule.base import Submodule
from git.objects.submodule.root import RootModule, RootUpdateProgress
from git.util import to_native_path_linux, join_path_native
+
import shutil
import git
import os
+import sys
class TestRootProgress(RootUpdateProgress):
"""Just prints messages, for now without checking the correctness of the states"""
- def update(self, op, index, max_count, message=''):
+ def update(self, op, index, max_count, message='', input=''):
print message
prog = TestRootProgress()
-class TestSubmodule(TestBase):
+class TestSubmodule(TestObjectBase):
k_subm_current = "468cad66ff1f80ddaeee4123c24e4d53a032c00d"
k_subm_changed = "394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3"
k_no_subm_tag = "0.1.6"
+ k_github_gitdb_url = 'git://github.com/gitpython-developers/gitdb.git'
+ env_gitdb_local_path = "GITPYTHON_TEST_GITDB_LOCAL_PATH"
+ def _generate_async_local_path(self):
+ return to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, 'git/ext/async'))
+
+ def _rewrite_gitdb_to_local_path(self, smgitdb):
+ """Rewrites the given submodule to point to the local path of the gitdb repository, if possible.
+ Otherwise it leaves it unchanged
+ :return: new clone path, or None if no new path was set"""
+ new_smclone_path = os.environ.get(self.env_gitdb_local_path)
+ if new_smclone_path is not None:
+ writer = smgitdb.config_writer()
+ writer.set_value('url', new_smclone_path)
+ del(writer)
+ assert smgitdb.config_reader().get_value('url') == new_smclone_path
+ assert smgitdb.url == new_smclone_path
+ else:
+ sys.stderr.write("Submodule tests need the gitdb repository. You can specify a local source setting the %s environment variable. Otherwise it will be downloaded from the internet" % self.env_gitdb_local_path)
+ #END handle submodule path
+ return new_smclone_path
def _do_base_tests(self, rwrepo):
"""Perform all tests in the given repository, it may be bare or nonbare"""
@@ -42,7 +64,7 @@ class TestSubmodule(TestBase):
assert sm.path == 'git/ext/gitdb'
assert sm.path != sm.name # in our case, we have ids there, which don't equal the path
- assert sm.url == 'git://github.com/gitpython-developers/gitdb.git'
+ assert sm.url == self.k_github_gitdb_url
assert sm.branch_path == 'refs/heads/master' # the default ...
assert sm.branch_name == 'master'
assert sm.parent_commit == rwrepo.head.commit
@@ -73,13 +95,11 @@ class TestSubmodule(TestBase):
if rwrepo.bare:
self.failUnlessRaises(InvalidGitRepositoryError, sm.config_writer)
else:
- writer = sm.config_writer()
# for faster checkout, set the url to the local path
- new_smclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path))
- writer.set_value('url', new_smclone_path)
- del(writer)
- assert sm.config_reader().get_value('url') == new_smclone_path
- assert sm.url == new_smclone_path
+ # Note: This was nice, but it no longer works with the latest git-python
+ # version. It would also require internet access, which is why we allow
+ # an override using an environment variable
+ new_smclone_path = self._rewrite_gitdb_to_local_path(sm)
# END handle bare repo
smold.config_reader()
@@ -175,7 +195,8 @@ class TestSubmodule(TestBase):
csm_repopath = csm.path
# adjust the path of the submodules module to point to the local destination
- new_csmclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path, csm.path))
+ # In the current version, async is used directly by git-python
+ new_csmclone_path = self._generate_async_local_path()
csm.config_writer().set_value('url', new_csmclone_path)
assert csm.url == new_csmclone_path
@@ -247,6 +268,10 @@ class TestSubmodule(TestBase):
self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True)
sm.module().index.reset(working_tree=True)
+ # make sure sub-submodule is not modified by forcing it to update
+ # to the revision it is supposed to point to.
+ csm.update()
+
# this would work
assert sm.remove(dry_run=True) is sm
assert sm.module_exists()
@@ -384,9 +409,9 @@ class TestSubmodule(TestBase):
rm.config_reader()
rm.config_writer()
- # deep traversal gitdb / async
+ # deep traversal git / async
rsmsp = [sm.path for sm in rm.traverse()]
- assert len(rsmsp) == 2 # gitdb and async, async being a child of gitdb
+ assert len(rsmsp) == 1 # git and async, async being a child of git
# cannot set the parent commit as root module's path didn't exist
self.failUnlessRaises(ValueError, rm.set_parent_commit, 'HEAD')
@@ -406,8 +431,8 @@ class TestSubmodule(TestBase):
prep = sm.path
assert not sm.module_exists() # was never updated after rwrepo's clone
- # assure we clone from a local source
- sm.config_writer().set_value('url', to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path)))
+ # assure we clone from a local source
+ self._rewrite_gitdb_to_local_path(sm)
# dry-run does nothing
sm.update(recursive=False, dry_run=True, progress=prog)
@@ -440,7 +465,7 @@ class TestSubmodule(TestBase):
#================
nsmn = "newsubmodule"
nsmp = "submrepo"
- async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1]))
+ async_url = self._generate_async_local_path()
nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url)
csmadded = rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference
nsm.set_parent_commit(csmadded)
@@ -482,7 +507,11 @@ class TestSubmodule(TestBase):
# to the first repository, this way we have a fast checkout, and a completely different
# repository at the different url
nsm.set_parent_commit(csmremoved)
- nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0]))
+ nsmurl = os.environ.get(self.env_gitdb_local_path, self.k_github_gitdb_url)
+
+ # Note: We would have liked to have a different url, but we cannot
+ # provoke this case
+ assert nsm.url != nsmurl
nsm.config_writer().set_value('url', nsmurl)
csmpathchange = rwrepo.index.commit("changed url")
nsm.set_parent_commit(csmpathchange)
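
A minimal sketch of supplying the local gitdb clone that _rewrite_gitdb_to_local_path above picks up via os.environ; the path is hypothetical, and without the variable the tests fall back to cloning k_github_gitdb_url over the network:

    import os

    # any existing local clone of gitdb will do; adjust the path to your checkout
    os.environ['GITPYTHON_TEST_GITDB_LOCAL_PATH'] = '/path/to/local/gitdb'
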
diff --git a/git/test/test_tree.py b/git/test/objects/test_tree.py
index ec10e962..bc8d3f97 100644
--- a/git/test/test_tree.py
+++ b/git/test/objects/test_tree.py
@@ -4,16 +4,18 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
-from git.test.lib import *
-from git import *
+
+from lib import *
from git.objects.fun import (
traverse_tree_recursive,
traverse_trees_recursive
)
+from git.objects.blob import Blob
+from git.objects.tree import Tree
from cStringIO import StringIO
+import os
-class TestTree(TestBase):
+class TestTree(TestObjectBase):
def test_serializable(self):
# tree at the given commit contains a submodule as well
diff --git a/git/test/performance/__init__.py b/git/test/performance/__init__.py
new file mode 100644
index 00000000..6bd117b9
--- /dev/null
+++ b/git/test/performance/__init__.py
@@ -0,0 +1,2 @@
+"""Note: This init file makes the performance tests an integral part of the test suite
+as nose will now pick them up. Previously the init file was intentionally omitted"""
diff --git a/git/test/performance/db/__init__.py b/git/test/performance/db/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/git/test/performance/db/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/performance/db/looseodb_impl.py b/git/test/performance/db/looseodb_impl.py
new file mode 100644
index 00000000..6d3c1fa6
--- /dev/null
+++ b/git/test/performance/db/looseodb_impl.py
@@ -0,0 +1,132 @@
+"""Performance data streaming performance"""
+from git.db.py import *
+from git.base import *
+from git.stream import *
+from async import ChannelThreadTask
+from git.util import (
+ pool,
+ bin_to_hex
+ )
+import os
+import sys
+from time import time
+
+from git.test.lib import (
+ GlobalsItemDeletorMetaCls,
+ make_memory_file,
+ with_rw_repo
+ )
+
+from git.test.performance.lib import TestBigRepoR
+
+
+#{ Utilities
+
+def read_chunked_stream(stream):
+ total = 0
+ while True:
+ chunk = stream.read(chunk_size)
+ total += len(chunk)
+ if len(chunk) < chunk_size:
+ break
+ # END read stream loop
+ assert total == stream.size
+ return stream
+
+
+class TestStreamReader(ChannelThreadTask):
+ """Expects input streams and reads them in chunks. It will read one at a time,
+ requiring a queue chunk of size 1"""
+ def __init__(self, *args):
+ super(TestStreamReader, self).__init__(*args)
+ self.fun = read_chunked_stream
+ self.max_chunksize = 1
+
+
+#} END utilities
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestLooseDBWPerformanceBase'
+
+
+class TestLooseDBWPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ large_data_size_bytes = 1000*1000*10 # some MiB should do it
+ moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
+
+ #{ Configuration
+ LooseODBCls = None
+ #} END configuration
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestLooseDBWPerformanceBase, cls).setUpAll()
+ if cls.LooseODBCls is None:
+ raise AssertionError("LooseODBCls must be set in subtype")
+ #END assert configuration
+ # currently there is no additional configuration
+
+ @with_rw_repo("HEAD")
+ def test_large_data_streaming(self, rwrepo):
+ # TODO: This part overlaps with the same file in git.test.performance.test_stream
+ # It should be shared if possible
+ objects_path = rwrepo.db_path('')
+ ldb = self.LooseODBCls(objects_path)
+
+ for randomize in range(2):
+ desc = (randomize and 'random ') or ''
+ print >> sys.stderr, "Creating %s data ..." % desc
+ st = time()
+ size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+ elapsed = time() - st
+ print >> sys.stderr, "Done (in %f s)" % elapsed
+
+ # writing - due to the compression it will seem faster than it is
+ st = time()
+ binsha = ldb.store(IStream('blob', size, stream)).binsha
+ elapsed_add = time() - st
+ assert ldb.has_object(binsha)
+ hexsha = bin_to_hex(binsha)
+ db_file = os.path.join(objects_path, hexsha[:2], hexsha[2:])
+ fsize_kib = os.path.getsize(db_file) / 1000
+
+
+ size_kib = size / 1000
+ print >> sys.stderr, "%s: Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (self.LooseODBCls.__name__, size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+
+ # reading all at once
+ st = time()
+ ostream = ldb.stream(binsha)
+ shadata = ostream.read()
+ elapsed_readall = time() - st
+
+ stream.seek(0)
+ assert shadata == stream.getvalue()
+ print >> sys.stderr, "%s: Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+
+
+ # reading in chunks of 512 KiB
+ cs = 512*1000
+ chunks = list()
+ st = time()
+ ostream = ldb.stream(binsha)
+ while True:
+ data = ostream.read(cs)
+ chunks.append(data)
+ if len(data) < cs:
+ break
+ # END read in chunks
+ elapsed_readchunks = time() - st
+
+ stream.seek(0)
+ assert ''.join(chunks) == stream.getvalue()
+
+ cs_kib = cs / 1000
+ print >> sys.stderr, "%s: Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+ # del db file so git has something to do
+ os.remove(db_file)
+ # END for each randomization factor
+
+
diff --git a/git/test/performance/db/odb_impl.py b/git/test/performance/db/odb_impl.py
new file mode 100644
index 00000000..677cf6a8
--- /dev/null
+++ b/git/test/performance/db/odb_impl.py
@@ -0,0 +1,72 @@
+"""Performance tests for object store"""
+
+from time import time
+import sys
+import stat
+
+from git.test.performance.lib import (
+ TestBigRepoR,
+ GlobalsItemDeletorMetaCls
+ )
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestObjDBPerformanceBase'
+
+
+class TestObjDBPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ #{ Configuration
+ RepoCls = None # to be set by subclass
+ #} END configuration
+
+ def test_random_access(self):
+ repo = self.rorepo
+
+ # GET COMMITS
+ st = time()
+ root_commit = repo.commit(self.head_sha_2k)
+ commits = list(root_commit.traverse())
+ nc = len(commits)
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
+
+ # GET TREES
+ # walk all trees of all commits
+ st = time()
+ blobs_per_commit = list()
+ nt = 0
+ for commit in commits:
+ tree = commit.tree
+ blobs = list()
+ for item in tree.traverse():
+ nt += 1
+ if item.type == 'blob':
+ blobs.append(item)
+ # direct access for speed
+ # END while trees are there for walking
+ blobs_per_commit.append(blobs)
+ # END for each commit
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
+
+ # GET BLOBS
+ st = time()
+ nb = 0
+ too_many = 15000
+ data_bytes = 0
+ for blob_list in blobs_per_commit:
+ for blob in blob_list:
+ data_bytes += len(blob.data_stream.read())
+ # END for each blobsha
+ nb += len(blob_list)
+ if nb > too_many:
+ break
+ # END for each bloblist
+ elapsed = time() - st
+
+ print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
+
+
diff --git a/git/test/performance/db/packedodb_impl.py b/git/test/performance/db/packedodb_impl.py
new file mode 100644
index 00000000..b95a8d13
--- /dev/null
+++ b/git/test/performance/db/packedodb_impl.py
@@ -0,0 +1,107 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Performance tests for object store"""
+from git.test.performance.lib import (
+ TestBigRepoR,
+ GlobalsItemDeletorMetaCls
+ )
+
+from git.exc import UnsupportedOperation
+
+import sys
+import os
+from time import time
+import random
+
+
+class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls):
+ ModuleToDelete = 'TestPurePackedODBPerformanceBase'
+
+class TestPurePackedODBPerformanceBase(TestBigRepoR):
+ __metaclass__ = PerfBaseDeletorMetaClass
+
+ #{ Configuration
+ PackedODBCls = None
+ #} END configuration
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestPurePackedODBPerformanceBase, cls).setUpAll()
+ if cls.PackedODBCls is None:
+ raise AssertionError("PackedODBCls must be set in subclass")
+ #END assert configuration
+ cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack"))
+
+ def test_pack_random_access(self):
+ pdb = self.ropdb
+
+ # sha lookup
+ st = time()
+ sha_list = list(pdb.sha_iter())
+ elapsed = time() - st
+ ns = len(sha_list)
+ print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
+
+ # sha lookup: best-case and worst-case access
+ pdb_pack_info = pdb._pack_info
+ st = time()
+ for sha in sha_list:
+ pdb_pack_info(sha)
+ # END for each sha to look up
+ elapsed = time() - st
+
+ # discard cache
+ del(pdb._entities)
+ pdb.entities()
+ print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed)
+ # END for each random mode
+
+ # query info and streams only
+ max_items = 10000 # can wait longer when testing memory
+ for pdb_fun in (pdb.info, pdb.stream):
+ st = time()
+ for sha in sha_list[:max_items]:
+ pdb_fun(sha)
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
+ # END for each function
+
+ # retrieve stream and read all
+ max_items = 5000
+ pdb_stream = pdb.stream
+ total_size = 0
+ st = time()
+ for sha in sha_list[:max_items]:
+ stream = pdb_stream(sha)
+ stream.read()
+ total_size += stream.size
+ elapsed = time() - st
+ total_kib = total_size / 1000
+ print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed)
+
+ def test_correctness(self):
+ pdb = self.ropdb
+ # disabled for now as it used to work perfectly, checking big repositories takes a long time
+ print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
+ for crc in range(2):
+ count = 0
+ st = time()
+ for entity in pdb.entities():
+ pack_verify = entity.is_valid_stream
+ sha_by_index = entity.index().sha
+ for index in xrange(entity.index().size()):
+ try:
+ assert pack_verify(sha_by_index(index), use_crc=crc)
+ count += 1
+ except UnsupportedOperation:
+ pass
+ # END ignore old indices
+ # END for each index
+ # END for each entity
+ elapsed = time() - st
+ print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
+ # END for each verify mode
+
diff --git a/git/test/performance/db/test_looseodb_cmd.py b/git/test/performance/db/test_looseodb_cmd.py
new file mode 100644
index 00000000..9738278c
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_cmd.py
@@ -0,0 +1,11 @@
+from git.db.complex import CmdCompatibilityGitDB
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+import sys
+
+class TestCmdLooseDB(TestLooseDBWPerformanceBase):
+ LooseODBCls = CmdCompatibilityGitDB
+
+ def test_info(self):
+ sys.stderr.write("This test does not check the write performance of the git command as it is implemented in pure python")
+
diff --git a/git/test/performance/db/test_looseodb_pure.py b/git/test/performance/db/test_looseodb_pure.py
new file mode 100644
index 00000000..46f39d5e
--- /dev/null
+++ b/git/test/performance/db/test_looseodb_pure.py
@@ -0,0 +1,6 @@
+from git.db.py.loose import PureLooseObjectODB
+from looseodb_impl import TestLooseDBWPerformanceBase
+
+class TestPureLooseDB(TestLooseDBWPerformanceBase):
+ LooseODBCls = PureLooseObjectODB
+
diff --git a/git/test/performance/db/test_odb_cmd.py b/git/test/performance/db/test_odb_cmd.py
new file mode 100644
index 00000000..acd55cc9
--- /dev/null
+++ b/git/test/performance/db/test_odb_cmd.py
@@ -0,0 +1,6 @@
+from git.db.complex import CmdCompatibilityGitDB
+from odb_impl import TestObjDBPerformanceBase
+
+class TestCmdDB(TestObjDBPerformanceBase):
+ RepoCls = CmdCompatibilityGitDB
+
diff --git a/git/test/performance/db/test_odb_pure.py b/git/test/performance/db/test_odb_pure.py
new file mode 100644
index 00000000..6ed3585d
--- /dev/null
+++ b/git/test/performance/db/test_odb_pure.py
@@ -0,0 +1,6 @@
+from git.db.complex import PureCompatibilityGitDB
+from odb_impl import TestObjDBPerformanceBase
+
+class TestPureDB(TestObjDBPerformanceBase):
+ RepoCls = PureCompatibilityGitDB
+
diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py
new file mode 100644
index 00000000..4ea09779
--- /dev/null
+++ b/git/test/performance/db/test_packedodb_pure.py
@@ -0,0 +1,90 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from packedodb_impl import TestPurePackedODBPerformanceBase
+from git.db.py.pack import PurePackedODB
+
+from git.stream import NullStream
+
+from git.pack import PackEntity
+
+import os
+import sys
+
+from time import time
+from nose import SkipTest
+
+
+class CountedNullStream(NullStream):
+ __slots__ = '_bw'
+ def __init__(self):
+ self._bw = 0
+
+ def bytes_written(self):
+ return self._bw
+
+ def write(self, d):
+ self._bw += NullStream.write(self, d)
+
+
+class TestPurePackedODB(TestPurePackedODBPerformanceBase):
+ #{ Configuration
+ PackedODBCls = PurePackedODB
+ #} END configuration
+
+ def test_pack_writing_note(self):
+ sys.stderr.write("test_pack_writing should be adjusted to support different databases to read from - see test for more info")
+ raise SkipTest()
+
+ def test_pack_writing(self):
+ # see how fast we can write a pack from object streams.
+ # This will not be fast, as we take time for decompressing the streams as well
+ # For now we test the fast streaming and slow streaming versions manually
+ ostream = CountedNullStream()
+ # NOTE: We use the same repo twice to see whether OS caching helps
+ for rorepo in (self.rorepo, self.rorepo, self.ropdb):
+
+ ni = 5000
+ count = 0
+ total_size = 0
+ st = time()
+ objs = list()
+ for sha in rorepo.sha_iter():
+ count += 1
+ objs.append(rorepo.stream(sha))
+ if count == ni:
+ break
+ #END gather objects for pack-writing
+ elapsed = time() - st
+ print >> sys.stderr, "PDB Streaming: Got %i streams from %s by sha in in %f s ( %f streams/s )" % (ni, rorepo.__class__.__name__, elapsed, ni / elapsed)
+
+ st = time()
+ PackEntity.write_pack(objs, ostream.write)
+ elapsed = time() - st
+ total_kb = ostream.bytes_written() / 1000
+ print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
+ #END for each rorepo
+
+
+ def test_stream_reading(self):
+ raise SkipTest("This test was only used for --with-profile runs")
+ pdb = self.ropdb
+
+ # streaming only, meant for --with-profile runs
+ ni = 5000
+ count = 0
+ pdb_stream = pdb.stream
+ total_size = 0
+ st = time()
+ for sha in pdb.sha_iter():
+ if count == ni:
+ break
+ stream = pdb_stream(sha)
+ stream.read()
+ total_size += stream.size
+ count += 1
+ elapsed = time() - st
+ total_kib = total_size / 1000
+ print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed)
+
diff --git a/git/test/performance/lib.py b/git/test/performance/lib.py
index d0727b60..758d402d 100644
--- a/git/test/performance/lib.py
+++ b/git/test/performance/lib.py
@@ -1,17 +1,13 @@
"""Contains library functions"""
import os
-from git.test.lib import *
+from git.test.lib import (
+ TestBase,
+ GlobalsItemDeletorMetaCls
+ )
import shutil
import tempfile
-from git.db import (
- GitCmdObjectDB,
- GitDB
- )
-
-from git import (
- Repo
- )
+from git import Repo
#{ Invariants
k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE"
@@ -38,11 +34,7 @@ class TestBigRepoR(TestBase):
* gitrorepo
- * Read-Only git repository - actually the repo of git itself
-
- * puregitrorepo
-
- * As gitrepo, but uses pure python implementation
+ * a big read-only git repository
"""
#{ Invariants
@@ -50,29 +42,33 @@ class TestBigRepoR(TestBase):
head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
#} END invariants
+ #{ Configuration
+ RepoCls = Repo
+ #} END configuration
+
@classmethod
def setUpAll(cls):
super(TestBigRepoR, cls).setUpAll()
- repo_path = resolve_or_fail(k_env_git_repo)
- cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB)
- cls.puregitrorepo = Repo(repo_path, odbt=GitDB)
+ if cls.RepoCls is None:
+ raise AssertionError("Require RepoCls in class %s to be set" % cls)
+ #END assert configuration
+ cls.rorepo = cls.RepoCls(resolve_or_fail(k_env_git_repo))
class TestBigRepoRW(TestBigRepoR):
"""As above, but provides a big repository that we can write to.
- Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
+ Provides ``self.rwrepo``"""
@classmethod
def setUpAll(cls):
super(TestBigRepoRW, cls).setUpAll()
dirname = tempfile.mktemp()
os.mkdir(dirname)
- cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
- cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
+ cls.rwrepo = cls.rorepo.clone(dirname, shared=True, bare=True)
@classmethod
def tearDownAll(cls):
- shutil.rmtree(cls.gitrwrepo.working_dir)
+ shutil.rmtree(cls.rwrepo.working_dir)
#} END base classes
diff --git a/git/test/performance/objects/__init__.py b/git/test/performance/objects/__init__.py
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/git/test/performance/objects/__init__.py
@@ -0,0 +1 @@
+
diff --git a/git/test/performance/test_commit.py b/git/test/performance/objects/test_commit.py
index 80421aa2..685fba2f 100644
--- a/git/test/performance/test_commit.py
+++ b/git/test/performance/objects/test_commit.py
@@ -4,18 +4,18 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from lib import *
+from git.test.performance.lib import TestBigRepoRW
from git import *
-from gitdb import IStream
-from git.test.test_commit import assert_commit_serialization
+from git.base import IStream
+from git.test.objects.test_commit import assert_commit_serialization
from cStringIO import StringIO
from time import time
import sys
class TestPerformance(TestBigRepoRW):
-
+
# ref with about 100 commits in its history
- ref_100 = '0.1.6'
+ ref_100 = 'v0.99'
def _query_commit_info(self, c):
c.author
@@ -45,13 +45,14 @@ class TestPerformance(TestBigRepoRW):
# END for each object
# END for each commit
elapsed_time = time() - st
+ assert no, "Should have traversed a few objects"
print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time)
def test_commit_traversal(self):
# bound to cat-file parsing performance
nc = 0
st = time()
- for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False):
+ for c in self.rorepo.commit(self.head_sha_2k).traverse(branch_first=False):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
@@ -62,7 +63,7 @@ class TestPerformance(TestBigRepoRW):
# bound to stream parsing performance
nc = 0
st = time()
- for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k):
+ for c in Commit.iter_items(self.rorepo, self.head_sha_2k):
nc += 1
self._query_commit_info(c)
# END for each traversed commit
@@ -70,10 +71,10 @@ class TestPerformance(TestBigRepoRW):
print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
def test_commit_serialization(self):
- assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
+ assert_commit_serialization(self.rwrepo, self.head_sha_2k, True)
- rwrepo = self.gitrwrepo
- make_object = rwrepo.odb.store
+ rwrepo = self.rwrepo
+ make_object = rwrepo.store
# direct serialization - deserialization can be tested afterwards
# serialization is probably limited on IO
hc = rwrepo.commit(self.head_sha_2k)
diff --git a/git/test/performance/test_odb.py b/git/test/performance/test_odb.py
deleted file mode 100644
index 32b70f69..00000000
--- a/git/test/performance/test_odb.py
+++ /dev/null
@@ -1,70 +0,0 @@
-"""Performance tests for object store"""
-
-from time import time
-import sys
-import stat
-
-from lib import (
- TestBigRepoR
- )
-
-
-class TestObjDBPerformance(TestBigRepoR):
-
- def test_random_access(self):
- results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
- for repo in (self.gitrorepo, self.puregitrorepo):
- # GET COMMITS
- st = time()
- root_commit = repo.commit(self.head_sha_2k)
- commits = list(root_commit.traverse())
- nc = len(commits)
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
- results[0].append(elapsed)
-
- # GET TREES
- # walk all trees of all commits
- st = time()
- blobs_per_commit = list()
- nt = 0
- for commit in commits:
- tree = commit.tree
- blobs = list()
- for item in tree.traverse():
- nt += 1
- if item.type == 'blob':
- blobs.append(item)
- # direct access for speed
- # END while trees are there for walking
- blobs_per_commit.append(blobs)
- # END for each commit
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
- results[1].append(elapsed)
-
- # GET BLOBS
- st = time()
- nb = 0
- too_many = 15000
- data_bytes = 0
- for blob_list in blobs_per_commit:
- for blob in blob_list:
- data_bytes += len(blob.data_stream.read())
- # END for each blobsha
- nb += len(blob_list)
- if nb > too_many:
- break
- # END for each bloblist
- elapsed = time() - st
-
- print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed)
- results[2].append(elapsed)
- # END for each repo type
-
- # final results
- for test_name, a, b in results:
- print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
- # END for each result
diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py
deleted file mode 100644
index 7f17d722..00000000
--- a/git/test/performance/test_streams.py
+++ /dev/null
@@ -1,131 +0,0 @@
-"""Performance data streaming performance"""
-
-from git.test.lib import *
-from gitdb import *
-from gitdb.util import bin_to_hex
-
-from time import time
-import os
-import sys
-import stat
-import subprocess
-
-from gitdb.test.lib import make_memory_file
-
-from lib import (
- TestBigRepoR
- )
-
-
-class TestObjDBPerformance(TestBigRepoR):
-
- large_data_size_bytes = 1000*1000*10 # some MiB should do it
- moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
-
- @with_rw_repo('HEAD', bare=True)
- def test_large_data_streaming(self, rwrepo):
- # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
- # It should be shared if possible
- ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
-
- for randomize in range(2):
- desc = (randomize and 'random ') or ''
- print >> sys.stderr, "Creating %s data ..." % desc
- st = time()
- size, stream = make_memory_file(self.large_data_size_bytes, randomize)
- elapsed = time() - st
- print >> sys.stderr, "Done (in %f s)" % elapsed
-
- # writing - due to the compression it will seem faster than it is
- st = time()
- binsha = ldb.store(IStream('blob', size, stream)).binsha
- elapsed_add = time() - st
- assert ldb.has_object(binsha)
- db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
- fsize_kib = os.path.getsize(db_file) / 1000
-
-
- size_kib = size / 1000
- print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
-
- # reading all at once
- st = time()
- ostream = ldb.stream(binsha)
- shadata = ostream.read()
- elapsed_readall = time() - st
-
- stream.seek(0)
- assert shadata == stream.getvalue()
- print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
-
-
- # reading in chunks of 1 MiB
- cs = 512*1000
- chunks = list()
- st = time()
- ostream = ldb.stream(binsha)
- while True:
- data = ostream.read(cs)
- chunks.append(data)
- if len(data) < cs:
- break
- # END read in chunks
- elapsed_readchunks = time() - st
-
- stream.seek(0)
- assert ''.join(chunks) == stream.getvalue()
-
- cs_kib = cs / 1000
- print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
-
- # del db file so git has something to do
- os.remove(db_file)
-
- # VS. CGIT
- ##########
- # CGIT ! Can using the cgit programs be faster ?
- proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
-
- # write file - pump everything in at once to be a fast as possible
- data = stream.getvalue() # cache it
- st = time()
- proc.stdin.write(data)
- proc.stdin.close()
- gitsha = proc.stdout.read().strip()
- proc.wait()
- gelapsed_add = time() - st
- del(data)
- assert gitsha == bin_to_hex(binsha) # we do it the same way, right ?
-
- # as its the same sha, we reuse our path
- fsize_kib = os.path.getsize(db_file) / 1000
- print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
-
- # compare ...
- print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
-
-
- # read all
- st = time()
- s, t, size, data = rwrepo.git.get_object_data(gitsha)
- gelapsed_readall = time() - st
- print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
-
- # compare
- print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
-
-
- # read chunks
- st = time()
- s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
- while True:
- data = stream.read(cs)
- if len(data) < cs:
- break
- # END read stream
- gelapsed_readchunks = time() - st
- print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
-
- # compare
- print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
- # END for each randomization factor
diff --git a/git/test/test_actor.py b/git/test/test_actor.py
deleted file mode 100644
index b8e5ba3b..00000000
--- a/git/test/test_actor.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# test_actor.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-from git.test.lib import *
-from git import *
-
-class TestActor(object):
- def test_from_string_should_separate_name_and_email(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal("Michael Trier", a.name)
- assert_equal("mtrier@example.com", a.email)
-
- # base type capabilities
- assert a == a
- assert not ( a != a )
- m = set()
- m.add(a)
- m.add(a)
- assert len(m) == 1
-
- def test_from_string_should_handle_just_name(self):
- a = Actor._from_string("Michael Trier")
- assert_equal("Michael Trier", a.name)
- assert_equal(None, a.email)
-
- def test_should_display_representation(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal('<git.Actor "Michael Trier <mtrier@example.com>">', repr(a))
-
- def test_str_should_alias_name(self):
- a = Actor._from_string("Michael Trier <mtrier@example.com>")
- assert_equal(a.name, str(a)) \ No newline at end of file
diff --git a/git/test/test_base.py b/git/test/test_base.py
index e630d151..7488ac6b 100644
--- a/git/test/test_base.py
+++ b/git/test/test_base.py
@@ -3,18 +3,48 @@
#
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+from lib import (
+ TestBase,
+ with_rw_repo,
+ DummyStream,
+ DeriveTest,
+ with_rw_and_rw_remote_repo
+ )
import git.objects.base as base
+from git.objects import (
+ Blob,
+ Tree,
+ Commit,
+ TagObject
+ )
import git.refs as refs
-import os
-from git.test.lib import *
-from git import *
+
from itertools import chain
from git.objects.util import get_object_type_by_name
-from gitdb.util import hex_to_bin
+from git.util import hex_to_bin
import tempfile
+##################
+
+from git.util import (
+ NULL_BIN_SHA
+ )
+
+from git.typ import str_blob_type
+from git.base import (
+ OInfo,
+ OPackInfo,
+ ODeltaPackInfo,
+ OStream,
+ OPackStream,
+ ODeltaPackStream,
+ IStream,
+ )
+
+import os
+
class TestBase(TestBase):
type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"),
@@ -77,7 +107,7 @@ class TestBase(TestBase):
assert base.Object in get_object_type_by_name(tname).mro()
# END for each known type
- assert_raises( ValueError, get_object_type_by_name, "doesntexist" )
+ self.failUnlessRaises(ValueError, get_object_type_by_name, "doesntexist")
def test_object_resolution(self):
# objects must be resolved to shas so they compare equal
@@ -98,3 +128,85 @@ class TestBase(TestBase):
assert not rw_repo.config_reader("repository").getboolean("core", "bare")
assert rw_remote_repo.config_reader("repository").getboolean("core", "bare")
assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib'))
+
+
+
+class TestBaseTypes(TestBase):
+
+ def test_streams(self):
+ # test info
+ sha = NULL_BIN_SHA
+ s = 20
+ blob_id = 3
+
+ info = OInfo(sha, str_blob_type, s)
+ assert info.binsha == sha
+ assert info.type == str_blob_type
+ assert info.type_id == blob_id
+ assert info.size == s
+
+ # test pack info
+ # provides type_id
+ pinfo = OPackInfo(0, blob_id, s)
+ assert pinfo.type == str_blob_type
+ assert pinfo.type_id == blob_id
+ assert pinfo.pack_offset == 0
+
+ dpinfo = ODeltaPackInfo(0, blob_id, s, sha)
+ assert dpinfo.type == str_blob_type
+ assert dpinfo.type_id == blob_id
+ assert dpinfo.delta_info == sha
+ assert dpinfo.pack_offset == 0
+
+
+ # test ostream
+ stream = DummyStream()
+ ostream = OStream(*(info + (stream, )))
+ assert ostream.stream is stream
+ ostream.read(15)
+ stream._assert()
+ assert stream.bytes == 15
+ ostream.read(20)
+ assert stream.bytes == 20
+
+ # test packstream
+ postream = OPackStream(*(pinfo + (stream, )))
+ assert postream.stream is stream
+ postream.read(10)
+ stream._assert()
+ assert stream.bytes == 10
+
+ # test deltapackstream
+ dpostream = ODeltaPackStream(*(dpinfo + (stream, )))
+ assert dpostream.stream is stream
+ dpostream.read(5)
+ stream._assert()
+ assert stream.bytes == 5
+
+ # derive with own args
+ DeriveTest(sha, str_blob_type, s, stream, 'mine', myarg=3)._assert()
+
+ # test istream
+ istream = IStream(str_blob_type, s, stream)
+ assert istream.binsha is None
+ istream.binsha = sha
+ assert istream.binsha == sha
+
+ assert len(istream.binsha) == 20
+ assert len(istream.hexsha) == 40
+
+ assert istream.size == s
+ istream.size = s * 2
+ assert istream.size == s * 2
+ assert istream.type == str_blob_type
+ istream.type = "something"
+ assert istream.type == "something"
+ assert istream.stream is stream
+ istream.stream = None
+ assert istream.stream is None
+
+ assert istream.error is None
+ istream.error = Exception()
+ assert isinstance(istream.error, Exception)
+
+
diff --git a/git/test/test_config.py b/git/test/test_config.py
index 173e380c..d2e199e3 100644
--- a/git/test/test_config.py
+++ b/git/test/test_config.py
@@ -4,13 +4,13 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import TestBase, fixture_path
import StringIO
+from git.config import *
from copy import copy
from ConfigParser import NoSectionError
-class TestBase(TestCase):
+class TestConfig(TestBase):
def _to_memcache(self, file_path):
fp = open(file_path, "r")
@@ -30,7 +30,9 @@ class TestBase(TestCase):
w_config.read() # enforce reading
assert w_config._sections
w_config.write() # enforce writing
- assert file_obj.getvalue() == file_obj_orig.getvalue()
+
+ # we stripped lines when reading, so the results differ
+ assert file_obj.getvalue() != file_obj_orig.getvalue()
# creating an additional config writer must fail due to exclusive access
self.failUnlessRaises(IOError, GitConfigParser, file_obj, read_only = False)
@@ -56,10 +58,10 @@ class TestBase(TestCase):
file_obj.seek(0)
r_config = GitConfigParser(file_obj, read_only=True)
+ #print file_obj.getvalue()
assert r_config.has_section(sname)
assert r_config.has_option(sname, oname)
assert r_config.get(sname, oname) == val
-
# END for each filename
def test_base(self):
diff --git a/git/test/test_db.py b/git/test/test_db.py
deleted file mode 100644
index db2d7983..00000000
--- a/git/test/test_db.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# test_repo.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git.db import *
-from gitdb.util import bin_to_hex
-from git.exc import BadObject
-import os
-
-class TestDB(TestBase):
-
- def test_base(self):
- gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git)
-
- # partial to complete - works with everything
- hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
- assert len(hexsha) == 40
-
- assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha
-
- # fails with BadObject
- for invalid_rev in ("0000", "bad/ref", "super bad"):
- self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
diff --git a/git/test/test_diff.py b/git/test/test_diff.py
index 83db2df6..79f038e8 100644
--- a/git/test/test_diff.py
+++ b/git/test/test_diff.py
@@ -4,8 +4,15 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ StringProcessAdapter,
+ fixture,
+ assert_equal,
+ assert_true
+ )
+
+from git.diff import *
class TestDiff(TestBase):
diff --git a/git/test/test_example.py b/git/test/test_example.py
new file mode 100644
index 00000000..dbab3118
--- /dev/null
+++ b/git/test/test_example.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module with examples from the tutorial section of the docs"""
+from lib import TestBase, fixture_path
+from git.base import IStream
+from git.db.py.loose import PureLooseObjectODB
+from git.util import pool
+
+from cStringIO import StringIO
+
+from async import IteratorReader
+
+class TestExamples(TestBase):
+
+ def test_base(self):
+ ldb = PureLooseObjectODB(fixture_path("../../../.git/objects"))
+
+ for sha1 in ldb.sha_iter():
+ oinfo = ldb.info(sha1)
+ ostream = ldb.stream(sha1)
+ assert oinfo[:3] == ostream[:3]
+
+ assert len(ostream.read()) == ostream.size
+ assert ldb.has_object(oinfo.binsha)
+ # END for each sha in database
+ # assure we close all files
+ try:
+ del(ostream)
+ del(oinfo)
+ except UnboundLocalError:
+ pass
+ # END ignore exception if there are no loose objects
+
+ data = "my data"
+ istream = IStream("blob", len(data), StringIO(data))
+
+ # the object does not yet have a sha
+ assert istream.binsha is None
+ ldb.store(istream)
+ # now the sha is set
+ assert len(istream.binsha) == 20
+ assert ldb.has_object(istream.binsha)
+
+
+ # async operation
+ # Create a reader from an iterator
+ reader = IteratorReader(ldb.sha_iter())
+
+ # get reader for object streams
+ info_reader = ldb.stream_async(reader)
+
+ # read one
+ info = info_reader.read(1)[0]
+
+ # read all the rest until depletion
+ ostreams = info_reader.read()
+
+ # set the pool to use two threads
+ pool.set_size(2)
+
+ # synchronize the mode of operation
+ pool.set_size(0)
diff --git a/git/test/test_fun.py b/git/test/test_fun.py
index b7991cdb..ed069912 100644
--- a/git/test/test_fun.py
+++ b/git/test/test_fun.py
@@ -1,4 +1,4 @@
-from git.test.lib import *
+from git.test.lib import TestBase, with_rw_repo
from git.objects.fun import (
traverse_tree_recursive,
traverse_trees_recursive,
@@ -9,9 +9,9 @@ from git.index.fun import (
aggressive_tree_merge
)
-from gitdb.util import bin_to_hex
-from gitdb.base import IStream
-from gitdb.typ import str_tree_type
+from git.util import bin_to_hex
+from git.base import IStream
+from git.typ import str_tree_type
from stat import (
S_IFDIR,
diff --git a/git/test/test_git.py b/git/test/test_git.py
index c92a642b..b9a0b617 100644
--- a/git/test/test_git.py
+++ b/git/test/test_git.py
@@ -5,80 +5,96 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os, sys
-from git.test.lib import *
+from git.test.lib import (
+ TestBase,
+ patch_object,
+ raises,
+ assert_equal,
+ assert_true,
+ assert_match,
+ fixture_path
+ )
from git import Git, GitCommandError
-class TestGit(TestCase):
-
- @classmethod
- def setUpAll(cls):
- cls.git = Git(GIT_REPO)
+class TestGit(TestBase):
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestGit, cls).setUpAll()
+ cls.git = Git(cls.rorepo.working_dir)
- @patch_object(Git, 'execute')
- def test_call_process_calls_execute(self, git):
- git.return_value = ''
- self.git.version()
- assert_true(git.called)
- assert_equal(git.call_args, ((['git', 'version'],), {}))
+ @patch_object(Git, 'execute')
+ def test_call_process_calls_execute(self, git):
+ git.return_value = ''
+ self.git.version()
+ assert_true(git.called)
+ assert_equal(git.call_args, ((['git', 'version'],), {}))
- @raises(GitCommandError)
- def test_it_raises_errors(self):
- self.git.this_does_not_exist()
+ @raises(GitCommandError)
+ def test_it_raises_errors(self):
+ self.git.this_does_not_exist()
- def test_it_transforms_kwargs_into_git_command_arguments(self):
- assert_equal(["-s"], self.git.transform_kwargs(**{'s': True}))
- assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5}))
+ def test_it_transforms_kwargs_into_git_command_arguments(self):
+ assert_equal(["-s"], self.git.transform_kwargs(**{'s': True}))
+ assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5}))
- assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True}))
- assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5}))
+ assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True}))
+ assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5}))
- assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True}))
+ assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True}))
- def test_it_executes_git_to_shell_and_returns_result(self):
- assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"]))
+ def test_it_executes_git_to_shell_and_returns_result(self):
+ assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"]))
- def test_it_accepts_stdin(self):
- filename = fixture_path("cat_file_blob")
- fh = open(filename, 'r')
- assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8",
- self.git.hash_object(istream=fh, stdin=True))
- fh.close()
+ def test_it_accepts_stdin(self):
+ filename = fixture_path("cat_file_blob")
+ fh = open(filename, 'r')
+ assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8",
+ self.git.hash_object(istream=fh, stdin=True))
+ fh.close()
- @patch_object(Git, 'execute')
- def test_it_ignores_false_kwargs(self, git):
- # this_should_not_be_ignored=False implies it *should* be ignored
- output = self.git.version(pass_this_kwarg=False)
- assert_true("pass_this_kwarg" not in git.call_args[1])
-
- def test_persistent_cat_file_command(self):
- # read header only
- import subprocess as sp
- hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167"
- g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True)
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- obj_info = g.stdout.readline()
-
- # read header + data
- g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True)
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- obj_info_two = g.stdout.readline()
- assert obj_info == obj_info_two
-
- # read data - have to read it in one large chunk
- size = int(obj_info.split()[2])
- data = g.stdout.read(size)
- terminating_newline = g.stdout.read(1)
-
- # now we should be able to read a new object
- g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
- g.stdin.flush()
- assert g.stdout.readline() == obj_info
-
-
- # same can be achived using the respective command functions
- hexsha, typename, size = self.git.get_object_header(hexsha)
- hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha)
- assert typename == typename_two and size == size_two
+ @patch_object(Git, 'execute')
+ def test_it_ignores_false_kwargs(self, git):
+ # pass_this_kwarg=False implies it *should* be ignored
+ output = self.git.version(pass_this_kwarg=False)
+ assert_true("pass_this_kwarg" not in git.call_args[1])
+
+ def test_persistent_cat_file_command(self):
+ # read header only
+ import subprocess as sp
+ hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167"
+ g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info = g.stdout.readline()
+
+ # read header + data
+ g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True)
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ obj_info_two = g.stdout.readline()
+ assert obj_info == obj_info_two
+
+ # read data - have to read it in one large chunk
+ size = int(obj_info.split()[2])
+ data = g.stdout.read(size)
+ terminating_newline = g.stdout.read(1)
+
+ # now we should be able to read a new object
+ g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n")
+ g.stdin.flush()
+ assert g.stdout.readline() == obj_info
+
+
+ # same can be achieved using the respective command functions
+ hexsha, typename, size = self.git.get_object_header(hexsha)
+ hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha)
+ assert typename == typename_two and size == size_two
+
+ def test_version(self):
+ v = self.git.version_info
+ assert isinstance(v, tuple)
+ for n in v:
+ assert isinstance(n, int)
+ #END verify number types
diff --git a/git/test/test_import.py b/git/test/test_import.py
new file mode 100644
index 00000000..a5a1d11b
--- /dev/null
+++ b/git/test/test_import.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""This module's whole purpose is to verify the __all__ descriptions in the respective
+module, by importing using from x import *"""
+
+# perform the actual imports
+import os
+
+from git import *
+
+def import_all(topdir, topmodule='git', skip = "test"):
+ base = os.path.basename
+ join = os.path.join
+ init_script = '__init__.py'
+ prev_cwd = os.getcwd()
+ try:
+ os.chdir(os.path.dirname(topdir))
+ for root, dirs, files in os.walk(base(topdir)):
+ if init_script not in files:
+ del(dirs[:])
+ continue
+ #END ignore non-packages
+
+ if skip in root:
+ continue
+ #END handle ignores
+
+ for relafile in files:
+ if not relafile.endswith('.py'):
+ continue
+ if relafile == init_script:
+ continue
+ module_path = join(root, os.path.splitext(relafile)[0]).replace("/", ".").replace("\\", ".")
+
+ m = __import__(module_path, globals(), locals(), [""])
+ try:
+ attrlist = m.__all__
+ for attr in attrlist:
+ assert hasattr(m, attr), "Invalid item in %s.__all__: %s" % (module_path, attr)
+ #END verify
+ except AttributeError:
+ pass
+ # END try each listed attribute
+ #END for each file in dir
+ #END for each item
+ finally:
+ os.chdir(prev_cwd)
+ #END handle previous currentdir
+
+
+
+class TestDummy(object):
+ def test_base(self):
+ dn = os.path.dirname
+ # NOTE: I don't think this is working, as the __all__ variable is not used in this case
+ import_all(dn(dn(__file__)))
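
The invariant that import_all enforces can be stated in a few lines; a minimal sketch using a synthetic module (the module and attribute names are hypothetical):

    import types

    # a stand-in for any package module that declares __all__
    mymod = types.ModuleType('mymod')
    mymod.__all__ = ['frobnicate']
    mymod.frobnicate = lambda: 42

    # what import_all asserts for every module exposing __all__
    for attr in mymod.__all__:
        assert hasattr(mymod, attr), "Invalid item in %s.__all__: %s" % (mymod.__name__, attr)
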
diff --git a/git/test/test_index.py b/git/test/test_index.py
index 5d227897..7d65cb9b 100644
--- a/git/test/test_index.py
+++ b/git/test/test_index.py
@@ -4,7 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
+from git.test.lib import (
+ TestBase,
+ with_rw_repo,
+ fixture_path,
+ fixture
+ )
from git import *
import inspect
import os
@@ -12,6 +17,7 @@ import sys
import tempfile
import glob
import shutil
+import time
from stat import *
class TestIndex(TestBase):
diff --git a/git/test/test_pack.py b/git/test/test_pack.py
new file mode 100644
index 00000000..c398fc56
--- /dev/null
+++ b/git/test/test_pack.py
@@ -0,0 +1,247 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Test everything about packs reading and writing"""
+from lib import (
+ TestBase,
+ with_rw_directory,
+ with_packs_rw,
+ fixture_path
+ )
+from git.stream import DeltaApplyReader
+
+from git.pack import (
+ PackEntity,
+ PackIndexFile,
+ PackFile
+ )
+
+from git.base import (
+ OInfo,
+ OStream,
+ )
+
+from git.fun import delta_types
+from git.exc import UnsupportedOperation
+from git.util import to_bin_sha
+from itertools import izip, chain
+from nose import SkipTest
+
+import os
+import sys
+import tempfile
+
+
+#{ Utilities
+def bin_sha_from_filename(filename):
+ return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:])
+#} END utilities
+
+class TestPack(TestBase):
+
+ packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
+ packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
+ packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
+ packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
+ packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
+ packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
+
+
+ def _assert_index_file(self, index, version, size):
+ assert index.packfile_checksum() != index.indexfile_checksum()
+ assert len(index.packfile_checksum()) == 20
+ assert len(index.indexfile_checksum()) == 20
+ assert index.version() == version
+ assert index.size() == size
+ assert len(index.offsets()) == size
+
+ # get all data of all objects
+ for oidx in xrange(index.size()):
+ sha = index.sha(oidx)
+ assert oidx == index.sha_to_index(sha)
+
+ entry = index.entry(oidx)
+ assert len(entry) == 3
+
+ assert entry[0] == index.offset(oidx)
+ assert entry[1] == sha
+ assert entry[2] == index.crc(oidx)
+
+ # verify partial sha
+ for l in (4,8,11,17,20):
+ assert index.partial_sha_to_index(sha[:l], l*2) == oidx
+
+ # END for each object index in indexfile
+ self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2)
+
+
+ def _assert_pack_file(self, pack, version, size):
+ assert pack.version() == version
+ assert pack.size() == size
+ assert len(pack.checksum()) == 20
+
+ num_obj = 0
+ for obj in pack.stream_iter():
+ num_obj += 1
+ info = pack.info(obj.pack_offset)
+ stream = pack.stream(obj.pack_offset)
+
+ assert info.pack_offset == stream.pack_offset
+ assert info.type_id == stream.type_id
+ assert hasattr(stream, 'read')
+
+ # it should be possible to read from both streams
+ assert obj.read() == stream.read()
+
+ streams = pack.collect_streams(obj.pack_offset)
+ assert streams
+
+ # read the stream
+ try:
+ dstream = DeltaApplyReader.new(streams)
+ except ValueError:
+ # ignore these, old git versions use only ref deltas,
+ # which we haven't resolved (as we are without an index)
+ # Also ignore non-delta streams
+ continue
+ # END get deltastream
+
+ # read all
+ data = dstream.read()
+ assert len(data) == dstream.size
+
+ # test seek
+ dstream.seek(0)
+ assert dstream.read() == data
+
+
+ # read chunks
+ # NOTE: the current implementation is safe, it basically transfers
+ # all calls to the underlying memory map
+
+ # END for each object
+ assert num_obj == size
+
+
+ def test_pack_index(self):
+ # check version 1 and 2
+ for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2):
+ index = PackIndexFile(indexfile)
+ self._assert_index_file(index, version, size)
+ # END run tests
+
+ def test_pack(self):
+ # there is this special version 3, but apparently it's just like version 2 ...
+ for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
+ pack = PackFile(packfile)
+ self._assert_pack_file(pack, version, size)
+ # END for each pack to test
+
+ @with_rw_directory
+ def test_pack_entity(self, rw_dir):
+ pack_objs = list()
+ for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1),
+ (self.packfile_v2_2, self.packindexfile_v2),
+ (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
+ packfile, version, size = packinfo
+ indexfile, version, size = indexinfo
+ entity = PackEntity(packfile)
+ assert entity.pack().path() == packfile
+ assert entity.index().path() == indexfile
+ pack_objs.extend(entity.stream_iter())
+
+ count = 0
+ for info, stream in izip(entity.info_iter(), entity.stream_iter()):
+ count += 1
+ assert info.binsha == stream.binsha
+ assert len(info.binsha) == 20
+ assert info.type_id == stream.type_id
+ assert info.size == stream.size
+
+ # we return fully resolved items, which is implied by the sha-centric access
+ assert not info.type_id in delta_types
+
+ # try all calls
+ assert len(entity.collect_streams(info.binsha))
+ oinfo = entity.info(info.binsha)
+ assert isinstance(oinfo, OInfo)
+ assert oinfo.binsha is not None
+ ostream = entity.stream(info.binsha)
+ assert isinstance(ostream, OStream)
+ assert ostream.binsha is not None
+
+ # verify the stream
+ try:
+ assert entity.is_valid_stream(info.binsha, use_crc=True)
+ except UnsupportedOperation:
+ pass
+ # END ignore version issues
+ assert entity.is_valid_stream(info.binsha, use_crc=False)
+ # END for each info, stream tuple
+ assert count == size
+
+ # END for each entity
+
+ # pack writing - write all packs into one
+ # index path can be None
+ pack_path = tempfile.mktemp('', "pack", rw_dir)
+ index_path = tempfile.mktemp('', 'index', rw_dir)
+ iteration = 0
+ def rewind_streams():
+ for obj in pack_objs:
+ obj.stream.seek(0)
+ #END utility
+ for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)):
+ pfile = open(ppath, 'wb')
+ iwrite = None
+ if ipath:
+ ifile = open(ipath, 'wb')
+ iwrite = ifile.write
+ #END handle ip
+
+ # make sure we rewind the streams ... we work on the same objects over and over again
+ if iteration > 0:
+ rewind_streams()
+ #END rewind streams
+ iteration += 1
+
+ pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj)
+ pfile.close()
+ assert os.path.getsize(ppath) > 100
+
+ # verify pack
+ pf = PackFile(ppath)
+ assert pf.size() == len(pack_objs)
+ assert pf.version() == PackFile.pack_version_default
+ assert pf.checksum() == pack_sha
+
+ # verify index
+ if ipath is not None:
+ ifile.close()
+ assert os.path.getsize(ipath) > 100
+ idx = PackIndexFile(ipath)
+ assert idx.version() == PackIndexFile.index_version_default
+ assert idx.packfile_checksum() == pack_sha
+ assert idx.indexfile_checksum() == index_sha
+ assert idx.size() == len(pack_objs)
+ #END verify files exist
+ #END for each packpath, indexpath pair
+
+ # verify the packs thoroughly
+ rewind_streams()
+ entity = PackEntity.create(pack_objs, rw_dir)
+ count = 0
+ for info in entity.info_iter():
+ count += 1
+ for use_crc in range(2):
+ assert entity.is_valid_stream(info.binsha, use_crc)
+ # END for each crc mode
+ #END for each info
+ assert count == len(pack_objs)
+
+
+ def test_pack_64(self):
+ # TODO: hex-edit a pack to help verify that we can handle 64 bit offsets
+ # of course without really needing such a huge pack
+ raise SkipTest()
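
The tests above double as a usage reference for the pack-reading API. A minimal sketch of inspecting every object in one of the fixture packs, assuming the PackEntity interface exercised in test_pack_entity (fixture_path comes from the test lib imported at the top of the file):

    from git.pack import PackEntity
    from git.util import bin_to_hex

    entity = PackEntity(fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'))
    for info in entity.info_iter():
        # items are fully resolved, carrying binsha, type_id and size
        print bin_to_hex(info.binsha), info.type_id, info.size
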
diff --git a/git/test/test_reflog.py b/git/test/test_reflog.py
index 3fdf1fae..271924aa 100644
--- a/git/test/test_reflog.py
+++ b/git/test/test_reflog.py
@@ -1,4 +1,4 @@
-from git.test.lib import *
+from git.test.lib import TestBase, fixture_path
from git.objects import IndexObject
from git.refs import *
from git.util import Actor
diff --git a/git/test/test_refs.py b/git/test/test_refs.py
index 2338b4e4..e49b23ab 100644
--- a/git/test/test_refs.py
+++ b/git/test/test_refs.py
@@ -4,20 +4,25 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from mock import *
-from git.test.lib import *
-from git import *
-import git.refs as refs
+from git.test.lib import TestBase, with_rw_repo
+from git.refs import *
+import git.refs as ref
+
from git.util import Actor
from git.objects.tag import TagObject
+
+from git.exc import GitCommandError
+
from itertools import chain
import os
+from nose import SkipTest
+
class TestRefs(TestBase):
def test_from_path(self):
# should be able to create any reference directly
- for ref_type in ( Reference, Head, TagReference, RemoteReference ):
+ for ref_type in (Reference, Head, TagReference, RemoteReference):
for name in ('rela_name', 'path/rela_name'):
full_path = ref_type.to_full_path(name)
instance = ref_type.from_path(self.rorepo, full_path)
@@ -27,20 +32,20 @@ class TestRefs(TestBase):
def test_tag_base(self):
tag_object_refs = list()
- for tag in self.rorepo.tags:
+ for tag in TagReference.list_items(self.rorepo):
assert "refs/tags" in tag.path
assert tag.name
- assert isinstance( tag.commit, Commit )
+ assert isinstance(tag.commit, tag.CommitCls)
if tag.tag is not None:
- tag_object_refs.append( tag )
+ tag_object_refs.append(tag)
tagobj = tag.tag
# have no dict
self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1)
- assert isinstance( tagobj, TagObject )
+ assert isinstance(tagobj, TagObject)
assert tagobj.tag == tag.name
- assert isinstance( tagobj.tagger, Actor )
- assert isinstance( tagobj.tagged_date, int )
- assert isinstance( tagobj.tagger_tz_offset, int )
+ assert isinstance(tagobj.tagger, Actor)
+ assert isinstance(tagobj.tagged_date, int)
+ assert isinstance(tagobj.tagger_tz_offset, int)
assert tagobj.message
assert tag.object == tagobj
# can't assign the object
@@ -48,15 +53,15 @@ class TestRefs(TestBase):
# END if we have a tag object
# END for tag in repo-tags
assert tag_object_refs
- assert isinstance(self.rorepo.tags['0.1.5'], TagReference)
+ assert isinstance(TagReference.list_items(self.rorepo)['0.1.6'], TagReference)
def test_tags(self):
# tag refs can point to tag objects or to commits
s = set()
ref_count = 0
- for ref in chain(self.rorepo.tags, self.rorepo.heads):
+ for ref in chain(TagReference.list_items(self.rorepo), Head.list_items(self.rorepo)):
ref_count += 1
- assert isinstance(ref, refs.Reference)
+ assert isinstance(ref, Reference)
assert str(ref) == ref.name
assert repr(ref)
assert ref == ref
@@ -66,9 +71,9 @@ class TestRefs(TestBase):
assert len(s) == ref_count
assert len(s|s) == ref_count
- @with_rw_repo('HEAD', bare=False)
- def test_heads(self, rwrepo):
- for head in rwrepo.heads:
+ @with_rw_repo("0.1.6")
+ def test_heads(self, rw_repo):
+ for head in Head.iter_items(rw_repo):
assert head.name
assert head.path
assert "refs/heads" in head.path
@@ -88,7 +93,7 @@ class TestRefs(TestBase):
# after the clone, we might still have a tracking branch setup
head.set_tracking_branch(None)
assert head.tracking_branch() is None
- remote_ref = rwrepo.remotes[0].refs[0]
+ remote_ref = RemoteReference.list_items(rw_repo)[0]
assert head.set_tracking_branch(remote_ref) is head
assert head.tracking_branch() == remote_ref
head.set_tracking_branch(None)
@@ -96,7 +101,7 @@ class TestRefs(TestBase):
# END for each head
# verify REFLOG gets altered
- head = rwrepo.head
+ head = HEAD(rw_repo)
cur_head = head.ref
cur_commit = cur_head.commit
pcommit = cur_head.commit.parents[0].parents[0]
@@ -130,7 +135,7 @@ class TestRefs(TestBase):
assert len(cur_head.log()) == blog_len+2
# a new branch has just a single entry
- other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created')
+ other_head = Head.create(rw_repo, 'mynewhead', pcommit, logmsg='new head created')
log = other_head.log()
assert len(log) == 1
assert log[0].oldhexsha == pcommit.NULL_HEX_SHA
@@ -139,24 +144,25 @@ class TestRefs(TestBase):
def test_refs(self):
types_found = set()
- for ref in self.rorepo.refs:
+ for ref in Reference.list_items(self.rorepo):
types_found.add(type(ref))
assert len(types_found) >= 3
def test_is_valid(self):
assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False
- assert self.rorepo.head.is_valid()
- assert self.rorepo.head.reference.is_valid()
+ assert HEAD(self.rorepo).is_valid()
+ assert HEAD(self.rorepo).reference.is_valid()
assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False
def test_orig_head(self):
- assert type(self.rorepo.head.orig_head()) == SymbolicReference
+ assert type(HEAD(self.rorepo).orig_head()) == SymbolicReference
- @with_rw_repo('0.1.6')
+ @with_rw_repo("0.1.6")
def test_head_reset(self, rw_repo):
- cur_head = rw_repo.head
+ cur_head = HEAD(rw_repo)
old_head_commit = cur_head.commit
new_head_commit = cur_head.ref.commit.parents[0]
+
cur_head.reset(new_head_commit, index=True) # index only
assert cur_head.reference.commit == new_head_commit
@@ -176,10 +182,9 @@ class TestRefs(TestBase):
cur_head.reset(new_head_commit)
rw_repo.index.checkout(["lib"], force=True)#
-
	# now that we have a read/write repo, change the HEAD reference - it's
# like git-reset --soft
- heads = rw_repo.heads
+ heads = Head.list_items(rw_repo)
assert heads
for head in heads:
cur_head.reference = head
@@ -198,7 +203,7 @@ class TestRefs(TestBase):
self.failUnlessRaises(TypeError, getattr, cur_head, "reference")
# tags are references, hence we can point to them
- some_tag = rw_repo.tags[0]
+ some_tag = TagReference.list_items(rw_repo)[0]
cur_head.reference = some_tag
assert not cur_head.is_detached
assert cur_head.commit == some_tag.commit
@@ -231,7 +236,7 @@ class TestRefs(TestBase):
old_name = new_head.name
assert new_head.rename("hello").name == "hello"
- assert new_head.rename("hello/world").name == "hello/world"
+ assert new_head.rename("hello/world").name == "hello/world" # yes, this must work
assert new_head.rename(old_name).name == old_name and new_head.path == old_path
# rename with force
@@ -414,12 +419,11 @@ class TestRefs(TestBase):
symbol_ref_path = "refs/symbol_ref"
symref = SymbolicReference(rw_repo, symbol_ref_path)
assert symref.path == symbol_ref_path
- symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path)
# set it
symref.reference = new_head
assert symref.reference == new_head
- assert os.path.isfile(symbol_ref_abspath)
+ assert os.path.isfile(symref.abspath)
assert symref.commit == new_head.commit
for name in ('absname','folder/rela_name'):
@@ -471,7 +475,7 @@ class TestRefs(TestBase):
rw_repo.head.reference = Head.create(rw_repo, "master")
# At least the head should still exist
- assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD'))
+ assert os.path.isfile(rw_repo.head.abspath)
refs = list(SymbolicReference.iter_items(rw_repo))
assert len(refs) == 1
@@ -517,5 +521,7 @@ class TestRefs(TestBase):
assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD')
def test_reflog(self):
- assert isinstance(self.rorepo.heads.master.log(), RefLog)
+ assert isinstance(Head.list_items(self.rorepo).master.log(), RefLog)
+ def test_pure_python_rename(self):
+ raise SkipTest("Pure python reference renames cannot properly handle refnames which become a directory after rename")
diff --git a/git/test/test_remote.py b/git/test/test_remote.py
index af6915a3..8ae9fe43 100644
--- a/git/test/test_remote.py
+++ b/git/test/test_remote.py
@@ -4,9 +4,23 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ with_rw_and_rw_remote_repo,
+ with_rw_repo,
+ )
from git.util import IterableList
+from git.db.interface import PushInfo, FetchInfo, RemoteProgress
+from git.remote import *
+from git.exc import GitCommandError
+from git.refs import (
+ Reference,
+ TagReference,
+ RemoteReference,
+ Head,
+ SymbolicReference
+ )
+
import tempfile
import shutil
import os
@@ -16,430 +30,421 @@ import random
random.seed(0)
class TestRemoteProgress(RemoteProgress):
- __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages' )
- def __init__(self):
- super(TestRemoteProgress, self).__init__()
- self._seen_lines = list()
- self._stages_per_op = dict()
- self._num_progress_messages = 0
-
- def _parse_progress_line(self, line):
- # we may remove the line later if it is dropped
- # Keep it for debugging
- self._seen_lines.append(line)
- rval = super(TestRemoteProgress, self)._parse_progress_line(line)
- assert len(line) > 1, "line %r too short" % line
- return rval
-
- def line_dropped(self, line):
- try:
- self._seen_lines.remove(line)
- except ValueError:
- pass
-
- def update(self, op_code, cur_count, max_count=None, message=''):
- # check each stage only comes once
- op_id = op_code & self.OP_MASK
- assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING)
-
- self._stages_per_op.setdefault(op_id, 0)
- self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK)
-
- if op_code & (self.WRITING|self.END) == (self.WRITING|self.END):
- assert message
- # END check we get message
-
- self._num_progress_messages += 1
-
-
- def make_assertion(self):
- # we don't always receive messages
- if not self._seen_lines:
- return
-
- # sometimes objects are not compressed which is okay
- assert len(self._seen_ops) in (2,3)
- assert self._stages_per_op
-
- # must have seen all stages
- for op, stages in self._stages_per_op.items():
- assert stages & self.STAGE_MASK == self.STAGE_MASK
- # END for each op/stage
+ __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages')
+ def __init__(self):
+ super(TestRemoteProgress, self).__init__()
+ self._seen_lines = list()
+ self._stages_per_op = dict()
+ self._seen_ops = set()
+ self._num_progress_messages = 0
+
+ def line_dropped(self, line):
+ try:
+ self._seen_lines.remove(line)
+ except ValueError:
+ pass
+
+ def __call__(self, message, input=''):
+ pass
+
+ def update(self, op_code, cur_count, max_count=None, message='', input=''):
+ # check each stage only comes once
+ if input:
+ self._seen_lines.append(input)
+ #END handle input
+ op_id = op_code & self.OP_MASK
+ assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING)
+
+ self._stages_per_op.setdefault(op_id, 0)
+ self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK)
+
+ if op_code & (self.WRITING|self.END) == (self.WRITING|self.END):
+ assert message
+ # END check we get message
+
+ self._num_progress_messages += 1
+
+
+ def make_assertion(self):
+ # we don't always receive messages
+ if not self._seen_lines:
+ return
+
+ # sometimes objects are not compressed which is okay
+ assert len(self._stages_per_op.keys()) in (2,3)
+ assert self._stages_per_op
+
+ # must have seen all stages
+ for op, stages in self._stages_per_op.items():
+ assert stages & self.STAGE_MASK == self.STAGE_MASK
+ # END for each op/stage
- def assert_received_message(self):
- assert self._num_progress_messages
-
+ def assert_received_message(self):
+ assert self._num_progress_messages
+
class TestRemote(TestBase):
-
- def _print_fetchhead(self, repo):
- fp = open(os.path.join(repo.git_dir, "FETCH_HEAD"))
- fp.close()
-
-
- def _do_test_fetch_result(self, results, remote):
- # self._print_fetchhead(remote.repo)
- assert len(results) > 0 and isinstance(results[0], FetchInfo)
- for info in results:
- assert isinstance(info.note, basestring)
- if isinstance(info.ref, Reference):
- assert info.flags != 0
- # END reference type flags handling
- assert isinstance(info.ref, (SymbolicReference, Reference))
- if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD):
- assert isinstance(info.old_commit, Commit)
- else:
- assert info.old_commit is None
- # END forced update checking
- # END for each info
-
- def _do_test_push_result(self, results, remote):
- assert len(results) > 0 and isinstance(results[0], PushInfo)
- for info in results:
- assert info.flags
- assert isinstance(info.summary, basestring)
- if info.old_commit is not None:
- assert isinstance(info.old_commit, Commit)
- if info.flags & info.ERROR:
- has_one = False
- for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE):
- has_one |= bool(info.flags & bitflag)
- # END for each bitflag
- assert has_one
- else:
- # there must be a remote commit
- if info.flags & info.DELETED == 0:
- assert isinstance(info.local_ref, Reference)
- else:
- assert info.local_ref is None
- assert type(info.remote_ref) in (TagReference, RemoteReference)
- # END error checking
- # END for each info
-
-
- def _do_test_fetch_info(self, repo):
- self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "nonsense", '')
- self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '')
-
- def _commit_random_file(self, repo):
- #Create a file with a random name and random data and commit it to repo.
- # Return the commited absolute file path
- index = repo.index
- new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo)
- index.add([new_file])
- index.commit("Committing %s" % new_file)
- return new_file
-
- def _do_test_fetch(self,remote, rw_repo, remote_repo):
- # specialized fetch testing to de-clutter the main test
- self._do_test_fetch_info(rw_repo)
-
- def fetch_and_test(remote, **kwargs):
- progress = TestRemoteProgress()
- kwargs['progress'] = progress
- res = remote.fetch(**kwargs)
- progress.make_assertion()
- self._do_test_fetch_result(res, remote)
- return res
- # END fetch and check
-
- def get_info(res, remote, name):
- return res["%s/%s"%(remote,name)]
-
- # put remote head to master as it is garantueed to exist
- remote_repo.head.reference = remote_repo.heads.master
-
- res = fetch_and_test(remote)
- # all uptodate
- for info in res:
- assert info.flags & info.HEAD_UPTODATE
-
- # rewind remote head to trigger rejection
- # index must be false as remote is a bare repo
- rhead = remote_repo.head
- remote_commit = rhead.commit
- rhead.reset("HEAD~2", index=False)
- res = fetch_and_test(remote)
- mkey = "%s/%s"%(remote,'master')
- master_info = res[mkey]
- assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None
-
- # normal fast forward - set head back to previous one
- rhead.commit = remote_commit
- res = fetch_and_test(remote)
- assert res[mkey].flags & FetchInfo.FAST_FORWARD
-
- # new remote branch
- new_remote_branch = Head.create(remote_repo, "new_branch")
- res = fetch_and_test(remote)
- new_branch_info = get_info(res, remote, new_remote_branch)
- assert new_branch_info.flags & FetchInfo.NEW_HEAD
-
- # remote branch rename ( causes creation of a new one locally )
- new_remote_branch.rename("other_branch_name")
- res = fetch_and_test(remote)
- other_branch_info = get_info(res, remote, new_remote_branch)
- assert other_branch_info.ref.commit == new_branch_info.ref.commit
-
- # remove new branch
- Head.delete(new_remote_branch.repo, new_remote_branch)
- res = fetch_and_test(remote)
- # deleted remote will not be fetched
- self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch)
-
- # prune stale tracking branches
- stale_refs = remote.stale_refs
- assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference)
- RemoteReference.delete(rw_repo, *stale_refs)
-
- # test single branch fetch with refspec including target remote
- res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote)
- assert len(res) == 1 and get_info(res, remote, 'master')
-
- # ... with respec and no target
- res = fetch_and_test(remote, refspec='master')
- assert len(res) == 1
-
- # add new tag reference
- rtag = TagReference.create(remote_repo, "1.0-RV_hello.there")
- res = fetch_and_test(remote, tags=True)
- tinfo = res[str(rtag)]
- assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit
- assert tinfo.flags & tinfo.NEW_TAG
-
- # adjust tag commit
- Reference.set_object(rtag, rhead.commit.parents[0].parents[0])
- res = fetch_and_test(remote, tags=True)
- tinfo = res[str(rtag)]
- assert tinfo.commit == rtag.commit
- assert tinfo.flags & tinfo.TAG_UPDATE
-
- # delete remote tag - local one will stay
- TagReference.delete(remote_repo, rtag)
- res = fetch_and_test(remote, tags=True)
- self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag))
-
- # provoke to receive actual objects to see what kind of output we have to
- # expect. For that we need a remote transport protocol
- # Create a new UN-shared repo and fetch into it after we pushed a change
- # to the shared repo
- other_repo_dir = tempfile.mktemp("other_repo")
- # must clone with a local path for the repo implementation not to freak out
- # as it wants local paths only ( which I can understand )
- other_repo = remote_repo.clone(other_repo_dir, shared=False)
- remote_repo_url = "git://localhost%s"%remote_repo.git_dir
-
- # put origin to git-url
- other_origin = other_repo.remotes.origin
- other_origin.config_writer.set("url", remote_repo_url)
- # it automatically creates alternates as remote_repo is shared as well.
- # It will use the transport though and ignore alternates when fetching
- # assert not other_repo.alternates # this would fail
-
- # assure we are in the right state
- rw_repo.head.reset(remote.refs.master, working_tree=True)
- try:
- self._commit_random_file(rw_repo)
- remote.push(rw_repo.head.reference)
-
- # here I would expect to see remote-information about packing
- # objects and so on. Unfortunately, this does not happen
- # if we are redirecting the output - git explicitly checks for this
- # and only provides progress information to ttys
- res = fetch_and_test(other_origin)
- finally:
- shutil.rmtree(other_repo_dir)
- # END test and cleanup
-
- def _test_push_and_pull(self,remote, rw_repo, remote_repo):
- # push our changes
- lhead = rw_repo.head
- lindex = rw_repo.index
- # assure we are on master and it is checked out where the remote is
- try:
- lhead.reference = rw_repo.heads.master
- except AttributeError:
- # if the author is on a non-master branch, the clones might not have
- # a local master yet. We simply create it
- lhead.reference = rw_repo.create_head('master')
- # END master handling
- lhead.reset(remote.refs.master, working_tree=True)
-
- # push without spec should fail ( without further configuration )
- # well, works nicely
- # self.failUnlessRaises(GitCommandError, remote.push)
-
- # simple file push
- self._commit_random_file(rw_repo)
- progress = TestRemoteProgress()
- res = remote.push(lhead.reference, progress)
- assert isinstance(res, IterableList)
- self._do_test_push_result(res, remote)
- progress.make_assertion()
-
- # rejected - undo last commit
- lhead.reset("HEAD~1")
- res = remote.push(lhead.reference)
- assert res[0].flags & PushInfo.ERROR
- assert res[0].flags & PushInfo.REJECTED
- self._do_test_push_result(res, remote)
-
- # force rejected pull
- res = remote.push('+%s' % lhead.reference)
- assert res[0].flags & PushInfo.ERROR == 0
- assert res[0].flags & PushInfo.FORCED_UPDATE
- self._do_test_push_result(res, remote)
-
- # invalid refspec
- res = remote.push("hellothere")
- assert len(res) == 0
-
- # push new tags
- progress = TestRemoteProgress()
- to_be_updated = "my_tag.1.0RV"
- new_tag = TagReference.create(rw_repo, to_be_updated)
- other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message")
- res = remote.push(progress=progress, tags=True)
- assert res[-1].flags & PushInfo.NEW_TAG
- progress.make_assertion()
- self._do_test_push_result(res, remote)
-
- # update push new tags
- # Rejection is default
- new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True)
- res = remote.push(tags=True)
- self._do_test_push_result(res, remote)
- assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR
-
- # push force this tag
- res = remote.push("+%s" % new_tag.path)
- assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE
-
- # delete tag - have to do it using refspec
- res = remote.push(":%s" % new_tag.path)
- self._do_test_push_result(res, remote)
- assert res[0].flags & PushInfo.DELETED
- # Currently progress is not properly transferred, especially not using
- # the git daemon
- # progress.assert_received_message()
-
- # push new branch
- new_head = Head.create(rw_repo, "my_new_branch")
- progress = TestRemoteProgress()
- res = remote.push(new_head, progress)
- assert res[0].flags & PushInfo.NEW_HEAD
- progress.make_assertion()
- self._do_test_push_result(res, remote)
-
- # delete new branch on the remote end and locally
- res = remote.push(":%s" % new_head.path)
- self._do_test_push_result(res, remote)
- Head.delete(rw_repo, new_head)
- assert res[-1].flags & PushInfo.DELETED
-
- # --all
- res = remote.push(all=True)
- self._do_test_push_result(res, remote)
-
- remote.pull('master')
-
- # cleanup - delete created tags and branches as we are in an innerloop on
- # the same repository
- TagReference.delete(rw_repo, new_tag, other_tag)
- remote.push(":%s" % other_tag.path)
-
- @with_rw_and_rw_remote_repo('0.1.6')
- def test_base(self, rw_repo, remote_repo):
- num_remotes = 0
- remote_set = set()
- ran_fetch_test = False
-
- for remote in rw_repo.remotes:
- num_remotes += 1
- assert remote == remote
- assert str(remote) != repr(remote)
- remote_set.add(remote)
- remote_set.add(remote) # should already exist
-
- # REFS
- refs = remote.refs
- assert refs
- for ref in refs:
- assert ref.remote_name == remote.name
- assert ref.remote_head
- # END for each ref
-
- # OPTIONS
- # cannot use 'fetch' key anymore as it is now a method
- for opt in ("url", ):
- val = getattr(remote, opt)
- reader = remote.config_reader
- assert reader.get(opt) == val
- assert reader.get_value(opt, None) == val
-
- # unable to write with a reader
- self.failUnlessRaises(IOError, reader.set, opt, "test")
-
- # change value
- writer = remote.config_writer
- new_val = "myval"
- writer.set(opt, new_val)
- assert writer.get(opt) == new_val
- writer.set(opt, val)
- assert writer.get(opt) == val
- del(writer)
- assert getattr(remote, opt) == val
- # END for each default option key
-
- # RENAME
- other_name = "totally_other_name"
- prev_name = remote.name
- assert remote.rename(other_name) == remote
- assert prev_name != remote.name
- # multiple times
- for time in range(2):
- assert remote.rename(prev_name).name == prev_name
- # END for each rename ( back to prev_name )
-
- # PUSH/PULL TESTING
- self._test_push_and_pull(remote, rw_repo, remote_repo)
-
- # FETCH TESTING
- # Only for remotes - local cases are the same or less complicated
- # as additional progress information will never be emitted
- if remote.name == "daemon_origin":
- self._do_test_fetch(remote, rw_repo, remote_repo)
- ran_fetch_test = True
- # END fetch test
-
- remote.update()
- # END for each remote
-
- assert ran_fetch_test
- assert num_remotes
- assert num_remotes == len(remote_set)
-
- origin = rw_repo.remote('origin')
- assert origin == rw_repo.remotes.origin
-
- @with_rw_repo('HEAD', bare=True)
- def test_creation_and_removal(self, bare_rw_repo):
- new_name = "test_new_one"
- arg_list = (new_name, "git@server:hello.git")
- remote = Remote.create(bare_rw_repo, *arg_list )
- assert remote.name == "test_new_one"
- assert remote in bare_rw_repo.remotes
-
- # create same one again
- self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list)
-
- Remote.remove(bare_rw_repo, new_name)
-
- for remote in bare_rw_repo.remotes:
- if remote.name == new_name:
- raise AssertionError("Remote removal failed")
- # END if deleted remote matches existing remote's name
- # END for each remote
-
-
-
+
+ def _print_fetchhead(self, repo):
+ fp = open(os.path.join(repo.git_dir, "FETCH_HEAD"))
+ fp.close()
+
+
+ def _do_test_fetch_result(self, results, remote):
+ # self._print_fetchhead(remote.repo)
+ assert len(results) > 0 and isinstance(results[0], FetchInfo)
+ for info in results:
+ assert isinstance(info.note, basestring)
+ if isinstance(info.ref, Reference):
+ assert info.flags != 0
+ # END reference type flags handling
+ assert isinstance(info.ref, (SymbolicReference, Reference))
+ if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD):
+ assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20
+ else:
+ assert info.old_commit_binsha is None
+ # END forced update checking
+ # END for each info
+
+ def _do_test_push_result(self, results, remote):
+ assert len(results) > 0 and isinstance(results[0], PushInfo)
+ for info in results:
+ assert info.flags
+ assert isinstance(info.summary, basestring)
+ if info.old_commit_binsha is not None:
+ assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20
+ if info.flags & info.ERROR:
+ has_one = False
+ for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE):
+ has_one |= bool(info.flags & bitflag)
+ # END for each bitflag
+ assert has_one
+ else:
+ # there must be a remote commit
+ if info.flags & info.DELETED == 0:
+ assert isinstance(info.local_ref, Reference)
+ else:
+ assert info.local_ref is None
+ assert type(info.remote_ref) in (TagReference, RemoteReference)
+ # END error checking
+ # END for each info
+
+ def _commit_random_file(self, repo):
+ # Create a file with a random name and random data and commit it to repo.
+ # Return the committed absolute file path
+ index = repo.index
+ new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo)
+ index.add([new_file])
+ index.commit("Committing %s" % new_file)
+ return new_file
+
+ def _do_test_fetch(self,remote, rw_repo, remote_repo):
+ def fetch_and_test(remote, **kwargs):
+ progress = TestRemoteProgress()
+ kwargs['progress'] = progress
+ res = remote.fetch(**kwargs)
+ progress.make_assertion()
+ self._do_test_fetch_result(res, remote)
+ return res
+ # END fetch and check
+
+ def get_info(res, remote, name):
+ return res["%s/%s"%(remote,name)]
+
+ # point the remote head to master as it is guaranteed to exist
+ remote_repo.head.reference = remote_repo.heads.master
+
+ res = fetch_and_test(remote)
+ # all uptodate
+ for info in res:
+ assert info.flags & info.HEAD_UPTODATE
+
+ # rewind remote head to trigger rejection
+ # index must be false as remote is a bare repo
+ rhead = remote_repo.head
+ remote_commit = rhead.commit
+ rhead.reset("HEAD~2", index=False)
+ res = fetch_and_test(remote)
+ mkey = "%s/%s"%(remote,'master')
+ master_info = res[mkey]
+ assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None
+
+ # normal fast forward - set head back to previous one
+ rhead.commit = remote_commit
+ res = fetch_and_test(remote)
+ assert res[mkey].flags & FetchInfo.FAST_FORWARD
+
+ # new remote branch
+ new_remote_branch = Head.create(remote_repo, "new_branch")
+ res = fetch_and_test(remote)
+ new_branch_info = get_info(res, remote, new_remote_branch)
+ assert new_branch_info.flags & FetchInfo.NEW_HEAD
+
+ # remote branch rename ( causes creation of a new one locally )
+ new_remote_branch.rename("other_branch_name")
+ res = fetch_and_test(remote)
+ other_branch_info = get_info(res, remote, new_remote_branch)
+ assert other_branch_info.ref.commit == new_branch_info.ref.commit
+
+ # remove new branch
+ Head.delete(new_remote_branch.repo, new_remote_branch)
+ res = fetch_and_test(remote)
+ # deleted remote will not be fetched
+ self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch)
+
+ # prune stale tracking branches
+ stale_refs = remote.stale_refs
+ assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference)
+ RemoteReference.delete(rw_repo, *stale_refs)
+
+ # test single branch fetch with refspec including target remote
+ res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote)
+ assert len(res) == 1 and get_info(res, remote, 'master')
+
+ # ... with refspec and no target
+ res = fetch_and_test(remote, refspec='master')
+ assert len(res) == 1
+
+ # add new tag reference
+ rtag = TagReference.create(remote_repo, "1.0-RV_hello.there")
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit
+ assert tinfo.flags & tinfo.NEW_TAG
+
+ # adjust tag commit
+ Reference.set_object(rtag, rhead.commit.parents[0].parents[0])
+ res = fetch_and_test(remote, tags=True)
+ tinfo = res[str(rtag)]
+ assert tinfo.commit == rtag.commit
+ assert tinfo.flags & tinfo.TAG_UPDATE
+
+ # delete remote tag - local one will stay
+ TagReference.delete(remote_repo, rtag)
+ res = fetch_and_test(remote, tags=True)
+ self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag))
+
+ # provoke to receive actual objects to see what kind of output we have to
+ # expect. For that we need a remote transport protocol
+ # Create a new UN-shared repo and fetch into it after we pushed a change
+ # to the shared repo
+ other_repo_dir = tempfile.mktemp("other_repo")
+ # must clone with a local path for the repo implementation not to freak out
+ # as it wants local paths only ( which I can understand )
+ other_repo = remote_repo.clone(other_repo_dir, shared=False)
+ remote_repo_url = "git://localhost%s"%remote_repo.git_dir
+
+ # put origin to git-url
+ other_origin = other_repo.remotes.origin
+ other_origin.config_writer.set("url", remote_repo_url)
+ # it automatically creates alternates as remote_repo is shared as well.
+ # It will use the transport though and ignore alternates when fetching
+ # assert not other_repo.alternates # this would fail
+
+ # assure we are in the right state
+ rw_repo.head.reset(remote.refs.master, working_tree=True)
+ try:
+ self._commit_random_file(rw_repo)
+ remote.push(rw_repo.head.reference)
+
+ # here I would expect to see remote-information about packing
+ # objects and so on. Unfortunately, this does not happen
+ # if we are redirecting the output - git explicitly checks for this
+ # and only provides progress information to ttys
+ res = fetch_and_test(other_origin)
+ finally:
+ shutil.rmtree(other_repo_dir)
+ # END test and cleanup
+
+ def _test_push_and_pull(self,remote, rw_repo, remote_repo):
+ # push our changes
+ lhead = rw_repo.head
+ lindex = rw_repo.index
+ # assure we are on master and it is checked out where the remote is
+ try:
+ lhead.reference = rw_repo.heads.master
+ except AttributeError:
+ # if the author is on a non-master branch, the clones might not have
+ # a local master yet. We simply create it
+ lhead.reference = rw_repo.create_head('master')
+ # END master handling
+ lhead.reset(remote.refs.master, working_tree=True)
+
+ # push without spec should fail ( without further configuration )
+ # well, works nicely
+ # self.failUnlessRaises(GitCommandError, remote.push)
+
+ # simple file push
+ self._commit_random_file(rw_repo)
+ progress = TestRemoteProgress()
+ res = remote.push(lhead.reference, progress)
+ assert isinstance(res, IterableList)
+ self._do_test_push_result(res, remote)
+ progress.make_assertion()
+
+ # rejected - undo last commit
+ lhead.reset("HEAD~1")
+ res = remote.push(lhead.reference)
+ assert res[0].flags & PushInfo.ERROR
+ assert res[0].flags & PushInfo.REJECTED
+ self._do_test_push_result(res, remote)
+
+ # force rejected pull
+ res = remote.push('+%s' % lhead.reference)
+ assert res[0].flags & PushInfo.ERROR == 0
+ assert res[0].flags & PushInfo.FORCED_UPDATE
+ self._do_test_push_result(res, remote)
+
+ # invalid refspec
+ res = remote.push("hellothere")
+ assert len(res) == 0
+
+ # push new tags
+ progress = TestRemoteProgress()
+ to_be_updated = "my_tag.1.0RV"
+ new_tag = TagReference.create(rw_repo, to_be_updated)
+ other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message")
+ res = remote.push(progress=progress, tags=True)
+ assert res[-1].flags & PushInfo.NEW_TAG
+ progress.make_assertion()
+ self._do_test_push_result(res, remote)
+
+ # update push new tags
+ # Rejection is default
+ new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True)
+ res = remote.push(tags=True)
+ self._do_test_push_result(res, remote)
+ assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR
+
+ # push force this tag
+ res = remote.push("+%s" % new_tag.path)
+ assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE
+
+ # delete tag - have to do it using refspec
+ res = remote.push(":%s" % new_tag.path)
+ self._do_test_push_result(res, remote)
+ assert res[0].flags & PushInfo.DELETED
+ # Currently progress is not properly transferred, especially not using
+ # the git daemon
+ # progress.assert_received_message()
+
+ # push new branch
+ new_head = Head.create(rw_repo, "my_new_branch")
+ progress = TestRemoteProgress()
+ res = remote.push(new_head, progress)
+ assert res[0].flags & PushInfo.NEW_HEAD
+ progress.make_assertion()
+ self._do_test_push_result(res, remote)
+
+ # delete new branch on the remote end and locally
+ res = remote.push(":%s" % new_head.path)
+ self._do_test_push_result(res, remote)
+ Head.delete(rw_repo, new_head)
+ assert res[-1].flags & PushInfo.DELETED
+
+ # --all
+ res = remote.push(all=True)
+ self._do_test_push_result(res, remote)
+
+ remote.pull('master')
+
+ # cleanup - delete created tags and branches as we are in an inner loop on
+ # the same repository
+ TagReference.delete(rw_repo, new_tag, other_tag)
+ remote.push(":%s" % other_tag.path)
+
+ @with_rw_and_rw_remote_repo('0.1.6')
+ def test_base(self, rw_repo, remote_repo):
+ num_remotes = 0
+ remote_set = set()
+ ran_fetch_test = False
+
+ for remote in rw_repo.remotes:
+ num_remotes += 1
+ assert remote == remote
+ assert str(remote) != repr(remote)
+ remote_set.add(remote)
+ remote_set.add(remote) # should already exist
+
+ # REFS
+ refs = remote.refs
+ assert refs
+ for ref in refs:
+ assert ref.remote_name == remote.name
+ assert ref.remote_head
+ # END for each ref
+
+ # OPTIONS
+ # cannot use 'fetch' key anymore as it is now a method
+ for opt in ("url", ):
+ val = getattr(remote, opt)
+ reader = remote.config_reader
+ assert reader.get(opt) == val
+ assert reader.get_value(opt, None) == val
+
+ # unable to write with a reader
+ self.failUnlessRaises(IOError, reader.set, opt, "test")
+
+ # change value
+ writer = remote.config_writer
+ new_val = "myval"
+ writer.set(opt, new_val)
+ assert writer.get(opt) == new_val
+ writer.set(opt, val)
+ assert writer.get(opt) == val
+ del(writer)
+ assert getattr(remote, opt) == val
+ # END for each default option key
+
+ # RENAME
+ other_name = "totally_other_name"
+ prev_name = remote.name
+ assert remote.rename(other_name) == remote
+ assert prev_name != remote.name
+ # multiple times
+ for time in range(2):
+ assert remote.rename(prev_name).name == prev_name
+ # END for each rename ( back to prev_name )
+
+ # PUSH/PULL TESTING
+ self._test_push_and_pull(remote, rw_repo, remote_repo)
+
+ # FETCH TESTING
+ # Only for remotes - local cases are the same or less complicated
+ # as additional progress information will never be emitted
+ if remote.name == "daemon_origin":
+ self._do_test_fetch(remote, rw_repo, remote_repo)
+ ran_fetch_test = True
+ # END fetch test
+
+ remote.update()
+ # END for each remote
+
+ assert ran_fetch_test
+ assert num_remotes
+ assert num_remotes == len(remote_set)
+
+ origin = rw_repo.remote('origin')
+ assert origin == rw_repo.remotes.origin
+
+ @with_rw_repo('HEAD', bare=True)
+ def test_creation_and_removal(self, bare_rw_repo):
+ new_name = "test_new_one"
+ arg_list = (new_name, "git@server:hello.git")
+ remote = Remote.create(bare_rw_repo, *arg_list )
+ assert remote.name == "test_new_one"
+ assert remote in bare_rw_repo.remotes
+
+ # create same one again
+ self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list)
+
+ Remote.remove(bare_rw_repo, new_name)
+
+ for remote in bare_rw_repo.remotes:
+ if remote.name == new_name:
+ raise AssertionError("Remote removal failed")
+ # END if deleted remote matches existing remote's name
+ # END for each remote
+
+
+
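
TestRemoteProgress above decodes op_code by masking: the operation id lives in op_code & OP_MASK, the BEGIN/END stage bits in op_code & STAGE_MASK. A compact handler following the same scheme, sketched against the interface the test imports (not part of the patch):

    from git.db.interface import RemoteProgress

    class PrintProgress(RemoteProgress):
        def update(self, op_code, cur_count, max_count=None, message='', input=''):
            op_id = op_code & self.OP_MASK      # COUNTING, COMPRESSING or WRITING
            if op_code & self.BEGIN:
                print "begin op %i" % op_id
            if max_count:
                print "%s/%s %s" % (cur_count, max_count, message)
            if op_code & self.END:
                print "end op %i" % op_id
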
diff --git a/git/test/test_stats.py b/git/test/test_stats.py
index 2bdb0a89..27be6a77 100644
--- a/git/test/test_stats.py
+++ b/git/test/test_stats.py
@@ -4,8 +4,12 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.test.lib import *
-from git import *
+from git.test.lib import (
+ TestBase,
+ fixture,
+ assert_equal
+ )
+from git.util import Stats
class TestStats(TestBase):
diff --git a/git/test/test_stream.py b/git/test/test_stream.py
new file mode 100644
index 00000000..8d7a5f9a
--- /dev/null
+++ b/git/test/test_stream.py
@@ -0,0 +1,155 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Test for object db"""
+from lib import (
+ TestBase,
+ DummyStream,
+ Sha1Writer,
+ make_bytes,
+ make_object,
+ fixture_path
+ )
+
+from git.stream import *
+from git.util import (
+ NULL_HEX_SHA,
+ hex_to_bin
+ )
+
+from git.util import zlib
+from git.typ import (
+ str_blob_type
+ )
+
+from git.db.py.loose import PureLooseObjectODB
+import time
+import tempfile
+import os
+
+
+
+
+class TestStream(TestBase):
+ """Test stream classes"""
+
+ data_sizes = (15, 10000, 1000*1024+512)
+
+ def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None):
+ """Make stream tests - the orig_stream is seekable, allowing it to be
+ rewound and reused
+ :param cdata: the data we expect to read from stream, the contents
+ :param rewind_stream: function called to rewind the stream to make it ready
+ for reuse"""
+ ns = 10
+ assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata))
+
+ # read in small steps
+ ss = len(cdata) / ns
+ for i in range(ns):
+ data = stream.read(ss)
+ chunk = cdata[i*ss:(i+1)*ss]
+ assert data == chunk
+ # END for each step
+ rest = stream.read()
+ if rest:
+ assert rest == cdata[-len(rest):]
+ # END handle rest
+
+ if isinstance(stream, DecompressMemMapReader):
+ assert len(stream.data()) == stream.compressed_bytes_read()
+ # END handle special type
+
+ rewind_stream(stream)
+
+ # read everything
+ rdata = stream.read()
+ assert rdata == cdata
+
+ if isinstance(stream, DecompressMemMapReader):
+ assert len(stream.data()) == stream.compressed_bytes_read()
+ # END handle special type
+
+ def test_decompress_reader(self):
+ for close_on_deletion in range(2):
+ for with_size in range(2):
+ for ds in self.data_sizes:
+ cdata = make_bytes(ds, randomize=False)
+
+ # zdata = zipped actual data
+ # cdata = original content data
+
+ # create reader
+ if with_size:
+ # need object data
+ zdata = zlib.compress(make_object(str_blob_type, cdata))
+ type, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion)
+ assert size == len(cdata)
+ assert type == str_blob_type
+
+ # even if we don't set the size, it will be set automatically on first read
+ test_reader = DecompressMemMapReader(zdata, close_on_deletion=False)
+ assert test_reader._s == len(cdata)
+ else:
+ # here we need content data
+ zdata = zlib.compress(cdata)
+ reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata))
+ assert reader._s == len(cdata)
+ # END get reader
+
+ self._assert_stream_reader(reader, cdata, lambda r: r.seek(0))
+
+ # put in a dummy stream for closing
+ dummy = DummyStream()
+ reader._m = dummy
+
+ assert not dummy.closed
+ del(reader)
+ assert dummy.closed == close_on_deletion
+ # END for each datasize
+ # END whether size should be used
+ # END whether stream should be closed when deleted
+
+ def test_sha_writer(self):
+ writer = Sha1Writer()
+ assert 2 == writer.write("hi")
+ assert len(writer.sha(as_hex=1)) == 40
+ assert len(writer.sha(as_hex=0)) == 20
+
+ # make sure it does something ;)
+ prev_sha = writer.sha()
+ writer.write("hi again")
+ assert writer.sha() != prev_sha
+
+ def test_compressed_writer(self):
+ for ds in self.data_sizes:
+ fd, path = tempfile.mkstemp()
+ ostream = FDCompressedSha1Writer(fd)
+ data = make_bytes(ds, randomize=False)
+
+ # for now, just a single write, code doesn't care about chunking
+ assert len(data) == ostream.write(data)
+ ostream.close()
+
+ # it's closed already
+ self.failUnlessRaises(OSError, os.close, fd)
+
+ # read everything back, compare to data we zip
+ fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0))
+ written_data = os.read(fd, os.path.getsize(path))
+ assert len(written_data) == os.path.getsize(path)
+ os.close(fd)
+ assert written_data == zlib.compress(data, 1) # best speed
+
+ os.remove(path)
+ # END for each data size
+
+ def test_decompress_reader_special_case(self):
+ odb = PureLooseObjectODB(fixture_path('objects'))
+ ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b'))
+
+ # if there is a bug, we will be missing exactly one byte!
+ data = ostream.read()
+ assert len(data) == ostream.size
+
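
As a usage note for the stream types tested above: DecompressMemMapReader wraps a zlib-compressed buffer and reads the plain content back, while Sha1Writer hashes whatever is written to it. A minimal round trip, assuming the positional constructor used in test_decompress_reader:

    from git.stream import DecompressMemMapReader, Sha1Writer
    from git.util import zlib

    content = "hello" * 100
    reader = DecompressMemMapReader(zlib.compress(content), False, len(content))
    assert reader.read() == content

    writer = Sha1Writer()
    writer.write(content)
    assert len(writer.sha(as_hex=True)) == 40
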
diff --git a/git/test/test_util.py b/git/test/test_util.py
index e55a6d15..f737e660 100644
--- a/git/test/test_util.py
+++ b/git/test/test_util.py
@@ -7,7 +7,7 @@
import os
import tempfile
-from git.test.lib import *
+from lib import TestBase
from git.util import *
from git.objects.util import *
from git import *
@@ -15,6 +15,14 @@ from git.cmd import dashify
import time
+from git.util import (
+ to_hex_sha,
+ to_bin_sha,
+ NULL_HEX_SHA,
+ LockedFD,
+ Actor
+ )
+
class TestUtils(TestBase):
def setup(self):
@@ -25,8 +33,8 @@ class TestUtils(TestBase):
}
def test_it_should_dashify(self):
- assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
- assert_equal('foo', dashify('foo'))
+ assert 'this-is-my-argument' == dashify('this_is_my_argument')
+ assert 'foo' == dashify('foo')
def test_lock_file(self):
@@ -107,3 +115,118 @@ class TestUtils(TestBase):
assert isinstance(Actor.committer(cr), Actor)
assert isinstance(Actor.author(cr), Actor)
#END assure config reader is handled
+
+ def test_basics(self):
+ assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA
+ assert len(to_bin_sha(NULL_HEX_SHA)) == 20
+ assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA
+
+ def _cmp_contents(self, file_path, data):
+ # raise if data from file at file_path
+ # does not match data string
+ fp = open(file_path, "rb")
+ try:
+ assert fp.read() == data
+ finally:
+ fp.close()
+
+ def test_lockedfd(self):
+ my_file = tempfile.mktemp()
+ orig_data = "hello"
+ new_data = "world"
+ my_file_fp = open(my_file, "wb")
+ my_file_fp.write(orig_data)
+ my_file_fp.close()
+
+ try:
+ lfd = LockedFD(my_file)
+ lockfilepath = lfd._lockfilepath()
+
+ # cannot end before it was started
+ self.failUnlessRaises(AssertionError, lfd.rollback)
+ self.failUnlessRaises(AssertionError, lfd.commit)
+
+ # open for writing
+ assert not os.path.isfile(lockfilepath)
+ wfd = lfd.open(write=True)
+ assert lfd._fd is wfd
+ assert os.path.isfile(lockfilepath)
+
+ # write data and fail
+ os.write(wfd, new_data)
+ lfd.rollback()
+ assert lfd._fd is None
+ self._cmp_contents(my_file, orig_data)
+ assert not os.path.isfile(lockfilepath)
+
+ # an additional call doesn't fail
+ lfd.commit()
+ lfd.rollback()
+
+ # test reading
+ lfd = LockedFD(my_file)
+ rfd = lfd.open(write=False)
+ assert os.read(rfd, len(orig_data)) == orig_data
+
+ assert os.path.isfile(lockfilepath)
+ # deletion rolls back
+ del(lfd)
+ assert not os.path.isfile(lockfilepath)
+
+
+ # write data - concurrently
+ lfd = LockedFD(my_file)
+ olfd = LockedFD(my_file)
+ assert not os.path.isfile(lockfilepath)
+ wfdstream = lfd.open(write=True, stream=True) # this time as stream
+ assert os.path.isfile(lockfilepath)
+ # another one fails
+ self.failUnlessRaises(IOError, olfd.open)
+
+ wfdstream.write(new_data)
+ lfd.commit()
+ assert not os.path.isfile(lockfilepath)
+ self._cmp_contents(my_file, new_data)
+
+ # could test automatic _end_writing on destruction
+ finally:
+ os.remove(my_file)
+ # END final cleanup
+
+ # try non-existing file for reading
+ lfd = LockedFD(tempfile.mktemp())
+ try:
+ lfd.open(write=False)
+ except OSError:
+ assert not os.path.exists(lfd._lockfilepath())
+ else:
+ self.fail("expected OSError")
+ # END handle exceptions
+
+
+class TestActor(TestBase):
+ def test_from_string_should_separate_name_and_email(self):
+ a = Actor._from_string("Michael Trier <mtrier@example.com>")
+ assert "Michael Trier" == a.name
+ assert "mtrier@example.com" == a.email
+
+ # base type capabilities
+ assert a == a
+ assert not ( a != a )
+ m = set()
+ m.add(a)
+ m.add(a)
+ assert len(m) == 1
+
+ def test_from_string_should_handle_just_name(self):
+ a = Actor._from_string("Michael Trier")
+ assert "Michael Trier" == a.name
+ assert None == a.email
+
+ def test_should_display_representation(self):
+ a = Actor._from_string("Michael Trier <mtrier@example.com>")
+ assert '<git.Actor "Michael Trier <mtrier@example.com>">' == repr(a)
+
+ def test_str_should_alias_name(self):
+ a = Actor._from_string("Michael Trier <mtrier@example.com>")
+ assert a.name == str(a)
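
test_lockedfd above walks the full lock-file protocol; condensed, the write path looks like this (the target path is illustrative):

    import os
    from git.util import LockedFD

    lfd = LockedFD('/tmp/config')       # hypothetical target file
    fd = lfd.open(write=True)           # creates the lock file next to the target
    os.write(fd, 'new content')
    lfd.commit()                        # moves the lock file onto the target
    # lfd.rollback() would discard the lock file and keep the original instead
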
diff --git a/git/typ.py b/git/typ.py
new file mode 100644
index 00000000..a2e719be
--- /dev/null
+++ b/git/typ.py
@@ -0,0 +1,27 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing information about types known to the database"""
+
+#{ String types
+
+# For compatibility only, use ObjectType instead
+str_blob_type = "blob"
+str_commit_type = "commit"
+str_tree_type = "tree"
+str_tag_type = "tag"
+
+class ObjectType(object):
+ """Enumeration providing object types as strings and ids"""
+ blob = str_blob_type
+ commit = str_commit_type
+ tree = str_tree_type
+ tag = str_tag_type
+
+ commit_id = 1
+ tree_id = 2
+ blob_id = 3
+ tag_id = 4
+
+#} END string types
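
Since ObjectType carries both the string and the integer spelling of each type, a mapping between them is trivial to derive where needed; a possible helper (illustration only, not part of the module):

    from git.typ import ObjectType

    type_id_map = {
        ObjectType.commit: ObjectType.commit_id,    # 'commit' -> 1
        ObjectType.tree:   ObjectType.tree_id,      # 'tree'   -> 2
        ObjectType.blob:   ObjectType.blob_id,      # 'blob'   -> 3
        ObjectType.tag:    ObjectType.tag_id,       # 'tag'    -> 4
    }
    assert type_id_map['blob'] == 3
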
diff --git a/git/util.py b/git/util.py
index 7cbef07f..6009e158 100644
--- a/git/util.py
+++ b/git/util.py
@@ -4,28 +4,149 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+import platform
+import binascii
import os
-import re
+import mmap
import sys
+import errno
+import re
import time
import tempfile
-import platform
-
-from gitdb.util import (
- make_sha,
- LockedFD,
- file_contents_ro,
- LazyMixin,
- to_hex_sha,
- to_bin_sha
- )
__all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",
"join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList",
"BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists',
- 'RemoteProgress')
+ 'RepoAliasMixin', 'LockedFD', 'LazyMixin' )
+
+from cStringIO import StringIO
+
+# in py 2.4, StringIO is only StringI, without write support.
+# Hence we must use the python implementation for this
+if sys.version_info[1] < 5:
+ from StringIO import StringIO
+# END handle python 2.4
+
+try:
+ import async.mod.zlib as zlib
+except ImportError:
+ import zlib
+# END try async zlib
+
+from async import ThreadPool
+
+try:
+ import hashlib
+except ImportError:
+ import sha
+
+try:
+ from struct import unpack_from
+except ImportError:
+ from struct import unpack, calcsize
+ __calcsize_cache = dict()
+ def unpack_from(fmt, data, offset=0):
+ try:
+ size = __calcsize_cache[fmt]
+ except KeyError:
+ size = calcsize(fmt)
+ __calcsize_cache[fmt] = size
+ # END exception handling
+ return unpack(fmt, data[offset : offset + size])
+ # END own unpack_from implementation
+
+
+#{ Globals
+
+# A pool distributing tasks, initially with zero threads, hence everything
+# will be handled in the main thread
+pool = ThreadPool(0)
+
+#} END globals
+
+
+#{ Aliases
+
+hex_to_bin = binascii.a2b_hex
+bin_to_hex = binascii.b2a_hex
+
+# errors
+ENOENT = errno.ENOENT
+
+# os shortcuts
+exists = os.path.exists
+mkdir = os.mkdir
+chmod = os.chmod
+isdir = os.path.isdir
+isfile = os.path.isfile
+rename = os.rename
+remove = os.remove
+dirname = os.path.dirname
+basename = os.path.basename
+normpath = os.path.normpath
+expandvars = os.path.expandvars
+expanduser = os.path.expanduser
+abspath = os.path.abspath
+join = os.path.join
+read = os.read
+write = os.write
+close = os.close
+fsync = os.fsync
+
+# constants
+NULL_HEX_SHA = "0"*40
+NULL_BIN_SHA = "\0"*20
+
+#} END Aliases
+
+#{ compatibility stuff ...
+
+class _RandomAccessStringIO(object):
+ """Wrapper to provide required functionality in case memory maps cannot or may
+ not be used. This is only really required in python 2.4"""
+ __slots__ = '_sio'
+
+ def __init__(self, buf=''):
+ self._sio = StringIO(buf)
+
+ def __getattr__(self, attr):
+ return getattr(self._sio, attr)
+
+ def __len__(self):
+ return len(self.getvalue())
+
+ def __getitem__(self, i):
+ return self.getvalue()[i]
+
+ def __getslice__(self, start, end):
+ return self.getvalue()[start:end]
+
+#} END compatibility stuff ...
+
+#{ Routines
+
+def get_user_id():
+ """:return: string identifying the currently active system user as name@node
+ :note: user can be set with the 'USER' environment variable, usually set on windows"""
+ ukn = 'UNKNOWN'
+ username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
+ if username == ukn and hasattr(os, 'getlogin'):
+ username = os.getlogin()
+ # END get username from login
+ return "%s@%s" % (username, platform.node())
+
+def is_git_dir(d):
+ """ This is taken from the git setup.c:is_git_directory
+ function."""
+ if isdir(d) and \
+ isdir(join(d, 'objects')) and \
+ isdir(join(d, 'refs')):
+ headref = join(d, 'HEAD')
+ return isfile(headref) or \
+ (os.path.islink(headref) and
+ os.readlink(headref).startswith('refs'))
+ return False
-#{ Utility Methods
def stream_copy(source, destination, chunk_size=512*1024):
"""Copy all data from the source stream into the destination stream in chunks
@@ -41,6 +162,87 @@ def stream_copy(source, destination, chunk_size=512*1024):
break
# END reading output stream
return br
+
+def make_sha(source=''):
+ """A python2.4 workaround for the sha/hashlib module fiasco
+ :note: From the dulwich project """
+ try:
+ return hashlib.sha1(source)
+ except NameError:
+ sha1 = sha.sha(source)
+ return sha1
+
+def allocate_memory(size):
+ """:return: a file-protocol accessible memory block of the given size"""
+ if size == 0:
+ return _RandomAccessStringIO('')
+ # END handle empty chunks gracefully
+
+ try:
+ return mmap.mmap(-1, size) # read-write by default
+ except EnvironmentError:
+ # setup real memory instead
+ # this of course may fail if the amount of memory is not available in
+ # one chunk - would only be the case in python 2.4, being more likely on
+ # 32 bit systems.
+ return _RandomAccessStringIO("\0"*size)
+ # END handle memory allocation
+
+
+def file_contents_ro(fd, stream=False, allow_mmap=True):
+ """:return: read-only contents of the file represented by the file descriptor fd
+ :param fd: file descriptor opened for reading
+ :param stream: if False, random access is provided, otherwise the stream interface
+ is provided.
+ :param allow_mmap: if True, it's allowed to map the contents into memory, which
+ allows large files to be handled and accessed efficiently. The file-descriptor
+ will change its position if this is False"""
+ try:
+ if allow_mmap:
+ # supports stream and random access
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ except EnvironmentError:
+ # python 2.4 issue, 0 wants to be the actual size
+ return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
+ # END handle python 2.4
+ except OSError:
+ pass
+ # END exception handling
+
+ # read manually
+ contents = os.read(fd, os.fstat(fd).st_size)
+ if stream:
+ return _RandomAccessStringIO(contents)
+ return contents
+
+def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
+ """Get the file contents at filepath as fast as possible
+ :return: random access compatible memory of the given filepath
+ :param stream: see ``file_contents_ro``
+ :param allow_mmap: see ``file_contents_ro``
+ :param flags: additional flags to pass to os.open
+ :raise OSError: If the file could not be opened
+ :note: for now we don't try to use O_NOATIME directly as the right value needs to be
+ shared per database in fact. It only makes a real difference for loose object
+ databases anyway, and they use it with the help of the ``flags`` parameter"""
+ fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags)
+ try:
+ return file_contents_ro(fd, stream, allow_mmap)
+ finally:
+ close(fd)
+ # END assure file is closed
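+
+# Usage sketch (illustrative; the path is hypothetical):
+#
+#   data = file_contents_ro_filepath('.git/HEAD')
+#   data[0:4]    # random access into the (possibly memory-mapped) contents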
+
+def to_hex_sha(sha):
+ """:return: hexified version of sha"""
+ if len(sha) == 40:
+ return sha
+ return bin_to_hex(sha)
+
+def to_bin_sha(sha):
+ """:return: binary version of the given sha, converting from hex if necessary"""
+ if len(sha) == 20:
+ return sha
+ return hex_to_bin(sha)
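+
+# Round-trip sketch (illustrative only): both converters are no-ops when the
+# input already has the requested representation.
+#
+#   to_hex_sha(NULL_BIN_SHA) == NULL_HEX_SHA   # True
+#   to_bin_sha(NULL_HEX_SHA) == NULL_BIN_SHA   # True
+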
def join_path(a, *p):
"""Join path tokens together similar to os.path.join, but always use
@@ -61,6 +263,7 @@ def to_native_path_windows(path):
def to_native_path_linux(path):
return path.replace('\\','/')
+
if sys.platform.startswith('win'):
to_native_path = to_native_path_windows
else:
@@ -75,7 +278,7 @@ def join_path_native(a, *p):
needed to play it safe on my dear windows and to assure nice paths that only
use '\'"""
return to_native_path(join_path(a, *p))
-
+
def assure_directory_exists(path, is_file=False):
"""Assure that the directory pointed to by path exists.
@@ -89,138 +292,287 @@ def assure_directory_exists(path, is_file=False):
os.makedirs(path)
return True
return False
-
-def get_user_id():
- """:return: string identifying the currently active system user as name@node
- :note: user can be set with the 'USER' environment variable, usually set on windows"""
- ukn = 'UNKNOWN'
- username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
- if username == ukn and hasattr(os, 'getlogin'):
- username = os.getlogin()
- # END get username from login
- return "%s@%s" % (username, platform.node())
-#} END utilities
-#{ Classes
+#} END routines
+
-class RemoteProgress(object):
+#{ Utilities
+
+class LazyMixin(object):
"""
- Handler providing an interface to parse progress information emitted by git-push
- and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+ Base class providing an interface to lazily retrieve attribute values upon
+ first access. If slots are used, memory will only be reserved once the attribute
+ is actually accessed and retrieved the first time. All future accesses will
+ return the cached value as stored in the instance's dict or slot.
"""
- _num_op_codes = 5
- BEGIN, END, COUNTING, COMPRESSING, WRITING = [1 << x for x in range(_num_op_codes)]
- STAGE_MASK = BEGIN|END
- OP_MASK = ~STAGE_MASK
-
- __slots__ = ("_cur_line", "_seen_ops")
- re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
- re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
-
- def __init__(self):
- self._seen_ops = list()
-
- def _parse_progress_line(self, line):
- """Parse progress information from the given line as retrieved by git-push
- or git-fetch
-
- :return: list(line, ...) list of lines that could not be processed"""
- # handle
- # Counting objects: 4, done.
- # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
- self._cur_line = line
- sub_lines = line.split('\r')
- failed_lines = list()
- for sline in sub_lines:
- # find esacpe characters and cut them away - regex will not work with
- # them as they are non-ascii. As git might expect a tty, it will send them
- last_valid_index = None
- for i,c in enumerate(reversed(sline)):
- if ord(c) < 32:
- # its a slice index
- last_valid_index = -i-1
- # END character was non-ascii
- # END for each character in sline
- if last_valid_index is not None:
- sline = sline[:last_valid_index]
- # END cut away invalid part
- sline = sline.rstrip()
-
- cur_count, max_count = None, None
- match = self.re_op_relative.match(sline)
- if match is None:
- match = self.re_op_absolute.match(sline)
-
- if not match:
- self.line_dropped(sline)
- failed_lines.append(sline)
- continue
- # END could not get match
-
- op_code = 0
- remote, op_name, percent, cur_count, max_count, message = match.groups()
-
- # get operation id
- if op_name == "Counting objects":
- op_code |= self.COUNTING
- elif op_name == "Compressing objects":
- op_code |= self.COMPRESSING
- elif op_name == "Writing objects":
- op_code |= self.WRITING
- else:
- raise ValueError("Operation name %r unknown" % op_name)
-
- # figure out stage
- if op_code not in self._seen_ops:
- self._seen_ops.append(op_code)
- op_code |= self.BEGIN
- # END begin opcode
-
- if message is None:
- message = ''
- # END message handling
-
- message = message.strip()
- done_token = ', done.'
- if message.endswith(done_token):
- op_code |= self.END
- message = message[:-len(done_token)]
- # END end message handling
-
- self.update(op_code, cur_count, max_count, message)
- # END for each sub line
- return failed_lines
- def line_dropped(self, line):
- """Called whenever a line could not be understood and was therefore dropped."""
+ __slots__ = tuple()
+
+ def __getattr__(self, attr):
+ """
+ Whenever an attribute is requested that we do not know, we allow it
+ to be created and set. Next time the same attribute is requested, it is simply
+ returned from our dict/slots. """
+ self._set_cache_(attr)
+ # will raise in case the cache was not created
+ return object.__getattribute__(self, attr)
+
+ def _set_cache_(self, attr):
+ """
+ This method should be overridden in the derived class.
+ It should check whether the attribute named by attr can be created
+ and cached. Do nothing if you do not know the attribute, or delegate
+ to the base class implementation instead.
+
+ The derived class may create as many additional attributes as it deems
+ necessary in case a git command returns more information than represented
+ in the single attribute."""
pass
+
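+# Subclass sketch (illustrative only; 'Example' and its 'size' attribute are
+# made up) showing the intended _set_cache_ protocol:
+#
+#   class Example(LazyMixin):
+#       def _set_cache_(self, attr):
+#           if attr == 'size':
+#               self.size = 42    # an expensive computation would go here
+#           else:
+#               super(Example, self)._set_cache_(attr)
+#
+#   Example().size    # computed on first access, cached afterwards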
+
+class LockedFD(object):
+ """
+ This class facilitates a safe read and write operation to a file on disk.
+ If we write to 'file', we obtain a lock file at 'file.lock' and write to
+ that instead. If we succeed, the lock file will be renamed to overwrite
+ the original file.
+
+ When reading, we still obtain a lock file to prevent other writers from
+ succeeding while we are reading the file.
+
+ This type handles errors correctly in that it will assure a consistent state
+ on destruction.
- def update(self, op_code, cur_count, max_count=None, message=''):
- """Called whenever the progress changes
+ :note: with this setup, parallel reading is not possible"""
+ __slots__ = ("_filepath", '_fd', '_write')
+
+ def __init__(self, filepath):
+ """Initialize an instance with the given filepath"""
+ self._filepath = filepath
+ self._fd = None
+ self._write = None # if True, we write a file
+
+ def __del__(self):
+ # will do nothing if the file descriptor is already closed
+ if self._fd is not None:
+ self.rollback()
+
+ def _lockfilepath(self):
+ return "%s.lock" % self._filepath
+
+ def open(self, write=False, stream=False):
+ """
+ Open the file descriptor for reading or writing, both in binary mode.
+
+ :param write: if True, the file descriptor will be opened for writing.
+ Otherwise it will be opened read-only.
+ :param stream: if True, the file descriptor will be wrapped into a simple stream
+ object which supports only reading or writing
+ :return: fd to read from or write to. It is still maintained by this instance
+ and must not be closed directly
+ :raise IOError: if the lock could not be retrieved
+ :raise OSError: If the actual file could not be opened for reading
+ :note: must only be called once"""
+ if self._write is not None:
+ raise AssertionError("Called %s multiple times" % self.open)
+
+ self._write = write
+
+ # try to open the lock file
+ binary = getattr(os, 'O_BINARY', 0)
+ lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
+ try:
+ fd = os.open(self._lockfilepath(), lockmode, 0600)
+ if not write:
+ os.close(fd)
+ else:
+ self._fd = fd
+ # END handle file descriptor
+ except OSError:
+ raise IOError("Lock at %r could not be obtained" % self._lockfilepath())
+ # END handle lock retrieval
+
+ # open actual file if required
+ if self._fd is None:
+ # we could specify exclusive here, as we obtained the lock anyway
+ try:
+ self._fd = os.open(self._filepath, os.O_RDONLY | binary)
+ except:
+ # assure we release our lockfile
+ os.remove(self._lockfilepath())
+ raise
+ # END handle lockfile
+ # END open descriptor for reading
+
+ if stream:
+ # need delayed import
+ from stream import FDStream
+ return FDStream(self._fd)
+ else:
+ return self._fd
+ # END handle stream
+
+ def commit(self):
+ """When done writing, call this function to commit your changes into the
+ actual file.
+ The file descriptor will be closed, and the lockfile handled.
+ :note: can be called multiple times"""
+ self._end_writing(successful=True)
+
+ def rollback(self):
+ """Abort your operation without any changes. The file descriptor will be
+ closed, and the lock released.
+ :note: can be called multiple times"""
+ self._end_writing(successful=False)
+
+ def _end_writing(self, successful=True):
+ """Handle the lock according to the write mode """
+ if self._write is None:
+ raise AssertionError("Cannot end operation if it wasn't started yet")
+
+ if self._fd is None:
+ return
+
+ os.close(self._fd)
+ self._fd = None
- :param op_code:
- Integer allowing to be compared against Operation IDs and stage IDs.
+ lockfile = self._lockfilepath()
+ if self._write and successful:
+ # on windows, rename does not silently overwrite the existing one
+ if sys.platform == "win32":
+ if isfile(self._filepath):
+ os.remove(self._filepath)
+ # END remove if exists
+ # END win32 special handling
+ os.rename(lockfile, self._filepath)
- Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation
- ID as well as END. It may be that BEGIN and END are set at once in case only
- one progress message was emitted due to the speed of the operation.
- Between BEGIN and END, none of these flags will be set
+ # assure others can at least read the file - the lock file left it at rw-------.
+ # We may also write that file ourselves; on windows, write permission
+ # effectively serves as removal protection as well
+ chmod(self._filepath, 0644)
+ else:
+ # just delete the file so far, we failed
+ os.remove(lockfile)
+ # END successful handling
+
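+# Usage sketch for the write path (illustrative; the path is hypothetical):
+#
+#   lfd = LockedFD('/path/to/file')
+#   fd = lfd.open(write=True)    # creates and holds /path/to/file.lock
+#   write(fd, 'new contents')    # 'write' is the os.write alias from above
+#   lfd.commit()                 # renames the lock file over the original
+#
+# Calling lfd.rollback() instead would remove the lock file and leave the
+# original untouched.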
+
+class LockFile(object):
+ """Provides methods to obtain, check for, and release a file-based lock which
+ should be used to handle concurrent access to the same file.
+
+ As we are a utility class to be derived from, we only use protected methods.
+
+ Locks will automatically be released on destruction"""
+ __slots__ = ("_file_path", "_owns_lock")
+
+ def __init__(self, file_path):
+ self._file_path = file_path
+ self._owns_lock = False
+
+ def __del__(self):
+ self._release_lock()
+
+ def _lock_file_path(self):
+ """:return: Path to lockfile"""
+ return "%s.lock" % (self._file_path)
+
+ def _has_lock(self):
+ """:return: True if we currently own the lock"""
+ if not self._owns_lock:
+ return False
+
+ return True
+
+ def _obtain_lock_or_raise(self):
+ """Create a lock file as a flag for other instances, marking our instance as the lock-holder
+
+ :raise IOError: if a lock was already present or a lock file could not be written"""
+ if self._has_lock():
+ return
+ lock_file = self._lock_file_path()
+ if os.path.isfile(lock_file):
+ raise IOError("Lock for file %r already exists, delete %r in case the lock is illegal" % (self._file_path, lock_file))
- Operation IDs are all held within the OP_MASK. Only one Operation ID will
- be active per call.
- :param cur_count: Current absolute count of items
+ try:
+ fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
+ os.close(fd)
+ except OSError,e:
+ raise IOError(str(e))
+
+ self._owns_lock = True
+
+ def _obtain_lock(self):
+ """The default implementation will raise if a lock cannot be obtained.
+ Subclasses may override this method to provide a different implementation"""
+ return self._obtain_lock_or_raise()
+
+ def _release_lock(self):
+ """Release our lock if we have one"""
+ if not self._has_lock():
+ return
- :param max_count:
- The maximum count of items we expect. It may be None in case there is
- no maximum number of items or if it is (yet) unknown.
+ # if someone removed our file beforehand, let's just flag this issue
+ # instead of failing, to make it more usable.
+ lfp = self._lock_file_path()
+ try:
+ # on bloody windows, the file needs write permissions to be removable.
+ # Why ...
+ if os.name == 'nt':
+ os.chmod(lfp, 0777)
+ # END handle win32
+ os.remove(lfp)
+ except OSError:
+ pass
+ self._owns_lock = False
+
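+# Usage sketch (illustrative; LockFile is meant to be derived from, hence the
+# protected methods):
+#
+#   lock = LockFile('/path/to/resource')
+#   lock._obtain_lock()     # creates /path/to/resource.lock or raises IOError
+#   lock._release_lock()    # removes the lock file again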
+
+class BlockingLockFile(LockFile):
+ """A lock file that blocks until a lock could be obtained, or fails after
+ a specified timeout.
+
+ :note: If the directory containing the lock was removed, an exception will
+ be raised during the blocking period, preventing hangs as the lock
+ can never be obtained."""
+ __slots__ = ("_check_interval", "_max_block_time")
+ def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
+ """Configure the instance
- :param message:
- In case of the 'WRITING' operation, it contains the amount of bytes
- transferred. It may possibly be used for other purposes as well.
+ :param check_interval_s:
+ Period of time to sleep between attempts to obtain the lock
- You may read the contents of the current line in self._cur_line"""
- pass
+ :param max_block_time_s: Maximum amount of seconds we may block waiting for
+ the lock; by default this is nearly unlimited"""
+ super(BlockingLockFile, self).__init__(file_path)
+ self._check_interval = check_interval_s
+ self._max_block_time = max_block_time_s
+
+ def _obtain_lock(self):
+ """This method blocks until it has obtained the lock, or raises IOError if
+ it ran out of time or if the parent directory was not available anymore.
+ If this method returns, you are guaranteed to own the lock"""
+ starttime = time.time()
+ maxtime = starttime + float(self._max_block_time)
+ while True:
+ try:
+ super(BlockingLockFile, self)._obtain_lock()
+ except IOError:
+ # sanity check: if the directory leading to the lockfile is not
+ # readable anymore, raise an exception
+ curtime = time.time()
+ if not os.path.isdir(os.path.dirname(self._lock_file_path())):
+ msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
+ raise IOError(msg)
+ # END handle missing directory
+
+ if curtime >= maxtime:
+ msg = "Waited %g seconds for lock at %r" % (maxtime - starttime, self._lock_file_path())
+ raise IOError(msg)
+ # END abort if we wait too long
+ time.sleep(self._check_interval)
+ else:
+ break
+ # END endless loop
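+
+# Usage sketch (illustrative): poll twice per second and give up after five
+# seconds, instead of failing immediately as the base LockFile does.
+#
+#   lock = BlockingLockFile('/path/to/resource',
+#                           check_interval_s=0.5, max_block_time_s=5)
+#   lock._obtain_lock()     # blocks; raises IOError only on timeout
+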
class Actor(object):
@@ -274,20 +626,20 @@ class Actor(object):
m = cls.name_email_regex.search(string)
if m:
name, email = m.groups()
- return Actor(name, email)
+ return cls(name, email)
else:
m = cls.name_only_regex.search(string)
if m:
- return Actor(m.group(1), None)
+ return cls(m.group(1), None)
else:
# assume best and use the whole string as name
- return Actor(string, None)
+ return cls(string, None)
# END special case name
# END handle name/email matching
@classmethod
def _main_actor(cls, env_name, env_email, config_reader=None):
- actor = Actor('', '')
+ actor = cls('', '')
default_email = get_user_id()
default_name = default_email.split('@')[0]
@@ -324,6 +676,95 @@ class Actor(object):
return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader)
+class Iterable(object):
+ """Defines an interface for iterable items, assuring a uniform way to
+ retrieve and iterate items within the git repository"""
+ __slots__ = tuple()
+ _id_attribute_ = "attribute that most suitably identifies your instance"
+
+ @classmethod
+ def list_items(cls, repo, *args, **kwargs):
+ """
+ Find all items of this type - subclasses can specify args and kwargs differently.
+ If no args are given, subclasses are obliged to return all items.
+
+ :note: Favor the iter_items method as it will avoid eagerly loading all
+ items into a list
+
+ :return: list(Item, ...) list of item instances"""
+ out_list = IterableList( cls._id_attribute_ )
+ out_list.extend(cls.iter_items(repo, *args, **kwargs))
+ return out_list
+
+
+ @classmethod
+ def iter_items(cls, repo, *args, **kwargs):
+ """For more information about the arguments, see list_items
+ :return: iterator yielding Items"""
+ raise NotImplementedError("To be implemented by Subclass")
+
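+# Subclass sketch (illustrative only; 'Tag' and its data are made up) showing
+# the contract: implement iter_items and inherit list_items for free.
+#
+#   class Tag(Iterable):
+#       _id_attribute_ = 'name'
+#       def __init__(self, name):
+#           self.name = name
+#       @classmethod
+#       def iter_items(cls, repo, *args, **kwargs):
+#           for name in ('v1.0', 'v1.1'):
+#               yield cls(name)
+#
+#   Tag.list_items(None)['v1.0'].name   # -> 'v1.0'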
+
+class IterableList(list):
+ """
+ List of iterable objects allowing to query an object by id or by named index::
+
+ heads = repo.heads
+ heads.master
+ heads['master']
+ heads[0]
+
+ It requires an id_attribute name to be set which will be queried from its
+ contained items as a means of comparison.
+
+ A prefix can be specified for the case where the ids returned by the
+ items always contain a prefix that does not matter to the user, so that
+ it can be left out when querying."""
+ __slots__ = ('_id_attr', '_prefix')
+
+ def __new__(cls, id_attr, prefix=''):
+ return super(IterableList,cls).__new__(cls)
+
+ def __init__(self, id_attr, prefix=''):
+ self._id_attr = id_attr
+ self._prefix = prefix
+ if not isinstance(id_attr, basestring):
+ raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
+ # END help debugging !
+
+ def __getattr__(self, attr):
+ attr = self._prefix + attr
+ for item in self:
+ if getattr(item, self._id_attr) == attr:
+ return item
+ # END for each item
+ return list.__getattribute__(self, attr)
+
+ def __getitem__(self, index):
+ if isinstance(index, int):
+ return list.__getitem__(self,index)
+
+ try:
+ return getattr(self, index)
+ except AttributeError:
+ raise IndexError( "No item found with id %r" % (self._prefix + index) )
+
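+# Prefix sketch (illustrative): with a prefix set, items whose id is, say,
+# 'refs/heads/master' can be looked up as just 'master'.
+#
+#   lst = IterableList('path', prefix='refs/heads/')
+#   # lst.master or lst['master'] would match an item whose
+#   # 'path' attribute equals 'refs/heads/master'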
+
+
+#} END utilities
+
+#{ Classes
+
+class RepoAliasMixin(object):
+ """Simple utility providing a repo property which resolves to the 'odb' attribute
+ of the actual type. This is for API compatibility only: the types previously
+ held repository instances, but now they hold odb instances instead"""
+ __slots__ = tuple()
+
+ @property
+ def repo(self):
+ return self.odb
+
+
class Stats(object):
"""
Represents stat information as presented by git at the end of a merge. It is
@@ -407,195 +848,4 @@ class IndexFileSHA1Writer(object):
return self.f.tell()
-class LockFile(object):
- """Provides methods to obtain, check for, and release a file based lock which
- should be used to handle concurrent access to the same file.
-
- As we are a utility class to be derived from, we only use protected methods.
-
- Locks will automatically be released on destruction"""
- __slots__ = ("_file_path", "_owns_lock")
-
- def __init__(self, file_path):
- self._file_path = file_path
- self._owns_lock = False
-
- def __del__(self):
- self._release_lock()
-
- def _lock_file_path(self):
- """:return: Path to lockfile"""
- return "%s.lock" % (self._file_path)
-
- def _has_lock(self):
- """:return: True if we have a lock and if the lockfile still exists
- :raise AssertionError: if our lock-file does not exist"""
- if not self._owns_lock:
- return False
-
- return True
-
- def _obtain_lock_or_raise(self):
- """Create a lock file as flag for other instances, mark our instance as lock-holder
-
- :raise IOError: if a lock was already present or a lock file could not be written"""
- if self._has_lock():
- return
- lock_file = self._lock_file_path()
- if os.path.isfile(lock_file):
- raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))
-
- try:
- fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
- os.close(fd)
- except OSError,e:
- raise IOError(str(e))
-
- self._owns_lock = True
-
- def _obtain_lock(self):
- """The default implementation will raise if a lock cannot be obtained.
- Subclasses may override this method to provide a different implementation"""
- return self._obtain_lock_or_raise()
-
- def _release_lock(self):
- """Release our lock if we have one"""
- if not self._has_lock():
- return
-
- # if someone removed our file beforhand, lets just flag this issue
- # instead of failing, to make it more usable.
- lfp = self._lock_file_path()
- try:
- # on bloody windows, the file needs write permissions to be removable.
- # Why ...
- if os.name == 'nt':
- os.chmod(lfp, 0777)
- # END handle win32
- os.remove(lfp)
- except OSError:
- pass
- self._owns_lock = False
-
-
-class BlockingLockFile(LockFile):
- """The lock file will block until a lock could be obtained, or fail after
- a specified timeout.
-
- :note: If the directory containing the lock was removed, an exception will
- be raised during the blocking period, preventing hangs as the lock
- can never be obtained."""
- __slots__ = ("_check_interval", "_max_block_time")
- def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
- """Configure the instance
-
- :parm check_interval_s:
- Period of time to sleep until the lock is checked the next time.
- By default, it waits a nearly unlimited time
-
- :parm max_block_time_s: Maximum amount of seconds we may lock"""
- super(BlockingLockFile, self).__init__(file_path)
- self._check_interval = check_interval_s
- self._max_block_time = max_block_time_s
-
- def _obtain_lock(self):
- """This method blocks until it obtained the lock, or raises IOError if
- it ran out of time or if the parent directory was not available anymore.
- If this method returns, you are guranteed to own the lock"""
- starttime = time.time()
- maxtime = starttime + float(self._max_block_time)
- while True:
- try:
- super(BlockingLockFile, self)._obtain_lock()
- except IOError:
- # synity check: if the directory leading to the lockfile is not
- # readable anymore, raise an execption
- curtime = time.time()
- if not os.path.isdir(os.path.dirname(self._lock_file_path())):
- msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
- raise IOError(msg)
- # END handle missing directory
-
- if curtime >= maxtime:
- msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
- raise IOError(msg)
- # END abort if we wait too long
- time.sleep(self._check_interval)
- else:
- break
- # END endless loop
-
-
-class IterableList(list):
- """
- List of iterable objects allowing to query an object by id or by named index::
-
- heads = repo.heads
- heads.master
- heads['master']
- heads[0]
-
- It requires an id_attribute name to be set which will be queried from its
- contained items to have a means for comparison.
-
- A prefix can be specified which is to be used in case the id returned by the
- items always contains a prefix that does not matter to the user, so it
- can be left out."""
- __slots__ = ('_id_attr', '_prefix')
-
- def __new__(cls, id_attr, prefix=''):
- return super(IterableList,cls).__new__(cls)
-
- def __init__(self, id_attr, prefix=''):
- self._id_attr = id_attr
- self._prefix = prefix
- if not isinstance(id_attr, basestring):
- raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
- # END help debugging !
-
- def __getattr__(self, attr):
- attr = self._prefix + attr
- for item in self:
- if getattr(item, self._id_attr) == attr:
- return item
- # END for each item
- return list.__getattribute__(self, attr)
-
- def __getitem__(self, index):
- if isinstance(index, int):
- return list.__getitem__(self,index)
-
- try:
- return getattr(self, index)
- except AttributeError:
- raise IndexError( "No item found with id %r" % (self._prefix + index) )
-
-
-class Iterable(object):
- """Defines an interface for iterable items which is to assure a uniform
- way to retrieve and iterate items within the git repository"""
- __slots__ = tuple()
- _id_attribute_ = "attribute that most suitably identifies your instance"
-
- @classmethod
- def list_items(cls, repo, *args, **kwargs):
- """
- Find all items of this type - subclasses can specify args and kwargs differently.
- If no args are given, subclasses are obliged to return all items if no additional
- arguments arg given.
-
- :note: Favor the iter_items method as it will
-
- :return:list(Item,...) list of item instances"""
- out_list = IterableList( cls._id_attribute_ )
- out_list.extend(cls.iter_items(repo, *args, **kwargs))
- return out_list
-
-
- @classmethod
- def iter_items(cls, repo, *args, **kwargs):
- """For more information about the arguments, see list_items
- :return: iterator yielding Items"""
- raise NotImplementedError("To be implemented by Subclass")
-
#} END classes