Diffstat (limited to 'git')
120 files changed, 10005 insertions, 2929 deletions
diff --git a/git/__init__.py b/git/__init__.py
index 0658c330..4a4200cc 100644
--- a/git/__init__.py
+++ b/git/__init__.py
@@ -14,12 +14,12 @@ __version__ = 'git'
 #{ Initialization
 def _init_externals():
     """Initialize external projects by putting them into the path"""
-    sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'gitdb'))
+    sys.path.append(os.path.join(os.path.dirname(__file__), 'ext', 'async'))
     try:
-        import gitdb
+        import async
     except ImportError:
-        raise ImportError("'gitdb' could not be found in your PYTHONPATH")
+        raise ImportError("'async' could not be found in your PYTHONPATH")
     #END verify import
 #} END initialization
@@ -37,9 +37,9 @@ from git.diff import *
 from git.exc import *
 from git.db import *
 from git.cmd import Git
-from git.repo import Repo
 from git.remote import *
 from git.index import *
+from git.repo import Repo
 from git.util import (
     LockFile,
     BlockingLockFile,
diff --git a/git/base.py b/git/base.py
new file mode 100644
index 00000000..ff1062bf
--- /dev/null
+++ b/git/base.py
@@ -0,0 +1,311 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module with basic data structures - they are designed to be lightweight and fast"""
+from util import (
+    bin_to_hex,
+    zlib
+    )
+
+from fun import (
+    type_id_to_type_map,
+    type_to_type_id_map
+    )
+
+__all__ = ('OInfo', 'OPackInfo', 'ODeltaPackInfo',
+           'OStream', 'OPackStream', 'ODeltaPackStream',
+           'IStream', 'InvalidOInfo', 'InvalidOStream')
+
+#{ ODB Bases
+
+class OInfo(tuple):
+    """Carries information about an object in an ODB, providing information
+    about the binary sha of the object, the type_string as well as the uncompressed size
+    in bytes.
+
+    It can be accessed using tuple notation and using attribute access notation::
+
+        assert dbi[0] == dbi.binsha
+        assert dbi[1] == dbi.type
+        assert dbi[2] == dbi.size
+
+    The type is designed to be as lightweight as possible."""
+    __slots__ = tuple()
+
+    def __new__(cls, sha, type, size):
+        return tuple.__new__(cls, (sha, type, size))
+
+    def __init__(self, *args):
+        tuple.__init__(self)
+
+    #{ Interface
+    @property
+    def binsha(self):
+        """:return: our sha as binary, 20 bytes"""
+        return self[0]
+
+    @property
+    def hexsha(self):
+        """:return: our sha, hex encoded, 40 bytes"""
+        return bin_to_hex(self[0])
+
+    @property
+    def type(self):
+        return self[1]
+
+    @property
+    def type_id(self):
+        return type_to_type_id_map[self[1]]
+
+    @property
+    def size(self):
+        return self[2]
+    #} END interface
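OInfo's pattern, an immutable tuple subclass with empty __slots__ and named read-only accessors, is worth seeing in isolation. A minimal standalone sketch of the same technique (the Record name and sample values are illustrative, not part of this commit):

class Record(tuple):
    # empty __slots__: no per-instance __dict__, instances stay as small as plain tuples
    __slots__ = tuple()

    def __new__(cls, sha, type, size):
        return tuple.__new__(cls, (sha, type, size))

    @property
    def binsha(self):
        return self[0]

    @property
    def size(self):
        return self[2]

r = Record('\x00' * 20, 'blob', 42)
assert r[0] == r.binsha and r.size == 42  # tuple and attribute access agree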
+class OPackInfo(tuple):
+    """As OInfo, but provides a type_id property to retrieve the numerical type id, and
+    does not include a sha.
+
+    Additionally, the pack_offset is the absolute offset into the packfile at which
+    all object information is located. The data_offset property points to the absolute
+    location in the pack at which the actual data stream can be found."""
+    __slots__ = tuple()
+
+    def __new__(cls, packoffset, type, size):
+        return tuple.__new__(cls, (packoffset, type, size))
+
+    def __init__(self, *args):
+        tuple.__init__(self)
+
+    #{ Interface
+
+    @property
+    def pack_offset(self):
+        return self[0]
+
+    @property
+    def type(self):
+        return type_id_to_type_map[self[1]]
+
+    @property
+    def type_id(self):
+        return self[1]
+
+    @property
+    def size(self):
+        return self[2]
+
+    #} END interface
+
+
+class ODeltaPackInfo(OPackInfo):
+    """Adds delta specific information,
+    Either the 20 byte sha which points to some object in the database,
+    or the negative offset from the pack_offset, so that pack_offset - delta_info yields
+    the pack offset of the base object"""
+    __slots__ = tuple()
+
+    def __new__(cls, packoffset, type, size, delta_info):
+        return tuple.__new__(cls, (packoffset, type, size, delta_info))
+
+    #{ Interface
+    @property
+    def delta_info(self):
+        return self[3]
+    #} END interface
+
+
+class OStream(OInfo):
+    """Base for object streams retrieved from the database, providing additional
+    information about the stream.
+    Generally, ODB streams are read-only as objects are immutable"""
+    __slots__ = tuple()
+
+    def __new__(cls, sha, type, size, stream, *args, **kwargs):
+        """Helps with the initialization of subclasses"""
+        return tuple.__new__(cls, (sha, type, size, stream))
+
+    def __init__(self, *args, **kwargs):
+        tuple.__init__(self)
+
+    #{ Stream Reader Interface
+
+    def read(self, size=-1):
+        return self[3].read(size)
+
+    @property
+    def stream(self):
+        return self[3]
+
+    #} END stream reader interface
+
+
+class ODeltaStream(OStream):
+    """Uses size info of its stream, delaying reads"""
+
+    def __new__(cls, sha, type, size, stream, *args, **kwargs):
+        """Helps with the initialization of subclasses"""
+        return tuple.__new__(cls, (sha, type, size, stream))
+
+    #{ Stream Reader Interface
+
+    @property
+    def size(self):
+        return self[3].size
+
+    #} END stream reader interface
+
+
+class OPackStream(OPackInfo):
+    """Next to pack object information, a stream outputting an undeltified base object
+    is provided"""
+    __slots__ = tuple()
+
+    def __new__(cls, packoffset, type, size, stream, *args):
+        """Helps with the initialization of subclasses"""
+        return tuple.__new__(cls, (packoffset, type, size, stream))
+
+    #{ Stream Reader Interface
+    def read(self, size=-1):
+        return self[3].read(size)
+
+    @property
+    def stream(self):
+        return self[3]
+    #} END stream reader interface
+
+
+class ODeltaPackStream(ODeltaPackInfo):
+    """Provides a stream outputting the uncompressed offset delta information"""
+    __slots__ = tuple()
+
+    def __new__(cls, packoffset, type, size, delta_info, stream):
+        return tuple.__new__(cls, (packoffset, type, size, delta_info, stream))
+
+    #{ Stream Reader Interface
+    def read(self, size=-1):
+        return self[4].read(size)
+
+    @property
+    def stream(self):
+        return self[4]
+    #} END stream reader interface
+class IStream(list):
+    """Represents an input content stream to be fed into the ODB. It is mutable to allow
+    the ODB to record information about the operation's outcome right in this instance.
+
+    It provides interfaces for the OStream and a StreamReader to allow the instance
+    to blend in without prior conversion.
+
+    The only method your content stream must support is 'read'"""
+    __slots__ = tuple()
+
+    def __new__(cls, type, size, stream, sha=None):
+        return list.__new__(cls, (sha, type, size, stream, None))
+
+    def __init__(self, type, size, stream, sha=None):
+        list.__init__(self, (sha, type, size, stream, None))
+
+    #{ Interface
+    @property
+    def hexsha(self):
+        """:return: our sha, hex encoded, 40 bytes"""
+        return bin_to_hex(self[0])
+
+    def _error(self):
+        """:return: the error that occurred when processing the stream, or None"""
+        return self[4]
+
+    def _set_error(self, exc):
+        """Set this input stream to the given exc, may be None to reset the error"""
+        self[4] = exc
+
+    error = property(_error, _set_error)
+
+    #} END interface
+
+    #{ Stream Reader Interface
+
+    def read(self, size=-1):
+        """Implements a simple stream reader interface, passing the read call on
+        to our internal stream"""
+        return self[3].read(size)
+
+    #} END stream reader interface
+
+    #{ interface
+
+    def _set_binsha(self, binsha):
+        self[0] = binsha
+
+    def _binsha(self):
+        return self[0]
+
+    binsha = property(_binsha, _set_binsha)
+
+    def _type(self):
+        return self[1]
+
+    def _set_type(self, type):
+        self[1] = type
+
+    type = property(_type, _set_type)
+
+    def _size(self):
+        return self[2]
+
+    def _set_size(self, size):
+        self[2] = size
+
+    size = property(_size, _set_size)
+
+    def _stream(self):
+        return self[3]
+
+    def _set_stream(self, stream):
+        self[3] = stream
+
+    stream = property(_stream, _set_stream)
+
+    #} END odb info interface
+
+
+class InvalidOInfo(tuple):
+    """Carries information about a sha identifying an object which is invalid in
+    the queried database. The exception attribute provides more information about
+    the cause of the issue"""
+    __slots__ = tuple()
+
+    def __new__(cls, sha, exc):
+        return tuple.__new__(cls, (sha, exc))
+
+    def __init__(self, sha, exc):
+        tuple.__init__(self, (sha, exc))
+
+    @property
+    def binsha(self):
+        return self[0]
+
+    @property
+    def hexsha(self):
+        return bin_to_hex(self[0])
+
+    @property
+    def error(self):
+        """:return: exception instance explaining the failure"""
+        return self[1]
+
+
+class InvalidOStream(InvalidOInfo):
+    """Carries information about an invalid ODB stream"""
+    __slots__ = tuple()
+
+#} END ODB Bases
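IStream inverts the OInfo design: it subclasses list precisely so the ODB can write results (the computed binsha, or an error) back onto the object the caller handed in. A standalone sketch of that mutable variant (InputRecord is an illustrative name, not part of the commit):

from StringIO import StringIO  # Python 2, matching the code above

class InputRecord(list):
    __slots__ = tuple()

    def __init__(self, type, size, stream):
        list.__init__(self, (None, type, size, stream))

    def _get_binsha(self):
        return self[0]

    def _set_binsha(self, v):
        self[0] = v  # the ODB fills this in once the sha is known

    binsha = property(_get_binsha, _set_binsha)

rec = InputRecord('blob', 3, StringIO('abc'))
rec.binsha = '\x00' * 20
assert rec.binsha is not None and rec[3].read() == 'abc'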
diff --git a/git/cmd.py b/git/cmd.py
@@ -5,7 +5,10 @@
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php

 import os, sys
-from util import *
+from util import (
+    LazyMixin,
+    stream_copy
+    )
 from exc import GitCommandError

 from subprocess import (
@@ -26,7 +29,7 @@ __all__ = ('Git', )
 def dashify(string):
     return string.replace('_', '-')

-class Git(object):
+class Git(LazyMixin):
     """
     The Git class manages communication with the Git binary.
@@ -41,7 +44,7 @@ class Git(object):
         of the command to stdout.
         Set its value to 'full' to see details about the returned values.
     """
-    __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
+    __slots__ = ("_working_dir", "cat_file_all", "cat_file_header", "_version_info")

     # CONFIGURATION
     # The size in bytes read from stdout when copying git's output to another stream
@@ -214,14 +217,30 @@ class Git(object):
         """A convenience method as it allows to call the command as if it was
        an object.
         :return: Callable object that will execute _call_process with your arguments."""
-        if name[:1] == '_':
-            raise AttributeError(name)
+        if name[0] == '_':
+            return LazyMixin.__getattr__(self, name)
         return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)

+    def _set_cache_(self, attr):
+        if attr == '_version_info':
+            version_numbers = self._call_process('version').rpartition(' ')[2]
+            self._version_info = tuple(int(n) for n in version_numbers.split('.'))
+        else:
+            super(Git, self)._set_cache_(attr)
+        #END handle version info
+
+    @property
     def working_dir(self):
         """:return: Git directory we are working on"""
         return self._working_dir
+
+    @property
+    def version_info(self):
+        """:return: tuple(int, ...) tuple with integers representing the major, minor
+            and additional version numbers as parsed from git version.
+            This value is generated on demand and is cached"""
+        return self._version_info

     def execute(self, command,
                 istream=None,
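version_info relies on `git version` output having the shape 'git version 1.7.3.4'. The parsing step can be checked in isolation (the sample string is illustrative; builds that report suffixes like '1.7.3.4.msysgit.0' would break the int() conversion, which this sketch does not attempt to handle):

def parse_git_version(text):
    # 'git version 1.7.3.4' -> (1, 7, 3, 4)
    numbers = text.rpartition(' ')[2]
    return tuple(int(n) for n in numbers.split('.'))

assert parse_git_version('git version 1.7.3.4') == (1, 7, 3, 4)
assert parse_git_version('git version 1.7.3.4') >= (1, 7, 0, 3)  # tuples compare lexicographically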
diff --git a/git/config.py b/git/config.py
index f1a8832e..c71bb8ca 100644
--- a/git/config.py
+++ b/git/config.py
@@ -120,11 +120,12 @@ class GitConfigParser(cp.RawConfigParser, object):
     # They must be compatible to the LockFile interface.
     # A suitable alternative would be the BlockingLockFile
     t_lock = LockFile
+    re_comment = re.compile('^\s*[#;]')

     #} END configuration

     OPTCRE = re.compile(
-        r'\s?(?P<option>[^:=\s][^:=]*)'   # very permissive, including leading whitespace
+        r'\s*(?P<option>[^:=\s][^:=]*)'   # very permissive, including leading whitespace
         r'\s*(?P<vi>[:=])\s*'             # any number of space/tab,
                                           # followed by separator
                                           # (either : or =), followed
@@ -211,16 +212,16 @@ class GitConfigParser(cp.RawConfigParser, object):
                     break
                 lineno = lineno + 1
                 # comment or blank line?
-                if line.strip() == '' or line[0] in '#;':
+                if line.strip() == '' or self.re_comment.match(line):
                     continue
                 if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR":
                     # no leading whitespace
                     continue
                 else:
                     # is it a section header?
-                    mo = self.SECTCRE.match(line)
+                    mo = self.SECTCRE.match(line.strip())
                     if mo:
-                        sectname = mo.group('header')
+                        sectname = mo.group('header').strip()
                         if sectname in self._sections:
                             cursect = self._sections[sectname]
                         elif sectname == cp.DEFAULTSECT:
@@ -332,6 +333,10 @@ class GitConfigParser(cp.RawConfigParser, object):
             close_fp = True
         else:
             fp.seek(0)
+            # make sure we do not overwrite into an existing file
+            if hasattr(fp, 'truncate'):
+                fp.truncate()
+            #END
         # END handle stream or file

         # WRITE DATA
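The new truncate() call matters whenever the rewritten configuration is shorter than what is already on disk: after seek(0), stale bytes of the longer old content would otherwise survive past the new end. A standalone demonstration (the scratch file path is hypothetical):

import tempfile, os

path = os.path.join(tempfile.mkdtemp(), 'config')
fp = open(path, 'w+')
fp.write('[core]\n\tbare = false\n')
fp.seek(0)
fp.write('[x]\n')   # shorter rewrite over longer content
fp.truncate()       # without this, the tail of the old content remains
fp.close()
assert open(path).read() == '[x]\n'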
diff --git a/git/db.py b/git/db.py
deleted file mode 100644
index b1c65377..00000000
--- a/git/db.py
+++ /dev/null
@@ -1,61 +0,0 @@
-"""Module with our own gitdb implementation - it uses the git command"""
-from exc import (
-    GitCommandError,
-    BadObject
-    )
-
-from gitdb.base import (
-    OInfo,
-    OStream
-    )
-
-from gitdb.util import (
-    bin_to_hex,
-    hex_to_bin
-    )
-from gitdb.db import GitDB
-from gitdb.db import LooseObjectDB
-
-
-__all__ = ('GitCmdObjectDB', 'GitDB')
-
-#class GitCmdObjectDB(CompoundDB, ObjectDBW):
-class GitCmdObjectDB(LooseObjectDB):
-    """A database representing the default git object store, which includes loose
-    objects, pack files and an alternates file
-
-    It will create objects only in the loose object database.
-    :note: for now, we use the git command to do all the lookup, just until we
-    have packs and the other implementations
-    """
-    def __init__(self, root_path, git):
-        """Initialize this instance with the root and a git command"""
-        super(GitCmdObjectDB, self).__init__(root_path)
-        self._git = git
-
-    def info(self, sha):
-        hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
-        return OInfo(hex_to_bin(hexsha), typename, size)
-
-    def stream(self, sha):
-        """For now, all lookup is done by git itself"""
-        hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
-        return OStream(hex_to_bin(hexsha), typename, size, stream)
-
-
-    # { Interface
-
-    def partial_to_complete_sha_hex(self, partial_hexsha):
-        """:return: Full binary 20 byte sha from the given partial hexsha
-        :raise AmbiguousObjectName:
-        :raise BadObject:
-        :note: currently we only raise BadObject as git does not communicate
-        AmbiguousObjects separately"""
-        try:
-            hexsha, typename, size = self._git.get_object_header(partial_hexsha)
-            return hex_to_bin(hexsha)
-        except (GitCommandError, ValueError):
-            raise BadObject(partial_hexsha)
-        # END handle exceptions
-
-    #} END interface
diff --git a/git/db/__init__.py b/git/db/__init__.py
new file mode 100644
index 00000000..25948326
--- /dev/null
+++ b/git/db/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from interface import *
diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
new file mode 100644
index 00000000..ef22c931
--- /dev/null
+++ b/git/db/cmd/base.py
@@ -0,0 +1,821 @@
+"""module with git command implementations of the basic interfaces
+:note: we could add all implementations of the basic interfaces; it's more efficient, though,
+to obtain them from the pure implementation"""
+from git.exc import (
+    GitCommandError,
+    BadObject
+    )
+
+from git.base import (
+    OInfo,
+    OStream
+    )
+
+from git.util import (
+    bin_to_hex,
+    hex_to_bin,
+    isfile,
+    join_path,
+    join,
+    Actor,
+    IterableList,
+    )
+from git.db.interface import (
+    FetchInfo,
+    PushInfo,
+    HighLevelRepository,
+    TransportDB,
+    RemoteProgress
+    )
+from git.cmd import Git
+from git.refs import (
+    Reference,
+    RemoteReference,
+    SymbolicReference,
+    TagReference
+    )
+from git.objects.commit import Commit
+from cStringIO import StringIO
+import re
+import os
+import sys
+
+
+__all__ = ('CmdTransportMixin', 'GitCommandMixin', 'CmdPushInfo', 'CmdFetchInfo',
+           'CmdRemoteProgress', 'CmdObjectDBRMixin', 'CmdHighLevelRepository')
+
+
+#{ Utilities
+
+def touch(filename):
+    fp = open(filename, "a")
+    fp.close()
+
+
+def digest_process_messages(fh, progress):
+    """Read progress messages from file-like object fh, supplying the respective
+    progress messages to the progress instance.
+
+    :return: list(line, ...) list of lines without linebreaks that did
+        not contain progress information"""
+    line_so_far = ''
+    dropped_lines = list()
+    while True:
+        char = fh.read(1)
+        if not char:
+            break
+
+        if char in ('\r', '\n'):
+            dropped_lines.extend(progress._parse_progress_line(line_so_far))
+            line_so_far = ''
+        else:
+            line_so_far += char
+        # END process parsed line
+    # END while file is not done reading
+    return dropped_lines
+
+def finalize_process(proc):
+    """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
+    try:
+        proc.wait()
+    except GitCommandError,e:
+        # if a push has rejected items, the command has non-zero return status
+        # a return status of 128 indicates a connection error - reraise the previous one
+        if proc.poll() == 128:
+            raise
+        pass
+    # END exception handling
+
+
+def get_fetch_info_from_stderr(repo, proc, progress):
+    # skip first line as it is some remote info we are not interested in
+    output = IterableList('name')
+
+    # lines which are no progress are fetch info lines
+    # this also waits for the command to finish
+    # Skip some progress lines that don't provide relevant information
+    fetch_info_lines = list()
+    for line in digest_process_messages(proc.stderr, progress):
+        if line.startswith('From') or line.startswith('remote: Total'):
+            continue
+        elif line.startswith('warning:'):
+            print >> sys.stderr, line
+            continue
+        elif line.startswith('fatal:'):
+            raise GitCommandError(("Error when fetching: %s" % line,), 2)
+        # END handle special messages
+        fetch_info_lines.append(line)
+    # END for each line
+
+    # read head information
+    fp = open(join(repo.git_dir, 'FETCH_HEAD'), 'r')
+    fetch_head_info = fp.readlines()
+    fp.close()
+
+    assert len(fetch_info_lines) == len(fetch_head_info)
+
+    output.extend(CmdFetchInfo._from_line(repo, err_line, fetch_line)
+                  for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info))
+
+    finalize_process(proc)
+    return output
+
+def get_push_info(repo, remotename_or_url, proc, progress):
+    # read progress information from stderr
+    # we hope stdout can hold all the data, it should ...
+    # read the lines manually as it will use carriage returns between the messages
+    # to override the previous one. This is why we read the bytes manually
+    digest_process_messages(proc.stderr, progress)
+
+    output = IterableList('name')
+    for line in proc.stdout.readlines():
+        try:
+            output.append(CmdPushInfo._from_line(repo, remotename_or_url, line))
+        except ValueError:
+            # if an error happens, additional info is given which we cannot parse
+            pass
+        # END exception handling
+    # END for each line
+
+    finalize_process(proc)
+    return output
+
+def add_progress(kwargs, git, progress):
+    """Add the --progress flag to the given kwargs dict if supported by the
+    git command. If the actual progress in the given progress instance is not
+    given, we do not request any progress
+    :return: possibly altered kwargs"""
+    if progress._progress is not None:
+        v = git.version_info
+        if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3:
+            kwargs['progress'] = True
+        #END handle --progress
+    #END handle progress
+    return kwargs
+
+#} END utilities
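One caveat in add_progress: the gate `v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3` ors the components independently, so for example version 1.6.5.4 passes (5 > 0) even though it predates 1.7.0.3, and a three-component version_info like (1, 7, 0) raises IndexError when v[3] is probed. The intended "at least 1.7.0.3" test is a plain lexicographic tuple comparison (sketch, assuming version tuples as produced by Git.version_info):

def supports_progress(version_info):
    # pad so short tuples like (1, 7) compare against four components
    padded = tuple(version_info) + (0, 0, 0, 0)
    return padded[:4] > (1, 7, 0, 3)

assert supports_progress((1, 7, 1))
assert not supports_progress((1, 6, 5, 4))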
+class CmdRemoteProgress(RemoteProgress):
+    """
+    A RemoteProgress implementation taking a user derived progress instance to call the
+    respective methods on.
+    """
+    __slots__ = ("_seen_ops", '_progress')
+    re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
+    re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
+
+    def __init__(self, progress_instance = None):
+        self._seen_ops = list()
+        if progress_instance is None:
+            progress_instance = RemoteProgress()
+        #END assure proper instance
+        self._progress = progress_instance
+
+    def _parse_progress_line(self, line):
+        """Parse progress information from the given line as retrieved by git-push
+        or git-fetch
+
+        Call the own update(), __call__() and line_dropped() methods according
+        to the parsed result.
+
+        :return: list(line, ...) list of lines that could not be processed"""
+        # handle
+        # Counting objects: 4, done.
+        # Compressing objects:  50% (1/2)   \rCompressing objects: 100% (2/2)   \rCompressing objects: 100% (2/2), done.
+        sub_lines = line.split('\r')
+        failed_lines = list()
+        for sline in sub_lines:
+            # find escape characters and cut them away - regex will not work with
+            # them as they are non-ascii. As git might expect a tty, it will send them
+            last_valid_index = None
+            for i, c in enumerate(reversed(sline)):
+                if ord(c) < 32:
+                    # it's a slice index
+                    last_valid_index = -i - 1
+                # END character was non-ascii
+            # END for each character in sline
+            if last_valid_index is not None:
+                sline = sline[:last_valid_index]
+            # END cut away invalid part
+            sline = sline.rstrip()
+
+            cur_count, max_count = None, None
+            match = self.re_op_relative.match(sline)
+            if match is None:
+                match = self.re_op_absolute.match(sline)
+
+            if not match:
+                self._progress.line_dropped(sline)
+                failed_lines.append(sline)
+                continue
+            # END could not get match
+
+            op_code = 0
+            remote, op_name, percent, cur_count, max_count, message = match.groups()
+
+            # get operation id
+            if op_name == "Counting objects":
+                op_code |= self.COUNTING
+            elif op_name == "Compressing objects":
+                op_code |= self.COMPRESSING
+            elif op_name == "Writing objects":
+                op_code |= self.WRITING
+            elif op_name == "Receiving objects":
+                op_code |= self.RECEIVING
+            elif op_name == "Resolving deltas":
+                op_code |= self.RESOLVING
+            else:
+                raise ValueError("Operation name %r unknown" % op_name)
+
+            # figure out stage
+            if op_code not in self._seen_ops:
+                self._seen_ops.append(op_code)
+                op_code |= self.BEGIN
+            # END begin opcode
+
+            if message is None:
+                message = ''
+            # END message handling
+
+            message = message.strip()
+            done_token = ', done.'
+            if message.endswith(done_token):
+                op_code |= self.END
+                message = message[:-len(done_token)]
+            # END end message handling
+
+            self._progress.update(op_code, cur_count, max_count, message, line)
+            self._progress(message, line)
+        # END for each sub line
+        return failed_lines
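The two operation patterns can be exercised directly; the input below is a sample in git's progress format, not captured output from this commit:

import re

re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
m = re_op_relative.match("Compressing objects:  50% (1/2)")
remote, op_name, percent, cur_count, max_count, message = m.groups()
assert op_name == "Compressing objects"
assert (percent, cur_count, max_count) == ("50", "1", "2")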
+class CmdPushInfo(PushInfo):
+    """
+    Pure Python implementation of a PushInfo interface
+    """
+    __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha',
+                 '_remotename_or_url', 'repo', 'summary')
+
+    _flag_map = { 'X' : PushInfo.NO_MATCH,
+                  '-' : PushInfo.DELETED, '*' : 0,
+                  '+' : PushInfo.FORCED_UPDATE,
+                  ' ' : PushInfo.FAST_FORWARD,
+                  '=' : PushInfo.UP_TO_DATE,
+                  '!' : PushInfo.ERROR }
+
+    def __init__(self, flags, local_ref, remote_ref_string, repo, remotename_or_url, old_commit_binsha=None,
+                 summary=''):
+        """ Initialize a new instance """
+        self.flags = flags
+        self.local_ref = local_ref
+        self.repo = repo
+        self.remote_ref_string = remote_ref_string
+        self._remotename_or_url = remotename_or_url
+        self.old_commit_binsha = old_commit_binsha
+        self.summary = summary
+
+    @property
+    def remote_ref(self):
+        """
+        :return:
+            Remote Reference or TagReference in the local repository corresponding
+            to the remote_ref_string kept in this instance."""
+        # translate heads to a local remote, tags stay as they are
+        if self.remote_ref_string.startswith("refs/tags"):
+            return TagReference(self.repo, self.remote_ref_string)
+        elif self.remote_ref_string.startswith("refs/heads"):
+            remote_ref = Reference(self.repo, self.remote_ref_string)
+            if '/' in self._remotename_or_url:
+                sys.stderr.write("Cannot provide RemoteReference instance if it was created from a url instead of a remote name: %s. Returning Reference instance instead" % self._remotename_or_url)
+                return remote_ref
+            #END assert correct input
+            return RemoteReference(self.repo, "refs/remotes/%s/%s" % (str(self._remotename_or_url), remote_ref.name))
+        else:
+            raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
+        # END
+
+    @classmethod
+    def _from_line(cls, repo, remotename_or_url, line):
+        """Create a new PushInfo instance as parsed from line which is expected to be like
+        refs/heads/master:refs/heads/master 05d2687..1d0568e"""
+        control_character, from_to, summary = line.split('\t', 3)
+        flags = 0
+
+        # control character handling
+        try:
+            flags |= cls._flag_map[ control_character ]
+        except KeyError:
+            raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
+        # END handle control character
+
+        # from_to handling
+        from_ref_string, to_ref_string = from_to.split(':')
+        if flags & cls.DELETED:
+            from_ref = None
+        else:
+            from_ref = Reference.from_path(repo, from_ref_string)
+
+        # commit handling, could be message or commit info
+        old_commit_binsha = None
+        if summary.startswith('['):
+            if "[rejected]" in summary:
+                flags |= cls.REJECTED
+            elif "[remote rejected]" in summary:
+                flags |= cls.REMOTE_REJECTED
+            elif "[remote failure]" in summary:
+                flags |= cls.REMOTE_FAILURE
+            elif "[no match]" in summary:
+                flags |= cls.ERROR
+            elif "[new tag]" in summary:
+                flags |= cls.NEW_TAG
+            elif "[new branch]" in summary:
+                flags |= cls.NEW_HEAD
+            # uptodate encoded in control character
+        else:
+            # fast-forward or forced update - was encoded in control character,
+            # but we parse the old and new commit
+            split_token = "..."
+            if control_character == " ":
+                split_token = ".."
+            old_sha, new_sha = summary.split(' ')[0].split(split_token)
+            old_commit_binsha = repo.resolve(old_sha)
+        # END message handling
+
+        return cls(flags, from_ref, to_ref_string, repo, remotename_or_url, old_commit_binsha, summary)
+class CmdFetchInfo(FetchInfo):
+    """
+    Pure python implementation of a FetchInfo interface
+    """
+    __slots__ = ('ref', 'old_commit_binsha', 'flags', 'note')
+
+    # %c %-*s %-*s -> %s (%s)
+    re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+
+    _flag_map = { '!' : FetchInfo.ERROR,
+                  '+' : FetchInfo.FORCED_UPDATE,
+                  '-' : FetchInfo.TAG_UPDATE,
+                  '*' : 0,
+                  '=' : FetchInfo.HEAD_UPTODATE,
+                  ' ' : FetchInfo.FAST_FORWARD }
+
+    def __init__(self, ref, flags, note = '', old_commit_binsha = None):
+        """
+        Initialize a new instance
+        """
+        self.ref = ref
+        self.flags = flags
+        self.note = note
+        self.old_commit_binsha = old_commit_binsha
+
+    def __str__(self):
+        return self.name
+
+    @property
+    def name(self):
+        """:return: Name of our remote ref"""
+        return self.ref.name
+
+    @property
+    def commit(self):
+        """:return: Commit of our remote ref"""
+        return self.ref.commit
+
+    @classmethod
+    def _from_line(cls, repo, line, fetch_line):
+        """Parse information from the given line as returned by git-fetch -v
+        and return a new CmdFetchInfo object representing this information.
+
+        We can handle a line as follows
+        "%c %-*s %-*s -> %s%s"
+
+        Where c is either ' ', !, +, -, *, or =
+        ! means error
+        + means success forcing update
+        - means a tag was updated
+        * means birth of new branch or tag
+        = means the head was up to date ( and not moved )
+        ' ' means a fast-forward
+
+        fetch line is the corresponding line from FETCH_HEAD, like
+        acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
+        match = cls.re_fetch_result.match(line)
+        if match is None:
+            raise ValueError("Failed to parse line: %r" % line)
+
+        # parse lines
+        control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
+        try:
+            new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
+            ref_type_name, fetch_note = fetch_note.split(' ', 1)
+        except ValueError: # unpack error
+            raise ValueError("Failed to parse FETCH_HEAD line: %r" % fetch_line)
+
+        # handle FETCH_HEAD and figure out ref type
+        # If we do not specify a target branch like master:refs/remotes/origin/master,
+        # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
+        # have. In that case we use a symbolic reference which is detached
+        ref_type = None
+        if remote_local_ref == "FETCH_HEAD":
+            ref_type = SymbolicReference
+        elif ref_type_name == "branch":
+            ref_type = RemoteReference
+        elif ref_type_name == "tag":
+            ref_type = TagReference
+        else:
+            raise TypeError("Cannot handle reference type: %r" % ref_type_name)
+
+        # create ref instance
+        if ref_type is SymbolicReference:
+            remote_local_ref = ref_type(repo, "FETCH_HEAD")
+        else:
+            remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip()))
+        # END create ref instance
+
+        note = ( note and note.strip() ) or ''
+
+        # parse flags from control_character
+        flags = 0
+        try:
+            flags |= cls._flag_map[control_character]
+        except KeyError:
+            raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
+        # END control char exception handling
+
+        # parse operation string for more info - makes no sense for symbolic refs
+        old_commit_binsha = None
+        if isinstance(remote_local_ref, Reference):
+            if 'rejected' in operation:
+                flags |= cls.REJECTED
+            if 'new tag' in operation:
+                flags |= cls.NEW_TAG
+            if 'new branch' in operation:
+                flags |= cls.NEW_HEAD
+            if '...' in operation or '..' in operation:
+                split_token = '...'
+                if control_character == ' ':
+                    split_token = split_token[:-1]
+                old_commit_binsha = repo.resolve(operation.split(split_token)[0])
+            # END handle refspec
+        # END reference flag handling
+
+        return cls(remote_local_ref, flags, note, old_commit_binsha)
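The fetch status parser pairs each stderr line with a FETCH_HEAD line; its regex alone can be sanity-checked standalone (the input is a sample of git-fetch -v output, illustrative only):

import re

re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
m = re_fetch_result.match(" * [new branch]      master     -> origin/master")
control_character, operation, local_ref, remote_local_ref, note = m.groups()
assert control_character == "*"
assert remote_local_ref == "origin/master"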
+class GitCommandMixin(object):
+    """A mixin to provide the git command object through the git property"""
+
+    def __init__(self, *args, **kwargs):
+        """Initialize this instance with the root and a git command"""
+        super(GitCommandMixin, self).__init__(*args, **kwargs)
+        self._git = Git(self.working_dir)
+
+    @property
+    def git(self):
+        return self._git
+
+
+class CmdObjectDBRMixin(object):
+    """A mixin implementing object reading through a git command
+    It will create objects only in the loose object database.
+    :note: for now, we use the git command to do all the lookup, just until we
+    have packs and the other implementations
+    """
+    #{ ODB Interface
+    # overrides from PureOdb Implementation, which is responsible only for writing
+    # objects
+    def info(self, sha):
+        hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
+        return OInfo(hex_to_bin(hexsha), typename, size)
+
+    def stream(self, sha):
+        """For now, all lookup is done by git itself
+        :note: As we don't know when the stream is actually read (and if it is
+            stored for later use) we read the data right away and cache it.
+            This has HUGE performance implications, both for memory and for
+            reading/deserializing objects, but we have no other choice in order
+            to make the database behaviour consistent with other implementations !"""
+
+        hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+        return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        """:return: Full binary 20 byte sha from the given partial hexsha
+        :raise AmbiguousObjectName:
+        :raise BadObject:
+        :note: currently we only raise BadObject as git does not communicate
+        AmbiguousObjects separately"""
+        try:
+            hexsha, typename, size = self._git.get_object_header(partial_hexsha)
+            return hex_to_bin(hexsha)
+        except (GitCommandError, ValueError):
+            raise BadObject(partial_hexsha)
+        # END handle exceptions
+
+    #} END odb interface
+
+
+class CmdTransportMixin(TransportDB):
+    """A mixin requiring the .git property as well as repository paths
+
+    It will create objects only in the loose object database.
+    :note: for now, we use the git command to do all the lookup, just until we
+    have packs and the other implementations
+    """
+
+    #{ Transport DB interface
+
+    def push(self, url, refspecs=None, progress=None, **kwargs):
+        """Push given refspecs using the git default implementation
+        :param url: may be a remote name or a url
+        :param refspecs: single string, RefSpec instance or list of such or None.
+        :param progress: RemoteProgress derived instance or None
+        :param **kwargs: Additional arguments to be passed to the git-push process"""
+        progress = CmdRemoteProgress(progress)
+        proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git, progress))
+        return get_push_info(self, url, proc, progress)
+
+    def pull(self, url, refspecs=None, progress=None, **kwargs):
+        """Fetch and merge the given refspecs.
+        If no refspecs are given, the merge will only work properly if you
+        have setup upstream (tracking) branches.
+        :param url: may be a remote name or a url
+        :param refspecs: see push()
+        :param progress: see push()"""
+        progress = CmdRemoteProgress(progress)
+        proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+        return get_fetch_info_from_stderr(self, proc, progress)
+
+    def fetch(self, url, refspecs=None, progress=None, **kwargs):
+        """Fetch the latest changes
+        :param url: may be a remote name or a url
+        :param refspecs: see push()
+        :param progress: see push()"""
+        progress = CmdRemoteProgress(progress)
+        proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+        return get_fetch_info_from_stderr(self, proc, progress)
+
+    #} end transport db interface
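Driving the transport mixin looks roughly like this. A sketch under assumptions: CmdGitDB (defined in git/db/complex.py further below) is a working combination of these mixins, the repository path is hypothetical, and 'origin' must exist in that repository:

from git.db.complex import CmdGitDB

db = CmdGitDB('/path/to/repo/.git')       # hypothetical path and constructor usage
for info in db.fetch('origin', ['master']):
    print info.ref, info.note             # CmdFetchInfo instances, one per refspec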
+class CmdHighLevelRepository(HighLevelRepository):
+    """An intermediate interface carrying advanced git functionality that can be used
+    in other compound repositories which do not implement this functionality themselves.
+
+    The mixin must be used with repositories compatible to the GitCommandMixin.
+
+    :note: at some point, methods provided here are supposed to be provided by custom interfaces"""
+    DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
+
+    # precompiled regex
+    re_whitespace = re.compile(r'\s+')
+    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+    re_author_committer_start = re.compile(r'^(author|committer)')
+    re_tab_full_line = re.compile(r'^\t(.*)$')
+
+    #{ Configuration
+    CommitCls = Commit
+    GitCls = Git
+    #} END configuration
+
+    def daemon_export():
+        def _get_daemon_export(self):
+            filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+            return os.path.exists(filename)
+
+        def _set_daemon_export(self, value):
+            filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+            fileexists = os.path.exists(filename)
+            if value and not fileexists:
+                touch(filename)
+            elif not value and fileexists:
+                os.unlink(filename)
+
+        return property(_get_daemon_export, _set_daemon_export,
+                        doc="If True, git-daemon may export this repository")
+
+    daemon_export = daemon_export()
+
+    def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+        if self._bare:
+            # Bare repositories with no associated working directory are
+            # always considered to be clean.
+            return False
+
+        # start from the one which is fastest to evaluate
+        default_args = ('--abbrev=40', '--full-index', '--raw')
+        if index:
+            # diff index against HEAD
+            if isfile(self.index.path) and self.head.is_valid() and \
+                len(self.git.diff('HEAD', '--cached', *default_args)):
+                return True
+            # END index handling
+        if working_tree:
+            # diff index against working tree
+            if len(self.git.diff(*default_args)):
+                return True
+            # END working tree handling
+        if untracked_files:
+            if len(self.untracked_files):
+                return True
+            # END untracked files
+        return False
+    @property
+    def untracked_files(self):
+        # make sure we get all files, not only untracked directories
+        proc = self.git.status(untracked_files=True, as_process=True)
+        stream = iter(proc.stdout)
+        untracked_files = list()
+        for line in stream:
+            if not line.startswith("# Untracked files:"):
+                continue
+            # skip two lines
+            stream.next()
+            stream.next()
+
+            for untracked_info in stream:
+                if not untracked_info.startswith("#\t"):
+                    break
+                untracked_files.append(untracked_info.replace("#\t", "").rstrip())
+            # END for each untracked info line
+        # END for each line
+        return untracked_files
+
+    def blame(self, rev, file):
+        data = self.git.blame(rev, '--', file, p=True)
+        commits = dict()
+        blames = list()
+        info = None
+
+        for line in data.splitlines(False):
+            parts = self.re_whitespace.split(line, 1)
+            firstpart = parts[0]
+            if self.re_hexsha_only.search(firstpart):
+                # handles
+                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7    - indicates blame-data start
+                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+                digits = parts[-1].split(" ")
+                if len(digits) == 3:
+                    info = {'id': firstpart}
+                    blames.append([None, []])
+                # END blame data initialization
+            else:
+                m = self.re_author_committer_start.search(firstpart)
+                if m:
+                    # handles:
+                    # author Tom Preston-Werner
+                    # author-mail <tom@mojombo.com>
+                    # author-time 1192271832
+                    # author-tz -0700
+                    # committer Tom Preston-Werner
+                    # committer-mail <tom@mojombo.com>
+                    # committer-time 1192271832
+                    # committer-tz -0700  - IGNORED BY US
+                    role = m.group(0)
+                    if firstpart.endswith('-mail'):
+                        info["%s_email" % role] = parts[-1]
+                    elif firstpart.endswith('-time'):
+                        info["%s_date" % role] = int(parts[-1])
+                    elif role == firstpart:
+                        info[role] = parts[-1]
+                    # END distinguish mail,time,name
+                else:
+                    # handle
+                    # filename lib/grit.rb
+                    # summary add Blob
+                    # <and rest>
+                    if firstpart.startswith('filename'):
+                        info['filename'] = parts[-1]
+                    elif firstpart.startswith('summary'):
+                        info['summary'] = parts[-1]
+                    elif firstpart == '':
+                        if info:
+                            sha = info['id']
+                            c = commits.get(sha)
+                            if c is None:
+                                c = self.CommitCls( self, hex_to_bin(sha),
+                                        author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+                                        authored_date=info['author_date'],
+                                        committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+                                        committed_date=info['committer_date'],
+                                        message=info['summary'])
+                                commits[sha] = c
+                            # END if commit objects needs initial creation
+                            m = self.re_tab_full_line.search(line)
+                            text, = m.groups()
+                            blames[-1][0] = c
+                            blames[-1][1].append( text )
+                            info = None
+                        # END if we collected commit info
+                    # END distinguish filename,summary,rest
+                # END distinguish author|committer vs filename,summary,rest
+            # END distinguish hexsha vs other information
+        return blames
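blame() returns a list of [commit, lines] pairs in file order, one entry per contiguous hunk, so a consumer iterates it like this (sketch; repo stands for any CmdHighLevelRepository-compatible instance, and the file name is a placeholder):

for commit, lines in repo.blame('HEAD', 'README'):
    print commit.hexsha, len(lines)   # which commit last touched this hunk, and its size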
+    @classmethod
+    def init(cls, path=None, mkdir=True, **kwargs):
+        """
+        :param kwargs:
+            keyword arguments serving as additional options to the git-init command
+
+        For more information, see the respective docs of HighLevelRepository"""
+
+        if mkdir and path and not os.path.exists(path):
+            os.makedirs(path, 0755)
+
+        # git command automatically chdir into the directory
+        git = cls.GitCls(path)
+        output = git.init(**kwargs)
+        return cls(path)
+
+    @classmethod
+    def _clone(cls, git, url, path, progress, **kwargs):
+        # special handling for windows for path at which the clone should be
+        # created.
+        # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
+        # we at least give a proper error instead of letting git fail
+        prev_cwd = None
+        prev_path = None
+        if os.name == 'nt':
+            if '~' in path:
+                raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
+
+            # on windows, git will think paths like c: are relative and prepend the
+            # current working dir ( before it fails ). We temporarily adjust the working
+            # dir to make this actually work
+            match = re.match("(\w:[/\\\])(.*)", path)
+            if match:
+                prev_cwd = os.getcwd()
+                prev_path = path
+                drive, rest_of_path = match.groups()
+                os.chdir(drive)
+                path = rest_of_path
+                kwargs['with_keep_cwd'] = True
+            # END cwd preparation
+        # END windows handling
+
+        try:
+            proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress))
+            if progress is not None:
+                digest_process_messages(proc.stderr, progress)
+            #END digest progress messages
+            finalize_process(proc)
+        finally:
+            if prev_cwd is not None:
+                os.chdir(prev_cwd)
+                path = prev_path
+            # END reset previous working dir
+        # END bad windows handling
+
+        # our git command could have a different working dir than our actual
+        # environment, hence we prepend its working dir if required
+        if not os.path.isabs(path) and git.working_dir:
+            path = join(git._working_dir, path)
+
+        # adjust remotes - there may be operating systems which use backslashes,
+        # These might be given as initial paths, but when handling the config file
+        # that contains the remote from which we were cloned, git stops liking it
+        # as it will escape the backslashes. Hence we undo the escaping just to be
+        # sure
+        repo = cls(os.path.abspath(path))
+        if repo.remotes:
+            repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
+        # END handle remote repo
+        return repo
+
+    def clone(self, path, progress = None, **kwargs):
+        """
+        :param kwargs:
+            All remaining keyword arguments are given to the git-clone command
+
+        For more information, see the respective method in HighLevelRepository"""
+        return self._clone(self.git, self.git_dir, path, CmdRemoteProgress(progress), **kwargs)
+
+    @classmethod
+    def clone_from(cls, url, to_path, progress = None, **kwargs):
+        """
+        :param kwargs: see the ``clone`` method
+        For more information, see the respective method in the HighLevelRepository"""
+        return cls._clone(cls.GitCls(os.getcwd()), url, to_path, CmdRemoteProgress(progress), **kwargs)
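The two classmethod entry points in use, as a sketch (the backend choice, paths and URL are placeholders, not taken from this commit):

from git.db.complex import CmdCompatibilityGitDB as Repo  # hypothetical backend choice

repo = Repo.init('/tmp/new_repo')                                    # create a fresh repository
cloned = Repo.clone_from('git://example.com/project.git', '/tmp/project')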
+    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+        """For all args see HighLevelRepository interface
+        :param kwargs:
+            Additional arguments passed to git-archive
+            NOTE: Use the 'format' argument to define the kind of format. Use
+            specialized ostreams to write any format supported by python
+
+        :raise GitCommandError: in case something went wrong"""
+        if treeish is None:
+            treeish = self.head.commit
+        if prefix and 'prefix' not in kwargs:
+            kwargs['prefix'] = prefix
+        kwargs['output_stream'] = ostream
+
+        self.git.archive(treeish, **kwargs)
+        return self
diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py
new file mode 100644
index 00000000..49e8c590
--- /dev/null
+++ b/git/db/cmd/complex.py
@@ -0,0 +1,16 @@
+"""Module with our own git implementation - it uses the git command"""
+
+from git.db.compat import RepoCompatibilityInterface
+from base import *
+
+
+__all__ = ['CmdPartialGitDB']
+
+
+class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin,
+                       CmdHighLevelRepository ):
+    """Utility repository which only partially implements all required methods.
+    It cannot be reliably used alone, but is provided to allow mixing it with other
+    implementations"""
+    pass
diff --git a/git/db/compat.py b/git/db/compat.py
new file mode 100644
index 00000000..767ab5e0
--- /dev/null
+++ b/git/db/compat.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module providing adaptors to maintain backwards compatibility"""
+
+class RepoCompatibilityInterface(object):
+    """Interface to install backwards compatibility of the new complex repository
+    types with the previous, all in one, repository."""
+
+    @property
+    def bare(self):
+        return self.is_bare
+
+    def rev_parse(self, *args, **kwargs):
+        return self.resolve_object(*args, **kwargs)
+
+    @property
+    def odb(self):
+        """The odb is now an integrated part of each repository"""
+        return self
+
+    @property
+    def active_branch(self):
+        """The name of the currently active branch.
+
+        :return: Head to the active branch"""
+        return self.head.reference
+
+    def __repr__(self):
+        return '<git.Repo "%s">' % self.git_dir
diff --git a/git/db/complex.py b/git/db/complex.py
new file mode 100644
index 00000000..31b047a0
--- /dev/null
+++ b/git/db/complex.py
@@ -0,0 +1,28 @@
+"""Module with many useful complex databases with different useful combinations of primary implementations"""
+
+from py.complex import PurePartialGitDB
+from cmd.complex import CmdPartialGitDB
+from compat import RepoCompatibilityInterface
+
+__all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityGitDB']
+
+class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
+    """A database which uses primarily the git command implementation, but falls back
+    to pure python where it is more feasible
+    :note: To assure consistent behaviour across implementations, when calling the
+        ``stream()`` method a cache is created. This makes this implementation a bad
+        choice when reading big files as these are streamed from memory in all cases."""
+
+class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
+    """A database which fills in its missing implementation using the pure python
+    implementation"""
+    pass
+
+class PureGitDB(PurePartialGitDB, CmdPartialGitDB):
+    """A repository which uses the pure implementation primarily, but falls back
+    on using the git command for high-level functionality"""
+
+class PureCompatibilityGitDB(RepoCompatibilityInterface, PureGitDB):
+    """Repository which uses the pure implementation primarily, but falls back
+    to the git command implementation. Please note that the CmdGitDB does it
+    the opposite way around."""
Please note that the CmdGitDB does it + the opposite way around.""" diff --git a/git/db/interface.py b/git/db/interface.py new file mode 100644 index 00000000..a4c05265 --- /dev/null +++ b/git/db/interface.py @@ -0,0 +1,838 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains interfaces for basic database building blocks""" + +__all__ = ( 'ObjectDBR', 'ObjectDBW', 'RootPathDB', 'CompoundDB', 'CachingDB', + 'TransportDB', 'ConfigurationMixin', 'RepositoryPathsMixin', + 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin', 'SubmoduleDB', + 'IndexDB', 'HighLevelRepository') + + +class ObjectDBR(object): + """Defines an interface for object database lookup. + Objects are identified either by their 20 byte bin sha""" + + def __contains__(self, sha): + return self.has_obj(sha) + + #{ Query Interface + def has_object(self, sha): + """ + :return: True if the object identified by the given 20 bytes + binary sha is contained in the database""" + raise NotImplementedError("To be implemented in subclass") + + def has_object_async(self, reader): + """Return a reader yielding information about the membership of objects + as identified by shas + :param reader: Reader yielding 20 byte shas. + :return: async.Reader yielding tuples of (sha, bool) pairs which indicate + whether the given sha exists in the database or not""" + raise NotImplementedError("To be implemented in subclass") + + def info(self, sha): + """ :return: OInfo instance + :param sha: bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def info_async(self, reader): + """Retrieve information of a multitude of objects asynchronously + :param reader: Channel yielding the sha's of the objects of interest + :return: async.Reader yielding OInfo|InvalidOInfo, in any order""" + raise NotImplementedError("To be implemented in subclass") + + def stream(self, sha): + """:return: OStream instance + :param sha: 20 bytes binary sha + :raise BadObject:""" + raise NotImplementedError("To be implemented in subclass") + + def stream_async(self, reader): + """Retrieve the OStream of multiple objects + :param reader: see ``info`` + :param max_threads: see ``ObjectDBW.store`` + :return: async.Reader yielding OStream|InvalidOStream instances in any order + :note: depending on the system configuration, it might not be possible to + read all OStreams at once. 
+    def size(self):
+        """:return: amount of objects in this database"""
+        raise NotImplementedError()
+
+    def sha_iter(self):
+        """Return iterator yielding 20 byte shas for all objects in this data base"""
+        raise NotImplementedError()
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        """
+        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+        :param partial_hexsha: hexsha with less than 40 bytes
+        :raise AmbiguousObjectName: If multiple objects would match the given sha
+        :raise BadObject: If object was not found"""
+        raise NotImplementedError()
+
+    def partial_to_complete_sha(self, partial_binsha, canonical_length):
+        """:return: 20 byte sha as inferred by the given partial binary sha
+        :param partial_binsha: binary sha with less than 20 bytes
+        :param canonical_length: length of the corresponding canonical (hexadecimal) representation.
+            It is required as binary sha's cannot display whether the original hex sha
+            had an odd or even number of characters
+        :raise AmbiguousObjectName:
+        :raise BadObject: """
+    #} END query interface
+
+
+class ObjectDBW(object):
+    """Defines an interface to create objects in the database"""
+
+    #{ Edit Interface
+    def set_ostream(self, stream):
+        """
+        Adjusts the stream to which all data should be sent when storing new objects
+
+        :param stream: if not None, the stream to use, if None the default stream
+            will be used.
+        :return: previously installed stream, or None if there was no override
+        :raise TypeError: if the stream doesn't have the supported functionality"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def ostream(self):
+        """
+        :return: overridden output stream this instance will write to, or None
+            if it will write to the default stream"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def store(self, istream):
+        """
+        Create a new object in the database
+        :return: the input istream object with its sha set to its corresponding value
+
+        :param istream: IStream compatible instance. If its sha is already set
+            to a value, the object will just be stored in the our database format,
+            in which case the input stream is expected to be in object format ( header + contents ).
+        :raise IOError: if data could not be written"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def store_async(self, reader):
+        """
+        Create multiple new objects in the database asynchronously. The method will
+        return right away, returning an output channel which receives the results as
+        they are computed.
+
+        :return: Channel yielding your IStream which served as input, in any order.
+            The IStreams sha will be set to the sha it received during the process,
+            or its error attribute will be set to the exception informing about the error.
+
+        :param reader: async.Reader yielding IStream instances.
+            The same instances will be used in the output channel as were received
+            in by the Reader.
+
+        :note: As some ODB implementations implement this operation atomically, they might
+            abort the whole operation if one item could not be processed. Hence check how
+            many items have actually been produced."""
+        raise NotImplementedError("To be implemented in subclass")
+
+    #} END edit interface
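The contract of partial_to_complete_sha_hex, unique-prefix resolution with two failure modes, reduces to a short standalone sketch over plain hex strings (the helper name and ValueError stand-ins are illustrative; the real interface raises BadObject and AmbiguousObjectName):

def complete_hexsha(partial, known_hexshas):
    matches = [s for s in known_hexshas if s.startswith(partial)]
    if not matches:
        raise ValueError("BadObject: %s" % partial)            # stand-in for BadObject
    if len(matches) > 1:
        raise ValueError("AmbiguousObjectName: %s" % partial)  # stand-in for AmbiguousObjectName
    return matches[0]

shas = ['aabb01', 'aabb02', 'ffee03']
assert complete_hexsha('ff', shas) == 'ffee03'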
+class RootPathDB(object):
+    """Provides basic facilities to retrieve files of interest"""
+
+    def __init__(self, root_path):
+        """Initialize this instance to look for its files at the given root path
+        All subsequent operations will be relative to this path
+        :raise InvalidDBRoot:
+        :note: The base will not perform any accessibility checking as the base
+            might not yet be accessible, but become accessible before the first
+            access."""
+        super(RootPathDB, self).__init__(root_path)
+
+    #{ Interface
+    def root_path(self):
+        """:return: path at which this db operates"""
+        raise NotImplementedError()
+
+    def db_path(self, rela_path):
+        """
+        :return: the given relative path relative to our database root, allowing
+            to potentially access datafiles"""
+        raise NotImplementedError()
+    #} END interface
+
+
+class CachingDB(object):
+    """A database which uses caches to speed-up access"""
+
+    #{ Interface
+
+    def update_cache(self, force=False):
+        """
+        Call this method if the underlying data changed to trigger an update
+        of the internal caching structures.
+
+        :param force: if True, the update must be performed. Otherwise the implementation
+            may decide not to perform an update if it thinks nothing has changed.
+        :return: True if an update was performed as something changed indeed"""
+
+    # END interface
+
+
+class CompoundDB(object):
+    """A database which delegates calls to sub-databases.
+    They should usually be cached and lazy-loaded"""
+
+    #{ Interface
+
+    def databases(self):
+        """:return: tuple of database instances we use for lookups"""
+        raise NotImplementedError()
+
+    #} END interface
+
+
+class IndexDB(object):
+    """A database which provides a flattened index to all objects in its currently
+    active tree."""
+    @property
+    def index(self):
+        """:return: IndexFile compatible instance"""
+        raise NotImplementedError()
+
+
+class RefSpec(object):
+    """A refspec is a simple container which provides information about the way
+    something should be fetched or pushed. It requires to use symbols to describe
+    the actual objects which is done using reference names (or respective instances
+    which resolve to actual reference names)."""
+    __slots__ = ('source', 'destination', 'force')
+
+    def __init__(self, source, destination, force=False):
+        """Initialize the instance with the required values
+        :param source: reference name or instance. If None, the Destination
+            is supposed to be deleted."""
+        self.source = source
+        self.destination = destination
+        self.force = force
+        if self.destination is None:
+            raise ValueError("Destination must be set")
+
+    def __str__(self):
+        """:return: a git-style refspec"""
+        s = str(self.source)
+        if self.source is None:
+            s = ''
+        #END handle source
+        d = str(self.destination)
+        p = ''
+        if self.force:
+            p = '+'
+        #END handle force
+        res = "%s%s:%s" % (p, s, d)
+        return res
+
+    def delete_destination(self):
+        return self.source is None
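RefSpec's string form (here with the `return res` that __str__ needs in order to yield anything) composes the git-style '+source:destination' notation, and can be verified directly against the class above:

spec = RefSpec('refs/heads/master', 'refs/remotes/origin/master', force=True)
assert str(spec) == '+refs/heads/master:refs/remotes/origin/master'

delete = RefSpec(None, 'refs/heads/gone')   # a None source means: delete the destination
assert delete.delete_destination()
assert str(delete) == ':refs/heads/gone'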
+class RemoteProgress(object):
+    """
+    Handler providing an interface to parse progress information emitted by git-push
+    and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+
+    Subclasses should derive from this type.
+    """
+    _num_op_codes = 7
+    BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)]
+    STAGE_MASK = BEGIN|END
+    OP_MASK = ~STAGE_MASK
+
+    #{ Subclass Interface
+
+    def line_dropped(self, line):
+        """Called whenever a line could not be understood and was therefore dropped."""
+        pass
+
+    def update(self, op_code, cur_count, max_count=None, message='', input=''):
+        """Called whenever the progress changes
+
+        :param op_code:
+            Integer allowing to be compared against Operation IDs and stage IDs.
+
+            Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation
+            ID as well as END. It may be that BEGIN and END are set at once in case only
+            one progress message was emitted due to the speed of the operation.
+            Between BEGIN and END, none of these flags will be set
+
+            Operation IDs are all held within the OP_MASK. Only one Operation ID will
+            be active per call.
+        :param cur_count: Current absolute count of items
+
+        :param max_count:
+            The maximum count of items we expect. It may be None in case there is
+            no maximum number of items or if it is (yet) unknown.
+
+        :param message:
+            In case of the 'WRITING' operation, it contains the amount of bytes
+            transferred. It may possibly be used for other purposes as well.
+
+        :param input:
+            The actual input string that was used to parse the information from.
+            This is usually a line from the output of git-fetch, but really
+            depends on the implementation
+
+        You may read the contents of the current line in self._cur_line"""
+        pass
+
+    def __call__(self, message, input=''):
+        """Same as update, but with a simpler interface which only provides the
+        message of the operation.
+        :note: This method will be called in addition to the update method. It is
+            up to you which one you implement"""
+        pass
+    #} END subclass interface
+
+
+class PushInfo(object):
+    """A type presenting information about the result of a push operation for exactly
+    one refspec
+
+    flags             # bitflags providing more information about the result
+    local_ref         # Reference pointing to the local reference that was pushed
+                      # It is None if the ref was deleted.
+    remote_ref_string # path to the remote reference located on the remote side
+    remote_ref        # Remote Reference on the local side corresponding to
+                      # the remote_ref_string. It can be a TagReference as well.
+    old_commit_binsha # binary sha to commit at which the remote_ref was standing before we pushed
+                      # it to local_ref.commit. Will be None if an error was indicated
+    summary           # summary line providing human readable english text about the push
+    """
+    __slots__ = tuple()
+
+    NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
+        FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
info.flags & info.REJECTED
+    # is 0 if ref is FETCH_HEAD
+    note                # additional notes given by the fetch-pack implementation intended for the user
+    old_commit_binsha   # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
+                        # field is set to the previous location of ref as binary sha or None"""
+    __slots__ = tuple()
+
+    NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
+        FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
+
+
+class TransportDB(object):
+    """A database which allows transporting objects from and to different locations
+    which are specified by urls (location) and refspecs (what to transport,
+    see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+    At the beginning of a transport operation, it will be determined which objects
+    have to be sent (either by this or by the other side).
+
+    Afterwards a pack with the required objects is sent (or received). If there is
+    nothing to send, the pack will be empty.
+
+    As refspecs involve symbolic names for references to be handled, we require
+    RefParse functionality. How this is done is up to the actual implementation."""
+    # The following variables need to be set by the derived class
+
+    #{ Interface
+
+    def fetch(self, url, refspecs, progress=None, **kwargs):
+        """Fetch the objects defined by the given refspec from the given url.
+        :param url: url identifying the source of the objects. It may also be
+            a symbol from which the respective url can be resolved, like the
+            name of the remote. The implementation should allow objects as input
+            as well, though these are assumed to resolve to a meaningful string.
+        :param refspecs: iterable of reference specifiers or RefSpec instances,
+            identifying the references to be fetched from the remote.
+        :param progress: RemoteProgress derived instance which receives progress messages for user consumption, or None
+        :param kwargs: may be used for additional parameters that the actual implementation could
+            find useful.
+        :return: List of FetchInfo compatible instances which provide information about what
+            was fetched, in the order of the input refspecs.
+        :note: even if the operation fails, some of the returned FetchInfo instances
+            may still contain errors or failures for only part of the refspecs.
+        :raise: if any issue occurs during the transport or if the url is not
+            supported by the protocol.
+        """
+        raise NotImplementedError()
+
+    def push(self, url, refspecs, progress=None, **kwargs):
+        """Transport the objects identified by the given refspec to the remote
+        at the given url.
+        :param url: Describes the location which is to receive the objects;
+            see fetch() for more details
+        :param refspecs: iterable of refspec strings or RefSpec instances
+            to identify the objects to push
+        :param progress: see fetch()
+        :param kwargs: additional arguments which may be provided by the caller
+            as they may be useful to the actual implementation
+        :todo: what to return ?
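Pulling the pieces together, a hedged sketch of driving fetch() as specified above; db stands for any TransportDB implementation, and the refspec is invented::

    class PrintProgress(RemoteProgress):
        def update(self, op_code, cur_count, max_count=None, message='', input=''):
            print self._cur_line        # echo the raw progress line

    infos = db.fetch('origin', [RefSpec('refs/heads/master', 'refs/remotes/origin/master')],
                     progress=PrintProgress())
    for info in infos:
        assert not (info.flags & info.ERROR)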
+        :raise: if any issue arises during transport or if the url cannot be handled"""
+        raise NotImplementedError()
+
+    @property
+    def remotes(self):
+        """:return: An IterableList of Remote objects allowing one to access and manipulate remotes
+        :note: Remote objects can also be used for the actual push or fetch operation"""
+        raise NotImplementedError()
+
+    def remote(self, name='origin'):
+        """:return: Remote object with the given name
+        :note: it does not necessarily exist, hence this is just a more convenient way
+            to construct Remote objects"""
+        raise NotImplementedError()
+
+    #} END interface
+
+
+    #{ Utility Methods
+
+    def create_remote(self, name, url, **kwargs):
+        """Create a new remote with the given name pointing to the given url
+        :return: Remote instance, compatible to the Remote interface"""
+        return Remote.create(self, name, url, **kwargs)
+
+    def delete_remote(self, remote):
+        """Delete the given remote.
+        :param remote: a Remote instance"""
+        return Remote.remove(self, remote)
+
+    #} END utility methods
+
+
+class ReferencesMixin(object):
+    """Database providing reference objects which in turn point to database objects
+    like Commits or Tag(Object)s.
+
+    The returned types are compatible to the interfaces of the pure python
+    reference implementation in GitDB.ref"""
+
+    def resolve(self, name):
+        """Resolve the given name into a binary sha. Valid names are as defined
+        in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+        :return: binary sha matching the name
+        :raise AmbiguousObjectName:
+        :raise BadObject: """
+        raise NotImplementedError()
+
+    def resolve_object(self, name):
+        """As ``resolve()``, but returns the Object instance pointed to by the
+        resolved binary sha
+        :return: Object instance of the correct type, e.g. shas pointing to commits
+            will be represented by a Commit object"""
+        raise NotImplementedError()
+
+    @property
+    def references(self):
+        """:return: iterable list of all Reference objects representing tags, heads
+        and remote references. This is the most general method to obtain any
+        references."""
+        raise NotImplementedError()
+
+    @property
+    def heads(self):
+        """:return: IterableList with HeadReference objects pointing to all
+        heads in the repository."""
+        raise NotImplementedError()
+
+    @property
+    def head(self):
+        """:return: HEAD Object pointing to the current head reference"""
+        raise NotImplementedError()
+
+    @property
+    def tags(self):
+        """:return: An IterableList of TagReferences or compatible items that
+        are available in this repo"""
+        raise NotImplementedError()
+
+    #{ Utility Methods
+
+    def tag(self, name):
+        """:return: Tag with the given name
+        :note: It does not necessarily exist, hence this is just a more convenient
+            way to construct TagReference objects"""
+        raise NotImplementedError()
+
+
+    def commit(self, rev=None):
+        """The Commit object for the specified revision
+        :param rev: revision specifier, see git-rev-parse for viable options.
+        :return: Commit compatible object"""
+        raise NotImplementedError()
+
+    def iter_trees(self, *args, **kwargs):
+        """:return: Iterator yielding Tree compatible objects
+        :note: Takes all arguments known to iter_commits method"""
+        raise NotImplementedError()
+
+    def tree(self, rev=None):
+        """The Tree (compatible) object for the given treeish revision
+        Examples::
+
+            repo.tree(repo.heads[0])
+
+        :param rev: is a revision pointing to a Treeish ( being a commit or tree )
+        :return: ``git.Tree``
+
+        :note:
+            If you need a non-root level tree, find it by iterating the root tree. Otherwise
+            it cannot know about its path relative to the repository root and subsequent
+            operations might have unexpected results."""
+        raise NotImplementedError()
+
+    def iter_commits(self, rev=None, paths='', **kwargs):
+        """An iterator over Commit objects representing the history of a given ref/commit
+
+        :param rev:
+            revision specifier, see git-rev-parse for viable options.
+            If None, the active branch will be used.
+
+        :param paths:
+            is an optional path or a list of paths to limit the returned commits to.
+            Commits that do not contain the path or paths will not be returned.
+
+        :param kwargs:
+            Arguments to be passed to git-rev-list - common ones are
+            max_count and skip
+
+        :note: to receive only commits between two named revisions, use the
+            "revA..revB" revision specifier
+
+        :return: iterator yielding Commit compatible instances"""
+        raise NotImplementedError()
+
+
+    #} END utility methods
+
+    #{ Edit Methods
+
+    def create_head(self, path, commit='HEAD', force=False, logmsg=None ):
+        """Create a new head within the repository.
+        :param commit: a resolvable name of the commit, or a Commit or Reference instance, the new head should point to
+        :param force: if True, a head will be created even though it already exists.
+            Otherwise an exception will be raised.
+        :param logmsg: message to append to the reference log. If None, a default message
+            will be used
+        :return: newly created Head instance"""
+        raise NotImplementedError()
+
+    def delete_head(self, *heads):
+        """Delete the given heads
+        :param heads: list of Head references that are to be deleted"""
+        raise NotImplementedError()
+
+    def create_tag(self, path, ref='HEAD', message=None, force=False):
+        """Create a new tag reference.
+        :param path: name or path of the new tag.
+        :param ref: resolvable name of the reference or commit, or Commit or Reference
+            instance describing the commit the tag should point to.
+        :param message: message to be attached to the tag reference. This will
+            create an actual Tag object carrying the message. Otherwise a TagReference
+            will be generated.
+        :param force: if True, the Tag will be created even if another tag already
+            exists at the given path. Otherwise an exception will be thrown
+        :return: TagReference object """
+        raise NotImplementedError()
+
+    def delete_tag(self, *tags):
+        """Delete the given tag references
+        :param tags: TagReferences to delete"""
+        raise NotImplementedError()
+
+    #} END edit methods
+
+    #{ Backward Compatibility
+    # These aliases need to be provided by the implementing interface as well
+    refs = references
+    branches = heads
+    #} END backward compatibility
+
+
+
+
+class RepositoryPathsMixin(object):
+    """Represents basic functionality of a full git repository. This involves an
+    optional working tree, a git directory with references and an object directory.
+
+    This type collects the respective paths and verifies the provided base path
+    truly is a git repository.
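To make the reference interface above concrete, a hedged usage sketch; repo stands for any ReferencesMixin implementation, and all names are invented::

    binsha = repo.resolve('HEAD~1')             # 20 byte binary sha
    tag_commit = repo.resolve_object('v0.1^0')  # Commit instance
    for head in repo.heads:                     # IterableList of heads
        print head.name
    repo.create_head('feature', 'HEAD', logmsg='sketched example')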
+
+    If the underlying type provides the config_reader() method, we can properly determine
+    whether this is a bare repository as well. Otherwise it will make an educated guess
+    based on the path name."""
+    #{ Subclass Interface
+    def _initialize(self, path):
+        """Initialize this instance with the given path. It may point to
+        any location within the repository's own data, as well as the working tree.
+
+        The implementation will move up and search for traces of a git repository,
+        which is indicated by a child directory ending with .git or the
+        current path portion ending with .git.
+
+        The paths made available for query are suitable for full git repositories
+        only. Plain object databases need to be fed the "objects" directory path.
+
+        :param path: the path to initialize the repository with
+            It is a path to either the root git directory or the bare git repo::
+
+                repo = Repo("/Users/mtrier/Development/git-python")
+                repo = Repo("/Users/mtrier/Development/git-python.git")
+                repo = Repo("~/Development/git-python.git")
+                repo = Repo("$REPOSITORIES/Development/git-python.git")
+
+        :raise InvalidDBRoot:
+        """
+        raise NotImplementedError()
+    #} END subclass interface
+
+    #{ Object Interface
+
+    def __eq__(self, rhs):
+        raise NotImplementedError()
+
+    def __ne__(self, rhs):
+        raise NotImplementedError()
+
+    def __hash__(self):
+        raise NotImplementedError()
+
+    def __repr__(self):
+        raise NotImplementedError()
+
+    #} END object interface
+
+    #{ Interface
+
+    @property
+    def is_bare(self):
+        """:return: True if this is a bare repository
+        :note: this value is cached upon initialization"""
+        raise NotImplementedError()
+
+    @property
+    def git_dir(self):
+        """:return: path to directory containing this actual git repository (which
+        in turn provides access to objects and references)"""
+        raise NotImplementedError()
+
+    @property
+    def working_tree_dir(self):
+        """:return: path to directory containing the working tree checkout of our
+        git repository.
+        :raise AssertionError: If this is a bare repository"""
+        raise NotImplementedError()
+
+    @property
+    def objects_dir(self):
+        """:return: path to the repository's objects directory"""
+        raise NotImplementedError()
+
+    @property
+    def working_dir(self):
+        """:return: working directory of the git process or related tools, being
+        either the working_tree_dir if available or the git_path"""
+        raise NotImplementedError()
+
+    @property
+    def description(self):
+        """:return: description text associated with this repository, or set the
+        description."""
+        raise NotImplementedError()
+
+    #} END interface
+
+
+class ConfigurationMixin(object):
+    """Interface providing configuration handler instances, which provide locked access
+    to a single git-style configuration file (INI-like format, using tabs to improve readability).
+
+    Configuration readers can be initialized with multiple files at once, whose information is concatenated
+    when reading. Lower-level files overwrite values from higher-level files, i.e. a repository configuration file
+    overwrites information coming from a system configuration file.
+
+    :note: for the 'repository' config level, a git_path() compatible type is required"""
+    config_level = ("system", "global", "repository")
+
+    #{ Interface
+
+    def config_reader(self, config_level=None):
+        """
+        :return:
+            GitConfigParser allowing one to read the full git configuration, but not to write it
+
+            The configuration will include values from the system, user and repository
+            configuration files.
+
+        :param config_level:
+            For possible values, see the config_writer method.
+            If None, all applicable levels will be used. Specify a level in case
+            you know which exact file you wish to read, for instance to prevent
+            reading multiple files.
+        :note: On Windows, the system configuration cannot currently be read as the path is
+            unknown; the global path will be used instead."""
+        raise NotImplementedError()
+
+    def config_writer(self, config_level="repository"):
+        """
+        :return:
+            GitConfigParser allowing to write values of the specified configuration file level.
+            Config writers should be retrieved, used to change the configuration, and written
+            right away, as they will lock the configuration file in question and prevent others
+            from writing it.
+
+        :param config_level:
+            One of the following values:
+            system = system wide configuration file
+            global = user level configuration file
+            repository = configuration file for this repository only"""
+        raise NotImplementedError()
+
+
+    #} END interface
+
+
+class SubmoduleDB(object):
+    """Interface providing access to git repository submodules.
+    The actual implementation is found in the Submodule object type, which is
+    currently only available in one implementation."""
+
+    @property
+    def submodules(self):
+        """
+        :return: git.IterableList(Submodule, ...) of direct submodules
+            available from the current head"""
+        raise NotImplementedError()
+
+    def submodule(self, name):
+        """ :return: Submodule with the given name
+        :raise ValueError: If no such submodule exists"""
+        raise NotImplementedError()
+
+    def create_submodule(self, *args, **kwargs):
+        """Create a new submodule
+
+        :note: See the documentation of Submodule.add for a description of the
+            applicable parameters
+        :return: created submodule"""
+        raise NotImplementedError()
+
+    def iter_submodules(self, *args, **kwargs):
+        """An iterator yielding Submodule instances, see Traversable interface
+        for a description of args and kwargs
+        :return: Iterator"""
+        raise NotImplementedError()
+
+    def submodule_update(self, *args, **kwargs):
+        """Update the submodules, keeping the repository consistent as it will
+        take the previous state into consideration. For more information, please
+        see the documentation of RootModule.update"""
+        raise NotImplementedError()
+
+
+class HighLevelRepository(object):
+    """An interface combining several pieces of high-level repository functionality and properties"""
+
+    @property
+    def daemon_export(self):
+        """:return: True if the repository may be published by the git-daemon"""
+        raise NotImplementedError()
+
+    def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+        """
+        :return:
+            ``True`` if the repository is considered dirty. By default it will react
+            like git-status without untracked files, hence it is dirty if the
+            index or the working copy have changes."""
+        raise NotImplementedError()
+
+    @property
+    def untracked_files(self):
+        """
+        :return:
+            list(str,...)
+
+        :note:
+            ignored files will not appear here, i.e. files mentioned in .gitignore.
+            Bare repositories never have untracked files"""
+        raise NotImplementedError()
+
+    def blame(self, rev, file):
+        """The blame information for the given file at the given revision.
+
+        :param rev: revision specifier, see git-rev-parse for viable options.
+        :return:
+            list: [Commit, list: [<line>]]
+            A list of tuples associating a Commit object with a list of lines that
+            changed within the given commit.
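As a hedged sketch of the configuration interface described above; repo is any ConfigurationMixin implementation, and the section/option names are invented::

    reader = repo.config_reader()               # all applicable levels, merged
    email = reader.get_value('user', 'email')
    writer = repo.config_writer('repository')   # locks .git/config while open
    writer.set_value('core', 'logallrefupdates', True)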
+            The Commit objects will be given in order
+            of appearance."""
+        raise NotImplementedError()
+
+    @classmethod
+    def init(cls, path=None, mkdir=True):
+        """Initialize a git repository at the given path if specified
+
+        :param path:
+            is the full path to the repo (traditionally ends with /<name>.git)
+            or None, in which case the repository will be created in the current
+            working directory
+
+        :param mkdir:
+            if specified, will create the repository directory if it doesn't
+            already exist. Creates the directory with a mode=0755.
+            Only effective if a path is explicitly given
+
+        :return: Instance pointing to the newly created repository with capabilities
+            similar to this class"""
+        raise NotImplementedError()
+
+    def clone(self, path, progress = None):
+        """Create a clone from this repository.
+        :param path:
+            is the full path of the new repo (traditionally ends with /<name>.git).
+
+        :param progress:
+            a RemoteProgress instance or None if no progress information is required
+
+        :return: ``git.Repo`` (the newly cloned repo)"""
+        raise NotImplementedError()
+
+    @classmethod
+    def clone_from(cls, url, to_path, progress = None):
+        """Create a clone from the given URL
+        :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
+        :param to_path: path to which the repository should be cloned
+        :param progress:
+            a RemoteProgress instance or None if no progress information is required
+        :return: instance pointing to the cloned directory with capabilities similar to this class"""
+        raise NotImplementedError()
+
+    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+        """Archive the tree at the given revision.
+        :param ostream: file compatible stream object to which the archive will be written
+        :param treeish: is the treeish name/id, defaults to active branch
+        :param prefix: is the optional prefix to prepend to each filename in the archive
+        :param kwargs:
+            Additional arguments passed to git-archive
+            NOTE: Use the 'format' argument to define the kind of format.
Use + specialized ostreams to write any format supported by python + :return: self""" + raise NotImplementedError() + + diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py new file mode 100644 index 00000000..8a681e42 --- /dev/null +++ b/git/db/py/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/db/py/base.py b/git/db/py/base.py new file mode 100644 index 00000000..2fdbd202 --- /dev/null +++ b/git/db/py/base.py @@ -0,0 +1,474 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains basic implementations for the interface building blocks""" +from git.db.interface import * + +from git.util import ( + pool, + join, + isfile, + normpath, + abspath, + dirname, + LazyMixin, + hex_to_bin, + bin_to_hex, + expandvars, + expanduser, + exists, + is_git_dir, + ) + +from git.index import IndexFile +from git.config import GitConfigParser +from git.exc import ( + BadObject, + AmbiguousObjectName, + InvalidGitRepositoryError, + NoSuchPathError + ) + +from async import ChannelThreadTask + +from itertools import chain +import sys +import os + + +__all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', + 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin', + 'PureIndexDB') + + +class PureObjectDBR(ObjectDBR): + + #{ Query Interface + + def has_object_async(self, reader): + task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha))) + return pool.add_task(task) + + def info_async(self, reader): + task = ChannelThreadTask(reader, str(self.info_async), self.info) + return pool.add_task(task) + + def stream_async(self, reader): + # base implementation just uses the stream method repeatedly + task = ChannelThreadTask(reader, str(self.stream_async), self.stream) + return pool.add_task(task) + + def partial_to_complete_sha_hex(self, partial_hexsha): + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha)) + + #} END query interface + + +class PureObjectDBW(ObjectDBW): + + def __init__(self, *args, **kwargs): + super(PureObjectDBW, self).__init__(*args, **kwargs) + self._ostream = None + + #{ Edit Interface + def set_ostream(self, stream): + cstream = self._ostream + self._ostream = stream + return cstream + + def ostream(self): + return self._ostream + + def store_async(self, reader): + task = ChannelThreadTask(reader, str(self.store_async), self.store) + return pool.add_task(task) + + #} END edit interface + + +class PureRootPathDB(RootPathDB): + + def __init__(self, root_path): + self._root_path = root_path + super(PureRootPathDB, self).__init__(root_path) + + + + #{ Interface + def root_path(self): + return self._root_path + + def db_path(self, rela_path): + return join(self._root_path, rela_path) + #} END interface + + +def _databases_recursive(database, output): + """Fill output list with database from db, in order. 
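The odd-length padding in partial_to_complete_sha_hex above merits a tiny worked example; a hedged sketch, db being any PureObjectDBR implementation and the partial sha invented::

    # 'abc12' has 5 hex digits and cannot map to whole bytes, so it is
    # padded to 'abc120' before hex_to_bin; the original length (5) is
    # passed along so the padding nibble is ignored during comparison
    # (raises BadObject unless a matching object actually exists)
    binsha = db.partial_to_complete_sha_hex('abc12')
    assert len(binsha) == 20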
Deals with Loose, Packed + and compound databases.""" + if isinstance(database, CompoundDB): + compounds = list() + dbs = database.databases() + output.extend(db for db in dbs if not isinstance(db, CompoundDB)) + for cdb in (db for db in dbs if isinstance(db, CompoundDB)): + _databases_recursive(cdb, output) + else: + output.append(database) + # END handle database type + + +class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + elif attr == '_obj_cache': + self._obj_cache = dict() + else: + super(PureCompoundDB, self)._set_cache_(attr) + + def _db_query(self, sha): + """:return: database containing the given 20 byte sha + :raise BadObject:""" + # most databases use binary representations, prevent converting + # it everytime a database is being queried + try: + return self._obj_cache[sha] + except KeyError: + pass + # END first level cache + + for db in self._dbs: + if db.has_object(sha): + self._obj_cache[sha] = db + return db + # END for each database + raise BadObject(sha) + + #{ PureObjectDBR interface + + def has_object(self, sha): + try: + self._db_query(sha) + return True + except BadObject: + return False + # END handle exceptions + + def info(self, sha): + return self._db_query(sha).info(sha) + + def stream(self, sha): + return self._db_query(sha).stream(sha) + + def size(self): + return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0) + + def sha_iter(self): + return chain(*(db.sha_iter() for db in self._dbs)) + + #} END object DBR Interface + + #{ Interface + + def databases(self): + return tuple(self._dbs) + + def update_cache(self, force=False): + # something might have changed, clear everything + self._obj_cache.clear() + stat = False + for db in self._dbs: + if isinstance(db, CachingDB): + stat |= db.update_cache(force) + # END if is caching db + # END for each database to update + return stat + + def partial_to_complete_sha_hex(self, partial_hexsha): + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + + candidate = None + for db in self._dbs: + full_bin_sha = None + try: + if hasattr(db, 'partial_to_complete_sha_hex'): + full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) + else: + full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) + # END handle database type + except BadObject: + continue + # END ignore bad objects + if full_bin_sha: + if candidate and candidate != full_bin_sha: + raise AmbiguousObjectName(partial_hexsha) + candidate = full_bin_sha + # END handle candidate + # END for each db + if not candidate: + raise BadObject(partial_binsha) + return candidate + + def partial_to_complete_sha(self, partial_binsha, hex_len): + """Simple adaptor to feed into our implementation""" + return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len]) + #} END interface + + +class PureRepositoryPathsMixin(RepositoryPathsMixin): + # slots has no effect here, its just to keep track of used attrs + __slots__ = ("_git_path", '_bare') + + #{ Configuration + repo_dir = '.git' + objs_dir = 'objects' + #} END configuration + + #{ Subclass Interface + def _initialize(self, path): + epath = abspath(expandvars(expanduser(path or os.getcwd()))) + + if not exists(epath): + raise NoSuchPathError(epath) + #END check file + + self._working_tree_dir = None + 
self._git_path = None + curpath = epath + + # walk up the path to find the .git dir + while curpath: + if is_git_dir(curpath): + self._git_path = curpath + self._working_tree_dir = os.path.dirname(curpath) + break + gitpath = join(curpath, self.repo_dir) + if is_git_dir(gitpath): + self._git_path = gitpath + self._working_tree_dir = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + # END while curpath + + if self._git_path is None: + raise InvalidGitRepositoryError(epath) + # END path not found + + self._bare = self._git_path.endswith(self.repo_dir) + if hasattr(self, 'config_reader'): + try: + self._bare = self.config_reader("repository").getboolean('core','bare') + except Exception: + # lets not assume the option exists, although it should + pass + #END check bare flag + + #} end subclass interface + + #{ Object Interface + + def __eq__(self, rhs): + if hasattr(rhs, 'git_dir'): + return self.git_dir == rhs.git_dir + return False + + def __ne__(self, rhs): + return not self.__eq__(rhs) + + def __hash__(self): + return hash(self.git_dir) + + def __repr__(self): + return "%s(%r)" % (type(self).__name__, self.git_dir) + + #} END object interface + + #{ Interface + + @property + def is_bare(self): + return self._bare + + @property + def git_dir(self): + return self._git_path + + @property + def working_tree_dir(self): + if self.is_bare: + raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir) + #END assertion + return dirname(self.git_dir) + + @property + def objects_dir(self): + return join(self.git_dir, self.objs_dir) + + @property + def working_dir(self): + if self.is_bare: + return self.git_dir + else: + return self.working_tree_dir + #END handle bare state + + def _mk_description(): + def _get_description(self): + filename = join(self.git_dir, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = join(self.git_dir, 'description') + file(filename, 'w').write(descr+'\n') + + return property(_get_description, _set_description, "Descriptive text for the content of the repository") + + description = _mk_description() + del(_mk_description) + + #} END interface + + +class PureConfigurationMixin(ConfigurationMixin): + + #{ Configuration + system_config_file_name = "gitconfig" + repo_config_file_name = "config" + #} END + + def __init__(self, *args, **kwargs): + """Verify prereqs""" + super(PureConfigurationMixin, self).__init__(*args, **kwargs) + assert hasattr(self, 'git_dir') + + def _path_at_level(self, level ): + # we do not support an absolute path of the gitconfig on windows , + # use the global config instead + if sys.platform == "win32" and level == "system": + level = "global" + #END handle windows + + if level == "system": + return "/etc/%s" % self.system_config_file_name + elif level == "global": + return normpath(expanduser("~/.%s" % self.system_config_file_name)) + elif level == "repository": + return join(self.git_dir, self.repo_config_file_name) + #END handle level + + raise ValueError("Invalid configuration level: %r" % level) + + #{ Interface + + def config_reader(self, config_level=None): + files = None + if config_level is None: + files = [ self._path_at_level(f) for f in self.config_level ] + else: + files = [ self._path_at_level(config_level) ] + #END handle level + return GitConfigParser(files, read_only=True) + + def config_writer(self, config_level="repository"): + return GitConfigParser(self._path_at_level(config_level), 
read_only=False) + + + #} END interface + + +class PureIndexDB(IndexDB): + #{ Configuration + IndexCls = IndexFile + #} END configuration + + @property + def index(self): + return self.IndexCls(self) + + +class PureAlternatesFileMixin(object): + """Utility able to read and write an alternates file through the alternates property + It needs to be part of a type with the git_dir or db_path property. + + The file by default is assumed to be located at the default location as imposed + by the standard git repository layout""" + + #{ Configuration + alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file + + #} END configuration + + def __init__(self, *args, **kwargs): + super(PureAlternatesFileMixin, self).__init__(*args, **kwargs) + self._alternates_path() # throws on incompatible type + + #{ Interface + + def _alternates_path(self): + if hasattr(self, 'git_dir'): + return join(self.git_dir, 'objects', self.alternates_filepath) + elif hasattr(self, 'db_path'): + return self.db_path(self.alternates_filepath) + else: + raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method") + #END handle path + + def _get_alternates(self): + """The list of alternates for this repo from which objects can be retrieved + + :return: list of strings being pathnames of alternates""" + alternates_path = self._alternates_path() + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return list() + # END handle path exists + + def _set_alternates(self, alts): + """Sets the alternates + + :parm alts: + is the array of string paths representing the alternates at which + git should look for objects, i.e. 
/home/user/repo/.git/objects
+
+        :raise NoSuchPathError:
+        :note:
+            The method does not check for the existence of the paths in alts
+            as the caller is responsible."""
+        alternates_path = self._alternates_path()
+        if not alts:
+            if isfile(alternates_path):
+                os.remove(alternates_path)
+        else:
+            try:
+                f = open(alternates_path, 'w')
+                f.write("\n".join(alts))
+            finally:
+                f.close()
+            # END file handling
+        # END alts handling
+
+    alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list of paths to be used as alternates")
+
+    #} END interface
+
diff --git a/git/db/py/complex.py b/git/db/py/complex.py
new file mode 100644
index 00000000..d5c185f3
--- /dev/null
+++ b/git/db/py/complex.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of PurePartialGitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.db.interface import HighLevelRepository
+from base import (
+        PureCompoundDB,
+        PureObjectDBW,
+        PureRootPathDB,
+        PureRepositoryPathsMixin,
+        PureConfigurationMixin,
+        PureAlternatesFileMixin,
+        PureIndexDB,
+    )
+from transport import PureTransportDB
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+from submodule import PureSubmoduleDB
+
+from git.db.compat import RepoCompatibilityInterface
+
+from git.util import (
+        LazyMixin,
+        normpath,
+        join,
+        dirname
+    )
+from git.exc import (
+        InvalidDBRoot,
+        BadObject,
+        AmbiguousObjectName
+    )
+import os
+
+__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin):
+    """A git-style object-only database, which contains all objects in the 'objects'
+    subdirectory.
+    :note: The type needs to be initialized on the ./objects directory to function,
+        as it deals solely with object lookup.
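A hedged sketch of the alternates property implemented above; db mixes in PureAlternatesFileMixin, and the path is made up::

    db.alternates = ['/mnt/shared/repo.git/objects']   # writes info/alternates
    assert db.alternates == ['/mnt/shared/repo.git/objects']
    db.alternates = []                                 # removes the file again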
Use a PurePartialGitDB type if you need
+        reference and push support."""
+    # Configuration
+    PackDBCls = PurePackedODB
+    LooseDBCls = PureLooseObjectODB
+    PureReferenceDBCls = PureReferenceDB
+
+    # Directories
+    packs_dir = 'pack'
+    loose_dir = ''
+
+
+    def __init__(self, root_path):
+        """Initialize ourselves on a git ./objects directory"""
+        super(PureGitODB, self).__init__(root_path)
+
+    def _set_cache_(self, attr):
+        if attr == '_dbs' or attr == '_loose_db':
+            self._dbs = list()
+            loose_db = None
+            for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+                                   (self.loose_dir, self.LooseDBCls),
+                                   (self.alternates_filepath, self.PureReferenceDBCls)):
+                path = self.db_path(subpath)
+                if os.path.exists(path):
+                    self._dbs.append(dbcls(path))
+                    if dbcls is self.LooseDBCls:
+                        loose_db = self._dbs[-1]
+                    # END remember loose db
+                # END check path exists
+            # END for each db type
+
+            # should have at least one subdb
+            if not self._dbs:
+                raise InvalidDBRoot(self.root_path())
+            # END handle error
+
+            # the loose database should provide the store method
+            assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality"
+
+            # finally set the value
+            self._loose_db = loose_db
+        else:
+            super(PureGitODB, self)._set_cache_(attr)
+        # END handle attrs
+
+    #{ PureObjectDBW interface
+
+    def store(self, istream):
+        return self._loose_db.store(istream)
+
+    def ostream(self):
+        return self._loose_db.ostream()
+
+    def set_ostream(self, ostream):
+        return self._loose_db.set_ostream(ostream)
+
+    #} END objectdbw interface
+
+
+
+class PurePartialGitDB(PureGitODB,
+                       PureRepositoryPathsMixin, PureConfigurationMixin,
+                       PureReferencesMixin, PureSubmoduleDB,
+                       PureIndexDB, PureTransportDB
+                       # HighLevelRepository Currently not implemented !
+                       ):
+    """Git-like database with support for object lookup as well as reference resolution.
+    Our rootpath is set to the actual .git directory (bare or not).
+
+    The root_path will be the git objects directory.
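For orientation, a hedged sketch of instantiating the two database flavours defined here; the paths are made up::

    odb = PureGitODB('/path/to/repo/.git/objects')   # object lookup only
    print odb.size()                                 # number of known objects
    gdb = PurePartialGitDB('/path/to/repo')          # locates .git by itself
    print gdb.head.commit                            # references work as well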
Use git_path() to obtain the actual top-level + git directory.""" + #directories + + def __init__(self, root_path): + """Initialize ourselves on the .git directory, or the .git/objects directory.""" + PureRepositoryPathsMixin._initialize(self, root_path) + super(PurePartialGitDB, self).__init__(self.objects_dir) + + + +class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface): + """Pure git database with a compatability layer required by 0.3x code""" + diff --git a/git/db/py/loose.py b/git/db/py/loose.py new file mode 100644 index 00000000..6e72aff0 --- /dev/null +++ b/git/db/py/loose.py @@ -0,0 +1,263 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureRootPathDB, + PureObjectDBR, + PureObjectDBW + ) + + +from git.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) + +from git.stream import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer + ) + +from git.base import ( + OStream, + OInfo + ) + +from git.util import ( + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join + ) + +from git.fun import ( + chunk_size, + loose_object_header_info, + write_object, + stream_copy + ) + +import tempfile +import mmap +import sys +import os + + +__all__ = ( 'PureLooseObjectODB', ) + + +class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = 0444 + if os.name == 'nt': + new_objects_mode = 0644 + + + def __init__(self, root_path): + super(PureLooseObjectODB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + 
raise BadObject(sha)
+            # END handle error
+        # END exception handling
+
+    def set_ostream(self, stream):
+        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+        if stream is not None and not isinstance(stream, Sha1Writer):
+            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+        return super(PureLooseObjectODB, self).set_ostream(stream)
+
+    def info(self, sha):
+        m = self._map_loose_object(sha)
+        try:
+            type, size = loose_object_header_info(m)
+            return OInfo(sha, type, size)
+        finally:
+            m.close()
+        # END assure release of system resources
+
+    def stream(self, sha):
+        m = self._map_loose_object(sha)
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+        return OStream(sha, type, size, stream)
+
+    def has_object(self, sha):
+        try:
+            self.readable_db_object_path(bin_to_hex(sha))
+            return True
+        except BadObject:
+            return False
+        # END check existence
+
+    def partial_to_complete_sha_hex(self, partial_hexsha):
+        """:return: 20 byte binary sha1 string which matches the given name uniquely
+        :param partial_hexsha: hexadecimal partial name
+        :raise AmbiguousObjectName:
+        :raise BadObject: """
+        candidate = None
+        for binsha in self.sha_iter():
+            if bin_to_hex(binsha).startswith(partial_hexsha):
+                # it can't ever find the same object twice
+                if candidate is not None:
+                    raise AmbiguousObjectName(partial_hexsha)
+                candidate = binsha
+        # END for each object
+        if candidate is None:
+            raise BadObject(partial_hexsha)
+        return candidate
+
+    def store(self, istream):
+        """:note: The sha we produce will be hex by nature"""
+        tmp_path = None
+        writer = self.ostream()
+        if writer is None:
+            # open a tmp file to write the data to
+            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+            if istream.binsha is None:
+                writer = FDCompressedSha1Writer(fd)
+            else:
+                writer = FDStream(fd)
+            # END handle direct stream copies
+        # END handle custom writer
+
+        try:
+            try:
+                if istream.binsha is not None:
+                    # copy as much as possible, the actual uncompressed item size might
+                    # be smaller than the compressed version
+                    stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+                else:
+                    # write object with header, we have to make a new one
+                    write_object(istream.type, istream.size, istream.read, writer.write,
+                                 chunk_size=self.stream_chunk_size)
+                # END handle direct stream copies
+            finally:
+                if tmp_path:
+                    writer.close()
+            # END assure target stream is closed
+        except:
+            if tmp_path:
+                os.remove(tmp_path)
+            raise
+        # END assure tmpfile removal on error
+
+        hexsha = None
+        if istream.binsha:
+            hexsha = istream.hexsha
+        else:
+            hexsha = writer.sha(as_hex=True)
+        # END handle sha
+
+        if tmp_path:
+            obj_path = self.db_path(self.object_path(hexsha))
+            obj_dir = dirname(obj_path)
+            if not isdir(obj_dir):
+                mkdir(obj_dir)
+            # END handle destination directory
+            # rename onto existing doesn't work on windows
+            if os.name == 'nt' and isfile(obj_path):
+                remove(obj_path)
+            # END handle win32
+            rename(tmp_path, obj_path)
+
+            # make sure it's readable for all!
It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) + diff --git a/git/db/py/mem.py b/git/db/py/mem.py new file mode 100644 index 00000000..da02dbdd --- /dev/null +++ b/git/db/py/mem.py @@ -0,0 +1,112 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains the MemoryDatabase implementation""" +from base import ( + PureObjectDBR, + PureObjectDBW + ) +from loose import PureLooseObjectODB +from git.base import ( + OStream, + IStream, + ) + +from git.exc import ( + BadObject, + UnsupportedOperation + ) +from git.stream import ( + ZippedStoreShaWriter, + DecompressMemMapReader, + ) + +from cStringIO import StringIO + +__all__ = ("PureMemoryDB", ) + +class PureMemoryDB(PureObjectDBR, PureObjectDBW): + """A memory database stores everything to memory, providing fast IO and object + retrieval. It should be used to buffer results and obtain SHAs before writing + it to the actual physical storage, as it allows to query whether object already + exists in the target storage before introducing actual IO + + :note: memory is currently not threadsafe, hence the async methods cannot be used + for storing""" + + def __init__(self): + super(PureMemoryDB, self).__init__() + self._db = PureLooseObjectODB("path/doesnt/matter") + + # maps 20 byte shas to their OStream objects + self._cache = dict() + + def set_ostream(self, stream): + raise UnsupportedOperation("PureMemoryDB's always stream into memory") + + def store(self, istream): + zstream = ZippedStoreShaWriter() + self._db.set_ostream(zstream) + + istream = self._db.store(istream) + zstream.close() # close to flush + zstream.seek(0) + + # don't provide a size, the stream is written in object format, hence the + # header needs decompression + decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) + self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) + + return istream + + def store_async(self, reader): + raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access") + + def has_object(self, sha): + return sha in self._cache + + def info(self, sha): + # we always return streams, which are infos as well + return self.stream(sha) + + def stream(self, sha): + try: + ostream = self._cache[sha] + # rewind stream for the next one to read + ostream.stream.seek(0) + return ostream + except KeyError: + raise BadObject(sha) + # END exception handling + + def size(self): + return len(self._cache) + + def sha_iter(self): + return self._cache.iterkeys() + + + #{ Interface + def stream_copy(self, sha_iter, odb): + """Copy the streams as identified by sha's yielded by sha_iter into the given odb + The streams will be copied directly + :note: the object will only be written if it did not exist in the target db + :return: amount of streams actually copied into odb. 
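A hedged sketch of the buffering workflow PureMemoryDB is meant for; odb stands for any persistent object database, e.g. a PureLooseObjectODB::

    from cStringIO import StringIO
    mdb = PureMemoryDB()
    istream = IStream('blob', 10, StringIO('0123456789'))
    mdb.store(istream)                        # fills in istream.binsha
    assert mdb.has_object(istream.binsha)
    mdb.stream_copy([istream.binsha], odb)    # persist only missing objects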
If smaller than the amount + of input shas, one or more objects did already exist in odb""" + count = 0 + for sha in sha_iter: + if odb.has_object(sha): + continue + # END check object existance + + ostream = self.stream(sha) + # compressed data including header + sio = StringIO(ostream.stream.data()) + istream = IStream(ostream.type, ostream.size, sio, sha) + + odb.store(istream) + count += 1 + # END for each sha + return count + #} END interface diff --git a/git/db/py/pack.py b/git/db/py/pack.py new file mode 100644 index 00000000..75b75468 --- /dev/null +++ b/git/db/py/pack.py @@ -0,0 +1,212 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing a database to deal with packs""" +from git.db import CachingDB +from base import ( + PureRootPathDB, + PureObjectDBR + ) + +from git.util import LazyMixin + +from git.exc import ( + BadObject, + UnsupportedOperation, + AmbiguousObjectName + ) + +from git.pack import PackEntity + +import os +import glob + +__all__ = ('PurePackedODB', ) + +#{ Utilities + + +class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin): + """A database operating on a set of object packs""" + + # the type to use when instantiating a pack entity + PackEntityCls = PackEntity + + # sort the priority list every N queries + # Higher values are better, performance tests don't show this has + # any effect, but it should have one + _sort_interval = 500 + + def __init__(self, root_path): + super(PurePackedODB, self).__init__(root_path) + # list of lists with three items: + # * hits - number of times the pack was hit with a request + # * entity - Pack entity instance + # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query + # self._entities = list() # lazy loaded list + self._hit_count = 0 # amount of hits + self._st_mtime = 0 # last modification data of our root path + + def _set_cache_(self, attr): + if attr == '_entities': + self._entities = list() + self.update_cache(force=True) + # END handle entities initialization + + def _sort_entities(self): + self._entities.sort(key=lambda l: l[0], reverse=True) + + def _pack_info(self, sha): + """:return: tuple(entity, index) for an item at the given sha + :param sha: 20 or 40 byte sha + :raise BadObject: + :note: This method is not thread-safe, but may be hit in multi-threaded + operation. The worst thing that can happen though is a counter that + was not incremented, or the list being in wrong order. So we safe + the time for locking here, lets see how that goes""" + # presort ? 
+ if self._hit_count % self._sort_interval == 0: + self._sort_entities() + # END update sorting + + for item in self._entities: + index = item[2](sha) + if index is not None: + item[0] += 1 # one hit for you + self._hit_count += 1 # general hit count + return (item[1], index) + # END index found in pack + # END for each item + + # no hit, see whether we have to update packs + # NOTE: considering packs don't change very often, we safe this call + # and leave it to the super-caller to trigger that + raise BadObject(sha) + + #{ Object DB Read + + def has_object(self, sha): + try: + self._pack_info(sha) + return True + except BadObject: + return False + # END exception handling + + def info(self, sha): + entity, index = self._pack_info(sha) + return entity.info_at_index(index) + + def stream(self, sha): + entity, index = self._pack_info(sha) + return entity.stream_at_index(index) + + def sha_iter(self): + sha_list = list() + for entity in self.entities(): + index = entity.index() + sha_by_index = index.sha + for index in xrange(index.size()): + yield sha_by_index(index) + # END for each index + # END for each entity + + def size(self): + sizes = [item[1].index().size() for item in self._entities] + return reduce(lambda x,y: x+y, sizes, 0) + + #} END object db read + + #{ object db write + + def store(self, istream): + """Storing individual objects is not feasible as a pack is designed to + hold multiple objects. Writing or rewriting packs for single objects is + inefficient""" + raise UnsupportedOperation() + + def store_async(self, reader): + # TODO: add PureObjectDBRW before implementing this + raise NotImplementedError() + + #} END object db write + + + #{ Interface + + def update_cache(self, force=False): + """ + Update our cache with the acutally existing packs on disk. Add new ones, + and remove deleted ones. We keep the unchanged ones + + :param force: If True, the cache will be updated even though the directory + does not appear to have changed according to its modification timestamp. + :return: True if the packs have been updated so there is new information, + False if there was no change to the pack database""" + stat = os.stat(self.root_path()) + if not force and stat.st_mtime <= self._st_mtime: + return False + # END abort early on no change + self._st_mtime = stat.st_mtime + + # packs are supposed to be prefixed with pack- by git-convention + # get all pack files, figure out what changed + pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) + our_pack_files = set(item[1].pack().path() for item in self._entities) + + # new packs + for pack_file in (pack_files - our_pack_files): + # init the hit-counter/priority with the size, a good measure for hit- + # probability. 
Its implemented so that only 12 bytes will be read + entity = self.PackEntityCls(pack_file) + self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) + # END for each new packfile + + # removed packs + for pack_file in (our_pack_files - pack_files): + del_index = -1 + for i, item in enumerate(self._entities): + if item[1].pack().path() == pack_file: + del_index = i + break + # END found index + # END for each entity + assert del_index != -1 + del(self._entities[del_index]) + # END for each removed pack + + # reinitialize prioritiess + self._sort_entities() + return True + + def entities(self): + """:return: list of pack entities operated upon by this database""" + return [ item[1] for item in self._entities ] + + def partial_to_complete_sha(self, partial_binsha, canonical_length): + """:return: 20 byte sha as inferred by the given partial binary sha + :param partial_binsha: binary sha with less than 20 bytes + :param canonical_length: length of the corresponding canonical representation. + It is required as binary sha's cannot display whether the original hex sha + had an odd or even number of characters + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for item in self._entities: + item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) + if item_index is not None: + sha = item[1].index().sha(item_index) + if candidate and candidate != sha: + raise AmbiguousObjectName(partial_binsha) + candidate = sha + # END handle full sha could be found + # END for each entity + + if candidate: + return candidate + + # still not found ? + raise BadObject(partial_binsha) + + #} END interface diff --git a/git/db/py/ref.py b/git/db/py/ref.py new file mode 100644 index 00000000..d2c77a3a --- /dev/null +++ b/git/db/py/ref.py @@ -0,0 +1,77 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import PureCompoundDB + +import os +__all__ = ('PureReferenceDB', ) + +class PureReferenceDB(PureCompoundDB): + """A database consisting of database referred to in a file""" + + # Configuration + # Specifies the object database to use for the paths found in the alternates + # file. 
If None, it defaults to the PureGitODB + ObjectDBCls = None + + def __init__(self, ref_file): + super(PureReferenceDB, self).__init__() + self._ref_file = ref_file + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + self._update_dbs_from_ref_file() + else: + super(PureReferenceDB, self)._set_cache_(attr) + # END handle attrs + + def _update_dbs_from_ref_file(self): + dbcls = self.ObjectDBCls + if dbcls is None: + # late import + import complex + dbcls = complex.PureGitODB + # END get db type + + # try to get as many as possible, don't fail if some are unavailable + ref_paths = list() + try: + ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()] + except (OSError, IOError): + pass + # END handle alternates + + ref_paths_set = set(ref_paths) + cur_ref_paths_set = set(db.root_path() for db in self._dbs) + + # remove existing + for path in (cur_ref_paths_set - ref_paths_set): + for i, db in enumerate(self._dbs[:]): + if db.root_path() == path: + del(self._dbs[i]) + continue + # END del matching db + # END for each path to remove + + # add new + # sort them to maintain order + added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) + for path in added_paths: + try: + db = dbcls(path) + # force an update to verify path + if isinstance(db, PureCompoundDB): + db.databases() + # END verification + self._dbs.append(db) + except Exception, e: + # ignore invalid paths or issues + pass + # END for each path to add + + def update_cache(self, force=False): + # re-read alternates and update databases + self._update_dbs_from_ref_file() + return super(PureReferenceDB, self).update_cache(force) diff --git a/git/repo/fun.py b/git/db/py/resolve.py index 03d55716..7bea779e 100644 --- a/git/repo/fun.py +++ b/git/db/py/resolve.py @@ -1,35 +1,36 @@ -"""Package with general repository related functions""" -import os -from gitdb.exc import BadObject -from git.refs import SymbolicReference -from git.objects import Object -from gitdb.util import ( +"""Module with an implementation for refspec parsing. 
It is the pure-python +version assuming compatible interface for reference and object types""" + +from git.db.interface import ReferencesMixin +from git.exc import BadObject +from git.refs import ( + SymbolicReference, + Reference, + HEAD, + Head, + TagReference + ) +from git.refs.head import HEAD +from git.refs.headref import Head +from git.refs.tag import TagReference + +from git.objects.base import Object +from git.objects.commit import Commit +from git.util import ( join, isdir, isfile, hex_to_bin, - bin_to_hex + bin_to_hex, + is_git_dir ) from string import digits +import os +import re -__all__ = ('rev_parse', 'is_git_dir', 'touch') - -def touch(filename): - fp = open(filename, "a") - fp.close() - -def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" - if isdir(d) and \ - isdir(join(d, 'objects')) and \ - isdir(join(d, 'refs')): - headref = join(d, 'HEAD') - return isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False +__all__ = ["PureReferencesMixin"] +#{ Utilities def short_to_long(odb, hexsha): """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha @@ -89,7 +90,7 @@ def name_to_object(repo, name, return_ref=False): return Object.new_from_sha(repo, hex_to_bin(hexsha)) def deref_tag(tag): - """Recursively dereerence a tag and return the resulting object""" + """Recursively dereference a tag and return the resulting object""" while True: try: tag = tag.object @@ -185,7 +186,7 @@ def rev_parse(repo, rev): # END handle tag elif token == '@': # try single int - assert ref is not None, "Requre Reference to access reflog" + assert ref is not None, "Require Reference to access reflog" revlog_index = None try: # transform reversed index into the format of our revlog @@ -282,3 +283,85 @@ def rev_parse(repo, rev): raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) return obj + +#} END utilities + +class PureReferencesMixin(ReferencesMixin): + """Pure-Python refparse implementation""" + + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + + #{ Configuration + # Types to use when instatiating references + TagReferenceCls = TagReference + HeadCls = Head + ReferenceCls = Reference + HEADCls = HEAD + CommitCls = Commit + #} END configuration + + def resolve(self, name): + return self.resolve_object(name).binsha + + def resolve_object(self, name): + return rev_parse(self, name) + + @property + def references(self): + return self.ReferenceCls.list_items(self) + + @property + def heads(self): + return self.HeadCls.list_items(self) + + @property + def tags(self): + return self.TagReferenceCls.list_items(self) + + def tag(self, name): + return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name)) + + def commit(self, rev=None): + if rev is None: + return self.head.commit + else: + return self.resolve_object(str(rev)+"^0") + #END handle revision + + def iter_trees(self, *args, **kwargs): + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + if rev is None: + return self.head.commit.tree + else: + return self.resolve_object(str(rev)+"^{tree}") + + def iter_commits(self, rev=None, paths='', **kwargs): + if rev is None: + rev = self.head.commit + + return self.CommitCls.iter_items(self, rev, paths, **kwargs) + + + @property + def head(self): + return self.HEADCls(self,'HEAD') + + def create_head(self, path, commit='HEAD', force=False, 
logmsg=None ): + return self.HeadCls.create(self, path, commit, force, logmsg) + + def delete_head(self, *heads, **kwargs): + return self.HeadCls.delete(self, *heads, **kwargs) + + def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): + return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs) + + def delete_tag(self, *tags): + return self.TagReferenceCls.delete(self, *tags) + + + # compat + branches = heads + refs = references diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py new file mode 100644 index 00000000..735f90b1 --- /dev/null +++ b/git/db/py/submodule.py @@ -0,0 +1,33 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.objects.submodule.base import Submodule +from git.objects.submodule.root import RootModule +from git.db.interface import SubmoduleDB + +__all__ = ["PureSubmoduleDB"] + +class PureSubmoduleDB(SubmoduleDB): + """Pure python implementation of submodule functionality""" + + @property + def submodules(self): + return Submodule.list_items(self) + + def submodule(self, name): + try: + return self.submodules[name] + except IndexError: + raise ValueError("Didn't find submodule named %r" % name) + # END exception handling + + def create_submodule(self, *args, **kwargs): + return Submodule.add(self, *args, **kwargs) + + def iter_submodules(self, *args, **kwargs): + return RootModule(self).traverse(*args, **kwargs) + + def submodule_update(self, *args, **kwargs): + return RootModule(self).update(*args, **kwargs) + diff --git a/git/db/py/transport.py b/git/db/py/transport.py new file mode 100644 index 00000000..00d222b0 --- /dev/null +++ b/git/db/py/transport.py @@ -0,0 +1,58 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Implement a transport compatible database which sends objects using the git protocol""" + +from git.db.interface import ( TransportDB, + PushInfo, + FetchInfo, + RefSpec ) + +from git.refs.remote import RemoteReference +from git.remote import Remote + + +__all__ = ["PureTransportDB"] + +class PurePushInfo(PushInfo): + """TODO: Implementation""" + __slots__ = tuple() + + + +class PureFetchInfo(FetchInfo): + """TODO""" + __slots__ = tuple() + + +class PureTransportDB(TransportDB): + # The following variables need to be set by the derived class + #{Configuration + protocol = None + RemoteCls = Remote + #}end configuraiton + + #{ Interface + + def fetch(self, url, refspecs, progress=None, **kwargs): + raise NotImplementedError() + + def push(self, url, refspecs, progress=None, **kwargs): + raise NotImplementedError() + + @property + def remotes(self): + return self.RemoteCls.list_items(self) + + def remote(self, name='origin'): + return self.remotes[name] + + def create_remote(self, name, url, **kwargs): + return self.RemoteCls.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + return self.RemoteCls.remove(self, remote) + + #}end interface + diff --git a/git/diff.py b/git/diff.py index 7b3bf6b5..d1c6c0ac 100644 --- a/git/diff.py +++ b/git/diff.py @@ -9,7 +9,7 @@ from objects.blob import Blob from objects.util import mode_str_to_int from exc import GitCommandError -from gitdb.util import hex_to_bin +from git.util import hex_to_bin __all__ = 
('Diffable', 'DiffIndex', 'Diff')
diff --git a/git/exc.py b/git/exc.py
--- a/git/exc.py
+++ b/git/exc.py
@@ -5,17 +5,53 @@
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """ Module containing all exceptions thrown throughout the git package, """
-from gitdb.exc import *
+from util import to_hex_sha
 
-class InvalidGitRepositoryError(Exception):
+class GitPythonError(Exception):
+    """Base exception for all git-python related errors"""
+
+class ODBError(GitPythonError):
+    """All errors thrown by the object database"""
+
+
+class InvalidDBRoot(ODBError):
+    """Thrown if an object database cannot be initialized at the given path"""
+
+
+class BadObject(ODBError):
+    """The object with the given SHA does not exist. Instantiate with the
+    failed sha"""
+
+    def __str__(self):
+        return "BadObject: %s" % to_hex_sha(self.args[0])
+
+
+class ParseError(ODBError):
+    """Thrown if the parsing of a file failed due to an invalid format"""
+
+
+class AmbiguousObjectName(ODBError):
+    """Thrown if a possibly shortened name does not uniquely represent a single object
+    in the database"""
+
+
+class BadObjectType(ODBError):
+    """The object had an unsupported type"""
+
+
+class UnsupportedOperation(ODBError):
+    """Thrown if the given operation cannot be supported by the object database"""
+
+
+class InvalidGitRepositoryError(InvalidDBRoot):
     """ Thrown if the given repository appears to have an invalid format. """
 
-class NoSuchPathError(OSError):
+class NoSuchPathError(InvalidDBRoot):
     """ Thrown if a path could not be accessed by the system. """
 
-class GitCommandError(Exception):
+class GitCommandError(GitPythonError):
     """ Thrown if execution of the git command fails with non-zero status code. """
     def __init__(self, command, status, stderr=None):
         self.stderr = stderr
@@ -27,7 +63,7 @@ class GitCommandError(Exception):
             (' '.join(str(i) for i in self.command), self.status, self.stderr))
 
-class CheckoutError( Exception ):
+class CheckoutError(GitPythonError):
     """Thrown if a file could not be checked out from the index as it contained changes.
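The reworked hierarchy above roots every database and command failure in GitPythonError, so callers can catch a single base class instead of mixing gitdb and GitPython exception types. A minimal usage sketch, assuming a repository type that mixes in PureReferencesMixin so that resolve_object() is available (try_resolve is an illustrative helper, not part of this commit)::

    from git.exc import GitPythonError, BadObject

    def try_resolve(repo, name):
        """Resolve a rev spec to an object, mapping any git-python failure to None"""
        try:
            return repo.resolve_object(name)    # rev_parse under the hood
        except BadObject:
            return None                         # no object behind that name/sha
        except GitPythonError:
            return None                         # any other git-python failure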
@@ -50,9 +86,10 @@ class CheckoutError( Exception ): return Exception.__str__(self) + ":%s" % self.failed_files -class CacheError(Exception): +class CacheError(GitPythonError): """Base for all errors related to the git index, which is called cache internally""" + class UnmergedEntriesError(CacheError): """Thrown if an operation cannot proceed as there are still unmerged entries in the cache""" diff --git a/git/ext/async b/git/ext/async new file mode 160000 +Subproject 10310824c001deab8fea85b88ebda0696f964b3 diff --git a/git/ext/gitdb b/git/ext/gitdb deleted file mode 160000 -Subproject 17d9d1395fb6d18d553e085150138463b5827a2 diff --git a/git/fun.py b/git/fun.py new file mode 100644 index 00000000..5bbe8efc --- /dev/null +++ b/git/fun.py @@ -0,0 +1,674 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains basic c-functions which usually contain performance critical code +Keeping this code separate from the beginning makes it easier to out-source +it into c later, if required""" + +from exc import ( + BadObjectType + ) + +from util import zlib +decompressobj = zlib.decompressobj + +import mmap +from itertools import islice, izip + +from cStringIO import StringIO + +# INVARIANTS +OFS_DELTA = 6 +REF_DELTA = 7 +delta_types = (OFS_DELTA, REF_DELTA) + +type_id_to_type_map = { + 0 : "", # EXT 1 + 1 : "commit", + 2 : "tree", + 3 : "blob", + 4 : "tag", + 5 : "", # EXT 2 + OFS_DELTA : "OFS_DELTA", # OFFSET DELTA + REF_DELTA : "REF_DELTA" # REFERENCE DELTA + } + +type_to_type_id_map = dict( + commit=1, + tree=2, + blob=3, + tag=4, + OFS_DELTA=OFS_DELTA, + REF_DELTA=REF_DELTA + ) + +# used when dealing with larger streams +chunk_size = 1000*mmap.PAGESIZE + +__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info', + 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data', + 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header') + + +#{ Structures + +def _set_delta_rbound(d, size): + """Truncate the given delta to the given size + :param size: size relative to our target offset, may not be 0, must be smaller or equal + to our size + :return: d""" + d.ts = size + + # NOTE: data is truncated automatically when applying the delta + # MUST NOT DO THIS HERE + return d + +def _move_delta_lbound(d, bytes): + """Move the delta by the given amount of bytes, reducing its size so that its + right bound stays static + :param bytes: amount of bytes to move, must be smaller than delta size + :return: d""" + if bytes == 0: + return + + d.to += bytes + d.so += bytes + d.ts -= bytes + if d.data is not None: + d.data = d.data[bytes:] + # END handle data + + return d + +def delta_duplicate(src): + return DeltaChunk(src.to, src.ts, src.so, src.data) + +def delta_chunk_apply(dc, bbuf, write): + """Apply own data to the target buffer + :param bbuf: buffer providing source bytes for copy operations + :param write: write method to call with data to write""" + if dc.data is None: + # COPY DATA FROM SOURCE + write(buffer(bbuf, dc.so, dc.ts)) + else: + # APPEND DATA + # whats faster: if + 4 function calls or just a write with a slice ? 
+ # Considering data can be larger than 127 bytes now, it should be worth it + if dc.ts < len(dc.data): + write(dc.data[:dc.ts]) + else: + write(dc.data) + # END handle truncation + # END handle chunk mode + + +class DeltaChunk(object): + """Represents a piece of a delta, it can either add new data, or copy existing + one from a source buffer""" + __slots__ = ( + 'to', # start offset in the target buffer in bytes + 'ts', # size of this chunk in the target buffer in bytes + 'so', # start offset in the source buffer in bytes or None + 'data', # chunk of bytes to be added to the target buffer, + # DeltaChunkList to use as base, or None + ) + + def __init__(self, to, ts, so, data): + self.to = to + self.ts = ts + self.so = so + self.data = data + + def __repr__(self): + return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "") + + #{ Interface + + def rbound(self): + return self.to + self.ts + + def has_data(self): + """:return: True if the instance has data to add to the target stream""" + return self.data is not None + + #} END interface + +def _closest_index(dcl, absofs): + """:return: index at which the given absofs should be inserted. The index points + to the DeltaChunk with a target buffer absofs that equals or is greater than + absofs. + :note: global method for performance only, it belongs to DeltaChunkList""" + lo = 0 + hi = len(dcl) + while lo < hi: + mid = (lo + hi) / 2 + dc = dcl[mid] + if dc.to > absofs: + hi = mid + elif dc.rbound() > absofs or dc.to == absofs: + return mid + else: + lo = mid + 1 + # END handle bound + # END for each delta absofs + return len(dcl)-1 + +def delta_list_apply(dcl, bbuf, write): + """Apply the chain's changes and write the final result using the passed + write function. + :param bbuf: base buffer containing the base of all deltas contained in this + list. It will only be used if the chunk in question does not have a base + chain. + :param write: function taking a string of bytes to write to the output""" + for dc in dcl: + delta_chunk_apply(dc, bbuf, write) + # END for each dc + +def delta_list_slice(dcl, absofs, size, ndcl): + """:return: Subsection of this list at the given absolute offset, with the given + size in bytes. + :return: None""" + cdi = _closest_index(dcl, absofs) # delta start index + cd = dcl[cdi] + slen = len(dcl) + lappend = ndcl.append + + if cd.to != absofs: + tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data) + _move_delta_lbound(tcd, absofs - cd.to) + tcd.ts = min(tcd.ts, size) + lappend(tcd) + size -= tcd.ts + cdi += 1 + # END lbound overlap handling + + while cdi < slen and size: + # are we larger than the current block + cd = dcl[cdi] + if cd.ts <= size: + lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data)) + size -= cd.ts + else: + tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data) + tcd.ts = size + lappend(tcd) + size -= tcd.ts + break + # END hadle size + cdi += 1 + # END for each chunk + + +class DeltaChunkList(list): + """List with special functionality to deal with DeltaChunks. + There are two types of lists we represent. The one was created bottom-up, working + towards the latest delta, the other kind was created top-down, working from the + latest delta down to the earliest ancestor. 
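To make the two chunk flavours concrete: delta_chunk_apply above either slices bytes out of the base buffer (data is None) or emits its literal data. A small sketch using only the definitions from this file, with made-up byte values::

    from cStringIO import StringIO

    base = "hello world"
    chunks = [
        DeltaChunk(0, 5, 6, None),      # copy 5 bytes from base offset 6 -> "world"
        DeltaChunk(5, 6, 0, " redux"),  # append 6 literal bytes
    ]
    out = StringIO()
    for dc in chunks:
        delta_chunk_apply(dc, base, out.write)
    assert out.getvalue() == "world redux"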
This attribute is queryable + after all processing with is_reversed.""" + + __slots__ = tuple() + + def rbound(self): + """:return: rightmost extend in bytes, absolute""" + if len(self) == 0: + return 0 + return self[-1].rbound() + + def lbound(self): + """:return: leftmost byte at which this chunklist starts""" + if len(self) == 0: + return 0 + return self[0].to + + def size(self): + """:return: size of bytes as measured by our delta chunks""" + return self.rbound() - self.lbound() + + def apply(self, bbuf, write): + """Only used by public clients, internally we only use the global routines + for performance""" + return delta_list_apply(self, bbuf, write) + + def compress(self): + """Alter the list to reduce the amount of nodes. Currently we concatenate + add-chunks + :return: self""" + slen = len(self) + if slen < 2: + return self + i = 0 + slen_orig = slen + + first_data_index = None + while i < slen: + dc = self[i] + i += 1 + if dc.data is None: + if first_data_index is not None and i-2-first_data_index > 1: + #if first_data_index is not None: + nd = StringIO() # new data + so = self[first_data_index].to # start offset in target buffer + for x in xrange(first_data_index, i-1): + xdc = self[x] + nd.write(xdc.data[:xdc.ts]) + # END collect data + + del(self[first_data_index:i-1]) + buf = nd.getvalue() + self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf)) + + slen = len(self) + i = first_data_index + 1 + + # END concatenate data + first_data_index = None + continue + # END skip non-data chunks + + if first_data_index is None: + first_data_index = i-1 + # END iterate list + + #if slen_orig != len(self): + # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100) + return self + + def check_integrity(self, target_size=-1): + """Verify the list has non-overlapping chunks only, and the total size matches + target_size + :param target_size: if not -1, the total size of the chain must be target_size + :raise AssertionError: if the size doen't match""" + if target_size > -1: + assert self[-1].rbound() == target_size + assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size + # END target size verification + + if len(self) < 2: + return + + # check data + for dc in self: + assert dc.ts > 0 + if dc.has_data(): + assert len(dc.data) >= dc.ts + # END for each dc + + left = islice(self, 0, len(self)-1) + right = iter(self) + right.next() + # this is very pythonic - we might have just use index based access here, + # but this could actually be faster + for lft,rgt in izip(left, right): + assert lft.rbound() == rgt.to + assert lft.to + lft.ts == rgt.to + # END for each pair + + +class TopdownDeltaChunkList(DeltaChunkList): + """Represents a list which is generated by feeding its ancestor streams one by + one""" + __slots__ = tuple() + + def connect_with_next_base(self, bdcl): + """Connect this chain with the next level of our base delta chunklist. + The goal in this game is to mark as many of our chunks rigid, hence they + cannot be changed by any of the upcoming bases anymore. Once all our + chunks are marked like that, we can stop all processing + :param bdcl: data chunk list being one of our bases. They must be fed in + consequtively and in order, towards the earliest ancestor delta + :return: True if processing was done. 
Use it to abort processing of + remaining streams if False is returned""" + nfc = 0 # number of frozen chunks + dci = 0 # delta chunk index + slen = len(self) # len of self + ccl = list() # temporary list + while dci < slen: + dc = self[dci] + dci += 1 + + # all add-chunks which are already topmost don't need additional processing + if dc.data is not None: + nfc += 1 + continue + # END skip add chunks + + # copy chunks + # integrate the portion of the base list into ourselves. Lists + # dont support efficient insertion ( just one at a time ), but for now + # we live with it. Internally, its all just a 32/64bit pointer, and + # the portions of moved memory should be smallish. Maybe we just rebuild + # ourselves in order to reduce the amount of insertions ... + del(ccl[:]) + delta_list_slice(bdcl, dc.so, dc.ts, ccl) + + # move the target bounds into place to match with our chunk + ofs = dc.to - dc.so + for cdc in ccl: + cdc.to += ofs + # END update target bounds + + if len(ccl) == 1: + self[dci-1] = ccl[0] + else: + # maybe try to compute the expenses here, and pick the right algorithm + # It would normally be faster than copying everything physically though + # TODO: Use a deque here, and decide by the index whether to extend + # or extend left ! + post_dci = self[dci:] + del(self[dci-1:]) # include deletion of dc + self.extend(ccl) + self.extend(post_dci) + + slen = len(self) + dci += len(ccl)-1 # deleted dc, added rest + + # END handle chunk replacement + # END for each chunk + + if nfc == slen: + return False + # END handle completeness + return True + + +#} END structures + +#{ Routines + +def is_loose_object(m): + """ + :return: True the file contained in memory map m appears to be a loose object. + Only the first two bytes are needed""" + b0, b1 = map(ord, m[:2]) + word = (b0 << 8) + b1 + return b0 == 0x78 and (word % 31) == 0 + +def loose_object_header_info(m): + """ + :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the + object as well as its uncompressed size in bytes. 
+    :param m: memory map from which to read the compressed object data"""
+    decompress_size = 8192 # is used in cgit as well
+    hdr = decompressobj().decompress(m, decompress_size)
+    type_name, size = hdr[:hdr.find("\0")].split(" ")
+    return type_name, int(size)
+
+def pack_object_header_info(data):
+    """
+    :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
+        The type_id should be interpreted according to the ``type_id_to_type_map`` map
+        The byte-offset specifies the start of the actual zlib compressed datastream
+    :param data: random-access memory, like a string or memory map"""
+    c = ord(data[0])            # first byte
+    i = 1                       # next char to read
+    type_id = (c >> 4) & 7      # numeric type
+    size = c & 15               # starting size
+    s = 4                       # starting bit-shift size
+    while c & 0x80:
+        c = ord(data[i])
+        i += 1
+        size += (c & 0x7f) << s
+        s += 7
+    # END character loop
+    return (type_id, size, i)
+
+def create_pack_object_header(obj_type, obj_size):
+    """:return: string defining the pack header comprised of the object type
+    and its uncompressed size in bytes
+    :param obj_type: pack type_id of the object
+    :param obj_size: uncompressed size in bytes of the following object stream"""
+    c = 0           # 1 byte
+    hdr = str()     # output string
+
+    c = (obj_type << 4) | (obj_size & 0xf)
+    obj_size >>= 4
+    while obj_size:
+        hdr += chr(c | 0x80)
+        c = obj_size & 0x7f
+        obj_size >>= 7
+    #END until size is consumed
+    hdr += chr(c)
+    return hdr
+
+def msb_size(data, offset=0):
+    """
+    :return: tuple(read_bytes, size) read the msb size from the given random
+        access data starting at the given byte offset"""
+    size = 0
+    i = 0
+    l = len(data)
+    hit_msb = False
+    while i < l:
+        c = ord(data[i+offset])
+        size |= (c & 0x7f) << i*7
+        i += 1
+        if not c & 0x80:
+            hit_msb = True
+            break
+        # END check msb bit
+    # END while in range
+    if not hit_msb:
+        raise AssertionError("Could not find terminating MSB byte in data stream")
+    return i+offset, size
+
+def loose_object_header(type, size):
+    """
+    :return: string representing the loose object header, which is immediately
+        followed by the content stream of size 'size'"""
+    return "%s %i\0" % (type, size)
+
+def write_object(type, size, read, write, chunk_size=chunk_size):
+    """
+    Write the object as identified by type, size and source_stream into the
+    target_stream
+
+    :param type: type string of the object
+    :param size: amount of bytes to write from source_stream
+    :param read: read method of a stream providing the content data
+    :param write: write method of the output stream
+    :return: the actual amount of bytes written to the stream, which includes the header"""
+    tbw = 0     # total num bytes written
+
+    # WRITE HEADER: type SP size NULL
+    tbw += write(loose_object_header(type, size))
+    tbw += stream_copy(read, write, size, chunk_size)
+
+    return tbw
+
+def stream_copy(read, write, size, chunk_size):
+    """
+    Copy a stream up to size bytes using the provided read and write methods,
+    in chunks of chunk_size
+
+    :note: it's much like the stream_copy utility, but operates just using methods"""
+    dbw = 0     # num data bytes written
+
+    # WRITE ALL DATA UP TO SIZE
+    while True:
+        cs = min(chunk_size, size-dbw)
+        # NOTE: not all write methods return the amount of written bytes, like
+        # mmap.write. It's bad, but we just deal with it ...
perhaps its not + # even less efficient + # data_len = write(read(cs)) + # dbw += data_len + data = read(cs) + data_len = len(data) + dbw += data_len + write(data) + if data_len < cs or dbw == size: + break + # END check for stream end + # END duplicate data + return dbw + +def connect_deltas(dstreams): + """ + Read the condensed delta chunk information from dstream and merge its information + into a list of existing delta chunks + + :param dstreams: iterable of delta stream objects, the delta to be applied last + comes first, then all its ancestors in order + :return: DeltaChunkList, containing all operations to apply""" + tdcl = None # topmost dcl + + dcl = tdcl = TopdownDeltaChunkList() + for dsi, ds in enumerate(dstreams): + # print "Stream", dsi + db = ds.read() + delta_buf_size = ds.size + + # read header + i, base_size = msb_size(db) + i, target_size = msb_size(db, i) + + # interpret opcodes + tbw = 0 # amount of target bytes written + while i < delta_buf_size: + c = ord(db[i]) + i += 1 + if c & 0x80: + cp_off, cp_size = 0, 0 + if (c & 0x01): + cp_off = ord(db[i]) + i += 1 + if (c & 0x02): + cp_off |= (ord(db[i]) << 8) + i += 1 + if (c & 0x04): + cp_off |= (ord(db[i]) << 16) + i += 1 + if (c & 0x08): + cp_off |= (ord(db[i]) << 24) + i += 1 + if (c & 0x10): + cp_size = ord(db[i]) + i += 1 + if (c & 0x20): + cp_size |= (ord(db[i]) << 8) + i += 1 + if (c & 0x40): + cp_size |= (ord(db[i]) << 16) + i += 1 + + if not cp_size: + cp_size = 0x10000 + + rbound = cp_off + cp_size + if (rbound < cp_size or + rbound > base_size): + break + + dcl.append(DeltaChunk(tbw, cp_size, cp_off, None)) + tbw += cp_size + elif c: + # NOTE: in C, the data chunks should probably be concatenated here. + # In python, we do it as a post-process + dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c])) + i += c + tbw += c + else: + raise ValueError("unexpected delta opcode 0") + # END handle command byte + # END while processing delta data + + dcl.compress() + + # merge the lists ! 
+        if dsi > 0:
+            if not tdcl.connect_with_next_base(dcl):
+                break
+        # END handle merge
+
+        # prepare next base
+        dcl = DeltaChunkList()
+    # END for each delta stream
+
+    return tdcl
+
+def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
+    """
+    Apply data from a delta buffer using a source buffer to the target file
+
+    :param src_buf: random access data from which the delta was created
+    :param src_buf_size: size of the source buffer in bytes
+    :param delta_buf_size: size of the delta buffer in bytes
+    :param delta_buf: random access delta data
+    :param write: write method taking a chunk of bytes
+    :note: transcribed to python from the similar routine in patch-delta.c"""
+    i = 0
+    db = delta_buf
+    while i < delta_buf_size:
+        c = ord(db[i])
+        i += 1
+        if c & 0x80:
+            cp_off, cp_size = 0, 0
+            if (c & 0x01):
+                cp_off = ord(db[i])
+                i += 1
+            if (c & 0x02):
+                cp_off |= (ord(db[i]) << 8)
+                i += 1
+            if (c & 0x04):
+                cp_off |= (ord(db[i]) << 16)
+                i += 1
+            if (c & 0x08):
+                cp_off |= (ord(db[i]) << 24)
+                i += 1
+            if (c & 0x10):
+                cp_size = ord(db[i])
+                i += 1
+            if (c & 0x20):
+                cp_size |= (ord(db[i]) << 8)
+                i += 1
+            if (c & 0x40):
+                cp_size |= (ord(db[i]) << 16)
+                i += 1
+
+            if not cp_size:
+                cp_size = 0x10000
+
+            rbound = cp_off + cp_size
+            if (rbound < cp_size or
+                rbound > src_buf_size):
+                break
+            write(buffer(src_buf, cp_off, cp_size))
+        elif c:
+            write(db[i:i+c])
+            i += c
+        else:
+            raise ValueError("unexpected delta opcode 0")
+        # END handle command byte
+    # END while processing delta data
+
+    # yes, lets use the exact same error message that git uses :)
+    assert i == delta_buf_size, "delta replay has gone wild"
+
+
+def is_equal_canonical_sha(canonical_length, match, sha1):
+    """
+    :return: True if the given lhs and rhs 20 byte binary shas are equal
+        The comparison will take the canonical_length of the match sha into account,
+        hence for uneven canonical representations it will only compare the high
+        4 bits of the trailing byte
+    :param match: less than 20 byte sha
+    :param sha1: 20 byte sha"""
+    binary_length = canonical_length/2
+    if match[:binary_length] != sha1[:binary_length]:
+        return False
+
+    if canonical_length - binary_length and \
+        (ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
+        return False
+    # END handle uneven canonical length
+    return True
+
+#} END routines
+
+
+try:
+    # raise ImportError;    # DEBUG
+    from _perf import connect_deltas
+except ImportError:
+    pass
diff --git a/git/index/base.py b/git/index/base.py
index 88410e20..12097922 100644
--- a/git/index/base.py
+++ b/git/index/base.py
@@ -62,9 +62,8 @@ from fun import (
     S_IFGITLINK
     )
 
-from gitdb.base import IStream
-from gitdb.db import MemoryDB
-from gitdb.util import to_bin_sha
+from git.base import IStream
+from git.util import to_bin_sha
 
 from itertools import izip
 __all__ = ( 'IndexFile', 'CheckoutError' )
@@ -512,7 +511,9 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
         :raise UnmergedEntriesError: """
         # we obtain no lock as we just flush our contents to disk as tree
         # If we are a new index, the entries access will load our data accordingly
-        mdb = MemoryDB()
+        # Needs a delayed import, as db.py imports IndexFile as well
+        import git.db.py.mem
+        mdb = git.db.py.mem.PureMemoryDB()
         entries = self._entries_sorted()
         binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
@@ -959,12 +960,16 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
             if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "):
                 is_a_dir = " is a directory"
unlink_issue = "unable to unlink old '" + already_exists_issue = ' already exists, no checkout' # created by entry.c:checkout_entry(...) if line.endswith(is_a_dir): failed_files.append(line[:-len(is_a_dir)]) failed_reasons.append(is_a_dir) elif line.startswith(unlink_issue): failed_files.append(line[len(unlink_issue):line.rfind("'")]) failed_reasons.append(unlink_issue) + elif line.endswith(already_exists_issue): + failed_files.append(line[:-len(already_exists_issue)]) + failed_reasons.append(already_exists_issue) else: unknown_lines.append(line) continue diff --git a/git/index/fun.py b/git/index/fun.py index 9b35bf04..e2813c0b 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -36,8 +36,8 @@ from util import ( unpack ) -from gitdb.base import IStream -from gitdb.typ import str_tree_type +from git.base import IStream +from git.typ import str_tree_type __all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key', 'stat_mode_to_index_mode', 'S_IFGITLINK') diff --git a/git/objects/base.py b/git/objects/base.py index 5f2f7809..61b3e674 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,15 +3,20 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import LazyMixin, join_path_native, stream_copy + from util import get_object_type_by_name -from gitdb.util import ( +from git.util import ( hex_to_bin, bin_to_hex, - basename + dirname, + basename, + LazyMixin, + join_path_native, + stream_copy ) - -import gitdb.typ as dbtyp +from git.db.interface import RepositoryPathsMixin +from git.exc import UnsupportedOperation +from git.typ import ObjectType _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" @@ -22,24 +27,26 @@ class Object(LazyMixin): NULL_HEX_SHA = '0'*40 NULL_BIN_SHA = '\0'*20 - TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) - __slots__ = ("repo", "binsha", "size" ) + TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag) + __slots__ = ("odb", "binsha", "size" ) + type = None # to be set by subclass + type_id = None # to be set by subclass - def __init__(self, repo, binsha): + def __init__(self, odb, binsha): """Initialize an object by identifying it by its binary sha. All keyword arguments will be set on demand if None. - :param repo: repository this object is located in + :param odb: repository this object is located in :param binsha: 20 byte SHA1""" super(Object,self).__init__() - self.repo = repo + self.odb = odb self.binsha = binsha assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) @classmethod - def new(cls, repo, id): + def new(cls, odb, id): """ :return: New Object instance of a type appropriate to the object type behind id. 
The id of the newly created object will be a binsha even though @@ -49,27 +56,27 @@ class Object(LazyMixin): :note: This cannot be a __new__ method as it would always call __init__ with the input id which is not necessarily a binsha.""" - return repo.rev_parse(str(id)) + return odb.rev_parse(str(id)) @classmethod - def new_from_sha(cls, repo, sha1): + def new_from_sha(cls, odb, sha1): """ :return: new object instance of a type appropriate to represent the given binary sha1 :param sha1: 20 byte binary sha1""" if sha1 == cls.NULL_BIN_SHA: # the NULL binsha is always the root commit - return get_object_type_by_name('commit')(repo, sha1) + return get_object_type_by_name('commit')(odb, sha1) #END handle special case - oinfo = repo.odb.info(sha1) - inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) + oinfo = odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha) inst.size = oinfo.size return inst def _set_cache_(self, attr): """Retrieve object information""" if attr == "size": - oinfo = self.repo.odb.info(self.binsha) + oinfo = self.odb.info(self.binsha) self.size = oinfo.size # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) else: @@ -77,10 +84,14 @@ class Object(LazyMixin): def __eq__(self, other): """:return: True if the objects have the same SHA1""" + if not hasattr(other, 'binsha'): + return False return self.binsha == other.binsha def __ne__(self, other): """:return: True if the objects do not have the same SHA1 """ + if not hasattr(other, 'binsha'): + return True return self.binsha != other.binsha def __hash__(self): @@ -104,13 +115,13 @@ class Object(LazyMixin): def data_stream(self): """ :return: File Object compatible stream to the uncompressed raw data of the object :note: returned streams must be read in order""" - return self.repo.odb.stream(self.binsha) + return self.odb.stream(self.binsha) def stream_data(self, ostream): """Writes our data directly to the given output stream :param ostream: File object compatible stream object. :return: self""" - istream = self.repo.odb.stream(self.binsha) + istream = self.odb.stream(self.binsha) stream_copy(istream, ostream) return self @@ -123,9 +134,9 @@ class IndexObject(Object): # for compatability with iterable lists _id_attribute_ = 'path' - def __init__(self, repo, binsha, mode=None, path=None): + def __init__(self, odb, binsha, mode=None, path=None): """Initialize a newly instanced IndexObject - :param repo: is the Repo we are located in + :param odb: is the object database we are located in :param binsha: 20 byte sha1 :param mode: is the stat compatible file mode as int, use the stat module to evaluate the infomration @@ -135,7 +146,7 @@ class IndexObject(Object): :note: Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree.""" - super(IndexObject, self).__init__(repo, binsha) + super(IndexObject, self).__init__(odb, binsha) if mode is not None: self.mode = mode if path is not None: @@ -167,6 +178,15 @@ class IndexObject(Object): Absolute path to this index object in the file system ( as opposed to the .path field which is a path relative to the git repository ). - The returned path will be native to the system and contains '\' on windows. """ - return join_path_native(self.repo.working_tree_dir, self.path) + The returned path will be native to the system and contains '\' on windows. 
+ :raise UnsupportedOperation: if underlying odb does not support the required method to obtain a working dir""" + # TODO: Here we suddenly need something better than a plain object database + # which indicates our odb should better be named repo ! + root = '' + if isinstance(self.odb, RepositoryPathsMixin): + root = self.odb.working_tree_dir + else: + raise UnsupportedOperation("Cannot provide absolute path from a database without Repository path support") + #END handle odb type + return join_path_native(root, self.path) diff --git a/git/objects/blob.py b/git/objects/blob.py index f52d1a53..9c51f99f 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -4,15 +4,19 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.util import RepoAliasMixin from mimetypes import guess_type +from git.typ import ObjectType + import base __all__ = ('Blob', ) -class Blob(base.IndexObject): +class Blob(base.IndexObject, RepoAliasMixin): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" - type = "blob" + type = ObjectType.blob + type_id = ObjectType.blob_id # valid blob modes executable_mode = 0100755 diff --git a/git/objects/commit.py b/git/objects/commit.py index fd4187b0..c32bbf1a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,42 +3,45 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +import base -from git.util import ( - Actor, - Iterable, - Stats, - ) -from git.diff import Diffable +from git.typ import ObjectType from tree import Tree -from gitdb import IStream from cStringIO import StringIO -import base -from gitdb.util import ( - hex_to_bin +from git.util import ( + hex_to_bin, + Actor, + RepoAliasMixin, + Iterable, + Actor, + Stats ) + from util import ( - Traversable, - Serializable, - parse_date, - altz_to_utctz_str, - parse_actor_and_date - ) -from time import ( - time, - altzone + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date ) +from git.diff import Diffable +from git.base import IStream +from cStringIO import StringIO + +from util import parse_date +from time import altzone, time + import os import sys __all__ = ('Commit', ) -class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): +class Commit(Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): """Wraps a git Commit object. This class will act lazily on some of its attributes and will query the value on demand only if it involves calling the git binary.""" + __slots__ = tuple() # ENVIRONMENT VARIABLES # read when creating new commits @@ -53,92 +56,16 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # object configuration - type = "commit" + type = ObjectType.commit + type_id = ObjectType.commit_id + __slots__ = ("tree", "author", "authored_date", "author_tz_offset", "committer", "committed_date", "committer_tz_offset", "message", "parents", "encoding") _id_attribute_ = "binsha" - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... 
) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper - binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) - else: - super(Commit, self)._set_cache_(attr) - # END handle attrs - - @property - def summary(self): - """:return: First line of the commit message""" - return self.message.split('\n', 1)[0] - + def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -225,33 +152,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database - - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield 
Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - @classmethod def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): @@ -361,6 +261,112 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # END advance head handling return new_commit + + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not 
hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream #{ Serializable Implementation @@ -408,7 +414,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): """:param from_rev_list: if true, the stream format is coming from the rev-list command Otherwise it is assumed to be a plain data stream from our object""" readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') self.parents = list() next_line = None @@ -418,7 +424,7 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): next_line = parent_line break # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) # END for each parent line self.parents = tuple(self.parents) @@ -461,5 +467,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) # END exception handling return self - + #} END serializable implementation + diff --git a/git/objects/fun.py b/git/objects/fun.py index 9b0a377c..6f2eaaad 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,4 +1,5 @@ """Module with functions which are supposed to be as fast as possible""" + from stat import S_ISDIR __all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', @@ -197,3 +198,4 @@ def traverse_tree_recursive(odb, tree_sha, path_prefix): # END for each item return entries + diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py index 82df59b0..c8bf2d49 100644 --- a/git/objects/submodule/__init__.py +++ b/git/objects/submodule/__init__.py @@ -1,2 +1,6 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php # NOTE: Cannot import anything here as the top-level _init_ has to handle # our dependencies diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 2160299b..0fdb121d 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,3 +1,8 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.util import RepoAliasMixin import util from util import ( mkhead, @@ -13,9 +18,10 @@ from git.util import ( Iterable, join_path_native, to_native_path_linux, - RemoteProgress ) +from git.db.interface import RemoteProgress + from git.config import SectionConstraint from git.exc import ( InvalidGitRepositoryError, @@ -23,7 +29,7 @@ from git.exc import ( ) import stat -import git +import git # we use some types indirectly to prevent cyclic imports ! 
import os import sys @@ -53,7 +59,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages -class Submodule(util.IndexObject, Iterable, Traversable): +class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. @@ -71,6 +77,9 @@ class Submodule(util.IndexObject, Iterable, Traversable): # this is a bogus type for base class compatability type = 'submodule' + # this type doesn't really have a type id + type_id = 0 + __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') _cache_attrs = ('path', '_url', '_branch_path') @@ -195,7 +204,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): #{ Edit Interface @classmethod - def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): + def add(cls, repo, name, path, url=None, branch=None, no_checkout=False, repoType=None): """Add a new submodule to the given repository. This will alter the index as well as the .gitmodules file, but will not create a new commit. If the submodule already exists, no matter if the configuration differs @@ -220,6 +229,8 @@ class Submodule(util.IndexObject, Iterable, Traversable): Examples are 'master' or 'feature/new' :param no_checkout: if True, and if the repository has to be cloned manually, no checkout will be performed + :param repoType: The repository type to use. It must provide the clone_from method. + If None, the default implementation is used. :return: The newly created submodule instance :note: works atomically, such that no change will be done if the repository update fails for instance""" @@ -227,6 +238,8 @@ class Submodule(util.IndexObject, Iterable, Traversable): raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") # END handle bare repos + repoType = repoType or git.Repo + path = to_native_path_linux(path) if path.endswith('/'): path = path[:-1] @@ -280,7 +293,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): if not branch_is_default: kwargs['b'] = br.name # END setup checkout-branch - mrepo = git.Repo.clone_from(url, path, **kwargs) + mrepo = repoType.clone_from(url, path, **kwargs) # END verify url # update configuration and index @@ -306,7 +319,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): return sm def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, - dry_run=False): + dry_run=False, ): """Update the repository of this submodule to point to the checkout we point at with the binsha of this instance. 
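The new repoType hook threads the repository implementation through the submodule machinery instead of hard-wiring git.Repo. A hedged sketch of what the changed add() signature enables, assuming Submodule is exported at the package level as in GitPython; the paths and URL are made up, and repoType=None falls back to git.Repo::

    import git

    repo = git.Repo("/path/to/superproject")        # hypothetical existing repository
    sm = git.Submodule.add(repo, name="lib", path="lib",
                           url="git://example.com/lib.git",
                           repoType=git.Repo)       # any type providing clone_from()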
@@ -368,7 +381,6 @@ class Submodule(util.IndexObject, Iterable, Traversable): if not init: return self # END early abort if init is not allowed - import git # there is no git-repository yet - but delete empty paths module_path = join_path_native(self.repo.working_tree_dir, self.path) @@ -384,7 +396,7 @@ class Submodule(util.IndexObject, Iterable, Traversable): # branch according to the remote-HEAD if possible progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name)) if not dry_run: - mrepo = git.Repo.clone_from(self.url, module_path, n=True) + mrepo = type(self.repo).clone_from(self.url, module_path, n=True) #END handle dry-run progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path) @@ -760,14 +772,19 @@ class Submodule(util.IndexObject, Iterable, Traversable): #{ Query Interface @unbare_repo - def module(self): - """:return: Repo instance initialized from the repository at our submodule path + def module(self, repoType=None): + """:return: Repository instance initialized from the repository at our submodule path + :param repoType: The type of repository to be created. It must be possible to instatiate it + from a single repository path. + If None, a default repository type will be used :raise InvalidGitRepositoryError: if a repository was not available. This could also mean that it was not yet initialized""" # late import to workaround circular dependencies - module_path = self.abspath + module_path = self.abspath + repoType = repoType or git.Repo + try: - repo = git.Repo(module_path) + repo = repoType(module_path) if repo != self.repo: return repo # END handle repo uninitialized diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 132604f6..5e4cad2d 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -1,3 +1,7 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import Submodule, UpdateProgress from util import ( find_first_remote_branch @@ -24,6 +28,7 @@ BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE URLCHANGE = RootUpdateProgress.URLCHANGE PATHCHANGE = RootUpdateProgress.PATHCHANGE + class RootModule(Submodule): """A (virtual) Root of all submodules in the given repository. It can be used to more easily traverse all submodules of the master repository""" diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py index 9b32807a..2c5f6bc1 100644 --- a/git/objects/submodule/util.py +++ b/git/objects/submodule/util.py @@ -1,3 +1,7 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php import git from git.exc import InvalidGitRepositoryError from git.config import GitConfigParser diff --git a/git/objects/tag.py b/git/objects/tag.py index c7d02abe..5dcd9bf9 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -5,24 +5,28 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. 
""" import base -from gitdb.util import hex_to_bin +from git.util import RepoAliasMixin +from git.util import hex_to_bin from util import ( - get_object_type_by_name, - parse_actor_and_date - ) + get_object_type_by_name, + parse_actor_and_date + ) +from git.typ import ObjectType __all__ = ("TagObject", ) -class TagObject(base.Object): +class TagObject(base.Object, RepoAliasMixin): """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - type = "tag" + type = ObjectType.tag + type_id = ObjectType.tag_id + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) - def __init__(self, repo, binsha, object=None, tag=None, + def __init__(self, odb, binsha, object=None, tag=None, tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): """Initialize a tag object with additional data - :param repo: repository this object is located in + :param odb: repository this object is located in :param binsha: 20 byte SHA1 :param object: Object instance of object we are pointing to :param tag: name of this tag @@ -32,7 +36,7 @@ class TagObject(base.Object): it into a different format :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the authored_date is in, in a format similar to time.altzone""" - super(TagObject, self).__init__(repo, binsha ) + super(TagObject, self).__init__(odb, binsha ) if object is not None: self.object = object if tag is not None: @@ -49,12 +53,12 @@ class TagObject(base.Object): def _set_cache_(self, attr): """Cache all our attributes at once""" if attr in TagObject.__slots__: - ostream = self.repo.odb.stream(self.binsha) + ostream = self.odb.stream(self.binsha) lines = ostream.read().splitlines() obj, hexsha = lines[0].split(" ") # object <hexsha> type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) + self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha)) self.tag = lines[2][4:] # tag <tag name> diff --git a/git/objects/tree.py b/git/objects/tree.py index 67431686..31f2602d 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -3,21 +3,23 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import util +from git.util import RepoAliasMixin +import git.diff as diff +from git.typ import ObjectType from base import IndexObject -from git.util import join_path from blob import Blob from submodule.base import Submodule -import git.diff as diff from fun import ( tree_entries_from_data, tree_to_stream ) -from gitdb.util import ( - to_bin_sha, +from git.util import ( + to_bin_sha, + join_path ) +import util __all__ = ("TreeModifier", "Tree") @@ -100,7 +102,7 @@ class TreeModifier(object): #} END mutators -class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): +class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin): """Tree objects represent an ordered list of Blobs and other Trees. 
``Tree as a list``:: @@ -112,7 +114,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): blob = tree[0] """ - type = "tree" + type = ObjectType.tree + type_id = ObjectType.tree_id + __slots__ = "_cache" # actual integer ids for comparison @@ -121,6 +125,9 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): symlink_id = 012 tree_id = 004 + #{ Configuration + + # override in subclass if you would like your own types to be instantiated instead _map_id_to_type = { commit_id : Submodule, blob_id : Blob, @@ -128,6 +135,8 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): # tree id added once Tree is defined } + #} end configuration + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): super(Tree, self).__init__(repo, binsha, mode, path) @@ -141,7 +150,7 @@ class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it - ostream = self.repo.odb.stream(self.binsha) + ostream = self.odb.stream(self.binsha) self._cache = tree_entries_from_data(ostream.read()) else: super(Tree, self)._set_cache_(attr) diff --git a/git/objects/util.py b/git/objects/util.py index 4c9323b8..8ac590f2 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 'verify_utctz', 'Actor') + #{ Functions def mode_str_to_int(modestr): diff --git a/git/odict.py b/git/odict.py index 2c8391d7..80f6965f 100644 --- a/git/odict.py +++ b/git/odict.py @@ -16,16 +16,11 @@ """A dict that keeps keys in insertion order""" from __future__ import generators - __author__ = ('Nicola Larosa <nico-NoSp@m-tekNico.net>,' 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>') - __docformat__ = "restructuredtext en" - __revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $' - __version__ = '0.2.2' - __all__ = ['OrderedDict', 'SequenceOrderedDict'] import sys diff --git a/git/pack.py b/git/pack.py new file mode 100644 index 00000000..62e9ae03 --- /dev/null +++ b/git/pack.py @@ -0,0 +1,1005 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains PackIndexFile and PackFile implementations""" +from git.exc import ( + BadObject, + UnsupportedOperation, + ParseError + ) +from util import ( + zlib, + LazyMixin, + unpack_from, + bin_to_hex, + file_contents_ro_filepath, + ) + +from fun import ( + create_pack_object_header, + pack_object_header_info, + is_equal_canonical_sha, + type_id_to_type_map, + write_object, + stream_copy, + chunk_size, + delta_types, + OFS_DELTA, + REF_DELTA, + msb_size + ) + +try: + from _perf import PackIndexFile_sha_to_index +except ImportError: + pass +# END try c module + +from base import ( # Amazing ! 
+ OInfo, + OStream, + OPackInfo, + OPackStream, + ODeltaStream, + ODeltaPackInfo, + ODeltaPackStream, + ) +from stream import ( + DecompressMemMapReader, + DeltaApplyReader, + Sha1Writer, + NullStream, + FlexibleSha1Writer + ) + +from struct import ( + pack, + unpack, + ) + +from binascii import crc32 + +from itertools import izip +import tempfile +import array +import os +import sys + +__all__ = ('PackIndexFile', 'PackFile', 'PackEntity') + + + + +#{ Utilities + +def pack_object_at(data, offset, as_stream): + """ + :return: Tuple(abs_data_offset, PackInfo|PackStream) + an object of the correct type according to the type_id of the object. + If as_stream is True, the object will contain a stream, allowing the + data to be read decompressed. + :param data: random accessable data containing all required information + :parma offset: offset in to the data at which the object information is located + :param as_stream: if True, a stream object will be returned that can read + the data, otherwise you receive an info object only""" + data = buffer(data, offset) + type_id, uncomp_size, data_rela_offset = pack_object_header_info(data) + total_rela_offset = None # set later, actual offset until data stream begins + delta_info = None + + # OFFSET DELTA + if type_id == OFS_DELTA: + i = data_rela_offset + c = ord(data[i]) + i += 1 + delta_offset = c & 0x7f + while c & 0x80: + c = ord(data[i]) + i += 1 + delta_offset += 1 + delta_offset = (delta_offset << 7) + (c & 0x7f) + # END character loop + delta_info = delta_offset + total_rela_offset = i + # REF DELTA + elif type_id == REF_DELTA: + total_rela_offset = data_rela_offset+20 + delta_info = data[data_rela_offset:total_rela_offset] + # BASE OBJECT + else: + # assume its a base object + total_rela_offset = data_rela_offset + # END handle type id + + abs_data_offset = offset + total_rela_offset + if as_stream: + stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size) + if delta_info is None: + return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream) + else: + return abs_data_offset, ODeltaPackStream(offset, type_id, uncomp_size, delta_info, stream) + else: + if delta_info is None: + return abs_data_offset, OPackInfo(offset, type_id, uncomp_size) + else: + return abs_data_offset, ODeltaPackInfo(offset, type_id, uncomp_size, delta_info) + # END handle info + # END handle stream + +def write_stream_to_pack(read, write, zstream, base_crc=None): + """Copy a stream as read from read function, zip it, and write the result. + Count the number of written bytes and return it + :param base_crc: if not None, the crc will be the base for all compressed data + we consecutively write and generate a crc32 from. 
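
# Standalone sketch of the variable-length base-offset decoding used for
# OFS_DELTA objects in pack_object_at above: every byte carries 7 payload
# bits, the high bit flags continuation, and each continuation adds an
# implicit +1 bias. The input bytes are made up.
def decode_ofs_delta(data, i):
    c = ord(data[i])
    i += 1
    offset = c & 0x7f
    while c & 0x80:
        c = ord(data[i])
        i += 1
        offset = ((offset + 1) << 7) + (c & 0x7f)
    return offset, i

# 0x91 has the continuation bit set: two bytes decode to (0x11 + 1) * 128 + 0x2e
assert decode_ofs_delta('\x91\x2e', 0) == (2350, 2)
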
If None, no crc will be generated + :return: tuple(no bytes read, no bytes written, crc32) crc might be 0 if base_crc + was false""" + br = 0 # bytes read + bw = 0 # bytes written + want_crc = base_crc is not None + crc = 0 + if want_crc: + crc = base_crc + #END initialize crc + + while True: + chunk = read(chunk_size) + br += len(chunk) + compressed = zstream.compress(chunk) + bw += len(compressed) + write(compressed) # cannot assume return value + + if want_crc: + crc = crc32(compressed, crc) + #END handle crc + + if len(chunk) != chunk_size: + break + #END copy loop + + compressed = zstream.flush() + bw += len(compressed) + write(compressed) + if want_crc: + crc = crc32(compressed, crc) + #END handle crc + + return (br, bw, crc) + + +#} END utilities + + +class IndexWriter(object): + """Utility to cache index information, allowing to write all information later + in one go to the given stream + :note: currently only writes v2 indices""" + __slots__ = '_objs' + + def __init__(self): + self._objs = list() + + def append(self, binsha, crc, offset): + """Append one piece of object information""" + self._objs.append((binsha, crc, offset)) + + def write(self, pack_sha, write): + """Write the index file using the given write method + :param pack_sha: binary sha over the whole pack that we index + :return: sha1 binary sha over all index file contents""" + # sort for sha1 hash + self._objs.sort(key=lambda o: o[0]) + + sha_writer = FlexibleSha1Writer(write) + sha_write = sha_writer.write + sha_write(PackIndexFile.index_v2_signature) + sha_write(pack(">L", PackIndexFile.index_version_default)) + + # fanout + tmplist = list((0,)*256) # fanout or list with 64 bit offsets + for t in self._objs: + tmplist[ord(t[0][0])] += 1 + #END prepare fanout + for i in xrange(255): + v = tmplist[i] + sha_write(pack('>L', v)) + tmplist[i+1] += v + #END write each fanout entry + sha_write(pack('>L', tmplist[255])) + + # sha1 ordered + # save calls, that is push them into c + sha_write(''.join(t[0] for t in self._objs)) + + # crc32 + for t in self._objs: + sha_write(pack('>L', t[1]&0xffffffff)) + #END for each crc + + tmplist = list() + # offset 32 + for t in self._objs: + ofs = t[2] + if ofs > 0x7fffffff: + tmplist.append(ofs) + ofs = 0x80000000 + len(tmplist)-1 + #END hande 64 bit offsets + sha_write(pack('>L', ofs&0xffffffff)) + #END for each offset + + # offset 64 + for ofs in tmplist: + sha_write(pack(">Q", ofs)) + #END for each offset + + # trailer + assert(len(pack_sha) == 20) + sha_write(pack_sha) + sha = sha_writer.sha(as_hex=False) + write(sha) + return sha + + + +class PackIndexFile(LazyMixin): + """A pack index provides offsets into the corresponding pack, allowing to find + locations for offsets faster.""" + + # Dont use slots as we dynamically bind functions for each version, need a dict for this + # The slots you see here are just to keep track of our instance variables + # __slots__ = ('_indexpath', '_fanout_table', '_data', '_version', + # '_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset') + + # used in v2 indices + _sha_list_offset = 8 + 1024 + index_v2_signature = '\377tOc' + index_version_default = 2 + + def __init__(self, indexpath): + super(PackIndexFile, self).__init__() + self._indexpath = indexpath + + def _set_cache_(self, attr): + if attr == "_packfile_checksum": + self._packfile_checksum = self._data[-40:-20] + elif attr == "_packfile_checksum": + self._packfile_checksum = self._data[-20:] + elif attr == "_data": + # Note: We don't lock the file when reading as we 
cannot be sure
+ # that we can actually write to the location - it could be a read-only
+ # alternate for instance
+ self._data = file_contents_ro_filepath(self._indexpath)
+ else:
+ # now it's time to initialize everything - if we are here, someone wants
+ # to access the fanout table or related properties
+
+ # CHECK VERSION
+ self._version = (self._data[:4] == self.index_v2_signature and 2) or 1
+ if self._version == 2:
+ version_id = unpack_from(">L", self._data, 4)[0]
+ assert version_id == self._version, "Unsupported index version: %i" % version_id
+ # END assert version
+
+ # SETUP FUNCTIONS
+ # setup our functions according to the actual version
+ for fname in ('entry', 'offset', 'sha', 'crc'):
+ setattr(self, fname, getattr(self, "_%s_v%i" % (fname, self._version)))
+ # END for each function to initialize
+
+
+ # INITIALIZE DATA
+ # byte offset is 8 if version is 2, 0 otherwise
+ self._initialize()
+ # END handle attributes
+
+
+ #{ Access V1
+
+ def _entry_v1(self, i):
+ """:return: tuple(offset, binsha, 0)"""
+ return unpack_from(">L20s", self._data, 1024 + i*24) + (0, )
+
+ def _offset_v1(self, i):
+ """see ``_offset_v2``"""
+ return unpack_from(">L", self._data, 1024 + i*24)[0]
+
+ def _sha_v1(self, i):
+ """see ``_sha_v2``"""
+ base = 1024 + (i*24)+4
+ return self._data[base:base+20]
+
+ def _crc_v1(self, i):
+ """unsupported"""
+ return 0
+
+ #} END access V1
+
+ #{ Access V2
+ def _entry_v2(self, i):
+ """:return: tuple(offset, binsha, crc)"""
+ return (self._offset_v2(i), self._sha_v2(i), self._crc_v2(i))
+
+ def _offset_v2(self, i):
+ """:return: 32 or 64 bit offset into the pack file. 64 bit offsets will only
+ be returned if the pack is larger than 4 GiB, or 2^32 bytes"""
+ offset = unpack_from(">L", self._data, self._pack_offset + i * 4)[0]
+
+ # if the high-bit is set, this indicates that we have to look up the offset
+ # in the 64 bit region of the file. 
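
# Illustration of the fanout table semantics relied upon here and written by
# IndexWriter.write above: entry b is the number of objects whose first sha
# byte is <= b, so entry 255 equals the total object count. Shas fabricated.
binshas = sorted(['\x00' * 20, '\x00' + '\xff' * 19, '\x02' + '\x11' * 19])
fanout = [0] * 256
for sha in binshas:
    fanout[ord(sha[0])] += 1
for b in range(1, 256):
    fanout[b] += fanout[b - 1]
assert (fanout[0], fanout[2], fanout[255]) == (2, 3, 3)
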
The current offset ( lower 31 bits ) + # are the index into it + if offset & 0x80000000: + offset = unpack_from(">Q", self._data, self._pack_64_offset + (offset & ~0x80000000) * 8)[0] + # END handle 64 bit offset + + return offset + + def _sha_v2(self, i): + """:return: sha at the given index of this file index instance""" + base = self._sha_list_offset + i * 20 + return self._data[base:base+20] + + def _crc_v2(self, i): + """:return: 4 bytes crc for the object at index i""" + return unpack_from(">L", self._data, self._crc_list_offset + i * 4)[0] + + #} END access V2 + + #{ Initialization + + def _initialize(self): + """initialize base data""" + self._fanout_table = self._read_fanout((self._version == 2) * 8) + + if self._version == 2: + self._crc_list_offset = self._sha_list_offset + self.size() * 20 + self._pack_offset = self._crc_list_offset + self.size() * 4 + self._pack_64_offset = self._pack_offset + self.size() * 4 + # END setup base + + def _read_fanout(self, byte_offset): + """Generate a fanout table from our data""" + d = self._data + out = list() + append = out.append + for i in range(256): + append(unpack_from('>L', d, byte_offset + i*4)[0]) + # END for each entry + return out + + #} END initialization + + #{ Properties + def version(self): + return self._version + + def size(self): + """:return: amount of objects referred to by this index""" + return self._fanout_table[255] + + def path(self): + """:return: path to the packindexfile""" + return self._indexpath + + def packfile_checksum(self): + """:return: 20 byte sha representing the sha1 hash of the pack file""" + return self._data[-40:-20] + + def indexfile_checksum(self): + """:return: 20 byte sha representing the sha1 hash of this index file""" + return self._data[-20:] + + def offsets(self): + """:return: sequence of all offsets in the order in which they were written + :note: return value can be random accessed, but may be immmutable""" + if self._version == 2: + # read stream to array, convert to tuple + a = array.array('I') # 4 byte unsigned int, long are 8 byte on 64 bit it appears + a.fromstring(buffer(self._data, self._pack_offset, self._pack_64_offset - self._pack_offset)) + + # networkbyteorder to something array likes more + if sys.byteorder == 'little': + a.byteswap() + return a + else: + return tuple(self.offset(index) for index in xrange(self.size())) + # END handle version + + def sha_to_index(self, sha): + """ + :return: index usable with the ``offset`` or ``entry`` method, or None + if the sha was not found in this pack index + :param sha: 20 byte sha to lookup""" + first_byte = ord(sha[0]) + get_sha = self.sha + lo = 0 # lower index, the left bound of the bisection + if first_byte != 0: + lo = self._fanout_table[first_byte-1] + hi = self._fanout_table[first_byte] # the upper, right bound of the bisection + + # bisect until we have the sha + while lo < hi: + mid = (lo + hi) / 2 + c = cmp(sha, get_sha(mid)) + if c < 0: + hi = mid + elif not c: + return mid + else: + lo = mid + 1 + # END handle midpoint + # END bisect + return None + + def partial_sha_to_index(self, partial_bin_sha, canonical_length): + """ + :return: index as in `sha_to_index` or None if the sha was not found in this + index file + :param partial_bin_sha: an at least two bytes of a partial binary sha + :param canonical_length: lenght of the original hexadecimal representation of the + given partial binary sha + :raise AmbiguousObjectName:""" + if len(partial_bin_sha) < 2: + raise ValueError("Require at least 2 bytes of partial sha") + + 
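
# Rough byte layout of a v2 pack index for n objects, mirroring the region
# offsets computed in _initialize above; all record widths are the fixed v2
# sizes (20 byte shas, 4 byte crcs, 4 byte offsets).
def v2_region_offsets(n):
    sha_list = 8 + 256 * 4            # 8 byte header + fanout table
    crc_list = sha_list + n * 20      # sorted binary shas
    ofs_list = crc_list + n * 4       # one crc32 per object
    ofs64_list = ofs_list + n * 4     # 31 bit offsets, high bit = 64 bit lookup
    return sha_list, crc_list, ofs_list, ofs64_list

assert v2_region_offsets(100) == (1032, 3032, 3432, 3832)
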
first_byte = ord(partial_bin_sha[0]) + get_sha = self.sha + lo = 0 # lower index, the left bound of the bisection + if first_byte != 0: + lo = self._fanout_table[first_byte-1] + hi = self._fanout_table[first_byte] # the upper, right bound of the bisection + + # fill the partial to full 20 bytes + filled_sha = partial_bin_sha + '\0'*(20 - len(partial_bin_sha)) + + # find lowest + while lo < hi: + mid = (lo + hi) / 2 + c = cmp(filled_sha, get_sha(mid)) + if c < 0: + hi = mid + elif not c: + # perfect match + lo = mid + break + else: + lo = mid + 1 + # END handle midpoint + # END bisect + + if lo < self.size(): + cur_sha = get_sha(lo) + if is_equal_canonical_sha(canonical_length, partial_bin_sha, cur_sha): + next_sha = None + if lo+1 < self.size(): + next_sha = get_sha(lo+1) + if next_sha and next_sha == cur_sha: + raise AmbiguousObjectName(partial_bin_sha) + return lo + # END if we have a match + # END if we found something + return None + + if 'PackIndexFile_sha_to_index' in globals(): + # NOTE: Its just about 25% faster, the major bottleneck might be the attr + # accesses + def sha_to_index(self, sha): + return PackIndexFile_sha_to_index(self, sha) + # END redefine heavy-hitter with c version + + #} END properties + + +class PackFile(LazyMixin): + """A pack is a file written according to the Version 2 for git packs + + As we currently use memory maps, it could be assumed that the maximum size of + packs therefor is 32 bit on 32 bit systems. On 64 bit systems, this should be + fine though. + + :note: at some point, this might be implemented using streams as well, or + streams are an alternate path in the case memory maps cannot be created + for some reason - one clearly doesn't want to read 10GB at once in that + case""" + + __slots__ = ('_packpath', '_data', '_size', '_version') + pack_signature = 0x5041434b # 'PACK' + pack_version_default = 2 + + # offset into our data at which the first object starts + first_object_offset = 3*4 # header bytes + footer_size = 20 # final sha + + def __init__(self, packpath): + self._packpath = packpath + + def _set_cache_(self, attr): + if attr == '_data': + self._data = file_contents_ro_filepath(self._packpath) + + # read the header information + type_id, self._version, self._size = unpack_from(">LLL", self._data, 0) + + # TODO: figure out whether we should better keep the lock, or maybe + # add a .keep file instead ? 
+ else: # must be '_size' or '_version' + # read header info - we do that just with a file stream + type_id, self._version, self._size = unpack(">LLL", open(self._packpath).read(12)) + # END handle header + + if type_id != self.pack_signature: + raise ParseError("Invalid pack signature: %i" % type_id) + #END assert type id + + def _iter_objects(self, start_offset, as_stream=True): + """Handle the actual iteration of objects within this pack""" + data = self._data + content_size = len(data) - self.footer_size + cur_offset = start_offset or self.first_object_offset + + null = NullStream() + while cur_offset < content_size: + data_offset, ostream = pack_object_at(data, cur_offset, True) + # scrub the stream to the end - this decompresses the object, but yields + # the amount of compressed bytes we need to get to the next offset + + stream_copy(ostream.read, null.write, ostream.size, chunk_size) + cur_offset += (data_offset - ostream.pack_offset) + ostream.stream.compressed_bytes_read() + + + # if a stream is requested, reset it beforehand + # Otherwise return the Stream object directly, its derived from the + # info object + if as_stream: + ostream.stream.seek(0) + yield ostream + # END until we have read everything + + #{ Pack Information + + def size(self): + """:return: The amount of objects stored in this pack""" + return self._size + + def version(self): + """:return: the version of this pack""" + return self._version + + def data(self): + """ + :return: read-only data of this pack. It provides random access and usually + is a memory map""" + return self._data + + def checksum(self): + """:return: 20 byte sha1 hash on all object sha's contained in this file""" + return self._data[-20:] + + def path(self): + """:return: path to the packfile""" + return self._packpath + #} END pack information + + #{ Pack Specific + + def collect_streams(self, offset): + """ + :return: list of pack streams which are required to build the object + at the given offset. The first entry of the list is the object at offset, + the last one is either a full object, or a REF_Delta stream. The latter + type needs its reference object to be locked up in an ODB to form a valid + delta chain. + If the object at offset is no delta, the size of the list is 1. + :param offset: specifies the first byte of the object within this pack""" + out = list() + while True: + ostream = pack_object_at(self._data, offset, True)[1] + out.append(ostream) + if ostream.type_id == OFS_DELTA: + offset = ostream.pack_offset - ostream.delta_info + else: + # the only thing we can lookup are OFFSET deltas. 
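
# Toy model of the OFS_DELTA chain walk in collect_streams above: an offset
# delta stores the distance back to its base, so pack_offset - delta_info
# yields the base object's offset. All numbers are invented.
chain = {200: ('ofs_delta', 80), 120: ('ofs_delta', 108), 12: ('base', None)}
offset, visited = 200, []
while True:
    kind, distance = chain[offset]
    visited.append(offset)
    if kind != 'ofs_delta':
        break
    offset -= distance
assert visited == [200, 120, 12]
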
Everything + # else is either an object, or a ref delta, in the latter + # case someone else has to find it + break + # END handle type + # END while chaining streams + return out + + #} END pack specific + + #{ Read-Database like Interface + + def info(self, offset): + """Retrieve information about the object at the given file-absolute offset + + :param offset: byte offset + :return: OPackInfo instance, the actual type differs depending on the type_id attribute""" + return pack_object_at(self._data, offset or self.first_object_offset, False)[1] + + def stream(self, offset): + """Retrieve an object at the given file-relative offset as stream along with its information + + :param offset: byte offset + :return: OPackStream instance, the actual type differs depending on the type_id attribute""" + return pack_object_at(self._data, offset or self.first_object_offset, True)[1] + + def stream_iter(self, start_offset=0): + """ + :return: iterator yielding OPackStream compatible instances, allowing + to access the data in the pack directly. + :param start_offset: offset to the first object to iterate. If 0, iteration + starts at the very first object in the pack. + :note: Iterating a pack directly is costly as the datastream has to be decompressed + to determine the bounds between the objects""" + return self._iter_objects(start_offset, as_stream=True) + + #} END Read-Database like Interface + + +class PackEntity(LazyMixin): + """Combines the PackIndexFile and the PackFile into one, allowing the + actual objects to be resolved and iterated""" + + __slots__ = ( '_index', # our index file + '_pack', # our pack file + '_offset_map' # on demand dict mapping one offset to the next consecutive one + ) + + IndexFileCls = PackIndexFile + PackFileCls = PackFile + + def __init__(self, pack_or_index_path): + """Initialize ourselves with the path to the respective pack or index file""" + basename, ext = os.path.splitext(pack_or_index_path) + self._index = self.IndexFileCls("%s.idx" % basename) # PackIndexFile instance + self._pack = self.PackFileCls("%s.pack" % basename) # corresponding PackFile instance + + def _set_cache_(self, attr): + # currently this can only be _offset_map + # TODO: make this a simple sorted offset array which can be bisected + # to find the respective entry, from which we can take a +1 easily + # This might be slower, but should also be much lighter in memory ! 
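
# Sketch of the offset map assembled in PackEntity._set_cache_ right here:
# zipping the sorted offsets against themselves shifted by one maps each
# object offset to the next object's offset, and the last object ends where
# the 20 byte pack footer begins. Values are fabricated.
from itertools import izip

offsets_sorted = [12, 120, 200]
last_offset = 260   # len(pack data) - footer_size
offset_map = dict(izip(offsets_sorted, offsets_sorted[1:]))
offset_map[offsets_sorted[-1]] = last_offset
assert offset_map == {12: 120, 120: 200, 200: 260}
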
+ offsets_sorted = sorted(self._index.offsets()) + last_offset = len(self._pack.data()) - self._pack.footer_size + assert offsets_sorted, "Cannot handle empty indices" + + offset_map = None + if len(offsets_sorted) == 1: + offset_map = { offsets_sorted[0] : last_offset } + else: + iter_offsets = iter(offsets_sorted) + iter_offsets_plus_one = iter(offsets_sorted) + iter_offsets_plus_one.next() + consecutive = izip(iter_offsets, iter_offsets_plus_one) + + offset_map = dict(consecutive) + + # the last offset is not yet set + offset_map[offsets_sorted[-1]] = last_offset + # END handle offset amount + self._offset_map = offset_map + + def _sha_to_index(self, sha): + """:return: index for the given sha, or raise""" + index = self._index.sha_to_index(sha) + if index is None: + raise BadObject(sha) + return index + + def _iter_objects(self, as_stream): + """Iterate over all objects in our index and yield their OInfo or OStream instences""" + _sha = self._index.sha + _object = self._object + for index in xrange(self._index.size()): + yield _object(_sha(index), as_stream, index) + # END for each index + + def _object(self, sha, as_stream, index=-1): + """:return: OInfo or OStream object providing information about the given sha + :param index: if not -1, its assumed to be the sha's index in the IndexFile""" + # its a little bit redundant here, but it needs to be efficient + if index < 0: + index = self._sha_to_index(sha) + if sha is None: + sha = self._index.sha(index) + # END assure sha is present ( in output ) + offset = self._index.offset(index) + type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._data, offset)) + if as_stream: + if type_id not in delta_types: + packstream = self._pack.stream(offset) + return OStream(sha, packstream.type, packstream.size, packstream.stream) + # END handle non-deltas + + # produce a delta stream containing all info + # To prevent it from applying the deltas when querying the size, + # we extract it from the delta stream ourselves + streams = self.collect_streams_at_offset(offset) + dstream = DeltaApplyReader.new(streams) + + return ODeltaStream(sha, dstream.type, None, dstream) + else: + if type_id not in delta_types: + return OInfo(sha, type_id_to_type_map[type_id], uncomp_size) + # END handle non-deltas + + # deltas are a little tougher - unpack the first bytes to obtain + # the actual target size, as opposed to the size of the delta data + streams = self.collect_streams_at_offset(offset) + buf = streams[0].read(512) + offset, src_size = msb_size(buf) + offset, target_size = msb_size(buf, offset) + + # collect the streams to obtain the actual object type + if streams[-1].type_id in delta_types: + raise BadObject(sha, "Could not resolve delta object") + return OInfo(sha, streams[-1].type, target_size) + # END handle stream + + #{ Read-Database like Interface + + def info(self, sha): + """Retrieve information about the object identified by the given sha + + :param sha: 20 byte sha1 + :raise BadObject: + :return: OInfo instance, with 20 byte sha""" + return self._object(sha, False) + + def stream(self, sha): + """Retrieve an object stream along with its information as identified by the given sha + + :param sha: 20 byte sha1 + :raise BadObject: + :return: OStream instance, with 20 byte sha""" + return self._object(sha, True) + + def info_at_index(self, index): + """As ``info``, but uses a PackIndexFile compatible index to refer to the object""" + return self._object(None, False, index) + + def stream_at_index(self, index): + """As 
``stream``, but uses a PackIndexFile compatible index to refer to the + object""" + return self._object(None, True, index) + + #} END Read-Database like Interface + + #{ Interface + + def pack(self): + """:return: the underlying pack file instance""" + return self._pack + + def index(self): + """:return: the underlying pack index file instance""" + return self._index + + def is_valid_stream(self, sha, use_crc=False): + """ + Verify that the stream at the given sha is valid. + + :param use_crc: if True, the index' crc is run over the compressed stream of + the object, which is much faster than checking the sha1. It is also + more prone to unnoticed corruption or manipulation. + :param sha: 20 byte sha1 of the object whose stream to verify + whether the compressed stream of the object is valid. If it is + a delta, this only verifies that the delta's data is valid, not the + data of the actual undeltified object, as it depends on more than + just this stream. + If False, the object will be decompressed and the sha generated. It must + match the given sha + + :return: True if the stream is valid + :raise UnsupportedOperation: If the index is version 1 only + :raise BadObject: sha was not found""" + if use_crc: + if self._index.version() < 2: + raise UnsupportedOperation("Version 1 indices do not contain crc's, verify by sha instead") + # END handle index version + + index = self._sha_to_index(sha) + offset = self._index.offset(index) + next_offset = self._offset_map[offset] + crc_value = self._index.crc(index) + + # create the current crc value, on the compressed object data + # Read it in chunks, without copying the data + crc_update = zlib.crc32 + pack_data = self._pack.data() + cur_pos = offset + this_crc_value = 0 + while cur_pos < next_offset: + rbound = min(cur_pos + chunk_size, next_offset) + size = rbound - cur_pos + this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value) + cur_pos += size + # END window size loop + + # crc returns signed 32 bit numbers, the AND op forces it into unsigned + # mode ... wow, sneaky, from dulwich. + return (this_crc_value & 0xffffffff) == crc_value + else: + shawriter = Sha1Writer() + stream = self._object(sha, as_stream=True) + # write a loose object, which is the basis for the sha + write_object(stream.type, stream.size, stream.read, shawriter.write) + + assert shawriter.sha(as_hex=False) == sha + return shawriter.sha(as_hex=False) == sha + # END handle crc/sha verification + return True + + def info_iter(self): + """ + :return: Iterator over all objects in this pack. The iterator yields + OInfo instances""" + return self._iter_objects(as_stream=False) + + def stream_iter(self): + """ + :return: iterator over all objects in this pack. The iterator yields + OStream instances""" + return self._iter_objects(as_stream=True) + + def collect_streams_at_offset(self, offset): + """ + As the version in the PackFile, but can resolve REF deltas within this pack + For more info, see ``collect_streams`` + + :param offset: offset into the pack file at which the object can be found""" + streams = self._pack.collect_streams(offset) + + # try to resolve the last one if needed. 
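
# Standalone sketch of the crc check in is_valid_stream above: crc32 is run
# in windows over the raw compressed bytes of one object and masked to an
# unsigned value before comparison with the crc stored in the index.
import zlib

pack_data = 'x' * 1000   # stands in for the memory-mapped pack
cur_pos, next_offset, chunk_size = 100, 400, 128
this_crc_value = 0
while cur_pos < next_offset:
    size = min(chunk_size, next_offset - cur_pos)
    this_crc_value = zlib.crc32(buffer(pack_data, cur_pos, size), this_crc_value)
    cur_pos += size
assert (this_crc_value & 0xffffffff) == (zlib.crc32(pack_data[100:400]) & 0xffffffff)
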
It is assumed to be either
+ # a REF delta, or a base object, as OFFSET deltas are resolved by the pack
+ if streams[-1].type_id == REF_DELTA:
+ stream = streams[-1]
+ while stream.type_id in delta_types:
+ if stream.type_id == REF_DELTA:
+ sindex = self._index.sha_to_index(stream.delta_info)
+ if sindex is None:
+ break
+ stream = self._pack.stream(self._index.offset(sindex))
+ streams.append(stream)
+ else:
+ # must be another OFS DELTA - this could happen if a REF
+ # delta we resolved previously points to an OFS delta. Who
+ # would do that ;) ? We can handle it though
+ stream = self._pack.stream(stream.delta_info)
+ streams.append(stream)
+ # END handle ref delta
+ # END resolve ref streams
+ # END resolve streams
+
+ return streams
+
+ def collect_streams(self, sha):
+ """
+ As ``PackFile.collect_streams``, but takes a sha instead of an offset.
+ Additionally, ref_delta streams will be resolved within this pack.
+ If this is not possible, the stream will be left alone, hence it is advised
+ to check for unresolved ref-deltas and resolve them before attempting to
+ construct a delta stream.
+
+ :param sha: 20 byte sha1 specifying the object whose related streams you want to collect
+ :return: list of streams, first being the actual object delta, the last being
+ a possibly unresolved base object.
+ :raise BadObject:"""
+ return self.collect_streams_at_offset(self._index.offset(self._sha_to_index(sha)))
+
+
+ @classmethod
+ def write_pack(cls, object_iter, pack_write, index_write=None,
+ object_count = None, zlib_compression = zlib.Z_BEST_SPEED):
+ """
+ Create a new pack by putting all objects obtained by the object_iterator
+ into a pack which is written using the pack_write method.
+ The respective index is produced as well if index_write is not None.
+
+ :param object_iter: iterator yielding odb output objects
+ :param pack_write: function to receive strings to write into the pack stream
+ :param index_write: if not None, the function writes the index file corresponding
+ to the pack.
+ :param object_count: if you can provide the amount of objects in your iteration,
+ this would be the place to put it. Otherwise we have to pre-iterate and store
+ all items into a list to get the number, which uses more memory than necessary.
+ :param zlib_compression: the zlib compression level to use
+ :return: tuple(pack_sha, index_binsha) binary sha over all the contents of the pack
+ and over all contents of the index. If index_write was None, index_binsha will be None
+ :note: The destination of the write functions is up to the user. 
It could + be a socket, or a file for instance + :note: writes only undeltified objects""" + objs = object_iter + if not object_count: + if not isinstance(object_iter, (tuple, list)): + objs = list(object_iter) + #END handle list type + object_count = len(objs) + #END handle object + + pack_writer = FlexibleSha1Writer(pack_write) + pwrite = pack_writer.write + ofs = 0 # current offset into the pack file + index = None + wants_index = index_write is not None + + # write header + pwrite(pack('>LLL', PackFile.pack_signature, PackFile.pack_version_default, object_count)) + ofs += 12 + + if wants_index: + index = IndexWriter() + #END handle index header + + actual_count = 0 + for obj in objs: + actual_count += 1 + crc = 0 + + # object header + hdr = create_pack_object_header(obj.type_id, obj.size) + if index_write: + crc = crc32(hdr) + else: + crc = None + #END handle crc + pwrite(hdr) + + # data stream + zstream = zlib.compressobj(zlib_compression) + ostream = obj.stream + br, bw, crc = write_stream_to_pack(ostream.read, pwrite, zstream, base_crc = crc) + assert(br == obj.size) + if wants_index: + index.append(obj.binsha, crc, ofs) + #END handle index + + ofs += len(hdr) + bw + if actual_count == object_count: + break + #END abort once we are done + #END for each object + + if actual_count != object_count: + raise ValueError("Expected to write %i objects into pack, but received only %i from iterators" % (object_count, actual_count)) + #END count assertion + + # write footer + pack_sha = pack_writer.sha(as_hex = False) + assert len(pack_sha) == 20 + pack_write(pack_sha) + ofs += len(pack_sha) # just for completeness ;) + + index_sha = None + if wants_index: + index_sha = index.write(pack_sha, index_write) + #END handle index + + return pack_sha, index_sha + + @classmethod + def create(cls, object_iter, base_dir, object_count = None, zlib_compression = zlib.Z_BEST_SPEED): + """Create a new on-disk entity comprised of a properly named pack file and a properly named + and corresponding index file. The pack contains all OStream objects contained in object iter. 
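
# Hedged usage sketch for write_pack above, feeding it a single fabricated
# blob-like object; FakeStream is hypothetical and merely offers the
# attributes the loop reads (type_id, size, binsha, stream). Assumes
# PackEntity is importable from git.pack as defined in this file.
from cStringIO import StringIO
from git.pack import PackEntity

class FakeStream(object):
    type_id = 3                  # pack type id of a blob
    size = 5                     # uncompressed size of the payload below
    binsha = '\x01' * 20
    stream = StringIO('hello')

pack_fp = open('/tmp/example.pack', 'wb')
index_fp = open('/tmp/example.idx', 'wb')
pack_sha, index_sha = PackEntity.write_pack([FakeStream()], pack_fp.write,
                                            index_fp.write, object_count=1)
pack_fp.close()
index_fp.close()
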
+ :param base_dir: directory which is to contain the files + :return: PackEntity instance initialized with the new pack + :note: for more information on the other parameters see the write_pack method""" + pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir) + index_fd, index_path = tempfile.mkstemp('', 'index', base_dir) + pack_write = lambda d: os.write(pack_fd, d) + index_write = lambda d: os.write(index_fd, d) + + pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression) + os.close(pack_fd) + os.close(index_fd) + + fmt = "pack-%s.%s" + new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack')) + new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx')) + os.rename(pack_path, new_pack_path) + os.rename(index_path, new_index_path) + + return cls(new_pack_path) + + + #} END interface diff --git a/git/refs/__init__.py b/git/refs/__init__.py index fc8ce644..35b69fca 100644 --- a/git/refs/__init__.py +++ b/git/refs/__init__.py @@ -2,19 +2,20 @@ # import all modules in order, fix the names they require from symbolic import * from reference import * +from headref import * from head import * from tag import * from remote import * # name fixes -import head -head.RemoteReference = RemoteReference -del(head) +import headref +headref.Head.RemoteReferenceCls = RemoteReference +del(headref) import symbolic -for item in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): - setattr(symbolic, item.__name__, item) +for item in (HEAD, Head, RemoteReference, TagReference, Reference): + setattr(symbolic.SymbolicReference, item.__name__+'Cls', item) del(symbolic) diff --git a/git/refs/head.py b/git/refs/head.py index d8729434..4345528b 100644 --- a/git/refs/head.py +++ b/git/refs/head.py @@ -1,19 +1,14 @@ -from symbolic import SymbolicReference -from reference import Reference - -from git.config import SectionConstraint - -from git.util import join_path +from symbolic import SymbolicReference from git.exc import GitCommandError -__all__ = ["HEAD", "Head"] - +__all__ = ["HEAD"] class HEAD(SymbolicReference): - """Special case of a Symbolic Reference as it represents the repository's - HEAD reference.""" + """Provides additional functionality using the git command""" + __slots__ = tuple() + _HEAD_NAME = 'HEAD' _ORIG_HEAD_NAME = 'ORIG_HEAD' __slots__ = tuple() @@ -90,157 +85,3 @@ class HEAD(SymbolicReference): return self - -class Head(Reference): - """A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. - - Examples:: - - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] - - >>> head.name - 'master' - - >>> head.commit - <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> - - >>> head.commit.hexsha - '1c09f116cbc2cb4100fb6935bb162daa4723f455'""" - _common_path_default = "refs/heads" - k_config_remote = "remote" - k_config_remote_ref = "merge" # branch to merge from remote - - @classmethod - def delete(cls, repo, *heads, **kwargs): - """Delete the given heads - :param force: - If True, the heads will be deleted even if they are not yet merged into - the main development stream. - Default False""" - force = kwargs.get("force", False) - flag = "-d" - if force: - flag = "-D" - repo.git.branch(flag, *heads) - - def set_tracking_branch(self, remote_reference): - """ - Configure this branch to track the given remote reference. This will alter - this branch's configuration accordingly. 
- - :param remote_reference: The remote reference to track or None to untrack - any references - :return: self""" - if remote_reference is not None and not isinstance(remote_reference, RemoteReference): - raise ValueError("Incorrect parameter type: %r" % remote_reference) - # END handle type - - writer = self.config_writer() - if remote_reference is None: - writer.remove_option(self.k_config_remote) - writer.remove_option(self.k_config_remote_ref) - if len(writer.options()) == 0: - writer.remove_section() - # END handle remove section - else: - writer.set_value(self.k_config_remote, remote_reference.remote_name) - writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head)) - # END handle ref value - - return self - - - def tracking_branch(self): - """ - :return: The remote_reference we are tracking, or None if we are - not a tracking branch""" - reader = self.config_reader() - if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref): - ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref))) - remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name)) - return RemoteReference(self.repo, remote_refpath) - # END handle have tracking branch - - # we are not a tracking branch - return None - - def rename(self, new_path, force=False): - """Rename self to a new path - - :param new_path: - Either a simple name or a path, i.e. new_name or features/new_name. - The prefix refs/heads is implied - - :param force: - If True, the rename will succeed even if a head with the target name - already exists. - - :return: self - :note: respects the ref log as git commands are used""" - flag = "-m" - if force: - flag = "-M" - - self.repo.git.branch(flag, self, new_path) - self.path = "%s/%s" % (self._common_path_default, new_path) - return self - - def checkout(self, force=False, **kwargs): - """Checkout this head by setting the HEAD to this reference, by updating the index - to reflect the tree we point to and by updating the working tree to reflect - the latest index. - - The command will fail if changed working tree files would be overwritten. - - :param force: - If True, changes to the index and the working tree will be discarded. - If False, GitCommandError will be raised in that situation. - - :param kwargs: - Additional keyword arguments to be passed to git checkout, i.e. - b='new_branch' to create a new branch at the given spot. - - :return: - The active branch after the checkout operation, usually self unless - a new branch has been created. 
- 
- :note:
- By default it is only allowed to checkout heads - everything else
- will leave the HEAD detached which is allowed and possible, but remains
- a special state that some tools might not be able to handle."""
- args = list()
- kwargs['f'] = force
- if kwargs['f'] == False:
- kwargs.pop('f')
- 
- self.repo.git.checkout(self, **kwargs)
- return self.repo.active_branch
- 
- #{ Configuration
- 
- def _config_parser(self, read_only):
- if read_only:
- parser = self.repo.config_reader()
- else:
- parser = self.repo.config_writer()
- # END handle parser instance
- 
- return SectionConstraint(parser, 'branch "%s"' % self.name)
- 
- def config_reader(self):
- """
- :return: A configuration parser instance constrained to only read
- this instance's values"""
- return self._config_parser(read_only=True)
- 
- def config_writer(self):
- """
- :return: A configuration writer instance with read- and write access
- to options of this head"""
- return self._config_parser(read_only=False)
- 
- #} END configuration
- 
- 
diff --git a/git/refs/headref.py b/git/refs/headref.py
new file mode 100644
index 00000000..67117e96
--- /dev/null
+++ b/git/refs/headref.py
@@ -0,0 +1,170 @@
+from reference import Reference
+from git.config import SectionConstraint
+from git.util import join_path
+
+__all__ = ["Head"]
+
+class Head(Reference):
+ """The GitPython Head implementation provides more git-command based features
+
+ A Head is a named reference to a Commit. Every Head instance contains a name
+ and a Commit object.
+
+ Examples::
+
+ >>> repo = Repo("/path/to/repo")
+ >>> head = repo.heads[0]
+
+ >>> head.name
+ 'master'
+
+ >>> head.commit
+ <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455">
+
+ >>> head.commit.hexsha
+ '1c09f116cbc2cb4100fb6935bb162daa4723f455'"""
+ __slots__ = tuple()
+
+ _common_path_default = "refs/heads"
+ k_config_remote = "remote"
+ k_config_remote_ref = "merge" # branch to merge from remote
+
+ # will be set by init method !
+ RemoteReferenceCls = None
+
+ #{ Configuration
+
+ def set_tracking_branch(self, remote_reference):
+ """
+ Configure this branch to track the given remote reference. This will alter
+ this branch's configuration accordingly. 
+
+ :param remote_reference: The remote reference to track or None to untrack
+ any references
+ :return: self"""
+ if remote_reference is not None and not isinstance(remote_reference, self.RemoteReferenceCls):
+ raise ValueError("Incorrect parameter type: %r" % remote_reference)
+ # END handle type
+
+ writer = self.config_writer()
+ if remote_reference is None:
+ writer.remove_option(self.k_config_remote)
+ writer.remove_option(self.k_config_remote_ref)
+ if len(writer.options()) == 0:
+ writer.remove_section()
+ # END handle remove section
+ else:
+ writer.set_value(self.k_config_remote, remote_reference.remote_name)
+ writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head))
+ # END handle ref value
+
+ return self
+
+ def tracking_branch(self):
+ """
+ :return: The remote_reference we are tracking, or None if we are
+ not a tracking branch"""
+ reader = self.config_reader()
+ if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref):
+ ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref)))
+ remote_refpath = self.RemoteReferenceCls.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name))
+ return self.RemoteReferenceCls(self.repo, remote_refpath)
+ # END handle have tracking branch
+
+ # we are not a tracking branch
+ return None
+
+
+ #{ Configuration
+
+ def _config_parser(self, read_only):
+ if read_only:
+ parser = self.repo.config_reader()
+ else:
+ parser = self.repo.config_writer()
+ # END handle parser instance
+
+ return SectionConstraint(parser, 'branch "%s"' % self.name)
+
+ def config_reader(self):
+ """
+ :return: A configuration parser instance constrained to only read
+ this instance's values"""
+ return self._config_parser(read_only=True)
+
+ def config_writer(self):
+ """
+ :return: A configuration writer instance with read- and write access
+ to options of this head"""
+ return self._config_parser(read_only=False)
+
+ #} END configuration
+
+ @classmethod
+ def delete(cls, repo, *heads, **kwargs):
+ """Delete the given heads
+ :param force:
+ If True, the heads will be deleted even if they are not yet merged into
+ the main development stream.
+ Default False"""
+ force = kwargs.get("force", False)
+ flag = "-d"
+ if force:
+ flag = "-D"
+ repo.git.branch(flag, *heads)
+
+
+ def rename(self, new_path, force=False):
+ """Rename self to a new path
+
+ :param new_path:
+ Either a simple name or a path, i.e. new_name or features/new_name.
+ The prefix refs/heads is implied
+
+ :param force:
+ If True, the rename will succeed even if a head with the target name
+ already exists.
+
+ :return: self
+ :note: respects the ref log as git commands are used"""
+ flag = "-m"
+ if force:
+ flag = "-M"
+
+ self.repo.git.branch(flag, self, new_path)
+ self.path = "%s/%s" % (self._common_path_default, new_path)
+ return self
+
+ def checkout(self, force=False, **kwargs):
+ """Checkout this head by setting the HEAD to this reference, by updating the index
+ to reflect the tree we point to and by updating the working tree to reflect
+ the latest index.
+
+ The command will fail if changed working tree files would be overwritten.
+
+ :param force:
+ If True, changes to the index and the working tree will be discarded.
+ If False, GitCommandError will be raised in that situation.
+
+ :param kwargs:
+ Additional keyword arguments to be passed to git checkout, i.e.
+ b='new_branch' to create a new branch at the given spot. 
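
# Hedged usage sketch for set_tracking_branch above; repo is assumed to be an
# existing Repo instance with an 'origin' remote. The resulting .git/config
# section is written through the SectionConstraint returned by config_writer.
head = repo.heads.master
head.set_tracking_branch(repo.remotes.origin.refs.master)
# .git/config now contains roughly:
#   [branch "master"]
#       remote = origin
#       merge = refs/heads/master
assert head.tracking_branch() is not None
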
+ + :return: + The active branch after the checkout operation, usually self unless + a new branch has been created. + + :note: + By default it is only allowed to checkout heads - everything else + will leave the HEAD detached which is allowed and possible, but remains + a special state that some tools might not be able to handle.""" + args = list() + kwargs['f'] = force + if kwargs['f'] == False: + kwargs.pop('f') + + self.repo.git.checkout(self, **kwargs) + return self.repo.active_branch + + + diff --git a/git/refs/log.py b/git/refs/log.py index f49c07fd..3b9d8514 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -5,12 +5,9 @@ from git.util import ( LockFile, assure_directory_exists, to_native_path, - ) - -from gitdb.util import ( bin_to_hex, join, - file_contents_ro_filepath, + file_contents_ro_filepath ) from git.objects.util import ( diff --git a/git/refs/reference.py b/git/refs/reference.py index 1a745ee9..5cff74bb 100644 --- a/git/refs/reference.py +++ b/git/refs/reference.py @@ -1,12 +1,10 @@ -from symbolic import SymbolicReference import os -from git.objects import Object -from git.util import ( - LazyMixin, - Iterable, - ) -from gitdb.util import ( +from symbolic import SymbolicReference +from head import HEAD +from git.util import ( + LazyMixin, + Iterable, isfile, hex_to_bin ) @@ -30,7 +28,7 @@ class Reference(SymbolicReference, LazyMixin, Iterable): Path relative to the .git/ directory pointing to the ref in question, i.e. refs/heads/master""" if not path.startswith(self._common_path_default+'/'): - raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path )) + raise ValueError("Cannot instantiate %r from path %s, maybe use %s.to_full_path(name) to safely generate a valid full path from a name" % ( self.__class__.__name__, path, type(self).__name__)) super(Reference, self).__init__(repo, path) @@ -40,8 +38,8 @@ class Reference(SymbolicReference, LazyMixin, Iterable): def set_object(self, object, logmsg = None): """Special version which checks if the head-log needs an update as well""" oldbinsha = None + head = HEAD(self.repo) if logmsg is not None: - head = self.repo.head if not head.is_detached and head.ref == self: oldbinsha = self.commit.binsha #END handle commit retrieval @@ -62,7 +60,7 @@ class Reference(SymbolicReference, LazyMixin, Iterable): # * check with HEAD only which should cover 99% of all usage # * scenarios (even 100% of the default ones). # */ - self.repo.head.log_append(oldbinsha, logmsg) + head.log_append(oldbinsha, logmsg) #END check if the head # NOTE: Don't have to overwrite properties as the will only work without a the log diff --git a/git/refs/remote.py b/git/refs/remote.py index b7b07d4b..f2dc72ee 100644 --- a/git/refs/remote.py +++ b/git/refs/remote.py @@ -1,15 +1,17 @@ -from head import Head -from git.util import join_path -from gitdb.util import join - import os - +from headref import Head +from git.util import ( + join, + join_path + ) __all__ = ["RemoteReference"] class RemoteReference(Head): """Represents a reference pointing to a remote head.""" + __slots__ = tuple() + _common_path_default = "refs/remotes" @@ -41,6 +43,11 @@ class RemoteReference(Head): return '/'.join(tokens[3:]) @classmethod + def create(cls, *args, **kwargs): + """Used to disable this method""" + raise TypeError("Cannot explicitly create remote references") + + @classmethod def delete(cls, repo, *refs, **kwargs): """Delete the given remote references. 
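
# Illustration of the path split used by RemoteReference (see the hunk above):
# everything past 'refs/remotes/<remote>/' forms the remote head, so nested
# branch names keep their slashes.
tokens = "refs/remotes/origin/feature/x".split('/')
assert tokens[2] == 'origin'
assert '/'.join(tokens[3:]) == 'feature/x'
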
:note: @@ -56,8 +63,3 @@ class RemoteReference(Head): except OSError: pass # END for each ref - - @classmethod - def create(cls, *args, **kwargs): - """Used to disable this method""" - raise TypeError("Cannot explicitly create remote references") diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index aec68750..ddee3809 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -1,24 +1,26 @@ import os -from git.objects import Object, Commit +import re + +from git.objects import ( + Object, + Commit + ) from git.util import ( join_path, join_path_native, to_native_path_linux, - assure_directory_exists + assure_directory_exists, + join, + dirname, + isdir, + exists, + isfile, + rename, + hex_to_bin, + LockedFD ) -from gitdb.exc import BadObject -from gitdb.util import ( - join, - dirname, - isdir, - exists, - isfile, - rename, - hex_to_bin, - LockedFD - ) - +from git.exc import BadObject from log import RefLog __all__ = ["SymbolicReference"] @@ -30,11 +32,27 @@ class SymbolicReference(object): A typical example for a symbolic reference is HEAD.""" __slots__ = ("repo", "path") + _resolve_ref_on_create = False _points_to_commits_only = True _common_path_default = "" _id_attribute_ = "name" + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + + #{ Configuration + # Object class to be used when instantiating objects + ObjectCls = Object + CommitCls = Commit + + # all of the following are set by the package initializer + HEADCls = None + HeadCls = None + RemoteReferenceCls = None + TagReferenceCls = None + ReferenceCls = None + #}END configuration + def __init__(self, repo, path): self.repo = repo self.path = path @@ -143,20 +161,53 @@ class SymbolicReference(object): return (None, tokens[1]) # its a commit - if repo.re_hexsha_only.match(tokens[0]): + if cls.re_hexsha_only.match(tokens[0]): return (tokens[0], None) raise ValueError("Failed to parse reference information from %r" % ref_path) - def _get_object(self): + def _get_object_sha(self): """ :return: - The object our ref currently refers to. Refs can be cached, they will + The binary sha to the object our ref currently refers to. Refs can be cached, they will always point to the actual object as it gets re-created on each query""" + return hex_to_bin(self.dereference_recursive(self.repo, self.path)) + + def _get_object(self): + """ + :return: + The object our ref currently refers to.""" # have to be dynamic here as we may be a tag which can point to anything # Our path will be resolved to the hexsha which will be used accordingly - return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path))) + return self.ObjectCls.new_from_sha(self.repo, self._get_object_sha()) + def set_object(self, object_id, logmsg = None): + """Set the object we point to, possibly dereference our symbolic reference first. + If the reference does not exist, it will be created + + :param object: a reference specifier string, a SymbolicReference or an object hex sha. + SymbolicReferences will be dereferenced beforehand to obtain the object they point to + :param logmsg: If not None, the message will be used in the reflog entry to be + written. 
Otherwise the reflog is not altered + :note: plain SymbolicReferences may not actually point to objects by convention + :return: self""" + if isinstance(object_id, SymbolicReference): + object = object.object + #END resolve references + + is_detached = True + try: + is_detached = self.is_detached + except ValueError: + pass + # END handle non-existing ones + + if is_detached: + return self.set_reference(object_id, logmsg) + + # set the commit on our reference + return self._get_reference().set_object(object_id, logmsg) + def _get_commit(self): """ :return: @@ -167,7 +218,7 @@ class SymbolicReference(object): obj = obj.object #END dereference tag - if obj.type != Commit.type: + if obj.type != self.CommitCls.type: raise TypeError("Symbolic Reference pointed to object %r, commit was required" % obj) #END handle type return obj @@ -179,20 +230,20 @@ class SymbolicReference(object): a commit :return: self""" # check the type - assume the best if it is a base-string - invalid_type = False - if isinstance(commit, Object): - invalid_type = commit.type != Commit.type + is_invalid_type = False + if isinstance(commit, self.ObjectCls): + is_invalid_type = commit.type != self.CommitCls.type elif isinstance(commit, SymbolicReference): - invalid_type = commit.object.type != Commit.type + is_invalid_type = commit.object.type != self.CommitCls.type else: try: - invalid_type = self.repo.rev_parse(commit).type != Commit.type + is_invalid_type = self.repo.resolve_object(commit).type != self.CommitCls.type except BadObject: raise ValueError("Invalid object: %s" % commit) #END handle exception # END verify type - if invalid_type: + if is_invalid_type: raise ValueError("Need commit, got %r" % commit) #END handle raise @@ -202,35 +253,9 @@ class SymbolicReference(object): return self - def set_object(self, object, logmsg = None): - """Set the object we point to, possibly dereference our symbolic reference first. - If the reference does not exist, it will be created - - :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences - will be dereferenced beforehand to obtain the object they point to - :param logmsg: If not None, the message will be used in the reflog entry to be - written. Otherwise the reflog is not altered - :note: plain SymbolicReferences may not actually point to objects by convention - :return: self""" - if isinstance(object, SymbolicReference): - object = object.object - #END resolve references - - is_detached = True - try: - is_detached = self.is_detached - except ValueError: - pass - # END handle non-existing ones - - if is_detached: - return self.set_reference(object, logmsg) - - # set the commit on our reference - return self._get_reference().set_object(object, logmsg) - commit = property(_get_commit, set_commit, doc="Query or set commits directly") object = property(_get_object, set_object, doc="Return the object our ref currently refers to") + object_binsha = property(_get_object_sha, set_object, doc="Return the object our ref currently refers to") def _get_reference(self): """:return: Reference Object we point to @@ -247,7 +272,7 @@ class SymbolicReference(object): will be set which effectively detaches the refererence if it was a purely symbolic one. - :param ref: SymbolicReference instance, Object instance or refspec string + :param ref: SymbolicReference instance, hexadecimal sha string or refspec string Only if the ref is a SymbolicRef instance, we will point to it. Everthiny else is dereferenced to obtain the actual object. 
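
# Simplified sketch, not the exact isinstance-based dispatch, of how
# set_reference below derives the value written into the ref file: symbolic
# targets produce a 'ref: ' line, anything resolvable ends up as a hex sha.
import re

re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')

def ref_file_value(token):
    if token.startswith('refs/'):
        return "ref: %s" % token   # stays symbolic
    if re_hexsha_only.match(token):
        return token               # detached, points directly at an object
    raise ValueError("Could not extract object from %s" % token)

assert ref_file_value('refs/heads/master') == 'ref: refs/heads/master'
assert ref_file_value('f' * 40) == 'f' * 40
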
:param logmsg: If set to a string, the message will be used in the reflog. @@ -263,12 +288,12 @@ class SymbolicReference(object): obj = None if isinstance(ref, SymbolicReference): write_value = "ref: %s" % ref.path - elif isinstance(ref, Object): + elif isinstance(ref, self.ObjectCls): obj = ref write_value = ref.hexsha elif isinstance(ref, basestring): try: - obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags + obj = self.repo.resolve_object(ref+"^{}") # optionally deref tags write_value = obj.hexsha except BadObject: raise ValueError("Could not extract object from %s" % ref) @@ -318,7 +343,7 @@ class SymbolicReference(object): a valid object or reference.""" try: self.object - except (OSError, ValueError): + except (OSError, ValueError, BadObject): return False else: return True @@ -449,7 +474,16 @@ class SymbolicReference(object): # figure out target data target = reference if resolve: - target = repo.rev_parse(str(reference)) + # could just use the resolve method, but it could be expensive + # so we handle most common cases ourselves + if isinstance(reference, cls.ObjectCls): + target = reference.hexsha + elif isinstance(reference, SymbolicReference): + target = reference.object.hexsha + else: + target = repo.resolve_object(str(reference)) + #END handle resoltion + #END need resolution if not force and isfile(abs_ref_path): target_data = str(target) @@ -579,7 +613,7 @@ class SymbolicReference(object): def iter_items(cls, repo, common_path = None): """Find all refs in the repository - :param repo: is the Repo + :param repo: is the repo :param common_path: Optional keyword argument to the path which is to be shared by all @@ -588,12 +622,12 @@ class SymbolicReference(object): refs suitable for the actual class are returned. :return: - git.SymbolicReference[], each of them is guaranteed to be a symbolic - ref which is not detached. + git.SymbolicReference[], each of them is guaranteed to be a *only* a symbolic + ref, or a derived class which is not detached List is lexigraphically sorted The returned objects represent actual subclasses, such as Head or TagReference""" - return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached ) + return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == cls or not r.is_detached ) @classmethod def from_path(cls, repo, path): @@ -606,7 +640,7 @@ class SymbolicReference(object): if not path: raise ValueError("Cannot create Reference from %r" % path) - for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): + for ref_type in (cls.HEADCls, cls.HeadCls, cls.RemoteReferenceCls, cls.TagReferenceCls, cls.ReferenceCls, cls): try: instance = ref_type(repo, path) if instance.__class__ == SymbolicReference and instance.is_detached: diff --git a/git/refs/tag.py b/git/refs/tag.py index c09d814d..3a1433be 100644 --- a/git/refs/tag.py +++ b/git/refs/tag.py @@ -2,8 +2,6 @@ from reference import Reference __all__ = ["TagReference", "Tag"] - - class TagReference(Reference): """Class representing a lightweight tag reference which either points to a commit ,a tag object or any other object. 
In the latter case additional information, @@ -16,7 +14,6 @@ class TagReference(Reference): print tagref.commit.message if tagref.tag is not None: print tagref.tag.message""" - __slots__ = tuple() _common_path_default = "refs/tags" @@ -45,7 +42,7 @@ class TagReference(Reference): # make object read-only # It should be reasonably hard to adjust an existing tag object = property(Reference._get_object) - + @classmethod def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs): """Create a new tag reference. @@ -85,7 +82,5 @@ class TagReference(Reference): """Delete the given existing tag or tags""" repo.git.tag("-d", *tags) - - # provide an alias Tag = TagReference diff --git a/git/remote.py b/git/remote.py index d3639f7b..47adedbf 100644 --- a/git/remote.py +++ b/git/remote.py @@ -13,255 +13,27 @@ from config import SectionConstraint from git.util import ( LazyMixin, Iterable, - IterableList, - RemoteProgress + IterableList ) +from git.db.interface import TransportDB +from refs import RemoteReference -from refs import ( - Reference, - RemoteReference, - SymbolicReference, - TagReference - ) - -from git.util import join_path -from gitdb.util import join - -import re import os -import sys -__all__ = ('RemoteProgress', 'PushInfo', 'FetchInfo', 'Remote') +__all__ = ['Remote'] - class PushInfo(object): - """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit # commit at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') - - NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ - FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] - - _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, - '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, - '=' : UP_TO_DATE, '!' 
: ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, - summary=''): - """ Initialize a new instance """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit = old_commit - self.summary = summary - - @property - def remote_ref(self): - """ - :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e""" - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit = remote.repo.commit(old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) - - -class FetchInfo(object): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. 
info.flags & info.REJECTED - # is 0 if ref is SymbolicReference - info.note # additional notes given by git-fetch intended for the user - info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit', 'flags', 'note') - - NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ - FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] + """Wrapper for basic PushInfo to provide the previous interface which includes + resolved objects instead of plain shas - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") + old_commit # object for the corresponding old_commit_sha""" - _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, - '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } - def __init__(self, ref, flags, note = '', old_commit = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit = old_commit - - def __str__(self): - return self.name - - @property - def name(self): - """:return: Name of our remote ref""" - return self.ref.name - - @property - def commit(self): - """:return: Commit of our remote ref""" - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. 
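The re_fetch_result pattern shown above drives the whole fetch-line parse; a small demo of what it captures from a typical `git fetch -v` status line (the sample line is illustrative, not taken from real output)::

    import re
    re_fetch_result = re.compile(
        r"^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
    line = " * [new branch]      feature    -> origin/feature"  # illustrative sample
    control_char, operation, local_ref, remote_local_ref, note = \
        re_fetch_result.match(line).groups()
    assert control_char == '*' and operation == '[new branch]'
    assert remote_local_ref == 'origin/feature'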
In that case we use a symbolic reference which is detached - ref_type = None - if remote_local_ref == "FETCH_HEAD": - ref_type = SymbolicReference - elif ref_type_name == "branch": - ref_type = RemoteReference - elif ref_type_name == "tag": - ref_type = TagReference - else: - raise TypeError("Cannot handle reference type: %r" % ref_type_name) - - # create ref instance - if ref_type is SymbolicReference: - remote_local_ref = ref_type(repo, "FETCH_HEAD") - else: - remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) - # END create ref instance - - note = ( note and note.strip() ) or '' - - # parse flags from control_character - flags = 0 - try: - flags |= cls._flag_map[control_character] - except KeyError: - raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) - # END control char exception hanlding - - # parse operation string for more info - makes no sense for symbolic refs - old_commit = None - if isinstance(remote_local_ref, Reference): - if 'rejected' in operation: - flags |= cls.REJECTED - if 'new tag' in operation: - flags |= cls.NEW_TAG - if 'new branch' in operation: - flags |= cls.NEW_HEAD - if '...' in operation or '..' in operation: - split_token = '...' - if control_character == ' ': - split_token = split_token[:-1] - old_commit = repo.rev_parse(operation.split(split_token)[0]) - # END handle refspec - # END reference flag handling - - return cls(remote_local_ref, flags, note, old_commit) +class FetchInfo(object): + """Wrapper to restore the previous interface, resolving objects and wrapping + references""" + class Remote(LazyMixin, Iterable): """Provides easy read and write access to a git remote. @@ -280,6 +52,16 @@ class Remote(LazyMixin, Iterable): :param repo: The repository we are a remote of :param name: the name of the remote, i.e. 'origin'""" + if not hasattr(repo, 'git'): + # note: at some point we could just create a git command instance ourselves + # but lets just be lazy for now + raise AssertionError("Require repository to provide a git command instance currently") + #END assert git cmd + + if not isinstance(repo, TransportDB): + raise AssertionError("Require TransportDB interface implementation") + #END verify interface + self.repo = repo self.name = name @@ -432,97 +214,6 @@ class Remote(LazyMixin, Iterable): self.repo.git.remote("update", self.name) return self - def _digest_process_messages(self, fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - - :return: list(line, ...) 
list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break - - if char in ('\r', '\n'): - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines - - - def _finalize_proc(self, proc): - """Wait for the process (fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling - - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in self._digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total'): - continue - elif line.startswith('warning:'): - print >> sys.stderr, line - continue - elif line.startswith('fatal:'): - raise GitCommandError(("Error when fetching: %s" % line,), 2) - # END handle special messages - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info) - - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - self._finalize_proc(proc) - return output - - def _get_push_info(self, proc, progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. 
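The byte-wise reading performed by _digest_process_messages exists because git terminates progress messages with both carriage returns and newlines; a generic sketch of that splitting loop, assuming any file-like fh::

    def iter_progress_tokens(fh):
        # git overwrites progress lines in place using '\r', so both '\r'
        # and '\n' must be treated as terminators when draining the pipe
        line_so_far = ''
        while True:
            char = fh.read(1)
            if not char:                # stream depleted
                break
            if char in ('\r', '\n'):
                if line_so_far:
                    yield line_so_far
                line_so_far = ''
            else:
                line_so_far += char
        if line_so_far:
            yield line_so_far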
This is why we read the bytes manually - self._digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - self._finalize_proc(proc) - return output - - def fetch(self, refspec=None, progress=None, **kwargs): """Fetch the latest changes for this remote @@ -546,8 +237,7 @@ class Remote(LazyMixin, Iterable): :note: As fetch does not provide progress information to non-ttys, we cannot make it available here, unlike in the 'push' method.""" - proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + return self.repo.fetch(self.name, refspec, progress, **kwargs) def pull(self, refspec=None, progress=None, **kwargs): """Pull changes from the given branch, being the same as a fetch followed @@ -557,8 +247,7 @@ class Remote(LazyMixin, Iterable): :param progress: see 'push' method :param kwargs: Additional arguments to be passed to git-pull :return: Please see 'fetch' method """ - proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + return self.repo.pull(self.name, refspec, progress, **kwargs) def push(self, refspec=None, progress=None, **kwargs): """Push changes from source branch in refspec to target branch in refspec. @@ -578,8 +267,7 @@ class Remote(LazyMixin, Iterable): in their flags. If the operation fails completely, the length of the returned IterableList will be null.""" - proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs) - return self._get_push_info(proc, progress or RemoteProgress()) + return self.repo.push(self.name, refspec, progress, **kwargs) @property def config_reader(self): diff --git a/git/repo.py b/git/repo.py new file mode 100644 index 00000000..8d5c4021 --- /dev/null +++ b/git/repo.py @@ -0,0 +1,45 @@ +# repo.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +"""This module is just to maintain compatibility with git-python 0.3x""" + +from git.db.complex import CmdCompatibilityGitDB + + +import warnings + +__all__ = ('Repo', ) + + +class Repo(CmdCompatibilityGitDB): + """Represents a git repository and allows you to query references, + gather commit information, generate diffs, create and clone repositories, and query + the log. + + The following attributes are worth using: + + 'working_dir' is the working directory of the git command, which is the working tree + directory if available or the .git directory in case of bare repositories + + 'working_tree_dir' is the working tree directory, but will raise AssertionError + if we are a bare repository.
+ + 'git_dir' is the .git repository directory, which is always set.""" + + def __init__(self, path=None, odbt = None): + """Create a new Repo instance + + :param path: is the path to either the root git directory or the bare git repo:: + + repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") + repo = Repo("~/Development/git-python.git") + repo = Repo("$REPOSITORIES/Development/git-python.git") + :raise InvalidDBRoot: + :return: git.Repo """ + if odbt is not None: + warnings.warn("deprecated use of odbt", DeprecationWarning) + #END handle old parameter + super(Repo, self).__init__(path) diff --git a/git/repo/__init__.py deleted file mode 100644 index 8902a254..00000000 --- a/git/repo/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -"""Initialize the Repo package""" - -from base import *
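The new git/repo.py shim keeps existing 0.3.x call sites working while routing everything through the database layer; a hedged usage sketch showing that a non-None odbt now merely warns (the path is illustrative and must point at an existing repository)::

    import warnings
    from git import Repo

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        repo = Repo("path/to/repo", odbt=object)   # any non-None odbt only warns now
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)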
\ No newline at end of file diff --git a/git/repo/base.py b/git/repo/base.py deleted file mode 100644 index 0405a5f9..00000000 --- a/git/repo/base.py +++ /dev/null @@ -1,753 +0,0 @@ -# repo.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from git.exc import InvalidGitRepositoryError, NoSuchPathError -from git.cmd import Git -from git.util import Actor -from git.refs import * -from git.index import IndexFile -from git.objects import * -from git.config import GitConfigParser -from git.remote import Remote -from git.db import ( - GitCmdObjectDB, - GitDB - ) - - -from gitdb.util import ( - join, - isfile, - hex_to_bin - ) - -from fun import ( - rev_parse, - is_git_dir, - touch - ) - -import os -import sys -import re - -DefaultDBType = GitDB -if sys.version_info[1] < 5: # python 2.4 compatiblity - DefaultDBType = GitCmdObjectDB -# END handle python 2.4 - - -__all__ = ('Repo', ) - - -class Repo(object): - """Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. - - The following attributes are worth using: - - 'working_dir' is the working directory of the git command, wich is the working tree - directory if available or the .git directory in case of bare repositories - - 'working_tree_dir' is the working tree directory, but will raise AssertionError - if we are a bare repository. - - 'git_dir' is the .git repository directoy, which is always set.""" - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" ) - - # precompiled regex - re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - # invariants - # represents the configuration level of a configuration file - config_level = ("system", "global", "repository") - - def __init__(self, path=None, odbt = DefaultDBType): - """Create a new Repo instance - - :param path: is the path to either the root git directory or the bare git repo:: - - repo = Repo("/Users/mtrier/Development/git-python") - repo = Repo("/Users/mtrier/Development/git-python.git") - repo = Repo("~/Development/git-python.git") - repo = Repo("$REPOSITORIES/Development/git-python.git") - - :param odbt: Object DataBase type - a type which is constructed by providing - the directory containing the database objects, i.e. .git/objects. 
It will - be used to access all object data - :raise InvalidGitRepositoryError: - :raise NoSuchPathError: - :return: git.Repo """ - epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) - - if not os.path.exists(epath): - raise NoSuchPathError(epath) - - self.working_dir = None - self._working_tree_dir = None - self.git_dir = None - curpath = epath - - # walk up the path to find the .git dir - while curpath: - if is_git_dir(curpath): - self.git_dir = curpath - self._working_tree_dir = os.path.dirname(curpath) - break - gitpath = join(curpath, '.git') - if is_git_dir(gitpath): - self.git_dir = gitpath - self._working_tree_dir = curpath - break - curpath, dummy = os.path.split(curpath) - if not dummy: - break - # END while curpath - - if self.git_dir is None: - raise InvalidGitRepositoryError(epath) - - self._bare = False - try: - self._bare = self.config_reader("repository").getboolean('core','bare') - except Exception: - # lets not assume the option exists, although it should - pass - - # adjust the wd in case we are actually bare - we didn't know that - # in the first place - if self._bare: - self._working_tree_dir = None - # END working dir handling - - self.working_dir = self._working_tree_dir or self.git_dir - self.git = Git(self.working_dir) - - # special handling, in special times - args = [join(self.git_dir, 'objects')] - if issubclass(odbt, GitCmdObjectDB): - args.append(self.git) - self.odb = odbt(*args) - - def __eq__(self, rhs): - if isinstance(rhs, Repo): - return self.git_dir == rhs.git_dir - return False - - def __ne__(self, rhs): - return not self.__eq__(rhs) - - def __hash__(self): - return hash(self.git_dir) - - def __repr__(self): - return "%s(%r)" % (type(self).__name__, self.git_dir) - - # Description property - def _get_description(self): - filename = join(self.git_dir, 'description') - return file(filename).read().rstrip() - - def _set_description(self, descr): - filename = join(self.git_dir, 'description') - file(filename, 'w').write(descr+'\n') - - description = property(_get_description, _set_description, - doc="the project's description") - del _get_description - del _set_description - - - - @property - def working_tree_dir(self): - """:return: The working tree directory of our git repository - :raise AssertionError: If we are a bare repository""" - if self._working_tree_dir is None: - raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) - return self._working_tree_dir - - @property - def bare(self): - """:return: True if the repository is bare""" - return self._bare - - @property - def heads(self): - """A list of ``Head`` objects representing the branch heads in - this repo - - :return: ``git.IterableList(Head, ...)``""" - return Head.list_items(self) - - @property - def references(self): - """A list of Reference objects representing tags, heads and remote references. 
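The constructor deleted above discovers the repository by walking up from the given path; the search loop, condensed into a standalone sketch (is_git_dir as imported from fun in the removed module)::

    import os

    def find_git_dir(path):
        # walk upwards until we hit a directory that is a git dir itself
        # (bare repo) or that contains a '.git' directory (non-bare repo)
        curpath = os.path.abspath(path)
        while curpath:
            if is_git_dir(curpath):
                return curpath, os.path.dirname(curpath)
            gitpath = os.path.join(curpath, '.git')
            if is_git_dir(gitpath):
                return gitpath, curpath
            curpath, tail = os.path.split(curpath)
            if not tail:
                break
        return None, None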
- - :return: IterableList(Reference, ...)""" - return Reference.list_items(self) - - # alias for references - refs = references - - # alias for heads - branches = heads - - @property - def index(self): - """:return: IndexFile representing this repository's index.""" - return IndexFile(self) - - @property - def head(self): - """:return: HEAD Object pointing to the current head reference""" - return HEAD(self,'HEAD') - - @property - def remotes(self): - """A list of Remote objects allowing to access and manipulate remotes - :return: ``git.IterableList(Remote, ...)``""" - return Remote.list_items(self) - - def remote(self, name='origin'): - """:return: Remote with the specified name - :raise ValueError: if no remote with such a name exists""" - return Remote(self, name) - - #{ Submodules - - @property - def submodules(self): - """ - :return: git.IterableList(Submodule, ...) of direct submodules - available from the current head""" - return Submodule.list_items(self) - - def submodule(self, name): - """ :return: Submodule with the given name - :raise ValueError: If no such submodule exists""" - try: - return self.submodules[name] - except IndexError: - raise ValueError("Didn't find submodule named %r" % name) - # END exception handling - - def create_submodule(self, *args, **kwargs): - """Create a new submodule - - :note: See the documentation of Submodule.add for a description of the - applicable parameters - :return: created submodules""" - return Submodule.add(self, *args, **kwargs) - - def iter_submodules(self, *args, **kwargs): - """An iterator yielding Submodule instances, see Traversable interface - for a description of args and kwargs - :return: Iterator""" - return RootModule(self).traverse(*args, **kwargs) - - def submodule_update(self, *args, **kwargs): - """Update the submodules, keeping the repository consistent as it will - take the previous state into consideration. For more information, please - see the documentation of RootModule.update""" - return RootModule(self).update(*args, **kwargs) - - #}END submodules - - @property - def tags(self): - """A list of ``Tag`` objects that are available in this repo - :return: ``git.IterableList(TagReference, ...)`` """ - return TagReference.list_items(self) - - def tag(self,path): - """:return: TagReference Object, reference pointing to a Commit or Tag - :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5 """ - return TagReference(self, path) - - def create_head(self, path, commit='HEAD', force=False, logmsg=None ): - """Create a new head within the repository. - For more documentation, please see the Head.create method. - - :return: newly created Head Reference""" - return Head.create(self, path, commit, force, logmsg) - - def delete_head(self, *heads, **kwargs): - """Delete the given heads - - :param kwargs: Additional keyword arguments to be passed to git-branch""" - return Head.delete(self, *heads, **kwargs) - - def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): - """Create a new tag reference. - For more documentation, please see the TagReference.create method. - - :return: TagReference object """ - return TagReference.create(self, path, ref, message, force, **kwargs) - - def delete_tag(self, *tags): - """Delete the given tag references""" - return TagReference.delete(self, *tags) - - def create_remote(self, name, url, **kwargs): - """Create a new remote. 
- - For more information, please see the documentation of the Remote.create - methods - - :return: Remote reference""" - return Remote.create(self, name, url, **kwargs) - - def delete_remote(self, remote): - """Delete the given remote.""" - return Remote.remove(self, remote) - - def _get_config_path(self, config_level ): - # we do not support an absolute path of the gitconfig on windows , - # use the global config instead - if sys.platform == "win32" and config_level == "system": - config_level = "global" - - if config_level == "system": - return "/etc/gitconfig" - elif config_level == "global": - return os.path.normpath(os.path.expanduser("~/.gitconfig")) - elif config_level == "repository": - return join(self.git_dir, "config") - - raise ValueError( "Invalid configuration level: %r" % config_level ) - - def config_reader(self, config_level=None): - """ - :return: - GitConfigParser allowing to read the full git configuration, but not to write it - - The configuration will include values from the system, user and repository - configuration files. - - :param config_level: - For possible values, see config_writer method - If None, all applicable levels will be used. Specify a level in case - you know which exact file you whish to read to prevent reading multiple files for - instance - :note: On windows, system configuration cannot currently be read as the path is - unknown, instead the global path will be used.""" - files = None - if config_level is None: - files = [ self._get_config_path(f) for f in self.config_level ] - else: - files = [ self._get_config_path(config_level) ] - return GitConfigParser(files, read_only=True) - - def config_writer(self, config_level="repository"): - """ - :return: - GitConfigParser allowing to write values of the specified configuration file level. - Config writers should be retrieved, used to change the configuration ,and written - right away as they will lock the configuration file in question and prevent other's - to write it. - - :param config_level: - One of the following values - system = sytem wide configuration file - global = user level configuration file - repository = configuration file for this repostory only""" - return GitConfigParser(self._get_config_path(config_level), read_only = False) - - def commit(self, rev=None): - """The Commit object for the specified revision - :param rev: revision specifier, see git-rev-parse for viable options. - :return: ``git.Commit``""" - if rev is None: - return self.head.commit - else: - return self.rev_parse(str(rev)+"^0") - - def iter_trees(self, *args, **kwargs): - """:return: Iterator yielding Tree objects - :note: Takes all arguments known to iter_commits method""" - return ( c.tree for c in self.iter_commits(*args, **kwargs) ) - - def tree(self, rev=None): - """The Tree object for the given treeish revision - Examples:: - - repo.tree(repo.heads[0]) - - :param rev: is a revision pointing to a Treeish ( being a commit or tree ) - :return: ``git.Tree`` - - :note: - If you need a non-root level tree, find it by iterating the root tree. Otherwise - it cannot know about its path relative to the repository root and subsequent - operations might have unexpected results.""" - if rev is None: - return self.head.commit.tree - else: - return self.rev_parse(str(rev)+"^{tree}") - - def iter_commits(self, rev=None, paths='', **kwargs): - """A list of Commit objects representing the history of a given ref/commit - - :parm rev: - revision specifier, see git-rev-parse for viable options. 
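The three-level configuration lookup being removed here is simple enough to restate on its own; a sketch mirroring _get_config_path (on Windows the system file location is unknown, hence the fallback to the global one)::

    import os, sys

    def config_path(git_dir, level):
        # mirrors the deleted _get_config_path
        if sys.platform == "win32" and level == "system":
            level = "global"
        if level == "system":
            return "/etc/gitconfig"
        elif level == "global":
            return os.path.normpath(os.path.expanduser("~/.gitconfig"))
        elif level == "repository":
            return os.path.join(git_dir, "config")
        raise ValueError("Invalid configuration level: %r" % level)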
- If None, the active branch will be used. - - :parm paths: - is an optional path or a list of paths to limit the returned commits to - Commits that do not contain that path or the paths will not be returned. - - :parm kwargs: - Arguments to be passed to git-rev-list - common ones are - max_count and skip - - :note: to receive only commits between two named revisions, use the - "revA..revB" revision specifier - - :return ``git.Commit[]``""" - if rev is None: - rev = self.head.commit - - return Commit.iter_items(self, rev, paths, **kwargs) - - def _get_daemon_export(self): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def _get_alternates(self): - """The list of alternates for this repo from which objects can be retrieved - - :return: list of strings being pathnames of alternates""" - alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') - - if os.path.exists(alternates_path): - try: - f = open(alternates_path) - alts = f.read() - finally: - f.close() - return alts.strip().splitlines() - else: - return list() - - def _set_alternates(self, alts): - """Sets the alternates - - :parm alts: - is the array of string paths representing the alternates at which - git should look for objects, i.e. /home/user/repo/.git/objects - - :raise NoSuchPathError: - :note: - The method does not check for the existance of the paths in alts - as the caller is responsible.""" - alternates_path = join(self.git_dir, 'objects', 'info', 'alternates') - if not alts: - if isfile(alternates_path): - os.remove(alternates_path) - else: - try: - f = open(alternates_path, 'w') - f.write("\n".join(alts)) - finally: - f.close() - # END file handling - # END alts handling - - alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") - - def is_dirty(self, index=True, working_tree=True, untracked_files=False): - """ - :return: - ``True``, the repository is considered dirty. By default it will react - like a git-status without untracked files, hence it is dirty if the - index or the working copy have changes.""" - if self._bare: - # Bare repositories with no associated working directory are - # always consired to be clean. - return False - - # start from the one which is fastest to evaluate - default_args = ('--abbrev=40', '--full-index', '--raw') - if index: - # diff index against HEAD - if isfile(self.index.path) and self.head.is_valid() and \ - len(self.git.diff('HEAD', '--cached', *default_args)): - return True - # END index handling - if working_tree: - # diff index against working tree - if len(self.git.diff(*default_args)): - return True - # END working tree handling - if untracked_files: - if len(self.untracked_files): - return True - # END untracked files - return False - - @property - def untracked_files(self): - """ - :return: - list(str,...) - - Files currently untracked as they have not been staged yet. Paths - are relative to the current working directory of the git command. - - :note: - ignored files will not appear here, i.e. 
files mentioned in .gitignore""" - # make sure we get all files, no only untracked directores - proc = self.git.status(untracked_files=True, as_process=True) - stream = iter(proc.stdout) - untracked_files = list() - for line in stream: - if not line.startswith("# Untracked files:"): - continue - # skip two lines - stream.next() - stream.next() - - for untracked_info in stream: - if not untracked_info.startswith("#\t"): - break - untracked_files.append(untracked_info.replace("#\t", "").rstrip()) - # END for each utracked info line - # END for each line - return untracked_files - - @property - def active_branch(self): - """The name of the currently active branch. - - :return: Head to the active branch""" - return self.head.reference - - def blame(self, rev, file): - """The blame information for the given file at the given revision. - - :parm rev: revision specifier, see git-rev-parse for viable options. - :return: - list: [git.Commit, list: [<line>]] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance.""" - data = self.git.blame(rev, '--', file, p=True) - commits = dict() - blames = list() - info = None - - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] - if self.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - # END blame data initialization - else: - m = self.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail <tom@mojombo.com> - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail <tom@mojombo.com> - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = int(parts[-1]) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # <and rest> - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( self, hex_to_bin(sha), - author=Actor._from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - @classmethod - def init(cls, path=None, mkdir=True, **kwargs): - """Initialize a git repository at the given path if specified - - :param path: - is the full path to the repo (traditionally ends with /<name>.git) - or None in which case the repository will be created in the 
current - working directory - - :parm mkdir: - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - Only effective if a path is explicitly given - - :parm kwargs: - keyword arguments serving as additional options to the git-init command - - :return: ``git.Repo`` (the newly created repo)""" - - if mkdir and path and not os.path.exists(path): - os.makedirs(path, 0755) - - # git command automatically chdir into the directory - git = Git(path) - output = git.init(**kwargs) - return Repo(path) - - @classmethod - def _clone(cls, git, url, path, odb_default_type, **kwargs): - # special handling for windows for path at which the clone should be - # created. - # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence - # we at least give a proper error instead of letting git fail - prev_cwd = None - prev_path = None - odbt = kwargs.pop('odbt', odb_default_type) - if os.name == 'nt': - if '~' in path: - raise OSError("Git cannot handle the ~ character in path %r correctly" % path) - - # on windows, git will think paths like c: are relative and prepend the - # current working dir ( before it fails ). We temporarily adjust the working - # dir to make this actually work - match = re.match("(\w:[/\\\])(.*)", path) - if match: - prev_cwd = os.getcwd() - prev_path = path - drive, rest_of_path = match.groups() - os.chdir(drive) - path = rest_of_path - kwargs['with_keep_cwd'] = True - # END cwd preparation - # END windows handling - - try: - git.clone(url, path, **kwargs) - finally: - if prev_cwd is not None: - os.chdir(prev_cwd) - path = prev_path - # END reset previous working dir - # END bad windows handling - - # our git command could have a different working dir than our actual - # environment, hence we prepend its working dir if required - if not os.path.isabs(path) and git.working_dir: - path = join(git._working_dir, path) - - # adjust remotes - there may be operating systems which use backslashes, - # These might be given as initial paths, but when handling the config file - # that contains the remote from which we were clones, git stops liking it - # as it will escape the backslashes. Hence we undo the escaping just to be - # sure - repo = cls(os.path.abspath(path), odbt = odbt) - if repo.remotes: - repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) - # END handle remote repo - return repo - - def clone(self, path, **kwargs): - """Create a clone from this repository. - :param path: - is the full path of the new repo (traditionally ends with ./<name>.git). - - :param kwargs: - odbt = ObjectDatabase Type, allowing to determine the object database - implementation used by the returned Repo instance - - All remaining keyword arguments are given to the git-clone command - - :return: ``git.Repo`` (the newly cloned repo)""" - return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs) - - @classmethod - def clone_from(cls, url, to_path, **kwargs): - """Create a clone from the given URL - :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS - :param to_path: Path to which the repository should be cloned to - :param kwargs: see the ``clone`` method - :return: Repo instance pointing to the cloned directory""" - return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, **kwargs) - - def archive(self, ostream, treeish=None, prefix=None, **kwargs): - """Archive the tree at the given revision. 
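A quick usage sketch for this archive interface before its parameter docs continue below; the path is illustrative, and it is an assumption that the compatibility layer keeps this signature::

    repo = Repo("path/to/repo")                 # assumed existing repository
    with open("/tmp/repo.tar", "wb") as fp:     # git-archive emits tar by default
        repo.archive(fp, treeish="HEAD", prefix="myproject/")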
:parm ostream: file compatible stream object to which the archive will be written - :parm treeish: is the treeish name/id, defaults to active branch - :parm prefix: is the optional prefix to prepend to each filename in the archive - :parm kwargs: - Additional arguments passed to git-archive - NOTE: Use the 'format' argument to define the kind of format. Use - specialized ostreams to write any format supported by python - - :raise GitCommandError: in case something went wrong - :return: self""" - if treeish is None: - treeish = self.head.commit - if prefix and 'prefix' not in kwargs: - kwargs['prefix'] = prefix - kwargs['output_stream'] = ostream - - self.git.archive(treeish, **kwargs) - return self - - rev_parse = rev_parse - - def __repr__(self): - return '<git.Repo "%s">' % self.git_dir diff --git a/git/stream.py b/git/stream.py new file mode 100644 index 00000000..8010a055 --- /dev/null +++ b/git/stream.py @@ -0,0 +1,694 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from cStringIO import StringIO +import errno +import mmap +import os + +from fun import ( + msb_size, + stream_copy, + apply_delta_data, + connect_deltas, + DeltaChunkList, + delta_types + ) + +from util import ( + allocate_memory, + LazyMixin, + make_sha, + write, + close, + zlib + ) + +has_perf_mod = False +try: + from _perf import apply_delta as c_apply_delta + has_perf_mod = True +except ImportError: + pass + +__all__ = ( 'DecompressMemMapReader', 'FDCompressedSha1Writer', 'DeltaApplyReader', + 'Sha1Writer', 'FlexibleSha1Writer', 'ZippedStoreShaWriter', + 'FDStream', 'NullStream') + + +#{ RO Streams + +class DecompressMemMapReader(LazyMixin): + """Reads data in chunks from a memory map and decompresses it. The client sees + only the uncompressed data, and the respective file-like read calls handle on-demand + buffered decompression accordingly + + A constraint on the total size of bytes is activated, simulating + a logical file within a possibly larger physical memory area + + To read efficiently, you clearly don't want to read individual bytes, instead, + read a few kilobytes at least. + + :note: The chunk-size should be carefully selected as it will involve quite a bit + of string copying due to the way the zlib is implemented. It's very wasteful, + hence we try to find a good tradeoff between allocation time and number of + times we actually allocate. A custom zlib implementation would be good here + to better support streamed reading - it would only need to keep the mmap + and decompress it into chunks, that's all ...
""" + __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close', + '_cbr', '_phi') + + max_read_size = 512*1024 # currently unused + + def __init__(self, m, close_on_deletion, size=None): + """Initialize with mmap for stream reading + :param m: must be content data - use new if you have object data and no size""" + self._m = m + self._zip = zlib.decompressobj() + self._buf = None # buffer of decompressed bytes + self._buflen = 0 # length of bytes in buffer + if size is not None: + self._s = size # size of uncompressed data to read in total + self._br = 0 # num uncompressed bytes read + self._cws = 0 # start byte of compression window + self._cwe = 0 # end byte of compression window + self._cbr = 0 # number of compressed bytes read + self._phi = False # is True if we parsed the header info + self._close = close_on_deletion # close the memmap on deletion ? + + def _set_cache_(self, attr): + assert attr == '_s' + # only happens for size, which is a marker to indicate we still + # have to parse the header from the stream + self._parse_header_info() + + def __del__(self): + if self._close: + self._m.close() + # END handle resource freeing + + def _parse_header_info(self): + """If this stream contains object data, parse the header info and skip the + stream to a point where each read will yield object content + + :return: parsed type_string, size""" + # read header + maxb = 512 # should really be enough, cgit uses 8192 I believe + self._s = maxb + hdr = self.read(maxb) + hdrend = hdr.find("\0") + type, size = hdr[:hdrend].split(" ") + size = int(size) + self._s = size + + # adjust internal state to match actual header length that we ignore + # The buffer will be depleted first on future reads + self._br = 0 + hdrend += 1 # count terminating \0 + self._buf = StringIO(hdr[hdrend:]) + self._buflen = len(hdr) - hdrend + + self._phi = True + + return type, size + + #{ Interface + + @classmethod + def new(self, m, close_on_deletion=False): + """Create a new DecompressMemMapReader instance for acting as a read-only stream + This method parses the object header from m and returns the parsed + type and size, as well as the created stream instance. + + :param m: memory map on which to oparate. It must be object data ( header + contents ) + :param close_on_deletion: if True, the memory map will be closed once we are + being deleted""" + inst = DecompressMemMapReader(m, close_on_deletion, 0) + type, size = inst._parse_header_info() + return type, size, inst + + def data(self): + """:return: random access compatible data we are working on""" + return self._m + + def compressed_bytes_read(self): + """ + :return: number of compressed bytes read. This includes the bytes it + took to decompress the header ( if there was one )""" + # ABSTRACT: When decompressing a byte stream, it can be that the first + # x bytes which were requested match the first x bytes in the loosely + # compressed datastream. This is the worst-case assumption that the reader + # does, it assumes that it will get at least X bytes from X compressed bytes + # in call cases. + # The caveat is that the object, according to our known uncompressed size, + # is already complete, but there are still some bytes left in the compressed + # stream that contribute to the amount of compressed bytes. + # How can we know that we are truly done, and have read all bytes we need + # to read ? + # Without help, we cannot know, as we need to obtain the status of the + # decompression. 
If it is not finished, we need to decompress more data + # until it is finished, to yield the actual number of compressed bytes + # belonging to the decompressed object + # We are using a custom zlib module for this, if its not present, + # we try to put in additional bytes up for decompression if feasible + # and check for the unused_data. + + # Only scrub the stream forward if we are officially done with the + # bytes we were to have. + if self._br == self._s and not self._zip.unused_data: + # manipulate the bytes-read to allow our own read method to coninute + # but keep the window at its current position + self._br = 0 + if hasattr(self._zip, 'status'): + while self._zip.status == zlib.Z_OK: + self.read(mmap.PAGESIZE) + # END scrub-loop custom zlib + else: + # pass in additional pages, until we have unused data + while not self._zip.unused_data and self._cbr != len(self._m): + self.read(mmap.PAGESIZE) + # END scrub-loop default zlib + # END handle stream scrubbing + + # reset bytes read, just to be sure + self._br = self._s + # END handle stream scrubbing + + # unused data ends up in the unconsumed tail, which was removed + # from the count already + return self._cbr + + #} END interface + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Allows to reset the stream to restart reading + :raise ValueError: If offset and whence are not 0""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + + self._zip = zlib.decompressobj() + self._br = self._cws = self._cwe = self._cbr = 0 + if self._phi: + self._phi = False + del(self._s) # trigger header parsing on first access + # END skip header + + def read(self, size=-1): + if size < 1: + size = self._s - self._br + else: + size = min(size, self._s - self._br) + # END clamp size + + if size == 0: + return str() + # END handle depletion + + + # deplete the buffer, then just continue using the decompress object + # which has an own buffer. We just need this to transparently parse the + # header from the zlib stream + dat = str() + if self._buf: + if self._buflen >= size: + # have enough data + dat = self._buf.read(size) + self._buflen -= size + self._br += size + return dat + else: + dat = self._buf.read() # ouch, duplicates data + size -= self._buflen + self._br += self._buflen + + self._buflen = 0 + self._buf = None + # END handle buffer len + # END handle buffer + + # decompress some data + # Abstract: zlib needs to operate on chunks of our memory map ( which may + # be large ), as it will otherwise and always fill in the 'unconsumed_tail' + # attribute which possible reads our whole map to the end, forcing + # everything to be read from disk even though just a portion was requested. + # As this would be a nogo, we workaround it by passing only chunks of data, + # moving the window into the memory map along as we decompress, which keeps + # the tail smaller than our chunk-size. This causes 'only' the chunk to be + # copied once, and another copy of a part of it when it creates the unconsumed + # tail. We have to use it to hand in the appropriate amount of bytes durin g + # the next read. + tail = self._zip.unconsumed_tail + if tail: + # move the window, make it as large as size demands. For code-clarity, + # we just take the chunk from our map again instead of reusing the unconsumed + # tail. The latter one would safe some memory copying, but we could end up + # with not getting enough data uncompressed, so we had to sort that out as well. 
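The unconsumed_tail juggling these comments describe comes straight from the stdlib zlib API; a tiny self-contained demo of the mechanism (random data keeps the payload roughly incompressible)::

    import os, zlib

    payload = zlib.compress(os.urandom(4096))   # roughly incompressible input
    d = zlib.decompressobj()
    chunk = payload[:512]                       # feed only a window of the stream
    out = d.decompress(chunk, 256)              # cap the output at 256 bytes
    assert len(out) <= 256
    # input bytes the call did not consume; they must be re-fed on the next
    # decompress() call - this is the tail the reading code above manages
    pending = d.unconsumed_tail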
+ # Now we just assume the worst case, hence the data is uncompressed and the window + # needs to be as large as the uncompressed bytes we want to read. + self._cws = self._cwe - len(tail) + self._cwe = self._cws + size + else: + cws = self._cws + self._cws = self._cwe + self._cwe = cws + size + # END handle tail + + + # if window is too small, make it larger so zip can decompress something + if self._cwe - self._cws < 8: + self._cwe = self._cws + 8 + # END adjust winsize + + # takes a slice, but doesn't copy the data, it says ... + indata = buffer(self._m, self._cws, self._cwe - self._cws) + + # get the actual window end to be sure we don't use it for computations + self._cwe = self._cws + len(indata) + dcompdat = self._zip.decompress(indata, size) + # update the amount of compressed bytes read + # We feed possibly overlapping chunks, which is why the unconsumed tail + # has to be taken into consideration, as well as the unused data + # if we hit the end of the stream + self._cbr += len(indata) - len(self._zip.unconsumed_tail) + self._br += len(dcompdat) + + if dat: + dcompdat = dat + dcompdat + # END prepend our cached data + + # it can happen, depending on the compression, that we get less bytes + # than ordered as it needs the final portion of the data as well. + # Recursively resolve that. + # Note: dcompdat can be empty even though we still appear to have bytes + # to read, if we are called by compressed_bytes_read - it manipulates + # us to empty the stream + if dcompdat and (len(dcompdat) - len(dat)) < size and self._br < self._s: + dcompdat += self.read(size-len(dcompdat)) + # END handle special case + return dcompdat + + +class DeltaApplyReader(LazyMixin): + """A reader which dynamically applies pack deltas to a base object, keeping the + memory demands to a minimum. + + The size of the final object is only obtainable once all deltas have been + applied, unless it is retrieved from a pack index. + + The uncompressed Delta has the following layout (MSB being a most significant + bit encoded dynamic size): + + * MSB Source Size - the size of the base against which the delta was created + * MSB Target Size - the size of the resulting data after the delta was applied + * A list of one byte commands (cmd) which are followed by a specific protocol: + + * cmd & 0x80 - copy delta_data[offset:offset+size] + + * Followed by an encoded offset into the delta data + * Followed by an encoded size of the chunk to copy + + * cmd & 0x7f - insert + + * insert cmd bytes from the delta buffer into the output stream + + * cmd == 0 - invalid operation ( or error in delta stream ) + """ + __slots__ = ( + "_bstream", # base stream to which to apply the deltas + "_dstreams", # tuple of delta stream readers + "_mm_target", # memory map of the delta-applied data + "_size", # actual number of bytes in _mm_target + "_br" # number of bytes read + ) + + #{ Configuration + k_max_memory_move = 250*1000*1000 + #} END configuration + + def __init__(self, stream_list): + """Initialize this instance with a list of streams, the first stream being + the delta to apply on top of all following deltas, the last stream being the + base object onto which to apply the deltas""" + assert len(stream_list) > 1, "Need at least one delta and one base stream" + + self._bstream = stream_list[-1] + self._dstreams = tuple(stream_list[:-1]) + self._br = 0 + + def _set_cache_too_slow_without_c(self, attr): + # the direct algorithm is fastest and most direct if there is only one + # delta. 
Also, the extra overhead might not be worth it for items smaller + # than X - definitely the case in python, every function call costs + # huge amounts of time + # if len(self._dstreams) * self._bstream.size < self.k_max_memory_move: + if len(self._dstreams) == 1: + return self._set_cache_brute_(attr) + + # Aggregate all deltas into one delta in reverse order. Hence we take + # the last delta, and reverse-merge its ancestor delta, until we receive + # the final delta data stream. + # print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams]) + dcl = connect_deltas(self._dstreams) + + # call len directly, as the (optional) c version doesn't implement the sequence + # protocol + if dcl.rbound() == 0: + self._size = 0 + self._mm_target = allocate_memory(0) + return + # END handle empty list + + self._size = dcl.rbound() + self._mm_target = allocate_memory(self._size) + + bbuf = allocate_memory(self._bstream.size) + stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE) + + # APPLY CHUNKS + write = self._mm_target.write + dcl.apply(bbuf, write) + + self._mm_target.seek(0) + + def _set_cache_brute_(self, attr): + """If we are here, we apply the actual deltas""" + + # TODO: There should be a special case if there is only one stream + # Then the default-git algorithm should perform a tad faster, as the + # delta is not peaked into, causing less overhead. + buffer_info_list = list() + max_target_size = 0 + for dstream in self._dstreams: + buf = dstream.read(512) # read the header information + X + offset, src_size = msb_size(buf) + offset, target_size = msb_size(buf, offset) + buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size)) + max_target_size = max(max_target_size, target_size) + # END for each delta stream + + # sanity check - the first delta to apply should have the same source + # size as our actual base stream + base_size = self._bstream.size + target_size = max_target_size + + # if we have more than 1 delta to apply, we will swap buffers, hence we must + # assure that all buffers we use are large enough to hold all the results + if len(self._dstreams) > 1: + base_size = target_size = max(base_size, max_target_size) + # END adjust buffer sizes + + + # Allocate private memory map big enough to hold the first base buffer + # We need random access to it + bbuf = allocate_memory(base_size) + stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE) + + # allocate memory map large enough for the largest (intermediate) target + # We will use it as scratch space for all delta ops. If the final + # target buffer is smaller than our allocated space, we just use parts + # of it upon return. + tbuf = allocate_memory(target_size) + + # for each delta to apply, memory map the decompressed delta and + # work on the op-codes to reconstruct everything. + # For the actual copying, we use a seek and write pattern of buffer + # slices. + final_target_size = None + for (dbuf, offset, src_size, target_size), dstream in reversed(zip(buffer_info_list, self._dstreams)): + # allocate a buffer to hold all delta data - fill in the data for + # fast access. We do this as we know that reading individual bytes + # from our stream would be slower than necessary ( although possible ) + # The dbuf buffer contains commands after the first two MSB sizes, the + # offset specifies the amount of bytes read to get the sizes. 
+ ddata = allocate_memory(dstream.size - offset) + ddata.write(dbuf) + # read the rest from the stream. The size we give is larger than necessary + stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE) + + ####################################################################### + if 'c_apply_delta' in globals(): + c_apply_delta(bbuf, ddata, tbuf); + else: + apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write) + ####################################################################### + + # finally, swap out source and target buffers. The target is now the + # base for the next delta to apply + bbuf, tbuf = tbuf, bbuf + bbuf.seek(0) + tbuf.seek(0) + final_target_size = target_size + # END for each delta to apply + + # its already seeked to 0, constrain it to the actual size + # NOTE: in the end of the loop, it swaps buffers, hence our target buffer + # is not tbuf, but bbuf ! + self._mm_target = bbuf + self._size = final_target_size + + + #{ Configuration + if not has_perf_mod: + _set_cache_ = _set_cache_brute_ + else: + _set_cache_ = _set_cache_too_slow_without_c + + #} END configuration + + def read(self, count=0): + bl = self._size - self._br # bytes left + if count < 1 or count > bl: + count = bl + # NOTE: we could check for certain size limits, and possibly + # return buffers instead of strings to prevent byte copying + data = self._mm_target.read(count) + self._br += len(data) + return data + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Allows to reset the stream to restart reading + + :raise ValueError: If offset and whence are not 0""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + self._br = 0 + self._mm_target.seek(0) + + #{ Interface + + @classmethod + def new(cls, stream_list): + """ + Convert the given list of streams into a stream which resolves deltas + when reading from it. + + :param stream_list: two or more stream objects, first stream is a Delta + to the object that you want to resolve, followed by N additional delta + streams. The list's last stream must be a non-delta stream. 
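A hedged usage sketch for this constructor; the stream names are hypothetical stand-ins for delta and base streams read from a pack::

    # delta_a applies on top of delta_b, which applies on top of base_stream
    streams = [delta_a, delta_b, base_stream]   # hypothetical pack streams
    reader = DeltaApplyReader.new(streams)
    data = reader.read()                        # fully resolved object data
    assert len(data) == reader.size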
+ + :return: Non-Delta OPackStream object whose stream can be used to obtain + the decompressed resolved data + :raise ValueError: if the stream list cannot be handled""" + if len(stream_list) < 2: + raise ValueError("Need at least two streams") + # END single object special handling + + if stream_list[-1].type_id in delta_types: + raise ValueError("Cannot resolve deltas if there is no base object stream, last one was type: %s" % stream_list[-1].type) + # END check stream + + return cls(stream_list) + + #} END interface + + + #{ OInfo like Interface + + @property + def type(self): + return self._bstream.type + + @property + def type_id(self): + return self._bstream.type_id + + @property + def size(self): + """:return: number of uncompressed bytes in the stream""" + return self._size + + #} END oinfo like interface + + +#} END RO streams + + +#{ W Streams + +class Sha1Writer(object): + """Simple stream writer which produces a sha whenever you like as it degests + everything it is supposed to write""" + __slots__ = "sha1" + + def __init__(self): + self.sha1 = make_sha() + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + return len(data) + + # END stream interface + + #{ Interface + + def sha(self, as_hex = False): + """:return: sha so far + :param as_hex: if True, sha will be hex-encoded, binary otherwise""" + if as_hex: + return self.sha1.hexdigest() + return self.sha1.digest() + + #} END interface + + +class FlexibleSha1Writer(Sha1Writer): + """Writer producing a sha1 while passing on the written bytes to the given + write function""" + __slots__ = 'writer' + + def __init__(self, writer): + Sha1Writer.__init__(self) + self.writer = writer + + def write(self, data): + Sha1Writer.write(self, data) + self.writer(data) + + +class ZippedStoreShaWriter(Sha1Writer): + """Remembers everything someone writes to it and generates a sha""" + __slots__ = ('buf', 'zip') + def __init__(self): + Sha1Writer.__init__(self) + self.buf = StringIO() + self.zip = zlib.compressobj(zlib.Z_BEST_SPEED) + + def __getattr__(self, attr): + return getattr(self.buf, attr) + + def write(self, data): + alen = Sha1Writer.write(self, data) + self.buf.write(self.zip.compress(data)) + return alen + + def close(self): + self.buf.write(self.zip.flush()) + + def seek(self, offset, whence=getattr(os, 'SEEK_SET', 0)): + """Seeking currently only supports to rewind written data + Multiple writes are not supported""" + if offset != 0 or whence != getattr(os, 'SEEK_SET', 0): + raise ValueError("Can only seek to position 0") + # END handle offset + self.buf.seek(0) + + def getvalue(self): + """:return: string value from the current stream position to the end""" + return self.buf.getvalue() + + +class FDCompressedSha1Writer(Sha1Writer): + """Digests data written to it, making the sha available, then compress the + data and write it to the file descriptor + + :note: operates on raw file descriptors + :note: for this to work, you have to use the close-method of this instance""" + __slots__ = ("fd", "sha1", "zip") + + # default exception + exc = IOError("Failed to write all bytes to filedescriptor") + + def __init__(self, fd): + super(FDCompressedSha1Writer, self).__init__() + self.fd = fd + self.zip = zlib.compressobj(zlib.Z_BEST_SPEED) + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + cdata = 
self.zip.compress(data) + bytes_written = write(self.fd, cdata) + if bytes_written != len(cdata): + raise self.exc + return len(data) + + def close(self): + remainder = self.zip.flush() + if write(self.fd, remainder) != len(remainder): + raise self.exc + return close(self.fd) + + #} END stream interface + + +class FDStream(object): + """A simple wrapper providing the most basic functions on a file descriptor + with the fileobject interface. Cannot use os.fdopen as the resulting stream + takes ownership""" + __slots__ = ("_fd", '_pos') + def __init__(self, fd): + self._fd = fd + self._pos = 0 + + def write(self, data): + self._pos += len(data) + os.write(self._fd, data) + + def read(self, count=0): + if count == 0: + count = os.path.getsize(self._filepath) + # END handle read everything + + bytes = os.read(self._fd, count) + self._pos += len(bytes) + return bytes + + def fileno(self): + return self._fd + + def tell(self): + return self._pos + + def close(self): + close(self._fd) + + +class NullStream(object): + """A stream that does nothing but providing a stream interface. + Use it like /dev/null""" + __slots__ = tuple() + + def read(self, size=0): + return '' + + def close(self): + pass + + def write(self, data): + return len(data) + + +#} END W streams + + diff --git a/git/test/__init__.py b/git/test/__init__.py index 757cbad1..a29d96a7 100644 --- a/git/test/__init__.py +++ b/git/test/__init__.py @@ -3,3 +3,12 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import git.util + +def _init_pool(): + """Assure the pool is actually threaded""" + size = 2 + print "Setting ThreadPool to %i" % size + git.util.pool.set_size(size) + diff --git a/git/test/db/__init__.py b/git/test/db/__init__.py new file mode 100644 index 00000000..8a681e42 --- /dev/null +++ b/git/test/db/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/test/test_repo.py b/git/test/db/base.py index deadbe9a..5291ba03 100644 --- a/git/test/test_repo.py +++ b/git/test/db/base.py @@ -3,30 +3,41 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import TestDBBase from git.test.lib import * -from git import * +from git.cmd import Git +from git.objects import * +from git.exc import * +from git.index import * +from git.refs import * from git.util import join_path_native from git.exc import BadObject -from gitdb.util import hex_to_bin, bin_to_hex +from git.util import hex_to_bin, bin_to_hex import os, sys import tempfile import shutil from cStringIO import StringIO +from git.db.compat import RepoCompatibilityInterface -class TestRepo(TestBase): + +class RepoGlobalsItemDeletorMetaCls(GlobalsItemDeletorMetaCls): + ModuleToDelete = 'RepoBase' + + +class RepoBase(TestDBBase): + """Basic test for everything a fully implemented repository should support""" + __metaclass__ = RepoGlobalsItemDeletorMetaCls - @raises(InvalidGitRepositoryError) def test_new_should_raise_on_invalid_repo_location(self): - Repo(tempfile.gettempdir()) + self.failUnlessRaises(InvalidGitRepositoryError, self.RepoCls, tempfile.gettempdir()) - @raises(NoSuchPathError) def test_new_should_raise_on_non_existant_path(self): - Repo("repos/foobar") + self.failUnlessRaises(NoSuchPathError, 
self.RepoCls, "repos/foobar") def test_repo_creation_from_different_paths(self): - r_from_gitdir = Repo(self.rorepo.git_dir) + r_from_gitdir = self.RepoCls(self.rorepo.git_dir) assert r_from_gitdir.git_dir == self.rorepo.git_dir assert r_from_gitdir.git_dir.endswith('.git') assert not self.rorepo.git.working_dir.endswith('.git') @@ -133,8 +144,8 @@ class TestRepo(TestBase): try: # with specific path for path in (git_dir_rela, git_dir_abs): - r = Repo.init(path=path, bare=True) - assert isinstance(r, Repo) + r = self.RepoCls.init(path=path, bare=True) + assert isinstance(r, self.RepoCls) assert r.bare == True assert os.path.isdir(r.git_dir) @@ -155,7 +166,7 @@ class TestRepo(TestBase): # END exception handling # try again, this time with the absolute version - rc = Repo.clone_from(r.git_dir, clone_path) + rc = self.RepoCls.clone_from(r.git_dir, clone_path) self._assert_empty_repo(rc) shutil.rmtree(git_dir_abs) @@ -171,7 +182,7 @@ class TestRepo(TestBase): os.makedirs(git_dir_rela) os.chdir(git_dir_rela) - r = Repo.init(bare=False) + r = self.RepoCls.init(bare=False) r.bare == False self._assert_empty_repo(r) @@ -184,7 +195,10 @@ class TestRepo(TestBase): # END restore previous state def test_bare_property(self): - self.rorepo.bare + if isinstance(self.rorepo, RepoCompatibilityInterface): + self.rorepo.bare + #END handle compatability + self.rorepo.is_bare def test_daemon_export(self): orig_val = self.rorepo.daemon_export @@ -204,8 +218,7 @@ class TestRepo(TestBase): self.rorepo.alternates = cur_alternates def test_repr(self): - path = os.path.join(os.path.abspath(GIT_REPO), '.git') - assert_equal('<git.Repo "%s">' % path, repr(self.rorepo)) + assert_equal('<git.Repo "%s">' % rorepo_dir(), repr(self.rorepo)) def test_is_dirty_with_bare_repository(self): orig_value = self.rorepo._bare @@ -235,6 +248,7 @@ class TestRepo(TestBase): assert isinstance(index, IndexFile) def test_tag(self): + assert self.rorepo.tag('0.1.5').commit assert self.rorepo.tag('refs/tags/0.1.5').commit def test_archive(self): @@ -570,8 +584,15 @@ class TestRepo(TestBase): assert rev_parse(refspec+":CHANGES").type == 'blob' #END operate on non-detached head - # the last position - assert rev_parse('@{1}') != head.commit + # the most recent previous position of the currently checked out branch + + try: + assert rev_parse('@{1}') != head.commit + except IndexError: + # on new checkouts, there isn't even a single past branch position + # in the log + pass + #END handle fresh checkouts # position doesn't exist self.failUnlessRaises(IndexError, rev_parse, '@{10000}') @@ -579,17 +600,13 @@ class TestRepo(TestBase): # currently, nothing more is supported self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}") - def test_repo_odbtype(self): - target_type = GitDB - if sys.version_info[1] < 5: - target_type = GitCmdObjectDB - assert isinstance(self.rorepo.odb, target_type) - def test_submodules(self): assert len(self.rorepo.submodules) == 1 # non-recursive - assert len(list(self.rorepo.iter_submodules())) == 2 + # in previous configurations, we had recursive repositories so this would compare to 2 + # now there is only one left, as gitdb was merged + assert len(list(self.rorepo.iter_submodules())) == 1 - assert isinstance(self.rorepo.submodule("gitdb"), Submodule) + assert isinstance(self.rorepo.submodule("git/ext/async"), Submodule) self.failUnlessRaises(ValueError, self.rorepo.submodule, "doesn't exist") @with_rw_repo('HEAD', bare=False) diff --git a/git/test/db/cmd/__init__.py b/git/test/db/cmd/__init__.py new file 
mode 100644 index 00000000..8a681e42 --- /dev/null +++ b/git/test/db/cmd/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/test/db/cmd/test_base.py b/git/test/db/cmd/test_base.py new file mode 100644 index 00000000..959be16b --- /dev/null +++ b/git/test/db/cmd/test_base.py @@ -0,0 +1,32 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.test.lib import rorepo_dir +from git.test.db.base import RepoBase + +from git.util import bin_to_hex +from git.exc import BadObject + +from git.db.complex import CmdCompatibilityGitDB +from git.db.cmd.base import * + +class TestBase(RepoBase): + RepoCls = CmdCompatibilityGitDB + + def test_basics(self): + gdb = self.rorepo + + # partial to complete - works with everything + hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6")) + assert len(hexsha) == 40 + + assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha + + # fails with BadObject + for invalid_rev in ("0000", "bad/ref", "super bad"): + self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev) + + def test_fetch_info(self): + self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "nonsense", '') + self.failUnlessRaises(ValueError, CmdFetchInfo._from_line, self.rorepo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '') diff --git a/git/test/db/lib.py b/git/test/db/lib.py new file mode 100644 index 00000000..499ca252 --- /dev/null +++ b/git/test/db/lib.py @@ -0,0 +1,246 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Base classes for object db testing""" +from git.test.lib import ( + with_rw_directory, + with_packs_rw, + ZippedStoreShaWriter, + fixture_path, + TestBase, + rorepo_dir, + ) + +from git.stream import Sha1Writer +from git.base import ( + IStream, + OStream, + OInfo + ) + +from git.exc import BadObject +from git.typ import str_blob_type + +from async import IteratorReader +from cStringIO import StringIO +from struct import pack + + +__all__ = ('TestDBBase', 'with_rw_directory', 'with_packs_rw', 'fixture_path') + +class TestDBBase(TestBase): + """Base Class providing default functionality to all tests such as: + + - Utility functions provided by the TestCase base of the unittest method such as:: + self.fail("todo") + self.failUnlessRaises(...) + + - Class level repository which is considered read-only as it is shared among + all test cases in your type. + Access it using:: + self.rorepo # 'ro' stands for read-only + + The rorepo is in fact your current project's git repo. If you refer to specific + shas for your objects, be sure you choose some that are part of the immutable portion + of the project history ( to assure tests don't fail for others ). + + Derived types can override the default repository type to create a different + read-only repo, allowing to test their specific type + """ + + # data + two_lines = "1234\nhello world" + all_data = (two_lines, ) + + #{ Configuration + # The repository type to instantiate. It takes at least a path to operate upon + # during instantiation. 
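+    # Concrete test types set this to the database type under test, e.g.
+    # RepoCls = CmdCompatibilityGitDB (above) or PureCompatibilityGitDB
+    # (git/test/db/py/test_base.py below).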
+    RepoCls = None
+
+    # if True, a read-only repo will be provided and RepoCls must be set.
+    # Otherwise it may remain unset
+    needs_ro_repo = True
+    #} END configuration
+
+    @classmethod
+    def setUpAll(cls):
+        """
+        Dynamically add a read-only repository to our actual type. This way
+        each test type has its own repository
+        """
+        if cls.needs_ro_repo:
+            assert cls.RepoCls is not None, "RepoCls class member must be set"
+            cls.rorepo = cls.RepoCls(rorepo_dir())
+        #END handle rorepo
+
+    def _assert_object_writing_simple(self, db):
+        # write a bunch of objects and query their streams and info
+        null_objs = db.size()
+        ni = 250
+        for i in xrange(ni):
+            data = pack(">L", i)
+            istream = IStream(str_blob_type, len(data), StringIO(data))
+            new_istream = db.store(istream)
+            assert new_istream is istream
+            assert db.has_object(istream.binsha)
+
+            info = db.info(istream.binsha)
+            assert isinstance(info, OInfo)
+            assert info.type == istream.type and info.size == istream.size
+
+            stream = db.stream(istream.binsha)
+            assert isinstance(stream, OStream)
+            assert stream.binsha == info.binsha and stream.type == info.type
+            assert stream.read() == data
+        # END for each item
+
+        assert db.size() == null_objs + ni
+        shas = list(db.sha_iter())
+        assert len(shas) == db.size()
+        assert len(shas[0]) == 20
+
+
+    def _assert_object_writing(self, db):
+        """General tests to verify object writing, compatible with ObjectDBW
+        :note: requires write access to the database"""
+        # start in 'dry-run' mode, using a simple sha1 writer
+        ostreams = (ZippedStoreShaWriter, None)
+        for ostreamcls in ostreams:
+            for data in self.all_data:
+                dry_run = ostreamcls is not None
+                ostream = None
+                if ostreamcls is not None:
+                    ostream = ostreamcls()
+                    assert isinstance(ostream, Sha1Writer)
+                # END create ostream
+
+                prev_ostream = db.set_ostream(ostream)
+                assert type(prev_ostream) in ostreams or prev_ostream in ostreams
+
+                istream = IStream(str_blob_type, len(data), StringIO(data))
+
+                # store returns same istream instance, with new sha set
+                my_istream = db.store(istream)
+                sha = istream.binsha
+                assert my_istream is istream
+                assert db.has_object(sha) != dry_run
+                assert len(sha) == 20
+
+                # verify data - the slow way, we want to run code
+                if not dry_run:
+                    info = db.info(sha)
+                    assert str_blob_type == info.type
+                    assert info.size == len(data)
+
+                    ostream = db.stream(sha)
+                    assert ostream.read() == data
+                    assert ostream.type == str_blob_type
+                    assert ostream.size == len(data)
+                else:
+                    self.failUnlessRaises(BadObject, db.info, sha)
+                    self.failUnlessRaises(BadObject, db.stream, sha)
+
+                    # DIRECT STREAM COPY
+                    # our data has been written in object format to the StringIO
+                    # we passed as output stream. No physical database representation
+                    # was created.
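+                    # The dry-run store above wrote the object, in loose-object
+                    # format, into the ZippedStoreShaWriter and set istream.binsha.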
+ # Test direct stream copy of object streams, the result must be + # identical to what we fed in + ostream.seek(0) + istream.stream = ostream + assert istream.binsha is not None + prev_sha = istream.binsha + + db.set_ostream(ZippedStoreShaWriter()) + db.store(istream) + assert istream.binsha == prev_sha + new_ostream = db.ostream() + + # note: only works as long our store write uses the same compression + # level, which is zip_best + assert ostream.getvalue() == new_ostream.getvalue() + # END for each data set + # END for each dry_run mode + + def _assert_object_writing_async(self, db): + """Test generic object writing using asynchronous access""" + ni = 5000 + def istream_generator(offset=0, ni=ni): + for data_src in xrange(ni): + data = str(data_src + offset) + yield IStream(str_blob_type, len(data), StringIO(data)) + # END for each item + # END generator utility + + # for now, we are very trusty here as we expect it to work if it worked + # in the single-stream case + + # write objects + reader = IteratorReader(istream_generator()) + istream_reader = db.store_async(reader) + istreams = istream_reader.read() # read all + assert istream_reader.task().error() is None + assert len(istreams) == ni + + for stream in istreams: + assert stream.error is None + assert len(stream.binsha) == 20 + assert isinstance(stream, IStream) + # END assert each stream + + # test has-object-async - we must have all previously added ones + reader = IteratorReader( istream.binsha for istream in istreams ) + hasobject_reader = db.has_object_async(reader) + count = 0 + for sha, has_object in hasobject_reader: + assert has_object + count += 1 + # END for each sha + assert count == ni + + # read the objects we have just written + reader = IteratorReader( istream.binsha for istream in istreams ) + ostream_reader = db.stream_async(reader) + + # read items individually to prevent hitting possible sys-limits + count = 0 + for ostream in ostream_reader: + assert isinstance(ostream, OStream) + count += 1 + # END for each ostream + assert ostream_reader.task().error() is None + assert count == ni + + # get info about our items + reader = IteratorReader( istream.binsha for istream in istreams ) + info_reader = db.info_async(reader) + + count = 0 + for oinfo in info_reader: + assert isinstance(oinfo, OInfo) + count += 1 + # END for each oinfo instance + assert count == ni + + + # combined read-write using a converter + # add 2500 items, and obtain their output streams + nni = 2500 + reader = IteratorReader(istream_generator(offset=ni, ni=nni)) + istream_to_sha = lambda istreams: [ istream.binsha for istream in istreams ] + + istream_reader = db.store_async(reader) + istream_reader.set_post_cb(istream_to_sha) + + ostream_reader = db.stream_async(istream_reader) + + count = 0 + # read it individually, otherwise we might run into the ulimit + for ostream in ostream_reader: + assert isinstance(ostream, OStream) + count += 1 + # END for each ostream + assert count == nni + + diff --git a/git/test/db/py/__init__.py b/git/test/db/py/__init__.py new file mode 100644 index 00000000..8a681e42 --- /dev/null +++ b/git/test/db/py/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/test/db/py/test_base.py b/git/test/db/py/test_base.py new file mode 100644 index 00000000..6b06bbe9 --- /dev/null +++ b/git/test/db/py/test_base.py @@ -0,0 
+1,16 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.test.lib import rorepo_dir +from git.test.db.base import RepoBase + +from git.db.complex import PureCompatibilityGitDB + +class TestPyDBBase(RepoBase): + + RepoCls = PureCompatibilityGitDB + + def test_basics(self): + pass + diff --git a/git/test/db/py/test_git.py b/git/test/db/py/test_git.py new file mode 100644 index 00000000..ecaa5c8f --- /dev/null +++ b/git/test/db/py/test_git.py @@ -0,0 +1,51 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.test.lib import rorepo_dir +from git.test.db.lib import TestDBBase, with_rw_directory +from git.exc import BadObject +from git.db.py.complex import PureGitODB +from git.base import OStream, OInfo +from git.util import hex_to_bin, bin_to_hex + +import os + +class TestGitDB(TestDBBase): + needs_ro_repo = False + + def test_reading(self): + gdb = PureGitODB(os.path.join(rorepo_dir(), 'objects')) + + # we have packs and loose objects, alternates doesn't necessarily exist + assert 1 < len(gdb.databases()) < 4 + + # access should be possible + git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655") + assert isinstance(gdb.info(git_sha), OInfo) + assert isinstance(gdb.stream(git_sha), OStream) + assert gdb.size() > 200 + sha_list = list(gdb.sha_iter()) + assert len(sha_list) == gdb.size() + + + # This is actually a test for compound functionality, but it doesn't + # have a separate test module + # test partial shas + # this one as uneven and quite short + assert gdb.partial_to_complete_sha_hex('5aebcd') == hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655") + + # mix even/uneven hexshas + for i, binsha in enumerate(sha_list[:50]): + assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha + # END for each sha + + self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000") + + @with_rw_directory + def test_writing(self, path): + gdb = PureGitODB(path) + + # its possible to write objects + self._assert_object_writing(gdb) + self._assert_object_writing_async(gdb) diff --git a/git/test/db/py/test_loose.py b/git/test/db/py/test_loose.py new file mode 100644 index 00000000..0c9b4831 --- /dev/null +++ b/git/test/db/py/test_loose.py @@ -0,0 +1,36 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.test.db.lib import TestDBBase, with_rw_directory +from git.db.py.loose import PureLooseObjectODB +from git.exc import BadObject +from git.util import bin_to_hex + +class TestLooseDB(TestDBBase): + + needs_ro_repo = False + + @with_rw_directory + def test_basics(self, path): + ldb = PureLooseObjectODB(path) + + # write data + self._assert_object_writing(ldb) + self._assert_object_writing_async(ldb) + + # verify sha iteration and size + shas = list(ldb.sha_iter()) + assert shas and len(shas[0]) == 20 + + assert len(shas) == ldb.size() + + # verify find short object + long_sha = bin_to_hex(shas[-1]) + for short_sha in (long_sha[:20], long_sha[:5]): + assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha + # END for each sha + + 
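For reference, the write/read round trip these loose-db assertions exercise, as a minimal standalone sketch. The names (PureLooseObjectODB, IStream, str_blob_type) are the ones introduced by this patch; the target path is made up:

    from cStringIO import StringIO
    from git.db.py.loose import PureLooseObjectODB
    from git.base import IStream
    from git.typ import str_blob_type

    ldb = PureLooseObjectODB("/tmp/example-odb")    # any writable directory
    data = "hello world"
    istream = ldb.store(IStream(str_blob_type, len(data), StringIO(data)))
    assert ldb.has_object(istream.binsha)           # store() filled in the binary sha
    assert ldb.stream(istream.binsha).read() == data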
+        self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
+        # raises if no object could be found
+
diff --git a/git/test/db/py/test_mem.py b/git/test/db/py/test_mem.py
new file mode 100644
index 00000000..bc98dc56
--- /dev/null
+++ b/git/test/db/py/test_mem.py
@@ -0,0 +1,30 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_rw_directory
+from git.db.py.mem import PureMemoryDB
+from git.db.py.loose import PureLooseObjectODB
+
+class TestPureMemoryDB(TestDBBase):
+
+    needs_ro_repo = False
+
+    @with_rw_directory
+    def test_writing(self, path):
+        mdb = PureMemoryDB()
+
+        # write data
+        self._assert_object_writing_simple(mdb)
+
+        # test stream copy
+        ldb = PureLooseObjectODB(path)
+        assert ldb.size() == 0
+        num_streams_copied = mdb.stream_copy(mdb.sha_iter(), ldb)
+        assert num_streams_copied == mdb.size()
+
+        assert ldb.size() == mdb.size()
+        for sha in mdb.sha_iter():
+            assert ldb.has_object(sha)
+            assert ldb.stream(sha).read() == mdb.stream(sha).read()
+        # END verify objects were copied and are equal
diff --git a/git/test/db/py/test_pack.py b/git/test/db/py/test_pack.py
new file mode 100644
index 00000000..5043f446
--- /dev/null
+++ b/git/test/db/py/test_pack.py
@@ -0,0 +1,76 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.test.db.lib import TestDBBase, with_packs_rw
+
+from git.db.py.pack import PurePackedODB
+from git.test.lib import fixture_path
+
+from git.exc import BadObject, AmbiguousObjectName
+
+import os
+import random
+
+class TestPackDB(TestDBBase):
+
+    needs_ro_repo = False
+
+    @with_packs_rw
+    def test_writing(self, path):
+        pdb = PurePackedODB(path)
+
+        # on demand, we init our pack cache
+        num_packs = len(pdb.entities())
+        assert num_packs
+        assert pdb._st_mtime != 0
+
+        # test pack directory changed:
+        # packs removed - rename a file, should affect the glob
+        pack_path = pdb.entities()[0].pack().path()
+        new_pack_path = pack_path + "renamed"
+        os.rename(pack_path, new_pack_path)
+
+        pdb.update_cache(force=True)
+        assert len(pdb.entities()) == num_packs - 1
+
+        # packs added
+        os.rename(new_pack_path, pack_path)
+        pdb.update_cache(force=True)
+        assert len(pdb.entities()) == num_packs
+
+        # bang on the cache
+        # access the Entities directly, as there is no iteration interface
+        # yet ( or required for now )
+        sha_list = list(pdb.sha_iter())
+        assert len(sha_list) == pdb.size()
+
+        # hit all packs in random order
+        random.shuffle(sha_list)
+
+        for sha in sha_list:
+            info = pdb.info(sha)
+            stream = pdb.stream(sha)
+        # END for each sha to query
+
+
+        # test short finding - be a bit more brutal here
+        max_bytes = 19
+        min_bytes = 2
+        num_ambiguous = 0
+        for i, sha in enumerate(sha_list):
+            short_sha = sha[:max((i % max_bytes), min_bytes)]
+            try:
+                assert pdb.partial_to_complete_sha(short_sha, len(short_sha)*2) == sha
+            except AmbiguousObjectName:
+                num_ambiguous += 1
+                pass # valid, we can have short objects
+            # END exception handling
+        # END for each sha to find
+
+        # we should have at least one ambiguous, considering the small sizes,
+        # but in our pack there is no ambiguous ...
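+        # (hence the assertion below stays disabled for this fixture pack)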
+ # assert num_ambiguous + + # non-existing + self.failUnlessRaises(BadObject, pdb.partial_to_complete_sha, "\0\0", 4) diff --git a/git/test/db/py/test_ref.py b/git/test/db/py/test_ref.py new file mode 100644 index 00000000..c5374dc9 --- /dev/null +++ b/git/test/db/py/test_ref.py @@ -0,0 +1,62 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.test.db.lib import * +from git.db.py.ref import PureReferenceDB + +from git.util import ( + NULL_BIN_SHA, + hex_to_bin + ) + +import os + +class TestPureReferenceDB(TestDBBase): + + needs_ro_repo = False + + def make_alt_file(self, alt_path, alt_list): + """Create an alternates file which contains the given alternates. + The list can be empty""" + alt_file = open(alt_path, "wb") + for alt in alt_list: + alt_file.write(alt + "\n") + alt_file.close() + + @with_rw_directory + def test_writing(self, path): + NULL_BIN_SHA = '\0' * 20 + + alt_path = os.path.join(path, 'alternates') + rdb = PureReferenceDB(alt_path) + assert len(rdb.databases()) == 0 + assert rdb.size() == 0 + assert len(list(rdb.sha_iter())) == 0 + + # try empty, non-existing + assert not rdb.has_object(NULL_BIN_SHA) + + + # setup alternate file + # add two, one is invalid + own_repo_path = fixture_path('../../../.git/objects') # use own repo + self.make_alt_file(alt_path, [own_repo_path, "invalid/path"]) + rdb.update_cache() + assert len(rdb.databases()) == 1 + + # we should now find a default revision of ours + git_sha = hex_to_bin("5aebcd5cb3340fb31776941d7e4d518a712a8655") + assert rdb.has_object(git_sha) + + # remove valid + self.make_alt_file(alt_path, ["just/one/invalid/path"]) + rdb.update_cache() + assert len(rdb.databases()) == 0 + + # add valid + self.make_alt_file(alt_path, [own_repo_path]) + rdb.update_cache() + assert len(rdb.databases()) == 1 + + diff --git a/git/test/db/test_base.py b/git/test/db/test_base.py new file mode 100644 index 00000000..2a882d0a --- /dev/null +++ b/git/test/db/test_base.py @@ -0,0 +1,20 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import * +from git.db import RefSpec + +class TestBase(TestDBBase): + + needs_ro_repo = False + + @with_rw_directory + def test_basics(self, path): + self.failUnlessRaises(ValueError, RefSpec, None, None) + rs = RefSpec(None, "something") + assert rs.force == False + assert rs.delete_destination() + assert rs.source is None + assert rs.destination == "something" + diff --git a/git/test/fixtures/git_config b/git/test/fixtures/git_config index 3c91985f..ff8e7114 100644 --- a/git/test/fixtures/git_config +++ b/git/test/fixtures/git_config @@ -1,22 +1,28 @@ [core] repositoryformatversion = 0 filemode = true - bare = false - logallrefupdates = true + bare = false + logallrefupdates = true [remote "origin"] fetch = +refs/heads/*:refs/remotes/origin/* url = git://gitorious.org/~byron/git-python/byrons-clone.git pushurl = git@gitorious.org:~byron/git-python/byrons-clone.git -[branch "master"] +# a tab indented section header + [branch "master"] remote = origin merge = refs/heads/master -[remote "mainline"] +# an space indented section header + [remote "mainline"] + # space indented comment url = git://gitorious.org/git-python/mainline.git fetch = 
+refs/heads/*:refs/remotes/mainline/* + [remote "MartinMarcher"] + # tab indented comment url = git://gitorious.org/~martin.marcher/git-python/serverhorror.git fetch = +refs/heads/*:refs/remotes/MartinMarcher/* -[gui] + # can handle comments - the section name is supposed to be stripped +[ gui ] geometry = 1316x820+219+243 207 192 [branch "mainline_performance"] remote = mainline diff --git a/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b Binary files differnew file mode 100644 index 00000000..021c2db3 --- /dev/null +++ b/git/test/fixtures/objects/7b/b839852ed5e3a069966281bb08d50012fb309b diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx Binary files differnew file mode 100644 index 00000000..fda5969b --- /dev/null +++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx diff --git a/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack Binary files differnew file mode 100644 index 00000000..a3209d2b --- /dev/null +++ b/git/test/fixtures/packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx Binary files differnew file mode 100644 index 00000000..a7d6c717 --- /dev/null +++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx diff --git a/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack Binary files differnew file mode 100644 index 00000000..955c424c --- /dev/null +++ b/git/test/fixtures/packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx Binary files differnew file mode 100644 index 00000000..87c635f4 --- /dev/null +++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx diff --git a/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack Binary files differnew file mode 100644 index 00000000..a69b28ac --- /dev/null +++ b/git/test/fixtures/packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack diff --git a/git/test/lib/__init__.py b/git/test/lib/__init__.py index 77512794..a0656438 100644 --- a/git/test/lib/__init__.py +++ b/git/test/lib/__init__.py @@ -5,9 +5,14 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import inspect +# TODO: Separate names - they do repeat unfortunately. Also deduplicate it, +# redesign decorators to support multiple database types in succession. 
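The git_config fixture above now exercises tab- and space-indented section headers, indented comments, and blanks inside the section brackets. A minimal sketch of reading such a file back, assuming the GitConfigParser interface GitPython uses elsewhere (get_value takes the section name with its quoted subsection intact):

    from git.config import GitConfigParser

    cr = GitConfigParser("git/test/fixtures/git_config", read_only=True)
    print cr.get_value('remote "mainline"', 'url')
    print cr.get_value('gui', 'geometry')    # '[ gui ]' parses with the blanks stripped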
+from base import * + from mock import * from asserts import * from helper import * + __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/git/test/lib/base.py b/git/test/lib/base.py new file mode 100644 index 00000000..bc160783 --- /dev/null +++ b/git/test/lib/base.py @@ -0,0 +1,200 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureCompatibilityGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Utilities used in ODB testing""" +from git.base import OStream +from git.stream import ( + Sha1Writer, + ZippedStoreShaWriter + ) + +from git.util import ( + zlib, + dirname + ) + +import sys +import random +from array import array +from cStringIO import StringIO + +import glob +import unittest +import tempfile +import shutil +import os +import gc + + +#{ Decorators + +def with_rw_directory(func): + """Create a temporary directory which can be written to, remove it if the + test suceeds, but leave it otherwise to aid additional debugging""" + def wrapper(self): + path = maketemp(prefix=func.__name__) + os.mkdir(path) + keep = False + try: + try: + return func(self, path) + except Exception: + print >> sys.stderr, "Test %s.%s failed, output is at %r" % (type(self).__name__, func.__name__, path) + keep = True + raise + finally: + # Need to collect here to be sure all handles have been closed. It appears + # a windows-only issue. In fact things should be deleted, as well as + # memory maps closed, once objects go out of scope. For some reason + # though this is not the case here unless we collect explicitly. + if not keep: + gc.collect() + shutil.rmtree(path) + # END handle exception + # END wrapper + + wrapper.__name__ = func.__name__ + return wrapper + + +def with_rw_repo(func): + """Create a copy of our repository and put it into a writable location. It will + be removed if the test doesn't result in an error. + As we can currently only copy the fully working tree, tests must not rely on + being on a certain branch or on anything really except for the default tags + that should exist + Wrapped function obtains a git repository """ + def wrapper(self, path): + src_dir = dirname(dirname(dirname(__file__))) + assert(os.path.isdir(path)) + os.rmdir(path) # created by wrapper, but must not exist for copy operation + shutil.copytree(src_dir, path) + target_gitdir = os.path.join(path, '.git') + assert os.path.isdir(target_gitdir) + return func(self, self.RepoCls(target_gitdir)) + #END wrapper + wrapper.__name__ = func.__name__ + return with_rw_directory(wrapper) + + + +def with_packs_rw(func): + """Function that provides a path into which the packs for testing should be + copied. Will pass on the path to the actual function afterwards + + :note: needs with_rw_directory wrapped around it""" + def wrapper(self, path): + src_pack_glob = fixture_path('packs/*') + print src_pack_glob + copy_files_globbed(src_pack_glob, path, hard_link_ok=True) + return func(self, path) + # END wrapper + + wrapper.__name__ = func.__name__ + return with_rw_directory(wrapper) + +#} END decorators + +#{ Routines + +def rorepo_dir(): + """:return: path to our own repository, being our own .git directory. 
+ :note: doesn't work in bare repositories""" + base = os.path.join(dirname(dirname(dirname(dirname(__file__)))), '.git') + assert os.path.isdir(base) + return base + +def maketemp(*args, **kwargs): + """Wrapper around default tempfile.mktemp to fix an osx issue""" + tdir = tempfile.mktemp(*args, **kwargs) + if sys.platform == 'darwin': + tdir = '/private' + tdir + return tdir + +def fixture_path(relapath=''): + """:return: absolute path into the fixture directory + :param relapath: relative path into the fixtures directory, or '' + to obtain the fixture directory itself""" + test_dir = os.path.dirname(os.path.dirname(__file__)) + return os.path.join(test_dir, "fixtures", relapath) + +def fixture(name): + return open(fixture_path(name), 'rb').read() + +def absolute_project_path(): + return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) + +def copy_files_globbed(source_glob, target_dir, hard_link_ok=False): + """Copy all files found according to the given source glob into the target directory + :param hard_link_ok: if True, hard links will be created if possible. Otherwise + the files will be copied""" + for src_file in glob.glob(source_glob): + if hard_link_ok and hasattr(os, 'link'): + target = os.path.join(target_dir, os.path.basename(src_file)) + try: + os.link(src_file, target) + except OSError: + shutil.copy(src_file, target_dir) + # END handle cross device links ( and resulting failure ) + else: + shutil.copy(src_file, target_dir) + # END try hard link + # END for each file to copy + + +def make_bytes(size_in_bytes, randomize=False): + """:return: string with given size in bytes + :param randomize: try to produce a very random stream""" + actual_size = size_in_bytes / 4 + producer = xrange(actual_size) + if randomize: + producer = list(producer) + random.shuffle(producer) + # END randomize + a = array('i', producer) + return a.tostring() + +def make_object(type, data): + """:return: bytes resembling an uncompressed object""" + odata = "blob %i\0" % len(data) + return odata + data + +def make_memory_file(size_in_bytes, randomize=False): + """:return: tuple(size_of_stream, stream) + :param randomize: try to produce a very random stream""" + d = make_bytes(size_in_bytes, randomize) + return len(d), StringIO(d) + +#} END routines + +#{ Stream Utilities + +class DummyStream(object): + def __init__(self): + self.was_read = False + self.bytes = 0 + self.closed = False + + def read(self, size): + self.was_read = True + self.bytes = size + + def close(self): + self.closed = True + + def _assert(self): + assert self.was_read + + +class DeriveTest(OStream): + def __init__(self, sha, type, size, stream, *args, **kwargs): + self.myarg = kwargs.pop('myarg') + self.args = args + + def _assert(self): + assert self.args + assert self.myarg + +#} END stream utilitiess + diff --git a/git/test/lib/helper.py b/git/test/lib/helper.py index 76aaaa38..5776f526 100644 --- a/git/test/lib/helper.py +++ b/git/test/lib/helper.py @@ -12,26 +12,18 @@ import tempfile import shutil import cStringIO -GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))) +from base import ( + maketemp, + rorepo_dir + ) -__all__ = ( - 'fixture_path', 'fixture', 'absolute_project_path', 'StringProcessAdapter', - 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', 'GIT_REPO' - ) - -#{ Routines - -def fixture_path(name): - test_dir = os.path.dirname(os.path.dirname(__file__)) - return os.path.join(test_dir, "fixtures", name) -def fixture(name): - return 
open(fixture_path(name), 'rb').read() +__all__ = ( + 'StringProcessAdapter', 'GlobalsItemDeletorMetaCls', + 'with_rw_repo', 'with_rw_and_rw_remote_repo', 'TestBase', 'TestCase', + ) -def absolute_project_path(): - return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -#} END routines #{ Adapters @@ -52,13 +44,6 @@ class StringProcessAdapter(object): #{ Decorators -def _mktemp(*args): - """Wrapper around default tempfile.mktemp to fix an osx issue""" - tdir = tempfile.mktemp(*args) - if sys.platform == 'darwin': - tdir = '/private' + tdir - return tdir - def _rmtree_onerror(osremove, fullpath, exec_info): """ Handle the case on windows that read-only files cannot be deleted by @@ -87,7 +72,7 @@ def with_rw_repo(working_tree_ref, bare=False): if bare: prefix = '' #END handle prefix - repo_dir = _mktemp("%sbare_%s" % (prefix, func.__name__)) + repo_dir = maketemp("%sbare_%s" % (prefix, func.__name__)) rw_repo = self.rorepo.clone(repo_dir, shared=True, bare=bare, n=True) rw_repo.head.commit = rw_repo.commit(working_tree_ref) @@ -143,8 +128,8 @@ def with_rw_and_rw_remote_repo(working_tree_ref): assert isinstance(working_tree_ref, basestring), "Decorator requires ref name for working tree checkout" def argument_passer(func): def remote_repo_creator(self): - remote_repo_dir = _mktemp("remote_repo_%s" % func.__name__) - repo_dir = _mktemp("remote_clone_non_bare_repo") + remote_repo_dir = maketemp("remote_repo_%s" % func.__name__) + repo_dir = maketemp("remote_clone_non_bare_repo") rw_remote_repo = self.rorepo.clone(remote_repo_dir, shared=True, bare=True) rw_repo = rw_remote_repo.clone(repo_dir, shared=True, bare=False, n=True) # recursive alternates info ? @@ -205,32 +190,40 @@ def with_rw_and_rw_remote_repo(working_tree_ref): return argument_passer #} END decorators + +#{ Meta Classes +class GlobalsItemDeletorMetaCls(type): + """Utiltiy to prevent the RepoBase to be picked up by nose as the metacls + will delete the instance from the globals""" + #{ Configuration + # Set this to a string name of the module to delete + ModuleToDelete = None + #} END configuration + + def __new__(metacls, name, bases, clsdict): + assert metacls.ModuleToDelete is not None, "Invalid metaclass configuration" + new_type = super(GlobalsItemDeletorMetaCls, metacls).__new__(metacls, name, bases, clsdict) + if name != metacls.ModuleToDelete: + mod = __import__(new_type.__module__, globals(), locals(), new_type.__module__) + delattr(mod, metacls.ModuleToDelete) + #END handle deletion + return new_type + +#} END meta classes class TestBase(TestCase): """ Base Class providing default functionality to all tests such as: - - Utility functions provided by the TestCase base of the unittest method such as:: self.fail("todo") self.failUnlessRaises(...) - - - Class level repository which is considered read-only as it is shared among - all test cases in your type. - Access it using:: - self.rorepo # 'ro' stands for read-only - - The rorepo is in fact your current project's git repo. If you refer to specific - shas for your objects, be sure you choose some that are part of the immutable portion - of the project history ( to assure tests don't fail for others ). """ @classmethod def setUpAll(cls): - """ - Dynamically add a read-only repository to our actual type. 
This way - each test type has its own repository - """ - cls.rorepo = Repo(GIT_REPO) + """This method is only called to provide the most basic functionality + Subclasses may just override it or implement it differently""" + cls.rorepo = Repo(rorepo_dir()) def _make_file(self, rela_path, data, repo=None): """ diff --git a/git/test/objects/__init__.py b/git/test/objects/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/git/test/objects/__init__.py @@ -0,0 +1 @@ + diff --git a/git/test/objects/lib.py b/git/test/objects/lib.py new file mode 100644 index 00000000..fe1d9f9d --- /dev/null +++ b/git/test/objects/lib.py @@ -0,0 +1,14 @@ +"""Provide customized obhject testing facilities""" + +from git.test.lib import ( + rorepo_dir, + TestBase, + assert_equal, + assert_not_equal, + with_rw_repo, + StringProcessAdapter, + ) + +class TestObjectBase(TestBase): + """Provides a default read-only repository in the rorepo member""" + pass diff --git a/git/test/test_blob.py b/git/test/objects/test_blob.py index 661c0501..58ac25b7 100644 --- a/git/test/test_blob.py +++ b/git/test/objects/test_blob.py @@ -4,11 +4,11 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * -from gitdb.util import hex_to_bin +from lib import * +from git.objects.blob import * +from git.util import hex_to_bin -class TestBlob(TestBase): +class TestBlob(TestObjectBase): def test_mime_type_should_return_mime_type_for_known_types(self): blob = Blob(self.rorepo, **{'binsha': Blob.NULL_BIN_SHA, 'path': 'foo.png'}) diff --git a/git/test/test_commit.py b/git/test/objects/test_commit.py index 4a8d8b87..80326fe9 100644 --- a/git/test/test_commit.py +++ b/git/test/objects/test_commit.py @@ -5,10 +5,14 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * -from gitdb import IStream -from gitdb.util import hex_to_bin +from lib import * +from git.objects.commit import * +from git.base import IStream + +from git.util import ( + hex_to_bin, + Actor, + ) from cStringIO import StringIO import time @@ -65,7 +69,7 @@ def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False) # END handle performance info -class TestCommit(TestBase): +class TestCommit(TestObjectBase): def test_bake(self): diff --git a/git/test/test_submodule.py b/git/test/objects/test_submodule.py index adb4fb82..14cb074c 100644 --- a/git/test/test_submodule.py +++ b/git/test/objects/test_submodule.py @@ -1,29 +1,51 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * +from lib import * from git.exc import * from git.objects.submodule.base import Submodule from git.objects.submodule.root import RootModule, RootUpdateProgress from git.util import to_native_path_linux, join_path_native + import shutil import git import os +import sys class TestRootProgress(RootUpdateProgress): """Just prints messages, for now without checking the correctness of the states""" - def update(self, op, index, max_count, message=''): + def update(self, op, index, max_count, message='', input=''): print message prog = TestRootProgress() -class TestSubmodule(TestBase): +class TestSubmodule(TestObjectBase): k_subm_current = "468cad66ff1f80ddaeee4123c24e4d53a032c00d" k_subm_changed = 
"394ed7006ee5dc8bddfd132b64001d5dfc0ffdd3" k_no_subm_tag = "0.1.6" + k_github_gitdb_url = 'git://github.com/gitpython-developers/gitdb.git' + env_gitdb_local_path = "GITPYTHON_TEST_GITDB_LOCAL_PATH" + def _generate_async_local_path(self): + return to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, 'git/ext/async')) + + def _rewrite_gitdb_to_local_path(self, smgitdb): + """Rewrites the given submodule to point to the local path of the gitdb repository, if possible. + Otherwise it leaves it unchanged + :return: new clone path, or None if no new path was set""" + new_smclone_path = os.environ.get(self.env_gitdb_local_path) + if new_smclone_path is not None: + writer = smgitdb.config_writer() + writer.set_value('url', new_smclone_path) + del(writer) + assert smgitdb.config_reader().get_value('url') == new_smclone_path + assert smgitdb.url == new_smclone_path + else: + sys.stderr.write("Submodule tests need the gitdb repository. You can specify a local source setting the %s environment variable. Otherwise it will be downloaded from the internet" % self.env_gitdb_local_path) + #END handle submodule path + return new_smclone_path def _do_base_tests(self, rwrepo): """Perform all tests in the given repository, it may be bare or nonbare""" @@ -42,7 +64,7 @@ class TestSubmodule(TestBase): assert sm.path == 'git/ext/gitdb' assert sm.path != sm.name # in our case, we have ids there, which don't equal the path - assert sm.url == 'git://github.com/gitpython-developers/gitdb.git' + assert sm.url == self.k_github_gitdb_url assert sm.branch_path == 'refs/heads/master' # the default ... assert sm.branch_name == 'master' assert sm.parent_commit == rwrepo.head.commit @@ -73,13 +95,11 @@ class TestSubmodule(TestBase): if rwrepo.bare: self.failUnlessRaises(InvalidGitRepositoryError, sm.config_writer) else: - writer = sm.config_writer() # for faster checkout, set the url to the local path - new_smclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path)) - writer.set_value('url', new_smclone_path) - del(writer) - assert sm.config_reader().get_value('url') == new_smclone_path - assert sm.url == new_smclone_path + # Note: This is nice but doesn't work anymore with the latest git-python + # version. This would also mean we need internet for this to work which + # is why we allow an override using an environment variable + new_smclone_path = self._rewrite_gitdb_to_local_path(sm) # END handle bare repo smold.config_reader() @@ -175,7 +195,8 @@ class TestSubmodule(TestBase): csm_repopath = csm.path # adjust the path of the submodules module to point to the local destination - new_csmclone_path = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path, csm.path)) + # In the current gitpython version, async is used directly by gitpython + new_csmclone_path = self._generate_async_local_path() csm.config_writer().set_value('url', new_csmclone_path) assert csm.url == new_csmclone_path @@ -247,6 +268,10 @@ class TestSubmodule(TestBase): self.failUnlessRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) sm.module().index.reset(working_tree=True) + # make sure sub-submodule is not modified by forcing it to update + # to the revision it is supposed to point to. 
+ csm.update() + # this would work assert sm.remove(dry_run=True) is sm assert sm.module_exists() @@ -384,9 +409,9 @@ class TestSubmodule(TestBase): rm.config_reader() rm.config_writer() - # deep traversal gitdb / async + # deep traversal git / async rsmsp = [sm.path for sm in rm.traverse()] - assert len(rsmsp) == 2 # gitdb and async, async being a child of gitdb + assert len(rsmsp) == 1 # git and async, async being a child of git # cannot set the parent commit as root module's path didn't exist self.failUnlessRaises(ValueError, rm.set_parent_commit, 'HEAD') @@ -406,8 +431,8 @@ class TestSubmodule(TestBase): prep = sm.path assert not sm.module_exists() # was never updated after rwrepo's clone - # assure we clone from a local source - sm.config_writer().set_value('url', to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, sm.path))) + # assure we clone from a local source + self._rewrite_gitdb_to_local_path(sm) # dry-run does nothing sm.update(recursive=False, dry_run=True, progress=prog) @@ -440,7 +465,7 @@ class TestSubmodule(TestBase): #================ nsmn = "newsubmodule" nsmp = "submrepo" - async_url = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) + async_url = self._generate_async_local_path() nsm = Submodule.add(rwrepo, nsmn, nsmp, url=async_url) csmadded = rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference nsm.set_parent_commit(csmadded) @@ -482,7 +507,11 @@ class TestSubmodule(TestBase): # to the first repository, this way we have a fast checkout, and a completely different # repository at the different url nsm.set_parent_commit(csmremoved) - nsmurl = to_native_path_linux(join_path_native(self.rorepo.working_tree_dir, rsmsp[0])) + nsmurl = os.environ.get(self.env_gitdb_local_path, self.k_github_gitdb_url) + + # Note: We would have liked to have a different url, but we cannot + # provoke this case + assert nsm.url != nsmurl nsm.config_writer().set_value('url', nsmurl) csmpathchange = rwrepo.index.commit("changed url") nsm.set_parent_commit(csmpathchange) diff --git a/git/test/test_tree.py b/git/test/objects/test_tree.py index ec10e962..bc8d3f97 100644 --- a/git/test/test_tree.py +++ b/git/test/objects/test_tree.py @@ -4,16 +4,18 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import os -from git.test.lib import * -from git import * + +from lib import * from git.objects.fun import ( traverse_tree_recursive, traverse_trees_recursive ) +from git.objects.blob import Blob +from git.objects.tree import Tree from cStringIO import StringIO +import os -class TestTree(TestBase): +class TestTree(TestObjectBase): def test_serializable(self): # tree at the given commit contains a submodule as well diff --git a/git/test/performance/__init__.py b/git/test/performance/__init__.py new file mode 100644 index 00000000..6bd117b9 --- /dev/null +++ b/git/test/performance/__init__.py @@ -0,0 +1,2 @@ +"""Note: This init file makes the performance tests an integral part of the test suite +as nose will now pick them up. 
Previously the init file was intentionally omitted""" diff --git a/git/test/performance/db/__init__.py b/git/test/performance/db/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/git/test/performance/db/__init__.py @@ -0,0 +1 @@ + diff --git a/git/test/performance/db/looseodb_impl.py b/git/test/performance/db/looseodb_impl.py new file mode 100644 index 00000000..6d3c1fa6 --- /dev/null +++ b/git/test/performance/db/looseodb_impl.py @@ -0,0 +1,132 @@ +"""Performance data streaming performance""" +from git.db.py import * +from git.base import * +from git.stream import * +from async import ChannelThreadTask +from git.util import ( + pool, + bin_to_hex + ) +import os +import sys +from time import time + +from git.test.lib import ( + GlobalsItemDeletorMetaCls, + make_memory_file, + with_rw_repo + ) + +from git.test.performance.lib import TestBigRepoR + + +#{ Utilities + +def read_chunked_stream(stream): + total = 0 + while True: + chunk = stream.read(chunk_size) + total += len(chunk) + if len(chunk) < chunk_size: + break + # END read stream loop + assert total == stream.size + return stream + + +class TestStreamReader(ChannelThreadTask): + """Expects input streams and reads them in chunks. It will read one at a time, + requireing a queue chunk of size 1""" + def __init__(self, *args): + super(TestStreamReader, self).__init__(*args) + self.fun = read_chunked_stream + self.max_chunksize = 1 + + +#} END utilities + +class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls): + ModuleToDelete = 'TestLooseDBWPerformanceBase' + + +class TestLooseDBWPerformanceBase(TestBigRepoR): + __metaclass__ = PerfBaseDeletorMetaClass + + large_data_size_bytes = 1000*1000*10 # some MiB should do it + moderate_data_size_bytes = 1000*1000*1 # just 1 MiB + + #{ Configuration + LooseODBCls = None + #} END configuration + + @classmethod + def setUpAll(cls): + super(TestLooseDBWPerformanceBase, cls).setUpAll() + if cls.LooseODBCls is None: + raise AssertionError("LooseODBCls must be set in subtype") + #END assert configuration + # currently there is no additional configuration + + @with_rw_repo("HEAD") + def test_large_data_streaming(self, rwrepo): + # TODO: This part overlaps with the same file in git.test.performance.test_stream + # It should be shared if possible + objects_path = rwrepo.db_path('') + ldb = self.LooseODBCls(objects_path) + + for randomize in range(2): + desc = (randomize and 'random ') or '' + print >> sys.stderr, "Creating %s data ..." 
% desc + st = time() + size, stream = make_memory_file(self.large_data_size_bytes, randomize) + elapsed = time() - st + print >> sys.stderr, "Done (in %f s)" % elapsed + + # writing - due to the compression it will seem faster than it is + st = time() + binsha = ldb.store(IStream('blob', size, stream)).binsha + elapsed_add = time() - st + assert ldb.has_object(binsha) + hexsha = bin_to_hex(binsha) + db_file = os.path.join(objects_path, hexsha[:2], hexsha[2:]) + fsize_kib = os.path.getsize(db_file) / 1000 + + + size_kib = size / 1000 + print >> sys.stderr, "%s: Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (self.LooseODBCls.__name__, size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) + + # reading all at once + st = time() + ostream = ldb.stream(binsha) + shadata = ostream.read() + elapsed_readall = time() - st + + stream.seek(0) + assert shadata == stream.getvalue() + print >> sys.stderr, "%s: Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, elapsed_readall, size_kib / elapsed_readall) + + + # reading in chunks of 1 MiB + cs = 512*1000 + chunks = list() + st = time() + ostream = ldb.stream(binsha) + while True: + data = ostream.read(cs) + chunks.append(data) + if len(data) < cs: + break + # END read in chunks + elapsed_readchunks = time() - st + + stream.seek(0) + assert ''.join(chunks) == stream.getvalue() + + cs_kib = cs / 1000 + print >> sys.stderr, "%s: Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (self.LooseODBCls.__name__, size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) + + # del db file so git has something to do + os.remove(db_file) + # END for each randomization factor + + diff --git a/git/test/performance/db/odb_impl.py b/git/test/performance/db/odb_impl.py new file mode 100644 index 00000000..677cf6a8 --- /dev/null +++ b/git/test/performance/db/odb_impl.py @@ -0,0 +1,72 @@ +"""Performance tests for object store""" + +from time import time +import sys +import stat + +from git.test.performance.lib import ( + TestBigRepoR, + GlobalsItemDeletorMetaCls + ) + +class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls): + ModuleToDelete = 'TestObjDBPerformanceBase' + + +class TestObjDBPerformanceBase(TestBigRepoR): + __metaclass__ = PerfBaseDeletorMetaClass + + #{ Configuration + RepoCls = None # to be set by subclass + #} END configuration + + def test_random_access_test(self): + repo = self.rorepo + + # GET COMMITS + st = time() + root_commit = repo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed) + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + data_bytes = 0 + for blob_list in blobs_per_commit: + for blob in blob_list: + data_bytes += 
len(blob.data_stream.read()) + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed) + + diff --git a/git/test/performance/db/packedodb_impl.py b/git/test/performance/db/packedodb_impl.py new file mode 100644 index 00000000..b95a8d13 --- /dev/null +++ b/git/test/performance/db/packedodb_impl.py @@ -0,0 +1,107 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Performance tests for object store""" +from git.test.performance.lib import ( + TestBigRepoR, + GlobalsItemDeletorMetaCls + ) + +from git.exc import UnsupportedOperation + +import sys +import os +from time import time +import random + + +class PerfBaseDeletorMetaClass(GlobalsItemDeletorMetaCls): + ModuleToDelete = 'TestPurePackedODBPerformanceBase' + +class TestPurePackedODBPerformanceBase(TestBigRepoR): + __metaclass__ = PerfBaseDeletorMetaClass + + #{ Configuration + PackedODBCls = None + #} END configuration + + @classmethod + def setUpAll(cls): + super(TestPurePackedODBPerformanceBase, cls).setUpAll() + if cls.PackedODBCls is None: + raise AssertionError("PackedODBCls must be set in subclass") + #END assert configuration + cls.ropdb = cls.PackedODBCls(cls.rorepo.db_path("pack")) + + def test_pack_random_access(self): + pdb = self.ropdb + + # sha lookup + st = time() + sha_list = list(pdb.sha_iter()) + elapsed = time() - st + ns = len(sha_list) + print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed) + + # sha lookup: best-case and worst case access + pdb_pack_info = pdb._pack_info + # END shuffle shas + st = time() + for sha in sha_list: + pdb_pack_info(sha) + # END for each sha to look up + elapsed = time() - st + + # discard cache + del(pdb._entities) + pdb.entities() + print >> sys.stderr, "PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" % (ns, len(pdb.entities()), elapsed, ns / elapsed) + # END for each random mode + + # query info and streams only + max_items = 10000 # can wait longer when testing memory + for pdb_fun in (pdb.info, pdb.stream): + st = time() + for sha in sha_list[:max_items]: + pdb_fun(sha) + elapsed = time() - st + print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed) + # END for each function + + # retrieve stream and read all + max_items = 5000 + pdb_stream = pdb.stream + total_size = 0 + st = time() + for sha in sha_list[:max_items]: + stream = pdb_stream(sha) + stream.read() + total_size += stream.size + elapsed = time() - st + total_kib = total_size / 1000 + print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib/elapsed , elapsed, max_items / elapsed) + + def test_correctness(self): + pdb = self.ropdb + # disabled for now as it used to work perfectly, checking big repositories takes a long time + print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)" + for crc in range(2): + count = 0 + st = time() + for entity in pdb.entities(): + pack_verify = entity.is_valid_stream + 
sha_by_index = entity.index().sha + for index in xrange(entity.index().size()): + try: + assert pack_verify(sha_by_index(index), use_crc=crc) + count += 1 + except UnsupportedOperation: + pass + # END ignore old indices + # END for each index + # END for each entity + elapsed = time() - st + print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed) + # END for each verify mode + diff --git a/git/test/performance/db/test_looseodb_cmd.py b/git/test/performance/db/test_looseodb_cmd.py new file mode 100644 index 00000000..9738278c --- /dev/null +++ b/git/test/performance/db/test_looseodb_cmd.py @@ -0,0 +1,11 @@ +from git.db.complex import CmdCompatibilityGitDB +from looseodb_impl import TestLooseDBWPerformanceBase + +import sys + +class TestCmdLooseDB(TestLooseDBWPerformanceBase): + LooseODBCls = CmdCompatibilityGitDB + + def test_info(self): + sys.stderr.write("This test does not check the write performance of the git command as it is implemented in pure python") + diff --git a/git/test/performance/db/test_looseodb_pure.py b/git/test/performance/db/test_looseodb_pure.py new file mode 100644 index 00000000..46f39d5e --- /dev/null +++ b/git/test/performance/db/test_looseodb_pure.py @@ -0,0 +1,6 @@ +from git.db.py.loose import PureLooseObjectODB +from looseodb_impl import TestLooseDBWPerformanceBase + +class TestPureLooseDB(TestLooseDBWPerformanceBase): + LooseODBCls = PureLooseObjectODB + diff --git a/git/test/performance/db/test_odb_cmd.py b/git/test/performance/db/test_odb_cmd.py new file mode 100644 index 00000000..acd55cc9 --- /dev/null +++ b/git/test/performance/db/test_odb_cmd.py @@ -0,0 +1,6 @@ +from git.db.complex import CmdCompatibilityGitDB +from odb_impl import TestObjDBPerformanceBase + +class TestCmdDB(TestObjDBPerformanceBase): + RepoCls = CmdCompatibilityGitDB + diff --git a/git/test/performance/db/test_odb_pure.py b/git/test/performance/db/test_odb_pure.py new file mode 100644 index 00000000..6ed3585d --- /dev/null +++ b/git/test/performance/db/test_odb_pure.py @@ -0,0 +1,6 @@ +from git.db.complex import PureCompatibilityGitDB +from odb_impl import TestObjDBPerformanceBase + +class TestPureDB(TestObjDBPerformanceBase): + RepoCls = PureCompatibilityGitDB + diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py new file mode 100644 index 00000000..4ea09779 --- /dev/null +++ b/git/test/performance/db/test_packedodb_pure.py @@ -0,0 +1,90 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from packedodb_impl import TestPurePackedODBPerformanceBase +from git.db.py.pack import PurePackedODB + +from git.stream import NullStream + +from git.pack import PackEntity + +import os +import sys + +from time import time +from nose import SkipTest + + +class CountedNullStream(NullStream): + __slots__ = '_bw' + def __init__(self): + self._bw = 0 + + def bytes_written(self): + return self._bw + + def write(self, d): + self._bw += NullStream.write(self, d) + + +class TestPurePackedODB(TestPurePackedODBPerformanceBase): + #{ Configuration + PackedODBCls = PurePackedODB + #} END configuration + + def test_pack_writing_note(self): + sys.stderr.write("test_pack_writing should be adjusted to support different databases to read from - see test for more info") + raise SkipTest() + + def test_pack_writing(self): + # see how 
fast we can write a pack from object streams. + # This will not be fast, as we take time for decompressing the streams as well + # For now we test the fast streaming and slow streaming versions manually + ostream = CountedNullStream() + # NOTE: We use the same repo twice to see whether OS caching helps + for rorepo in (self.rorepo, self.rorepo, self.ropdb): + + ni = 5000 + count = 0 + total_size = 0 + st = time() + objs = list() + for sha in rorepo.sha_iter(): + count += 1 + objs.append(rorepo.stream(sha)) + if count == ni: + break + #END gather objects for pack-writing + elapsed = time() - st + print >> sys.stderr, "PDB Streaming: Got %i streams from %s by sha in %f s ( %f streams/s )" % (ni, rorepo.__class__.__name__, elapsed, ni / elapsed) + + st = time() + PackEntity.write_pack(objs, ostream.write) + elapsed = time() - st + total_kb = ostream.bytes_written() / 1000 + print >> sys.stderr, "PDB Streaming: Wrote pack of size %i KiB in %f s (%f KiB/s)" % (total_kb, elapsed, total_kb/elapsed) + #END for each rorepo + + + def test_stream_reading(self): + raise SkipTest("This test was only used for --with-profile runs") + pdb = self.ropdb + + # streaming only, meant for --with-profile runs + ni = 5000 + count = 0 + pdb_stream = pdb.stream + total_size = 0 + st = time() + for sha in pdb.sha_iter(): + if count == ni: + break + stream = pdb_stream(sha) + stream.read() + total_size += stream.size + count += 1 + elapsed = time() - st + total_kib = total_size / 1000 + print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed) + diff --git a/git/test/performance/lib.py b/git/test/performance/lib.py index d0727b60..758d402d 100644 --- a/git/test/performance/lib.py +++ b/git/test/performance/lib.py @@ -1,17 +1,13 @@ """Contains library functions""" import os -from git.test.lib import * +from git.test.lib import ( + TestBase, + GlobalsItemDeletorMetaCls + ) import shutil import tempfile -from git.db import ( - GitCmdObjectDB, - GitDB - ) - -from git import ( - Repo - ) +from git import Repo #{ Invvariants k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" @@ -38,11 +34,7 @@ class TestBigRepoR(TestBase): * gitrorepo - * Read-Only git repository - actually the repo of git itself - - * puregitrorepo - - * As gitrepo, but uses pure python implementation + * a big read-only git repository """ #{ Invariants @@ -50,29 +42,33 @@ class TestBigRepoR(TestBase): head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' #} END invariants + #{ Configuration + RepoCls = Repo + #} END configuration + @classmethod def setUpAll(cls): super(TestBigRepoR, cls).setUpAll() - repo_path = resolve_or_fail(k_env_git_repo) - cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB) - cls.puregitrorepo = Repo(repo_path, odbt=GitDB) + if cls.RepoCls is None: + raise AssertionError("Require RepoCls in class %s to be set" % cls) + #END assert configuration + cls.rorepo = cls.RepoCls(resolve_or_fail(k_env_git_repo)) class TestBigRepoRW(TestBigRepoR): """As above, but provides a big repository that we can write to. 
- Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``""" + Provides ``self.rwrepo``""" @classmethod def setUpAll(cls): super(TestBigRepoRW, cls).setUpAll() dirname = tempfile.mktemp() os.mkdir(dirname) - cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) - cls.puregitrwrepo = Repo(dirname, odbt=GitDB) + cls.rwrepo = cls.rorepo.clone(dirname, shared=True, bare=True) @classmethod def tearDownAll(cls): - shutil.rmtree(cls.gitrwrepo.working_dir) + shutil.rmtree(cls.rwrepo.working_dir) #} END base classes diff --git a/git/test/performance/objects/__init__.py b/git/test/performance/objects/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/git/test/performance/objects/__init__.py @@ -0,0 +1 @@ + diff --git a/git/test/performance/test_commit.py b/git/test/performance/objects/test_commit.py index 80421aa2..685fba2f 100644 --- a/git/test/performance/test_commit.py +++ b/git/test/performance/objects/test_commit.py @@ -4,18 +4,18 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from lib import * +from git.test.performance.lib import TestBigRepoRW from git import * -from gitdb import IStream -from git.test.test_commit import assert_commit_serialization +from git.base import IStream +from git.test.objects.test_commit import assert_commit_serialization from cStringIO import StringIO from time import time import sys class TestPerformance(TestBigRepoRW): - + # ref with about 100 commits in its history - ref_100 = '0.1.6' + ref_100 = 'v0.99' def _query_commit_info(self, c): c.author @@ -45,13 +45,14 @@ class TestPerformance(TestBigRepoRW): # END for each object # END for each commit elapsed_time = time() - st + assert no, "Should have traversed a few objects" print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) def test_commit_traversal(self): # bound to cat-file parsing performance nc = 0 st = time() - for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): + for c in self.rorepo.commit(self.head_sha_2k).traverse(branch_first=False): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -62,7 +63,7 @@ class TestPerformance(TestBigRepoRW): # bound to stream parsing performance nc = 0 st = time() - for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): + for c in Commit.iter_items(self.rorepo, self.head_sha_2k): nc += 1 self._query_commit_info(c) # END for each traversed commit @@ -70,10 +71,10 @@ class TestPerformance(TestBigRepoRW): print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) def test_commit_serialization(self): - assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) + assert_commit_serialization(self.rwrepo, self.head_sha_2k, True) - rwrepo = self.gitrwrepo - make_object = rwrepo.odb.store + rwrepo = self.rwrepo + make_object = rwrepo.store # direct serialization - deserialization can be tested afterwards # serialization is probably limited on IO hc = rwrepo.commit(self.head_sha_2k) diff --git a/git/test/performance/test_odb.py b/git/test/performance/test_odb.py deleted file mode 100644 index 32b70f69..00000000 --- a/git/test/performance/test_odb.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Performance tests for object store""" - -from time import time -import sys -import stat - -from lib import ( - TestBigRepoR - ) - - -class 
TestObjDBPerformance(TestBigRepoR): - - def test_random_access(self): - results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ] - for repo in (self.gitrorepo, self.puregitrorepo): - # GET COMMITS - st = time() - root_commit = repo.commit(self.head_sha_2k) - commits = list(root_commit.traverse()) - nc = len(commits) - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed) - results[0].append(elapsed) - - # GET TREES - # walk all trees of all commits - st = time() - blobs_per_commit = list() - nt = 0 - for commit in commits: - tree = commit.tree - blobs = list() - for item in tree.traverse(): - nt += 1 - if item.type == 'blob': - blobs.append(item) - # direct access for speed - # END while trees are there for walking - blobs_per_commit.append(blobs) - # END for each commit - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed) - results[1].append(elapsed) - - # GET BLOBS - st = time() - nb = 0 - too_many = 15000 - data_bytes = 0 - for blob_list in blobs_per_commit: - for blob in blob_list: - data_bytes += len(blob.data_stream.read()) - # END for each blobsha - nb += len(blob_list) - if nb > too_many: - break - # END for each bloblist - elapsed = time() - st - - print >> sys.stderr, "%s: Retrieved %i blob (%i KiB) and their data in %g s ( %f blobs / s, %f KiB / s )" % (type(repo.odb), nb, data_bytes/1000, elapsed, nb / elapsed, (data_bytes / 1000) / elapsed) - results[2].append(elapsed) - # END for each repo type - - # final results - for test_name, a, b in results: - print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a) - # END for each result diff --git a/git/test/performance/test_streams.py b/git/test/performance/test_streams.py deleted file mode 100644 index 7f17d722..00000000 --- a/git/test/performance/test_streams.py +++ /dev/null @@ -1,131 +0,0 @@ -"""Performance data streaming performance""" - -from git.test.lib import * -from gitdb import * -from gitdb.util import bin_to_hex - -from time import time -import os -import sys -import stat -import subprocess - -from gitdb.test.lib import make_memory_file - -from lib import ( - TestBigRepoR - ) - - -class TestObjDBPerformance(TestBigRepoR): - - large_data_size_bytes = 1000*1000*10 # some MiB should do it - moderate_data_size_bytes = 1000*1000*1 # just 1 MiB - - @with_rw_repo('HEAD', bare=True) - def test_large_data_streaming(self, rwrepo): - # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream - # It should be shared if possible - ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) - - for randomize in range(2): - desc = (randomize and 'random ') or '' - print >> sys.stderr, "Creating %s data ..." 
% desc - st = time() - size, stream = make_memory_file(self.large_data_size_bytes, randomize) - elapsed = time() - st - print >> sys.stderr, "Done (in %f s)" % elapsed - - # writing - due to the compression it will seem faster than it is - st = time() - binsha = ldb.store(IStream('blob', size, stream)).binsha - elapsed_add = time() - st - assert ldb.has_object(binsha) - db_file = ldb.readable_db_object_path(bin_to_hex(binsha)) - fsize_kib = os.path.getsize(db_file) / 1000 - - - size_kib = size / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) - - # reading all at once - st = time() - ostream = ldb.stream(binsha) - shadata = ostream.read() - elapsed_readall = time() - st - - stream.seek(0) - assert shadata == stream.getvalue() - print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) - - - # reading in chunks of 1 MiB - cs = 512*1000 - chunks = list() - st = time() - ostream = ldb.stream(binsha) - while True: - data = ostream.read(cs) - chunks.append(data) - if len(data) < cs: - break - # END read in chunks - elapsed_readchunks = time() - st - - stream.seek(0) - assert ''.join(chunks) == stream.getvalue() - - cs_kib = cs / 1000 - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks) - - # del db file so git has something to do - os.remove(db_file) - - # VS. CGIT - ########## - # CGIT ! Can using the cgit programs be faster ? - proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE) - - # write file - pump everything in at once to be a fast as possible - data = stream.getvalue() # cache it - st = time() - proc.stdin.write(data) - proc.stdin.close() - gitsha = proc.stdout.read().strip() - proc.wait() - gelapsed_add = time() - st - del(data) - assert gitsha == bin_to_hex(binsha) # we do it the same way, right ? - - # as its the same sha, we reuse our path - fsize_kib = os.path.getsize(db_file) / 1000 - print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add) - - # compare ... 
- print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc) - - - # read all - st = time() - s, t, size, data = rwrepo.git.get_object_data(gitsha) - gelapsed_readall = time() - st - print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall) - - # compare - print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc) - - - # read chunks - st = time() - s, t, size, stream = rwrepo.git.stream_object_data(gitsha) - while True: - data = stream.read(cs) - if len(data) < cs: - break - # END read stream - gelapsed_readchunks = time() - st - print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks) - - # compare - print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) - # END for each randomization factor diff --git a/git/test/test_actor.py b/git/test/test_actor.py deleted file mode 100644 index b8e5ba3b..00000000 --- a/git/test/test_actor.py +++ /dev/null @@ -1,36 +0,0 @@ -# test_actor.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os -from git.test.lib import * -from git import * - -class TestActor(object): - def test_from_string_should_separate_name_and_email(self): - a = Actor._from_string("Michael Trier <mtrier@example.com>") - assert_equal("Michael Trier", a.name) - assert_equal("mtrier@example.com", a.email) - - # base type capabilities - assert a == a - assert not ( a != a ) - m = set() - m.add(a) - m.add(a) - assert len(m) == 1 - - def test_from_string_should_handle_just_name(self): - a = Actor._from_string("Michael Trier") - assert_equal("Michael Trier", a.name) - assert_equal(None, a.email) - - def test_should_display_representation(self): - a = Actor._from_string("Michael Trier <mtrier@example.com>") - assert_equal('<git.Actor "Michael Trier <mtrier@example.com>">', repr(a)) - - def test_str_should_alias_name(self): - a = Actor._from_string("Michael Trier <mtrier@example.com>") - assert_equal(a.name, str(a))
\ No newline at end of file diff --git a/git/test/test_base.py b/git/test/test_base.py index e630d151..7488ac6b 100644 --- a/git/test/test_base.py +++ b/git/test/test_base.py @@ -3,18 +3,48 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +from lib import ( + TestBase, + with_rw_repo, + DummyStream, + DeriveTest, + with_rw_and_rw_remote_repo + ) import git.objects.base as base +from git.objects import ( + Blob, + Tree, + Commit, + TagObject + ) import git.refs as refs -import os -from git.test.lib import * -from git import * + from itertools import chain from git.objects.util import get_object_type_by_name -from gitdb.util import hex_to_bin +from git.util import hex_to_bin import tempfile +################## + +from git.util import ( + NULL_BIN_SHA + ) + +from git.typ import str_blob_type +from git.base import ( + OInfo, + OPackInfo, + ODeltaPackInfo, + OStream, + OPackStream, + ODeltaPackStream, + IStream, + ) + +import os + class TestBase(TestBase): type_tuples = ( ("blob", "8741fc1d09d61f02ffd8cded15ff603eff1ec070", "blob.py"), @@ -77,7 +107,7 @@ class TestBase(TestBase): assert base.Object in get_object_type_by_name(tname).mro() # END for each known type - assert_raises( ValueError, get_object_type_by_name, "doesntexist" ) + self.failUnlessRaises(ValueError, get_object_type_by_name, "doesntexist") def test_object_resolution(self): # objects must be resolved to shas so they compare equal @@ -98,3 +128,85 @@ class TestBase(TestBase): assert not rw_repo.config_reader("repository").getboolean("core", "bare") assert rw_remote_repo.config_reader("repository").getboolean("core", "bare") assert os.path.isdir(os.path.join(rw_repo.working_tree_dir,'lib')) + + + +class TestBaseTypes(TestBase): + + def test_streams(self): + # test info + sha = NULL_BIN_SHA + s = 20 + blob_id = 3 + + info = OInfo(sha, str_blob_type, s) + assert info.binsha == sha + assert info.type == str_blob_type + assert info.type_id == blob_id + assert info.size == s + + # test pack info + # provides type_id + pinfo = OPackInfo(0, blob_id, s) + assert pinfo.type == str_blob_type + assert pinfo.type_id == blob_id + assert pinfo.pack_offset == 0 + + dpinfo = ODeltaPackInfo(0, blob_id, s, sha) + assert dpinfo.type == str_blob_type + assert dpinfo.type_id == blob_id + assert dpinfo.delta_info == sha + assert dpinfo.pack_offset == 0 + + + # test ostream + stream = DummyStream() + ostream = OStream(*(info + (stream, ))) + assert ostream.stream is stream + ostream.read(15) + stream._assert() + assert stream.bytes == 15 + ostream.read(20) + assert stream.bytes == 20 + + # test packstream + postream = OPackStream(*(pinfo + (stream, ))) + assert postream.stream is stream + postream.read(10) + stream._assert() + assert stream.bytes == 10 + + # test deltapackstream + dpostream = ODeltaPackStream(*(dpinfo + (stream, ))) + assert dpostream.stream is stream + dpostream.read(5) + stream._assert() + assert stream.bytes == 5 + + # derive with own args + DeriveTest(sha, str_blob_type, s, stream, 'mine',myarg = 3)._assert() + + # test istream + istream = IStream(str_blob_type, s, stream) + assert istream.binsha == None + istream.binsha = sha + assert istream.binsha == sha + + assert len(istream.binsha) == 20 + assert len(istream.hexsha) == 40 + + assert istream.size == s + istream.size = s * 2 + assert istream.size == s * 2 + assert istream.type == str_blob_type + istream.type = "something" + assert istream.type == "something" + assert istream.stream is stream + istream.stream = 
None + assert istream.stream is None + + assert istream.error is None + istream.error = Exception() + assert isinstance(istream.error, Exception) + + diff --git a/git/test/test_config.py b/git/test/test_config.py index 173e380c..d2e199e3 100644 --- a/git/test/test_config.py +++ b/git/test/test_config.py @@ -4,13 +4,13 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * +from git.test.lib import TestBase, fixture_path import StringIO +from git.config import * from copy import copy from ConfigParser import NoSectionError -class TestBase(TestCase): +class TestConfig(TestBase): def _to_memcache(self, file_path): fp = open(file_path, "r") @@ -30,7 +30,9 @@ class TestBase(TestCase): w_config.read() # enforce reading assert w_config._sections w_config.write() # enforce writing - assert file_obj.getvalue() == file_obj_orig.getvalue() + + # we stripped lines when reading, so the results differ + assert file_obj.getvalue() != file_obj_orig.getvalue() # creating an additional config writer must fail due to exclusive access self.failUnlessRaises(IOError, GitConfigParser, file_obj, read_only = False) @@ -56,10 +58,10 @@ class TestBase(TestCase): file_obj.seek(0) r_config = GitConfigParser(file_obj, read_only=True) + #print file_obj.getvalue() assert r_config.has_section(sname) assert r_config.has_option(sname, oname) assert r_config.get(sname, oname) == val - # END for each filename def test_base(self): diff --git a/git/test/test_db.py b/git/test/test_db.py deleted file mode 100644 index db2d7983..00000000 --- a/git/test/test_db.py +++ /dev/null @@ -1,25 +0,0 @@ -# test_repo.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git.db import * -from gitdb.util import bin_to_hex -from git.exc import BadObject -import os - -class TestDB(TestBase): - - def test_base(self): - gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git) - - # partial to complete - works with everything - hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6")) - assert len(hexsha) == 40 - - assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha - - # fails with BadObject - for invalid_rev in ("0000", "bad/ref", "super bad"): - self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev) diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 83db2df6..79f038e8 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -4,8 +4,15 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * +from git.test.lib import ( + TestBase, + StringProcessAdapter, + fixture, + assert_equal, + assert_true + ) + +from git.diff import * class TestDiff(TestBase): diff --git a/git/test/test_example.py b/git/test/test_example.py new file mode 100644 index 00000000..dbab3118 --- /dev/null +++ b/git/test/test_example.py @@ -0,0 +1,64 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module with examples from the tutorial section of the docs""" +from lib import TestBase, 
fixture_path +from git.base import IStream +from git.db.py.loose import PureLooseObjectODB +from git.util import pool + +from cStringIO import StringIO + +from async import IteratorReader + +class TestExamples(TestBase): + + def test_base(self): + ldb = PureLooseObjectODB(fixture_path("../../../.git/objects")) + + for sha1 in ldb.sha_iter(): + oinfo = ldb.info(sha1) + ostream = ldb.stream(sha1) + assert oinfo[:3] == ostream[:3] + + assert len(ostream.read()) == ostream.size + assert ldb.has_object(oinfo.binsha) + # END for each sha in database + # assure we close all files + try: + del(ostream) + del(oinfo) + except UnboundLocalError: + pass + # END ignore exception if there are no loose objects + + data = "my data" + istream = IStream("blob", len(data), StringIO(data)) + + # the object does not yet have a sha + assert istream.binsha is None + ldb.store(istream) + # now the sha is set + assert len(istream.binsha) == 20 + assert ldb.has_object(istream.binsha) + + + # async operation + # Create a reader from an iterator + reader = IteratorReader(ldb.sha_iter()) + + # get reader for object streams + info_reader = ldb.stream_async(reader) + + # read one + info = info_reader.read(1)[0] + + # read all the rest until depletion + ostreams = info_reader.read() + + # set the pool to use two threads + pool.set_size(2) + + # synchronize the mode of operation + pool.set_size(0) diff --git a/git/test/test_fun.py b/git/test/test_fun.py index b7991cdb..ed069912 100644 --- a/git/test/test_fun.py +++ b/git/test/test_fun.py @@ -1,4 +1,4 @@ -from git.test.lib import * +from git.test.lib import TestBase, with_rw_repo from git.objects.fun import ( traverse_tree_recursive, traverse_trees_recursive, @@ -9,9 +9,9 @@ from git.index.fun import ( aggressive_tree_merge ) -from gitdb.util import bin_to_hex -from gitdb.base import IStream -from gitdb.typ import str_tree_type +from git.util import bin_to_hex +from git.base import IStream +from git.typ import str_tree_type from stat import ( S_IFDIR, diff --git a/git/test/test_git.py b/git/test/test_git.py index c92a642b..b9a0b617 100644 --- a/git/test/test_git.py +++ b/git/test/test_git.py @@ -5,80 +5,96 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os, sys -from git.test.lib import * +from git.test.lib import ( + TestBase, + patch_object, + raises, + assert_equal, + assert_true, + assert_match, + fixture_path + ) from git import Git, GitCommandError -class TestGit(TestCase): - - @classmethod - def setUpAll(cls): - cls.git = Git(GIT_REPO) +class TestGit(TestBase): + + @classmethod + def setUpAll(cls): + super(TestGit, cls).setUpAll() + cls.git = Git(cls.rorepo.working_dir) - @patch_object(Git, 'execute') - def test_call_process_calls_execute(self, git): - git.return_value = '' - self.git.version() - assert_true(git.called) - assert_equal(git.call_args, ((['git', 'version'],), {})) + @patch_object(Git, 'execute') + def test_call_process_calls_execute(self, git): + git.return_value = '' + self.git.version() + assert_true(git.called) + assert_equal(git.call_args, ((['git', 'version'],), {})) - @raises(GitCommandError) - def test_it_raises_errors(self): - self.git.this_does_not_exist() + @raises(GitCommandError) + def test_it_raises_errors(self): + self.git.this_does_not_exist() - def test_it_transforms_kwargs_into_git_command_arguments(self): - assert_equal(["-s"], self.git.transform_kwargs(**{'s': True})) - assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5})) + def test_it_transforms_kwargs_into_git_command_arguments(self): + 
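+ # kwargs are mapped onto CLI flags: single-letter names become short options, + # longer names become long options with underscores turned into dashes, e.g. + # transform_kwargs(s=True) -> ["-s"] and transform_kwargs(max_count=5) -> ["--max-count=5"] + 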
assert_equal(["-s"], self.git.transform_kwargs(**{'s': True})) + assert_equal(["-s5"], self.git.transform_kwargs(**{'s': 5})) - assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True})) - assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5})) + assert_equal(["--max-count"], self.git.transform_kwargs(**{'max_count': True})) + assert_equal(["--max-count=5"], self.git.transform_kwargs(**{'max_count': 5})) - assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True})) + assert_equal(["-s", "-t"], self.git.transform_kwargs(**{'s': True, 't': True})) - def test_it_executes_git_to_shell_and_returns_result(self): - assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"])) + def test_it_executes_git_to_shell_and_returns_result(self): + assert_match('^git version [\d\.]{2}.*$', self.git.execute(["git","version"])) - def test_it_accepts_stdin(self): - filename = fixture_path("cat_file_blob") - fh = open(filename, 'r') - assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8", - self.git.hash_object(istream=fh, stdin=True)) - fh.close() + def test_it_accepts_stdin(self): + filename = fixture_path("cat_file_blob") + fh = open(filename, 'r') + assert_equal("70c379b63ffa0795fdbfbc128e5a2818397b7ef8", + self.git.hash_object(istream=fh, stdin=True)) + fh.close() - @patch_object(Git, 'execute') - def test_it_ignores_false_kwargs(self, git): - # this_should_not_be_ignored=False implies it *should* be ignored - output = self.git.version(pass_this_kwarg=False) - assert_true("pass_this_kwarg" not in git.call_args[1]) - - def test_persistent_cat_file_command(self): - # read header only - import subprocess as sp - hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" - g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True) - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - obj_info = g.stdout.readline() - - # read header + data - g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True) - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - obj_info_two = g.stdout.readline() - assert obj_info == obj_info_two - - # read data - have to read it in one large chunk - size = int(obj_info.split()[2]) - data = g.stdout.read(size) - terminating_newline = g.stdout.read(1) - - # now we should be able to read a new object - g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") - g.stdin.flush() - assert g.stdout.readline() == obj_info - - - # same can be achived using the respective command functions - hexsha, typename, size = self.git.get_object_header(hexsha) - hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha) - assert typename == typename_two and size == size_two + @patch_object(Git, 'execute') + def test_it_ignores_false_kwargs(self, git): + # this_should_not_be_ignored=False implies it *should* be ignored + output = self.git.version(pass_this_kwarg=False) + assert_true("pass_this_kwarg" not in git.call_args[1]) + + def test_persistent_cat_file_command(self): + # read header only + import subprocess as sp + hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" + g = self.git.cat_file(batch_check=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info = g.stdout.readline() + + # read header + data + g = self.git.cat_file(batch=True, istream=sp.PIPE,as_process=True) + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + obj_info_two 
= g.stdout.readline() + assert obj_info == obj_info_two + + # read data - have to read it in one large chunk + size = int(obj_info.split()[2]) + data = g.stdout.read(size) + terminating_newline = g.stdout.read(1) + + # now we should be able to read a new object + g.stdin.write("b2339455342180c7cc1e9bba3e9f181f7baa5167\n") + g.stdin.flush() + assert g.stdout.readline() == obj_info + + + # same can be achieved using the respective command functions + hexsha, typename, size = self.git.get_object_header(hexsha) + hexsha, typename_two, size_two, data = self.git.get_object_data(hexsha) + assert typename == typename_two and size == size_two + + def test_version(self): + v = self.git.version_info + assert isinstance(v, tuple) + for n in v: + assert isinstance(n, int) + #END verify number types diff --git a/git/test/test_import.py b/git/test/test_import.py new file mode 100644 index 00000000..a5a1d11b --- /dev/null +++ b/git/test/test_import.py @@ -0,0 +1,58 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""This module's whole purpose is to verify the __all__ descriptions in the respective +module, by importing using from x import *""" + +# perform the actual imports +import os + +from git import * + +def import_all(topdir, topmodule='git', skip = "test"): + base = os.path.basename + join = os.path.join + init_script = '__init__.py' + prev_cwd = os.getcwd() + try: + os.chdir(os.path.dirname(topdir)) + for root, dirs, files in os.walk(base(topdir)): + if init_script not in files: + del(dirs[:]) + continue + #END ignore non-packages + + if skip in root: + continue + #END handle ignores + + for relafile in files: + if not relafile.endswith('.py'): + continue + if relafile == init_script: + continue + module_path = join(root, os.path.splitext(relafile)[0]).replace("/", ".").replace("\\", ".") + + m = __import__(module_path, globals(), locals(), [""]) + try: + attrlist = m.__all__ + for attr in attrlist: + assert hasattr(m, attr), "Invalid item in %s.__all__: %s" % (module_path, attr) + #END verify + except AttributeError: + pass + # END try each listed attribute + #END for each file in dir + #END for each item + finally: + os.chdir(prev_cwd) + #END handle previous currentdir + + + +class TestDummy(object): + def test_base(self): + dn = os.path.dirname + # NOTE: I don't think this is working, as the __all__ variable is not used in this case + import_all(dn(dn(__file__))) diff --git a/git/test/test_index.py b/git/test/test_index.py index 5d227897..7d65cb9b 100644 --- a/git/test/test_index.py +++ b/git/test/test_index.py @@ -4,7 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * +from git.test.lib import ( + TestBase, + with_rw_repo, + fixture_path, + fixture + ) from git import * import inspect import os @@ -12,6 +17,7 @@ import sys import tempfile import glob import shutil +import time from stat import * class TestIndex(TestBase): diff --git a/git/test/test_pack.py b/git/test/test_pack.py new file mode 100644 index 00000000..c398fc56 --- /dev/null +++ b/git/test/test_pack.py @@ -0,0 +1,247 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test everything 
about packs reading and writing""" +from lib import ( + TestBase, + with_rw_directory, + with_packs_rw, + fixture_path + ) +from git.stream import DeltaApplyReader + +from git.pack import ( + PackEntity, + PackIndexFile, + PackFile + ) + +from git.base import ( + OInfo, + OStream, + ) + +from git.fun import delta_types +from git.exc import UnsupportedOperation +from git.util import to_bin_sha +from itertools import izip, chain +from nose import SkipTest + +import os +import sys +import tempfile + + +#{ Utilities +def bin_sha_from_filename(filename): + return to_bin_sha(os.path.splitext(os.path.basename(filename))[0][5:]) +#} END utilities + +class TestPack(TestBase): + + packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67) + packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30) + packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42) + packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2]) + packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2]) + packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2]) + + + def _assert_index_file(self, index, version, size): + assert index.packfile_checksum() != index.indexfile_checksum() + assert len(index.packfile_checksum()) == 20 + assert len(index.indexfile_checksum()) == 20 + assert index.version() == version + assert index.size() == size + assert len(index.offsets()) == size + + # get all data of all objects + for oidx in xrange(index.size()): + sha = index.sha(oidx) + assert oidx == index.sha_to_index(sha) + + entry = index.entry(oidx) + assert len(entry) == 3 + + assert entry[0] == index.offset(oidx) + assert entry[1] == sha + assert entry[2] == index.crc(oidx) + + # verify partial sha + for l in (4,8,11,17,20): + assert index.partial_sha_to_index(sha[:l], l*2) == oidx + + # END for each object index in indexfile + self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2) + + + def _assert_pack_file(self, pack, version, size): + assert pack.version() == 2 + assert pack.size() == size + assert len(pack.checksum()) == 20 + + num_obj = 0 + for obj in pack.stream_iter(): + num_obj += 1 + info = pack.info(obj.pack_offset) + stream = pack.stream(obj.pack_offset) + + assert info.pack_offset == stream.pack_offset + assert info.type_id == stream.type_id + assert hasattr(stream, 'read') + + # it should be possible to read from both streams + assert obj.read() == stream.read() + + streams = pack.collect_streams(obj.pack_offset) + assert streams + + # read the stream + try: + dstream = DeltaApplyReader.new(streams) + except ValueError: + # ignore these, old git versions use only ref deltas, + # which we havent resolved ( as we are without an index ) + # Also ignore non-delta streams + continue + # END get deltastream + + # read all + data = dstream.read() + assert len(data) == dstream.size + + # test seek + dstream.seek(0) + assert dstream.read() == data + + + # read chunks + # NOTE: the current implementation is safe, it basically transfers + # all calls to the underlying memory map + + # END for each object + assert num_obj == size + + + def test_pack_index(self): + # check version 1 and 2 + for indexfile, version, size in (self.packindexfile_v1, self.packindexfile_v2): + index = PackIndexFile(indexfile) + 
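# each fixture tuple above provides (path, expected_version, expected_object_count); + # the helper below cross-checks offsets, shas and crcs for every index entry + 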
self._assert_index_file(index, version, size) + # END run tests + + def test_pack(self): + # there is this special version 3, but apparently its like 2 ... + for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2): + pack = PackFile(packfile) + self._assert_pack_file(pack, version, size) + # END for each pack to test + + @with_rw_directory + def test_pack_entity(self, rw_dir): + pack_objs = list() + for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1), + (self.packfile_v2_2, self.packindexfile_v2), + (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)): + packfile, version, size = packinfo + indexfile, version, size = indexinfo + entity = PackEntity(packfile) + assert entity.pack().path() == packfile + assert entity.index().path() == indexfile + pack_objs.extend(entity.stream_iter()) + + count = 0 + for info, stream in izip(entity.info_iter(), entity.stream_iter()): + count += 1 + assert info.binsha == stream.binsha + assert len(info.binsha) == 20 + assert info.type_id == stream.type_id + assert info.size == stream.size + + # we return fully resolved items, which is implied by the sha centric access + assert not info.type_id in delta_types + + # try all calls + assert len(entity.collect_streams(info.binsha)) + oinfo = entity.info(info.binsha) + assert isinstance(oinfo, OInfo) + assert oinfo.binsha is not None + ostream = entity.stream(info.binsha) + assert isinstance(ostream, OStream) + assert ostream.binsha is not None + + # verify the stream + try: + assert entity.is_valid_stream(info.binsha, use_crc=True) + except UnsupportedOperation: + pass + # END ignore version issues + assert entity.is_valid_stream(info.binsha, use_crc=False) + # END for each info, stream tuple + assert count == size + + # END for each entity + + # pack writing - write all packs into one + # index path can be None + pack_path = tempfile.mktemp('', "pack", rw_dir) + index_path = tempfile.mktemp('', 'index', rw_dir) + iteration = 0 + def rewind_streams(): + for obj in pack_objs: + obj.stream.seek(0) + #END utility + for ppath, ipath, num_obj in zip((pack_path, )*2, (index_path, None), (len(pack_objs), None)): + pfile = open(ppath, 'wb') + iwrite = None + if ipath: + ifile = open(ipath, 'wb') + iwrite = ifile.write + #END handle ip + + # make sure we rewind the streams ... 
we work on the same objects over and over again + if iteration > 0: + rewind_streams() + #END rewind streams + iteration += 1 + + pack_sha, index_sha = PackEntity.write_pack(pack_objs, pfile.write, iwrite, object_count=num_obj) + pfile.close() + assert os.path.getsize(ppath) > 100 + + # verify pack + pf = PackFile(ppath) + assert pf.size() == len(pack_objs) + assert pf.version() == PackFile.pack_version_default + assert pf.checksum() == pack_sha + + # verify index + if ipath is not None: + ifile.close() + assert os.path.getsize(ipath) > 100 + idx = PackIndexFile(ipath) + assert idx.version() == PackIndexFile.index_version_default + assert idx.packfile_checksum() == pack_sha + assert idx.indexfile_checksum() == index_sha + assert idx.size() == len(pack_objs) + #END verify files exist + #END for each packpath, indexpath pair + + # verify the packs thoroughly + rewind_streams() + entity = PackEntity.create(pack_objs, rw_dir) + count = 0 + for info in entity.info_iter(): + count += 1 + for use_crc in range(2): + assert entity.is_valid_stream(info.binsha, use_crc) + # END for each crc mode + #END for each info + assert count == len(pack_objs) + + + def test_pack_64(self): + # TODO: hex-edit a pack to verify that we can handle 64 bit offsets + # of course without really needing such a huge pack + raise SkipTest() diff --git a/git/test/test_reflog.py b/git/test/test_reflog.py index 3fdf1fae..271924aa 100644 --- a/git/test/test_reflog.py +++ b/git/test/test_reflog.py @@ -1,4 +1,4 @@ -from git.test.lib import * +from git.test.lib import TestBase, fixture_path from git.objects import IndexObject from git.refs import * from git.util import Actor diff --git a/git/test/test_refs.py b/git/test/test_refs.py index 2338b4e4..e49b23ab 100644 --- a/git/test/test_refs.py +++ b/git/test/test_refs.py @@ -4,20 +4,25 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from mock import * -from git.test.lib import * -from git import * -import git.refs as refs +from git.test.lib import TestBase, with_rw_repo +from git.refs import * +import git.refs as ref + from git.util import Actor from git.objects.tag import TagObject + +from git.exc import GitCommandError + from itertools import chain import os +from nose import SkipTest + class TestRefs(TestBase): def test_from_path(self): # should be able to create any reference directly - for ref_type in ( Reference, Head, TagReference, RemoteReference ): + for ref_type in (Reference, Head, TagReference, RemoteReference): for name in ('rela_name', 'path/rela_name'): full_path = ref_type.to_full_path(name) instance = ref_type.from_path(self.rorepo, full_path) @@ -27,20 +32,20 @@ class TestRefs(TestBase): def test_tag_base(self): tag_object_refs = list() - for tag in self.rorepo.tags: + for tag in TagReference.list_items(self.rorepo): assert "refs/tags" in tag.path assert tag.name - assert isinstance( tag.commit, Commit ) + assert isinstance(tag.commit, tag.CommitCls) if tag.tag is not None: - tag_object_refs.append( tag ) + tag_object_refs.append(tag) tagobj = tag.tag # have no dict self.failUnlessRaises(AttributeError, setattr, tagobj, 'someattr', 1) - assert isinstance( tagobj, TagObject ) + assert isinstance(tagobj, TagObject) assert tagobj.tag == tag.name - assert isinstance( tagobj.tagger, Actor ) - assert isinstance( tagobj.tagged_date, int ) - assert isinstance( tagobj.tagger_tz_offset, int ) + assert isinstance(tagobj.tagger, Actor) + assert isinstance(tagobj.tagged_date, int) + 
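# annotated tag objects carry the full tagger metadata (actor, date, timezone + # offset), unlike lightweight tags, which merely point at a commit + 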
assert isinstance(tagobj.tagger_tz_offset, int) assert tagobj.message assert tag.object == tagobj # can't assign the object @@ -48,15 +53,15 @@ class TestRefs(TestBase): # END if we have a tag object # END for tag in repo-tags assert tag_object_refs - assert isinstance(self.rorepo.tags['0.1.5'], TagReference) + assert isinstance(TagReference.list_items(self.rorepo)['0.1.6'], TagReference) def test_tags(self): # tag refs can point to tag objects or to commits s = set() ref_count = 0 - for ref in chain(self.rorepo.tags, self.rorepo.heads): + for ref in chain(TagReference.list_items(self.rorepo), Head.list_items(self.rorepo)): ref_count += 1 - assert isinstance(ref, refs.Reference) + assert isinstance(ref, Reference) assert str(ref) == ref.name assert repr(ref) assert ref == ref @@ -66,9 +71,9 @@ class TestRefs(TestBase): assert len(s) == ref_count assert len(s|s) == ref_count - @with_rw_repo('HEAD', bare=False) - def test_heads(self, rwrepo): - for head in rwrepo.heads: + @with_rw_repo("0.1.6") + def test_heads(self, rw_repo): + for head in Head.iter_items(rw_repo): assert head.name assert head.path assert "refs/heads" in head.path @@ -88,7 +93,7 @@ class TestRefs(TestBase): # after the clone, we might still have a tracking branch setup head.set_tracking_branch(None) assert head.tracking_branch() is None - remote_ref = rwrepo.remotes[0].refs[0] + remote_ref = RemoteReference.list_items(rw_repo)[0] assert head.set_tracking_branch(remote_ref) is head assert head.tracking_branch() == remote_ref head.set_tracking_branch(None) @@ -96,7 +101,7 @@ class TestRefs(TestBase): # END for each head # verify REFLOG gets altered - head = rwrepo.head + head = HEAD(rw_repo) cur_head = head.ref cur_commit = cur_head.commit pcommit = cur_head.commit.parents[0].parents[0] @@ -130,7 +135,7 @@ class TestRefs(TestBase): assert len(cur_head.log()) == blog_len+2 # a new branch has just a single entry - other_head = Head.create(rwrepo, 'mynewhead', pcommit, logmsg='new head created') + other_head = Head.create(rw_repo, 'mynewhead', pcommit, logmsg='new head created') log = other_head.log() assert len(log) == 1 assert log[0].oldhexsha == pcommit.NULL_HEX_SHA @@ -139,24 +144,25 @@ class TestRefs(TestBase): def test_refs(self): types_found = set() - for ref in self.rorepo.refs: + for ref in Reference.list_items(self.rorepo): types_found.add(type(ref)) assert len(types_found) >= 3 def test_is_valid(self): assert Reference(self.rorepo, 'refs/doesnt/exist').is_valid() == False - assert self.rorepo.head.is_valid() - assert self.rorepo.head.reference.is_valid() + assert HEAD(self.rorepo).is_valid() + assert HEAD(self.rorepo).reference.is_valid() assert SymbolicReference(self.rorepo, 'hellothere').is_valid() == False def test_orig_head(self): - assert type(self.rorepo.head.orig_head()) == SymbolicReference + assert type(HEAD(self.rorepo).orig_head()) == SymbolicReference - @with_rw_repo('0.1.6') + @with_rw_repo("0.1.6") def test_head_reset(self, rw_repo): - cur_head = rw_repo.head + cur_head = HEAD(rw_repo) old_head_commit = cur_head.commit new_head_commit = cur_head.ref.commit.parents[0] + cur_head.reset(new_head_commit, index=True) # index only assert cur_head.reference.commit == new_head_commit @@ -176,10 +182,9 @@ class TestRefs(TestBase): cur_head.reset(new_head_commit) rw_repo.index.checkout(["lib"], force=True)# - # now that we have a write write repo, change the HEAD reference - its # like git-reset --soft - heads = rw_repo.heads + heads = Head.list_items(rw_repo) assert heads for head in heads: cur_head.reference = 
head @@ -198,7 +203,7 @@ class TestRefs(TestBase): self.failUnlessRaises(TypeError, getattr, cur_head, "reference") # tags are references, hence we can point to them - some_tag = rw_repo.tags[0] + some_tag = TagReference.list_items(rw_repo)[0] cur_head.reference = some_tag assert not cur_head.is_detached assert cur_head.commit == some_tag.commit @@ -231,7 +236,7 @@ class TestRefs(TestBase): old_name = new_head.name assert new_head.rename("hello").name == "hello" - assert new_head.rename("hello/world").name == "hello/world" + assert new_head.rename("hello/world").name == "hello/world" # yes, this must work assert new_head.rename(old_name).name == old_name and new_head.path == old_path # rename with force @@ -414,12 +419,11 @@ class TestRefs(TestBase): symbol_ref_path = "refs/symbol_ref" symref = SymbolicReference(rw_repo, symbol_ref_path) assert symref.path == symbol_ref_path - symbol_ref_abspath = os.path.join(rw_repo.git_dir, symref.path) # set it symref.reference = new_head assert symref.reference == new_head - assert os.path.isfile(symbol_ref_abspath) + assert os.path.isfile(symref.abspath) assert symref.commit == new_head.commit for name in ('absname','folder/rela_name'): @@ -471,7 +475,7 @@ class TestRefs(TestBase): rw_repo.head.reference = Head.create(rw_repo, "master") # At least the head should still exist - assert os.path.isfile(os.path.join(rw_repo.git_dir, 'HEAD')) + assert os.path.isfile(rw_repo.head.abspath) refs = list(SymbolicReference.iter_items(rw_repo)) assert len(refs) == 1 @@ -517,5 +521,7 @@ class TestRefs(TestBase): assert SymbolicReference.dereference_recursive(self.rorepo, 'HEAD') def test_reflog(self): - assert isinstance(self.rorepo.heads.master.log(), RefLog) + assert isinstance(Head.list_items(self.rorepo).master.log(), RefLog) + def test_pure_python_rename(self): + raise SkipTest("Pure python reference renames cannot properly handle refnames which become a directory after rename") diff --git a/git/test/test_remote.py b/git/test/test_remote.py index af6915a3..8ae9fe43 100644 --- a/git/test/test_remote.py +++ b/git/test/test_remote.py @@ -4,9 +4,23 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * +from git.test.lib import ( + TestBase, + with_rw_and_rw_remote_repo, + with_rw_repo, + ) from git.util import IterableList +from git.db.interface import PushInfo, FetchInfo, RemoteProgress +from git.remote import * +from git.exc import GitCommandError +from git.refs import ( + Reference, + TagReference, + RemoteReference, + Head, + SymbolicReference + ) + import tempfile import shutil import os @@ -16,430 +30,421 @@ import random random.seed(0) class TestRemoteProgress(RemoteProgress): - __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages' ) - def __init__(self): - super(TestRemoteProgress, self).__init__() - self._seen_lines = list() - self._stages_per_op = dict() - self._num_progress_messages = 0 - - def _parse_progress_line(self, line): - # we may remove the line later if it is dropped - # Keep it for debugging - self._seen_lines.append(line) - rval = super(TestRemoteProgress, self)._parse_progress_line(line) - assert len(line) > 1, "line %r too short" % line - return rval - - def line_dropped(self, line): - try: - self._seen_lines.remove(line) - except ValueError: - pass - - def update(self, op_code, cur_count, max_count=None, message=''): - # check each stage only comes once - op_id = op_code & self.OP_MASK - assert 
op_id in (self.COUNTING, self.COMPRESSING, self.WRITING) - - self._stages_per_op.setdefault(op_id, 0) - self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK) - - if op_code & (self.WRITING|self.END) == (self.WRITING|self.END): - assert message - # END check we get message - - self._num_progress_messages += 1 - - - def make_assertion(self): - # we don't always receive messages - if not self._seen_lines: - return - - # sometimes objects are not compressed which is okay - assert len(self._seen_ops) in (2,3) - assert self._stages_per_op - - # must have seen all stages - for op, stages in self._stages_per_op.items(): - assert stages & self.STAGE_MASK == self.STAGE_MASK - # END for each op/stage + __slots__ = ( "_seen_lines", "_stages_per_op", '_num_progress_messages') + def __init__(self): + super(TestRemoteProgress, self).__init__() + self._seen_lines = list() + self._stages_per_op = dict() + self._seen_ops = set() + self._num_progress_messages = 0 + + def line_dropped(self, line): + try: + self._seen_lines.remove(line) + except ValueError: + pass + + def __call__(self, message, input=''): + pass + + def update(self, op_code, cur_count, max_count=None, message='', input=''): + # check each stage only comes once + if input: + self._seen_lines.append(input) + #END handle input + op_id = op_code & self.OP_MASK + assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING) + + self._stages_per_op.setdefault(op_id, 0) + self._stages_per_op[ op_id ] = self._stages_per_op[ op_id ] | (op_code & self.STAGE_MASK) + + if op_code & (self.WRITING|self.END) == (self.WRITING|self.END): + assert message + # END check we get message + + self._num_progress_messages += 1 + + + def make_assertion(self): + # we don't always receive messages + if not self._seen_lines: + return + + # sometimes objects are not compressed which is okay + assert len(self._stages_per_op.keys()) in (2,3) + assert self._stages_per_op + + # must have seen all stages + for op, stages in self._stages_per_op.items(): + assert stages & self.STAGE_MASK == self.STAGE_MASK + # END for each op/stage - def assert_received_message(self): - assert self._num_progress_messages - + def assert_received_message(self): + assert self._num_progress_messages + class TestRemote(TestBase): - - def _print_fetchhead(self, repo): - fp = open(os.path.join(repo.git_dir, "FETCH_HEAD")) - fp.close() - - - def _do_test_fetch_result(self, results, remote): - # self._print_fetchhead(remote.repo) - assert len(results) > 0 and isinstance(results[0], FetchInfo) - for info in results: - assert isinstance(info.note, basestring) - if isinstance(info.ref, Reference): - assert info.flags != 0 - # END reference type flags handling - assert isinstance(info.ref, (SymbolicReference, Reference)) - if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD): - assert isinstance(info.old_commit, Commit) - else: - assert info.old_commit is None - # END forced update checking - # END for each info - - def _do_test_push_result(self, results, remote): - assert len(results) > 0 and isinstance(results[0], PushInfo) - for info in results: - assert info.flags - assert isinstance(info.summary, basestring) - if info.old_commit is not None: - assert isinstance(info.old_commit, Commit) - if info.flags & info.ERROR: - has_one = False - for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE): - has_one |= bool(info.flags & bitflag) - # END for each bitflag - assert has_one - else: - # there must be a remote commit - if info.flags & info.DELETED == 
0: - assert isinstance(info.local_ref, Reference) - else: - assert info.local_ref is None - assert type(info.remote_ref) in (TagReference, RemoteReference) - # END error checking - # END for each info - - - def _do_test_fetch_info(self, repo): - self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "nonsense", '') - self.failUnlessRaises(ValueError, FetchInfo._from_line, repo, "? [up to date] 0.1.7RC -> origin/0.1.7RC", '') - - def _commit_random_file(self, repo): - #Create a file with a random name and random data and commit it to repo. - # Return the commited absolute file path - index = repo.index - new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo) - index.add([new_file]) - index.commit("Committing %s" % new_file) - return new_file - - def _do_test_fetch(self,remote, rw_repo, remote_repo): - # specialized fetch testing to de-clutter the main test - self._do_test_fetch_info(rw_repo) - - def fetch_and_test(remote, **kwargs): - progress = TestRemoteProgress() - kwargs['progress'] = progress - res = remote.fetch(**kwargs) - progress.make_assertion() - self._do_test_fetch_result(res, remote) - return res - # END fetch and check - - def get_info(res, remote, name): - return res["%s/%s"%(remote,name)] - - # put remote head to master as it is garantueed to exist - remote_repo.head.reference = remote_repo.heads.master - - res = fetch_and_test(remote) - # all uptodate - for info in res: - assert info.flags & info.HEAD_UPTODATE - - # rewind remote head to trigger rejection - # index must be false as remote is a bare repo - rhead = remote_repo.head - remote_commit = rhead.commit - rhead.reset("HEAD~2", index=False) - res = fetch_and_test(remote) - mkey = "%s/%s"%(remote,'master') - master_info = res[mkey] - assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None - - # normal fast forward - set head back to previous one - rhead.commit = remote_commit - res = fetch_and_test(remote) - assert res[mkey].flags & FetchInfo.FAST_FORWARD - - # new remote branch - new_remote_branch = Head.create(remote_repo, "new_branch") - res = fetch_and_test(remote) - new_branch_info = get_info(res, remote, new_remote_branch) - assert new_branch_info.flags & FetchInfo.NEW_HEAD - - # remote branch rename ( causes creation of a new one locally ) - new_remote_branch.rename("other_branch_name") - res = fetch_and_test(remote) - other_branch_info = get_info(res, remote, new_remote_branch) - assert other_branch_info.ref.commit == new_branch_info.ref.commit - - # remove new branch - Head.delete(new_remote_branch.repo, new_remote_branch) - res = fetch_and_test(remote) - # deleted remote will not be fetched - self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch) - - # prune stale tracking branches - stale_refs = remote.stale_refs - assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference) - RemoteReference.delete(rw_repo, *stale_refs) - - # test single branch fetch with refspec including target remote - res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote) - assert len(res) == 1 and get_info(res, remote, 'master') - - # ... 
with respec and no target - res = fetch_and_test(remote, refspec='master') - assert len(res) == 1 - - # add new tag reference - rtag = TagReference.create(remote_repo, "1.0-RV_hello.there") - res = fetch_and_test(remote, tags=True) - tinfo = res[str(rtag)] - assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit - assert tinfo.flags & tinfo.NEW_TAG - - # adjust tag commit - Reference.set_object(rtag, rhead.commit.parents[0].parents[0]) - res = fetch_and_test(remote, tags=True) - tinfo = res[str(rtag)] - assert tinfo.commit == rtag.commit - assert tinfo.flags & tinfo.TAG_UPDATE - - # delete remote tag - local one will stay - TagReference.delete(remote_repo, rtag) - res = fetch_and_test(remote, tags=True) - self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag)) - - # provoke to receive actual objects to see what kind of output we have to - # expect. For that we need a remote transport protocol - # Create a new UN-shared repo and fetch into it after we pushed a change - # to the shared repo - other_repo_dir = tempfile.mktemp("other_repo") - # must clone with a local path for the repo implementation not to freak out - # as it wants local paths only ( which I can understand ) - other_repo = remote_repo.clone(other_repo_dir, shared=False) - remote_repo_url = "git://localhost%s"%remote_repo.git_dir - - # put origin to git-url - other_origin = other_repo.remotes.origin - other_origin.config_writer.set("url", remote_repo_url) - # it automatically creates alternates as remote_repo is shared as well. - # It will use the transport though and ignore alternates when fetching - # assert not other_repo.alternates # this would fail - - # assure we are in the right state - rw_repo.head.reset(remote.refs.master, working_tree=True) - try: - self._commit_random_file(rw_repo) - remote.push(rw_repo.head.reference) - - # here I would expect to see remote-information about packing - # objects and so on. Unfortunately, this does not happen - # if we are redirecting the output - git explicitly checks for this - # and only provides progress information to ttys - res = fetch_and_test(other_origin) - finally: - shutil.rmtree(other_repo_dir) - # END test and cleanup - - def _test_push_and_pull(self,remote, rw_repo, remote_repo): - # push our changes - lhead = rw_repo.head - lindex = rw_repo.index - # assure we are on master and it is checked out where the remote is - try: - lhead.reference = rw_repo.heads.master - except AttributeError: - # if the author is on a non-master branch, the clones might not have - # a local master yet. 
We simply create it - lhead.reference = rw_repo.create_head('master') - # END master handling - lhead.reset(remote.refs.master, working_tree=True) - - # push without spec should fail ( without further configuration ) - # well, works nicely - # self.failUnlessRaises(GitCommandError, remote.push) - - # simple file push - self._commit_random_file(rw_repo) - progress = TestRemoteProgress() - res = remote.push(lhead.reference, progress) - assert isinstance(res, IterableList) - self._do_test_push_result(res, remote) - progress.make_assertion() - - # rejected - undo last commit - lhead.reset("HEAD~1") - res = remote.push(lhead.reference) - assert res[0].flags & PushInfo.ERROR - assert res[0].flags & PushInfo.REJECTED - self._do_test_push_result(res, remote) - - # force rejected pull - res = remote.push('+%s' % lhead.reference) - assert res[0].flags & PushInfo.ERROR == 0 - assert res[0].flags & PushInfo.FORCED_UPDATE - self._do_test_push_result(res, remote) - - # invalid refspec - res = remote.push("hellothere") - assert len(res) == 0 - - # push new tags - progress = TestRemoteProgress() - to_be_updated = "my_tag.1.0RV" - new_tag = TagReference.create(rw_repo, to_be_updated) - other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message") - res = remote.push(progress=progress, tags=True) - assert res[-1].flags & PushInfo.NEW_TAG - progress.make_assertion() - self._do_test_push_result(res, remote) - - # update push new tags - # Rejection is default - new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True) - res = remote.push(tags=True) - self._do_test_push_result(res, remote) - assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR - - # push force this tag - res = remote.push("+%s" % new_tag.path) - assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE - - # delete tag - have to do it using refspec - res = remote.push(":%s" % new_tag.path) - self._do_test_push_result(res, remote) - assert res[0].flags & PushInfo.DELETED - # Currently progress is not properly transferred, especially not using - # the git daemon - # progress.assert_received_message() - - # push new branch - new_head = Head.create(rw_repo, "my_new_branch") - progress = TestRemoteProgress() - res = remote.push(new_head, progress) - assert res[0].flags & PushInfo.NEW_HEAD - progress.make_assertion() - self._do_test_push_result(res, remote) - - # delete new branch on the remote end and locally - res = remote.push(":%s" % new_head.path) - self._do_test_push_result(res, remote) - Head.delete(rw_repo, new_head) - assert res[-1].flags & PushInfo.DELETED - - # --all - res = remote.push(all=True) - self._do_test_push_result(res, remote) - - remote.pull('master') - - # cleanup - delete created tags and branches as we are in an innerloop on - # the same repository - TagReference.delete(rw_repo, new_tag, other_tag) - remote.push(":%s" % other_tag.path) - - @with_rw_and_rw_remote_repo('0.1.6') - def test_base(self, rw_repo, remote_repo): - num_remotes = 0 - remote_set = set() - ran_fetch_test = False - - for remote in rw_repo.remotes: - num_remotes += 1 - assert remote == remote - assert str(remote) != repr(remote) - remote_set.add(remote) - remote_set.add(remote) # should already exist - - # REFS - refs = remote.refs - assert refs - for ref in refs: - assert ref.remote_name == remote.name - assert ref.remote_head - # END for each ref - - # OPTIONS - # cannot use 'fetch' key anymore as it is now a method - for opt in ("url", ): - val = 
getattr(remote, opt) - reader = remote.config_reader - assert reader.get(opt) == val - assert reader.get_value(opt, None) == val - - # unable to write with a reader - self.failUnlessRaises(IOError, reader.set, opt, "test") - - # change value - writer = remote.config_writer - new_val = "myval" - writer.set(opt, new_val) - assert writer.get(opt) == new_val - writer.set(opt, val) - assert writer.get(opt) == val - del(writer) - assert getattr(remote, opt) == val - # END for each default option key - - # RENAME - other_name = "totally_other_name" - prev_name = remote.name - assert remote.rename(other_name) == remote - assert prev_name != remote.name - # multiple times - for time in range(2): - assert remote.rename(prev_name).name == prev_name - # END for each rename ( back to prev_name ) - - # PUSH/PULL TESTING - self._test_push_and_pull(remote, rw_repo, remote_repo) - - # FETCH TESTING - # Only for remotes - local cases are the same or less complicated - # as additional progress information will never be emitted - if remote.name == "daemon_origin": - self._do_test_fetch(remote, rw_repo, remote_repo) - ran_fetch_test = True - # END fetch test - - remote.update() - # END for each remote - - assert ran_fetch_test - assert num_remotes - assert num_remotes == len(remote_set) - - origin = rw_repo.remote('origin') - assert origin == rw_repo.remotes.origin - - @with_rw_repo('HEAD', bare=True) - def test_creation_and_removal(self, bare_rw_repo): - new_name = "test_new_one" - arg_list = (new_name, "git@server:hello.git") - remote = Remote.create(bare_rw_repo, *arg_list ) - assert remote.name == "test_new_one" - assert remote in bare_rw_repo.remotes - - # create same one again - self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list) - - Remote.remove(bare_rw_repo, new_name) - - for remote in bare_rw_repo.remotes: - if remote.name == new_name: - raise AssertionError("Remote removal failed") - # END if deleted remote matches existing remote's name - # END for each remote - - - + + def _print_fetchhead(self, repo): + fp = open(os.path.join(repo.git_dir, "FETCH_HEAD")) + fp.close() + + + def _do_test_fetch_result(self, results, remote): + # self._print_fetchhead(remote.repo) + assert len(results) > 0 and isinstance(results[0], FetchInfo) + for info in results: + assert isinstance(info.note, basestring) + if isinstance(info.ref, Reference): + assert info.flags != 0 + # END reference type flags handling + assert isinstance(info.ref, (SymbolicReference, Reference)) + if info.flags & (info.FORCED_UPDATE|info.FAST_FORWARD): + assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20 + else: + assert info.old_commit_binsha is None + # END forced update checking + # END for each info + + def _do_test_push_result(self, results, remote): + assert len(results) > 0 and isinstance(results[0], PushInfo) + for info in results: + assert info.flags + assert isinstance(info.summary, basestring) + if info.old_commit_binsha is not None: + assert isinstance(info.old_commit_binsha, str) and len(info.old_commit_binsha) == 20 + if info.flags & info.ERROR: + has_one = False + for bitflag in (info.REJECTED, info.REMOTE_REJECTED, info.REMOTE_FAILURE): + has_one |= bool(info.flags & bitflag) + # END for each bitflag + assert has_one + else: + # there must be a remote commit + if info.flags & info.DELETED == 0: + assert isinstance(info.local_ref, Reference) + else: + assert info.local_ref is None + assert type(info.remote_ref) in (TagReference, RemoteReference) + # END error checking + 
# END for each info + + def _commit_random_file(self, repo): + # Create a file with a random name and random data and commit it to repo. + # Return the committed absolute file path + index = repo.index + new_file = self._make_file(os.path.basename(tempfile.mktemp()),str(random.random()), repo) + index.add([new_file]) + index.commit("Committing %s" % new_file) + return new_file + + def _do_test_fetch(self,remote, rw_repo, remote_repo): + def fetch_and_test(remote, **kwargs): + progress = TestRemoteProgress() + kwargs['progress'] = progress + res = remote.fetch(**kwargs) + progress.make_assertion() + self._do_test_fetch_result(res, remote) + return res + # END fetch and check + + def get_info(res, remote, name): + return res["%s/%s"%(remote,name)] + + # put remote head to master as it is guaranteed to exist + remote_repo.head.reference = remote_repo.heads.master + + res = fetch_and_test(remote) + # all up to date + for info in res: + assert info.flags & info.HEAD_UPTODATE + + # rewind remote head to trigger rejection + # index must be False as remote is a bare repo + rhead = remote_repo.head + remote_commit = rhead.commit + rhead.reset("HEAD~2", index=False) + res = fetch_and_test(remote) + mkey = "%s/%s"%(remote,'master') + master_info = res[mkey] + assert master_info.flags & FetchInfo.FORCED_UPDATE and master_info.note is not None + + # normal fast forward - set head back to previous one + rhead.commit = remote_commit + res = fetch_and_test(remote) + assert res[mkey].flags & FetchInfo.FAST_FORWARD + + # new remote branch + new_remote_branch = Head.create(remote_repo, "new_branch") + res = fetch_and_test(remote) + new_branch_info = get_info(res, remote, new_remote_branch) + assert new_branch_info.flags & FetchInfo.NEW_HEAD + + # remote branch rename ( causes creation of a new one locally ) + new_remote_branch.rename("other_branch_name") + res = fetch_and_test(remote) + other_branch_info = get_info(res, remote, new_remote_branch) + assert other_branch_info.ref.commit == new_branch_info.ref.commit + + # remove new branch + Head.delete(new_remote_branch.repo, new_remote_branch) + res = fetch_and_test(remote) + # deleted remote will not be fetched + self.failUnlessRaises(IndexError, get_info, res, remote, new_remote_branch) + + # prune stale tracking branches + stale_refs = remote.stale_refs + assert len(stale_refs) == 2 and isinstance(stale_refs[0], RemoteReference) + RemoteReference.delete(rw_repo, *stale_refs) + + # test single branch fetch with refspec including target remote + res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master"%remote) + assert len(res) == 1 and get_info(res, remote, 'master') + + # ... 
with refspec and no target + res = fetch_and_test(remote, refspec='master') + assert len(res) == 1 + + # add new tag reference + rtag = TagReference.create(remote_repo, "1.0-RV_hello.there") + res = fetch_and_test(remote, tags=True) + tinfo = res[str(rtag)] + assert isinstance(tinfo.ref, TagReference) and tinfo.ref.commit == rtag.commit + assert tinfo.flags & tinfo.NEW_TAG + + # adjust tag commit + Reference.set_object(rtag, rhead.commit.parents[0].parents[0]) + res = fetch_and_test(remote, tags=True) + tinfo = res[str(rtag)] + assert tinfo.commit == rtag.commit + assert tinfo.flags & tinfo.TAG_UPDATE + + # delete remote tag - local one will stay + TagReference.delete(remote_repo, rtag) + res = fetch_and_test(remote, tags=True) + self.failUnlessRaises(IndexError, get_info, res, remote, str(rtag)) + + # provoke receiving actual objects to see what kind of output we have to + # expect. For that we need a remote transport protocol + # Create a new UN-shared repo and fetch into it after we pushed a change + # to the shared repo + other_repo_dir = tempfile.mktemp("other_repo") + # must clone with a local path for the repo implementation not to freak out + # as it wants local paths only ( which I can understand ) + other_repo = remote_repo.clone(other_repo_dir, shared=False) + remote_repo_url = "git://localhost%s"%remote_repo.git_dir + + # put origin to git-url + other_origin = other_repo.remotes.origin + other_origin.config_writer.set("url", remote_repo_url) + # it automatically creates alternates as remote_repo is shared as well. + # It will use the transport though and ignore alternates when fetching + # assert not other_repo.alternates # this would fail + + # assure we are in the right state + rw_repo.head.reset(remote.refs.master, working_tree=True) + try: + self._commit_random_file(rw_repo) + remote.push(rw_repo.head.reference) + + # here I would expect to see remote information about packing + # objects and so on. Unfortunately, this does not happen + # if we are redirecting the output - git explicitly checks for this + # and only provides progress information to ttys + res = fetch_and_test(other_origin) + finally: + shutil.rmtree(other_repo_dir) + # END test and cleanup + + def _test_push_and_pull(self,remote, rw_repo, remote_repo): + # push our changes + lhead = rw_repo.head + lindex = rw_repo.index + # assure we are on master and it is checked out where the remote is + try: + lhead.reference = rw_repo.heads.master + except AttributeError: + # if the author is on a non-master branch, the clones might not have + # a local master yet. 
We simply create it + lhead.reference = rw_repo.create_head('master') + # END master handling + lhead.reset(remote.refs.master, working_tree=True) + + # push without spec should fail ( without further configuration ) + # well, works nicely + # self.failUnlessRaises(GitCommandError, remote.push) + + # simple file push + self._commit_random_file(rw_repo) + progress = TestRemoteProgress() + res = remote.push(lhead.reference, progress) + assert isinstance(res, IterableList) + self._do_test_push_result(res, remote) + progress.make_assertion() + + # rejected - undo last commit + lhead.reset("HEAD~1") + res = remote.push(lhead.reference) + assert res[0].flags & PushInfo.ERROR + assert res[0].flags & PushInfo.REJECTED + self._do_test_push_result(res, remote) + + # force rejected pull + res = remote.push('+%s' % lhead.reference) + assert res[0].flags & PushInfo.ERROR == 0 + assert res[0].flags & PushInfo.FORCED_UPDATE + self._do_test_push_result(res, remote) + + # invalid refspec + res = remote.push("hellothere") + assert len(res) == 0 + + # push new tags + progress = TestRemoteProgress() + to_be_updated = "my_tag.1.0RV" + new_tag = TagReference.create(rw_repo, to_be_updated) + other_tag = TagReference.create(rw_repo, "my_obj_tag.2.1aRV", message="my message") + res = remote.push(progress=progress, tags=True) + assert res[-1].flags & PushInfo.NEW_TAG + progress.make_assertion() + self._do_test_push_result(res, remote) + + # update push new tags + # Rejection is default + new_tag = TagReference.create(rw_repo, to_be_updated, ref='HEAD~1', force=True) + res = remote.push(tags=True) + self._do_test_push_result(res, remote) + assert res[-1].flags & PushInfo.REJECTED and res[-1].flags & PushInfo.ERROR + + # push force this tag + res = remote.push("+%s" % new_tag.path) + assert res[-1].flags & PushInfo.ERROR == 0 and res[-1].flags & PushInfo.FORCED_UPDATE + + # delete tag - have to do it using refspec + res = remote.push(":%s" % new_tag.path) + self._do_test_push_result(res, remote) + assert res[0].flags & PushInfo.DELETED + # Currently progress is not properly transferred, especially not using + # the git daemon + # progress.assert_received_message() + + # push new branch + new_head = Head.create(rw_repo, "my_new_branch") + progress = TestRemoteProgress() + res = remote.push(new_head, progress) + assert res[0].flags & PushInfo.NEW_HEAD + progress.make_assertion() + self._do_test_push_result(res, remote) + + # delete new branch on the remote end and locally + res = remote.push(":%s" % new_head.path) + self._do_test_push_result(res, remote) + Head.delete(rw_repo, new_head) + assert res[-1].flags & PushInfo.DELETED + + # --all + res = remote.push(all=True) + self._do_test_push_result(res, remote) + + remote.pull('master') + + # cleanup - delete created tags and branches as we are in an innerloop on + # the same repository + TagReference.delete(rw_repo, new_tag, other_tag) + remote.push(":%s" % other_tag.path) + + @with_rw_and_rw_remote_repo('0.1.6') + def test_base(self, rw_repo, remote_repo): + num_remotes = 0 + remote_set = set() + ran_fetch_test = False + + for remote in rw_repo.remotes: + num_remotes += 1 + assert remote == remote + assert str(remote) != repr(remote) + remote_set.add(remote) + remote_set.add(remote) # should already exist + + # REFS + refs = remote.refs + assert refs + for ref in refs: + assert ref.remote_name == remote.name + assert ref.remote_head + # END for each ref + + # OPTIONS + # cannot use 'fetch' key anymore as it is now a method + for opt in ("url", ): + val = 
getattr(remote, opt) + reader = remote.config_reader + assert reader.get(opt) == val + assert reader.get_value(opt, None) == val + + # unable to write with a reader + self.failUnlessRaises(IOError, reader.set, opt, "test") + + # change value + writer = remote.config_writer + new_val = "myval" + writer.set(opt, new_val) + assert writer.get(opt) == new_val + writer.set(opt, val) + assert writer.get(opt) == val + del(writer) + assert getattr(remote, opt) == val + # END for each default option key + + # RENAME + other_name = "totally_other_name" + prev_name = remote.name + assert remote.rename(other_name) == remote + assert prev_name != remote.name + # multiple times + for time in range(2): + assert remote.rename(prev_name).name == prev_name + # END for each rename ( back to prev_name ) + + # PUSH/PULL TESTING + self._test_push_and_pull(remote, rw_repo, remote_repo) + + # FETCH TESTING + # Only for remotes - local cases are the same or less complicated + # as additional progress information will never be emitted + if remote.name == "daemon_origin": + self._do_test_fetch(remote, rw_repo, remote_repo) + ran_fetch_test = True + # END fetch test + + remote.update() + # END for each remote + + assert ran_fetch_test + assert num_remotes + assert num_remotes == len(remote_set) + + origin = rw_repo.remote('origin') + assert origin == rw_repo.remotes.origin + + @with_rw_repo('HEAD', bare=True) + def test_creation_and_removal(self, bare_rw_repo): + new_name = "test_new_one" + arg_list = (new_name, "git@server:hello.git") + remote = Remote.create(bare_rw_repo, *arg_list ) + assert remote.name == "test_new_one" + assert remote in bare_rw_repo.remotes + + # create same one again + self.failUnlessRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list) + + Remote.remove(bare_rw_repo, new_name) + + for remote in bare_rw_repo.remotes: + if remote.name == new_name: + raise AssertionError("Remote removal failed") + # END if deleted remote matches existing remote's name + # END for each remote + + + diff --git a/git/test/test_stats.py b/git/test/test_stats.py index 2bdb0a89..27be6a77 100644 --- a/git/test/test_stats.py +++ b/git/test/test_stats.py @@ -4,8 +4,12 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.test.lib import * -from git import * +from git.test.lib import ( + TestBase, + fixture, + assert_equal + ) +from git.util import Stats class TestStats(TestBase): diff --git a/git/test/test_stream.py b/git/test/test_stream.py new file mode 100644 index 00000000..8d7a5f9a --- /dev/null +++ b/git/test/test_stream.py @@ -0,0 +1,155 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Test for object db""" +from lib import ( + TestBase, + DummyStream, + Sha1Writer, + make_bytes, + make_object, + fixture_path + ) + +from git.stream import * +from git.util import ( + NULL_HEX_SHA, + hex_to_bin + ) + +from git.util import zlib +from git.typ import ( + str_blob_type + ) + +from git.db.py.loose import PureLooseObjectODB +import time +import tempfile +import os + + + + +class TestStream(TestBase): + """Test stream classes""" + + data_sizes = (15, 10000, 1000*1024+512) + + def _assert_stream_reader(self, stream, cdata, rewind_stream=lambda s: None): + """Make stream tests - the orig_stream is seekable, allowing it to be + rewound and reused + :param cdata: 
the data we expect to read from stream, the contents + :param rewind_stream: function called to rewind the stream to make it ready + for reuse""" + ns = 10 + assert len(cdata) > ns-1, "Data must be larger than %i, was %i" % (ns, len(cdata)) + + # read in small steps + ss = len(cdata) / ns + for i in range(ns): + data = stream.read(ss) + chunk = cdata[i*ss:(i+1)*ss] + assert data == chunk + # END for each step + rest = stream.read() + if rest: + assert rest == cdata[-len(rest):] + # END handle rest + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + rewind_stream(stream) + + # read everything + rdata = stream.read() + assert rdata == cdata + + if isinstance(stream, DecompressMemMapReader): + assert len(stream.data()) == stream.compressed_bytes_read() + # END handle special type + + def test_decompress_reader(self): + for close_on_deletion in range(2): + for with_size in range(2): + for ds in self.data_sizes: + cdata = make_bytes(ds, randomize=False) + + # zdata = zipped actual data + # cdata = original content data + + # create reader + if with_size: + # need object data + zdata = zlib.compress(make_object(str_blob_type, cdata)) + type, size, reader = DecompressMemMapReader.new(zdata, close_on_deletion) + assert size == len(cdata) + assert type == str_blob_type + + # even if we don't set the size, it will be set automatically on first read + test_reader = DecompressMemMapReader(zdata, close_on_deletion=False) + assert test_reader._s == len(cdata) + else: + # here we need content data + zdata = zlib.compress(cdata) + reader = DecompressMemMapReader(zdata, close_on_deletion, len(cdata)) + assert reader._s == len(cdata) + # END get reader + + self._assert_stream_reader(reader, cdata, lambda r: r.seek(0)) + + # put in a dummy stream for closing + dummy = DummyStream() + reader._m = dummy + + assert not dummy.closed + del(reader) + assert dummy.closed == close_on_deletion + # END for each datasize + # END whether size should be used + # END whether stream should be closed when deleted + + def test_sha_writer(self): + writer = Sha1Writer() + assert 2 == writer.write("hi") + assert len(writer.sha(as_hex=1)) == 40 + assert len(writer.sha(as_hex=0)) == 20 + + # make sure it does something ;) + prev_sha = writer.sha() + writer.write("hi again") + assert writer.sha() != prev_sha + + def test_compressed_writer(self): + for ds in self.data_sizes: + fd, path = tempfile.mkstemp() + ostream = FDCompressedSha1Writer(fd) + data = make_bytes(ds, randomize=False) + + # for now, just a single write, code doesn't care about chunking + assert len(data) == ostream.write(data) + ostream.close() + + # it's closed already + self.failUnlessRaises(OSError, os.close, fd) + + # read everything back, compare to data we zip + fd = os.open(path, os.O_RDONLY|getattr(os, 'O_BINARY', 0)) + written_data = os.read(fd, os.path.getsize(path)) + assert len(written_data) == os.path.getsize(path) + os.close(fd) + assert written_data == zlib.compress(data, 1) # best speed + + os.remove(path) + # END for each data size + + def test_decompress_reader_special_case(self): + odb = PureLooseObjectODB(fixture_path('objects')) + ostream = odb.stream(hex_to_bin('7bb839852ed5e3a069966281bb08d50012fb309b')) + + # if there is a bug, we will be missing one byte exactly ! 
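+ # read back the full stream and compare against the size advertised by the + # object header - a truncated zlib tail would show up as a length mismatch here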
+ data = ostream.read() + assert len(data) == ostream.size + diff --git a/git/test/test_util.py b/git/test/test_util.py index e55a6d15..f737e660 100644 --- a/git/test/test_util.py +++ b/git/test/test_util.py @@ -7,7 +7,7 @@ import os import tempfile -from git.test.lib import * +from lib import TestBase from git.util import * from git.objects.util import * from git import * @@ -15,6 +15,14 @@ from git.cmd import dashify import time +from git.util import ( + to_hex_sha, + to_bin_sha, + NULL_HEX_SHA, + LockedFD, + Actor + ) + class TestUtils(TestBase): def setup(self): @@ -25,8 +33,8 @@ class TestUtils(TestBase): } def test_it_should_dashify(self): - assert_equal('this-is-my-argument', dashify('this_is_my_argument')) - assert_equal('foo', dashify('foo')) + assert 'this-is-my-argument' == dashify('this_is_my_argument') + assert 'foo' == dashify('foo') def test_lock_file(self): @@ -107,3 +115,118 @@ class TestUtils(TestBase): assert isinstance(Actor.committer(cr), Actor) assert isinstance(Actor.author(cr), Actor) #END assure config reader is handled + + def test_basics(self): + assert to_hex_sha(NULL_HEX_SHA) == NULL_HEX_SHA + assert len(to_bin_sha(NULL_HEX_SHA)) == 20 + assert to_hex_sha(to_bin_sha(NULL_HEX_SHA)) == NULL_HEX_SHA + + def _cmp_contents(self, file_path, data): + # raise if data from file at file_path + # does not match data string + fp = open(file_path, "rb") + try: + assert fp.read() == data + finally: + fp.close() + + def test_lockedfd(self): + my_file = tempfile.mktemp() + orig_data = "hello" + new_data = "world" + my_file_fp = open(my_file, "wb") + my_file_fp.write(orig_data) + my_file_fp.close() + + try: + lfd = LockedFD(my_file) + lockfilepath = lfd._lockfilepath() + + # cannot end before it was started + self.failUnlessRaises(AssertionError, lfd.rollback) + self.failUnlessRaises(AssertionError, lfd.commit) + + # open for writing + assert not os.path.isfile(lockfilepath) + wfd = lfd.open(write=True) + assert lfd._fd is wfd + assert os.path.isfile(lockfilepath) + + # write data and fail + os.write(wfd, new_data) + lfd.rollback() + assert lfd._fd is None + self._cmp_contents(my_file, orig_data) + assert not os.path.isfile(lockfilepath) + + # additional call doesnt fail + lfd.commit() + lfd.rollback() + + # test reading + lfd = LockedFD(my_file) + rfd = lfd.open(write=False) + assert os.read(rfd, len(orig_data)) == orig_data + + assert os.path.isfile(lockfilepath) + # deletion rolls back + del(lfd) + assert not os.path.isfile(lockfilepath) + + + # write data - concurrently + lfd = LockedFD(my_file) + olfd = LockedFD(my_file) + assert not os.path.isfile(lockfilepath) + wfdstream = lfd.open(write=True, stream=True) # this time as stream + assert os.path.isfile(lockfilepath) + # another one fails + self.failUnlessRaises(IOError, olfd.open) + + wfdstream.write(new_data) + lfd.commit() + assert not os.path.isfile(lockfilepath) + self._cmp_contents(my_file, new_data) + + # could test automatic _end_writing on destruction + finally: + os.remove(my_file) + # END final cleanup + + # try non-existing file for reading + lfd = LockedFD(tempfile.mktemp()) + try: + lfd.open(write=False) + except OSError: + assert not os.path.exists(lfd._lockfilepath()) + else: + self.fail("expected OSError") + # END handle exceptions + + +class TestActor(TestBase): + def test_from_string_should_separate_name_and_email(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert "Michael Trier" == a.name + assert "mtrier@example.com" == a.email + + # base type capabilities + assert a == a + 
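# equality, inequality and hashing are value based - the same actor collapses to one set entry below +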
assert not ( a != a ) + m = set() + m.add(a) + m.add(a) + assert len(m) == 1 + + def test_from_string_should_handle_just_name(self): + a = Actor._from_string("Michael Trier") + assert "Michael Trier" == a.name + assert None == a.email + + def test_should_display_representation(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert '<git.Actor "Michael Trier <mtrier@example.com>">' == repr(a) + + def test_str_should_alias_name(self): + a = Actor._from_string("Michael Trier <mtrier@example.com>") + assert a.name == str(a) diff --git a/git/typ.py b/git/typ.py new file mode 100644 index 00000000..a2e719be --- /dev/null +++ b/git/typ.py @@ -0,0 +1,27 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing information about types known to the database""" + +#{ String types + +# For compatability only, use ObjectType instead +str_blob_type = "blob" +str_commit_type = "commit" +str_tree_type = "tree" +str_tag_type = "tag" + +class ObjectType(object): + """Enumeration providing object types as strings and ids""" + blob = str_blob_type + commit = str_commit_type + tree = str_tree_type + tag = str_tag_type + + commit_id = 1 + tree_id = 2 + blob_id = 3 + tag_id = 4 + +#} END string types diff --git a/git/util.py b/git/util.py index 7cbef07f..6009e158 100644 --- a/git/util.py +++ b/git/util.py @@ -4,28 +4,149 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +import platform +import binascii import os -import re +import mmap import sys +import errno +import re import time import tempfile -import platform - -from gitdb.util import ( - make_sha, - LockedFD, - file_contents_ro, - LazyMixin, - to_hex_sha, - to_bin_sha - ) __all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux", "join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList", "BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists', - 'RemoteProgress') + 'RepoAliasMixin', 'LockedFD', 'LazyMixin' ) + +from cStringIO import StringIO + +# in py 2.4, StringIO is only StringI, without write support. 
+# Hence we must use the python implementation for this +if sys.version_info[1] < 5: + from StringIO import StringIO +# END handle python 2.4 + +try: + import async.mod.zlib as zlib +except ImportError: + import zlib +# END try async zlib + +from async import ThreadPool + +try: + import hashlib +except ImportError: + import sha + +try: + from struct import unpack_from +except ImportError: + from struct import unpack, calcsize + __calcsize_cache = dict() + def unpack_from(fmt, data, offset=0): + try: + size = __calcsize_cache[fmt] + except KeyError: + size = calcsize(fmt) + __calcsize_cache[fmt] = size + # END exception handling + return unpack(fmt, data[offset : offset + size]) + # END own unpack_from implementation + + +#{ Globals + +# A pool distributing tasks, initially with zero threads, hence everything +# will be handled in the main thread +pool = ThreadPool(0) + +#} END globals + + +#{ Aliases + +hex_to_bin = binascii.a2b_hex +bin_to_hex = binascii.b2a_hex + +# errors +ENOENT = errno.ENOENT + +# os shortcuts +exists = os.path.exists +mkdir = os.mkdir +chmod = os.chmod +isdir = os.path.isdir +isfile = os.path.isfile +rename = os.rename +remove = os.remove +dirname = os.path.dirname +basename = os.path.basename +normpath = os.path.normpath +expandvars = os.path.expandvars +expanduser = os.path.expanduser +abspath = os.path.abspath +join = os.path.join +read = os.read +write = os.write +close = os.close +fsync = os.fsync + +# constants +NULL_HEX_SHA = "0"*40 +NULL_BIN_SHA = "\0"*20 + +#} END Aliases + +#{ compatibility stuff ... + +class _RandomAccessStringIO(object): + """Wrapper to provide required functionality in case memory maps cannot or may + not be used. This is only really required in python 2.4""" + __slots__ = '_sio' + + def __init__(self, buf=''): + self._sio = StringIO(buf) + + def __getattr__(self, attr): + return getattr(self._sio, attr) + + def __len__(self): + return len(self.getvalue()) + + def __getitem__(self, i): + return self.getvalue()[i] + + def __getslice__(self, start, end): + return self.getvalue()[start:end] + +#} END compatibility stuff ... 
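# A minimal usage sketch of how the fallback wrapper above is meant to behave,
# assuming a py2 interpreter: _RandomAccessStringIO only mimics enough of the
# mmap/buffer protocol - length, indexing, slicing - while delegating the file
# API to the wrapped StringIO instance.
buf = _RandomAccessStringIO("hello world")
assert len(buf) == 11                   # __len__ measures getvalue()
assert buf[0] == 'h'                    # __getitem__ indexes the buffer
assert buf[0:5] == 'hello'              # __getslice__ handles py2 slice syntax
assert buf.read() == 'hello world'      # read() is delegated via __getattr__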
+ +#{ Routines + +def get_user_id(): + """:return: string identifying the currently active system user as name@node + :note: user can be set with the 'USER' environment variable, usually set on windows""" + ukn = 'UNKNOWN' + username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) + if username == ukn and hasattr(os, 'getlogin'): + username = os.getlogin() + # END get username from login + return "%s@%s" % (username, platform.node()) + +def is_git_dir(d): + """ This is taken from the git setup.c:is_git_directory + function.""" + if isdir(d) and \ + isdir(join(d, 'objects')) and \ + isdir(join(d, 'refs')): + headref = join(d, 'HEAD') + return isfile(headref) or \ + (os.path.islink(headref) and + os.readlink(headref).startswith('refs')) + return False -#{ Utility Methods def stream_copy(source, destination, chunk_size=512*1024): """Copy all data from the source stream into the destination stream in chunks @@ -41,6 +162,87 @@ def stream_copy(source, destination, chunk_size=512*1024): break # END reading output stream return br + +def make_sha(source=''): + """A python2.4 workaround for the sha/hashlib module fiasco + :note: From the dulwich project """ + try: + return hashlib.sha1(source) + except NameError: + sha1 = sha.sha(source) + return sha1 + +def allocate_memory(size): + """:return: a file-protocol accessible memory block of the given size""" + if size == 0: + return _RandomAccessStringIO('') + # END handle empty chunks gracefully + + try: + return mmap.mmap(-1, size) # read-write by default + except EnvironmentError: + # setup real memory instead + # this of course may fail if the amount of memory is not available in + # one chunk - would only be the case in python 2.4, being more likely on + # 32 bit systems. + return _RandomAccessStringIO("\0"*size) + # END handle memory allocation + + +def file_contents_ro(fd, stream=False, allow_mmap=True): + """:return: read-only contents of the file represented by the file descriptor fd + :param fd: file descriptor opened for reading + :param stream: if False, random access is provided, otherwise the stream interface + is provided. + :param allow_mmap: if True, it's allowed to map the contents into memory, which + allows large files to be handled and accessed efficiently. The file-descriptor + will change its position if this is False""" + try: + if allow_mmap: + # supports stream and random access + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + except EnvironmentError: + # python 2.4 issue, 0 wants to be the actual size + return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ) + # END handle python 2.4 + except OSError: + pass + # END exception handling + + # read manually + contents = os.read(fd, os.fstat(fd).st_size) + if stream: + return _RandomAccessStringIO(contents) + return contents + +def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0): + """Get the file contents at filepath as fast as possible + :return: random access compatible memory of the given filepath + :param stream: see ``file_contents_ro`` + :param allow_mmap: see ``file_contents_ro`` + :param flags: additional flags to pass to os.open + :raise OSError: If the file could not be opened + :note: for now we don't try to use O_NOATIME directly as the right value needs to be + shared per database in fact. 
It only makes a real difference for loose object + databases anyway, and they use it with the help of the ``flags`` parameter""" + fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags) + try: + return file_contents_ro(fd, stream, allow_mmap) + finally: + close(fd) + # END assure file is closed + +def to_hex_sha(sha): + """:return: hexified version of sha""" + if len(sha) == 40: + return sha + return bin_to_hex(sha) + +def to_bin_sha(sha): + if len(sha) == 20: + return sha + return hex_to_bin(sha) def join_path(a, *p): """Join path tokens together similar to os.path.join, but always use @@ -61,6 +263,7 @@ def to_native_path_windows(path): def to_native_path_linux(path): return path.replace('\\','/') + if sys.platform.startswith('win'): to_native_path = to_native_path_windows else: @@ -75,7 +278,7 @@ def join_path_native(a, *p): needed to play it safe on my dear windows and to assure nice paths that only use '\'""" return to_native_path(join_path(a, *p)) - + def assure_directory_exists(path, is_file=False): """Assure that the directory pointed to by path exists. @@ -89,138 +292,287 @@ def assure_directory_exists(path, is_file=False): os.makedirs(path) return True return False - -def get_user_id(): - """:return: string identifying the currently active system user as name@node - :note: user can be set with the 'USER' environment variable, usually set on windows""" - ukn = 'UNKNOWN' - username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) - if username == ukn and hasattr(os, 'getlogin'): - username = os.getlogin() - # END get username from login - return "%s@%s" % (username, platform.node()) -#} END utilities -#{ Classes +#} END routines + -class RemoteProgress(object): +#{ Utilities + +class LazyMixin(object): """ - Handler providing an interface to parse progress information emitted by git-push - and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. """ - _num_op_codes = 5 - BEGIN, END, COUNTING, COMPRESSING, WRITING = [1 << x for x in range(_num_op_codes)] - STAGE_MASK = BEGIN|END - OP_MASK = ~STAGE_MASK - - __slots__ = ("_cur_line", "_seen_ops") - re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") - re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") - - def __init__(self): - self._seen_ops = list() - - def _parse_progress_line(self, line): - """Parse progress information from the given line as retrieved by git-push - or git-fetch - - :return: list(line, ...) list of lines that could not be processed""" - # handle - # Counting objects: 4, done. - # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. - self._cur_line = line - sub_lines = line.split('\r') - failed_lines = list() - for sline in sub_lines: - # find esacpe characters and cut them away - regex will not work with - # them as they are non-ascii. 
As git might expect a tty, it will send them - last_valid_index = None - for i,c in enumerate(reversed(sline)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i-1 - # END character was non-ascii - # END for each character in sline - if last_valid_index is not None: - sline = sline[:last_valid_index] - # END cut away invalid part - sline = sline.rstrip() - - cur_count, max_count = None, None - match = self.re_op_relative.match(sline) - if match is None: - match = self.re_op_absolute.match(sline) - - if not match: - self.line_dropped(sline) - failed_lines.append(sline) - continue - # END could not get match - - op_code = 0 - remote, op_name, percent, cur_count, max_count, message = match.groups() - - # get operation id - if op_name == "Counting objects": - op_code |= self.COUNTING - elif op_name == "Compressing objects": - op_code |= self.COMPRESSING - elif op_name == "Writing objects": - op_code |= self.WRITING - else: - raise ValueError("Operation name %r unknown" % op_name) - - # figure out stage - if op_code not in self._seen_ops: - self._seen_ops.append(op_code) - op_code |= self.BEGIN - # END begin opcode - - if message is None: - message = '' - # END message handling - - message = message.strip() - done_token = ', done.' - if message.endswith(done_token): - op_code |= self.END - message = message[:-len(done_token)] - # END end message handling - - self.update(op_code, cur_count, max_count, message) - # END for each sub line - return failed_lines - def line_dropped(self, line): - """Called whenever a line could not be understood and was therefore dropped.""" + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ + This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" pass + + +class LockedFD(object): + """ + This class facilitates a safe read and write operation to a file on disk. + If we write to 'file', we obtain a lock file at 'file.lock' and write to + that instead. If we succeed, the lock file will be renamed to overwrite + the original file. + + When reading, we obtain a lock file, but to prevent other writers from + succeeding while we are reading the file. + + This type handles error correctly in that it will assure a consistent state + on destruction. 
- def update(self, op_code, cur_count, max_count=None, message=''): - """Called whenever the progress changes + :note: with this setup, parallel reading is not possible""" + __slots__ = ("_filepath", '_fd', '_write') + + def __init__(self, filepath): + """Initialize an instance with the givne filepath""" + self._filepath = filepath + self._fd = None + self._write = None # if True, we write a file + + def __del__(self): + # will do nothing if the file descriptor is already closed + if self._fd is not None: + self.rollback() + + def _lockfilepath(self): + return "%s.lock" % self._filepath + + def open(self, write=False, stream=False): + """ + Open the file descriptor for reading or writing, both in binary mode. + + :param write: if True, the file descriptor will be opened for writing. Other + wise it will be opened read-only. + :param stream: if True, the file descriptor will be wrapped into a simple stream + object which supports only reading or writing + :return: fd to read from or write to. It is still maintained by this instance + and must not be closed directly + :raise IOError: if the lock could not be retrieved + :raise OSError: If the actual file could not be opened for reading + :note: must only be called once""" + if self._write is not None: + raise AssertionError("Called %s multiple times" % self.open) + + self._write = write + + # try to open the lock file + binary = getattr(os, 'O_BINARY', 0) + lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary + try: + fd = os.open(self._lockfilepath(), lockmode, 0600) + if not write: + os.close(fd) + else: + self._fd = fd + # END handle file descriptor + except OSError: + raise IOError("Lock at %r could not be obtained" % self._lockfilepath()) + # END handle lock retrieval + + # open actual file if required + if self._fd is None: + # we could specify exlusive here, as we obtained the lock anyway + try: + self._fd = os.open(self._filepath, os.O_RDONLY | binary) + except: + # assure we release our lockfile + os.remove(self._lockfilepath()) + raise + # END handle lockfile + # END open descriptor for reading + + if stream: + # need delayed import + from stream import FDStream + return FDStream(self._fd) + else: + return self._fd + # END handle stream + + def commit(self): + """When done writing, call this function to commit your changes into the + actual file. + The file descriptor will be closed, and the lockfile handled. + :note: can be called multiple times""" + self._end_writing(successful=True) + + def rollback(self): + """Abort your operation without any changes. The file descriptor will be + closed, and the lock released. + :note: can be called multiple times""" + self._end_writing(successful=False) + + def _end_writing(self, successful=True): + """Handle the lock according to the write mode """ + if self._write is None: + raise AssertionError("Cannot end operation if it wasn't started yet") + + if self._fd is None: + return + + os.close(self._fd) + self._fd = None - :param op_code: - Integer allowing to be compared against Operation IDs and stage IDs. + lockfile = self._lockfilepath() + if self._write and successful: + # on windows, rename does not silently overwrite the existing one + if sys.platform == "win32": + if isfile(self._filepath): + os.remove(self._filepath) + # END remove if exists + # END win32 special handling + os.rename(lockfile, self._filepath) - Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation - ID as well as END. 
It may be that BEGIN and END are set at once in case only - one progress message was emitted due to the speed of the operation. - Between BEGIN and END, none of these flags will be set + # assure others can at least read the file - the tmpfile left it at rw-- + # We may also write that file, on windows that boils down to a remove- + # protection as well + chmod(self._filepath, 0644) + else: + # just delete the file so far, we failed + os.remove(lockfile) + # END successful handling + + +class LockFile(object): + """Provides methods to obtain, check for, and release a file based lock which + should be used to handle concurrent access to the same file. + + As we are a utility class to be derived from, we only use protected methods. + + Locks will automatically be released on destruction""" + __slots__ = ("_file_path", "_owns_lock") + + def __init__(self, file_path): + self._file_path = file_path + self._owns_lock = False + + def __del__(self): + self._release_lock() + + def _lock_file_path(self): + """:return: Path to lockfile""" + return "%s.lock" % (self._file_path) + + def _has_lock(self): + """:return: True if we have a lock and if the lockfile still exists + :raise AssertionError: if our lock-file does not exist""" + if not self._owns_lock: + return False + + return True + + def _obtain_lock_or_raise(self): + """Create a lock file as flag for other instances, mark our instance as lock-holder + + :raise IOError: if a lock was already present or a lock file could not be written""" + if self._has_lock(): + return + lock_file = self._lock_file_path() + if os.path.isfile(lock_file): + raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) - Operation IDs are all held within the OP_MASK. Only one Operation ID will - be active per call. - :param cur_count: Current absolute count of items + try: + fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) + os.close(fd) + except OSError,e: + raise IOError(str(e)) + + self._owns_lock = True + + def _obtain_lock(self): + """The default implementation will raise if a lock cannot be obtained. + Subclasses may override this method to provide a different implementation""" + return self._obtain_lock_or_raise() + + def _release_lock(self): + """Release our lock if we have one""" + if not self._has_lock(): + return - :param max_count: - The maximum count of items we expect. It may be None in case there is - no maximum number of items or if it is (yet) unknown. + # if someone removed our file beforhand, lets just flag this issue + # instead of failing, to make it more usable. + lfp = self._lock_file_path() + try: + # on bloody windows, the file needs write permissions to be removable. + # Why ... + if os.name == 'nt': + os.chmod(lfp, 0777) + # END handle win32 + os.remove(lfp) + except OSError: + pass + self._owns_lock = False + + +class BlockingLockFile(LockFile): + """The lock file will block until a lock could be obtained, or fail after + a specified timeout. + + :note: If the directory containing the lock was removed, an exception will + be raised during the blocking period, preventing hangs as the lock + can never be obtained.""" + __slots__ = ("_check_interval", "_max_block_time") + def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint): + """Configure the instance - :param message: - In case of the 'WRITING' operation, it contains the amount of bytes - transferred. It may possibly be used for other purposes as well. 
+ :param check_interval_s: + Period of time to sleep until the lock is checked the next time. + By default, it waits a nearly unlimited time - You may read the contents of the current line in self._cur_line""" - pass + :param max_block_time_s: Maximum amount of seconds we may lock""" + super(BlockingLockFile, self).__init__(file_path) + self._check_interval = check_interval_s + self._max_block_time = max_block_time_s + + def _obtain_lock(self): + """This method blocks until it obtained the lock, or raises IOError if + it ran out of time or if the parent directory was not available anymore. + If this method returns, you are guaranteed to own the lock""" + starttime = time.time() + maxtime = starttime + float(self._max_block_time) + while True: + try: + super(BlockingLockFile, self)._obtain_lock() + except IOError: + # sanity check: if the directory leading to the lockfile is not + # readable anymore, raise an exception + curtime = time.time() + if not os.path.isdir(os.path.dirname(self._lock_file_path())): + msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime) + raise IOError(msg) + # END handle missing directory + + if curtime >= maxtime: + msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path()) + raise IOError(msg) + # END abort if we wait too long + time.sleep(self._check_interval) + else: + break + # END endless loop class Actor(object): @@ -274,20 +626,20 @@ class Actor(object): m = cls.name_email_regex.search(string) if m: name, email = m.groups() - return Actor(name, email) + return cls(name, email) else: m = cls.name_only_regex.search(string) if m: - return Actor(m.group(1), None) + return cls(m.group(1), None) else: # assume best and use the whole string as name - return Actor(string, None) + return cls(string, None) # END special case name # END handle name/email matching @classmethod def _main_actor(cls, env_name, env_email, config_reader=None): - actor = Actor('', '') + actor = cls('', '') default_email = get_user_id() default_name = default_email.split('@')[0] @@ -324,6 +676,95 @@ class Actor(object): return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) +class Iterable(object): + """Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository""" + __slots__ = tuple() + _id_attribute_ = "attribute that most suitably identifies your instance" + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no additional arguments are given, subclasses are obliged to return all items. + + :note: Favor the iter_items method as it will avoid eagerly building a list of all items + + :return: list(Item,...) list of item instances""" + out_list = IterableList( cls._id_attribute_ ) + out_list.extend(cls.iter_items(repo, *args, **kwargs)) + return out_list + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """For more information about the arguments, see list_items + :return: iterator yielding Items""" + raise NotImplementedError("To be implemented by Subclass") + + +class IterableList(list): + """ + List of iterable objects allowing to query an object by id or by named index:: + + heads = repo.heads + heads.master + heads['master'] + heads[0] + + It requires an id_attribute name to be set which will be queried from its + contained items to have a means for comparison. 
+ + A prefix can be specified which is to be used in case the id returned by the + items always contains a prefix that does not matter to the user, so it + can be left out.""" + __slots__ = ('_id_attr', '_prefix') + + def __new__(cls, id_attr, prefix=''): + return super(IterableList,cls).__new__(cls) + + def __init__(self, id_attr, prefix=''): + self._id_attr = id_attr + self._prefix = prefix + if not isinstance(id_attr, basestring): + raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization") + # END help debugging ! + + def __getattr__(self, attr): + attr = self._prefix + attr + for item in self: + if getattr(item, self._id_attr) == attr: + return item + # END for each item + return list.__getattribute__(self, attr) + + def __getitem__(self, index): + if isinstance(index, int): + return list.__getitem__(self,index) + + try: + return getattr(self, index) + except AttributeError: + raise IndexError( "No item found with id %r" % (self._prefix + index) ) + + + +#} END utilities + +#{ Classes + +class RepoAliasMixin(object): + """Simple utility providing a repo-property which resolves to the 'odb' attribute + of the actual type. This is for API compatibility only, as the types previously + held repository instances; now they hold odb instances instead""" + __slots__ = tuple() + + @property + def repo(self): + return self.odb + + class Stats(object): """ Represents stat information as presented by git at the end of a merge. It is @@ -407,195 +848,4 @@ class IndexFileSHA1Writer(object): return self.f.tell() -class LockFile(object): - """Provides methods to obtain, check for, and release a file based lock which - should be used to handle concurrent access to the same file. - - As we are a utility class to be derived from, we only use protected methods. - - Locks will automatically be released on destruction""" - __slots__ = ("_file_path", "_owns_lock") - - def __init__(self, file_path): - self._file_path = file_path - self._owns_lock = False - - def __del__(self): - self._release_lock() - - def _lock_file_path(self): - """:return: Path to lockfile""" - return "%s.lock" % (self._file_path) - - def _has_lock(self): - """:return: True if we have a lock and if the lockfile still exists - :raise AssertionError: if our lock-file does not exist""" - if not self._owns_lock: - return False - - return True - - def _obtain_lock_or_raise(self): - """Create a lock file as flag for other instances, mark our instance as lock-holder - - :raise IOError: if a lock was already present or a lock file could not be written""" - if self._has_lock(): - return - lock_file = self._lock_file_path() - if os.path.isfile(lock_file): - raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) - - try: - fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) - os.close(fd) - except OSError,e: - raise IOError(str(e)) - - self._owns_lock = True - - def _obtain_lock(self): - """The default implementation will raise if a lock cannot be obtained. - Subclasses may override this method to provide a different implementation""" - return self._obtain_lock_or_raise() - - def _release_lock(self): - """Release our lock if we have one""" - if not self._has_lock(): - return - - # if someone removed our file beforhand, lets just flag this issue - # instead of failing, to make it more usable. - lfp = self._lock_file_path() - try: - # on bloody windows, the file needs write permissions to be removable. 
-			# Why ...
-			if os.name == 'nt':
-				os.chmod(lfp, 0777)
-			# END handle win32
-			os.remove(lfp)
-		except OSError:
-			pass
-		self._owns_lock = False
-
-
-class BlockingLockFile(LockFile):
-	"""The lock file will block until a lock could be obtained, or fail after
-	a specified timeout.
-
-	:note: If the directory containing the lock was removed, an exception will
-		be raised during the blocking period, preventing hangs as the lock
-		can never be obtained."""
-	__slots__ = ("_check_interval", "_max_block_time")
-	def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
-		"""Configure the instance
-
-		:parm check_interval_s:
-			Period of time to sleep until the lock is checked the next time.
-			By default, it waits a nearly unlimited time
-		:parm max_block_time_s: Maximum amount of seconds we may lock"""
-		super(BlockingLockFile, self).__init__(file_path)
-		self._check_interval = check_interval_s
-		self._max_block_time = max_block_time_s
-
-	def _obtain_lock(self):
-		"""This method blocks until it obtained the lock, or raises IOError if
-		it ran out of time or if the parent directory was not available anymore.
-		If this method returns, you are guranteed to own the lock"""
-		starttime = time.time()
-		maxtime = starttime + float(self._max_block_time)
-		while True:
-			try:
-				super(BlockingLockFile, self)._obtain_lock()
-			except IOError:
-				# synity check: if the directory leading to the lockfile is not
-				# readable anymore, raise an execption
-				curtime = time.time()
-				if not os.path.isdir(os.path.dirname(self._lock_file_path())):
-					msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
-					raise IOError(msg)
-				# END handle missing directory
-
-				if curtime >= maxtime:
-					msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
-					raise IOError(msg)
-				# END abort if we wait too long
-				time.sleep(self._check_interval)
-			else:
-				break
-		# END endless loop
-
-
-class IterableList(list):
-	"""
-	List of iterable objects allowing to query an object by id or by named index::
-
-	 heads = repo.heads
-	 heads.master
-	 heads['master']
-	 heads[0]
-
-	It requires an id_attribute name to be set which will be queried from its
-	contained items to have a means for comparison.
-
-	A prefix can be specified which is to be used in case the id returned by the
-	items always contains a prefix that does not matter to the user, so it
-	can be left out."""
-	__slots__ = ('_id_attr', '_prefix')
-
-	def __new__(cls, id_attr, prefix=''):
-		return super(IterableList,cls).__new__(cls)
-
-	def __init__(self, id_attr, prefix=''):
-		self._id_attr = id_attr
-		self._prefix = prefix
-		if not isinstance(id_attr, basestring):
-			raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
-		# END help debugging !
-
-	def __getattr__(self, attr):
-		attr = self._prefix + attr
-		for item in self:
-			if getattr(item, self._id_attr) == attr:
-				return item
-		# END for each item
-		return list.__getattribute__(self, attr)
-
-	def __getitem__(self, index):
-		if isinstance(index, int):
-			return list.__getitem__(self,index)
-
-		try:
-			return getattr(self, index)
-		except AttributeError:
-			raise IndexError( "No item found with id %r" % (self._prefix + index) )
-
-
-class Iterable(object):
-	"""Defines an interface for iterable items which is to assure a uniform
-	way to retrieve and iterate items within the git repository"""
-	__slots__ = tuple()
-	_id_attribute_ = "attribute that most suitably identifies your instance"
-
-	@classmethod
-	def list_items(cls, repo, *args, **kwargs):
-		"""
-		Find all items of this type - subclasses can specify args and kwargs differently.
-		If no args are given, subclasses are obliged to return all items if no additional
-		arguments arg given.
-
-		:note: Favor the iter_items method as it will
-
-		:return:list(Item,...) list of item instances"""
-		out_list = IterableList( cls._id_attribute_ )
-		out_list.extend(cls.iter_items(repo, *args, **kwargs))
-		return out_list
-
-	@classmethod
-	def iter_items(cls, repo, *args, **kwargs):
-		"""For more information about the arguments, see list_items
-		:return: iterator yielding Items"""
-		raise NotImplementedError("To be implemented by Subclass")
-
 
 #} END classes
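
Editor's notes on the code moved and changed above follow; the snippets are illustrative sketches, not part of the diff.

The LockFile code relocated in this diff relies on os.open with O_CREAT | O_EXCL, so creating the '<path>.lock' flag file is atomic: whichever instance creates it first owns the lock, and every other instance receives IOError immediately. A minimal sketch of that non-blocking behavior, driving the protected API directly (real code would wrap it in a subclass):

from git.util import LockFile

lock_a = LockFile("/tmp/somefile")
lock_b = LockFile("/tmp/somefile")

lock_a._obtain_lock_or_raise()      # atomically creates /tmp/somefile.lock
try:
    lock_b._obtain_lock_or_raise()  # a second holder fails fast ...
except IOError:
    pass                            # ... with "Lock for file ... did already exist"
lock_a._release_lock()              # removes the .lock file again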
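BlockingLockFile, by contrast, retries that non-blocking acquisition every check_interval_s seconds until max_block_time_s has elapsed, raising IOError on timeout or if the directory holding the lock disappears. A usage sketch, assuming a hypothetical subclass that exposes the protected methods:

from git.util import BlockingLockFile

class ConfigLock(BlockingLockFile):
    """Hypothetical helper: poll every 0.1s, give up after 5 seconds"""
    def acquire(self):
        self._obtain_lock()     # blocks; raises IOError after max_block_time_s
    def release(self):
        self._release_lock()

lock = ConfigLock("/tmp/myconfig", check_interval_s=0.1, max_block_time_s=5)
lock.acquire()
try:
    pass    # ... read or rewrite /tmp/myconfig while holding the lock ...
finally:
    lock.release()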
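The Actor hunk above swaps hard-coded Actor(...) constructor calls for cls(...). That is a behavioral fix rather than a cleanup: the factory classmethods now return instances of whatever subclass they are invoked on. Sketched with a hypothetical subclass (the parser classmethod is _from_string in GitPython):

from git.util import Actor

class VerboseActor(Actor):
    """Hypothetical subclass used only for illustration"""
    def __repr__(self):
        return "VerboseActor(%r, %r)" % (self.name, self.email)

actor = VerboseActor._from_string("Sebastian Thiel <byronimo@gmail.com>")
# Before this change the call returned a plain Actor; now the subclass survives:
assert isinstance(actor, VerboseActor)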
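Iterable and IterableList, both moved to the top of the file in this diff, are designed to cooperate: a subclass declares _id_attribute_ and implements iter_items as a generator, and list_items wraps the yielded items in an IterableList keyed on that attribute, so results can be addressed by attribute name, by key, or by position. A self-contained sketch with a hypothetical item type:

from git.util import Iterable

class Tag(Iterable):
    """Hypothetical item type; real subclasses would query the repository"""
    _id_attribute_ = "name"

    def __init__(self, name):
        self.name = name

    @classmethod
    def iter_items(cls, repo, *args, **kwargs):
        # A real implementation would enumerate refs found in `repo`.
        for name in ("alpha", "beta"):
            yield cls(name)

tags = Tag.list_items(None)             # repo is unused by this toy iter_items
assert tags[0].name == "alpha"          # positional access
assert tags["beta"].name == "beta"      # key access via __getitem__
assert tags.alpha is tags[0]            # attribute access via __getattr__

The prefix argument follows the same lookup rule: an IterableList('name', prefix='origin/') would let an item whose id is 'origin/master' be reached simply as lst.master.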
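Finally, RepoAliasMixin is a thin compatibility shim: types that used to hold a repository now hold an object database in their odb attribute, and the mixin keeps old repo-based call sites working. Roughly (class name hypothetical):

from git.util import RepoAliasMixin

class DbBackedType(RepoAliasMixin):
    """Hypothetical type storing its object database as `odb`"""
    __slots__ = ('odb',)
    def __init__(self, odb):
        self.odb = odb

obj = DbBackedType(odb=object())
assert obj.repo is obj.odb  # legacy .repo access still resolves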
