From 4177eefd7bdaea96a529b00ba9cf751924ede202 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 May 2011 19:43:22 +0200 Subject: Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work --- git/db/__init__.py | 6 + git/db/cmd/__init__.py | 1 + git/db/cmd/git.py | 437 +++++++++++++++++++++++++++++++++++++++++++++ git/db/interface.py | 469 +++++++++++++++++++++++++++++++++++++++++++++++++ git/db/py/__init__.py | 13 ++ git/db/py/base.py | 351 ++++++++++++++++++++++++++++++++++++ git/db/py/git.py | 113 ++++++++++++ git/db/py/loose.py | 262 +++++++++++++++++++++++++++ git/db/py/mem.py | 113 ++++++++++++ git/db/py/pack.py | 212 ++++++++++++++++++++++ git/db/py/ref.py | 77 ++++++++ git/db/py/resolve.py | 297 +++++++++++++++++++++++++++++++ git/db/py/transport.py | 89 ++++++++++ 13 files changed, 2440 insertions(+) create mode 100644 git/db/__init__.py create mode 100644 git/db/cmd/__init__.py create mode 100644 git/db/cmd/git.py create mode 100644 git/db/interface.py create mode 100644 git/db/py/__init__.py create mode 100644 git/db/py/base.py create mode 100644 git/db/py/git.py create mode 100644 git/db/py/loose.py create mode 100644 git/db/py/mem.py create mode 100644 git/db/py/pack.py create mode 100644 git/db/py/ref.py create mode 100644 git/db/py/resolve.py create mode 100644 git/db/py/transport.py (limited to 'git/db') diff --git a/git/db/__init__.py b/git/db/__init__.py new file mode 100644 index 00000000..25948326 --- /dev/null +++ b/git/db/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from interface import * diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/git/db/cmd/__init__.py @@ -0,0 +1 @@ + diff --git a/git/db/cmd/git.py 
b/git/db/cmd/git.py new file mode 100644 index 00000000..5f977c6f --- /dev/null +++ b/git/db/cmd/git.py @@ -0,0 +1,437 @@ +"""Module with our own gitdb implementation - it uses the git command""" +from exc import ( + GitCommandError, + BadObject + ) + +from gitdb.base import ( + OInfo, + OStream + ) + +from gitdb.util import ( + bin_to_hex, + hex_to_bin + ) +from gitdb.db.py import ( + PureGitDB, + PureLooseObjectODB + ) +from git.util import RemoteProgress +from gitdb.db.py.base import TransportDB +from gitdb.db.interface import FetchInfo as GitdbFetchInfo +from gitdb.db.interface import PushInfo as GitdbPushInfo + +from git.util import join_path +from gitdb.util import join + +from refs import ( + Reference, + RemoteReference, + SymbolicReference, + TagReference + ) + +import re +import sys + + +__all__ = ('GitCmdObjectDB', 'PureGitDB', 'RemoteProgress' ) + + +class PushInfo(GitdbPushInfo): + """ + Carries information about the result of a push operation of a single head:: + + info = remote.push()[0] + info.flags # bitflags providing more information about the result + info.local_ref # Reference pointing to the local reference that was pushed + # It is None if the ref was deleted. + info.remote_ref_string # path to the remote reference located on the remote side + info.remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + info.summary # summary line providing human readable english text about the push + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') + + _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, + '-' : GitdbPushInfo.DELETED, '*' : 0, + '+' : GitdbPushInfo.FORCED_UPDATE, + ' ' : GitdbPushInfo.FAST_FORWARD, + '=' : GitdbPushInfo.UP_TO_DATE, + '!' 
: GitdbPushInfo.ERROR } + + def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, + summary=''): + """ Initialize a new instance """ + self.flags = flags + self.local_ref = local_ref + self.remote_ref_string = remote_ref_string + self._remote = remote + self.old_commit_binsha = old_commit_binsha + self.summary = summary + + @property + def remote_ref(self): + """ + :return: + Remote Reference or TagReference in the local repository corresponding + to the remote_ref_string kept in this instance.""" + # translate heads to a local remote, tags stay as they are + if self.remote_ref_string.startswith("refs/tags"): + return TagReference(self._remote.repo, self.remote_ref_string) + elif self.remote_ref_string.startswith("refs/heads"): + remote_ref = Reference(self._remote.repo, self.remote_ref_string) + return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + else: + raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) + # END + + @classmethod + def _from_line(cls, remote, line): + """Create a new PushInfo instance as parsed from line which is expected to be like + refs/heads/master:refs/heads/master 05d2687..1d0568e""" + control_character, from_to, summary = line.split('\t', 3) + flags = 0 + + # control character handling + try: + flags |= cls._flag_map[ control_character ] + except KeyError: + raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) + # END handle control character + + # from_to handling + from_ref_string, to_ref_string = from_to.split(':') + if flags & cls.DELETED: + from_ref = None + else: + from_ref = Reference.from_path(remote.repo, from_ref_string) + + # commit handling, could be message or commit info + old_commit_binsha = None + if summary.startswith('['): + if "[rejected]" in summary: + flags |= cls.REJECTED + elif "[remote rejected]" in summary: + flags |= cls.REMOTE_REJECTED + elif "[remote 
failure]" in summary: + flags |= cls.REMOTE_FAILURE + elif "[no match]" in summary: + flags |= cls.ERROR + elif "[new tag]" in summary: + flags |= cls.NEW_TAG + elif "[new branch]" in summary: + flags |= cls.NEW_HEAD + # uptodate encoded in control character + else: + # fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit + split_token = "..." + if control_character == " ": + split_token = ".." + old_sha, new_sha = summary.split(' ')[0].split(split_token) + # have to use constructor here as the sha usually is abbreviated + old_commit_binsha = remote.repo.commit(old_sha) + # END message handling + + return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) + + +class FetchInfo(GitdbFetchInfo): + """ + Carries information about the results of a fetch operation of a single head:: + + info = remote.fetch()[0] + info.ref # Symbolic Reference or RemoteReference to the changed + # remote head or FETCH_HEAD + info.flags # additional flags to be & with enumeration members, + # i.e. info.flags & info.REJECTED + # is 0 if ref is FETCH_HEAD + info.note # additional notes given by git-fetch intended for the user + info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref, otherwise None + """ + __slots__ = ('ref','old_commit_binsha', 'flags', 'note') + + # %c %-*s %-*s -> %s (%s) + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") + + _flag_map = { '!' 
: GitdbFetchInfo.ERROR, + '+' : GitdbFetchInfo.FORCED_UPDATE, + '-' : GitdbFetchInfo.TAG_UPDATE, + '*' : 0, + '=' : GitdbFetchInfo.HEAD_UPTODATE, + ' ' : GitdbFetchInfo.FAST_FORWARD } + + def __init__(self, ref, flags, note = '', old_commit_binsha = None): + """ + Initialize a new instance + """ + self.ref = ref + self.flags = flags + self.note = note + self.old_commit_binsha = old_commit_binsha + + def __str__(self): + return self.name + + @property + def name(self): + """:return: Name of our remote ref""" + return self.ref.name + + @property + def commit(self): + """:return: Commit of our remote ref""" + return self.ref.commit + + @classmethod + def _from_line(cls, repo, line, fetch_line): + """Parse information from the given line as returned by git-fetch -v + and return a new FetchInfo object representing this information. + + We can handle a line as follows + "%c %-*s %-*s -> %s%s" + + Where c is either ' ', !, +, -, *, or = + ! means error + + means success forcing update + - means a tag was updated + * means birth of new branch or tag + = means the head was up to date ( and not moved ) + ' ' means a fast-forward + + fetch line is the corresponding line from FETCH_HEAD, like + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" + match = cls.re_fetch_result.match(line) + if match is None: + raise ValueError("Failed to parse line: %r" % line) + + # parse lines + control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() + try: + new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") + ref_type_name, fetch_note = fetch_note.split(' ', 1) + except ValueError: # unpack error + raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) + + # handle FETCH_HEAD and figure out ref type + # If we do not specify a target branch like master:refs/remotes/origin/master, + # the fetch result is stored in FETCH_HEAD which destroys the rule we usually + # have. 
In that case we use a symbolic reference which is detached + ref_type = None + if remote_local_ref == "FETCH_HEAD": + ref_type = SymbolicReference + elif ref_type_name == "branch": + ref_type = RemoteReference + elif ref_type_name == "tag": + ref_type = TagReference + else: + raise TypeError("Cannot handle reference type: %r" % ref_type_name) + + # create ref instance + if ref_type is SymbolicReference: + remote_local_ref = ref_type(repo, "FETCH_HEAD") + else: + remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) + # END create ref instance + + note = ( note and note.strip() ) or '' + + # parse flags from control_character + flags = 0 + try: + flags |= cls._flag_map[control_character] + except KeyError: + raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) + # END control char exception hanlding + + # parse operation string for more info - makes no sense for symbolic refs + old_commit_binsha = None + if isinstance(remote_local_ref, Reference): + if 'rejected' in operation: + flags |= cls.REJECTED + if 'new tag' in operation: + flags |= cls.NEW_TAG + if 'new branch' in operation: + flags |= cls.NEW_HEAD + if '...' in operation or '..' in operation: + split_token = '...' + if control_character == ' ': + split_token = split_token[:-1] + old_commit_binsha = repo.rev_parse(operation.split(split_token)[0]) + # END handle refspec + # END reference flag handling + + return cls(remote_local_ref, flags, note, old_commit_binsha) + + +class GitCmdObjectDB(PureLooseObjectODB, TransportDB): + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database. 
+ :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + def __init__(self, root_path, git): + """Initialize this instance with the root and a git command""" + super(GitCmdObjectDB, self).__init__(root_path) + self._git = git + + @classmethod + def _digest_process_messages(cls, fh, progress): + """Read progress messages from file-like object fh, supplying the respective + progress messages to the progress instance. + + :return: list(line, ...) list of lines without linebreaks that did + not contain progress information""" + line_so_far = '' + dropped_lines = list() + while True: + char = fh.read(1) + if not char: + break + + if char in ('\r', '\n'): + dropped_lines.extend(progress._parse_progress_line(line_so_far)) + line_so_far = '' + else: + line_so_far += char + # END process parsed line + # END while file is not done reading + return dropped_lines + + @classmethod + def _finalize_proc(cls, proc): + """Wait for the process (fetch, pull or push) and handle its errors accordingly""" + try: + proc.wait() + except GitCommandError,e: + # if a push has rejected items, the command has non-zero return status + # a return status of 128 indicates a connection error - reraise the previous one + if proc.poll() == 128: + raise + pass + # END exception handling + + + def _get_fetch_info_from_stderr(self, proc, progress): + # skip first line as it is some remote info we are not interested in + output = IterableList('name') + + + # lines which are no progress are fetch info lines + # this also waits for the command to finish + # Skip some progress lines that don't provide relevant information + fetch_info_lines = list() + for line in self._digest_process_messages(proc.stderr, progress): + if line.startswith('From') or line.startswith('remote: Total'): + continue + elif line.startswith('warning:'): + print >> sys.stderr, line + continue + elif line.startswith('fatal:'): + raise GitCommandError(("Error when 
fetching: %s" % line,), 2) + # END handle special messages + fetch_info_lines.append(line) + # END for each line + + # read head information + fp = open(join(self.root_path(), 'FETCH_HEAD'),'r') + fetch_head_info = fp.readlines() + fp.close() + + assert len(fetch_info_lines) == len(fetch_head_info) + + output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) + for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) + + self._finalize_proc(proc) + return output + + def _get_push_info(self, proc, progress): + # read progress information from stderr + # we hope stdout can hold all the data, it should ... + # read the lines manually as it will use carriage returns between the messages + # to override the previous one. This is why we read the bytes manually + self._digest_process_messages(proc.stderr, progress) + + output = IterableList('name') + for line in proc.stdout.readlines(): + try: + output.append(PushInfo._from_line(self, line)) + except ValueError: + # if an error happens, additional info is given which we cannot parse + pass + # END exception handling + # END for each line + + self._finalize_proc(proc) + return output + + + + #{ ODB Interface + + def info(self, sha): + hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) + return OInfo(hex_to_bin(hexsha), typename, size) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, stream) + + #} END odb interface + + # { Interface + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: Full binary 20 byte sha from the given partial hexsha + :raise AmbiguousObjectName: + :raise BadObject: + :note: currently we only raise BadObject as git does not communicate + AmbiguousObjects separately""" + try: + hexsha, typename, size = self._git.get_object_header(partial_hexsha) + return hex_to_bin(hexsha) + except 
def pull(self, url, refspecs=None, progress=None, **kwargs):
    """Fetch and merge the given refspecs.
    If no refspecs are given, the merge will only work properly if you
    have setup upstream (tracking) branches.

    :param url: may be a remote name or a url
    :param refspecs: single string, RefSpec instance or list of such or None.
    :param progress: RemoteProgress derived instance or None, in which case
        a default RemoteProgress instance is used
    :param kwargs: Additional arguments to be passed to the git-pull process
    :return: result of _get_fetch_info_from_stderr() for the spawned process"""
    # the parameter is named 'refspecs' - the previously used name 'refspec'
    # is undefined and raised a NameError on every call
    proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs)
    return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())

def fetch(self, url, refspecs=None, progress=None, **kwargs):
    """Fetch the latest changes

    :param url: may be a remote name or a url
    :param refspecs: single string, RefSpec instance or list of such or None.
    :param progress: RemoteProgress derived instance or None, in which case
        a default RemoteProgress instance is used
    :param kwargs: Additional arguments to be passed to the git-fetch process
    :return: result of _get_fetch_info_from_stderr() for the spawned process"""
    # see pull(): 'refspecs' is the actual parameter name
    proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs)
    return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
class ObjectDBR(object):
    """Defines an interface for object database lookup.

    Objects are identified by their 20 byte binary sha. Every query method
    raises NotImplementedError until it is overridden by a subclass."""

    def __contains__(self, sha):
        """:return: True if the object identified by the given 20 byte binary
            sha is part of this database. Enables ``sha in db``"""
        # has_object() is the membership query defined by this interface - a
        # method named has_obj() does not exist on it
        return self.has_object(sha)

    #{ Query Interface
    def has_object(self, sha):
        """
        :return: True if the object identified by the given 20 bytes
            binary sha is contained in the database"""
        raise NotImplementedError("To be implemented in subclass")

    def has_object_async(self, reader):
        """Return a reader yielding information about the membership of objects
        as identified by shas

        :param reader: Reader yielding 20 byte shas.
        :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
            whether the given sha exists in the database or not"""
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
        """:return: OInfo instance
        :param sha: 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def info_async(self, reader):
        """Retrieve information of a multitude of objects asynchronously

        :param reader: Channel yielding the sha's of the objects of interest
        :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
        """:return: OStream instance
        :param sha: 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def stream_async(self, reader):
        """Retrieve the OStream of multiple objects

        :param reader: see ``info_async``
        :return: async.Reader yielding OStream|InvalidOStream instances in any order
        :note: depending on the system configuration, it might not be possible to
            read all OStreams at once. Instead, read them individually using reader.read(x)
            where x is small enough."""
        raise NotImplementedError("To be implemented in subclass")

    def size(self):
        """:return: amount of objects in this database"""
        raise NotImplementedError()

    def sha_iter(self):
        """Return iterator yielding 20 byte shas for all objects in this data base"""
        raise NotImplementedError()

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """
        :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
        :param partial_hexsha: hexsha with less than 40 byte
        :raise AmbiguousObjectName: If multiple objects would match the given sha
        :raise BadObject: If object was not found"""
        raise NotImplementedError()

    def partial_to_complete_sha(self, partial_binsha, canonical_length):
        """:return: 20 byte sha as inferred by the given partial binary sha
        :param partial_binsha: binary sha with less than 20 bytes
        :param canonical_length: length of the corresponding canonical (hexadecimal) representation.
            It is required as binary sha's cannot display whether the original hex sha
            had an odd or even number of characters
        :raise AmbiguousObjectName:
        :raise BadObject: """
        # abstract like every other query method - silently returning None
        # would be mistaken for a valid result by callers
        raise NotImplementedError()
    #} END query interface
+ :return: previously installed stream, or None if there was no override + :raise TypeError: if the stream doesn't have the supported functionality""" + raise NotImplementedError("To be implemented in subclass") + + def ostream(self): + """ + :return: overridden output stream this instance will write to, or None + if it will write to the default stream""" + raise NotImplementedError("To be implemented in subclass") + + def store(self, istream): + """ + Create a new object in the database + :return: the input istream object with its sha set to its corresponding value + + :param istream: IStream compatible instance. If its sha is already set + to a value, the object will just be stored in the our database format, + in which case the input stream is expected to be in object format ( header + contents ). + :raise IOError: if data could not be written""" + raise NotImplementedError("To be implemented in subclass") + + def store_async(self, reader): + """ + Create multiple new objects in the database asynchronously. The method will + return right away, returning an output channel which receives the results as + they are computed. + + :return: Channel yielding your IStream which served as input, in any order. + The IStreams sha will be set to the sha it received during the process, + or its error attribute will be set to the exception informing about the error. + + :param reader: async.Reader yielding IStream instances. + The same instances will be used in the output channel as were received + in by the Reader. + + :note:As some ODB implementations implement this operation atomic, they might + abort the whole operation if one item could not be processed. 
class RefSpec(object):
    """A refspec is a simple container which provides information about the way
    something should be fetched or pushed. It requires to use symbols to describe
    the actual objects which is done using reference names (or respective instances
    which resolve to actual reference names)."""
    __slots__ = ('source', 'destination', 'force')

    def __init__(self, source, destination, force=False):
        """Initialize the instance with the required values

        :param source: reference name or instance. If None, the destination
            is supposed to be deleted.
        :param destination: reference name or instance, must not be None
        :param force: if True, the string form is prefixed with '+'
        :raise ValueError: if destination is None"""
        if destination is None:
            raise ValueError("Destination must be set")
        self.source = source
        self.destination = destination
        self.force = force

    def __str__(self):
        """:return: a git-style refspec, i.e. ``[+]<source>:<destination>``.
            A source of None is rendered as an empty string"""
        prefix = '+' if self.force else ''
        source = '' if self.source is None else str(self.source)
        # the assembled string must be returned - without it str() fails with
        # "__str__ returned non-string"
        return "%s%s:%s" % (prefix, source, str(self.destination))

    def delete_destination(self):
        """:return: True if this refspec has no source, which marks the
            destination for deletion"""
        return self.source is None
Will be None if an error was indicated + summary # summary line providing human readable english text about the push + """ + __slots__ = tuple() + + NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ + FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] + + +class FetchInfo(object): + """A type presenting information about the fetch operation on exactly one refspec + + The following members are defined: + ref # name of the reference to the changed + # remote head or FETCH_HEAD. Implementations can provide + # actual class instance which convert to a respective string + flags # additional flags to be & with enumeration members, + # i.e. info.flags & info.REJECTED + # is 0 if ref is FETCH_HEAD + note # additional notes given by the fetch-pack implementation intended for the user + old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref as hexsha or None + # Implementors may use their own type too, but it should decay into a + # string of its hexadecimal sha representation""" + __slots__ = tuple() + + NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ + FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] + + +class TransportDB(object): + """A database which allows to transport objects from and to different locations + which are specified by urls (location) and refspecs (what to transport, + see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). + + At the beginning of a transport operation, it will be determined which objects + have to be sent (either by this or by the other side). + + Afterwards a pack with the required objects is sent (or received). If there is + nothing to send, the pack will be empty. + + As refspecs involve symbolic names for references to be handled, we require + RefParse functionality. 
How this is done is up to the actual implementation.""" + # The following variables need to be set by the derived class + + #{ Interface + + def fetch(self, url, refspecs, progress=None, **kwargs): + """Fetch the objects defined by the given refspec from the given url. + :param url: url identifying the source of the objects. It may also be + a symbol from which the respective url can be resolved, like the + name of the remote. The implementation should allow objects as input + as well, these are assumed to resovle to a meaningful string though. + :param refspecs: iterable of reference specifiers or RefSpec instance, + identifying the references to be fetch from the remote. + :param progress: callable which receives progress messages for user consumption + :param kwargs: may be used for additional parameters that the actual implementation could + find useful. + :return: List of FetchInfo compatible instances which provide information about what + was previously fetched, in the order of the input refspecs. + :note: even if the operation fails, one of the returned FetchInfo instances + may still contain errors or failures in only part of the refspecs. + :raise: if any issue occours during the transport or if the url is not + supported by the protocol. + """ + raise NotImplementedError() + + def push(self, url, refspecs, progress=None, **kwargs): + """Transport the objects identified by the given refspec to the remote + at the given url. + :param url: Decribes the location which is to receive the objects + see fetch() for more details + :param refspecs: iterable of refspecs strings or RefSpec instances + to identify the objects to push + :param progress: see fetch() + :param kwargs: additional arguments which may be provided by the caller + as they may be useful to the actual implementation + :todo: what to return ? 
+ :raise: if any issue arises during transport or if the url cannot be handled""" + raise NotImplementedError() + + @property + def remotes(self): + """:return: An IterableList of Remote objects allowing to access and manipulate remotes + :note: Remote objects can also be used for the actual push or fetch operation""" + raise NotImplementedError() + + #}end interface + + +class ReferencesMixin(object): + """Database providing reference objects which in turn point to database objects + like Commits or Tag(Object)s. + + The returned types are compatible to the interfaces of the pure python + reference implementation in GitDB.ref""" + + def resolve(self, name): + """Resolve the given name into a binary sha. Valid names are as defined + in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html + :return: binary sha matching the name + :raise AmbiguousObjectName: + :raise BadObject: """ + raise NotImplementedError() + + @property + def references(self): + """:return: iterable list of all Reference objects representing tags, heads + and remote references. This is the most general method to obtain any + references.""" + raise NotImplementedError() + + @property + def heads(self): + """:return: IterableList with HeadReference objects pointing to all + heads in the repository.""" + raise NotImplementedError() + + @property + def tags(self): + """:return: An IterableList of TagReferences that are available in this repo""" + raise NotImplementedError() + + +class RepositoryPathsMixin(object): + """Represents basic functionality of a full git repository. This involves an + optional working tree, a git directory with references and an object directory. + + This type collects the respective paths and verifies the provided base path + truly is a git repository. + + If the underlying type provides the config_reader() method, we can properly determine + whether this is a bare repository as well. 
Otherwise it will make an educated guess + based on the path name.""" + #{ Subclass Interface + def _initialize(self, path): + """initialize this instance with the given path. It may point to + any location within the repositories own data, as well as the working tree. + + The implementation will move up and search for traces of a git repository, + which is indicated by a child directory ending with .git or the + current path portion ending with .git. + + The paths made available for query are suitable for full git repositories + only. Plain object databases need to be fed the "objects" directory path. + + :param path: the path to initialize the repository with + :raise InvalidDBRoot: + """ + raise NotImplementedError() + #} end subclass interface + + #{ Interface + + def is_bare(self): + """:return: True if this is a bare repository + :note: this value is cached upon initialization""" + raise NotImplementedError() + + def git_path(self): + """:return: path to directory containing this actual git repository (which + in turn provides access to objects and references""" + raise NotImplementedError() + + def working_tree_path(self): + """:return: path to directory containing the working tree checkout of our + git repository. + :raise AssertionError: If this is a bare repository""" + raise NotImplementedError() + + def objects_path(self): + """:return: path to the repository's objects directory""" + raise NotImplementedError() + + def working_dir(self): + """:return: working directory of the git process or related tools, being + either the working_tree_path if available or the git_path""" + raise NotImplementedError() + + #} END interface + + +class ConfigurationMixin(object): + """Interface providing configuration handler instances, which provide locked access + to a single git-style configuration file (ini like format, using tabs as improve readablity). 
+ + Configuration readers can be initialized with multiple files at once, whose information is concatenated + when reading. Lower-level files overwrite values from higher level files, i.e. a repository configuration file + overwrites information coming from a system configuration file + + :note: for the 'repository' config level, a git_path() compatible type is required""" + config_level = ("system", "global", "repository") + + #{ Interface + + def config_reader(self, config_level=None): + """ + :return: + GitConfigParser allowing to read the full git configuration, but not to write it + + The configuration will include values from the system, user and repository + configuration files. + + :param config_level: + For possible values, see config_writer method + If None, all applicable levels will be used. Specify a level in case + you know which exact file you wish to read to prevent reading multiple files for + instance + :note: On windows, system configuration cannot currently be read as the path is + unknown, instead the global path will be used.""" + raise NotImplementedError() + + def config_writer(self, config_level="repository"): + """ + :return: + GitConfigParser allowing to write values of the specified configuration file level. + Config writers should be retrieved, used to change the configuration, and written + right away as they will lock the configuration file in question and prevent others + from writing to it.
+ + :param config_level: + One of the following values + system = sytem wide configuration file + global = user level configuration file + repository = configuration file for this repostory only""" + raise NotImplementedError() + + #} END interface + diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py new file mode 100644 index 00000000..046c699d --- /dev/null +++ b/git/db/py/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from base import * +from loose import * +from mem import * +from pack import * +from git import * +from ref import * +from resolve import * +from transport import * diff --git a/git/db/py/base.py b/git/db/py/base.py new file mode 100644 index 00000000..c378b10e --- /dev/null +++ b/git/db/py/base.py @@ -0,0 +1,351 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains basic implementations for the interface building blocks""" + +from gitdb.db.interface import * + +from gitdb.util import ( + pool, + join, + normpath, + abspath, + dirname, + LazyMixin, + hex_to_bin, + bin_to_hex, + expandvars, + expanduser, + exists, + is_git_dir + ) + +from gitdb.config import GitConfigParser +from gitdb.exc import ( + BadObject, + AmbiguousObjectName, + InvalidDBRoot + ) + +from async import ChannelThreadTask + +from itertools import chain +import sys +import os + + +__all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', + 'PureConfigurationMixin', 'PureRepositoryPathsMixin') + + +class PureObjectDBR(ObjectDBR): + + #{ Query Interface + + def has_object_async(self, reader): + task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, 
self.has_object(sha))) + return pool.add_task(task) + + def info_async(self, reader): + task = ChannelThreadTask(reader, str(self.info_async), self.info) + return pool.add_task(task) + + def stream_async(self, reader): + # base implementation just uses the stream method repeatedly + task = ChannelThreadTask(reader, str(self.stream_async), self.stream) + return pool.add_task(task) + + def partial_to_complete_sha_hex(self, partial_hexsha): + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha)) + + #} END query interface + + +class PureObjectDBW(ObjectDBW): + + def __init__(self, *args, **kwargs): + super(PureObjectDBW, self).__init__(*args, **kwargs) + self._ostream = None + + #{ Edit Interface + def set_ostream(self, stream): + cstream = self._ostream + self._ostream = stream + return cstream + + def ostream(self): + return self._ostream + + def store_async(self, reader): + task = ChannelThreadTask(reader, str(self.store_async), self.store) + return pool.add_task(task) + + #} END edit interface + + +class PureRootPathDB(RootPathDB): + + def __init__(self, root_path): + super(PureRootPathDB, self).__init__(root_path) + self._root_path = root_path + + + #{ Interface + def root_path(self): + return self._root_path + + def db_path(self, rela_path): + return join(self._root_path, rela_path) + #} END interface + + +def _databases_recursive(database, output): + """Fill output list with database from db, in order. 
Deals with Loose, Packed + and compound databases.""" + if isinstance(database, CompoundDB): + compounds = list() + dbs = database.databases() + output.extend(db for db in dbs if not isinstance(db, CompoundDB)) + for cdb in (db for db in dbs if isinstance(db, CompoundDB)): + _databases_recursive(cdb, output) + else: + output.append(database) + # END handle database type + + +class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + elif attr == '_db_cache': + self._db_cache = dict() + else: + super(PureCompoundDB, self)._set_cache_(attr) + + def _db_query(self, sha): + """:return: database containing the given 20 byte sha + :raise BadObject:""" + # most databases use binary representations, prevent converting + # it everytime a database is being queried + try: + return self._db_cache[sha] + except KeyError: + pass + # END first level cache + + for db in self._dbs: + if db.has_object(sha): + self._db_cache[sha] = db + return db + # END for each database + raise BadObject(sha) + + #{ PureObjectDBR interface + + def has_object(self, sha): + try: + self._db_query(sha) + return True + except BadObject: + return False + # END handle exceptions + + def info(self, sha): + return self._db_query(sha).info(sha) + + def stream(self, sha): + return self._db_query(sha).stream(sha) + + def size(self): + return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0) + + def sha_iter(self): + return chain(*(db.sha_iter() for db in self._dbs)) + + #} END object DBR Interface + + #{ Interface + + def databases(self): + return tuple(self._dbs) + + def update_cache(self, force=False): + # something might have changed, clear everything + self._db_cache.clear() + stat = False + for db in self._dbs: + if isinstance(db, CachingDB): + stat |= db.update_cache(force) + # END if is caching db + # END for each database to update + return stat + + def partial_to_complete_sha_hex(self, partial_hexsha): + 
databases = self.databases() + + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + + candidate = None + for db in self._dbs: + full_bin_sha = None + try: + if hasattr(db, 'partial_to_complete_sha_hex'): + full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) + else: + full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) + # END handle database type + except BadObject: + continue + # END ignore bad objects + if full_bin_sha: + if candidate and candidate != full_bin_sha: + raise AmbiguousObjectName(partial_hexsha) + candidate = full_bin_sha + # END handle candidate + # END for each db + if not candidate: + raise BadObject(partial_binsha) + return candidate + + def partial_to_complete_sha(self, partial_binsha, hex_len): + """Simple adaptor to feed into our implementation""" + return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len]) + #} END interface + + +class PureRepositoryPathsMixin(RepositoryPathsMixin): + # slots has no effect here, its just to keep track of used attrs + __slots__ = ("_git_path", '_bare') + + #{ Configuration + repo_dir = '.git' + objs_dir = 'objects' + #} END configuration + + #{ Subclass Interface + def _initialize(self, path): + epath = abspath(expandvars(expanduser(path or os.getcwd()))) + + if not exists(epath): + raise InvalidDBRoot(epath) + #END check file + + self._working_tree_dir = None + self._git_path = None + curpath = epath + + # walk up the path to find the .git dir + while curpath: + if is_git_dir(curpath): + self._git_path = curpath + self._working_tree_dir = os.path.dirname(curpath) + break + gitpath = join(curpath, self.repo_dir) + if is_git_dir(gitpath): + self._git_path = gitpath + self._working_tree_dir = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + # END 
while curpath + + if self._git_path is None: + raise InvalidDBRoot(epath) + # END path not found + + self._bare = self._git_path.endswith(self.repo_dir) + if hasattr(self, 'config_reader'): + try: + self._bare = self.config_reader("repository").getboolean('core','bare') + except Exception: + # lets not assume the option exists, although it should + pass + #END check bare flag + + + #} end subclass interface + + #{ Interface + + def is_bare(self): + return self._bare + + def git_path(self): + return self._git_path + + def working_tree_path(self): + if self.is_bare(): + raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path()) + #END assertion + return dirname(self.git_path()) + + def objects_path(self): + return join(self.git_path(), self.objs_dir) + + def working_dir(self): + if self.is_bare(): + return self.git_path() + else: + return self.working_tree_dir() + #END handle bare state + + #} END interface + + +class PureConfigurationMixin(ConfigurationMixin): + + #{ Configuration + system_config_file_name = "gitconfig" + repo_config_file_name = "config" + #} END + + def __init__(self, *args, **kwargs): + """Verify prereqs""" + assert hasattr(self, 'git_path') + + def _path_at_level(self, level ): + # we do not support an absolute path of the gitconfig on windows , + # use the global config instead + if sys.platform == "win32" and level == "system": + level = "global" + #END handle windows + + if level == "system": + return "/etc/%s" % self.system_config_file_name + elif level == "global": + return normpath(expanduser("~/.%s" % self.system_config_file_name)) + elif level == "repository": + return join(self.git_path(), self.repo_config_file_name) + #END handle level + + raise ValueError("Invalid configuration level: %r" % level) + + #{ Interface + + def config_reader(self, config_level=None): + files = None + if config_level is None: + files = [ self._path_at_level(f) for f in self.config_level ] + else: + files = [ 
self._path_at_level(config_level) ] + #END handle level + return GitConfigParser(files, read_only=True) + + def config_writer(self, config_level="repository"): + return GitConfigParser(self._path_at_level(config_level), read_only=False) + + #} END interface + diff --git a/git/db/py/git.py b/git/db/py/git.py new file mode 100644 index 00000000..bc148c6f --- /dev/null +++ b/git/db/py/git.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureCompoundDB, + PureObjectDBW, + PureRootPathDB, + PureRepositoryPathsMixin, + PureConfigurationMixin, + ) + +from resolve import PureReferencesMixin + +from loose import PureLooseObjectODB +from pack import PurePackedODB +from ref import PureReferenceDB + +from gitdb.util import ( + LazyMixin, + normpath, + join, + dirname + ) +from gitdb.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) +import os + +__all__ = ('PureGitODB', 'PureGitDB') + + +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): + """A git-style object-only database, which contains all objects in the 'objects' + subdirectory. + :note: The type needs to be initialized on the ./objects directory to function, + as it deals solely with object lookup. 
Use a PureGitDB type if you need + reference and push support.""" + # Configuration + PackDBCls = PurePackedODB + LooseDBCls = PureLooseObjectODB + PureReferenceDBCls = PureReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git ./objects directory""" + super(PureGitODB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.PureReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(PureGitODB, self)._set_cache_(attr) + # END handle attrs + + #{ PureObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface + + +class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): + """Git like database with support for object lookup as well as reference resolution. + Our rootpath is set to the actual .git directory (bare on unbare). + + The root_path will be the git objects directory. 
Use git_path() to obtain the actual top-level + git directory.""" + #directories + + def __init__(self, root_path): + """Initialize ourselves on the .git directory, or the .git/objects directory.""" + PureRepositoryPathsMixin._initialize(self, root_path) + super(PureGitDB, self).__init__(self.objects_path()) + + + diff --git a/git/db/py/loose.py b/git/db/py/loose.py new file mode 100644 index 00000000..34e31da6 --- /dev/null +++ b/git/db/py/loose.py @@ -0,0 +1,262 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureRootPathDB, + PureObjectDBR, + PureObjectDBW + ) + + +from gitdb.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) + +from gitdb.stream import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer + ) + +from gitdb.base import ( + OStream, + OInfo + ) + +from gitdb.util import ( + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join + ) + +from gitdb.fun import ( + chunk_size, + loose_object_header_info, + write_object, + stream_copy + ) + +import tempfile +import mmap +import sys +import os + + +__all__ = ( 'PureLooseObjectODB', ) + + +class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = 0444 + if os.name == 'nt': + new_objects_mode = 0644 + + + def __init__(self, root_path): + super(PureLooseObjectODB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # 
Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise 
BadObject(sha) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(PureLooseObjectODB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(bin_to_hex(sha)) + return True + except BadObject: + return False + # END check existance + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + + if istream.binsha is None: + writer = FDCompressedSha1Writer(fd) + else: + writer = FDStream(fd) + # END handle direct stream copies + # END handle custom writer + + try: + try: + if istream.binsha is not None: + # copy as much as possible, the actual uncompressed item size might + # be smaller than the compressed version + stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) + else: + # write object with header, we have to make a new one + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + # END handle direct stream copies + finally: + if tmp_path: + writer.close() + # END assure target 
stream is closed + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + hexsha = None + if istream.binsha: + hexsha = istream.hexsha + else: + hexsha = writer.sha(as_hex=True) + # END handle sha + + if tmp_path: + obj_path = self.db_path(self.object_path(hexsha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + # rename onto existing doesn't work on windows + if os.name == 'nt' and isfile(obj_path): + remove(obj_path) + # END handle win322 + rename(tmp_path, obj_path) + + # make sure its readable for all ! It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) + diff --git a/git/db/py/mem.py b/git/db/py/mem.py new file mode 100644 index 00000000..ba922e96 --- /dev/null +++ b/git/db/py/mem.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains the MemoryDatabase implementation""" +from loose import PureLooseObjectODB +from base import ( + PureObjectDBR, + PureObjectDBW + ) + +from gitdb.base import ( + OStream, + IStream, + ) + +from gitdb.exc import ( + BadObject, + UnsupportedOperation + ) +from gitdb.stream import ( + ZippedStoreShaWriter, + DecompressMemMapReader, + ) + +from cStringIO import StringIO + +__all__ = ("PureMemoryDB", ) + +class 
PureMemoryDB(PureObjectDBR, PureObjectDBW): + """A memory database stores everything to memory, providing fast IO and object + retrieval. It should be used to buffer results and obtain SHAs before writing + it to the actual physical storage, as it allows to query whether object already + exists in the target storage before introducing actual IO + + :note: memory is currently not threadsafe, hence the async methods cannot be used + for storing""" + + def __init__(self): + super(PureMemoryDB, self).__init__() + self._db = PureLooseObjectODB("path/doesnt/matter") + + # maps 20 byte shas to their OStream objects + self._cache = dict() + + def set_ostream(self, stream): + raise UnsupportedOperation("PureMemoryDB's always stream into memory") + + def store(self, istream): + zstream = ZippedStoreShaWriter() + self._db.set_ostream(zstream) + + istream = self._db.store(istream) + zstream.close() # close to flush + zstream.seek(0) + + # don't provide a size, the stream is written in object format, hence the + # header needs decompression + decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) + self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) + + return istream + + def store_async(self, reader): + raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access") + + def has_object(self, sha): + return sha in self._cache + + def info(self, sha): + # we always return streams, which are infos as well + return self.stream(sha) + + def stream(self, sha): + try: + ostream = self._cache[sha] + # rewind stream for the next one to read + ostream.stream.seek(0) + return ostream + except KeyError: + raise BadObject(sha) + # END exception handling + + def size(self): + return len(self._cache) + + def sha_iter(self): + return self._cache.iterkeys() + + + #{ Interface + def stream_copy(self, sha_iter, odb): + """Copy the streams as identified by sha's yielded by sha_iter into the given 
odb + The streams will be copied directly + :note: the object will only be written if it did not exist in the target db + :return: amount of streams actually copied into odb. If smaller than the amount + of input shas, one or more objects did already exist in odb""" + count = 0 + for sha in sha_iter: + if odb.has_object(sha): + continue + # END check object existance + + ostream = self.stream(sha) + # compressed data including header + sio = StringIO(ostream.stream.data()) + istream = IStream(ostream.type, ostream.size, sio, sha) + + odb.store(istream) + count += 1 + # END for each sha + return count + #} END interface diff --git a/git/db/py/pack.py b/git/db/py/pack.py new file mode 100644 index 00000000..1d0e9bfc --- /dev/null +++ b/git/db/py/pack.py @@ -0,0 +1,212 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing a database to deal with packs""" +from gitdb.db import CachingDB +from base import ( + PureRootPathDB, + PureObjectDBR + ) + +from gitdb.util import LazyMixin + +from gitdb.exc import ( + BadObject, + UnsupportedOperation, + AmbiguousObjectName + ) + +from gitdb.pack import PackEntity + +import os +import glob + +__all__ = ('PurePackedODB', ) + +#{ Utilities + + +class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin): + """A database operating on a set of object packs""" + + # the type to use when instantiating a pack entity + PackEntityCls = PackEntity + + # sort the priority list every N queries + # Higher values are better, performance tests don't show this has + # any effect, but it should have one + _sort_interval = 500 + + def __init__(self, root_path): + super(PurePackedODB, self).__init__(root_path) + # list of lists with three items: + # * hits - number of times the pack was hit with a request + # * entity - Pack entity instance + # * 
sha_to_index - PackIndexFile.sha_to_index method for direct cache query + # self._entities = list() # lazy loaded list + self._hit_count = 0 # amount of hits + self._st_mtime = 0 # last modification data of our root path + + def _set_cache_(self, attr): + if attr == '_entities': + self._entities = list() + self.update_cache(force=True) + # END handle entities initialization + + def _sort_entities(self): + self._entities.sort(key=lambda l: l[0], reverse=True) + + def _pack_info(self, sha): + """:return: tuple(entity, index) for an item at the given sha + :param sha: 20 or 40 byte sha + :raise BadObject: + :note: This method is not thread-safe, but may be hit in multi-threaded + operation. The worst thing that can happen though is a counter that + was not incremented, or the list being in wrong order. So we safe + the time for locking here, lets see how that goes""" + # presort ? + if self._hit_count % self._sort_interval == 0: + self._sort_entities() + # END update sorting + + for item in self._entities: + index = item[2](sha) + if index is not None: + item[0] += 1 # one hit for you + self._hit_count += 1 # general hit count + return (item[1], index) + # END index found in pack + # END for each item + + # no hit, see whether we have to update packs + # NOTE: considering packs don't change very often, we safe this call + # and leave it to the super-caller to trigger that + raise BadObject(sha) + + #{ Object DB Read + + def has_object(self, sha): + try: + self._pack_info(sha) + return True + except BadObject: + return False + # END exception handling + + def info(self, sha): + entity, index = self._pack_info(sha) + return entity.info_at_index(index) + + def stream(self, sha): + entity, index = self._pack_info(sha) + return entity.stream_at_index(index) + + def sha_iter(self): + sha_list = list() + for entity in self.entities(): + index = entity.index() + sha_by_index = index.sha + for index in xrange(index.size()): + yield sha_by_index(index) + # END for each index + 
# END for each entity + + def size(self): + sizes = [item[1].index().size() for item in self._entities] + return reduce(lambda x,y: x+y, sizes, 0) + + #} END object db read + + #{ object db write + + def store(self, istream): + """Storing individual objects is not feasible as a pack is designed to + hold multiple objects. Writing or rewriting packs for single objects is + inefficient""" + raise UnsupportedOperation() + + def store_async(self, reader): + # TODO: add PureObjectDBRW before implementing this + raise NotImplementedError() + + #} END object db write + + + #{ Interface + + def update_cache(self, force=False): + """ + Update our cache with the acutally existing packs on disk. Add new ones, + and remove deleted ones. We keep the unchanged ones + + :param force: If True, the cache will be updated even though the directory + does not appear to have changed according to its modification timestamp. + :return: True if the packs have been updated so there is new information, + False if there was no change to the pack database""" + stat = os.stat(self.root_path()) + if not force and stat.st_mtime <= self._st_mtime: + return False + # END abort early on no change + self._st_mtime = stat.st_mtime + + # packs are supposed to be prefixed with pack- by git-convention + # get all pack files, figure out what changed + pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) + our_pack_files = set(item[1].pack().path() for item in self._entities) + + # new packs + for pack_file in (pack_files - our_pack_files): + # init the hit-counter/priority with the size, a good measure for hit- + # probability. 
Its implemented so that only 12 bytes will be read + entity = self.PackEntityCls(pack_file) + self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) + # END for each new packfile + + # removed packs + for pack_file in (our_pack_files - pack_files): + del_index = -1 + for i, item in enumerate(self._entities): + if item[1].pack().path() == pack_file: + del_index = i + break + # END found index + # END for each entity + assert del_index != -1 + del(self._entities[del_index]) + # END for each removed pack + + # reinitialize prioritiess + self._sort_entities() + return True + + def entities(self): + """:return: list of pack entities operated upon by this database""" + return [ item[1] for item in self._entities ] + + def partial_to_complete_sha(self, partial_binsha, canonical_length): + """:return: 20 byte sha as inferred by the given partial binary sha + :param partial_binsha: binary sha with less than 20 bytes + :param canonical_length: length of the corresponding canonical representation. + It is required as binary sha's cannot display whether the original hex sha + had an odd or even number of characters + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for item in self._entities: + item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) + if item_index is not None: + sha = item[1].index().sha(item_index) + if candidate and candidate != sha: + raise AmbiguousObjectName(partial_binsha) + candidate = sha + # END handle full sha could be found + # END for each entity + + if candidate: + return candidate + + # still not found ? 
+ raise BadObject(partial_binsha) + + #} END interface diff --git a/git/db/py/ref.py b/git/db/py/ref.py new file mode 100644 index 00000000..951f0437 --- /dev/null +++ b/git/db/py/ref.py @@ -0,0 +1,77 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import PureCompoundDB + +import os +__all__ = ('PureReferenceDB', ) + +class PureReferenceDB(PureCompoundDB): + """A database consisting of database referred to in a file""" + + # Configuration + # Specifies the object database to use for the paths found in the alternates + # file. If None, it defaults to the PureGitODB + ObjectDBCls = None + + def __init__(self, ref_file): + super(PureReferenceDB, self).__init__() + self._ref_file = ref_file + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + self._update_dbs_from_ref_file() + else: + super(PureReferenceDB, self)._set_cache_(attr) + # END handle attrs + + def _update_dbs_from_ref_file(self): + dbcls = self.ObjectDBCls + if dbcls is None: + # late import + from git import PureGitODB + dbcls = PureGitODB + # END get db type + + # try to get as many as possible, don't fail if some are unavailable + ref_paths = list() + try: + ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()] + except (OSError, IOError): + pass + # END handle alternates + + ref_paths_set = set(ref_paths) + cur_ref_paths_set = set(db.root_path() for db in self._dbs) + + # remove existing + for path in (cur_ref_paths_set - ref_paths_set): + for i, db in enumerate(self._dbs[:]): + if db.root_path() == path: + del(self._dbs[i]) + continue + # END del matching db + # END for each path to remove + + # add new + # sort them to maintain order + added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) + for path in added_paths: + try: + db = dbcls(path) + # 
force an update to verify path + if isinstance(db, PureCompoundDB): + db.databases() + # END verification + self._dbs.append(db) + except Exception, e: + # ignore invalid paths or issues + pass + # END for each path to add + + def update_cache(self, force=False): + # re-read alternates and update databases + self._update_dbs_from_ref_file() + return super(PureReferenceDB, self).update_cache(force) diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py new file mode 100644 index 00000000..86c1e594 --- /dev/null +++ b/git/db/py/resolve.py @@ -0,0 +1,297 @@ +"""Module with an implementation for refspec parsing. It is the pure-python +version assuming compatible interface for reference and object types""" + +from gitdb.db.interface import ReferencesMixin +from gitdb.exc import BadObject +from gitdb.ref import SymbolicReference +from gitdb.object.base import Object +from gitdb.util import ( + join, + isdir, + isfile, + hex_to_bin, + bin_to_hex, + is_git_dir + ) +from string import digits +import os +import re + +__all__ = ["PureReferencesMixin"] + +#{ Utilities + +def short_to_long(odb, hexsha): + """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha + or None if no candidate could be found. + :param hexsha: hexsha with less than 40 byte""" + try: + return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) + except BadObject: + return None + # END exception handling + + +def name_to_object(repo, name, return_ref=False): + """ + :return: object specified by the given name, hexshas ( short and long ) + as well as references are supported + :param return_ref: if name specifies a reference, we will return the reference + instead of the object. Otherwise it will raise BadObject + """ + hexsha = None + + # is it a hexsha ? 
Try the most common ones, which is 7 to 40 + if repo.re_hexsha_shortened.match(name): + if len(name) != 40: + # find long sha for short sha + hexsha = short_to_long(repo.odb, name) + else: + hexsha = name + # END handle short shas + #END find sha if it matches + + # if we couldn't find an object for what seemed to be a short hexsha + # try to find it as reference anyway, it could be named 'aaa' for instance + if hexsha is None: + for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'): + try: + hexsha = SymbolicReference.dereference_recursive(repo, base % name) + if return_ref: + return SymbolicReference(repo, base % name) + #END handle symbolic ref + break + except ValueError: + pass + # END for each base + # END handle hexsha + + # didn't find any ref, this is an error + if return_ref: + raise BadObject("Couldn't find reference named %r" % name) + #END handle return ref + + # tried everything ? fail + if hexsha is None: + raise BadObject(name) + # END assert hexsha was found + + return Object.new_from_sha(repo, hex_to_bin(hexsha)) + +def deref_tag(tag): + """Recursively dereference a tag and return the resulting object""" + while True: + try: + tag = tag.object + except AttributeError: + break + # END dereference tag + return tag + +def to_commit(obj): + """Convert the given object to a commit if possible and return it""" + if obj.type == 'tag': + obj = deref_tag(obj) + + if obj.type != "commit": + raise ValueError("Cannot convert object %r to type commit" % obj) + # END verify type + return obj + +def rev_parse(repo, rev): + """ + :return: Object at the given revision, either Commit, Tag, Tree or Blob + :param rev: git-rev-parse compatible revision specification, please see + http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html + for details + :note: Currently there is no access to the rev-log, rev-specs may only contain + topological tokens such ~ and ^. 
+ :raise BadObject: if the given revision could not be found + :raise ValueError: If rev couldn't be parsed + :raise IndexError: If invalid reflog index is specified""" + + # colon search mode ? + if rev.startswith(':/'): + # colon search mode + raise NotImplementedError("commit by message search ( regex )") + # END handle search + + obj = None + ref = None + output_type = "commit" + start = 0 + parsed_to = 0 + lr = len(rev) + while start < lr: + if rev[start] not in "^~:@": + start += 1 + continue + # END handle start + + token = rev[start] + + if obj is None: + # token is a rev name + if start == 0: + ref = repo.head.ref + else: + if token == '@': + ref = name_to_object(repo, rev[:start], return_ref=True) + else: + obj = name_to_object(repo, rev[:start]) + #END handle token + #END handle refname + + if ref is not None: + obj = ref.commit + #END handle ref + # END initialize obj on first token + + + start += 1 + + # try to parse {type} + if start < lr and rev[start] == '{': + end = rev.find('}', start) + if end == -1: + raise ValueError("Missing closing brace to define type in %s" % rev) + output_type = rev[start+1:end] # exclude brace + + # handle type + if output_type == 'commit': + pass # default + elif output_type == 'tree': + try: + obj = to_commit(obj).tree + except (AttributeError, ValueError): + pass # error raised later + # END exception handling + elif output_type in ('', 'blob'): + if obj.type == 'tag': + obj = deref_tag(obj) + else: + # cannot do anything for non-tags + pass + # END handle tag + elif token == '@': + # try single int + assert ref is not None, "Require Reference to access reflog" + revlog_index = None + try: + # transform reversed index into the format of our revlog + revlog_index = -(int(output_type)+1) + except ValueError: + # TODO: Try to parse the other date options, using parse_date + # maybe + raise NotImplementedError("Support for additional @{...} modes not implemented") + #END handle revlog index + + try: + entry = 
ref.log_entry(revlog_index) + except IndexError: + raise IndexError("Invalid revlog index: %i" % revlog_index) + #END handle index out of bound + + obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) + + # make it pass the following checks + output_type = None + else: + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + # END handle output type + + # empty output types don't require any specific type, its just about dereferencing tags + if output_type and obj.type != output_type: + raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type)) + # END verify ouput type + + start = end+1 # skip brace + parsed_to = start + continue + # END parse type + + # try to parse a number + num = 0 + if token != ":": + found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + # no explicit number given, 1 is the default + # It could be 0 though + if not found_digit: + num = 1 + # END set default num + # END number parsing only if non-blob mode + + + parsed_to = start + # handle hiererarchy walk + try: + if token == "~": + obj = to_commit(obj) + for item in xrange(num): + obj = obj.parents[0] + # END for each history item to walk + elif token == "^": + obj = to_commit(obj) + # must be n'th parent + if num: + obj = obj.parents[num-1] + elif token == ":": + if obj.type != "tree": + obj = obj.tree + # END get tree type + obj = obj[rev[start:]] + parsed_to = lr + else: + raise ValueError("Invalid token: %r" % token) + # END end handle tag + except (IndexError, AttributeError): + raise BadObject("Invalid Revision in %s" % rev) + # END exception handling + # END parse loop + + # still no obj ? 
Its probably a simple name + if obj is None: + obj = name_to_object(repo, rev) + parsed_to = lr + # END handle simple name + + if obj is None: + raise ValueError("Revision specifier could not be parsed: %s" % rev) + + if parsed_to != lr: + raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) + + return obj + +#} END utilities + +class PureReferencesMixin(ReferencesMixin): + """Pure-Python refparse implementation""" + + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + + def resolve(self, name): + return rev_parse(self, name) + + @property + def references(self): + raise NotImplementedError() + + @property + def heads(self): + raise NotImplementedError() + + @property + def tags(self): + raise NotImplementedError() diff --git a/git/db/py/transport.py b/git/db/py/transport.py new file mode 100644 index 00000000..783fb8d5 --- /dev/null +++ b/git/db/py/transport.py @@ -0,0 +1,89 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Implement a transport compatible database which sends objects using the git protocol""" + +from gitdb.db.interface import ( TransportDB, + PushInfo, + FetchInfo, + RefSpec ) + +__all__ = ["PureTransportDB"] + +class PurePushInfo(PushInfo): + """TODO: Implementation""" + __slots__ = tuple() + + + +class PureFetchInfo(FetchInfo): + """TODO""" + __slots__ = tuple() + + +class PureTransportDB(TransportDB): + """A database which allows to transport objects from and to different locations + which are specified by urls (location) and refspecs (what to transport, + see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). + + At the beginning of a transport operation, it will be determined which objects + have to be sent (either by this or by the other side). 
+ + Afterwards a pack with the required objects is sent (or received). If there is + nothing to send, the pack will be empty. + + The communication itself if implemented using a protocol instance which deals + with the actual formatting of the lines sent. + + As refspecs involve symbolic names for references to be handled, we require + RefParse functionality. How this is done is up to the actual implementation.""" + # The following variables need to be set by the derived class + #{Configuration + protocol = None + #}end configuraiton + + #{ Interface + + def fetch(self, url, refspecs, progress=None, **kwargs): + """Fetch the objects defined by the given refspec from the given url. + :param url: url identifying the source of the objects. It may also be + a symbol from which the respective url can be resolved, like the + name of the remote. The implementation should allow objects as input + as well, these are assumed to resovle to a meaningful string though. + :param refspecs: iterable of reference specifiers or RefSpec instance, + identifying the references to be fetch from the remote. + :param progress: callable which receives progress messages for user consumption + :param kwargs: may be used for additional parameters that the actual implementation could + find useful. + :return: List of PureFetchInfo compatible instances which provide information about what + was previously fetched, in the order of the input refspecs. + :note: even if the operation fails, one of the returned PureFetchInfo instances + may still contain errors or failures in only part of the refspecs. + :raise: if any issue occours during the transport or if the url is not + supported by the protocol. + """ + raise NotImplementedError() + + def push(self, url, refspecs, progress=None, **kwargs): + """Transport the objects identified by the given refspec to the remote + at the given url. 
+ :param url: Decribes the location which is to receive the objects + see fetch() for more details + :param refspecs: iterable of refspecs strings or RefSpec instances + to identify the objects to push + :param progress: see fetch() + :param kwargs: additional arguments which may be provided by the caller + as they may be useful to the actual implementation + :todo: what to return ? + :raise: if any issue arises during transport or if the url cannot be handled""" + raise NotImplementedError() + + @property + def remotes(self): + """:return: An IterableList of Remote objects allowing to access and manipulate remotes + :note: Remote objects can also be used for the actual push or fetch operation""" + raise NotImplementedError() + + #}end interface + -- cgit v1.2.3 From acf5e6ea64a2f24117f1d419c208ed1c38c43690 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 15:03:14 +0200 Subject: replaced all gitdb strings with git --- git/db/cmd/git.py | 16 ++++++++-------- git/db/py/base.py | 8 ++++---- git/db/py/git.py | 4 ++-- git/db/py/loose.py | 10 +++++----- git/db/py/mem.py | 6 +++--- git/db/py/pack.py | 8 ++++---- git/db/py/resolve.py | 10 +++++----- git/db/py/transport.py | 2 +- 8 files changed, 32 insertions(+), 32 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/git.py b/git/db/cmd/git.py index 5f977c6f..6c060f0e 100644 --- a/git/db/cmd/git.py +++ b/git/db/cmd/git.py @@ -1,29 +1,29 @@ -"""Module with our own gitdb implementation - it uses the git command""" +"""Module with our own git implementation - it uses the git command""" from exc import ( GitCommandError, BadObject ) -from gitdb.base import ( +from git.base import ( OInfo, OStream ) -from gitdb.util import ( +from git.util import ( bin_to_hex, hex_to_bin ) -from gitdb.db.py import ( +from git.db.py import ( PureGitDB, PureLooseObjectODB ) from git.util import RemoteProgress -from gitdb.db.py.base import TransportDB -from gitdb.db.interface import FetchInfo as GitdbFetchInfo -from 
gitdb.db.interface import PushInfo as GitdbPushInfo +from git.db.py.base import TransportDB +from git.db.interface import FetchInfo as GitdbFetchInfo +from git.db.interface import PushInfo as GitdbPushInfo from git.util import join_path -from gitdb.util import join +from git.util import join from refs import ( Reference, diff --git a/git/db/py/base.py b/git/db/py/base.py index c378b10e..28bbf258 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -4,9 +4,9 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains basic implementations for the interface building blocks""" -from gitdb.db.interface import * +from git.db.interface import * -from gitdb.util import ( +from git.util import ( pool, join, normpath, @@ -21,8 +21,8 @@ from gitdb.util import ( is_git_dir ) -from gitdb.config import GitConfigParser -from gitdb.exc import ( +from git.config import GitConfigParser +from git.exc import ( BadObject, AmbiguousObjectName, InvalidDBRoot diff --git a/git/db/py/git.py b/git/db/py/git.py index bc148c6f..1f929e31 100644 --- a/git/db/py/git.py +++ b/git/db/py/git.py @@ -16,13 +16,13 @@ from loose import PureLooseObjectODB from pack import PurePackedODB from ref import PureReferenceDB -from gitdb.util import ( +from git.util import ( LazyMixin, normpath, join, dirname ) -from gitdb.exc import ( +from git.exc import ( InvalidDBRoot, BadObject, AmbiguousObjectName diff --git a/git/db/py/loose.py b/git/db/py/loose.py index 34e31da6..56915f18 100644 --- a/git/db/py/loose.py +++ b/git/db/py/loose.py @@ -9,25 +9,25 @@ from base import ( ) -from gitdb.exc import ( +from git.exc import ( InvalidDBRoot, BadObject, AmbiguousObjectName ) -from gitdb.stream import ( +from git.stream import ( DecompressMemMapReader, FDCompressedSha1Writer, FDStream, Sha1Writer ) -from gitdb.base import ( +from git.base import ( OStream, OInfo ) -from gitdb.util import ( +from git.util import ( file_contents_ro_filepath, ENOENT, hex_to_bin, @@ -44,7 +44,7 @@ from 
gitdb.util import ( join ) -from gitdb.fun import ( +from git.fun import ( chunk_size, loose_object_header_info, write_object, diff --git a/git/db/py/mem.py b/git/db/py/mem.py index ba922e96..5851aebc 100644 --- a/git/db/py/mem.py +++ b/git/db/py/mem.py @@ -9,16 +9,16 @@ from base import ( PureObjectDBW ) -from gitdb.base import ( +from git.base import ( OStream, IStream, ) -from gitdb.exc import ( +from git.exc import ( BadObject, UnsupportedOperation ) -from gitdb.stream import ( +from git.stream import ( ZippedStoreShaWriter, DecompressMemMapReader, ) diff --git a/git/db/py/pack.py b/git/db/py/pack.py index 1d0e9bfc..75b75468 100644 --- a/git/db/py/pack.py +++ b/git/db/py/pack.py @@ -3,21 +3,21 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing a database to deal with packs""" -from gitdb.db import CachingDB +from git.db import CachingDB from base import ( PureRootPathDB, PureObjectDBR ) -from gitdb.util import LazyMixin +from git.util import LazyMixin -from gitdb.exc import ( +from git.exc import ( BadObject, UnsupportedOperation, AmbiguousObjectName ) -from gitdb.pack import PackEntity +from git.pack import PackEntity import os import glob diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 86c1e594..7c03bcd1 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -1,11 +1,11 @@ """Module with an implementation for refspec parsing. 
It is the pure-python version assuming compatible interface for reference and object types""" -from gitdb.db.interface import ReferencesMixin -from gitdb.exc import BadObject -from gitdb.ref import SymbolicReference -from gitdb.object.base import Object -from gitdb.util import ( +from git.db.interface import ReferencesMixin +from git.exc import BadObject +from git.ref import SymbolicReference +from git.object.base import Object +from git.util import ( join, isdir, isfile, diff --git a/git/db/py/transport.py b/git/db/py/transport.py index 783fb8d5..f8edfb23 100644 --- a/git/db/py/transport.py +++ b/git/db/py/transport.py @@ -4,7 +4,7 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Implement a transport compatible database which sends objects using the git protocol""" -from gitdb.db.interface import ( TransportDB, +from git.db.interface import ( TransportDB, PushInfo, FetchInfo, RefSpec ) -- cgit v1.2.3 From 7ae36c3e019a5cc16924d1b6007774bfb625036f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 18:53:59 +0200 Subject: Started to fix imports - tests still have no chance to work as database changed drastically. 
Now the actual work begins --- git/db/cmd/__init__.py | 1 + git/db/cmd/complex.py | 434 ++++++++++++++++++++++++++++++++++++++++++++++++ git/db/cmd/git.py | 437 ------------------------------------------------- git/db/py/__init__.py | 9 +- git/db/py/base.py | 1 - git/db/py/complex.py | 113 +++++++++++++ git/db/py/git.py | 113 ------------- git/db/py/mem.py | 3 +- git/db/py/ref.py | 2 +- git/db/py/resolve.py | 4 +- 10 files changed, 553 insertions(+), 564 deletions(-) create mode 100644 git/db/cmd/complex.py delete mode 100644 git/db/cmd/git.py create mode 100644 git/db/py/complex.py delete mode 100644 git/db/py/git.py (limited to 'git/db') diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py index 8b137891..968d8c11 100644 --- a/git/db/cmd/__init__.py +++ b/git/db/cmd/__init__.py @@ -1 +1,2 @@ +from complex import * diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py new file mode 100644 index 00000000..73e2048f --- /dev/null +++ b/git/db/cmd/complex.py @@ -0,0 +1,434 @@ +"""Module with our own git implementation - it uses the git command""" +from git.exc import ( + GitCommandError, + BadObject + ) + +from git.base import ( + OInfo, + OStream + ) + +from git.util import ( + bin_to_hex, + hex_to_bin + ) +from git.db.py.loose import PureLooseObjectODB +from git.util import RemoteProgress +from git.db.py.base import TransportDB +from git.db.interface import FetchInfo as GitdbFetchInfo +from git.db.interface import PushInfo as GitdbPushInfo + +from git.util import join_path +from git.util import join + +from git.refs import ( + Reference, + RemoteReference, + SymbolicReference, + TagReference + ) + +import re +import sys + + +__all__ = ('CmdGitDB', 'RemoteProgress' ) + + +class PushInfo(GitdbPushInfo): + """ + Carries information about the result of a push operation of a single head:: + + info = remote.push()[0] + info.flags # bitflags providing more information about the result + info.local_ref # Reference pointing to the local reference that was pushed 
+ # It is None if the ref was deleted. + info.remote_ref_string # path to the remote reference located on the remote side + info.remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + info.summary # summary line providing human readable english text about the push + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') + + _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, + '-' : GitdbPushInfo.DELETED, '*' : 0, + '+' : GitdbPushInfo.FORCED_UPDATE, + ' ' : GitdbPushInfo.FAST_FORWARD, + '=' : GitdbPushInfo.UP_TO_DATE, + '!' : GitdbPushInfo.ERROR } + + def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, + summary=''): + """ Initialize a new instance """ + self.flags = flags + self.local_ref = local_ref + self.remote_ref_string = remote_ref_string + self._remote = remote + self.old_commit_binsha = old_commit_binsha + self.summary = summary + + @property + def remote_ref(self): + """ + :return: + Remote Reference or TagReference in the local repository corresponding + to the remote_ref_string kept in this instance.""" + # translate heads to a local remote, tags stay as they are + if self.remote_ref_string.startswith("refs/tags"): + return TagReference(self._remote.repo, self.remote_ref_string) + elif self.remote_ref_string.startswith("refs/heads"): + remote_ref = Reference(self._remote.repo, self.remote_ref_string) + return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + else: + raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) + # END + + @classmethod + def _from_line(cls, remote, line): + """Create a new PushInfo instance as parsed from line which is expected to be like + 
refs/heads/master:refs/heads/master 05d2687..1d0568e""" + control_character, from_to, summary = line.split('\t', 3) + flags = 0 + + # control character handling + try: + flags |= cls._flag_map[ control_character ] + except KeyError: + raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) + # END handle control character + + # from_to handling + from_ref_string, to_ref_string = from_to.split(':') + if flags & cls.DELETED: + from_ref = None + else: + from_ref = Reference.from_path(remote.repo, from_ref_string) + + # commit handling, could be message or commit info + old_commit_binsha = None + if summary.startswith('['): + if "[rejected]" in summary: + flags |= cls.REJECTED + elif "[remote rejected]" in summary: + flags |= cls.REMOTE_REJECTED + elif "[remote failure]" in summary: + flags |= cls.REMOTE_FAILURE + elif "[no match]" in summary: + flags |= cls.ERROR + elif "[new tag]" in summary: + flags |= cls.NEW_TAG + elif "[new branch]" in summary: + flags |= cls.NEW_HEAD + # uptodate encoded in control character + else: + # fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit + split_token = "..." + if control_character == " ": + split_token = ".." + old_sha, new_sha = summary.split(' ')[0].split(split_token) + # have to use constructor here as the sha usually is abbreviated + old_commit_binsha = remote.repo.commit(old_sha) + # END message handling + + return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) + + +class FetchInfo(GitdbFetchInfo): + """ + Carries information about the results of a fetch operation of a single head:: + + info = remote.fetch()[0] + info.ref # Symbolic Reference or RemoteReference to the changed + # remote head or FETCH_HEAD + info.flags # additional flags to be & with enumeration members, + # i.e. 
info.flags & info.REJECTED + # is 0 if ref is FETCH_HEAD + info.note # additional notes given by git-fetch intended for the user + info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref, otherwise None + """ + __slots__ = ('ref','old_commit_binsha', 'flags', 'note') + + # %c %-*s %-*s -> %s (%s) + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") + + _flag_map = { '!' : GitdbFetchInfo.ERROR, + '+' : GitdbFetchInfo.FORCED_UPDATE, + '-' : GitdbFetchInfo.TAG_UPDATE, + '*' : 0, + '=' : GitdbFetchInfo.HEAD_UPTODATE, + ' ' : GitdbFetchInfo.FAST_FORWARD } + + def __init__(self, ref, flags, note = '', old_commit_binsha = None): + """ + Initialize a new instance + """ + self.ref = ref + self.flags = flags + self.note = note + self.old_commit_binsha = old_commit_binsha + + def __str__(self): + return self.name + + @property + def name(self): + """:return: Name of our remote ref""" + return self.ref.name + + @property + def commit(self): + """:return: Commit of our remote ref""" + return self.ref.commit + + @classmethod + def _from_line(cls, repo, line, fetch_line): + """Parse information from the given line as returned by git-fetch -v + and return a new FetchInfo object representing this information. + + We can handle a line as follows + "%c %-*s %-*s -> %s%s" + + Where c is either ' ', !, +, -, *, or = + ! 
means error + + means success forcing update + - means a tag was updated + * means birth of new branch or tag + = means the head was up to date ( and not moved ) + ' ' means a fast-forward + + fetch line is the corresponding line from FETCH_HEAD, like + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" + match = cls.re_fetch_result.match(line) + if match is None: + raise ValueError("Failed to parse line: %r" % line) + + # parse lines + control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() + try: + new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") + ref_type_name, fetch_note = fetch_note.split(' ', 1) + except ValueError: # unpack error + raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) + + # handle FETCH_HEAD and figure out ref type + # If we do not specify a target branch like master:refs/remotes/origin/master, + # the fetch result is stored in FETCH_HEAD which destroys the rule we usually + # have. 
In that case we use a symbolic reference which is detached + ref_type = None + if remote_local_ref == "FETCH_HEAD": + ref_type = SymbolicReference + elif ref_type_name == "branch": + ref_type = RemoteReference + elif ref_type_name == "tag": + ref_type = TagReference + else: + raise TypeError("Cannot handle reference type: %r" % ref_type_name) + + # create ref instance + if ref_type is SymbolicReference: + remote_local_ref = ref_type(repo, "FETCH_HEAD") + else: + remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) + # END create ref instance + + note = ( note and note.strip() ) or '' + + # parse flags from control_character + flags = 0 + try: + flags |= cls._flag_map[control_character] + except KeyError: + raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) + # END control char exception hanlding + + # parse operation string for more info - makes no sense for symbolic refs + old_commit_binsha = None + if isinstance(remote_local_ref, Reference): + if 'rejected' in operation: + flags |= cls.REJECTED + if 'new tag' in operation: + flags |= cls.NEW_TAG + if 'new branch' in operation: + flags |= cls.NEW_HEAD + if '...' in operation or '..' in operation: + split_token = '...' + if control_character == ' ': + split_token = split_token[:-1] + old_commit_binsha = repo.rev_parse(operation.split(split_token)[0]) + # END handle refspec + # END reference flag handling + + return cls(remote_local_ref, flags, note, old_commit_binsha) + + +class CmdGitDB(PureLooseObjectODB, TransportDB): + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database. 
+ :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + def __init__(self, root_path, git): + """Initialize this instance with the root and a git command""" + super(CmdGitDB, self).__init__(root_path) + self._git = git + + @classmethod + def _digest_process_messages(cls, fh, progress): + """Read progress messages from file-like object fh, supplying the respective + progress messages to the progress instance. + + :return: list(line, ...) list of lines without linebreaks that did + not contain progress information""" + line_so_far = '' + dropped_lines = list() + while True: + char = fh.read(1) + if not char: + break + + if char in ('\r', '\n'): + dropped_lines.extend(progress._parse_progress_line(line_so_far)) + line_so_far = '' + else: + line_so_far += char + # END process parsed line + # END while file is not done reading + return dropped_lines + + @classmethod + def _finalize_proc(cls, proc): + """Wait for the process (fetch, pull or push) and handle its errors accordingly""" + try: + proc.wait() + except GitCommandError,e: + # if a push has rejected items, the command has non-zero return status + # a return status of 128 indicates a connection error - reraise the previous one + if proc.poll() == 128: + raise + pass + # END exception handling + + + def _get_fetch_info_from_stderr(self, proc, progress): + # skip first line as it is some remote info we are not interested in + output = IterableList('name') + + + # lines which are no progress are fetch info lines + # this also waits for the command to finish + # Skip some progress lines that don't provide relevant information + fetch_info_lines = list() + for line in self._digest_process_messages(proc.stderr, progress): + if line.startswith('From') or line.startswith('remote: Total'): + continue + elif line.startswith('warning:'): + print >> sys.stderr, line + continue + elif line.startswith('fatal:'): + raise GitCommandError(("Error when 
fetching: %s" % line,), 2) + # END handle special messages + fetch_info_lines.append(line) + # END for each line + + # read head information + fp = open(join(self.root_path(), 'FETCH_HEAD'),'r') + fetch_head_info = fp.readlines() + fp.close() + + assert len(fetch_info_lines) == len(fetch_head_info) + + output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) + for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) + + self._finalize_proc(proc) + return output + + def _get_push_info(self, proc, progress): + # read progress information from stderr + # we hope stdout can hold all the data, it should ... + # read the lines manually as it will use carriage returns between the messages + # to override the previous one. This is why we read the bytes manually + self._digest_process_messages(proc.stderr, progress) + + output = IterableList('name') + for line in proc.stdout.readlines(): + try: + output.append(PushInfo._from_line(self, line)) + except ValueError: + # if an error happens, additional info is given which we cannot parse + pass + # END exception handling + # END for each line + + self._finalize_proc(proc) + return output + + + + #{ ODB Interface + + def info(self, sha): + hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) + return OInfo(hex_to_bin(hexsha), typename, size) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, stream) + + #} END odb interface + + # { Interface + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: Full binary 20 byte sha from the given partial hexsha + :raise AmbiguousObjectName: + :raise BadObject: + :note: currently we only raise BadObject as git does not communicate + AmbiguousObjects separately""" + try: + hexsha, typename, size = self._git.get_object_header(partial_hexsha) + return hex_to_bin(hexsha) + except 
(GitCommandError, ValueError): + raise BadObject(partial_hexsha) + # END handle exceptions + + #} END interface + + #{ Transport DB interface + + def push(self, url, refspecs=None, progress=None, **kwargs): + """Push given refspecs using the git default implementation + :param url: may be a remote name or a url + :param refspecs: single string, RefSpec instance or list of such or None. + :param progress: RemoteProgress derived instance or None + :param **kwargs: Additional arguments to be passed to the git-push process""" + proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs) + return self._get_push_info(proc, progress or RemoteProgress()) + + def pull(self, url, refspecs=None, progress=None, **kwargs): + """Fetch and merge the given refspecs. + If not refspecs are given, the merge will only work properly if you + have setup upstream (tracking) branches. + :param url: may be a remote name or a url + :param refspecs: see push() + :param progress: see push()""" + proc = self._git.pull(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + def fetch(self, url, refspecs=None, progress=None, **kwargs): + """Fetch the latest changes + :param url: may be a remote name or a url + :param refspecs: see push() + :param progress: see push()""" + proc = self._git.fetch(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + #} end transport db interface diff --git a/git/db/cmd/git.py b/git/db/cmd/git.py deleted file mode 100644 index 6c060f0e..00000000 --- a/git/db/cmd/git.py +++ /dev/null @@ -1,437 +0,0 @@ -"""Module with our own git implementation - it uses the git command""" -from exc import ( - GitCommandError, - BadObject - ) - -from git.base import ( - OInfo, - OStream - ) - -from git.util import ( - bin_to_hex, - hex_to_bin - ) -from git.db.py 
import ( - PureGitDB, - PureLooseObjectODB - ) -from git.util import RemoteProgress -from git.db.py.base import TransportDB -from git.db.interface import FetchInfo as GitdbFetchInfo -from git.db.interface import PushInfo as GitdbPushInfo - -from git.util import join_path -from git.util import join - -from refs import ( - Reference, - RemoteReference, - SymbolicReference, - TagReference - ) - -import re -import sys - - -__all__ = ('GitCmdObjectDB', 'PureGitDB', 'RemoteProgress' ) - - -class PushInfo(GitdbPushInfo): - """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') - - _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, - '-' : GitdbPushInfo.DELETED, '*' : 0, - '+' : GitdbPushInfo.FORCED_UPDATE, - ' ' : GitdbPushInfo.FAST_FORWARD, - '=' : GitdbPushInfo.UP_TO_DATE, - '!' 
: GitdbPushInfo.ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, - summary=''): - """ Initialize a new instance """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit_binsha = old_commit_binsha - self.summary = summary - - @property - def remote_ref(self): - """ - :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e""" - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit_binsha = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote 
failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit_binsha = remote.repo.commit(old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) - - -class FetchInfo(GitdbFetchInfo): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. info.flags & info.REJECTED - # is 0 if ref is FETCH_HEAD - info.note # additional notes given by git-fetch intended for the user - info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit_binsha', 'flags', 'note') - - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") - - _flag_map = { '!' 
: GitdbFetchInfo.ERROR, - '+' : GitdbFetchInfo.FORCED_UPDATE, - '-' : GitdbFetchInfo.TAG_UPDATE, - '*' : 0, - '=' : GitdbFetchInfo.HEAD_UPTODATE, - ' ' : GitdbFetchInfo.FAST_FORWARD } - - def __init__(self, ref, flags, note = '', old_commit_binsha = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit_binsha = old_commit_binsha - - def __str__(self): - return self.name - - @property - def name(self): - """:return: Name of our remote ref""" - return self.ref.name - - @property - def commit(self): - """:return: Commit of our remote ref""" - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. 
In that case we use a symbolic reference which is detached - ref_type = None - if remote_local_ref == "FETCH_HEAD": - ref_type = SymbolicReference - elif ref_type_name == "branch": - ref_type = RemoteReference - elif ref_type_name == "tag": - ref_type = TagReference - else: - raise TypeError("Cannot handle reference type: %r" % ref_type_name) - - # create ref instance - if ref_type is SymbolicReference: - remote_local_ref = ref_type(repo, "FETCH_HEAD") - else: - remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) - # END create ref instance - - note = ( note and note.strip() ) or '' - - # parse flags from control_character - flags = 0 - try: - flags |= cls._flag_map[control_character] - except KeyError: - raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) - # END control char exception hanlding - - # parse operation string for more info - makes no sense for symbolic refs - old_commit_binsha = None - if isinstance(remote_local_ref, Reference): - if 'rejected' in operation: - flags |= cls.REJECTED - if 'new tag' in operation: - flags |= cls.NEW_TAG - if 'new branch' in operation: - flags |= cls.NEW_HEAD - if '...' in operation or '..' in operation: - split_token = '...' - if control_character == ' ': - split_token = split_token[:-1] - old_commit_binsha = repo.rev_parse(operation.split(split_token)[0]) - # END handle refspec - # END reference flag handling - - return cls(remote_local_ref, flags, note, old_commit_binsha) - - -class GitCmdObjectDB(PureLooseObjectODB, TransportDB): - """A database representing the default git object store, which includes loose - objects, pack files and an alternates file - - It will create objects only in the loose object database. 
- :note: for now, we use the git command to do all the lookup, just until he - have packs and the other implementations - """ - def __init__(self, root_path, git): - """Initialize this instance with the root and a git command""" - super(GitCmdObjectDB, self).__init__(root_path) - self._git = git - - @classmethod - def _digest_process_messages(cls, fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - - :return: list(line, ...) list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break - - if char in ('\r', '\n'): - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines - - @classmethod - def _finalize_proc(cls, proc): - """Wait for the process (fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling - - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in self._digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total'): - continue - elif line.startswith('warning:'): - print >> sys.stderr, line - continue - elif line.startswith('fatal:'): - raise GitCommandError(("Error when 
fetching: %s" % line,), 2) - # END handle special messages - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(join(self.root_path(), 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info) - - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - self._finalize_proc(proc) - return output - - def _get_push_info(self, proc, progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. This is why we read the bytes manually - self._digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - self._finalize_proc(proc) - return output - - - - #{ ODB Interface - - def info(self, sha): - hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) - return OInfo(hex_to_bin(hexsha), typename, size) - - def stream(self, sha): - """For now, all lookup is done by git itself""" - hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) - return OStream(hex_to_bin(hexsha), typename, size, stream) - - #} END odb interface - - # { Interface - - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: Full binary 20 byte sha from the given partial hexsha - :raise AmbiguousObjectName: - :raise BadObject: - :note: currently we only raise BadObject as git does not communicate - AmbiguousObjects separately""" - try: - hexsha, typename, size = self._git.get_object_header(partial_hexsha) - return hex_to_bin(hexsha) - except 
(GitCommandError, ValueError): - raise BadObject(partial_hexsha) - # END handle exceptions - - #} END interface - - #{ Transport DB interface - - def push(self, url, refspecs=None, progress=None, **kwargs): - """Push given refspecs using the git default implementation - :param url: may be a remote name or a url - :param refspecs: single string, RefSpec instance or list of such or None. - :param progress: RemoteProgress derived instance or None - :param **kwargs: Additional arguments to be passed to the git-push process""" - proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs) - return self._get_push_info(proc, progress or RemoteProgress()) - - def pull(self, url, refspecs=None, progress=None, **kwargs): - """Fetch and merge the given refspecs. - If not refspecs are given, the merge will only work properly if you - have setup upstream (tracking) branches. - :param url: may be a remote name or a url - :param refspecs: see push() - :param progress: see push()""" - proc = self._git.pull(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - def fetch(self, url, refspecs=None, progress=None, **kwargs): - """Fetch the latest changes - :param url: may be a remote name or a url - :param refspecs: see push() - :param progress: see push()""" - proc = self._git.fetch(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - #} end transport db interface diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py index 046c699d..73cc2bdf 100644 --- a/git/db/py/__init__.py +++ b/git/db/py/__init__.py @@ -3,11 +3,4 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from base import * -from loose import * -from mem import * -from pack import * -from git import * -from ref 
import * -from resolve import * -from transport import * +from complex import * diff --git a/git/db/py/base.py b/git/db/py/base.py index 28bbf258..5c470ba4 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -3,7 +3,6 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains basic implementations for the interface building blocks""" - from git.db.interface import * from git.util import ( diff --git a/git/db/py/complex.py b/git/db/py/complex.py new file mode 100644 index 00000000..1f929e31 --- /dev/null +++ b/git/db/py/complex.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureCompoundDB, + PureObjectDBW, + PureRootPathDB, + PureRepositoryPathsMixin, + PureConfigurationMixin, + ) + +from resolve import PureReferencesMixin + +from loose import PureLooseObjectODB +from pack import PurePackedODB +from ref import PureReferenceDB + +from git.util import ( + LazyMixin, + normpath, + join, + dirname + ) +from git.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) +import os + +__all__ = ('PureGitODB', 'PureGitDB') + + +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): + """A git-style object-only database, which contains all objects in the 'objects' + subdirectory. + :note: The type needs to be initialized on the ./objects directory to function, + as it deals solely with object lookup. 
Use a PureGitDB type if you need + reference and push support.""" + # Configuration + PackDBCls = PurePackedODB + LooseDBCls = PureLooseObjectODB + PureReferenceDBCls = PureReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git ./objects directory""" + super(PureGitODB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.PureReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(PureGitODB, self)._set_cache_(attr) + # END handle attrs + + #{ PureObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface + + +class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): + """Git like database with support for object lookup as well as reference resolution. + Our rootpath is set to the actual .git directory (bare on unbare). + + The root_path will be the git objects directory. 
Use git_path() to obtain the actual top-level + git directory.""" + #directories + + def __init__(self, root_path): + """Initialize ourselves on the .git directory, or the .git/objects directory.""" + PureRepositoryPathsMixin._initialize(self, root_path) + super(PureGitDB, self).__init__(self.objects_path()) + + + diff --git a/git/db/py/git.py b/git/db/py/git.py deleted file mode 100644 index 1f929e31..00000000 --- a/git/db/py/git.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors -# -# This module is part of PureGitDB and is released under -# the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from base import ( - PureCompoundDB, - PureObjectDBW, - PureRootPathDB, - PureRepositoryPathsMixin, - PureConfigurationMixin, - ) - -from resolve import PureReferencesMixin - -from loose import PureLooseObjectODB -from pack import PurePackedODB -from ref import PureReferenceDB - -from git.util import ( - LazyMixin, - normpath, - join, - dirname - ) -from git.exc import ( - InvalidDBRoot, - BadObject, - AmbiguousObjectName - ) -import os - -__all__ = ('PureGitODB', 'PureGitDB') - - -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): - """A git-style object-only database, which contains all objects in the 'objects' - subdirectory. - :note: The type needs to be initialized on the ./objects directory to function, - as it deals solely with object lookup. 
Use a PureGitDB type if you need - reference and push support.""" - # Configuration - PackDBCls = PurePackedODB - LooseDBCls = PureLooseObjectODB - PureReferenceDBCls = PureReferenceDB - - # Directories - packs_dir = 'pack' - loose_dir = '' - alternates_dir = os.path.join('info', 'alternates') - - def __init__(self, root_path): - """Initialize ourselves on a git ./objects directory""" - super(PureGitODB, self).__init__(root_path) - - def _set_cache_(self, attr): - if attr == '_dbs' or attr == '_loose_db': - self._dbs = list() - loose_db = None - for subpath, dbcls in ((self.packs_dir, self.PackDBCls), - (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.PureReferenceDBCls)): - path = self.db_path(subpath) - if os.path.exists(path): - self._dbs.append(dbcls(path)) - if dbcls is self.LooseDBCls: - loose_db = self._dbs[-1] - # END remember loose db - # END check path exists - # END for each db type - - # should have at least one subdb - if not self._dbs: - raise InvalidDBRoot(self.root_path()) - # END handle error - - # we the first one should have the store method - assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" - - # finally set the value - self._loose_db = loose_db - else: - super(PureGitODB, self)._set_cache_(attr) - # END handle attrs - - #{ PureObjectDBW interface - - def store(self, istream): - return self._loose_db.store(istream) - - def ostream(self): - return self._loose_db.ostream() - - def set_ostream(self, ostream): - return self._loose_db.set_ostream(ostream) - - #} END objectdbw interface - - -class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): - """Git like database with support for object lookup as well as reference resolution. - Our rootpath is set to the actual .git directory (bare on unbare). - - The root_path will be the git objects directory. 
Use git_path() to obtain the actual top-level - git directory.""" - #directories - - def __init__(self, root_path): - """Initialize ourselves on the .git directory, or the .git/objects directory.""" - PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_path()) - - - diff --git a/git/db/py/mem.py b/git/db/py/mem.py index 5851aebc..da02dbdd 100644 --- a/git/db/py/mem.py +++ b/git/db/py/mem.py @@ -3,12 +3,11 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains the MemoryDatabase implementation""" -from loose import PureLooseObjectODB from base import ( PureObjectDBR, PureObjectDBW ) - +from loose import PureLooseObjectODB from git.base import ( OStream, IStream, diff --git a/git/db/py/ref.py b/git/db/py/ref.py index 951f0437..94887fb8 100644 --- a/git/db/py/ref.py +++ b/git/db/py/ref.py @@ -31,7 +31,7 @@ class PureReferenceDB(PureCompoundDB): dbcls = self.ObjectDBCls if dbcls is None: # late import - from git import PureGitODB + from complex import PureGitODB # TODO: This should be a configurable for flexibility dbcls = PureGitODB # END get db type diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 7c03bcd1..9cce8efe 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -3,8 +3,8 @@ version assuming compatible interface for reference and object types""" from git.db.interface import ReferencesMixin from git.exc import BadObject -from git.ref import SymbolicReference -from git.object.base import Object +from git.refs import SymbolicReference +from git.objects.base import Object from git.util import ( join, isdir, -- cgit v1.2.3 From cd26aaebbda94dc3740e41bbd3f91ba6b1a25c10 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 10 May 2011 10:21:26 +0200 Subject: Made repository paths methods a property to be compatible with the existing repo interface. Added submodule interface ... 
goal is to provide all of the extra repo functionality in custom interfaces --- git/db/compat.py | 13 +++++++++ git/db/interface.py | 79 +++++++++++++++++++++++++++++++++++++++++++++----- git/db/py/base.py | 67 ++++++++++++++++++++++++++++++++---------- git/db/py/complex.py | 5 ++-- git/db/py/submodule.py | 33 +++++++++++++++++++++ 5 files changed, 173 insertions(+), 24 deletions(-) create mode 100644 git/db/compat.py create mode 100644 git/db/py/submodule.py (limited to 'git/db') diff --git a/git/db/compat.py b/git/db/compat.py new file mode 100644 index 00000000..1c0ba6f5 --- /dev/null +++ b/git/db/compat.py @@ -0,0 +1,13 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module providing adaptors to maintain backwards compatability""" + +class RepoCompatInterface(object): + """Interface to install backwards compatability of the new complex repository + types with the previous, all in one, repository.""" + + @property + def bare(self): + return self.is_bare diff --git a/git/db/interface.py b/git/db/interface.py index b7c167c5..bdda70b3 100644 --- a/git/db/interface.py +++ b/git/db/interface.py @@ -6,7 +6,7 @@ __all__ = ( 'ObjectDBR', 'ObjectDBW', 'RootPathDB', 'CompoundDB', 'CachingDB', 'TransportDB', 'ConfigurationMixin', 'RepositoryPathsMixin', - 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin') + 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin', 'SubmoduleDB') class ObjectDBR(object): @@ -151,7 +151,7 @@ class RootPathDB(object): access.""" super(RootPathDB, self).__init__(root_path) - #{ Interface + #{ Interface def root_path(self): """:return: path at which this db operates""" raise NotImplementedError() @@ -390,33 +390,60 @@ class RepositoryPathsMixin(object): raise NotImplementedError() #} end subclass interface + #{ Object Interface + + def __eq__(self, rhs): + raise 
NotImplementedError() + + def __ne__(self, rhs): + raise NotImplementedError() + + def __hash__(self): + raise NotImplementedError() + + def __repr__(self): + raise NotImplementedError() + + #} END object interface + #{ Interface + @property def is_bare(self): """:return: True if this is a bare repository :note: this value is cached upon initialization""" raise NotImplementedError() - def git_path(self): + @property + def git_dir(self): """:return: path to directory containing this actual git repository (which in turn provides access to objects and references""" raise NotImplementedError() - def working_tree_path(self): + @property + def working_tree_dir(self): """:return: path to directory containing the working tree checkout of our git repository. :raise AssertionError: If this is a bare repository""" raise NotImplementedError() - def objects_path(self): + @property + def objects_dir(self): """:return: path to the repository's objects directory""" raise NotImplementedError() + @property def working_dir(self): """:return: working directory of the git process or related tools, being - either the working_tree_path if available or the git_path""" + either the working_tree_dir if available or the git_path""" raise NotImplementedError() - + + @property + def description(self): + """:return: description text associated with this repository or set the + description.""" + raise NotImplementedError() + #} END interface @@ -465,5 +492,43 @@ class ConfigurationMixin(object): repository = configuration file for this repostory only""" raise NotImplementedError() + #} END interface + +class SubmoduleDB(object): + """Interface providing access to git repository submodules. + The actual implementation is found in the Submodule object type, which is + currently only available in one implementation.""" + + @property + def submodules(self): + """ + :return: git.IterableList(Submodule, ...) 
of direct submodules + available from the current head""" + raise NotImplementedError() + + def submodule(self, name): + """ :return: Submodule with the given name + :raise ValueError: If no such submodule exists""" + raise NotImplementedError() + + def create_submodule(self, *args, **kwargs): + """Create a new submodule + + :note: See the documentation of Submodule.add for a description of the + applicable parameters + :return: created submodules""" + raise NotImplementedError() + + def iter_submodules(self, *args, **kwargs): + """An iterator yielding Submodule instances, see Traversable interface + for a description of args and kwargs + :return: Iterator""" + raise NotImplementedError() + + def submodule_update(self, *args, **kwargs): + """Update the submodules, keeping the repository consistent as it will + take the previous state into consideration. For more information, please + see the documentation of RootModule.update""" + raise NotImplementedError() diff --git a/git/db/py/base.py b/git/db/py/base.py index 5c470ba4..f45711d5 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -273,34 +273,70 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): # lets not assume the option exists, although it should pass #END check bare flag + + #} end subclass interface + + #{ Object Interface + + def __eq__(self, rhs): + if hasattr(rhs, 'git_dir'): + return self.git_dir == rhs.git_dir + return False + + def __ne__(self, rhs): + return not self.__eq__(rhs) + + def __hash__(self): + return hash(self.git_dir) + def __repr__(self): + return "%s(%r)" % (type(self).__name__, self.git_dir) - #} end subclass interface + #} END object interface #{ Interface + @property def is_bare(self): return self._bare - def git_path(self): + @property + def git_dir(self): return self._git_path - def working_tree_path(self): - if self.is_bare(): - raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path()) + @property + def 
working_tree_dir(self): + if self.is_bare: + raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir) #END assertion - return dirname(self.git_path()) - - def objects_path(self): - return join(self.git_path(), self.objs_dir) - + return dirname(self.git_dir) + + @property + def objects_dir(self): + return join(self.git_dir, self.objs_dir) + + @property def working_dir(self): - if self.is_bare(): - return self.git_path() + if self.is_bare: + return self.git_dir else: - return self.working_tree_dir() + return self.working_tree_dir #END handle bare state + def _mk_description(): + def _get_description(self): + filename = join(self.git_dir, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = join(self.git_dir, 'description') + file(filename, 'w').write(descr+'\n') + + return property(_get_description, _set_description, "Descriptive text for the content of the repository") + + description = _mk_description() + del(_mk_description) + #} END interface @@ -313,7 +349,7 @@ class PureConfigurationMixin(ConfigurationMixin): def __init__(self, *args, **kwargs): """Verify prereqs""" - assert hasattr(self, 'git_path') + assert hasattr(self, 'git_dir') def _path_at_level(self, level ): # we do not support an absolute path of the gitconfig on windows , @@ -327,7 +363,7 @@ class PureConfigurationMixin(ConfigurationMixin): elif level == "global": return normpath(expanduser("~/.%s" % self.system_config_file_name)) elif level == "repository": - return join(self.git_path(), self.repo_config_file_name) + return join(self.git_dir, self.repo_config_file_name) #END handle level raise ValueError("Invalid configuration level: %r" % level) @@ -346,5 +382,6 @@ class PureConfigurationMixin(ConfigurationMixin): def config_writer(self, config_level="repository"): return GitConfigParser(self._path_at_level(config_level), read_only=False) + #} END interface diff --git a/git/db/py/complex.py 
b/git/db/py/complex.py index 1f929e31..de68d4fd 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -15,6 +15,7 @@ from resolve import PureReferencesMixin from loose import PureLooseObjectODB from pack import PurePackedODB from ref import PureReferenceDB +from submodule import PureSubmoduleDB from git.util import ( LazyMixin, @@ -32,7 +33,7 @@ import os __all__ = ('PureGitODB', 'PureGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, @@ -107,7 +108,7 @@ class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, Pu def __init__(self, root_path): """Initialize ourselves on the .git directory, or the .git/objects directory.""" PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_path()) + super(PureGitDB, self).__init__(self.objects_dir) diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py new file mode 100644 index 00000000..735f90b1 --- /dev/null +++ b/git/db/py/submodule.py @@ -0,0 +1,33 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.objects.submodule.base import Submodule +from git.objects.submodule.root import RootModule +from git.db.interface import SubmoduleDB + +__all__ = ["PureSubmoduleDB"] + +class PureSubmoduleDB(SubmoduleDB): + """Pure python implementation of submodule functionality""" + + @property + def submodules(self): + return Submodule.list_items(self) + + def submodule(self, name): + try: + return self.submodules[name] + except IndexError: + raise ValueError("Didn't find submodule named %r" % name) + # END 
exception handling + + def create_submodule(self, *args, **kwargs): + return Submodule.add(self, *args, **kwargs) + + def iter_submodules(self, *args, **kwargs): + return RootModule(self).traverse(*args, **kwargs) + + def submodule_update(self, *args, **kwargs): + return RootModule(self).update(*args, **kwargs) + -- cgit v1.2.3 From 0996049122842a343e0ea7fbbecafddb2b4ba9d3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 29 May 2011 21:59:12 +0200 Subject: Intermediate commit with quite some progress in order to put all extra methods on the default Repo implementation into interfaces or something that can be abstracted. It shows that it would indeed be good to keep the differentiation between Repositories which contain an object database as it is clearly easier to setup any combination of repositories that use git and those that do not, with just the addition of one more level of indirection. Lets see how it will end up --- git/db/cmd/advanced.py | 368 +++++++++++++++++++++++++++++++++++++++++++++++++ git/db/cmd/complex.py | 25 +++- git/db/compat.py | 17 +++ git/db/interface.py | 89 +++++++++++- git/db/py/base.py | 74 +++++++++- git/db/py/complex.py | 18 ++- git/db/py/resolve.py | 44 +++++- git/db/py/transport.py | 61 ++------ 8 files changed, 635 insertions(+), 61 deletions(-) create mode 100644 git/db/cmd/advanced.py (limited to 'git/db') diff --git a/git/db/cmd/advanced.py b/git/db/cmd/advanced.py new file mode 100644 index 00000000..c8bd2cd6 --- /dev/null +++ b/git/db/cmd/advanced.py @@ -0,0 +1,368 @@ +# repo.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +class AdvancedFunctionalityMixin(object): + """An intermediate interface carrying advanced git functionality that can be used + in other comound repositories which do not implement this functionality themselves. 
+ + The mixin must be used with repositories that provide a git command object under + self.git. + + :note: at some point, methods provided here are supposed to be provided by custom interfaces""" + DAEMON_EXPORT_FILE = 'git-daemon-export-ok' + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + + @property + def index(self): + """:return: IndexFile representing this repository's index.""" + return IndexFile(self) + + def commit(self, rev=None): + """The Commit object for the specified revision + :param rev: revision specifier, see git-rev-parse for viable options. + :return: ``git.Commit``""" + if rev is None: + return self.head.commit + else: + return self.rev_parse(str(rev)+"^0") + + def iter_trees(self, *args, **kwargs): + """:return: Iterator yielding Tree objects + :note: Takes all arguments known to iter_commits method""" + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + """The Tree object for the given treeish revision + Examples:: + + repo.tree(repo.heads[0]) + + :param rev: is a revision pointing to a Treeish ( being a commit or tree ) + :return: ``git.Tree`` + + :note: + If you need a non-root level tree, find it by iterating the root tree. Otherwise + it cannot know about its path relative to the repository root and subsequent + operations might have unexpected results.""" + if rev is None: + return self.head.commit.tree + else: + return self.rev_parse(str(rev)+"^{tree}") + + def iter_commits(self, rev=None, paths='', **kwargs): + """A list of Commit objects representing the history of a given ref/commit + + :parm rev: + revision specifier, see git-rev-parse for viable options. + If None, the active branch will be used. 
+ + :parm paths: + is an optional path or a list of paths to limit the returned commits to + Commits that do not contain that path or the paths will not be returned. + + :parm kwargs: + Arguments to be passed to git-rev-list - common ones are + max_count and skip + + :note: to receive only commits between two named revisions, use the + "revA..revB" revision specifier + + :return ``git.Commit[]``""" + if rev is None: + rev = self.head.commit + + return Commit.iter_items(self, rev, paths, **kwargs) + + def _get_daemon_export(self): + filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) + return os.path.exists(filename) + + def _set_daemon_export(self, value): + filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) + fileexists = os.path.exists(filename) + if value and not fileexists: + touch(filename) + elif not value and fileexists: + os.unlink(filename) + + daemon_export = property(_get_daemon_export, _set_daemon_export, + doc="If True, git-daemon may export this repository") + del _get_daemon_export + del _set_daemon_export + + def is_dirty(self, index=True, working_tree=True, untracked_files=False): + """ + :return: + ``True``, the repository is considered dirty. By default it will react + like a git-status without untracked files, hence it is dirty if the + index or the working copy have changes.""" + if self._bare: + # Bare repositories with no associated working directory are + # always consired to be clean. 
+ return False + + # start from the one which is fastest to evaluate + default_args = ('--abbrev=40', '--full-index', '--raw') + if index: + # diff index against HEAD + if isfile(self.index.path) and self.head.is_valid() and \ + len(self.git.diff('HEAD', '--cached', *default_args)): + return True + # END index handling + if working_tree: + # diff index against working tree + if len(self.git.diff(*default_args)): + return True + # END working tree handling + if untracked_files: + if len(self.untracked_files): + return True + # END untracked files + return False + + @property + def untracked_files(self): + """ + :return: + list(str,...) + + Files currently untracked as they have not been staged yet. Paths + are relative to the current working directory of the git command. + + :note: + ignored files will not appear here, i.e. files mentioned in .gitignore""" + # make sure we get all files, no only untracked directores + proc = self.git.status(untracked_files=True, as_process=True) + stream = iter(proc.stdout) + untracked_files = list() + for line in stream: + if not line.startswith("# Untracked files:"): + continue + # skip two lines + stream.next() + stream.next() + + for untracked_info in stream: + if not untracked_info.startswith("#\t"): + break + untracked_files.append(untracked_info.replace("#\t", "").rstrip()) + # END for each utracked info line + # END for each line + return untracked_files + + def blame(self, rev, file): + """The blame information for the given file at the given revision. + + :parm rev: revision specifier, see git-rev-parse for viable options. + :return: + list: [git.Commit, list: []] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. 
The Commit objects will be given in order + of appearance.""" + data = self.git.blame(rev, '--', file, p=True) + commits = dict() + blames = list() + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = int(parts[-1]) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, hex_to_bin(sha), + author=Actor._from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END 
distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + @classmethod + def init(cls, path=None, mkdir=True, **kwargs): + """Initialize a git repository at the given path if specified + + :param path: + is the full path to the repo (traditionally ends with /.git) + or None in which case the repository will be created in the current + working directory + + :parm mkdir: + if specified will create the repository directory if it doesn't + already exists. Creates the directory with a mode=0755. + Only effective if a path is explicitly given + + :parm kwargs: + keyword arguments serving as additional options to the git-init command + + :return: ``git.Repo`` (the newly created repo)""" + + if mkdir and path and not os.path.exists(path): + os.makedirs(path, 0755) + + # git command automatically chdir into the directory + git = Git(path) + output = git.init(**kwargs) + return Repo(path) + + @classmethod + def _clone(cls, git, url, path, odb_default_type, **kwargs): + # special handling for windows for path at which the clone should be + # created. + # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence + # we at least give a proper error instead of letting git fail + prev_cwd = None + prev_path = None + odbt = kwargs.pop('odbt', odb_default_type) + if os.name == 'nt': + if '~' in path: + raise OSError("Git cannot handle the ~ character in path %r correctly" % path) + + # on windows, git will think paths like c: are relative and prepend the + # current working dir ( before it fails ). 
We temporarily adjust the working + # dir to make this actually work + match = re.match("(\w:[/\\\])(.*)", path) + if match: + prev_cwd = os.getcwd() + prev_path = path + drive, rest_of_path = match.groups() + os.chdir(drive) + path = rest_of_path + kwargs['with_keep_cwd'] = True + # END cwd preparation + # END windows handling + + try: + git.clone(url, path, **kwargs) + finally: + if prev_cwd is not None: + os.chdir(prev_cwd) + path = prev_path + # END reset previous working dir + # END bad windows handling + + # our git command could have a different working dir than our actual + # environment, hence we prepend its working dir if required + if not os.path.isabs(path) and git.working_dir: + path = join(git._working_dir, path) + + # adjust remotes - there may be operating systems which use backslashes, + # These might be given as initial paths, but when handling the config file + # that contains the remote from which we were clones, git stops liking it + # as it will escape the backslashes. Hence we undo the escaping just to be + # sure + repo = cls(os.path.abspath(path), odbt = odbt) + if repo.remotes: + repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) + # END handle remote repo + return repo + + def clone(self, path, **kwargs): + """Create a clone from this repository. + :param path: + is the full path of the new repo (traditionally ends with ./.git). 
+ + :param kwargs: + odbt = ObjectDatabase Type, allowing to determine the object database + implementation used by the returned Repo instance + + All remaining keyword arguments are given to the git-clone command + + :return: ``git.Repo`` (the newly cloned repo)""" + return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs) + + @classmethod + def clone_from(cls, url, to_path, **kwargs): + """Create a clone from the given URL + :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS + :param to_path: Path to which the repository should be cloned to + :param kwargs: see the ``clone`` method + :return: Repo instance pointing to the cloned directory""" + return cls._clone(Git(os.getcwd()), url, to_path, CmdGitDB, **kwargs) + + def archive(self, ostream, treeish=None, prefix=None, **kwargs): + """Archive the tree at the given revision. + :parm ostream: file compatible stream object to which the archive will be written + :parm treeish: is the treeish name/id, defaults to active branch + :parm prefix: is the optional prefix to prepend to each filename in the archive + :parm kwargs: + Additional arguments passed to git-archive + NOTE: Use the 'format' argument to define the kind of format. 
Use + specialized ostreams to write any format supported by python + + :raise GitCommandError: in case something went wrong + :return: self""" + if treeish is None: + treeish = self.head.commit + if prefix and 'prefix' not in kwargs: + kwargs['prefix'] = prefix + kwargs['output_stream'] = ostream + + self.git.archive(treeish, **kwargs) + return self + + def rev_parse(self, name): + return self.odb.resolve(name) + + def __repr__(self): + return '' % self.git_dir diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py index 73e2048f..2eed17ad 100644 --- a/git/db/cmd/complex.py +++ b/git/db/cmd/complex.py @@ -14,8 +14,13 @@ from git.util import ( hex_to_bin ) from git.db.py.loose import PureLooseObjectODB +from git.db.compat import RepoCompatInterface from git.util import RemoteProgress -from git.db.py.base import TransportDB +from git.db.py.base import ( + TransportDB, + PureRepositoryPathsMixin, + PureAlternatesFileMixin + ) from git.db.interface import FetchInfo as GitdbFetchInfo from git.db.interface import PushInfo as GitdbPushInfo @@ -33,7 +38,7 @@ import re import sys -__all__ = ('CmdGitDB', 'RemoteProgress' ) +__all__ = ('CmdGitDB', 'RemoteProgress', 'CmdCompatibilityGitDB' ) class PushInfo(GitdbPushInfo): @@ -266,7 +271,7 @@ class FetchInfo(GitdbFetchInfo): return cls(remote_local_ref, flags, note, old_commit_binsha) -class CmdGitDB(PureLooseObjectODB, TransportDB): +class CmdGitDB(PureLooseObjectODB, TransportDB, PureRepositoryPathsMixin, PureAlternatesFileMixin): """A database representing the default git object store, which includes loose objects, pack files and an alternates file @@ -276,7 +281,8 @@ class CmdGitDB(PureLooseObjectODB, TransportDB): """ def __init__(self, root_path, git): """Initialize this instance with the root and a git command""" - super(CmdGitDB, self).__init__(root_path) + self._initialize(root_path) + super(CmdGitDB, self).__init__(self.objects_dir) self._git = git @classmethod @@ -373,7 +379,8 @@ class 
CmdGitDB(PureLooseObjectODB, TransportDB): #{ ODB Interface - + # overrides from PureOdb Implementation, which is responsible only for writing + # objects def info(self, sha): hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) return OInfo(hex_to_bin(hexsha), typename, size) @@ -399,6 +406,10 @@ class CmdGitDB(PureLooseObjectODB, TransportDB): except (GitCommandError, ValueError): raise BadObject(partial_hexsha) # END handle exceptions + + @property + def git(self): + return self._git #} END interface @@ -432,3 +443,7 @@ class CmdGitDB(PureLooseObjectODB, TransportDB): return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) #} end transport db interface + + +class CmdCompatibilityGitDB(CmdGitDB, RepoCompatInterface): + """Command git database with the compatabilty interface added for 0.3x code""" diff --git a/git/db/compat.py b/git/db/compat.py index 1c0ba6f5..16bf0562 100644 --- a/git/db/compat.py +++ b/git/db/compat.py @@ -11,3 +11,20 @@ class RepoCompatInterface(object): @property def bare(self): return self.is_bare + + def rev_parse(self, *args, **kwargs): + return self.resolve_object(*args, **kwargs) + + @property + def odb(self): + """The odb is now an integrated part of each repository""" + return self + + @property + def active_branch(self): + """The name of the currently active branch. + + :return: Head to the active branch""" + return self.head.reference + + diff --git a/git/db/interface.py b/git/db/interface.py index bdda70b3..90421433 100644 --- a/git/db/interface.py +++ b/git/db/interface.py @@ -180,6 +180,7 @@ class CachingDB(object): # END interface + class CompoundDB(object): """A database which delegates calls to sub-databases. They should usually be cached and lazy-loaded""" @@ -282,6 +283,7 @@ class TransportDB(object): As refspecs involve symbolic names for references to be handled, we require RefParse functionality. 
How this is done is up to the actual implementation.""" # The following variables need to be set by the derived class + __slots__ = tuple() #{ Interface @@ -325,6 +327,22 @@ class TransportDB(object): :note: Remote objects can also be used for the actual push or fetch operation""" raise NotImplementedError() + def remote(self, name='origin'): + """:return: Remote object with the given name + :note: it does not necessarily exist, hence this is just a more convenient way + to construct Remote objects""" + raise NotImplementedError() + + def create_remote(self, name, url, **kwargs): + """Create a new remote with the given name pointing to the given url + :return: Remote instance, compatible to the Remote interface""" + return Remote.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + """Delete the given remote. + :param remote: a Remote instance""" + return Remote.remove(self, remote) + #}end interface @@ -334,6 +352,7 @@ class ReferencesMixin(object): The returned types are compatible to the interfaces of the pure python reference implementation in GitDB.ref""" + __slots__ = tuple() def resolve(self, name): """Resolve the given name into a binary sha. Valid names are as defined @@ -342,6 +361,13 @@ class ReferencesMixin(object): :raise AmbiguousObjectName: :raise BadObject: """ raise NotImplementedError() + + def resolve_object(self, name): + """As ``resolve()``, but returns the Objecft instance pointed to by the + resolved binary sha + :return: Object instance of the correct type, e.g. 
shas pointing to commits + will be represented by a Commit object""" + raise NotImplementedError() @property def references(self): @@ -356,11 +382,65 @@ class ReferencesMixin(object): heads in the repository.""" raise NotImplementedError() + @property + def head(self): + """:return: HEAD Object pointing to the current head reference""" + raise NotImplementedError() + @property def tags(self): - """:return: An IterableList of TagReferences that are available in this repo""" + """:return: An IterableList of TagReferences or compatible items that + are available in this repo""" + raise NotImplementedError() + + def tag(self, name): + """:return: Tag with the given name + :note: It does not necessarily exist, hence this is just a more convenient + way to construct TagReference objects""" raise NotImplementedError() + def create_head(self, path, commit='HEAD', force=False, logmsg=None ): + """Create a new head within the repository. + :param commit: a resolvable name to the commit or a Commit or Reference instance the new head should point to + :param force: if True, a head will be created even though it already exists + Otherwise an exception will be raised. + :param logmsg: message to append to the reference log. If None, a default message + will be used + :return: newly created Head instances""" + raise NotImplementedError() + + def delete_head(self, *heads): + """Delete the given heads + :param heads: list of Head references that are to be deleted""" + raise NotImplementedError() + + def create_tag(self, path, ref='HEAD', message=None, force=False): + """Create a new tag reference. + :param path: name or path of the new tag. + :param ref: resolvable name of the reference or commit, or Commit or Reference + instance describing the commit the tag should point to. + :param message: message to be attached to the tag reference. This will + create an actual Tag object carrying the message. Otherwise a TagReference + will be generated. 
+ :param force: if True, the Tag will be created even if another tag does already + exist at the given path. Otherwise an exception will be thrown + :return: TagReference object """ + raise NotImplementedError() + + def delete_tag(self, *tags): + """Delete the given tag references + :param tags: TagReferences to delete""" + raise NotImplementedError() + + + #{ Backward Compatability + # These aliases need to be provided by the implementing interface as well + refs = references + branches = heads + #} END backward compatability + + + class RepositoryPathsMixin(object): """Represents basic functionality of a full git repository. This involves an @@ -385,6 +465,13 @@ class RepositoryPathsMixin(object): only. Plain object databases need to be fed the "objects" directory path. :param path: the path to initialize the repository with + It is a path to either the root git directory or the bare git repo:: + + repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") + repo = Repo("~/Development/git-python.git") + repo = Repo("$REPOSITORIES/Development/git-python.git") + :raise InvalidDBRoot: """ raise NotImplementedError() diff --git a/git/db/py/base.py b/git/db/py/base.py index f45711d5..cc326c27 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -35,7 +35,7 @@ import os __all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', - 'PureConfigurationMixin', 'PureRepositoryPathsMixin') + 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin') class PureObjectDBR(ObjectDBR): @@ -385,3 +385,75 @@ class PureConfigurationMixin(ConfigurationMixin): #} END interface + +class PureAlternatesFileMixin(object): + """Utility able to read and write an alternates file through the alternates property + It needs to be part of a type with the git_dir or db_path property. 
+ + The file by default is assumed to be located at the default location as imposed + by the standard git repository layout""" + + #{ Configuration + alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file + + #} END configuration + + def __init__(self, *args, **kwargs): + super(PureAlternatesFileMixin, self).__init__(*args, **kwargs) + self._alternates_path() # throws on incompatible type + + #{ Interface + + def _alternates_path(self): + if hasattr(self, 'git_dir'): + return join(self.git_dir, 'objects', self.alternates_filepath) + elif hasattr(self, 'db_path'): + return self.db_path(self.alternates_filepath) + else: + raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method") + #END handle path + + def _get_alternates(self): + """The list of alternates for this repo from which objects can be retrieved + + :return: list of strings being pathnames of alternates""" + alternates_path = self._alternates_path() + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return list() + # END handle path exists + + def _set_alternates(self, alts): + """Sets the alternates + + :parm alts: + is the array of string paths representing the alternates at which + git should look for objects, i.e. 
/home/user/repo/.git/objects + + :raise NoSuchPathError: + :note: + The method does not check for the existance of the paths in alts + as the caller is responsible.""" + alternates_path = self._alternates_path() + if not alts: + if isfile(alternates_path): + os.remove(alternates_path) + else: + try: + f = open(alternates_path, 'w') + f.write("\n".join(alts)) + finally: + f.close() + # END file handling + # END alts handling + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + #} END interface diff --git a/git/db/py/complex.py b/git/db/py/complex.py index de68d4fd..6504b3ed 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -8,6 +8,7 @@ from base import ( PureRootPathDB, PureRepositoryPathsMixin, PureConfigurationMixin, + PureAlternatesFileMixin, ) from resolve import PureReferencesMixin @@ -17,6 +18,8 @@ from pack import PurePackedODB from ref import PureReferenceDB from submodule import PureSubmoduleDB +from git.db.compat import RepoCompatInterface + from git.util import ( LazyMixin, normpath, @@ -30,10 +33,11 @@ from git.exc import ( ) import os -__all__ = ('PureGitODB', 'PureGitDB') +__all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, + PureSubmoduleDB, PureAlternatesFileMixin): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. 
:note: The type needs to be initialized on the ./objects directory to function, @@ -47,7 +51,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) # Directories packs_dir = 'pack' loose_dir = '' - alternates_dir = os.path.join('info', 'alternates') + def __init__(self, root_path): """Initialize ourselves on a git ./objects directory""" @@ -59,7 +63,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) loose_db = None for subpath, dbcls in ((self.packs_dir, self.PackDBCls), (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.PureReferenceDBCls)): + (self.alternates_filepath, self.PureReferenceDBCls)): path = self.db_path(subpath) if os.path.exists(path): self._dbs.append(dbcls(path)) @@ -75,7 +79,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) # END handle error # we the first one should have the store method - assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality" # finally set the value self._loose_db = loose_db @@ -97,6 +101,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) #} END objectdbw interface + class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): """Git like database with support for object lookup as well as reference resolution. Our rootpath is set to the actual .git directory (bare on unbare). 
@@ -112,3 +117,6 @@ class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, Pu +class PureCompatibilityGitDB(PureGitDB, RepoCompatInterface): + """Pure git database with a compatability layer required by 0.3x code""" + diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 9cce8efe..94992d11 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -5,6 +5,9 @@ from git.db.interface import ReferencesMixin from git.exc import BadObject from git.refs import SymbolicReference from git.objects.base import Object +from git.refs.head import HEAD +from git.refs.headref import Head +from git.refs.tag import TagReference from git.util import ( join, isdir, @@ -281,17 +284,52 @@ class PureReferencesMixin(ReferencesMixin): re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + #{ Configuration + # Types to use when instatiating references + TagReferenceCls = TagReference + HeadCls = Head + ReferenceCls = Reference + HEADCls = HEAD + #} END configuration + def resolve(self, name): + return self.resolve_object(name).binsha + + def resolve_object(self, name): return rev_parse(self, name) @property def references(self): - raise NotImplementedError() + return self.ReferenceCls.list_items(self) @property def heads(self): - raise NotImplementedError() + return self.HeadCls.list_items(self) @property def tags(self): - raise NotImplementedError() + return self.TagReferenceCls.list_items(self) + + def tag(self, name): + return self.tags[name] + + @property + def head(self): + return self.HEADCls(self,'HEAD') + + def create_head(self, path, commit='HEAD', force=False, logmsg=None ): + return self.HeadCls.create(self, path, commit, force, logmsg) + + def delete_head(self, *heads, **kwargs): + return self.HeadCls.delete(self, *heads, **kwargs) + + def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): + return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs) 
+ + def delete_tag(self, *tags): + return self.TagReferenceCls.delete(self, *tags) + + + # compat + branches = heads + refs = references diff --git a/git/db/py/transport.py b/git/db/py/transport.py index f8edfb23..00d222b0 100644 --- a/git/db/py/transport.py +++ b/git/db/py/transport.py @@ -9,6 +9,10 @@ from git.db.interface import ( TransportDB, FetchInfo, RefSpec ) +from git.refs.remote import RemoteReference +from git.remote import Remote + + __all__ = ["PureTransportDB"] class PurePushInfo(PushInfo): @@ -23,67 +27,32 @@ class PureFetchInfo(FetchInfo): class PureTransportDB(TransportDB): - """A database which allows to transport objects from and to different locations - which are specified by urls (location) and refspecs (what to transport, - see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). - - At the beginning of a transport operation, it will be determined which objects - have to be sent (either by this or by the other side). - - Afterwards a pack with the required objects is sent (or received). If there is - nothing to send, the pack will be empty. - - The communication itself if implemented using a protocol instance which deals - with the actual formatting of the lines sent. - - As refspecs involve symbolic names for references to be handled, we require - RefParse functionality. How this is done is up to the actual implementation.""" # The following variables need to be set by the derived class #{Configuration protocol = None + RemoteCls = Remote #}end configuraiton #{ Interface def fetch(self, url, refspecs, progress=None, **kwargs): - """Fetch the objects defined by the given refspec from the given url. - :param url: url identifying the source of the objects. It may also be - a symbol from which the respective url can be resolved, like the - name of the remote. The implementation should allow objects as input - as well, these are assumed to resovle to a meaningful string though. 
- :param refspecs: iterable of reference specifiers or RefSpec instance, - identifying the references to be fetch from the remote. - :param progress: callable which receives progress messages for user consumption - :param kwargs: may be used for additional parameters that the actual implementation could - find useful. - :return: List of PureFetchInfo compatible instances which provide information about what - was previously fetched, in the order of the input refspecs. - :note: even if the operation fails, one of the returned PureFetchInfo instances - may still contain errors or failures in only part of the refspecs. - :raise: if any issue occours during the transport or if the url is not - supported by the protocol. - """ raise NotImplementedError() def push(self, url, refspecs, progress=None, **kwargs): - """Transport the objects identified by the given refspec to the remote - at the given url. - :param url: Decribes the location which is to receive the objects - see fetch() for more details - :param refspecs: iterable of refspecs strings or RefSpec instances - to identify the objects to push - :param progress: see fetch() - :param kwargs: additional arguments which may be provided by the caller - as they may be useful to the actual implementation - :todo: what to return ? 
- :raise: if any issue arises during transport or if the url cannot be handled""" raise NotImplementedError() @property def remotes(self): - """:return: An IterableList of Remote objects allowing to access and manipulate remotes - :note: Remote objects can also be used for the actual push or fetch operation""" - raise NotImplementedError() + return self.RemoteCls.list_items(self) + + def remote(self, name='origin'): + return self.remotes[name] + def create_remote(self, name, url, **kwargs): + return self.RemoteCls.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + return self.RemoteCls.remove(self, remote) + #}end interface -- cgit v1.2.3 From 112bb1672d6b28f203e7839e320b985486636800 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 01:23:28 +0200 Subject: Finished moving all repository methods to the respective interfaces and implementations. It seems theoretically work together now, although it clearly is much more complex than ever before. The repo package was slimmed down to being a module once again, which is only there for compatability actually --- git/db/cmd/advanced.py | 368 -------------------------- git/db/cmd/base.py | 706 +++++++++++++++++++++++++++++++++++++++++++++++++ git/db/cmd/complex.py | 444 +------------------------------ git/db/compat.py | 3 +- git/db/complex.py | 12 + git/db/interface.py | 166 +++++++++++- git/db/py/base.py | 14 +- git/db/py/complex.py | 12 +- git/db/py/resolve.py | 26 ++ 9 files changed, 935 insertions(+), 816 deletions(-) delete mode 100644 git/db/cmd/advanced.py create mode 100644 git/db/cmd/base.py create mode 100644 git/db/complex.py (limited to 'git/db') diff --git a/git/db/cmd/advanced.py b/git/db/cmd/advanced.py deleted file mode 100644 index c8bd2cd6..00000000 --- a/git/db/cmd/advanced.py +++ /dev/null @@ -1,368 +0,0 @@ -# repo.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the 
BSD License: http://www.opensource.org/licenses/bsd-license.php - -class AdvancedFunctionalityMixin(object): - """An intermediate interface carrying advanced git functionality that can be used - in other comound repositories which do not implement this functionality themselves. - - The mixin must be used with repositories that provide a git command object under - self.git. - - :note: at some point, methods provided here are supposed to be provided by custom interfaces""" - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - - # precompiled regex - re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - @property - def index(self): - """:return: IndexFile representing this repository's index.""" - return IndexFile(self) - - def commit(self, rev=None): - """The Commit object for the specified revision - :param rev: revision specifier, see git-rev-parse for viable options. - :return: ``git.Commit``""" - if rev is None: - return self.head.commit - else: - return self.rev_parse(str(rev)+"^0") - - def iter_trees(self, *args, **kwargs): - """:return: Iterator yielding Tree objects - :note: Takes all arguments known to iter_commits method""" - return ( c.tree for c in self.iter_commits(*args, **kwargs) ) - - def tree(self, rev=None): - """The Tree object for the given treeish revision - Examples:: - - repo.tree(repo.heads[0]) - - :param rev: is a revision pointing to a Treeish ( being a commit or tree ) - :return: ``git.Tree`` - - :note: - If you need a non-root level tree, find it by iterating the root tree. 
Otherwise - it cannot know about its path relative to the repository root and subsequent - operations might have unexpected results.""" - if rev is None: - return self.head.commit.tree - else: - return self.rev_parse(str(rev)+"^{tree}") - - def iter_commits(self, rev=None, paths='', **kwargs): - """A list of Commit objects representing the history of a given ref/commit - - :parm rev: - revision specifier, see git-rev-parse for viable options. - If None, the active branch will be used. - - :parm paths: - is an optional path or a list of paths to limit the returned commits to - Commits that do not contain that path or the paths will not be returned. - - :parm kwargs: - Arguments to be passed to git-rev-list - common ones are - max_count and skip - - :note: to receive only commits between two named revisions, use the - "revA..revB" revision specifier - - :return ``git.Commit[]``""" - if rev is None: - rev = self.head.commit - - return Commit.iter_items(self, rev, paths, **kwargs) - - def _get_daemon_export(self): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def is_dirty(self, index=True, working_tree=True, untracked_files=False): - """ - :return: - ``True``, the repository is considered dirty. By default it will react - like a git-status without untracked files, hence it is dirty if the - index or the working copy have changes.""" - if self._bare: - # Bare repositories with no associated working directory are - # always consired to be clean. 
- return False - - # start from the one which is fastest to evaluate - default_args = ('--abbrev=40', '--full-index', '--raw') - if index: - # diff index against HEAD - if isfile(self.index.path) and self.head.is_valid() and \ - len(self.git.diff('HEAD', '--cached', *default_args)): - return True - # END index handling - if working_tree: - # diff index against working tree - if len(self.git.diff(*default_args)): - return True - # END working tree handling - if untracked_files: - if len(self.untracked_files): - return True - # END untracked files - return False - - @property - def untracked_files(self): - """ - :return: - list(str,...) - - Files currently untracked as they have not been staged yet. Paths - are relative to the current working directory of the git command. - - :note: - ignored files will not appear here, i.e. files mentioned in .gitignore""" - # make sure we get all files, no only untracked directores - proc = self.git.status(untracked_files=True, as_process=True) - stream = iter(proc.stdout) - untracked_files = list() - for line in stream: - if not line.startswith("# Untracked files:"): - continue - # skip two lines - stream.next() - stream.next() - - for untracked_info in stream: - if not untracked_info.startswith("#\t"): - break - untracked_files.append(untracked_info.replace("#\t", "").rstrip()) - # END for each utracked info line - # END for each line - return untracked_files - - def blame(self, rev, file): - """The blame information for the given file at the given revision. - - :parm rev: revision specifier, see git-rev-parse for viable options. - :return: - list: [git.Commit, list: []] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. 
The Commit objects will be given in order - of appearance.""" - data = self.git.blame(rev, '--', file, p=True) - commits = dict() - blames = list() - info = None - - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] - if self.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - # END blame data initialization - else: - m = self.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = int(parts[-1]) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( self, hex_to_bin(sha), - author=Actor._from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END 
distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - @classmethod - def init(cls, path=None, mkdir=True, **kwargs): - """Initialize a git repository at the given path if specified - - :param path: - is the full path to the repo (traditionally ends with /.git) - or None in which case the repository will be created in the current - working directory - - :parm mkdir: - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - Only effective if a path is explicitly given - - :parm kwargs: - keyword arguments serving as additional options to the git-init command - - :return: ``git.Repo`` (the newly created repo)""" - - if mkdir and path and not os.path.exists(path): - os.makedirs(path, 0755) - - # git command automatically chdir into the directory - git = Git(path) - output = git.init(**kwargs) - return Repo(path) - - @classmethod - def _clone(cls, git, url, path, odb_default_type, **kwargs): - # special handling for windows for path at which the clone should be - # created. - # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence - # we at least give a proper error instead of letting git fail - prev_cwd = None - prev_path = None - odbt = kwargs.pop('odbt', odb_default_type) - if os.name == 'nt': - if '~' in path: - raise OSError("Git cannot handle the ~ character in path %r correctly" % path) - - # on windows, git will think paths like c: are relative and prepend the - # current working dir ( before it fails ). 
We temporarily adjust the working - # dir to make this actually work - match = re.match("(\w:[/\\\])(.*)", path) - if match: - prev_cwd = os.getcwd() - prev_path = path - drive, rest_of_path = match.groups() - os.chdir(drive) - path = rest_of_path - kwargs['with_keep_cwd'] = True - # END cwd preparation - # END windows handling - - try: - git.clone(url, path, **kwargs) - finally: - if prev_cwd is not None: - os.chdir(prev_cwd) - path = prev_path - # END reset previous working dir - # END bad windows handling - - # our git command could have a different working dir than our actual - # environment, hence we prepend its working dir if required - if not os.path.isabs(path) and git.working_dir: - path = join(git._working_dir, path) - - # adjust remotes - there may be operating systems which use backslashes, - # These might be given as initial paths, but when handling the config file - # that contains the remote from which we were clones, git stops liking it - # as it will escape the backslashes. Hence we undo the escaping just to be - # sure - repo = cls(os.path.abspath(path), odbt = odbt) - if repo.remotes: - repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) - # END handle remote repo - return repo - - def clone(self, path, **kwargs): - """Create a clone from this repository. - :param path: - is the full path of the new repo (traditionally ends with ./.git). 
- - :param kwargs: - odbt = ObjectDatabase Type, allowing to determine the object database - implementation used by the returned Repo instance - - All remaining keyword arguments are given to the git-clone command - - :return: ``git.Repo`` (the newly cloned repo)""" - return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs) - - @classmethod - def clone_from(cls, url, to_path, **kwargs): - """Create a clone from the given URL - :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS - :param to_path: Path to which the repository should be cloned to - :param kwargs: see the ``clone`` method - :return: Repo instance pointing to the cloned directory""" - return cls._clone(Git(os.getcwd()), url, to_path, CmdGitDB, **kwargs) - - def archive(self, ostream, treeish=None, prefix=None, **kwargs): - """Archive the tree at the given revision. - :parm ostream: file compatible stream object to which the archive will be written - :parm treeish: is the treeish name/id, defaults to active branch - :parm prefix: is the optional prefix to prepend to each filename in the archive - :parm kwargs: - Additional arguments passed to git-archive - NOTE: Use the 'format' argument to define the kind of format. 
Use - specialized ostreams to write any format supported by python - - :raise GitCommandError: in case something went wrong - :return: self""" - if treeish is None: - treeish = self.head.commit - if prefix and 'prefix' not in kwargs: - kwargs['prefix'] = prefix - kwargs['output_stream'] = ostream - - self.git.archive(treeish, **kwargs) - return self - - def rev_parse(self, name): - return self.odb.resolve(name) - - def __repr__(self): - return '' % self.git_dir diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py new file mode 100644 index 00000000..0025e9b0 --- /dev/null +++ b/git/db/cmd/base.py @@ -0,0 +1,706 @@ +"""module with git command implementations of the basic interfaces +:note: we could add all implementations of the basic interfaces, its more efficient though + to obtain them from the pure implementation""" +from git.exc import ( + GitCommandError, + BadObject + ) + +from git.base import ( + OInfo, + OStream + ) + +from git.util import ( + bin_to_hex, + hex_to_bin + ) +from git.db.compat import RepoCompatInterface +from git.util import RemoteProgress +from git.db.interface import FetchInfo as GitdbFetchInfo +from git.db.interface import PushInfo as GitdbPushInfo +from git.db.interface import HighLevelRepository + +from git.util import join_path +from git.util import join +from git.cmd import Git +from git.refs import ( + Reference, + RemoteReference, + SymbolicReference, + TagReference + ) + +import re +import sys + + +__all__ = ('CmdTransportMixin', 'RemoteProgress', 'GitCommandMixin', + 'CmdObjectDBRMixin', 'CmdHighLevelRepository') + + +#{ Utilities + +def touch(filename): + fp = open(filename, "a") + fp.close() + +#} END utilities + +class PushInfo(GitdbPushInfo): + """ + Carries information about the result of a push operation of a single head:: + + info = remote.push()[0] + info.flags # bitflags providing more information about the result + info.local_ref # Reference pointing to the local reference that was pushed + # It is None if the ref was 
deleted. + info.remote_ref_string # path to the remote reference located on the remote side + info.remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + info.summary # summary line providing human readable english text about the push + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') + + _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, + '-' : GitdbPushInfo.DELETED, '*' : 0, + '+' : GitdbPushInfo.FORCED_UPDATE, + ' ' : GitdbPushInfo.FAST_FORWARD, + '=' : GitdbPushInfo.UP_TO_DATE, + '!' : GitdbPushInfo.ERROR } + + def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, + summary=''): + """ Initialize a new instance """ + self.flags = flags + self.local_ref = local_ref + self.remote_ref_string = remote_ref_string + self._remote = remote + self.old_commit_binsha = old_commit_binsha + self.summary = summary + + @property + def remote_ref(self): + """ + :return: + Remote Reference or TagReference in the local repository corresponding + to the remote_ref_string kept in this instance.""" + # translate heads to a local remote, tags stay as they are + if self.remote_ref_string.startswith("refs/tags"): + return TagReference(self._remote.repo, self.remote_ref_string) + elif self.remote_ref_string.startswith("refs/heads"): + remote_ref = Reference(self._remote.repo, self.remote_ref_string) + return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + else: + raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) + # END + + @classmethod + def _from_line(cls, remote, line): + """Create a new PushInfo instance as parsed from line which is expected to be like + refs/heads/master:refs/heads/master 
05d2687..1d0568e""" + control_character, from_to, summary = line.split('\t', 3) + flags = 0 + + # control character handling + try: + flags |= cls._flag_map[ control_character ] + except KeyError: + raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) + # END handle control character + + # from_to handling + from_ref_string, to_ref_string = from_to.split(':') + if flags & cls.DELETED: + from_ref = None + else: + from_ref = Reference.from_path(remote.repo, from_ref_string) + + # commit handling, could be message or commit info + old_commit_binsha = None + if summary.startswith('['): + if "[rejected]" in summary: + flags |= cls.REJECTED + elif "[remote rejected]" in summary: + flags |= cls.REMOTE_REJECTED + elif "[remote failure]" in summary: + flags |= cls.REMOTE_FAILURE + elif "[no match]" in summary: + flags |= cls.ERROR + elif "[new tag]" in summary: + flags |= cls.NEW_TAG + elif "[new branch]" in summary: + flags |= cls.NEW_HEAD + # uptodate encoded in control character + else: + # fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit + split_token = "..." + if control_character == " ": + split_token = ".." + old_sha, new_sha = summary.split(' ')[0].split(split_token) + # have to use constructor here as the sha usually is abbreviated + old_commit_binsha = remote.repo.commit(old_sha) + # END message handling + + return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) + + +class FetchInfo(GitdbFetchInfo): + """ + Carries information about the results of a fetch operation of a single head:: + + info = remote.fetch()[0] + info.ref # Symbolic Reference or RemoteReference to the changed + # remote head or FETCH_HEAD + info.flags # additional flags to be & with enumeration members, + # i.e. 
info.flags & info.REJECTED + # is 0 if ref is FETCH_HEAD + info.note # additional notes given by git-fetch intended for the user + info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref, otherwise None + """ + __slots__ = ('ref','old_commit_binsha', 'flags', 'note') + + # %c %-*s %-*s -> %s (%s) + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") + + _flag_map = { '!' : GitdbFetchInfo.ERROR, + '+' : GitdbFetchInfo.FORCED_UPDATE, + '-' : GitdbFetchInfo.TAG_UPDATE, + '*' : 0, + '=' : GitdbFetchInfo.HEAD_UPTODATE, + ' ' : GitdbFetchInfo.FAST_FORWARD } + + def __init__(self, ref, flags, note = '', old_commit_binsha = None): + """ + Initialize a new instance + """ + self.ref = ref + self.flags = flags + self.note = note + self.old_commit_binsha = old_commit_binsha + + def __str__(self): + return self.name + + @property + def name(self): + """:return: Name of our remote ref""" + return self.ref.name + + @property + def commit(self): + """:return: Commit of our remote ref""" + return self.ref.commit + + @classmethod + def _from_line(cls, repo, line, fetch_line): + """Parse information from the given line as returned by git-fetch -v + and return a new FetchInfo object representing this information. + + We can handle a line as follows + "%c %-*s %-*s -> %s%s" + + Where c is either ' ', !, +, -, *, or = + ! 
means error + + means success forcing update + - means a tag was updated + * means birth of new branch or tag + = means the head was up to date ( and not moved ) + ' ' means a fast-forward + + fetch line is the corresponding line from FETCH_HEAD, like + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" + match = cls.re_fetch_result.match(line) + if match is None: + raise ValueError("Failed to parse line: %r" % line) + + # parse lines + control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() + try: + new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") + ref_type_name, fetch_note = fetch_note.split(' ', 1) + except ValueError: # unpack error + raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) + + # handle FETCH_HEAD and figure out ref type + # If we do not specify a target branch like master:refs/remotes/origin/master, + # the fetch result is stored in FETCH_HEAD which destroys the rule we usually + # have. 
In that case we use a symbolic reference which is detached + ref_type = None + if remote_local_ref == "FETCH_HEAD": + ref_type = SymbolicReference + elif ref_type_name == "branch": + ref_type = RemoteReference + elif ref_type_name == "tag": + ref_type = TagReference + else: + raise TypeError("Cannot handle reference type: %r" % ref_type_name) + + # create ref instance + if ref_type is SymbolicReference: + remote_local_ref = ref_type(repo, "FETCH_HEAD") + else: + remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) + # END create ref instance + + note = ( note and note.strip() ) or '' + + # parse flags from control_character + flags = 0 + try: + flags |= cls._flag_map[control_character] + except KeyError: + raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) + # END control char exception hanlding + + # parse operation string for more info - makes no sense for symbolic refs + old_commit_binsha = None + if isinstance(remote_local_ref, Reference): + if 'rejected' in operation: + flags |= cls.REJECTED + if 'new tag' in operation: + flags |= cls.NEW_TAG + if 'new branch' in operation: + flags |= cls.NEW_HEAD + if '...' in operation or '..' in operation: + split_token = '...' 
+ if control_character == ' ': + split_token = split_token[:-1] + old_commit_binsha = repo.rev_parse(operation.split(split_token)[0]) + # END handle refspec + # END reference flag handling + + return cls(remote_local_ref, flags, note, old_commit_binsha) + + +class GitCommandMixin(object): + """A mixin to provide the git command object through the git property""" + + def __init__(self, *args, **kwargs): + """Initialize this instance with the root and a git command""" + super(GitCommandMixin, self).__init__(*args, **kwargs) + self._git = Git(self.working_dir) + + @property + def git(self): + return self._git + + +class CmdObjectDBRMixin(object): + """A mixing implementing object reading through a git command + It will create objects only in the loose object database. + :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + #{ ODB Interface + # overrides from PureOdb Implementation, which is responsible only for writing + # objects + def info(self, sha): + hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) + return OInfo(hex_to_bin(hexsha), typename, size) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, stream) + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: Full binary 20 byte sha from the given partial hexsha + :raise AmbiguousObjectName: + :raise BadObject: + :note: currently we only raise BadObject as git does not communicate + AmbiguousObjects separately""" + try: + hexsha, typename, size = self._git.get_object_header(partial_hexsha) + return hex_to_bin(hexsha) + except (GitCommandError, ValueError): + raise BadObject(partial_hexsha) + # END handle exceptions + + #} END odb interface + + +class CmdTransportMixin(object): + """A mixin requiring the .git property as well as repository paths + 
@classmethod
def _digest_process_messages(cls, fh, progress):
    """Read progress messages from file-like object fh, supplying the respective
    progress messages to the progress instance.

    The stream is split on carriage returns as well as on newlines, as
    messages may be separated by either of them.

    :param fh: file-like object providing ``read(size)``
    :param progress: instance providing ``_parse_progress_line(line)``, whose
        return value is an iterable of lines it did not handle
    :return: list(line, ...) list of lines without linebreaks that did
        not contain progress information"""
    dropped_lines = list()
    chars = list()
    while True:
        # read char by char, a '\r' terminates a message just like '\n'
        char = fh.read(1)
        if not char:
            break

        if char in ('\r', '\n'):
            dropped_lines.extend(progress._parse_progress_line(''.join(chars)))
            chars = list()
        else:
            chars.append(char)
        # END process parsed line
    # END while file is not done reading

    # the stream may end without a final linebreak - do not lose that line
    if chars:
        dropped_lines.extend(progress._parse_progress_line(''.join(chars)))
    # END flush unterminated line
    return dropped_lines


@classmethod
def _finalize_proc(cls, proc):
    """Wait for the process (fetch, pull or push) and handle its errors accordingly

    :param proc: process instance providing ``wait()`` and ``poll()``
    :raise GitCommandError: if waiting failed and the return status is 128"""
    try:
        proc.wait()
    except GitCommandError:
        # if a push has rejected items, the command has non-zero return status
        # a return status of 128 indicates a connection error - reraise the previous one
        if proc.poll() == 128:
            raise
    # END exception handling


def _get_fetch_info_from_stderr(self, proc, progress):
    """Parse the stderr of a fetch or pull process into FetchInfo instances.

    Each remaining stderr line is paired with the line at the same position
    of the FETCH_HEAD file located in ``self.root_path()``.

    :param proc: process instance whose ``stderr`` is read until it is exhausted
    :param progress: progress instance, see ``_digest_process_messages``
    :return: IterableList of FetchInfo instances
    :raise GitCommandError: if a line starting with 'fatal:' was encountered"""
    # lines which are no progress are fetch info lines
    # this also waits for the command to finish
    # Skip some progress lines that don't provide relevant information
    fetch_info_lines = list()
    for line in self._digest_process_messages(proc.stderr, progress):
        if line.startswith('From') or line.startswith('remote: Total'):
            continue
        elif line.startswith('warning:'):
            sys.stderr.write(line + '\n')
            continue
        elif line.startswith('fatal:'):
            raise GitCommandError(("Error when fetching: %s" % line,), 2)
        # END handle special messages
        fetch_info_lines.append(line)
    # END for each line

    # read head information - close the file even if reading fails
    fp = open(join(self.root_path(), 'FETCH_HEAD'), 'r')
    try:
        fetch_head_info = fp.readlines()
    finally:
        fp.close()
    # END assure file is closed

    # zip() below would silently truncate if the counts differed
    assert len(fetch_info_lines) == len(fetch_head_info)

    output = IterableList('name')
    output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line)
                    for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info))

    self._finalize_proc(proc)
    return output


def _get_push_info(self, proc, progress):
    """Parse the output of a push process into PushInfo instances.

    :param proc: process instance, its ``stderr`` is digested for progress
        messages and its ``stdout`` is parsed line by line
    :param progress: progress instance, see ``_digest_process_messages``
    :return: IterableList of PushInfo instances, lines which could not be
        parsed are skipped"""
    # read progress information from stderr
    # we hope stdout can hold all the data, it should ...
    # read the lines manually as it will use carriage returns between the messages
    # to override the previous one. This is why we read the bytes manually
    self._digest_process_messages(proc.stderr, progress)

    output = IterableList('name')
    for line in proc.stdout.readlines():
        try:
            output.append(PushInfo._from_line(self, line))
        except ValueError:
            # if an error happens, additional info is given which we cannot parse
            pass
        # END exception handling
    # END for each line

    self._finalize_proc(proc)
    return output


#{ Transport DB interface

def push(self, url, refspecs=None, progress=None, **kwargs):
    """Push given refspecs using the git default implementation
    :param url: may be a remote name or a url
    :param refspecs: single string, RefSpec instance or list of such or None.
    :param progress: RemoteProgress derived instance or None
    :param **kwargs: Additional arguments to be passed to the git-push process
    :return: see ``_get_push_info``"""
    proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs)
    return self._get_push_info(proc, progress or RemoteProgress())


def pull(self, url, refspecs=None, progress=None, **kwargs):
    """Fetch and merge the given refspecs.
    If no refspecs are given, the merge will only work properly if you
    have setup upstream (tracking) branches.
    :param url: may be a remote name or a url
    :param refspecs: see push()
    :param progress: see push()
    :param **kwargs: Additional arguments to be passed to the git-pull process
    :return: see ``_get_fetch_info_from_stderr``"""
    # pass the 'refspecs' argument - the former name 'refspec' was undefined
    proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs)
    return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())


def fetch(self, url, refspecs=None, progress=None, **kwargs):
    """Fetch the latest changes
    :param url: may be a remote name or a url
    :param refspecs: see push()
    :param progress: see push()
    :param **kwargs: Additional arguments to be passed to the git-fetch process
    :return: see ``_get_fetch_info_from_stderr``"""
    # pass the 'refspecs' argument - the former name 'refspec' was undefined
    proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs)
    return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())

#} end transport db interface
class CmdHighLevelRepository(HighLevelRepository):
    """An intermediate interface carrying advanced git functionality that can be used
    in other compound repositories which do not implement this functionality themselves.

    The mixin must be used with repositories compatible to the GitCommandMixin.

    :note: at some point, methods provided here are supposed to be provided by custom interfaces"""
    DAEMON_EXPORT_FILE = 'git-daemon-export-ok'

    # precompiled regex
    re_whitespace = re.compile(r'\s+')
    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
    re_author_committer_start = re.compile(r'^(author|committer)')
    re_tab_full_line = re.compile(r'^\t(.*)$')

    def daemon_export():
        # builds the property in a local scope to keep the accessor
        # functions out of the class namespace
        def _get_daemon_export(self):
            filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
            return os.path.exists(filename)

        def _set_daemon_export(self, value):
            filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
            fileexists = os.path.exists(filename)
            if value and not fileexists:
                touch(filename)
            elif not value and fileexists:
                os.unlink(filename)
            # END handle export file

        return property(_get_daemon_export, _set_daemon_export,
                        doc="If True, git-daemon may export this repository")

    daemon_export = daemon_export()

    def is_dirty(self, index=True, working_tree=True, untracked_files=False):
        """:return: True if one of the enabled checks finds a change
        :param index: if True, the index is diffed against HEAD
        :param working_tree: if True, the working tree is diffed against the index
        :param untracked_files: if True, untracked files make the repository dirty

        For more information, see the respective docs of HighLevelRepository"""
        if self._bare:
            # Bare repositories with no associated working directory are
            # always considered to be clean.
            return False

        # start from the one which is fastest to evaluate
        default_args = ('--abbrev=40', '--full-index', '--raw')
        if index:
            # diff index against HEAD
            if isfile(self.index.path) and self.head.is_valid() and \
                len(self.git.diff('HEAD', '--cached', *default_args)):
                return True
        # END index handling
        if working_tree:
            # diff index against working tree
            if len(self.git.diff(*default_args)):
                return True
        # END working tree handling
        if untracked_files:
            if len(self.untracked_files):
                return True
        # END untracked files
        return False

    @property
    def untracked_files(self):
        """:return: list(str, ...) of untracked paths as parsed from the
            output of git-status

        For more information, see the respective docs of HighLevelRepository"""
        # make sure we get all files, not only untracked directories
        proc = self.git.status(untracked_files=True, as_process=True)
        stream = iter(proc.stdout)
        untracked_files = list()
        for line in stream:
            if not line.startswith("# Untracked files:"):
                continue
            # skip two lines
            next(stream)
            next(stream)

            for untracked_info in stream:
                if not untracked_info.startswith("#\t"):
                    break
                untracked_files.append(untracked_info.replace("#\t", "").rstrip())
            # END for each untracked info line
        # END for each line
        return untracked_files

    def blame(self, rev, file):
        """Parse the porcelain output of git-blame.

        :param rev: revision handed to git-blame
        :param file: path handed to git-blame
        :return: list of [Commit, list(line, ...)] items in order of appearance

        For more information, see the respective docs of HighLevelRepository"""
        data = self.git.blame(rev, '--', file, p=True)
        commits = dict()        # hexsha -> Commit, commits are created only once
        blames = list()
        info = None             # data of the blame entry currently being parsed

        for line in data.splitlines(False):
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]
            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7  - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    info = {'id': firstpart}
                    blames.append([None, []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # and the tab-prefixed line content
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                c = Commit(self, hex_to_bin(sha),
                                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                           authored_date=info['author_date'],
                                           committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                           committed_date=info['committer_date'],
                                           message=info['summary'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            m = self.re_tab_full_line.search(line)
                            text, = m.groups()
                            blames[-1][0] = c
                            blames[-1][1].append(text)
                            info = None
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames

    @classmethod
    def init(cls, path=None, mkdir=True, **kwargs):
        """
        :param kwargs:
            keyword arguments serving as additional options to the git-init command
        :return: instance of this type pointing to the initialized repository

        For more information, see the respective docs of HighLevelRepository"""

        if mkdir and path and not os.path.exists(path):
            os.makedirs(path, 0o755)

        # git command automatically chdir into the directory
        Git(path).init(**kwargs)
        # return our own type, just as _clone does
        return cls(path)

    @classmethod
    def _clone(cls, git, url, path, **kwargs):
        """Run git-clone and return an instance of this type for the clone.

        :param git: git command instance used to run the clone
        :param url: url or path to clone from
        :param path: path at which the clone should be created
        :param kwargs: additional arguments passed to git-clone
        :raise OSError: on windows, if path contains a tilde"""
        # special handling for windows for path at which the clone should be
        # created.
        # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
        # we at least give a proper error instead of letting git fail
        prev_cwd = None
        prev_path = None
        if os.name == 'nt':
            if '~' in path:
                raise OSError("Git cannot handle the ~ character in path %r correctly" % path)

            # on windows, git will think paths like c: are relative and prepend the
            # current working dir ( before it fails ). We temporarily adjust the working
            # dir to make this actually work
            match = re.match(r"(\w:[/\\])(.*)", path)
            if match:
                prev_cwd = os.getcwd()
                prev_path = path
                drive, rest_of_path = match.groups()
                os.chdir(drive)
                path = rest_of_path
                kwargs['with_keep_cwd'] = True
            # END cwd preparation
        # END windows handling

        try:
            git.clone(url, path, **kwargs)
        finally:
            if prev_cwd is not None:
                os.chdir(prev_cwd)
                path = prev_path
            # END reset previous working dir
        # END bad windows handling

        # our git command could have a different working dir than our actual
        # environment, hence we prepend its working dir if required
        if not os.path.isabs(path) and git.working_dir:
            path = join(git.working_dir, path)

        # adjust remotes - there may be operating systems which use backslashes,
        # These might be given as initial paths, but when handling the config file
        # that contains the remote from which we were cloned, git stops liking it
        # as it will escape the backslashes. Hence we undo the escaping just to be
        # sure
        repo = cls(os.path.abspath(path))
        if repo.remotes:
            repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
        # END handle remote repo
        return repo

    def clone(self, path, **kwargs):
        """:param kwargs:
            All remaining keyword arguments are given to the git-clone command

        For more information, see the respective method in HighLevelRepository"""
        return self._clone(self.git, self.git_dir, path, **kwargs)

    @classmethod
    def clone_from(cls, url, to_path, **kwargs):
        """
        :param kwargs: see the ``clone`` method
        For more information, see the respective method in the HighLevelRepository"""
        # there is no instance in a classmethod, hence we create the git
        # command ourselves, just as init does
        return cls._clone(Git(os.getcwd()), url, to_path, **kwargs)

    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
        """For all args see HighLevelRepository interface
        :param kwargs:
            Additional arguments passed to git-archive
            NOTE: Use the 'format' argument to define the kind of format. Use
            specialized ostreams to write any format supported by python

        :raise GitCommandError: in case something went wrong
        :return: self"""
        if treeish is None:
            treeish = self.head.commit
        if prefix and 'prefix' not in kwargs:
            kwargs['prefix'] = prefix
        kwargs['output_stream'] = ostream

        self.git.archive(treeish, **kwargs)
        return self
- info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') - - _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, - '-' : GitdbPushInfo.DELETED, '*' : 0, - '+' : GitdbPushInfo.FORCED_UPDATE, - ' ' : GitdbPushInfo.FAST_FORWARD, - '=' : GitdbPushInfo.UP_TO_DATE, - '!' : GitdbPushInfo.ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, - summary=''): - """ Initialize a new instance """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit_binsha = old_commit_binsha - self.summary = summary - - @property - def remote_ref(self): - """ - :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e""" - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control 
Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit_binsha = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit_binsha = remote.repo.commit(old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) - -class FetchInfo(GitdbFetchInfo): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. 
info.flags & info.REJECTED - # is 0 if ref is FETCH_HEAD - info.note # additional notes given by git-fetch intended for the user - info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit_binsha', 'flags', 'note') - - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") - - _flag_map = { '!' : GitdbFetchInfo.ERROR, - '+' : GitdbFetchInfo.FORCED_UPDATE, - '-' : GitdbFetchInfo.TAG_UPDATE, - '*' : 0, - '=' : GitdbFetchInfo.HEAD_UPTODATE, - ' ' : GitdbFetchInfo.FAST_FORWARD } - - def __init__(self, ref, flags, note = '', old_commit_binsha = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit_binsha = old_commit_binsha - - def __str__(self): - return self.name - - @property - def name(self): - """:return: Name of our remote ref""" - return self.ref.name - - @property - def commit(self): - """:return: Commit of our remote ref""" - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! 
means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. 
# names exported by 'from ... import *' - each of them must be defined below
__all__ = ['CmdGitDB', 'CmdCompatibilityGitDB']


class CmdGitDB(GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin,
               CmdHighLevelRepository, PureGitDB):
    """Git database combining the command based mixins with the PureGitDB.

    The command based types are listed first, hence their implementations
    take precedence over the ones of PureGitDB during attribute lookup."""
    pass


class CmdCompatibilityGitDB(CmdGitDB, RepoCompatInterface):
    """Command git database with the compatibility interface added for 0.3x code"""
class IndexDB(object):
    """Interface of a database giving access to a flattened index of all
    objects in its currently active tree."""

    @property
    def index(self):
        """:return: IndexFile compatible instance
        :note: to be implemented by subtypes"""
        raise NotImplementedError()
#{ Utility Methods

def tag(self, name):
    """:return: Tag with the given name
    :note: It does not necessarily exist, hence this is just a more convenient
        way to construct TagReference objects"""
    raise NotImplementedError()


def commit(self, rev=None):
    """Provide the Commit object of the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return: Commit compatible object"""
    raise NotImplementedError()


def iter_trees(self, *args, **kwargs):
    """:return: Iterator yielding Tree compatible objects
    :note: Takes all arguments known to iter_commits method"""
    raise NotImplementedError()


def tree(self, rev=None):
    """Provide the Tree (compatible) object of the given treeish revision.
    Examples::

          repo.tree(repo.heads[0])

    :param rev: is a revision pointing to a Treeish ( being a commit or tree )
    :return: ``git.Tree``

    :note:
        If you need a non-root level tree, find it by iterating the root tree. Otherwise
        it cannot know about its path relative to the repository root and subsequent
        operations might have unexpected results."""
    raise NotImplementedError()


def iter_commits(self, rev=None, paths='', **kwargs):
    """Iterate the Commit objects representing the history of a given ref/commit

    :param rev:
        revision specifier, see git-rev-parse for viable options.
        If None, the active branch will be used.

    :param paths:
        is an optional path or a list of paths to limit the returned commits to
        Commits that do not contain that path or the paths will not be returned.

    :param kwargs:
        Arguments to be passed to git-rev-list - common ones are
        max_count and skip

    :note: to receive only commits between two named revisions, use the
        "revA..revB" revision specifier

    :return: iterator yielding Commit compatible instances"""
    raise NotImplementedError()

#} END utility methods
class HighLevelRepository(object):
    """An interface combining several high-level repository functionality and properties.

    All members raise NotImplementedError and are to be provided by subtypes."""

    @property
    def daemon_export(self):
        """:return: True if the repository may be published by the git-daemon"""
        raise NotImplementedError()

    def is_dirty(self, index=True, working_tree=True, untracked_files=False):
        """
        :return:
            ``True`` if the repository is considered dirty. By default it will react
            like a git-status without untracked files, hence it is dirty if the
            index or the working copy have changes."""
        raise NotImplementedError()

    @property
    def untracked_files(self):
        """
        :return:
            list(str,...)

        :note:
            ignored files will not appear here, i.e. files mentioned in .gitignore.
            Bare repositories never have untracked files"""
        raise NotImplementedError()

    def blame(self, rev, file):
        """The blame information for the given file at the given revision.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :param file: path of the file to retrieve the blame information for
        :return:
            list: [Commit, list: [line, ...]]
            A list of items associating a Commit object with a list of lines that
            changed within the given commit. The Commit objects will be given in order
            of appearance."""
        raise NotImplementedError()

    @classmethod
    def init(cls, path=None, mkdir=True, **kwargs):
        """Initialize a git repository at the given path if specified

        :param path:
            is the full path to the repo (traditionally ends with /.git)
            or None in which case the repository will be created in the current
            working directory

        :param mkdir:
            if specified will create the repository directory if it doesn't
            already exists. Creates the directory with a mode=0755.
            Only effective if a path is explicitly given

        :param kwargs:
            implementation specific additional options, an implementation may
            ignore them

        :return: Instance pointing to the newly created repository with similar capabilities
            of this class"""
        raise NotImplementedError()

    def clone(self, path, **kwargs):
        """Create a clone from this repository.
        :param path:
            is the full path of the new repo (traditionally ends with ./.git).
        :param kwargs:
            implementation specific additional options

        :return: ``git.Repo`` (the newly cloned repo)"""
        raise NotImplementedError()

    @classmethod
    def clone_from(cls, url, to_path, **kwargs):
        """Create a clone from the given URL
        :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
        :param to_path: Path to which the repository should be cloned to
        :param kwargs: see the ``clone`` method
        :return: instance pointing to the cloned directory with similar capabilities as this class"""
        raise NotImplementedError()

    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
        """Archive the tree at the given revision.
        :param ostream: file compatible stream object to which the archive will be written
        :param treeish: is the treeish name/id, defaults to active branch
        :param prefix: is the optional prefix to prepend to each filename in the archive
        :param kwargs:
            Additional arguments passed to git-archive
            NOTE: Use the 'format' argument to define the kind of format. Use
            specialized ostreams to write any format supported by python
        :return: self"""
        raise NotImplementedError()
diff --git a/git/db/py/complex.py b/git/db/py/complex.py index 6504b3ed..efcbb2ba 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -2,6 +2,7 @@ # # This module is part of PureGitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.db.interface import HighLevelRepository from base import ( PureCompoundDB, PureObjectDBW, @@ -9,6 +10,7 @@ from base import ( PureRepositoryPathsMixin, PureConfigurationMixin, PureAlternatesFileMixin, + PureIndexDB, ) from resolve import PureReferencesMixin @@ -36,8 +38,7 @@ import os __all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, - PureSubmoduleDB, PureAlternatesFileMixin): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, @@ -102,7 +103,12 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, -class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): +class PureGitDB(PureGitODB, + PureRepositoryPathsMixin, PureConfigurationMixin, + PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, + PureIndexDB, + # HighLevelRepository Currently not implemented ! + ): """Git like database with support for object lookup as well as reference resolution. Our rootpath is set to the actual .git directory (bare on unbare). 
diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 94992d11..d0685747 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -5,6 +5,7 @@ from git.db.interface import ReferencesMixin from git.exc import BadObject from git.refs import SymbolicReference from git.objects.base import Object +from git.objects.commit import Commit from git.refs.head import HEAD from git.refs.headref import Head from git.refs.tag import TagReference @@ -290,6 +291,7 @@ class PureReferencesMixin(ReferencesMixin): HeadCls = Head ReferenceCls = Reference HEADCls = HEAD + CommitCls = Commit #} END configuration def resolve(self, name): @@ -313,6 +315,30 @@ class PureReferencesMixin(ReferencesMixin): def tag(self, name): return self.tags[name] + + def commit(self, rev=None): + if rev is None: + return self.head.commit + else: + return self.resolve_object(str(rev)+"^0") + #END handle revision + + def iter_trees(self, *args, **kwargs): + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + if rev is None: + return self.head.commit.tree + else: + return self.resolve_object(str(rev)+"^{tree}") + + def iter_commits(self, rev=None, paths='', **kwargs): + if rev is None: + rev = self.head.commit + + return self.CommitCls.iter_items(self, rev, paths, **kwargs) + + @property def head(self): return self.HEADCls(self,'HEAD') -- cgit v1.2.3 From 024adf37acddd6a5d8293b6b5d15795c59a142c0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 13:06:37 +0200 Subject: Fixed tests far enough to allow basic repository tests to be applied to any of the new database types. 
This reduces code duplication to the mere minimum, but allows custom tests to be added on top easily and flexibly --- git/db/cmd/__init__.py | 6 ++++-- git/db/cmd/base.py | 2 +- git/db/cmd/complex.py | 20 +++++++++++++++----- git/db/compat.py | 2 +- git/db/complex.py | 8 ++++---- git/db/py/__init__.py | 2 -- git/db/py/complex.py | 4 ++-- git/db/py/resolve.py | 13 ++++++++++--- 8 files changed, 37 insertions(+), 20 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py index 968d8c11..8a681e42 100644 --- a/git/db/cmd/__init__.py +++ b/git/db/cmd/__init__.py @@ -1,2 +1,4 @@ - -from complex import * +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index 0025e9b0..6a2473a3 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -15,7 +15,7 @@ from git.util import ( bin_to_hex, hex_to_bin ) -from git.db.compat import RepoCompatInterface +from git.db.compat import RepoCompatibilityInterface from git.util import RemoteProgress from git.db.interface import FetchInfo as GitdbFetchInfo from git.db.interface import PushInfo as GitdbPushInfo diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py index 9c26a8fa..3e6804f5 100644 --- a/git/db/cmd/complex.py +++ b/git/db/cmd/complex.py @@ -1,17 +1,27 @@ """Module with our own git implementation - it uses the git command""" -from git.db.compat import RepoCompatInterface +from git.db.compat import RepoCompatibilityInterface from git.db.py.complex import PureGitDB from base import * -__all__ = ['GitCmdDB', 'CmdCompatibilityGitDB'] +__all__ = ['GitCmdDB', 'CmdCompatibilityGitDB', 'CmdPartialGitDB'] -class CmdGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin, - CmdHighLevelRepository, PureGitDB): +class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, 
CmdTransportMixin, + CmdHighLevelRepository ): + """Utility repository which only partially implements all required methods. + It cannot be reliably used alone, but is provided to allow mixing it with other + implementations""" pass -class CmdCompatibilityGitDB(CmdGitDB, RepoCompatInterface): + +class CmdGitDB(CmdPartialGitDB, PureGitDB): + """A database which fills in its missing implementation using the pure python + implementation""" + pass + + +class CmdCompatibilityGitDB(CmdGitDB, RepoCompatibilityInterface): """Command git database with the compatabilty interface added for 0.3x code""" diff --git a/git/db/compat.py b/git/db/compat.py index b0c042a5..767ab5e0 100644 --- a/git/db/compat.py +++ b/git/db/compat.py @@ -4,7 +4,7 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module providing adaptors to maintain backwards compatability""" -class RepoCompatInterface(object): +class RepoCompatibilityInterface(object): """Interface to install backwards compatability of the new complex repository types with the previous, all in one, repository.""" diff --git a/git/db/complex.py b/git/db/complex.py index dc85a595..ef2013e3 100644 --- a/git/db/complex.py +++ b/git/db/complex.py @@ -1,12 +1,12 @@ """Module with many useful complex databases with different useful combinations of primary implementations""" from py.complex import PureGitDB -from cmd.complex import CmdGitDB -from compat import RepoCompatInterface +from cmd.complex import CmdPartialGitDB +from compat import RepoCompatibilityInterface -__all__ = ['CmdGitDB', 'PureGitDB', 'PureCmdGitDB'] +__all__ = ['CmdPartialGitDB', 'PureGitDB', 'PureCmdGitDB'] -class PureCmdGitDB(PureGitDB, CmdGitDB, RepoCompatInterface): +class PureCmdGitDB(PureGitDB, CmdPartialGitDB, RepoCompatibilityInterface): """Repository which uses the pure implementation primarily, but falls back to the git command implementation. 
Please note that the CmdGitDB does it the opposite way around.""" diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py index 73cc2bdf..8a681e42 100644 --- a/git/db/py/__init__.py +++ b/git/db/py/__init__.py @@ -2,5 +2,3 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php - -from complex import * diff --git a/git/db/py/complex.py b/git/db/py/complex.py index efcbb2ba..9d891537 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -20,7 +20,7 @@ from pack import PurePackedODB from ref import PureReferenceDB from submodule import PureSubmoduleDB -from git.db.compat import RepoCompatInterface +from git.db.compat import RepoCompatibilityInterface from git.util import ( LazyMixin, @@ -123,6 +123,6 @@ class PureGitDB(PureGitODB, -class PureCompatibilityGitDB(PureGitDB, RepoCompatInterface): +class PureCompatibilityGitDB(PureGitDB, RepoCompatibilityInterface): """Pure git database with a compatability layer required by 0.3x code""" diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index d0685747..7194149c 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -3,12 +3,19 @@ version assuming compatible interface for reference and object types""" from git.db.interface import ReferencesMixin from git.exc import BadObject -from git.refs import SymbolicReference -from git.objects.base import Object -from git.objects.commit import Commit +from git.refs import ( + SymbolicReference, + Reference, + HEAD, + Head, + TagReference + ) from git.refs.head import HEAD from git.refs.headref import Head from git.refs.tag import TagReference + +from git.objects.base import Object +from git.objects.commit import Commit from git.util import ( join, isdir, -- cgit v1.2.3 From 1f71ed94578799ee1667ba54b66a369e307f415b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 16:32:56 +0200 Subject: git cmd implementation of repository appears to work, at least this is what 
the test suggests. Pure python implementation still has some trouble, but this should be very fixable --- git/db/cmd/base.py | 30 ++++++++++++++++++------------ git/db/cmd/complex.py | 13 +------------ git/db/complex.py | 19 ++++++++++++++++--- git/db/py/base.py | 9 ++++++--- git/db/py/complex.py | 16 ++++++++-------- git/db/py/resolve.py | 3 +-- 6 files changed, 50 insertions(+), 40 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index 6a2473a3..b3354b0a 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -13,16 +13,16 @@ from git.base import ( from git.util import ( bin_to_hex, - hex_to_bin - ) -from git.db.compat import RepoCompatibilityInterface -from git.util import RemoteProgress + hex_to_bin, + RemoteProgress, + isfile, + join_path, + join, + Actor + ) from git.db.interface import FetchInfo as GitdbFetchInfo from git.db.interface import PushInfo as GitdbPushInfo from git.db.interface import HighLevelRepository - -from git.util import join_path -from git.util import join from git.cmd import Git from git.refs import ( Reference, @@ -30,8 +30,9 @@ from git.refs import ( SymbolicReference, TagReference ) - +from git.objects.commit import Commit import re +import os import sys @@ -472,6 +473,11 @@ class CmdHighLevelRepository(HighLevelRepository): re_author_committer_start = re.compile(r'^(author|committer)') re_tab_full_line = re.compile(r'^\t(.*)$') + #{ Configuration + CommitCls = Commit + GitCls = Git + #} END configuration + def daemon_export(): def _get_daemon_export(self): filename = join(self.git_dir, self.DAEMON_EXPORT_FILE) @@ -588,7 +594,7 @@ class CmdHighLevelRepository(HighLevelRepository): sha = info['id'] c = commits.get(sha) if c is None: - c = Commit( self, hex_to_bin(sha), + c = self.CommitCls( self, hex_to_bin(sha), author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string(info['committer'] + ' ' + 
info['committer_email']), @@ -619,9 +625,9 @@ class CmdHighLevelRepository(HighLevelRepository): os.makedirs(path, 0755) # git command automatically chdir into the directory - git = Git(path) + git = cls.GitCls(path) output = git.init(**kwargs) - return Repo(path) + return cls(path) @classmethod def _clone(cls, git, url, path, **kwargs): @@ -686,7 +692,7 @@ class CmdHighLevelRepository(HighLevelRepository): """ :param kwargs: see the ``clone`` method For more information, see the respective method in the HighLevelRepository""" - return cls._clone(type(self.git)(os.getcwd()), url, to_path, **kwargs) + return cls._clone(cls.GitCls(os.getcwd()), url, to_path, **kwargs) def archive(self, ostream, treeish=None, prefix=None, **kwargs): """For all args see HighLevelRepository interface diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py index 3e6804f5..49e8c590 100644 --- a/git/db/cmd/complex.py +++ b/git/db/cmd/complex.py @@ -1,12 +1,10 @@ """Module with our own git implementation - it uses the git command""" from git.db.compat import RepoCompatibilityInterface -from git.db.py.complex import PureGitDB - from base import * -__all__ = ['GitCmdDB', 'CmdCompatibilityGitDB', 'CmdPartialGitDB'] +__all__ = ['CmdPartialGitDB'] class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin, @@ -16,12 +14,3 @@ class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin, implementations""" pass - -class CmdGitDB(CmdPartialGitDB, PureGitDB): - """A database which fills in its missing implementation using the pure python - implementation""" - pass - - -class CmdCompatibilityGitDB(CmdGitDB, RepoCompatibilityInterface): - """Command git database with the compatabilty interface added for 0.3x code""" diff --git a/git/db/complex.py b/git/db/complex.py index ef2013e3..71a39c45 100644 --- a/git/db/complex.py +++ b/git/db/complex.py @@ -1,12 +1,25 @@ """Module with many useful complex databases with different useful combinations of primary 
implementations""" -from py.complex import PureGitDB +from py.complex import PurePartialGitDB from cmd.complex import CmdPartialGitDB from compat import RepoCompatibilityInterface -__all__ = ['CmdPartialGitDB', 'PureGitDB', 'PureCmdGitDB'] +__all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityGitDB'] -class PureCmdGitDB(PureGitDB, CmdPartialGitDB, RepoCompatibilityInterface): +class CmdGitDB(CmdPartialGitDB, PurePartialGitDB): + """A database which uses primarily the git command implementation, but falls back + to pure python where it is more feasible""" + +class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB): + """A database which fills in its missing implementation using the pure python + implementation""" + pass + +class PureGitDB(PurePartialGitDB, CmdPartialGitDB): + """A repository which uses the pure implementation primarily, but falls back + on using the git command for high-level functionality""" + +class PureCompatibilityGitDB(RepoCompatibilityInterface, PureGitDB): """Repository which uses the pure implementation primarily, but falls back to the git command implementation. 
Please note that the CmdGitDB does it the opposite way around.""" diff --git a/git/db/py/base.py b/git/db/py/base.py index 74b8beb9..4d9b6e14 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -8,6 +8,7 @@ from git.db.interface import * from git.util import ( pool, join, + isfile, normpath, abspath, dirname, @@ -25,7 +26,8 @@ from git.config import GitConfigParser from git.exc import ( BadObject, AmbiguousObjectName, - InvalidDBRoot + InvalidGitRepositoryError, + NoSuchPathError ) from async import ChannelThreadTask @@ -240,7 +242,7 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): epath = abspath(expandvars(expanduser(path or os.getcwd()))) if not exists(epath): - raise InvalidDBRoot(epath) + raise NoSuchPathError(epath) #END check file self._working_tree_dir = None @@ -264,7 +266,7 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): # END while curpath if self._git_path is None: - raise InvalidDBRoot(epath) + raise InvalidGitRepositoryError(epath) # END path not found self._bare = self._git_path.endswith(self.repo_dir) @@ -351,6 +353,7 @@ class PureConfigurationMixin(ConfigurationMixin): def __init__(self, *args, **kwargs): """Verify prereqs""" + super(PureConfigurationMixin, self).__init__(*args, **kwargs) assert hasattr(self, 'git_dir') def _path_at_level(self, level ): diff --git a/git/db/py/complex.py b/git/db/py/complex.py index 9d891537..a51118b3 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -1,6 +1,6 @@ # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors # -# This module is part of PureGitDB and is released under +# This module is part of PurePartialGitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from git.db.interface import HighLevelRepository from base import ( @@ -12,7 +12,7 @@ from base import ( PureAlternatesFileMixin, PureIndexDB, ) - +from transport import PureTransportDB from resolve import PureReferencesMixin from loose import 
PureLooseObjectODB @@ -35,14 +35,14 @@ from git.exc import ( ) import os -__all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') +__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB') class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, - as it deals solely with object lookup. Use a PureGitDB type if you need + as it deals solely with object lookup. Use a PurePartialGitDB type if you need reference and push support.""" # Configuration PackDBCls = PurePackedODB @@ -103,10 +103,10 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): -class PureGitDB(PureGitODB, +class PurePartialGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, - PureIndexDB, + PureIndexDB, PureTransportDB # HighLevelRepository Currently not implemented ! ): """Git like database with support for object lookup as well as reference resolution. 
@@ -119,10 +119,10 @@ class PureGitDB(PureGitODB, def __init__(self, root_path): """Initialize ourselves on the .git directory, or the .git/objects directory.""" PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_dir) + super(PurePartialGitDB, self).__init__(self.objects_dir) -class PureCompatibilityGitDB(PureGitDB, RepoCompatibilityInterface): +class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface): """Pure git database with a compatability layer required by 0.3x code""" diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 7194149c..7bea779e 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -320,8 +320,7 @@ class PureReferencesMixin(ReferencesMixin): return self.TagReferenceCls.list_items(self) def tag(self, name): - return self.tags[name] - + return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name)) def commit(self, rev=None): if rev is None: -- cgit v1.2.3 From 4ea529dd7f545dddc8cfdfdb4b6209eef0494ec5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 17:16:18 +0200 Subject: Fixed pure python implementation to run the default repository tests --- git/db/py/base.py | 15 +++++++-------- git/db/py/complex.py | 4 ++-- git/db/py/ref.py | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'git/db') diff --git a/git/db/py/base.py b/git/db/py/base.py index 4d9b6e14..a2c9a4ef 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -96,8 +96,9 @@ class PureObjectDBW(ObjectDBW): class PureRootPathDB(RootPathDB): def __init__(self, root_path): - super(PureRootPathDB, self).__init__(root_path) self._root_path = root_path + super(PureRootPathDB, self).__init__(root_path) + #{ Interface @@ -127,8 +128,8 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): def _set_cache_(self, attr): if attr == '_dbs': self._dbs = list() - elif attr == '_db_cache': - self._db_cache = dict() + elif attr == '_obj_cache': + 
self._obj_cache = dict() else: super(PureCompoundDB, self)._set_cache_(attr) @@ -138,14 +139,14 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): # most databases use binary representations, prevent converting # it everytime a database is being queried try: - return self._db_cache[sha] + return self._obj_cache[sha] except KeyError: pass # END first level cache for db in self._dbs: if db.has_object(sha): - self._db_cache[sha] = db + self._obj_cache[sha] = db return db # END for each database raise BadObject(sha) @@ -181,7 +182,7 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): def update_cache(self, force=False): # something might have changed, clear everything - self._db_cache.clear() + self._obj_cache.clear() stat = False for db in self._dbs: if isinstance(db, CachingDB): @@ -191,8 +192,6 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): return stat def partial_to_complete_sha_hex(self, partial_hexsha): - databases = self.databases() - len_partial_hexsha = len(partial_hexsha) if len_partial_hexsha % 2 != 0: partial_binsha = hex_to_bin(partial_hexsha + "0") diff --git a/git/db/py/complex.py b/git/db/py/complex.py index a51118b3..d5c185f3 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -38,7 +38,7 @@ import os __all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. 
:note: The type needs to be initialized on the ./objects directory to function, @@ -105,7 +105,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): class PurePartialGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, - PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, + PureReferencesMixin, PureSubmoduleDB, PureIndexDB, PureTransportDB # HighLevelRepository Currently not implemented ! ): diff --git a/git/db/py/ref.py b/git/db/py/ref.py index 94887fb8..d2c77a3a 100644 --- a/git/db/py/ref.py +++ b/git/db/py/ref.py @@ -31,8 +31,8 @@ class PureReferenceDB(PureCompoundDB): dbcls = self.ObjectDBCls if dbcls is None: # late import - from complex import PureGitODB # TODO: This should be a configurable for flexibility - dbcls = PureGitODB + import complex + dbcls = complex.PureGitODB # END get db type # try to get as many as possible, don't fail if some are unavailable -- cgit v1.2.3 From 47f14d527f61d30ffa49a6254838ca5c1aee3972 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 6 Jun 2011 18:59:46 +0200 Subject: Added loose object writing and reading performance tessts, in pure and command implementations. The previous performance test was truncated a bit as it compared directly with the git hash_object write performance. 
This is out, and if we wanted it we could implement it , but its actually slower for us --- git/db/py/loose.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'git/db') diff --git a/git/db/py/loose.py b/git/db/py/loose.py index 56915f18..6e72aff0 100644 --- a/git/db/py/loose.py +++ b/git/db/py/loose.py @@ -107,22 +107,6 @@ class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): # END handle cache raise BadObject(hexsha) - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: 20 byte binary sha1 string which matches the given name uniquely - :param name: hexadecimal partial name - :raise AmbiguousObjectName: - :raise BadObject: """ - candidate = None - for binsha in self.sha_iter(): - if bin_to_hex(binsha).startswith(partial_hexsha): - # it can't ever find the same object twice - if candidate is not None: - raise AmbiguousObjectName(partial_hexsha) - candidate = binsha - # END for each object - if candidate is None: - raise BadObject(partial_hexsha) - return candidate #} END interface @@ -179,6 +163,23 @@ class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): except BadObject: return False # END check existance + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate def store(self, istream): """note: The sha we produce will be hex by nature""" -- cgit v1.2.3 From a5497c432fe8ab1415d633d5d4b68f00a2807c26 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: 
Mon, 6 Jun 2011 20:29:03 +0200 Subject: Streams returned by git cmd db are now containing all the data right away. This could cause several copies to exist, and makes the cmd implementation a bad choice if big files are involved --- git/db/cmd/base.py | 13 ++++++++++--- git/db/complex.py | 5 ++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index b3354b0a..735e71df 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -31,6 +31,7 @@ from git.refs import ( TagReference ) from git.objects.commit import Commit +from cStringIO import StringIO import re import os import sys @@ -305,9 +306,15 @@ class CmdObjectDBRMixin(object): return OInfo(hex_to_bin(hexsha), typename, size) def stream(self, sha): - """For now, all lookup is done by git itself""" - hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) - return OStream(hex_to_bin(hexsha), typename, size, stream) + """For now, all lookup is done by git itself + :note: As we don't know when the stream is actually read (and if it is + stored for later use) we read the data rigth away and cache it. 
+ This has HUGE performance implication, both for memory as for + reading/deserializing objects, but we have no other choice in order + to make the database behaviour consistent with other implementations !""" + + hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha)) + return OStream(hex_to_bin(hexsha), typename, size, StringIO(data)) def partial_to_complete_sha_hex(self, partial_hexsha): """:return: Full binary 20 byte sha from the given partial hexsha diff --git a/git/db/complex.py b/git/db/complex.py index 71a39c45..31b047a0 100644 --- a/git/db/complex.py +++ b/git/db/complex.py @@ -8,7 +8,10 @@ __all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityG class CmdGitDB(CmdPartialGitDB, PurePartialGitDB): """A database which uses primarily the git command implementation, but falls back - to pure python where it is more feasible""" + to pure python where it is more feasible + :note: To assure consistent behaviour across implementations, when calling the + ``stream()`` method a cache is created. This makes this implementation a bad + choice when reading big files as these are streamed from memory in all cases.""" class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB): """A database which fills in its missing implementation using the pure python -- cgit v1.2.3 From 65f2dd0ab990adbe1a1470905090391ab5f2ce4e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 11:23:53 +0200 Subject: Fixed fetch/push/pull implementation. 
Next up is to integrate the consolidation changes from master to make clone use the same facilities --- git/db/cmd/base.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index 735e71df..e30d8fe6 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -18,11 +18,15 @@ from git.util import ( isfile, join_path, join, - Actor + Actor, + IterableList ) from git.db.interface import FetchInfo as GitdbFetchInfo from git.db.interface import PushInfo as GitdbPushInfo -from git.db.interface import HighLevelRepository +from git.db.interface import ( + HighLevelRepository, + TransportDB + ) from git.cmd import Git from git.refs import ( Reference, @@ -332,7 +336,7 @@ class CmdObjectDBRMixin(object): #} END odb interface -class CmdTransportMixin(object): +class CmdTransportMixin(TransportDB): """A mixin requiring the .git property as well as repository paths It will create objects only in the loose object database. 
@@ -399,13 +403,13 @@ class CmdTransportMixin(object): # END for each line # read head information - fp = open(join(self.root_path(), 'FETCH_HEAD'),'r') + fp = open(join(self.git_dir, 'FETCH_HEAD'),'r') fetch_head_info = fp.readlines() fp.close() assert len(fetch_info_lines) == len(fetch_head_info) - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) + output.extend(FetchInfo._from_line(self, err_line, fetch_line) for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) self._finalize_proc(proc) @@ -450,7 +454,7 @@ class CmdTransportMixin(object): :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.pull(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) def fetch(self, url, refspecs=None, progress=None, **kwargs): @@ -458,7 +462,7 @@ class CmdTransportMixin(object): :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.fetch(url, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) #} end transport db interface -- cgit v1.2.3 From 09517bd78660ee3fbd6716c920c36b967f7a71cf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 12:26:29 +0200 Subject: clone and clone_from methods now support the RemoteProgress interface, using similar functionality as used by the fetch, push and pull methods --- git/db/cmd/base.py | 207 ++++++++++++++++++++++++++-------------------------- git/db/interface.py | 11 ++- 2 files changed, 113 insertions(+), 105 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py 
b/git/db/cmd/base.py index e30d8fe6..393d9262 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -50,6 +50,96 @@ __all__ = ('CmdTransportMixin', 'RemoteProgress', 'GitCommandMixin', def touch(filename): fp = open(filename, "a") fp.close() + + +def digest_process_messages(fh, progress): + """Read progress messages from file-like object fh, supplying the respective + progress messages to the progress instance. + + :return: list(line, ...) list of lines without linebreaks that did + not contain progress information""" + line_so_far = '' + dropped_lines = list() + while True: + char = fh.read(1) + if not char: + break + + if char in ('\r', '\n'): + dropped_lines.extend(progress._parse_progress_line(line_so_far)) + line_so_far = '' + else: + line_so_far += char + # END process parsed line + # END while file is not done reading + return dropped_lines + +def finalize_process(proc): + """Wait for the process (fetch, pull or push) and handle its errors accordingly""" + try: + proc.wait() + except GitCommandError,e: + # if a push has rejected items, the command has non-zero return status + # a return status of 128 indicates a connection error - reraise the previous one + if proc.poll() == 128: + raise + pass + # END exception handling + + +def get_fetch_info_from_stderr(repo, proc, progress): + # skip first line as it is some remote info we are not interested in + output = IterableList('name') + + + # lines which are no progress are fetch info lines + # this also waits for the command to finish + # Skip some progress lines that don't provide relevant information + fetch_info_lines = list() + for line in digest_process_messages(proc.stderr, progress): + if line.startswith('From') or line.startswith('remote: Total'): + continue + elif line.startswith('warning:'): + print >> sys.stderr, line + continue + elif line.startswith('fatal:'): + raise GitCommandError(("Error when fetching: %s" % line,), 2) + # END handle special messages + fetch_info_lines.append(line) + # END 
for each line + + # read head information + fp = open(join(repo.git_dir, 'FETCH_HEAD'),'r') + fetch_head_info = fp.readlines() + fp.close() + + assert len(fetch_info_lines) == len(fetch_head_info) + + output.extend(FetchInfo._from_line(repo, err_line, fetch_line) + for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) + + finalize_process(proc) + return output + +def get_push_info(repo, proc, progress): + # read progress information from stderr + # we hope stdout can hold all the data, it should ... + # read the lines manually as it will use carriage returns between the messages + # to override the previous one. This is why we read the bytes manually + digest_process_messages(proc.stderr, progress) + + output = IterableList('name') + for line in proc.stdout.readlines(): + try: + output.append(PushInfo._from_line(repo, line)) + except ValueError: + # if an error happens, additional info is given which we cannot parse + pass + # END exception handling + # END for each line + + finalize_process(proc) + return output #} END utilities @@ -344,98 +434,6 @@ class CmdTransportMixin(TransportDB): have packs and the other implementations """ - @classmethod - def _digest_process_messages(cls, fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - - :return: list(line, ...) 
list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break - - if char in ('\r', '\n'): - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines - - @classmethod - def _finalize_proc(cls, proc): - """Wait for the process (fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling - - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in self._digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total'): - continue - elif line.startswith('warning:'): - print >> sys.stderr, line - continue - elif line.startswith('fatal:'): - raise GitCommandError(("Error when fetching: %s" % line,), 2) - # END handle special messages - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(join(self.git_dir, 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info) - - output.extend(FetchInfo._from_line(self, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - self._finalize_proc(proc) - return output - - def _get_push_info(self, proc, 
progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. This is why we read the bytes manually - self._digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - self._finalize_proc(proc) - return output - - #{ Transport DB interface def push(self, url, refspecs=None, progress=None, **kwargs): @@ -445,7 +443,7 @@ class CmdTransportMixin(TransportDB): :param progress: RemoteProgress derived instance or None :param **kwargs: Additional arguments to be passed to the git-push process""" proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs) - return self._get_push_info(proc, progress or RemoteProgress()) + return get_push_info(self, proc, progress or RemoteProgress()) def pull(self, url, refspecs=None, progress=None, **kwargs): """Fetch and merge the given refspecs. 
@@ -455,7 +453,7 @@ class CmdTransportMixin(TransportDB): :param refspecs: see push() :param progress: see push()""" proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + return get_fetch_info_from_stderr(self, proc, progress or RemoteProgress()) def fetch(self, url, refspecs=None, progress=None, **kwargs): """Fetch the latest changes @@ -463,7 +461,7 @@ class CmdTransportMixin(TransportDB): :param refspecs: see push() :param progress: see push()""" proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + return get_fetch_info_from_stderr(self, proc, progress or RemoteProgress()) #} end transport db interface @@ -641,7 +639,7 @@ class CmdHighLevelRepository(HighLevelRepository): return cls(path) @classmethod - def _clone(cls, git, url, path, **kwargs): + def _clone(cls, git, url, path, progress, **kwargs): # special handling for windows for path at which the clone should be # created. # tilde '~' will be expanded to the HOME no matter where the ~ occours. 
Hence @@ -667,7 +665,11 @@ class CmdHighLevelRepository(HighLevelRepository): # END windows handling try: - git.clone(url, path, **kwargs) + proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **kwargs) + if progress is not None: + digest_process_messages(proc.stderr, progress) + #END digest progress messages + finalize_process(proc) finally: if prev_cwd is not None: os.chdir(prev_cwd) @@ -691,19 +693,20 @@ class CmdHighLevelRepository(HighLevelRepository): # END handle remote repo return repo - def clone(self, path, **kwargs): - """:param kwargs: + def clone(self, path, progress = None, **kwargs): + """ + :param kwargs: All remaining keyword arguments are given to the git-clone command For more information, see the respective method in HighLevelRepository""" - return self._clone(self.git, self.git_dir, path, **kwargs) + return self._clone(self.git, self.git_dir, path, progress or RemoteProgress(), **kwargs) @classmethod - def clone_from(cls, url, to_path, **kwargs): + def clone_from(cls, url, to_path, progress = None, **kwargs): """ :param kwargs: see the ``clone`` method For more information, see the respective method in the HighLevelRepository""" - return cls._clone(cls.GitCls(os.getcwd()), url, to_path, **kwargs) + return cls._clone(cls.GitCls(os.getcwd()), url, to_path, progress or RemoteProgress(), **kwargs) def archive(self, ostream, treeish=None, prefix=None, **kwargs): """For all args see HighLevelRepository interface diff --git a/git/db/interface.py b/git/db/interface.py index 1a22bb7d..a7502e85 100644 --- a/git/db/interface.py +++ b/git/db/interface.py @@ -304,7 +304,7 @@ class TransportDB(object): as well, these are assumed to resovle to a meaningful string though. :param refspecs: iterable of reference specifiers or RefSpec instance, identifying the references to be fetch from the remote. 
- :param progress: callable which receives progress messages for user consumption + :param progress: RemoteProgress derived instance which receives progress messages for user consumption or None :param kwargs: may be used for additional parameters that the actual implementation could find useful. :return: List of FetchInfo compatible instances which provide information about what @@ -746,19 +746,24 @@ class HighLevelRepository(object): of this class""" raise NotImplementedError() - def clone(self, path): + def clone(self, path, progress = None): """Create a clone from this repository. :param path: is the full path of the new repo (traditionally ends with ./.git). + :param progress: + a RemoteProgress instance or None if no progress information is required + :return: ``git.Repo`` (the newly cloned repo)""" raise NotImplementedError() @classmethod - def clone_from(cls, url, to_path): + def clone_from(cls, url, to_path, progress = None): """Create a clone from the given URL :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS :param to_path: Path to which the repository should be cloned to + :param progress: + a RemoteProgress instance or None if no progress information is required :return: instance pointing to the cloned directory with similar capabilities as this class""" raise NotImplementedError() -- cgit v1.2.3 From 9bf3fdec93fe427bb5f0bd39c986a4e977969f41 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 13:38:48 +0200 Subject: First run in order to fix the remote handling. Cleaned up interfaces and figured out that the implementation really should be specific to the git command. 
This leaves the interface open for other implementations which use a different way to provide feedback (as we do not make assumptions about the format of a feedback line) --- git/db/cmd/base.py | 205 +++++++++++++++++++++++++++++++++++----------------- git/db/interface.py | 66 +++++++++++++++-- git/db/py/base.py | 5 +- 3 files changed, 204 insertions(+), 72 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index 393d9262..78adbc6e 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -14,18 +14,18 @@ from git.base import ( from git.util import ( bin_to_hex, hex_to_bin, - RemoteProgress, isfile, join_path, join, Actor, - IterableList + IterableList, ) -from git.db.interface import FetchInfo as GitdbFetchInfo -from git.db.interface import PushInfo as GitdbPushInfo from git.db.interface import ( + FetchInfo, + PushInfo, HighLevelRepository, - TransportDB + TransportDB, + RemoteProgress ) from git.cmd import Git from git.refs import ( @@ -41,8 +41,8 @@ import os import sys -__all__ = ('CmdTransportMixin', 'RemoteProgress', 'GitCommandMixin', - 'CmdObjectDBRMixin', 'CmdHighLevelRepository') +__all__ = ('CmdTransportMixin', 'GitCommandMixin', 'CmdPushInfo', 'CmdFetchInfo', + 'CmdRemoteProgress', 'CmdObjectDBRMixin', 'CmdHighLevelRepository') #{ Utilities @@ -115,13 +115,13 @@ def get_fetch_info_from_stderr(repo, proc, progress): assert len(fetch_info_lines) == len(fetch_head_info) - output.extend(FetchInfo._from_line(repo, err_line, fetch_line) + output.extend(CmdFetchInfo._from_line(repo, err_line, fetch_line) for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) finalize_process(proc) return output -def get_push_info(repo, proc, progress): +def get_push_info(repo, remotename_or_url, proc, progress): # read progress information from stderr # we hope stdout can hold all the data, it should ... 
# read the lines manually as it will use carriage returns between the messages @@ -131,7 +131,7 @@ def get_push_info(repo, proc, progress): output = IterableList('name') for line in proc.stdout.readlines(): try: - output.append(PushInfo._from_line(repo, line)) + output.append(CmdPushInfo._from_line(repo, remotename_or_url, line)) except ValueError: # if an error happens, additional info is given which we cannot parse pass @@ -143,37 +143,119 @@ def get_push_info(repo, proc, progress): #} END utilities -class PushInfo(GitdbPushInfo): +class CmdRemoteProgress(RemoteProgress): """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit_binsha # binary sha at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', '_remote', 'summary') + A Remote progress implementation taking a user derived progress to call the + respective methods on. 
+ """ + __slots__ = ("_seen_ops", '_progress') + re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") + re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") + + def __init__(self, progress_instance = None): + self._seen_ops = list() + if progress_instance is None: + progress_instance = RemoteProgress() + #END assure proper instance + self._progress = progress_instance + + def _parse_progress_line(self, line): + """Parse progress information from the given line as retrieved by git-push + or git-fetch + + Call the own update(), __call__() and line_dropped() methods according + to the parsed result. + + :return: list(line, ...) list of lines that could not be processed""" + # handle + # Counting objects: 4, done. + # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. + sub_lines = line.split('\r') + failed_lines = list() + for sline in sub_lines: + # find esacpe characters and cut them away - regex will not work with + # them as they are non-ascii. 
As git might expect a tty, it will send them + last_valid_index = None + for i,c in enumerate(reversed(sline)): + if ord(c) < 32: + # its a slice index + last_valid_index = -i-1 + # END character was non-ascii + # END for each character in sline + if last_valid_index is not None: + sline = sline[:last_valid_index] + # END cut away invalid part + sline = sline.rstrip() + + cur_count, max_count = None, None + match = self.re_op_relative.match(sline) + if match is None: + match = self.re_op_absolute.match(sline) + + if not match: + self._progress.line_dropped(sline) + failed_lines.append(sline) + continue + # END could not get match + + op_code = 0 + remote, op_name, percent, cur_count, max_count, message = match.groups() + + # get operation id + if op_name == "Counting objects": + op_code |= self.COUNTING + elif op_name == "Compressing objects": + op_code |= self.COMPRESSING + elif op_name == "Writing objects": + op_code |= self.WRITING + else: + raise ValueError("Operation name %r unknown" % op_name) + + # figure out stage + if op_code not in self._seen_ops: + self._seen_ops.append(op_code) + op_code |= self.BEGIN + # END begin opcode + + if message is None: + message = '' + # END message handling + + message = message.strip() + done_token = ', done.' + if message.endswith(done_token): + op_code |= self.END + message = message[:-len(done_token)] + # END end message handling + + self._progress.update(op_code, cur_count, max_count, message, line) + self._progress(message, line) + # END for each sub line + return failed_lines + + +class CmdPushInfo(PushInfo): + """ + Pure Python implementation of a PushInfo interface + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha', + '_remotename_or_url', 'repo', 'summary') - _flag_map = { 'X' : GitdbPushInfo.NO_MATCH, - '-' : GitdbPushInfo.DELETED, '*' : 0, - '+' : GitdbPushInfo.FORCED_UPDATE, - ' ' : GitdbPushInfo.FAST_FORWARD, - '=' : GitdbPushInfo.UP_TO_DATE, - '!' 
: GitdbPushInfo.ERROR } + _flag_map = { 'X' : PushInfo.NO_MATCH, + '-' : PushInfo.DELETED, '*' : 0, + '+' : PushInfo.FORCED_UPDATE, + ' ' : PushInfo.FAST_FORWARD, + '=' : PushInfo.UP_TO_DATE, + '!' : PushInfo.ERROR } - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit_binsha=None, + def __init__(self, flags, local_ref, remote_ref_string, repo, remotename_or_url, old_commit_binsha=None, summary=''): """ Initialize a new instance """ self.flags = flags self.local_ref = local_ref + self.repo = repo self.remote_ref_string = remote_ref_string - self._remote = remote + self._remotename_or_url = remotename_or_url self.old_commit_binsha = old_commit_binsha self.summary = summary @@ -185,16 +267,20 @@ class PushInfo(GitdbPushInfo): to the remote_ref_string kept in this instance.""" # translate heads to a local remote, tags stay as they are if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) + return TagReference(self.repo, self.remote_ref_string) elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + remote_ref = Reference(self.repo, self.remote_ref_string) + if '/' in self._remotename_or_url: + sys.stderr.write("Cannot provide RemoteReference instance if it was created from a url instead of of a remote name: %s. 
Returning Reference instance instead" % sefl._remotename_or_url) + return remote_ref + #END assert correct input + return RemoteReference(self.repo, "refs/remotes/%s/%s" % (str(self._remotename_or_url), remote_ref.name)) else: raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) # END @classmethod - def _from_line(cls, remote, line): + def _from_line(cls, repo, remotename_or_url, line): """Create a new PushInfo instance as parsed from line which is expected to be like refs/heads/master:refs/heads/master 05d2687..1d0568e""" control_character, from_to, summary = line.split('\t', 3) @@ -212,7 +298,7 @@ class PushInfo(GitdbPushInfo): if flags & cls.DELETED: from_ref = None else: - from_ref = Reference.from_path(remote.repo, from_ref_string) + from_ref = Reference.from_path(repo, from_ref_string) # commit handling, could be message or commit info old_commit_binsha = None @@ -237,38 +323,27 @@ class PushInfo(GitdbPushInfo): if control_character == " ": split_token = ".." old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit_binsha = remote.repo.commit(old_sha) + old_commit_binsha = repo.resolve(old_sha) # END message handling - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit_binsha, summary) + return cls(flags, from_ref, to_ref_string, repo, remotename_or_url, old_commit_binsha, summary) -class FetchInfo(GitdbFetchInfo): +class CmdFetchInfo(FetchInfo): """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. 
info.flags & info.REJECTED - # is 0 if ref is FETCH_HEAD - info.note # additional notes given by git-fetch intended for the user - info.old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None + Pure python implementation of a FetchInfo interface """ __slots__ = ('ref','old_commit_binsha', 'flags', 'note') # %c %-*s %-*s -> %s (%s) re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") - _flag_map = { '!' : GitdbFetchInfo.ERROR, - '+' : GitdbFetchInfo.FORCED_UPDATE, - '-' : GitdbFetchInfo.TAG_UPDATE, + _flag_map = { '!' : FetchInfo.ERROR, + '+' : FetchInfo.FORCED_UPDATE, + '-' : FetchInfo.TAG_UPDATE, '*' : 0, - '=' : GitdbFetchInfo.HEAD_UPTODATE, - ' ' : GitdbFetchInfo.FAST_FORWARD } + '=' : FetchInfo.HEAD_UPTODATE, + ' ' : FetchInfo.FAST_FORWARD } def __init__(self, ref, flags, note = '', old_commit_binsha = None): """ @@ -295,7 +370,7 @@ class FetchInfo(GitdbFetchInfo): @classmethod def _from_line(cls, repo, line, fetch_line): """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. + and return a new CmdFetchInfo object representing this information. We can handle a line as follows "%c %-*s %-*s -> %s%s" @@ -366,7 +441,7 @@ class FetchInfo(GitdbFetchInfo): split_token = '...' 
if control_character == ' ': split_token = split_token[:-1] - old_commit_binsha = repo.rev_parse(operation.split(split_token)[0]) + old_commit_binsha = repo.resolve(operation.split(split_token)[0]) # END handle refspec # END reference flag handling @@ -443,7 +518,7 @@ class CmdTransportMixin(TransportDB): :param progress: RemoteProgress derived instance or None :param **kwargs: Additional arguments to be passed to the git-push process""" proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs) - return get_push_info(self, proc, progress or RemoteProgress()) + return get_push_info(self, url, proc, CmdRemoteProgress(progress)) def pull(self, url, refspecs=None, progress=None, **kwargs): """Fetch and merge the given refspecs. @@ -453,7 +528,7 @@ class CmdTransportMixin(TransportDB): :param refspecs: see push() :param progress: see push()""" proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) - return get_fetch_info_from_stderr(self, proc, progress or RemoteProgress()) + return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) def fetch(self, url, refspecs=None, progress=None, **kwargs): """Fetch the latest changes @@ -461,7 +536,7 @@ class CmdTransportMixin(TransportDB): :param refspecs: see push() :param progress: see push()""" proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) - return get_fetch_info_from_stderr(self, proc, progress or RemoteProgress()) + return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) #} end transport db interface @@ -699,14 +774,14 @@ class CmdHighLevelRepository(HighLevelRepository): All remaining keyword arguments are given to the git-clone command For more information, see the respective method in HighLevelRepository""" - return self._clone(self.git, self.git_dir, path, progress or RemoteProgress(), **kwargs) + return self._clone(self.git, self.git_dir, path, 
CmdRemoteProgress(progress), **kwargs) @classmethod def clone_from(cls, url, to_path, progress = None, **kwargs): """ :param kwargs: see the ``clone`` method For more information, see the respective method in the HighLevelRepository""" - return cls._clone(cls.GitCls(os.getcwd()), url, to_path, progress or RemoteProgress(), **kwargs) + return cls._clone(cls.GitCls(os.getcwd()), url, to_path, CmdRemoteProgress(progress), **kwargs) def archive(self, ostream, treeish=None, prefix=None, **kwargs): """For all args see HighLevelRepository interface diff --git a/git/db/interface.py b/git/db/interface.py index a7502e85..30b0c7c1 100644 --- a/git/db/interface.py +++ b/git/db/interface.py @@ -238,6 +238,64 @@ class RefSpec(object): return self.source is None +class RemoteProgress(object): + """ + Handler providing an interface to parse progress information emitted by git-push + and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. + + Subclasses should derive from this type. + """ + _num_op_codes = 5 + BEGIN, END, COUNTING, COMPRESSING, WRITING = [1 << x for x in range(_num_op_codes)] + STAGE_MASK = BEGIN|END + OP_MASK = ~STAGE_MASK + + #{ Subclass Interface + + def line_dropped(self, line): + """Called whenever a line could not be understood and was therefore dropped.""" + pass + + def update(self, op_code, cur_count, max_count=None, message='', input=''): + """Called whenever the progress changes + + :param op_code: + Integer allowing to be compared against Operation IDs and stage IDs. + + Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation + ID as well as END. It may be that BEGIN and END are set at once in case only + one progress message was emitted due to the speed of the operation. + Between BEGIN and END, none of these flags will be set + + Operation IDs are all held within the OP_MASK. Only one Operation ID will + be active per call. 
+ :param cur_count: Current absolute count of items + + :param max_count: + The maximum count of items we expect. It may be None in case there is + no maximum number of items or if it is (yet) unknown. + + :param message: + In case of the 'WRITING' operation, it contains the amount of bytes + transferred. It may possibly be used for other purposes as well. + + :param input: + The actual input string that was used to parse the information from. + This is usually a line from the output of git-fetch, but really + depends on the implementation + + You may read the contents of the current line in self._cur_line""" + pass + + def __call__(self, message, input=''): + """Same as update, but with a simpler interface which only provides the + message of the operation. + :note: This method will be called in addition to the update method. It is + up to you which one you implement""" + pass + #} END subclass interface + + class PushInfo(object): """A type presenting information about the result of a push operation for exactly one refspec @@ -248,7 +306,7 @@ class PushInfo(object): remote_ref_string # path to the remote reference located on the remote side remote_ref # Remote Reference on the local side corresponding to # the remote_ref_string. It can be a TagReference as well. - old_commit # commit at which the remote_ref was standing before we pushed + old_commit_binsha # binary sha to commit at which the remote_ref was standing before we pushed # it to local_ref.commit. Will be None if an error was indicated summary # summary line providing human readable english text about the push """ @@ -269,10 +327,8 @@ class FetchInfo(object): # i.e. 
info.flags & info.REJECTED # is 0 if ref is FETCH_HEAD note # additional notes given by the fetch-pack implementation intended for the user - old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref as hexsha or None - # Implementors may use their own type too, but it should decay into a - # string of its hexadecimal sha representation""" + old_commit_binsha# if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref as binary sha or None""" __slots__ = tuple() NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ diff --git a/git/db/py/base.py b/git/db/py/base.py index a2c9a4ef..2fdbd202 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -18,7 +18,7 @@ from git.util import ( expandvars, expanduser, exists, - is_git_dir + is_git_dir, ) from git.index import IndexFile @@ -40,7 +40,7 @@ import os __all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin', 'PureIndexDB') - + class PureObjectDBR(ObjectDBR): @@ -471,3 +471,4 @@ class PureAlternatesFileMixin(object): alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") #} END interface + -- cgit v1.2.3 From 410cf1ec4a9906ffe7eb96b4aa559ce4dd2962d4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 19:02:56 +0200 Subject: The --progress flag will now automatically be used if possible when doing any push or fetch operation --- git/db/cmd/base.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index 78adbc6e..cbf4e29b 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -141,6 +141,16 @@ def get_push_info(repo, remotename_or_url, proc, progress): finalize_process(proc) return output 
+def add_progress(kwargs, git): + """Add the --progress flag to the given kwargs dict if supported by the + git command + :return: possibly altered kwargs""" + v = git.version_info + if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3: + kwargs['progress'] = True + #END handle --progress + return kwargs + #} END utilities class CmdRemoteProgress(RemoteProgress): @@ -517,7 +527,7 @@ class CmdTransportMixin(TransportDB): :param refspecs: single string, RefSpec instance or list of such or None. :param progress: RemoteProgress derived instance or None :param **kwargs: Additional arguments to be passed to the git-push process""" - proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **kwargs) + proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git)) return get_push_info(self, url, proc, CmdRemoteProgress(progress)) def pull(self, url, refspecs=None, progress=None, **kwargs): @@ -527,7 +537,7 @@ class CmdTransportMixin(TransportDB): :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) + proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git)) return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) def fetch(self, url, refspecs=None, progress=None, **kwargs): @@ -535,7 +545,7 @@ class CmdTransportMixin(TransportDB): :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **kwargs) + proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git)) return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) #} end transport db interface @@ -740,7 +750,7 @@ class 
CmdHighLevelRepository(HighLevelRepository): # END windows handling try: - proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **kwargs) + proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git)) if progress is not None: digest_process_messages(proc.stderr, progress) #END digest progress messages -- cgit v1.2.3 From f6897c78be5a5530129df50742cb6cabfb8609c9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 20:06:33 +0200 Subject: Added additional opcodes to remote progress to make it compatible to newer git versions. This bug existed for quite a while but didn't show up as progress wasn't sent most of the time. All methods that could use a progress will only activate it if a progress is actually given --- git/db/cmd/base.py | 36 +++++++++++++++++++++++------------- git/db/interface.py | 4 ++-- 2 files changed, 25 insertions(+), 15 deletions(-) (limited to 'git/db') diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py index cbf4e29b..ef22c931 100644 --- a/git/db/cmd/base.py +++ b/git/db/cmd/base.py @@ -141,14 +141,17 @@ def get_push_info(repo, remotename_or_url, proc, progress): finalize_process(proc) return output -def add_progress(kwargs, git): +def add_progress(kwargs, git, progress): """Add the --progress flag to the given kwargs dict if supported by the - git command + git command. 
If the actual progress in the given progress instance is not + given, we do not request any progress :return: possibly altered kwargs""" - v = git.version_info - if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3: - kwargs['progress'] = True - #END handle --progress + if progress._progress is not None: + v = git.version_info + if v[0] > 1 or v[1] > 7 or v[2] > 0 or v[3] > 3: + kwargs['progress'] = True + #END handle --progress + #END handle progress return kwargs #} END utilities @@ -218,6 +221,10 @@ class CmdRemoteProgress(RemoteProgress): op_code |= self.COMPRESSING elif op_name == "Writing objects": op_code |= self.WRITING + elif op_name == "Receiving objects": + op_code |= self.RECEIVING + elif op_name == "Resolving deltas": + op_code |= self.RESOLVING else: raise ValueError("Operation name %r unknown" % op_name) @@ -527,8 +534,9 @@ class CmdTransportMixin(TransportDB): :param refspecs: single string, RefSpec instance or list of such or None. :param progress: RemoteProgress derived instance or None :param **kwargs: Additional arguments to be passed to the git-push process""" - proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git)) - return get_push_info(self, url, proc, CmdRemoteProgress(progress)) + progress = CmdRemoteProgress(progress) + proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git, progress)) + return get_push_info(self, url, proc, progress) def pull(self, url, refspecs=None, progress=None, **kwargs): """Fetch and merge the given refspecs. 
@@ -537,16 +545,18 @@ class CmdTransportMixin(TransportDB): :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git)) - return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) + progress = CmdRemoteProgress(progress) + proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress)) + return get_fetch_info_from_stderr(self, proc, progress) def fetch(self, url, refspecs=None, progress=None, **kwargs): """Fetch the latest changes :param url: may be a remote name or a url :param refspecs: see push() :param progress: see push()""" - proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git)) - return get_fetch_info_from_stderr(self, proc, CmdRemoteProgress(progress)) + progress = CmdRemoteProgress(progress) + proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress)) + return get_fetch_info_from_stderr(self, proc, progress) #} end transport db interface @@ -750,7 +760,7 @@ class CmdHighLevelRepository(HighLevelRepository): # END windows handling try: - proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git)) + proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) if progress is not None: digest_process_messages(proc.stderr, progress) #END digest progress messages diff --git a/git/db/interface.py b/git/db/interface.py index 30b0c7c1..a4c05265 100644 --- a/git/db/interface.py +++ b/git/db/interface.py @@ -245,8 +245,8 @@ class RemoteProgress(object): Subclasses should derive from this type. 
""" - _num_op_codes = 5 - BEGIN, END, COUNTING, COMPRESSING, WRITING = [1 << x for x in range(_num_op_codes)] + _num_op_codes = 7 + BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)] STAGE_MASK = BEGIN|END OP_MASK = ~STAGE_MASK -- cgit v1.2.3