From a12a7618a1f6f61a4c97ddf4cc422158c3fa72ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2011 20:17:00 +0200 Subject: Updated objects to use the ones defined in gitdb as basis. Only the submodule implementation is left in git-python as it requires some advanced features. No tests where run yet --- git/objects/base.py | 166 +------------------------ git/objects/blob.py | 25 +--- git/objects/commit.py | 267 ++--------------------------------------- git/objects/fun.py | 197 ------------------------------ git/objects/submodule/base.py | 4 +- git/objects/tag.py | 71 +---------- git/objects/tree.py | 273 ++---------------------------------------- 7 files changed, 34 insertions(+), 969 deletions(-) (limited to 'git/objects') diff --git a/git/objects/base.py b/git/objects/base.py index 5f2f7809..42d7b600 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,170 +3,6 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import LazyMixin, join_path_native, stream_copy -from util import get_object_type_by_name -from gitdb.util import ( - hex_to_bin, - bin_to_hex, - basename - ) - -import gitdb.typ as dbtyp - -_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" - +from gitdb.object.base import Object, IndexObject __all__ = ("Object", "IndexObject") -class Object(LazyMixin): - """Implements an Object which may be Blobs, Trees, Commits and Tags""" - NULL_HEX_SHA = '0'*40 - NULL_BIN_SHA = '\0'*20 - - TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) - __slots__ = ("repo", "binsha", "size" ) - type = None # to be set by subclass - - def __init__(self, repo, binsha): - """Initialize an object by identifying it by its binary sha. - All keyword arguments will be set on demand if None. - - :param repo: repository this object is located in - - :param binsha: 20 byte SHA1""" - super(Object,self).__init__() - self.repo = repo - self.binsha = binsha - assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) - - @classmethod - def new(cls, repo, id): - """ - :return: New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a binsha even though - the input id may have been a Reference or Rev-Spec - - :param id: reference, rev-spec, or hexsha - - :note: This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a binsha.""" - return repo.rev_parse(str(id)) - - @classmethod - def new_from_sha(cls, repo, sha1): - """ - :return: new object instance of a type appropriate to represent the given - binary sha1 - :param sha1: 20 byte binary sha1""" - if sha1 == cls.NULL_BIN_SHA: - # the NULL binsha is always the root commit - return get_object_type_by_name('commit')(repo, sha1) - #END handle special case - oinfo = repo.odb.info(sha1) - inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) - inst.size = oinfo.size - return inst - - def _set_cache_(self, attr): - """Retrieve object information""" - if attr == "size": - oinfo = self.repo.odb.info(self.binsha) - self.size = oinfo.size - # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """:return: True if the objects have the same SHA1""" - return self.binsha == other.binsha - - def __ne__(self, other): - """:return: True if the objects do not have the same SHA1 """ - return self.binsha != other.binsha - - def __hash__(self): - """:return: Hash of our id allowing objects to be used in dicts and sets""" - return hash(self.binsha) - - def __str__(self): - """:return: string of our SHA1 as understood by all git commands""" - return bin_to_hex(self.binsha) - - def __repr__(self): - """:return: string with pythonic representation of our object""" - return '' % (self.__class__.__name__, self.hexsha) - - @property - def hexsha(self): - """:return: 40 byte hex version of our 20 byte binary sha""" - return bin_to_hex(self.binsha) - - @property - def data_stream(self): - """ :return: File Object compatible stream to the uncompressed raw data of the object - :note: returned streams must be read in order""" - return self.repo.odb.stream(self.binsha) - - def stream_data(self, ostream): - """Writes our data directly to the given output stream - :param ostream: File object compatible stream object. - :return: self""" - istream = self.repo.odb.stream(self.binsha) - stream_copy(istream, ostream) - return self - - -class IndexObject(Object): - """Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects""" - __slots__ = ("path", "mode") - - # for compatability with iterable lists - _id_attribute_ = 'path' - - def __init__(self, repo, binsha, mode=None, path=None): - """Initialize a newly instanced IndexObject - :param repo: is the Repo we are located in - :param binsha: 20 byte sha1 - :param mode: is the stat compatible file mode as int, use the stat module - to evaluate the infomration - :param path: - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - :note: - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree.""" - super(IndexObject, self).__init__(repo, binsha) - if mode is not None: - self.mode = mode - if path is not None: - self.path = path - - def __hash__(self): - """:return: - Hash of our path as index items are uniquely identifyable by path, not - by their data !""" - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - # END hanlde slot attribute - - @property - def name(self): - """:return: Name portion of the path, effectively being the basename""" - return basename(self.path) - - @property - def abspath(self): - """ - :return: - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. """ - return join_path_native(self.repo.working_tree_dir, self.path) - diff --git a/git/objects/blob.py b/git/objects/blob.py index f52d1a53..38834436 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -4,29 +4,10 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from mimetypes import guess_type -import base +from git.util import RepoAliasMixin +from gitdb.object.blob import Blob as GitDB_Blob __all__ = ('Blob', ) -class Blob(base.IndexObject): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" - - # valid blob modes - executable_mode = 0100755 - file_mode = 0100644 - link_mode = 0120000 - +class Blob(GitDB_Blob, RepoAliasMixin): __slots__ = tuple() - - @property - def mime_type(self): - """ - :return: String describing the mime type of this file (based on the filename) - :note: Defaults to 'text/plain' in case the actual file type is unknown. """ - guesses = None - if self.path: - guesses = guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index fd4187b0..d932ab1a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,142 +3,28 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from git.util import ( - Actor, - Iterable, - Stats, - ) +from git.util import RepoAliasMixin +from gitdb.object.commit import Commit as GitDB_Commit from git.diff import Diffable -from tree import Tree +from gitdb.util import ( + Iterable, + Actor + ) + from gitdb import IStream from cStringIO import StringIO -import base -from gitdb.util import ( - hex_to_bin - ) -from util import ( - Traversable, - Serializable, - parse_date, - altz_to_utctz_str, - parse_actor_and_date - ) -from time import ( - time, - altzone - ) +from util import parse_date +from time import altzone + import os -import sys __all__ = ('Commit', ) -class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): - """Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_date = "GIT_AUTHOR_DATE" - env_committer_date = "GIT_COMMITTER_DATE" - - # CONFIGURATION KEYS - conf_encoding = 'i18n.commitencoding' - - # INVARIANTS - default_encoding = "UTF-8" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): + """Provides additional git-command based functionality to the default gitdb commit object""" + __slots__ = tuple() - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents", "encoding") - _id_attribute_ = "binsha" - - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper - binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) - else: - super(Commit, self)._set_cache_(attr) - # END handle attrs - - @property - def summary(self): - """:return: First line of the commit message""" - return self.message.split('\n', 1)[0] - def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -225,33 +111,6 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) return Stats._list_from_string(self.repo, text) - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database - - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - @classmethod def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): @@ -361,105 +220,5 @@ class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): # END advance head handling return new_commit - - #{ Serializable Implementation - - def _serialize(self, stream): - write = stream.write - write("tree %s\n" % self.tree) - for p in self.parents: - write("parent %s\n" % p) - - a = self.author - aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - - c = self.committer - fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) - - # encode committer - aname = c.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) - - if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) - - write("\n") - - # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): - write(self.message.encode(self.encoding)) - else: - write(self.message) - # END handle encoding - return self - - def _deserialize(self, stream): - """:param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object""" - readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') - - self.parents = list() - next_line = None - while True: - parent_line = readline() - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) - # END for each parent line - self.parents = tuple(self.parents) - - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) - - - # now we can have the encoding line, or an empty line followed by the optional - # message. - self.encoding = self.default_encoding - # read encoding or empty line to separate message - enc = readline() - enc = enc.strip() - if enc: - self.encoding = enc[enc.find(' ')+1:] - # now comes the message separator - readline() - # END handle encoding - - # decode the authors name - try: - self.author.name = self.author.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) - # END handle author's encoding - - # decode committer name - try: - self.committer.name = self.committer.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) - # END handle author's encoding - - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip - self.message = stream.read() - try: - self.message = self.message.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) - # END exception handling - return self #} END serializable implementation diff --git a/git/objects/fun.py b/git/objects/fun.py index 9b0a377c..22016b27 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,199 +1,2 @@ """Module with functions which are supposed to be as fast as possible""" -from stat import S_ISDIR -__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', - 'traverse_tree_recursive') - - - - -def tree_to_stream(entries, write): - """Write the give list of entries into a stream using its write method - :param entries: **sorted** list of tuples with (binsha, mode, name) - :param write: write method which takes a data string""" - ord_zero = ord('0') - bit_mask = 7 # 3 bits set - - for binsha, mode, name in entries: - mode_str = '' - for i in xrange(6): - mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str - # END for each 8 octal value - - # git slices away the first octal if its zero - if mode_str[0] == '0': - mode_str = mode_str[1:] - # END save a byte - - # here it comes: if the name is actually unicode, the replacement below - # will not work as the binsha is not part of the ascii unicode encoding - - # hence we must convert to an utf8 string for it to work properly. - # According to my tests, this is exactly what git does, that is it just - # takes the input literally, which appears to be utf8 on linux. - if isinstance(name, unicode): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) - # END for each item - - -def tree_entries_from_data(data): - """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data - :return: list(tuple(binsha, mode, tree_relative_path), ...)""" - ord_zero = ord('0') - len_data = len(data) - i = 0 - out = list() - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - - # default encoding for strings in git is utf8 - # Only use the respective unicode object if the byte stream was encoded - name = data[ns:i] - name_enc = name.decode("utf-8") - if len(name) > len(name_enc): - name = name_enc - # END handle encoding - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - out.append((sha, mode, name)) - # END for each byte in data stream - return out - - -def _find_by_name(tree_data, name, is_dir, start_at): - """return data entry matching the given name and tree mode - or None. - Before the item is returned, the respective data item is set - None in the tree_data list to mark it done""" - try: - item = tree_data[start_at] - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[start_at] = None - return item - except IndexError: - pass - # END exception handling - for index, item in enumerate(tree_data): - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[index] = None - return item - # END if item matches - # END for each item - return None - -def _to_full_path(item, path_prefix): - """Rebuild entry with given path prefix""" - if not item: - return item - return (item[0], item[1], path_prefix+item[2]) - -def traverse_trees_recursive(odb, tree_shas, path_prefix): - """ - :return: list with entries according to the given binary tree-shas. - The result is encoded in a list - of n tuple|None per blob/commit, (n == len(tree_shas)), where - * [0] == 20 byte sha - * [1] == mode as int - * [2] == path relative to working tree root - The entry tuple is None if the respective blob/commit did not - exist in the given tree. - :param tree_shas: iterable of shas pointing to trees. All trees must - be on the same level. A tree-sha may be None in which case None - :param path_prefix: a prefix to be added to the returned paths on this level, - set it '' for the first iteration - :note: The ordering of the returned items will be partially lost""" - trees_data = list() - nt = len(tree_shas) - for tree_sha in tree_shas: - if tree_sha is None: - data = list() - else: - data = tree_entries_from_data(odb.stream(tree_sha).read()) - # END handle muted trees - trees_data.append(data) - # END for each sha to get data for - - out = list() - out_append = out.append - - # find all matching entries and recursively process them together if the match - # is a tree. If the match is a non-tree item, put it into the result. - # Processed items will be set None - for ti, tree_data in enumerate(trees_data): - for ii, item in enumerate(tree_data): - if not item: - continue - # END skip already done items - entries = [ None for n in range(nt) ] - entries[ti] = item - sha, mode, name = item # its faster to unpack - is_dir = S_ISDIR(mode) # type mode bits - - # find this item in all other tree data items - # wrap around, but stop one before our current index, hence - # ti+nt, not ti+1+nt - for tio in range(ti+1, ti+nt): - tio = tio % nt - entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) - # END for each other item data - - # if we are a directory, enter recursion - if is_dir: - out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) - else: - out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) - # END handle recursion - - # finally mark it done - tree_data[ii] = None - # END for each item - - # we are done with one tree, set all its data empty - del(tree_data[:]) - # END for each tree_data chunk - return out - -def traverse_tree_recursive(odb, tree_sha, path_prefix): - """ - :return: list of entries of the tree pointed to by the binary tree_sha. An entry - has the following format: - * [0] 20 byte sha - * [1] mode as int - * [2] path relative to the repository - :param path_prefix: prefix to prepend to the front of all returned paths""" - entries = list() - data = tree_entries_from_data(odb.stream(tree_sha).read()) - - # unpacking/packing is faster than accessing individual items - for sha, mode, name in data: - if S_ISDIR(mode): - entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) - else: - entries.append((sha, mode, path_prefix+name)) - # END for each item - - return entries diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 2160299b..7997e5e5 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,3 +1,5 @@ +from git.util import RepoAliasMixin +from gitdb.object.submodule import Submodule as GitDB_Submodule import util from util import ( mkhead, @@ -53,7 +55,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages -class Submodule(util.IndexObject, Iterable, Traversable): +class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. diff --git a/git/objects/tag.py b/git/objects/tag.py index c7d02abe..a3a85eef 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -4,73 +4,10 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. """ -import base -from gitdb.util import hex_to_bin -from util import ( - get_object_type_by_name, - parse_actor_and_date - ) - +from git.util import RepoAliasMixin +from gitdb.object.tag import GitDB_TagObject __all__ = ("TagObject", ) -class TagObject(base.Object): +class TagObject(GitDB_TagObject, RepoAliasMixin): """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) - - def __init__(self, repo, binsha, object=None, tag=None, - tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): - """Initialize a tag object with additional data - - :param repo: repository this object is located in - :param binsha: 20 byte SHA1 - :param object: Object instance of object we are pointing to - :param tag: name of this tag - :param tagger: Actor identifying the tagger - :param tagged_date: int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the - authored_date is in, in a format similar to time.altzone""" - super(TagObject, self).__init__(repo, binsha ) - if object is not None: - self.object = object - if tag is not None: - self.tag = tag - if tagger is not None: - self.tagger = tagger - if tagged_date is not None: - self.tagged_date = tagged_date - if tagger_tz_offset is not None: - self.tagger_tz_offset = tagger_tz_offset - if message is not None: - self.message = message - - def _set_cache_(self, attr): - """Cache all our attributes at once""" - if attr in TagObject.__slots__: - ostream = self.repo.odb.stream(self.binsha) - lines = ostream.read().splitlines() - - obj, hexsha = lines[0].split(" ") # object - type_token, type_name = lines[1].split(" ") # type - self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) - - self.tag = lines[2][4:] # tag - - tagger_info = lines[3][7:]# tagger - self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in case there really is no message, it would not exist. Otherwise - # a newline separates header from message - if len(lines) > 5: - self.message = "\n".join(lines[5:]) - else: - self.message = '' - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - - + __slots__ = tuple() diff --git a/git/objects/tree.py b/git/objects/tree.py index 67431686..23e1dfe4 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -3,278 +3,25 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import util -from base import IndexObject -from git.util import join_path -from blob import Blob -from submodule.base import Submodule +from git.util import RepoAliasMixin +from gitdb.object.tree import GitDB_Tree, TreeModifier import git.diff as diff -from fun import ( - tree_entries_from_data, - tree_to_stream - ) - -from gitdb.util import ( - to_bin_sha, - ) +from submodule.base import Submodule __all__ = ("TreeModifier", "Tree") -class TreeModifier(object): - """A utility class providing methods to alter the underlying cache in a list-like fashion. - - Once all adjustments are complete, the _cache, which really is a refernce to - the cache of a tree, will be sorted. Assuring it will be in a serializable state""" - __slots__ = '_cache' - - def __init__(self, cache): - self._cache = cache - - def _index_by_name(self, name): - """:return: index of an item with name, or -1 if not found""" - for i, t in enumerate(self._cache): - if t[2] == name: - return i - # END found item - # END for each item in cache - return -1 - - #{ Interface - def set_done(self): - """Call this method once you are done modifying the tree information. - It may be called several times, but be aware that each call will cause - a sort operation - :return self:""" - self._cache.sort(key=lambda t: t[2]) # sort by name - return self - #} END interface - - #{ Mutators - def add(self, sha, mode, name, force=False): - """Add the given item to the tree. If an item with the given name already - exists, nothing will be done, but a ValueError will be raised if the - sha and mode of the existing item do not match the one you add, unless - force is True - - :param sha: The 20 or 40 byte sha of the item to add - :param mode: int representing the stat compatible mode of the item - :param force: If True, an item with your name and information will overwrite - any existing item with the same name, no matter which information it has - :return: self""" - if '/' in name: - raise ValueError("Name must not contain '/' characters") - if (mode >> 12) not in Tree._map_id_to_type: - raise ValueError("Invalid object type according to mode %o" % mode) - - sha = to_bin_sha(sha) - index = self._index_by_name(name) - item = (sha, mode, name) - if index == -1: - self._cache.append(item) - else: - if force: - self._cache[index] = item - else: - ex_item = self._cache[index] - if ex_item[0] != sha or ex_item[1] != mode: - raise ValueError("Item %r existed with different properties" % name) - # END handle mismatch - # END handle force - # END handle name exists - return self - - def add_unchecked(self, binsha, mode, name): - """Add the given item to the tree, its correctness is assumed, which - puts the caller into responsibility to assure the input is correct. - For more information on the parameters, see ``add`` - :param binsha: 20 byte binary sha""" - self._cache.append((binsha, mode, name)) - - def __delitem__(self, name): - """Deletes an item with the given name if it exists""" - index = self._index_by_name(name) - if index > -1: - del(self._cache[index]) - - #} END mutators - - -class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): - """Tree objects represent an ordered list of Blobs and other Trees. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. - - You may as well access by index - blob = tree[0] - """ - - type = "tree" - __slots__ = "_cache" - - # actual integer ids for comparison - commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link - blob_id = 010 - symlink_id = 012 - tree_id = 004 +class Tree(GitDB_Tree, diff.Diffable): + """As opposed to the default GitDB tree implementation, this one can be diffed + and returns our own types""" + __slots__ = tuple() _map_id_to_type = { - commit_id : Submodule, - blob_id : Blob, - symlink_id : Blob + GitDB_Tree.commit_id : Submodule, + GitDB_Tree.blob_id : Blob, + GitDB_Tree.symlink_id : Blob # tree id added once Tree is defined } - - def __init__(self, repo, binsha, mode=tree_id<<12, path=None): - super(Tree, self).__init__(repo, binsha, mode, path) - - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return tuple(index_object._iter_convert_to_object(index_object._cache)) - return tuple() - - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - ostream = self.repo.odb.stream(self.binsha) - self._cache = tree_entries_from_data(ostream.read()) - else: - super(Tree, self)._set_cache_(attr) - # END handle attribute - - def _iter_convert_to_object(self, iterable): - """Iterable yields tuples of (binsha, mode, name), which will be converted - to the respective object representation""" - for binsha, mode, name in iterable: - path = join_path(self.path, name) - try: - yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) - except KeyError: - raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) - # END for each item - - def __div__(self, file): - """Find the named object in this tree's contents - :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` - - :raise KeyError: if given file or tree does not exist in tree""" - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for info in self._cache: - if info[2] == file: # [2] == name - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - # END for each obj - raise KeyError( msg % file ) - # END handle long paths - - - @property - def trees(self): - """:return: list(Tree, ...) list of trees directly below this tree""" - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """:return: list(Blob, ...) list of blobs directly below this tree""" - return [ i for i in self if i.type == "blob" ] - - @property - def cache(self): - """ - :return: An object allowing to modify the internal cache. This can be used - to change the tree's contents. When done, make sure you call ``set_done`` - on the tree modifier, or serialization behaviour will be incorrect. - See the ``TreeModifier`` for more information on how to alter the cache""" - return TreeModifier(self._cache) - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see util.Traversable.traverse - Trees are set to visit_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - - # List protocol - def __getslice__(self, i, j): - return list(self._iter_convert_to_object(self._cache[i:j])) - - def __iter__(self): - return self._iter_convert_to_object(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self, item): - if isinstance(item, int): - info = self._cache[item] - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self, item): - if isinstance(item, IndexObject): - for info in self._cache: - if item.binsha == info[0]: - return True - # END compare sha - # END for each entry - # END handle item is index object - # compatability - - # treat item as repo-relative path - path = self.path - for info in self._cache: - if item == join_path(path, info[2]): - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._iter_convert_to_object(self._cache)) - - def _serialize(self, stream): - """Serialize this tree into the stream. Please note that we will assume - our tree data to be in a sorted state. If this is not the case, serialization - will not generate a correct tree representation as these are assumed to be sorted - by algorithms""" - tree_to_stream(self._cache, stream.write) - return self - - def _deserialize(self, stream): - self._cache = tree_entries_from_data(stream.read()) - return self - - -# END tree - # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree -- cgit v1.2.3 From 9fc7b9a068189cc0d249d0870dfb0112ab5dec92 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 May 2011 15:25:11 +0200 Subject: Made most primal imports work, but stopped here as there are many more changes when doing the merge --- git/objects/fun.py | 2 ++ git/objects/tag.py | 2 +- git/objects/tree.py | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'git/objects') diff --git a/git/objects/fun.py b/git/objects/fun.py index 22016b27..2443bad7 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,2 +1,4 @@ """Module with functions which are supposed to be as fast as possible""" +from gitdb.object.fun import * + diff --git a/git/objects/tag.py b/git/objects/tag.py index a3a85eef..59b2362e 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -5,7 +5,7 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. """ from git.util import RepoAliasMixin -from gitdb.object.tag import GitDB_TagObject +from gitdb.object.tag import TagObject as GitDB_TagObject __all__ = ("TagObject", ) class TagObject(GitDB_TagObject, RepoAliasMixin): diff --git a/git/objects/tree.py b/git/objects/tree.py index 23e1dfe4..00ef07fc 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -4,9 +4,11 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.tree import GitDB_Tree, TreeModifier +from gitdb.object.tree import Tree as GitDB_Tree +from gitdb.object.tree import TreeModifier import git.diff as diff +from blob import Blob from submodule.base import Submodule __all__ = ("TreeModifier", "Tree") -- cgit v1.2.3 From 4177eefd7bdaea96a529b00ba9cf751924ede202 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 May 2011 19:43:22 +0200 Subject: Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work --- git/objects/base.py | 173 +++++++++++++++++++++++++- git/objects/blob.py | 27 +++- git/objects/commit.py | 259 +++++++++++++++++++++++++++++++++++++- git/objects/fun.py | 199 ++++++++++++++++++++++++++++- git/objects/submodule/base.py | 3 + git/objects/tag.py | 73 ++++++++++- git/objects/tree.py | 282 ++++++++++++++++++++++++++++++++++++++++-- git/objects/util.py | 1 + 8 files changed, 993 insertions(+), 24 deletions(-) (limited to 'git/objects') diff --git a/git/objects/base.py b/git/objects/base.py index 42d7b600..24967e7b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -3,6 +3,177 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from gitdb.object.base import Object, IndexObject + +from util import get_object_type_by_name +from git.util import ( + hex_to_bin, + bin_to_hex, + dirname, + basename, + LazyMixin, + join_path_native, + stream_copy + ) + +from git.typ import ObjectType + +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + __all__ = ("Object", "IndexObject") +class Object(LazyMixin): + """Implements an Object which may be Blobs, Trees, Commits and Tags""" + NULL_HEX_SHA = '0'*40 + NULL_BIN_SHA = '\0'*20 + + TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag) + __slots__ = ("odb", "binsha", "size" ) + + type = None # to be set by subclass + type_id = None # to be set by subclass + + def __init__(self, odb, binsha): + """Initialize an object by identifying it by its binary sha. + All keyword arguments will be set on demand if None. + + :param odb: repository this object is located in + + :param binsha: 20 byte SHA1""" + super(Object,self).__init__() + self.odb = odb + self.binsha = binsha + assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) + + @classmethod + def new(cls, odb, id): + """ + :return: New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a binsha even though + the input id may have been a Reference or Rev-Spec + + :param id: reference, rev-spec, or hexsha + + :note: This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a binsha.""" + return odb.rev_parse(str(id)) + + @classmethod + def new_from_sha(cls, odb, sha1): + """ + :return: new object instance of a type appropriate to represent the given + binary sha1 + :param sha1: 20 byte binary sha1""" + if sha1 == cls.NULL_BIN_SHA: + # the NULL binsha is always the root commit + return get_object_type_by_name('commit')(odb, sha1) + #END handle special case + oinfo = odb.info(sha1) + inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha) + inst.size = oinfo.size + return inst + + def _set_cache_(self, attr): + """Retrieve object information""" + if attr == "size": + oinfo = self.odb.info(self.binsha) + self.size = oinfo.size + # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """:return: True if the objects have the same SHA1""" + return self.binsha == other.binsha + + def __ne__(self, other): + """:return: True if the objects do not have the same SHA1 """ + return self.binsha != other.binsha + + def __hash__(self): + """:return: Hash of our id allowing objects to be used in dicts and sets""" + return hash(self.binsha) + + def __str__(self): + """:return: string of our SHA1 as understood by all git commands""" + return bin_to_hex(self.binsha) + + def __repr__(self): + """:return: string with pythonic representation of our object""" + return '' % (self.__class__.__name__, self.hexsha) + + @property + def hexsha(self): + """:return: 40 byte hex version of our 20 byte binary sha""" + return bin_to_hex(self.binsha) + + @property + def data_stream(self): + """ :return: File Object compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.odb.stream(self.binsha) + + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + istream = self.odb.stream(self.binsha) + stream_copy(istream, ostream) + return self + + +class IndexObject(Object): + """Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects""" + __slots__ = ("path", "mode") + + # for compatability with iterable lists + _id_attribute_ = 'path' + + def __init__(self, odb, binsha, mode=None, path=None): + """Initialize a newly instanced IndexObject + :param odb: is the object database we are located in + :param binsha: 20 byte sha1 + :param mode: is the stat compatible file mode as int, use the stat module + to evaluate the infomration + :param path: + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + :note: + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree.""" + super(IndexObject, self).__init__(odb, binsha) + if mode is not None: + self.mode = mode + if path is not None: + self.path = path + + def __hash__(self): + """:return: + Hash of our path as index items are uniquely identifyable by path, not + by their data !""" + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + # END hanlde slot attribute + + @property + def name(self): + """:return: Name portion of the path, effectively being the basename""" + return basename(self.path) + + @property + def abspath(self): + """ + :return: + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. """ + assert False, "Only works if repository is not bare - provide this check in an interface" + return join_path_native(dirname(self.odb.root_path()), self.path) + diff --git a/git/objects/blob.py b/git/objects/blob.py index 38834436..326c5459 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -5,9 +5,32 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.blob import Blob as GitDB_Blob +from mimetypes import guess_type +from gitdb.typ import ObjectType + +import base __all__ = ('Blob', ) -class Blob(GitDB_Blob, RepoAliasMixin): +class Blob(base.IndexObject, RepoAliasMixin): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = ObjectType.blob + type_id = ObjectType.blob_id + + # valid blob modes + executable_mode = 0100755 + file_mode = 0100644 + link_mode = 0120000 + __slots__ = tuple() + + @property + def mime_type(self): + """ + :return: String describing the mime type of this file (based on the filename) + :note: Defaults to 'text/plain' in case the actual file type is unknown. """ + guesses = None + if self.path: + guesses = guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index d932ab1a..30dcaa0a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -3,28 +3,68 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import RepoAliasMixin -from gitdb.object.commit import Commit as GitDB_Commit -from git.diff import Diffable +import base + +from gitdb.typ import ObjectType +from tree import Tree +from cStringIO import StringIO + from gitdb.util import ( + hex_to_bin, + Actor, + RepoAliasMixin, Iterable, Actor ) -from gitdb import IStream +from util import ( + Traversable, + Serializable, + altz_to_utctz_str, + parse_actor_and_date + ) +from git.diff import Diffable +from gitdb.base import IStream from cStringIO import StringIO from util import parse_date from time import altzone import os +import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): - """Provides additional git-command based functionality to the default gitdb commit object""" +class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): + """Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary.""" __slots__ = tuple() + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_date = "GIT_AUTHOR_DATE" + env_committer_date = "GIT_COMMITTER_DATE" + + # CONFIGURATION KEYS + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = ObjectType.commit + type_id = ObjectType.commit_id + + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "binsha" + + def count(self, paths='', **kwargs): """Count the number of commits reachable from this commit @@ -221,4 +261,211 @@ class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin): return new_commit + def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set on first query. + + :param binsha: 20 byte sha1 + :param parents: tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + :param tree: Tree + Tree object + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :param parents: + List or tuple of Commit objects which are our parent(s) in the commit + dependency graph + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone.""" + super(Commit,self).__init__(odb, binsha) + if tree is not None: + assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) + if tree is not None: + self.tree = tree + if author is not None: + self.author = author + if authored_date is not None: + self.authored_date = authored_date + if author_tz_offset is not None: + self.author_tz_offset = author_tz_offset + if committer is not None: + self.committer = committer + if committed_date is not None: + self.committed_date = committed_date + if committer_tz_offset is not None: + self.committer_tz_offset = committer_tz_offset + if message is not None: + self.message = message + if parents is not None: + self.parents = parents + if encoding is not None: + self.encoding = encoding + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + binsha, typename, self.size, stream = self.odb.stream(self.binsha) + self._deserialize(StringIO(stream.read())) + else: + super(Commit, self)._set_cache_(attr) + # END handle attrs + + @property + def summary(self): + """:return: First line of the commit message""" + return self.message.split('\n', 1)[0] + + @classmethod + def _iter_from_process_or_stream(cls, odb, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + hexsha = line.strip() + if len(hexsha) > 40: + # split additional information, as returned by bisect for instance + hexsha, rest = line.split(None, 1) + # END handle extra info + + assert len(hexsha) == 40, "Invalid line: %s" % hexsha + yield cls(odb, hex_to_bin(hexsha)) + # END for each line in stream + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + aname = a.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", aname, a.email, + self.authored_date, + altz_to_utctz_str(self.author_tz_offset))) + + # encode committer + aname = c.name + if isinstance(aname, unicode): + aname = aname.encode(self.encoding) + # END handle unicode in name + write(fmt % ("committer", aname, c.email, + self.committed_date, + altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % self.encoding) + + write("\n") + + # write plain bytes, be sure its encoded according to our encoding + if isinstance(self.message, unicode): + write(self.message.encode(self.encoding)) + else: + write(self.message) + # END handle encoding + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1]))) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # decode the authors name + try: + self.author.name = self.author.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) + # END handle author's encoding + + # decode committer name + try: + self.committer.name = self.committer.name.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) + # END handle author's encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + try: + self.message = self.message.decode(self.encoding) + except UnicodeDecodeError: + print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) + # END exception handling + return self + #} END serializable implementation + diff --git a/git/objects/fun.py b/git/objects/fun.py index 2443bad7..6f2eaaad 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,4 +1,201 @@ """Module with functions which are supposed to be as fast as possible""" -from gitdb.object.fun import * +from stat import S_ISDIR + +__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', + 'traverse_tree_recursive') + + + + +def tree_to_stream(entries, write): + """Write the give list of entries into a stream using its write method + :param entries: **sorted** list of tuples with (binsha, mode, name) + :param write: write method which takes a data string""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + + for binsha, mode, name in entries: + mode_str = '' + for i in xrange(6): + mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str + # END for each 8 octal value + + # git slices away the first octal if its zero + if mode_str[0] == '0': + mode_str = mode_str[1:] + # END save a byte + + # here it comes: if the name is actually unicode, the replacement below + # will not work as the binsha is not part of the ascii unicode encoding - + # hence we must convert to an utf8 string for it to work properly. + # According to my tests, this is exactly what git does, that is it just + # takes the input literally, which appears to be utf8 on linux. + if isinstance(name, unicode): + name = name.encode("utf8") + write("%s %s\0%s" % (mode_str, name, binsha)) + # END for each item + + +def tree_entries_from_data(data): + """Reads the binary representation of a tree and returns tuples of Tree items + :param data: data block with tree data + :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + ord_zero = ord('0') + len_data = len(data) + i = 0 + out = list() + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + + # default encoding for strings in git is utf8 + # Only use the respective unicode object if the byte stream was encoded + name = data[ns:i] + name_enc = name.decode("utf-8") + if len(name) > len(name_enc): + name = name_enc + # END handle encoding + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + out.append((sha, mode, name)) + # END for each byte in data stream + return out + + +def _find_by_name(tree_data, name, is_dir, start_at): + """return data entry matching the given name and tree mode + or None. + Before the item is returned, the respective data item is set + None in the tree_data list to mark it done""" + try: + item = tree_data[start_at] + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[start_at] = None + return item + except IndexError: + pass + # END exception handling + for index, item in enumerate(tree_data): + if item and item[2] == name and S_ISDIR(item[1]) == is_dir: + tree_data[index] = None + return item + # END if item matches + # END for each item + return None + +def _to_full_path(item, path_prefix): + """Rebuild entry with given path prefix""" + if not item: + return item + return (item[0], item[1], path_prefix+item[2]) + +def traverse_trees_recursive(odb, tree_shas, path_prefix): + """ + :return: list with entries according to the given binary tree-shas. + The result is encoded in a list + of n tuple|None per blob/commit, (n == len(tree_shas)), where + * [0] == 20 byte sha + * [1] == mode as int + * [2] == path relative to working tree root + The entry tuple is None if the respective blob/commit did not + exist in the given tree. + :param tree_shas: iterable of shas pointing to trees. All trees must + be on the same level. A tree-sha may be None in which case None + :param path_prefix: a prefix to be added to the returned paths on this level, + set it '' for the first iteration + :note: The ordering of the returned items will be partially lost""" + trees_data = list() + nt = len(tree_shas) + for tree_sha in tree_shas: + if tree_sha is None: + data = list() + else: + data = tree_entries_from_data(odb.stream(tree_sha).read()) + # END handle muted trees + trees_data.append(data) + # END for each sha to get data for + + out = list() + out_append = out.append + + # find all matching entries and recursively process them together if the match + # is a tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None + for ti, tree_data in enumerate(trees_data): + for ii, item in enumerate(tree_data): + if not item: + continue + # END skip already done items + entries = [ None for n in range(nt) ] + entries[ti] = item + sha, mode, name = item # its faster to unpack + is_dir = S_ISDIR(mode) # type mode bits + + # find this item in all other tree data items + # wrap around, but stop one before our current index, hence + # ti+nt, not ti+1+nt + for tio in range(ti+1, ti+nt): + tio = tio % nt + entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) + # END for each other item data + + # if we are a directory, enter recursion + if is_dir: + out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) + else: + out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) + # END handle recursion + + # finally mark it done + tree_data[ii] = None + # END for each item + + # we are done with one tree, set all its data empty + del(tree_data[:]) + # END for each tree_data chunk + return out + +def traverse_tree_recursive(odb, tree_sha, path_prefix): + """ + :return: list of entries of the tree pointed to by the binary tree_sha. An entry + has the following format: + * [0] 20 byte sha + * [1] mode as int + * [2] path relative to the repository + :param path_prefix: prefix to prepend to the front of all returned paths""" + entries = list() + data = tree_entries_from_data(odb.stream(tree_sha).read()) + + # unpacking/packing is faster than accessing individual items + for sha, mode, name in data: + if S_ISDIR(mode): + entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) + else: + entries.append((sha, mode, path_prefix+name)) + # END for each item + + return entries diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 7997e5e5..9b45d9b6 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -73,6 +73,9 @@ class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin): # this is a bogus type for base class compatability type = 'submodule' + # this type doesn't really have a type id + type_id = 0 + __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') _cache_attrs = ('path', '_url', '_branch_path') diff --git a/git/objects/tag.py b/git/objects/tag.py index 59b2362e..0bd1d20c 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -4,10 +4,77 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php """ Module containing all object based types. """ +import base from git.util import RepoAliasMixin -from gitdb.object.tag import TagObject as GitDB_TagObject +from gitdb.util import hex_to_bin +from util import ( + get_object_type_by_name, + parse_actor_and_date + ) +from gitdb.typ import ObjectType + __all__ = ("TagObject", ) -class TagObject(GitDB_TagObject, RepoAliasMixin): +class TagObject(base.Object, RepoAliasMixin): """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - __slots__ = tuple() + type = ObjectType.tag + type_id = ObjectType.tag_id + + __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) + + def __init__(self, odb, binsha, object=None, tag=None, + tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): + """Initialize a tag object with additional data + + :param odb: repository this object is located in + :param binsha: 20 byte SHA1 + :param object: Object instance of object we are pointing to + :param tag: name of this tag + :param tagger: Actor identifying the tagger + :param tagged_date: int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the + authored_date is in, in a format similar to time.altzone""" + super(TagObject, self).__init__(odb, binsha ) + if object is not None: + self.object = object + if tag is not None: + self.tag = tag + if tagger is not None: + self.tagger = tagger + if tagged_date is not None: + self.tagged_date = tagged_date + if tagger_tz_offset is not None: + self.tagger_tz_offset = tagger_tz_offset + if message is not None: + self.message = message + + def _set_cache_(self, attr): + """Cache all our attributes at once""" + if attr in TagObject.__slots__: + ostream = self.odb.stream(self.binsha) + lines = ostream.read().splitlines() + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha)) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in case there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/git/objects/tree.py b/git/objects/tree.py index 00ef07fc..1b5f7561 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -4,26 +4,286 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin -from gitdb.object.tree import Tree as GitDB_Tree -from gitdb.object.tree import TreeModifier import git.diff as diff - +from gitdb.typ import ObjectType +from base import IndexObject from blob import Blob -from submodule.base import Submodule +from submodule import Submodule + +from fun import ( + tree_entries_from_data, + tree_to_stream + ) + +from gitdb.util import ( + to_bin_sha, + join_path + ) +import util __all__ = ("TreeModifier", "Tree") -class Tree(GitDB_Tree, diff.Diffable): - """As opposed to the default GitDB tree implementation, this one can be diffed - and returns our own types""" - __slots__ = tuple() +class TreeModifier(object): + """A utility class providing methods to alter the underlying cache in a list-like fashion. + + Once all adjustments are complete, the _cache, which really is a refernce to + the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + __slots__ = '_cache' + + def __init__(self, cache): + self._cache = cache + + def _index_by_name(self, name): + """:return: index of an item with name, or -1 if not found""" + for i, t in enumerate(self._cache): + if t[2] == name: + return i + # END found item + # END for each item in cache + return -1 + + #{ Interface + def set_done(self): + """Call this method once you are done modifying the tree information. + It may be called several times, but be aware that each call will cause + a sort operation + :return self:""" + self._cache.sort(key=lambda t: t[2]) # sort by name + return self + #} END interface + + #{ Mutators + def add(self, sha, mode, name, force=False): + """Add the given item to the tree. If an item with the given name already + exists, nothing will be done, but a ValueError will be raised if the + sha and mode of the existing item do not match the one you add, unless + force is True + + :param sha: The 20 or 40 byte sha of the item to add + :param mode: int representing the stat compatible mode of the item + :param force: If True, an item with your name and information will overwrite + any existing item with the same name, no matter which information it has + :return: self""" + if '/' in name: + raise ValueError("Name must not contain '/' characters") + if (mode >> 12) not in Tree._map_id_to_type: + raise ValueError("Invalid object type according to mode %o" % mode) + + sha = to_bin_sha(sha) + index = self._index_by_name(name) + item = (sha, mode, name) + if index == -1: + self._cache.append(item) + else: + if force: + self._cache[index] = item + else: + ex_item = self._cache[index] + if ex_item[0] != sha or ex_item[1] != mode: + raise ValueError("Item %r existed with different properties" % name) + # END handle mismatch + # END handle force + # END handle name exists + return self + + def add_unchecked(self, binsha, mode, name): + """Add the given item to the tree, its correctness is assumed, which + puts the caller into responsibility to assure the input is correct. + For more information on the parameters, see ``add`` + :param binsha: 20 byte binary sha""" + self._cache.append((binsha, mode, name)) + + def __delitem__(self, name): + """Deletes an item with the given name if it exists""" + index = self._index_by_name(name) + if index > -1: + del(self._cache[index]) + + #} END mutators + + +class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable, RepoAliasMixin): + """Tree objects represent an ordered list of Blobs and other Trees. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + """ + + type = ObjectType.tree + type_id = ObjectType.tree_id + + __slots__ = "_cache" + + # actual integer ids for comparison + commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + blob_id = 010 + symlink_id = 012 + tree_id = 004 + #{ Configuration + + # override in subclass if you would like your own types to be instantiated instead _map_id_to_type = { - GitDB_Tree.commit_id : Submodule, - GitDB_Tree.blob_id : Blob, - GitDB_Tree.symlink_id : Blob + commit_id : Submodule, + blob_id : Blob, + symlink_id : Blob # tree id added once Tree is defined } + #} end configuration + + + def __init__(self, repo, binsha, mode=tree_id<<12, path=None): + super(Tree, self).__init__(repo, binsha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return tuple(index_object._iter_convert_to_object(index_object._cache)) + return tuple() + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + ostream = self.odb.stream(self.binsha) + self._cache = tree_entries_from_data(ostream.read()) + else: + super(Tree, self)._set_cache_(attr) + # END handle attribute + + def _iter_convert_to_object(self, iterable): + """Iterable yields tuples of (binsha, mode, name), which will be converted + to the respective object representation""" + for binsha, mode, name in iterable: + path = join_path(self.path, name) + try: + yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) + except KeyError: + raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) + # END for each item + + def __div__(self, file): + """Find the named object in this tree's contents + :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` + + :raise KeyError: if given file or tree does not exist in tree""" + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for info in self._cache: + if info[2] == file: # [2] == name + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + @property + def trees(self): + """:return: list(Tree, ...) list of trees directly below this tree""" + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """:return: list(Blob, ...) list of blobs directly below this tree""" + return [ i for i in self if i.type == "blob" ] + + @property + def cache(self): + """ + :return: An object allowing to modify the internal cache. This can be used + to change the tree's contents. When done, make sure you call ``set_done`` + on the tree modifier, or serialization behaviour will be incorrect. + See the ``TreeModifier`` for more information on how to alter the cache""" + return TreeModifier(self._cache) + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see util.Traversable.traverse + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self, i, j): + return list(self._iter_convert_to_object(self._cache[i:j])) + + def __iter__(self): + return self._iter_convert_to_object(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self, item): + if isinstance(item, int): + info = self._cache[item] + return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self, item): + if isinstance(item, IndexObject): + for info in self._cache: + if item.binsha == info[0]: + return True + # END compare sha + # END for each entry + # END handle item is index object + # compatability + + # treat item as repo-relative path + path = self.path + for info in self._cache: + if item == join_path(path, info[2]): + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._iter_convert_to_object(self._cache)) + + def _serialize(self, stream): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. If this is not the case, serialization + will not generate a correct tree representation as these are assumed to be sorted + by algorithms""" + tree_to_stream(self._cache, stream.write) + return self + + def _deserialize(self, stream): + self._cache = tree_entries_from_data(stream.read()) + return self + + +# END tree + # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree diff --git a/git/objects/util.py b/git/objects/util.py index 4c9323b8..8ac590f2 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -20,6 +20,7 @@ __all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 'verify_utctz', 'Actor') + #{ Functions def mode_str_to_int(modestr): -- cgit v1.2.3 From acf5e6ea64a2f24117f1d419c208ed1c38c43690 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 15:03:14 +0200 Subject: replaced all gitdb strings with git --- git/objects/blob.py | 2 +- git/objects/commit.py | 6 +++--- git/objects/submodule/base.py | 2 +- git/objects/tag.py | 4 ++-- git/objects/tree.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'git/objects') diff --git a/git/objects/blob.py b/git/objects/blob.py index 326c5459..9c51f99f 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -6,7 +6,7 @@ from git.util import RepoAliasMixin from mimetypes import guess_type -from gitdb.typ import ObjectType +from git.typ import ObjectType import base diff --git a/git/objects/commit.py b/git/objects/commit.py index 30dcaa0a..4ca5877e 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -5,11 +5,11 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import base -from gitdb.typ import ObjectType +from git.typ import ObjectType from tree import Tree from cStringIO import StringIO -from gitdb.util import ( +from git.util import ( hex_to_bin, Actor, RepoAliasMixin, @@ -24,7 +24,7 @@ from util import ( parse_actor_and_date ) from git.diff import Diffable -from gitdb.base import IStream +from git.base import IStream from cStringIO import StringIO from util import parse_date diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 9b45d9b6..f6cf278a 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,5 +1,5 @@ from git.util import RepoAliasMixin -from gitdb.object.submodule import Submodule as GitDB_Submodule +from git.object.submodule import Submodule as GitDB_Submodule import util from util import ( mkhead, diff --git a/git/objects/tag.py b/git/objects/tag.py index 0bd1d20c..5dcd9bf9 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -6,12 +6,12 @@ """ Module containing all object based types. """ import base from git.util import RepoAliasMixin -from gitdb.util import hex_to_bin +from git.util import hex_to_bin from util import ( get_object_type_by_name, parse_actor_and_date ) -from gitdb.typ import ObjectType +from git.typ import ObjectType __all__ = ("TagObject", ) diff --git a/git/objects/tree.py b/git/objects/tree.py index 1b5f7561..689aeaf6 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -5,7 +5,7 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin import git.diff as diff -from gitdb.typ import ObjectType +from git.typ import ObjectType from base import IndexObject from blob import Blob from submodule import Submodule @@ -15,7 +15,7 @@ from fun import ( tree_to_stream ) -from gitdb.util import ( +from git.util import ( to_bin_sha, join_path ) -- cgit v1.2.3 From 7ae36c3e019a5cc16924d1b6007774bfb625036f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 18:53:59 +0200 Subject: Started to fix imports - tests still have no chance to work as database changed drastically. Now the actual work begins --- git/objects/commit.py | 2 +- git/objects/submodule/base.py | 3 +-- git/objects/tree.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'git/objects') diff --git a/git/objects/commit.py b/git/objects/commit.py index 4ca5877e..45a821a1 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -35,7 +35,7 @@ import sys __all__ = ('Commit', ) -class Commit(GitDB_Commit, Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): +class Commit(Diffable, Iterable, RepoAliasMixin, base.Object, Traversable, Serializable): """Wraps a git Commit object. This class will act lazily on some of its attributes and will query the diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index f6cf278a..62f4feee 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,5 +1,4 @@ from git.util import RepoAliasMixin -from git.object.submodule import Submodule as GitDB_Submodule import util from util import ( mkhead, @@ -55,7 +54,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages -class Submodule(GitDB_Submodule, Iterable, Traversable, RepoAliasMixin): +class Submodule(Iterable, Traversable, RepoAliasMixin): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. diff --git a/git/objects/tree.py b/git/objects/tree.py index 689aeaf6..31f2602d 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -8,7 +8,7 @@ import git.diff as diff from git.typ import ObjectType from base import IndexObject from blob import Blob -from submodule import Submodule +from submodule.base import Submodule from fun import ( tree_entries_from_data, -- cgit v1.2.3 From cd26aaebbda94dc3740e41bbd3f91ba6b1a25c10 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 10 May 2011 10:21:26 +0200 Subject: Made repository paths methods a property to be compatible with the existing repo interface. Added submodule interface ... goal is to provide all of the extra repo functionality in custom interfaces --- git/objects/submodule/__init__.py | 4 ++++ git/objects/submodule/base.py | 4 ++++ git/objects/submodule/root.py | 5 +++++ git/objects/submodule/util.py | 4 ++++ 4 files changed, 17 insertions(+) (limited to 'git/objects') diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py index 82df59b0..c8bf2d49 100644 --- a/git/objects/submodule/__init__.py +++ b/git/objects/submodule/__init__.py @@ -1,2 +1,6 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php # NOTE: Cannot import anything here as the top-level _init_ has to handle # our dependencies diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 62f4feee..a57111d3 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,3 +1,7 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php from git.util import RepoAliasMixin import util from util import ( diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 132604f6..5e4cad2d 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -1,3 +1,7 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php from base import Submodule, UpdateProgress from util import ( find_first_remote_branch @@ -24,6 +28,7 @@ BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE URLCHANGE = RootUpdateProgress.URLCHANGE PATHCHANGE = RootUpdateProgress.PATHCHANGE + class RootModule(Submodule): """A (virtual) Root of all submodules in the given repository. It can be used to more easily traverse all submodules of the master repository""" diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py index 9b32807a..2c5f6bc1 100644 --- a/git/objects/submodule/util.py +++ b/git/objects/submodule/util.py @@ -1,3 +1,7 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php import git from git.exc import InvalidGitRepositoryError from git.config import GitConfigParser -- cgit v1.2.3 From 1f71ed94578799ee1667ba54b66a369e307f415b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 16:32:56 +0200 Subject: git cmd implementation of repository appears to work, at least this is what the test suggests. Pure python implementation still has some trouble, but this should be very fixable --- git/objects/base.py | 17 +++++++++++++---- git/objects/submodule/base.py | 2 +- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'git/objects') diff --git a/git/objects/base.py b/git/objects/base.py index 24967e7b..e51afbed 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -14,7 +14,8 @@ from git.util import ( join_path_native, stream_copy ) - +from git.db.interface import RepositoryPathsMixin +from git.exc import UnsupportedOperation from git.typ import ObjectType _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" @@ -173,7 +174,15 @@ class IndexObject(Object): Absolute path to this index object in the file system ( as opposed to the .path field which is a path relative to the git repository ). - The returned path will be native to the system and contains '\' on windows. """ - assert False, "Only works if repository is not bare - provide this check in an interface" - return join_path_native(dirname(self.odb.root_path()), self.path) + The returned path will be native to the system and contains '\' on windows. + :raise UnsupportedOperation: if underlying odb does not support the required method to obtain a working dir""" + # TODO: Here we suddenly need something better than a plain object database + # which indicates our odb should better be named repo ! + root = '' + if isinstance(self.odb, RepositoryPathsMixin): + root = self.odb.working_tree_dir + else: + raise UnsupportedOperation("Cannot provide absolute path from a database without Repository path support") + #END handle odb type + return join_path_native(root, self.path) diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index a57111d3..e38b94f8 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -58,7 +58,7 @@ UPDWKTREE = UpdateProgress.UPDWKTREE # IndexObject comes via util module, its a 'hacky' fix thanks to pythons import # mechanism which cause plenty of trouble of the only reason for packages and # modules is refactoring - subpackages shoudn't depend on parent packages -class Submodule(Iterable, Traversable, RepoAliasMixin): +class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. -- cgit v1.2.3 From 7fab60c596cdd2588f9c7b2b4eb9f93f8736b915 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 20:10:47 +0200 Subject: Fixed all of the object tests, except for the submodule handling which needs more work as the amount of submodules changed in fact. Maybe I should just generate a test repository with gitpython as submodule to get the recursion depth required to satisfy the test --- git/objects/commit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'git/objects') diff --git a/git/objects/commit.py b/git/objects/commit.py index 45a821a1..c201780c 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -14,7 +14,8 @@ from git.util import ( Actor, RepoAliasMixin, Iterable, - Actor + Actor, + Stats ) from util import ( -- cgit v1.2.3 From 6f960586feccff8c1f2c717765eb0a5e8b9cd6f3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 21:14:22 +0200 Subject: Fixed remaining tests as good as possible. remote/fetch/pull and submodule tests need some more work. Also, the tests need to be reorganized and move closer to their actual location within gitpython. Hence the refs tests go to git.test.refs, etc --- git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git/objects') diff --git a/git/objects/commit.py b/git/objects/commit.py index c201780c..c32bbf1a 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -29,7 +29,7 @@ from git.base import IStream from cStringIO import StringIO from util import parse_date -from time import altzone +from time import altzone, time import os import sys -- cgit v1.2.3 From 9bf3fdec93fe427bb5f0bd39c986a4e977969f41 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 13:38:48 +0200 Subject: First run in order to fix the remote handling. Cleaned up interfaces and figured out that the implementation really should be specific to the git command. This leaves the interface open for other implemntations which use a different way to provide feedback (as we do not make assumptions about the format of a feedback line) --- git/objects/submodule/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'git/objects') diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index e38b94f8..c1cc51aa 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -18,9 +18,10 @@ from git.util import ( Iterable, join_path_native, to_native_path_linux, - RemoteProgress ) +from git.db.interface import RemoteProgress + from git.config import SectionConstraint from git.exc import ( InvalidGitRepositoryError, -- cgit v1.2.3 From f7ca1ce511b535b088ffcbd764415fe0d33f368e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 17:14:57 +0200 Subject: Submodule tests are nearly working. Only root module needs more attention --- git/objects/submodule/base.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'git/objects') diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index c1cc51aa..019fe18c 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -29,7 +29,7 @@ from git.exc import ( ) import stat -import git +import git # we use some types indirectly to prevent cyclic imports ! import os import sys @@ -769,14 +769,18 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): #{ Query Interface @unbare_repo - def module(self): - """:return: Repo instance initialized from the repository at our submodule path + def module(self, repoType=None): + """:return: Repository instance initialized from the repository at our submodule path + :param repoType: The type of repository to be created. It must be possible to instatiate it + from a single repository path. + If None, a default repository type will be used :raise InvalidGitRepositoryError: if a repository was not available. This could also mean that it was not yet initialized""" # late import to workaround circular dependencies - module_path = self.abspath + module_path = self.abspath + repoType = repoType or git.Repo try: - repo = git.Repo(module_path) + repo = repoType(module_path) if repo != self.repo: return repo # END handle repo uninitialized -- cgit v1.2.3 From fd5c46eb283090e84a90ac394d056decc742f8f4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 7 Jun 2011 18:24:22 +0200 Subject: submodule now doesn't use hardcoded repository implementations anymore. Instead it allows the user to override the type in the classmethod he calls. Otherwise the type of the own repo will be respected --- git/objects/submodule/base.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'git/objects') diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 019fe18c..0fdb121d 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -204,7 +204,7 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): #{ Edit Interface @classmethod - def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): + def add(cls, repo, name, path, url=None, branch=None, no_checkout=False, repoType=None): """Add a new submodule to the given repository. This will alter the index as well as the .gitmodules file, but will not create a new commit. If the submodule already exists, no matter if the configuration differs @@ -229,6 +229,8 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): Examples are 'master' or 'feature/new' :param no_checkout: if True, and if the repository has to be cloned manually, no checkout will be performed + :param repoType: The repository type to use. It must provide the clone_from method. + If None, the default implementation is used. :return: The newly created submodule instance :note: works atomically, such that no change will be done if the repository update fails for instance""" @@ -236,6 +238,8 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") # END handle bare repos + repoType = repoType or git.Repo + path = to_native_path_linux(path) if path.endswith('/'): path = path[:-1] @@ -289,7 +293,7 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): if not branch_is_default: kwargs['b'] = br.name # END setup checkout-branch - mrepo = git.Repo.clone_from(url, path, **kwargs) + mrepo = repoType.clone_from(url, path, **kwargs) # END verify url # update configuration and index @@ -315,7 +319,7 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): return sm def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, - dry_run=False): + dry_run=False, ): """Update the repository of this submodule to point to the checkout we point at with the binsha of this instance. @@ -377,7 +381,6 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): if not init: return self # END early abort if init is not allowed - import git # there is no git-repository yet - but delete empty paths module_path = join_path_native(self.repo.working_tree_dir, self.path) @@ -393,7 +396,7 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): # branch according to the remote-HEAD if possible progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name)) if not dry_run: - mrepo = git.Repo.clone_from(self.url, module_path, n=True) + mrepo = type(self.repo).clone_from(self.url, module_path, n=True) #END handle dry-run progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path) @@ -779,6 +782,7 @@ class Submodule(util.IndexObject, Iterable, Traversable, RepoAliasMixin): # late import to workaround circular dependencies module_path = self.abspath repoType = repoType or git.Repo + try: repo = repoType(module_path) if repo != self.repo: -- cgit v1.2.3 From 4786d195424e7e0efa13bd5e3c496005a9d8180c Mon Sep 17 00:00:00 2001 From: Julien Miotte Date: Thu, 19 May 2011 17:11:36 +0200 Subject: Making comparisons with non-GitPython objects more tolerant. --- git/objects/base.py | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'git/objects') diff --git a/git/objects/base.py b/git/objects/base.py index e51afbed..61b3e674 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -84,10 +84,14 @@ class Object(LazyMixin): def __eq__(self, other): """:return: True if the objects have the same SHA1""" + if not hasattr(other, 'binsha'): + return False return self.binsha == other.binsha def __ne__(self, other): """:return: True if the objects do not have the same SHA1 """ + if not hasattr(other, 'binsha'): + return True return self.binsha != other.binsha def __hash__(self): -- cgit v1.2.3