From 4c73e9cd66c77934f8a262b0c1bab9c2f15449ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:03:01 +0200 Subject: refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects Improved way commits are returned by refs as they now use the path to be sure they always point to the ref even if it changes - previously it would use the sha intead so it would not update after being cached on the ref object --- lib/git/objects/base.py | 25 +------------------------ lib/git/objects/commit.py | 2 +- 2 files changed, 2 insertions(+), 25 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 43aa8dd1..5007f3a1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,30 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os - -class LazyMixin(object): - lazy_properties = [] - __slots__ = tuple() - - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. - """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) - - def _set_cache_(self, attr): - """ This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass +from git.utils import LazyMixin class Object(LazyMixin): diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c3e97bf9..f1f878d7 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -37,7 +37,7 @@ class Commit(base.Object): The parameter documentation indicates the type of the argument after a colon ':'. ``id`` - is the sha id of the commit + is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) is a tuple of commit ids or actual Commits -- cgit v1.2.3 From af9e37c5c8714136974124621d20c0436bb0735f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:40:13 +0200 Subject: IndexObjects are now checking their slots to raise a proper error message in case someone tries to access an unset path or mode - this information cannot be retrieved afterwards as IndexObject information is kept in the object that pointed at them. To find this information, one would have to search all objects which is not feasible --- lib/git/objects/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 5007f3a1..d3e0d943 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -120,8 +120,15 @@ class IndexObject(Object): if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) + def _set_cache_(self, attr): + if attr in self.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + @classmethod - def _mode_str_to_int( cls, modestr ): + def _mode_str_to_int(cls, modestr): """ ``modestr`` string like 755 or 644 or 100644 - only the last 3 chars will be used -- cgit v1.2.3 From a58a60ac5f322eb4bfd38741469ff21b5a33d2d5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 23:18:43 +0200 Subject: tree: now behaves like a list with string indexing functionality - using a dict as cache is a problem as the tree is ordered, added blobs, trees and traverse method repo: remove blob function as blobs are created directly or iterated - primitve types should not clutter the repo interface --- lib/git/objects/tree.py | 170 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 136 insertions(+), 34 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 273384a3..707cebaa 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -9,26 +9,57 @@ import blob import base class Tree(base.IndexObject): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ type = "tree" - __slots__ = "_contents" + __slots__ = "_cache" def __init__(self, repo, id, mode=None, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): - if attr == "_contents": - # Read the tree contents. - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content__from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache(self.repo, self.id) else: super(Tree, self)._set_cache_(attr) - @staticmethod - def content__from_string(repo, text): + @classmethod + def _get_tree_cache(cls, repo, treeish): + """ + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for line in repo.git.ls_tree(treeish).splitlines(): + obj = cls.content_from_string(repo, line) + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + @classmethod + def content_from_string(cls, repo, text): """ Parse a content item and create the appropriate object @@ -40,6 +71,8 @@ class Tree(base.IndexObject): Returns ``git.Blob`` or ``git.Tree`` + + NOTE: Currently sub-modules are ignored ! """ try: mode, typ, id, path = text.expandtabs(1).split(" ", 3) @@ -51,6 +84,7 @@ class Tree(base.IndexObject): elif typ == "blob": return blob.Blob(repo, id, mode, path) elif typ == "commit": + # TODO: Return a submodule return None else: raise(TypeError, "Invalid type: %s" % typ) @@ -67,36 +101,104 @@ class Tree(base.IndexObject): Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree """ - return self.get(file) + return self[file] def __repr__(self): return '' % self.id + + @classmethod + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + + for obj in tree: + # adjust path to be complete + obj.path = os.path.join(tree.path, obj.path) + if not predicate(obj): + continue + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + yield recursive_obj + # END for each recursive object + # END if we may enter recursion + # END for each object + + def traverse(self, max_depth=-1, predicate = lambda i: True): + """ + Returns + Iterator to traverse the tree recursively up to the given level. + The iterator returns Blob and Tree objects + + ``max_depth`` + + if -1, the whole tree will be traversed + if 0, only the first level will be traversed which is the same as + the default non-recursive iterator + + ``predicate`` + + If predicate(item) returns True, item will be returned by iterator + """ + return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. - def __getitem__(self, key): - return self._contents[key] + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + def __iter__(self): - return iter(self._contents) - + return iter(self._cache) + def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + for obj in self._cache: + if obj.path == item: + return obj + # END for each obj + raise KeyError( "Blob or Tree named %s not found" % item ) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) -- cgit v1.2.3 From 86fa577e135713e56b287169d69d976cde27ac97 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 17:36:27 +0200 Subject: tree: renamed content_from_string to _from_string to make it private. Removed tests that were testing that method --- lib/git/objects/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 707cebaa..1bc35d95 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -50,7 +50,7 @@ class Tree(base.IndexObject): """ out = list() for line in repo.git.ls_tree(treeish).splitlines(): - obj = cls.content_from_string(repo, line) + obj = cls._from_string(repo, line) if obj is not None: out.append(obj) # END if object was handled @@ -59,7 +59,7 @@ class Tree(base.IndexObject): @classmethod - def content_from_string(cls, repo, text): + def _from_string(cls, repo, text): """ Parse a content item and create the appropriate object -- cgit v1.2.3 From 5eb0f2c241718bc7462be44e5e8e1e36e35f9b15 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 17:50:26 +0200 Subject: unified name of utils module, recently it was named util and utils in different packages --- lib/git/objects/tag.py | 2 +- lib/git/objects/util.py | 36 ------------------------------------ lib/git/objects/utils.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 37 deletions(-) delete mode 100644 lib/git/objects/util.py create mode 100644 lib/git/objects/utils.py (limited to 'lib/git/objects') diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index af1022f0..261d835f 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -8,7 +8,7 @@ Module containing all object based types. """ import base import commit -from util import get_object_type_by_name +from utils import get_object_type_by_name class TagObject(base.Object): """ diff --git a/lib/git/objects/util.py b/lib/git/objects/util.py deleted file mode 100644 index 15c1d114..00000000 --- a/lib/git/objects/util.py +++ /dev/null @@ -1,36 +0,0 @@ -# util.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" -Module for general utility functions -""" -import commit, tag, blob, tree - -def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py new file mode 100644 index 00000000..15c1d114 --- /dev/null +++ b/lib/git/objects/utils.py @@ -0,0 +1,36 @@ +# util.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module for general utility functions +""" +import commit, tag, blob, tree + +def get_object_type_by_name(object_type_name): + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) -- cgit v1.2.3 From 6acec357c7609fdd2cb0f5fdb1d2756726c7fe98 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:26:19 +0200 Subject: renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes --- lib/git/objects/commit.py | 71 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f1f878d7..c289b825 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -6,14 +6,14 @@ import re import time - +from git.utils import Iterable from git.actor import Actor -from tree import Tree import git.diff as diff import git.stats as stats +from tree import Tree import base -class Commit(base.Object): +class Commit(base.Object, Iterable): """ Wraps a git Commit object. @@ -81,7 +81,7 @@ class Commit(base.Object): We set all values at once. """ if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -120,7 +120,7 @@ class Commit(base.Object): return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) @classmethod - def find_all(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, ref, path='', **kwargs): """ Find all commits matching the given criteria. @@ -128,7 +128,7 @@ class Commit(base.Object): is the Repo ``ref`` - is the ref from which to begin (SHA1 or name) + is the ref from which to begin (SHA1, Head or name) ``path`` is an optinal path, if set only Commits that include the path @@ -146,49 +146,56 @@ class Commit(base.Object): options.update(kwargs) output = repo.git.rev_list(ref, '--', path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines(False))) @classmethod - def _list_from_string(cls, repo, text): + def _iter_from_stream(cls, repo, stream): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``text`` - is the text output from the git-rev-list command (raw format) + ``stream`` + output stream from the git-rev-list command (raw format) Returns - git.Commit[] + iterator returning Commit objects """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) - # free line - lines.pop(0) + # empty line + stream.next() message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream @classmethod def diff(cls, repo, a, b=None, paths=None): -- cgit v1.2.3 From ac1cec7066eaa12a8d1a61562bfc6ee77ff5f54d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:49:33 +0200 Subject: added Iterable interface to Ref type --- lib/git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c289b825..f9245217 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -140,7 +140,7 @@ class Commit(base.Object, Iterable): ``skip`` is the number of commits to skip Returns - git.Commit[] + iterator yielding Commit items """ options = {'pretty': 'raw'} options.update(kwargs) -- cgit v1.2.3 From ead94f267065bb55303f79a0a6df477810b3c68d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 14:33:51 +0200 Subject: cmd: added option to return the process directly, allowing to read the output directly from the output stream commit: now reads commit information directly from the output stream of the process by implementing its iterator method repo: removed log method as it was redundant ( equal to the commits method ) --- lib/git/objects/commit.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f9245217..340686ea 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -142,26 +142,28 @@ class Commit(base.Object, Iterable): Returns iterator yielding Commit items """ - options = {'pretty': 'raw'} + options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - output = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_stream(repo, iter(output.splitlines(False))) + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process(repo, proc) @classmethod - def _iter_from_stream(cls, repo, stream): + def _iter_from_process(cls, repo, proc): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``stream`` - output stream from the git-rev-list command (raw format) + ``proc`` + git-rev-list process instance (raw format) Returns iterator returning Commit objects """ + stream = proc.stdout for line in stream: id = line.split()[1] assert line.split()[0] == "commit" -- cgit v1.2.3 From 6eeae8b24135b4de05f6d725b009c287577f053d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 17:24:15 +0200 Subject: test: Added time-consuming test which could also be a benchmark in fact - currently it cause hundreds of command invocations which is slow Fixed issue with trees not properly initialized with their default mode _set_cache_: some objects checked whether the attribute was within their __slots__ although it should have been accessed through its class --- lib/git/objects/base.py | 2 +- lib/git/objects/commit.py | 4 ++-- lib/git/objects/tag.py | 2 +- lib/git/objects/tree.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index d3e0d943..9789d72a 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -121,7 +121,7 @@ class IndexObject(Object): self.mode = self._mode_str_to_int(mode) def _set_cache_(self, attr): - if attr in self.__slots__: + if attr in IndexObject.__slots__: # they cannot be retrieved lateron ( not without searching for them ) raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) else: diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 340686ea..69fb3710 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -71,7 +71,7 @@ class Commit(base.Object, Iterable): # END for each parent to convert if self.id and tree is not None: - self.tree = Tree(repo, id=tree) + self.tree = Tree(repo, id=tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -80,7 +80,7 @@ class Commit(base.Object, Iterable): to be set. We set all values at once. """ - if attr in self.__slots__: + if attr in Commit.__slots__: temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index 261d835f..77d715c7 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -48,7 +48,7 @@ class TagObject(base.Object): """ Cache all our attributes at once """ - if attr in self.__slots__: + if attr in TagObject.__slots__: output = self.repo.git.cat_file(self.type,self.id) lines = output.split("\n") diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 1bc35d95..01dfb37b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -29,7 +29,7 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_cache" - def __init__(self, repo, id, mode=None, path=None): + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): -- cgit v1.2.3 From 6745f4542cfb74bbf3b933dba7a59ef2f54a4380 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:34:45 +0200 Subject: test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend All objects but Tree now use the persistent command to read their object information - Trees get binary data and would need their own pretty-printing or they need to parse the data themselves which is my favorite --- lib/git/objects/base.py | 8 ++++++-- lib/git/objects/commit.py | 14 ++++++++++---- lib/git/objects/tag.py | 3 +-- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 9789d72a..7b693be9 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -48,9 +48,13 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + typename, self.size = self.repo.git.get_object_header(self.id) + assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) elif attr == "data": - self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + typename, self.size, self.data = self.repo.git.get_object_data(self.id) + assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + else: + super(Object,self)._set_cache_(attr) def __eq__(self, other): """ diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 69fb3710..101014ab 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -81,7 +81,10 @@ class Commit(base.Object, Iterable): We set all values at once. """ if attr in Commit.__slots__: - temp = Commit.list_items(self.repo, self.id, max_count=1)[0] + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -147,10 +150,10 @@ class Commit(base.Object, Iterable): # the test system might confront us with string values - proc = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_process(repo, proc) + return cls._iter_from_process_or_stream(repo, proc) @classmethod - def _iter_from_process(cls, repo, proc): + def _iter_from_process_or_stream(cls, repo, proc_or_stream): """ Parse out commit information into a list of Commit objects @@ -163,7 +166,10 @@ class Commit(base.Object, Iterable): Returns iterator returning Commit objects """ - stream = proc.stdout + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + for line in stream: id = line.split()[1] assert line.split()[0] == "commit" diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index 77d715c7..ecf6349d 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -49,8 +49,7 @@ class TagObject(base.Object): Cache all our attributes at once """ if attr in TagObject.__slots__: - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") + lines = self.data.splitlines() obj, hexsha = lines[0].split(" ") # object type_token, type_name = lines[1].split(" ") # type -- cgit v1.2.3 From c5df44408218003eb49e3b8fc94329c5e8b46c7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:41:27 +0200 Subject: persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached --- lib/git/objects/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 7b693be9..6752a25e 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -48,10 +48,10 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - typename, self.size = self.repo.git.get_object_header(self.id) + hexsha, typename, self.size = self.repo.git.get_object_header(self.id) assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) elif attr == "data": - typename, self.size, self.data = self.repo.git.get_object_data(self.id) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) else: super(Object,self)._set_cache_(attr) -- cgit v1.2.3 From 2e6d110fbfa1f2e6a96bc8329e936d0cf1192844 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 23:37:45 +0200 Subject: tree: now reads tress directly by parsing the binary data, allowing it to safe possibly hundreds of command calls --- lib/git/objects/base.py | 7 ++-- lib/git/objects/tree.py | 102 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 74 insertions(+), 35 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6752a25e..07538ada 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,7 +6,8 @@ import os from git.utils import LazyMixin - +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags @@ -49,10 +50,10 @@ class Object(LazyMixin): """ if attr == "size": hexsha, typename, self.size = self.repo.git.get_object_header(self.id) - assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) elif attr == "data": hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) - assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) else: super(Object,self)._set_cache_(attr) diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 01dfb37b..abfa9622 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -7,6 +7,13 @@ import os import blob import base +import binascii + +def sha_to_hex(sha): + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha + return hexsha class Tree(base.IndexObject): """ @@ -29,18 +36,23 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_cache" + # using ascii codes for comparison + ascii_commit_id = (0x31 << 4) + 0x36 + ascii_blob_id = (0x31 << 4) + 0x30 + ascii_tree_id = (0x34 << 4) + 0x30 + + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it - self._cache = self._get_tree_cache(self.repo, self.id) + self._cache = self._get_tree_cache() else: super(Tree, self)._set_cache_(attr) - @classmethod - def _get_tree_cache(cls, repo, treeish): + def _get_tree_cache(self): """ Return list(object_instance, ...) @@ -49,45 +61,71 @@ class Tree(base.IndexObject): sha or ref identifying a tree """ out = list() - for line in repo.git.ls_tree(treeish).splitlines(): - obj = cls._from_string(repo, line) + for obj in self._iter_from_data(): if obj is not None: out.append(obj) # END if object was handled # END for each line from ls-tree return out - - @classmethod - def _from_string(cls, repo, text): + + def _iter_from_data(self): """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + Returns - ``git.Blob`` or ``git.Tree`` - - NOTE: Currently sub-modules are ignored ! + list(IndexObject, ...) """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + mode_boundary = i + 6 + + # keep it ascii - we compare against the respective values + type_id = (ord(data[i])<<4) + ord(data[i+1]) + i += 2 + + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.ascii_blob_id: + yield blob.Blob(self.repo, hexsha, mode, name) + elif type_id == self.ascii_tree_id: + yield Tree(self.repo, hexsha, mode, name) + elif type_id == self.ascii_commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data: %i" % type_id ) + # END for each byte in data stream - if typ == "tree": - return Tree(repo, id, mode, path) - elif typ == "blob": - return blob.Blob(repo, id, mode, path) - elif typ == "commit": - # TODO: Return a submodule - return None - else: - raise(TypeError, "Invalid type: %s" % typ) def __div__(self, file): """ -- cgit v1.2.3