From bb24f67e64b4ebe11c4d3ce7df021a6ad7ca98f2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Oct 2009 16:09:07 +0200 Subject: Fixed object bug that would cause object ids not to be resolved to sha's as this was assumed - now there is a test for it as well repo: removed diff and commit_diff methods, added 'head' property returning the current head as Reference object --- lib/git/objects/base.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 3b48e066..d780c7b3 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -15,22 +15,12 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: - inst = Object(repo,id) + inst = Object.new(repo,id) """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass - def __new__(cls, repo, id, *args, **kwargs): - if cls is Object: - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs) - inst.size = size - return inst - else: - return super(Object,cls).__new__(cls, repo, id, *args, **kwargs) - def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments @@ -45,7 +35,25 @@ class Object(LazyMixin): super(Object,self).__init__() self.repo = repo self.id = id - + + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. 
+ """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + def _set_self_from_args_(self, args_dict): """ Initialize attributes on self from the given dict that was retrieved -- cgit v1.2.3 From b372e26366348920eae32ee81a47b469b511a21f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Oct 2009 19:19:57 +0200 Subject: added Diffable interface to objects.base, its used by Commit and Tree objects. Diff class has been prepared to process raw input, but its not yet more than a frame --- lib/git/objects/base.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++ lib/git/objects/commit.py | 56 +------------------------------------ lib/git/objects/tree.py | 3 +- 3 files changed, 74 insertions(+), 56 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index d780c7b3..1bb2e8f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -172,3 +172,74 @@ class IndexObject(Object): return mode +class Diffable(object): + """ + Common interface for all object that can be diffed against another object of compatible type. + + NOTE: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object. + """ + __slots__ = tuple() + + # subclasses provide additional arguments to the git-diff comamnd by supplynig + # them in this tuple + _diff_args = tuple() + + def diff(self, other=None, paths=None, create_patch=False, **kwargs): + """ + Creates diffs between two items being trees, trees and index or an + index and the working tree. + + ``other`` + Is the item to compare us with. + If None, we will be compared to the working tree. + + ``paths`` + is a list of paths or a single path to limit the diff to. + It will only include at least one of the givne path or paths. 
+ + ``create_patch`` + If True, the returned Diff contains a detailed patch that if applied + makes the self to other. Patches are somwhat costly as blobs have to be read + and diffed. + + ``kwargs`` + Additional arguments passed to git-diff, such as + R=True to swap both sides of the diff. + + Returns + git.DiffIndex + + Note + Rename detection will only work if create_patch is True + """ + args = list(self._diff_args[:]) + args.append( "--abbrev=40" ) # we need full shas + args.append( "--full-index" ) # get full index paths, not only filenames + + if create_patch: + args.append("-p") + args.append("-M") # check for renames + else: + args.append("--raw") + + paths = paths or [] + if paths: + paths.insert(0, "--") + + if other is not None: + args.insert(0, other) + + args.insert(0,self) + args.extend(paths) + + kwargs['as_process'] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = diff.Diff._index_from_raw_format + if create_patch: + diff_method = diff.Diff._index_from_patch_format(self.repo, proc.stdout) + return diff_method(self.repo, proc.stdout) + + diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 847f4dec..7ed38703 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -11,7 +11,7 @@ from tree import Tree import base import utils -class Commit(base.Object, Iterable): +class Commit(base.Object, Iterable, base.Diffable): """ Wraps a git Commit object. @@ -176,60 +176,6 @@ class Commit(base.Object, Iterable): return self.iter_items( self.repo, self, paths, **kwargs ) - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. 
- - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff._list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff._list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - @property def stats(self): """ diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index abfa9622..4d3e9ebd 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -15,7 +15,7 @@ def sha_to_hex(sha): assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha return hexsha -class Tree(base.IndexObject): +class Tree(base.IndexObject, base.Diffable): """ Tress represent a ordered list of Blobs and other Trees. Hence it can be accessed like a list. @@ -169,6 +169,7 @@ class Tree(base.IndexObject): def traverse(self, max_depth=-1, predicate = lambda i: True): """ Returns + Iterator to traverse the tree recursively up to the given level. 
The iterator returns Blob and Tree objects -- cgit v1.2.3 From a5cf1bc1d3e38ab32a20707d66b08f1bb0beae91 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 17 Oct 2009 20:13:02 +0200 Subject: Removed a few diff-related test cases that fail now as the respective method is missing - these tests have to be redone in test-diff module accordingly --- lib/git/objects/commit.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 7ed38703..521130c5 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -214,6 +214,7 @@ class Commit(base.Object, Iterable, base.Diffable): if not hasattr(stream,'next'): stream = proc_or_stream.stdout + for line in stream: id = line.split()[1] assert line.split()[0] == "commit" -- cgit v1.2.3 From 9946e0ce07c8d93a43bd7b8900ddf5d913fe3b03 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 12:33:06 +0200 Subject: implemented diff tests, but will have to move the diff module as it needs to create objects, whose import would create a dependency cycle --- lib/git/objects/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 1bb2e8f1..b347b5f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -214,6 +214,9 @@ class Diffable(object): Note Rename detection will only work if create_patch is True """ + # import it in a retared fashion to avoid dependency cycle + from git.diff import Diff + args = list(self._diff_args[:]) args.append( "--abbrev=40" ) # we need full shas args.append( "--full-index" ) # get full index paths, not only filenames @@ -237,9 +240,9 @@ class Diffable(object): kwargs['as_process'] = True proc = self.repo.git.diff(*args, **kwargs) - diff_method = diff.Diff._index_from_raw_format + diff_method = Diff._index_from_raw_format if create_patch: - diff_method = 
diff.Diff._index_from_patch_format(self.repo, proc.stdout) + diff_method = Diff._index_from_patch_format return diff_method(self.repo, proc.stdout) -- cgit v1.2.3 From aed099a73025422f0550f5dd5c3e4651049494b2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 12:54:16 +0200 Subject: resolved cyclic inclusion issue by moving the Diffable interface into the diff module, which probably is the right thing to do anyway --- lib/git/objects/base.py | 75 ----------------------------------------------- lib/git/objects/commit.py | 2 +- lib/git/objects/tree.py | 3 +- 3 files changed, 3 insertions(+), 77 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index b347b5f1..ab1da7b0 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -170,79 +170,4 @@ class IndexObject(Object): mode += int(char) << iteration*3 # END for each char return mode - -class Diffable(object): - """ - Common interface for all object that can be diffed against another object of compatible type. - - NOTE: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object. - """ - __slots__ = tuple() - - # subclasses provide additional arguments to the git-diff comamnd by supplynig - # them in this tuple - _diff_args = tuple() - - def diff(self, other=None, paths=None, create_patch=False, **kwargs): - """ - Creates diffs between two items being trees, trees and index or an - index and the working tree. - - ``other`` - Is the item to compare us with. - If None, we will be compared to the working tree. - - ``paths`` - is a list of paths or a single path to limit the diff to. - It will only include at least one of the givne path or paths. - - ``create_patch`` - If True, the returned Diff contains a detailed patch that if applied - makes the self to other. Patches are somwhat costly as blobs have to be read - and diffed. 
- - ``kwargs`` - Additional arguments passed to git-diff, such as - R=True to swap both sides of the diff. - - Returns - git.DiffIndex - - Note - Rename detection will only work if create_patch is True - """ - # import it in a retared fashion to avoid dependency cycle - from git.diff import Diff - - args = list(self._diff_args[:]) - args.append( "--abbrev=40" ) # we need full shas - args.append( "--full-index" ) # get full index paths, not only filenames - - if create_patch: - args.append("-p") - args.append("-M") # check for renames - else: - args.append("--raw") - - paths = paths or [] - if paths: - paths.insert(0, "--") - - if other is not None: - args.insert(0, other) - - args.insert(0,self) - args.extend(paths) - - kwargs['as_process'] = True - proc = self.repo.git.diff(*args, **kwargs) - - diff_method = Diff._index_from_raw_format - if create_patch: - diff_method = Diff._index_from_patch_format - return diff_method(self.repo, proc.stdout) - - diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 521130c5..181cbb52 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -11,7 +11,7 @@ from tree import Tree import base import utils -class Commit(base.Object, Iterable, base.Diffable): +class Commit(base.Object, Iterable, diff.Diffable): """ Wraps a git Commit object. diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 4d3e9ebd..c35c075e 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -8,6 +8,7 @@ import os import blob import base import binascii +import git.diff as diff def sha_to_hex(sha): """Takes a string and returns the hex of the sha within""" @@ -15,7 +16,7 @@ def sha_to_hex(sha): assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha return hexsha -class Tree(base.IndexObject, base.Diffable): +class Tree(base.IndexObject, diff.Diffable): """ Tress represent a ordered list of Blobs and other Trees. Hence it can be accessed like a list. 
-- cgit v1.2.3 From 657a57adbff49c553752254c106ce1d5b5690cf8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 17:20:30 +0200 Subject: Improved tagobject message handling by not assuming an empty fourth line anymore --- lib/git/objects/tag.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index f54d4b64..b2140551 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -60,8 +60,13 @@ class TagObject(base.Object): tagger_info = lines[3][7:]# tagger self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) + # line 4 empty - it could mark the beginning of the next header + # in csse there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' # END check our attributes else: super(TagObject, self)._set_cache_(attr) -- cgit v1.2.3 From 0b3ecf2dcace76b65765ddf1901504b0b4861b08 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 19 Oct 2009 22:49:52 +0200 Subject: commit.count: is an instance method now repo: added head , tag and iter_trees methods for completeness changes: headlines now sorted chronologically --- lib/git/objects/commit.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 181cbb52..a68a7bed 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -101,16 +101,9 @@ class Commit(base.Object, Iterable, diff.Diffable): """ return self.message.split('\n', 1)[0] - @classmethod - def count(cls, repo, rev, paths='', **kwargs): + def count(self, paths='', **kwargs): """ - Count the number of commits reachable from this revision - - ``repo`` - is the 
Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options + Count the number of commits reachable from this commit ``paths`` is an optinal path or a list of paths restricting the return value @@ -121,7 +114,7 @@ class Commit(base.Object, Iterable, diff.Diffable): Returns int """ - return len(repo.git.rev_list(rev, '--', paths, **kwargs).strip().splitlines()) + return len(self.repo.git.rev_list(self.id, '--', paths, **kwargs).strip().splitlines()) @classmethod def iter_items(cls, repo, rev, paths='', **kwargs): -- cgit v1.2.3 From 989671780551b7587d57e1d7cb5eb1002ade75b4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 19 Oct 2009 23:44:18 +0200 Subject: Implemneted IterableLists for refs, commits and remote objects including simple tests --- lib/git/objects/commit.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index a68a7bed..4080305f 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -23,6 +23,7 @@ class Commit(base.Object, Iterable, diff.Diffable): type = "commit" __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", "message", "parents") + _id_attribute_ = "id" def __init__(self, repo, id, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): -- cgit v1.2.3 From 2e68d907022c84392597e05afc22d9fe06bf0927 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 21 Oct 2009 18:45:41 +0200 Subject: tree.traverse: Added prune functionality - previously the predciate did both, pruning and preventing to return items --- lib/git/objects/tree.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index c35c075e..92aae881 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -152,22 +152,21 @@ class Tree(base.IndexObject, 
diff.Diffable): return '' % self.id @classmethod - def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate, prune ): for obj in tree: # adjust path to be complete obj.path = os.path.join(tree.path, obj.path) - if not predicate(obj): - continue - yield obj - if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): - for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + if predicate(obj): + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ) and not prune(obj): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate, prune ): yield recursive_obj # END for each recursive object # END if we may enter recursion # END for each object - def traverse(self, max_depth=-1, predicate = lambda i: True): + def traverse(self, max_depth=-1, predicate = lambda i: True, prune = lambda t: False): """ Returns @@ -183,8 +182,13 @@ class Tree(base.IndexObject, diff.Diffable): ``predicate`` If predicate(item) returns True, item will be returned by iterator + + ``prune`` + + If prune(tree) returns True, the traversal will not continue into the + given tree object. """ - return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + return self._iter_recursive( self.repo, self, 0, max_depth, predicate, prune ) @property def trees(self): -- cgit v1.2.3 From 33fa178eeb7bf519f5fff118ebc8e27e76098363 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 11:04:30 +0200 Subject: added Object.data_stream property allowing to stream object data directly.Considering the implementation of the git commnd which temporarily keeps it in a cache, it doesnt make a huge diffence as the data is kept in memory while streaming. 
Only good thing is that it is in a different process so python will never see it if done properly --- lib/git/objects/base.py | 12 ++++++++++++ lib/git/objects/utils.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index ab1da7b0..dd67a3c7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -16,6 +16,9 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: inst = Object.new(repo,id) + inst.id # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) @@ -115,6 +118,15 @@ class Object(LazyMixin): """ return '' % (self.__class__.__name__, self.id) + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.id, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + class IndexObject(Object): """ diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 367ed2b7..7bb4e8e2 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -52,3 +52,21 @@ def parse_actor_and_date(line): m = _re_actor_epoch.search(line) actor, epoch = m.groups() return (Actor._from_string(actor), int(epoch)) + + + +class ProcessStreamAdapter(object): + """ + Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. 
+ """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) -- cgit v1.2.3 From 4fe5cfa0e063a8d51a1eb6f014e2aaa994e5e7d4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 12:28:04 +0200 Subject: Stream_data streams data to a given output stream most efficiently with a low memory footprint. Still, the git-cat-file command keeps all data in an interal buffer instead of streaming it directly. This is a git design issue though, and will be hard to address without some proper git-hacking. Conflicts: lib/git/cmd.py --- lib/git/objects/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index dd67a3c7..0dfd1a23 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -127,6 +127,18 @@ class Object(LazyMixin): proc = self.repo.git.cat_file(self.type, self.id, as_process=True) return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. 
+ + Returns + self + """ + self.repo.git.cat_file(self.type, self.id, output_stream=ostream) + return self class IndexObject(Object): """ -- cgit v1.2.3 From ea33fe8b21d2b02f902b131aba0d14389f2f8715 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 22:14:02 +0200 Subject: Index: Is now diffable and appears to properly implement diffing against other items as well as the working tree Diff.Diffable: added callback allowing superclasses to preprocess diff arguments Diff.Diff: added eq, ne and hash methods, string methods would be nice --- lib/git/objects/__init__.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py index 39e650b7..192750e3 100644 --- a/lib/git/objects/__init__.py +++ b/lib/git/objects/__init__.py @@ -2,6 +2,7 @@ Import all submodules main classes into the package space """ import inspect +from base import * from tag import * from blob import * from tree import * -- cgit v1.2.3 From 1b89f39432cdb395f5fbb9553b56595d29e2b773 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Oct 2009 16:39:02 +0200 Subject: commit.name_rev property added for convenience --- lib/git/objects/commit.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 4080305f..0f8ed7f8 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -117,6 +117,15 @@ class Commit(base.Object, Iterable, diff.Diffable): """ return len(self.repo.git.rev_list(self.id, '--', paths, **kwargs).strip().splitlines()) + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. 
+ Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) + @classmethod def iter_items(cls, repo, rev, paths='', **kwargs): """ -- cgit v1.2.3 From 0cd09bd306486028f5442c56ef2e947355a06282 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Oct 2009 21:49:13 +0200 Subject: index.remove implemented including throrough test --- lib/git/objects/base.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 0dfd1a23..0bece6f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -20,6 +20,7 @@ class Object(LazyMixin): inst.size # objects uncompressed data size inst.data # byte string containing the whole data of the object """ + NULL_HEX_SHA = '0'*40 TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass -- cgit v1.2.3 From b999cae064fb6ac11a61a39856e074341baeefde Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Oct 2009 23:44:49 +0200 Subject: actor: added __eq__, __ne__ and __hash__ methods including simple test commit: Fixed long-standing issue during message parsing that would fail to parse properly in case we were created from data. 
Also it would strip white space from the messages although it shouldn't --- lib/git/objects/commit.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 4080305f..00f3d0e4 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -83,7 +83,7 @@ class Commit(base.Object, Iterable, diff.Diffable): # prepare our data lines to match rev-list data_lines = self.data.splitlines() data_lines.insert(0, "commit %s" % self.id) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -111,7 +111,8 @@ class Commit(base.Object, Iterable, diff.Diffable): to commits actually containing the paths ``kwargs`` - Additional options to be passed to git-rev-list + Additional options to be passed to git-rev-list. 
They must not alter + the ouput style of the command, or parsing will yield incorrect results Returns int """ @@ -144,9 +145,8 @@ class Commit(base.Object, Iterable, diff.Diffable): options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - # the test system might confront us with string values - proc = repo.git.rev_list(rev, '--', paths, **options) - return cls._iter_from_process_or_stream(repo, proc) + return cls._iter_from_process_or_stream(repo, proc, True) def iter_parents(self, paths='', **kwargs): """ @@ -191,7 +191,7 @@ class Commit(base.Object, Iterable, diff.Diffable): return stats.Stats._list_from_string(self.repo, text) @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): + def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): """ Parse out commit information into a list of Commit objects @@ -201,6 +201,9 @@ class Commit(base.Object, Iterable, diff.Diffable): ``proc`` git-rev-list process instance (raw format) + ``from_rev_list`` + If True, the stream was created by rev-list in which case we parse + the message differently Returns iterator returning Commit objects """ @@ -208,10 +211,10 @@ class Commit(base.Object, Iterable, diff.Diffable): if not hasattr(stream,'next'): stream = proc_or_stream.stdout - for line in stream: - id = line.split()[1] - assert line.split()[0] == "commit" + commit_tokens = line.split() + id = commit_tokens[1] + assert commit_tokens[0] == "commit" tree = stream.next().split()[1] parents = [] @@ -231,13 +234,20 @@ class Commit(base.Object, Iterable, diff.Diffable): stream.next() message_lines = [] - next_line = None - for msg_line in stream: - if not msg_line.startswith(' '): - break - # END abort message reading - message_lines.append(msg_line.strip()) - # END while there are message lines + if from_rev_list: + for msg_line in stream: + if not msg_line.startswith(' '): + # and forget about this empty marker + break + # END abort message reading + # strip leading 4 
spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + else: + # a stream from our data simply gives us the plain message + for msg_line in stream: + message_lines.append(msg_line) + # END message parsing message = '\n'.join(message_lines) yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, -- cgit v1.2.3 From 291d2f85bb861ec23b80854b974f3b7a8ded2921 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 26 Oct 2009 21:23:55 +0100 Subject: When parsing trees, we now store the originan type bits as well, previously we dropped it --- lib/git/objects/tree.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 92aae881..371c0dd3 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -38,9 +38,9 @@ class Tree(base.IndexObject, diff.Diffable): __slots__ = "_cache" # using ascii codes for comparison - ascii_commit_id = (0x31 << 4) + 0x36 - ascii_blob_id = (0x31 << 4) + 0x30 - ascii_tree_id = (0x34 << 4) + 0x30 + commit_id = 016 + blob_id = 010 + tree_id = 040 def __init__(self, repo, id, mode=0, path=None): @@ -88,8 +88,8 @@ class Tree(base.IndexObject, diff.Diffable): mode = 0 mode_boundary = i + 6 - # keep it ascii - we compare against the respective values - type_id = (ord(data[i])<<4) + ord(data[i+1]) + # read type + type_id = ((ord(data[i])-ord_zero)<<3) + (ord(data[i+1])-ord_zero) i += 2 while data[i] != ' ': @@ -115,12 +115,13 @@ class Tree(base.IndexObject, diff.Diffable): sha = data[i:i+20] i = i + 20 + mode |= type_id<<12 hexsha = sha_to_hex(sha) - if type_id == self.ascii_blob_id: + if type_id == self.blob_id: yield blob.Blob(self.repo, hexsha, mode, name) - elif type_id == self.ascii_tree_id: + elif type_id == self.tree_id: yield Tree(self.repo, hexsha, mode, name) - elif type_id == self.ascii_commit_id: + elif type_id == self.commit_id: # todo 
yield None else: -- cgit v1.2.3 From 3cb5ba18ab1a875ef6b62c65342de476be47871b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 3 Nov 2009 16:35:33 +0100 Subject: object: renamed id attribute to sha as it in fact is always being rewritten as sha, even if the passed in id was a ref. This is done to assure objects are uniquely identified and will compare correctly --- lib/git/objects/base.py | 34 +++++++++++++++++----------------- lib/git/objects/blob.py | 2 +- lib/git/objects/commit.py | 26 +++++++++++++------------- lib/git/objects/tag.py | 6 +++--- lib/git/objects/tree.py | 6 +++--- 5 files changed, 37 insertions(+), 37 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 0bece6f1..6dd03ba4 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -16,13 +16,13 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: inst = Object.new(repo,id) - inst.id # objects sha in hex + inst.sha # objects sha in hex inst.size # objects uncompressed data size inst.data # byte string containing the whole data of the object """ NULL_HEX_SHA = '0'*40 TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "data" ) + __slots__ = ("repo", "sha", "size", "data" ) type = None # to be set by subclass def __init__(self, repo, id): @@ -38,7 +38,7 @@ class Object(LazyMixin): """ super(Object,self).__init__() self.repo = repo - self.id = id + self.sha = id @classmethod def new(cls, repo, id): @@ -76,11 +76,11 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) elif attr == "data": - hexsha, 
typename, self.size, self.data = self.repo.git.get_object_data(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) else: super(Object,self)._set_cache_(attr) @@ -89,35 +89,35 @@ class Object(LazyMixin): Returns True if the objects have the same SHA1 """ - return self.id == other.id + return self.sha == other.sha def __ne__(self, other): """ Returns True if the objects do not have the same SHA1 """ - return self.id != other.id + return self.sha != other.sha def __hash__(self): """ Returns Hash of our id allowing objects to be used in dicts and sets """ - return hash(self.id) + return hash(self.sha) def __str__(self): """ Returns string of our SHA1 as understood by all git commands """ - return self.id + return self.sha def __repr__(self): """ Returns string with pythonic representation of our object """ - return '' % (self.__class__.__name__, self.id) + return '' % (self.__class__.__name__, self.sha) @property def data_stream(self): @@ -125,7 +125,7 @@ class Object(LazyMixin): Returns File Object compatible stream to the uncompressed raw data of the object """ - proc = self.repo.git.cat_file(self.type, self.id, as_process=True) + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) return utils.ProcessStreamAdapter(proc, "stdout") def stream_data(self, ostream): @@ -138,7 +138,7 @@ class Object(LazyMixin): Returns self """ - self.repo.git.cat_file(self.type, self.id, output_stream=ostream) + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) return self class IndexObject(Object): @@ -148,13 +148,13 @@ class IndexObject(Object): """ __slots__ = ("path", "mode") - def __init__(self, repo, id, mode=None, path=None): + def __init__(self, repo, sha, mode=None, path=None): """ Initialize a newly instanced IndexObject ``repo`` is 
the Repo we are located in - ``id`` : string + ``sha`` : string is the git object id as hex sha ``mode`` : int @@ -168,7 +168,7 @@ class IndexObject(Object): Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree. """ - super(IndexObject, self).__init__(repo, id) + super(IndexObject, self).__init__(repo, sha) self._set_self_from_args_(locals()) if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py index 88ca73d6..11dee323 100644 --- a/lib/git/objects/blob.py +++ b/lib/git/objects/blob.py @@ -33,4 +33,4 @@ class Blob(base.IndexObject): def __repr__(self): - return '' % self.id + return '' % self.sha diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index d9f87116..80b3ad23 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -23,9 +23,9 @@ class Commit(base.Object, Iterable, diff.Diffable): type = "commit" __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", "message", "parents") - _id_attribute_ = "id" + _id_attribute_ = "sha" - def __init__(self, repo, id, tree=None, author=None, authored_date=None, + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): """ Instantiate a new Commit. All keyword arguments taking None as default will @@ -33,7 +33,7 @@ class Commit(base.Object, Iterable, diff.Diffable): The parameter documentation indicates the type of the argument after a colon ':'. - ``id`` + ``sha`` is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... 
) @@ -62,15 +62,15 @@ class Commit(base.Object, Iterable, diff.Diffable): Returns git.Commit """ - super(Commit,self).__init__(repo, id) + super(Commit,self).__init__(repo, sha) self._set_self_from_args_(locals()) if parents is not None: self.parents = tuple( self.__class__(repo, p) for p in parents ) # END for each parent to convert - if self.id and tree is not None: - self.tree = Tree(repo, id=tree, path='') + if self.sha and tree is not None: + self.tree = Tree(repo, tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -82,7 +82,7 @@ class Commit(base.Object, Iterable, diff.Diffable): if attr in Commit.__slots__: # prepare our data lines to match rev-list data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.id) + data_lines.insert(0, "commit %s" % self.sha) temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() self.parents = temp.parents self.tree = temp.tree @@ -116,7 +116,7 @@ class Commit(base.Object, Iterable, diff.Diffable): Returns int """ - return len(self.repo.git.rev_list(self.id, '--', paths, **kwargs).strip().splitlines()) + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).strip().splitlines()) @property def name_rev(self): @@ -189,14 +189,14 @@ class Commit(base.Object, Iterable, diff.Diffable): git.Stats """ if not self.parents: - text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) text2 = "" for line in text.splitlines()[1:]: (insertions, deletions, filename) = line.split("\t") text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) text = text2 else: - text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) return stats.Stats._list_from_string(self.repo, text) @classmethod @@ -259,15 +259,15 @@ class Commit(base.Object, Iterable, diff.Diffable): # END message 
parsing message = '\n'.join(message_lines) - yield Commit(repo, id=id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, + yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, committer=committer, committed_date=committed_date, message=message) # END for each line in stream def __str__(self): """ Convert commit to string which is SHA1 """ - return self.id + return self.sha def __repr__(self): - return '' % self.id + return '' % self.sha diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index b2140551..c329edf7 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -17,7 +17,7 @@ class TagObject(base.Object): type = "tag" __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - def __init__(self, repo, id, object=None, tag=None, + def __init__(self, repo, sha, object=None, tag=None, tagger=None, tagged_date=None, message=None): """ Initialize a tag object with additional data @@ -25,7 +25,7 @@ class TagObject(base.Object): ``repo`` repository this object is located in - ``id`` + ``sha`` SHA1 or ref suitable for git-rev-parse ``object`` @@ -41,7 +41,7 @@ class TagObject(base.Object): is the DateTime of the tag creation - use time.gmtime to convert it into a different format """ - super(TagObject, self).__init__(repo, id ) + super(TagObject, self).__init__(repo, sha ) self._set_self_from_args_(locals()) def _set_cache_(self, attr): diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 371c0dd3..413efdb8 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -43,8 +43,8 @@ class Tree(base.IndexObject, diff.Diffable): tree_id = 040 - def __init__(self, repo, id, mode=0, path=None): - super(Tree, self).__init__(repo, id, mode, path) + def __init__(self, repo, sha, mode=0, path=None): + super(Tree, self).__init__(repo, sha, mode, path) def _set_cache_(self, attr): if attr == "_cache": @@ -150,7 +150,7 @@ class 
Tree(base.IndexObject, diff.Diffable): def __repr__(self): - return '' % self.id + return '' % self.sha @classmethod def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate, prune ): -- cgit v1.2.3 From f9bbdc87a7263f479344fcf67c4b9fd6005bb6cd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Nov 2009 11:28:45 +0100 Subject: tree: parsing would fail when symlinks were encountered. This has been fixed --- lib/git/objects/tree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 413efdb8..fb292677 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -40,6 +40,7 @@ class Tree(base.IndexObject, diff.Diffable): # using ascii codes for comparison commit_id = 016 blob_id = 010 + symlink_id = 012 tree_id = 040 @@ -117,7 +118,7 @@ class Tree(base.IndexObject, diff.Diffable): mode |= type_id<<12 hexsha = sha_to_hex(sha) - if type_id == self.blob_id: + if type_id == self.blob_id or type_id == self.symlink_id: yield blob.Blob(self.repo, hexsha, mode, name) elif type_id == self.tree_id: yield Tree(self.repo, hexsha, mode, name) -- cgit v1.2.3 From c4cde8df886112ee32b0a09fcac90c28c85ded7f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Nov 2009 12:46:37 +0100 Subject: IndexObject: assured that .path fields are relative to the repository ( previously it would just be a name ) added abspath property and name property to provide easy access to most common paths of an index object --- lib/git/objects/base.py | 17 +++++++++++++++++ lib/git/objects/tree.py | 10 +++++----- 2 files changed, 22 insertions(+), 5 deletions(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6dd03ba4..b0989a43 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -196,3 +196,20 @@ class IndexObject(Object): # END for each char return mode + @property + def name(self): + """ + Returns + 
Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ) + """ + return os.path.join(self.repo.git.git_dir, self.path) + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index fb292677..bcb805af 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -110,6 +110,7 @@ class Tree(base.IndexObject, diff.Diffable): i += 1 # END while not reached NULL name = data[ns:i] + path = os.path.join(self.path, name) # byte is NULL, get next 20 i += 1 @@ -119,9 +120,9 @@ class Tree(base.IndexObject, diff.Diffable): mode |= type_id<<12 hexsha = sha_to_hex(sha) if type_id == self.blob_id or type_id == self.symlink_id: - yield blob.Blob(self.repo, hexsha, mode, name) + yield blob.Blob(self.repo, hexsha, mode, path) elif type_id == self.tree_id: - yield Tree(self.repo, hexsha, mode, name) + yield Tree(self.repo, hexsha, mode, path) elif type_id == self.commit_id: # todo yield None @@ -157,8 +158,6 @@ class Tree(base.IndexObject, diff.Diffable): def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate, prune ): for obj in tree: - # adjust path to be complete - obj.path = os.path.join(tree.path, obj.path) if predicate(obj): yield obj if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ) and not prune(obj): @@ -173,7 +172,8 @@ class Tree(base.IndexObject, diff.Diffable): Returns Iterator to traverse the tree recursively up to the given level. - The iterator returns Blob and Tree objects + The iterator returns Blob and Tree objects with paths relative to their + repository. 
``max_depth`` -- cgit v1.2.3 From f41d42ee7e264ce2fc32cea555e5f666fa1b1fe9 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Nov 2009 13:17:37 +0100 Subject: Improved cmd error handling in case an invalid revision is specified for an object repo.tree: improved to be less restrictive --- lib/git/objects/tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/git/objects') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index bcb805af..27bd84d0 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -226,7 +226,7 @@ class Tree(base.IndexObject, diff.Diffable): if isinstance(item, basestring): # compatability for obj in self._cache: - if obj.path == item: + if obj.name == item: return obj # END for each obj raise KeyError( "Blob or Tree named %s not found" % item ) -- cgit v1.2.3