From 2b7f5cb25e0e03e06ec506d31c001c172dd71ef6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 8 Oct 2009 15:32:19 +0200 Subject: Commit._actor method made protected as it is only used by the Commit class and very specific so it's not suited to be part of the public API --- lib/git/commit.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/git/commit.py b/lib/git/commit.py index edfe47ca..3d383df2 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -182,8 +182,8 @@ class Commit(LazyMixin): parents = [] while lines and lines[0].startswith('parent'): parents.append(lines.pop(0).split()[-1]) - author, authored_date = cls.actor(lines.pop(0)) - committer, committed_date = cls.actor(lines.pop(0)) + author, authored_date = cls._actor(lines.pop(0)) + committer, committed_date = cls._actor(lines.pop(0)) messages = [] while lines and lines[0].startswith(' '): @@ -284,7 +284,7 @@ class Commit(LazyMixin): return '' % self.id @classmethod - def actor(cls, line): + def _actor(cls, line): """ Parse out the actor (author or committer) info @@ -293,4 +293,4 @@ class Commit(LazyMixin): """ m = re.search(r'^.+? 
(.*) (\d+) .*$', line) actor, epoch = m.groups() - return [Actor.from_string(actor), time.gmtime(int(epoch))] + return (Actor.from_string(actor), time.gmtime(int(epoch))) -- cgit v1.2.3 From 92a97480edcc0f0de787a752bf90feed0445dd39 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Oct 2009 11:57:48 +0200 Subject: Blob|Tree: renamed 'name' member to 'path', updated tests and changelog as it would make existing code incompatible in some places --- lib/git/blob.py | 16 ++++++++-------- lib/git/diff.py | 4 ++-- lib/git/tree.py | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/git/blob.py b/lib/git/blob.py index 82a41f73..dac0888f 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -15,7 +15,7 @@ class Blob(object): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" - def __init__(self, repo, id, mode=None, name=None): + def __init__(self, repo, id, mode=None, path=None): """ Create an unbaked Blob containing just the specified attributes @@ -28,8 +28,8 @@ class Blob(object): ``mode`` is the file mode - ``name`` - is the file name + ``path`` + is the path to the file Returns git.Blob @@ -37,7 +37,7 @@ class Blob(object): self.repo = repo self.id = id self.mode = mode - self.name = name + self.path = path self._size = None self.data_stored = None @@ -83,17 +83,17 @@ class Blob(object): Defaults to 'text/plain' in case the actual file type is unknown. 
""" guesses = None - if self.name: - guesses = mimetypes.guess_type(self.name) + if self.path: + guesses = mimetypes.guess_type(self.path) return guesses and guesses[0] or self.DEFAULT_MIME_TYPE @property def basename(self): """ Returns - The basename of the Blobs file name + The basename of the Blobs file path """ - return os.path.basename(self.name) + return os.path.basename(self.path) @classmethod def blame(cls, repo, commit, file): diff --git a/lib/git/diff.py b/lib/git/diff.py index 44f55602..db12f1e4 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -38,11 +38,11 @@ class Diff(object): if not a_blob or re.search(r'^0{40}$', a_blob): self.a_blob = None else: - self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, name=a_path) + self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, path=a_path) if not b_blob or re.search(r'^0{40}$', b_blob): self.b_blob = None else: - self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, name=b_path) + self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, path=b_path) self.a_mode = a_mode self.b_mode = b_mode diff --git a/lib/git/tree.py b/lib/git/tree.py index cfb0881c..1b546f85 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -9,12 +9,12 @@ from lazy import LazyMixin import blob class Tree(LazyMixin): - def __init__(self, repo, id, mode=None, name=None): + def __init__(self, repo, id, mode=None, path=None): LazyMixin.__init__(self) self.repo = repo self.id = id self.mode = mode - self.name = name + self.path = path self._contents = None def __bake__(self): @@ -28,7 +28,7 @@ class Tree(LazyMixin): for line in self.repo.git.ls_tree(self.id).splitlines(): obj = self.content_from_string(self.repo, line) if obj is not None: - self._contents[obj.name] = obj + self._contents[obj.path] = obj @staticmethod def content_from_string(repo, text): @@ -45,14 +45,14 @@ class Tree(LazyMixin): ``git.Blob`` or ``git.Tree`` """ try: - mode, typ, id, name = text.expandtabs(1).split(" ", 3) + mode, typ, id, path = 
text.expandtabs(1).split(" ", 3) except: return None if typ == "tree": - return Tree(repo, id=id, mode=mode, name=name) + return Tree(repo, id=id, mode=mode, path=path) elif typ == "blob": - return blob.Blob(repo, id=id, mode=mode, name=name) + return blob.Blob(repo, id=id, mode=mode, path=path) elif typ == "commit": return None else: @@ -76,7 +76,7 @@ class Tree(LazyMixin): @property def basename(self): - os.path.basename(self.name) + os.path.basename(self.path) def __repr__(self): return '' % self.id -- cgit v1.2.3 From 07c20b4231b12fee42d15f1c44c948ce474f5851 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Oct 2009 12:05:02 +0200 Subject: deriving NoSuchPathError from OSError as it relates to it very well --- lib/git/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/git/errors.py b/lib/git/errors.py index 2632d5f3..45afb590 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -12,7 +12,7 @@ class InvalidGitRepositoryError(Exception): Thrown if the given repository appears to have an invalid format. """ -class NoSuchPathError(Exception): +class NoSuchPathError(OSError): """ Thrown if a path could not be access by the system. 
""" -- cgit v1.2.3 From 52ab307935bd2bbda52f853f9fc6b49f01897727 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Oct 2009 12:14:02 +0200 Subject: diff regex are now precompiled on class level, renamed a|b_blob to a|b_blob_id as it better reflects the actual value actor regex now precompiled on class level blob regex now precompiled on class level; made blame method more readable and faster although it can still be improved by making assumptions about the blame format and by reading the git command stream directly ( which is a general issue right now ) --- lib/git/actor.py | 8 +++- lib/git/blob.py | 111 ++++++++++++++++++++++++++++++++++--------------------- lib/git/diff.py | 45 +++++++++++----------- 3 files changed, 98 insertions(+), 66 deletions(-) (limited to 'lib') diff --git a/lib/git/actor.py b/lib/git/actor.py index bc1a4479..28f50e73 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -10,6 +10,10 @@ class Actor(object): """Actors hold information about a person acting on the repository. 
They can be committers and authors or anything with a name and an email as mentioned in the git log entries.""" + # precompiled regex + name_only_regex = re.compile( r'<.+>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + def __init__(self, name, email): self.name = name self.email = email @@ -34,8 +38,8 @@ class Actor(object): Returns Actor """ - if re.search(r'<.+>', string): - m = re.search(r'(.*) <(.+?)>', string) + if cls.name_only_regex.search(string): + m = cls.name_email_regex.search(string) name, email = m.groups() return Actor(name, email) else: diff --git a/lib/git/blob.py b/lib/git/blob.py index dac0888f..1e8aa12b 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -15,6 +15,12 @@ class Blob(object): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + def __init__(self, repo, id, mode=None, path=None): """ Create an unbaked Blob containing just the specified attributes @@ -112,49 +118,68 @@ class Blob(object): info = None for line in data.splitlines(): - parts = re.split(r'\s+', line, 1) - if re.search(r'^[0-9A-Fa-f]{40}$', parts[0]): - if re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+) (\d+)$', line) - id, origin_line, final_line, group_lines = m.groups() - info = {'id': id} - blames.append([None, []]) - elif re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line): - m = re.search(r'^([0-9A-Fa-f]{40}) (\d+) (\d+)$', line) - id, origin_line, final_line = m.groups() - info = {'id': id} - elif re.search(r'^(author|committer)', parts[0]): - if re.search(r'^(.+)-mail$', parts[0]): - m = re.search(r'^(.+)-mail$', parts[0]) - info["%s_email" % m.groups()[0]] = parts[-1] - elif re.search(r'^(.+)-time$', parts[0]): - m = re.search(r'^(.+)-time$', 
parts[0]) - info["%s_date" % m.groups()[0]] = time.gmtime(int(parts[-1])) - elif re.search(r'^(author|committer)$', parts[0]): - m = re.search(r'^(author|committer)$', parts[0]) - info[m.groups()[0]] = parts[-1] - elif re.search(r'^filename', parts[0]): - info['filename'] = parts[-1] - elif re.search(r'^summary', parts[0]): - info['summary'] = parts[-1] - elif parts[0] == '': - if info: - c = commits.has_key(info['id']) and commits[info['id']] - if not c: - c = Commit(repo, id=info['id'], - author=Actor.from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[info['id']] = c - - m = re.search(r'^\t(.*)$', line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - + parts = cls.re_whitespace.split(line, 1) + firstpart = parts[0] + if cls.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = cls.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = time.gmtime(int(parts[-1])) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # + if firstpart.startswith('filename'): + info['filename'] = 
parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( repo, id=sha, + author=Actor.from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = cls.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information return blames def __repr__(self): diff --git a/lib/git/diff.py b/lib/git/diff.py index db12f1e4..75450d70 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -29,20 +29,36 @@ class Diff(object): b_mode is None b_blob is NOne """ + + # precompiled regex + re_header = re.compile(r""" + #^diff[ ]--git + [ ]a/(?P\S+)[ ]b/(?P\S+)\n + (?:^similarity[ ]index[ ](?P\d+)%\n + ^rename[ ]from[ ](?P\S+)\n + ^rename[ ]to[ ](?P\S+)(?:\n|$))? + (?:^old[ ]mode[ ](?P\d+)\n + ^new[ ]mode[ ](?P\d+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? + (?:^index[ ](?P[0-9A-Fa-f]+) + \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? 
+ """, re.VERBOSE | re.MULTILINE) + re_is_null_hexsha = re.compile( r'^0{40}$' ) - def __init__(self, repo, a_path, b_path, a_blob, b_blob, a_mode, + def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode, new_file, deleted_file, rename_from, rename_to, diff): self.repo = repo - if not a_blob or re.search(r'^0{40}$', a_blob): + if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): self.a_blob = None else: - self.a_blob = blob.Blob(repo, id=a_blob, mode=a_mode, path=a_path) - if not b_blob or re.search(r'^0{40}$', b_blob): + self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) + if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id): self.b_blob = None else: - self.b_blob = blob.Blob(repo, id=b_blob, mode=b_mode, path=b_path) + self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path) self.a_mode = a_mode self.b_mode = b_mode @@ -68,29 +84,16 @@ class Diff(object): """ diffs = [] - diff_header = re.compile(r""" - #^diff[ ]--git - [ ]a/(?P\S+)[ ]b/(?P\S+)\n - (?:^similarity[ ]index[ ](?P\d+)%\n - ^rename[ ]from[ ](?P\S+)\n - ^rename[ ]to[ ](?P\S+)(?:\n|$))? - (?:^old[ ]mode[ ](?P\d+)\n - ^new[ ]mode[ ](?P\d+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P.+)(?:\n|$))? - (?:^index[ ](?P[0-9A-Fa-f]+) - \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? 
- """, re.VERBOSE | re.MULTILINE).match - + diff_header = cls.re_header.match for diff in ('\n' + text).split('\ndiff --git')[1:]: header = diff_header(diff) a_path, b_path, similarity_index, rename_from, rename_to, \ old_mode, new_mode, new_file_mode, deleted_file_mode, \ - a_blob, b_blob, b_mode = header.groups() + a_blob_id, b_blob_id, b_mode = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - diffs.append(Diff(repo, a_path, b_path, a_blob, b_blob, + diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, new_file, deleted_file, rename_from, rename_to, diff[header.end():])) -- cgit v1.2.3 From 9c0c2fc4ee2d8a5d0a2de50ba882657989dedc51 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 9 Oct 2009 16:04:54 +0200 Subject: finished cleaning usage of regular expressions - they are now precompiled --- lib/git/cmd.py | 1 - lib/git/commit.py | 11 ++++------- lib/git/repo.py | 1 - 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index aef53350..21e235b1 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -6,7 +6,6 @@ import os, sys import subprocess -import re from utils import * from errors import GitCommandError diff --git a/lib/git/commit.py b/lib/git/commit.py index 3d383df2..961e483b 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -20,6 +20,9 @@ class Commit(LazyMixin): This class will act lazily on some of its attributes and will query the value on demand only if it involves calling the git binary. """ + # precompiled regex + re_actor_epoch = re.compile(r'^.+? 
(.*) (\d+) .*$') + def __init__(self, repo, id, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): """ @@ -246,12 +249,6 @@ class Commit(LazyMixin): """ if not self.parents: d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - if re.search(r'diff --git a', d): - if not re.search(r'^diff --git a', d): - p = re.compile(r'.+?(diff --git a)', re.MULTILINE | re.DOTALL) - d = p.sub(r'diff --git a', d, 1) - else: - d = '' return diff.Diff.list_from_string(self.repo, d) else: return self.diff(self.repo, self.parents[0].id, self.id) @@ -291,6 +288,6 @@ class Commit(LazyMixin): Returns [Actor, gmtime(acted at time)] """ - m = re.search(r'^.+? (.*) (\d+) .*$', line) + m = cls.re_actor_epoch.search(line) actor, epoch = m.groups() return (Actor.from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/repo.py b/lib/git/repo.py index 1c4b4095..811cf6f0 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -5,7 +5,6 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -import re import gzip import StringIO from errors import InvalidGitRepositoryError, NoSuchPathError -- cgit v1.2.3 From 8430529e1a9fb28d8586d24ee507a8195c370fa5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 8 Oct 2009 14:34:29 +0200 Subject: Renamed lazy.py to base.py to have a file for base classes - lazy not yet changed to allow proper rename tracking --- lib/git/base.py | 32 ++++++++++++++++++++++++++++++++ lib/git/commit.py | 2 +- lib/git/lazy.py | 32 -------------------------------- lib/git/tree.py | 2 +- 4 files changed, 34 insertions(+), 34 deletions(-) create mode 100644 lib/git/base.py delete mode 100644 lib/git/lazy.py (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py new file mode 100644 index 00000000..5e470181 --- /dev/null +++ b/lib/git/base.py @@ -0,0 +1,32 @@ +# lazy.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This 
module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +class LazyMixin(object): + lazy_properties = [] + + def __init__(self): + self.__baked__ = False + + def __getattribute__(self, attr): + val = object.__getattribute__(self, attr) + if val is not None: + return val + else: + self.__prebake__() + return object.__getattribute__(self, attr) + + def __bake__(self): + """ This method should be overridden in the derived class. """ + raise NotImplementedError(" '__bake__' method has not been implemented.") + + def __prebake__(self): + if self.__baked__: + return + self.__bake__() + self.__baked__ = True + + def __bake_it__(self): + self.__baked__ = True diff --git a/lib/git/commit.py b/lib/git/commit.py index 961e483b..14e1ba68 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -8,7 +8,7 @@ import re import time from actor import Actor -from lazy import LazyMixin +from base import LazyMixin from tree import Tree import diff import stats diff --git a/lib/git/lazy.py b/lib/git/lazy.py deleted file mode 100644 index 5e470181..00000000 --- a/lib/git/lazy.py +++ /dev/null @@ -1,32 +0,0 @@ -# lazy.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -class LazyMixin(object): - lazy_properties = [] - - def __init__(self): - self.__baked__ = False - - def __getattribute__(self, attr): - val = object.__getattribute__(self, attr) - if val is not None: - return val - else: - self.__prebake__() - return object.__getattribute__(self, attr) - - def __bake__(self): - """ This method should be overridden in the derived class. 
""" - raise NotImplementedError(" '__bake__' method has not been implemented.") - - def __prebake__(self): - if self.__baked__: - return - self.__bake__() - self.__baked__ = True - - def __bake_it__(self): - self.__baked__ = True diff --git a/lib/git/tree.py b/lib/git/tree.py index 1b546f85..06c1a158 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -5,7 +5,7 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from lazy import LazyMixin +from base import LazyMixin import blob class Tree(LazyMixin): -- cgit v1.2.3 From 9ee31065abea645cbc2cf3e54b691d5983a228b2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 11:01:12 +0200 Subject: Intermediate commit: commit,tree and blob objects now derive from object - test is in place which still fails on purpose. Need to integrate tags which can be objects or just a special form of a ref --- lib/git/base.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/git/blob.py | 31 +++++++------------------ lib/git/commit.py | 13 ++++++----- lib/git/tag.py | 6 +++++ lib/git/tree.py | 17 ++++++-------- 5 files changed, 96 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index 5e470181..687fb50a 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -1,4 +1,4 @@ -# lazy.py +# base.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under @@ -30,3 +30,70 @@ class LazyMixin(object): def __bake_it__(self): self.__baked__ = True + + +class Object(LazyMixin): + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + """ + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "id", "size") + type = None # to be set by subclass + + def __init__(self, repo, id, size=None): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. 
+ + ``repo`` + repository this object is located in + ``id`` + SHA1 or ref suitable for git-rev-parse + ``size`` + Size of the object's data in bytes + """ + super(Object,self).__init__() + self.repo = repo + self.id = id + self.size = size + + def __bake__(self): + """ + Retrieve object information + """ + self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.id == other.id + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.id != other.id + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.id) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.id + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '' % (self.__class__.__name__, self.id) diff --git a/lib/git/blob.py b/lib/git/blob.py index 1e8aa12b..3ecd3a38 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -10,10 +10,13 @@ import re import time from actor import Actor from commit import Commit +import base -class Blob(object): +class Blob(base.Object): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" + type = "blob" + __slots__ = ("mode", "path", "_data_stored") # precompiled regex re_whitespace = re.compile(r'\s+') @@ -40,28 +43,10 @@ class Blob(object): Returns git.Blob """ - self.repo = repo - self.id = id + super(Blob,self).__init__(repo, id, "blob") self.mode = mode self.path = path - - self._size = None - self.data_stored = None - - @property - def size(self): - """ - The size of this blob in bytes - - Returns - int - - NOTE - The size will be cached after the first access - """ - if self._size is None: - self._size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) - return self._size + self._data_stored 
= None @property def data(self): @@ -74,8 +59,8 @@ class Blob(object): NOTE The data will be cached after the first access. """ - self.data_stored = self.data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self.data_stored + self._data_stored = self._data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + return self._data_stored @property def mime_type(self): diff --git a/lib/git/commit.py b/lib/git/commit.py index 14e1ba68..73fb8e7a 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -8,12 +8,12 @@ import re import time from actor import Actor -from base import LazyMixin from tree import Tree import diff import stats +import base -class Commit(LazyMixin): +class Commit(base.Object): """ Wraps a git Commit object. @@ -23,6 +23,9 @@ class Commit(LazyMixin): # precompiled regex re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + # object configuration + type = "commit" + def __init__(self, repo, id, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): """ @@ -58,10 +61,7 @@ class Commit(LazyMixin): Returns git.Commit """ - LazyMixin.__init__(self) - - self.repo = repo - self.id = id + super(Commit,self).__init__(repo, id, "commit") self.parents = None self.tree = None self.author = author @@ -87,6 +87,7 @@ class Commit(LazyMixin): Called by LazyMixin superclass when the first uninitialized member needs to be set as it is queried. """ + super(Commit, self).__bake__() temp = Commit.find_all(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree diff --git a/lib/git/tag.py b/lib/git/tag.py index 8413ce73..df3158a6 100644 --- a/lib/git/tag.py +++ b/lib/git/tag.py @@ -7,6 +7,12 @@ from commit import Commit class Tag(object): + """ + Class representing a tag reference which either points to a commit + or to a tag object. In the latter case additional information, like the signature + or the tag-creator, is available. 
+ """ + def __init__(self, name, commit): """ Initialize a newly instantiated Tag diff --git a/lib/git/tree.py b/lib/git/tree.py index 06c1a158..6215f875 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -5,25 +5,22 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from base import LazyMixin import blob +import base -class Tree(LazyMixin): +class Tree(base.Object): + + type = "tree" + def __init__(self, repo, id, mode=None, path=None): - LazyMixin.__init__(self) - self.repo = repo - self.id = id + super(Tree, self).__init__(repo, id) self.mode = mode self.path = path self._contents = None def __bake__(self): - # Ensure the treeish references directly a tree - treeish = self.id - if not treeish.endswith(':'): - treeish = treeish + ':' - # Read the tree contents. + super(Tree, self).__bake__() self._contents = {} for line in self.repo.git.ls_tree(self.id).splitlines(): obj = self.content_from_string(self.repo, line) -- cgit v1.2.3 From 20f202d83bdf1f332a3cb8f010bcf8bf3c2807bd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 16:36:51 +0200 Subject: Re-designed the tag testing - it does not use fixtures anymore but dyamically checks the existance of tags within the repository - it basically tests the interface and checks that expected return types are actually returned --- lib/git/base.py | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/git/head.py | 89 ++++++------------------------ lib/git/tag.py | 166 +++++++++++++++++++++++++++++++++----------------------- 3 files changed, 273 insertions(+), 143 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index 687fb50a..84dd0754 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -3,10 +3,13 @@ # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php +import os class LazyMixin(object): lazy_properties = [] - + + __slots__ = "__baked__" + def 
__init__(self): self.__baked__ = False @@ -38,7 +41,7 @@ class Object(LazyMixin): """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size") - type = None # to be set by subclass + type = None # to be set by subclass def __init__(self, repo, id, size=None): """ @@ -46,9 +49,11 @@ class Object(LazyMixin): will be set on demand if None. ``repo`` - repository this object is located in + repository this object is located in + ``id`` SHA1 or ref suitable for git-rev-parse + ``size`` Size of the object's data in bytes """ @@ -97,3 +102,153 @@ class Object(LazyMixin): string with pythonic representation of our object """ return '' % (self.__class__.__name__, self.id) + + @classmethod + def get_type_by_name(cls, object_type_name): + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + +class Ref(object): + """ + Represents a named reference to any object + """ + __slots__ = ("path", "object") + + def __init__(self, path, object = None): + """ + Initialize this instance + + ``path`` + Path relative to the .git/ directory pointing to the ref in question, i.e. 
+ refs/heads/master + + ``object`` + Object instance, will be retrieved on demand if None + """ + self.path = path + self.object = object + + def __str__(self): + return self.name() + + def __repr__(self): + return '' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path and self.object == other.object + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + Returns + Name of this reference + """ + return os.path.basename(self.path) + + @classmethod + def find_all(cls, repo, common_path = "refs", **kwargs): + """ + Find all refs in the repository + + ``repo`` + is the Repo + + ``common_path`` + Optional keyword argument to the path which is to be shared by all + returned Ref objects + + ``kwargs`` + Additional options given as keyword arguments, will be passed + to git-for-each-ref + + Returns + git.Ref[] + + List is sorted by committerdate + The returned objects are compatible to the Ref base, but represent the + actual type, such as Head or Tag + """ + + options = {'sort': "committerdate", + 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} + + options.update(kwargs) + + output = repo.git.for_each_ref(common_path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out ref information into a list of Ref compatible objects + + ``repo`` + is the Repo + ``text`` + is the text output from the git-for-each-ref command + + Returns + git.Ref[] + + list of Ref objects + """ + heads = [] + + for line in text.splitlines(): + heads.append(cls.from_string(repo, line)) + + return heads + + @classmethod + def from_string(cls, repo, line): + """ + Create a new Ref instance from the given string. 
+ + ``repo`` + is the Repo + + ``line`` + is the formatted ref information + + Format:: + + name: [a-zA-Z_/]+ + + id: [0-9A-Fa-f]{40} + + Returns + git.Head + """ + full_path, hexsha, type_name, object_size = line.split("\x00") + obj = Object.get_type_by_name(type_name)(repo, hexsha, object_size) + return cls(full_path, obj) diff --git a/lib/git/head.py b/lib/git/head.py index 639cee40..3c3f13ac 100644 --- a/lib/git/head.py +++ b/lib/git/head.py @@ -5,8 +5,9 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import commit +import base -class Head(object): +class Head(base.Ref): """ A Head is a named reference to a Commit. Every Head instance contains a name and a Commit object. @@ -26,93 +27,37 @@ class Head(object): '1c09f116cbc2cb4100fb6935bb162daa4723f455' """ - def __init__(self, name, commit): + def __init__(self, path, commit): """ Initialize a newly instanced Head - `name` - is the name of the head + ``path`` + is the path to the head ref, relative to the .git directory, i.e. 
+ refs/heads/master `commit` is the Commit object that the head points to """ - self.name = name - self.commit = commit + super(Head, self).__init__(name, commit) - @classmethod - def find_all(cls, repo, **kwargs): - """ - Find all Heads in the repository - - `repo` - is the Repo - - `kwargs` - Additional options given as keyword arguments, will be passed - to git-for-each-ref - - Returns - git.Head[] - - List is sorted by committerdate - """ - - options = {'sort': "committerdate", - 'format': "%(refname)%00%(objectname)"} - options.update(kwargs) - - output = repo.git.for_each_ref("refs/heads", **options) - return cls.list_from_string(repo, output) - @classmethod - def list_from_string(cls, repo, text): + @property + def commit(self): """ - Parse out head information into a list of head objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - Returns - git.Head[] + Commit object the head points to """ - heads = [] - - for line in text.splitlines(): - heads.append(cls.from_string(repo, line)) - - return heads - + return self.object + @classmethod - def from_string(cls, repo, line): + def find_all(cls, repo, common_path = "refs/heads", **kwargs): """ - Create a new Head instance from the given string. 
- - ``repo`` - is the Repo - - ``line`` - is the formatted head information - - Format:: - - name: [a-zA-Z_/]+ - - id: [0-9A-Fa-f]{40} - Returns - git.Head + git.Head[] + + For more documentation, please refer to git.base.Ref.find_all """ - full_name, ids = line.split("\x00") - - if full_name.startswith('refs/heads/'): - name = full_name[len('refs/heads/'):] - else: - name = full_name - - c = commit.Commit(repo, id=ids) - return Head(name, c) + return super(Head,cls).find_all(repo, common_path, **kwargs) def __repr__(self): return '' % self.name diff --git a/lib/git/tag.py b/lib/git/tag.py index df3158a6..0c4122ab 100644 --- a/lib/git/tag.py +++ b/lib/git/tag.py @@ -4,95 +4,125 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -from commit import Commit +import commit +import base -class Tag(object): +class TagRef(base.Ref): """ - Class representing a tag reference which either points to a commit + Class representing a lightweight tag reference which either points to a commit or to a tag object. In the latter case additional information, like the signature or the tag-creator, is available. 
+ + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagRef.find_all(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message """ - def __init__(self, name, commit): + __slots__ = "tag" + + def __init__(self, path, commit_or_tag): """ Initialize a newly instantiated Tag - ``name`` - is the name of the head + ``path`` + is the full path to the tag - ``commit`` - is the Commit that the head points to + ``commit_or_tag`` + is the Commit or TagObject that this tag ref points to """ - self.name = name - self.commit = commit - - @classmethod - def find_all(cls, repo, **kwargs): + super(TagRef, self).__init__(path, commit_or_tag) + self.tag = None + + if commit_or_tag.type == "tag": + self.tag = commit_or_tag + # END tag object handling + + @property + def commit(self): """ - Find all Tags in the repository - - ``repo`` - is the Repo - - ``kwargs`` - Additional options given as keyword arguments, will be passed - to git-for-each-ref - Returns - ``git.Tag[]`` - - List is sorted by committerdate + Commit object the tag ref points to """ - options = {'sort': "committerdate", - 'format': "%(refname)%00%(objectname)"} - options.update(**kwargs) - - output = repo.git.for_each_ref("refs/tags", **options) - return cls.list_from_string(repo, output) + if self.object.type == "commit": + return self.object + # it is a tag object + return self.object.object @classmethod - def list_from_string(cls, repo, text): + def find_all(cls, repo, common_path = "refs/tags", **kwargs): """ - Parse out tag information into an array of Tag objects - - ``repo`` - is the Repo - - ``text`` - is the text output from the git-for-each command - Returns git.Tag[] + + For more documentation, please refer to git.base.Ref.find_all """ - tags = [] - for line in text.splitlines(): - tags.append(cls.from_string(repo, line)) - return tags - - @classmethod - def from_string(cls, repo, line): + return 
super(TagRef,cls).find_all(repo, common_path, **kwargs) + + +# provide an alias +Tag = TagRef + +class TagObject(base.Object): + """ + Non-Lightweight tag carrying additional information about an object we are pointing + to. + """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, id, size=None, object=None, tag=None, + tagger=None, tagged_date=None, message=None): """ - Create a new Tag instance from the given string. - + Initialize a tag object with additional data + ``repo`` - is the Repo - - ``line`` - is the formatted tag information - - Format:: + repository this object is located in - name: [a-zA-Z_/]+ - - id: [0-9A-Fa-f]{40} - - Returns - git.Tag + ``id`` + SHA1 or ref suitable for git-rev-parse + + ``size`` + Size of the object's data in bytes + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the DateTime of the tag creation """ - full_name, ids = line.split("\x00") - name = full_name.split("/")[-1] - commit = Commit(repo, id=ids) - return Tag(name, commit) - - def __repr__(self): - return '' % self.name + super(TagObject, self).__init__(repo, id , size) + self.object = object + self.tag = tag + self.tagger = tagger + self.tagged_date = tagged_date + self.message = message + + def __bake__(self): + super(TagObject, self).__bake__() + + output = self.repo.git.cat_file(self.type,self.id) + lines = output.split("\n") + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = 
"\n".join(lines[5:]) + + -- cgit v1.2.3 From 9374a916588d9fe7169937ba262c86ad710cfa74 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 16:49:05 +0200 Subject: converted all spaces to tabs ( 4 spaces = 1 tab ) just to allow me and my editor to work with the files properly. Can convert it back for releaes --- lib/git/__init__.py | 2 +- lib/git/actor.py | 74 ++--- lib/git/base.py | 488 ++++++++++++++-------------- lib/git/blob.py | 306 +++++++++--------- lib/git/cmd.py | 388 +++++++++++----------- lib/git/commit.py | 556 +++++++++++++++---------------- lib/git/diff.py | 140 ++++---- lib/git/errors.py | 32 +- lib/git/head.py | 106 +++--- lib/git/repo.py | 918 ++++++++++++++++++++++++++-------------------------- lib/git/stats.py | 102 +++--- lib/git/tag.py | 224 ++++++------- lib/git/tree.py | 188 +++++------ lib/git/utils.py | 24 +- 14 files changed, 1774 insertions(+), 1774 deletions(-) (limited to 'lib') diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 28d14d0c..45cb4673 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -24,4 +24,4 @@ from git.utils import dashify from git.utils import touch __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ] + if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/actor.py b/lib/git/actor.py index 28f50e73..f1aeda9b 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -7,40 +7,40 @@ import re class Actor(object): - """Actors hold information about a person acting on the repository. 
They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - # precompiled regex - name_only_regex = re.compile( r'<.+>' ) - name_email_regex = re.compile( r'(.*) <(.+?)>' ) - - def __init__(self, name, email): - self.name = name - self.email = email - - def __str__(self): - return self.name - - def __repr__(self): - return '">' % (self.name, self.email) - - @classmethod - def from_string(cls, string): - """ - Create an Actor from a string. - - ``str`` - is the string, which is expected to be in regular git format - - Format - John Doe - - Returns - Actor - """ - if cls.name_only_regex.search(string): - m = cls.name_email_regex.search(string) - name, email = m.groups() - return Actor(name, email) - else: - return Actor(string, None) + """Actors hold information about a person acting on the repository. They + can be committers and authors or anything with a name and an email as + mentioned in the git log entries.""" + # precompiled regex + name_only_regex = re.compile( r'<.+>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + + def __init__(self, name, email): + self.name = name + self.email = email + + def __str__(self): + return self.name + + def __repr__(self): + return '">' % (self.name, self.email) + + @classmethod + def from_string(cls, string): + """ + Create an Actor from a string. 
+ + ``str`` + is the string, which is expected to be in regular git format + + Format + John Doe + + Returns + Actor + """ + if cls.name_only_regex.search(string): + m = cls.name_email_regex.search(string) + name, email = m.groups() + return Actor(name, email) + else: + return Actor(string, None) diff --git a/lib/git/base.py b/lib/git/base.py index 84dd0754..1f8e085d 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -6,249 +6,249 @@ import os class LazyMixin(object): - lazy_properties = [] - - __slots__ = "__baked__" - - def __init__(self): - self.__baked__ = False - - def __getattribute__(self, attr): - val = object.__getattribute__(self, attr) - if val is not None: - return val - else: - self.__prebake__() - return object.__getattribute__(self, attr) - - def __bake__(self): - """ This method should be overridden in the derived class. """ - raise NotImplementedError(" '__bake__' method has not been implemented.") - - def __prebake__(self): - if self.__baked__: - return - self.__bake__() - self.__baked__ = True - - def __bake_it__(self): - self.__baked__ = True - - + lazy_properties = [] + + __slots__ = "__baked__" + + def __init__(self): + self.__baked__ = False + + def __getattribute__(self, attr): + val = object.__getattribute__(self, attr) + if val is not None: + return val + else: + self.__prebake__() + return object.__getattribute__(self, attr) + + def __bake__(self): + """ This method should be overridden in the derived class. 
""" + raise NotImplementedError(" '__bake__' method has not been implemented.") + + def __prebake__(self): + if self.__baked__: + return + self.__bake__() + self.__baked__ = True + + def __bake_it__(self): + self.__baked__ = True + + class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - """ - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size") - type = None # to be set by subclass - - def __init__(self, repo, id, size=None): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - - ``size`` - Size of the object's data in bytes - """ - super(Object,self).__init__() - self.repo = repo - self.id = id - self.size = size - - def __bake__(self): - """ - Retrieve object information - """ - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.id == other.id - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.id != other.id - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.id) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.id - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '' % (self.__class__.__name__, self.id) - - @classmethod - def get_type_by_name(cls, object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. 
- - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + """ + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "id", "size") + type = None # to be set by subclass + + def __init__(self, repo, id, size=None): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + + ``size`` + Size of the object's data in bytes + """ + super(Object,self).__init__() + self.repo = repo + self.id = id + self.size = size + + def __bake__(self): + """ + Retrieve object information + """ + self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.id == other.id + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.id != other.id + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.id) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.id + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '' % (self.__class__.__name__, self.id) + + @classmethod + def get_type_by_name(cls, object_type_name): + """ + Returns + type suitable to handle the given object type name. 
+ Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + class Ref(object): - """ - Represents a named reference to any object - """ - __slots__ = ("path", "object") - - def __init__(self, path, object = None): - """ - Initialize this instance - - ``path`` - Path relative to the .git/ directory pointing to the ref in question, i.e. - refs/heads/master - - ``object`` - Object instance, will be retrieved on demand if None - """ - self.path = path - self.object = object - - def __str__(self): - return self.name() - - def __repr__(self): - return '' % (self.__class__.__name__, self.path) - - def __eq__(self, other): - return self.path == other.path and self.object == other.object - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.path) - - @property - def name(self): - """ - Returns - Name of this reference - """ - return os.path.basename(self.path) - - @classmethod - def find_all(cls, repo, common_path = "refs", **kwargs): - """ - Find all refs in the repository - - ``repo`` - is the Repo - - ``common_path`` - Optional keyword argument to the path which is to be shared by all - returned Ref objects - - ``kwargs`` - Additional options given as keyword arguments, will be passed - to git-for-each-ref - - Returns - git.Ref[] - - List is sorted by committerdate - The returned objects are compatible to the Ref base, but represent the - actual type, such as Head or Tag - """ - - options = {'sort': "committerdate", - 'format': 
"%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} - - options.update(kwargs) - - output = repo.git.for_each_ref(common_path, **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out ref information into a list of Ref compatible objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Ref[] - - list of Ref objects - """ - heads = [] - - for line in text.splitlines(): - heads.append(cls.from_string(repo, line)) - - return heads - - @classmethod - def from_string(cls, repo, line): - """ - Create a new Ref instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted ref information - - Format:: - - name: [a-zA-Z_/]+ - - id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ - full_path, hexsha, type_name, object_size = line.split("\x00") - obj = Object.get_type_by_name(type_name)(repo, hexsha, object_size) - return cls(full_path, obj) + """ + Represents a named reference to any object + """ + __slots__ = ("path", "object") + + def __init__(self, path, object = None): + """ + Initialize this instance + + ``path`` + Path relative to the .git/ directory pointing to the ref in question, i.e. 
+ refs/heads/master + + ``object`` + Object instance, will be retrieved on demand if None + """ + self.path = path + self.object = object + + def __str__(self): + return self.name() + + def __repr__(self): + return '' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path and self.object == other.object + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + Returns + Name of this reference + """ + return os.path.basename(self.path) + + @classmethod + def find_all(cls, repo, common_path = "refs", **kwargs): + """ + Find all refs in the repository + + ``repo`` + is the Repo + + ``common_path`` + Optional keyword argument to the path which is to be shared by all + returned Ref objects + + ``kwargs`` + Additional options given as keyword arguments, will be passed + to git-for-each-ref + + Returns + git.Ref[] + + List is sorted by committerdate + The returned objects are compatible to the Ref base, but represent the + actual type, such as Head or Tag + """ + + options = {'sort': "committerdate", + 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} + + options.update(kwargs) + + output = repo.git.for_each_ref(common_path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out ref information into a list of Ref compatible objects + + ``repo`` + is the Repo + ``text`` + is the text output from the git-for-each-ref command + + Returns + git.Ref[] + + list of Ref objects + """ + heads = [] + + for line in text.splitlines(): + heads.append(cls.from_string(repo, line)) + + return heads + + @classmethod + def from_string(cls, repo, line): + """ + Create a new Ref instance from the given string. 
+ + ``repo`` + is the Repo + + ``line`` + is the formatted ref information + + Format:: + + name: [a-zA-Z_/]+ + + id: [0-9A-Fa-f]{40} + + Returns + git.Head + """ + full_path, hexsha, type_name, object_size = line.split("\x00") + obj = Object.get_type_by_name(type_name)(repo, hexsha, object_size) + return cls(full_path, obj) diff --git a/lib/git/blob.py b/lib/git/blob.py index 3ecd3a38..d1b928cd 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -13,159 +13,159 @@ from commit import Commit import base class Blob(base.Object): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" - __slots__ = ("mode", "path", "_data_stored") - - # precompiled regex - re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - def __init__(self, repo, id, mode=None, path=None): - """ - Create an unbaked Blob containing just the specified attributes - - ``repo`` - is the Repo - - ``id`` - is the git object id - - ``mode`` - is the file mode - - ``path`` - is the path to the file - - Returns - git.Blob - """ - super(Blob,self).__init__(repo, id, "blob") - self.mode = mode - self.path = path - self._data_stored = None - - @property - def data(self): - """ - The binary contents of this blob. - - Returns - str - - NOTE - The data will be cached after the first access. - """ - self._data_stored = self._data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self._data_stored - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) - - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. 
- """ - guesses = None - if self.path: - guesses = mimetypes.guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - - @property - def basename(self): - """ - Returns - The basename of the Blobs file path - """ - return os.path.basename(self.path) - - @classmethod - def blame(cls, repo, commit, file): - """ - The blame information for the given file at the given commit - - Returns - list: [git.Commit, list: []] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance. - """ - data = repo.git.blame(commit, '--', file, p=True) - commits = {} - blames = [] - info = None - - for line in data.splitlines(): - parts = cls.re_whitespace.split(line, 1) - firstpart = parts[0] - if cls.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" + __slots__ = ("mode", "path", "_data_stored") + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + + def __init__(self, repo, id, mode=None, path=None): + """ + Create an unbaked Blob containing just the specified attributes + + ``repo`` + is the Repo + + ``id`` + is the git object id + + ``mode`` + is the file mode + + ``path`` + is the path to the file + + Returns + git.Blob + """ + super(Blob,self).__init__(repo, id, "blob") + self.mode = mode + self.path = path + self._data_stored = None + + @property + def data(self): + """ + The binary contents of this blob. + + Returns + str + + NOTE + The data will be cached after the first access. 
+ """ + self._data_stored = self._data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + return self._data_stored + + @property + def mime_type(self): + """ + The mime type of this file (based on the filename) + + Returns + str + + NOTE + Defaults to 'text/plain' in case the actual file type is unknown. + """ + guesses = None + if self.path: + guesses = mimetypes.guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + + @property + def basename(self): + """ + Returns + The basename of the Blobs file path + """ + return os.path.basename(self.path) + + @classmethod + def blame(cls, repo, commit, file): + """ + The blame information for the given file at the given commit + + Returns + list: [git.Commit, list: []] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance. + """ + data = repo.git.blame(commit, '--', file, p=True) + commits = {} + blames = [] + info = None + + for line in data.splitlines(): + parts = cls.re_whitespace.split(line, 1) + firstpart = parts[0] + if cls.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) # END blame data initialization - else: - m = cls.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = time.gmtime(int(parts[-1])) - elif role == firstpart: - info[role] = parts[-1] - # END 
distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( repo, id=sha, - author=Actor.from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = cls.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - def __repr__(self): - return '' % self.id + else: + m = cls.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = time.gmtime(int(parts[-1])) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( repo, id=sha, + author=Actor.from_string(info['author'] 
+ ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = cls.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + def __repr__(self): + return '' % self.id diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 21e235b1..940e35d1 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,208 +13,208 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output') extra = {} if sys.platform == 'win32': - extra = {'shell': True} + extra = {'shell': True} class Git(object): - """ - The Git class manages communication with the Git binary. - + """ + The Git class manages communication with the Git binary. + It provides a convenient interface to calling the Git binary, such as in:: g = Git( git_dir ) - g.init() # calls 'git init' program + g.init() # calls 'git init' program rval = g.ls_files() # calls 'git ls-files' program ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - def __init__(self, git_dir=None): - """ - Initialize this instance with: - - ``git_dir`` - Git directory we should work in. 
If None, we always work in the current - directory as returned by os.getcwd() - """ - super(Git, self).__init__() - self.git_dir = git_dir - - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def get_dir(self): - """ - Returns - Git directory we are working on - """ - return self.git_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - with_raw_output=False, - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - ``istream`` - Standard input filehandle passed to subprocess.Popen. - - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - GitPython uses get_work_tree() as its working directory by - default and get_git_dir() for bare repositories. - - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. - - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. - - ``with_raw_output`` - Whether to avoid stripping off trailing whitespace. - - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. 
- """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self.git_dir is None: - cwd = os.getcwd() - else: - cwd=self.git_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - **extra - ) - - # Wait for the process to return - try: - stdout_value = proc.stdout.read() - stderr_value = proc.stderr.read() - status = proc.wait() - finally: - proc.stdout.close() - proc.stderr.close() - - # Strip off trailing whitespace by default - if not with_raw_output: - stdout_value = stdout_value.rstrip() - stderr_value = stderr_value.rstrip() - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String - - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. 
- - ``args`` - is the list of arguments - - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - Examples:: - git.rev_list('master', max_count=10, header=True) - - Returns - Same as execute() - """ - - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - ext_args = map(str, args) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + def __init__(self, git_dir=None): + """ + Initialize this instance with: + + ``git_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd() + """ + super(Git, self).__init__() + self.git_dir = git_dir + + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. + """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + @property + def get_dir(self): + """ + Returns + Git directory we are working on + """ + return self.git_dir + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + with_raw_output=False, + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + ``command`` + The command argument list to execute. 
+ It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. + + ``istream`` + Standard input filehandle passed to subprocess.Popen. + + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + GitPython uses get_work_tree() as its working directory by + default and get_git_dir() for bare repositories. + + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. + + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. + + ``with_raw_output`` + Whether to avoid stripping off trailing whitespace. + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self.git_dir is None: + cwd = os.getcwd() + else: + cwd=self.git_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + **extra + ) + + # Wait for the process to return + try: + stdout_value = proc.stdout.read() + stderr_value = proc.stderr.read() + status = proc.wait() + finally: + proc.stdout.close() + proc.stderr.close() + + # Strip off trailing whitespace by default + if not with_raw_output: + stdout_value = stdout_value.rstrip() + stderr_value = stderr_value.rstrip() + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! 
'%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. + """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String + + ``method`` + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + ``args`` + is the list of arguments + + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + Examples:: + git.rev_list('master', max_count=10, header=True) + + Returns + Same as execute() + """ + + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. 
+ _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + ext_args = map(str, args) + args = opt_args + ext_args + + call = ["git", dashify(method)] + call.extend(args) + + return self.execute(call, **_kwargs) diff --git a/lib/git/commit.py b/lib/git/commit.py index 73fb8e7a..1ae84799 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -14,281 +14,281 @@ import stats import base class Commit(base.Object): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - # precompiled regex - re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') - - # object configuration - type = "commit" - - def __init__(self, repo, id, tree=None, author=None, authored_date=None, - committer=None, committed_date=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``id`` - is the sha id of the commit - - ``parents`` : list( Commit, ... 
) - is a list of commit ids - - ``tree`` : Tree - is the corresponding tree id - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) - is the authored DateTime - - ``committer`` : Actor - is the committer string - - ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the committed DateTime - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, id, "commit") - self.parents = None - self.tree = None - self.author = author - self.authored_date = authored_date - self.committer = committer - self.committed_date = committed_date - self.message = message - - if self.id: - if parents is not None: - self.parents = [Commit(repo, p) for p in parents] - if tree is not None: - self.tree = Tree(repo, id=tree) - - def __eq__(self, other): - return self.id == other.id - - def __ne__(self, other): - return self.id != other.id - - def __bake__(self): - """ - Called by LazyMixin superclass when the first uninitialized member needs - to be set as it is queried. - """ - super(Commit, self).__bake__() - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message - - @property - def id_abbrev(self): - """ - Returns - First 7 bytes of the commit's sha id as an abbreviation of the full string. - """ - return self.id[0:7] - - @property - def summary(self): - """ - Returns - First line of the commit message. 
- """ - return self.message.split('\n', 1)[0] - - @classmethod - def count(cls, repo, ref, path=''): - """ - Count the number of commits reachable from this ref - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path - - Returns - int - """ - return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) - - @classmethod - def find_all(cls, repo, ref, path='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path, if set only Commits that include the path - will be considered - - ``kwargs`` - optional keyword arguments to git where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - - Returns - git.Commit[] - """ - options = {'pretty': 'raw'} - options.update(kwargs) - - output = repo.git.rev_list(ref, '--', path, **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``text`` - is the text output from the git-rev-list command (raw format) - - Returns - git.Commit[] - """ - lines = [l for l in text.splitlines() if l.strip('\r\n')] - - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] - - parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) - - messages = [] - while lines and lines[0].startswith(' '): - messages.append(lines.pop(0).strip()) - - message = '\n'.join(messages) - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - - return 
commits - - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. - - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff.list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff.list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. 
- - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) - return stats.Stats.list_from_string(self.repo, text) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.id - - def __repr__(self): - return '' % self.id - - @classmethod - def _actor(cls, line): - """ - Parse out the actor (author or committer) info - - Returns - [Actor, gmtime(acted at time)] - """ - m = cls.re_actor_epoch.search(line) - actor, epoch = m.groups() - return (Actor.from_string(actor), time.gmtime(int(epoch))) + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + # precompiled regex + re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + + # object configuration + type = "commit" + + def __init__(self, repo, id, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``id`` + is the sha id of the commit + + ``parents`` : list( Commit, ... 
) + is a list of commit ids + + ``tree`` : Tree + is the corresponding tree id + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) + is the authored DateTime + + ``committer`` : Actor + is the committer string + + ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the committed DateTime + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, id, "commit") + self.parents = None + self.tree = None + self.author = author + self.authored_date = authored_date + self.committer = committer + self.committed_date = committed_date + self.message = message + + if self.id: + if parents is not None: + self.parents = [Commit(repo, p) for p in parents] + if tree is not None: + self.tree = Tree(repo, id=tree) + + def __eq__(self, other): + return self.id == other.id + + def __ne__(self, other): + return self.id != other.id + + def __bake__(self): + """ + Called by LazyMixin superclass when the first uninitialized member needs + to be set as it is queried. + """ + super(Commit, self).__bake__() + temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + + @property + def id_abbrev(self): + """ + Returns + First 7 bytes of the commit's sha id as an abbreviation of the full string. + """ + return self.id[0:7] + + @property + def summary(self): + """ + Returns + First line of the commit message. 
+ """ + return self.message.split('\n', 1)[0] + + @classmethod + def count(cls, repo, ref, path=''): + """ + Count the number of commits reachable from this ref + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path + + Returns + int + """ + return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) + + @classmethod + def find_all(cls, repo, ref, path='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path, if set only Commits that include the path + will be considered + + ``kwargs`` + optional keyword arguments to git where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + + Returns + git.Commit[] + """ + options = {'pretty': 'raw'} + options.update(kwargs) + + output = repo.git.rev_list(ref, '--', path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``text`` + is the text output from the git-rev-list command (raw format) + + Returns + git.Commit[] + """ + lines = [l for l in text.splitlines() if l.strip('\r\n')] + + commits = [] + + while lines: + id = lines.pop(0).split()[1] + tree = lines.pop(0).split()[1] + + parents = [] + while lines and lines[0].startswith('parent'): + parents.append(lines.pop(0).split()[-1]) + author, authored_date = cls._actor(lines.pop(0)) + committer, committed_date = cls._actor(lines.pop(0)) + + messages = [] + while lines and lines[0].startswith(' '): + messages.append(lines.pop(0).strip()) + + message = '\n'.join(messages) + + commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message)) + + return 
commits + + @classmethod + def diff(cls, repo, a, b=None, paths=None): + """ + Creates diffs between a tree and the index or between two trees: + + ``repo`` + is the Repo + + ``a`` + is a named commit + + ``b`` + is an optional named commit. Passing a list assumes you + wish to omit the second named commit and limit the diff to the + given paths. + + ``paths`` + is a list of paths to limit the diff to. + + Returns + git.Diff[]:: + + between tree and the index if only a is given + between two trees if a and b are given and are commits + """ + paths = paths or [] + + if isinstance(b, list): + paths = b + b = None + + if paths: + paths.insert(0, "--") + + if b: + paths.insert(0, b) + paths.insert(0, a) + text = repo.git.diff('-M', full_index=True, *paths) + return diff.Diff.list_from_string(repo, text) + + @property + def diffs(self): + """ + Returns + git.Diff[] + Diffs between this commit and its first parent or all changes if this + commit is the first commit and has no parent. + """ + if not self.parents: + d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') + return diff.Diff.list_from_string(self.repo, d) + else: + return self.diff(self.repo, self.parents[0].id, self.id) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. 
+ + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + return stats.Stats.list_from_string(self.repo, text) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.id + + def __repr__(self): + return '' % self.id + + @classmethod + def _actor(cls, line): + """ + Parse out the actor (author or committer) info + + Returns + [Actor, gmtime(acted at time)] + """ + m = cls.re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor.from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/diff.py b/lib/git/diff.py index 75450d70..ef58cb0e 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -8,30 +8,30 @@ import re import blob class Diff(object): - """ - A Diff contains diff information between two commits. - - It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. - - Diffs keep information about the changed blob objects, the file mode, renames, - deletions and new files. - - There are a few cases where None has to be expected as member variable value: - - ``New File``:: - - a_mode is None - a_blob is None - - ``Deleted File``:: - - b_mode is None - b_blob is NOne - """ - - # precompiled regex - re_header = re.compile(r""" + """ + A Diff contains diff information between two commits. + + It contains two sides a and b of the diff, members are prefixed with + "a" and "b" respectively to inidcate that. + + Diffs keep information about the changed blob objects, the file mode, renames, + deletions and new files. 
+ + There are a few cases where None has to be expected as member variable value: + + ``New File``:: + + a_mode is None + a_blob is None + + ``Deleted File``:: + + b_mode is None + b_blob is NOne + """ + + # precompiled regex + re_header = re.compile(r""" #^diff[ ]--git [ ]a/(?P\S+)[ ]b/(?P\S+)\n (?:^similarity[ ]index[ ](?P\d+)%\n @@ -44,58 +44,58 @@ class Diff(object): (?:^index[ ](?P[0-9A-Fa-f]+) \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? """, re.VERBOSE | re.MULTILINE) - re_is_null_hexsha = re.compile( r'^0{40}$' ) + re_is_null_hexsha = re.compile( r'^0{40}$' ) - def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): - self.repo = repo + def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, + b_mode, new_file, deleted_file, rename_from, + rename_to, diff): + self.repo = repo - if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): - self.a_blob = None - else: - self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) - if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id): - self.b_blob = None - else: - self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path) + if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): + self.a_blob = None + else: + self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) + if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id): + self.b_blob = None + else: + self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path) - self.a_mode = a_mode - self.b_mode = b_mode - self.new_file = new_file - self.deleted_file = deleted_file - self.rename_from = rename_from - self.rename_to = rename_to - self.renamed = rename_from != rename_to - self.diff = diff + self.a_mode = a_mode + self.b_mode = b_mode + self.new_file = new_file + self.deleted_file = deleted_file + self.rename_from = rename_from + self.rename_to = rename_to + self.renamed = rename_from != rename_to + 
self.diff = diff - @classmethod - def list_from_string(cls, repo, text): - """ - Create a new diff object from the given text - ``repo`` - is the repository we are operating on - it is required - - ``text`` - result of 'git diff' between two commits or one commit and the index - - Returns - git.Diff[] - """ - diffs = [] + @classmethod + def list_from_string(cls, repo, text): + """ + Create a new diff object from the given text + ``repo`` + is the repository we are operating on - it is required + + ``text`` + result of 'git diff' between two commits or one commit and the index + + Returns + git.Diff[] + """ + diffs = [] - diff_header = cls.re_header.match - for diff in ('\n' + text).split('\ndiff --git')[1:]: - header = diff_header(diff) + diff_header = cls.re_header.match + for diff in ('\n' + text).split('\ndiff --git')[1:]: + header = diff_header(diff) - a_path, b_path, similarity_index, rename_from, rename_to, \ - old_mode, new_mode, new_file_mode, deleted_file_mode, \ - a_blob_id, b_blob_id, b_mode = header.groups() - new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) + a_path, b_path, similarity_index, rename_from, rename_to, \ + old_mode, new_mode, new_file_mode, deleted_file_mode, \ + a_blob_id, b_blob_id, b_mode = header.groups() + new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, - old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, - new_file, deleted_file, rename_from, rename_to, diff[header.end():])) + diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, + old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, + new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - return diffs + return diffs diff --git a/lib/git/errors.py b/lib/git/errors.py index 45afb590..e9a637c0 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -8,25 +8,25 @@ Module containing all exceptions thrown througout the git 
package, """ class InvalidGitRepositoryError(Exception): - """ - Thrown if the given repository appears to have an invalid format. - """ + """ + Thrown if the given repository appears to have an invalid format. + """ class NoSuchPathError(OSError): - """ - Thrown if a path could not be access by the system. - """ + """ + Thrown if a path could not be access by the system. + """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ - def __init__(self, command, status, stderr=None): - self.stderr = stderr - self.status = status - self.command = command + """ + Thrown if execution of the git command fails with non-zero status code. + """ + def __init__(self, command, status, stderr=None): + self.stderr = stderr + self.status = status + self.command = command - def __str__(self): - return repr("%s returned exit status %d" % - (str(self.command), self.status)) + def __str__(self): + return repr("%s returned exit status %d" % + (str(self.command), self.status)) diff --git a/lib/git/head.py b/lib/git/head.py index 3c3f13ac..f4e94637 100644 --- a/lib/git/head.py +++ b/lib/git/head.py @@ -8,56 +8,56 @@ import commit import base class Head(base.Ref): - """ - A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. - - Examples:: - - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] - - >>> head.name - 'master' - - >>> head.commit - - - >>> head.commit.id - '1c09f116cbc2cb4100fb6935bb162daa4723f455' - """ - - def __init__(self, path, commit): - """ - Initialize a newly instanced Head - - ``path`` - is the path to the head ref, relative to the .git directory, i.e. 
- refs/heads/master - - `commit` - is the Commit object that the head points to - """ - super(Head, self).__init__(name, commit) - - - @property - def commit(self): - """ - Returns - Commit object the head points to - """ - return self.object - - @classmethod - def find_all(cls, repo, common_path = "refs/heads", **kwargs): - """ - Returns - git.Head[] - - For more documentation, please refer to git.base.Ref.find_all - """ - return super(Head,cls).find_all(repo, common_path, **kwargs) - - def __repr__(self): - return '' % self.name + """ + A Head is a named reference to a Commit. Every Head instance contains a name + and a Commit object. + + Examples:: + + >>> repo = Repo("/path/to/repo") + >>> head = repo.heads[0] + + >>> head.name + 'master' + + >>> head.commit + + + >>> head.commit.id + '1c09f116cbc2cb4100fb6935bb162daa4723f455' + """ + + def __init__(self, path, commit): + """ + Initialize a newly instanced Head + + ``path`` + is the path to the head ref, relative to the .git directory, i.e. + refs/heads/master + + `commit` + is the Commit object that the head points to + """ + super(Head, self).__init__(name, commit) + + + @property + def commit(self): + """ + Returns + Commit object the head points to + """ + return self.object + + @classmethod + def find_all(cls, repo, common_path = "refs/heads", **kwargs): + """ + Returns + git.Head[] + + For more documentation, please refer to git.base.Ref.find_all + """ + return super(Head,cls).find_all(repo, common_path, **kwargs) + + def __repr__(self): + return '' % self.name diff --git a/lib/git/repo.py b/lib/git/repo.py index 811cf6f0..3c872218 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -17,496 +17,496 @@ from commit import Commit from tree import Tree class Repo(object): - """ - Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. 
- """ - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - - def __init__(self, path=None): - """ - Create a new Repo instance - - ``path`` - is the path to either the root git directory or the bare git repo - - Examples:: - - repo = Repo("/Users/mtrier/Development/git-python") - repo = Repo("/Users/mtrier/Development/git-python.git") - - Raises - InvalidGitRepositoryError or NoSuchPathError - - Returns - ``git.Repo`` - """ - - epath = os.path.abspath(os.path.expanduser(path or os.getcwd())) - - if not os.path.exists(epath): - raise NoSuchPathError(epath) - - self.path = None - curpath = epath - while curpath: - if is_git_dir(curpath): - self.bare = True - self.path = curpath - self.wd = curpath - break - gitpath = os.path.join(curpath, '.git') - if is_git_dir(gitpath): - self.bare = False - self.path = gitpath - self.wd = curpath - break - curpath, dummy = os.path.split(curpath) - if not dummy: - break - - if self.path is None: - raise InvalidGitRepositoryError(epath) - - self.git = Git(self.wd) - - # Description property - def _get_description(self): - filename = os.path.join(self.path, 'description') - return file(filename).read().rstrip() - - def _set_description(self, descr): - filename = os.path.join(self.path, 'description') - file(filename, 'w').write(descr+'\n') - - description = property(_get_description, _set_description, - doc="the project's description") - del _get_description - del _set_description - - @property - def heads(self): - """ - A list of ``Head`` objects representing the branch heads in - this repo - - Returns - ``git.Head[]`` - """ - return Head.find_all(self) - - # alias heads - branches = heads - - @property - def tags(self): - """ - A list of ``Tag`` objects that are available in this repo - - Returns - ``git.Tag[]`` - """ - return Tag.find_all(self) - - def commits(self, start='master', path='', max_count=10, skip=0): - """ - A list of Commit objects representing the history of a given ref/commit - - ``start`` - is the branch/commit name 
(default 'master') - - ``path`` - is an optional path to limit the returned commits to - Commits that do not contain that path will not be returned. - - ``max_count`` - is the maximum number of commits to return (default 10) + """ + Represents a git repository and allows you to query references, + gather commit information, generate diffs, create and clone repositories query + the log. + """ + DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - ``skip`` - is the number of commits to skip (default 0) which will effectively - move your commit-window by the given number. + def __init__(self, path=None): + """ + Create a new Repo instance - Returns - ``git.Commit[]`` - """ - options = {'max_count': max_count, - 'skip': skip} + ``path`` + is the path to either the root git directory or the bare git repo - return Commit.find_all(self, start, path, **options) + Examples:: - def commits_between(self, frm, to): - """ - The Commits objects that are reachable via ``to`` but not via ``frm`` - Commits are returned in chronological order. 
+ repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") - ``from`` - is the branch/commit name of the younger item + Raises + InvalidGitRepositoryError or NoSuchPathError + + Returns + ``git.Repo`` + """ + + epath = os.path.abspath(os.path.expanduser(path or os.getcwd())) + + if not os.path.exists(epath): + raise NoSuchPathError(epath) + + self.path = None + curpath = epath + while curpath: + if is_git_dir(curpath): + self.bare = True + self.path = curpath + self.wd = curpath + break + gitpath = os.path.join(curpath, '.git') + if is_git_dir(gitpath): + self.bare = False + self.path = gitpath + self.wd = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + + if self.path is None: + raise InvalidGitRepositoryError(epath) + + self.git = Git(self.wd) + + # Description property + def _get_description(self): + filename = os.path.join(self.path, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = os.path.join(self.path, 'description') + file(filename, 'w').write(descr+'\n') + + description = property(_get_description, _set_description, + doc="the project's description") + del _get_description + del _set_description + + @property + def heads(self): + """ + A list of ``Head`` objects representing the branch heads in + this repo + + Returns + ``git.Head[]`` + """ + return Head.find_all(self) + + # alias heads + branches = heads + + @property + def tags(self): + """ + A list of ``Tag`` objects that are available in this repo + + Returns + ``git.Tag[]`` + """ + return Tag.find_all(self) + + def commits(self, start='master', path='', max_count=10, skip=0): + """ + A list of Commit objects representing the history of a given ref/commit + + ``start`` + is the branch/commit name (default 'master') + + ``path`` + is an optional path to limit the returned commits to + Commits that do not contain that path will not be returned. 
+ + ``max_count`` + is the maximum number of commits to return (default 10) + + ``skip`` + is the number of commits to skip (default 0) which will effectively + move your commit-window by the given number. + + Returns + ``git.Commit[]`` + """ + options = {'max_count': max_count, + 'skip': skip} + + return Commit.find_all(self, start, path, **options) + + def commits_between(self, frm, to): + """ + The Commits objects that are reachable via ``to`` but not via ``frm`` + Commits are returned in chronological order. + + ``from`` + is the branch/commit name of the younger item + + ``to`` + is the branch/commit name of the older item + + Returns + ``git.Commit[]`` + """ + return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) + + def commits_since(self, start='master', path='', since='1970-01-01'): + """ + The Commits objects that are newer than the specified date. + Commits are returned in chronological order. + + ``start`` + is the branch/commit name (default 'master') + + ``path`` + is an optinal path to limit the returned commits to. + + + ``since`` + is a string represeting a date/time + + Returns + ``git.Commit[]`` + """ + options = {'since': since} + + return Commit.find_all(self, start, path, **options) + + def commit_count(self, start='master', path=''): + """ + The number of commits reachable by the given branch/commit + + ``start`` + is the branch/commit name (default 'master') + + ``path`` + is an optional path + Commits that do not contain the path will not contribute to the count. + + Returns + ``int`` + """ + return Commit.count(self, start, path) + + def commit(self, id, path = ''): + """ + The Commit object for the specified id - ``to`` - is the branch/commit name of the older item - - Returns - ``git.Commit[]`` - """ - return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) - - def commits_since(self, start='master', path='', since='1970-01-01'): - """ - The Commits objects that are newer than the specified date. 
- Commits are returned in chronological order. + ``id`` + is the SHA1 identifier of the commit - ``start`` - is the branch/commit name (default 'master') + ``path`` + is an optional path, if set the returned commit must contain the path. - ``path`` - is an optinal path to limit the returned commits to. - + Returns + ``git.Commit`` + """ + options = {'max_count': 1} - ``since`` - is a string represeting a date/time + commits = Commit.find_all(self, id, path, **options) - Returns - ``git.Commit[]`` - """ - options = {'since': since} + if not commits: + raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) + return commits[0] - return Commit.find_all(self, start, path, **options) - - def commit_count(self, start='master', path=''): - """ - The number of commits reachable by the given branch/commit - - ``start`` - is the branch/commit name (default 'master') + def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): + """ + Returns a list of commits that is in ``other_repo`` but not in self - ``path`` - is an optional path - Commits that do not contain the path will not contribute to the count. - - Returns - ``int`` - """ - return Commit.count(self, start, path) - - def commit(self, id, path = ''): - """ - The Commit object for the specified id - - ``id`` - is the SHA1 identifier of the commit - - ``path`` - is an optional path, if set the returned commit must contain the path. 
- - Returns - ``git.Commit`` - """ - options = {'max_count': 1} - - commits = Commit.find_all(self, id, path, **options) - - if not commits: - raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) - return commits[0] - - def commit_deltas_from(self, other_repo, ref='master', other_ref='master'): - """ - Returns a list of commits that is in ``other_repo`` but not in self - - Returns - git.Commit[] - """ - repo_refs = self.git.rev_list(ref, '--').strip().splitlines() - other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() + Returns + git.Commit[] + """ + repo_refs = self.git.rev_list(ref, '--').strip().splitlines() + other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() - diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) + diff_refs = list(set(other_repo_refs) - set(repo_refs)) + return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) - def tree(self, treeish='master'): - """ - The Tree object for the given treeish reference + def tree(self, treeish='master'): + """ + The Tree object for the given treeish reference - ``treeish`` - is the reference (default 'master') + ``treeish`` + is the reference (default 'master') - Examples:: + Examples:: - repo.tree('master') + repo.tree('master') - Returns - ``git.Tree`` - """ - return Tree(self, id=treeish) + Returns + ``git.Tree`` + """ + return Tree(self, id=treeish) + + def blob(self, id): + """ + The Blob object for the given id + + ``id`` + is the SHA1 id of the blob + + Returns + ``git.Blob`` + """ + return Blob(self, id=id) + + def log(self, commit='master', path=None, **kwargs): + """ + The Commit for a treeish, and all commits leading to it. 
+ + ``kwargs`` + keyword arguments specifying flags to be used in git-log command, + i.e.: max_count=1 to limit the amount of commits returned + + Returns + ``git.Commit[]`` + """ + options = {'pretty': 'raw'} + options.update(kwargs) + arg = [commit, '--'] + if path: + arg.append(path) + commits = self.git.log(*arg, **options) + return Commit.list_from_string(self, commits) + + def diff(self, a, b, *paths): + """ + The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) + + ``a`` + is the base commit + ``b`` + is the other commit + + ``paths`` + is an optional list of file paths on which to restrict the diff + + Returns + ``str`` + """ + return self.git.diff(a, b, '--', *paths) + + def commit_diff(self, commit): + """ + The commit diff for the given commit + ``commit`` is the commit name/id - def blob(self, id): - """ - The Blob object for the given id - - ``id`` - is the SHA1 id of the blob - - Returns - ``git.Blob`` - """ - return Blob(self, id=id) - - def log(self, commit='master', path=None, **kwargs): - """ - The Commit for a treeish, and all commits leading to it. 
- - ``kwargs`` - keyword arguments specifying flags to be used in git-log command, - i.e.: max_count=1 to limit the amount of commits returned + Returns + ``git.Diff[]`` + """ + return Commit.diff(self, commit) - Returns - ``git.Commit[]`` - """ - options = {'pretty': 'raw'} - options.update(kwargs) - arg = [commit, '--'] - if path: - arg.append(path) - commits = self.git.log(*arg, **options) - return Commit.list_from_string(self, commits) + @classmethod + def init_bare(self, path, mkdir=True, **kwargs): + """ + Initialize a bare git repository at the given path - def diff(self, a, b, *paths): - """ - The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) - - ``a`` - is the base commit - ``b`` - is the other commit + ``path`` + is the full path to the repo (traditionally ends with /.git) - ``paths`` - is an optional list of file paths on which to restrict the diff - - Returns - ``str`` - """ - return self.git.diff(a, b, '--', *paths) + ``mkdir`` + if specified will create the repository directory if it doesn't + already exists. Creates the directory with a mode=0755. - def commit_diff(self, commit): - """ - The commit diff for the given commit - ``commit`` is the commit name/id + ``kwargs`` + keyword arguments serving as additional options to the git init command - Returns - ``git.Diff[]`` - """ - return Commit.diff(self, commit) + Examples:: - @classmethod - def init_bare(self, path, mkdir=True, **kwargs): - """ - Initialize a bare git repository at the given path + git.Repo.init_bare('/var/git/myrepo.git') - ``path`` - is the full path to the repo (traditionally ends with /.git) - - ``mkdir`` - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. 
- - ``kwargs`` - keyword arguments serving as additional options to the git init command - - Examples:: - - git.Repo.init_bare('/var/git/myrepo.git') - - Returns - ``git.Repo`` (the newly created repo) - """ - - if mkdir and not os.path.exists(path): - os.makedirs(path, 0755) - - git = Git(path) - output = git.init('--bare', **kwargs) - return Repo(path) - create = init_bare - - def fork_bare(self, path, **kwargs): - """ - Fork a bare git repository from this repo - - ``path`` - is the full path of the new repo (traditionally ends with /.git) - - ``kwargs`` - keyword arguments to be given to the git clone command - - Returns - ``git.Repo`` (the newly forked repo) - """ - options = {'bare': True} - options.update(kwargs) - self.git.clone(self.path, path, **options) - return Repo(path) - - def archive_tar(self, treeish='master', prefix=None): - """ - Archive the given treeish - - ``treeish`` - is the treeish name/id (default 'master') - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: - - >>> repo.archive_tar - - - >>> repo.archive_tar('a87ff14') - - - >>> repo.archive_tar('master', 'myproject/') - - - Returns - str (containing bytes of tar archive) - """ - options = {} - if prefix: - options['prefix'] = prefix - return self.git.archive(treeish, **options) - - def archive_tar_gz(self, treeish='master', prefix=None): - """ - Archive and gzip the given treeish - - ``treeish`` - is the treeish name/id (default 'master') - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - Examples:: - - >>> repo.archive_tar_gz - - - >>> repo.archive_tar_gz('a87ff14') - - - >>> repo.archive_tar_gz('master', 'myproject/') - - - Returns - str (containing the bytes of tar.gz archive) - """ - kwargs = {} - if prefix: - kwargs['prefix'] = prefix - resultstr = self.git.archive(treeish, **kwargs) - sio = StringIO.StringIO() - gf = gzip.GzipFile(fileobj=sio, mode ='wb') - gf.write(resultstr) - gf.close() - return 
sio.getvalue() - - def _get_daemon_export(self): - filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def _get_alternates(self): - """ - The list of alternates for this repo from which objects can be retrieved - - Returns - list of strings being pathnames of alternates - """ - alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates') - - if os.path.exists(alternates_path): - try: - f = open(alternates_path) - alts = f.read() - finally: - f.close() - return alts.strip().splitlines() - else: - return [] - - def _set_alternates(self, alts): - """ - Sets the alternates - - ``alts`` - is the array of string paths representing the alternates at which - git should look for objects, i.e. 
/home/user/repo/.git/objects + Returns + ``git.Repo`` (the newly created repo) + """ + + if mkdir and not os.path.exists(path): + os.makedirs(path, 0755) + + git = Git(path) + output = git.init('--bare', **kwargs) + return Repo(path) + create = init_bare + + def fork_bare(self, path, **kwargs): + """ + Fork a bare git repository from this repo + + ``path`` + is the full path of the new repo (traditionally ends with /.git) + + ``kwargs`` + keyword arguments to be given to the git clone command + + Returns + ``git.Repo`` (the newly forked repo) + """ + options = {'bare': True} + options.update(kwargs) + self.git.clone(self.path, path, **options) + return Repo(path) + + def archive_tar(self, treeish='master', prefix=None): + """ + Archive the given treeish + + ``treeish`` + is the treeish name/id (default 'master') + + ``prefix`` + is the optional prefix to prepend to each filename in the archive + + Examples:: + + >>> repo.archive_tar + + + >>> repo.archive_tar('a87ff14') + + + >>> repo.archive_tar('master', 'myproject/') + + + Returns + str (containing bytes of tar archive) + """ + options = {} + if prefix: + options['prefix'] = prefix + return self.git.archive(treeish, **options) + + def archive_tar_gz(self, treeish='master', prefix=None): + """ + Archive and gzip the given treeish + + ``treeish`` + is the treeish name/id (default 'master') + + ``prefix`` + is the optional prefix to prepend to each filename in the archive + + Examples:: + + >>> repo.archive_tar_gz + + + >>> repo.archive_tar_gz('a87ff14') + + + >>> repo.archive_tar_gz('master', 'myproject/') + + + Returns + str (containing the bytes of tar.gz archive) + """ + kwargs = {} + if prefix: + kwargs['prefix'] = prefix + resultstr = self.git.archive(treeish, **kwargs) + sio = StringIO.StringIO() + gf = gzip.GzipFile(fileobj=sio, mode ='wb') + gf.write(resultstr) + gf.close() + return sio.getvalue() + + def _get_daemon_export(self): + filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) + return 
os.path.exists(filename) + + def _set_daemon_export(self, value): + filename = os.path.join(self.path, self.DAEMON_EXPORT_FILE) + fileexists = os.path.exists(filename) + if value and not fileexists: + touch(filename) + elif not value and fileexists: + os.unlink(filename) + + daemon_export = property(_get_daemon_export, _set_daemon_export, + doc="If True, git-daemon may export this repository") + del _get_daemon_export + del _set_daemon_export + + def _get_alternates(self): + """ + The list of alternates for this repo from which objects can be retrieved + + Returns + list of strings being pathnames of alternates + """ + alternates_path = os.path.join(self.path, 'objects', 'info', 'alternates') + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return [] + + def _set_alternates(self, alts): + """ + Sets the alternates + + ``alts`` + is the array of string paths representing the alternates at which + git should look for objects, i.e. /home/user/repo/.git/objects Raises NoSuchPathError - Returns - None - """ - for alt in alts: - if not os.path.exists(alt): - raise NoSuchPathError("Could not set alternates. Alternate path %s must exist" % alt) - - if not alts: - os.remove(os.path.join(self.path, 'objects', 'info', 'alternates')) - else: - try: - f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w') - f.write("\n".join(alts)) - finally: - f.close() - - alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") - - @property - def is_dirty(self): - """ - Return the status of the index. - - Returns - ``True``, if the index has any uncommitted changes, - otherwise ``False`` + Returns + None + """ + for alt in alts: + if not os.path.exists(alt): + raise NoSuchPathError("Could not set alternates. 
Alternate path %s must exist" % alt) + + if not alts: + os.remove(os.path.join(self.path, 'objects', 'info', 'alternates')) + else: + try: + f = open(os.path.join(self.path, 'objects', 'info', 'alternates'), 'w') + f.write("\n".join(alts)) + finally: + f.close() + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + @property + def is_dirty(self): + """ + Return the status of the index. + + Returns + ``True``, if the index has any uncommitted changes, + otherwise ``False`` NOTE Working tree changes that have not been staged will not be detected ! - """ - if self.bare: - # Bare repositories with no associated working directory are - # always consired to be clean. - return False - - return len(self.git.diff('HEAD', '--').strip()) > 0 - - @property - def active_branch(self): - """ - The name of the currently active branch. - - Returns - str (the branch name) - """ - branch = self.git.symbolic_ref('HEAD').strip() - if branch.startswith('refs/heads/'): - branch = branch[len('refs/heads/'):] - - return branch - - def __repr__(self): - return '' % self.path + """ + if self.bare: + # Bare repositories with no associated working directory are + # always consired to be clean. + return False + + return len(self.git.diff('HEAD', '--').strip()) > 0 + + @property + def active_branch(self): + """ + The name of the currently active branch. + + Returns + str (the branch name) + """ + branch = self.git.symbolic_ref('HEAD').strip() + if branch.startswith('refs/heads/'): + branch = branch[len('refs/heads/'):] + + return branch + + def __repr__(self): + return '' % self.path diff --git a/lib/git/stats.py b/lib/git/stats.py index 307e2f2f..a39d1dab 100644 --- a/lib/git/stats.py +++ b/lib/git/stats.py @@ -5,55 +5,55 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php class Stats(object): - """ - Represents stat information as presented by git at the end of a merge. 
It is - created from the output of a diff operation. - - ``Example``:: - - c = Commit( sha1 ) - s = c.stats - s.total # full-stat-dict - s.files # dict( filepath : stat-dict ) - - ``stat-dict`` - - A dictionary with the following keys and values:: - - deletions = number of deleted lines as int - insertions = number of inserted lines as int - lines = total number of lines changed as int, or deletions + insertions - - ``full-stat-dict`` - - In addition to the items in the stat-dict, it features additional information:: - - files = number of changed files as int - - """ - def __init__(self, repo, total, files): - self.repo = repo - self.total = total - self.files = files + """ + Represents stat information as presented by git at the end of a merge. It is + created from the output of a diff operation. + + ``Example``:: + + c = Commit( sha1 ) + s = c.stats + s.total # full-stat-dict + s.files # dict( filepath : stat-dict ) + + ``stat-dict`` + + A dictionary with the following keys and values:: + + deletions = number of deleted lines as int + insertions = number of inserted lines as int + lines = total number of lines changed as int, or deletions + insertions + + ``full-stat-dict`` + + In addition to the items in the stat-dict, it features additional information:: + + files = number of changed files as int + + """ + def __init__(self, repo, total, files): + self.repo = repo + self.total = total + self.files = files - @classmethod - def list_from_string(cls, repo, text): - """ - Create a Stat object from output retrieved by git-diff. 
- - Returns - git.Stat - """ - hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}} - for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") - insertions = raw_insertions != '-' and int(raw_insertions) or 0 - deletions = raw_deletions != '-' and int(raw_deletions) or 0 - hsh['total']['insertions'] += insertions - hsh['total']['deletions'] += deletions - hsh['total']['lines'] += insertions + deletions - hsh['total']['files'] += 1 - hsh['files'][filename.strip()] = {'insertions': insertions, - 'deletions': deletions, - 'lines': insertions + deletions} - return Stats(repo, hsh['total'], hsh['files']) + @classmethod + def list_from_string(cls, repo, text): + """ + Create a Stat object from output retrieved by git-diff. + + Returns + git.Stat + """ + hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}} + for line in text.splitlines(): + (raw_insertions, raw_deletions, filename) = line.split("\t") + insertions = raw_insertions != '-' and int(raw_insertions) or 0 + deletions = raw_deletions != '-' and int(raw_deletions) or 0 + hsh['total']['insertions'] += insertions + hsh['total']['deletions'] += deletions + hsh['total']['lines'] += insertions + deletions + hsh['total']['files'] += 1 + hsh['files'][filename.strip()] = {'insertions': insertions, + 'deletions': deletions, + 'lines': insertions + deletions} + return Stats(repo, hsh['total'], hsh['files']) diff --git a/lib/git/tag.py b/lib/git/tag.py index 0c4122ab..4266a7a9 100644 --- a/lib/git/tag.py +++ b/lib/git/tag.py @@ -8,121 +8,121 @@ import commit import base class TagRef(base.Ref): - """ - Class representing a lightweight tag reference which either points to a commit - or to a tag object. In the latter case additional information, like the signature - or the tag-creator, is available. 
- - This tag object will always point to a commit object, but may carray additional - information in a tag object:: - - tagref = TagRef.find_all(repo)[0] - print tagref.commit.message - if tagref.tag is not None: - print tagref.tag.message - """ - - __slots__ = "tag" - - def __init__(self, path, commit_or_tag): - """ - Initialize a newly instantiated Tag + """ + Class representing a lightweight tag reference which either points to a commit + or to a tag object. In the latter case additional information, like the signature + or the tag-creator, is available. + + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagRef.find_all(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message + """ + + __slots__ = "tag" + + def __init__(self, path, commit_or_tag): + """ + Initialize a newly instantiated Tag - ``path`` - is the full path to the tag + ``path`` + is the full path to the tag - ``commit_or_tag`` - is the Commit or TagObject that this tag ref points to - """ - super(TagRef, self).__init__(path, commit_or_tag) - self.tag = None - - if commit_or_tag.type == "tag": - self.tag = commit_or_tag - # END tag object handling - - @property - def commit(self): - """ - Returns - Commit object the tag ref points to - """ - if self.object.type == "commit": - return self.object - # it is a tag object - return self.object.object + ``commit_or_tag`` + is the Commit or TagObject that this tag ref points to + """ + super(TagRef, self).__init__(path, commit_or_tag) + self.tag = None + + if commit_or_tag.type == "tag": + self.tag = commit_or_tag + # END tag object handling + + @property + def commit(self): + """ + Returns + Commit object the tag ref points to + """ + if self.object.type == "commit": + return self.object + # it is a tag object + return self.object.object - @classmethod - def find_all(cls, repo, common_path = "refs/tags", **kwargs): - """ - Returns - git.Tag[] 
- - For more documentation, please refer to git.base.Ref.find_all - """ - return super(TagRef,cls).find_all(repo, common_path, **kwargs) - - + @classmethod + def find_all(cls, repo, common_path = "refs/tags", **kwargs): + """ + Returns + git.Tag[] + + For more documentation, please refer to git.base.Ref.find_all + """ + return super(TagRef,cls).find_all(repo, common_path, **kwargs) + + # provide an alias Tag = TagRef - + class TagObject(base.Object): - """ - Non-Lightweight tag carrying additional information about an object we are pointing - to. - """ - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - - def __init__(self, repo, id, size=None, object=None, tag=None, - tagger=None, tagged_date=None, message=None): - """ - Initialize a tag object with additional data - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - - ``size`` - Size of the object's data in bytes - - ``object`` - Object instance of object we are pointing to - - ``tag`` - name of this tag - - ``tagger`` - Actor identifying the tagger - - ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the DateTime of the tag creation - """ - super(TagObject, self).__init__(repo, id , size) - self.object = object - self.tag = tag - self.tagger = tagger - self.tagged_date = tagged_date - self.message = message - - def __bake__(self): - super(TagObject, self).__bake__() - - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") - - obj, hexsha = lines[0].split(" ") # object - type_token, type_name = lines[1].split(" ") # type - self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag - - tagger_info = lines[3][7:]# tagger - self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) - - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) - - + """ + 
Non-Lightweight tag carrying additional information about an object we are pointing + to. + """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, id, size=None, object=None, tag=None, + tagger=None, tagged_date=None, message=None): + """ + Initialize a tag object with additional data + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + + ``size`` + Size of the object's data in bytes + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the DateTime of the tag creation + """ + super(TagObject, self).__init__(repo, id , size) + self.object = object + self.tag = tag + self.tagger = tagger + self.tagged_date = tagged_date + self.message = message + + def __bake__(self): + super(TagObject, self).__bake__() + + output = self.repo.git.cat_file(self.type,self.id) + lines = output.split("\n") + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = "\n".join(lines[5:]) + + diff --git a/lib/git/tree.py b/lib/git/tree.py index 6215f875..3d4deb16 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -9,97 +9,97 @@ import blob import base class Tree(base.Object): - - type = "tree" - - def __init__(self, repo, id, mode=None, path=None): - super(Tree, self).__init__(repo, id) - self.mode = mode - self.path = path - self._contents = None - - def __bake__(self): - # Read the tree contents. 
- super(Tree, self).__bake__() - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj - - @staticmethod - def content_from_string(repo, text): - """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - - Returns - ``git.Blob`` or ``git.Tree`` - """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None - - if typ == "tree": - return Tree(repo, id=id, mode=mode, path=path) - elif typ == "blob": - return blob.Blob(repo, id=id, mode=mode, path=path) - elif typ == "commit": - return None - else: - raise(TypeError, "Invalid type: %s" % typ) - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - - >>> Repo('/path/to/python-git').tree/'README.txt' - - - Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found - """ - return self.get(file) - - @property - def basename(self): - os.path.basename(self.path) - - def __repr__(self): - return '' % self.id - - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. 
- def __getitem__(self, key): - return self._contents[key] - - def __iter__(self): - return iter(self._contents) - - def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() + + type = "tree" + + def __init__(self, repo, id, mode=None, path=None): + super(Tree, self).__init__(repo, id) + self.mode = mode + self.path = path + self._contents = None + + def __bake__(self): + # Read the tree contents. + super(Tree, self).__bake__() + self._contents = {} + for line in self.repo.git.ls_tree(self.id).splitlines(): + obj = self.content_from_string(self.repo, line) + if obj is not None: + self._contents[obj.path] = obj + + @staticmethod + def content_from_string(repo, text): + """ + Parse a content item and create the appropriate object + + ``repo`` + is the Repo + + ``text`` + is the single line containing the items data in `git ls-tree` format + + Returns + ``git.Blob`` or ``git.Tree`` + """ + try: + mode, typ, id, path = text.expandtabs(1).split(" ", 3) + except: + return None + + if typ == "tree": + return Tree(repo, id=id, mode=mode, path=path) + elif typ == "blob": + return blob.Blob(repo, id=id, mode=mode, path=path) + elif typ == "commit": + return None + else: + raise(TypeError, "Invalid type: %s" % typ) + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + + >>> Repo('/path/to/python-git').tree/'README.txt' + + + Returns + ``git.Blob`` or ``git.Tree`` or ``None`` if not found + """ + return self.get(file) + + @property + def basename(self): + os.path.basename(self.path) + + def __repr__(self): + return '' % self.id + + # Implement the basics of the dict protocol: + # directories/trees can be seen as object 
dicts. + def __getitem__(self, key): + return self._contents[key] + + def __iter__(self): + return iter(self._contents) + + def __len__(self): + return len(self._contents) + + def __contains__(self, key): + return key in self._contents + + def get(self, key): + return self._contents.get(key) + + def items(self): + return self._contents.items() + + def keys(self): + return self._contents.keys() + + def values(self): + return self._contents.values() diff --git a/lib/git/utils.py b/lib/git/utils.py index 5d0ba8ca..c204c432 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -7,20 +7,20 @@ import os def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') def touch(filename): - os.utime(filename) + os.utime(filename) def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" + """ This is taken from the git setup.c:is_git_directory + function.""" - if os.path.isdir(d) and \ - os.path.isdir(os.path.join(d, 'objects')) and \ - os.path.isdir(os.path.join(d, 'refs')): - headref = os.path.join(d, 'HEAD') - return os.path.isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False + if os.path.isdir(d) and \ + os.path.isdir(os.path.join(d, 'objects')) and \ + os.path.isdir(os.path.join(d, 'refs')): + headref = os.path.join(d, 'HEAD') + return os.path.isfile(headref) or \ + (os.path.islink(headref) and + os.readlink(headref).startswith('refs')) + return False -- cgit v1.2.3 From 101fb1df36f29469ee8f4e0b9e7846d856b87daa Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 17:15:07 +0200 Subject: blob tests fixed to deal with changes to the Blob type --- lib/git/blob.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/git/blob.py b/lib/git/blob.py index d1b928cd..c6fb9c1d 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -43,10 +43,10 @@ class Blob(base.Object): Returns git.Blob """ - 
super(Blob,self).__init__(repo, id, "blob") + super(Blob,self).__init__(repo, id) self.mode = mode self.path = path - self._data_stored = None + self._data_stored = type(None) # serves as marker to prevent baking in this case @property def data(self): @@ -59,7 +59,7 @@ class Blob(base.Object): NOTE The data will be cached after the first access. """ - self._data_stored = self._data_stored or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + self._data_stored = ( self._data_stored is not type(None) and self._data_stored ) or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) return self._data_stored @property -- cgit v1.2.3 From 7a7eedde7f5d5082f7f207ef76acccd24a6113b1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 18:06:18 +0200 Subject: put Tree and Blob onto a new base class suitable to deal with IndexObjects --- lib/git/base.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/git/blob.py | 51 +++------------------------------------------------ lib/git/tree.py | 12 ++++-------- 3 files changed, 62 insertions(+), 57 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index 1f8e085d..22c73491 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -40,7 +40,7 @@ class Object(LazyMixin): Implements an Object which may be Blobs, Trees, Commits and Tags """ TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size") + __slots__ = ("repo", "id", "size", "_data_cached" ) type = None # to be set by subclass def __init__(self, repo, id, size=None): @@ -61,6 +61,7 @@ class Object(LazyMixin): self.repo = repo self.id = id self.size = size + self._data_cached = type(None) def __bake__(self): """ @@ -103,6 +104,20 @@ class Object(LazyMixin): """ return '' % (self.__class__.__name__, self.id) + @property + def data(self): + """ + The binary contents of this object. + + Returns + str + + NOTE + The data will be cached after the first access. 
+ """ + self._data_cached = ( self._data_cached is not type(None) and self._data_cached ) or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + return self._data_cached + @classmethod def get_type_by_name(cls, object_type_name): """ @@ -132,6 +147,45 @@ class Object(LazyMixin): raise ValueError("Cannot handle unknown object type: %s" % object_type_name) +class IndexObject(Object): + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, id, mode=None, path=None, size = None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in + + ``id`` : string + is the git object id as hex sha + + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration + + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + ``size`` : int + size of the object data in bytes + """ + super(IndexObject, self).__init__(repo, id, size) + self.mode = mode + self.path = path + + @property + def basename(self): + """ + Returns + The basename of the IndexObject's file path + """ + return os.path.basename(self.path) + + class Ref(object): """ Represents a named reference to any object diff --git a/lib/git/blob.py b/lib/git/blob.py index c6fb9c1d..b0e47a3c 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -12,11 +12,12 @@ from actor import Actor from commit import Commit import base -class Blob(base.Object): +class Blob(base.IndexObject): """A Blob encapsulates a git blob object""" DEFAULT_MIME_TYPE = "text/plain" type = "blob" - __slots__ = ("mode", "path", "_data_stored") + + __slots__ = tuple() # precompiled regex re_whitespace = re.compile(r'\s+') @@ -24,44 +25,6 @@ class Blob(base.Object): re_author_committer_start = re.compile(r'^(author|committer)') re_tab_full_line = re.compile(r'^\t(.*)$') - def __init__(self, 
repo, id, mode=None, path=None): - """ - Create an unbaked Blob containing just the specified attributes - - ``repo`` - is the Repo - - ``id`` - is the git object id - - ``mode`` - is the file mode - - ``path`` - is the path to the file - - Returns - git.Blob - """ - super(Blob,self).__init__(repo, id) - self.mode = mode - self.path = path - self._data_stored = type(None) # serves as marker to prevent baking in this case - - @property - def data(self): - """ - The binary contents of this blob. - - Returns - str - - NOTE - The data will be cached after the first access. - """ - self._data_stored = ( self._data_stored is not type(None) and self._data_stored ) or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self._data_stored - @property def mime_type(self): """ @@ -78,14 +41,6 @@ class Blob(base.Object): guesses = mimetypes.guess_type(self.path) return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - @property - def basename(self): - """ - Returns - The basename of the Blobs file path - """ - return os.path.basename(self.path) - @classmethod def blame(cls, repo, commit, file): """ diff --git a/lib/git/tree.py b/lib/git/tree.py index 3d4deb16..90f1b72d 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -8,14 +8,13 @@ import os import blob import base -class Tree(base.Object): +class Tree(base.IndexObject): type = "tree" + __slots__ = "_contents" - def __init__(self, repo, id, mode=None, path=None): - super(Tree, self).__init__(repo, id) - self.mode = mode - self.path = path + def __init__(self, repo, id, mode=None, path=None, size=None): + super(Tree, self).__init__(repo, id, mode, path, size) self._contents = None def __bake__(self): @@ -71,9 +70,6 @@ class Tree(base.Object): """ return self.get(file) - @property - def basename(self): - os.path.basename(self.path) def __repr__(self): return '' % self.id -- cgit v1.2.3 From 15b9129ec639112e94ea96b6a395ad9b149515d1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 
19:07:03 +0200 Subject: lazymixin system now supports per-attribute baking, it is up to the class whether it bakes more. This also leads to more efficient use of memory as values are only cached and set when required - the baking system does not require an own tracking variable anymore, and values are only to be cached once - then python will natively find the cache without involving any additional overhead. This works by using __getattr__ instead of __get_attribute__ which would always be called --- lib/git/base.py | 96 +++++++++++++++++++++++++++---------------------------- lib/git/commit.py | 69 ++++++++++++++++----------------------- lib/git/head.py | 14 -------- lib/git/tag.py | 52 +++++++++++++++--------------- lib/git/tree.py | 25 ++++++++------- 5 files changed, 113 insertions(+), 143 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index 22c73491..f3510558 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -7,43 +7,38 @@ import os class LazyMixin(object): lazy_properties = [] + __slots__ = tuple() - __slots__ = "__baked__" - - def __init__(self): - self.__baked__ = False - - def __getattribute__(self, attr): - val = object.__getattribute__(self, attr) - if val is not None: - return val - else: - self.__prebake__() - return object.__getattribute__(self, attr) - - def __bake__(self): - """ This method should be overridden in the derived class. """ - raise NotImplementedError(" '__bake__' method has not been implemented.") - - def __prebake__(self): - if self.__baked__: - return - self.__bake__() - self.__baked__ = True + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. 
+ """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) - def __bake_it__(self): - self.__baked__ = True + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags """ TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "_data_cached" ) + __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass - def __init__(self, repo, id, size=None): + def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments will be set on demand if None. @@ -53,21 +48,32 @@ class Object(LazyMixin): ``id`` SHA1 or ref suitable for git-rev-parse - - ``size`` - Size of the object's data in bytes """ super(Object,self).__init__() self.repo = repo self.id = id - self.size = size - self._data_cached = type(None) - def __bake__(self): + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): """ Retrieve object information """ - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + if attr == "size": + self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + elif attr == "data": + self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) def __eq__(self, other): """ @@ -105,18 +111,12 @@ class Object(LazyMixin): return '' % (self.__class__.__name__, self.id) @property - def data(self): + def id_abbrev(self): """ - The binary contents of this object. - Returns - str - - NOTE - The data will be cached after the first access. + First 7 bytes of the commit's sha id as an abbreviation of the full string. """ - self._data_cached = ( self._data_cached is not type(None) and self._data_cached ) or self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - return self._data_cached + return self.id[0:7] @classmethod def get_type_by_name(cls, object_type_name): @@ -154,7 +154,7 @@ class IndexObject(Object): """ __slots__ = ("path", "mode") - def __init__(self, repo, id, mode=None, path=None, size = None): + def __init__(self, repo, id, mode=None, path=None): """ Initialize a newly instanced IndexObject ``repo`` @@ -169,14 +169,11 @@ class IndexObject(Object): ``path`` : str is the path to the file in the file system, relative to the git repository root, i.e. 
file.ext or folder/other.ext - - ``size`` : int - size of the object data in bytes """ - super(IndexObject, self).__init__(repo, id, size) + super(IndexObject, self).__init__(repo, id) self.mode = mode self.path = path - + @property def basename(self): """ @@ -304,5 +301,6 @@ class Ref(object): git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") - obj = Object.get_type_by_name(type_name)(repo, hexsha, object_size) + obj = Object.get_type_by_name(type_name)(repo, hexsha) + obj.size = object_size return cls(full_path, obj) diff --git a/lib/git/commit.py b/lib/git/commit.py index 1ae84799..5d494621 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -25,6 +25,8 @@ class Commit(base.Object): # object configuration type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") def __init__(self, repo, id, tree=None, author=None, authored_date=None, committer=None, committed_date=None, message=None, parents=None): @@ -38,7 +40,7 @@ class Commit(base.Object): is the sha id of the commit ``parents`` : list( Commit, ... ) - is a list of commit ids + is a list of commit ids or actual Commits ``tree`` : Tree is the corresponding tree id @@ -61,49 +63,34 @@ class Commit(base.Object): Returns git.Commit """ - super(Commit,self).__init__(repo, id, "commit") - self.parents = None - self.tree = None - self.author = author - self.authored_date = authored_date - self.committer = committer - self.committed_date = committed_date - self.message = message - - if self.id: - if parents is not None: - self.parents = [Commit(repo, p) for p in parents] - if tree is not None: - self.tree = Tree(repo, id=tree) - - def __eq__(self, other): - return self.id == other.id - - def __ne__(self, other): - return self.id != other.id - - def __bake__(self): - """ - Called by LazyMixin superclass when the first uninitialized member needs - to be set as it is queried. 
- """ - super(Commit, self).__bake__() - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message + super(Commit,self).__init__(repo, id) + self._set_self_from_args_(locals()) - @property - def id_abbrev(self): + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.id and tree is not None: + self.tree = Tree(repo, id=tree) + # END id to tree conversion + + def _set_cache_(self, attr): """ - Returns - First 7 bytes of the commit's sha id as an abbreviation of the full string. + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. """ - return self.id[0:7] + if attr in self.__slots__: + temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) @property def summary(self): diff --git a/lib/git/head.py b/lib/git/head.py index f4e94637..42dfd735 100644 --- a/lib/git/head.py +++ b/lib/git/head.py @@ -27,20 +27,6 @@ class Head(base.Ref): '1c09f116cbc2cb4100fb6935bb162daa4723f455' """ - def __init__(self, path, commit): - """ - Initialize a newly instanced Head - - ``path`` - is the path to the head ref, relative to the .git directory, i.e. 
- refs/heads/master - - `commit` - is the Commit object that the head points to - """ - super(Head, self).__init__(name, commit) - - @property def commit(self): """ diff --git a/lib/git/tag.py b/lib/git/tag.py index 4266a7a9..89060ee0 100644 --- a/lib/git/tag.py +++ b/lib/git/tag.py @@ -74,7 +74,7 @@ class TagObject(base.Object): type = "tag" __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - def __init__(self, repo, id, size=None, object=None, tag=None, + def __init__(self, repo, id, object=None, tag=None, tagger=None, tagged_date=None, message=None): """ Initialize a tag object with additional data @@ -85,9 +85,6 @@ class TagObject(base.Object): ``id`` SHA1 or ref suitable for git-rev-parse - ``size`` - Size of the object's data in bytes - ``object`` Object instance of object we are pointing to @@ -100,29 +97,30 @@ class TagObject(base.Object): ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) is the DateTime of the tag creation """ - super(TagObject, self).__init__(repo, id , size) - self.object = object - self.tag = tag - self.tagger = tagger - self.tagged_date = tagged_date - self.message = message - - def __bake__(self): - super(TagObject, self).__bake__() - - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") - - obj, hexsha = lines[0].split(" ") # object - type_token, type_name = lines[1].split(" ") # type - self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag - - tagger_info = lines[3][7:]# tagger - self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + super(TagObject, self).__init__(repo, id ) + self._set_self_from_args_(locals()) - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in self.__slots__: + output = self.repo.git.cat_file(self.type,self.id) + lines = 
output.split("\n") + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = "\n".join(lines[5:]) + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) diff --git a/lib/git/tree.py b/lib/git/tree.py index 90f1b72d..db4a3e22 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -13,18 +13,19 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_contents" - def __init__(self, repo, id, mode=None, path=None, size=None): - super(Tree, self).__init__(repo, id, mode, path, size) - self._contents = None - - def __bake__(self): - # Read the tree contents. - super(Tree, self).__bake__() - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + def __init__(self, repo, id, mode=None, path=None): + super(Tree, self).__init__(repo, id, mode, path) + + def _set_cache_(self, attr): + if attr == "_contents": + # Read the tree contents. 
+ self._contents = {} + for line in self.repo.git.ls_tree(self.id).splitlines(): + obj = self.content_from_string(self.repo, line) + if obj is not None: + self._contents[obj.path] = obj + else: + super(Tree, self)._set_cache_(attr) @staticmethod def content_from_string(repo, text): -- cgit v1.2.3 From 9a119924bd314934158515a1a5f5877be63f6f91 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 20:21:22 +0200 Subject: fixed issue in Ref.name implementation which would not handle components properly --- lib/git/base.py | 10 ++++++++-- lib/git/tree.py | 6 +++--- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index f3510558..b7976dab 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -222,9 +222,15 @@ class Ref(object): def name(self): """ Returns - Name of this reference + (shortest) Name of this reference - it may contain path components """ - return os.path.basename(self.path) + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + + return '/'.join(tokens[2:]) @classmethod def find_all(cls, repo, common_path = "refs", **kwargs): diff --git a/lib/git/tree.py b/lib/git/tree.py index db4a3e22..597668ae 100644 --- a/lib/git/tree.py +++ b/lib/git/tree.py @@ -47,11 +47,11 @@ class Tree(base.IndexObject): return None if typ == "tree": - return Tree(repo, id=id, mode=mode, path=path) + return Tree(repo, id, mode, path) elif typ == "blob": - return blob.Blob(repo, id=id, mode=mode, path=path) + return blob.Blob(repo, id, mode, path) elif typ == "commit": - return None + return None else: raise(TypeError, "Invalid type: %s" % typ) -- cgit v1.2.3 From 708b8dda8e7b87841a5f39c60b799c514e75a9c7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 21:17:59 +0200 Subject: commit: fixed failing commit tests as the mocked git command would 
always return the same thing which does not work anymore - re-implemented it in a more dynamic manner, but in the end tests will have to be revised anyway Added slots to Diff and Stats type respectively --- lib/git/blob.py | 2 +- lib/git/commit.py | 15 +++++++++------ lib/git/diff.py | 4 ++-- lib/git/stats.py | 7 ++++--- 4 files changed, 16 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/git/blob.py b/lib/git/blob.py index b0e47a3c..1fafb128 100644 --- a/lib/git/blob.py +++ b/lib/git/blob.py @@ -57,7 +57,7 @@ class Blob(base.IndexObject): blames = [] info = None - for line in data.splitlines(): + for line in data.splitlines(False): parts = cls.re_whitespace.split(line, 1) firstpart = parts[0] if cls.re_hexsha_only.search(firstpart): diff --git a/lib/git/commit.py b/lib/git/commit.py index 5d494621..9bf753e0 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -162,8 +162,7 @@ class Commit(base.Object): Returns git.Commit[] """ - lines = [l for l in text.splitlines() if l.strip('\r\n')] - + lines =text.splitlines(False) commits = [] while lines: @@ -173,18 +172,22 @@ class Commit(base.Object): parents = [] while lines and lines[0].startswith('parent'): parents.append(lines.pop(0).split()[-1]) + # END while there are parent lines author, authored_date = cls._actor(lines.pop(0)) committer, committed_date = cls._actor(lines.pop(0)) - + + # free line + lines.pop(0) + messages = [] - while lines and lines[0].startswith(' '): + while lines and not lines[0].startswith('commit'): messages.append(lines.pop(0).strip()) - + # END while there are message lines message = '\n'.join(messages) commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, committer=committer, committed_date=committed_date, message=message)) - + # END while lines return commits @classmethod diff --git a/lib/git/diff.py b/lib/git/diff.py index ef58cb0e..7200b7e3 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -45,12 +45,12 @@ 
class Diff(object): \.\.(?P[0-9A-Fa-f]+)[ ]?(?P.+)?(?:\n|$))? """, re.VERBOSE | re.MULTILINE) re_is_null_hexsha = re.compile( r'^0{40}$' ) + __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", + "rename_from", "rename_to", "renamed", "diff") def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode, new_file, deleted_file, rename_from, rename_to, diff): - self.repo = repo - if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): self.a_blob = None else: diff --git a/lib/git/stats.py b/lib/git/stats.py index a39d1dab..19b1591f 100644 --- a/lib/git/stats.py +++ b/lib/git/stats.py @@ -31,8 +31,9 @@ class Stats(object): files = number of changed files as int """ - def __init__(self, repo, total, files): - self.repo = repo + __slots__ = ("total", "files") + + def __init__(self, total, files): self.total = total self.files = files @@ -56,4 +57,4 @@ class Stats(object): hsh['files'][filename.strip()] = {'insertions': insertions, 'deletions': deletions, 'lines': insertions + deletions} - return Stats(repo, hsh['total'], hsh['files']) + return Stats(hsh['total'], hsh['files']) -- cgit v1.2.3 From b01824b1aecf8aadae4501e22feb45c20fb26bce Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 22:22:28 +0200 Subject: Fixed remaining tests to deal with the changes mode is now generally an int compatible to the stat module --- lib/git/base.py | 18 ++++++++++++++++++ lib/git/commit.py | 12 ++++++------ lib/git/diff.py | 4 ++++ 3 files changed, 28 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/git/base.py b/lib/git/base.py index b7976dab..4e5298e4 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -171,9 +171,27 @@ class IndexObject(Object): file.ext or folder/other.ext """ super(IndexObject, self).__init__(repo, id) + if isinstance(mode, basestring): + mode = self._mode_str_to_int(mode) self.mode = mode self.path = path + @classmethod + def _mode_str_to_int( cls, modestr ): + """ + 
``modestr`` + string like 755 or 644 or 100644 - only the last 3 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-3:])): + mode += int(char) << iteration*3 + # END for each char + return mode + @property def basename(self): """ diff --git a/lib/git/commit.py b/lib/git/commit.py index 9bf753e0..68415be5 100644 --- a/lib/git/commit.py +++ b/lib/git/commit.py @@ -39,11 +39,11 @@ class Commit(base.Object): ``id`` is the sha id of the commit - ``parents`` : list( Commit, ... ) - is a list of commit ids or actual Commits + ``parents`` : tuple( Commit, ... ) + is a tuple of commit ids or actual Commits ``tree`` : Tree - is the corresponding tree id + is the corresponding tree id or an actual Tree ``author`` : Actor is the author string ( will be implicitly converted into an Actor object ) @@ -179,11 +179,11 @@ class Commit(base.Object): # free line lines.pop(0) - messages = [] + message_lines = [] while lines and not lines[0].startswith('commit'): - messages.append(lines.pop(0).strip()) + message_lines.append(lines.pop(0).strip()) # END while there are message lines - message = '\n'.join(messages) + message = '\n'.join(message_lines[:-1]) # last line is empty commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, committer=committer, committed_date=committed_date, message=message)) diff --git a/lib/git/diff.py b/lib/git/diff.py index 7200b7e3..943fb08a 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -62,6 +62,10 @@ class Diff(object): self.a_mode = a_mode self.b_mode = b_mode + if self.a_mode: + self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) + if self.b_mode: + self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) self.new_file = new_file self.deleted_file = deleted_file self.rename_from = rename_from -- cgit v1.2.3 
From c68459a17ff59043d29c90020fffe651b2164e6a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 11 Oct 2009 22:50:07 +0200 Subject: Added remaining tests for new base classes and removed some methods whose existance was doubtful or unsafe --- lib/git/__init__.py | 2 +- lib/git/base.py | 27 +++++++-------------------- 2 files changed, 8 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 45cb4673..5ce3c122 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -18,7 +18,7 @@ from git.cmd import Git from git.head import Head from git.repo import Repo from git.stats import Stats -from git.tag import Tag +from git.tag import Tag,TagRef,TagObject from git.tree import Tree from git.utils import dashify from git.utils import touch diff --git a/lib/git/base.py b/lib/git/base.py index 4e5298e4..252ebe4b 100644 --- a/lib/git/base.py +++ b/lib/git/base.py @@ -110,14 +110,6 @@ class Object(LazyMixin): """ return '' % (self.__class__.__name__, self.id) - @property - def id_abbrev(self): - """ - Returns - First 7 bytes of the commit's sha id as an abbreviation of the full string. - """ - return self.id[0:7] - @classmethod def get_type_by_name(cls, object_type_name): """ @@ -169,12 +161,15 @@ class IndexObject(Object): ``path`` : str is the path to the file in the file system, relative to the git repository root, i.e. file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. 
""" super(IndexObject, self).__init__(repo, id) + self._set_self_from_args_(locals()) if isinstance(mode, basestring): - mode = self._mode_str_to_int(mode) - self.mode = mode - self.path = path + self.mode = self._mode_str_to_int(mode) @classmethod def _mode_str_to_int( cls, modestr ): @@ -191,14 +186,6 @@ class IndexObject(Object): mode += int(char) << iteration*3 # END for each char return mode - - @property - def basename(self): - """ - Returns - The basename of the IndexObject's file path - """ - return os.path.basename(self.path) class Ref(object): @@ -222,7 +209,7 @@ class Ref(object): self.object = object def __str__(self): - return self.name() + return self.name def __repr__(self): return '' % (self.__class__.__name__, self.path) -- cgit v1.2.3 From f2834177c0fdf6b1af659e460fd3348f468b8ab0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 11:50:14 +0200 Subject: Reorganized package structure and cleaned up imports --- lib/git/__init__.py | 8 +- lib/git/base.py | 317 -------------------------------------------- lib/git/blob.py | 126 ------------------ lib/git/commit.py | 284 --------------------------------------- lib/git/diff.py | 2 +- lib/git/head.py | 49 ------- lib/git/objects/__init__.py | 11 ++ lib/git/objects/base.py | 162 ++++++++++++++++++++++ lib/git/objects/blob.py | 36 +++++ lib/git/objects/commit.py | 284 +++++++++++++++++++++++++++++++++++++++ lib/git/objects/tag.py | 71 ++++++++++ lib/git/objects/tree.py | 102 ++++++++++++++ lib/git/objects/util.py | 36 +++++ lib/git/refs.py | 241 +++++++++++++++++++++++++++++++++ lib/git/repo.py | 98 +++++++++++++- lib/git/tag.py | 126 ------------------ lib/git/tree.py | 102 -------------- 17 files changed, 1040 insertions(+), 1015 deletions(-) delete mode 100644 lib/git/base.py delete mode 100644 lib/git/blob.py delete mode 100644 lib/git/commit.py delete mode 100644 lib/git/head.py create mode 100644 lib/git/objects/__init__.py create mode 100644 lib/git/objects/base.py create mode 
100644 lib/git/objects/blob.py create mode 100644 lib/git/objects/commit.py create mode 100644 lib/git/objects/tag.py create mode 100644 lib/git/objects/tree.py create mode 100644 lib/git/objects/util.py create mode 100644 lib/git/refs.py delete mode 100644 lib/git/tag.py delete mode 100644 lib/git/tree.py (limited to 'lib') diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 5ce3c122..6f482128 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -9,19 +9,17 @@ import inspect __version__ = 'git' +from git.objects import * +from git.refs import * from git.actor import Actor -from git.blob import Blob -from git.commit import Commit from git.diff import Diff from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError from git.cmd import Git -from git.head import Head from git.repo import Repo from git.stats import Stats -from git.tag import Tag,TagRef,TagObject -from git.tree import Tree from git.utils import dashify from git.utils import touch + __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/base.py b/lib/git/base.py deleted file mode 100644 index 252ebe4b..00000000 --- a/lib/git/base.py +++ /dev/null @@ -1,317 +0,0 @@ -# base.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -import os - -class LazyMixin(object): - lazy_properties = [] - __slots__ = tuple() - - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. 
- """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) - - def _set_cache_(self, attr): - """ This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass - - -class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - """ - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.id = id - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. 
- - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) - elif attr == "data": - self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.id == other.id - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.id != other.id - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.id) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.id - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '' % (self.__class__.__name__, self.id) - - @classmethod - def get_type_by_name(cls, object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. 
- - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - -class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, id, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in - - ``id`` : string - is the git object id as hex sha - - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration - - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. 
- """ - super(IndexObject, self).__init__(repo, id) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - @classmethod - def _mode_str_to_int( cls, modestr ): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 3 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-3:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - -class Ref(object): - """ - Represents a named reference to any object - """ - __slots__ = ("path", "object") - - def __init__(self, path, object = None): - """ - Initialize this instance - - ``path`` - Path relative to the .git/ directory pointing to the ref in question, i.e. - refs/heads/master - - ``object`` - Object instance, will be retrieved on demand if None - """ - self.path = path - self.object = object - - def __str__(self): - return self.name - - def __repr__(self): - return '' % (self.__class__.__name__, self.path) - - def __eq__(self, other): - return self.path == other.path and self.object == other.object - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.path) - - @property - def name(self): - """ - Returns - (shortest) Name of this reference - it may contain path components - """ - # first two path tokens are can be removed as they are - # refs/heads or refs/tags or refs/remotes - tokens = self.path.split('/') - if len(tokens) < 3: - return self.path # could be refs/HEAD - - return '/'.join(tokens[2:]) - - @classmethod - def find_all(cls, repo, common_path = "refs", **kwargs): - """ - Find all refs in the repository - - ``repo`` - is the Repo - - ``common_path`` - Optional keyword argument to the path which is to be shared by all - returned Ref objects - - ``kwargs`` - Additional options 
given as keyword arguments, will be passed - to git-for-each-ref - - Returns - git.Ref[] - - List is sorted by committerdate - The returned objects are compatible to the Ref base, but represent the - actual type, such as Head or Tag - """ - - options = {'sort': "committerdate", - 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} - - options.update(kwargs) - - output = repo.git.for_each_ref(common_path, **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out ref information into a list of Ref compatible objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Ref[] - - list of Ref objects - """ - heads = [] - - for line in text.splitlines(): - heads.append(cls.from_string(repo, line)) - - return heads - - @classmethod - def from_string(cls, repo, line): - """ - Create a new Ref instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted ref information - - Format:: - - name: [a-zA-Z_/]+ - - id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ - full_path, hexsha, type_name, object_size = line.split("\x00") - obj = Object.get_type_by_name(type_name)(repo, hexsha) - obj.size = object_size - return cls(full_path, obj) diff --git a/lib/git/blob.py b/lib/git/blob.py deleted file mode 100644 index 1fafb128..00000000 --- a/lib/git/blob.py +++ /dev/null @@ -1,126 +0,0 @@ -# blob.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import mimetypes -import os -import re -import time -from actor import Actor -from commit import Commit -import base - -class Blob(base.IndexObject): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" - - __slots__ = tuple() - - # precompiled regex - 
re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) - - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. - """ - guesses = None - if self.path: - guesses = mimetypes.guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - - @classmethod - def blame(cls, repo, commit, file): - """ - The blame information for the given file at the given commit - - Returns - list: [git.Commit, list: []] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance. - """ - data = repo.git.blame(commit, '--', file, p=True) - commits = {} - blames = [] - info = None - - for line in data.splitlines(False): - parts = cls.re_whitespace.split(line, 1) - firstpart = parts[0] - if cls.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - # END blame data initialization - else: - m = cls.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = time.gmtime(int(parts[-1])) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename 
lib/grit.rb - # summary add Blob - # - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( repo, id=sha, - author=Actor.from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = cls.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - def __repr__(self): - return '' % self.id diff --git a/lib/git/commit.py b/lib/git/commit.py deleted file mode 100644 index 68415be5..00000000 --- a/lib/git/commit.py +++ /dev/null @@ -1,284 +0,0 @@ -# commit.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import re -import time - -from actor import Actor -from tree import Tree -import diff -import stats -import base - -class Commit(base.Object): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - # precompiled regex - re_actor_epoch = re.compile(r'^.+? 
(.*) (\d+) .*$') - - # object configuration - type = "commit" - __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", - "message", "parents") - - def __init__(self, repo, id, tree=None, author=None, authored_date=None, - committer=None, committed_date=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``id`` - is the sha id of the commit - - ``parents`` : tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - - ``tree`` : Tree - is the corresponding tree id or an actual Tree - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) - is the authored DateTime - - ``committer`` : Actor - is the committer string - - ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the committed DateTime - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, id) - self._set_self_from_args_(locals()) - - if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) - # END for each parent to convert - - if self.id and tree is not None: - self.tree = Tree(repo, id=tree) - # END id to tree conversion - - def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. 
- """ - if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message - else: - super(Commit, self)._set_cache_(attr) - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - @classmethod - def count(cls, repo, ref, path=''): - """ - Count the number of commits reachable from this ref - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path - - Returns - int - """ - return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) - - @classmethod - def find_all(cls, repo, ref, path='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``ref`` - is the ref from which to begin (SHA1 or name) - - ``path`` - is an optinal path, if set only Commits that include the path - will be considered - - ``kwargs`` - optional keyword arguments to git where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - - Returns - git.Commit[] - """ - options = {'pretty': 'raw'} - options.update(kwargs) - - output = repo.git.rev_list(ref, '--', path, **options) - return cls.list_from_string(repo, output) - - @classmethod - def list_from_string(cls, repo, text): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``text`` - is the text output from the git-rev-list command (raw format) - - Returns - git.Commit[] - """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] - - parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # 
END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) - - # free line - lines.pop(0) - - message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) - # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits - - @classmethod - def diff(cls, repo, a, b=None, paths=None): - """ - Creates diffs between a tree and the index or between two trees: - - ``repo`` - is the Repo - - ``a`` - is a named commit - - ``b`` - is an optional named commit. Passing a list assumes you - wish to omit the second named commit and limit the diff to the - given paths. - - ``paths`` - is a list of paths to limit the diff to. - - Returns - git.Diff[]:: - - between tree and the index if only a is given - between two trees if a and b are given and are commits - """ - paths = paths or [] - - if isinstance(b, list): - paths = b - b = None - - if paths: - paths.insert(0, "--") - - if b: - paths.insert(0, b) - paths.insert(0, a) - text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff.list_from_string(repo, text) - - @property - def diffs(self): - """ - Returns - git.Diff[] - Diffs between this commit and its first parent or all changes if this - commit is the first commit and has no parent. - """ - if not self.parents: - d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff.list_from_string(self.repo, d) - else: - return self.diff(self.repo, self.parents[0].id, self.id) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. 
- - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) - return stats.Stats.list_from_string(self.repo, text) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.id - - def __repr__(self): - return '' % self.id - - @classmethod - def _actor(cls, line): - """ - Parse out the actor (author or committer) info - - Returns - [Actor, gmtime(acted at time)] - """ - m = cls.re_actor_epoch.search(line) - actor, epoch = m.groups() - return (Actor.from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/diff.py b/lib/git/diff.py index 943fb08a..4bc88bf4 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -5,7 +5,7 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import re -import blob +import objects.blob as blob class Diff(object): """ diff --git a/lib/git/head.py b/lib/git/head.py deleted file mode 100644 index 42dfd735..00000000 --- a/lib/git/head.py +++ /dev/null @@ -1,49 +0,0 @@ -# head.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import commit -import base - -class Head(base.Ref): - """ - A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. 
- - Examples:: - - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] - - >>> head.name - 'master' - - >>> head.commit - - - >>> head.commit.id - '1c09f116cbc2cb4100fb6935bb162daa4723f455' - """ - - @property - def commit(self): - """ - Returns - Commit object the head points to - """ - return self.object - - @classmethod - def find_all(cls, repo, common_path = "refs/heads", **kwargs): - """ - Returns - git.Head[] - - For more documentation, please refer to git.base.Ref.find_all - """ - return super(Head,cls).find_all(repo, common_path, **kwargs) - - def __repr__(self): - return '' % self.name diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py new file mode 100644 index 00000000..39e650b7 --- /dev/null +++ b/lib/git/objects/__init__.py @@ -0,0 +1,11 @@ +""" +Import all submodules main classes into the package space +""" +import inspect +from tag import * +from blob import * +from tree import * +from commit import * + +__all__ = [ name for name, obj in locals().items() + if not (name.startswith('_') or inspect.ismodule(obj)) ] \ No newline at end of file diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py new file mode 100644 index 00000000..43aa8dd1 --- /dev/null +++ b/lib/git/objects/base.py @@ -0,0 +1,162 @@ +# base.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +import os + +class LazyMixin(object): + lazy_properties = [] + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. Next time the same attribute is reqeusted, it is simply + returned from our dict/slots. 
+ """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + + +class Object(LazyMixin): + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + """ + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "id", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.id = id + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + elif attr == "data": + self.data = self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.id == other.id + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.id != other.id + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.id) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.id + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '' % (self.__class__.__name__, self.id) + + +class IndexObject(Object): + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, id, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in + + ``id`` : string + is the git object id as hex sha + + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration + + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. 
+ """ + super(IndexObject, self).__init__(repo, id) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + @classmethod + def _mode_str_to_int( cls, modestr ): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 3 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-3:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py new file mode 100644 index 00000000..88ca73d6 --- /dev/null +++ b/lib/git/objects/blob.py @@ -0,0 +1,36 @@ +# blob.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import mimetypes +import base + +class Blob(base.IndexObject): + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" + + __slots__ = tuple() + + + @property + def mime_type(self): + """ + The mime type of this file (based on the filename) + + Returns + str + + NOTE + Defaults to 'text/plain' in case the actual file type is unknown. 
+ """ + guesses = None + if self.path: + guesses = mimetypes.guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + + + def __repr__(self): + return '' % self.id diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py new file mode 100644 index 00000000..c70b03e4 --- /dev/null +++ b/lib/git/objects/commit.py @@ -0,0 +1,284 @@ +# commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import re +import time + +from git.actor import Actor +from tree import Tree +import git.diff as diff +import git.stats as stats +import base + +class Commit(base.Object): + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + # precompiled regex + re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + + # object configuration + type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") + + def __init__(self, repo, id, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``id`` + is the sha id of the commit + + ``parents`` : tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + + ``tree`` : Tree + is the corresponding tree id or an actual Tree + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) + is the authored DateTime + + ``committer`` : Actor + is the committer string + + ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the committed DateTime + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, id) + self._set_self_from_args_(locals()) + + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.id and tree is not None: + self.tree = Tree(repo, id=tree) + # END id to tree conversion + + def _set_cache_(self, attr): + """ + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. + """ + if attr in self.__slots__: + temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. 
+ """ + return self.message.split('\n', 1)[0] + + @classmethod + def count(cls, repo, ref, path=''): + """ + Count the number of commits reachable from this ref + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path + + Returns + int + """ + return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) + + @classmethod + def find_all(cls, repo, ref, path='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path, if set only Commits that include the path + will be considered + + ``kwargs`` + optional keyword arguments to git where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + + Returns + git.Commit[] + """ + options = {'pretty': 'raw'} + options.update(kwargs) + + output = repo.git.rev_list(ref, '--', path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``text`` + is the text output from the git-rev-list command (raw format) + + Returns + git.Commit[] + """ + lines =text.splitlines(False) + commits = [] + + while lines: + id = lines.pop(0).split()[1] + tree = lines.pop(0).split()[1] + + parents = [] + while lines and lines[0].startswith('parent'): + parents.append(lines.pop(0).split()[-1]) + # END while there are parent lines + author, authored_date = cls._actor(lines.pop(0)) + committer, committed_date = cls._actor(lines.pop(0)) + + # free line + lines.pop(0) + + message_lines = [] + while lines and not lines[0].startswith('commit'): + message_lines.append(lines.pop(0).strip()) + # END while there are message lines + message = '\n'.join(message_lines[:-1]) # last line is empty + + commits.append(Commit(repo, id=id, parents=parents, tree=tree, 
author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message)) + # END while lines + return commits + + @classmethod + def diff(cls, repo, a, b=None, paths=None): + """ + Creates diffs between a tree and the index or between two trees: + + ``repo`` + is the Repo + + ``a`` + is a named commit + + ``b`` + is an optional named commit. Passing a list assumes you + wish to omit the second named commit and limit the diff to the + given paths. + + ``paths`` + is a list of paths to limit the diff to. + + Returns + git.Diff[]:: + + between tree and the index if only a is given + between two trees if a and b are given and are commits + """ + paths = paths or [] + + if isinstance(b, list): + paths = b + b = None + + if paths: + paths.insert(0, "--") + + if b: + paths.insert(0, b) + paths.insert(0, a) + text = repo.git.diff('-M', full_index=True, *paths) + return diff.Diff.list_from_string(repo, text) + + @property + def diffs(self): + """ + Returns + git.Diff[] + Diffs between this commit and its first parent or all changes if this + commit is the first commit and has no parent. + """ + if not self.parents: + d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') + return diff.Diff.list_from_string(self.repo, d) + else: + return self.diff(self.repo, self.parents[0].id, self.id) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. 
+ + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + return stats.Stats.list_from_string(self.repo, text) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.id + + def __repr__(self): + return '' % self.id + + @classmethod + def _actor(cls, line): + """ + Parse out the actor (author or committer) info + + Returns + [Actor, gmtime(acted at time)] + """ + m = cls.re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor.from_string(actor), time.gmtime(int(epoch))) diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py new file mode 100644 index 00000000..af1022f0 --- /dev/null +++ b/lib/git/objects/tag.py @@ -0,0 +1,71 @@ +# objects.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module containing all object based types. +""" +import base +import commit +from util import get_object_type_by_name + +class TagObject(base.Object): + """ + Non-Lightweight tag carrying additional information about an object we are pointing + to. 
+ """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, id, object=None, tag=None, + tagger=None, tagged_date=None, message=None): + """ + Initialize a tag object with additional data + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the DateTime of the tag creation + """ + super(TagObject, self).__init__(repo, id ) + self._set_self_from_args_(locals()) + + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in self.__slots__: + output = self.repo.git.cat_file(self.type,self.id) + lines = output.split("\n") + + obj, hexsha = lines[0].split(" ") # object + type_token, type_name = lines[1].split(" ") # type + self.object = get_object_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag + + tagger_info = lines[3][7:]# tagger + self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) + + # line 4 empty - check git source to figure out purpose + self.message = "\n".join(lines[5:]) + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py new file mode 100644 index 00000000..597668ae --- /dev/null +++ b/lib/git/objects/tree.py @@ -0,0 +1,102 @@ +# tree.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import os +import blob +import base + +class Tree(base.IndexObject): + + type = "tree" + __slots__ = "_contents" + + def __init__(self, repo, id, mode=None, path=None): + super(Tree, 
self).__init__(repo, id, mode, path) + + def _set_cache_(self, attr): + if attr == "_contents": + # Read the tree contents. + self._contents = {} + for line in self.repo.git.ls_tree(self.id).splitlines(): + obj = self.content_from_string(self.repo, line) + if obj is not None: + self._contents[obj.path] = obj + else: + super(Tree, self)._set_cache_(attr) + + @staticmethod + def content_from_string(repo, text): + """ + Parse a content item and create the appropriate object + + ``repo`` + is the Repo + + ``text`` + is the single line containing the items data in `git ls-tree` format + + Returns + ``git.Blob`` or ``git.Tree`` + """ + try: + mode, typ, id, path = text.expandtabs(1).split(" ", 3) + except: + return None + + if typ == "tree": + return Tree(repo, id, mode, path) + elif typ == "blob": + return blob.Blob(repo, id, mode, path) + elif typ == "commit": + return None + else: + raise(TypeError, "Invalid type: %s" % typ) + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + + >>> Repo('/path/to/python-git').tree/'README.txt' + + + Returns + ``git.Blob`` or ``git.Tree`` or ``None`` if not found + """ + return self.get(file) + + + def __repr__(self): + return '' % self.id + + # Implement the basics of the dict protocol: + # directories/trees can be seen as object dicts. 
+ def __getitem__(self, key): + return self._contents[key] + + def __iter__(self): + return iter(self._contents) + + def __len__(self): + return len(self._contents) + + def __contains__(self, key): + return key in self._contents + + def get(self, key): + return self._contents.get(key) + + def items(self): + return self._contents.items() + + def keys(self): + return self._contents.keys() + + def values(self): + return self._contents.values() diff --git a/lib/git/objects/util.py b/lib/git/objects/util.py new file mode 100644 index 00000000..15c1d114 --- /dev/null +++ b/lib/git/objects/util.py @@ -0,0 +1,36 @@ +# util.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module for general utility functions +""" +import commit, tag, blob, tree + +def get_object_type_by_name(object_type_name): + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. 
+ + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) diff --git a/lib/git/refs.py b/lib/git/refs.py new file mode 100644 index 00000000..cb730edb --- /dev/null +++ b/lib/git/refs.py @@ -0,0 +1,241 @@ +# refs.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module containing all ref based objects +""" +from objects.base import Object +from objects.util import get_object_type_by_name + +class Ref(object): + """ + Represents a named reference to any object + """ + __slots__ = ("path", "object") + + def __init__(self, path, object = None): + """ + Initialize this instance + + ``path`` + Path relative to the .git/ directory pointing to the ref in question, i.e. 
+ refs/heads/master + + ``object`` + Object instance, will be retrieved on demand if None + """ + self.path = path + self.object = object + + def __str__(self): + return self.name + + def __repr__(self): + return '' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path and self.object == other.object + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + Returns + (shortest) Name of this reference - it may contain path components + """ + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + + return '/'.join(tokens[2:]) + + @classmethod + def find_all(cls, repo, common_path = "refs", **kwargs): + """ + Find all refs in the repository + + ``repo`` + is the Repo + + ``common_path`` + Optional keyword argument to the path which is to be shared by all + returned Ref objects + + ``kwargs`` + Additional options given as keyword arguments, will be passed + to git-for-each-ref + + Returns + git.Ref[] + + List is sorted by committerdate + The returned objects are compatible to the Ref base, but represent the + actual type, such as Head or Tag + """ + + options = {'sort': "committerdate", + 'format': "%(refname)%00%(objectname)%00%(objecttype)%00%(objectsize)"} + + options.update(kwargs) + + output = repo.git.for_each_ref(common_path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out ref information into a list of Ref compatible objects + + ``repo`` + is the Repo + ``text`` + is the text output from the git-for-each-ref command + + Returns + git.Ref[] + + list of Ref objects + """ + heads = [] + + for line in text.splitlines(): + heads.append(cls.from_string(repo, line)) + + return heads + + @classmethod + def 
from_string(cls, repo, line): + """ + Create a new Ref instance from the given string. + + ``repo`` + is the Repo + + ``line`` + is the formatted ref information + + Format:: + + name: [a-zA-Z_/]+ + + id: [0-9A-Fa-f]{40} + + Returns + git.Head + """ + full_path, hexsha, type_name, object_size = line.split("\x00") + obj = get_object_type_by_name(type_name)(repo, hexsha) + obj.size = object_size + return cls(full_path, obj) + + +class Head(Ref): + """ + A Head is a named reference to a Commit. Every Head instance contains a name + and a Commit object. + + Examples:: + + >>> repo = Repo("/path/to/repo") + >>> head = repo.heads[0] + + >>> head.name + 'master' + + >>> head.commit + + + >>> head.commit.id + '1c09f116cbc2cb4100fb6935bb162daa4723f455' + """ + + @property + def commit(self): + """ + Returns + Commit object the head points to + """ + return self.object + + @classmethod + def find_all(cls, repo, common_path = "refs/heads", **kwargs): + """ + Returns + git.Head[] + + For more documentation, please refer to git.base.Ref.find_all + """ + return super(Head,cls).find_all(repo, common_path, **kwargs) + + def __repr__(self): + return '' % self.name + + + +class TagRef(Ref): + """ + Class representing a lightweight tag reference which either points to a commit + or to a tag object. In the latter case additional information, like the signature + or the tag-creator, is available. 
+ + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagRef.find_all(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message + """ + + __slots__ = "tag" + + def __init__(self, path, commit_or_tag): + """ + Initialize a newly instantiated Tag + + ``path`` + is the full path to the tag + + ``commit_or_tag`` + is the Commit or TagObject that this tag ref points to + """ + super(TagRef, self).__init__(path, commit_or_tag) + self.tag = None + + if commit_or_tag.type == "tag": + self.tag = commit_or_tag + # END tag object handling + + @property + def commit(self): + """ + Returns + Commit object the tag ref points to + """ + if self.object.type == "commit": + return self.object + # it is a tag object + return self.object.object + + @classmethod + def find_all(cls, repo, common_path = "refs/tags", **kwargs): + """ + Returns + git.Tag[] + + For more documentation, please refer to git.base.Ref.find_all + """ + return super(TagRef,cls).find_all(repo, common_path, **kwargs) + + +# provide an alias +Tag = TagRef diff --git a/lib/git/repo.py b/lib/git/repo.py index 3c872218..6e23dbc6 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -5,16 +5,18 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os +import re import gzip import StringIO +import time + from errors import InvalidGitRepositoryError, NoSuchPathError from utils import touch, is_git_dir from cmd import Git -from head import Head -from blob import Blob -from tag import Tag -from commit import Commit -from tree import Tree +from actor import Actor +from refs import * +from objects import * + class Repo(object): """ @@ -23,6 +25,12 @@ class Repo(object): the log. 
""" DAEMON_EXPORT_FILE = 'git-daemon-export-ok' + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') def __init__(self, path=None): """ @@ -108,6 +116,86 @@ class Repo(object): ``git.Tag[]`` """ return Tag.find_all(self) + + def blame(self, commit, file): + """ + The blame information for the given file at the given commit + + Returns + list: [git.Commit, list: []] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance. + """ + data = self.git.blame(commit, '--', file, p=True) + commits = {} + blames = [] + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = time.gmtime(int(parts[-1])) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + 
info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, id=sha, + author=Actor.from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames def commits(self, start='master', path='', max_count=10, skip=0): """ diff --git a/lib/git/tag.py b/lib/git/tag.py deleted file mode 100644 index 89060ee0..00000000 --- a/lib/git/tag.py +++ /dev/null @@ -1,126 +0,0 @@ -# tag.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import commit -import base - -class TagRef(base.Ref): - """ - Class representing a lightweight tag reference which either points to a commit - or to a tag object. In the latter case additional information, like the signature - or the tag-creator, is available. 
- - This tag object will always point to a commit object, but may carray additional - information in a tag object:: - - tagref = TagRef.find_all(repo)[0] - print tagref.commit.message - if tagref.tag is not None: - print tagref.tag.message - """ - - __slots__ = "tag" - - def __init__(self, path, commit_or_tag): - """ - Initialize a newly instantiated Tag - - ``path`` - is the full path to the tag - - ``commit_or_tag`` - is the Commit or TagObject that this tag ref points to - """ - super(TagRef, self).__init__(path, commit_or_tag) - self.tag = None - - if commit_or_tag.type == "tag": - self.tag = commit_or_tag - # END tag object handling - - @property - def commit(self): - """ - Returns - Commit object the tag ref points to - """ - if self.object.type == "commit": - return self.object - # it is a tag object - return self.object.object - - @classmethod - def find_all(cls, repo, common_path = "refs/tags", **kwargs): - """ - Returns - git.Tag[] - - For more documentation, please refer to git.base.Ref.find_all - """ - return super(TagRef,cls).find_all(repo, common_path, **kwargs) - - -# provide an alias -Tag = TagRef - -class TagObject(base.Object): - """ - Non-Lightweight tag carrying additional information about an object we are pointing - to. 
- """ - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - - def __init__(self, repo, id, object=None, tag=None, - tagger=None, tagged_date=None, message=None): - """ - Initialize a tag object with additional data - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - - ``object`` - Object instance of object we are pointing to - - ``tag`` - name of this tag - - ``tagger`` - Actor identifying the tagger - - ``tagged_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) - is the DateTime of the tag creation - """ - super(TagObject, self).__init__(repo, id ) - self._set_self_from_args_(locals()) - - def _set_cache_(self, attr): - """ - Cache all our attributes at once - """ - if attr in self.__slots__: - output = self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") - - obj, hexsha = lines[0].split(" ") # object - type_token, type_name = lines[1].split(" ") # type - self.object = base.Object.get_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag - - tagger_info = lines[3][7:]# tagger - self.tagger, self.tagged_date = commit.Commit._actor(tagger_info) - - # line 4 empty - check git source to figure out purpose - self.message = "\n".join(lines[5:]) - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - diff --git a/lib/git/tree.py b/lib/git/tree.py deleted file mode 100644 index 597668ae..00000000 --- a/lib/git/tree.py +++ /dev/null @@ -1,102 +0,0 @@ -# tree.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os -import blob -import base - -class Tree(base.IndexObject): - - type = "tree" - __slots__ = "_contents" - - def __init__(self, repo, id, mode=None, path=None): - super(Tree, self).__init__(repo, id, mode, 
path) - - def _set_cache_(self, attr): - if attr == "_contents": - # Read the tree contents. - self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj - else: - super(Tree, self)._set_cache_(attr) - - @staticmethod - def content_from_string(repo, text): - """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - - Returns - ``git.Blob`` or ``git.Tree`` - """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None - - if typ == "tree": - return Tree(repo, id, mode, path) - elif typ == "blob": - return blob.Blob(repo, id, mode, path) - elif typ == "commit": - return None - else: - raise(TypeError, "Invalid type: %s" % typ) - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - - >>> Repo('/path/to/python-git').tree/'README.txt' - - - Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found - """ - return self.get(file) - - - def __repr__(self): - return '' % self.id - - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. 
- def __getitem__(self, key): - return self._contents[key] - - def __iter__(self): - return iter(self._contents) - - def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() -- cgit v1.2.3 From 637eadce54ca8bbe536bcf7c570c025e28e47129 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 14:56:47 +0200 Subject: renamed from_string and list_from_string to _from_string and _list_from_string to indicate their new status as private method, adjusted all callers respectively --- lib/git/actor.py | 2 +- lib/git/diff.py | 2 +- lib/git/objects/commit.py | 12 ++++++------ lib/git/objects/tree.py | 4 ++-- lib/git/refs.py | 8 ++++---- lib/git/repo.py | 6 +++--- lib/git/stats.py | 2 +- 7 files changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/git/actor.py b/lib/git/actor.py index f1aeda9b..fe4a47e5 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -25,7 +25,7 @@ class Actor(object): return '">' % (self.name, self.email) @classmethod - def from_string(cls, string): + def _from_string(cls, string): """ Create an Actor from a string. 
diff --git a/lib/git/diff.py b/lib/git/diff.py index 4bc88bf4..0db83b4f 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -74,7 +74,7 @@ class Diff(object): self.diff = diff @classmethod - def list_from_string(cls, repo, text): + def _list_from_string(cls, repo, text): """ Create a new diff object from the given text ``repo`` diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c70b03e4..c3e97bf9 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -146,10 +146,10 @@ class Commit(base.Object): options.update(kwargs) output = repo.git.rev_list(ref, '--', path, **options) - return cls.list_from_string(repo, output) + return cls._list_from_string(repo, output) @classmethod - def list_from_string(cls, repo, text): + def _list_from_string(cls, repo, text): """ Parse out commit information into a list of Commit objects @@ -228,7 +228,7 @@ class Commit(base.Object): paths.insert(0, b) paths.insert(0, a) text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff.list_from_string(repo, text) + return diff.Diff._list_from_string(repo, text) @property def diffs(self): @@ -240,7 +240,7 @@ class Commit(base.Object): """ if not self.parents: d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff.list_from_string(self.repo, d) + return diff.Diff._list_from_string(self.repo, d) else: return self.diff(self.repo, self.parents[0].id, self.id) @@ -262,7 +262,7 @@ class Commit(base.Object): text = text2 else: text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) - return stats.Stats.list_from_string(self.repo, text) + return stats.Stats._list_from_string(self.repo, text) def __str__(self): """ Convert commit to string which is SHA1 """ @@ -281,4 +281,4 @@ class Commit(base.Object): """ m = cls.re_actor_epoch.search(line) actor, epoch = m.groups() - return (Actor.from_string(actor), time.gmtime(int(epoch))) + return (Actor._from_string(actor), time.gmtime(int(epoch))) diff 
--git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 597668ae..273384a3 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -21,14 +21,14 @@ class Tree(base.IndexObject): # Read the tree contents. self._contents = {} for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content_from_string(self.repo, line) + obj = self.content__from_string(self.repo, line) if obj is not None: self._contents[obj.path] = obj else: super(Tree, self)._set_cache_(attr) @staticmethod - def content_from_string(repo, text): + def content__from_string(repo, text): """ Parse a content item and create the appropriate object diff --git a/lib/git/refs.py b/lib/git/refs.py index cb730edb..820150d3 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -88,10 +88,10 @@ class Ref(object): options.update(kwargs) output = repo.git.for_each_ref(common_path, **options) - return cls.list_from_string(repo, output) + return cls._list_from_string(repo, output) @classmethod - def list_from_string(cls, repo, text): + def _list_from_string(cls, repo, text): """ Parse out ref information into a list of Ref compatible objects @@ -108,12 +108,12 @@ class Ref(object): heads = [] for line in text.splitlines(): - heads.append(cls.from_string(repo, line)) + heads.append(cls._from_string(repo, line)) return heads @classmethod - def from_string(cls, repo, line): + def _from_string(cls, repo, line): """ Create a new Ref instance from the given string. 
diff --git a/lib/git/repo.py b/lib/git/repo.py index 6e23dbc6..dd5acfc3 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -179,9 +179,9 @@ class Repo(object): c = commits.get(sha) if c is None: c = Commit( self, id=sha, - author=Actor.from_string(info['author'] + ' ' + info['author_email']), + author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], - committer=Actor.from_string(info['committer'] + ' ' + info['committer_email']), + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c @@ -357,7 +357,7 @@ class Repo(object): if path: arg.append(path) commits = self.git.log(*arg, **options) - return Commit.list_from_string(self, commits) + return Commit._list_from_string(self, commits) def diff(self, a, b, *paths): """ diff --git a/lib/git/stats.py b/lib/git/stats.py index 19b1591f..bda4e539 100644 --- a/lib/git/stats.py +++ b/lib/git/stats.py @@ -38,7 +38,7 @@ class Stats(object): self.files = files @classmethod - def list_from_string(cls, repo, text): + def _list_from_string(cls, repo, text): """ Create a Stat object from output retrieved by git-diff. 
-- cgit v1.2.3 From 4c73e9cd66c77934f8a262b0c1bab9c2f15449ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:03:01 +0200 Subject: refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects Improved way commits are returned by refs as they now use the path to be sure they always point to the ref even if it changes - previously it would use the sha instead so it would not update after being cached on the ref object --- lib/git/objects/base.py | 25 +------------ lib/git/objects/commit.py | 2 +- lib/git/refs.py | 94 +++++++++++++++++++++-------------------------- lib/git/utils.py | 25 +++++++++++++ 4 files changed, 68 insertions(+), 78 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 43aa8dd1..5007f3a1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,30 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os - -class LazyMixin(object): - lazy_properties = [] - __slots__ = tuple() - - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. - """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) - - def _set_cache_(self, attr): - """ This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. 
Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass +from git.utils import LazyMixin class Object(LazyMixin): diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c3e97bf9..f1f878d7 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -37,7 +37,7 @@ class Commit(base.Object): The parameter documentation indicates the type of the argument after a colon ':'. ``id`` - is the sha id of the commit + is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) is a tuple of commit ids or actual Commits diff --git a/lib/git/refs.py b/lib/git/refs.py index 820150d3..bc5cc005 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -8,16 +8,19 @@ Module containing all ref based objects """ from objects.base import Object from objects.util import get_object_type_by_name +from utils import LazyMixin -class Ref(object): +class Ref(LazyMixin): """ Represents a named reference to any object """ - __slots__ = ("path", "object") + __slots__ = ("repo", "path", "object") - def __init__(self, path, object = None): + def __init__(self, repo, path, object = None): """ Initialize this instance + ``repo`` + Our parent repository ``path`` Path relative to the .git/ directory pointing to the ref in question, i.e. 
@@ -26,8 +29,19 @@ class Ref(object): ``object`` Object instance, will be retrieved on demand if None """ + self.repo = repo self.path = path - self.object = object + if object is not None: + self.object = object + + def _set_cache_(self, attr): + if attr == "object": + # have to be dynamic here as we may be a tag which can point to anything + # it uses our path to stay dynamic + type_string = self.repo.git.cat_file(self.path, t=True).rstrip() + self.object = get_object_type_by_name(type_string)(self.repo, self.path) + else: + super(Ref, self)._set_cache_(attr) def __str__(self): return self.name @@ -92,19 +106,8 @@ class Ref(object): @classmethod def _list_from_string(cls, repo, text): - """ - Parse out ref information into a list of Ref compatible objects - - ``repo`` - is the Repo - ``text`` - is the text output from the git-for-each-ref command - - Returns - git.Ref[] - - list of Ref objects - """ + """ Parse out ref information into a list of Ref compatible objects + Returns git.Ref[] list of Ref objects """ heads = [] for line in text.splitlines(): @@ -114,28 +117,16 @@ class Ref(object): @classmethod def _from_string(cls, repo, line): - """ - Create a new Ref instance from the given string. - - ``repo`` - is the Repo - - ``line`` - is the formatted ref information - - Format:: - + """ Create a new Ref instance from the given string. 
+ Format name: [a-zA-Z_/]+ id: [0-9A-Fa-f]{40} - - Returns - git.Head - """ + Returns git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") obj = get_object_type_by_name(type_name)(repo, hexsha) obj.size = object_size - return cls(full_path, obj) + return cls(repo, full_path, obj) class Head(Ref): @@ -196,24 +187,7 @@ class TagRef(Ref): print tagref.tag.message """ - __slots__ = "tag" - - def __init__(self, path, commit_or_tag): - """ - Initialize a newly instantiated Tag - - ``path`` - is the full path to the tag - - ``commit_or_tag`` - is the Commit or TagObject that this tag ref points to - """ - super(TagRef, self).__init__(path, commit_or_tag) - self.tag = None - - if commit_or_tag.type == "tag": - self.tag = commit_or_tag - # END tag object handling + __slots__ = tuple() @property def commit(self): @@ -223,8 +197,22 @@ class TagRef(Ref): """ if self.object.type == "commit": return self.object - # it is a tag object - return self.object.object + elif self.object.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return self.object.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + + @property + def tag(self): + """ + Returns + Tag object this tag ref points to or None in case + we are a light weight tag + """ + if self.object.type == "tag": + return self.object + return None @classmethod def find_all(cls, repo, common_path = "refs/tags", **kwargs): diff --git a/lib/git/utils.py b/lib/git/utils.py index c204c432..39994bd5 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -24,3 +24,28 @@ def is_git_dir(d): (os.path.islink(headref) and os.readlink(headref).startswith('refs')) return False + + +class LazyMixin(object): + __slots__ = tuple() + + def __getattr__(self, attr): + """ + Whenever an attribute is requested that we do not know, we allow it + to be created and set. 
Next time the same attribute is requested, it is simply + returned from our dict/slots. + """ + self._set_cache_(attr) + # will raise in case the cache was not created + return object.__getattribute__(self, attr) + + def _set_cache_(self, attr): + """ This method should be overridden in the derived class. + It should check whether the attribute named by attr can be created + and cached. Do nothing if you do not know the attribute or call your subclass + + The derived class may create as many additional attributes as it deems + necessary in case a git command returns more information than represented + in the single attribute.""" + pass + -- cgit v1.2.3 From af9e37c5c8714136974124621d20c0436bb0735f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:40:13 +0200 Subject: IndexObjects are now checking their slots to raise a proper error message in case someone tries to access an unset path or mode - this information cannot be retrieved afterwards as IndexObject information is kept in the object that pointed at them. 
To find this information, one would have to search all objects which is not feasible --- lib/git/objects/base.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 5007f3a1..d3e0d943 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -120,8 +120,15 @@ class IndexObject(Object): if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) + def _set_cache_(self, attr): + if attr in self.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + @classmethod - def _mode_str_to_int( cls, modestr ): + def _mode_str_to_int(cls, modestr): """ ``modestr`` string like 755 or 644 or 100644 - only the last 3 chars will be used -- cgit v1.2.3 From beb76aba0c835669629d95c905551f58cc927299 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:41:00 +0200 Subject: repo.active_branch now returns a Head object, not a string --- lib/git/repo.py | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/git/repo.py b/lib/git/repo.py index dd5acfc3..39e84088 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -277,12 +277,14 @@ class Repo(object): """ return Commit.count(self, start, path) - def commit(self, id, path = ''): + def commit(self, id=None, path = ''): """ The Commit object for the specified id ``id`` - is the SHA1 identifier of the commit + is the SHA1 identifier of the commit or a ref or a ref name + if None, it defaults to the active branch + ``path`` is an optional path, if set the returned commit must contain the path. 
@@ -290,6 +292,8 @@ class Repo(object): Returns ``git.Commit`` """ + if id is None: + id = self.active_branch options = {'max_count': 1} commits = Commit.find_all(self, id, path, **options) @@ -311,22 +315,34 @@ class Repo(object): diff_refs = list(set(other_repo_refs) - set(repo_refs)) return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) - def tree(self, treeish='master'): + def tree(self, treeish=None): """ The Tree object for the given treeish reference ``treeish`` - is the reference (default 'master') + is a Ref instance defaulting to the active_branch if None. Examples:: - repo.tree('master') - + repo.tree(repo.heads[0]) Returns ``git.Tree`` + + NOTE + A ref is requried here to assure you point to a commit or tag. Otherwise + it is not garantueed that you point to the root-level tree. + + If you need a non-root level tree, find it by iterating the root tree. """ - return Tree(self, id=treeish) + if treeish is None: + treeish = self.active_branch + if not isinstance(treeish, Ref): + raise ValueError( "Treeish reference required, got %r" % treeish ) + + # we should also check whether the ref has a valid commit ... but lets n + # not be over-critical + return Tree(self, treeish) def blob(self, id): """ @@ -588,13 +604,9 @@ class Repo(object): The name of the currently active branch. 
Returns - str (the branch name) + Head to the active branch """ - branch = self.git.symbolic_ref('HEAD').strip() - if branch.startswith('refs/heads/'): - branch = branch[len('refs/heads/'):] - - return branch + return Head( self, self.git.symbolic_ref('HEAD').strip() ) def __repr__(self): return '' % self.path -- cgit v1.2.3 From a58a60ac5f322eb4bfd38741469ff21b5a33d2d5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 23:18:43 +0200 Subject: tree: now behaves like a list with string indexing functionality - using a dict as cache is a problem as the tree is ordered, added blobs, trees and traverse method repo: remove blob function as blobs are created directly or iterated - primitve types should not clutter the repo interface --- lib/git/objects/tree.py | 170 ++++++++++++++++++++++++++++++++++++++---------- lib/git/repo.py | 15 +---- 2 files changed, 139 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 273384a3..707cebaa 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -9,26 +9,57 @@ import blob import base class Tree(base.IndexObject): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ type = "tree" - __slots__ = "_contents" + __slots__ = "_cache" def __init__(self, repo, id, mode=None, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): - if attr == "_contents": - # Read the tree contents. 
- self._contents = {} - for line in self.repo.git.ls_tree(self.id).splitlines(): - obj = self.content__from_string(self.repo, line) - if obj is not None: - self._contents[obj.path] = obj + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache(self.repo, self.id) else: super(Tree, self)._set_cache_(attr) - @staticmethod - def content__from_string(repo, text): + @classmethod + def _get_tree_cache(cls, repo, treeish): + """ + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for line in repo.git.ls_tree(treeish).splitlines(): + obj = cls.content_from_string(repo, line) + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + @classmethod + def content_from_string(cls, repo, text): """ Parse a content item and create the appropriate object @@ -40,6 +71,8 @@ class Tree(base.IndexObject): Returns ``git.Blob`` or ``git.Tree`` + + NOTE: Currently sub-modules are ignored ! 
""" try: mode, typ, id, path = text.expandtabs(1).split(" ", 3) @@ -51,6 +84,7 @@ class Tree(base.IndexObject): elif typ == "blob": return blob.Blob(repo, id, mode, path) elif typ == "commit": + # TODO: Return a submodule return None else: raise(TypeError, "Invalid type: %s" % typ) @@ -67,36 +101,104 @@ class Tree(base.IndexObject): Returns - ``git.Blob`` or ``git.Tree`` or ``None`` if not found + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree """ - return self.get(file) + return self[file] def __repr__(self): return '' % self.id + + @classmethod + def _iter_recursive(cls, repo, tree, cur_depth, max_depth, predicate ): + + for obj in tree: + # adjust path to be complete + obj.path = os.path.join(tree.path, obj.path) + if not predicate(obj): + continue + yield obj + if obj.type == "tree" and ( max_depth < 0 or cur_depth+1 <= max_depth ): + for recursive_obj in cls._iter_recursive( repo, obj, cur_depth+1, max_depth, predicate ): + yield recursive_obj + # END for each recursive object + # END if we may enter recursion + # END for each object + + def traverse(self, max_depth=-1, predicate = lambda i: True): + """ + Returns + Iterator to traverse the tree recursively up to the given level. + The iterator returns Blob and Tree objects + + ``max_depth`` + + if -1, the whole tree will be traversed + if 0, only the first level will be traversed which is the same as + the default non-recursive iterator + + ``predicate`` + + If predicate(item) returns True, item will be returned by iterator + """ + return self._iter_recursive( self.repo, self, 0, max_depth, predicate ) + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) 
list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] - # Implement the basics of the dict protocol: - # directories/trees can be seen as object dicts. - def __getitem__(self, key): - return self._contents[key] + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + def __iter__(self): - return iter(self._contents) - + return iter(self._cache) + def __len__(self): - return len(self._contents) - - def __contains__(self, key): - return key in self._contents - - def get(self, key): - return self._contents.get(key) - - def items(self): - return self._contents.items() - - def keys(self): - return self._contents.keys() - - def values(self): - return self._contents.values() + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + for obj in self._cache: + if obj.path == item: + return obj + # END for each obj + raise KeyError( "Blob or Tree named %s not found" % item ) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/repo.py b/lib/git/repo.py index 39e84088..c1387870 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -342,19 +342,10 @@ class Repo(object): # we should also check whether the ref has a valid commit ... 
but lets n # not be over-critical - return Tree(self, treeish) + # the root has an empty relative path and the default mode + root = Tree(self, treeish, 0, '') + return root - def blob(self, id): - """ - The Blob object for the given id - - ``id`` - is the SHA1 id of the blob - - Returns - ``git.Blob`` - """ - return Blob(self, id=id) def log(self, commit='master', path=None, **kwargs): """ -- cgit v1.2.3 From 86fa577e135713e56b287169d69d976cde27ac97 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 17:36:27 +0200 Subject: tree: renamed content_from_string to _from_string to make it private. Removed tests that were testing that method --- lib/git/objects/tree.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 707cebaa..1bc35d95 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -50,7 +50,7 @@ class Tree(base.IndexObject): """ out = list() for line in repo.git.ls_tree(treeish).splitlines(): - obj = cls.content_from_string(repo, line) + obj = cls._from_string(repo, line) if obj is not None: out.append(obj) # END if object was handled @@ -59,7 +59,7 @@ class Tree(base.IndexObject): @classmethod - def content_from_string(cls, repo, text): + def _from_string(cls, repo, text): """ Parse a content item and create the appropriate object -- cgit v1.2.3 From 5eb0f2c241718bc7462be44e5e8e1e36e35f9b15 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 17:50:26 +0200 Subject: unified name of utils module, recently it was named util and utils in different packages --- lib/git/objects/tag.py | 2 +- lib/git/objects/util.py | 36 ------------------------------------ lib/git/objects/utils.py | 36 ++++++++++++++++++++++++++++++++++++ lib/git/refs.py | 2 +- 4 files changed, 38 insertions(+), 38 deletions(-) delete mode 100644 lib/git/objects/util.py create mode 100644 lib/git/objects/utils.py (limited to 'lib') diff --git 
a/lib/git/objects/tag.py b/lib/git/objects/tag.py index af1022f0..261d835f 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -8,7 +8,7 @@ Module containing all object based types. """ import base import commit -from util import get_object_type_by_name +from utils import get_object_type_by_name class TagObject(base.Object): """ diff --git a/lib/git/objects/util.py b/lib/git/objects/util.py deleted file mode 100644 index 15c1d114..00000000 --- a/lib/git/objects/util.py +++ /dev/null @@ -1,36 +0,0 @@ -# util.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" -Module for general utility functions -""" -import commit, tag, blob, tree - -def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py new file mode 100644 index 00000000..15c1d114 --- /dev/null +++ b/lib/git/objects/utils.py @@ -0,0 +1,36 @@ +# util.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php +""" +Module for general utility functions +""" +import commit, tag, blob, tree + +def get_object_type_by_name(object_type_name): + """ + Returns 
+ type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) diff --git a/lib/git/refs.py b/lib/git/refs.py index bc5cc005..bdfff6f4 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -7,7 +7,7 @@ Module containing all ref based objects """ from objects.base import Object -from objects.util import get_object_type_by_name +from objects.utils import get_object_type_by_name from utils import LazyMixin class Ref(LazyMixin): -- cgit v1.2.3 From f4fa1cb3c3e84cad8b74edb28531d2e27508be26 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 19:35:49 +0200 Subject: Added base for all iteratable objects --- lib/git/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib') diff --git a/lib/git/utils.py b/lib/git/utils.py index 39994bd5..96ec15b9 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -27,6 +27,12 @@ def is_git_dir(d): class LazyMixin(object): + """ + Base class providing an interface to lazily retrieve attribute values upon + first access. If slots are used, memory will only be reserved once the attribute + is actually accessed and retrieved the first time. All future accesses will + return the cached value as stored in the Instance's dict or slot. 
+ """ __slots__ = tuple() def __getattr__(self, attr): @@ -49,3 +55,35 @@ class LazyMixin(object): in the single attribute.""" pass + +class Iterable(object): + """ + Defines an interface for iterable items which is to assure a uniform + way to retrieve and iterate items within the git repository + """ + __slots__ = tuple() + + @classmethod + def list_items(cls, repo, *args, **kwargs): + """ + Find all items of this type - subclasses can specify args and kwargs differently. + If no args are given, subclasses are obliged to return all items if no additional + arguments arg given. + + Note: Favor the iter_items method as it will + + Returns: + list(Item,...) list of item instances + """ + return list(cls.iter_items, repo, *args, **kwargs) + + + @classmethod + def iter_items(cls, repo, *args, **kwargs): + """ + For more information about the arguments, see find_all + Return: + iterator yielding Items + """ + raise NotImplementedError("To be implemented by Subclass") + -- cgit v1.2.3 From 6acec357c7609fdd2cb0f5fdb1d2756726c7fe98 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:26:19 +0200 Subject: renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes --- lib/git/objects/commit.py | 71 ++++++++++++++++++++++++++--------------------- lib/git/refs.py | 16 +++++------ lib/git/repo.py | 17 ++++++------ lib/git/utils.py | 4 +-- 4 files changed, 58 insertions(+), 50 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f1f878d7..c289b825 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -6,14 +6,14 @@ import re import time - +from git.utils import Iterable from git.actor import Actor -from tree import Tree import git.diff as diff import git.stats as stats +from tree import Tree import base -class Commit(base.Object): +class Commit(base.Object, Iterable): """ Wraps a git Commit object. 
@@ -81,7 +81,7 @@ class Commit(base.Object): We set all values at once. """ if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -120,7 +120,7 @@ class Commit(base.Object): return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) @classmethod - def find_all(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, ref, path='', **kwargs): """ Find all commits matching the given criteria. @@ -128,7 +128,7 @@ class Commit(base.Object): is the Repo ``ref`` - is the ref from which to begin (SHA1 or name) + is the ref from which to begin (SHA1, Head or name) ``path`` is an optinal path, if set only Commits that include the path @@ -146,49 +146,56 @@ class Commit(base.Object): options.update(kwargs) output = repo.git.rev_list(ref, '--', path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines(False))) @classmethod - def _list_from_string(cls, repo, text): + def _iter_from_stream(cls, repo, stream): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``text`` - is the text output from the git-rev-list command (raw format) + ``stream`` + output stream from the git-rev-list command (raw format) Returns - git.Commit[] + iterator returning Commit objects """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) + next_line = None + for parent_line in stream: 
+ if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) - # free line - lines.pop(0) + # empty line + stream.next() message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream @classmethod def diff(cls, repo, a, b=None, paths=None): diff --git a/lib/git/refs.py b/lib/git/refs.py index bdfff6f4..32bc7784 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -73,7 +73,7 @@ class Ref(LazyMixin): return '/'.join(tokens[2:]) @classmethod - def find_all(cls, repo, common_path = "refs", **kwargs): + def list_items(cls, repo, common_path = "refs", **kwargs): """ Find all refs in the repository @@ -158,14 +158,14 @@ class Head(Ref): return self.object @classmethod - def find_all(cls, repo, common_path = "refs/heads", **kwargs): + def list_items(cls, repo, common_path = "refs/heads", **kwargs): """ Returns git.Head[] - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(Head,cls).find_all(repo, 
common_path, **kwargs) + return super(Head,cls).list_items(repo, common_path, **kwargs) def __repr__(self): return '' % self.name @@ -181,7 +181,7 @@ class TagRef(Ref): This tag object will always point to a commit object, but may carray additional information in a tag object:: - tagref = TagRef.find_all(repo)[0] + tagref = TagRef.list_items(repo)[0] print tagref.commit.message if tagref.tag is not None: print tagref.tag.message @@ -215,14 +215,14 @@ class TagRef(Ref): return None @classmethod - def find_all(cls, repo, common_path = "refs/tags", **kwargs): + def list_items(cls, repo, common_path = "refs/tags", **kwargs): """ Returns git.Tag[] - For more documentation, please refer to git.base.Ref.find_all + For more documentation, please refer to git.base.Ref.list_items """ - return super(TagRef,cls).find_all(repo, common_path, **kwargs) + return super(TagRef,cls).list_items(repo, common_path, **kwargs) # provide an alias diff --git a/lib/git/repo.py b/lib/git/repo.py index c1387870..0dd776f6 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -102,7 +102,7 @@ class Repo(object): Returns ``git.Head[]`` """ - return Head.find_all(self) + return Head.list_items(self) # alias heads branches = heads @@ -115,7 +115,7 @@ class Repo(object): Returns ``git.Tag[]`` """ - return Tag.find_all(self) + return Tag.list_items(self) def blame(self, commit, file): """ @@ -221,7 +221,7 @@ class Repo(object): options = {'max_count': max_count, 'skip': skip} - return Commit.find_all(self, start, path, **options) + return Commit.list_items(self, start, path, **options) def commits_between(self, frm, to): """ @@ -237,7 +237,7 @@ class Repo(object): Returns ``git.Commit[]`` """ - return reversed(Commit.find_all(self, "%s..%s" % (frm, to))) + return reversed(Commit.list_items(self, "%s..%s" % (frm, to))) def commits_since(self, start='master', path='', since='1970-01-01'): """ @@ -259,7 +259,7 @@ class Repo(object): """ options = {'since': since} - return Commit.find_all(self, start, 
path, **options) + return Commit.list_items(self, start, path, **options) def commit_count(self, start='master', path=''): """ @@ -296,7 +296,7 @@ class Repo(object): id = self.active_branch options = {'max_count': 1} - commits = Commit.find_all(self, id, path, **options) + commits = Commit.list_items(self, id, path, **options) if not commits: raise ValueError, "Invalid identifier %s, or given path '%s' too restrictive" % ( id, path ) @@ -313,7 +313,7 @@ class Repo(object): other_repo_refs = other_repo.git.rev_list(other_ref, '--').strip().splitlines() diff_refs = list(set(other_repo_refs) - set(repo_refs)) - return map(lambda ref: Commit.find_all(other_repo, ref, max_count=1)[0], diff_refs) + return map(lambda ref: Commit.list_items(other_repo, ref, max_count=1)[0], diff_refs) def tree(self, treeish=None): """ @@ -364,7 +364,8 @@ class Repo(object): if path: arg.append(path) commits = self.git.log(*arg, **options) - return Commit._list_from_string(self, commits) + print commits.splitlines(False) + return list(Commit._iter_from_stream(self, iter(commits.splitlines()))) def diff(self, a, b, *paths): """ diff --git a/lib/git/utils.py b/lib/git/utils.py index 96ec15b9..f84c247d 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -75,13 +75,13 @@ class Iterable(object): Returns: list(Item,...) 
list of item instances """ - return list(cls.iter_items, repo, *args, **kwargs) + return list(cls.iter_items(repo, *args, **kwargs)) @classmethod def iter_items(cls, repo, *args, **kwargs): """ - For more information about the arguments, see find_all + For more information about the arguments, see list_items Return: iterator yielding Items """ -- cgit v1.2.3 From ac1cec7066eaa12a8d1a61562bfc6ee77ff5f54d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:49:33 +0200 Subject: added Iterable interface to Ref type --- lib/git/objects/commit.py | 2 +- lib/git/refs.py | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c289b825..f9245217 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -140,7 +140,7 @@ class Commit(base.Object, Iterable): ``skip`` is the number of commits to skip Returns - git.Commit[] + iterator yielding Commit items """ options = {'pretty': 'raw'} options.update(kwargs) diff --git a/lib/git/refs.py b/lib/git/refs.py index 32bc7784..df914b78 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -8,9 +8,9 @@ Module containing all ref based objects """ from objects.base import Object from objects.utils import get_object_type_by_name -from utils import LazyMixin +from utils import LazyMixin, Iterable -class Ref(LazyMixin): +class Ref(LazyMixin, Iterable): """ Represents a named reference to any object """ @@ -73,7 +73,7 @@ class Ref(LazyMixin): return '/'.join(tokens[2:]) @classmethod - def list_items(cls, repo, common_path = "refs", **kwargs): + def iter_items(cls, repo, common_path = "refs", **kwargs): """ Find all refs in the repository @@ -102,15 +102,15 @@ class Ref(LazyMixin): options.update(kwargs) output = repo.git.for_each_ref(common_path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines())) @classmethod - def 
_list_from_string(cls, repo, text): + def _iter_from_stream(cls, repo, stream): """ Parse out ref information into a list of Ref compatible objects Returns git.Ref[] list of Ref objects """ heads = [] - for line in text.splitlines(): + for line in stream: heads.append(cls._from_string(repo, line)) return heads @@ -158,14 +158,14 @@ class Head(Ref): return self.object @classmethod - def list_items(cls, repo, common_path = "refs/heads", **kwargs): + def iter_items(cls, repo, common_path = "refs/heads", **kwargs): """ Returns - git.Head[] + Iterator yielding Head items For more documentation, please refer to git.base.Ref.list_items """ - return super(Head,cls).list_items(repo, common_path, **kwargs) + return super(Head,cls).iter_items(repo, common_path, **kwargs) def __repr__(self): return '' % self.name @@ -215,14 +215,14 @@ class TagRef(Ref): return None @classmethod - def list_items(cls, repo, common_path = "refs/tags", **kwargs): + def iter_items(cls, repo, common_path = "refs/tags", **kwargs): """ Returns - git.Tag[] + Iterator yielding commit items For more documentation, please refer to git.base.Ref.list_items """ - return super(TagRef,cls).list_items(repo, common_path, **kwargs) + return super(TagRef,cls).iter_items(repo, common_path, **kwargs) # provide an alias -- cgit v1.2.3 From ead94f267065bb55303f79a0a6df477810b3c68d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 14:33:51 +0200 Subject: cmd: added option to return the process directly, allowing to read the output directly from the output stream commit: now reads commit information directly from the output stream of the process by implementing its iterator method repo: removed log method as it was redundant ( equal to the commits method ) --- lib/git/cmd.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- lib/git/objects/commit.py | 14 ++++++++------ lib/git/repo.py | 20 -------------------- 3 files changed, 53 insertions(+), 27 deletions(-) (limited to 'lib') diff --git 
a/lib/git/cmd.py b/lib/git/cmd.py index 940e35d1..867baee7 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,7 +13,7 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'with_raw_output') + 'with_exceptions', 'with_raw_output', 'as_process') extra = {} if sys.platform == 'win32': @@ -34,6 +34,35 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. """ + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object + """ + __slots__= "proc" + + def __init__(self, proc ): + self.proc = proc + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + subprocess.call(("TASKKILL", "/T", "/PID", self.proc.pid)) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def __init__(self, git_dir=None): """ Initialize this instance with: @@ -70,6 +99,7 @@ class Git(object): with_extended_output=False, with_exceptions=True, with_raw_output=False, + as_process=False ): """ Handles executing the command on the shell and consumes and returns @@ -96,6 +126,16 @@ class Git(object): ``with_raw_output`` Whether to avoid stripping off trailing whitespace. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output, + with_exceptions and with_raw_output ineffective - the caller will have + to deal with the details himself. 
+ It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. Returns:: @@ -127,7 +167,11 @@ class Git(object): **extra ) + if as_process: + return self.AutoInterrupt(proc) + # Wait for the process to return + status = 0 try: stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f9245217..340686ea 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -142,26 +142,28 @@ class Commit(base.Object, Iterable): Returns iterator yielding Commit items """ - options = {'pretty': 'raw'} + options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - output = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_stream(repo, iter(output.splitlines(False))) + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process(repo, proc) @classmethod - def _iter_from_stream(cls, repo, stream): + def _iter_from_process(cls, repo, proc): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``stream`` - output stream from the git-rev-list command (raw format) + ``proc`` + git-rev-list process instance (raw format) Returns iterator returning Commit objects """ + stream = proc.stdout for line in stream: id = line.split()[1] assert line.split()[0] == "commit" diff --git a/lib/git/repo.py b/lib/git/repo.py index 0dd776f6..d5dab242 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -347,26 +347,6 @@ class Repo(object): return root - def log(self, commit='master', path=None, **kwargs): - """ - The Commit for a treeish, and all commits leading to it. 
- - ``kwargs`` - keyword arguments specifying flags to be used in git-log command, - i.e.: max_count=1 to limit the amount of commits returned - - Returns - ``git.Commit[]`` - """ - options = {'pretty': 'raw'} - options.update(kwargs) - arg = [commit, '--'] - if path: - arg.append(path) - commits = self.git.log(*arg, **options) - print commits.splitlines(False) - return list(Commit._iter_from_stream(self, iter(commits.splitlines()))) - def diff(self, a, b, *paths): """ The diff from commit ``a`` to commit ``b``, optionally restricted to the given file(s) -- cgit v1.2.3 From 6eeae8b24135b4de05f6d725b009c287577f053d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 17:24:15 +0200 Subject: test: Added time-consuming test which could also be a benchmark in fact - currently it cause hundreds of command invocations which is slow Fixed issue with trees not properly initialized with their default mode _set_cache_: some objects checked whether the attribute was within their __slots__ although it should have been accessed through its class --- lib/git/objects/base.py | 2 +- lib/git/objects/commit.py | 4 ++-- lib/git/objects/tag.py | 2 +- lib/git/objects/tree.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index d3e0d943..9789d72a 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -121,7 +121,7 @@ class IndexObject(Object): self.mode = self._mode_str_to_int(mode) def _set_cache_(self, attr): - if attr in self.__slots__: + if attr in IndexObject.__slots__: # they cannot be retrieved lateron ( not without searching for them ) raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) else: diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 340686ea..69fb3710 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -71,7 +71,7 @@ class 
Commit(base.Object, Iterable): # END for each parent to convert if self.id and tree is not None: - self.tree = Tree(repo, id=tree) + self.tree = Tree(repo, id=tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -80,7 +80,7 @@ class Commit(base.Object, Iterable): to be set. We set all values at once. """ - if attr in self.__slots__: + if attr in Commit.__slots__: temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index 261d835f..77d715c7 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -48,7 +48,7 @@ class TagObject(base.Object): """ Cache all our attributes at once """ - if attr in self.__slots__: + if attr in TagObject.__slots__: output = self.repo.git.cat_file(self.type,self.id) lines = output.split("\n") diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 1bc35d95..01dfb37b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -29,7 +29,7 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_cache" - def __init__(self, repo, id, mode=None, path=None): + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): -- cgit v1.2.3 From a28d3d18f9237af5101eb22e506a9ddda6d44025 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 18:50:55 +0200 Subject: Implemented git command facility to keep persistent commands for fast object information retrieval --- lib/git/cmd.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 867baee7..92ef3bda 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -34,7 +34,6 @@ class Git(object): of the command to stdout. Set its value to 'full' to see details about the returned values. 
""" - class AutoInterrupt(object): """ Kill/Interrupt the stored process instance once this instance goes out of scope. It is @@ -50,7 +49,7 @@ class Git(object): # did the process finish already so we have a return code ? if self.proc.poll() is not None: return - + # try to kill it try: os.kill(self.proc.pid, 2) # interrupt signal @@ -73,6 +72,10 @@ class Git(object): """ super(Git, self).__init__() self.git_dir = git_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None def __getattr__(self, name): """ @@ -262,3 +265,74 @@ class Git(object): call.extend(args) return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + type_string size_as_int + + Returns + (type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) + + return (tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. 
+ + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (typename, size, data) -- cgit v1.2.3 From 6745f4542cfb74bbf3b933dba7a59ef2f54a4380 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:34:45 +0200 Subject: test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend All objects but Tree now use the persistent command to read their object information - Trees get binary data and would need their own pretty-printing or they need to parse the data themselves which is my favorite --- lib/git/objects/base.py | 8 ++++++-- lib/git/objects/commit.py | 14 ++++++++++---- lib/git/objects/tag.py | 3 +-- lib/git/refs.py | 17 ++++++++++++----- 4 files changed, 29 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 9789d72a..7b693be9 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -48,9 +48,13 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - self.size = int(self.repo.git.cat_file(self.id, s=True).rstrip()) + typename, self.size = self.repo.git.get_object_header(self.id) + assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) elif attr == "data": - self.data = 
self.repo.git.cat_file(self.id, p=True, with_raw_output=True) + typename, self.size, self.data = self.repo.git.get_object_data(self.id) + assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + else: + super(Object,self)._set_cache_(attr) def __eq__(self, other): """ diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 69fb3710..101014ab 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -81,7 +81,10 @@ class Commit(base.Object, Iterable): We set all values at once. """ if attr in Commit.__slots__: - temp = Commit.list_items(self.repo, self.id, max_count=1)[0] + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -147,10 +150,10 @@ class Commit(base.Object, Iterable): # the test system might confront us with string values - proc = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_process(repo, proc) + return cls._iter_from_process_or_stream(repo, proc) @classmethod - def _iter_from_process(cls, repo, proc): + def _iter_from_process_or_stream(cls, repo, proc_or_stream): """ Parse out commit information into a list of Commit objects @@ -163,7 +166,10 @@ class Commit(base.Object, Iterable): Returns iterator returning Commit objects """ - stream = proc.stdout + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + for line in stream: id = line.split()[1] assert line.split()[0] == "commit" diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index 77d715c7..ecf6349d 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -49,8 +49,7 @@ class TagObject(base.Object): Cache all our attributes at once """ if attr in TagObject.__slots__: - output 
= self.repo.git.cat_file(self.type,self.id) - lines = output.split("\n") + lines = self.data.splitlines() obj, hexsha = lines[0].split(" ") # object type_token, type_name = lines[1].split(" ") # type diff --git a/lib/git/refs.py b/lib/git/refs.py index df914b78..9754f65d 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -38,8 +38,10 @@ class Ref(LazyMixin, Iterable): if attr == "object": # have to be dynamic here as we may be a tag which can point to anything # it uses our path to stay dynamic - type_string = self.repo.git.cat_file(self.path, t=True).rstrip() - self.object = get_object_type_by_name(type_string)(self.repo, self.path) + typename, size = self.repo.git.get_object_header(self.path) + # explicitly do not set the size as it may change if the our ref path points + # at some other place when the head changes for instance ... + self.object = get_object_type_by_name(typename)(self.repo, self.path) else: super(Ref, self)._set_cache_(attr) @@ -124,9 +126,14 @@ class Ref(LazyMixin, Iterable): id: [0-9A-Fa-f]{40} Returns git.Head """ full_path, hexsha, type_name, object_size = line.split("\x00") - obj = get_object_type_by_name(type_name)(repo, hexsha) - obj.size = object_size - return cls(repo, full_path, obj) + + # No, we keep the object dynamic by allowing it to be retrieved by + # our path on demand - due to perstent commands it is fast + return cls(repo, full_path) + + # obj = get_object_type_by_name(type_name)(repo, hexsha) + # obj.size = object_size + # return cls(repo, full_path, obj) class Head(Ref): -- cgit v1.2.3 From c5df44408218003eb49e3b8fc94329c5e8b46c7d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:41:27 +0200 Subject: persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached --- lib/git/cmd.py | 12 
++++++------ lib/git/objects/base.py | 4 ++-- lib/git/refs.py | 13 ++++++++----- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 92ef3bda..2965eb8b 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -272,7 +272,7 @@ class Git(object): type_string size_as_int Returns - (type_string, size_as_int) + (hex_sha, type_string, size_as_int) Raises ValueError if the header contains indication for an error due to incorrect @@ -282,7 +282,7 @@ class Git(object): if len(tokens) != 3: raise ValueError( "SHA named %s could not be resolved" % tokens[0] ) - return (tokens[1], int(tokens[2])) + return (tokens[0], tokens[1], int(tokens[2])) def __prepare_ref(self, ref): # required for command to separate refs on stdin @@ -318,7 +318,7 @@ class Git(object): once and reuses the command in subsequent calls. Return: - (type_string, size_as_int) + (hexsha, type_string, size_as_int) """ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) return self.__get_object_header(cmd, ref) @@ -328,11 +328,11 @@ class Git(object): As get_object_header, but returns object data as well Return: - (type_string, size_as_int,data_string) + (hexsha, type_string, size_as_int,data_string) """ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - typename, size = self.__get_object_header(cmd, ref) + hexsha, typename, size = self.__get_object_header(cmd, ref) data = cmd.stdout.read(size) cmd.stdout.read(1) # finishing newlines - return (typename, size, data) + return (hexsha, typename, size, data) diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 7b693be9..6752a25e 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -48,10 +48,10 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - typename, self.size = self.repo.git.get_object_header(self.id) + hexsha, typename, self.size = self.repo.git.get_object_header(self.id) assert 
typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) elif attr == "data": - typename, self.size, self.data = self.repo.git.get_object_data(self.id) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) else: super(Object,self)._set_cache_(attr) diff --git a/lib/git/refs.py b/lib/git/refs.py index 9754f65d..be02fb40 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -38,10 +38,11 @@ class Ref(LazyMixin, Iterable): if attr == "object": # have to be dynamic here as we may be a tag which can point to anything # it uses our path to stay dynamic - typename, size = self.repo.git.get_object_header(self.path) - # explicitly do not set the size as it may change if the our ref path points - # at some other place when the head changes for instance ... - self.object = get_object_type_by_name(typename)(self.repo, self.path) + hexsha, typename, size = self.repo.git.get_object_header(self.path) + # pin-point our object to a specific sha, even though it might not + # reflect the our cached object anymore in case our rev now points + # to a different commit + self.object = get_object_type_by_name(typename)(self.repo, hexsha) else: super(Ref, self)._set_cache_(attr) @@ -128,7 +129,9 @@ class Ref(LazyMixin, Iterable): full_path, hexsha, type_name, object_size = line.split("\x00") # No, we keep the object dynamic by allowing it to be retrieved by - # our path on demand - due to perstent commands it is fast + # our path on demand - due to perstent commands it is fast. 
+ # This reduces the risk that the object does not match + # the changed ref anymore in case it changes in the meanwhile return cls(repo, full_path) # obj = get_object_type_by_name(type_name)(repo, hexsha) -- cgit v1.2.3 From 832b56394b079c9f6e4c777934447a9e224facfe Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:46:24 +0200 Subject: Refs are now truly dynamic - this costs a little bit of (persistent command) work, but assures refs behave as expected --- lib/git/refs.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/git/refs.py b/lib/git/refs.py index be02fb40..3c9eb817 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -14,7 +14,7 @@ class Ref(LazyMixin, Iterable): """ Represents a named reference to any object """ - __slots__ = ("repo", "path", "object") + __slots__ = ("repo", "path") def __init__(self, repo, path, object = None): """ @@ -34,18 +34,6 @@ class Ref(LazyMixin, Iterable): if object is not None: self.object = object - def _set_cache_(self, attr): - if attr == "object": - # have to be dynamic here as we may be a tag which can point to anything - # it uses our path to stay dynamic - hexsha, typename, size = self.repo.git.get_object_header(self.path) - # pin-point our object to a specific sha, even though it might not - # reflect the our cached object anymore in case our rev now points - # to a different commit - self.object = get_object_type_by_name(typename)(self.repo, hexsha) - else: - super(Ref, self)._set_cache_(attr) - def __str__(self): return self.name @@ -74,7 +62,18 @@ class Ref(LazyMixin, Iterable): return self.path # could be refs/HEAD return '/'.join(tokens[2:]) - + + @property + def object(self): + """ + Returns + The object our ref currently refers to. 
Refs can be cached, they will + always point to the actual object as it gets re-created on each query + """ + # have to be dynamic here as we may be a tag which can point to anything + hexsha, typename, size = self.repo.git.get_object_header(self.path) + return get_object_type_by_name(typename)(self.repo, hexsha) + @classmethod def iter_items(cls, repo, common_path = "refs", **kwargs): """ -- cgit v1.2.3 From 2e6d110fbfa1f2e6a96bc8329e936d0cf1192844 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 23:37:45 +0200 Subject: tree: now reads tress directly by parsing the binary data, allowing it to safe possibly hundreds of command calls --- lib/git/objects/base.py | 7 ++-- lib/git/objects/tree.py | 102 +++++++++++++++++++++++++++++++++--------------- lib/git/repo.py | 16 ++++++-- 3 files changed, 86 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6752a25e..07538ada 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,7 +6,8 @@ import os from git.utils import LazyMixin - +_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" + class Object(LazyMixin): """ Implements an Object which may be Blobs, Trees, Commits and Tags @@ -49,10 +50,10 @@ class Object(LazyMixin): """ if attr == "size": hexsha, typename, self.size = self.repo.git.get_object_header(self.id) - assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) elif attr == "data": hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) - assert typename == self.type, "Created object whose python type %r disagrees with the acutal git object type %r" % (typename, self.type) + assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) else: 
super(Object,self)._set_cache_(attr) diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 01dfb37b..abfa9622 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -7,6 +7,13 @@ import os import blob import base +import binascii + +def sha_to_hex(sha): + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha + return hexsha class Tree(base.IndexObject): """ @@ -29,18 +36,23 @@ class Tree(base.IndexObject): type = "tree" __slots__ = "_cache" + # using ascii codes for comparison + ascii_commit_id = (0x31 << 4) + 0x36 + ascii_blob_id = (0x31 << 4) + 0x30 + ascii_tree_id = (0x34 << 4) + 0x30 + + def __init__(self, repo, id, mode=0, path=None): super(Tree, self).__init__(repo, id, mode, path) def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it - self._cache = self._get_tree_cache(self.repo, self.id) + self._cache = self._get_tree_cache() else: super(Tree, self)._set_cache_(attr) - @classmethod - def _get_tree_cache(cls, repo, treeish): + def _get_tree_cache(self): """ Return list(object_instance, ...) @@ -49,45 +61,71 @@ class Tree(base.IndexObject): sha or ref identifying a tree """ out = list() - for line in repo.git.ls_tree(treeish).splitlines(): - obj = cls._from_string(repo, line) + for obj in self._iter_from_data(): if obj is not None: out.append(obj) # END if object was handled # END for each line from ls-tree return out - - @classmethod - def _from_string(cls, repo, text): + + def _iter_from_data(self): """ - Parse a content item and create the appropriate object - - ``repo`` - is the Repo - - ``text`` - is the single line containing the items data in `git ls-tree` format - + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. 
+ Returns - ``git.Blob`` or ``git.Tree`` - - NOTE: Currently sub-modules are ignored ! + list(IndexObject, ...) """ - try: - mode, typ, id, path = text.expandtabs(1).split(" ", 3) - except: - return None + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + mode_boundary = i + 6 + + # keep it ascii - we compare against the respective values + type_id = (ord(data[i])<<4) + ord(data[i+1]) + i += 2 + + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.ascii_blob_id: + yield blob.Blob(self.repo, hexsha, mode, name) + elif type_id == self.ascii_tree_id: + yield Tree(self.repo, hexsha, mode, name) + elif type_id == self.ascii_commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data: %i" % type_id ) + # END for each byte in data stream - if typ == "tree": - return Tree(repo, id, mode, path) - elif typ == "blob": - return blob.Blob(repo, id, mode, path) - elif typ == "commit": - # TODO: Return a submodule - return None - else: - raise(TypeError, "Invalid type: %s" % typ) def __div__(self, file): """ diff --git a/lib/git/repo.py b/lib/git/repo.py index d5dab242..f07edbe0 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -340,11 +340,19 @@ class Repo(object): if not isinstance(treeish, Ref): raise ValueError( "Treeish reference required, got %r" % treeish ) - # we should also check whether the ref has a valid commit ... 
but lets n - # not be over-critical + + # As we are directly reading object information, we must make sure + # we truly point to a tree object. We resolve the ref to a sha in all cases + # to assure the returned tree can be compared properly. Except for + # heads, ids should always be hexshas + hexsha, typename, size = self.git.get_object_header( treeish ) + if typename != "tree": + hexsha, typename, size = self.git.get_object_header( str(treeish)+'^{tree}' ) + # END tree handling + treeish = hexsha + # the root has an empty relative path and the default mode - root = Tree(self, treeish, 0, '') - return root + return Tree(self, treeish, 0, '') def diff(self, a, b, *paths): -- cgit v1.2.3 From 1a4bfd979e5d4ea0d0457e552202eb2effc36cac Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 15 Oct 2009 00:06:08 +0200 Subject: test_performance: module containing benchmarks to get an idea of the achieved throughput repo.commits: max_count is None by default moved benchmark-like test from test_commit to test_performance --- lib/git/repo.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/git/repo.py b/lib/git/repo.py index f07edbe0..c74c7e8d 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -197,7 +197,7 @@ class Repo(object): # END distinguish hexsha vs other information return blames - def commits(self, start='master', path='', max_count=10, skip=0): + def commits(self, start='master', path='', max_count=None, skip=0): """ A list of Commit objects representing the history of a given ref/commit @@ -209,7 +209,7 @@ class Repo(object): Commits that do not contain that path will not be returned. 
``max_count`` - is the maximum number of commits to return (default 10) + is the maximum number of commits to return (default None) ``skip`` is the number of commits to skip (default 0) which will effectively @@ -220,7 +220,10 @@ class Repo(object): """ options = {'max_count': max_count, 'skip': skip} - + + if max_count is None: + options.pop('max_count') + return Commit.list_items(self, start, path, **options) def commits_between(self, frm, to): -- cgit v1.2.3