path: root/git/db
Diffstat (limited to 'git/db')
-rw-r--r--  git/db/__init__.py        6
-rw-r--r--  git/db/cmd/__init__.py    4
-rw-r--r--  git/db/cmd/base.py      821
-rw-r--r--  git/db/cmd/complex.py    16
-rw-r--r--  git/db/compat.py         31
-rw-r--r--  git/db/complex.py        28
-rw-r--r--  git/db/interface.py     838
-rw-r--r--  git/db/py/__init__.py     4
-rw-r--r--  git/db/py/base.py       474
-rw-r--r--  git/db/py/complex.py    128
-rw-r--r--  git/db/py/loose.py      263
-rw-r--r--  git/db/py/mem.py        112
-rw-r--r--  git/db/py/pack.py       212
-rw-r--r--  git/db/py/ref.py         77
-rw-r--r--  git/db/py/resolve.py    367
-rw-r--r--  git/db/py/submodule.py   33
-rw-r--r--  git/db/py/transport.py   58
17 files changed, 3472 insertions, 0 deletions
diff --git a/git/db/__init__.py b/git/db/__init__.py
new file mode 100644
index 00000000..25948326
--- /dev/null
+++ b/git/db/__init__.py
@@ -0,0 +1,6 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from interface import *
diff --git a/git/db/cmd/__init__.py b/git/db/cmd/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/db/cmd/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
new file mode 100644
index 00000000..ef22c931
--- /dev/null
+++ b/git/db/cmd/base.py
@@ -0,0 +1,821 @@
+"""module with git command implementations of the basic interfaces
+:note: we could add all implementations of the basic interfaces, it's more
+ efficient though to obtain them from the pure implementation"""
+from git.exc import (
+ GitCommandError,
+ BadObject
+ )
+
+from git.base import (
+ OInfo,
+ OStream
+ )
+
+from git.util import (
+ bin_to_hex,
+ hex_to_bin,
+ isfile,
+ join_path,
+ join,
+ Actor,
+ IterableList,
+ )
+from git.db.interface import (
+ FetchInfo,
+ PushInfo,
+ HighLevelRepository,
+ TransportDB,
+ RemoteProgress
+ )
+from git.cmd import Git
+from git.refs import (
+ Reference,
+ RemoteReference,
+ SymbolicReference,
+ TagReference
+ )
+from git.objects.commit import Commit
+from cStringIO import StringIO
+import re
+import os
+import sys
+
+
+__all__ = ('CmdTransportMixin', 'GitCommandMixin', 'CmdPushInfo', 'CmdFetchInfo',
+ 'CmdRemoteProgress', 'CmdObjectDBRMixin', 'CmdHighLevelRepository')
+
+
+#{ Utilities
+
+def touch(filename):
+ fp = open(filename, "a")
+ fp.close()
+
+
+def digest_process_messages(fh, progress):
+ """Read progress messages from file-like object fh, supplying the respective
+ progress messages to the progress instance.
+
+ :return: list(line, ...) list of lines without linebreaks that did
+ not contain progress information"""
+ line_so_far = ''
+ dropped_lines = list()
+ while True:
+ char = fh.read(1)
+ if not char:
+ break
+
+ if char in ('\r', '\n'):
+ dropped_lines.extend(progress._parse_progress_line(line_so_far))
+ line_so_far = ''
+ else:
+ line_so_far += char
+ # END process parsed line
+ # END while file is not done reading
+ return dropped_lines
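A minimal sketch of how this helper behaves, feeding it a canned stderr stream (the CmdRemoteProgress type is defined further below; the sample lines only mimic git output):

    from cStringIO import StringIO

    fh = StringIO("Counting objects: 4, done.\n"
                  "Compressing objects:  50% (1/2)\rCompressing objects: 100% (2/2), done.\n")
    dropped = digest_process_messages(fh, CmdRemoteProgress())
    # progress lines were dispatched to the progress instance, anything
    # unparseable is returned in 'dropped'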
+
+def finalize_process(proc):
+ """Wait for the process (fetch, pull or push) and handle its errors accordingly"""
+ try:
+ proc.wait()
+ except GitCommandError,e:
+ # if a push has rejected items, the command has non-zero return status
+ # a return status of 128 indicates a connection error - reraise the previous one
+ if proc.poll() == 128:
+ raise
+ pass
+ # END exception handling
+
+
+def get_fetch_info_from_stderr(repo, proc, progress):
+ output = IterableList('name')
+
+ # lines which carry no progress information are fetch info lines
+ # this also waits for the command to finish
+ # skip progress lines that don't provide relevant information
+ fetch_info_lines = list()
+ for line in digest_process_messages(proc.stderr, progress):
+ if line.startswith('From') or line.startswith('remote: Total'):
+ continue
+ elif line.startswith('warning:'):
+ print >> sys.stderr, line
+ continue
+ elif line.startswith('fatal:'):
+ raise GitCommandError(("Error when fetching: %s" % line,), 2)
+ # END handle special messages
+ fetch_info_lines.append(line)
+ # END for each line
+
+ # read head information
+ fp = open(join(repo.git_dir, 'FETCH_HEAD'),'r')
+ fetch_head_info = fp.readlines()
+ fp.close()
+
+ assert len(fetch_info_lines) == len(fetch_head_info)
+
+ output.extend(CmdFetchInfo._from_line(repo, err_line, fetch_line)
+ for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info))
+
+ finalize_process(proc)
+ return output
+
+def get_push_info(repo, remotename_or_url, proc, progress):
+ # progress information is printed to stderr with carriage returns between
+ # messages to overwrite the previous one - hence we digest it byte-wise,
+ # while the porcelain push result itself arrives on stdout
+ digest_process_messages(proc.stderr, progress)
+
+ output = IterableList('name')
+ for line in proc.stdout.readlines():
+ try:
+ output.append(CmdPushInfo._from_line(repo, remotename_or_url, line))
+ except ValueError:
+ # if an error happens, additional info is given which we cannot parse
+ pass
+ # END exception handling
+ # END for each line
+
+ finalize_process(proc)
+ return output
+
+def add_progress(kwargs, git, progress):
+ """Add the --progress flag to the given kwargs dict if supported by the
+ git command. If the given progress wrapper carries no user-provided progress
+ instance, we do not request any progress output
+ :return: possibly altered kwargs"""
+ if progress._progress is not None:
+ v = git.version_info
+ # compare the version tuple as a whole - the per-component checks of the
+ # original intend a minimum version of roughly 1.7.0.4
+ if v >= (1, 7, 0, 4):
+ kwargs['progress'] = True
+ #END handle --progress
+ #END handle progress
+ return kwargs
+
+#} END utilities
+
+class CmdRemoteProgress(RemoteProgress):
+ """
+ A Remote progress implementation taking a user derived progress to call the
+ respective methods on.
+ """
+ __slots__ = ("_seen_ops", '_progress')
+ re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)")
+ re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)")
+
+ def __init__(self, progress_instance = None):
+ self._seen_ops = list()
+ if progress_instance is None:
+ progress_instance = RemoteProgress()
+ #END assure proper instance
+ self._progress = progress_instance
+
+ def _parse_progress_line(self, line):
+ """Parse progress information from the given line as retrieved by git-push
+ or git-fetch
+
+ Call the own update(), __call__() and line_dropped() methods according
+ to the parsed result.
+
+ :return: list(line, ...) list of lines that could not be processed"""
+ # handle
+ # Counting objects: 4, done.
+ # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done.
+ sub_lines = line.split('\r')
+ failed_lines = list()
+ for sline in sub_lines:
+ # find escape characters and cut them away - regex will not work with
+ # them as they are non-printable. As git might expect a tty, it will send them
+ last_valid_index = None
+ for i,c in enumerate(reversed(sline)):
+ if ord(c) < 32:
+ # it's a slice index
+ last_valid_index = -i-1
+ # END character was non-printable
+ # END for each character in sline
+ if last_valid_index is not None:
+ sline = sline[:last_valid_index]
+ # END cut away invalid part
+ sline = sline.rstrip()
+
+ cur_count, max_count = None, None
+ match = self.re_op_relative.match(sline)
+ if match is None:
+ match = self.re_op_absolute.match(sline)
+
+ if not match:
+ self._progress.line_dropped(sline)
+ failed_lines.append(sline)
+ continue
+ # END could not get match
+
+ op_code = 0
+ remote, op_name, percent, cur_count, max_count, message = match.groups()
+
+ # get operation id
+ if op_name == "Counting objects":
+ op_code |= self.COUNTING
+ elif op_name == "Compressing objects":
+ op_code |= self.COMPRESSING
+ elif op_name == "Writing objects":
+ op_code |= self.WRITING
+ elif op_name == "Receiving objects":
+ op_code |= self.RECEIVING
+ elif op_name == "Resolving deltas":
+ op_code |= self.RESOLVING
+ else:
+ raise ValueError("Operation name %r unknown" % op_name)
+
+ # figure out stage
+ if op_code not in self._seen_ops:
+ self._seen_ops.append(op_code)
+ op_code |= self.BEGIN
+ # END begin opcode
+
+ if message is None:
+ message = ''
+ # END message handling
+
+ message = message.strip()
+ done_token = ', done.'
+ if message.endswith(done_token):
+ op_code |= self.END
+ message = message[:-len(done_token)]
+ # END end message handling
+
+ self._progress.update(op_code, cur_count, max_count, message, line)
+ self._progress(message, line)
+ # END for each sub line
+ return failed_lines
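A hedged sketch of how a user-supplied progress object receives these callbacks; PrintProgress is hypothetical:

    class PrintProgress(RemoteProgress):
        """Hypothetical user progress printing every update"""
        def update(self, op_code, cur_count, max_count=None, message='', input=''):
            print("%s of %s %s" % (cur_count, max_count, message))

    progress = CmdRemoteProgress(PrintProgress())
    progress._parse_progress_line("Receiving objects:  50% (2/4)")
    # dispatches update(RECEIVING | BEGIN, '2', '4', ...) on the wrapped
    # instance - note that the counts arrive as strings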
+
+
+class CmdPushInfo(PushInfo):
+ """
+ Implementation of the PushInfo interface, parsed from git-push porcelain output
+ """
+ __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit_binsha',
+ '_remotename_or_url', 'repo', 'summary')
+
+ _flag_map = { 'X' : PushInfo.NO_MATCH,
+ '-' : PushInfo.DELETED, '*' : 0,
+ '+' : PushInfo.FORCED_UPDATE,
+ ' ' : PushInfo.FAST_FORWARD,
+ '=' : PushInfo.UP_TO_DATE,
+ '!' : PushInfo.ERROR }
+
+ def __init__(self, flags, local_ref, remote_ref_string, repo, remotename_or_url, old_commit_binsha=None,
+ summary=''):
+ """ Initialize a new instance """
+ self.flags = flags
+ self.local_ref = local_ref
+ self.repo = repo
+ self.remote_ref_string = remote_ref_string
+ self._remotename_or_url = remotename_or_url
+ self.old_commit_binsha = old_commit_binsha
+ self.summary = summary
+
+ @property
+ def remote_ref(self):
+ """
+ :return:
+ Remote Reference or TagReference in the local repository corresponding
+ to the remote_ref_string kept in this instance."""
+ # translate heads to a local remote, tags stay as they are
+ if self.remote_ref_string.startswith("refs/tags"):
+ return TagReference(self.repo, self.remote_ref_string)
+ elif self.remote_ref_string.startswith("refs/heads"):
+ remote_ref = Reference(self.repo, self.remote_ref_string)
+ if '/' in self._remotename_or_url:
+ sys.stderr.write("Cannot provide RemoteReference instance if it was created from a url instead of a remote name: %s. Returning Reference instance instead\n" % self._remotename_or_url)
+ return remote_ref
+ #END assert correct input
+ return RemoteReference(self.repo, "refs/remotes/%s/%s" % (str(self._remotename_or_url), remote_ref.name))
+ else:
+ raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string)
+ # END
+
+ @classmethod
+ def _from_line(cls, repo, remotename_or_url, line):
+ """Create a new PushInfo instance as parsed from line which is expected to be like
+ refs/heads/master:refs/heads/master 05d2687..1d0568e"""
+ # a maxsplit of 2 keeps any further tabs within the summary
+ control_character, from_to, summary = line.split('\t', 2)
+ flags = 0
+
+ # control character handling
+ try:
+ flags |= cls._flag_map[ control_character ]
+ except KeyError:
+ raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line))
+ # END handle control character
+
+ # from_to handling
+ from_ref_string, to_ref_string = from_to.split(':')
+ if flags & cls.DELETED:
+ from_ref = None
+ else:
+ from_ref = Reference.from_path(repo, from_ref_string)
+
+ # commit handling, could be message or commit info
+ old_commit_binsha = None
+ if summary.startswith('['):
+ if "[rejected]" in summary:
+ flags |= cls.REJECTED
+ elif "[remote rejected]" in summary:
+ flags |= cls.REMOTE_REJECTED
+ elif "[remote failure]" in summary:
+ flags |= cls.REMOTE_FAILURE
+ elif "[no match]" in summary:
+ flags |= cls.ERROR
+ elif "[new tag]" in summary:
+ flags |= cls.NEW_TAG
+ elif "[new branch]" in summary:
+ flags |= cls.NEW_HEAD
+ # uptodate encoded in control character
+ else:
+ # fast-forward or forced update - was encoded in control character,
+ # but we parse the old and new commit
+ split_token = "..."
+ if control_character == " ":
+ split_token = ".."
+ old_sha, new_sha = summary.split(' ')[0].split(split_token)
+ old_commit_binsha = repo.resolve(old_sha)
+ # END message handling
+
+ return cls(flags, from_ref, to_ref_string, repo, remotename_or_url, old_commit_binsha, summary)
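For illustration, a hedged parse of a porcelain line describing a deleted remote ref; the line content is a made-up example of git-push --porcelain output, and the repository instance is not consulted on this code path:

    line = "-\t:refs/heads/stale\t[deleted]"
    info = CmdPushInfo._from_line(repo, 'origin', line)
    assert info.flags & PushInfo.DELETED
    assert info.local_ref is None    # deletions carry no local ref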
+
+
+class CmdFetchInfo(FetchInfo):
+ """
+ Pure python implementation of a FetchInfo interface
+ """
+ __slots__ = ('ref','old_commit_binsha', 'flags', 'note')
+
+ # %c %-*s %-*s -> %s (%s)
+ re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?")
+
+ _flag_map = { '!' : FetchInfo.ERROR,
+ '+' : FetchInfo.FORCED_UPDATE,
+ '-' : FetchInfo.TAG_UPDATE,
+ '*' : 0,
+ '=' : FetchInfo.HEAD_UPTODATE,
+ ' ' : FetchInfo.FAST_FORWARD }
+
+ def __init__(self, ref, flags, note = '', old_commit_binsha = None):
+ """
+ Initialize a new instance
+ """
+ self.ref = ref
+ self.flags = flags
+ self.note = note
+ self.old_commit_binsha = old_commit_binsha
+
+ def __str__(self):
+ return self.name
+
+ @property
+ def name(self):
+ """:return: Name of our remote ref"""
+ return self.ref.name
+
+ @property
+ def commit(self):
+ """:return: Commit of our remote ref"""
+ return self.ref.commit
+
+ @classmethod
+ def _from_line(cls, repo, line, fetch_line):
+ """Parse information from the given line as returned by git-fetch -v
+ and return a new CmdFetchInfo object representing this information.
+
+ We can handle a line as follows
+ "%c %-*s %-*s -> %s%s"
+
+ Where c is either ' ', !, +, -, *, or =
+ ! means error
+ + means success forcing update
+ - means a tag was updated
+ * means birth of new branch or tag
+ = means the head was up to date ( and not moved )
+ ' ' means a fast-forward
+
+ fetch line is the corresponding line from FETCH_HEAD, like
+ acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo"""
+ match = cls.re_fetch_result.match(line)
+ if match is None:
+ raise ValueError("Failed to parse line: %r" % line)
+
+ # parse lines
+ control_character, operation, local_remote_ref, remote_local_ref, note = match.groups()
+ try:
+ new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t")
+ ref_type_name, fetch_note = fetch_note.split(' ', 1)
+ except ValueError: # unpack error
+ raise ValueError("Failed to parse FETCH_HEAD line: %r" % fetch_line)
+
+ # handle FETCH_HEAD and figure out ref type
+ # If we do not specify a target branch like master:refs/remotes/origin/master,
+ # the fetch result is stored in FETCH_HEAD which destroys the rule we usually
+ # have. In that case we use a symbolic reference which is detached
+ ref_type = None
+ if remote_local_ref == "FETCH_HEAD":
+ ref_type = SymbolicReference
+ elif ref_type_name == "branch":
+ ref_type = RemoteReference
+ elif ref_type_name == "tag":
+ ref_type = TagReference
+ else:
+ raise TypeError("Cannot handle reference type: %r" % ref_type_name)
+
+ # create ref instance
+ if ref_type is SymbolicReference:
+ remote_local_ref = ref_type(repo, "FETCH_HEAD")
+ else:
+ remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip()))
+ # END create ref instance
+
+ note = ( note and note.strip() ) or ''
+
+ # parse flags from control_character
+ flags = 0
+ try:
+ flags |= cls._flag_map[control_character]
+ except KeyError:
+ raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line))
+ # END control char exception handling
+
+ # parse operation string for more info - makes no sense for symbolic refs
+ old_commit_binsha = None
+ if isinstance(remote_local_ref, Reference):
+ if 'rejected' in operation:
+ flags |= cls.REJECTED
+ if 'new tag' in operation:
+ flags |= cls.NEW_TAG
+ if 'new branch' in operation:
+ flags |= cls.NEW_HEAD
+ if '...' in operation or '..' in operation:
+ split_token = '...'
+ if control_character == ' ':
+ split_token = split_token[:-1]
+ old_commit_binsha = repo.resolve(operation.split(split_token)[0])
+ # END handle refspec
+ # END reference flag handling
+
+ return cls(remote_local_ref, flags, note, old_commit_binsha)
+
+
+class GitCommandMixin(object):
+ """A mixin to provide the git command object through the git property"""
+
+ def __init__(self, *args, **kwargs):
+ """Initialize this instance with the root and a git command"""
+ super(GitCommandMixin, self).__init__(*args, **kwargs)
+ self._git = Git(self.working_dir)
+
+ @property
+ def git(self):
+ return self._git
+
+
+class CmdObjectDBRMixin(object):
+ """A mixin implementing object reading through a git command
+ It will create objects only in the loose object database.
+ :note: for now, we use the git command to do all the lookup, just until we
+ have packs and the other implementations
+ """
+ #{ ODB Interface
+ # overrides from PureOdb Implementation, which is responsible only for writing
+ # objects
+ def info(self, sha):
+ hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
+ return OInfo(hex_to_bin(hexsha), typename, size)
+
+ def stream(self, sha):
+ """For now, all lookup is done by git itself
+ :note: As we don't know when the stream is actually read (and if it is
+ stored for later use) we read the data right away and cache it.
+ This has huge performance implications, both for memory and for
+ reading/deserializing objects, but we have no other choice in order
+ to make the database behaviour consistent with other implementations !"""
+
+ hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+ return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: Full binary 20 byte sha from the given partial hexsha
+ :raise AmbiguousObjectName:
+ :raise BadObject:
+ :note: currently we only raise BadObject as git does not communicate
+ AmbiguousObjects separately"""
+ try:
+ hexsha, typename, size = self._git.get_object_header(partial_hexsha)
+ return hex_to_bin(hexsha)
+ except (GitCommandError, ValueError):
+ raise BadObject(partial_hexsha)
+ # END handle exceptions
+
+ #} END odb interface
+
+
+class CmdTransportMixin(TransportDB):
+ """A mixin requiring the .git property as well as repository paths
+
+ :note: for now, we use the git command to do all the work, just until we
+ have pure python implementations of this functionality
+ """
+
+ #{ Transport DB interface
+
+ def push(self, url, refspecs=None, progress=None, **kwargs):
+ """Push given refspecs using the git default implementation
+ :param url: may be a remote name or a url
+ :param refspecs: single string, RefSpec instance or list of such or None.
+ :param progress: RemoteProgress derived instance or None
+ :param **kwargs: Additional arguments to be passed to the git-push process"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.push(url, refspecs, porcelain=True, as_process=True, **add_progress(kwargs, self.git, progress))
+ return get_push_info(self, url, proc, progress)
+
+ def pull(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch and merge the given refspecs.
+ If no refspecs are given, the merge will only work properly if you
+ have set up upstream (tracking) branches.
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.pull(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ def fetch(self, url, refspecs=None, progress=None, **kwargs):
+ """Fetch the latest changes
+ :param url: may be a remote name or a url
+ :param refspecs: see push()
+ :param progress: see push()"""
+ progress = CmdRemoteProgress(progress)
+ proc = self._git.fetch(url, refspecs, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, self.git, progress))
+ return get_fetch_info_from_stderr(self, proc, progress)
+
+ #} end transport db interface
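A usage sketch of this transport interface, assuming repo is an instance of a type mixing it in (for instance one of the databases from git.db.complex) and that a remote named 'origin' exists:

    for info in repo.fetch('origin'):
        print("%s: flags=%d" % (info.name, info.flags))

    # a RemoteProgress subclass could be passed as 'progress' here
    repo.push('origin', 'master:refs/heads/master')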
+
+
+class CmdHighLevelRepository(HighLevelRepository):
+ """An intermediate interface carrying advanced git functionality that can be used
+ in other compound repositories which do not implement this functionality themselves.
+
+ The mixin must be used with repositories compatible to the GitCommandMixin.
+
+ :note: at some point, methods provided here are supposed to be provided by custom interfaces"""
+ DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
+
+ # precompiled regex
+ re_whitespace = re.compile(r'\s+')
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+ re_author_committer_start = re.compile(r'^(author|committer)')
+ re_tab_full_line = re.compile(r'^\t(.*)$')
+
+ #{ Configuration
+ CommitCls = Commit
+ GitCls = Git
+ #} END configuration
+
+ def daemon_export():
+ def _get_daemon_export(self):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ return os.path.exists(filename)
+
+ def _set_daemon_export(self, value):
+ filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
+ fileexists = os.path.exists(filename)
+ if value and not fileexists:
+ touch(filename)
+ elif not value and fileexists:
+ os.unlink(filename)
+
+ return property(_get_daemon_export, _set_daemon_export,
+ doc="If True, git-daemon may export this repository")
+
+ daemon_export = daemon_export()
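Since the descriptor above merely toggles a marker file, usage reduces to plain attribute access; a sketch assuming repo is an instance of this type:

    repo.daemon_export = True     # creates git-daemon-export-ok in git_dir
    assert repo.daemon_export
    repo.daemon_export = False    # removes the marker file again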
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ if self._bare:
+ # Bare repositories with no associated working directory are
+ # always considered to be clean.
+ return False
+
+ # start from the one which is fastest to evaluate
+ default_args = ('--abbrev=40', '--full-index', '--raw')
+ if index:
+ # diff index against HEAD
+ if isfile(self.index.path) and self.head.is_valid() and \
+ len(self.git.diff('HEAD', '--cached', *default_args)):
+ return True
+ # END index handling
+ if working_tree:
+ # diff index against working tree
+ if len(self.git.diff(*default_args)):
+ return True
+ # END working tree handling
+ if untracked_files:
+ if len(self.untracked_files):
+ return True
+ # END untracked files
+ return False
+
+ @property
+ def untracked_files(self):
+ # make sure we get all files, not only untracked directories
+ proc = self.git.status(untracked_files=True, as_process=True)
+ stream = iter(proc.stdout)
+ untracked_files = list()
+ for line in stream:
+ if not line.startswith("# Untracked files:"):
+ continue
+ # skip two lines
+ stream.next()
+ stream.next()
+
+ for untracked_info in stream:
+ if not untracked_info.startswith("#\t"):
+ break
+ untracked_files.append(untracked_info.replace("#\t", "").rstrip())
+ # END for each untracked info line
+ # END for each line
+ return untracked_files
+
+ def blame(self, rev, file):
+ data = self.git.blame(rev, '--', file, p=True)
+ commits = dict()
+ blames = list()
+ info = None
+
+ for line in data.splitlines(False):
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ if self.re_hexsha_only.search(firstpart):
+ # handles
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
+ # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
+ digits = parts[-1].split(" ")
+ if len(digits) == 3:
+ info = {'id': firstpart}
+ blames.append([None, []])
+ # END blame data initialization
+ else:
+ m = self.re_author_committer_start.search(firstpart)
+ if m:
+ # handles:
+ # author Tom Preston-Werner
+ # author-mail <tom@mojombo.com>
+ # author-time 1192271832
+ # author-tz -0700
+ # committer Tom Preston-Werner
+ # committer-mail <tom@mojombo.com>
+ # committer-time 1192271832
+ # committer-tz -0700 - IGNORED BY US
+ role = m.group(0)
+ if firstpart.endswith('-mail'):
+ info["%s_email" % role] = parts[-1]
+ elif firstpart.endswith('-time'):
+ info["%s_date" % role] = int(parts[-1])
+ elif role == firstpart:
+ info[role] = parts[-1]
+ # END distinguish mail,time,name
+ else:
+ # handle
+ # filename lib/grit.rb
+ # summary add Blob
+ # <and rest>
+ if firstpart.startswith('filename'):
+ info['filename'] = parts[-1]
+ elif firstpart.startswith('summary'):
+ info['summary'] = parts[-1]
+ elif firstpart == '':
+ if info:
+ sha = info['id']
+ c = commits.get(sha)
+ if c is None:
+ c = self.CommitCls( self, hex_to_bin(sha),
+ author=Actor._from_string(info['author'] + ' ' + info['author_email']),
+ authored_date=info['author_date'],
+ committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
+ committed_date=info['committer_date'],
+ message=info['summary'])
+ commits[sha] = c
+ # END if commit objects needs initial creation
+ m = self.re_tab_full_line.search(line)
+ text, = m.groups()
+ blames[-1][0] = c
+ blames[-1][1].append( text )
+ info = None
+ # END if we collected commit info
+ # END distinguish filename,summary,rest
+ # END distinguish author|committer vs filename,summary,rest
+ # END distinguish hexsha vs other information
+ return blames
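A brief usage sketch of the parser above; the path 'README' is only an example:

    for commit, lines in repo.blame('HEAD', 'README'):
        # each entry pairs the Commit that introduced a hunk with its lines
        print("%s: %d line(s)" % (commit, len(lines)))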
+
+ @classmethod
+ def init(cls, path=None, mkdir=True, **kwargs):
+ """
+ :param kwargs:
+ keyword arguments serving as additional options to the git-init command
+
+ For more information, see the respective docs of HighLevelRepository"""
+
+ if mkdir and path and not os.path.exists(path):
+ os.makedirs(path, 0755)
+
+ # git command automatically chdir into the directory
+ git = cls.GitCls(path)
+ output = git.init(**kwargs)
+ return cls(path)
+
+ @classmethod
+ def _clone(cls, git, url, path, progress, **kwargs):
+ # special handling for windows for path at which the clone should be
+ # created.
+ # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
+ # we at least give a proper error instead of letting git fail
+ prev_cwd = None
+ prev_path = None
+ if os.name == 'nt':
+ if '~' in path:
+ raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
+
+ # on windows, git will think paths like c: are relative and prepend the
+ # current working dir ( before it fails ). We temporarily adjust the working
+ # dir to make this actually work
+ match = re.match("(\w:[/\\\])(.*)", path)
+ if match:
+ prev_cwd = os.getcwd()
+ prev_path = path
+ drive, rest_of_path = match.groups()
+ os.chdir(drive)
+ path = rest_of_path
+ kwargs['with_keep_cwd'] = True
+ # END cwd preparation
+ # END windows handling
+
+ try:
+ proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress))
+ if progress is not None:
+ digest_process_messages(proc.stderr, progress)
+ #END digest progress messages
+ finalize_process(proc)
+ finally:
+ if prev_cwd is not None:
+ os.chdir(prev_cwd)
+ path = prev_path
+ # END reset previous working dir
+ # END bad windows handling
+
+ # our git command could have a different working dir than our actual
+ # environment, hence we prepend its working dir if required
+ if not os.path.isabs(path) and git.working_dir:
+ path = join(git.working_dir, path)
+
+ # adjust remotes - there may be operating systems which use backslashes,
+ # These might be given as initial paths, but when handling the config file
+ # that contains the remote from which we were cloned, git stops liking it
+ # as it will escape the backslashes. Hence we undo the escaping just to be
+ # sure
+ repo = cls(os.path.abspath(path))
+ if repo.remotes:
+ repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
+ # END handle remote repo
+ return repo
+
+ def clone(self, path, progress = None, **kwargs):
+ """
+ :param kwargs:
+ All remaining keyword arguments are given to the git-clone command
+
+ For more information, see the respective method in HighLevelRepository"""
+ return self._clone(self.git, self.git_dir, path, CmdRemoteProgress(progress), **kwargs)
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress = None, **kwargs):
+ """
+ :param kwargs: see the ``clone`` method
+ For more information, see the respective method in the HighLevelRepository"""
+ return cls._clone(cls.GitCls(os.getcwd()), url, to_path, CmdRemoteProgress(progress), **kwargs)
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """For all args see HighLevelRepository interface
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+
+ :raise GitCommandError: in case something went wrong"""
+ if treeish is None:
+ treeish = self.head.commit
+ if prefix and 'prefix' not in kwargs:
+ kwargs['prefix'] = prefix
+ kwargs['output_stream'] = ostream
+
+ self.git.archive(treeish, **kwargs)
+ return self
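A short sketch of the archive call, writing the current head as a tar file; the file name and prefix are placeholders:

    fp = open("snapshot.tar", "wb")
    try:
        repo.archive(fp, treeish='HEAD', prefix='snapshot/', format='tar')
    finally:
        fp.close()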
diff --git a/git/db/cmd/complex.py b/git/db/cmd/complex.py
new file mode 100644
index 00000000..49e8c590
--- /dev/null
+++ b/git/db/cmd/complex.py
@@ -0,0 +1,16 @@
+"""Module with our own git implementation - it uses the git command"""
+
+from git.db.compat import RepoCompatibilityInterface
+from base import *
+
+
+__all__ = ['CmdPartialGitDB']
+
+
+class CmdPartialGitDB( GitCommandMixin, CmdObjectDBRMixin, CmdTransportMixin,
+ CmdHighLevelRepository ):
+ """Utility repository which only partially implements all required methods.
+ It cannot be reliably used alone, but is provided to allow mixing it with other
+ implementations"""
+ pass
+
diff --git a/git/db/compat.py b/git/db/compat.py
new file mode 100644
index 00000000..767ab5e0
--- /dev/null
+++ b/git/db/compat.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module providing adaptors to maintain backwards compatibility"""
+
+class RepoCompatibilityInterface(object):
+ """Interface to install backwards compatibility of the new complex repository
+ types with the previous, all-in-one repository."""
+
+ @property
+ def bare(self):
+ return self.is_bare
+
+ def rev_parse(self, *args, **kwargs):
+ return self.resolve_object(*args, **kwargs)
+
+ @property
+ def odb(self):
+ """The odb is now an integrated part of each repository"""
+ return self
+
+ @property
+ def active_branch(self):
+ """The name of the currently active branch.
+
+ :return: Head to the active branch"""
+ return self.head.reference
+
+ def __repr__(self):
+ return '<git.Repo "%s">' % self.git_dir
diff --git a/git/db/complex.py b/git/db/complex.py
new file mode 100644
index 00000000..31b047a0
--- /dev/null
+++ b/git/db/complex.py
@@ -0,0 +1,28 @@
+"""Module with many useful complex databases with different useful combinations of primary implementations"""
+
+from py.complex import PurePartialGitDB
+from cmd.complex import CmdPartialGitDB
+from compat import RepoCompatibilityInterface
+
+__all__ = ['CmdGitDB', 'PureGitDB', 'CmdCompatibilityGitDB', 'PureCompatibilityGitDB']
+
+class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
+ """A database which uses primarily the git command implementation, but falls back
+ to pure python where it is more feasible
+ :note: To assure consistent behaviour across implementations, when calling the
+ ``stream()`` method a cache is created. This makes this implementation a bad
+ choice when reading big files as these are streamed from memory in all cases."""
+
+class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
+ """A database which fills in its missing implementation using the pure python
+ implementation"""
+ pass
+
+class PureGitDB(PurePartialGitDB, CmdPartialGitDB):
+ """A repository which uses the pure implementation primarily, but falls back
+ on using the git command for high-level functionality"""
+
+class PureCompatibilityGitDB(RepoCompatibilityInterface, PureGitDB):
+ """Repository which uses the pure implementation primarily, but falls back
+ to the git command implementation. Please note that the CmdGitDB does it
+ the opposite way around."""
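Which implementation answers a given call is decided purely by Python's method resolution order over these mixins; a hedged instantiation sketch with placeholder paths:

    from git.db.complex import CmdCompatibilityGitDB, PureCompatibilityGitDB

    # git-command backed database with the old Repo-style accessors
    repo = CmdCompatibilityGitDB('/path/to/repo')
    # pure-python database resolving to the git command only as a fallback
    pure = PureCompatibilityGitDB('/path/to/repo')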
diff --git a/git/db/interface.py b/git/db/interface.py
new file mode 100644
index 00000000..a4c05265
--- /dev/null
+++ b/git/db/interface.py
@@ -0,0 +1,838 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains interfaces for basic database building blocks"""
+
+__all__ = ( 'ObjectDBR', 'ObjectDBW', 'RootPathDB', 'CompoundDB', 'CachingDB',
+ 'TransportDB', 'ConfigurationMixin', 'RepositoryPathsMixin',
+ 'RefSpec', 'FetchInfo', 'PushInfo', 'ReferencesMixin', 'SubmoduleDB',
+ 'IndexDB', 'HighLevelRepository')
+
+
+class ObjectDBR(object):
+ """Defines an interface for object database lookup.
+ Objects are identified by their 20 byte binary sha"""
+
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 20 byte
+ binary sha is contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_object_async(self, reader):
+ """Return a reader yielding information about the membership of objects
+ as identified by shas
+ :param reader: Reader yielding 20 byte shas.
+ :return: async.Reader yielding tuples of (sha, bool) pairs which indicate
+ whether the given sha exists in the database or not"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: 20 byte binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, reader):
+ """Retrieve information of a multitude of objects asynchronously
+ :param reader: Channel yielding the sha's of the objects of interest
+ :return: async.Reader yielding OInfo|InvalidOInfo, in any order"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, reader):
+ """Retrieve the OStream of multiple objects
+ :param reader: see ``info``
+ :param max_threads: see ``ObjectDBW.store``
+ :return: async.Reader yielding OStream|InvalidOStream instances in any order
+ :note: depending on the system configuration, it might not be possible to
+ read all OStreams at once. Instead, read them individually using reader.read(x)
+ where x is small enough."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def size(self):
+ """:return: amount of objects in this database"""
+ raise NotImplementedError()
+
+ def sha_iter(self):
+ """Return iterator yielding 20 byte shas for all objects in this data base"""
+ raise NotImplementedError()
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """
+ :return: 20 byte binary sha1 from the given less-than-40 byte hexsha
+ :param partial_hexsha: hexsha with less than 40 byte
+ :raise AmbiguousObjectName: If multiple objects would match the given sha
+ :raise BadObject: If object was not found"""
+ raise NotImplementedError()
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical (hexadecimal) representation.
+ It is required as binary sha's cannot display whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+ #} END query interface
+
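To make the contract concrete, a minimal dict-backed sketch of the read interface; the type and its storage layout are illustration-only assumptions:

    from git.base import OInfo
    from git.exc import BadObject

    class DictODB(ObjectDBR):
        """Toy database mapping binary shas to (typename, data) tuples"""
        def __init__(self):
            self._objects = dict()

        def has_object(self, sha):
            return sha in self._objects

        def info(self, sha):
            try:
                typename, data = self._objects[sha]
            except KeyError:
                raise BadObject(sha)
            return OInfo(sha, typename, len(data))

        def size(self):
            return len(self._objects)

        def sha_iter(self):
            return iter(self._objects.keys())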
+
+class ObjectDBW(object):
+ """Defines an interface to create objects in the database"""
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """
+ Adjusts the stream to which all data should be sent when storing new objects
+
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def ostream(self):
+ """
+ :return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store(self, istream):
+ """
+ Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+
+ :param istream: IStream compatible instance. If its sha is already set
+ to a value, the object will just be stored in our database format,
+ in which case the input stream is expected to be in object format ( header + contents ).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, reader):
+ """
+ Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ The IStreams sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+
+ :param reader: async.Reader yielding IStream instances.
+ The same instances will be used in the output channel as were received
+ in by the Reader.
+
+ :note: As some ODB implementations implement this operation atomically, they might
+ abort the whole operation if one item could not be processed. Hence check how
+ many items have actually been produced."""
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END edit interface
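A hedged sketch of storing a blob through this write interface, assuming the IStream type from git.base and a database instance db implementing store():

    from cStringIO import StringIO
    from git.base import IStream
    from git.util import bin_to_hex

    data = "hello"
    istream = IStream("blob", len(data), StringIO(data))
    db.store(istream)
    print(bin_to_hex(istream.binsha))    # the sha is filled in by store()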
+
+
+class RootPathDB(object):
+ """Provides basic facilities to retrieve files of interest"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ :note: The base will not perform any accessibility checking as the base
+ might not yet be accessible, but become accessible before the first
+ access."""
+ super(RootPathDB, self).__init__(root_path)
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ raise NotImplementedError()
+
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ to potentially access datafiles"""
+ raise NotImplementedError()
+ #} END interface
+
+
+class CachingDB(object):
+ """A database which uses caches to speed-up access"""
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Call this method if the underlying data changed to trigger an update
+ of the internal caching structures.
+
+ :param force: if True, the update must be performed. Otherwise the implementation
+ may decide not to perform an update if it thinks nothing has changed.
+ :return: True if an update was performed as something changed indeed"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class CompoundDB(object):
+ """A database which delegates calls to sub-databases.
+ They should usually be cached and lazy-loaded"""
+
+ #{ Interface
+
+ def databases(self):
+ """:return: tuple of database instances we use for lookups"""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class IndexDB(object):
+ """A database which provides a flattened index to all objects in its currently
+ active tree."""
+ @property
+ def index(self):
+ """:return: IndexFile compatible instance"""
+ raise NotImplementedError()
+
+
+class RefSpec(object):
+ """A refspec is a simple container which provides information about the way
+ something should be fetched or pushed. It describes the objects to transfer
+ using reference names (or respective instances which resolve to actual
+ reference names)."""
+ __slots__ = ('source', 'destination', 'force')
+
+ def __init__(self, source, destination, force=False):
+ """initialize the instance with the required values
+ :param source: reference name or instance. If None, the Destination
+ is supposed to be deleted."""
+ self.source = source
+ self.destination = destination
+ self.force = force
+ if self.destination is None:
+ raise ValueError("Destination must be set")
+
+ def __str__(self):
+ """:return: a git-style refspec"""
+ s = str(self.source)
+ if self.source is None:
+ s = ''
+ #END handle source
+ d = str(self.destination)
+ p = ''
+ if self.force:
+ p = '+'
+ #END handle force
+ return "%s%s:%s" % (p, s, d)
+
+ def delete_destination(self):
+ return self.source is None
+
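Refspecs render like their git command-line counterparts; a small sketch:

    spec = RefSpec('refs/heads/master', 'refs/remotes/origin/master', force=True)
    str(spec)    # '+refs/heads/master:refs/remotes/origin/master'

    # a None source requests deletion of the destination on push
    RefSpec(None, 'refs/heads/stale').delete_destination()    # True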
+
+class RemoteProgress(object):
+ """
+ Handler providing an interface to parse progress information emitted by git-push
+ and git-fetch and to dispatch callbacks allowing subclasses to react to the progress.
+
+ Subclasses should derive from this type.
+ """
+ _num_op_codes = 7
+ BEGIN, END, COUNTING, COMPRESSING, WRITING, RECEIVING, RESOLVING = [1 << x for x in range(_num_op_codes)]
+ STAGE_MASK = BEGIN|END
+ OP_MASK = ~STAGE_MASK
+
+ #{ Subclass Interface
+
+ def line_dropped(self, line):
+ """Called whenever a line could not be understood and was therefore dropped."""
+ pass
+
+ def update(self, op_code, cur_count, max_count=None, message='', input=''):
+ """Called whenever the progress changes
+
+ :param op_code:
+ Integer allowing to be compared against Operation IDs and stage IDs.
+
+ Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation
+ ID as well as END. It may be that BEGIN and END are set at once in case only
+ one progress message was emitted due to the speed of the operation.
+ Between BEGIN and END, none of these flags will be set
+
+ Operation IDs are all held within the OP_MASK. Only one Operation ID will
+ be active per call.
+ :param cur_count: Current absolute count of items
+
+ :param max_count:
+ The maximum count of items we expect. It may be None in case there is
+ no maximum number of items or if it is (yet) unknown.
+
+ :param message:
+ In case of the 'WRITING' operation, it contains the amount of bytes
+ transferred. It may possibly be used for other purposes as well.
+
+ :param input:
+ The actual input string that was used to parse the information from.
+ This is usually a line from the output of git-fetch, but really
+ depends on the implementation
+
+ You may read the contents of the current line in self._cur_line"""
+ pass
+
+ def __call__(self, message, input=''):
+ """Same as update, but with a simpler interface which only provides the
+ message of the operation.
+ :note: This method will be called in addition to the update method. It is
+ up to you which one you implement"""
+ pass
+ #} END subclass interface
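A sketch of a subclass decoding the op_code bitfield with the masks defined above:

    class LogProgress(RemoteProgress):
        def update(self, op_code, cur_count, max_count=None, message='', input=''):
            op = op_code & self.OP_MASK          # which operation is running
            stage = op_code & self.STAGE_MASK    # BEGIN and/or END, 0 in between
            if op == self.RESOLVING and stage & self.END:
                print("delta resolution finished %s" % message)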
+
+
+class PushInfo(object):
+ """A type presenting information about the result of a push operation for exactly
+ one refspec
+
+ flags # bitflags providing more information about the result
+ local_ref # Reference pointing to the local reference that was pushed
+ # It is None if the ref was deleted.
+ remote_ref_string # path to the remote reference located on the remote side
+ remote_ref # Remote Reference on the local side corresponding to
+ # the remote_ref_string. It can be a TagReference as well.
+ old_commit_binsha # binary sha to commit at which the remote_ref was standing before we pushed
+ # it to local_ref.commit. Will be None if an error was indicated
+ summary # summary line providing human readable english text about the push
+ """
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \
+ FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ]
+
+
+class FetchInfo(object):
+ """A type presenting information about the fetch operation on exactly one refspec
+
+ The following members are defined:
+ ref # name of the reference to the changed
+ # remote head or FETCH_HEAD. Implementations can provide
+ # actual class instance which convert to a respective string
+ flags # additional flags to be & with enumeration members,
+ # i.e. info.flags & info.REJECTED
+ # is 0 if ref is FETCH_HEAD
+ note # additional notes given by the fetch-pack implementation intended for the user
+ old_commit_binsha # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD,
+ # field is set to the previous location of ref as binary sha or None"""
+ __slots__ = tuple()
+
+ NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \
+ FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ]
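Testing these bitflags follows the usual pattern; a sketch with a hypothetical info instance as returned from a fetch() implementation:

    if info.flags & info.ERROR:
        raise Exception("fetch failed for %s" % info.ref)
    if info.flags & (info.FORCED_UPDATE | info.FAST_FORWARD):
        print("ref moved away from %r" % (info.old_commit_binsha,))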
+
+
+class TransportDB(object):
+ """A database which allows transporting objects from and to different locations
+ which are specified by urls (location) and refspecs (what to transport,
+ see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html).
+
+ At the beginning of a transport operation, it will be determined which objects
+ have to be sent (either by this or by the other side).
+
+ Afterwards a pack with the required objects is sent (or received). If there is
+ nothing to send, the pack will be empty.
+
+ As refspecs involve symbolic names for references to be handled, we require
+ RefParse functionality. How this is done is up to the actual implementation."""
+ # The following variables need to be set by the derived class
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ """Fetch the objects defined by the given refspec from the given url.
+ :param url: url identifying the source of the objects. It may also be
+ a symbol from which the respective url can be resolved, like the
+ name of the remote. The implementation should allow objects as input
+ as well, these are assumed to resolve to a meaningful string though.
+ :param refspecs: iterable of reference specifiers or RefSpec instances,
+ identifying the references to be fetched from the remote.
+ :param progress: RemoteProgress derived instance which receives progress messages for user consumption or None
+ :param kwargs: may be used for additional parameters that the actual implementation could
+ find useful.
+ :return: List of FetchInfo compatible instances which provide information about what
+ was fetched, in the order of the input refspecs.
+ :note: even if the operation fails, one of the returned FetchInfo instances
+ may still contain errors or failures in only part of the refspecs.
+ :raise: if any issue occurs during the transport or if the url is not
+ supported by the protocol.
+ """
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ """Transport the objects identified by the given refspec to the remote
+ at the given url.
+ :param url: Describes the location which is to receive the objects
+ see fetch() for more details
+ :param refspecs: iterable of refspecs strings or RefSpec instances
+ to identify the objects to push
+ :param progress: see fetch()
+ :param kwargs: additional arguments which may be provided by the caller
+ as they may be useful to the actual implementation
+ :todo: what to return ?
+ :raise: if any issue arises during transport or if the url cannot be handled"""
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ """:return: An IterableList of Remote objects allowing to access and manipulate remotes
+ :note: Remote objects can also be used for the actual push or fetch operation"""
+ raise NotImplementedError()
+
+ def remote(self, name='origin'):
+ """:return: Remote object with the given name
+ :note: it does not necessarily exist, hence this is just a more convenient way
+ to construct Remote objects"""
+ raise NotImplementedError()
+
+ #}end interface
+
+
+ #{ Utility Methods
+
+ def create_remote(self, name, url, **kwargs):
+ """Create a new remote with the given name pointing to the given url
+ :return: Remote instance, compatible to the Remote interface"""
+ return Remote.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ """Delete the given remote.
+ :param remote: a Remote instance"""
+ return Remote.remove(self, remote)
+
+ #} END utility methods
+
+
+class ReferencesMixin(object):
+ """Database providing reference objects which in turn point to database objects
+ like Commits or Tag(Object)s.
+
+ The returned types are compatible to the interfaces of the pure python
+ reference implementation in GitDB.ref"""
+
+ def resolve(self, name):
+ """Resolve the given name into a binary sha. Valid names are as defined
+ in the rev-parse documentation http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+ :return: binary sha matching the name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ raise NotImplementedError()
+
+ def resolve_object(self, name):
+ """As ``resolve()``, but returns the Object instance pointed to by the
+ resolved binary sha
+ :return: Object instance of the correct type, e.g. shas pointing to commits
+ will be represented by a Commit object"""
+ raise NotImplementedError()
+
+ @property
+ def references(self):
+ """:return: iterable list of all Reference objects representing tags, heads
+ and remote references. This is the most general method to obtain any
+ references."""
+ raise NotImplementedError()
+
+ @property
+ def heads(self):
+ """:return: IterableList with HeadReference objects pointing to all
+ heads in the repository."""
+ raise NotImplementedError()
+
+ @property
+ def head(self):
+ """:return: HEAD Object pointing to the current head reference"""
+ raise NotImplementedError()
+
+ @property
+ def tags(self):
+ """:return: An IterableList of TagReferences or compatible items that
+ are available in this repo"""
+ raise NotImplementedError()
+
+ #{ Utility Methods
+
+ def tag(self, name):
+ """:return: Tag with the given name
+ :note: It does not necessarily exist, hence this is just a more convenient
+ way to construct TagReference objects"""
+ raise NotImplementedError()
+
+
+ def commit(self, rev=None):
+ """The Commit object for the specified revision
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return: Commit compatible object"""
+ raise NotImplementedError()
+
+ def iter_trees(self, *args, **kwargs):
+ """:return: Iterator yielding Tree compatible objects
+ :note: Takes all arguments known to iter_commits method"""
+ raise NotImplementedError()
+
+ def tree(self, rev=None):
+ """The Tree (compatible) object for the given treeish revision
+ Examples::
+
+ repo.tree(repo.heads[0])
+
+ :param rev: is a revision pointing to a Treeish ( being a commit or tree )
+ :return: ``git.Tree``
+
+ :note:
+ If you need a non-root level tree, find it by iterating the root tree. Otherwise
+ it cannot know about its path relative to the repository root and subsequent
+ operations might have unexpected results."""
+ raise NotImplementedError()
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ """A list of Commit objects representing the history of a given ref/commit
+
+ :param rev:
+ revision specifier, see git-rev-parse for viable options.
+ If None, the active branch will be used.
+
+ :param paths:
+ is an optional path or a list of paths to limit the returned commits to;
+ commits that do not contain that path or the paths will not be returned.
+
+ :param kwargs:
+ Arguments to be passed to git-rev-list - common ones are
+ max_count and skip
+
+ :note: to receive only commits between two named revisions, use the
+ "revA..revB" revision specifier
+
+ :return: iterator yielding Commit compatible instances"""
+ raise NotImplementedError()
+
+
+ #} END utility methods
+
+ #{ Edit Methods
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None):
+ """Create a new head within the repository.
+ :param commit: a resolvable name to the commit or a Commit or Reference instance the new head should point to
+ :param force: if True, a head will be created even if it already exists.
+ Otherwise an exception will be raised.
+ :param logmsg: message to append to the reference log. If None, a default message
+ will be used
+ :return: newly created Head instance"""
+ raise NotImplementedError()
+
+ def delete_head(self, *heads):
+ """Delete the given heads
+ :param heads: list of Head references that are to be deleted"""
+ raise NotImplementedError()
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False):
+ """Create a new tag reference.
+ :param path: name or path of the new tag.
+ :param ref: resolvable name of the reference or commit, or Commit or Reference
+ instance describing the commit the tag should point to.
+ :param message: message to be attached to the tag reference. This will
+ create an actual Tag object carrying the message. Otherwise a TagReference
+ will be generated.
+ :param force: if True, the Tag will be created even if another tag does already
+ exist at the given path. Otherwise an exception will be thrown
+ :return: TagReference object """
+ raise NotImplementedError()
+
+ def delete_tag(self, *tags):
+ """Delete the given tag references
+ :param tags: TagReferences to delete"""
+ raise NotImplementedError()
+
+ #}END edit methods
+
+ #{ Backward Compatibility
+ # These aliases need to be provided by the implementing interface as well
+ refs = references
+ branches = heads
+ #} END backward compatibility
+
+
+
+
+class RepositoryPathsMixin(object):
+ """Represents basic functionality of a full git repository. This involves an
+ optional working tree, a git directory with references and an object directory.
+
+ This type collects the respective paths and verifies the provided base path
+ truly is a git repository.
+
+ If the underlying type provides the config_reader() method, we can properly determine
+ whether this is a bare repository as well. Otherwise it will make an educated guess
+ based on the path name."""
+ #{ Subclass Interface
+ def _initialize(self, path):
+ """initialize this instance with the given path. It may point to
+ any location within the repositories own data, as well as the working tree.
+
+ The implementation will move up and search for traces of a git repository,
+ which is indicated by a child directory ending with .git or the
+ current path portion ending with .git.
+
+ The paths made available for query are suitable for full git repositories
+ only. Plain object databases need to be fed the "objects" directory path.
+
+ :param path: the path to initialize the repository with
+ It is a path to either the root git directory or the bare git repo::
+
+ repo = Repo("/Users/mtrier/Development/git-python")
+ repo = Repo("/Users/mtrier/Development/git-python.git")
+ repo = Repo("~/Development/git-python.git")
+ repo = Repo("$REPOSITORIES/Development/git-python.git")
+
+ :raise InvalidDBRoot:
+ """
+ raise NotImplementedError()
+ #} end subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ raise NotImplementedError()
+
+ def __ne__(self, rhs):
+ raise NotImplementedError()
+
+ def __hash__(self):
+ raise NotImplementedError()
+
+ def __repr__(self):
+ raise NotImplementedError()
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ """:return: True if this is a bare repository
+ :note: this value is cached upon initialization"""
+ raise NotImplementedError()
+
+ @property
+ def git_dir(self):
+ """:return: path to directory containing this actual git repository (which
+ in turn provides access to objects and references)"""
+ raise NotImplementedError()
+
+ @property
+ def working_tree_dir(self):
+ """:return: path to directory containing the working tree checkout of our
+ git repository.
+ :raise AssertionError: If this is a bare repository"""
+ raise NotImplementedError()
+
+ @property
+ def objects_dir(self):
+ """:return: path to the repository's objects directory"""
+ raise NotImplementedError()
+
+ @property
+ def working_dir(self):
+ """:return: working directory of the git process or related tools, being
+ either the working_tree_dir if available or the git_dir"""
+ raise NotImplementedError()
+
+ @property
+ def description(self):
+ """:return: description text associated with this repository or set the
+ description."""
+ raise NotImplementedError()
+
+ #} END interface
+
+
+class ConfigurationMixin(object):
+ """Interface providing configuration handler instances, which provide locked access
+ to a single git-style configuration file (ini-like format, using tabs to improve readability).
+
+ Configuration readers can be initialized with multiple files at once, whose information is concatenated
+ when reading. Lower-level files overwrite values from higher level files, i.e. a repository configuration file
+ overwrites information coming from a system configuration file
+
+ :note: for the 'repository' config level, a git_path() compatible type is required"""
+ config_level = ("system", "global", "repository")
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ """
+ :return:
+ GitConfigParser allowing to read the full git configuration, but not to write it
+
+ The configuration will include values from the system, user and repository
+ configuration files.
+
+ :param config_level:
+ For possible values, see the config_writer method.
+ If None, all applicable levels will be used. Specify a level in case
+ you know which exact file you wish to read, for instance to prevent
+ reading multiple files.
+ :note: On windows, the system configuration cannot currently be read as its path is
+ unknown; the global path will be used instead."""
+ raise NotImplementedError()
+
+ def config_writer(self, config_level="repository"):
+ """
+ :return:
+ GitConfigParser allowing to write values of the specified configuration file level.
+ Config writers should be retrieved, used to change the configuration, and written
+ right away, as they will lock the configuration file in question and prevent others
+ from writing it.
+
+ :param config_level:
+ One of the following values:
+ system = system-wide configuration file
+ global = user-level configuration file
+ repository = configuration file for this repository only"""
+ raise NotImplementedError()
+
+
+ #} END interface
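+
+# A minimal usage sketch (assuming a 'repo' instance implementing this mixin;
+# section and option names are examples only):
+#
+#   reader = repo.config_reader()              # all applicable levels merged
+#   name = reader.get_value('user', 'name')
+#
+#   writer = repo.config_writer('repository')  # locks e.g. .git/config
+#   writer.set_value('core', 'bare', False)
+#   del(writer)                                # release the lock right away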
+
+
+class SubmoduleDB(object):
+ """Interface providing access to git repository submodules.
+ The actual implementation is found in the Submodule object type, which is
+ currently only available in one implementation."""
+
+ @property
+ def submodules(self):
+ """
+ :return: git.IterableList(Submodule, ...) of direct submodules
+ available from the current head"""
+ raise NotImplementedError()
+
+ def submodule(self, name):
+ """ :return: Submodule with the given name
+ :raise ValueError: If no such submodule exists"""
+ raise NotImplementedError()
+
+ def create_submodule(self, *args, **kwargs):
+ """Create a new submodule
+
+ :note: See the documentation of Submodule.add for a description of the
+ applicable parameters
+ :return: the created submodule"""
+ raise NotImplementedError()
+
+ def iter_submodules(self, *args, **kwargs):
+ """An iterator yielding Submodule instances, see Traversable interface
+ for a description of args and kwargs
+ :return: Iterator"""
+ raise NotImplementedError()
+
+ def submodule_update(self, *args, **kwargs):
+ """Update the submodules, keeping the repository consistent as it will
+ take the previous state into consideration. For more information, please
+ see the documentation of RootModule.update"""
+ raise NotImplementedError()
+
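+# Illustrative only (assuming a 'repo' instance providing this interface):
+# iterate all submodules recursively, then update them; kwargs are forwarded
+# to RootModule.update, so 'recursive' is an assumed parameter.
+#
+#   for sm in repo.iter_submodules():
+#       print sm.name
+#   repo.submodule_update(recursive=True)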
+
+class HighLevelRepository(object):
+ """An interface combining several high-level repository functionality and properties"""
+
+ @property
+ def daemon_export(self):
+ """:return: True if the repository may be published by the git-daemon"""
+ raise NotImplementedError()
+
+ def is_dirty(self, index=True, working_tree=True, untracked_files=False):
+ """
+ :return:
+ ``True`` if the repository is considered dirty. By default it will react
+ like git-status without untracked files, hence it is dirty if the
+ index or the working copy have changes."""
+ raise NotImplementedError()
+
+ @property
+ def untracked_files(self):
+ """
+ :return:
+ list(str,...)
+
+ :note:
+ ignored files will not appear here, i.e. files mentioned in .gitignore.
+ Bare repositories never have untracked files"""
+ raise NotImplementedError()
+
+ def blame(self, rev, file):
+ """The blame information for the given file at the given revision.
+
+ :param rev: revision specifier, see git-rev-parse for viable options.
+ :return:
+ list: [Commit, list: [<line>]]
+ A list of tuples associating a Commit object with a list of lines that
+ changed within the given commit. The Commit objects will be given in order
+ of appearance."""
+ raise NotImplementedError()
+
+ @classmethod
+ def init(cls, path=None, mkdir=True):
+ """Initialize a git repository at the given path if specified
+
+ :param path:
+ is the full path to the repo (traditionally ends with /<name>.git)
+ or None in which case the repository will be created in the current
+ working directory
+
+ :param mkdir:
+ if specified will create the repository directory if it doesn't
+ already exist. Creates the directory with a mode=0755.
+ Only effective if a path is explicitly given
+
+ :return: Instance pointing to the newly created repository with similar capabilities
+ of this class"""
+ raise NotImplementedError()
+
+ def clone(self, path, progress = None):
+ """Create a clone from this repository.
+ :param path:
+ is the full path of the new repo (traditionally ends with /<name>.git).
+
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+
+ :return: ``git.Repo`` (the newly cloned repo)"""
+ raise NotImplementedError()
+
+ @classmethod
+ def clone_from(cls, url, to_path, progress = None):
+ """Create a clone from the given URL
+ :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
+ :param to_path: Path to which the repository should be cloned
+ :param progress:
+ a RemoteProgress instance or None if no progress information is required
+ :return: instance pointing to the cloned directory with similar capabilities as this class"""
+ raise NotImplementedError()
+
+ def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+ """Archive the tree at the given revision.
+ :param ostream: file-compatible stream object to which the archive will be written
+ :param treeish: the treeish name/id, defaults to the active branch
+ :param prefix: optional prefix to prepend to each filename in the archive
+ :param kwargs:
+ Additional arguments passed to git-archive
+ NOTE: Use the 'format' argument to define the kind of format. Use
+ specialized ostreams to write any format supported by python
+ :return: self"""
+ raise NotImplementedError()
+
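+# Illustrative archive call (assuming an implementing 'repo'; kwargs such as
+# 'format' are forwarded to git-archive per the docstring above):
+#
+#   fp = open('/tmp/repo.tar', 'wb')
+#   repo.archive(fp, treeish='master', prefix='repo/', format='tar')
+#   fp.close()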
+
diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py
new file mode 100644
index 00000000..8a681e42
--- /dev/null
+++ b/git/db/py/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
diff --git a/git/db/py/base.py b/git/db/py/base.py
new file mode 100644
index 00000000..2fdbd202
--- /dev/null
+++ b/git/db/py/base.py
@@ -0,0 +1,474 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains basic implementations for the interface building blocks"""
+from git.db.interface import *
+
+from git.util import (
+ pool,
+ join,
+ isfile,
+ normpath,
+ abspath,
+ dirname,
+ LazyMixin,
+ hex_to_bin,
+ bin_to_hex,
+ expandvars,
+ expanduser,
+ exists,
+ is_git_dir,
+ )
+
+from git.index import IndexFile
+from git.config import GitConfigParser
+from git.exc import (
+ BadObject,
+ AmbiguousObjectName,
+ InvalidGitRepositoryError,
+ NoSuchPathError
+ )
+
+from async import ChannelThreadTask
+
+from itertools import chain
+import sys
+import os
+
+
+__all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB',
+ 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin',
+ 'PureIndexDB')
+
+
+class PureObjectDBR(ObjectDBR):
+
+ #{ Query Interface
+
+ def has_object_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.has_object_async), lambda sha: (sha, self.has_object(sha)))
+ return pool.add_task(task)
+
+ def info_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.info_async), self.info)
+ return pool.add_task(task)
+
+ def stream_async(self, reader):
+ # base implementation just uses the stream method repeatedly
+ task = ChannelThreadTask(reader, str(self.stream_async), self.stream)
+ return pool.add_task(task)
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+ return self.partial_to_complete_sha(partial_binsha, len(partial_hexsha))
+
+ #} END query interface
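+
+# Example of the padding above (illustrative): a 3-character query 'abc' is
+# padded to 'abc0' so hex_to_bin() can produce whole bytes, while the original
+# length 3 is passed on so implementations can ignore the padding nibble.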
+
+
+class PureObjectDBW(ObjectDBW):
+
+ def __init__(self, *args, **kwargs):
+ super(PureObjectDBW, self).__init__(*args, **kwargs)
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ return self._ostream
+
+ def store_async(self, reader):
+ task = ChannelThreadTask(reader, str(self.store_async), self.store)
+ return pool.add_task(task)
+
+ #} END edit interface
+
+
+class PureRootPathDB(RootPathDB):
+
+ def __init__(self, root_path):
+ self._root_path = root_path
+ super(PureRootPathDB, self).__init__(root_path)
+
+
+
+ #{ Interface
+ def root_path(self):
+ return self._root_path
+
+ def db_path(self, rela_path):
+ return join(self._root_path, rela_path)
+ #} END interface
+
+
+def _databases_recursive(database, output):
+ """Fill the output list with databases from the given database, in order. Deals with Loose, Packed
+ and compound databases."""
+ if isinstance(database, CompoundDB):
+ dbs = database.databases()
+ output.extend(db for db in dbs if not isinstance(db, CompoundDB))
+ for cdb in (db for db in dbs if isinstance(db, CompoundDB)):
+ _databases_recursive(cdb, output)
+ else:
+ output.append(database)
+ # END handle database type
+
+
+class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB):
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ elif attr == '_obj_cache':
+ self._obj_cache = dict()
+ else:
+ super(PureCompoundDB, self)._set_cache_(attr)
+
+ def _db_query(self, sha):
+ """:return: database containing the given 20 byte sha
+ :raise BadObject:"""
+ # most databases use binary representations, prevent converting
+ # it every time a database is being queried
+ try:
+ return self._obj_cache[sha]
+ except KeyError:
+ pass
+ # END first level cache
+
+ for db in self._dbs:
+ if db.has_object(sha):
+ self._obj_cache[sha] = db
+ return db
+ # END for each database
+ raise BadObject(sha)
+
+ #{ PureObjectDBR interface
+
+ def has_object(self, sha):
+ try:
+ self._db_query(sha)
+ return True
+ except BadObject:
+ return False
+ # END handle exceptions
+
+ def info(self, sha):
+ return self._db_query(sha).info(sha)
+
+ def stream(self, sha):
+ return self._db_query(sha).stream(sha)
+
+ def size(self):
+ return reduce(lambda x,y: x+y, (db.size() for db in self._dbs), 0)
+
+ def sha_iter(self):
+ return chain(*(db.sha_iter() for db in self._dbs))
+
+ #} END object DBR Interface
+
+ #{ Interface
+
+ def databases(self):
+ return tuple(self._dbs)
+
+ def update_cache(self, force=False):
+ # something might have changed, clear everything
+ self._obj_cache.clear()
+ stat = False
+ for db in self._dbs:
+ if isinstance(db, CachingDB):
+ stat |= db.update_cache(force)
+ # END if is caching db
+ # END for each database to update
+ return stat
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ len_partial_hexsha = len(partial_hexsha)
+ if len_partial_hexsha % 2 != 0:
+ partial_binsha = hex_to_bin(partial_hexsha + "0")
+ else:
+ partial_binsha = hex_to_bin(partial_hexsha)
+ # END assure successful binary conversion
+
+ candidate = None
+ for db in self._dbs:
+ full_bin_sha = None
+ try:
+ if hasattr(db, 'partial_to_complete_sha_hex'):
+ full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha)
+ else:
+ full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha)
+ # END handle database type
+ except BadObject:
+ continue
+ # END ignore bad objects
+ if full_bin_sha:
+ if candidate and candidate != full_bin_sha:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = full_bin_sha
+ # END handle candidate
+ # END for each db
+ if not candidate:
+ raise BadObject(partial_binsha)
+ return candidate
+
+ def partial_to_complete_sha(self, partial_binsha, hex_len):
+ """Simple adaptor to feed into our implementation"""
+ return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len])
+ #} END interface
+
+
+class PureRepositoryPathsMixin(RepositoryPathsMixin):
+ # slots has no effect here, it's just to keep track of used attrs
+ __slots__ = ("_git_path", '_bare')
+
+ #{ Configuration
+ repo_dir = '.git'
+ objs_dir = 'objects'
+ #} END configuration
+
+ #{ Subclass Interface
+ def _initialize(self, path):
+ epath = abspath(expandvars(expanduser(path or os.getcwd())))
+
+ if not exists(epath):
+ raise NoSuchPathError(epath)
+ #END check file
+
+ self._working_tree_dir = None
+ self._git_path = None
+ curpath = epath
+
+ # walk up the path to find the .git dir
+ while curpath:
+ if is_git_dir(curpath):
+ self._git_path = curpath
+ self._working_tree_dir = os.path.dirname(curpath)
+ break
+ gitpath = join(curpath, self.repo_dir)
+ if is_git_dir(gitpath):
+ self._git_path = gitpath
+ self._working_tree_dir = curpath
+ break
+ curpath, dummy = os.path.split(curpath)
+ if not dummy:
+ break
+ # END while curpath
+
+ if self._git_path is None:
+ raise InvalidGitRepositoryError(epath)
+ # END path not found
+
+ self._bare = self._git_path.endswith(self.repo_dir)
+ if hasattr(self, 'config_reader'):
+ try:
+ self._bare = self.config_reader("repository").getboolean('core','bare')
+ except Exception:
+ # let's not assume the option exists, although it should
+ pass
+ #END check bare flag
+
+ #} end subclass interface
+
+ #{ Object Interface
+
+ def __eq__(self, rhs):
+ if hasattr(rhs, 'git_dir'):
+ return self.git_dir == rhs.git_dir
+ return False
+
+ def __ne__(self, rhs):
+ return not self.__eq__(rhs)
+
+ def __hash__(self):
+ return hash(self.git_dir)
+
+ def __repr__(self):
+ return "%s(%r)" % (type(self).__name__, self.git_dir)
+
+ #} END object interface
+
+ #{ Interface
+
+ @property
+ def is_bare(self):
+ return self._bare
+
+ @property
+ def git_dir(self):
+ return self._git_path
+
+ @property
+ def working_tree_dir(self):
+ if self.is_bare:
+ raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir)
+ #END assertion
+ return dirname(self.git_dir)
+
+ @property
+ def objects_dir(self):
+ return join(self.git_dir, self.objs_dir)
+
+ @property
+ def working_dir(self):
+ if self.is_bare:
+ return self.git_dir
+ else:
+ return self.working_tree_dir
+ #END handle bare state
+
+ def _mk_description():
+ def _get_description(self):
+ filename = join(self.git_dir, 'description')
+ return file(filename).read().rstrip()
+
+ def _set_description(self, descr):
+ filename = join(self.git_dir, 'description')
+ file(filename, 'w').write(descr+'\n')
+
+ return property(_get_description, _set_description, "Descriptive text for the content of the repository")
+
+ description = _mk_description()
+ del(_mk_description)
+
+ #} END interface
+
+
+class PureConfigurationMixin(ConfigurationMixin):
+
+ #{ Configuration
+ system_config_file_name = "gitconfig"
+ repo_config_file_name = "config"
+ #} END
+
+ def __init__(self, *args, **kwargs):
+ """Verify prereqs"""
+ super(PureConfigurationMixin, self).__init__(*args, **kwargs)
+ assert hasattr(self, 'git_dir')
+
+ def _path_at_level(self, level):
+ # we do not support an absolute path of the gitconfig on windows,
+ # use the global config instead
+ if sys.platform == "win32" and level == "system":
+ level = "global"
+ #END handle windows
+
+ if level == "system":
+ return "/etc/%s" % self.system_config_file_name
+ elif level == "global":
+ return normpath(expanduser("~/.%s" % self.system_config_file_name))
+ elif level == "repository":
+ return join(self.git_dir, self.repo_config_file_name)
+ #END handle level
+
+ raise ValueError("Invalid configuration level: %r" % level)
+
+ #{ Interface
+
+ def config_reader(self, config_level=None):
+ files = None
+ if config_level is None:
+ files = [ self._path_at_level(f) for f in self.config_level ]
+ else:
+ files = [ self._path_at_level(config_level) ]
+ #END handle level
+ return GitConfigParser(files, read_only=True)
+
+ def config_writer(self, config_level="repository"):
+ return GitConfigParser(self._path_at_level(config_level), read_only=False)
+
+
+ #} END interface
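+
+# Sketch of the resolved paths (assuming a POSIX system and a repository at
+# /tmp/project; the home directory is expanded at runtime):
+#
+#   repo._path_at_level('system')     -> '/etc/gitconfig'
+#   repo._path_at_level('global')     -> '~/.gitconfig' (expanded)
+#   repo._path_at_level('repository') -> '/tmp/project/.git/config'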
+
+
+class PureIndexDB(IndexDB):
+ #{ Configuration
+ IndexCls = IndexFile
+ #} END configuration
+
+ @property
+ def index(self):
+ return self.IndexCls(self)
+
+
+class PureAlternatesFileMixin(object):
+ """Utility able to read and write an alternates file through the alternates property
+ It needs to be part of a type with the git_dir or db_path property.
+
+ The file by default is assumed to be located at the default location as imposed
+ by the standard git repository layout"""
+
+ #{ Configuration
+ alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file
+
+ #} END configuration
+
+ def __init__(self, *args, **kwargs):
+ super(PureAlternatesFileMixin, self).__init__(*args, **kwargs)
+ self._alternates_path() # throws on incompatible type
+
+ #{ Interface
+
+ def _alternates_path(self):
+ if hasattr(self, 'git_dir'):
+ return join(self.git_dir, 'objects', self.alternates_filepath)
+ elif hasattr(self, 'db_path'):
+ return self.db_path(self.alternates_filepath)
+ else:
+ raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method")
+ #END handle path
+
+ def _get_alternates(self):
+ """The list of alternates for this repo from which objects can be retrieved
+
+ :return: list of strings being pathnames of alternates"""
+ alternates_path = self._alternates_path()
+
+ if os.path.exists(alternates_path):
+ f = open(alternates_path)
+ try:
+ alts = f.read()
+ finally:
+ f.close()
+ # END ensure file is closed
+ return alts.strip().splitlines()
+ else:
+ return list()
+ # END handle path exists
+
+ def _set_alternates(self, alts):
+ """Sets the alternates
+
+ :param alts:
+ is the array of string paths representing the alternates at which
+ git should look for objects, i.e. /home/user/repo/.git/objects
+
+ :raise NoSuchPathError:
+ :note:
+ The method does not check for the existence of the paths in alts,
+ as the caller is responsible."""
+ alternates_path = self._alternates_path()
+ if not alts:
+ if isfile(alternates_path):
+ os.remove(alternates_path)
+ else:
+ f = open(alternates_path, 'w')
+ try:
+ f.write("\n".join(alts))
+ finally:
+ f.close()
+ # END file handling
+ # END alts handling
+
+ alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list of paths to be used as alternates")
+
+ #} END interface
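+
+# The alternates file itself is plain text with one object-directory path per
+# line, e.g. (paths illustrative):
+#
+#   /home/user/shared-repo/.git/objects
+#   /srv/cache/objects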
+
diff --git a/git/db/py/complex.py b/git/db/py/complex.py
new file mode 100644
index 00000000..d5c185f3
--- /dev/null
+++ b/git/db/py/complex.py
@@ -0,0 +1,128 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.db.interface import HighLevelRepository
+from base import (
+ PureCompoundDB,
+ PureObjectDBW,
+ PureRootPathDB,
+ PureRepositoryPathsMixin,
+ PureConfigurationMixin,
+ PureAlternatesFileMixin,
+ PureIndexDB,
+ )
+from transport import PureTransportDB
+from resolve import PureReferencesMixin
+
+from loose import PureLooseObjectODB
+from pack import PurePackedODB
+from ref import PureReferenceDB
+from submodule import PureSubmoduleDB
+
+from git.db.compat import RepoCompatibilityInterface
+
+from git.util import (
+ LazyMixin,
+ normpath,
+ join,
+ dirname
+ )
+from git.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+import os
+
+__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB')
+
+
+class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin):
+ """A git-style object-only database, which contains all objects in the 'objects'
+ subdirectory.
+ :note: The type needs to be initialized on the ./objects directory to function,
+ as it deals solely with object lookup. Use a PurePartialGitDB type if you need
+ reference and push support."""
+ # Configuration
+ PackDBCls = PurePackedODB
+ LooseDBCls = PureLooseObjectODB
+ PureReferenceDBCls = PureReferenceDB
+
+ # Directories
+ packs_dir = 'pack'
+ loose_dir = ''
+
+
+ def __init__(self, root_path):
+ """Initialize ourselves on a git ./objects directory"""
+ super(PureGitODB, self).__init__(root_path)
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs' or attr == '_loose_db':
+ self._dbs = list()
+ loose_db = None
+ for subpath, dbcls in ((self.packs_dir, self.PackDBCls),
+ (self.loose_dir, self.LooseDBCls),
+ (self.alternates_filepath, self.PureReferenceDBCls)):
+ path = self.db_path(subpath)
+ if os.path.exists(path):
+ self._dbs.append(dbcls(path))
+ if dbcls is self.LooseDBCls:
+ loose_db = self._dbs[-1]
+ # END remember loose db
+ # END check path exists
+ # END for each db type
+
+ # should have at least one subdb
+ if not self._dbs:
+ raise InvalidDBRoot(self.root_path())
+ # END handle error
+
+ # the loose database needs to provide the store method
+ assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality"
+
+ # finally set the value
+ self._loose_db = loose_db
+ else:
+ super(PureGitODB, self)._set_cache_(attr)
+ # END handle attrs
+
+ #{ PureObjectDBW interface
+
+ def store(self, istream):
+ return self._loose_db.store(istream)
+
+ def ostream(self):
+ return self._loose_db.ostream()
+
+ def set_ostream(self, ostream):
+ return self._loose_db.set_ostream(ostream)
+
+ #} END objectdbw interface
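+
+# Minimal usage sketch (path is an example); note the root must be the
+# ./objects directory, not the .git directory:
+#
+#   odb = PureGitODB('/tmp/project/.git/objects')
+#   print odb.size()
+#   print odb.databases()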
+
+
+
+class PurePartialGitDB(PureGitODB,
+ PureRepositoryPathsMixin, PureConfigurationMixin,
+ PureReferencesMixin, PureSubmoduleDB,
+ PureIndexDB, PureTransportDB
+ # HighLevelRepository Currently not implemented !
+ ):
+ """Git like database with support for object lookup as well as reference resolution.
+ Our rootpath is set to the actual .git directory (bare on unbare).
+
+ The root_path will be the git objects directory. Use git_path() to obtain the actual top-level
+ git directory."""
+ #directories
+
+ def __init__(self, root_path):
+ """Initialize ourselves on the .git directory, or the .git/objects directory."""
+ PureRepositoryPathsMixin._initialize(self, root_path)
+ super(PurePartialGitDB, self).__init__(self.objects_dir)
+
+
+
+class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface):
+ """Pure git database with a compatability layer required by 0.3x code"""
+
diff --git a/git/db/py/loose.py b/git/db/py/loose.py
new file mode 100644
index 00000000..6e72aff0
--- /dev/null
+++ b/git/db/py/loose.py
@@ -0,0 +1,263 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import (
+ PureRootPathDB,
+ PureObjectDBR,
+ PureObjectDBW
+ )
+
+
+from git.exc import (
+ InvalidDBRoot,
+ BadObject,
+ AmbiguousObjectName
+ )
+
+from git.stream import (
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ FDStream,
+ Sha1Writer
+ )
+
+from git.base import (
+ OStream,
+ OInfo
+ )
+
+from git.util import (
+ file_contents_ro_filepath,
+ ENOENT,
+ hex_to_bin,
+ bin_to_hex,
+ exists,
+ chmod,
+ isdir,
+ isfile,
+ remove,
+ mkdir,
+ rename,
+ dirname,
+ basename,
+ join
+ )
+
+from git.fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object,
+ stream_copy
+ )
+
+import tempfile
+import mmap
+import sys
+import os
+
+
+__all__ = ( 'PureLooseObjectODB', )
+
+
+class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW):
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+ # On windows we need to keep it writable, otherwise it cannot be removed
+ # either
+ new_objects_mode = 0444
+ if os.name == 'nt':
+ new_objects_mode = 0644
+
+
+ def __init__(self, root_path):
+ super(PureLooseObjectODB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(bin_to_hex(sha)))
+ try:
+ return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
+ except OSError,e:
+ if e.errno == ENOENT:
+ raise BadObject(sha)
+ # END handle missing object file
+ # the open failed because of our additional flags (i.e. O_NOATIME on an
+ # unsupported mount) - disable them for future calls and retry once
+ self._fd_open_flags = 0
+ try:
+ return file_contents_ro_filepath(db_path)
+ except OSError:
+ raise BadObject(sha)
+ # END retry without additional flags
+ # END exception handling
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+ raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+ return super(PureLooseObjectODB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(bin_to_hex(sha))
+ return True
+ except BadObject:
+ return False
+ # END check existance
+
+ def partial_to_complete_sha_hex(self, partial_hexsha):
+ """:return: 20 byte binary sha1 string which matches the given name uniquely
+ :param name: hexadecimal partial name
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for binsha in self.sha_iter():
+ if bin_to_hex(binsha).startswith(partial_hexsha):
+ # sha_iter cannot yield the same object twice, so a second match means ambiguity
+ if candidate is not None:
+ raise AmbiguousObjectName(partial_hexsha)
+ candidate = binsha
+ # END for each object
+ if candidate is None:
+ raise BadObject(partial_hexsha)
+ return candidate
+
+ def store(self, istream):
+ """note: The sha we produce will be hex by nature"""
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+
+ if istream.binsha is None:
+ writer = FDCompressedSha1Writer(fd)
+ else:
+ writer = FDStream(fd)
+ # END handle direct stream copies
+ # END handle custom writer
+
+ try:
+ try:
+ if istream.binsha is not None:
+ # copy as much as possible, the actual uncompressed item size might
+ # be smaller than the compressed version
+ stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size)
+ else:
+ # write object with header, we have to make a new one
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ # END handle direct stream copies
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+ hexsha = None
+ if istream.binsha:
+ hexsha = istream.hexsha
+ else:
+ hexsha = writer.sha(as_hex=True)
+ # END handle sha
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(hexsha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ # rename onto existing doesn't work on windows
+ if os.name == 'nt' and isfile(obj_path):
+ remove(obj_path)
+ # END handle win32
+ rename(tmp_path, obj_path)
+
+ # make sure it's readable for all! It started out as a rw------- tmp file
+ # but needs to be rw-r--r--
+ chmod(obj_path, self.new_objects_mode)
+ # END handle tmp_path
+
+ istream.binsha = hex_to_bin(hexsha)
+ return istream
+
+ def sha_iter(self):
+ # find all files which look like an object, extract sha from there
+ for root, dirs, files in os.walk(self.root_path()):
+ root_base = basename(root)
+ if len(root_base) != 2:
+ continue
+
+ for f in files:
+ if len(f) != 38:
+ continue
+ yield hex_to_bin(root_base + f)
+ # END for each file
+ # END for each walk iteration
+
+ def size(self):
+ return len(tuple(self.sha_iter()))
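+
+# Usage sketch (paths and content are examples; IStream signature assumed to
+# be (type, size, stream)):
+#
+#   from git.base import IStream
+#   from cStringIO import StringIO
+#
+#   odb = PureLooseObjectODB('/tmp/objects')
+#   istream = odb.store(IStream('blob', 5, StringIO('hello')))
+#   assert odb.has_object(istream.binsha)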
+
diff --git a/git/db/py/mem.py b/git/db/py/mem.py
new file mode 100644
index 00000000..da02dbdd
--- /dev/null
+++ b/git/db/py/mem.py
@@ -0,0 +1,112 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Contains the MemoryDatabase implementation"""
+from base import (
+ PureObjectDBR,
+ PureObjectDBW
+ )
+from loose import PureLooseObjectODB
+from git.base import (
+ OStream,
+ IStream,
+ )
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation
+ )
+from git.stream import (
+ ZippedStoreShaWriter,
+ DecompressMemMapReader,
+ )
+
+from cStringIO import StringIO
+
+__all__ = ("PureMemoryDB", )
+
+class PureMemoryDB(PureObjectDBR, PureObjectDBW):
+ """A memory database stores everything to memory, providing fast IO and object
+ retrieval. It should be used to buffer results and obtain SHAs before writing
+ it to the actual physical storage, as it allows to query whether object already
+ exists in the target storage before introducing actual IO
+
+ :note: memory is currently not threadsafe, hence the async methods cannot be used
+ for storing"""
+
+ def __init__(self):
+ super(PureMemoryDB, self).__init__()
+ self._db = PureLooseObjectODB("path/doesnt/matter")
+
+ # maps 20 byte shas to their OStream objects
+ self._cache = dict()
+
+ def set_ostream(self, stream):
+ raise UnsupportedOperation("PureMemoryDB's always stream into memory")
+
+ def store(self, istream):
+ zstream = ZippedStoreShaWriter()
+ self._db.set_ostream(zstream)
+
+ istream = self._db.store(istream)
+ zstream.close() # close to flush
+ zstream.seek(0)
+
+ # don't provide a size, the stream is written in object format, hence the
+ # header needs decompression
+ decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False)
+ self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream)
+
+ return istream
+
+ def store_async(self, reader):
+ raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access")
+
+ def has_object(self, sha):
+ return sha in self._cache
+
+ def info(self, sha):
+ # we always return streams, which are infos as well
+ return self.stream(sha)
+
+ def stream(self, sha):
+ try:
+ ostream = self._cache[sha]
+ # rewind stream for the next one to read
+ ostream.stream.seek(0)
+ return ostream
+ except KeyError:
+ raise BadObject(sha)
+ # END exception handling
+
+ def size(self):
+ return len(self._cache)
+
+ def sha_iter(self):
+ return self._cache.iterkeys()
+
+
+ #{ Interface
+ def stream_copy(self, sha_iter, odb):
+ """Copy the streams as identified by sha's yielded by sha_iter into the given odb
+ The streams will be copied directly
+ :note: the object will only be written if it did not exist in the target db
+ :return: amount of streams actually copied into odb. If smaller than the amount
+ of input shas, one or more objects did already exist in odb"""
+ count = 0
+ for sha in sha_iter:
+ if odb.has_object(sha):
+ continue
+ # END check object existence
+
+ ostream = self.stream(sha)
+ # compressed data including header
+ sio = StringIO(ostream.stream.data())
+ istream = IStream(ostream.type, ostream.size, sio, sha)
+
+ odb.store(istream)
+ count += 1
+ # END for each sha
+ return count
+ #} END interface
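+
+# Sketch: buffer objects in memory first, then copy them into a persistent
+# database ('target_odb' is a placeholder for any object database providing
+# has_object and store; stream content is an example):
+#
+#   from git.base import IStream
+#   from cStringIO import StringIO
+#
+#   mdb = PureMemoryDB()
+#   mdb.store(IStream('blob', 4, StringIO('data')))
+#   copied = mdb.stream_copy(mdb.sha_iter(), target_odb)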
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
new file mode 100644
index 00000000..75b75468
--- /dev/null
+++ b/git/db/py/pack.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Module containing a database to deal with packs"""
+from git.db import CachingDB
+from base import (
+ PureRootPathDB,
+ PureObjectDBR
+ )
+
+from git.util import LazyMixin
+
+from git.exc import (
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
+
+from git.pack import PackEntity
+
+import os
+import glob
+
+__all__ = ('PurePackedODB', )
+
+#{ Utilities
+
+
+class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
+ """A database operating on a set of object packs"""
+
+ # the type to use when instantiating a pack entity
+ PackEntityCls = PackEntity
+
+ # sort the priority list every N queries
+ # Higher values are better, performance tests don't show this has
+ # any effect, but it should have one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PurePackedODB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazy loaded list
+ self._hit_count = 0 # number of hits
+ self._st_mtime = 0 # last modification date of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ :note: This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst thing that can happen though is a counter that
+ was not incremented, or the list being in the wrong order. So we save
+ the time for locking here; let's see how that goes"""
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+ # NOTE: considering packs don't change very often, we save this call
+ # and leave it to the super-caller to trigger that
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for i in xrange(index.size()):
+ yield sha_by_index(i)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return reduce(lambda x,y: x+y, sizes, 0)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add PureObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Update our cache with the actually existing packs on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones.
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git-convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+ # init the hit-counter/priority with the size, a good measure for hit-
+ # probability. It's implemented so that only 12 bytes will be read
+ entity = self.PackEntityCls(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+ # reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ """:return: 20 byte sha as inferred by the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical representation.
+ It is required as binary shas cannot convey whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
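+
+# Usage sketch (path is an example): the database points at the pack
+# directory and picks up new packfiles via update_cache():
+#
+#   pdb = PurePackedODB('/tmp/project/.git/objects/pack')
+#   pdb.update_cache(force=True)
+#   print pdb.size()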
diff --git a/git/db/py/ref.py b/git/db/py/ref.py
new file mode 100644
index 00000000..d2c77a3a
--- /dev/null
+++ b/git/db/py/ref.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from base import PureCompoundDB
+
+import os
+__all__ = ('PureReferenceDB', )
+
+class PureReferenceDB(PureCompoundDB):
+ """A database consisting of database referred to in a file"""
+
+ # Configuration
+ # Specifies the object database to use for the paths found in the alternates
+ # file. If None, it defaults to the PureGitODB
+ ObjectDBCls = None
+
+ def __init__(self, ref_file):
+ super(PureReferenceDB, self).__init__()
+ self._ref_file = ref_file
+
+ def _set_cache_(self, attr):
+ if attr == '_dbs':
+ self._dbs = list()
+ self._update_dbs_from_ref_file()
+ else:
+ super(PureReferenceDB, self)._set_cache_(attr)
+ # END handle attrs
+
+ def _update_dbs_from_ref_file(self):
+ dbcls = self.ObjectDBCls
+ if dbcls is None:
+ # late import
+ import complex
+ dbcls = complex.PureGitODB
+ # END get db type
+
+ # try to get as many as possible, don't fail if some are unavailable
+ ref_paths = list()
+ try:
+ ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()]
+ except (OSError, IOError):
+ pass
+ # END handle alternates
+
+ ref_paths_set = set(ref_paths)
+ cur_ref_paths_set = set(db.root_path() for db in self._dbs)
+
+ # remove existing
+ for path in (cur_ref_paths_set - ref_paths_set):
+ for i, db in enumerate(self._dbs[:]):
+ if db.root_path() == path:
+ del(self._dbs[i])
+ break
+ # END del matching db
+ # END for each path to remove
+
+ # add new
+ # sort them to maintain order
+ added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p))
+ for path in added_paths:
+ try:
+ db = dbcls(path)
+ # force an update to verify path
+ if isinstance(db, PureCompoundDB):
+ db.databases()
+ # END verification
+ self._dbs.append(db)
+ except Exception:
+ # ignore invalid paths or issues during database initialization
+ pass
+ # END for each path to add
+
+ def update_cache(self, force=False):
+ # re-read alternates and update databases
+ self._update_dbs_from_ref_file()
+ return super(PureReferenceDB, self).update_cache(force)
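+
+# Usage sketch (path is an example): point the database at an
+# objects/info/alternates style file listing object database roots:
+#
+#   rdb = PureReferenceDB('/tmp/project/.git/objects/info/alternates')
+#   print rdb.databases()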
diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py
new file mode 100644
index 00000000..7bea779e
--- /dev/null
+++ b/git/db/py/resolve.py
@@ -0,0 +1,367 @@
+"""Module with an implementation for refspec parsing. It is the pure-python
+version assuming compatible interface for reference and object types"""
+
+from git.db.interface import ReferencesMixin
+from git.exc import BadObject
+from git.refs import (
+ SymbolicReference,
+ Reference,
+ HEAD,
+ Head,
+ TagReference
+ )
+
+from git.objects.base import Object
+from git.objects.commit import Commit
+from git.util import (
+ join,
+ isdir,
+ isfile,
+ hex_to_bin,
+ bin_to_hex,
+ is_git_dir
+ )
+from string import digits
+import os
+import re
+
+__all__ = ["PureReferencesMixin"]
+
+#{ Utilities
+
+def short_to_long(odb, hexsha):
+ """:return: long hexadecimal sha1 from the given potentially shortened hexsha,
+ or None if no candidate could be found
+ :param hexsha: hexsha with less than 40 characters"""
+ try:
+ return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
+ except BadObject:
+ return None
+ # END exception handling
+
+
+def name_to_object(repo, name, return_ref=False):
+ """
+ :return: object specified by the given name - hexshas (short and long)
+ as well as references are supported
+ :param return_ref: if name specifies a reference, we will return the reference
+ instead of the object. Otherwise it will raise BadObject
+ """
+ hexsha = None
+
+ # is it a hexsha? Try the most common lengths, which are 7 to 40 characters
+ if repo.re_hexsha_shortened.match(name):
+ if len(name) != 40:
+ # find long sha for short sha
+ hexsha = short_to_long(repo.odb, name)
+ else:
+ hexsha = name
+ # END handle short shas
+ #END find sha if it matches
+
+ # if we couldn't find an object for what seemed to be a short hexsha
+ # try to find it as reference anyway, it could be named 'aaa' for instance
+ if hexsha is None:
+ for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
+ try:
+ hexsha = SymbolicReference.dereference_recursive(repo, base % name)
+ if return_ref:
+ return SymbolicReference(repo, base % name)
+ #END handle symbolic ref
+ break
+ except ValueError:
+ pass
+ # END for each base
+ # END handle hexsha
+
+ # didn't find any ref, this is an error
+ if return_ref:
+ raise BadObject("Couldn't find reference named %r" % name)
+ #END handle return ref
+
+ # tried everything ? fail
+ if hexsha is None:
+ raise BadObject(name)
+ # END assert hexsha was found
+
+ return Object.new_from_sha(repo, hex_to_bin(hexsha))
+
+def deref_tag(tag):
+ """Recursively dereference a tag and return the resulting object"""
+ while True:
+ try:
+ tag = tag.object
+ except AttributeError:
+ break
+ # END dereference tag
+ return tag
+
+def to_commit(obj):
+ """Convert the given object to a commit if possible and return it"""
+ if obj.type == 'tag':
+ obj = deref_tag(obj)
+
+ if obj.type != "commit":
+ raise ValueError("Cannot convert object %r to type commit" % obj)
+ # END verify type
+ return obj
+
+def rev_parse(repo, rev):
+ """
+ :return: Object at the given revision, either Commit, Tag, Tree or Blob
+ :param rev: git-rev-parse compatible revision specification, please see
+ http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
+ for details
+ :note: Currently there is no access to the rev-log; rev-specs may only contain
+ topological tokens such as ~ and ^.
+ :raise BadObject: if the given revision could not be found
+ :raise ValueError: If rev couldn't be parsed
+ :raise IndexError: If invalid reflog index is specified"""
+
+ # colon search mode ?
+ if rev.startswith(':/'):
+ # colon search mode
+ raise NotImplementedError("commit by message search ( regex )")
+ # END handle search
+
+ obj = None
+ ref = None
+ output_type = "commit"
+ start = 0
+ parsed_to = 0
+ lr = len(rev)
+ while start < lr:
+ if rev[start] not in "^~:@":
+ start += 1
+ continue
+ # END handle start
+
+ token = rev[start]
+
+ if obj is None:
+ # token is a rev name
+ if start == 0:
+ ref = repo.head.ref
+ else:
+ if token == '@':
+ ref = name_to_object(repo, rev[:start], return_ref=True)
+ else:
+ obj = name_to_object(repo, rev[:start])
+ #END handle token
+ #END handle refname
+
+ if ref is not None:
+ obj = ref.commit
+ #END handle ref
+ # END initialize obj on first token
+
+
+ start += 1
+
+ # try to parse {type}
+ if start < lr and rev[start] == '{':
+ end = rev.find('}', start)
+ if end == -1:
+ raise ValueError("Missing closing brace to define type in %s" % rev)
+ output_type = rev[start+1:end] # exclude brace
+
+ # handle type
+ if output_type == 'commit':
+ pass # default
+ elif output_type == 'tree':
+ try:
+ obj = to_commit(obj).tree
+ except (AttributeError, ValueError):
+ pass # error raised later
+ # END exception handling
+ elif output_type in ('', 'blob'):
+ if obj.type == 'tag':
+ obj = deref_tag(obj)
+ else:
+ # cannot do anything for non-tags
+ pass
+ # END handle tag
+ elif token == '@':
+ # try single int
+ assert ref is not None, "Require Reference to access reflog"
+ revlog_index = None
+ try:
+ # transform reversed index into the format of our revlog
+ revlog_index = -(int(output_type)+1)
+ except ValueError:
+ # TODO: Try to parse the other date options, using parse_date
+ # maybe
+ raise NotImplementedError("Support for additional @{...} modes not implemented")
+ #END handle revlog index
+
+ try:
+ entry = ref.log_entry(revlog_index)
+ except IndexError:
+ raise IndexError("Invalid revlog index: %i" % revlog_index)
+ #END handle index out of bound
+
+ obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha))
+
+ # make it pass the following checks
+ output_type = None
+ else:
+ raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
+ # END handle output type
+
+ # empty output types don't require any specific type, it's just about dereferencing tags
+ if output_type and obj.type != output_type:
+ raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
+ # END verify output type
+
+ start = end+1 # skip brace
+ parsed_to = start
+ continue
+ # END parse type
+
+ # try to parse a number
+ num = 0
+ if token != ":":
+ found_digit = False
+ while start < lr:
+ if rev[start] in digits:
+ num = num * 10 + int(rev[start])
+ start += 1
+ found_digit = True
+ else:
+ break
+ # END handle number
+ # END number parse loop
+
+ # no explicit number given, 1 is the default
+ # It could be 0 though
+ if not found_digit:
+ num = 1
+ # END set default num
+ # END number parsing only if non-blob mode
+
+
+ parsed_to = start
+ # handle hierarchy walk
+ try:
+ if token == "~":
+ obj = to_commit(obj)
+ for item in xrange(num):
+ obj = obj.parents[0]
+ # END for each history item to walk
+ elif token == "^":
+ obj = to_commit(obj)
+ # must be n'th parent
+ if num:
+ obj = obj.parents[num-1]
+ elif token == ":":
+ if obj.type != "tree":
+ obj = obj.tree
+ # END get tree type
+ obj = obj[rev[start:]]
+ parsed_to = lr
+ else:
+ raise ValueError("Invalid token: %r" % token)
+ # END end handle tag
+ except (IndexError, AttributeError):
+ raise BadObject("Invalid Revision in %s" % rev)
+ # END exception handling
+ # END parse loop
+
+ # still no obj? It's probably a simple name
+ if obj is None:
+ obj = name_to_object(repo, rev)
+ parsed_to = lr
+ # END handle simple name
+
+ if obj is None:
+ raise ValueError("Revision specifier could not be parsed: %s" % rev)
+
+ if parsed_to != lr:
+ raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
+
+ return obj
+
+#} END utilities
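+
+# A few rev specs the parser above understands (illustrative; 'repo' must
+# provide the PureReferencesMixin interface):
+#
+#   rev_parse(repo, 'HEAD~3')          # third ancestor of HEAD
+#   rev_parse(repo, 'master^2')        # second parent of the merge at master
+#   rev_parse(repo, 'v1.0^{tree}')     # tree pointed to by tag v1.0
+#   rev_parse(repo, 'HEAD:setup.py')   # blob at the given path in HEAD's tree
+#   rev_parse(repo, 'HEAD@{1}')        # previous reflog position of HEAD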
+
+class PureReferencesMixin(ReferencesMixin):
+ """Pure-Python refparse implementation"""
+
+ re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
+ re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
+
+ #{ Configuration
+ # Types to use when instantiating references
+ TagReferenceCls = TagReference
+ HeadCls = Head
+ ReferenceCls = Reference
+ HEADCls = HEAD
+ CommitCls = Commit
+ #} END configuration
+
+ def resolve(self, name):
+ return self.resolve_object(name).binsha
+
+ def resolve_object(self, name):
+ return rev_parse(self, name)
+
+ @property
+ def references(self):
+ return self.ReferenceCls.list_items(self)
+
+ @property
+ def heads(self):
+ return self.HeadCls.list_items(self)
+
+ @property
+ def tags(self):
+ return self.TagReferenceCls.list_items(self)
+
+ def tag(self, name):
+ return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name))
+
+ def commit(self, rev=None):
+ if rev is None:
+ return self.head.commit
+ else:
+ return self.resolve_object(str(rev)+"^0")
+ #END handle revision
+
+ def iter_trees(self, *args, **kwargs):
+ return ( c.tree for c in self.iter_commits(*args, **kwargs) )
+
+ def tree(self, rev=None):
+ if rev is None:
+ return self.head.commit.tree
+ else:
+ return self.resolve_object(str(rev)+"^{tree}")
+
+ def iter_commits(self, rev=None, paths='', **kwargs):
+ if rev is None:
+ rev = self.head.commit
+
+ return self.CommitCls.iter_items(self, rev, paths, **kwargs)
+
+
+ @property
+ def head(self):
+ return self.HEADCls(self, 'HEAD')
+
+ def create_head(self, path, commit='HEAD', force=False, logmsg=None):
+ return self.HeadCls.create(self, path, commit, force, logmsg)
+
+ def delete_head(self, *heads, **kwargs):
+ return self.HeadCls.delete(self, *heads, **kwargs)
+
+ def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
+ return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs)
+
+ def delete_tag(self, *tags):
+ return self.TagReferenceCls.delete(self, *tags)
+
+
+ # compat
+ branches = heads
+ refs = references
diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py
new file mode 100644
index 00000000..735f90b1
--- /dev/null
+++ b/git/db/py/submodule.py
@@ -0,0 +1,33 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+from git.objects.submodule.base import Submodule
+from git.objects.submodule.root import RootModule
+from git.db.interface import SubmoduleDB
+
+__all__ = ["PureSubmoduleDB"]
+
+class PureSubmoduleDB(SubmoduleDB):
+ """Pure python implementation of submodule functionality"""
+
+ @property
+ def submodules(self):
+ return Submodule.list_items(self)
+
+ def submodule(self, name):
+ try:
+ return self.submodules[name]
+ except IndexError:
+ raise ValueError("Didn't find submodule named %r" % name)
+ # END exception handling
+
+ def create_submodule(self, *args, **kwargs):
+ return Submodule.add(self, *args, **kwargs)
+
+ def iter_submodules(self, *args, **kwargs):
+ return RootModule(self).traverse(*args, **kwargs)
+
+ def submodule_update(self, *args, **kwargs):
+ return RootModule(self).update(*args, **kwargs)
+
diff --git a/git/db/py/transport.py b/git/db/py/transport.py
new file mode 100644
index 00000000..00d222b0
--- /dev/null
+++ b/git/db/py/transport.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
+#
+# This module is part of GitDB and is released under
+# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
+"""Implement a transport compatible database which sends objects using the git protocol"""
+
+from git.db.interface import ( TransportDB,
+ PushInfo,
+ FetchInfo,
+ RefSpec )
+
+from git.refs.remote import RemoteReference
+from git.remote import Remote
+
+
+__all__ = ["PureTransportDB"]
+
+class PurePushInfo(PushInfo):
+ """TODO: Implementation"""
+ __slots__ = tuple()
+
+
+
+class PureFetchInfo(FetchInfo):
+ """TODO"""
+ __slots__ = tuple()
+
+
+class PureTransportDB(TransportDB):
+ # The following variables need to be set by the derived class
+ #{ Configuration
+ protocol = None
+ RemoteCls = Remote
+ #} end configuration
+
+ #{ Interface
+
+ def fetch(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ def push(self, url, refspecs, progress=None, **kwargs):
+ raise NotImplementedError()
+
+ @property
+ def remotes(self):
+ return self.RemoteCls.list_items(self)
+
+ def remote(self, name='origin'):
+ return self.remotes[name]
+
+ def create_remote(self, name, url, **kwargs):
+ return self.RemoteCls.create(self, name, url, **kwargs)
+
+ def delete_remote(self, remote):
+ return self.RemoteCls.remove(self, remote)
+
+ #} end interface
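+
+# Usage sketch for the remote accessors (names and URL are examples):
+#
+#   origin = repo.remote('origin')
+#   mirror = repo.create_remote('mirror', 'git://example.com/repo.git')
+#   repo.delete_remote(mirror)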
+