From 4177eefd7bdaea96a529b00ba9cf751924ede202 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 5 May 2011 19:43:22 +0200 Subject: Added all code from gitdb to gitpython. Next is to make it generally work. Then the tests will need some work --- git/db/py/__init__.py | 13 ++ git/db/py/base.py | 351 +++++++++++++++++++++++++++++++++++++++++++++++++ git/db/py/git.py | 113 ++++++++++++++++ git/db/py/loose.py | 262 ++++++++++++++++++++++++++++++++++++ git/db/py/mem.py | 113 ++++++++++++++++ git/db/py/pack.py | 212 +++++++++++++++++++++++++++++ git/db/py/ref.py | 77 +++++++++++ git/db/py/resolve.py | 297 +++++++++++++++++++++++++++++++++++++++++ git/db/py/transport.py | 89 +++++++++++++ 9 files changed, 1527 insertions(+) create mode 100644 git/db/py/__init__.py create mode 100644 git/db/py/base.py create mode 100644 git/db/py/git.py create mode 100644 git/db/py/loose.py create mode 100644 git/db/py/mem.py create mode 100644 git/db/py/pack.py create mode 100644 git/db/py/ref.py create mode 100644 git/db/py/resolve.py create mode 100644 git/db/py/transport.py (limited to 'git/db/py') diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py new file mode 100644 index 00000000..046c699d --- /dev/null +++ b/git/db/py/__init__.py @@ -0,0 +1,13 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php + +from base import * +from loose import * +from mem import * +from pack import * +from git import * +from ref import * +from resolve import * +from transport import * diff --git a/git/db/py/base.py b/git/db/py/base.py new file mode 100644 index 00000000..c378b10e --- /dev/null +++ b/git/db/py/base.py @@ -0,0 +1,351 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: 
class PureObjectDBR(ObjectDBR):
    """Pure python implementation of the asynchronous and partial-sha parts of the
    ObjectDBR interface, expressed through the synchronous methods of the instance"""

    #{ Query Interface

    def has_object_async(self, reader):
        """:return: whatever pool.add_task returns for a ChannelThreadTask whose
            function maps a sha to the tuple (sha, self.has_object(sha))"""
        probe = lambda sha: (sha, self.has_object(sha))
        return pool.add_task(ChannelThreadTask(reader, str(self.has_object_async), probe))

    def info_async(self, reader):
        """:return: whatever pool.add_task returns for a ChannelThreadTask that
            uses self.info as its function"""
        return pool.add_task(ChannelThreadTask(reader, str(self.info_async), self.info))

    def stream_async(self, reader):
        """:return: whatever pool.add_task returns for a ChannelThreadTask that
            uses self.stream as its function"""
        # the base implementation simply delegates to the synchronous stream method
        return pool.add_task(ChannelThreadTask(reader, str(self.stream_async), self.stream))

    def partial_to_complete_sha_hex(self, partial_hexsha):
        """:return: result of self.partial_to_complete_sha for the binary form of
            the given partial hex sha
        :param partial_hexsha: hexadecimal partial sha, odd lengths are allowed"""
        hex_len = len(partial_hexsha)
        # an odd amount of hex digits is padded with a trailing zero nibble before
        # conversion, the original length is handed on separately
        padded_hexsha = (partial_hexsha + "0") if hex_len % 2 != 0 else partial_hexsha
        return self.partial_to_complete_sha(hex_to_bin(padded_hexsha), hex_len)

    #} END query interface
def _databases_recursive(database, output):
    """Fill output list with the non-compound databases reachable from database, in order.

    For a compound database, all of its direct non-compound members are appended
    first. Afterwards each nested compound database is expanded recursively, in
    the order in which it was listed.

    :param database: database to flatten, compound or not
    :param output: list receiving the databases, it is modified in place"""
    if not isinstance(database, CompoundDB):
        output.append(database)
        return
    # END handle plain database

    # walk the members only once, as this also works if databases() should ever
    # hand out a one-shot iterator
    nested = list()
    for db in database.databases():
        if isinstance(db, CompoundDB):
            nested.append(db)
        else:
            output.append(db)
        # END handle member type
    # END for each member

    for cdb in nested:
        _databases_recursive(cdb, output)
    # END for each nested compound database
db in self._dbs), 0) + + def sha_iter(self): + return chain(*(db.sha_iter() for db in self._dbs)) + + #} END object DBR Interface + + #{ Interface + + def databases(self): + return tuple(self._dbs) + + def update_cache(self, force=False): + # something might have changed, clear everything + self._db_cache.clear() + stat = False + for db in self._dbs: + if isinstance(db, CachingDB): + stat |= db.update_cache(force) + # END if is caching db + # END for each database to update + return stat + + def partial_to_complete_sha_hex(self, partial_hexsha): + databases = self.databases() + + len_partial_hexsha = len(partial_hexsha) + if len_partial_hexsha % 2 != 0: + partial_binsha = hex_to_bin(partial_hexsha + "0") + else: + partial_binsha = hex_to_bin(partial_hexsha) + # END assure successful binary conversion + + candidate = None + for db in self._dbs: + full_bin_sha = None + try: + if hasattr(db, 'partial_to_complete_sha_hex'): + full_bin_sha = db.partial_to_complete_sha_hex(partial_hexsha) + else: + full_bin_sha = db.partial_to_complete_sha(partial_binsha, len_partial_hexsha) + # END handle database type + except BadObject: + continue + # END ignore bad objects + if full_bin_sha: + if candidate and candidate != full_bin_sha: + raise AmbiguousObjectName(partial_hexsha) + candidate = full_bin_sha + # END handle candidate + # END for each db + if not candidate: + raise BadObject(partial_binsha) + return candidate + + def partial_to_complete_sha(self, partial_binsha, hex_len): + """Simple adaptor to feed into our implementation""" + return self.partial_to_complete_sha_hex(bin_to_hex(partial_binsha)[:hex_len]) + #} END interface + + +class PureRepositoryPathsMixin(RepositoryPathsMixin): + # slots has no effect here, its just to keep track of used attrs + __slots__ = ("_git_path", '_bare') + + #{ Configuration + repo_dir = '.git' + objs_dir = 'objects' + #} END configuration + + #{ Subclass Interface + def _initialize(self, path): + epath = abspath(expandvars(expanduser(path or 
os.getcwd()))) + + if not exists(epath): + raise InvalidDBRoot(epath) + #END check file + + self._working_tree_dir = None + self._git_path = None + curpath = epath + + # walk up the path to find the .git dir + while curpath: + if is_git_dir(curpath): + self._git_path = curpath + self._working_tree_dir = os.path.dirname(curpath) + break + gitpath = join(curpath, self.repo_dir) + if is_git_dir(gitpath): + self._git_path = gitpath + self._working_tree_dir = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + # END while curpath + + if self._git_path is None: + raise InvalidDBRoot(epath) + # END path not found + + self._bare = self._git_path.endswith(self.repo_dir) + if hasattr(self, 'config_reader'): + try: + self._bare = self.config_reader("repository").getboolean('core','bare') + except Exception: + # lets not assume the option exists, although it should + pass + #END check bare flag + + + #} end subclass interface + + #{ Interface + + def is_bare(self): + return self._bare + + def git_path(self): + return self._git_path + + def working_tree_path(self): + if self.is_bare(): + raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path()) + #END assertion + return dirname(self.git_path()) + + def objects_path(self): + return join(self.git_path(), self.objs_dir) + + def working_dir(self): + if self.is_bare(): + return self.git_path() + else: + return self.working_tree_dir() + #END handle bare state + + #} END interface + + +class PureConfigurationMixin(ConfigurationMixin): + + #{ Configuration + system_config_file_name = "gitconfig" + repo_config_file_name = "config" + #} END + + def __init__(self, *args, **kwargs): + """Verify prereqs""" + assert hasattr(self, 'git_path') + + def _path_at_level(self, level ): + # we do not support an absolute path of the gitconfig on windows , + # use the global config instead + if sys.platform == "win32" and level == "system": + level = "global" + #END 
handle windows + + if level == "system": + return "/etc/%s" % self.system_config_file_name + elif level == "global": + return normpath(expanduser("~/.%s" % self.system_config_file_name)) + elif level == "repository": + return join(self.git_path(), self.repo_config_file_name) + #END handle level + + raise ValueError("Invalid configuration level: %r" % level) + + #{ Interface + + def config_reader(self, config_level=None): + files = None + if config_level is None: + files = [ self._path_at_level(f) for f in self.config_level ] + else: + files = [ self._path_at_level(config_level) ] + #END handle level + return GitConfigParser(files, read_only=True) + + def config_writer(self, config_level="repository"): + return GitConfigParser(self._path_at_level(config_level), read_only=False) + + #} END interface + diff --git a/git/db/py/git.py b/git/db/py/git.py new file mode 100644 index 00000000..bc148c6f --- /dev/null +++ b/git/db/py/git.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureCompoundDB, + PureObjectDBW, + PureRootPathDB, + PureRepositoryPathsMixin, + PureConfigurationMixin, + ) + +from resolve import PureReferencesMixin + +from loose import PureLooseObjectODB +from pack import PurePackedODB +from ref import PureReferenceDB + +from gitdb.util import ( + LazyMixin, + normpath, + join, + dirname + ) +from gitdb.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) +import os + +__all__ = ('PureGitODB', 'PureGitDB') + + +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): + """A git-style object-only database, which contains all objects in the 'objects' + subdirectory. + :note: The type needs to be initialized on the ./objects directory to function, + as it deals solely with object lookup. 
Use a PureGitDB type if you need + reference and push support.""" + # Configuration + PackDBCls = PurePackedODB + LooseDBCls = PureLooseObjectODB + PureReferenceDBCls = PureReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git ./objects directory""" + super(PureGitODB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.PureReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(PureGitODB, self)._set_cache_(attr) + # END handle attrs + + #{ PureObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface + + +class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): + """Git like database with support for object lookup as well as reference resolution. + Our rootpath is set to the actual .git directory (bare on unbare). + + The root_path will be the git objects directory. 
Use git_path() to obtain the actual top-level + git directory.""" + #directories + + def __init__(self, root_path): + """Initialize ourselves on the .git directory, or the .git/objects directory.""" + PureRepositoryPathsMixin._initialize(self, root_path) + super(PureGitDB, self).__init__(self.objects_path()) + + + diff --git a/git/db/py/loose.py b/git/db/py/loose.py new file mode 100644 index 00000000..34e31da6 --- /dev/null +++ b/git/db/py/loose.py @@ -0,0 +1,262 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureRootPathDB, + PureObjectDBR, + PureObjectDBW + ) + + +from gitdb.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) + +from gitdb.stream import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + FDStream, + Sha1Writer + ) + +from gitdb.base import ( + OStream, + OInfo + ) + +from gitdb.util import ( + file_contents_ro_filepath, + ENOENT, + hex_to_bin, + bin_to_hex, + exists, + chmod, + isdir, + isfile, + remove, + mkdir, + rename, + dirname, + basename, + join + ) + +from gitdb.fun import ( + chunk_size, + loose_object_header_info, + write_object, + stream_copy + ) + +import tempfile +import mmap +import sys +import os + + +__all__ = ( 'PureLooseObjectODB', ) + + +class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + # On windows we need to keep it writable, otherwise it cannot be removed + # either + new_objects_mode = 0444 + if os.name == 'nt': + new_objects_mode = 0644 + + + def __init__(self, root_path): + super(PureLooseObjectODB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # 
Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(bin_to_hex(sha))) + try: + return file_contents_ro_filepath(db_path, flags=self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + return file_contents_ro_filepath(db_path) + except OSError: + raise BadObject(sha) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise 
BadObject(sha) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(PureLooseObjectODB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(bin_to_hex(sha)) + return True + except BadObject: + return False + # END check existance + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + + if istream.binsha is None: + writer = FDCompressedSha1Writer(fd) + else: + writer = FDStream(fd) + # END handle direct stream copies + # END handle custom writer + + try: + try: + if istream.binsha is not None: + # copy as much as possible, the actual uncompressed item size might + # be smaller than the compressed version + stream_copy(istream.read, writer.write, sys.maxint, self.stream_chunk_size) + else: + # write object with header, we have to make a new one + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + # END handle direct stream copies + finally: + if tmp_path: + writer.close() + # END assure target 
stream is closed + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + hexsha = None + if istream.binsha: + hexsha = istream.hexsha + else: + hexsha = writer.sha(as_hex=True) + # END handle sha + + if tmp_path: + obj_path = self.db_path(self.object_path(hexsha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + # rename onto existing doesn't work on windows + if os.name == 'nt' and isfile(obj_path): + remove(obj_path) + # END handle win322 + rename(tmp_path, obj_path) + + # make sure its readable for all ! It started out as rw-- tmp file + # but needs to be rwrr + chmod(obj_path, self.new_objects_mode) + # END handle dry_run + + istream.binsha = hex_to_bin(hexsha) + return istream + + def sha_iter(self): + # find all files which look like an object, extract sha from there + for root, dirs, files in os.walk(self.root_path()): + root_base = basename(root) + if len(root_base) != 2: + continue + + for f in files: + if len(f) != 38: + continue + yield hex_to_bin(root_base + f) + # END for each file + # END for each walk iteration + + def size(self): + return len(tuple(self.sha_iter())) + diff --git a/git/db/py/mem.py b/git/db/py/mem.py new file mode 100644 index 00000000..ba922e96 --- /dev/null +++ b/git/db/py/mem.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Contains the MemoryDatabase implementation""" +from loose import PureLooseObjectODB +from base import ( + PureObjectDBR, + PureObjectDBW + ) + +from gitdb.base import ( + OStream, + IStream, + ) + +from gitdb.exc import ( + BadObject, + UnsupportedOperation + ) +from gitdb.stream import ( + ZippedStoreShaWriter, + DecompressMemMapReader, + ) + +from cStringIO import StringIO + +__all__ = ("PureMemoryDB", ) + +class 
PureMemoryDB(PureObjectDBR, PureObjectDBW): + """A memory database stores everything to memory, providing fast IO and object + retrieval. It should be used to buffer results and obtain SHAs before writing + it to the actual physical storage, as it allows to query whether object already + exists in the target storage before introducing actual IO + + :note: memory is currently not threadsafe, hence the async methods cannot be used + for storing""" + + def __init__(self): + super(PureMemoryDB, self).__init__() + self._db = PureLooseObjectODB("path/doesnt/matter") + + # maps 20 byte shas to their OStream objects + self._cache = dict() + + def set_ostream(self, stream): + raise UnsupportedOperation("PureMemoryDB's always stream into memory") + + def store(self, istream): + zstream = ZippedStoreShaWriter() + self._db.set_ostream(zstream) + + istream = self._db.store(istream) + zstream.close() # close to flush + zstream.seek(0) + + # don't provide a size, the stream is written in object format, hence the + # header needs decompression + decomp_stream = DecompressMemMapReader(zstream.getvalue(), close_on_deletion=False) + self._cache[istream.binsha] = OStream(istream.binsha, istream.type, istream.size, decomp_stream) + + return istream + + def store_async(self, reader): + raise UnsupportedOperation("PureMemoryDBs cannot currently be used for async write access") + + def has_object(self, sha): + return sha in self._cache + + def info(self, sha): + # we always return streams, which are infos as well + return self.stream(sha) + + def stream(self, sha): + try: + ostream = self._cache[sha] + # rewind stream for the next one to read + ostream.stream.seek(0) + return ostream + except KeyError: + raise BadObject(sha) + # END exception handling + + def size(self): + return len(self._cache) + + def sha_iter(self): + return self._cache.iterkeys() + + + #{ Interface + def stream_copy(self, sha_iter, odb): + """Copy the streams as identified by sha's yielded by sha_iter into the given 
odb + The streams will be copied directly + :note: the object will only be written if it did not exist in the target db + :return: amount of streams actually copied into odb. If smaller than the amount + of input shas, one or more objects did already exist in odb""" + count = 0 + for sha in sha_iter: + if odb.has_object(sha): + continue + # END check object existance + + ostream = self.stream(sha) + # compressed data including header + sio = StringIO(ostream.stream.data()) + istream = IStream(ostream.type, ostream.size, sio, sha) + + odb.store(istream) + count += 1 + # END for each sha + return count + #} END interface diff --git a/git/db/py/pack.py b/git/db/py/pack.py new file mode 100644 index 00000000..1d0e9bfc --- /dev/null +++ b/git/db/py/pack.py @@ -0,0 +1,212 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Module containing a database to deal with packs""" +from gitdb.db import CachingDB +from base import ( + PureRootPathDB, + PureObjectDBR + ) + +from gitdb.util import LazyMixin + +from gitdb.exc import ( + BadObject, + UnsupportedOperation, + AmbiguousObjectName + ) + +from gitdb.pack import PackEntity + +import os +import glob + +__all__ = ('PurePackedODB', ) + +#{ Utilities + + +class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin): + """A database operating on a set of object packs""" + + # the type to use when instantiating a pack entity + PackEntityCls = PackEntity + + # sort the priority list every N queries + # Higher values are better, performance tests don't show this has + # any effect, but it should have one + _sort_interval = 500 + + def __init__(self, root_path): + super(PurePackedODB, self).__init__(root_path) + # list of lists with three items: + # * hits - number of times the pack was hit with a request + # * entity - Pack entity instance + # * 
sha_to_index - PackIndexFile.sha_to_index method for direct cache query + # self._entities = list() # lazy loaded list + self._hit_count = 0 # amount of hits + self._st_mtime = 0 # last modification data of our root path + + def _set_cache_(self, attr): + if attr == '_entities': + self._entities = list() + self.update_cache(force=True) + # END handle entities initialization + + def _sort_entities(self): + self._entities.sort(key=lambda l: l[0], reverse=True) + + def _pack_info(self, sha): + """:return: tuple(entity, index) for an item at the given sha + :param sha: 20 or 40 byte sha + :raise BadObject: + :note: This method is not thread-safe, but may be hit in multi-threaded + operation. The worst thing that can happen though is a counter that + was not incremented, or the list being in wrong order. So we safe + the time for locking here, lets see how that goes""" + # presort ? + if self._hit_count % self._sort_interval == 0: + self._sort_entities() + # END update sorting + + for item in self._entities: + index = item[2](sha) + if index is not None: + item[0] += 1 # one hit for you + self._hit_count += 1 # general hit count + return (item[1], index) + # END index found in pack + # END for each item + + # no hit, see whether we have to update packs + # NOTE: considering packs don't change very often, we safe this call + # and leave it to the super-caller to trigger that + raise BadObject(sha) + + #{ Object DB Read + + def has_object(self, sha): + try: + self._pack_info(sha) + return True + except BadObject: + return False + # END exception handling + + def info(self, sha): + entity, index = self._pack_info(sha) + return entity.info_at_index(index) + + def stream(self, sha): + entity, index = self._pack_info(sha) + return entity.stream_at_index(index) + + def sha_iter(self): + sha_list = list() + for entity in self.entities(): + index = entity.index() + sha_by_index = index.sha + for index in xrange(index.size()): + yield sha_by_index(index) + # END for each index + 
# END for each entity + + def size(self): + sizes = [item[1].index().size() for item in self._entities] + return reduce(lambda x,y: x+y, sizes, 0) + + #} END object db read + + #{ object db write + + def store(self, istream): + """Storing individual objects is not feasible as a pack is designed to + hold multiple objects. Writing or rewriting packs for single objects is + inefficient""" + raise UnsupportedOperation() + + def store_async(self, reader): + # TODO: add PureObjectDBRW before implementing this + raise NotImplementedError() + + #} END object db write + + + #{ Interface + + def update_cache(self, force=False): + """ + Update our cache with the acutally existing packs on disk. Add new ones, + and remove deleted ones. We keep the unchanged ones + + :param force: If True, the cache will be updated even though the directory + does not appear to have changed according to its modification timestamp. + :return: True if the packs have been updated so there is new information, + False if there was no change to the pack database""" + stat = os.stat(self.root_path()) + if not force and stat.st_mtime <= self._st_mtime: + return False + # END abort early on no change + self._st_mtime = stat.st_mtime + + # packs are supposed to be prefixed with pack- by git-convention + # get all pack files, figure out what changed + pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack"))) + our_pack_files = set(item[1].pack().path() for item in self._entities) + + # new packs + for pack_file in (pack_files - our_pack_files): + # init the hit-counter/priority with the size, a good measure for hit- + # probability. 
Its implemented so that only 12 bytes will be read + entity = self.PackEntityCls(pack_file) + self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index]) + # END for each new packfile + + # removed packs + for pack_file in (our_pack_files - pack_files): + del_index = -1 + for i, item in enumerate(self._entities): + if item[1].pack().path() == pack_file: + del_index = i + break + # END found index + # END for each entity + assert del_index != -1 + del(self._entities[del_index]) + # END for each removed pack + + # reinitialize prioritiess + self._sort_entities() + return True + + def entities(self): + """:return: list of pack entities operated upon by this database""" + return [ item[1] for item in self._entities ] + + def partial_to_complete_sha(self, partial_binsha, canonical_length): + """:return: 20 byte sha as inferred by the given partial binary sha + :param partial_binsha: binary sha with less than 20 bytes + :param canonical_length: length of the corresponding canonical representation. + It is required as binary sha's cannot display whether the original hex sha + had an odd or even number of characters + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for item in self._entities: + item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length) + if item_index is not None: + sha = item[1].index().sha(item_index) + if candidate and candidate != sha: + raise AmbiguousObjectName(partial_binsha) + candidate = sha + # END handle full sha could be found + # END for each entity + + if candidate: + return candidate + + # still not found ? 
+ raise BadObject(partial_binsha) + + #} END interface diff --git a/git/db/py/ref.py b/git/db/py/ref.py new file mode 100644 index 00000000..951f0437 --- /dev/null +++ b/git/db/py/ref.py @@ -0,0 +1,77 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import PureCompoundDB + +import os +__all__ = ('PureReferenceDB', ) + +class PureReferenceDB(PureCompoundDB): + """A database consisting of database referred to in a file""" + + # Configuration + # Specifies the object database to use for the paths found in the alternates + # file. If None, it defaults to the PureGitODB + ObjectDBCls = None + + def __init__(self, ref_file): + super(PureReferenceDB, self).__init__() + self._ref_file = ref_file + + def _set_cache_(self, attr): + if attr == '_dbs': + self._dbs = list() + self._update_dbs_from_ref_file() + else: + super(PureReferenceDB, self)._set_cache_(attr) + # END handle attrs + + def _update_dbs_from_ref_file(self): + dbcls = self.ObjectDBCls + if dbcls is None: + # late import + from git import PureGitODB + dbcls = PureGitODB + # END get db type + + # try to get as many as possible, don't fail if some are unavailable + ref_paths = list() + try: + ref_paths = [l.strip() for l in open(self._ref_file, 'r').readlines()] + except (OSError, IOError): + pass + # END handle alternates + + ref_paths_set = set(ref_paths) + cur_ref_paths_set = set(db.root_path() for db in self._dbs) + + # remove existing + for path in (cur_ref_paths_set - ref_paths_set): + for i, db in enumerate(self._dbs[:]): + if db.root_path() == path: + del(self._dbs[i]) + continue + # END del matching db + # END for each path to remove + + # add new + # sort them to maintain order + added_paths = sorted(ref_paths_set - cur_ref_paths_set, key=lambda p: ref_paths.index(p)) + for path in added_paths: + try: + db = dbcls(path) + # 
force an update to verify path + if isinstance(db, PureCompoundDB): + db.databases() + # END verification + self._dbs.append(db) + except Exception, e: + # ignore invalid paths or issues + pass + # END for each path to add + + def update_cache(self, force=False): + # re-read alternates and update databases + self._update_dbs_from_ref_file() + return super(PureReferenceDB, self).update_cache(force) diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py new file mode 100644 index 00000000..86c1e594 --- /dev/null +++ b/git/db/py/resolve.py @@ -0,0 +1,297 @@ +"""Module with an implementation for refspec parsing. It is the pure-python +version assuming compatible interface for reference and object types""" + +from gitdb.db.interface import ReferencesMixin +from gitdb.exc import BadObject +from gitdb.ref import SymbolicReference +from gitdb.object.base import Object +from gitdb.util import ( + join, + isdir, + isfile, + hex_to_bin, + bin_to_hex, + is_git_dir + ) +from string import digits +import os +import re + +__all__ = ["PureReferencesMixin"] + +#{ Utilities + +def short_to_long(odb, hexsha): + """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha + or None if no candidate could be found. + :param hexsha: hexsha with less than 40 byte""" + try: + return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) + except BadObject: + return None + # END exception handling + + +def name_to_object(repo, name, return_ref=False): + """ + :return: object specified by the given name, hexshas ( short and long ) + as well as references are supported + :param return_ref: if name specifies a reference, we will return the reference + instead of the object. Otherwise it will raise BadObject + """ + hexsha = None + + # is it a hexsha ? 
Try the most common ones, which is 7 to 40 + if repo.re_hexsha_shortened.match(name): + if len(name) != 40: + # find long sha for short sha + hexsha = short_to_long(repo.odb, name) + else: + hexsha = name + # END handle short shas + #END find sha if it matches + + # if we couldn't find an object for what seemed to be a short hexsha + # try to find it as reference anyway, it could be named 'aaa' for instance + if hexsha is None: + for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'): + try: + hexsha = SymbolicReference.dereference_recursive(repo, base % name) + if return_ref: + return SymbolicReference(repo, base % name) + #END handle symbolic ref + break + except ValueError: + pass + # END for each base + # END handle hexsha + + # didn't find any ref, this is an error + if return_ref: + raise BadObject("Couldn't find reference named %r" % name) + #END handle return ref + + # tried everything ? fail + if hexsha is None: + raise BadObject(name) + # END assert hexsha was found + + return Object.new_from_sha(repo, hex_to_bin(hexsha)) + +def deref_tag(tag): + """Recursively dereference a tag and return the resulting object""" + while True: + try: + tag = tag.object + except AttributeError: + break + # END dereference tag + return tag + +def to_commit(obj): + """Convert the given object to a commit if possible and return it""" + if obj.type == 'tag': + obj = deref_tag(obj) + + if obj.type != "commit": + raise ValueError("Cannot convert object %r to type commit" % obj) + # END verify type + return obj + +def rev_parse(repo, rev): + """ + :return: Object at the given revision, either Commit, Tag, Tree or Blob + :param rev: git-rev-parse compatible revision specification, please see + http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html + for details + :note: Currently there is no access to the rev-log, rev-specs may only contain + topological tokens such ~ and ^. 
+ :raise BadObject: if the given revision could not be found + :raise ValueError: If rev couldn't be parsed + :raise IndexError: If invalid reflog index is specified""" + + # colon search mode ? + if rev.startswith(':/'): + # colon search mode + raise NotImplementedError("commit by message search ( regex )") + # END handle search + + obj = None + ref = None + output_type = "commit" + start = 0 + parsed_to = 0 + lr = len(rev) + while start < lr: + if rev[start] not in "^~:@": + start += 1 + continue + # END handle start + + token = rev[start] + + if obj is None: + # token is a rev name + if start == 0: + ref = repo.head.ref + else: + if token == '@': + ref = name_to_object(repo, rev[:start], return_ref=True) + else: + obj = name_to_object(repo, rev[:start]) + #END handle token + #END handle refname + + if ref is not None: + obj = ref.commit + #END handle ref + # END initialize obj on first token + + + start += 1 + + # try to parse {type} + if start < lr and rev[start] == '{': + end = rev.find('}', start) + if end == -1: + raise ValueError("Missing closing brace to define type in %s" % rev) + output_type = rev[start+1:end] # exclude brace + + # handle type + if output_type == 'commit': + pass # default + elif output_type == 'tree': + try: + obj = to_commit(obj).tree + except (AttributeError, ValueError): + pass # error raised later + # END exception handling + elif output_type in ('', 'blob'): + if obj.type == 'tag': + obj = deref_tag(obj) + else: + # cannot do anything for non-tags + pass + # END handle tag + elif token == '@': + # try single int + assert ref is not None, "Require Reference to access reflog" + revlog_index = None + try: + # transform reversed index into the format of our revlog + revlog_index = -(int(output_type)+1) + except ValueError: + # TODO: Try to parse the other date options, using parse_date + # maybe + raise NotImplementedError("Support for additional @{...} modes not implemented") + #END handle revlog index + + try: + entry = 
ref.log_entry(revlog_index) + except IndexError: + raise IndexError("Invalid revlog index: %i" % revlog_index) + #END handle index out of bound + + obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) + + # make it pass the following checks + output_type = None + else: + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + # END handle output type + + # empty output types don't require any specific type, its just about dereferencing tags + if output_type and obj.type != output_type: + raise ValueError("Could not accomodate requested object type %r, got %s" % (output_type, obj.type)) + # END verify ouput type + + start = end+1 # skip brace + parsed_to = start + continue + # END parse type + + # try to parse a number + num = 0 + if token != ":": + found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + # no explicit number given, 1 is the default + # It could be 0 though + if not found_digit: + num = 1 + # END set default num + # END number parsing only if non-blob mode + + + parsed_to = start + # handle hiererarchy walk + try: + if token == "~": + obj = to_commit(obj) + for item in xrange(num): + obj = obj.parents[0] + # END for each history item to walk + elif token == "^": + obj = to_commit(obj) + # must be n'th parent + if num: + obj = obj.parents[num-1] + elif token == ":": + if obj.type != "tree": + obj = obj.tree + # END get tree type + obj = obj[rev[start:]] + parsed_to = lr + else: + raise ValueError("Invalid token: %r" % token) + # END end handle tag + except (IndexError, AttributeError): + raise BadObject("Invalid Revision in %s" % rev) + # END exception handling + # END parse loop + + # still no obj ? 
Its probably a simple name + if obj is None: + obj = name_to_object(repo, rev) + parsed_to = lr + # END handle simple name + + if obj is None: + raise ValueError("Revision specifier could not be parsed: %s" % rev) + + if parsed_to != lr: + raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) + + return obj + +#} END utilities + +class PureReferencesMixin(ReferencesMixin): + """Pure-Python refparse implementation""" + + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + + def resolve(self, name): + return rev_parse(self, name) + + @property + def references(self): + raise NotImplementedError() + + @property + def heads(self): + raise NotImplementedError() + + @property + def tags(self): + raise NotImplementedError() diff --git a/git/db/py/transport.py b/git/db/py/transport.py new file mode 100644 index 00000000..783fb8d5 --- /dev/null +++ b/git/db/py/transport.py @@ -0,0 +1,89 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +"""Implement a transport compatible database which sends objects using the git protocol""" + +from gitdb.db.interface import ( TransportDB, + PushInfo, + FetchInfo, + RefSpec ) + +__all__ = ["PureTransportDB"] + +class PurePushInfo(PushInfo): + """TODO: Implementation""" + __slots__ = tuple() + + + +class PureFetchInfo(FetchInfo): + """TODO""" + __slots__ = tuple() + + +class PureTransportDB(TransportDB): + """A database which allows to transport objects from and to different locations + which are specified by urls (location) and refspecs (what to transport, + see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). + + At the beginning of a transport operation, it will be determined which objects + have to be sent (either by this or by the other side). 
+ + Afterwards a pack with the required objects is sent (or received). If there is + nothing to send, the pack will be empty. + + The communication itself if implemented using a protocol instance which deals + with the actual formatting of the lines sent. + + As refspecs involve symbolic names for references to be handled, we require + RefParse functionality. How this is done is up to the actual implementation.""" + # The following variables need to be set by the derived class + #{Configuration + protocol = None + #}end configuraiton + + #{ Interface + + def fetch(self, url, refspecs, progress=None, **kwargs): + """Fetch the objects defined by the given refspec from the given url. + :param url: url identifying the source of the objects. It may also be + a symbol from which the respective url can be resolved, like the + name of the remote. The implementation should allow objects as input + as well, these are assumed to resovle to a meaningful string though. + :param refspecs: iterable of reference specifiers or RefSpec instance, + identifying the references to be fetch from the remote. + :param progress: callable which receives progress messages for user consumption + :param kwargs: may be used for additional parameters that the actual implementation could + find useful. + :return: List of PureFetchInfo compatible instances which provide information about what + was previously fetched, in the order of the input refspecs. + :note: even if the operation fails, one of the returned PureFetchInfo instances + may still contain errors or failures in only part of the refspecs. + :raise: if any issue occours during the transport or if the url is not + supported by the protocol. + """ + raise NotImplementedError() + + def push(self, url, refspecs, progress=None, **kwargs): + """Transport the objects identified by the given refspec to the remote + at the given url. 
+ :param url: Decribes the location which is to receive the objects + see fetch() for more details + :param refspecs: iterable of refspecs strings or RefSpec instances + to identify the objects to push + :param progress: see fetch() + :param kwargs: additional arguments which may be provided by the caller + as they may be useful to the actual implementation + :todo: what to return ? + :raise: if any issue arises during transport or if the url cannot be handled""" + raise NotImplementedError() + + @property + def remotes(self): + """:return: An IterableList of Remote objects allowing to access and manipulate remotes + :note: Remote objects can also be used for the actual push or fetch operation""" + raise NotImplementedError() + + #}end interface + -- cgit v1.2.3 From acf5e6ea64a2f24117f1d419c208ed1c38c43690 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 15:03:14 +0200 Subject: replaced all gitdb strings with git --- git/db/py/base.py | 8 ++++---- git/db/py/git.py | 4 ++-- git/db/py/loose.py | 10 +++++----- git/db/py/mem.py | 6 +++--- git/db/py/pack.py | 8 ++++---- git/db/py/resolve.py | 10 +++++----- git/db/py/transport.py | 2 +- 7 files changed, 24 insertions(+), 24 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index c378b10e..28bbf258 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -4,9 +4,9 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains basic implementations for the interface building blocks""" -from gitdb.db.interface import * +from git.db.interface import * -from gitdb.util import ( +from git.util import ( pool, join, normpath, @@ -21,8 +21,8 @@ from gitdb.util import ( is_git_dir ) -from gitdb.config import GitConfigParser -from gitdb.exc import ( +from git.config import GitConfigParser +from git.exc import ( BadObject, AmbiguousObjectName, InvalidDBRoot diff --git a/git/db/py/git.py b/git/db/py/git.py index bc148c6f..1f929e31 100644 --- 
a/git/db/py/git.py +++ b/git/db/py/git.py @@ -16,13 +16,13 @@ from loose import PureLooseObjectODB from pack import PurePackedODB from ref import PureReferenceDB -from gitdb.util import ( +from git.util import ( LazyMixin, normpath, join, dirname ) -from gitdb.exc import ( +from git.exc import ( InvalidDBRoot, BadObject, AmbiguousObjectName diff --git a/git/db/py/loose.py b/git/db/py/loose.py index 34e31da6..56915f18 100644 --- a/git/db/py/loose.py +++ b/git/db/py/loose.py @@ -9,25 +9,25 @@ from base import ( ) -from gitdb.exc import ( +from git.exc import ( InvalidDBRoot, BadObject, AmbiguousObjectName ) -from gitdb.stream import ( +from git.stream import ( DecompressMemMapReader, FDCompressedSha1Writer, FDStream, Sha1Writer ) -from gitdb.base import ( +from git.base import ( OStream, OInfo ) -from gitdb.util import ( +from git.util import ( file_contents_ro_filepath, ENOENT, hex_to_bin, @@ -44,7 +44,7 @@ from gitdb.util import ( join ) -from gitdb.fun import ( +from git.fun import ( chunk_size, loose_object_header_info, write_object, diff --git a/git/db/py/mem.py b/git/db/py/mem.py index ba922e96..5851aebc 100644 --- a/git/db/py/mem.py +++ b/git/db/py/mem.py @@ -9,16 +9,16 @@ from base import ( PureObjectDBW ) -from gitdb.base import ( +from git.base import ( OStream, IStream, ) -from gitdb.exc import ( +from git.exc import ( BadObject, UnsupportedOperation ) -from gitdb.stream import ( +from git.stream import ( ZippedStoreShaWriter, DecompressMemMapReader, ) diff --git a/git/db/py/pack.py b/git/db/py/pack.py index 1d0e9bfc..75b75468 100644 --- a/git/db/py/pack.py +++ b/git/db/py/pack.py @@ -3,21 +3,21 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing a database to deal with packs""" -from gitdb.db import CachingDB +from git.db import CachingDB from base import ( PureRootPathDB, PureObjectDBR ) -from gitdb.util import LazyMixin +from git.util import LazyMixin 
-from gitdb.exc import ( +from git.exc import ( BadObject, UnsupportedOperation, AmbiguousObjectName ) -from gitdb.pack import PackEntity +from git.pack import PackEntity import os import glob diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 86c1e594..7c03bcd1 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -1,11 +1,11 @@ """Module with an implementation for refspec parsing. It is the pure-python version assuming compatible interface for reference and object types""" -from gitdb.db.interface import ReferencesMixin -from gitdb.exc import BadObject -from gitdb.ref import SymbolicReference -from gitdb.object.base import Object -from gitdb.util import ( +from git.db.interface import ReferencesMixin +from git.exc import BadObject +from git.ref import SymbolicReference +from git.object.base import Object +from git.util import ( join, isdir, isfile, diff --git a/git/db/py/transport.py b/git/db/py/transport.py index 783fb8d5..f8edfb23 100644 --- a/git/db/py/transport.py +++ b/git/db/py/transport.py @@ -4,7 +4,7 @@ # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Implement a transport compatible database which sends objects using the git protocol""" -from gitdb.db.interface import ( TransportDB, +from git.db.interface import ( TransportDB, PushInfo, FetchInfo, RefSpec ) -- cgit v1.2.3 From 7ae36c3e019a5cc16924d1b6007774bfb625036f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 6 May 2011 18:53:59 +0200 Subject: Started to fix imports - tests still have no chance to work as database changed drastically. 
Now the actual work begins --- git/db/py/__init__.py | 9 +--- git/db/py/base.py | 1 - git/db/py/complex.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ git/db/py/git.py | 113 -------------------------------------------------- git/db/py/mem.py | 3 +- git/db/py/ref.py | 2 +- git/db/py/resolve.py | 4 +- 7 files changed, 118 insertions(+), 127 deletions(-) create mode 100644 git/db/py/complex.py delete mode 100644 git/db/py/git.py (limited to 'git/db/py') diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py index 046c699d..73cc2bdf 100644 --- a/git/db/py/__init__.py +++ b/git/db/py/__init__.py @@ -3,11 +3,4 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from base import * -from loose import * -from mem import * -from pack import * -from git import * -from ref import * -from resolve import * -from transport import * +from complex import * diff --git a/git/db/py/base.py b/git/db/py/base.py index 28bbf258..5c470ba4 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -3,7 +3,6 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains basic implementations for the interface building blocks""" - from git.db.interface import * from git.util import ( diff --git a/git/db/py/complex.py b/git/db/py/complex.py new file mode 100644 index 00000000..1f929e31 --- /dev/null +++ b/git/db/py/complex.py @@ -0,0 +1,113 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of PureGitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from base import ( + PureCompoundDB, + PureObjectDBW, + PureRootPathDB, + PureRepositoryPathsMixin, + PureConfigurationMixin, + ) + +from resolve import PureReferencesMixin + +from loose import PureLooseObjectODB +from pack import PurePackedODB +from ref import 
PureReferenceDB + +from git.util import ( + LazyMixin, + normpath, + join, + dirname + ) +from git.exc import ( + InvalidDBRoot, + BadObject, + AmbiguousObjectName + ) +import os + +__all__ = ('PureGitODB', 'PureGitDB') + + +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): + """A git-style object-only database, which contains all objects in the 'objects' + subdirectory. + :note: The type needs to be initialized on the ./objects directory to function, + as it deals solely with object lookup. Use a PureGitDB type if you need + reference and push support.""" + # Configuration + PackDBCls = PurePackedODB + LooseDBCls = PureLooseObjectODB + PureReferenceDBCls = PureReferenceDB + + # Directories + packs_dir = 'pack' + loose_dir = '' + alternates_dir = os.path.join('info', 'alternates') + + def __init__(self, root_path): + """Initialize ourselves on a git ./objects directory""" + super(PureGitODB, self).__init__(root_path) + + def _set_cache_(self, attr): + if attr == '_dbs' or attr == '_loose_db': + self._dbs = list() + loose_db = None + for subpath, dbcls in ((self.packs_dir, self.PackDBCls), + (self.loose_dir, self.LooseDBCls), + (self.alternates_dir, self.PureReferenceDBCls)): + path = self.db_path(subpath) + if os.path.exists(path): + self._dbs.append(dbcls(path)) + if dbcls is self.LooseDBCls: + loose_db = self._dbs[-1] + # END remember loose db + # END check path exists + # END for each db type + + # should have at least one subdb + if not self._dbs: + raise InvalidDBRoot(self.root_path()) + # END handle error + + # we the first one should have the store method + assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + + # finally set the value + self._loose_db = loose_db + else: + super(PureGitODB, self)._set_cache_(attr) + # END handle attrs + + #{ PureObjectDBW interface + + def store(self, istream): + return self._loose_db.store(istream) + + def ostream(self): + return self._loose_db.ostream() + + 
def set_ostream(self, ostream): + return self._loose_db.set_ostream(ostream) + + #} END objectdbw interface + + +class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): + """Git like database with support for object lookup as well as reference resolution. + Our rootpath is set to the actual .git directory (bare on unbare). + + The root_path will be the git objects directory. Use git_path() to obtain the actual top-level + git directory.""" + #directories + + def __init__(self, root_path): + """Initialize ourselves on the .git directory, or the .git/objects directory.""" + PureRepositoryPathsMixin._initialize(self, root_path) + super(PureGitDB, self).__init__(self.objects_path()) + + + diff --git a/git/db/py/git.py b/git/db/py/git.py deleted file mode 100644 index 1f929e31..00000000 --- a/git/db/py/git.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors -# -# This module is part of PureGitDB and is released under -# the New BSD License: http://www.opensource.org/licenses/bsd-license.php -from base import ( - PureCompoundDB, - PureObjectDBW, - PureRootPathDB, - PureRepositoryPathsMixin, - PureConfigurationMixin, - ) - -from resolve import PureReferencesMixin - -from loose import PureLooseObjectODB -from pack import PurePackedODB -from ref import PureReferenceDB - -from git.util import ( - LazyMixin, - normpath, - join, - dirname - ) -from git.exc import ( - InvalidDBRoot, - BadObject, - AmbiguousObjectName - ) -import os - -__all__ = ('PureGitODB', 'PureGitDB') - - -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): - """A git-style object-only database, which contains all objects in the 'objects' - subdirectory. - :note: The type needs to be initialized on the ./objects directory to function, - as it deals solely with object lookup. 
Use a PureGitDB type if you need - reference and push support.""" - # Configuration - PackDBCls = PurePackedODB - LooseDBCls = PureLooseObjectODB - PureReferenceDBCls = PureReferenceDB - - # Directories - packs_dir = 'pack' - loose_dir = '' - alternates_dir = os.path.join('info', 'alternates') - - def __init__(self, root_path): - """Initialize ourselves on a git ./objects directory""" - super(PureGitODB, self).__init__(root_path) - - def _set_cache_(self, attr): - if attr == '_dbs' or attr == '_loose_db': - self._dbs = list() - loose_db = None - for subpath, dbcls in ((self.packs_dir, self.PackDBCls), - (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.PureReferenceDBCls)): - path = self.db_path(subpath) - if os.path.exists(path): - self._dbs.append(dbcls(path)) - if dbcls is self.LooseDBCls: - loose_db = self._dbs[-1] - # END remember loose db - # END check path exists - # END for each db type - - # should have at least one subdb - if not self._dbs: - raise InvalidDBRoot(self.root_path()) - # END handle error - - # we the first one should have the store method - assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" - - # finally set the value - self._loose_db = loose_db - else: - super(PureGitODB, self)._set_cache_(attr) - # END handle attrs - - #{ PureObjectDBW interface - - def store(self, istream): - return self._loose_db.store(istream) - - def ostream(self): - return self._loose_db.ostream() - - def set_ostream(self, ostream): - return self._loose_db.set_ostream(ostream) - - #} END objectdbw interface - - -class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): - """Git like database with support for object lookup as well as reference resolution. - Our rootpath is set to the actual .git directory (bare on unbare). - - The root_path will be the git objects directory. 
Use git_path() to obtain the actual top-level - git directory.""" - #directories - - def __init__(self, root_path): - """Initialize ourselves on the .git directory, or the .git/objects directory.""" - PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_path()) - - - diff --git a/git/db/py/mem.py b/git/db/py/mem.py index 5851aebc..da02dbdd 100644 --- a/git/db/py/mem.py +++ b/git/db/py/mem.py @@ -3,12 +3,11 @@ # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php """Contains the MemoryDatabase implementation""" -from loose import PureLooseObjectODB from base import ( PureObjectDBR, PureObjectDBW ) - +from loose import PureLooseObjectODB from git.base import ( OStream, IStream, diff --git a/git/db/py/ref.py b/git/db/py/ref.py index 951f0437..94887fb8 100644 --- a/git/db/py/ref.py +++ b/git/db/py/ref.py @@ -31,7 +31,7 @@ class PureReferenceDB(PureCompoundDB): dbcls = self.ObjectDBCls if dbcls is None: # late import - from git import PureGitODB + from complex import PureGitODB # TODO: This should be a configurable for flexibility dbcls = PureGitODB # END get db type diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 7c03bcd1..9cce8efe 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -3,8 +3,8 @@ version assuming compatible interface for reference and object types""" from git.db.interface import ReferencesMixin from git.exc import BadObject -from git.ref import SymbolicReference -from git.object.base import Object +from git.refs import SymbolicReference +from git.objects.base import Object from git.util import ( join, isdir, -- cgit v1.2.3 From cd26aaebbda94dc3740e41bbd3f91ba6b1a25c10 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 10 May 2011 10:21:26 +0200 Subject: Made repository paths methods a property to be compatible with the existing repo interface. Added submodule interface ... 
goal is to provide all of the extra repo functionality in custom interfaces --- git/db/py/base.py | 67 +++++++++++++++++++++++++++++++++++++++----------- git/db/py/complex.py | 5 ++-- git/db/py/submodule.py | 33 +++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 git/db/py/submodule.py (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index 5c470ba4..f45711d5 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -273,34 +273,70 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): # lets not assume the option exists, although it should pass #END check bare flag + + #} end subclass interface + + #{ Object Interface + + def __eq__(self, rhs): + if hasattr(rhs, 'git_dir'): + return self.git_dir == rhs.git_dir + return False + + def __ne__(self, rhs): + return not self.__eq__(rhs) + + def __hash__(self): + return hash(self.git_dir) + def __repr__(self): + return "%s(%r)" % (type(self).__name__, self.git_dir) - #} end subclass interface + #} END object interface #{ Interface + @property def is_bare(self): return self._bare - def git_path(self): + @property + def git_dir(self): return self._git_path - def working_tree_path(self): - if self.is_bare(): - raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_path()) + @property + def working_tree_dir(self): + if self.is_bare: + raise AssertionError("Repository at %s is bare and does not have a working tree directory" % self.git_dir) #END assertion - return dirname(self.git_path()) - - def objects_path(self): - return join(self.git_path(), self.objs_dir) - + return dirname(self.git_dir) + + @property + def objects_dir(self): + return join(self.git_dir, self.objs_dir) + + @property def working_dir(self): - if self.is_bare(): - return self.git_path() + if self.is_bare: + return self.git_dir else: - return self.working_tree_dir() + return self.working_tree_dir #END handle bare state + def 
_mk_description(): + def _get_description(self): + filename = join(self.git_dir, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = join(self.git_dir, 'description') + file(filename, 'w').write(descr+'\n') + + return property(_get_description, _set_description, "Descriptive text for the content of the repository") + + description = _mk_description() + del(_mk_description) + #} END interface @@ -313,7 +349,7 @@ class PureConfigurationMixin(ConfigurationMixin): def __init__(self, *args, **kwargs): """Verify prereqs""" - assert hasattr(self, 'git_path') + assert hasattr(self, 'git_dir') def _path_at_level(self, level ): # we do not support an absolute path of the gitconfig on windows , @@ -327,7 +363,7 @@ class PureConfigurationMixin(ConfigurationMixin): elif level == "global": return normpath(expanduser("~/.%s" % self.system_config_file_name)) elif level == "repository": - return join(self.git_path(), self.repo_config_file_name) + return join(self.git_dir, self.repo_config_file_name) #END handle level raise ValueError("Invalid configuration level: %r" % level) @@ -346,5 +382,6 @@ class PureConfigurationMixin(ConfigurationMixin): def config_writer(self, config_level="repository"): return GitConfigParser(self._path_at_level(config_level), read_only=False) + #} END interface diff --git a/git/db/py/complex.py b/git/db/py/complex.py index 1f929e31..de68d4fd 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -15,6 +15,7 @@ from resolve import PureReferencesMixin from loose import PureLooseObjectODB from pack import PurePackedODB from ref import PureReferenceDB +from submodule import PureSubmoduleDB from git.util import ( LazyMixin, @@ -32,7 +33,7 @@ import os __all__ = ('PureGitODB', 'PureGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB): """A git-style object-only database, which contains all objects 
in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, @@ -107,7 +108,7 @@ class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, Pu def __init__(self, root_path): """Initialize ourselves on the .git directory, or the .git/objects directory.""" PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_path()) + super(PureGitDB, self).__init__(self.objects_dir) diff --git a/git/db/py/submodule.py b/git/db/py/submodule.py new file mode 100644 index 00000000..735f90b1 --- /dev/null +++ b/git/db/py/submodule.py @@ -0,0 +1,33 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.objects.submodule.base import Submodule +from git.objects.submodule.root import RootModule +from git.db.interface import SubmoduleDB + +__all__ = ["PureSubmoduleDB"] + +class PureSubmoduleDB(SubmoduleDB): + """Pure python implementation of submodule functionality""" + + @property + def submodules(self): + return Submodule.list_items(self) + + def submodule(self, name): + try: + return self.submodules[name] + except IndexError: + raise ValueError("Didn't find submodule named %r" % name) + # END exception handling + + def create_submodule(self, *args, **kwargs): + return Submodule.add(self, *args, **kwargs) + + def iter_submodules(self, *args, **kwargs): + return RootModule(self).traverse(*args, **kwargs) + + def submodule_update(self, *args, **kwargs): + return RootModule(self).update(*args, **kwargs) + -- cgit v1.2.3 From 0996049122842a343e0ea7fbbecafddb2b4ba9d3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 29 May 2011 21:59:12 +0200 Subject: Intermediate commit with quite some progress in order to put all extra methods on the default Repo implementation into interfaces or something that can be 
abstracted. It shows that it would indeed be good to keep the differentiation between Repositories which contain an object database as it is clearly easier to setup any combination of repositories that use git and those that do not, with just the addition of one more level of indirection. Lets see how it will end up --- git/db/py/base.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++++- git/db/py/complex.py | 18 ++++++++---- git/db/py/resolve.py | 44 ++++++++++++++++++++++++++++-- git/db/py/transport.py | 61 ++++++++++------------------------------- 4 files changed, 142 insertions(+), 55 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index f45711d5..cc326c27 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -35,7 +35,7 @@ import os __all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', - 'PureConfigurationMixin', 'PureRepositoryPathsMixin') + 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin') class PureObjectDBR(ObjectDBR): @@ -385,3 +385,75 @@ class PureConfigurationMixin(ConfigurationMixin): #} END interface + +class PureAlternatesFileMixin(object): + """Utility able to read and write an alternates file through the alternates property + It needs to be part of a type with the git_dir or db_path property. 
+ + The file by default is assumed to be located at the default location as imposed + by the standard git repository layout""" + + #{ Configuration + alternates_filepath = os.path.join('info', 'alternates') # relative path to alternates file + + #} END configuration + + def __init__(self, *args, **kwargs): + super(PureAlternatesFileMixin, self).__init__(*args, **kwargs) + self._alternates_path() # throws on incompatible type + + #{ Interface + + def _alternates_path(self): + if hasattr(self, 'git_dir'): + return join(self.git_dir, 'objects', self.alternates_filepath) + elif hasattr(self, 'db_path'): + return self.db_path(self.alternates_filepath) + else: + raise AssertionError("This mixin requires a parent type with either the git_dir property or db_path method") + #END handle path + + def _get_alternates(self): + """The list of alternates for this repo from which objects can be retrieved + + :return: list of strings being pathnames of alternates""" + alternates_path = self._alternates_path() + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return list() + # END handle path exists + + def _set_alternates(self, alts): + """Sets the alternates + + :parm alts: + is the array of string paths representing the alternates at which + git should look for objects, i.e. 
/home/user/repo/.git/objects + + :raise NoSuchPathError: + :note: + The method does not check for the existance of the paths in alts + as the caller is responsible.""" + alternates_path = self._alternates_path() + if not alts: + if isfile(alternates_path): + os.remove(alternates_path) + else: + try: + f = open(alternates_path, 'w') + f.write("\n".join(alts)) + finally: + f.close() + # END file handling + # END alts handling + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + #} END interface diff --git a/git/db/py/complex.py b/git/db/py/complex.py index de68d4fd..6504b3ed 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -8,6 +8,7 @@ from base import ( PureRootPathDB, PureRepositoryPathsMixin, PureConfigurationMixin, + PureAlternatesFileMixin, ) from resolve import PureReferencesMixin @@ -17,6 +18,8 @@ from pack import PurePackedODB from ref import PureReferenceDB from submodule import PureSubmoduleDB +from git.db.compat import RepoCompatInterface + from git.util import ( LazyMixin, normpath, @@ -30,10 +33,11 @@ from git.exc import ( ) import os -__all__ = ('PureGitODB', 'PureGitDB') +__all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, + PureSubmoduleDB, PureAlternatesFileMixin): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. 
:note: The type needs to be initialized on the ./objects directory to function, @@ -47,7 +51,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) # Directories packs_dir = 'pack' loose_dir = '' - alternates_dir = os.path.join('info', 'alternates') + def __init__(self, root_path): """Initialize ourselves on a git ./objects directory""" @@ -59,7 +63,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) loose_db = None for subpath, dbcls in ((self.packs_dir, self.PackDBCls), (self.loose_dir, self.LooseDBCls), - (self.alternates_dir, self.PureReferenceDBCls)): + (self.alternates_filepath, self.PureReferenceDBCls)): path = self.db_path(subpath) if os.path.exists(path): self._dbs.append(dbcls(path)) @@ -75,7 +79,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) # END handle error # we the first one should have the store method - assert loose_db is not None and hasattr(loose_db, 'store'), "First database needs store functionality" + assert loose_db is not None and hasattr(loose_db, 'store'), "One database needs store functionality" # finally set the value self._loose_db = loose_db @@ -97,6 +101,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureSubmoduleDB) #} END objectdbw interface + class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): """Git like database with support for object lookup as well as reference resolution. Our rootpath is set to the actual .git directory (bare on unbare). 
@@ -112,3 +117,6 @@ class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, Pu +class PureCompatibilityGitDB(PureGitDB, RepoCompatInterface): + """Pure git database with a compatability layer required by 0.3x code""" + diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 9cce8efe..94992d11 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -5,6 +5,9 @@ from git.db.interface import ReferencesMixin from git.exc import BadObject from git.refs import SymbolicReference from git.objects.base import Object +from git.refs.head import HEAD +from git.refs.headref import Head +from git.refs.tag import TagReference from git.util import ( join, isdir, @@ -281,17 +284,52 @@ class PureReferencesMixin(ReferencesMixin): re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$') + #{ Configuration + # Types to use when instatiating references + TagReferenceCls = TagReference + HeadCls = Head + ReferenceCls = Reference + HEADCls = HEAD + #} END configuration + def resolve(self, name): + return self.resolve_object(name).binsha + + def resolve_object(self, name): return rev_parse(self, name) @property def references(self): - raise NotImplementedError() + return self.ReferenceCls.list_items(self) @property def heads(self): - raise NotImplementedError() + return self.HeadCls.list_items(self) @property def tags(self): - raise NotImplementedError() + return self.TagReferenceCls.list_items(self) + + def tag(self, name): + return self.tags[name] + + @property + def head(self): + return self.HEADCls(self,'HEAD') + + def create_head(self, path, commit='HEAD', force=False, logmsg=None ): + return self.HeadCls.create(self, path, commit, force, logmsg) + + def delete_head(self, *heads, **kwargs): + return self.HeadCls.delete(self, *heads, **kwargs) + + def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): + return self.TagReferenceCls.create(self, path, ref, message, force, **kwargs) 
+ + def delete_tag(self, *tags): + return self.TagReferenceCls.delete(self, *tags) + + + # compat + branches = heads + refs = references diff --git a/git/db/py/transport.py b/git/db/py/transport.py index f8edfb23..00d222b0 100644 --- a/git/db/py/transport.py +++ b/git/db/py/transport.py @@ -9,6 +9,10 @@ from git.db.interface import ( TransportDB, FetchInfo, RefSpec ) +from git.refs.remote import RemoteReference +from git.remote import Remote + + __all__ = ["PureTransportDB"] class PurePushInfo(PushInfo): @@ -23,67 +27,32 @@ class PureFetchInfo(FetchInfo): class PureTransportDB(TransportDB): - """A database which allows to transport objects from and to different locations - which are specified by urls (location) and refspecs (what to transport, - see http://www.kernel.org/pub/software/scm/git/docs/git-fetch.html). - - At the beginning of a transport operation, it will be determined which objects - have to be sent (either by this or by the other side). - - Afterwards a pack with the required objects is sent (or received). If there is - nothing to send, the pack will be empty. - - The communication itself if implemented using a protocol instance which deals - with the actual formatting of the lines sent. - - As refspecs involve symbolic names for references to be handled, we require - RefParse functionality. How this is done is up to the actual implementation.""" # The following variables need to be set by the derived class #{Configuration protocol = None + RemoteCls = Remote #}end configuraiton #{ Interface def fetch(self, url, refspecs, progress=None, **kwargs): - """Fetch the objects defined by the given refspec from the given url. - :param url: url identifying the source of the objects. It may also be - a symbol from which the respective url can be resolved, like the - name of the remote. The implementation should allow objects as input - as well, these are assumed to resovle to a meaningful string though. 
- :param refspecs: iterable of reference specifiers or RefSpec instance, - identifying the references to be fetch from the remote. - :param progress: callable which receives progress messages for user consumption - :param kwargs: may be used for additional parameters that the actual implementation could - find useful. - :return: List of PureFetchInfo compatible instances which provide information about what - was previously fetched, in the order of the input refspecs. - :note: even if the operation fails, one of the returned PureFetchInfo instances - may still contain errors or failures in only part of the refspecs. - :raise: if any issue occours during the transport or if the url is not - supported by the protocol. - """ raise NotImplementedError() def push(self, url, refspecs, progress=None, **kwargs): - """Transport the objects identified by the given refspec to the remote - at the given url. - :param url: Decribes the location which is to receive the objects - see fetch() for more details - :param refspecs: iterable of refspecs strings or RefSpec instances - to identify the objects to push - :param progress: see fetch() - :param kwargs: additional arguments which may be provided by the caller - as they may be useful to the actual implementation - :todo: what to return ? 
- :raise: if any issue arises during transport or if the url cannot be handled""" raise NotImplementedError() @property def remotes(self): - """:return: An IterableList of Remote objects allowing to access and manipulate remotes - :note: Remote objects can also be used for the actual push or fetch operation""" - raise NotImplementedError() + return self.RemoteCls.list_items(self) + + def remote(self, name='origin'): + return self.remotes[name] + def create_remote(self, name, url, **kwargs): + return self.RemoteCls.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + return self.RemoteCls.remove(self, remote) + #}end interface -- cgit v1.2.3 From 112bb1672d6b28f203e7839e320b985486636800 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 01:23:28 +0200 Subject: Finished moving all repository methods to the respective interfaces and implementations. It seems theoretically work together now, although it clearly is much more complex than ever before. The repo package was slimmed down to being a module once again, which is only there for compatability actually --- git/db/py/base.py | 14 +++++++++++++- git/db/py/complex.py | 12 +++++++++--- git/db/py/resolve.py | 26 ++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index cc326c27..74b8beb9 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -20,6 +20,7 @@ from git.util import ( is_git_dir ) +from git.index import IndexFile from git.config import GitConfigParser from git.exc import ( BadObject, @@ -35,7 +36,8 @@ import os __all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', - 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin') + 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin', + 'PureIndexDB') class PureObjectDBR(ObjectDBR): @@ -386,6 +388,16 @@ class 
PureConfigurationMixin(ConfigurationMixin): #} END interface +class PureIndexDB(IndexDB): + #{ Configuration + IndexCls = IndexFile + #} END configuration + + @property + def index(self): + return self.IndexCls(self) + + class PureAlternatesFileMixin(object): """Utility able to read and write an alternates file through the alternates property It needs to be part of a type with the git_dir or db_path property. diff --git a/git/db/py/complex.py b/git/db/py/complex.py index 6504b3ed..efcbb2ba 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -2,6 +2,7 @@ # # This module is part of PureGitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php +from git.db.interface import HighLevelRepository from base import ( PureCompoundDB, PureObjectDBW, @@ -9,6 +10,7 @@ from base import ( PureRepositoryPathsMixin, PureConfigurationMixin, PureAlternatesFileMixin, + PureIndexDB, ) from resolve import PureReferencesMixin @@ -36,8 +38,7 @@ import os __all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, - PureSubmoduleDB, PureAlternatesFileMixin): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, @@ -102,7 +103,12 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, -class PureGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin): +class PureGitDB(PureGitODB, + PureRepositoryPathsMixin, PureConfigurationMixin, + PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, + PureIndexDB, + # HighLevelRepository Currently not implemented ! + ): """Git like database with support for object lookup as well as reference resolution. Our rootpath is set to the actual .git directory (bare on unbare). 
diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 94992d11..d0685747 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -5,6 +5,7 @@ from git.db.interface import ReferencesMixin from git.exc import BadObject from git.refs import SymbolicReference from git.objects.base import Object +from git.objects.commit import Commit from git.refs.head import HEAD from git.refs.headref import Head from git.refs.tag import TagReference @@ -290,6 +291,7 @@ class PureReferencesMixin(ReferencesMixin): HeadCls = Head ReferenceCls = Reference HEADCls = HEAD + CommitCls = Commit #} END configuration def resolve(self, name): @@ -313,6 +315,30 @@ class PureReferencesMixin(ReferencesMixin): def tag(self, name): return self.tags[name] + + def commit(self, rev=None): + if rev is None: + return self.head.commit + else: + return self.resolve_object(str(rev)+"^0") + #END handle revision + + def iter_trees(self, *args, **kwargs): + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + if rev is None: + return self.head.commit.tree + else: + return self.resolve_object(str(rev)+"^{tree}") + + def iter_commits(self, rev=None, paths='', **kwargs): + if rev is None: + rev = self.head.commit + + return self.CommitCls.iter_items(self, rev, paths, **kwargs) + + @property def head(self): return self.HEADCls(self,'HEAD') -- cgit v1.2.3 From 024adf37acddd6a5d8293b6b5d15795c59a142c0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 13:06:37 +0200 Subject: Fixed tests far enough to allow basic repository tests to be applied to any of the new database types. 
This reduces code duplication to the mere minimum, but allows custom tests to be added on top easily and flexibly --- git/db/py/__init__.py | 2 -- git/db/py/complex.py | 4 ++-- git/db/py/resolve.py | 13 ++++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/__init__.py b/git/db/py/__init__.py index 73cc2bdf..8a681e42 100644 --- a/git/db/py/__init__.py +++ b/git/db/py/__init__.py @@ -2,5 +2,3 @@ # # This module is part of GitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php - -from complex import * diff --git a/git/db/py/complex.py b/git/db/py/complex.py index efcbb2ba..9d891537 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -20,7 +20,7 @@ from pack import PurePackedODB from ref import PureReferenceDB from submodule import PureSubmoduleDB -from git.db.compat import RepoCompatInterface +from git.db.compat import RepoCompatibilityInterface from git.util import ( LazyMixin, @@ -123,6 +123,6 @@ class PureGitDB(PureGitODB, -class PureCompatibilityGitDB(PureGitDB, RepoCompatInterface): +class PureCompatibilityGitDB(PureGitDB, RepoCompatibilityInterface): """Pure git database with a compatability layer required by 0.3x code""" diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index d0685747..7194149c 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -3,12 +3,19 @@ version assuming compatible interface for reference and object types""" from git.db.interface import ReferencesMixin from git.exc import BadObject -from git.refs import SymbolicReference -from git.objects.base import Object -from git.objects.commit import Commit +from git.refs import ( + SymbolicReference, + Reference, + HEAD, + Head, + TagReference + ) from git.refs.head import HEAD from git.refs.headref import Head from git.refs.tag import TagReference + +from git.objects.base import Object +from git.objects.commit import Commit from git.util import ( join, isdir, -- 
cgit v1.2.3 From 1f71ed94578799ee1667ba54b66a369e307f415b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 16:32:56 +0200 Subject: git cmd implementation of repository appears to work, at least this is what the test suggests. Pure python implementation still has some trouble, but this should be very fixable --- git/db/py/base.py | 9 ++++++--- git/db/py/complex.py | 16 ++++++++-------- git/db/py/resolve.py | 3 +-- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index 74b8beb9..4d9b6e14 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -8,6 +8,7 @@ from git.db.interface import * from git.util import ( pool, join, + isfile, normpath, abspath, dirname, @@ -25,7 +26,8 @@ from git.config import GitConfigParser from git.exc import ( BadObject, AmbiguousObjectName, - InvalidDBRoot + InvalidGitRepositoryError, + NoSuchPathError ) from async import ChannelThreadTask @@ -240,7 +242,7 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): epath = abspath(expandvars(expanduser(path or os.getcwd()))) if not exists(epath): - raise InvalidDBRoot(epath) + raise NoSuchPathError(epath) #END check file self._working_tree_dir = None @@ -264,7 +266,7 @@ class PureRepositoryPathsMixin(RepositoryPathsMixin): # END while curpath if self._git_path is None: - raise InvalidDBRoot(epath) + raise InvalidGitRepositoryError(epath) # END path not found self._bare = self._git_path.endswith(self.repo_dir) @@ -351,6 +353,7 @@ class PureConfigurationMixin(ConfigurationMixin): def __init__(self, *args, **kwargs): """Verify prereqs""" + super(PureConfigurationMixin, self).__init__(*args, **kwargs) assert hasattr(self, 'git_dir') def _path_at_level(self, level ): diff --git a/git/db/py/complex.py b/git/db/py/complex.py index 9d891537..a51118b3 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -1,6 +1,6 @@ # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and 
contributors # -# This module is part of PureGitDB and is released under +# This module is part of PurePartialGitDB and is released under # the New BSD License: http://www.opensource.org/licenses/bsd-license.php from git.db.interface import HighLevelRepository from base import ( @@ -12,7 +12,7 @@ from base import ( PureAlternatesFileMixin, PureIndexDB, ) - +from transport import PureTransportDB from resolve import PureReferencesMixin from loose import PureLooseObjectODB @@ -35,14 +35,14 @@ from git.exc import ( ) import os -__all__ = ('PureGitODB', 'PureGitDB', 'PureCompatibilityGitDB') +__all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB') class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. :note: The type needs to be initialized on the ./objects directory to function, - as it deals solely with object lookup. Use a PureGitDB type if you need + as it deals solely with object lookup. Use a PurePartialGitDB type if you need reference and push support.""" # Configuration PackDBCls = PurePackedODB @@ -103,10 +103,10 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): -class PureGitDB(PureGitODB, +class PurePartialGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, - PureIndexDB, + PureIndexDB, PureTransportDB # HighLevelRepository Currently not implemented ! ): """Git like database with support for object lookup as well as reference resolution. 
@@ -119,10 +119,10 @@ class PureGitDB(PureGitODB, def __init__(self, root_path): """Initialize ourselves on the .git directory, or the .git/objects directory.""" PureRepositoryPathsMixin._initialize(self, root_path) - super(PureGitDB, self).__init__(self.objects_dir) + super(PurePartialGitDB, self).__init__(self.objects_dir) -class PureCompatibilityGitDB(PureGitDB, RepoCompatibilityInterface): +class PureCompatibilityGitDB(PurePartialGitDB, RepoCompatibilityInterface): """Pure git database with a compatability layer required by 0.3x code""" diff --git a/git/db/py/resolve.py b/git/db/py/resolve.py index 7194149c..7bea779e 100644 --- a/git/db/py/resolve.py +++ b/git/db/py/resolve.py @@ -320,8 +320,7 @@ class PureReferencesMixin(ReferencesMixin): return self.TagReferenceCls.list_items(self) def tag(self, name): - return self.tags[name] - + return self.TagReferenceCls(self, self.TagReferenceCls.to_full_path(name)) def commit(self, rev=None): if rev is None: -- cgit v1.2.3 From 4ea529dd7f545dddc8cfdfdb4b6209eef0494ec5 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 30 May 2011 17:16:18 +0200 Subject: Fixed pure python implementation to run the default repository tests --- git/db/py/base.py | 15 +++++++-------- git/db/py/complex.py | 4 ++-- git/db/py/ref.py | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index 4d9b6e14..a2c9a4ef 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -96,8 +96,9 @@ class PureObjectDBW(ObjectDBW): class PureRootPathDB(RootPathDB): def __init__(self, root_path): - super(PureRootPathDB, self).__init__(root_path) self._root_path = root_path + super(PureRootPathDB, self).__init__(root_path) + #{ Interface @@ -127,8 +128,8 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): def _set_cache_(self, attr): if attr == '_dbs': self._dbs = list() - elif attr == '_db_cache': - self._db_cache = dict() + elif attr == '_obj_cache': + 
self._obj_cache = dict() else: super(PureCompoundDB, self)._set_cache_(attr) @@ -138,14 +139,14 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): # most databases use binary representations, prevent converting # it everytime a database is being queried try: - return self._db_cache[sha] + return self._obj_cache[sha] except KeyError: pass # END first level cache for db in self._dbs: if db.has_object(sha): - self._db_cache[sha] = db + self._obj_cache[sha] = db return db # END for each database raise BadObject(sha) @@ -181,7 +182,7 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): def update_cache(self, force=False): # something might have changed, clear everything - self._db_cache.clear() + self._obj_cache.clear() stat = False for db in self._dbs: if isinstance(db, CachingDB): @@ -191,8 +192,6 @@ class PureCompoundDB(CompoundDB, PureObjectDBR, LazyMixin, CachingDB): return stat def partial_to_complete_sha_hex(self, partial_hexsha): - databases = self.databases() - len_partial_hexsha = len(partial_hexsha) if len_partial_hexsha % 2 != 0: partial_binsha = hex_to_bin(partial_hexsha + "0") diff --git a/git/db/py/complex.py b/git/db/py/complex.py index a51118b3..d5c185f3 100644 --- a/git/db/py/complex.py +++ b/git/db/py/complex.py @@ -38,7 +38,7 @@ import os __all__ = ('PureGitODB', 'PurePartialGitDB', 'PureCompatibilityGitDB') -class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): +class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB, PureAlternatesFileMixin): """A git-style object-only database, which contains all objects in the 'objects' subdirectory. 
:note: The type needs to be initialized on the ./objects directory to function, @@ -105,7 +105,7 @@ class PureGitODB(PureRootPathDB, PureObjectDBW, PureCompoundDB): class PurePartialGitDB(PureGitODB, PureRepositoryPathsMixin, PureConfigurationMixin, - PureReferencesMixin, PureSubmoduleDB, PureAlternatesFileMixin, + PureReferencesMixin, PureSubmoduleDB, PureIndexDB, PureTransportDB # HighLevelRepository Currently not implemented ! ): diff --git a/git/db/py/ref.py b/git/db/py/ref.py index 94887fb8..d2c77a3a 100644 --- a/git/db/py/ref.py +++ b/git/db/py/ref.py @@ -31,8 +31,8 @@ class PureReferenceDB(PureCompoundDB): dbcls = self.ObjectDBCls if dbcls is None: # late import - from complex import PureGitODB # TODO: This should be a configurable for flexibility - dbcls = PureGitODB + import complex + dbcls = complex.PureGitODB # END get db type # try to get as many as possible, don't fail if some are unavailable -- cgit v1.2.3 From 47f14d527f61d30ffa49a6254838ca5c1aee3972 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 6 Jun 2011 18:59:46 +0200 Subject: Added loose object writing and reading performance tessts, in pure and command implementations. The previous performance test was truncated a bit as it compared directly with the git hash_object write performance. 
This is out, and if we wanted it we could implement it , but its actually slower for us --- git/db/py/loose.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/loose.py b/git/db/py/loose.py index 56915f18..6e72aff0 100644 --- a/git/db/py/loose.py +++ b/git/db/py/loose.py @@ -107,22 +107,6 @@ class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): # END handle cache raise BadObject(hexsha) - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: 20 byte binary sha1 string which matches the given name uniquely - :param name: hexadecimal partial name - :raise AmbiguousObjectName: - :raise BadObject: """ - candidate = None - for binsha in self.sha_iter(): - if bin_to_hex(binsha).startswith(partial_hexsha): - # it can't ever find the same object twice - if candidate is not None: - raise AmbiguousObjectName(partial_hexsha) - candidate = binsha - # END for each object - if candidate is None: - raise BadObject(partial_hexsha) - return candidate #} END interface @@ -179,6 +163,23 @@ class PureLooseObjectODB(PureRootPathDB, PureObjectDBR, PureObjectDBW): except BadObject: return False # END check existance + + def partial_to_complete_sha_hex(self, partial_hexsha): + """:return: 20 byte binary sha1 string which matches the given name uniquely + :param name: hexadecimal partial name + :raise AmbiguousObjectName: + :raise BadObject: """ + candidate = None + for binsha in self.sha_iter(): + if bin_to_hex(binsha).startswith(partial_hexsha): + # it can't ever find the same object twice + if candidate is not None: + raise AmbiguousObjectName(partial_hexsha) + candidate = binsha + # END for each object + if candidate is None: + raise BadObject(partial_hexsha) + return candidate def store(self, istream): """note: The sha we produce will be hex by nature""" -- cgit v1.2.3 From 9bf3fdec93fe427bb5f0bd39c986a4e977969f41 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: 
Tue, 7 Jun 2011 13:38:48 +0200 Subject: First run in order to fix the remote handling. Cleaned up interfaces and figured out that the implementation really should be specific to the git command. This leaves the interface open for other implemntations which use a different way to provide feedback (as we do not make assumptions about the format of a feedback line) --- git/db/py/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'git/db/py') diff --git a/git/db/py/base.py b/git/db/py/base.py index a2c9a4ef..2fdbd202 100644 --- a/git/db/py/base.py +++ b/git/db/py/base.py @@ -18,7 +18,7 @@ from git.util import ( expandvars, expanduser, exists, - is_git_dir + is_git_dir, ) from git.index import IndexFile @@ -40,7 +40,7 @@ import os __all__ = ( 'PureObjectDBR', 'PureObjectDBW', 'PureRootPathDB', 'PureCompoundDB', 'PureConfigurationMixin', 'PureRepositoryPathsMixin', 'PureAlternatesFileMixin', 'PureIndexDB') - + class PureObjectDBR(ObjectDBR): @@ -471,3 +471,4 @@ class PureAlternatesFileMixin(object): alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") #} END interface + -- cgit v1.2.3