From 4e1c89ec97ec90037583e85d0e9e71e9c845a19b Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Wed, 2 Jun 2010 16:13:32 +0200
Subject: Added performance testing foundation library, reworked existing
 performance tests to work on larger repositories

---
 test/git/performance/lib.py         | 46 ++++++++++++++++++++++++++
 test/git/performance/test_commit.py | 68 +++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 test/git/performance/lib.py
 create mode 100644 test/git/performance/test_commit.py

diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
new file mode 100644
index 00000000..4b552b20
--- /dev/null
+++ b/test/git/performance/lib.py
@@ -0,0 +1,46 @@
+"""Contains library functions"""
+import os
+from test.testlib import *
+
+from git import (
+    Repo
+    )
+
+#{ Invariants
+k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE"
+#} END invariants
+
+
+#{ Utilities
+def resolve_or_fail(env_var):
+    """:return: resolved environment variable or raise EnvironmentError"""
+    try:
+        return os.environ[env_var]
+    except KeyError:
+        raise EnvironmentError("Please set the %r environment variable and retry" % env_var)
+    # END exception handling
+
+#} END utilities
+
+
+#{ Base Classes
+
+class TestBigRepoReadOnly(TestBase):
+    """TestCase providing access to readonly 'big' repositories using the following
+    member variables:
+
+    * gitrepo
+
+     * Read-Only git repository - actually the repo of git itself"""
+
+    #{ Invariants
+    head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
+    head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
+    #} END invariants
+
+    @classmethod
+    def setUpAll(cls):
+        super(TestBigRepoReadOnly, cls).setUpAll()
+        cls.gitrepo = Repo(resolve_or_fail(k_env_git_repo))
+
+#} END base classes
diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
new file mode 100644
index 00000000..c1f8ce59
--- /dev/null
+++ b/test/git/performance/test_commit.py
@@ -0,0 +1,68 @@
+# test_performance.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from test.testlib import *
+from git import *
+from time import time
+import sys
+
+class TestPerformance(TestBase):
+
+    # ref with about 100 commits in its history
+    ref_100 = '0.1.6'
+
+    def _query_commit_info(self, c):
+        c.author
+        c.authored_date
+        c.author_tz_offset
+        c.committer
+        c.committed_date
+        c.committer_tz_offset
+        c.message
+        c.parents
+
+    def test_iteration(self):
+        no = 0
+        nc = 0
+
+        # iterate all commits reachable from the ref and fully traverse each
+        # commit's tree - this touches quite a lot of commits and objects,
+        # querying every one of them once
+        st = time()
+        for c in self.rorepo.iter_commits(self.ref_100):
+            nc += 1
+            self._query_commit_info(c)
+            for obj in c.tree.traverse():
+                obj.size
+                no += 1
+            # END for each object
+        # END for each commit
+        elapsed_time = time() - st
+        print >> sys.stderr, "Traversed %i Trees and a total of %i uncached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time)
+
+    def test_commit_traversal(self):
+        # bound to cat-file parsing performance
+        nc = 0
+        st = time()
+        for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False):
+            nc += 1
+            self._query_commit_info(c)
+        # END for each traversed commit
+        elapsed_time = time() - st
+        print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
+
+    def test_commit_iteration(self):
+        # bound to stream parsing performance
+        nc = 0
+        st = time()
+        for c in Commit.iter_items(self.rorepo, self.ref_100):
+            nc += 1
+            self._query_commit_info(c)
+        # END for each traversed commit
+        elapsed_time = time() - st
+        print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
-- cgit v1.2.3
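
The foundation library above resolves the big test repository from the
GIT_PYTHON_TEST_GIT_REPO_BASE environment variable and fails early if it is
unset. A minimal sketch of wiring up a run - the /tmp/git path and the import
location are illustrative assumptions, not part of the patch:

    import os

    # point the suite at a local clone of git.git (any big repository works)
    os.environ['GIT_PYTHON_TEST_GIT_REPO_BASE'] = '/tmp/git'

    from test.git.performance.lib import resolve_or_fail, k_env_git_repo
    print resolve_or_fail(k_env_git_repo)    # -> '/tmp/git'
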

From ae5a69f67822d81bbbd8f4af93be68703e730b37 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Wed, 2 Jun 2010 16:41:28 +0200
Subject: commit: redesigned revlist and commit parsing, commits are always
 retrieved from their object information directly. This is faster, and
 resolves issues with the rev-list format and empty commit messages.

Adjusted many tests to go with the changes, as they were still mocked. The
mock was removed where necessary and replaced by code that actually executes.
---
 test/git/performance/test_commit.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
index c1f8ce59..b4a9d868 100644
--- a/test/git/performance/test_commit.py
+++ b/test/git/performance/test_commit.py
@@ -4,12 +4,12 @@
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 
-from test.testlib import *
+from lib import *
 from git import *
 from time import time
 import sys
 
-class TestPerformance(TestBase):
+class TestPerformance(TestBigRepoReadOnly):
 
     # ref with about 100 commits in its history
     ref_100 = '0.1.6'
@@ -48,7 +48,7 @@ class TestPerformance(TestBase):
         # bound to cat-file parsing performance
         nc = 0
         st = time()
-        for c in self.rorepo.commit(self.ref_100).traverse(branch_first=False):
+        for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False):
             nc += 1
             self._query_commit_info(c)
         # END for each traversed commit
@@ -59,7 +59,7 @@ class TestPerformance(TestBase):
         # bound to stream parsing performance
         nc = 0
         st = time()
-        for c in Commit.iter_items(self.rorepo, self.ref_100):
+        for c in Commit.iter_items(self.gitrepo, self.head_sha_2k):
             nc += 1
             self._query_commit_info(c)
         # END for each traversed commit
-- cgit v1.2.3

From 282018b79cc8df078381097cb3aeb29ff56e83c6 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Wed, 2 Jun 2010 20:11:00 +0200
Subject: Added first design and frame for the object database. In a first
 step, loose objects will be written using our utilities, and certain object
 retrieval functionality moves into the GitObjectDatabase, which is used by
 the repo instance.

Added a performance test for object database access, which shows quite
respectable tree parsing performance, and okay blob access. Nonetheless, it
will be hard to beat the C performance using a pure Python implementation,
but it can be a nice exercise to write it anyway, as it allows more direct
pack manipulations. Some could benefit from the ability to write packs, as
these can serve as a local cache if alternates are used.
---
 test/git/performance/test_odb.py | 61 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 test/git/performance/test_odb.py

diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
new file mode 100644
index 00000000..0ad2ce33
--- /dev/null
+++ b/test/git/performance/test_odb.py
@@ -0,0 +1,61 @@
+"""Performance tests for the object store"""
+
+from time import time
+import sys
+import stat
+
+from lib import (
+    TestBigRepoReadOnly
+    )
+
+
+class TestObjDBPerformance(TestBigRepoReadOnly):
+
+    def test_random_access(self):
+
+        # GET COMMITS
+        # TODO: use the actual db for this
+        st = time()
+        root_commit = self.gitrepo.commit(self.head_sha_2k)
+        commits = list(root_commit.traverse())
+        nc = len(commits)
+        elapsed = time() - st
+
+        print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+
+
+        # GET TREES
+        # walk all trees of all commits
+        st = time()
+        blobs_per_commit = list()
+        nt = 0
+        for commit in commits:
+            tree = commit.tree
+            blobs = list()
+            for item in tree.traverse():
+                nt += 1
+                if item.type == 'blob':
+                    blobs.append(item)
+                # direct access for speed
+            # END while trees are there for walking
+            blobs_per_commit.append(blobs)
+        # END for each commit
+        elapsed = time() - st
+
+        print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
+
+        # GET BLOBS
+        st = time()
+        nb = 0
+        too_many = 15000
+        for blob_list in blobs_per_commit:
+            for blob in blob_list:
+                blob.data
+            # END for each blob
+            nb += len(blob_list)
+            if nb > too_many:
+                break
+        # END for each blob list
+        elapsed = time() - st
+
+        print >> sys.stderr, "Retrieved %i blobs and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
-- cgit v1.2.3
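
For a sense of the format the planned GitObjectDatabase has to parse: a loose
object lives at .git/objects/<sha[:2]>/<sha[2:]> and is a zlib-deflated buffer
starting with a '<type> <size>\0' header. A hedged, standalone sketch of
decoding one directly (not the patch's API):

    import os
    import zlib

    def read_loose_object(git_dir, hexsha):
        # loose objects are zlib-deflated '<type> <size>\0<payload>' buffers
        path = os.path.join(git_dir, 'objects', hexsha[:2], hexsha[2:])
        raw = zlib.decompress(open(path, 'rb').read())
        header, payload = raw.split('\0', 1)
        type_name, size = header.split(' ')
        assert int(size) == len(payload)
        return type_name, int(size), payload
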

From 38d59fc8ccccae8882fa48671377bf40a27915a7 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 3 Jun 2010 16:35:35 +0200
Subject: odb: implemented loose object streaming, which is impossible to do
 efficiently, considering that it copies string buffers all the time

---
 test/git/performance/test_utils.py | 44 ++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 test/git/performance/test_utils.py

diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py
new file mode 100644
index 00000000..381d7c8b
--- /dev/null
+++ b/test/git/performance/test_utils.py
@@ -0,0 +1,44 @@
+"""Performance tests for utilities"""
+from time import time
+import sys
+import stat
+
+from lib import (
+    TestBigRepoReadOnly
+    )
+
+
+class TestUtilPerformance(TestBigRepoReadOnly):
+
+    def test_access(self):
+        # compare dict vs. slot access
+        class Slotty(object):
+            __slots__ = "attr"
+            def __init__(self):
+                self.attr = 1
+
+        class Dicty(object):
+            def __init__(self):
+                self.attr = 1
+
+        class BigSlotty(object):
+            __slots__ = ('attr', ) + tuple('abcdefghijk')
+            def __init__(self):
+                for attr in self.__slots__:
+                    setattr(self, attr, 1)
+
+        class BigDicty(object):
+            def __init__(self):
+                for attr in BigSlotty.__slots__:
+                    setattr(self, attr, 1)
+
+        ni = 1000000
+        for cls in (Slotty, Dicty, BigSlotty, BigDicty):
+            cli = cls()
+            st = time()
+            for i in xrange(ni):
+                cli.attr
+            # END for each access
+            elapsed = time() - st
+            print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed)
+        # END for each class type
-- cgit v1.2.3
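
The hand-rolled loops above keep all measurements in one style; for a single
attribute-access figure, the stdlib timeit module gives comparable numbers
with less code. An equivalent sketch (class bodies and the one million reads
match the test; everything else is illustrative):

    import timeit

    setup = ('class %(name)s(object):\n'
             '    %(slots)s\n'
             '    def __init__(self):\n'
             '        self.attr = 1\n'
             'obj = %(name)s()\n')

    for name, slots in (('Slotty', '__slots__ = "attr"'), ('Dicty', 'pass')):
        t = timeit.Timer('obj.attr', setup=setup % {'name': name, 'slots': slots})
        print '%s: %f s for 1M reads' % (name, t.timeit(1000000))
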

From 26e138cb47dccc859ff219f108ce9b7d96cbcbcd Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 3 Jun 2010 18:21:05 +0200
Subject: odb: fixed streamed decompression reader (specific tests are still
 missing) and added performance tests, which are extremely promising

---
 test/git/performance/test_streams.py | 91 ++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100644 test/git/performance/test_streams.py

diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
new file mode 100644
index 00000000..15924c08
--- /dev/null
+++ b/test/git/performance/test_streams.py
@@ -0,0 +1,91 @@
+"""Performance tests for data streaming"""
+
+from test.testlib import *
+from git.odb.db import *
+
+from array import array
+from cStringIO import StringIO
+from time import time
+import os
+import sys
+import stat
+import random
+
+
+from lib import (
+    TestBigRepoReadOnly
+    )
+
+
+
+def make_memory_file(size_in_bytes, randomize=False):
+    """:return: tuple(size_of_stream, stream)
+    :param randomize: try to produce a very random stream"""
+    actual_size = size_in_bytes / 4
+    producer = xrange(actual_size)
+    if randomize:
+        producer = list(producer)
+        random.shuffle(producer)
+    # END randomize
+    a = array('i', producer)
+    return actual_size*4, StringIO(a.tostring())
+
+
+class TestObjDBPerformance(TestBigRepoReadOnly):
+
+    large_data_size_bytes = 1000*1000*10    # some MiB should do it
+    moderate_data_size_bytes = 1000*1000*1  # just 1 MiB
+
+    @with_bare_rw_repo
+    def test_large_data_streaming(self, rwrepo):
+        ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+
+        for randomize in range(2):
+            desc = (randomize and 'random ') or ''
+            print >> sys.stderr, "Creating %s data ..." % desc
+            st = time()
+            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+            elapsed = time() - st
+            print >> sys.stderr, "Done (in %f s)" % elapsed
+
+            # writing - due to the compression it will seem faster than it is
+            st = time()
+            sha = ldb.to_object('blob', size, stream)
+            elapsed = time() - st
+            assert ldb.has_object(sha)
+            fsize_kib = os.path.getsize(ldb.readable_db_object_path(sha)) / 1000
+
+
+            size_kib = size / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed, size_kib / elapsed)
+
+            # reading all at once
+            st = time()
+            type, size, shastream = ldb.object(sha)
+            shadata = shastream.read()
+            elapsed = time() - st
+
+            stream.seek(0)
+            assert shadata == stream.getvalue()
+            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed, size_kib / elapsed)
+
+
+            # reading in chunks of 512 KiB
+            cs = 512*1000
+            chunks = list()
+            st = time()
+            type, size, shastream = ldb.object(sha)
+            while True:
+                data = shastream.read(cs)
+                chunks.append(data)
+                if len(data) < cs:
+                    break
+            # END read in chunks
+            elapsed = time() - st
+
+            stream.seek(0)
+            assert ''.join(chunks) == stream.getvalue()
+
+            cs_kib = cs / 1000
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed, size_kib / elapsed)
+        # END for each randomization factor
-- cgit v1.2.3
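
The chunked read above uses the classic read-until-short-read idiom; as a
reusable helper it might look like this (a sketch, not part of the patch):

    def iter_chunks(stream, chunk_size=512*1000):
        # yield chunks until a short read signals the end of the stream
        while True:
            data = stream.read(chunk_size)
            if data:
                yield data
            if len(data) < chunk_size:
                break

    from cStringIO import StringIO
    assert ''.join(iter_chunks(StringIO('x' * 1000), 300)) == 'x' * 1000
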

From 4b4a514e51fbc7dc6ddcb27c188159d57b5d1fa9 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 3 Jun 2010 19:04:18 +0200
Subject: Added performance comparison to cgit

... and yes, git-python is faster :)
---
 test/git/performance/test_streams.py | 67 ++++++++++++++++++++++++++++++----
 1 file changed, 60 insertions(+), 7 deletions(-)

diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
index 15924c08..6c2834b3 100644
--- a/test/git/performance/test_streams.py
+++ b/test/git/performance/test_streams.py
@@ -10,6 +10,7 @@ import os
 import sys
 import stat
 import random
+import subprocess
 
 
 from lib import (
@@ -51,23 +52,24 @@ class TestObjDBPerformance(TestBigRepoReadOnly):
 
             # writing - due to the compression it will seem faster than it is
             st = time()
             sha = ldb.to_object('blob', size, stream)
-            elapsed = time() - st
+            elapsed_add = time() - st
             assert ldb.has_object(sha)
-            fsize_kib = os.path.getsize(ldb.readable_db_object_path(sha)) / 1000
+            db_file = ldb.readable_db_object_path(sha)
+            fsize_kib = os.path.getsize(db_file) / 1000
 
 
             size_kib = size / 1000
-            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
 
             # reading all at once
             st = time()
             type, size, shastream = ldb.object(sha)
             shadata = shastream.read()
-            elapsed = time() - st
+            elapsed_readall = time() - st
 
             stream.seek(0)
             assert shadata == stream.getvalue()
-            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
 
 
             # reading in chunks of 512 KiB
@@ -81,11 +83,62 @@ class TestObjDBPerformance(TestBigRepoReadOnly):
                 if len(data) < cs:
                     break
             # END read in chunks
-            elapsed = time() - st
+            elapsed_readchunks = time() - st
 
             stream.seek(0)
             assert ''.join(chunks) == stream.getvalue()
 
             cs_kib = cs / 1000
-            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+            # del db file so git has something to do
+            os.remove(db_file)
+
+            # VS. CGIT
+            ##########
+            # CGIT! Can using the cgit programs be faster?
+            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
+
+            # write file - pump everything in at once to be as fast as possible
+            data = stream.getvalue()    # cache it
+            st = time()
+            proc.stdin.write(data)
+            proc.stdin.close()
+            gitsha = proc.stdout.read().strip()
+            proc.wait()
+            gelapsed_add = time() - st
+            del(data)
+            assert gitsha == sha    # we do it the same way, right ?
+
+            # as it's the same sha, we reuse our path
+            fsize_kib = os.path.getsize(db_file) / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
+
+            # compare ...
+            print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %sfiles" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
+
+
+            # read all
+            st = time()
+            s, t, size, data = rwrepo.git.get_object_data(gitsha)
+            gelapsed_readall = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
+
+
+            # read chunks
+            st = time()
+            s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
+            while True:
+                data = stream.read(cs)
+                if len(data) < cs:
+                    break
+            # END read stream
+            gelapsed_readchunks = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
         # END for each randomization factor
-- cgit v1.2.3
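
The patch drives git-hash-object through GitPython's command wrapper; the same
write measurement can be reproduced with plain subprocess against any
repository (a sketch; the repository location is an assumption):

    import subprocess

    def git_hash_object_stdin(repo_dir, data):
        # write data as a blob into the repository at repo_dir, return its sha
        proc = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'],
                                cwd=repo_dir,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        out, _ = proc.communicate(data)
        assert proc.returncode == 0
        return out.strip()
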

From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 3 Jun 2010 23:20:34 +0200
Subject: commit.create_from_tree now uses a pure Python implementation, fixed
 message parsing which truncated newlines although that was illegitimate.

It's up to the reader to truncate these; nowhere in the git code could I find
anyone adding newlines to commits when they are written.

Added performance tests for serialization; it does about 5k commits per
second if writing to tmpfs.
---
 test/git/performance/lib.py          | 25 ++++++++++++++++++++++---
 test/git/performance/test_commit.py  | 36 +++++++++++++++++++++++++++++++++---
 test/git/performance/test_odb.py     |  6 +++---
 test/git/performance/test_streams.py |  4 ++--
 test/git/performance/test_utils.py   |  4 ++--
 5 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
index 4b552b20..650bea82 100644
--- a/test/git/performance/lib.py
+++ b/test/git/performance/lib.py
@@ -1,6 +1,8 @@
 """Contains library functions"""
 import os
 from test.testlib import *
+import shutil
+import tempfile
 
 from git import (
     Repo
     )
@@ -25,7 +27,7 @@ def resolve_or_fail(env_var):
 
 #{ Base Classes
 
-class TestBigRepoReadOnly(TestBase):
+class TestBigRepoR(TestBase):
     """TestCase providing access to readonly 'big' repositories using the following
     member variables:
 
@@ -40,7 +42,24 @@ class TestBigRepoReadOnly(TestBase):
 
     @classmethod
     def setUpAll(cls):
-        super(TestBigRepoReadOnly, cls).setUpAll()
-        cls.gitrepo = Repo(resolve_or_fail(k_env_git_repo))
+        super(TestBigRepoR, cls).setUpAll()
+        cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo))
+
+
+class TestBigRepoRW(TestBigRepoR):
+    """As above, but provides a big repository that we can write to.
+
+    Provides ``self.gitrwrepo``"""
+
+    @classmethod
+    def setUpAll(cls):
+        super(TestBigRepoRW, cls).setUpAll()
+        dirname = tempfile.mktemp()
+        os.mkdir(dirname)
+        cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True)
+
+    @classmethod
+    def tearDownAll(cls):
+        shutil.rmtree(cls.gitrwrepo.working_tree_dir)
 
 #} END base classes
diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
index b4a9d868..2398c93d 100644
--- a/test/git/performance/test_commit.py
+++ b/test/git/performance/test_commit.py
@@ -6,10 +6,12 @@
 
 from lib import *
 from git import *
+from test.git.test_commit import assert_commit_serialization
+from cStringIO import StringIO
 from time import time
 import sys
 
-class TestPerformance(TestBigRepoReadOnly):
+class TestPerformance(TestBigRepoRW):
 
     # ref with about 100 commits in its history
     ref_100 = '0.1.6'
@@ -48,7 +50,7 @@ class TestPerformance(TestBigRepoReadOnly):
         # bound to cat-file parsing performance
         nc = 0
         st = time()
-        for c in self.gitrepo.commit(self.head_sha_2k).traverse(branch_first=False):
+        for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False):
             nc += 1
             self._query_commit_info(c)
         # END for each traversed commit
@@ -59,10 +61,38 @@ class TestPerformance(TestBigRepoReadOnly):
         # bound to stream parsing performance
         nc = 0
         st = time()
-        for c in Commit.iter_items(self.gitrepo, self.head_sha_2k):
+        for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k):
             nc += 1
             self._query_commit_info(c)
         # END for each traversed commit
         elapsed_time = time() - st
         print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
 
+    def test_commit_serialization(self):
+        assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
+
+        rwrepo = self.gitrwrepo
+        make_object = rwrepo.odb.to_object
+        # direct serialization - deserialization can be tested afterwards
+        # serialization is probably limited on IO
+        hc = rwrepo.commit(self.head_sha_2k)
+
+        commits = list()
+        nc = 5000
+        st = time()
+        for i in xrange(nc):
+            cm = Commit(rwrepo, Commit.NULL_HEX_SHA, hc.tree,
+                        hc.author, hc.authored_date, hc.author_tz_offset,
+                        hc.committer, hc.committed_date, hc.committer_tz_offset,
+                        str(i), parents=hc.parents, encoding=hc.encoding)
+
+            stream = StringIO()
+            cm._serialize(stream)
+            slen = stream.tell()
+            stream.seek(0)
+
+            cm.sha = make_object(Commit.type, slen, stream)
+        # END commit creation
+        elapsed = time() - st
+
+        print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
index 0ad2ce33..7b1ee838 100644
--- a/test/git/performance/test_odb.py
+++ b/test/git/performance/test_odb.py
@@ -5,18 +5,18 @@ import sys
 import stat
 
 from lib import (
-    TestBigRepoReadOnly
+    TestBigRepoR
     )
 
 
-class TestObjDBPerformance(TestBigRepoReadOnly):
+class TestObjDBPerformance(TestBigRepoR):
 
     def test_random_access(self):
 
         # GET COMMITS
         # TODO: use the actual db for this
         st = time()
-        root_commit = self.gitrepo.commit(self.head_sha_2k)
+        root_commit = self.gitrorepo.commit(self.head_sha_2k)
         commits = list(root_commit.traverse())
         nc = len(commits)
         elapsed = time() - st
diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
index 6c2834b3..d31bee14 100644
--- a/test/git/performance/test_streams.py
+++ b/test/git/performance/test_streams.py
@@ -14,7 +14,7 @@ import subprocess
 
 
 from lib import (
-    TestBigRepoReadOnly
+    TestBigRepoR
     )
 
 
@@ -32,7 +32,7 @@ def make_memory_file(size_in_bytes, randomize=False):
     return actual_size*4, StringIO(a.tostring())
 
 
-class TestObjDBPerformance(TestBigRepoReadOnly):
+class TestObjDBPerformance(TestBigRepoR):
 
     large_data_size_bytes = 1000*1000*10    # some MiB should do it
     moderate_data_size_bytes = 1000*1000*1  # just 1 MiB
diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py
index 381d7c8b..47366d34 100644
--- a/test/git/performance/test_utils.py
+++ b/test/git/performance/test_utils.py
@@ -4,11 +4,11 @@ import sys
 import stat
 
 from lib import (
-    TestBigRepoReadOnly
+    TestBigRepoR
    )
 
 
-class TestUtilPerformance(TestBigRepoReadOnly):
+class TestUtilPerformance(TestBigRepoR):
 
     def test_access(self):
         # compare dict vs. slot access
-- cgit v1.2.3
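
The 5k commits/s serialization figure assumes the rw repository sits on fast
storage; TestBigRepoRW clones into tempfile.mktemp(), which honours TMPDIR, so
steering it onto a RAM-backed filesystem is a one-liner (the /dev/shm path is
an assumption about the host):

    import os
    os.environ['TMPDIR'] = '/dev/shm'    # set before tempfile is first used

    import tempfile
    print tempfile.mktemp()              # e.g. '/dev/shm/tmpWq3K1x'
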

From 1906ee4df9ae4e734288c5203cf79894dff76cab Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Thu, 3 Jun 2010 23:27:09 +0200
Subject: Fixed compatibility issues with Python 2.5, made sure all tests run

---
 test/git/performance/lib.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
index 650bea82..7d2a9f4a 100644
--- a/test/git/performance/lib.py
+++ b/test/git/performance/lib.py
@@ -60,6 +60,6 @@ class TestBigRepoRW(TestBigRepoR):
 
     @classmethod
     def tearDownAll(cls):
-        shutil.rmtree(cls.gitrwrepo.working_tree_dir)
+        shutil.rmtree(cls.gitrwrepo.working_dir)
 
 #} END base classes
-- cgit v1.2.3

From a1e80445ad5cb6da4c0070d7cb8af89da3b0803b Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Fri, 4 Jun 2010 14:41:15 +0200
Subject: initial version of the new odb design, to facilitate a channel-based
 multi-threading implementation of all odb functions

---
 test/git/performance/test_commit.py  | 2 +-
 test/git/performance/test_streams.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
index 2398c93d..bca3ad8b 100644
--- a/test/git/performance/test_commit.py
+++ b/test/git/performance/test_commit.py
@@ -72,7 +72,7 @@ class TestPerformance(TestBigRepoRW):
         assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
 
         rwrepo = self.gitrwrepo
-        make_object = rwrepo.odb.to_object
+        make_object = rwrepo.odb.store
         # direct serialization - deserialization can be tested afterwards
         # serialization is probably limited on IO
         hc = rwrepo.commit(self.head_sha_2k)
diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
index d31bee14..30fd8048 100644
--- a/test/git/performance/test_streams.py
+++ b/test/git/performance/test_streams.py
@@ -51,7 +51,7 @@ class TestObjDBPerformance(TestBigRepoR):
 
             # writing - due to the compression it will seem faster than it is
             st = time()
-            sha = ldb.to_object('blob', size, stream)
+            sha = ldb.store('blob', size, stream)
             elapsed_add = time() - st
@@ -63,7 +63,7 @@ class TestObjDBPerformance(TestBigRepoR):
 
             # reading all at once
             st = time()
-            type, size, shastream = ldb.object(sha)
+            type, size, shastream = ldb.stream(sha)
             shadata = shastream.read()
             elapsed_readall = time() - st
@@ -76,7 +76,7 @@ class TestObjDBPerformance(TestBigRepoR):
             cs = 512*1000
             chunks = list()
             st = time()
-            type, size, shastream = ldb.object(sha)
+            type, size, shastream = ldb.stream(sha)
             while True:
                 data = shastream.read(cs)
-- cgit v1.2.3

From e746f96bcc29238b79118123028ca170adc4ff0f Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Fri, 4 Jun 2010 17:22:08 +0200
Subject: Fixed implementation after the design change to deal with it - all
 tests run, but more thorough testing will have to follow

---
 test/git/performance/test_commit.py  |  2 +-
 test/git/performance/test_streams.py | 12 ++++++------
 test/git/performance/test_utils.py   | 15 +++++++++++++++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
index bca3ad8b..0571d0d9 100644
--- a/test/git/performance/test_commit.py
+++ b/test/git/performance/test_commit.py
@@ -91,7 +91,7 @@ class TestPerformance(TestBigRepoRW):
             slen = stream.tell()
             stream.seek(0)
 
-            cm.sha = make_object(Commit.type, slen, stream)
+            cm.sha = make_object(IStream(Commit.type, slen, stream)).sha
         # END commit creation
         elapsed = time() - st
 
diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
index 30fd8048..01ec9fc4 100644
--- a/test/git/performance/test_streams.py
+++ b/test/git/performance/test_streams.py
@@ -1,7 +1,7 @@
 """Performance tests for data streaming"""
 
 from test.testlib import *
-from git.odb.db import *
+from git.odb import *
 
 from array import array
 from cStringIO import StringIO
@@ -51,7 +51,7 @@ class TestObjDBPerformance(TestBigRepoR):
 
             # writing - due to the compression it will seem faster than it is
             st = time()
-            sha = ldb.store('blob', size, stream)
+            sha = ldb.store(IStream('blob', size, stream)).sha
             elapsed_add = time() - st
@@ -63,8 +63,8 @@ class TestObjDBPerformance(TestBigRepoR):
 
             # reading all at once
             st = time()
-            type, size, shastream = ldb.stream(sha)
-            shadata = shastream.read()
+            ostream = ldb.stream(sha)
+            shadata = ostream.read()
             elapsed_readall = time() - st
@@ -76,9 +76,9 @@ class TestObjDBPerformance(TestBigRepoR):
             cs = 512*1000
             chunks = list()
             st = time()
-            type, size, shastream = ldb.stream(sha)
+            ostream = ldb.stream(sha)
             while True:
-                data = shastream.read(cs)
+                data = ostream.read(cs)
                 chunks.append(data)
                 if len(data) < cs:
                     break
diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py
index 47366d34..76adffec 100644
--- a/test/git/performance/test_utils.py
+++ b/test/git/performance/test_utils.py
@@ -42,3 +42,18 @@ class TestUtilPerformance(TestBigRepoR):
             elapsed = time() - st
             print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed)
         # END for each class type
+
+        # check number of sequence accesses
+        for cls in (list, tuple):
+            x = 10
+            st = time()
+            s = cls(range(x))
+            for i in xrange(ni):
+                s[0]
+                s[1]
+                s[2]
+            # END for
+            elapsed = time() - st
+            na = ni * 3
+            print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed)
+        # END for each sequence
-- cgit v1.2.3
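
Altogether the series leaves a suite that only makes sense against a large
repository. A sketch of one way to invoke it, assuming a git.git clone at
/tmp/git and the nose runner that the setUpAll/tearDownAll fixtures suggest
(both are assumptions):

    import os
    import nose

    os.environ['GIT_PYTHON_TEST_GIT_REPO_BASE'] = '/tmp/git'
    nose.run(argv=['nosetests', '-v', 'test/git/performance'])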