From 155158e1410ff036812a87975cce6cb91aa8280e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 6 Jun 2011 17:15:12 +0200 Subject: Added PackedDB test with generalized type to allow other implementations to be tested as well at some point --- git/test/performance/db/test_packedodb_pure.py | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 git/test/performance/db/test_packedodb_pure.py (limited to 'git/test/performance/db/test_packedodb_pure.py') diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py new file mode 100644 index 00000000..7b9f2930 --- /dev/null +++ b/git/test/performance/db/test_packedodb_pure.py @@ -0,0 +1,7 @@ +from packedodb_impl import TestPurePackedODBPerformanceBase +from git.db.py.pack import PurePackedODB + +class TestPurePackedODB(TestPurePackedODBPerformanceBase): + #{ Configuration + PackedODBCls = PurePackedODB + #} END configuration -- cgit v1.2.3 From ce79835556c195ed6e638a33280f729537dcee54 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 6 Jun 2011 20:12:48 +0200 Subject: Fixed performance pack writing tests. 
As they are actually dependent on the database (as streams have to be decompressed, it should be redesigned to have multiple database implementations) --- git/test/performance/db/test_packedodb_pure.py | 76 ++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) (limited to 'git/test/performance/db/test_packedodb_pure.py') diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py index 7b9f2930..f254c518 100644 --- a/git/test/performance/db/test_packedodb_pure.py +++ b/git/test/performance/db/test_packedodb_pure.py @@ -1,7 +1,83 @@ +# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors +# +# This module is part of GitDB and is released under +# the New BSD License: http://www.opensource.org/licenses/bsd-license.php from packedodb_impl import TestPurePackedODBPerformanceBase from git.db.py.pack import PurePackedODB +from git.stream import NullStream + +from git.pack import PackEntity + +import os +import sys + +from time import time +from nose import SkipTest + + +class CountedNullStream(NullStream): + __slots__ = '_bw' + def __init__(self): + self._bw = 0 + + def bytes_written(self): + return self._bw + + def write(self, d): + self._bw += NullStream.write(self, d) + + class TestPurePackedODB(TestPurePackedODBPerformanceBase): #{ Configuration PackedODBCls = PurePackedODB #} END configuration + + def test_pack_writing(self): + # see how fast we can write a pack from object streams. 
+ # This will not be fast, as we take time for decompressing the streams as well + ostream = CountedNullStream() + pdb = self.ropdb + + ni = 5000 + count = 0 + total_size = 0 + st = time() + objs = list() + for sha in pdb.sha_iter(): + count += 1 + objs.append(pdb.stream(sha)) + if count == ni: + break + #END gather objects for pack-writing + elapsed = time() - st + print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed) + + st = time() + PackEntity.write_pack(objs, ostream.write) + elapsed = time() - st + total_kb = ostream.bytes_written() / 1000 + print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed) + + + def test_stream_reading(self): + raise SkipTest("This test was only used for --with-profile runs") + pdb = self.ropdb + + # streaming only, meant for --with-profile runs + ni = 5000 + count = 0 + pdb_stream = pdb.stream + total_size = 0 + st = time() + for sha in pdb.sha_iter(): + if count == ni: + break + stream = pdb_stream(sha) + stream.read() + total_size += stream.size + count += 1 + elapsed = time() - st + total_kib = total_size / 1000 + print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totallying %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib/elapsed , elapsed, ni / elapsed) + -- cgit v1.2.3 From 3c12de3762abcde33dd27151b49589da76c2132f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 6 Jun 2011 20:46:53 +0200 Subject: Improved pack writing test to show that the pack generation can be lightning fast with nearly no overhead if the data streams in fast enough (~30 MB/s when writing a pack). This shows that there is huge potential for sending packs, considering that we are actually recompressing them (without deltification). To be faster in future, we could probably just send ref-deltas or full objects as found in the pack without doing any recompression. 
--- git/test/performance/db/test_packedodb_pure.py | 49 +++++++++++++++----------- 1 file changed, 28 insertions(+), 21 deletions(-) (limited to 'git/test/performance/db/test_packedodb_pure.py') diff --git a/git/test/performance/db/test_packedodb_pure.py b/git/test/performance/db/test_packedodb_pure.py index f254c518..4ea09779 100644 --- a/git/test/performance/db/test_packedodb_pure.py +++ b/git/test/performance/db/test_packedodb_pure.py @@ -33,31 +33,38 @@ class TestPurePackedODB(TestPurePackedODBPerformanceBase): PackedODBCls = PurePackedODB #} END configuration + def test_pack_writing_note(self): + sys.stderr.write("test_pack_writing should be adjusted to support different databases to read from - see test for more info") + raise SkipTest() + def test_pack_writing(self): # see how fast we can write a pack from object streams. # This will not be fast, as we take time for decompressing the streams as well + # For now we test the fast streaming and slow streaming versions manually ostream = CountedNullStream() - pdb = self.ropdb - - ni = 5000 - count = 0 - total_size = 0 - st = time() - objs = list() - for sha in pdb.sha_iter(): - count += 1 - objs.append(pdb.stream(sha)) - if count == ni: - break - #END gather objects for pack-writing - elapsed = time() - st - print >> sys.stderr, "PDB Streaming: Got %i streams by sha in in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed) - - st = time() - PackEntity.write_pack(objs, ostream.write) - elapsed = time() - st - total_kb = ostream.bytes_written() / 1000 - print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed) + # NOTE: We use the same repo twice to see whether OS caching helps + for rorepo in (self.rorepo, self.rorepo, self.ropdb): + + ni = 5000 + count = 0 + total_size = 0 + st = time() + objs = list() + for sha in rorepo.sha_iter(): + count += 1 + objs.append(rorepo.stream(sha)) + if count == ni: + break + #END gather objects for pack-writing + 
elapsed = time() - st + print >> sys.stderr, "PDB Streaming: Got %i streams from %s by sha in in %f s ( %f streams/s )" % (ni, rorepo.__class__.__name__, elapsed, ni / elapsed) + + st = time() + PackEntity.write_pack(objs, ostream.write) + elapsed = time() - st + total_kb = ostream.bytes_written() / 1000 + print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed) + #END for each rorepo def test_stream_reading(self): -- cgit v1.2.3