From 3323464f85b986cba23176271da92a478b33ab9c Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 10 Jun 2010 00:24:49 +0200
Subject: messy first version of a properly working depth-first graph method,
 which allows the pool to work as expected. Many more tests need to be added,
 and there still is a problem with shutdown as sometimes it won't kill all
 threads, mainly because the process came up with worker threads started,
 which cannot be

---
 lib/git/async/pool.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 2ec18f1a..5ebc3655 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -182,14 +182,13 @@ class Pool(object):
 				dfirst_tasks = self._taskorder_cache[id(task)]
 			except KeyError:
 				# have to retrieve the list from the graph
-				dfirst_tasks = list()
-				self._tasks.visit_input_inclusive_depth_first(task, lambda n: dfirst_tasks.append(n))
+				dfirst_tasks = self._tasks.input_inclusive_dfirst_reversed(task)
 				self._taskorder_cache[id(task)] = dfirst_tasks
 			# END handle cached order retrieval
 		finally:
 			self._taskgraph_lock.release()
 		# END handle locking
-		
+		print dfirst_tasks
 		# check the min count on all involved tasks, and be sure that we don't 
 		# have any task which produces less than the maximum min-count of all tasks
 		# The actual_count is used when chunking tasks up for the queue, whereas 
@@ -309,6 +308,7 @@ class Pool(object):
 			threadsafe to optimize item throughput.
 		
 		:note: currently NOT threadsafe !"""
+		print "set_size", size
 		assert size > -1, "Size cannot be negative"
 		
 		# either start new threads, or kill existing ones.
-- 
cgit v1.2.3


From cfb278d74ad01f3f1edf5e0ad113974a9555038d Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 10 Jun 2010 10:14:32 +0200
Subject: InputChannelTask now has interface for properly handling the reading
 from the same and different pools

---
 lib/git/async/pool.py | 43 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 5ebc3655..1b3c2748 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -10,7 +10,6 @@ from util import (
 		DummyLock
 	)
 
-from task import InputChannelTask
 from Queue import (
 	Queue, 
 	Empty
@@ -66,6 +65,24 @@ class RPoolChannel(CallbackRChannel):
 		if sys.getrefcount(self) < 6:
 			pool.remove_task(task, _from_destructor_=True)
 		# END handle refcount based removal of task
+
+	#{ Internal
+	def _read(self, count=0, block=True, timeout=None):
+		"""Direct read, bypassing the pool handling"""
+		return CallbackRChannel.read(self, count, block, timeout)
+	#} END internal
+	
+	#{ Interface
+	
+	def pool_ref(self):
+		""":return: reference to the pool we belong to"""
+		return self._pool_ref
+		
+	def task_ref(self):
+		""":return: reference to the task producing our items"""
+		return self._task_ref
+	
+	#} END interface 
 	
 	def read(self, count=0, block=True, timeout=None):
 		"""Read an item that was processed by one of our threads
@@ -188,7 +205,7 @@ class Pool(object):
 		finally:
 			self._taskgraph_lock.release()
 		# END handle locking
-		print dfirst_tasks
+		
 		# check the min count on all involved tasks, and be sure that we don't 
 		# have any task which produces less than the maximum min-count of all tasks
 		# The actual_count is used when chunking tasks up for the queue, whereas 
@@ -406,6 +423,18 @@ class Pool(object):
 		# create a write channel for it
 		wctype = WChannel
 		
+		# adjust the task with our pool ref, if it has the slot and is empty
+		# For now, we don't allow tasks to be used in multiple pools, except
+		# for by their channels
+		if hasattr(task, 'pool'):
+			their_pool = task.pool()
+			if their_pool is None:
+				task.set_pool(self)
+			elif their_pool is not self:
+				raise ValueError("Task %r is already registered to another pool" % task.id)
+			# END handle pool exclusivity
+		# END handle pool aware tasks
+		
 		self._taskgraph_lock.acquire()
 		try:
 			self._taskorder_cache.clear()
@@ -431,12 +460,18 @@ class Pool(object):
 		# END sync task addition
 		
 		# If the input channel is one of our read channels, we add the relation
-		if isinstance(task, InputChannelTask):
+		if hasattr(task, 'rchannel'):
 			ic = task.rchannel()
-			if isinstance(ic, RPoolChannel) and ic._pool_ref() is self:
+			if hasattr(ic, 'pool_ref') and ic.pool_ref()() is self:
 				self._taskgraph_lock.acquire()
 				try:
 					self._tasks.add_edge(ic._task_ref(), task)
+					
+					# additionally, bypass ourselves when reading from the 
+					# task, if possible
+					if hasattr(ic, '_read'):
+						task.set_read(ic._read)
+					# END handle read bypass
 				finally:
 					self._taskgraph_lock.release()
 				# END handle edge-adding
-- 
cgit v1.2.3


From 55e757928e493ce93056822d510482e4ffcaac2d Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 10 Jun 2010 14:39:57 +0200
Subject: channel: Changed design to be more logical - a channel now has any
 amount of readers and writers, a ready is not connected to its writer
 anymore. This changes the refcounting of course, which is why the
 auto-cleanup for the pool is currently broken. The benefit of this are faster
 writes to the channel, reading didn't improve, refcounts should be clearer
 now

---
 lib/git/async/pool.py | 76 +++++++++++++++++++++++++--------------------------
 1 file changed, 37 insertions(+), 39 deletions(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 1b3c2748..68551ea3 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -18,27 +18,28 @@ from Queue import (
 from graph import Graph 
 from channel import (
 		mkchannel,
-		WChannel, 
-		SerialWChannel,
-		CallbackRChannel
+		Writer, 
+		Channel,
+		SerialChannel,
+		CallbackReader
 	)
 
 import sys
 import weakref
 from time import sleep
+import new
 
 
-class RPoolChannel(CallbackRChannel):
-	""" A read-only pool channel may not be wrapped or derived from, but it provides slots to call
-	before and after an item is to be read.
-	
+class PoolReader(CallbackReader):
+	"""A reader designed to read from channels which take part in pools
 	It acts like a handle to the underlying task in the pool."""
-	__slots__ = ('_task_ref', '_pool_ref')
+	__slots__ = ('_task_ref', '_pool_ref', '_read')
 	
-	def __init__(self, wchannel, task, pool):
-		CallbackRChannel.__init__(self, wchannel)
+	def __init__(self, channel, task, pool):
+		CallbackReader.__init__(self, channel)
 		self._task_ref = weakref.ref(task)
 		self._pool_ref = weakref.ref(pool)
+		self._read = new.instancemethod(CallbackReader.__dict__['read'], self, CallbackReader)
 		
 	def __del__(self):
 		"""Assures that our task will be deleted if we were the last reader"""
@@ -63,15 +64,9 @@ class RPoolChannel(CallbackRChannel):
 		# okay for now
 		# TODO: Fix this - private/public method
 		if sys.getrefcount(self) < 6:
-			pool.remove_task(task, _from_destructor_=True)
+			pool.remove_task(task)
 		# END handle refcount based removal of task
 
-	#{ Internal
-	def _read(self, count=0, block=True, timeout=None):
-		"""Direct read, bypassing the pool handling"""
-		return CallbackRChannel.read(self, count, block, timeout)
-	#} END internal
-	
 	#{ Interface
 	
 	def pool_ref(self):
@@ -118,7 +113,7 @@ class RPoolChannel(CallbackRChannel):
 		####### read data ########
 		##########################
 		# read actual items, tasks were setup to put their output into our channel ( as well )
-		items = CallbackRChannel.read(self, count, block, timeout)
+		items = CallbackReader.read(self, count, block, timeout)
 		##########################
 		
 		
@@ -262,21 +257,21 @@ class Pool(object):
 			# should make things execute faster. Putting the if statements 
 			# into the loop would be less code, but ... slower
 			# DEBUG
-			# print actual_count, numchunks, chunksize, remainder, task._out_wc.size()
+			# print actual_count, numchunks, chunksize, remainder, task._out_writer.size()
 			if self._num_workers:
 				# respect the chunk size, and split the task up if we want 
 				# to process too much. This can be defined per task
-				queue = self._queue
+				qput = self._queue
 				if numchunks > 1:
 					for i in xrange(numchunks):
-						queue.put((task.process, chunksize))
+						qput((task.process, chunksize))
 					# END for each chunk to put
 				else:
-					queue.put((task.process, chunksize))
+					qput((task.process, chunksize))
 				# END try efficient looping
 				
 				if remainder:
-					queue.put((task.process, remainder))
+					qput((task.process, remainder))
 				# END handle chunksize
 			else:
 				# no workers, so we have to do the work ourselves
@@ -295,16 +290,16 @@ class Pool(object):
 		# END for each task to process
 		
 		
-	def _remove_task_if_orphaned(self, task, from_destructor):
+	def _remove_task_if_orphaned(self, task):
 		"""Check the task, and delete it if it is orphaned"""
-		# 1 as its stored on the task, 1 for the getrefcount call
+		# 1 for writer on task, 1 for the getrefcount call + 1 for each other writer/reader
 		# If we are getting here from the destructor of an RPool channel, 
 		# its totally valid to virtually decrement the refcount by 1 as 
 		# we can expect it to drop once the destructor completes, which is when
 		# we finish all recursive calls
-		max_ref_count = 3 + from_destructor
-		if sys.getrefcount(task.wchannel()) < max_ref_count:
-			self.remove_task(task, from_destructor)
+		max_ref_count = 3
+		if sys.getrefcount(task.writer().channel) < max_ref_count:
+			self.remove_task(task)
 	#} END internal
 	
 	#{ Interface 
@@ -375,7 +370,7 @@ class Pool(object):
 		finally:
 			self._taskgraph_lock.release()
 		
-	def remove_task(self, task, _from_destructor_=False):
+	def remove_task(self, task):
 		"""Delete the task
 		Additionally we will remove orphaned tasks, which can be identified if their 
 		output channel is only held by themselves, so no one will ever consume 
@@ -410,7 +405,7 @@ class Pool(object):
 		# END locked deletion
 		
 		for t in in_tasks:
-			self._remove_task_if_orphaned(t, _from_destructor_)
+			self._remove_task_if_orphaned(t)
 		# END handle orphans recursively
 		
 		return self
@@ -421,7 +416,7 @@ class Pool(object):
 			the task will be considered orphaned and will be deleted on the next 
 			occasion."""
 		# create a write channel for it
-		wctype = WChannel
+		ctype = Channel
 		
 		# adjust the task with our pool ref, if it has the slot and is empty
 		# For now, we don't allow tasks to be used in multiple pools, except
@@ -442,26 +437,29 @@ class Pool(object):
 			
 			# Use a non-threadsafe queue
 			# This brings about 15% more performance, but sacrifices thread-safety
-			# when reading from multiple threads.
 			if self.size() == 0:
-				wctype = SerialWChannel
+				ctype = SerialChannel
 			# END improve locks
 			
 			# setup the tasks channel - respect the task creators choice though
 			# if it is set.
-			wc = task.wchannel()
+			wc = task.writer()
+			ch = None
 			if wc is None:
-				wc = wctype()
+				ch = ctype()
+				wc = Writer(ch)
+				task.set_writer(wc)
+			else:
+				ch = wc.channel
 			# END create write channel ifunset
-			rc = RPoolChannel(wc, task, self)
-			task.set_wchannel(wc)
+			rc = PoolReader(ch, task, self)
 		finally:
 			self._taskgraph_lock.release()
 		# END sync task addition
 		
 		# If the input channel is one of our read channels, we add the relation
-		if hasattr(task, 'rchannel'):
-			ic = task.rchannel()
+		if hasattr(task, 'reader'):
+			ic = task.reader()
 			if hasattr(ic, 'pool_ref') and ic.pool_ref()() is self:
 				self._taskgraph_lock.acquire()
 				try:
-- 
cgit v1.2.3


From 7c36f3648e39ace752c67c71867693ce1eee52a3 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 10 Jun 2010 15:38:40 +0200
Subject: Now tracking the amount of concurrent writers to assure the channel
 is closed only when there is no one else writing to it. This assures that all
 tasks can continue working, and put their results accordingly. Shutdown is
 still not working correctly, but that should be solvable as well. Its still
 not perfect though ...

---
 lib/git/async/pool.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 68551ea3..3fd99c7b 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -33,13 +33,12 @@ import new
 class PoolReader(CallbackReader):
 	"""A reader designed to read from channels which take part in pools
 	It acts like a handle to the underlying task in the pool."""
-	__slots__ = ('_task_ref', '_pool_ref', '_read')
+	__slots__ = ('_task_ref', '_pool_ref')
 	
 	def __init__(self, channel, task, pool):
 		CallbackReader.__init__(self, channel)
 		self._task_ref = weakref.ref(task)
 		self._pool_ref = weakref.ref(pool)
-		self._read = new.instancemethod(CallbackReader.__dict__['read'], self, CallbackReader)
 		
 	def __del__(self):
 		"""Assures that our task will be deleted if we were the last reader"""
@@ -62,11 +61,16 @@ class PoolReader(CallbackReader):
 		# it has no way of knowing that the write channel is about to diminsh.
 		# which is why we pass the info as a private kwarg  - not nice, but 
 		# okay for now
-		# TODO: Fix this - private/public method
 		if sys.getrefcount(self) < 6:
-			pool.remove_task(task)
+			pool.remove_task(task, _from_destructor_ = True)
 		# END handle refcount based removal of task
 
+	#{ Internal
+	def _read(self, count=0, block=True, timeout=None):
+		return CallbackReader.read(self, count, block, timeout)
+	
+	#} END internal
+
 	#{ Interface
 	
 	def pool_ref(self):
@@ -261,7 +265,7 @@ class Pool(object):
 			if self._num_workers:
 				# respect the chunk size, and split the task up if we want 
 				# to process too much. This can be defined per task
-				qput = self._queue
+				qput = self._queue.put
 				if numchunks > 1:
 					for i in xrange(numchunks):
 						qput((task.process, chunksize))
@@ -290,16 +294,16 @@ class Pool(object):
 		# END for each task to process
 		
 		
-	def _remove_task_if_orphaned(self, task):
+	def _remove_task_if_orphaned(self, task, from_destructor):
 		"""Check the task, and delete it if it is orphaned"""
 		# 1 for writer on task, 1 for the getrefcount call + 1 for each other writer/reader
 		# If we are getting here from the destructor of an RPool channel, 
 		# its totally valid to virtually decrement the refcount by 1 as 
 		# we can expect it to drop once the destructor completes, which is when
 		# we finish all recursive calls
-		max_ref_count = 3
+		max_ref_count = 3 + from_destructor
 		if sys.getrefcount(task.writer().channel) < max_ref_count:
-			self.remove_task(task)
+			self.remove_task(task, from_destructor)
 	#} END internal
 	
 	#{ Interface 
@@ -370,7 +374,7 @@ class Pool(object):
 		finally:
 			self._taskgraph_lock.release()
 		
-	def remove_task(self, task):
+	def remove_task(self, task, _from_destructor_ = False):
 		"""Delete the task
 		Additionally we will remove orphaned tasks, which can be identified if their 
 		output channel is only held by themselves, so no one will ever consume 
@@ -405,7 +409,7 @@ class Pool(object):
 		# END locked deletion
 		
 		for t in in_tasks:
-			self._remove_task_if_orphaned(t)
+			self._remove_task_if_orphaned(t, _from_destructor_)
 		# END handle orphans recursively
 		
 		return self
-- 
cgit v1.2.3


From fbe062bf6dacd3ad63dd827d898337fa542931ac Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Thu, 10 Jun 2010 23:55:50 +0200
Subject: Added dependency-task tests, and fixed plenty of ref-count related
 bugs, as well as concurrency issues. Now it works okay, but the
 thread-shutdown is still an issue, as it causes incorrect behaviour making
 the tests fail. Its good, as it hints at additional issues that need to be
 solved. There is just a little more left on the feature side, but its nearly
 there

---
 lib/git/async/pool.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 3fd99c7b..0aad90ae 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -402,7 +402,7 @@ class Pool(object):
 			
 			# keep its input nodes as we check whether they were orphaned
 			in_tasks = task.in_nodes
-			self._tasks.del_node(task)
+			self._tasks.remove_node(task)
 			self._taskorder_cache.clear()
 		finally:
 			self._taskgraph_lock.release()
-- 
cgit v1.2.3


From 6d1212e8c412b0b4802bc1080d38d54907db879d Mon Sep 17 00:00:00 2001
From: Sebastian Thiel <byronimo@gmail.com>
Date: Fri, 11 Jun 2010 11:52:01 +0200
Subject: IMPORTANT: sometimes, when notifying waiters by releasing their lock,
 the lock is not actually released or they are not actually notifyied, staying
 in a beautysleep. This glitch is probably caused by some detail not treated
 correctly in the thread python module, which is something we cannot fix. It
 works most of the time as expected though - maybe some cleanup is not done
 correctly which causes this

---
 lib/git/async/pool.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/git/async/pool.py')

diff --git a/lib/git/async/pool.py b/lib/git/async/pool.py
index 0aad90ae..dbc201a9 100644
--- a/lib/git/async/pool.py
+++ b/lib/git/async/pool.py
@@ -324,7 +324,6 @@ class Pool(object):
 			threadsafe to optimize item throughput.
 		
 		:note: currently NOT threadsafe !"""
-		print "set_size", size
 		assert size > -1, "Size cannot be negative"
 		
 		# either start new threads, or kill existing ones.
-- 
cgit v1.2.3