diff --git a/pyopencl/array.py b/pyopencl/array.py
index 97dc28dc4..ebb748bee 100644
--- a/pyopencl/array.py
+++ b/pyopencl/array.py
@@ -228,6 +228,14 @@ class _copy_queue:  # noqa
 
 _ARRAY_GET_SIZES_CACHE = {}
 
+from dataclasses import dataclass
+
+@dataclass(frozen=True, repr=True)  # frozen => hashable, usable in dict keys
+class MyFrameSummary:  # one stack frame, copied from traceback.extract_stack()
+    filename: str  # taken from FrameSummary.filename
+    lineno: int  # taken from FrameSummary.lineno
+    func_name: str  # taken from FrameSummary.name
+
 
 class Array:
     """A :class:`numpy.ndarray` work-alike that stores its data and performs
@@ -414,6 +422,14 @@ class Array:
 
     __array_priority__ = 100
 
+    big_threshold = 30_000  # arrays with nbytes above this count as "big"
+    total_arrays = 0  # live arrays: +1 in __init__, -1 in __del__
+    total_big_arrays = 0  # live arrays whose nbytes exceed big_threshold
+    max_big_arrays = 0  # largest value total_big_arrays has reached
+    total_bytes = 0  # sum of nbytes over live arrays
+    alloc_dict = {}  # alloc_id -> (alloc_id, id(array), stack, nbytes)
+    alloc_number = 0  # arrays ever created; never decremented, used as alloc_id
+
     def __init__(self, cq, shape, dtype, order="C", allocator=None,
             data=None, offset=0, strides=None, events=None, _flags=None):
         # {{{ backward compatibility
@@ -516,6 +532,60 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
         self.size = size
         alloc_nbytes = self.nbytes = self.dtype.itemsize * self.size
 
+        Array.total_arrays += 1
+        Array.total_bytes += alloc_nbytes
+        Array.alloc_number += 1
+
+        if alloc_nbytes > Array.big_threshold:
+            Array.total_big_arrays += 1
+            from builtins import max
+            Array.max_big_arrays = max(Array.max_big_arrays, Array.total_big_arrays)
+
+        if 0:
+            if Array.total_big_arrays >= 44:
+                new_dict = {}
+                for key, (alloc_id, aid, stack, size) in Array.alloc_dict.items():
+                    new_dict.setdefault(stack, []).append((alloc_id, aid, size))
+                nallocs = 0
+                for stack, alloc_sizes in new_dict.items():
+                    if any(frame.func_name in ["nodes", "normal"]
+                            for frame in stack):
+                        continue
+                    s = [(alloc_id, aid, s) for alloc_id, aid, s in alloc_sizes if s>Array.big_threshold]
+                    if s:
+                        for frame in stack:
+                            print(frame)
+                        print(s)
+                    nallocs += len(s)
+                print(f"{nallocs} live allocations that matter")
+                pu.db
+                import os
+                os._exit(1)
+
+        print(f"CREATING PYOPENCL ARRAY: {Array.total_bytes/1e9} ({Array.total_arrays}/{Array.max_big_arrays})")
+        from traceback import extract_stack
+
+        stack = tuple(MyFrameSummary(filename=fs.filename, lineno=fs.lineno, func_name=fs.name) for fs in extract_stack())
+        self.alloc_id = Array.alloc_number
+        #print(self.stack)
+        Array.alloc_dict[Array.alloc_number] = (self.alloc_id, id(self), stack, alloc_nbytes)
+
+        """
+        if Array.total_arrays == 55:
+            # Combine old values
+            new_dict = {}
+            for key, value in Array.alloc_dict.items():
+                if value[0] in new_dict:
+                    new_dict[value[0]] += value[1]
+                else:
+                    new_dict[value[0]] = value[1]
+            for key, value in new_dict.items():
+                for entry in key:
+                    print(entry)
+                print(value/1e9)
+            exit()
+        """
+
         self.allocator = allocator
 
         if data is None:
@@ -534,6 +604,9 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
                     self.base_data = cl.Buffer(
                             context, cl.mem_flags.READ_WRITE, alloc_nbytes)
                 else:
+                    #print("Allocating {} GB".format(alloc_nbytes / 1e9))
+                    #if alloc_nbytes / 1e9 > .4:
+                    #    import pudb; pu.db
                     self.base_data = self.allocator(alloc_nbytes)
         else:
             self.base_data = data
@@ -542,6 +615,19 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
         self.context = context
         self._flags = _flags
 
+    def __del__(self):
+        """Undo this array's allocation bookkeeping (debug instrumentation)."""
+        # __init__ sets alloc_id only after bumping the counters; without it
+        # tracking never completed, so skip the rollback instead of raising.
+        alloc_id = getattr(self, "alloc_id", None)
+        if alloc_id is None:
+            return
+        Array.total_arrays -= 1
+        Array.total_bytes -= self.nbytes
+        if self.nbytes > Array.big_threshold:
+            Array.total_big_arrays -= 1
+        Array.alloc_dict.pop(alloc_id, None)  # tolerate an already-removed entry
+
     @property
     def ndim(self):
         return len(self.shape)
@@ -2238,7 +2324,7 @@ class Info(Record):
 # }}}
 
 
-# {{{ take/put/concatenate/diff/(h?stack)
+# {{{ take/put/concatenate/diff
 
 @elwise_kernel_runner
 def _take(result, ary, indices):
@@ -2579,58 +2665,6 @@ def hstack(arrays, queue=None):
 
     return result
 
-
-def stack(arrays, axis=0, queue=None):
-    """
-    Join a sequence of arrays along a new axis.
-
-    :arg arrays: A sequnce of :class:`Array`.
-    :arg axis: Index of the dimension of the new axis in the result array.
-        Can be -1, for the new axis to be last dimension.
-
-    :returns: :class:`Array`
-    """
-    if not arrays:
-        raise ValueError("need at least one array to stack")
-
-    input_shape = arrays[0].shape
-    input_ndim = arrays[0].ndim
-    axis = input_ndim if axis == -1 else axis
-
-    if queue is None:
-        for ary in arrays:
-            if ary.queue is not None:
-                queue = ary.queue
-                break
-
-    if not all(ary.shape == input_shape for ary in arrays[1:]):
-        raise ValueError("arrays must have the same shape")
-
-    if not (0 <= axis <= input_ndim):
-        raise ValueError("invalid axis")
-
-    if (axis == 0 and not all(ary.flags.c_contiguous
-                              for ary in arrays)):
-        # pyopencl.Array.__setitem__ does not support non-contiguous assignments
-        raise NotImplementedError
-
-    if (axis == input_ndim and not all(ary.flags.f_contiguous
-                                       for ary in arrays)):
-        # pyopencl.Array.__setitem__ does not support non-contiguous assignments
-        raise NotImplementedError
-
-    result_shape = input_shape[:axis] + (len(arrays),) + input_shape[axis:]
-    result = empty(queue, result_shape, np.result_type(*(ary.dtype
-                                                         for ary in arrays)),
-                   # TODO: reconsider once arrays support non-contiguous
-                   # assignments
-                   order="C" if axis == 0 else "F")
-    for i, ary in enumerate(arrays):
-        idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
-        result[idx] = ary
-
-    return result
-
 # }}}