Memory profiling hack #463

Draft
wants to merge 2 commits into base: main
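Not part of the diff, but a minimal sketch of how the counters this draft keeps on the Array class could be read from user code while a program runs. It assumes the patch below is applied; report_array_memory is a hypothetical helper, not something pyopencl provides.

import pyopencl.array as cl_array

def report_array_memory():
    # All bookkeeping lives on the Array class itself (see the patched __init__/__del__).
    cls = cl_array.Array
    print(f"live arrays:       {cls.total_arrays}")
    print(f"live 'big' arrays: {cls.total_big_arrays} (peak {cls.max_big_arrays})")
    print(f"live bytes:        {cls.total_bytes/1e9:.3f} GB")
    print(f"tracked allocs:    {len(cls.alloc_dict)}")

Calling report_array_memory() between time steps (or from a signal handler) shows whether device memory held by pyopencl arrays keeps growing.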
140 changes: 87 additions & 53 deletions pyopencl/array.py
@@ -228,6 +228,14 @@ class _copy_queue: # noqa

_ARRAY_GET_SIZES_CACHE = {}

from dataclasses import dataclass
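# Hashable, comparable stand-in for traceback.FrameSummary (a frozen dataclass),
# so that recorded stack tuples can serve as dictionary keys when allocations
# are grouped by call site (FrameSummary itself is not hashable).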

@dataclass(frozen=True, repr=True)
class MyFrameSummary:
filename: str
lineno: int
func_name: str


class Array:
"""A :class:`numpy.ndarray` work-alike that stores its data and performs
@@ -414,6 +422,14 @@ class Array:

__array_priority__ = 100
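    # The class attributes below are bookkeeping for the memory-profiling hack:
    # counts of live arrays and live "big" arrays, the peak big-array count,
    # total live bytes, and a map from allocation number to
    # (alloc_id, id(array), stack, nbytes).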

big_threshold = 30_000
total_arrays = 0
total_big_arrays = 0
max_big_arrays = 0
total_bytes = 0
alloc_dict = {}
alloc_number = 0

def __init__(self, cq, shape, dtype, order="C", allocator=None,
data=None, offset=0, strides=None, events=None, _flags=None):
# {{{ backward compatibility
@@ -516,6 +532,60 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
self.size = size
alloc_nbytes = self.nbytes = self.dtype.itemsize * self.size

Array.total_arrays += 1
Array.total_bytes += alloc_nbytes
Array.alloc_number += 1

if alloc_nbytes > Array.big_threshold:
Array.total_big_arrays += 1
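            # builtin max, not the max() reduction that pyopencl.array also defines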
from builtins import max
Array.max_big_arrays = max(Array.max_big_arrays, Array.total_big_arrays)

if 0:
if Array.total_big_arrays >= 44:
new_dict = {}
for key, (alloc_id, aid, stack, size) in Array.alloc_dict.items():
new_dict.setdefault(stack, []).append((alloc_id, aid, size))
nallocs = 0
for stack, alloc_sizes in new_dict.items():
if any(frame.func_name in ["nodes", "normal"]
for frame in stack):
continue
s = [(alloc_id, aid, s) for alloc_id, aid, s in alloc_sizes if s>Array.big_threshold]
if s:
for frame in stack:
print(frame)
print(s)
nallocs += len(s)
print(f"{nallocs} live allocations that matter")
                import pudb; pu.db
import os
os._exit(1)

print(f"CREATING PYOPENCL ARRAY: {Array.total_bytes/1e9} ({Array.total_arrays}/{Array.max_big_arrays})")
from traceback import extract_stack

stack = tuple(MyFrameSummary(filename=fs.filename, lineno=fs.lineno, func_name=fs.name) for fs in extract_stack())
self.alloc_id = Array.alloc_number
#print(self.stack)
Array.alloc_dict[Array.alloc_number] = (self.alloc_id, id(self), stack, alloc_nbytes)

"""
if Array.total_arrays == 55:
# Combine old values
new_dict = {}
for key, value in Array.alloc_dict.items():
if value[0] in new_dict:
new_dict[value[0]] += value[1]
else:
new_dict[value[0]] = value[1]
for key, value in new_dict.items():
for entry in key:
print(entry)
print(value/1e9)
exit()
"""

self.allocator = allocator

if data is None:
@@ -534,6 +604,9 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
self.base_data = cl.Buffer(
context, cl.mem_flags.READ_WRITE, alloc_nbytes)
else:
#print("Allocating {} GB".format(alloc_nbytes / 1e9))
#if alloc_nbytes / 1e9 > .4:
# import pudb; pu.db
self.base_data = self.allocator(alloc_nbytes)
else:
self.base_data = data
@@ -542,6 +615,19 @@ def __init__(self, cq, shape, dtype, order="C", allocator=None,
self.context = context
self._flags = _flags

def __del__(self):
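        # Undo the bookkeeping from __init__ when the array is garbage-collected.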
Array.total_arrays -= 1
Array.total_bytes -= self.nbytes
if self.nbytes > Array.big_threshold:
Array.total_big_arrays -= 1
#print(f"DELETING PYOPENCL ARRAY: {Array.total_bytes/1e9} ({Array.total_arrays})")
#print(self.stack)
Array.alloc_dict.pop(self.alloc_id)
#print(Array.alloc_dict[self.stack])
#super().__del__()
#self.base_data.release()
#print("DELETING ARRAY")

@property
def ndim(self):
return len(self.shape)
@@ -2238,7 +2324,7 @@ class Info(Record):
# }}}


# {{{ take/put/concatenate/diff/(h?stack)
# {{{ take/put/concatenate/diff

@elwise_kernel_runner
def _take(result, ary, indices):
@@ -2579,58 +2665,6 @@ def hstack(arrays, queue=None):

return result


def stack(arrays, axis=0, queue=None):
"""
Join a sequence of arrays along a new axis.

    :arg arrays: A sequence of :class:`Array`.
:arg axis: Index of the dimension of the new axis in the result array.
Can be -1, for the new axis to be last dimension.

:returns: :class:`Array`
"""
if not arrays:
raise ValueError("need at least one array to stack")

input_shape = arrays[0].shape
input_ndim = arrays[0].ndim
axis = input_ndim if axis == -1 else axis

if queue is None:
for ary in arrays:
if ary.queue is not None:
queue = ary.queue
break

if not all(ary.shape == input_shape for ary in arrays[1:]):
raise ValueError("arrays must have the same shape")

if not (0 <= axis <= input_ndim):
raise ValueError("invalid axis")

if (axis == 0 and not all(ary.flags.c_contiguous
for ary in arrays)):
# pyopencl.Array.__setitem__ does not support non-contiguous assignments
raise NotImplementedError

if (axis == input_ndim and not all(ary.flags.f_contiguous
for ary in arrays)):
# pyopencl.Array.__setitem__ does not support non-contiguous assignments
raise NotImplementedError

result_shape = input_shape[:axis] + (len(arrays),) + input_shape[axis:]
result = empty(queue, result_shape, np.result_type(*(ary.dtype
for ary in arrays)),
# TODO: reconsider once arrays support non-contiguous
# assignments
order="C" if axis == 0 else "F")
for i, ary in enumerate(arrays):
idx = (slice(None),)*axis + (i,) + (slice(None),)*(input_ndim-axis)
result[idx] = ary

return result

# }}}
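For reference, a sketch of how the per-allocation records in Array.alloc_dict could be summarized at interpreter shutdown instead of at the hard-coded big-array count used in the disabled block above. This assumes the patch is applied; register_leak_report is a hypothetical helper, and its threshold default simply mirrors big_threshold.

import atexit
import pyopencl.array as cl_array

def register_leak_report(threshold=30_000):
    """At exit, print surviving allocations larger than threshold, grouped by call stack."""
    def _report():
        by_stack = {}
        # alloc_dict maps allocation number -> (alloc_id, id(array), stack, nbytes)
        for _alloc_id, _aid, stack, nbytes in cl_array.Array.alloc_dict.values():
            by_stack.setdefault(stack, []).append(nbytes)
        for stack, sizes in sorted(by_stack.items(), key=lambda kv: -sum(kv[1])):
            big = [sz for sz in sizes if sz > threshold]
            if not big:
                continue
            print(f"{len(big)} live allocation(s), {sum(big)/1e9:.3f} GB total, allocated from:")
            for frame in stack[-6:]:  # innermost frames; the very last ones sit inside pyopencl/array.py itself
                print(f"  {frame.filename}:{frame.lineno} in {frame.func_name}")
    atexit.register(_report)

Registering this once at startup (register_leak_report()) reports, at exit, which call sites still hold large arrays.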

