Skip to content
This repository has been archived by the owner on Dec 11, 2023. It is now read-only.

index_as_columns option in ctable.fromdataframe() #345

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions bcolz/ctable.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,13 +644,17 @@ def copy(self, **kwargs):
return ccopy

@staticmethod
def fromdataframe(df, **kwargs):
def fromdataframe(df, index_as_columns=False, **kwargs):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although it is backward incompatible, I'd say that making index_as_columns default to True makes more sense.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had backward compatibility in mind, but I do agree that having index_as_columns default to True makes more sense.

"""Return a ctable object out of a pandas dataframe.

Parameters
----------
df : DataFrame
A pandas dataframe.
index_as_columns : boolean
If True, the index values will be converted to columns and the
names of the columns will be stored in the attribute
'dataframe_index_columns'.
kwargs : list of parameters or dictionary
Any parameter supported by the ctable constructor.

Expand Down Expand Up @@ -706,9 +710,38 @@ def fromdataframe(df, **kwargs):
cols.append(col)
else:
cols.append(vals)

# Convert index levels into columns
if index_as_columns:
index_names = []
for level in range(df.index.nlevels):
vals = df.index.get_level_values(level).values
if vals.dtype == np.object:
inferred_type = pd.lib.infer_dtype(vals)
if inferred_type == 'unicode':
maxitemsize = pd.lib.max_len_string_array(vals)
col = bcolz.carray(vals,
dtype='U%d' % maxitemsize,
**ckwargs)
elif inferred_type == 'string':
maxitemsize = pd.lib.max_len_string_array(vals)
# In Python 3 strings should be represented as Unicode
dtype = "U" if sys.version_info >= (3, 0) else "S"
col = bcolz.carray(vals, dtype='%s%d' %
(dtype, maxitemsize), **ckwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the 'unicode' and 'string' branches can be fused a bit more.

else:
col = vals
cols.append(col)
else:
cols.append(vals)
new_index_name = 'index_column_'
new_index_name += str(level)
new_index_name += df.index.get_level_values(level).name
names.append(new_index_name)
index_names.append(new_index_name)
# Create the ctable
ct = ctable(cols, names, **kwargs)
if index_as_columns:
ct.attrs.attrs['dataframe_index_columns'] = index_names
return ct

@staticmethod
Expand Down Expand Up @@ -805,6 +838,12 @@ def todataframe(self, columns=None, orient='columns'):
df = pd.DataFrame.from_items(
((key, self[key][:]) for key in keys),
columns=columns, orient=orient)
# Check for dataframe_index_columns attribute and set index
if self.attrs.attrs.get('dataframe_index_columns'):
index_names = self.attrs.attrs.get('dataframe_index_columns')
original_index_names = [i[14:] for i in index_names]
df.set_index(index_names, inplace=True)
df.index.names = original_index_names
return df

def tohdf5(self, filepath, nodepath='/ctable', mode='w',
Expand Down