diff --git a/app.py b/app.py
index 60b773d..246ce38 100644
--- a/app.py
+++ b/app.py
@@ -21,7 +21,7 @@
 
 WANDB_PROJECT = "opentensor-dev/alpha-validators"
 PROJECT_URL = f'https://wandb.ai/{WANDB_PROJECT}/table?workspace=default'
-MAX_RECENT_RUNS = 100
+MAX_RECENT_RUNS = 300
 DEFAULT_FILTERS = {}#{"tags": {"$in": [f'1.1.{i}' for i in range(10)]}}
 DEFAULT_SELECTED_HOTKEYS = None
 DEFAULT_TASK = 'qa'
@@ -56,13 +56,6 @@
 # add vertical space
 st.markdown('#')
-
-runid_c1, runid_c2 = st.columns([3, 1])
-# make multiselect for run_ids with label on same line
-run_ids = runid_c1.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
-n_runs = len(run_ids)
-df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
-
 st.markdown('#')
 
 tab1, tab2, tab3, tab4 = st.tabs(["Run Data", "UID Health", "Completions", "Prompt-based scoring"])
 
@@ -72,7 +65,13 @@
 
     st.markdown('#')
     st.subheader(":violet[Run] Data")
-    with st.expander(f'Show :violet[all] wandb runs'):
+
+    # make multiselect for run_ids with label on same line
+    run_ids = st.multiselect('Select one or more weights and biases run by id:', df_runs['run_id'], key='run_id', default=df_runs['run_id'][:3], help=f'Select one or more runs to analyze. You can find the raw data for these runs [here]({PROJECT_URL}).')
+    n_runs = len(run_ids)
+    df_runs_subset = df_runs[df_runs['run_id'].isin(run_ids)]
+
+    with st.expander(f'Select from :violet[all] wandb runs'):
 
         edited_df = st.data_editor(
             df_runs.assign(Select=False).set_index('Select'),
diff --git a/opendashboards/assets/metric.py b/opendashboards/assets/metric.py
index 5b52602..64c11f5 100644
--- a/opendashboards/assets/metric.py
+++ b/opendashboards/assets/metric.py
@@ -28,29 +28,28 @@ def wandb(df_runs):
 
     col2.metric('Hotkeys', fmt(df_runs.hotkey.nunique()), delta=fmt(df_runs.hotkey.nunique()-df_runs_old.hotkey.nunique())+' (24h)')
     col3.metric('Events', fmt(df_runs.num_steps.sum()), delta=fmt(df_runs.num_steps.sum()-df_runs_old.num_steps.sum())+' (24h)')
     col4.metric('Completions', fmt(df_runs.num_completions.sum()), delta=fmt(df_runs.num_completions.sum()-df_runs_old.num_completions.sum())+' (24h)')
-    
+
     st.markdown('----')
 
 
 @st.cache_data
-def runs(df_long):
-    
+def runs(df_long, full=False):
+
     col1, col2, col3, col4 = st.columns(4)
-    print(df_long.columns)
 
     # Convert to appropriate units e.g. 1.2k instead of 1200.c
     col1.metric('Runs', fmt(df_long.run_id.nunique()))
     col2.metric('Hotkeys', fmt(df_long.hotkey.nunique()))
     col3.metric('Events', fmt(df_long.groupby(['run_id','_step']).ngroups))
     col4.metric('Completions', fmt(df_long.shape[0]))
-
-    aggs = df_long.groupby('task').agg({'uids': 'nunique', 'completions': 'nunique'})
-    print(aggs)
-    for i,c in enumerate(st.columns(len(aggs))):
-        name = aggs.index[i].title()
-        uid_unique, comp_unique = aggs.iloc[i]
-        c.metric(label=f'{name} UIDs', value=uid_unique)
-        c.metric(label=f'{name} Completions', value=comp_unique)
+
+    if full:
+        aggs = df_long.groupby('task').agg({'uids': 'nunique', 'completions': 'nunique'})
+        for i,c in enumerate(st.columns(len(aggs))):
+            name = aggs.index[i].title()
+            uid_unique, comp_unique = aggs.iloc[i]
+            c.metric(label=f'{name} UIDs', value=uid_unique)
+            c.metric(label=f'{name} Completions', value=comp_unique)
 
     st.markdown('----')
@@ -76,7 +75,7 @@ def uids(df_long, src, uids=None):
         help='Number of unique completions divided by total number of events'
     )
     # uniqueness can be expressed as the average number of unique completions per uid divided by all unique completions
-    # uniqueness is the shared completions between selected uids 
+    # uniqueness is the shared completions between selected uids
     col3.metric(
         label="Uniqueness %",