Skip to content

Commit

Permalink
updated main
Browse files Browse the repository at this point in the history
  • Loading branch information
thongekchakrit committed Apr 12, 2023
1 parent 2adc525 commit 1ab741c
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 137 deletions.
Binary file modified __pycache__/plot.cpython-311.pyc
Binary file not shown.
93 changes: 53 additions & 40 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def recursion_batch(list_of_df, list_of_result, new_question, query_recommendati
print("Recursive batch: ", list_of_df[0])
print("Length: ", len(list_of_result))
print("Content: ", list_of_result)
if len(list_of_df) <= 3:
if len(list_of_df) <= 10:
if len(list_of_df) < 2:
dataframe_json = list_of_df[0].to_json()
prompt = f"You are an actuary, " \
Expand All @@ -420,35 +420,18 @@ def recursion_batch(list_of_df, list_of_result, new_question, query_recommendati
return "Sorry, we've disabled huge processing of large file insights for now..."

@st.cache_data
def recursive_summarizer_sub(list_of_response, list_of_result_response, new_question):
    """Summarize every chunk except the last one and collect the results.

    Each element of ``list_of_response`` apart from the final one is sent to
    ``gpt3.gpt_promt_davinci`` in a prompt that embeds ``new_question``. The
    final element is appended unchanged, without a model call.

    Args:
        list_of_response (list): Chunks still to be processed. A chunk may be
            a string, or a list of strings that is joined with newlines
            before being placed in the prompt.
        list_of_result_response (list): Results accumulated so far. It is
            copied, never mutated.
        new_question (str): The question embedded in every prompt.

    Returns:
        list: The accumulated results, followed by one model response per
        summarized chunk, followed by the untouched final chunk (if any).
    """
    results = list(list_of_result_response)

    for chunk in list_of_response[:-1]:
        # A plain string must be used as-is: joining it with '\n' would put a
        # newline between every single character of the text.
        data = chunk if isinstance(chunk, str) else '\n'.join(chunk)
        prompt = f"Given the question is {new_question}." \
                 f"Summarize the following text after: {data}"
        results.append(gpt3.gpt_promt_davinci(prompt))

    # NOTE(review): the last chunk is passed through without being
    # summarized, mirroring the original base case - confirm this is intended.
    results.extend(list_of_response[-1:])
    return results

# def recursive_summarizer_main(response, list_of_response, new_question):
# if len(response) < 2:
Expand All @@ -458,22 +441,52 @@ def recursive_summarizer_sub(list_of_response, new_question):
# response = recursive_summarizer_sub(response, list_of_summarize_text, new_question)
# return recursive_summarizer_main(response, list_of_response, new_question)

@st.cache_data
def split_words_into_sublists(word_list, max_words_per_list):
    """Join text fragments and split the result into size-limited chunks.

    The fragments are joined with single spaces and the joined text is cut
    into consecutive chunks. Chunks break at whitespace so that words are not
    cut in half; only a single word longer than the limit is split.

    Note:
        Despite its name, ``max_words_per_list`` is a budget in *characters*,
        not words. This matches how the limit has always been applied.

    Args:
        word_list (list): List of strings to join.
        max_words_per_list (int): Maximum number of characters per chunk.
            Must be positive.

    Returns:
        list: List of strings, each at most ``max_words_per_list`` characters
        long. Empty when there is no non-whitespace text.

    Raises:
        ValueError: If ``max_words_per_list`` is not positive.
    """
    import textwrap

    if max_words_per_list <= 0:
        raise ValueError("max_words_per_list must be a positive integer")

    # Join fragments into a single string
    joined_words = ' '.join(word_list)

    # Keep the text's own whitespace and hyphenated words intact; only the
    # whitespace that falls on a chunk boundary is dropped.
    return textwrap.wrap(
        joined_words,
        width=max_words_per_list,
        expand_tabs=False,
        replace_whitespace=False,
        drop_whitespace=True,
        break_long_words=True,
        break_on_hyphens=False,
    )

@st.cache_data
def explain_result(query_recommendation, new_question, dataframe_new):
    """Produce a textual explanation of ``dataframe_new`` for ``new_question``.

    The dataframe is split into batches sized from the length of its JSON
    serialization, the first batches are passed to ``recursion_batch``, and
    when more than one partial answer comes back they are chunked and passed
    through ``recursive_summarizer_sub`` before being joined with newlines.

    Args:
        query_recommendation: Forwarded unchanged to ``recursion_batch``.
        new_question: The question being answered; forwarded to both helpers.
        dataframe_new: Data to explain. It must support ``to_json()`` and be
            accepted by ``np.array_split`` (presumably a pandas DataFrame).

    Returns:
        The newline-joined explanation text. If ``recursion_batch`` returns a
        plain string or a falsy value, that value is returned as-is.
    """
    target_chars_per_batch = 3200

    # Serialize once (the dataframe may be large) and take a true ceiling so
    # each batch averages at most ``target_chars_per_batch`` characters.
    json_length = len(dataframe_new.to_json())
    batch_size = max(1, -(-json_length // target_chars_per_batch))
    print(f"Batch size: {batch_size}")
    list_of_df = np.array_split(dataframe_new, batch_size)
    # Only the first 3 batches are analysed for now; to remove in prod
    list_of_df = list_of_df[:3]
    list_of_result = []

    with st.spinner("Working on the analysis, please wait..."):
        response = recursion_batch(list_of_df, list_of_result, new_question, query_recommendation)

    if response:
        st.success('Done!')
        if isinstance(response, str):
            # recursion_batch can return a ready-made message; joining or
            # chunking a string would operate on its individual characters.
            return response
        if len(response) >= 2:
            print("Processing sub explaination")
            max_words_per_list = 3500
            sublists = split_words_into_sublists(response, max_words_per_list)
            response = recursive_summarizer_sub(sublists, [], new_question)
            response = '\n'.join(response)
        else:
            print("Combining the response")
            response = '\n'.join(response)

    return response

Expand Down Expand Up @@ -752,17 +765,17 @@ def handle_layout_change(updated_layout):
# Create a text element and let the reader know the data is loading.
DATA, sample_data_overview = load_data(UPLOADED_FILE)

#####################################################
# with st.expander("See data explaination"):
# get_data_overview(sample_data_overview)
#
# # Inspecting raw data
# with st.expander("See raw data"):
# get_raw_table(DATA)
#
# # Inspecting summary statistics
# with st.expander("See summary statistics"):
# get_summary_statistics(DATA)
####################################################
with st.expander("See data explaination"):
get_data_overview(sample_data_overview)

# Inspecting raw data
with st.expander("See raw data"):
get_raw_table(DATA)

# Inspecting summary statistics
with st.expander("See summary statistics"):
get_summary_statistics(DATA)

data_schema = convert_datatype(DATA)
schema_data = str(data_schema.dtypes.to_dict().items())
Expand Down
211 changes: 114 additions & 97 deletions plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ def plot_metrics(dataframe, label, x_var):

def create_bar_chart(data, x_var, y_var, hue_var, label):

hue_var = hue_var.split(",")[0]
x_var = x_var.split(",")[0]
y_var = y_var.split(",")[0]

with mui.Typography:
html.div(
label,
Expand Down Expand Up @@ -172,106 +176,119 @@ def create_metric_chart(data, x_var, y_var, label):


def _group_scatter_series(records, x_var, y_var, hue_var):
    """Group row dicts into one series per distinct ``hue_var`` value."""
    points_by_series = {}
    for row in records:
        points_by_series.setdefault(row[hue_var], []).append(
            {"x": row[x_var], "y": row[y_var]}
        )
    # Dicts keep insertion order, so series appear in first-seen order.
    return [
        {"id": series_id, "data": points}
        for series_id, points in points_by_series.items()
    ]


def create_scatter_plot(data, x_var, y_var, hue_var, label):
    """Render a nivo scatter plot of ``y_var`` against ``x_var``, one series per hue.

    Each of ``x_var``, ``y_var`` and ``hue_var`` may be a comma-separated
    list of column names; only the first name is used. When no hue column is
    given, an error is shown instead of a chart.

    Args:
        data: Table to plot. It must support ``to_dict('records')``
            (presumably a pandas DataFrame).
        x_var (str): Column(s) for the x axis.
        y_var (str): Column(s) for the y axis.
        hue_var (str): Column(s) whose values define the series.
        label (str): Title shown above the chart, also used as its ARIA label.

    Returns:
        None. The chart (or the error message) is emitted as a side effect.
    """
    if hue_var:
        hue_var = hue_var.split(",")[0]
    if x_var:
        x_var = x_var.split(",")[0]
    if y_var:
        y_var = y_var.split(",")[0]

    if hue_var:
        with st.spinner("Cooking the scatter plot now..."):
            print("Scatterplot: Starting data transformation")
            # Every row lands in exactly one series. The previous nested loop
            # could add a second series with the same id for one hue value.
            list_of_dict = _group_scatter_series(
                data.to_dict('records'), x_var, y_var, hue_var
            )

            with mui.Typography:
                html.div(
                    label,
                    css={
                        "display": "block",
                        "margin-top": "1em",
                        "margin-bottom": "1em",
                        "margin-left": "2em",
                        "margin-right": "0em"
                    }
                )
            print("Scatterplot: Completed data transformation")

            nivo.ScatterPlot(
                data=list_of_dict,
                layout="vertical",
                xFormat=">-.2f",
                margin={"top": 20, "right": 130, "bottom": 100, "left": 60},
                # NOTE(review): `{0.4}` is a Python set literal, not a number;
                # confirm whether plain `0.4` was intended.
                padding={0.4},
                xScale={"type": 'linear', "min": 0, "max": 'auto'},
                yScale={"type": 'linear', "min": 0, "max": 'auto'},
                blendMode="multiply",
                indexScale={"type": 'band', "round": "true"},
                colors={"scheme": 'pastel1'},
                borderColor={
                    "from": 'color',
                    "modifiers": [
                        [
                            'darker',
                            1.6
                        ]
                    ]
                },
                axisBottom={
                    'orient': 'bottom',
                    "tickSize": 5,
                    "tickPadding": 5,
                    "tickRotation": 0,
                    "legend": str(x_var),
                    "legendPosition": 'middle',
                    "legendOffset": 32
                },
                axisLeft={
                    'orient': 'left',
                    "tickSize": 5,
                    "tickPadding": 5,
                    "tickRotation": 0,
                    "legend": str(y_var),
                    "legendPosition": 'middle',
                    "legendOffset": -40
                },
                legends=[
                    {
                        "dataFrom": 'keys',
                        "anchor": 'top-right',
                        "direction": 'column',
                        "margin": {"left": 10},
                        "justify": "false",
                        "legend": str(hue_var),
                        "translateX": 120,
                        "translateY": 0,
                        "itemsSpacing": 2,
                        "itemWidth": 100,
                        "itemHeight": 20,
                        "itemDirection": 'left-to-right',
                        "itemOpacity": 0.85,
                        "symbolSize": 20,
                        "effects": [
                            {
                                "on": 'hover',
                                "style": {
                                    "itemOpacity": 1
                                }
                            }
                        ]
                    }
                ],
                role="application",
                ariaLabel=label
            )
    else:
        with st.spinner("Cooking the scatter plot now..."):
            st.error("Missing hue for scatter plot")
    print("Scatterplot: Plotted")

0 comments on commit 1ab741c

Please sign in to comment.