updated main

thongekchakrit · Apr 12, 2023 · 1ab741c · 1ab741c
1 parent 2adc525
commit 1ab741c
Show file tree

Hide file tree

Showing 3 changed files with 167 additions and 137 deletions.
diff --git a/__pycache__/plot.cpython-311.pyc b/__pycache__/plot.cpython-311.pyc
diff --git a/main.py b/main.py
@@ -395,7 +395,7 @@ def recursion_batch(list_of_df, list_of_result, new_question, query_recommendati
     print("Recursive batch: ", list_of_df[0])
     print("Length: ", len(list_of_result))
     print("Content: ", list_of_result)
-    if len(list_of_df) <= 3:
+    if len(list_of_df) <= 10:
         if len(list_of_df) < 2:
             dataframe_json = list_of_df[0].to_json()
             prompt = f"You are an actuary, " \
@@ -420,35 +420,18 @@ def recursion_batch(list_of_df, list_of_result, new_question, query_recommendati
         return "Sorry, we've disabled huge processing of large file insights for now..."
 
 @st.cache_data
-def recursive_summarizer_sub(list_of_response, new_question):
+def recursive_summarizer_sub(list_of_response, list_of_result_response, new_question):
 
     if len(list_of_response) < 2:
-        data = '\n'.join(list_of_response)
-        return data
+        list_of_result_response = list_of_result_response + list_of_response
+        return list_of_result_response
     else:
-        # if len('\n'.join(list_of_response)) < 4000:
-        #     data = '\n'.join(list_of_response)
-        # else:
-        pass
-
-        # data = '\n'.join(list_of_response)
-        # print("Question in recursive_summarizer_sub: ", new_question)
-        # print("Answer in data: ", data)
-        # prompt = f"Give a report on the passage to answer the question: {new_question}." \
-        #          f"The passage: {data}"
-        # print(f"Prompt being asked: {prompt}")
-        # list_of_result = [gpt3.gpt_promt_davinci(prompt)]
-        # print(f"Answer: {list_of_result}")
-        # return '\n'.join(list_of_result)
-        #     raise
-    #     response_extract = list_of_response[0]
-    #     prompt = f"You are an actuary" \
-    #              f"The main goal is to answer {new_question}" \
-    #              f"Please summarize the passage:" \
-    #              f"{response_extract}"
-    #     list_of_result = list_of_summarize_text + [gpt3.gpt_promt_davinci(prompt)]
-    #     new_list = list_of_response[1:]
-    #     return recursion_batch(new_list, list_of_result)
+        data = '\n'.join(list_of_response[0])
+        prompt = f"Given the question is {new_question}." \
+                 f"Summarize the following text after: {data}"
+        list_of_result_response = list_of_result_response + [gpt3.gpt_promt_davinci(prompt)]
+        new_list = list_of_response[1:]
+        return recursive_summarizer_sub(new_list, list_of_result_response, new_question)
 
 # def recursive_summarizer_main(response, list_of_response, new_question):
 #     if len(response) < 2:
@@ -458,22 +441,52 @@ def recursive_summarizer_sub(list_of_response, new_question):
 #         response = recursive_summarizer_sub(response, list_of_summarize_text, new_question)
 #         return recursive_summarizer_main(response, list_of_response, new_question)
 
+@st.cache_data
+def split_words_into_sublists(word_list, max_words_per_list):
+    """
+    Joins words in a list together and splits them into sublists with a maximum word count
+    of `max_words_per_list`.
+
+    Args:
+        word_list (list): List of words.
+        max_words_per_list (int): Maximum word count per sublist.
+
+    Returns:
+        list: List of sublists containing words.
+    """
+    # Join words into a single string
+    joined_words = ' '.join(word_list)
+
+    # Split words into sublists of max_words_per_list each
+    sublists = [joined_words[i:i + max_words_per_list] for i in range(0, len(joined_words), max_words_per_list)]
+
+    return sublists
+
 @st.cache_data
 def explain_result(query_recommendation, new_question, dataframe_new):
 
     batch_size = round(len(dataframe_new.to_json())/ 3200 ) + (len(dataframe_new.to_json()) % 3200 > 0)
     print(f"Batch size: {batch_size}")
     list_of_df = np.array_split(dataframe_new, batch_size)
     # sample data to first 10 dataframe to get result, to remove in prod
-    list_of_df = list_of_df[:2]
+    list_of_df = list_of_df[:3]
     list_of_result = []
 
     with st.spinner("Working on the analysis, please wait..."):
         response = recursion_batch(list_of_df, list_of_result, new_question, query_recommendation)
 
     if response:
+        list_of_result_response = []
         st.success('Done!')
-        response = recursive_summarizer_sub(response, new_question)
+        if len(response) >= 2:
+            print("Processing sub explaination")
+            max_words_per_list = 3500
+            sublists = split_words_into_sublists(response, max_words_per_list)
+            response = recursive_summarizer_sub(sublists, list_of_result_response, new_question)
+            response = '\n'.join(response)
+        else:
+            print("Combining the response")
+            response = '\n'.join(response)
 
     return response
 
@@ -752,17 +765,17 @@ def handle_layout_change(updated_layout):
     # Create a text element and let the reader know the data is loading.
     DATA, sample_data_overview = load_data(UPLOADED_FILE)
 
-    #####################################################
-    # with st.expander("See data explaination"):
-    #     get_data_overview(sample_data_overview)
-    #
-    # # Inspecting raw data
-    # with st.expander("See raw data"):
-    #     get_raw_table(DATA)
-    #
-    # # Inspecting summary statistics
-    # with st.expander("See summary statistics"):
-    #     get_summary_statistics(DATA)
+    ####################################################
+    with st.expander("See data explaination"):
+        get_data_overview(sample_data_overview)
+
+    # Inspecting raw data
+    with st.expander("See raw data"):
+        get_raw_table(DATA)
+
+    # Inspecting summary statistics
+    with st.expander("See summary statistics"):
+        get_summary_statistics(DATA)
 
     data_schema = convert_datatype(DATA)
     schema_data = str(data_schema.dtypes.to_dict().items())

diff --git a/plot.py b/plot.py
@@ -32,6 +32,10 @@ def plot_metrics(dataframe, label, x_var):
 
 def create_bar_chart(data, x_var, y_var, hue_var, label):
 
+    hue_var = hue_var.split(",")[0]
+    x_var = x_var.split(",")[0]
+    y_var = y_var.split(",")[0]
+
     with mui.Typography:
         html.div(
             label,
@@ -172,106 +176,119 @@ def create_metric_chart(data, x_var, y_var, label):
 
 
 def create_scatter_plot(data, x_var, y_var, hue_var, label):
-    with st.spinner("Cooking the scatter plot now..."):
-        print("Scatterplot: Starting data transformation")
-        data_chart = data.to_dict('records')
-        number_of_list = []
-        for x in data_chart:
-            number_of_list = number_of_list + [x[hue_var]]
-        number_of_list = len(list(set(number_of_list)))
-        list_of_dict = []
-        counter = 0
 
-        for x in data_chart:
-            if list_of_dict:
-                for y in list_of_dict:
-                    if y['id'] == x[hue_var]:
-                        y['data'].append({"x": x[x_var], "y": x[y_var]})
-                    elif len(list(set([x for x in [k['id'] for k in list_of_dict]]))) < number_of_list:
-                        list_of_dict = list_of_dict + [{'id': x[hue_var], 'data' : [{"x": x[x_var], "y": x[y_var]}]}]
-            else:
-                list_of_dict = list_of_dict + [{'id': x[hue_var], 'data' : [{"x": x[x_var], "y": x[y_var]}]}]
-            print(counter)
-            counter+=1
+    if hue_var:
+        hue_var = hue_var.split(",")[0]
+    if x_var:
+        x_var = x_var.split(",")[0]
+    if y_var:
+        y_var = y_var.split(",")[0]
 
-        with mui.Typography:
-            html.div(
-                label,
-                css={
-                    "display": "block",
-                    "margin-top": "1em",
-                    "margin-bottom": "1em",
-                    "margin-left": "2em",
-                    "margin-right": "0em"
-                }
-            )
-        print("Scatterplot: Completed data transformation")
+    if hue_var:
+        with st.spinner("Cooking the scatter plot now..."):
+            print("Scatterplot: Starting data transformation")
+            data_chart = data.to_dict('records')
+            number_of_list = []
+            for x in data_chart:
+                number_of_list = number_of_list + [x[hue_var]]
+            number_of_list = len(list(set(number_of_list)))
+            list_of_dict = []
+            counter = 0
 
-        nivo.ScatterPlot(
-            data=list_of_dict,
-            layout="vertical",
-            xFormat=">-.2f",
-            margin={"top": 20, "right": 130, "bottom": 100, "left": 60},
-            padding={0.4},
-            xScale={"type": 'linear', "min": 0, "max": 'auto'},
-            yScale={"type": 'linear', "min": 0, "max": 'auto'},
-            blendMode="multiply",
-            indexScale={"type": 'band', "round": "true"},
-            colors={"scheme": 'pastel1'},
-            borderColor={
-                "from": 'color',
-                "modifiers": [
-                    [
-                        'darker',
-                        1.6
+            for x in data_chart:
+                if list_of_dict:
+                    for y in list_of_dict:
+                        if y['id'] == x[hue_var]:
+                            y['data'].append({"x": x[x_var], "y": x[y_var]})
+                        elif len(list(set([x for x in [k['id'] for k in list_of_dict]]))) < number_of_list:
+                            list_of_dict = list_of_dict + [{'id': x[hue_var], 'data' : [{"x": x[x_var], "y": x[y_var]}]}]
+                else:
+                    list_of_dict = list_of_dict + [{'id': x[hue_var], 'data' : [{"x": x[x_var], "y": x[y_var]}]}]
+                print(counter)
+                counter+=1
+
+            with mui.Typography:
+                html.div(
+                    label,
+                    css={
+                        "display": "block",
+                        "margin-top": "1em",
+                        "margin-bottom": "1em",
+                        "margin-left": "2em",
+                        "margin-right": "0em"
+                    }
+                )
+            print("Scatterplot: Completed data transformation")
+
+            nivo.ScatterPlot(
+                data=list_of_dict,
+                layout="vertical",
+                xFormat=">-.2f",
+                margin={"top": 20, "right": 130, "bottom": 100, "left": 60},
+                padding={0.4},
+                xScale={"type": 'linear', "min": 0, "max": 'auto'},
+                yScale={"type": 'linear', "min": 0, "max": 'auto'},
+                blendMode="multiply",
+                indexScale={"type": 'band', "round": "true"},
+                colors={"scheme": 'pastel1'},
+                borderColor={
+                    "from": 'color',
+                    "modifiers": [
+                        [
+                            'darker',
+                            1.6
+                        ]
                     ]
-                ]
-            },
-            axisBottom={
-                'orient': 'bottom',
-                "tickSize": 5,
-                "tickPadding": 5,
-                "tickRotation": 0,
-                "legend": str(x_var),
-                "legendPosition": 'middle',
-                "legendOffset": 32
-            },
-            axisLeft={
-                'orient': 'left',
-                "tickSize": 5,
-                "tickPadding": 5,
-                "tickRotation": 0,
-                "legend": str(y_var),
-                "legendPosition": 'middle',
-                "legendOffset": -40
-            },
-            legends=[
-                {
-                    "dataFrom": 'keys',
-                    "anchor": 'top-right',
-                    "direction": 'column',
-                    "margin": { "left": 10 },
-                    "justify": "false",
-                    "translateX": 120,
-                    "translateY": 0,
-                    "itemsSpacing": 2,
-                    "itemWidth": 100,
-                    "itemHeight": 20,
-                    "itemDirection": 'left-to-right',
-                    "itemOpacity": 0.85,
-                    "symbolSize": 20,
-                    "effects": [
-                        {
-                            "on": 'hover',
-                            "style": {
-                                "itemOpacity": 1
+                },
+                axisBottom={
+                    'orient': 'bottom',
+                    "tickSize": 5,
+                    "tickPadding": 5,
+                    "tickRotation": 0,
+                    "legend": str(x_var),
+                    "legendPosition": 'middle',
+                    "legendOffset": 32
+                },
+                axisLeft={
+                    'orient': 'left',
+                    "tickSize": 5,
+                    "tickPadding": 5,
+                    "tickRotation": 0,
+                    "legend": str(y_var),
+                    "legendPosition": 'middle',
+                    "legendOffset": -40
+                },
+                legends=[
+                    {
+                        "dataFrom": 'keys',
+                        "anchor": 'top-right',
+                        "direction": 'column',
+                        "margin": { "left": 10 },
+                        "justify": "false",
+                        "legend": str(hue_var),
+                        "translateX": 120,
+                        "translateY": 0,
+                        "itemsSpacing": 2,
+                        "itemWidth": 100,
+                        "itemHeight": 20,
+                        "itemDirection": 'left-to-right',
+                        "itemOpacity": 0.85,
+                        "symbolSize": 20,
+                        "effects": [
+                            {
+                                "on": 'hover',
+                                "style": {
+                                    "itemOpacity": 1
+                                }
                             }
-                        }
-                    ]
-                }
-            ],
-            role="application",
-            ariaLabel=label
-        )
-        print("Scatterplot: Plotted")
+                        ]
+                    }
+                ],
+                role="application",
+                ariaLabel=label
+            )
+    else:
+        with st.spinner("Cooking the scatter plot now..."):
+            st.error("Missing hue for scatter plot")
+            print("Scatterplot: Plotted")