Add files via upload

Mainly fixing a characters-in-scene issue.
StephanosPSteer · Apr 26, 2023 · 652a890 · 652a890
1 parent 1834f73
commit 652a890
Show file tree

Hide file tree

Showing 3 changed files with 85 additions and 37 deletions.
diff --git a/Aubrushli.py b/Aubrushli.py
@@ -89,6 +89,11 @@ def createshotlist(self, numcharacters, outpath, action_df, df, shot_vals_df, se
                         (action_df['original_line'].astype(int) <= endline)]
 
         filreact_df = filact_df.reset_index(drop=True)
+        if 'charsinscene' not in filreact_df:
+            print('not')
+            filreact_df.loc[:, 'charsinscene'] = numcharacters
+        else:
+            print('ya')
         # shot number you create depending on shots 
 
         #the headers for the shotlist
@@ -146,16 +151,13 @@ def createshotlist(self, numcharacters, outpath, action_df, df, shot_vals_df, se
         finshot ={}
         # do I want drones and extreme wides, probably not indoors
         dronesetc = 0
-        if sending_format==1:
-            if 'EXT.' in currscenename or currscenename.startswith("."):
+        #if sending_format==1:
+        if 'EXT.' in currscenename or currscenename.startswith("."):
                 dronesetc=1 #thetext in row['element_text']:
         # choose shots depending on characters in scene
         #print(f_scene_name)
         #print(dronesetc)
-        # this check is all about what shots are needed as per number of characters
-        filshot_df = shot_vals_df[(shot_vals_df['min_chars'] <=numcharacters) & (shot_vals_df['dronesetc'] <= dronesetc) &  (shot_vals_df['dialog'] <= len(currdialog_df))]
-        finshot_df = filshot_df.reset_index(drop=True)
-        finshot_df = finshot_df.drop_duplicates()
+
 
             #elif current_type == 'Action':
             #if current_type == '':
@@ -181,6 +183,12 @@ def createshotlist(self, numcharacters, outpath, action_df, df, shot_vals_df, se
                 #print(mydesc)
                 #print(eddesc) 
                 #THESE ARE THE SHOTS
+
+                    # this check is all about what shots are needed as per number of characters
+            numcharacters = theaction['charsinscene']
+            filshot_df = shot_vals_df[(shot_vals_df['min_chars'] <=numcharacters) & (shot_vals_df['dronesetc'] <= dronesetc) &  (shot_vals_df['dialog'] <= len(currdialog_df))]
+            finshot_df = filshot_df.reset_index(drop=True)
+            finshot_df = finshot_df.drop_duplicates()
             for index, shotrow in finshot_df.iterrows():
                     #otsok = 0
                     #values = index + 1, currscneno, f_scene_name, shotrow['shot_size'], shotrow['shot_type'], shotrow['AngleOrigin'], 'STATIC or PAN', shotrow['lens'] \

diff --git a/aubrushli.db b/aubrushli.db
diff --git a/fdx2aub.py b/fdx2aub.py
@@ -1,4 +1,10 @@
 import xml.etree.ElementTree as ET
+# tree = ET.parse(r'D:\pythonEnvs\stable_LLM\Big_Fish.fdx')
+# root = tree.getroot()
+
+# for child in root:
+#     print(child.tag, child.attrib)
+
 import sys
 import sqlite3
 import pandas as pd
@@ -21,7 +27,7 @@ def createbigcsv(self, csvpath, screenplayid):
 
         sql = "SELECT linenumber,  line, linetype, scenenum, length, pagenum, charsinscene FROM fdxlines where screenplayid = ?  order by linenumber"
         castcsvdf = pd.read_sql(sql, connection, params=(screenplayid,))
-        print(castcsvdf)
+        #print(castcsvdf)
             #connection.commit()
         castcsvdf.to_csv(csvpath, index=False)
 
@@ -35,7 +41,7 @@ def writelinetodb(self, connection, screenplayID, line, linenum, linetype, scene
 
 
     def create_cast_list(self,castlistpath, screenplayid):
-        print(screenplayid)
+        #print(screenplayid)
 
         current_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -47,7 +53,7 @@ def create_cast_list(self,castlistpath, screenplayid):
 
         sql = "select line as character, count(line) as lines from fdxlines where linetype = 'character' and screenplayid = ? group by line order by lines desc"
         castcsvdf = pd.read_sql(sql, connection, params=(screenplayid,))
-        print(castcsvdf)
+        #print(castcsvdf)
             #connection.commit()
         castcsvdf.to_csv(castlistpath, index=False)
 
@@ -60,6 +66,17 @@ def fdx2dbaub(self, input_file_name, shotsdef, outpath):
         connection.execute('PRAGMA journal_mode = WAL')
         connection.execute('PRAGMA cache_size = -8192')
         connection.commit()
+
+
+
+
+
+
+        #output_file_name = r"D:\pythonEnvs\stable_LLM\test.txt"
+        #input_file_name = r'G:\Aubrushli_images_current\Big_Fish.fdx'
+        #shotsdef = r'G:\Aubrushli_images_current\shot_vals.csv'
+        #outpath = 'G:/Aubrushli_images_current/fdxshotlists/'
+        #file = open(output_file_name,"w")
 
 
 
@@ -123,16 +140,17 @@ def fdx2dbaub(self, input_file_name, shotsdef, outpath):
                         for index, text_element in enumerate(paragraph.findall('Text')):
                             if (text_element.text is not None):
                                 linetype =''
-                                if "(" in text_element.text or ")" in text_element.text:
+                                if "(" in text_element.text or ")" in text_element.text or "cont’d" in text_element.text:
                                     #charsinscene= charsinscene +1
                                     linetype = 'parenthetical'
                                 else:
+                                    #print(text_element.text, linenum)
                                     charsinscene= charsinscene +1
                                     linetype = 'character'
                                 linenum = linenum + 1
                     # typenum = typenum +1
                                 scenetypenum= scenetypenum + 1
-                                self.writelinetodb(connection, screenplayID, text_element.text.upper(), linenum, linetype, scenenum, scenetypenum, chars=charsinscene )
+                                self.writelinetodb(connection, screenplayID, text_element.text.upper().strip(), linenum, linetype, scenenum, scenetypenum, chars=charsinscene )
 
                 elif paragraph.attrib['Type'] == "Parenthetical":
                     if hasattr(paragraph.find('Text'), 'text'):
@@ -154,49 +172,64 @@ def fdx2dbaub(self, input_file_name, shotsdef, outpath):
                                 self.writelinetodb(connection, screenplayID, text_element.text, linenum, 'dialogue', scenenum, scenetypenum )
                 elif paragraph.attrib['Type'] == "Scene Heading":
                     off= paragraph.get('Number')
-
-
                     scene_properties = paragraph.find('SceneProperties').attrib
 
-
-                                # Access the values of the SceneProperties attributes
+                    # Access the values of the SceneProperties attributes
                     length = scene_properties['Length']
                     page = scene_properties['Page']
 
-
-
-
-                        #print(child.attrib)
+                    #print(child.attrib)
                     if hasattr(paragraph.find('Text'), 'text'):
+                        thetext = ''
                         for index, text_element in enumerate(paragraph.findall('Text')):
                             if (text_element.text is not None):
-                                # print(leng)
-                                # print(page)
-                                charsinscene=0 
-                                linenum = linenum + 1
+                                thetext = thetext + ' ' + text_element.text.upper()
+                        charsinscene=0 
+                        linenum = linenum + 1
                     # typenum = typenum +1
-                                scenenum = scenenum + 1
-                                scenetypenum=0
-                                self.writelinetodb(connection, screenplayID, text_element.text.upper() , linenum, 'scene', scenenum, scenetypenum, off, length, page, chars=charsinscene)
+                        scenenum = scenenum + 1
+                        scenetypenum=0
+                        self.writelinetodb(connection, screenplayID, thetext , linenum, 'scene', scenenum, scenetypenum, off, length, page, chars=charsinscene)
 
-
+        #*************************steph: need a characters-in-scene update here to count distinct characters per scene*************************************
         cursor = connection.cursor()
         sql = "CREATE TEMP TABLE distchars as select distinct line, scenenum from fdxlines where linetype = 'character' group by scenenum, line order by scenenum" 
         sql1 ="create temp table finchars as select count(line) thechars, scenenum from distchars group by scenenum"
         sql2 = "update fdxlines set charsinscene = f.thechars from (select thechars, scenenum from finchars) as f where f.scenenum = fdxlines.scenenum"
+        sql3 = "INSERT INTO characters (screenplayID, charactername) select distinct screenplayid, trim(line) from fdxlines where linetype = 'character'"
         cursor.execute(sql)
         cursor.execute(sql1)
         cursor.execute(sql2)
+        cursor.execute(sql3)
+        connection.commit()
+        cursor = connection.cursor()
+        sql4 = "SELECT characterID, screenplayID, charactername FROM characters where screenplayid =? "
+        params = (screenplayID,)  # create a tuple of parameters
+        cursor.execute(sql4, params)  # pass the tuple to cursor.execute()
+        for row in cursor:
+            print(row[1], row[2])
+            cursor1 = connection.cursor()
+            sql5= "INSERT INTO characteractions (characterID, actionID, charactername, theline) select ? , fdxlineID , ? , line from fdxlines where screenplayid =? and linetype = 'action' and (line like '% ' || ? || '%' or line like ? || '%')  "
+            params1 = (row[0], row[2], screenplayID, row[2], row[2])
+            cursor1.execute(sql5, params1) 
+            #for newrow in cursor1:
+            #    print(newrow)
+            #b= row  
+        cursor = connection.cursor()
+        sql6= "update fdxlines set actchars = c.actchars from (select count(actionid) as actchars, actionid FROM characteractions group by actionid, theline) as c where c.actionid = fdxlines.fdxlineid"
+        cursor.execute(sql6)
         connection.commit()
-
 
 
 
+        # need to loop round each scene in db for shots call aubrushli.py
+
+
 
 
         # need a complete DF of the fdxlines table in the same format as fountain
         # need to put an id in fdxlines to match to the screenplay you need.
-        sql = 'SELECT line, linetype, linenumber, scenenum, charsinscene from fdxlines where screenplayID=?'
+        sql = 'SELECT line, linetype, linenumber, scenenum, case when actchars is null then charsinscene else actchars end as charsinscene from fdxlines where screenplayID=?'
         df = pd.read_sql(sql, connection, params=(screenplayID,))
 
         df = df.rename(columns={'line': 'element_text', 'linetype': 'element_type', 'linenumber': 'original_line'})
@@ -219,10 +252,10 @@ def fdx2dbaub(self, input_file_name, shotsdef, outpath):
             scenedf = scenedf.reset_index(drop=True)
             currscenenamedf = scenedf[(scenedf['element_type']== 'scene')]
             currscenenamedf = currscenenamedf.reset_index(drop=True)
-            print(currscenenamedf)
+            #print(currscenenamedf)
             if len(currscenenamedf) >0:
                 curscenename = currscenenamedf['element_text'][0]
-                print(curscenename)
+                #print(curscenename)
                 charsinscene = scenedf['charsinscene'][0]
                 currsceneno = scenedf['scenenum'][0]
 
@@ -235,17 +268,24 @@ def fdx2dbaub(self, input_file_name, shotsdef, outpath):
                 endline = scenedf['original_line'].max()
                 combined=[]
 
-                #print(scene[0])
-                print(scenedf)
-                print(actionsdf)
-                print(startline)
-                print(endline)
-                print(charsinscene)
+                # #print(scene[0])
+                # print(scenedf)
+                # print(actionsdf)
+                # print(startline)
+                # print(endline)
+                # print(charsinscene)
 
                 aubshot = Aubrushli()
                 aubshot.createshotlist( charsinscene, outpath, actionsdf, df, shot_vals_df, 2, startline, endline, currsceneno, curscenename, combined, 'Big_FDX_Fish')
 
+                #createshotlist( numcharacters, outpath, action_df, df, shot_vals_df, sending_format=1, startline=f_start_scene_line, endline=f_next_scene_line, currscene=currscneno, currscenename=f_scene_name, combined=combine_chars):
+
+
+
+
 
+        # also need a start and end line for the scene
+        # use the sending format param to allow extra
 
         connection.close()