From e5e86f27d066fc9e494cafd0df2a58ed45bc2a7f Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Tue, 22 Oct 2024 09:42:27 +0200 Subject: [PATCH 1/7] [hlopt] cache based on opcode array --- src/generators/genhl.ml | 4 +- src/generators/hlopt.ml | 140 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 136 insertions(+), 8 deletions(-) diff --git a/src/generators/genhl.ml b/src/generators/genhl.ml index 06b29a4d7d0..6822739b242 100644 --- a/src/generators/genhl.ml +++ b/src/generators/genhl.ml @@ -96,6 +96,7 @@ type context = { cfunctions : fundecl DynArray.t; cconstants : (constval, (global * int array)) lookup; optimize : bool; + opt_cache : bool; w_null_compare : bool; overrides : (string * path, bool) Hashtbl.t; defined_funs : (int,unit) Hashtbl.t; @@ -3434,7 +3435,7 @@ and make_fun ?gen_content ctx name fidx f cthis cparent = Hashtbl.add ctx.defined_funs fidx (); let f = if ctx.optimize && (gen_content = None || name <> ("","")) then begin let t = Timer.timer ["generate";"hl";"opt"] in - let f = Hlopt.optimize ctx.dump_out (DynArray.get ctx.cstrings.arr) hlf f in + let f = Hlopt.optimize ctx.dump_out ctx.opt_cache (DynArray.get ctx.cstrings.arr) hlf f in t(); f end else @@ -4127,6 +4128,7 @@ let create_context com dump = let ctx = { com = com; optimize = not (Common.raw_defined com "hl_no_opt"); + opt_cache = not (Common.raw_defined com "hl_no_opt_cache"); w_null_compare = Common.raw_defined com "hl_w_null_compare"; dump_out = if dump then Some (IO.output_channel (open_out_bin "dump/hlopt.txt")) else None; m = method_context 0 HVoid null_capture false; diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index e79e7c7495a..37d0893fce6 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1049,7 +1049,126 @@ let _optimize (f:fundecl) = r_reg_moved = reg_moved; } +let same_op op1 op2 = + match op1, op2 with + | OMov (a1,b1), OMov (a2, b2) -> a1 = a2 && b1 = b2 + | OInt (r1,_), OInt (r2, _) -> r1 = r2 + | OFloat (r1,_), OFloat (r2,_) 
-> r1 = r2 + | OBool (r1,b1), OBool (r2,b2) -> r1 = r2 && b1 = b2 + | OBytes (r1,_), OBytes (r2,_) -> r1 = r2 + | OString (r1,_), OString (r2,_) -> r1 = r2 + | ONull r1, ONull r2 -> r1 = r2 + | OAdd (r1,a1,b1), OAdd (r2,a2,b2) + | OSub (r1,a1,b1), OSub (r2,a2,b2) + | OMul (r1,a1,b1), OMul (r2,a2,b2) + | OSDiv (r1,a1,b1), OSDiv (r2,a2,b2) + | OUDiv (r1,a1,b1), OUDiv (r2,a2,b2) + | OSMod (r1,a1,b1), OSMod (r2,a2,b2) + | OUMod (r1,a1,b1), OUMod (r2,a2,b2) + | OShl (r1,a1,b1), OShl (r2,a2,b2) + | OSShr (r1,a1,b1), OSShr (r2,a2,b2) + | OUShr (r1,a1,b1), OUShr (r2,a2,b2) + | OAnd (r1,a1,b1), OAnd (r2,a2,b2) + | OOr (r1,a1,b1), OOr (r2,a2,b2) + | OXor (r1,a1,b1), OXor (r2,a2,b2) + -> r1 = r2 && a1 = a2 && b1 = b2 + | ONeg (r1,v1), ONeg (r2,v2) + | ONot (r1,v1), ONot (r2,v2) + -> r1 = r2 && v1 = v2 + | OIncr r1, OIncr r2 + | ODecr r1, ODecr r2 + -> r1 = r2 + | OCall0 (r1,_), OCall0 (r2,_) -> r1 = r2 + | OCall1 (r1,_,a1), OCall1 (r2,_,a2) -> r1 = r2 && a1 = a2 + | OCall2 (r1,_,a1,b1), OCall2 (r2,_,a2,b2) -> r1 = r2 && a1 = a2 && b1 = b2 + | OCall3 (r1,_,a1,b1,c1), OCall3 (r2,_,a2,b2,c2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2 + | OCall4 (r1,_,a1,b1,c1,d1), OCall4 (r2,_,a2,b2,c2,d2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2 && d1 = d2 + | OCallN (r1,_,rl1), OCallN (r2,_,rl2) -> r1 = r2 && rl1 = rl2 + | OCallMethod (r1,f1,rl1), OCallMethod (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 + | OCallClosure (r1,f1,rl1), OCallClosure (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 + | OCallThis (r1,f1,rl1), OCallThis (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 + | OStaticClosure (r1,_), OStaticClosure (r2,_) -> r1 = r2 + | OInstanceClosure (r1,_,v1), OInstanceClosure (r2,_,v2) -> r1 = r2 && v1 = v2 + | OVirtualClosure (r1,o1,m1), OVirtualClosure (r2,o2,m2) -> r1 = r2 && o1 = o2 && m1 = m2 + | OGetGlobal (r1,_), OGetGlobal (r2,_) + | OSetGlobal (_,r1), OSetGlobal (_,r2) + -> r1 = r2 + | OField (r1,o1,i1), OField (r2,o2,i2) + | OSetField (o1,i1,r1), OSetField (o2,i2,r2) + -> 
r1 = r2 && o1 = o2 && i1 = i2 + | OGetThis (r1,i1), OGetThis (r2,i2) + | OSetThis (i1,r1), OSetThis (i2,r2) + -> r1 = r2 && i1 = i2 + | ODynGet (r1,o1,_), ODynGet (r2,o2,_) -> r1 = r2 && o1 = o2 + | ODynSet (o1,_,v1), ODynSet (o2,_,v2) -> o1 = o2 && v1 = v2 + | OJTrue (r1,d1), OJTrue (r2,d2) + | OJFalse (r1,d1), OJFalse (r2,d2) + | OJNull (r1,d1), OJNull (r2,d2) + | OJNotNull (r1,d1), OJNotNull (r2,d2) + -> r1 = r2 && d1 = d2 + | OJSLt (a1,b1,i1), OJSLt (a2,b2,i2) + | OJSGte (a1,b1,i1), OJSGte (a2,b2,i2) + | OJSGt (a1,b1,i1), OJSGt (a2,b2,i2) + | OJSLte (a1,b1,i1), OJSLte (a2,b2,i2) + | OJULt (a1,b1,i1), OJULt (a2,b2,i2) + | OJUGte (a1,b1,i1), OJUGte (a2,b2,i2) + | OJNotLt (a1,b1,i1), OJNotLt (a2,b2,i2) + | OJNotGte (a1,b1,i1), OJNotGte (a2,b2,i2) + | OJEq (a1,b1,i1), OJEq (a2,b2,i2) + | OJNotEq (a1,b1,i1), OJNotEq (a2,b2,i2) + -> a1 = a2 && b1 = b2 && i1 = i2 + | OJAlways d1, OJAlways d2 -> d1 = d2 + | OToDyn (r1,a1), OToDyn (r2,a2) + | OToSFloat (r1,a1), OToSFloat (r2,a2) + | OToUFloat (r1,a1), OToUFloat (r2,a2) + | OToInt (r1,a1), OToInt (r2,a2) + -> r1 = r2 && a1 = a2 + | OSafeCast (r1,v1), OSafeCast (r2,v2) + | OUnsafeCast (r1,v1), OUnsafeCast (r2,v2) + | OToVirtual (r1,v1), OToVirtual (r2,v2) + -> r1 = r2 && v1 = v2 + | OLabel _, OLabel _ -> true + | ORet r1, ORet r2 -> r1 = r2 + | OThrow r1, OThrow r2 + | ORethrow r1, ORethrow r2 + -> r1 = r2 + | OSwitch (r1,idx1,eend1), OSwitch (r2,idx2,eend2) -> r1 = r2 && idx1 = idx2 && eend1 = eend2 + | ONullCheck r1, ONullCheck r2 -> r1 = r2 + | OTrap (r1,i1), OTrap (r2,i2) -> r1 = r2 && i1 = i2 + | OEndTrap b1, OEndTrap b2 -> b1 = b2 + | OGetUI8 (r1,a1,b1), OGetUI8 (r2,a2,b2) + | OGetUI16 (r1,a1,b1), OGetUI16 (r2,a2,b2) + | OGetMem (r1,a1,b1), OGetMem (r2,a2,b2) + | OGetArray (r1,a1,b1), OGetArray (r2,a2,b2) + | OSetUI8 (r1,a1,b1), OSetUI8 (r2,a2,b2) + | OSetUI16 (r1,a1,b1), OSetUI16 (r2,a2,b2) + | OSetMem (r1,a1,b1), OSetMem (r2,a2,b2) + | OSetArray (r1,a1,b1), OSetArray (r2,a2,b2) + -> r1 = r2 && a1 = a2 && b1 = b2 + 
| ONew r1, ONew r2 -> r1 = r2 + | OArraySize (r1,a1), OArraySize (r2,a2) -> r1 = r2 && a1 = a2 + | OType (r1,_), OType (r2,_) -> r1 = r2 + | OGetType (r1,v1), OGetType (r2,v2) + | OGetTID (r1,v1), OGetTID (r2,v2) + | ORef (r1,v1), ORef (r2,v2) + | OUnref (v1,r1), OUnref (v2,r2) + | OSetref (r1,v1), OSetref (r2,v2) + -> r1 = r2 && v1 = v2 + | OMakeEnum (r1,e1,pl1), OMakeEnum (r2,e2,pl2) -> r1 = r2 && e1 = e2 && pl1 = pl2 + | OEnumAlloc (r1,e1), OEnumAlloc (r2,e2) -> r1 = r2 && e1 = e2 + | OEnumIndex (r1,e1), OEnumIndex (r2,e2) -> r1 = r2 && e1 = e2 + | OEnumField (r1,e1,i1,n1), OEnumField (r2,e2,i2,n2) -> r1 = r2 && e1 = e2 && i1 = i2 && n1 = n2 + | OSetEnumField (e1,i1,r1), OSetEnumField (e2,i2,r2) -> r1 = r2 && e1 = e2 && i1 = i2 + | OAssert _, OAssert _ -> true + | ORefData (r1,d1), ORefData (r2,d2) -> r1 = r2 && d1 = d2 + | ORefOffset (r1,a1,off1), ORefOffset (r2,a2,off2) -> r1 = r2 && a1 = a2 && off1 = off2 + | ONop s1, ONop s2 -> s1 = s2 + | OPrefetch (r1,f1,mode1), OPrefetch (r2,f2,mode2) -> r1 = r2 && f1 = f2 && mode1 = mode2 + | OAsm (mode1, value1, reg1), OAsm (mode2, value2, reg2) -> mode1 = mode2 && value1 = value2 && reg1 = reg2 + | _ -> false + type cache_elt = { + c_old_code : opcode array; c_code : opcode array; c_rctx : rctx; c_remap_indexes : int array; @@ -1059,12 +1178,17 @@ type cache_elt = { let opt_cache = ref PMap.empty let used_mark = ref 0 -let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) = - let old_code = match dump with None -> f.code | Some _ -> Array.copy f.code in +let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = + let sign = if f.fpath <> ("","") then fundecl_name f else (Printf.sprintf "%s:%d" hxf.tf_expr.epos.pfile hxf.tf_expr.epos.pmin) in try - let c = PMap.find hxf (!opt_cache) in + if not usecache then raise Not_found; + let c = PMap.find sign (!opt_cache) in c.c_last_used <- !used_mark; - if Array.length f.code <> Array.length c.c_code then Globals.die "" __LOC__; + if Array.length f.code <> Array.length 
c.c_code then raise Not_found; + Array.iteri (fun i op1 -> + let op2 = Array.unsafe_get f.code i in + if not (same_op op1 op2) then raise Not_found; + ) c.c_old_code; let code = c.c_code in Array.iter (fun i -> let op = (match Array.unsafe_get code i, Array.unsafe_get f.code i with @@ -1088,8 +1212,9 @@ let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) = | _ -> Globals.die "" __LOC__) in Array.unsafe_set code i op ) c.c_remap_indexes; - remap_fun c.c_rctx { f with code = code } dump get_str old_code + remap_fun c.c_rctx { f with code = code } dump get_str f.code with Not_found -> + let old_code = match dump, usecache with None, true | Some _, _ -> Array.copy f.code | _ -> f.code in let rctx = _optimize f in let old_ops = f.code in let fopt = remap_fun rctx f dump get_str old_code in @@ -1109,12 +1234,13 @@ let optimize dump get_str (f:fundecl) (hxf:Type.tfunc) = DynArray.add idxs i | _ -> () ) old_ops; - (*opt_cache := PMap.add hxf { + if usecache then opt_cache := PMap.add sign { + c_old_code = old_code; c_code = old_ops; c_rctx = rctx; c_last_used = !used_mark; c_remap_indexes = DynArray.to_array idxs; - } (!opt_cache);*) + } (!opt_cache); fopt let clean_cache() = From ea0e41d1db1b0808751d73ecbb5dbf8492f808c6 Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Thu, 24 Oct 2024 09:49:45 +0200 Subject: [PATCH 2/7] Fix last_used position --- src/generators/hlopt.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index 37d0893fce6..75c2ffaec3d 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1183,12 +1183,12 @@ let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = try if not usecache then raise Not_found; let c = PMap.find sign (!opt_cache) in - c.c_last_used <- !used_mark; if Array.length f.code <> Array.length c.c_code then raise Not_found; Array.iteri (fun i op1 -> let op2 = Array.unsafe_get f.code i in if not (same_op op1 op2) then raise Not_found; ) 
c.c_old_code; + c.c_last_used <- !used_mark; let code = c.c_code in Array.iter (fun i -> let op = (match Array.unsafe_get code i, Array.unsafe_get f.code i with From 103711e2b5e57ea43c07581b5d7b57f761d4f5d7 Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Thu, 24 Oct 2024 09:50:36 +0200 Subject: [PATCH 3/7] Fix reg_map segfault in an extreme case Where f has more unused regs at the end but the code is the same. --- src/generators/hlopt.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index 75c2ffaec3d..25f25065c66 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1184,6 +1184,7 @@ let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = if not usecache then raise Not_found; let c = PMap.find sign (!opt_cache) in if Array.length f.code <> Array.length c.c_code then raise Not_found; + if Array.length f.regs <> Array.length c.c_rctx.r_reg_map then raise Not_found; Array.iteri (fun i op1 -> let op2 = Array.unsafe_get f.code i in if not (same_op op1 op2) then raise Not_found; From 338a9e6a55b06cd3e8c9f99a1d7348d42ad9d1cb Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Thu, 24 Oct 2024 09:54:41 +0200 Subject: [PATCH 4/7] Simplify same_op by using op1=op2 --- src/generators/hlopt.ml | 103 ++-------------------------------------- 1 file changed, 3 insertions(+), 100 deletions(-) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index 25f25065c66..22e218564ff 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1051,121 +1051,24 @@ let _optimize (f:fundecl) = let same_op op1 op2 = match op1, op2 with - | OMov (a1,b1), OMov (a2, b2) -> a1 = a2 && b1 = b2 | OInt (r1,_), OInt (r2, _) -> r1 = r2 | OFloat (r1,_), OFloat (r2,_) -> r1 = r2 - | OBool (r1,b1), OBool (r2,b2) -> r1 = r2 && b1 = b2 | OBytes (r1,_), OBytes (r2,_) -> r1 = r2 | OString (r1,_), OString (r2,_) -> r1 = r2 - | ONull r1, ONull r2 -> r1 = r2 - | OAdd (r1,a1,b1), OAdd (r2,a2,b2) - | OSub (r1,a1,b1), 
OSub (r2,a2,b2) - | OMul (r1,a1,b1), OMul (r2,a2,b2) - | OSDiv (r1,a1,b1), OSDiv (r2,a2,b2) - | OUDiv (r1,a1,b1), OUDiv (r2,a2,b2) - | OSMod (r1,a1,b1), OSMod (r2,a2,b2) - | OUMod (r1,a1,b1), OUMod (r2,a2,b2) - | OShl (r1,a1,b1), OShl (r2,a2,b2) - | OSShr (r1,a1,b1), OSShr (r2,a2,b2) - | OUShr (r1,a1,b1), OUShr (r2,a2,b2) - | OAnd (r1,a1,b1), OAnd (r2,a2,b2) - | OOr (r1,a1,b1), OOr (r2,a2,b2) - | OXor (r1,a1,b1), OXor (r2,a2,b2) - -> r1 = r2 && a1 = a2 && b1 = b2 - | ONeg (r1,v1), ONeg (r2,v2) - | ONot (r1,v1), ONot (r2,v2) - -> r1 = r2 && v1 = v2 - | OIncr r1, OIncr r2 - | ODecr r1, ODecr r2 - -> r1 = r2 | OCall0 (r1,_), OCall0 (r2,_) -> r1 = r2 | OCall1 (r1,_,a1), OCall1 (r2,_,a2) -> r1 = r2 && a1 = a2 | OCall2 (r1,_,a1,b1), OCall2 (r2,_,a2,b2) -> r1 = r2 && a1 = a2 && b1 = b2 | OCall3 (r1,_,a1,b1,c1), OCall3 (r2,_,a2,b2,c2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2 | OCall4 (r1,_,a1,b1,c1,d1), OCall4 (r2,_,a2,b2,c2,d2) -> r1 = r2 && a1 = a2 && b1 = b2 && c1 = c2 && d1 = d2 | OCallN (r1,_,rl1), OCallN (r2,_,rl2) -> r1 = r2 && rl1 = rl2 - | OCallMethod (r1,f1,rl1), OCallMethod (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 - | OCallClosure (r1,f1,rl1), OCallClosure (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 - | OCallThis (r1,f1,rl1), OCallThis (r2,f2,rl2) -> r1 = r2 && f1 = f2 && rl1 = rl2 | OStaticClosure (r1,_), OStaticClosure (r2,_) -> r1 = r2 | OInstanceClosure (r1,_,v1), OInstanceClosure (r2,_,v2) -> r1 = r2 && v1 = v2 - | OVirtualClosure (r1,o1,m1), OVirtualClosure (r2,o2,m2) -> r1 = r2 && o1 = o2 && m1 = m2 - | OGetGlobal (r1,_), OGetGlobal (r2,_) - | OSetGlobal (_,r1), OSetGlobal (_,r2) - -> r1 = r2 - | OField (r1,o1,i1), OField (r2,o2,i2) - | OSetField (o1,i1,r1), OSetField (o2,i2,r2) - -> r1 = r2 && o1 = o2 && i1 = i2 - | OGetThis (r1,i1), OGetThis (r2,i2) - | OSetThis (i1,r1), OSetThis (i2,r2) - -> r1 = r2 && i1 = i2 + | OGetGlobal (r1,_), OGetGlobal (r2,_) -> r1 = r2 + | OSetGlobal (_,r1), OSetGlobal (_,r2) -> r1 = r2 | ODynGet (r1,o1,_), ODynGet 
(r2,o2,_) -> r1 = r2 && o1 = o2 | ODynSet (o1,_,v1), ODynSet (o2,_,v2) -> o1 = o2 && v1 = v2 - | OJTrue (r1,d1), OJTrue (r2,d2) - | OJFalse (r1,d1), OJFalse (r2,d2) - | OJNull (r1,d1), OJNull (r2,d2) - | OJNotNull (r1,d1), OJNotNull (r2,d2) - -> r1 = r2 && d1 = d2 - | OJSLt (a1,b1,i1), OJSLt (a2,b2,i2) - | OJSGte (a1,b1,i1), OJSGte (a2,b2,i2) - | OJSGt (a1,b1,i1), OJSGt (a2,b2,i2) - | OJSLte (a1,b1,i1), OJSLte (a2,b2,i2) - | OJULt (a1,b1,i1), OJULt (a2,b2,i2) - | OJUGte (a1,b1,i1), OJUGte (a2,b2,i2) - | OJNotLt (a1,b1,i1), OJNotLt (a2,b2,i2) - | OJNotGte (a1,b1,i1), OJNotGte (a2,b2,i2) - | OJEq (a1,b1,i1), OJEq (a2,b2,i2) - | OJNotEq (a1,b1,i1), OJNotEq (a2,b2,i2) - -> a1 = a2 && b1 = b2 && i1 = i2 - | OJAlways d1, OJAlways d2 -> d1 = d2 - | OToDyn (r1,a1), OToDyn (r2,a2) - | OToSFloat (r1,a1), OToSFloat (r2,a2) - | OToUFloat (r1,a1), OToUFloat (r2,a2) - | OToInt (r1,a1), OToInt (r2,a2) - -> r1 = r2 && a1 = a2 - | OSafeCast (r1,v1), OSafeCast (r2,v2) - | OUnsafeCast (r1,v1), OUnsafeCast (r2,v2) - | OToVirtual (r1,v1), OToVirtual (r2,v2) - -> r1 = r2 && v1 = v2 - | OLabel _, OLabel _ -> true - | ORet r1, ORet r2 -> r1 = r2 - | OThrow r1, OThrow r2 - | ORethrow r1, ORethrow r2 - -> r1 = r2 - | OSwitch (r1,idx1,eend1), OSwitch (r2,idx2,eend2) -> r1 = r2 && idx1 = idx2 && eend1 = eend2 - | ONullCheck r1, ONullCheck r2 -> r1 = r2 - | OTrap (r1,i1), OTrap (r2,i2) -> r1 = r2 && i1 = i2 - | OEndTrap b1, OEndTrap b2 -> b1 = b2 - | OGetUI8 (r1,a1,b1), OGetUI8 (r2,a2,b2) - | OGetUI16 (r1,a1,b1), OGetUI16 (r2,a2,b2) - | OGetMem (r1,a1,b1), OGetMem (r2,a2,b2) - | OGetArray (r1,a1,b1), OGetArray (r2,a2,b2) - | OSetUI8 (r1,a1,b1), OSetUI8 (r2,a2,b2) - | OSetUI16 (r1,a1,b1), OSetUI16 (r2,a2,b2) - | OSetMem (r1,a1,b1), OSetMem (r2,a2,b2) - | OSetArray (r1,a1,b1), OSetArray (r2,a2,b2) - -> r1 = r2 && a1 = a2 && b1 = b2 - | ONew r1, ONew r2 -> r1 = r2 - | OArraySize (r1,a1), OArraySize (r2,a2) -> r1 = r2 && a1 = a2 | OType (r1,_), OType (r2,_) -> r1 = r2 - | OGetType (r1,v1), 
OGetType (r2,v2) - | OGetTID (r1,v1), OGetTID (r2,v2) - | ORef (r1,v1), ORef (r2,v2) - | OUnref (v1,r1), OUnref (v2,r2) - | OSetref (r1,v1), OSetref (r2,v2) - -> r1 = r2 && v1 = v2 - | OMakeEnum (r1,e1,pl1), OMakeEnum (r2,e2,pl2) -> r1 = r2 && e1 = e2 && pl1 = pl2 - | OEnumAlloc (r1,e1), OEnumAlloc (r2,e2) -> r1 = r2 && e1 = e2 - | OEnumIndex (r1,e1), OEnumIndex (r2,e2) -> r1 = r2 && e1 = e2 - | OEnumField (r1,e1,i1,n1), OEnumField (r2,e2,i2,n2) -> r1 = r2 && e1 = e2 && i1 = i2 && n1 = n2 - | OSetEnumField (e1,i1,r1), OSetEnumField (e2,i2,r2) -> r1 = r2 && e1 = e2 && i1 = i2 - | OAssert _, OAssert _ -> true - | ORefData (r1,d1), ORefData (r2,d2) -> r1 = r2 && d1 = d2 - | ORefOffset (r1,a1,off1), ORefOffset (r2,a2,off2) -> r1 = r2 && a1 = a2 && off1 = off2 - | ONop s1, ONop s2 -> s1 = s2 - | OPrefetch (r1,f1,mode1), OPrefetch (r2,f2,mode2) -> r1 = r2 && f1 = f2 && mode1 = mode2 - | OAsm (mode1, value1, reg1), OAsm (mode2, value2, reg2) -> mode1 = mode2 && value1 = value2 && reg1 = reg2 - | _ -> false + | _ -> op1 = op2 type cache_elt = { c_old_code : opcode array; From e9a3cec659a5abacb232475c3153aa91193459e1 Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Thu, 24 Oct 2024 15:47:31 +0200 Subject: [PATCH 5/7] Fix nargs differs with unused args in a special case unused args' reg should remain --- src/generators/hlopt.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index 22e218564ff..18d2b1cfb2e 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1072,6 +1072,7 @@ let same_op op1 op2 = type cache_elt = { c_old_code : opcode array; + c_old_fnargs : int; c_code : opcode array; c_rctx : rctx; c_remap_indexes : int array; @@ -1082,12 +1083,14 @@ let opt_cache = ref PMap.empty let used_mark = ref 0 let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = + let nargs f = (match f.ftype with HFun (args,_) -> List.length args | _ -> Globals.die "" __LOC__) in let sign = if f.fpath <> ("","") 
then fundecl_name f else (Printf.sprintf "%s:%d" hxf.tf_expr.epos.pfile hxf.tf_expr.epos.pmin) in try if not usecache then raise Not_found; let c = PMap.find sign (!opt_cache) in if Array.length f.code <> Array.length c.c_code then raise Not_found; if Array.length f.regs <> Array.length c.c_rctx.r_reg_map then raise Not_found; + if nargs f <> c.c_old_fnargs then raise Not_found; Array.iteri (fun i op1 -> let op2 = Array.unsafe_get f.code i in if not (same_op op1 op2) then raise Not_found; @@ -1140,6 +1143,7 @@ let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = ) old_ops; if usecache then opt_cache := PMap.add sign { c_old_code = old_code; + c_old_fnargs = nargs f; c_code = old_ops; c_rctx = rctx; c_last_used = !used_mark; From bf6aa9a3c59d198004badf4bf77f8830b964479b Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Thu, 24 Oct 2024 15:49:47 +0200 Subject: [PATCH 6/7] Fix code reuse in the same run code can be sent as optimize result as-is if no reg map and no nop operations. Make a code copy if the cache entry is used in this run.
--- src/generators/hlopt.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index 18d2b1cfb2e..c0fc6541036 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1095,8 +1095,8 @@ let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = let op2 = Array.unsafe_get f.code i in if not (same_op op1 op2) then raise Not_found; ) c.c_old_code; + let code = if c.c_last_used = !used_mark then Array.copy c.c_code else c.c_code in c.c_last_used <- !used_mark; - let code = c.c_code in Array.iter (fun i -> let op = (match Array.unsafe_get code i, Array.unsafe_get f.code i with | OInt (r,_), OInt (_,idx) -> OInt (r,idx) From 00963c1c7a2171a37dc2b6ece952a62964ce775b Mon Sep 17 00:00:00 2001 From: Yuxiao Mao Date: Fri, 25 Oct 2024 10:19:29 +0200 Subject: [PATCH 7/7] Rename same_op so it's clearer --- src/generators/hlopt.ml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/generators/hlopt.ml b/src/generators/hlopt.ml index c0fc6541036..18d885e919f 100644 --- a/src/generators/hlopt.ml +++ b/src/generators/hlopt.ml @@ -1049,7 +1049,7 @@ let _optimize (f:fundecl) = r_reg_moved = reg_moved; } -let same_op op1 op2 = +let same_op_except_index op1 op2 = match op1, op2 with | OInt (r1,_), OInt (r2, _) -> r1 = r2 | OFloat (r1,_), OFloat (r2,_) -> r1 = r2 @@ -1093,7 +1093,7 @@ let optimize dump usecache get_str (f:fundecl) (hxf:Type.tfunc) = if nargs f <> c.c_old_fnargs then raise Not_found; Array.iteri (fun i op1 -> let op2 = Array.unsafe_get f.code i in - if not (same_op op1 op2) then raise Not_found; + if not (same_op_except_index op1 op2) then raise Not_found; ) c.c_old_code; let code = if c.c_last_used = !used_mark then Array.copy c.c_code else c.c_code in c.c_last_used <- !used_mark;