From 70cf7224a25b5dc5ebcdca9dd43428b2df9073d1 Mon Sep 17 00:00:00 2001
From: Elliot Ronaghan <ronawho@gmail.com>
Date: Sat, 25 Sep 2021 16:50:11 -0400
Subject: [PATCH] Optimize how segmented array slices are interpreted as
 strings/bytes

Optimize how slices of segmented arrays are interpreted as a Chapel
string/bytes. Interpreting is required to use Chapel's regex support,
which only operates on string/bytes types. Previously, the regex code
was localizing a region with a slice-assignment, and then making a new
string from it. This resulted in a large amount of short-lived
concurrent allocations, which was slow.

Update the regex code to use the new `lowLevelLocalizingSlice` routine
and instead of creating a new string, borrow memory from the segmented
array (or take ownership of the localized slice if the original data was
remote). This avoids allocation when the slice is local and minimizes
the amount of allocation and communication when the slice is remote.

This should significantly improve the performance of regex operations.

Part of 917
Part of 930
---
 src/CastMsg.chpl        |  2 +-
 src/Flatten.chpl        | 34 ++++++++++++++++++--------------
 src/SegmentedArray.chpl | 43 +++++++++++++++++++++++------------------
 3 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/src/CastMsg.chpl b/src/CastMsg.chpl
index 621f09f525..df28caa67b 100644
--- a/src/CastMsg.chpl
+++ b/src/CastMsg.chpl
@@ -203,7 +203,7 @@ module CastMsg {
               } else {
                    end = oa[i+1] - 1;
               }
-              e = interpretAsString(va[start..end]) : toType;
+              e = interpretAsString(va, start..end) : toType;
           }
       } catch e: IllegalArgumentError {
           var errorMsg = "bad value in cast from string to %s".format(toType:string);
diff --git a/src/Flatten.chpl b/src/Flatten.chpl
index 85a5d313db..f53aa22d44 100644
--- a/src/Flatten.chpl
+++ b/src/Flatten.chpl
@@ -1,6 +1,7 @@
 module Flatten {
   use ServerConfig;
 
+  use AryUtil;
   use SegmentedArray;
   use ServerErrors;
   use SymArrayDmap;
@@ -13,21 +14,23 @@ module Flatten {
   config const NULL_STRINGS_VALUE = 0:uint(8);
 
   /*
-    Convert a uint(8) array into bytes. Modeled after SegString.interpretAsString
-  */
-  inline proc interpretAsBytes(bytearray: [?D] uint(8)): bytes {
-    // Byte buffer must be local in order to make a C pointer
-    var localBytes: [{0..#D.size}] uint(8) = bytearray;
-    var cBytes = c_ptrTo(localBytes);
-    // Byte buffer is null-terminated, so length is buffer.size - 1
-    // The contents of the buffer should be copied out because cBytes will go out of scope
-    var b: bytes;
+     Interpret a region of a byte array as bytes. Modeled after interpretAsString
+   */
+  proc interpretAsBytes(bytearray: [?D] uint(8), region: range(?), borrow=false): bytes {
+    var localSlice = new lowLevelLocalizingSlice(bytearray, region);
+    // Byte buffer is null-terminated, so length is region.size - 1
     try {
-      b = createBytesWithNewBuffer(cBytes, D.size-1, D.size);
+      if localSlice.isOwned {
+        localSlice.isOwned = false;
+        return createBytesWithOwnedBuffer(localSlice.ptr, region.size-1, region.size);
+      } else if borrow {
+        return createBytesWithBorrowedBuffer(localSlice.ptr, region.size-1, region.size);
+      } else {
+        return createBytesWithNewBuffer(localSlice.ptr, region.size-1, region.size);
+      }
     } catch {
-      b = b"<error interpreting uint(8) as bytes>";
+      return b"<error interpreting uint(8) as bytes>";
     }
-    return b;
   }
 
   /*
@@ -61,10 +64,10 @@ module Flatten {
                                                                              var writeAgg = newDstAggregator(bool),
                                                                              var nbAgg = newDstAggregator(bool),
                                                                              var matchAgg = newDstAggregator(int)) {
+      var matchessize = 0 ;
       // for each string, find delim matches and set the positions of matches in writeToVal to false (non-matches will be copied to flattenedVals)
       // mark the locations of null bytes (the positions before original offsets and the last character of matches)
-      var matches = myRegex.matches(interpretAsBytes(origVals[off..#len]));
-      for m in matches {
+      for m in myRegex.matches(interpretAsBytes(origVals, off..#len, borrow=true)) {
         var match: reMatch = m[0];
         // set writeToVal to false for matches (except the last character of the match because we will write a null byte)
         for k in (off + match.offset:int)..#(match.size - 1) {
@@ -72,12 +75,13 @@ module Flatten {
         }
         // is writeToVal[(off + match.offset:int)..#(match.size - 1)] = false more efficient or for loop with aggregator?
         nbAgg.copy(nullByteLocations[off + match.offset:int + (match.size - 1)], true);
+        matchessize += 1;
       }
       if off != 0 {
         // the position before an offset is a null byte (except for off == 0)
         nbAgg.copy(nullByteLocations[off - 1], true);
       }
-      matchAgg.copy(numMatches[i], matches.size);
+      matchAgg.copy(numMatches[i], matchessize);
     }
 
     // writeToVal is true for positions to copy origVals (non-matches) and positions to write a null byte
diff --git a/src/SegmentedArray.chpl b/src/SegmentedArray.chpl
index c86feb8a63..ec9b428b52 100644
--- a/src/SegmentedArray.chpl
+++ b/src/SegmentedArray.chpl
@@ -148,7 +148,7 @@ module SegmentedArray {
         end = offsets.a[idx+1] - 1;
       }
       // Take the slice of the bytearray and "cast" it to a chpl string
-      var s = interpretAsString(values.a[start..end]);
+      var s = interpretAsString(values.a, start..end);
       return s;
     }
 
@@ -505,7 +505,7 @@ module SegmentedArray {
                                                                                var lenAgg = newDstAggregator(int),
                                                                                var startAgg = newDstAggregator(bool),
                                                                                var matchAgg = newDstAggregator(int)) {
-        var matches = myRegex.matches(interpretAsString(origVals[off..#len]));
+        var matches = myRegex.matches(interpretAsString(origVals, off..#len, borrow=true));
         for m in matches {
           var match: reMatch = m[0];
           lenAgg.copy(sparseLens[off + match.offset:int], match.size);
@@ -616,18 +616,18 @@ module SegmentedArray {
         when SearchMode.contains {
           forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
             // regexp.search searches the receiving string for matches at any offset
-            h = myRegex.search(interpretAsString(va[o..#l])).matched;
+            h = myRegex.search(interpretAsString(va, o..#l, borrow=true)).matched;
           }
         }
         when SearchMode.startsWith {
           forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
             // regexp.match only returns a match if the start of the string matches the pattern
-            h = myRegex.match(interpretAsString(va[o..#l])).matched;
+            h = myRegex.match(interpretAsString(va, o..#l, borrow=true)).matched;
           }
         }
         when SearchMode.endsWith {
           forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) {
-            var matches = myRegex.matches(interpretAsString(va[o..#l]));
+            var matches = myRegex.matches(interpretAsString(va, o..#l, borrow=true));
             var lastMatch: reMatch = matches[matches.size-1][0];
             // h = true iff start(lastMatch) + len(lastMatch) == len(string) (-1 to account for null byte)
             h = lastMatch.offset + lastMatch.size == l-1;
@@ -638,7 +638,7 @@ module SegmentedArray {
             // regexp.match only returns a match if the start of the string matches the pattern
             // h = true iff len(match) == len(string) (-1 to account for null byte)
             // if no match is found reMatch.size returns -1
-            h = myRegex.match(interpretAsString(va[o..#l])).size == l-1;
+            h = myRegex.match(interpretAsString(va, o..#l, borrow=true)).size == l-1;
           }
         }
       }
@@ -745,7 +745,7 @@ module SegmentedArray {
       var rightStart: [offsets.aD] int;
 
       forall (o, len, i) in zip(oa, lengths, offsets.aD) with (var myRegex = _unsafeCompileRegex(delimiter)) {
-        var matches = myRegex.matches(interpretAsString(va[o..#len]));
+        var matches = myRegex.matches(interpretAsString(va, o..#len, borrow=true));
         if matches.size < times {
           // not enough occurances of delim, the entire string stays together, and the param args
           // determine whether it ends up on the left or right
@@ -1375,20 +1375,25 @@ module SegmentedArray {
     }
   }
 
-  /* Convert an array of raw bytes into a Chapel string. */
-  inline proc interpretAsString(bytearray: [?D] uint(8)): string {
-    // Byte buffer must be local in order to make a C pointer
-    var localBytes: [{0..#D.size}] uint(8) = bytearray;
-    var cBytes = c_ptrTo(localBytes);
-    // Byte buffer is null-terminated, so length is buffer.size - 1
-    // The contents of the buffer should be copied out because cBytes will go out of scope
-    // var s = new string(cBytes, D.size-1, D.size, isowned=false, needToCopy=true);
-    var s: string;
+  /*
+     Interpret a region of a byte array as a Chapel string. If `borrow=false` a
+     new string is returned, otherwise the string borrows memory from the array
+     (reduces memory allocations if the string isn't needed after array)
+   */
+  proc interpretAsString(bytearray: [?D] uint(8), region: range(?), borrow=false): string {
+    var localSlice = new lowLevelLocalizingSlice(bytearray, region);
+    // Byte buffer is null-terminated, so length is region.size - 1
     try {
-      s = createStringWithNewBuffer(cBytes, D.size-1, D.size);
+      if localSlice.isOwned {
+        localSlice.isOwned = false;
+        return createStringWithOwnedBuffer(localSlice.ptr, region.size-1, region.size);
+      } else if borrow {
+        return createStringWithBorrowedBuffer(localSlice.ptr, region.size-1, region.size);
+      } else {
+        return createStringWithNewBuffer(localSlice.ptr, region.size-1, region.size);
+      }
     } catch {
-      s = "<error interpreting bytes as string>";
+      return "<error interpreting bytes as string>";
     }
-    return s;
   }
 }