diff --git a/src/CastMsg.chpl b/src/CastMsg.chpl index 621f09f525..df28caa67b 100644 --- a/src/CastMsg.chpl +++ b/src/CastMsg.chpl @@ -203,7 +203,7 @@ module CastMsg { } else { end = oa[i+1] - 1; } - e = interpretAsString(va[start..end]) : toType; + e = interpretAsString(va, start..end) : toType; } } catch e: IllegalArgumentError { var errorMsg = "bad value in cast from string to %s".format(toType:string); diff --git a/src/Flatten.chpl b/src/Flatten.chpl index 85a5d313db..f53aa22d44 100644 --- a/src/Flatten.chpl +++ b/src/Flatten.chpl @@ -1,6 +1,7 @@ module Flatten { use ServerConfig; + use AryUtil; use SegmentedArray; use ServerErrors; use SymArrayDmap; @@ -13,21 +14,23 @@ module Flatten { config const NULL_STRINGS_VALUE = 0:uint(8); /* - Convert a uint(8) array into bytes. Modeled after SegString.interpretAsString - */ - inline proc interpretAsBytes(bytearray: [?D] uint(8)): bytes { - // Byte buffer must be local in order to make a C pointer - var localBytes: [{0..#D.size}] uint(8) = bytearray; - var cBytes = c_ptrTo(localBytes); - // Byte buffer is null-terminated, so length is buffer.size - 1 - // The contents of the buffer should be copied out because cBytes will go out of scope - var b: bytes; + Interpret a region of a byte array as bytes. Modeled after interpretAsString + */ + proc interpretAsBytes(bytearray: [?D] uint(8), region: range(?), borrow=false): bytes { + var localSlice = new lowLevelLocalizingSlice(bytearray, region); + // Byte buffer is null-terminated, so length is region.size - 1 try { - b = createBytesWithNewBuffer(cBytes, D.size-1, D.size); + if localSlice.isOwned { + localSlice.isOwned = false; + return createBytesWithOwnedBuffer(localSlice.ptr, region.size-1, region.size); + } else if borrow { + return createBytesWithBorrowedBuffer(localSlice.ptr, region.size-1, region.size); + } else { + return createBytesWithNewBuffer(localSlice.ptr, region.size-1, region.size); + } } catch { - b = b""; + return b""; } - return b; } /* @@ -61,10 +64,10 @@ module Flatten { var writeAgg = newDstAggregator(bool), var nbAgg = newDstAggregator(bool), var matchAgg = newDstAggregator(int)) { + var matchessize = 0 ; // for each string, find delim matches and set the positions of matches in writeToVal to false (non-matches will be copied to flattenedVals) // mark the locations of null bytes (the positions before original offsets and the last character of matches) - var matches = myRegex.matches(interpretAsBytes(origVals[off..#len])); - for m in matches { + for m in myRegex.matches(interpretAsBytes(origVals, off..#len, borrow=true)) { var match: reMatch = m[0]; // set writeToVal to false for matches (except the last character of the match because we will write a null byte) for k in (off + match.offset:int)..#(match.size - 1) { @@ -72,12 +75,13 @@ module Flatten { } // is writeToVal[(off + match.offset:int)..#(match.size - 1)] = false more efficient or for loop with aggregator? nbAgg.copy(nullByteLocations[off + match.offset:int + (match.size - 1)], true); + matchessize += 1; } if off != 0 { // the position before an offset is a null byte (except for off == 0) nbAgg.copy(nullByteLocations[off - 1], true); } - matchAgg.copy(numMatches[i], matches.size); + matchAgg.copy(numMatches[i], matchessize); } // writeToVal is true for positions to copy origVals (non-matches) and positions to write a null byte diff --git a/src/SegmentedArray.chpl b/src/SegmentedArray.chpl index c86feb8a63..ec9b428b52 100644 --- a/src/SegmentedArray.chpl +++ b/src/SegmentedArray.chpl @@ -148,7 +148,7 @@ module SegmentedArray { end = offsets.a[idx+1] - 1; } // Take the slice of the bytearray and "cast" it to a chpl string - var s = interpretAsString(values.a[start..end]); + var s = interpretAsString(values.a, start..end); return s; } @@ -505,7 +505,7 @@ module SegmentedArray { var lenAgg = newDstAggregator(int), var startAgg = newDstAggregator(bool), var matchAgg = newDstAggregator(int)) { - var matches = myRegex.matches(interpretAsString(origVals[off..#len])); + var matches = myRegex.matches(interpretAsString(origVals, off..#len, borrow=true)); for m in matches { var match: reMatch = m[0]; lenAgg.copy(sparseLens[off + match.offset:int], match.size); @@ -616,18 +616,18 @@ module SegmentedArray { when SearchMode.contains { forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) { // regexp.search searches the receiving string for matches at any offset - h = myRegex.search(interpretAsString(va[o..#l])).matched; + h = myRegex.search(interpretAsString(va, o..#l, borrow=true)).matched; } } when SearchMode.startsWith { forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) { // regexp.match only returns a match if the start of the string matches the pattern - h = myRegex.match(interpretAsString(va[o..#l])).matched; + h = myRegex.match(interpretAsString(va, o..#l, borrow=true)).matched; } } when SearchMode.endsWith { forall (o, l, h) in zip(oa, lengths, hits) with (var myRegex = _unsafeCompileRegex(pattern)) { - var matches = myRegex.matches(interpretAsString(va[o..#l])); + var matches = myRegex.matches(interpretAsString(va, o..#l, borrow=true)); var lastMatch: reMatch = matches[matches.size-1][0]; // h = true iff start(lastMatch) + len(lastMatch) == len(string) (-1 to account for null byte) h = lastMatch.offset + lastMatch.size == l-1; @@ -638,7 +638,7 @@ module SegmentedArray { // regexp.match only returns a match if the start of the string matches the pattern // h = true iff len(match) == len(string) (-1 to account for null byte) // if no match is found reMatch.size returns -1 - h = myRegex.match(interpretAsString(va[o..#l])).size == l-1; + h = myRegex.match(interpretAsString(va, o..#l, borrow=true)).size == l-1; } } } @@ -745,7 +745,7 @@ module SegmentedArray { var rightStart: [offsets.aD] int; forall (o, len, i) in zip(oa, lengths, offsets.aD) with (var myRegex = _unsafeCompileRegex(delimiter)) { - var matches = myRegex.matches(interpretAsString(va[o..#len])); + var matches = myRegex.matches(interpretAsString(va, o..#len, borrow=true)); if matches.size < times { // not enough occurances of delim, the entire string stays together, and the param args // determine whether it ends up on the left or right @@ -1375,20 +1375,25 @@ module SegmentedArray { } } - /* Convert an array of raw bytes into a Chapel string. */ - inline proc interpretAsString(bytearray: [?D] uint(8)): string { - // Byte buffer must be local in order to make a C pointer - var localBytes: [{0..#D.size}] uint(8) = bytearray; - var cBytes = c_ptrTo(localBytes); - // Byte buffer is null-terminated, so length is buffer.size - 1 - // The contents of the buffer should be copied out because cBytes will go out of scope - // var s = new string(cBytes, D.size-1, D.size, isowned=false, needToCopy=true); - var s: string; + /* + Interpret a region of a byte array as a Chapel string. If `borrow=false` a + new string is returned, otherwise the string borrows memory from the array + (reduces memory allocations if the string isn't needed after array) + */ + proc interpretAsString(bytearray: [?D] uint(8), region: range(?), borrow=false): string { + var localSlice = new lowLevelLocalizingSlice(bytearray, region); + // Byte buffer is null-terminated, so length is region.size - 1 try { - s = createStringWithNewBuffer(cBytes, D.size-1, D.size); + if localSlice.isOwned { + localSlice.isOwned = false; + return createStringWithOwnedBuffer(localSlice.ptr, region.size-1, region.size); + } else if borrow { + return createStringWithBorrowedBuffer(localSlice.ptr, region.size-1, region.size); + } else { + return createStringWithNewBuffer(localSlice.ptr, region.size-1, region.size); + } } catch { - s = ""; + return ""; } - return s; } }