diff --git a/.gitignore b/.gitignore
index 53d84dd..f385a16 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ project.lock.json
# VSCode directories that are not at the repository root
/**/.vscode/
+
+# Visual Studio per user directories
+.vs/
diff --git a/src/code/EditContentCommand.cs b/src/code/EditContentCommand.cs
new file mode 100644
index 0000000..3dcb06d
--- /dev/null
+++ b/src/code/EditContentCommand.cs
@@ -0,0 +1,614 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Collections.Generic;
+using System.Collections.ObjectModel;
+using System.Linq;
+using System.IO;
+using System.Management.Automation;
+using System.Runtime.InteropServices;
+using System.Security;
+using System.Text;
+using System.Text.RegularExpressions;
+using System.Collections;
+using System.Management.Automation.Language;
+
+namespace Microsoft.PowerShell.TextUtility
+{
+ [Cmdlet(VerbsData.Edit, "Content", DefaultParameterSetName = ParameterSetPath, SupportsShouldProcess = true)]
+ public class EditContentCommand : PSCmdlet
+ {
+ internal const string CommandName = "Edit-Content";
+ private const string ParameterSetPath = "PathParameterSet";
+ private const string ParameterSetLiteralPath = "PathLiteralParameterSet";
+ private const int lohObjectSizeThreshold = 85000;
+
+ private Regex[] _regexes;
+ private string _patternArrayAsString;
+ private string _replacementArrayAsString;
+
+ ///
+ /// Specifies a path to one or more file locations. Wildcards are permitted.
+ ///
+ [Parameter(Mandatory = true, Position = 2, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true,
+ ParameterSetName = ParameterSetPath)]
+ [ValidateNotNullOrEmpty]
+ public string[] Path { get; set; }
+
+ ///
+ /// Specifies a path to one or more file locations. Unlike the Path parameter, the value of the LiteralPath
+ /// parameter is used exactly as entered. No characters are interpreted as wildcards. If the path includes
+ /// escape characters, enclose them in single quotation marks.
+ /// Single quotation marks tell PowerShell not to interpret any characters as escape sequences.
+ ///
+ [Parameter(Mandatory = true, ValueFromPipeline = true, ValueFromPipelineByPropertyName = true,
+ ParameterSetName = ParameterSetLiteralPath)]
+ [ValidateNotNullOrEmpty]
+ [Alias("PSPath", "LP")]
+ public string[] LiteralPath { get; set; }
+
+ [Parameter(Position = 0, Mandatory = true)]
+ [ValidateNotNull]
+ [AllowEmptyString]
+ public string[] Pattern { get; set; }
+
+ [Parameter(Position = 1, Mandatory = true)]
+ [ValidateNotNull]
+ [AllowEmptyString]
+ public string[] Replacement { get; set; }
+
+ [Parameter]
+ [ArgumentToEncodingTransformation]
+ [ArgumentCompleter(typeof(ArgumentEncodingCompletionsAttribute))]
+ [ValidateNotNullOrEmpty]
+ public Encoding Encoding { get; set; }
+
+ [Parameter]
+ public SwitchParameter CaseSensitive { get; set; }
+
+ [Parameter]
+ public SwitchParameter SimpleMatch { get; set; }
+
+ [Parameter]
+ public SwitchParameter SingleString { get; set; }
+
+ [Parameter]
+ public SwitchParameter PassThru { get; set; }
+
+ [Parameter]
+ public SwitchParameter Force { get; set; }
+
+ protected override void BeginProcessing()
+ {
+ if (Pattern.Length != Replacement.Length)
+ {
+ var msg = "The array length must be the same for both the Pattern and Replacement parameters.";
+ var exc = new PSArgumentException(msg, "Replacement");
+ ThrowTerminatingError(new ErrorRecord(exc, CommandName, ErrorCategory.InvalidArgument, null));
+ }
+
+ var patternStrBld = new StringBuilder();
+ var replacementStrBld = new StringBuilder();
+
+ _regexes = new Regex[Pattern.Length];
+ for (int i = 0; i < Pattern.Length; i++)
+ {
+ string pattern = SimpleMatch ? Regex.Escape(Pattern[i]) : Pattern[i];
+ RegexOptions regexOptions = CaseSensitive ? RegexOptions.None : RegexOptions.IgnoreCase;
+ // TODO: RKH 2020-12-05 Determine if there are cases where using a compiled regex gives better perf.
+ _regexes[i] = new Regex(pattern, regexOptions);
+
+ if (i != 0)
+ {
+ patternStrBld.Append(",");
+ replacementStrBld.AppendFormat(",");
+ }
+
+ patternStrBld.AppendFormat("'{0}'", Pattern[i]);
+ replacementStrBld.AppendFormat("'{0}'", Replacement[i]);
+ }
+
+ _patternArrayAsString = patternStrBld.ToString();
+ _replacementArrayAsString = replacementStrBld.ToString();
+ }
+
+ protected override void ProcessRecord()
+ {
+ // Resolve paths for the selected parameterset
+ var resolvedPaths = new List();
+ string[] paths = ParameterSetName.Equals(ParameterSetPath) ? Path : LiteralPath;
+ foreach (string path in paths)
+ {
+ if (!string.IsNullOrEmpty(path))
+ {
+ try
+ {
+ if (ParameterSetName.Equals(ParameterSetPath))
+ {
+ Collection pathInfos = SessionState.Path.GetResolvedPSPathFromPSPath(path);
+ resolvedPaths.AddRange(pathInfos.Select(pi => pi.Path));
+ }
+ else
+ {
+ resolvedPaths.Add(SessionState.Path.GetUnresolvedProviderPathFromPSPath(path));
+ }
+ }
+ catch (Exception ex)
+ {
+ WriteError(new ErrorRecord(ex, "PathNotFound", ErrorCategory.ObjectNotFound, path));
+ }
+ }
+ }
+
+ // Process each path
+ foreach (string path in resolvedPaths)
+ {
+ try
+ {
+ // Check each path to verify it is not a directory.
+ FileAttributes attrs = File.GetAttributes(path);
+ if ((attrs | FileAttributes.Directory) == FileAttributes.Directory)
+ {
+ var exc = new PSArgumentException(
+ $"Unable to edit content because it is a directory: '{path}'. Specify a path to a file.",
+ ParameterSetName.Equals(ParameterSetPath) ? "Path" : "LiteralPath");
+ WriteError(new ErrorRecord(exc, "InvalidOperation", ErrorCategory.InvalidOperation, path));
+ continue;
+ }
+
+ if (ShouldProcess(path, $"{CommandName} replacing pattern " + _patternArrayAsString + " with " + _replacementArrayAsString))
+ {
+ if (Force) MakeFileWritable(path);
+
+
+ var fileData = new FileData(path);
+ if (SingleString)
+ {
+ EditFileAsSingleString(fileData);
+ }
+ else if (fileData.Length < (lohObjectSizeThreshold - 1000))
+ {
+ EditFileByLineMemoryBacked(fileData);
+ }
+ else
+ {
+ // The file size is large enough that editing it in memory would place MemStream objects in the LOH, so edit use a temp file.
+ // The modified temp file contents are then copied back to the source file (after regex processing).
+ EditFileByLineFileBacked(fileData);
+ }
+ }
+
+ if (PassThru)
+ {
+ Collection results = SessionState.InvokeProvider.Item.Get(path);
+ if (results.Count > 0)
+ {
+ WriteObject(results[0]);
+ }
+ }
+ }
+ catch (FileNotFoundException ex)
+ {
+ WriteError(new ErrorRecord(ex, "PathNotFound", ErrorCategory.ObjectNotFound, path));
+ }
+ catch (SecurityException ex)
+ {
+ WriteError(new ErrorRecord(ex, "SecurityError", ErrorCategory.SecurityError, path));
+ }
+ catch (UnauthorizedAccessException ex)
+ {
+ WriteError(new ErrorRecord(ex, "UnauthorizedError", ErrorCategory.SecurityError, path));
+ }
+ catch (PipelineStoppedException)
+ {
+ throw;
+ }
+ catch (Exception ex)
+ {
+ WriteError(new ErrorRecord(ex, "FileError", ErrorCategory.NotSpecified, path));
+ }
+ }
+ }
+
+ private void EditFileAsSingleString(FileData fileData)
+ {
+ using (var fileStream = new FileStream(fileData.Path, FileMode.Open, FileAccess.ReadWrite, FileShare.Read))
+ {
+ if (Encoding == null) WriteVerboseEncodingInfo(fileData);
+
+ Encoding encoding = Encoding ?? fileData.Encoding;
+ var streamReader = new StreamReader(fileStream, encoding);
+ var content = streamReader.ReadToEnd();
+
+ for (int i = 0; i < _regexes.Length; i++)
+ {
+ content = _regexes[i].Replace(content, Replacement[i]);
+ }
+
+ streamReader.DiscardBufferedData();
+ fileStream.SetLength(0L);
+ var streamWriter = new StreamWriter(fileStream, encoding);
+ streamWriter.Write(content);
+ streamWriter.Flush();
+ }
+ }
+
+ private void EditFileByLineFileBacked(FileData fileData)
+ {
+ string tempPath = System.IO.Path.GetTempFileName();
+ WriteVerbose(string.Format($"{CommandName} using temp file '{tempPath}' for '{fileData.Path}'"));
+
+ try
+ {
+ using (var sourceFileStream = new FileStream(fileData.Path, FileMode.Open, FileAccess.ReadWrite, FileShare.Read))
+ using (var editResultsStream = new FileStream(tempPath, FileMode.Open, FileAccess.ReadWrite))
+ {
+ EditFileByLineImpl(fileData, sourceFileStream, editResultsStream);
+ }
+ }
+ finally
+ {
+ File.Delete(tempPath);
+ }
+ }
+
+ private void EditFileByLineMemoryBacked(FileData fileData)
+ {
+ int memoryStreamCapacity;
+
+ // If file length is within 20% of LOH size or higher, jump up to next order ot magnitude
+ // to limit the number of different sized LOH segments created. Keeping in mind that the
+ // edit operation can make the file larger.
+ if (fileData.Length >= (lohObjectSizeThreshold * 0.8))
+ {
+ memoryStreamCapacity = (int)Math.Pow(10, (int)(Math.Ceiling(Math.Log10(fileData.Length))));
+ }
+ else
+ {
+ memoryStreamCapacity = (int)Math.Max(10, fileData.Length);
+ }
+
+ using (var sourceFileStream = new FileStream(fileData.Path, FileMode.Open, FileAccess.ReadWrite, FileShare.Read))
+ using (var editResultsStream = new MemoryStream(memoryStreamCapacity))
+ {
+ EditFileByLineImpl(fileData, sourceFileStream, editResultsStream);
+ }
+ }
+
+ private void EditFileByLineImpl(FileData fileData, FileStream sourceFileStream, Stream editResultsStream)
+ {
+ if (Encoding == null) WriteVerboseEncodingInfo(fileData);
+
+ Encoding writeEncoding = Encoding ?? fileData.Encoding;
+ var streamReader = new StreamReader(sourceFileStream);
+ var streamWriter = new StreamWriter(editResultsStream, writeEncoding);
+
+ string prevLine = null;
+ string line;
+ while ((line = streamReader.ReadLine()) != null)
+ {
+ if (prevLine != null) streamWriter.WriteLine(prevLine);
+ prevLine = line;
+ for (int i = 0; i < _regexes.Length; i++)
+ {
+ prevLine = _regexes[i].Replace(prevLine, Replacement[i]);
+ }
+ }
+
+ // Use Write or WriteLine on last line depending on whether the source file ends in a newline.
+ if (fileData.LastLineEndsWithNewline)
+ {
+ streamWriter.WriteLine(prevLine ?? "");
+ }
+ else
+ {
+ streamWriter.Write(prevLine ?? "");
+ }
+ streamWriter.Flush();
+
+ // Resets results stream and source file stream to beginning to prep for copy operation.
+ streamReader.DiscardBufferedData();
+ sourceFileStream.SetLength(0L);
+ editResultsStream.Seek(0L, SeekOrigin.Begin);
+
+ editResultsStream.CopyTo(sourceFileStream);
+ sourceFileStream.Flush();
+ }
+
+ private void MakeFileWritable(string path)
+ {
+ var fileInfo = new FileInfo(path);
+ if (fileInfo.IsReadOnly)
+ {
+ WriteVerbose($"{CommandName} -Force specified, making readonly file writable: '{path}'");
+ fileInfo.IsReadOnly = false;
+ }
+ }
+
+ private void WriteVerboseEncodingInfo(FileData fileData)
+ {
+ var msg = string.Format("{0} detected encoding of {1} with {2}BOM for '{3}'{4}",
+ CommandName,
+ fileData.Encoding.EncodingName,
+ (fileData.EncoderEmitsUtf8Identifier ? "" : "no "),
+ fileData.Path,
+ ((Encoding == null) ? "" : " but overriden with " + Encoding + " encoding."));
+ WriteVerbose(msg);
+ }
+
+ internal class FileData
+ {
+ private readonly byte[] _utf8Bom = { 0xEF, 0xBB, 0xBF };
+ private readonly char[] _tempReadEncodingBuffer = new char[256];
+
+ public FileData(string path)
+ {
+ if (string.IsNullOrWhiteSpace(path)) throw new ArgumentNullException("path");
+
+ Path = path;
+ EncoderEmitsUtf8Identifier = true;
+
+ using (var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
+ {
+ Length = fileStream.Length;
+
+ // According to MSDN topic, stream reader can't return accurate encoding until after the first read,
+ // so read some bytes if stream position indicates no reading has been done.
+ var streamReader = new StreamReader(fileStream, detectEncodingFromByteOrderMarks: true);
+ streamReader.Read(_tempReadEncodingBuffer, 0, _tempReadEncodingBuffer.Length);
+ Encoding = streamReader.CurrentEncoding;
+
+ // Do not use streamReader after this point. If so, you need to call streamReader.DiscardBufferedData()
+ // to resync buffer with the underlying stream.
+
+ // Determine if file ends with a newline
+ byte[] endBytes;
+ if (fileStream.Length >= 2)
+ {
+ fileStream.Seek(-2, SeekOrigin.End);
+ endBytes = new byte[2];
+ fileStream.Read(endBytes, 0, 2);
+ LastLineEndsWithNewline = (endBytes[0] == '\n') || (endBytes[0] == '\r') || (endBytes[1] == '\n') || (endBytes[1] == '\r');
+ }
+ else if (fileStream.Length == 1)
+ {
+ fileStream.Seek(-1, SeekOrigin.End);
+ endBytes = new byte[1];
+ fileStream.Read(endBytes, 0, 1);
+ LastLineEndsWithNewline = (endBytes[0] == '\n') || (endBytes[0] == '\r');
+ }
+
+ // Just because StreamReader says it is UTF8, that doesn't mean the original
+ // file has a UTF-8 BOM, this code attempts to detect that configure the returned
+ // encoding to only write a BOM if the original file had a BOM.
+ if (Encoding.Equals(Encoding.UTF8))
+ {
+ if (fileStream.Length < _utf8Bom.Length)
+ {
+ // Can't have a BOM if file length is less than that of BOM
+ EncoderEmitsUtf8Identifier = false;
+ Encoding = new UTF8Encoding(EncoderEmitsUtf8Identifier, throwOnInvalidBytes: true);
+ }
+ else
+ {
+ var fileBytes = new byte[_utf8Bom.Length];
+ fileStream.Seek(0L, SeekOrigin.Begin);
+ fileStream.Read(fileBytes, 0, fileBytes.Length);
+ for (int i = 0; i < _utf8Bom.Length; i++)
+ {
+ if (fileBytes[i] != _utf8Bom[i])
+ {
+ EncoderEmitsUtf8Identifier = false;
+ Encoding = new UTF8Encoding(EncoderEmitsUtf8Identifier, throwOnInvalidBytes: true);
+ }
+ }
+ }
+ }
+ else if (fileStream.Length < 2)
+ {
+ // No BOM at all so default to UTF8 with no BOM for output
+ EncoderEmitsUtf8Identifier = false;
+ Encoding = new UTF8Encoding(EncoderEmitsUtf8Identifier, throwOnInvalidBytes: true);
+ }
+ }
+ }
+
+ public string Path { get; private set; }
+
+ public long Length { get; private set; }
+
+ public Encoding Encoding { get; private set; }
+
+ public bool EncoderEmitsUtf8Identifier { get; private set; }
+
+ public bool LastLineEndsWithNewline { get; private set; }
+ }
+ }
+
+ ///
+ /// To make it easier to specify -Encoding parameter, we add an ArgumentTransformationAttribute here.
+ /// When the input data is of type string and is valid to be converted to System.Text.Encoding, we do
+ /// the conversion and return the converted value. Otherwise, we just return the input data.
+ ///
+ internal sealed class ArgumentToEncodingTransformationAttribute : ArgumentTransformationAttribute
+ {
+ public override object Transform(EngineIntrinsics engineIntrinsics, object inputData)
+ {
+ switch (inputData)
+ {
+ case string stringName:
+ if (EncodingConversion.encodingMap.TryGetValue(stringName, out Encoding foundEncoding))
+ {
+ return foundEncoding;
+ }
+ else
+ {
+ return Encoding.GetEncoding(stringName);
+ }
+ case int intName:
+ return Encoding.GetEncoding(intName);
+ }
+
+ return inputData;
+ }
+ }
+
+ ///
+ /// Provides the set of Encoding values for tab completion of an Encoding parameter.
+ ///
+ internal sealed class ArgumentEncodingCompletionsAttribute : IArgumentCompleter
+ {
+ private readonly string[] _completions;
+
+ public ArgumentEncodingCompletionsAttribute()
+ {
+ _completions = new string[] {
+ EncodingConversion.Ascii,
+ EncodingConversion.BigEndianUnicode,
+ EncodingConversion.BigEndianUtf32,
+ EncodingConversion.OEM,
+ EncodingConversion.Unicode,
+ EncodingConversion.Utf7,
+ EncodingConversion.Utf8,
+ EncodingConversion.Utf8Bom,
+ EncodingConversion.Utf8NoBom,
+ EncodingConversion.Utf32
+ };
+ }
+
+ public IEnumerable CompleteArgument(string commandName, string parameterName, string wordToComplete, CommandAst commandAst, IDictionary fakeBoundParameters)
+ {
+ var wordToCompletePattern = WildcardPattern.Get(string.IsNullOrWhiteSpace(wordToComplete) ? "*" : wordToComplete + "*", WildcardOptions.IgnoreCase);
+
+ foreach (var str in _completions)
+ {
+ if (wordToCompletePattern.IsMatch(str))
+ {
+ yield return new CompletionResult(str, str, CompletionResultType.ParameterValue, str);
+ }
+ }
+ }
+ }
+
+ internal static class EncodingConversion
+ {
+ internal const string Unknown = "unknown";
+ internal const string String = "string";
+ internal const string Unicode = "unicode";
+ internal const string BigEndianUnicode = "bigendianunicode";
+ internal const string BigEndianUtf32 = "bigendianutf32";
+ internal const string Ascii = "ascii";
+ internal const string Utf8 = "utf8";
+ internal const string Utf8NoBom = "utf8NoBOM";
+ internal const string Utf8Bom = "utf8BOM";
+ internal const string Utf7 = "utf7";
+ internal const string Utf32 = "utf32";
+ internal const string Default = "default";
+ internal const string OEM = "oem";
+
+ internal static readonly string[] TabCompletionResults = {
+ Ascii, BigEndianUnicode, BigEndianUtf32, OEM, Unicode, Utf7, Utf8, Utf8Bom, Utf8NoBom, Utf32
+ };
+
+ internal static readonly Dictionary encodingMap = new Dictionary(StringComparer.OrdinalIgnoreCase)
+ {
+ { Ascii, Encoding.ASCII },
+ { BigEndianUnicode, Encoding.BigEndianUnicode },
+ { BigEndianUtf32, new UTF32Encoding(bigEndian: true, byteOrderMark: true) },
+ { Default, ClrFacade.GetDefaultEncoding() },
+ { OEM, ClrFacade.GetOEMEncoding() },
+ { Unicode, Encoding.Unicode },
+#pragma warning disable SYSLIB0001
+ { Utf7, Encoding.UTF7 },
+#pragma warning restore SYSLIB0001
+ { Utf8, ClrFacade.GetDefaultEncoding() },
+ { Utf8Bom, Encoding.UTF8 },
+ { Utf8NoBom, ClrFacade.GetDefaultEncoding() },
+ { Utf32, Encoding.UTF32 },
+ { String, Encoding.Unicode },
+ { Unknown, Encoding.Unicode },
+ };
+
+ ///
+ /// Warn if the encoding has been designated as obsolete.
+ ///
+ /// A cmdlet instance which is used to emit the warning.
+ /// The encoding to check for obsolescence.
+ internal static void WarnIfObsolete(Cmdlet cmdlet, Encoding encoding)
+ {
+ // Check for UTF-7 by checking for code page 65000
+ // See: https://docs.microsoft.com/en-us/dotnet/core/compatibility/corefx#utf-7-code-paths-are-obsolete
+ if (encoding != null && encoding.CodePage == 65000)
+ {
+ cmdlet.WriteWarning("Encoding 'UTF-7' is obsolete, please use UTF-8.");
+ }
+ }
+ }
+
+ ///
+ /// ClrFacade contains all diverging code (different implementation for FullCLR and CoreCLR using if/def).
+ /// It exposes common APIs that can be used by the rest of the code base.
+ ///
+ internal static class ClrFacade
+ {
+ private static volatile Encoding s_defaultEncoding;
+ private static volatile Encoding s_oemEncoding;
+
+ ///
+ /// Facade for getting default encoding.
+ ///
+ internal static Encoding GetDefaultEncoding()
+ {
+ if (s_defaultEncoding == null)
+ {
+ // load all available encodings
+ EncodingRegisterProvider();
+ s_defaultEncoding = new UTF8Encoding(false);
+ }
+
+ return s_defaultEncoding;
+ }
+
+ ///
+ /// Facade for getting OEM encoding
+ /// OEM encodings work on all platforms, or rather codepage 437 is available on both Windows and Non-Windows.
+ ///
+ internal static Encoding GetOEMEncoding()
+ {
+ if (s_oemEncoding == null)
+ {
+ // load all available encodings
+ EncodingRegisterProvider();
+#if UNIX
+ s_oemEncoding = new UTF8Encoding(false);
+#else
+ uint oemCp = NativeMethods.GetOEMCP();
+ s_oemEncoding = Encoding.GetEncoding((int)oemCp);
+#endif
+ }
+
+ return s_oemEncoding;
+ }
+
+ private static void EncodingRegisterProvider()
+ {
+ if (s_defaultEncoding == null && s_oemEncoding == null)
+ {
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+ }
+ }
+
+ ///
+ /// Native methods that are used by facade methods.
+ ///
+ private static class NativeMethods
+ {
+ ///
+ /// Pinvoke for GetOEMCP to get the OEM code page.
+ ///
+ [DllImport("api-ms-win-core-localization-l1-2-0.dll", SetLastError = false, CharSet = CharSet.Unicode)]
+ internal static extern uint GetOEMCP();
+ }
+ }
+}
diff --git a/src/code/Microsoft.PowerShell.TextUtility.csproj b/src/code/Microsoft.PowerShell.TextUtility.csproj
index 906d43b..3fadd4c 100644
--- a/src/code/Microsoft.PowerShell.TextUtility.csproj
+++ b/src/code/Microsoft.PowerShell.TextUtility.csproj
@@ -17,6 +17,7 @@
all
+