Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smaller gcdump storage #1307

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/FastSerialization/SegmentedMemoryStreamReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace FastSerialization
{
public class SegmentedMemoryStreamReader
public class SegmentedMemoryStreamReader : IStreamReader
{
const int BlockCopyCapacity = 10 * 1024 * 1024;

Expand Down Expand Up @@ -127,6 +127,10 @@ public string ReadString()
}
return sb.ToString();
}
/// <summary>
/// Explicit <see cref="IStreamReader"/> bulk-read member. This reader has no implementation
/// for it yet, so every call fails.
/// </summary>
/// <param name="data">Destination buffer (unused).</param>
/// <param name="offset">Start index within <paramref name="data"/> (unused).</param>
/// <param name="length">Number of bytes requested (unused).</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
void IStreamReader.Read(byte[] data, int offset, int length) => throw new NotImplementedException();
/// <summary>
/// Implementation of IStreamReader
/// </summary>
Expand Down
2 changes: 1 addition & 1 deletion src/FastSerialization/SegmentedMemoryStreamWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

namespace FastSerialization
{
public class SegmentedMemoryStreamWriter
public class SegmentedMemoryStreamWriter : IStreamWriter
{
public SegmentedMemoryStreamWriter() : this(64) { }
public SegmentedMemoryStreamWriter(int initialSize)
Expand Down
91 changes: 87 additions & 4 deletions src/MemoryGraph/MemoryGraph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,77 @@ void IFastSerializable.ToStream(Serializer serializer)
base.ToStream(serializer);
// Write out the Memory addresses of each object
serializer.Write(m_nodeAddresses.Count);
for (int i = 0; i < m_nodeAddresses.Count; i++)

// Write m_nodeAddresses as a sequence of groups of addresses near each other. The following assumptions are
// made for this process:
//
// 1. It is common for multiple objects to have addresses within ushort.MaxValue of each other
// 2. It is common for an element of m_nodeAddresses to have the address 0
// 3. The address at index 'i+1' will never be the same value as index 'i', unless that value is 0
//
// Assumption (3) allows '0' values in m_nodeAddresses to be written with the differential value '0', which
// is efficient and does not interrupt the grouping of a segment of otherwise-similar addresses.
int offset = 0;
foreach (var pair in GroupNodeAddresses(m_nodeAddresses))
{
serializer.Write((long)m_nodeAddresses[i]);
// A group is written as:
//
// 1. Int32: The number of elements in the group
// 2. Int64: The address of the first element in the group
// 3. UInt16 (repeat N times, where N = #Group - 1):
// a. 0, if the nth element of the group has the address 0
// b. Otherwise, the offset of the nth element relative to the address of the first element in the group
serializer.Write(pair.Value);
serializer.Write((long)pair.Key);
Debug.Assert(pair.Key == m_nodeAddresses[offset]);

for (int i = 1; i < pair.Value; i++)
{
Address current = m_nodeAddresses[i + offset];
if (current == 0)
{
serializer.Write((short)0);
continue;
}

ushort relativeAddress = (ushort)(current - pair.Key);
serializer.Write(unchecked((short)relativeAddress));
}

offset += pair.Value;
}

serializer.WriteTagged(Is64Bit);

// Splits nodeAddresses into consecutive runs ("groups") that the caller can write as one
// full base address followed by 16-bit offsets from that base.
//
// Each yielded pair is (Key: address of the first element of the group, Value: number of
// elements in the group). The groups are yielded in order and together cover every index of
// nodeAddresses exactly once. Nothing is yielded for an empty list.
//
// An element stays in the current group when either:
//   * its address is 0 (the caller writes it as the offset 0), or
//   * its offset from the group's base address is in the range [1, ushort.MaxValue].
//
// An offset of 0 for a non-zero address (i.e. the address equals the base address) must NOT
// stay in the group: the offset 0 is reserved to mean "address 0", so such an element would be
// read back as 0. It starts a new group instead, where it is stored as the full base address.
IEnumerable<KeyValuePair<Address, int>> GroupNodeAddresses(SegmentedList<Address> nodeAddresses)
{
    if (nodeAddresses.Count == 0)
    {
        yield break;
    }

    Address baseAddress = nodeAddresses[0];
    int startIndex = 0;
    for (int i = 1; i < nodeAddresses.Count; i++)
    {
        Address current = nodeAddresses[i];

        // Zero addresses never end a group.
        if (current == 0)
        {
            continue;
        }

        // Unchecked subtraction: an address below the base wraps to a huge value and
        // therefore fails the range test, which correctly starts a new group.
        var delta = unchecked(current - baseAddress);
        if (delta != 0 && delta <= ushort.MaxValue)
        {
            continue;
        }

        // Close the current group (indices startIndex..i-1) and start a new one at i.
        yield return new KeyValuePair<Address, int>(baseAddress, i - startIndex);

        baseAddress = current;
        startIndex = i;
    }

    // The final group always contains at least one element.
    yield return new KeyValuePair<Address, int>(baseAddress, nodeAddresses.Count - startIndex);
}
}

void IFastSerializable.FromStream(Deserializer deserializer)
Expand All @@ -135,9 +200,27 @@ void IFastSerializable.FromStream(Deserializer deserializer)
int addressCount = deserializer.ReadInt();
m_nodeAddresses = new SegmentedList<Address>(SegmentSize, addressCount);

for (int i = 0; i < addressCount; i++)
// See ToStream above for a description of the differential compression process
int offset = 0;
while (offset < addressCount)
{
m_nodeAddresses.Add((Address)deserializer.ReadInt64());
int groupCount = deserializer.ReadInt();
Address baseAddress = (Address)deserializer.ReadInt64();
m_nodeAddresses.Add(baseAddress);
for (int i = 1; i < groupCount; i++)
{
ushort relativeAddress = unchecked((ushort)deserializer.ReadInt16());
if (relativeAddress == 0)
{
m_nodeAddresses.Add(0);
}
else
{
m_nodeAddresses.Add(baseAddress + relativeAddress);
}
}

offset += groupCount;
}

bool is64bit = false;
Expand Down
59 changes: 52 additions & 7 deletions src/MemoryGraph/graph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -500,20 +500,50 @@ public virtual void ToStream(Serializer serializer)
{
serializer.Write(m_totalSize);
serializer.Write((int)RootIndex);
// Write out the Types

// Write out the module names for types
var moduleNames = new Dictionary<string, int>();
foreach (var type in m_types)
{
if (type.ModuleName is null)
continue;

if (!moduleNames.ContainsKey(type.ModuleName))
{
// Index 0 is implicitly null, so start with 1 for the first non-null value
moduleNames.Add(type.ModuleName, moduleNames.Count + 1);
}
}

serializer.Write(moduleNames.Count);
foreach (var pair in moduleNames)
{
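// NOTE: this relies on Dictionary<TKey, TValue> enumerating in insertion order so that the
// written order matches the indices assigned above. That holds in practice when no entries
// are removed, but the documented enumeration order is undefined.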
serializer.Write(pair.Key);
}

// Write out the Types
serializer.Write(m_types.Count);
for (int i = 0; i < m_types.Count; i++)
{
serializer.Write(m_types[i].Name);
serializer.Write(m_types[i].Size);
serializer.Write(m_types[i].ModuleName);
if (m_types[i].ModuleName is null)
serializer.Write(0);
else
serializer.Write(moduleNames[m_types[i].ModuleName]);
}

// Write out the Nodes
serializer.Write(m_nodes.Count);
int previousLabel = 0;
for (int i = 0; i < m_nodes.Count; i++)
{
serializer.Write((int)m_nodes[i]);
// Apply differential compression to the label, and then write it as a compressed integer
int currentLabel = (int)m_nodes[i];
int difference = unchecked(currentLabel - previousLabel);
Node.WriteCompressedInt(serializer.Writer, difference);
previousLabel = currentLabel;
}

// Write out the Blob stream.
Expand Down Expand Up @@ -558,6 +588,14 @@ public void FromStream(Deserializer deserializer)
deserializer.Read(out m_totalSize);
RootIndex = (NodeIndex)deserializer.ReadInt();

// Read in the module names
var moduleNamesCount = deserializer.ReadInt();
var moduleNames = new string[moduleNamesCount + 1];
for (int i = 0; i < moduleNamesCount; i++)
{
moduleNames[i + 1] = deserializer.ReadString();
}

// Read in the Types
TypeInfo info = new TypeInfo();
int typeCount = deserializer.ReadInt();
Expand All @@ -566,17 +604,22 @@ public void FromStream(Deserializer deserializer)
{
deserializer.Read(out info.Name);
deserializer.Read(out info.Size);
deserializer.Read(out info.ModuleName);
info.ModuleName = moduleNames[deserializer.ReadInt()];
m_types.Add(info);
}

// Read in the Nodes
int nodeCount = deserializer.ReadInt();
m_nodes = new SegmentedList<StreamLabel>(SegmentSize, nodeCount);

int previousLabel = 0;
for (int i = 0; i < nodeCount; i++)
{
m_nodes.Add((StreamLabel)(uint)deserializer.ReadInt());
// Read the label as a compressed differential integer
int difference = Node.ReadCompressedInt(deserializer.Reader);
int currentLabel = unchecked(previousLabel + difference);
m_nodes.Add((StreamLabel)currentLabel);
previousLabel = currentLabel;
}

// Read in the Blob stream.
Expand Down Expand Up @@ -836,7 +879,8 @@ protected internal Node(Graph graph)
}

// Node information is stored in a compressed form because we have a lot of them.
internal static int ReadCompressedInt(SegmentedMemoryStreamReader reader)
internal static int ReadCompressedInt<T>(T reader)
where T : IStreamReader
{
int ret = 0;
byte b = reader.ReadByte();
Expand Down Expand Up @@ -877,7 +921,8 @@ internal static int ReadCompressedInt(SegmentedMemoryStreamReader reader)
return ret;
}

internal static void WriteCompressedInt(SegmentedMemoryStreamWriter writer, int value)
internal static void WriteCompressedInt<T>(T writer, int value)
where T : IStreamWriter
{
if (value << 25 >> 25 == value)
{
Expand Down