Skip to content
This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Trace tool #1723

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/beanmachine/ppl/diagnostics/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@
# flake8: noqa

"""Visual diagnostic tools for Bean Machine models."""

import sys
from pathlib import Path


if sys.version_info >= (3, 8):
# NOTE: NotRequired comes from PEP 655 (https://peps.python.org/pep-0655/) and is only
# part of the standard typing module from Python 3.11 onward, so we import it from
# typing_extensions. It mirrors JavaScript interfaces, where ? marks a key as optional.
from typing import TypedDict

from typing_extensions import NotRequired
else:
from typing_extensions import TypedDict
from typing_extensions import NotRequired, TypedDict


TOOLS_DIR = Path(__file__).parent.resolve()
Expand Down
25 changes: 0 additions & 25 deletions src/beanmachine/ppl/diagnostics/tools/js/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
"fast-kde": "^0.2.1"
},
"devDependencies": {
"@types/node": "^18.0.4",
"@typescript-eslint/eslint-plugin": "^5.30.5",
"@typescript-eslint/parser": "^5.30.5",
"eslint": "^8.19.0",
"eslint-config-airbnb": "^19.0.4",
Expand All @@ -24,32 +22,9 @@
"eslint-plugin-prefer-arrow": "^1.2.3",
"eslint-plugin-react": "^7.28.0",
"eslint-plugin-react-hooks": "^4.3.0",
"prettier": "^2.7.1",
"ts-loader": "^9.3.1",
"ts-node": "^10.9.1",
"typescript": "^4.7.4",
"webpack": "^5.74.0",
"webpack-cli": "^4.10.0"
},
"overrides": {
"cwise": "$cwise",
"minimist": "$minimist",
"quote-stream": "$quote-stream",
"static-eval": "$static-eval",
"static-module": "$static-module",
"typedarray-pool": "$typedarray-pool"
},
"peerDependencies": {
"@types/cwise": "^1.0.4",
"@types/minimist": "^1.2.2",
"@types/static-eval": "^0.2.31",
"@types/typedarray-pool": "^1.1.2",
"buffer": "^6.0.3",
"cwise": "^1.0.10",
"minimist": "^1.2.6",
"quote-stream": "^1.0.2",
"static-eval": "2.1.0",
"static-module": "^3.0.4",
"typedarray-pool": "^1.2.0"
}
}
89 changes: 89 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/array.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,92 @@ export const numericalSort = (data: number[]): number[] => {
return a < b ? -1 : a > b ? 1 : 0;
});
};

/**
 * Determine the shape of the given (possibly nested) array.
 *
 * The shape is read by following the first element of every nesting level, so the
 * data is assumed to be rectangular (not ragged). Only arrays and typed arrays are
 * descended into; any other element (number, string, null, object) ends the walk. An
 * empty array also ends the walk without contributing a dimension, so `shape([])` is
 * `[]` and `shape([[], []])` is `[2]`.
 *
 * @param {any[]} data - Any array of data.
 * @returns {number[]} The shape of the data as an array.
 */
export const shape = (data: any[]): number[] => {
  // Strings also expose `.length` and index into further strings, so checking for a
  // truthy length alone would never terminate on string elements.
  const isIndexable = (value: unknown): value is ArrayLike<unknown> => {
    return (
      Array.isArray(value) ||
      (ArrayBuffer.isView(value) && !(value instanceof DataView))
    );
  };

  const dimensions: number[] = [];
  let level: unknown = data;
  while (isIndexable(level) && level.length > 0) {
    dimensions.push(level.length);
    level = level[0];
  }
  return dimensions;
};

/**
 * Create a linearly spaced array between the given start and stop values.
 *
 * Two modes are supported:
 *  - `size === null`: values advance from `start` in increments of `step`.
 *  - `size !== null`: `step` is ignored and the interval from `start` to `stop` is
 *    divided into `size` equal intervals. A closed range then has `size + 1` points
 *    (both end points included), an open range has `size` points (`stop` excluded).
 *
 * @param {number} start - Where to start the array from.
 * @param {number} stop - Where to stop the array.
 * @param {number} [step] - The step size to take. Ignored when `size` is given.
 * @param {boolean} [closed] - Flag used to return a closed array or not.
 * @param {null | number} [size] - If not null, the number of equal intervals to divide
 *     the range into.
 * @returns {number[]} An array that is linearly spaced between the start and stop
 *     values.
 */
export const linearRange = (
  start: number,
  stop: number,
  step: number = 1,
  closed: boolean = true,
  size: null | number = null,
): number[] => {
  if (size === null) {
    const count = closed
      ? (stop - start) / step + 1
      : (stop - start - step) / step + 1;
    return Array.from({length: count}, (_, i) => {
      return start + i * step;
    });
  }

  // Take the number of points straight from `size`. Re-deriving it as
  // (stop - start) / sizedStep can round to just below `size` in floating point, and
  // the truncation done by Array.from would then silently drop the last point.
  const sizedStep = (stop - start) / size;
  const count = closed ? size + 1 : size;
  // Pin the closed end point to `stop` so accumulated rounding cannot overshoot it.
  const pinEnd = closed && Number.isInteger(size) && size > 0;
  return Array.from({length: count}, (_, i) => {
    return pinEnd && i === size ? stop : start + i * sizedStep;
  });
};

/**
 * Compute the permutation of indices that puts the data in ascending order, in the
 * spirit of NumPy's `argsort`. The input array is left untouched.
 *
 * @param {number[]} data - The data to sort.
 * @returns {number[]} An array of indices that would sort the original array.
 */
export const argSort = (data: number[]): number[] => {
  // Start from the identity permutation and order it by the values it points at.
  const order = data.map((_, position) => {
    return position;
  });
  order.sort((left, right) => {
    return data[left] - data[right];
  });
  return order;
};

/**
 * Tally how many times each value occurs in an array.
 *
 * @param {number[]} data - The numeric array to count objects for.
 * @returns {{[key: string]: number}} An object whose keys are the (stringified) items
 *     of the original array, and whose values are the number of occurrences of that
 *     item.
 */
export const valueCounts = (data: number[]): {[key: string]: number} => {
  const tally: {[key: string]: number} = {};
  for (const value of data) {
    const seenSoFar = tally[value];
    tally[value] = seenSoFar ? seenSoFar + 1 : 1;
  }
  return tally;
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* LICENSE file in the root directory of this source tree.
*/

import {argSort, valueCounts} from './array';

/**
* Scale the given array of numbers by the given scaleFactor. Note that this method
* divides values in the given array by the scaleFactor.
Expand Down Expand Up @@ -32,3 +34,49 @@ export const scaleToOne = (data: number[]): number[] => {
const scaleFactor = Math.max(...data);
return scaleBy(data, scaleFactor);
};

/**
 * Assign ranks to the given data. Follows SciPy's and ArviZ's implementations.
 *
 * Ranks start at 1 for the smallest value. Values that occur more than once all
 * receive the mean of the ranks they would otherwise occupy.
 *
 * @param {number[]} data - The numeric data to rank.
 * @returns {number[]} An array of rankings, aligned with the input data.
 */
export const rankData = (data: number[]): number[] => {
  const n = data.length;
  const rank = new Array<number>(n);

  // Ordinal ranks: the i-th smallest value gets rank i + 1.
  const sortedIndex = argSort(data);
  for (let i = 0; i < n; i += 1) {
    rank[sortedIndex[i]] = i + 1;
  }

  // Sum the ordinal ranks of every tied value in a single pass over the data. The
  // keys match the ones produced by valueCounts, which stringifies each value.
  const counts = valueCounts(data);
  const tiedRankSums: {[key: string]: number} = {};
  for (let i = 0; i < n; i += 1) {
    const key = String(data[i]);
    if (counts[key] > 1) {
      tiedRankSums[key] = (tiedRankSums[key] ?? 0) + rank[i];
    }
  }

  // Replace the rank of every tied value with the mean rank of its group.
  for (let i = 0; i < n; i += 1) {
    const key = String(data[i]);
    if (counts[key] > 1) {
      rank[i] = tiedRankSums[key] / counts[key];
    }
  }
  return rank;
};
152 changes: 152 additions & 0 deletions src/beanmachine/ppl/diagnostics/tools/js/src/stats/histogram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

import {linearRange, numericalSort, shape} from './array';
import {rankData, scaleToOne} from './dataTransformation';
import {mean as computeMean} from './pointStatistic';

/**
 * Compute the histogram of the given data using equal-width bins that span the range
 * from the smallest to the largest value in the data.
 *
 * @param {number[]} data - Data to bin.
 * @param {number} [numBins] - The number of bins to use for the histogram. If none is
 *     given (or 0 is given), then we follow ArviZ's implementation by using twice the
 *     number of bins of the Sturges formula.
 * @returns {number[][]} One `[binIndex, count]` pair per bin, ordered by bin index.
 *     Empty data yields zero counts for the requested bins, or an empty array when the
 *     number of bins was left to be determined from the data.
 */
export const calculateHistogram = (data: number[], numBins: number = 0): number[][] => {
  const sortedData = numericalSort(data);
  const numSamples = sortedData.length;

  let binCount = numBins;
  if (binCount === 0 && numSamples > 0) {
    binCount = Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);
  }
  const bins: number[][] = Array.from({length: binCount}, (_, i) => {
    return [i, 0];
  });
  if (numSamples === 0) {
    return bins;
  }

  // The data is sorted in ascending order, so the extremes sit at both ends. Reading
  // them from there avoids spreading the whole array into Math.min / Math.max, which
  // can exceed the engine's argument limit for large sample counts.
  const dataMin = sortedData[0];
  const dataMax = sortedData[numSamples - 1];
  const rawBinSize = (dataMax - dataMin) / binCount;
  // All values identical: fall back to a unit width so everything lands in bin 0.
  const binSize = rawBinSize === 0 ? 1 : rawBinSize;

  for (const datum of sortedData) {
    // The maximum value falls exactly on the upper edge of the last bin; keep it
    // inside that bin.
    const binIndex = Math.min(Math.floor((datum - dataMin) / binSize), binCount - 1);
    bins[binIndex][1] += 1;
  }
  return bins;
};

/**
 * Per-chain rank histogram data, keyed by chain name (`chain1`, `chain2`, ...) as
 * produced by `rankHistogram`.
 */
export interface RankHistogram {
  [key: string]: {
    /** Data for the histogram bars of the chain; one entry per bin. */
    quad: {
      /** Left edge of each bin. */
      left: number[];
      /** Right edge of each bin. */
      right: number[];
      /** Lower side of each bar. */
      bottom: number[];
      /** Upper side of each bar. */
      top: number[];
      /** Chain number, repeated for every bin. */
      chain: number[];
      /** Text label of each bin, formatted as "left-right". */
      draws: string[];
      /** Scaled bin counts of the chain. */
      rank: number[];
    };
    /** Coordinates of the reference line drawn for the chain. */
    line: {x: number[]; y: number[]};
    /** Chain number, repeated for every point of the line. */
    chain: number[];
    /** Rank mean value of the chain, repeated for every point of the line. */
    rankMean: number[];
    /** Mean of the scaled bin counts, repeated for every point of the line. */
    mean: number[];
  };
}

/**
 * A histogram of rank data.
 *
 * All draws of all chains are ranked together, the ranks are split back into chains,
 * and each chain's ranks are binned. Bin counts are scaled with `scaleToOne` and
 * offset by the zero-based chain index so the chains stack on top of each other.
 *
 * @param {number[][]} data - Raw random variable data for several chains, given as one
 *     array of draws per chain.
 * @returns {RankHistogram} A histogram of the data rankings, keyed by `chain1`,
 *     `chain2`, and so on. Empty when the data has no chains or no draws.
 */
export const rankHistogram = (data: number[][]): RankHistogram => {
  const output: RankHistogram = {};
  const [numChains, numDraws] = shape(data);
  if (!numChains || !numDraws) {
    return output;
  }
  const numSamples = numChains * numDraws;

  // Rank the pooled draws, then cut the ranks back into one array per chain so they
  // have the same shape as the original data.
  const rank = rankData(data.flat());
  const rankArray: number[][] = [];
  for (let i = 0; i < numChains; i += 1) {
    rankArray.push(rank.slice(i * numDraws, (i + 1) * numDraws));
  }

  // Calculate the number of bins needed. We will follow ArviZ and use twice the result
  // using the Sturges' formula.
  const numBins = Math.floor(Math.ceil(2 * Math.log2(numSamples)) + 1);
  // Fold instead of Math.max(...rank): spreading every sample as an argument can
  // exceed the engine's argument limit for long chains.
  const lastBinEdge = rank.reduce((largest, value) => {
    return Math.max(largest, value);
  }, Number.NEGATIVE_INFINITY);

  // Calculate the bin edges. Since the linearRange function computes a linear spacing
  // of values between the start and end point, we need to ensure they are integer
  // values.
  const binEdges = linearRange(0, lastBinEdge, 1, true, numBins).map((value) => {
    return Math.ceil(value);
  });

  // Calculate the histograms of the rank data, and normalize it for each chain.
  // NOTE(review): calculateHistogram bins each chain over that chain's own min..max
  // range, while the quads below are drawn on the shared binEdges — confirm that this
  // is intended.
  for (let i = 0; i < numChains; i += 1) {
    const chainIndex = i + 1;
    const chainName = `chain${chainIndex}`;
    const chainRankHistogram = calculateHistogram(rankArray[i], numBins);
    const counts: number[] = scaleToOne(
      chainRankHistogram.map((bin) => {
        return bin[1];
      }),
    );
    // Shift the scaled counts up by the zero-based chain index to stack the chains.
    const chainCounts = counts.map((value) => {
      return value + i;
    });

    const chainRankMean = computeMean(chainCounts);
    const left = binEdges.slice(0, binEdges.length - 1);
    const right = binEdges.slice(1);
    const binLabel: string[] = [];
    for (let j = 0; j < left.length; j += 1) {
      binLabel.push(`${left[j].toLocaleString()}-${right[j].toLocaleString()}`);
    }
    const x = linearRange(0, numSamples, 1);
    const y = new Array<number>(x.length).fill(chainRankMean);
    output[chainName] = {
      quad: {
        left: left,
        top: chainCounts,
        right: right,
        bottom: new Array<number>(numBins).fill(i),
        chain: new Array<number>(left.length).fill(chainIndex),
        draws: binLabel,
        rank: counts,
      },
      line: {x: x, y: y},
      chain: new Array<number>(x.length).fill(chainIndex),
      rankMean: new Array<number>(x.length).fill(chainIndex - chainRankMean),
      mean: new Array<number>(x.length).fill(computeMean(counts)),
    };
  }
  return output;
};
Loading