
Fixed all accessibility colors. Again. #23055

Merged: 47 commits, Dec 11, 2024

Commits (the diff shown below is from 1 commit)
8189132
Fixed all accessibility colors. Again.
MaanavD Dec 9, 2024
ed72c86
Removed ruby version req. for checklinks.
MaanavD Dec 9, 2024
376e895
Forced ruby version 3.3 (current latest).
MaanavD Dec 9, 2024
2a55fcf
fixed checklinks.
MaanavD Dec 9, 2024
c8f9c9b
Fixed ignore syntax.
MaanavD Dec 9, 2024
dbca8bd
Attempted fix checklinks.
MaanavD Dec 9, 2024
43c415d
Attempted fix checklinks again.
MaanavD Dec 9, 2024
6e2a114
checklinks fix v3.
MaanavD Dec 9, 2024
7ac7c06
Trying gh action for checklinks.
MaanavD Dec 9, 2024
1fe28d7
update ruby.
MaanavD Dec 9, 2024
32dbc95
Update htmlproofer.
MaanavD Dec 9, 2024
b71a8a4
node 22 for LTS.
MaanavD Dec 9, 2024
99ae397
Working! now to add the flags.
MaanavD Dec 9, 2024
bdd406f
trying to remove wrong flag.
MaanavD Dec 9, 2024
06de101
only check links.
MaanavD Dec 9, 2024
adef153
Updated checks syntax.
MaanavD Dec 9, 2024
251df49
Allow missing HREF.
MaanavD Dec 9, 2024
c433053
Don't check external hashes.
MaanavD Dec 9, 2024
79fdacc
replaced all instances of http with https
MaanavD Dec 9, 2024
4ee28c7
Fixed false.
MaanavD Dec 9, 2024
fcd0a48
Fixed false (again?)/
MaanavD Dec 9, 2024
d2f3328
removed false?
MaanavD Dec 9, 2024
cc02e5c
Trying again.
MaanavD Dec 9, 2024
0006f62
Trying again..
MaanavD Dec 9, 2024
95acbed
Trying again...
MaanavD Dec 9, 2024
e220f26
Ignore linkedin for spam.
MaanavD Dec 9, 2024
8b5e423
Trying different checklinks.
MaanavD Dec 10, 2024
cee8760
Trying different checklinks.
MaanavD Dec 10, 2024
8e2bbb8
Formatted using prettier.
MaanavD Dec 10, 2024
4d6646f
Using older htmlproofer.
MaanavD Dec 10, 2024
0f63d84
Using oldest htmlproofer.
MaanavD Dec 10, 2024
5d7f3b2
Tried older ruby.
MaanavD Dec 10, 2024
d9d0b86
attempting htmlproofer again.
MaanavD Dec 10, 2024
921db9f
Trying to fix formatting.
MaanavD Dec 10, 2024
cfcaf65
Trying again.
MaanavD Dec 10, 2024
73e2ee7
Trying again....
MaanavD Dec 10, 2024
b1d4539
Fix checks.
MaanavD Dec 10, 2024
8a3b7f1
doublequote links.
MaanavD Dec 10, 2024
2adc617
remove links only check.
MaanavD Dec 10, 2024
3b8da3e
don't check external hash.
MaanavD Dec 10, 2024
827f066
don't check external hash..
MaanavD Dec 10, 2024
68eca83
disable external hash check.
MaanavD Dec 10, 2024
650ca6a
no check external hash.
MaanavD Dec 10, 2024
0b109b5
block linkedin, allow_missing_href
MaanavD Dec 10, 2024
0ddcb8d
block linkedin properly.
MaanavD Dec 10, 2024
f9cbd18
fixed links.
MaanavD Dec 10, 2024
824c734
Fixed all links.
MaanavD Dec 10, 2024
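Most of the commits above iterate on the checklinks CI job rather than the color fixes themselves. Pieced together from the commit messages alone ("only check links", "Allow missing HREF", "Don't check external hashes", "Ignore linkedin for spam"), the final html-proofer invocation likely resembled the sketch below. The site directory, the exact flag spellings, and the html-proofer version are assumptions, since the workflow file itself is not part of this diff:

```shell
# Hypothetical reconstruction of the checklinks step described by the
# commit messages above; not taken from the actual workflow file.
gem install html-proofer

# Flag names follow the html-proofer 5.x CLI; each maps to one of the
# commit messages in the list above.
htmlproofer ./_site \
  --checks Links \
  --allow-missing-href \
  --no-check-external-hash \
  --ignore-urls "/linkedin.com/"
```

If the real workflow differs (for example, an older html-proofer with underscore-style options, or a third-party GitHub Action wrapping it, as "Trying gh action for checklinks." suggests), the flags would need to be adapted accordingly.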
7 changes: 4 additions & 3 deletions _sass/color_schemes/onnxruntime.scss
@@ -13,6 +13,7 @@ $btn-primary-color: #226aca;
// }
// 2024 December Accessibility changes
.highlight .s { color: #3c7a3b ;}
.highlight .py {color: #a25f00;}
// Initial Theme
.highlight .hll { background-color: #ffffcc; }
.highlight { background: #ffffff; }
@@ -22,7 +23,7 @@ $btn-primary-color: #226aca;
.highlight .o { color: #333333; }
.highlight .ch { color: #707070 ; }
.highlight .cm { color: #707070 ; }
-.highlight .cp { color: #557799; }
+.highlight .cp { color: #507191; }
.highlight .cpf { color: #707070 ; }
.highlight .c1 { color: #707070 ; }
.highlight .cs { color: #cc0000; font-weight: bold; }
@@ -52,7 +53,7 @@ $btn-primary-color: #226aca;
.highlight .ni { color: #880000; font-weight: bold; }
.highlight .ne { font-weight: bold; color: #eb0000; }
.highlight .nf { color: #0066BB; font-weight: bold; }
-.highlight .nl { font-weight: bold; color: #8f6f00; }
+.highlight .nl { font-weight: bold; color: #876900; }
.highlight .nn { font-weight: bold; color: #0d77a2 ; }
.highlight .nt { color: #007700; }
.highlight .nv { color: #996633; }
@@ -68,7 +69,7 @@
.highlight .sc { color: #0044DD; }
.highlight .dl { background-color: #fff0f0; }
.highlight .sd { color: #d54220; }
-.highlight .s2 { background-color: #fff0f0; }
+.highlight .s2 { color: #3c7a3b ; background-color: #fff0f0; }
.highlight .se { color: #666666; font-weight: bold; background-color: #fff0f0; }
.highlight .sh { background-color: #fff0f0; }
.highlight .si { background-color: #eeeeee; }
2 changes: 1 addition & 1 deletion package.json
@@ -36,7 +36,7 @@
"prettier-plugin-svelte": "^2.10.1",
"svelte": "^4.0.5",
"svelte-check": "^3.4.3",
-"svelte-highlight": "^7.3.0",
+"svelte-highlight": "^7.7.0",
"svelte-preprocess-import-assets": "^1.0.1",
"svelte-sitemap": "^2.6.0",
"tailwindcss": "^3.3.3",
4 changes: 2 additions & 2 deletions src/routes/+layout.svelte
@@ -2,15 +2,15 @@
import '../app.css';
import Header from './components/header.svelte';
import Footer from './components/footer.svelte';
-import oneLight from 'svelte-highlight/styles/one-light';
+import a11yLight from 'svelte-highlight/styles/a11y-light';
import { fade } from 'svelte/transition';
import { page } from '$app/stores';
export let data;
const url = 'https://onnxruntime.ai';
</script>

<svelte:head>
-{@html oneLight}
+{@html a11yLight}
{#if !data.pathname.startsWith('/blogs/')}
<title
>ONNX Runtime | {data.pathname == '/'
40 changes: 20 additions & 20 deletions src/routes/blogs/accelerating-llama-2/+page.svelte
@@ -46,14 +46,14 @@
<div class="container mx-auto px-4 md:px-8 lg:px-48 pt-8">
<h1 class="text-5xl pb-2">Accelerating LLaMA-2 Inference with ONNX Runtime</h1>
<p class="text-neutral">
-By: <a href="https://www.linkedin.com/in/kunal-v-16315b94" class="text-blue-700 underline"
+By: <a href="https://www.linkedin.com/in/kunal-v-16315b94" class="text-blue-800 underline"
>Kunal Vaishnavi</a
>
and
-<a href="https://www.linkedin.com/in/parinitaparinita/" class="text-blue-700 underline">Parinita Rahi</a>
+<a href="https://www.linkedin.com/in/parinitaparinita/" class="text-blue-800 underline">Parinita Rahi</a>
</p>
<p class="text-neutral">
-14TH NOVEMBER, 2023 <span class="italic text-stone-500">(Updated 22nd November)</span>
+14TH NOVEMBER, 2023 <span class="italic text-stone-600">(Updated 22nd November)</span>
</p>
<div class="py-4">
<p class="mb-4">
@@ -71,13 +71,13 @@
quantization updates, and cross-platform usage scenarios.
</p>

-<h2 class="text-blue-700 text-3xl mb-4">Background: Llama2 and Microsoft</h2>
+<h2 class="text-blue-800 text-3xl mb-4">Background: Llama2 and Microsoft</h2>

<p class="mb-4">
Llama2 is a state-of-the-art open source LLM from Meta ranging in scale from 7B to 70B
parameters (7B, 13B, 70B). Microsoft and Meta <a
href="https://blogs.microsoft.com/blog/2023/07/18/microsoft-and-meta-expand-their-ai-partnership-with-llama-2-on-azure-and-windows/"
-class="text-blue-700 underline">announced</a
+class="text-blue-800 underline">announced</a
> their AI on Azure and Windows collaboration in July 2023. As part of the announcement, Llama2
was added to the Azure AI model catalog, which serves as a hub of foundation models that empower
developers and machine learning (ML) professionals to easily discover, evaluate, customize, and
@@ -90,7 +90,7 @@
your costs.
</p>

-<h2 class="text-blue-700 text-3xl mb-4">
+<h2 class="text-blue-800 text-3xl mb-4">
Faster Inferencing with New ONNX Runtime Optimizations
</h2>

@@ -116,7 +116,7 @@
</div>
<div class="mt-2 mb-4 text-center">Figure 1: E2E Throughput Comparisons</div>

-<h2 class="text-blue-700 text-3xl mb-4">Latency and Throughput</h2>
+<h2 class="text-blue-800 text-3xl mb-4">Latency and Throughput</h2>

<p class="mb-4">
The graphs below show latency comparisons between the ONNX Runtime and PyTorch variants of the
@@ -153,11 +153,11 @@
<p class="mb-4">
More details on these metrics can be found <a
href="https://github.com/microsoft/onnxruntime-inference-examples/blob/main/python/models/llama/README.md"
-class="text-blue-700 underline">here</a
+class="text-blue-800 underline">here</a
>.
</p>

-<h2 class="text-blue-700 text-3xl mb-4">ONNX Runtime with Multi-GPU Inference</h2>
+<h2 class="text-blue-800 text-3xl mb-4">ONNX Runtime with Multi-GPU Inference</h2>

<p class="mb-4">
ONNX Runtime supports multi-GPU inference to enable serving large models. Even in FP16
@@ -166,7 +166,7 @@
</p>

<p class="mb-4">
-ONNX Runtime applied <a href="https://arxiv.org/pdf/1909.08053.pdf" class="text-blue-700 underline"
+ONNX Runtime applied <a href="https://arxiv.org/pdf/1909.08053.pdf" class="text-blue-800 underline"
>Megatron-LM</a
>
Tensor Parallelism on the 70B model to split the original model weight onto different GPUs. Megatron
@@ -177,7 +177,7 @@
You can find additional example scripts
<a
href="https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/transformers/models/llama/"
-class="text-blue-700 underline">here</a
+class="text-blue-800 underline">here</a
>.
</p>

@@ -186,7 +186,7 @@
<figcaption class="mt-2 mb-4 text-center">Figure 4: 70B Llama2 Model Throughput</figcaption>
</figure>

-<h2 class="text-blue-700 text-3xl mb-4">ONNX Runtime Optimizations</h2>
+<h2 class="text-blue-800 text-3xl mb-4">ONNX Runtime Optimizations</h2>
<figure class="px-10 pt-4">
<img src={figure5} alt="LLaMA-2 Optimization Diagram" />
<figcaption class="mt-2 mb-4 text-center">Figure 5: LLaMA-2 Optimization Diagram</figcaption>
@@ -253,24 +253,24 @@
calculate the rotary embeddings more efficiently with less memory usage. The rotary embedding
compute kernels also support interleaved and non-interleaved formats to support both the <a
href="https://github.com/microsoft/Llama-2-Onnx"
-class="text-blue-700 underline">Microsoft version of LLaMA-2</a
+class="text-blue-800 underline">Microsoft version of LLaMA-2</a
>
and the Hugging Face version of LLaMA-2 respectively while sharing the same calculations.
</p>

<p class="mb-4">
The optimizations work for the <a
href="https://huggingface.co/meta-llama"
-class="text-blue-700 underline">Hugging Face versions</a
+class="text-blue-800 underline">Hugging Face versions</a
>
(models ending with <i>-hf</i>) and the Microsoft versions. You can download the optimized HF
versions from
-<a href="https://github.com/microsoft/Llama-2-Onnx/tree/main-CUDA_CPU" class="text-blue-700 underline"
+<a href="https://github.com/microsoft/Llama-2-Onnx/tree/main-CUDA_CPU" class="text-blue-800 underline"
>Microsoft's LLaMA-2 ONNX repository</a
>. Stay tuned for newer Microsoft versions coming soon!
</p>

-<h2 class="text-blue-700 text-3xl mb-4">Optimize your own model using Olive</h2>
+<h2 class="text-blue-800 text-3xl mb-4">Optimize your own model using Olive</h2>

<p class="mb-4">
Olive is a hardware-aware model optimization tool that incorporates advanced techniques such
@@ -282,25 +282,25 @@
<p class="mb-4">
Here is an example of <a
href="https://github.com/microsoft/Olive/tree/main/examples/llama2"
-class="text-blue-700 underline">Llama2 optimization with Olive</a
+class="text-blue-800 underline">Llama2 optimization with Olive</a
>, which harnesses ONNX Runtime optimizations highlighted in this blog. Distinct optimization
flows cater to various requirements. For instance, you have the flexibility to choose
different data types for quantization in CPU and GPU inference, based on your accuracy
tolerance. Additionally, you can fine-tune your own Llama2 model with Olive-QLoRa on client
GPUs and perform inference with ONNX Runtime optimizations.
</p>

-<h2 class="text-blue-700 text-3xl mb-4">Usage Example</h2>
+<h2 class="text-blue-800 text-3xl mb-4">Usage Example</h2>

<p class="mb-4">
Here is a <a
href="https://github.com/microsoft/onnxruntime-inference-examples/blob/main/python/models/llama/LLaMA-2%20E2E%20Notebook.ipynb"
-class="text-blue-700 underline">sample notebook</a
+class="text-blue-800 underline">sample notebook</a
> that shows you an end-to-end example of how you can use the above ONNX Runtime optimizations
in your application.
</p>

-<h2 class="text-blue-700 text-3xl mb-4">Conclusion</h2>
+<h2 class="text-blue-800 text-3xl mb-4">Conclusion</h2>

<p class="mb-4">
The advancements discussed in this blog provide faster Llama2 inferencing with ONNX Runtime,
2 changes: 1 addition & 1 deletion src/routes/blogs/blog-post-featured.svelte
@@ -33,7 +33,7 @@
<h2 class="card-title">{title}</h2>
<p>{description}</p>
<img class="rounded" src={image} alt={imgalt} />
-<div class="text-right text-blue-700">
+<div class="text-right text-blue-800">
{date}
</div>
</div>
2 changes: 1 addition & 1 deletion src/routes/blogs/blog-post.svelte
@@ -30,7 +30,7 @@
<div class="card-body">
<h2 class="card-title">{title}</h2>
<p>{description}</p>
-<p class="text-blue-700 text-right">
+<p class="text-blue-800 text-right">
{date}
</p>
</div>
4 changes: 2 additions & 2 deletions src/routes/blogs/post.svelte
@@ -82,15 +82,15 @@
<p class="inline">By:</p>
{/if}
{#each authors as author, i}
-<a href={authorsLink[i]} class="text-blue-700">{author}</a>{i + 1 === authors.length
+<a href={authorsLink[i]} class="text-blue-800">{author}</a>{i + 1 === authors.length
? ''
: ', '}
{/each}
</p>
<p class="text-neutral">
{date.toLocaleUpperCase()}
{#if updated != undefined}
-<span class="italic text-stone-500">(Updated {updated})</span>
+<span class="italic text-stone-600">(Updated {updated})</span>
{/if}
</p>
<div class="py-4 markdown-body">
4 changes: 0 additions & 4 deletions src/routes/components/code-blocks.svelte
@@ -8,7 +8,6 @@
import FaLink from 'svelte-icons/fa/FaLink.svelte';
import { blur, fade } from 'svelte/transition';
import { d } from 'svelte-highlight/languages';
-import github from "svelte-highlight/styles/github";

let pythonCode =
'import onnxruntime as ort\n# Load the model and create InferenceSession\nmodel_path = "path/to/your/onnx/model"\nsession = ort.InferenceSession(model_path)\n# "Load and preprocess the input image inputTensor"\n...\n# Run inference\noutputs = session.run(None, {"input": inputTensor})\nprint(outputs)';
@@ -51,9 +50,6 @@

</script>
<svelte:window bind:innerWidth/>
-<svelte:head>
-{@html github}
-</svelte:head>
<div class="container mx-auto px-4">
<h3 class="text-xl mb-4 text-center">
Use ONNX Runtime with your favorite language and get started with the tutorials:
2 changes: 1 addition & 1 deletion src/routes/components/hero.svelte
@@ -126,7 +126,7 @@
<a class="underline" href="http://">More interested in training? More info here.</a>
</p> -->
<p class="text-lg mt-2">
-<a class="text-blue-700 font-medium" href="./getting-started"
+<a class="text-blue-800 font-medium" href="./getting-started"
>Interested in using other languages? See the many others we support →</a
>
</p>
2 changes: 1 addition & 1 deletion src/routes/components/performance.svelte
@@ -11,7 +11,7 @@
CPU, GPU, NPU - no matter what hardware you run on, ONNX Runtime optimizes for latency,
throughput, memory utilization, and binary size. In addition to excellent out-of-the-box
performance for common usage patterns, additional
-<a href="https://onnxruntime.ai/docs/performance/" class="text-blue-700 underline"
+<a href="https://onnxruntime.ai/docs/performance/" class="text-blue-800 underline"
>model optimization techniques</a
> and runtime configurations are available to further improve performance for specific use cases
and models.
4 changes: 2 additions & 2 deletions src/routes/components/training-and-inference.svelte
@@ -62,10 +62,10 @@
<p class="text-lg">
Accelerate training of popular models, including <a
href="https://huggingface.co/"
-class="text-blue-700">Hugging Face</a
+class="text-blue-800 underline">Hugging Face</a
>
models like Llama-2-7b and curated models from the
-<a href="https://ml.azure.com/" class="text-blue-700"
+<a href="https://ml.azure.com/" class="text-blue-800 underline"
>Azure AI | Machine Learning Studio</a
> model catalog.
</p>
18 changes: 9 additions & 9 deletions src/routes/components/winarm.svelte
@@ -35,33 +35,33 @@
<div class="divider" />
<div class="grid grid-cols-3 gap-4">
<div class="md:col-span-2 col-span-3">
-<h2 class="text-xl text-blue-700">Get started on your Windows Dev Kit 2023 today</h2>
+<h2 class="text-xl text-blue-800">Get started on your Windows Dev Kit 2023 today</h2>
Follow these steps to setup your device to use ONNX Runtime (ORT) with the built in NPU:
<ol class="list-decimal ml-10">
<li>
<a
-class="text-blue-700"
+class="text-blue-800"
href="https://qpm.qualcomm.com/main/tools/details/qualcomm_ai_engine_direct">Download</a
> the Qualcomm AI Engine Direct SDK (QNN SDK)
</li>
<li>
<a
-class="text-blue-700"
+class="text-blue-800"
href="https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.QNN">Download</a
> and install the ONNX Runtime with QNN package
</li>
<li>Start using the ONNX Runtime API in your application.</li>
</ol>
<br /><br />
-<p class="text-xl text-blue-700">Optimizing models for the NPU</p>
-<a class="text-blue-700" href="https://onnx.ai/">ONNX</a> is a standard format for
+<p class="text-xl text-blue-800">Optimizing models for the NPU</p>
+<a class="text-blue-800" href="https://onnx.ai/">ONNX</a> is a standard format for
representing ML models authored in frameworks like PyTorch, TensorFlow, and others. ONNX
Runtime can run any ONNX model, however to make use of the NPU, you currently need to quantize
the ONNX model to QDQ model.
<br />
See our
<a
-class="text-blue-700"
+class="text-blue-800"
href="https://github.com/microsoft/onnxruntime-inference-examples/tree/main/c_cxx/QNN_EP/mobilenetv2_classification"
>C# tutorial</a
>
@@ -70,13 +70,13 @@
Many models can be optimized for the NPU using this process. Even if a model cannot be optimized
for the NPU, it can still be run by ONNX Runtime on the CPU.
<br /><br />
-<p class="text-xl text-blue-700">Getting Help</p>
+<p class="text-xl text-blue-800">Getting Help</p>
For help with ONNX Runtime, you can<a
-class="text-blue-700"
+class="text-blue-800"
href="https://github.com/microsoft/onnxruntime/discussions">start a discussion</a
>
on GitHub or
<a class="text-blue-700" href="https://github.com/microsoft/onnxruntime/issues"
<a class="text-blue-800" href="https://github.com/microsoft/onnxruntime/issues"
>file an issue</a
>.
</div>
2 changes: 1 addition & 1 deletion src/routes/inference/+page.svelte
@@ -156,7 +156,7 @@
</div>
<a
href="https://github.com/microsoft/onnxruntime-inference-examples/tree/main/mobile"
-class="text-2xl text-blue-700">See more examples of ONNX Runtime Mobile on GitHub. →</a
+class="text-2xl text-blue-800">See more examples of ONNX Runtime Mobile on GitHub. →</a
>
</div>
</div>
2 changes: 1 addition & 1 deletion src/routes/models/+page.svelte
@@ -84,7 +84,7 @@
<h2 class="text-2xl">Can't find what you're looking for?</h2>
<p>
Convert to ONNX, optimize, and quantize your own models quickly and easily with <a
-class="text-blue-700 underline"
+class="text-blue-800 underline"
href="https://github.com/microsoft/Olive/tree/main">Olive</a
>. Here's a quick snippet showing you how easy it can be done:
</p>
2 changes: 1 addition & 1 deletion src/routes/onnx/+page.svelte
@@ -15,7 +15,7 @@
We hope your stay is short and that you quickly get what you need!
</p>
<p class="text-lg mb-4">
-All below links are <a href="https://aka.ms" class="text-blue-700">aka.ms/</a> supported, so
+All below links are <a href="https://aka.ms" class="text-blue-800">aka.ms/</a> supported, so
feel free to use those in the future.
</p>
</div>