Skip to content
This repository has been archived by the owner on Aug 2, 2019. It is now read-only.

Get detailed character information for a pdf page #141

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions PdfiumViewer.Demo/PdfRangeDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,11 @@ public Rectangle RectangleFromPdf(int page, RectangleF rect)
return _document.RectangleFromPdf(TranslatePage(page), rect);
}

/// <summary>
/// Get detailed information for all characters on a page of this range document.
/// </summary>
/// <param name="page">The page to get the information for, relative to this range document.</param>
/// <returns>The character information reported by the wrapped document.</returns>
public IList<PdfCharacterInformation> GetCharacterInformation(int page)
{
    // Route the index through TranslatePage, exactly as RectangleFromPdf does,
    // so the range-relative page is validated and mapped before it reaches the
    // wrapped document. Forwarding the raw index would address the wrong page.
    // NOTE(review): the Page value inside each returned item is whatever the
    // wrapped document reports (presumably the translated index) - confirm
    // whether callers expect it to be mapped back to the range-relative page.
    return _document.GetCharacterInformation(TranslatePage(page));
}

private int TranslatePage(int page)
{
if (page < 0 || page >= PageCount)
Expand Down
7 changes: 7 additions & 0 deletions PdfiumViewer/IPdfDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -252,5 +252,12 @@ public interface IPdfDocument : IDisposable
/// <param name="rect">The rectangle to convert.</param>
/// <returns>The converted rectangle.</returns>
Rectangle RectangleFromPdf(int page, RectangleF rect);

/// <summary>
/// Get detailed information (character, font size and bounds) for all
/// characters on the page.
/// </summary>
/// <param name="page">The page to get the information for.</param>
/// <returns>
/// The character information, as a list of <see cref="PdfCharacterInformation"/>
/// values.
/// </returns>
/// <remarks>
/// NOTE(review): the page index is presumably zero-based, like the other
/// page parameters of this interface - confirm against the implementations.
/// </remarks>
IList<PdfCharacterInformation> GetCharacterInformation(int page);
}
}
11 changes: 11 additions & 0 deletions PdfiumViewer/NativeMethods.Pdfium.cs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,14 @@ public static IntPtr FPDFText_FindStart(IntPtr page, byte[] findWhat, FPDF_SEARC
}
}

/// <summary>
/// Calls the native <c>FPDFText_GetFontSize</c> import while holding the
/// shared <c>LockString</c> lock.
/// </summary>
/// <param name="page">Native handle forwarded unchanged to the import.</param>
/// <param name="index">Character index forwarded unchanged to the import.</param>
/// <returns>The value returned by the native call.</returns>
public static double FPDFText_GetFontSize(IntPtr page, int index)
{
    double fontSize;

    // The native call is made only while the lock is held.
    lock (LockString)
    {
        fontSize = Imports.FPDFText_GetFontSize(page, index);
    }

    return fontSize;
}

public static int FPDFText_GetSchResultIndex(IntPtr handle)
{
lock (LockString)
Expand Down Expand Up @@ -684,6 +692,9 @@ private static class Imports
[DllImport("pdfium.dll")]
public static extern IntPtr FPDFText_FindStart(IntPtr page, byte[] findWhat, FPDF_SEARCH_FLAGS flags, int start_index);

// P/Invoke declaration for FPDFText_GetFontSize in pdfium.dll.
// NOTE(review): despite the parameter name, the caller visible in this change
// passes a text-page handle (PageData.TextPage), not a page handle - confirm
// against the PDFium header.
[DllImport("pdfium.dll")]
public static extern double FPDFText_GetFontSize(IntPtr page, int index);

[DllImport("pdfium.dll")]
public static extern int FPDFText_GetSchResultIndex(IntPtr handle);

Expand Down
26 changes: 26 additions & 0 deletions PdfiumViewer/PdfCharacterInformation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Text;

namespace PdfiumViewer
{
/// <summary>
/// Describes a single character on a PDF page: which page it is on, its
/// offset, the character itself, its font size and its bounds.
/// </summary>
public struct PdfCharacterInformation
{
    /// <summary>
    /// The page the character belongs to.
    /// </summary>
    public int Page { get; }

    /// <summary>
    /// The offset of the character.
    /// </summary>
    public int Offset { get; }

    /// <summary>
    /// The font size of the character.
    /// </summary>
    public double FontSize { get; }

    /// <summary>
    /// The character itself.
    /// </summary>
    public char Character { get; }

    /// <summary>
    /// The bounds of the character.
    /// </summary>
    public RectangleF Bounds { get; }

    /// <summary>
    /// Creates a new character description from the supplied values.
    /// </summary>
    /// <param name="page">The page the character belongs to.</param>
    /// <param name="offset">The offset of the character.</param>
    /// <param name="character">The character itself.</param>
    /// <param name="fontSize">The font size of the character.</param>
    /// <param name="bounds">The bounds of the character.</param>
    public PdfCharacterInformation(int page, int offset, char character, double fontSize, RectangleF bounds)
    {
        // Assigned in parameter order; every property is set exactly once.
        this.Page = page;
        this.Offset = offset;
        this.Character = character;
        this.FontSize = fontSize;
        this.Bounds = bounds;
    }
}
}
10 changes: 10 additions & 0 deletions PdfiumViewer/PdfDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -598,5 +598,15 @@ protected void Dispose(bool disposing)
_disposed = true;
}
}

/// <summary>
/// Get detailed information for all characters on the page.
/// </summary>
/// <param name="page">The page to get the information for.</param>
/// <returns>The character information, as produced by the underlying file.</returns>
public IList<PdfCharacterInformation> GetCharacterInformation(int page) =>
    _file.GetCharacterInformation(page);
}
}
18 changes: 18 additions & 0 deletions PdfiumViewer/PdfFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,24 @@ private string GetPdfText(PageData pageData, PdfTextSpan textSpan)
return FPDFEncoding.GetString(result, 0, textSpan.Length * 2);
}

/// <summary>
/// Collects the character, font size and bounds of every character on a page.
/// </summary>
/// <param name="page">The page to get the information for.</param>
/// <returns>
/// One <see cref="PdfCharacterInformation"/> per character index reported by
/// the text page, in index order; an empty list when the page reports no
/// characters or the character count could not be determined.
/// </returns>
public IList<PdfCharacterInformation> GetCharacterInformation(int page)
{
    using (var pageData = new PageData(_document, _form, page))
    {
        int charCount = NativeMethods.FPDFText_CountChars(pageData.TextPage);

        // FPDFText_CountChars reports -1 on error. A non-positive count must
        // not reach PdfTextSpan/GetPdfText, where it would become a negative
        // length; there is nothing to report in that case.
        if (charCount <= 0)
        {
            return new List<PdfCharacterInformation>();
        }

        // Fetch the page text once and index the string directly; no char[]
        // copy is needed.
        string text = GetPdfText(pageData, new PdfTextSpan(page, 0, charCount));
        var result = new List<PdfCharacterInformation>(charCount);

        for (int i = 0; i < charCount; i++)
        {
            // Defensive: if the extracted text is shorter than the reported
            // character count, use '\0' instead of indexing out of range.
            char character = i < text.Length ? text[i] : '\0';
            var bounds = GetBounds(pageData.TextPage, i);
            double fontSize = NativeMethods.FPDFText_GetFontSize(pageData.TextPage, i);

            result.Add(new PdfCharacterInformation(page, i, character, fontSize, bounds));
        }

        return result;
    }
}
public void DeletePage (int pageNumber)
{
NativeMethods.FPDFPage_Delete(_document, pageNumber);
Expand Down
1 change: 1 addition & 0 deletions PdfiumViewer/PdfiumViewer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
<Compile Include="PdfBookmarkCollection.cs" />
<Compile Include="PdfiumResolveEventHandler.cs" />
<Compile Include="PdfiumResolver.cs" />
<Compile Include="PdfCharacterInformation.cs" />
<Compile Include="PdfPrintMultiplePages.cs" />
<Compile Include="PdfPrintSettings.cs" />
<Compile Include="PdfRectangle.cs" />
Expand Down