Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error: Spaces make a new line #1171

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 0 additions & 165 deletions lib/jars/JPedal-LICENSE.txt

This file was deleted.

Binary file removed lib/jars/jpedal_lgpl.jar
Binary file not shown.
Binary file not shown.
7 changes: 6 additions & 1 deletion lib/tabula_java_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,13 @@ module Extraction

# Opens the PDF stored at +pdf_filename+ with PDFBox and returns the loaded
# PDDocument.
#
# pdf_filename - path of the PDF file on disk.
# password     - password used to decrypt the document; the default '' means
#                "no password". nil is treated like ''.
#
# Raises Errno::ENOENT (carrying the offending path) when the file is missing.
# The document is returned open; closing it is left to the caller.
def Extraction.openPDF(pdf_filename, password='')
  # File.exist? replaces the deprecated File.exists? alias.
  raise Errno::ENOENT, pdf_filename unless File.exist?(pdf_filename)

  # Load exactly once, from a java.io.File, and forward the password instead
  # of silently dropping it.
  PDDocument.load(java.io.File.new(pdf_filename), password.to_s)
end

class ObjectExtractor < Java::TechnologyTabula.ObjectExtractor
field_accessor :pdfDocument

alias_method :close!, :close

Expand All @@ -85,6 +86,10 @@ def initialize(pdf_filename, pages=[1], password='', options={})

super(document)
end

# Returns the number of pages reported by the underlying PDF document
# (read through the +pdfDocument+ accessor).
def page_count
  document = pdfDocument
  document.get_number_of_pages
end
end

class PagesInfoExtractor < ObjectExtractor
Expand Down
2 changes: 1 addition & 1 deletion lib/tabula_job_executor/jobs/generate_thumbnails.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def perform
output_dir = options[:output_dir]
thumbnail_sizes = options[:thumbnail_sizes]

generator = JPedalThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
generator = PDFBox2ThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
generator.add_observer(self, :at)
generator.generate_thumbnails!

Expand Down
71 changes: 34 additions & 37 deletions lib/thumbnail_generator.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
require 'java'
require 'observer'

java.lang.System.setProperty('org.jpedal.jai', 'true')
require_relative './jars/jpedal_lgpl.jar'

java_import javax.imageio.ImageIO
java_import java.awt.image.BufferedImage
java_import java.awt.Image

java_import org.jpedal.PdfDecoder
java_import org.jpedal.fonts.FontMappings
java_import org.apache.pdfbox.rendering.PDFRenderer
java_import org.apache.pdfbox.pdmodel.PDDocument
java_import java.io.ByteArrayOutputStream


class AbstractThumbnailGenerator
include Observable
SIZE = 800

def initialize(pdf_filename, output_directory, sizes=[2048, 560])
raise Errno::ENOENT unless File.directory?(output_directory)
Expand Down Expand Up @@ -48,41 +48,37 @@ def generate_thumbnails!
end
end

# Thumbnail generator backed by PDFBox 2's PDFRenderer.
#
# For every page of the PDF it writes one PNG per requested width into the
# output directory, named "document_<width>_<page number>.png", and reports
# progress to registered observers.
#
# NOTE(review): relies on @sizes and @output_directory being set by
# AbstractThumbnailGenerator#initialize, which is not visible here -- confirm.
class PDFBox2ThumbnailGenerator < AbstractThumbnailGenerator
  # Resolution (dots per inch) at which each page is rasterised before scaling.
  RENDER_DPI = 75

  # pdf_filename     - path of the PDF to render.
  # output_directory - directory that receives the PNG files.
  # sizes            - thumbnail widths, in pixels.
  def initialize(pdf_filename, output_directory, sizes=[2048, 560])
    super(pdf_filename, output_directory, sizes)
    @pdf_document = PDDocument.load(java.io.File.new(pdf_filename))
  end

  # Renders every page once, writes a thumbnail for each requested width and
  # notifies observers with (page number, total pages, message) per page.
  # The document is closed when this method finishes, even if rendering fails.
  def generate_thumbnails!
    renderer = PDFRenderer.new(@pdf_document)
    total_pages = @pdf_document.get_number_of_pages

    total_pages.times do |page_index|
      page_number = page_index + 1
      image = renderer.render_image_with_dpi(page_index, RENDER_DPI)

      # Honour the widths passed to the constructor instead of a fixed size.
      @sizes.each { |width| write_thumbnail(image, width, page_number) }

      # Observable only delivers notifications after `changed` has been called.
      changed
      notify_observers(page_number, total_pages, "generating page thumbnails...")
    end
  ensure
    @pdf_document.close
  end

  private

  # Scales +image+ to +width+ pixels wide (keeping the aspect ratio) and writes
  # it as a PNG into the output directory.
  def write_thumbnail(image, width, page_number)
    height = (image.height * (width / image.width.to_f)).round
    thumbnail = BufferedImage.new(width, height, image.type)

    graphics = thumbnail.get_graphics
    graphics.draw_image(image.get_scaled_instance(width, height, Image::SCALE_SMOOTH), 0, 0, nil)
    graphics.dispose

    target = File.join(@output_directory, "document_#{width}_#{page_number}.png")
    ImageIO.write(thumbnail, 'png', java.io.File.new(target))
  end
end

Expand All @@ -93,9 +89,10 @@ def update(page, total_pages)
STDERR.puts "#{page}///#{total_pages}"
end
end
require_relative '../lib/jars/tabula-1.0.0-SNAPSHOT-jar-with-dependencies.jar'

#pdftg = JPedalThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
# pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg = PDFBox2ThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg.add_observer(STDERRProgressReporter.new)
pdftg.generate_thumbnails!
end
2 changes: 1 addition & 1 deletion webapp/tabula_web.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
require 'fileutils'
require 'securerandom'

require_relative '../lib/jars/tabula-0.9.2-jar-with-dependencies.jar'
require_relative '../lib/jars/tabula-1.0.0-SNAPSHOT-jar-with-dependencies.jar'

require_relative '../lib/tabula_java_wrapper.rb'
java_import 'java.io.ByteArrayOutputStream'
Expand Down