Skip to content

Commit

Permalink
Merge pull request #31 from contentinnovation/add-tables
Browse files Browse the repository at this point in the history
Adds ability to specify pdftotext options
  • Loading branch information
jonmagic authored May 8, 2017
2 parents 50e973d + e6bf3a8 commit a63a444
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 4 deletions.
11 changes: 8 additions & 3 deletions lib/grim/page.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,22 @@ def save(path, options={})
Grim.processor.save(@pdf, @index, path, options)
end

# Extracts the text from the selected page.
# Extracts the text from the selected page, using additional options.
#
# For example:
#
# pdf[1].text
# # => "This is text from slide 2.\n\nAnd even more text from slide 2."
#
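# pdf[1].text({flags: ["-table"]})
#
# The :flags option is an Array of extra arguments appended to the
# pdftotext command line (default: []). Flags are placed into the shell
# command as given, without escaping, so do not pass untrusted input.
#
# Returns a String.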
#
def text
command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')
def text(options={})
flags = options.fetch(:flags, [])
command_parts = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number]
command_parts += flags if flags.length > 0
command_parts += [Shellwords.escape(@pdf.path), "-"]
command = command_parts.join(' ')
Grim.logger.debug { "Running pdftotext command" }
Grim.logger.debug { command }
`#{command}`
Expand Down
2 changes: 1 addition & 1 deletion lib/grim/version.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# encoding: UTF-8
module Grim
  # Version of the gem. The `defined?` guard leaves the constant untouched
  # when it has already been set, so this file can be loaded more than once
  # without reassigning it. Only one assignment may appear here: an earlier
  # guarded assignment would win and silently mask the current version.
  VERSION = "1.2.1" unless defined?(::Grim::VERSION)
end
Binary file added spec/fixtures/table.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions spec/lib/grim/page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@
eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
end

it "should extract tabular data with the -table option" do
  # Two adjacent rows of the fixture's table, as pdftotext lays them out
  # when the -table flag is passed through the :flags option.
  expected_rows = " Male 979 (85) 968 (85)\n\n" \
                  " Female 169 (15) 169 (15)\n"

  table_pdf = Grim::Pdf.new(fixture_path("table.pdf"))
  extracted = table_pdf[0].text({flags: ["-table"]})

  expect(extracted).to include(expected_rows)
end

it "works with full path to pdftotext" do
pdftotext_path = `which pdftotext`.chomp
pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)
Expand Down

0 comments on commit a63a444

Please sign in to comment.