diff --git a/lib/grim/page.rb b/lib/grim/page.rb
index 3b50ef2..c2a4e7f 100644
--- a/lib/grim/page.rb
+++ b/lib/grim/page.rb
@@ -40,17 +40,25 @@ def save(path, options={})
       Grim.processor.save(@pdf, @index, path, options)
     end
 
-    # Extracts the text from the selected page.
+    # Extracts the text from the selected page, using additional options.
+    #
+    # options - Hash of options to customize the extraction (default: {}):
+    #           :flags - Array of extra pdftotext flags. Every element is
+    #                    shell-escaped, so pass an option's value as its own element.
     #
     # For example:
     #
     #   pdf[1].text
     #   # => "This is text from slide 2.\n\nAnd even more text from slide 2."
     #
+    #   pdf[1].text({flags: ["-table"]})
+    #
     # Returns a String.
     #
-    def text
-      command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, Shellwords.escape(@pdf.path), "-"].join(' ')
+    def text(options={})
+      # Escape each flag like the PDF path so caller-supplied values cannot inject shell syntax.
+      flags = Array(options.fetch(:flags, [])).map { |flag| Shellwords.escape(flag.to_s) }
+      command = [@pdftotext_path, "-enc", "UTF-8", "-f", @number, "-l", @number, *flags, Shellwords.escape(@pdf.path), "-"].join(' ')
       Grim.logger.debug { "Running pdftotext command" }
       Grim.logger.debug { command }
       `#{command}`
diff --git a/lib/grim/version.rb b/lib/grim/version.rb
index 3120f4e..cd77236 100644
--- a/lib/grim/version.rb
+++ b/lib/grim/version.rb
@@ -1,4 +1,4 @@
 # encoding: UTF-8
 module Grim
-  VERSION = "1.2.0" unless defined?(::Grim::VERSION)
+  VERSION = "1.2.1" unless defined?(::Grim::VERSION)
 end
diff --git a/spec/fixtures/table.pdf b/spec/fixtures/table.pdf
new file mode 100644
index 0000000..62300f6
Binary files /dev/null and b/spec/fixtures/table.pdf differ
diff --git a/spec/lib/grim/page_spec.rb b/spec/lib/grim/page_spec.rb
index db2113f..2405d31 100644
--- a/spec/lib/grim/page_spec.rb
+++ b/spec/lib/grim/page_spec.rb
@@ -51,6 +51,14 @@
         eq("Step 1: get someone to print this curve for you to scale, 72” wide\nStep 2: Get a couple 55 gallon drums\n\n\f")
     end
 
+    it "should extract tabular data with the -table option" do
+      pdf = Grim::Pdf.new(fixture_path("table.pdf"))
+      # Column padding is not part of the contract, so accept any run of blanks between cells.
+      expect(pdf[0].text({flags: ["-table"]})).to match(
+        /Male[ \t]+979[ \t]+\(85\)[ \t]+968[ \t]+\(85\)\n\n[ \t]*Female[ \t]+169[ \t]+\(15\)[ \t]+169[ \t]+\(15\)\n/
+      )
+    end
+
     it "works with full path to pdftotext" do
       pdftotext_path = `which pdftotext`.chomp
       pdf = Grim::Pdf.new(fixture_path("smoker.pdf"), pdftotext_path: pdftotext_path)