PDF to Text / Ruby Examples

This page contains various examples of using the PDF to Text API in Ruby. The examples are complete and fully functional. Read more about how to convert PDF to Text in Ruby.

Basic examples
Rails examples

Basic examples

PDF file to text file

require "pdfcrowd"

begin
    # Instantiate the PDF to Text API client.
    client = Pdfcrowd::PdfToTextClient.new('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # Convert the local PDF file and save the resulting text to invoice.txt.
    client.convertFileToFile('/path/to/invoice.pdf', 'invoice.txt')
rescue Pdfcrowd::Error => e
    # Report the failure on standard error, then re-raise it for the caller.
    $stderr.puts "Pdfcrowd Error: #{e}"
    raise
end

PDF file to in-memory text

require "pdfcrowd"

begin
    # Instantiate the PDF to Text API client.
    client = Pdfcrowd::PdfToTextClient.new('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # Convert the local PDF file and keep the result in the `txt` variable.
    txt = client.convertFile('/path/to/invoice.pdf')

    # From here on, `txt` holds the raw TXT data; it can be sent in an
    # HTTP response, saved to a file, etc.
rescue Pdfcrowd::Error => e
    # Report the failure on standard error, then re-raise it for the caller.
    $stderr.puts "Pdfcrowd Error: #{e}"
    raise
end

PDF file to text stream

require "pdfcrowd"

begin
    # Create an API client instance.
    client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # Create an output stream for the conversion result. The block form of
    # File.open closes the stream automatically, even when the conversion
    # raises, so the file handle is never leaked.
    File.open("invoice.txt", "wb") do |output_stream|
        # Run the conversion and write the result to the output stream.
        client.convertFileToStream("/path/to/invoice.pdf", output_stream)
    end

rescue Pdfcrowd::Error => why
    # Report the failure on standard error, then re-raise it for the caller.
    STDERR.puts "Pdfcrowd Error: #{why}"
    raise
end

PDF URL to text file

require "pdfcrowd"

begin
    # Instantiate the PDF to Text API client.
    client = Pdfcrowd::PdfToTextClient.new('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # Convert the PDF at the given URL and save the resulting text to invoice.txt.
    client.convertUrlToFile('https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf', 'invoice.txt')
rescue Pdfcrowd::Error => e
    # Report the failure on standard error, then re-raise it for the caller.
    $stderr.puts "Pdfcrowd Error: #{e}"
    raise
end

PDF URL to in-memory text

require "pdfcrowd"

begin
    # Instantiate the PDF to Text API client.
    client = Pdfcrowd::PdfToTextClient.new('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # Convert the PDF at the given URL and keep the result in the `txt` variable.
    txt = client.convertUrl('https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf')

    # From here on, `txt` holds the raw TXT data; it can be sent in an
    # HTTP response, saved to a file, etc.
rescue Pdfcrowd::Error => e
    # Report the failure on standard error, then re-raise it for the caller.
    $stderr.puts "Pdfcrowd Error: #{e}"
    raise
end

PDF URL to text stream

require "pdfcrowd"

begin
    # Create an API client instance.
    client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # Create an output stream for the conversion result. The block form of
    # File.open closes the stream automatically, even when the conversion
    # raises, so the file handle is never leaked.
    File.open("invoice.txt", "wb") do |output_stream|
        # Run the conversion and write the result to the output stream.
        client.convertUrlToStream("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf", output_stream)
    end

rescue Pdfcrowd::Error => why
    # Report the failure on standard error, then re-raise it for the caller.
    STDERR.puts "Pdfcrowd Error: #{why}"
    raise
end

In-memory PDF to text file

require "pdfcrowd"

begin
    # Create an API client instance.
    client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # Load the PDF into memory. File.binread reads the whole file in binary
    # mode and closes it again, unlike a bare open(...).read which leaves
    # the file handle open.
    pdf_data = File.binread('/path/to/hello_world.pdf')

    # Run the conversion and save the result to a file.
    client.convertRawDataToFile(pdf_data, "invoice.txt")

rescue Pdfcrowd::Error => why
    # Report the failure on standard error, then re-raise it for the caller.
    STDERR.puts "Pdfcrowd Error: #{why}"
    raise
end

In-memory PDF to in-memory text

require "pdfcrowd"

begin
    # Create an API client instance.
    client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # Load the PDF into memory. File.binread reads the whole file in binary
    # mode and closes it again, unlike a bare open(...).read which leaves
    # the file handle open.
    pdf_data = File.binread('/path/to/hello_world.pdf')

    # Run the conversion and store the result in the `txt` variable.
    txt = client.convertRawData(pdf_data)

    # At this point the "txt" variable contains TXT raw data and
    # can be sent in an HTTP response, saved to a file, etc.

rescue Pdfcrowd::Error => why
    # Report the failure on standard error, then re-raise it for the caller.
    STDERR.puts "Pdfcrowd Error: #{why}"
    raise
end

In-memory PDF to text stream

require "pdfcrowd"

begin
    # Create an API client instance.
    client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # Create an output stream for the conversion result. The block form of
    # File.open closes the stream automatically, even when the conversion
    # raises, so the file handle is never leaked.
    File.open("invoice.txt", "wb") do |output_stream|
        # Load the PDF into memory. File.binread reads the whole file in
        # binary mode and closes it again, unlike a bare open(...).read.
        pdf_data = File.binread('/path/to/hello_world.pdf')

        # Run the conversion and write the result to the output stream.
        client.convertRawDataToStream(pdf_data, output_stream)
    end

rescue Pdfcrowd::Error => why
    # Report the failure on standard error, then re-raise it for the caller.
    STDERR.puts "Pdfcrowd Error: #{why}"
    raise
end

Get info about the current conversion

require "pdfcrowd"

begin
    # Instantiate the PDF to Text API client.
    client = Pdfcrowd::PdfToTextClient.new('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # Conversion options: turn on the debug log and set the page break mode.
    client.setDebugLog(true)
    client.setPageBreakMode('default')

    # Convert the local PDF file and save the resulting text to invoice.txt.
    client.convertFileToFile('/path/to/invoice.pdf', 'invoice.txt')

    # Print details about the conversion that has just run.
    # URL of the debug log:
    puts "Debug log url: #{client.getDebugLogUrl}"
    # Number of conversion credits remaining in your account:
    puts "Remaining credit count: #{client.getRemainingCreditCount}"
    # Number of credits used for the conversion:
    puts "Consumed credit count: #{client.getConsumedCreditCount}"
    # Unique identifier for the conversion:
    puts "Job id: #{client.getJobId}"
    # Total number of pages in the output document:
    puts "Page count: #{client.getPageCount}"
    # Size of the output data in bytes:
    puts "Output size: #{client.getOutputSize}"
rescue Pdfcrowd::Error => e
    # Report the failure on standard error, then re-raise it for the caller.
    $stderr.puts "Pdfcrowd Error: #{e}"
    raise
end

Rails examples

PDF file to text in Rails

require "pdfcrowd"

# Example Rails controller that converts a PDF file to plain text and sends
# the result to the client as a downloadable attachment.
class DemoController < ApplicationController
    # Converts /path/to/invoice.pdf to text and responds with the result.
    # Non-POST requests are answered with HTTP 400; a Pdfcrowd::Error is
    # reported back using the error's own message and code.
    def convert
        # The recommended method is POST.
        # It may also be restricted by ":via => :post" in your routes.rb.
        # `render plain:` is used because the `text:` option was removed
        # in Rails 5.1.
        unless request.post?
            return render plain: "POST is allowed only", status: 400
        end

        begin
            # Create an API client instance.
            client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

            # Run the conversion and store the result in the `txt` variable.
            txt = client.convertFile("/path/to/invoice.pdf")

            # Send the result and set HTTP response headers.
            send_data txt,
                      type: "text/plain",
                      disposition: "attachment; filename*=UTF-8''#{ERB::Util.url_encode('invoice.txt')}"
        rescue Pdfcrowd::Error => why
            # Send the error in the HTTP response.
            render plain: why.getMessage(), status: why.getCode()
        end
    end
end

PDF URL to text in Rails

require "pdfcrowd"

# Example Rails controller that converts a PDF at a URL to plain text and
# sends the result to the client as a downloadable attachment.
class DemoController < ApplicationController
    # Converts the sample invoice PDF at the given URL to text and responds
    # with the result. Non-POST requests are answered with HTTP 400; a
    # Pdfcrowd::Error is reported back using the error's own message and code.
    def convert
        # The recommended method is POST.
        # It may also be restricted by ":via => :post" in your routes.rb.
        # `render plain:` is used because the `text:` option was removed
        # in Rails 5.1.
        unless request.post?
            return render plain: "POST is allowed only", status: 400
        end

        begin
            # Create an API client instance.
            client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

            # Run the conversion and store the result in the `txt` variable.
            txt = client.convertUrl("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf")

            # Send the result and set HTTP response headers.
            send_data txt,
                      type: "text/plain",
                      disposition: "attachment; filename*=UTF-8''#{ERB::Util.url_encode('invoice.txt')}"
        rescue Pdfcrowd::Error => why
            # Send the error in the HTTP response.
            render plain: why.getMessage(), status: why.getCode()
        end
    end
end

In-memory PDF to text in Rails

require "pdfcrowd"

# Example Rails controller that converts in-memory PDF data to plain text and
# sends the result to the client as a downloadable attachment.
class DemoController < ApplicationController
    # Reads /path/to/hello_world.pdf into memory, converts it to text and
    # responds with the result. Non-POST requests are answered with HTTP 400;
    # a Pdfcrowd::Error is reported back using the error's own message and code.
    def convert
        # The recommended method is POST.
        # It may also be restricted by ":via => :post" in your routes.rb.
        # `render plain:` is used because the `text:` option was removed
        # in Rails 5.1.
        unless request.post?
            return render plain: "POST is allowed only", status: 400
        end

        begin
            # Create an API client instance.
            client = Pdfcrowd::PdfToTextClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

            # Load the PDF into memory. File.binread reads the whole file in
            # binary mode and closes it again, unlike a bare open(...).read
            # which leaves the file handle open.
            pdf_data = File.binread('/path/to/hello_world.pdf')

            # Run the conversion and store the result in the `txt` variable.
            txt = client.convertRawData(pdf_data)

            # Send the result and set HTTP response headers.
            send_data txt,
                      type: "text/plain",
                      disposition: "attachment; filename*=UTF-8''#{ERB::Util.url_encode('invoice.txt')}"
        rescue Pdfcrowd::Error => why
            # Send the error in the HTTP response.
            render plain: why.getMessage(), status: why.getCode()
        end
    end
end