PDF to HTML in Ruby

Overview

This page describes how to convert PDF to HTML in Ruby using the Pdfcrowd API. The API is easy to use and the integration takes only a couple of lines of code.

Installation

Install the client library from rubygems.org:
gem install pdfcrowd

We also offer other installation options.

Authentication

Authentication is needed in order to use the Pdfcrowd API. The credentials used for accessing the API are your Pdfcrowd username and the API key.

You can try out the API without registration using the following demo credentials:
  • Username: demo
  • API key: ce544b6ea52a5621fb9d55f8b542d14d

Alternatively, you can start a free trial or purchase an API license to get your personal API credentials.

Examples

require "pdfcrowd"

begin
    # demo credentials; substitute your own Pdfcrowd username and API key
    username = "demo"
    api_key = "ce544b6ea52a5621fb9d55f8b542d14d"

    # build the PDF-to-HTML API client
    converter = Pdfcrowd::PdfToHtmlClient.new(username, api_key)

    # convert the local PDF and save the resulting HTML to "logo.html"
    converter.convertFileToFile("/path/to/logo.pdf", "logo.html")
rescue Pdfcrowd::Error => error
    # print the failure to standard error
    STDERR.puts "Pdfcrowd Error: #{error}"

    # re-raise for the caller (or handle the exception here instead)
    raise
end
require "pdfcrowd"

begin
    # demo credentials; substitute your own Pdfcrowd username and API key
    username = "demo"
    api_key = "ce544b6ea52a5621fb9d55f8b542d14d"

    # build the PDF-to-HTML API client
    converter = Pdfcrowd::PdfToHtmlClient.new(username, api_key)

    # convert the local PDF and keep the result in the "html" variable
    html = converter.convertFile("/path/to/logo.pdf")

    # "html" now holds the raw HTML data; it can be sent in an
    # HTTP response, saved to a file, etc.
rescue Pdfcrowd::Error => error
    # print the failure to standard error
    STDERR.puts "Pdfcrowd Error: #{error}"

    # re-raise for the caller (or handle the exception here instead)
    raise
end
require "pdfcrowd"

begin
    # create the API client instance
    client = Pdfcrowd::PdfToHtmlClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # open the output file in block form: the stream is closed automatically
    # when the block exits, even if the conversion raises an error
    File.open("logo.html", "wb") do |output_stream|
        # run the conversion and write the result into the output stream
        client.convertFileToStream("/path/to/logo.pdf", output_stream)
    end
rescue Pdfcrowd::Error => why
    # report the error
    STDERR.puts "Pdfcrowd Error: #{why}"

    # rethrow or handle the exception
    raise
end
require "pdfcrowd"

begin
    # demo credentials; substitute your own Pdfcrowd username and API key
    username = "demo"
    api_key = "ce544b6ea52a5621fb9d55f8b542d14d"

    # build the PDF-to-HTML API client
    converter = Pdfcrowd::PdfToHtmlClient.new(username, api_key)

    # convert the PDF at the given URL and save the resulting HTML to "invoice.html"
    source_url = "https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf"
    converter.convertUrlToFile(source_url, "invoice.html")
rescue Pdfcrowd::Error => error
    # print the failure to standard error
    STDERR.puts "Pdfcrowd Error: #{error}"

    # re-raise for the caller (or handle the exception here instead)
    raise
end
require "pdfcrowd"

begin
    # demo credentials; substitute your own Pdfcrowd username and API key
    username = "demo"
    api_key = "ce544b6ea52a5621fb9d55f8b542d14d"

    # build the PDF-to-HTML API client
    converter = Pdfcrowd::PdfToHtmlClient.new(username, api_key)

    # convert the PDF at the given URL and keep the result in the "html" variable
    source_url = "https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf"
    html = converter.convertUrl(source_url)

    # "html" now holds the raw HTML data; it can be sent in an
    # HTTP response, saved to a file, etc.
rescue Pdfcrowd::Error => error
    # print the failure to standard error
    STDERR.puts "Pdfcrowd Error: #{error}"

    # re-raise for the caller (or handle the exception here instead)
    raise
end
require "pdfcrowd"

begin
    # create the API client instance
    client = Pdfcrowd::PdfToHtmlClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # open the output file in block form: the stream is closed automatically
    # when the block exits, even if the conversion raises an error
    File.open("invoice.html", "wb") do |output_stream|
        # run the conversion and write the result into the output stream
        client.convertUrlToStream("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf", output_stream)
    end
rescue Pdfcrowd::Error => why
    # report the error
    STDERR.puts "Pdfcrowd Error: #{why}"

    # rethrow or handle the exception
    raise
end
require "pdfcrowd"

begin
    # create the API client instance
    client = Pdfcrowd::PdfToHtmlClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # read the whole PDF as binary data; File.binread opens, reads and closes
    # the file in one call, so no file handle is left open
    pdf_data = File.binread('/path/to/hello_world.pdf')

    # run the conversion and write the result to a file
    client.convertRawDataToFile(pdf_data, "logo.html")
rescue Pdfcrowd::Error => why
    # report the error
    STDERR.puts "Pdfcrowd Error: #{why}"

    # rethrow or handle the exception
    raise
end
require "pdfcrowd"

begin
    # create the API client instance
    client = Pdfcrowd::PdfToHtmlClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # read the whole PDF as binary data; File.binread opens, reads and closes
    # the file in one call, so no file handle is left open
    pdf_data = File.binread('/path/to/hello_world.pdf')

    # run the conversion and store the result into the "html" variable
    html = client.convertRawData(pdf_data)

    # at this point the "html" variable contains HTML raw data and
    # can be sent in an HTTP response, saved to a file, etc.
rescue Pdfcrowd::Error => why
    # report the error
    STDERR.puts "Pdfcrowd Error: #{why}"

    # rethrow or handle the exception
    raise
end
require "pdfcrowd"

begin
    # create the API client instance
    client = Pdfcrowd::PdfToHtmlClient.new("demo", "ce544b6ea52a5621fb9d55f8b542d14d")

    # read the whole PDF as binary data; File.binread opens, reads and closes
    # the file in one call, so no file handle is left open
    pdf_data = File.binread('/path/to/hello_world.pdf')

    # open the output file in block form: the stream is closed automatically
    # when the block exits, even if the conversion raises an error
    File.open("logo.html", "wb") do |output_stream|
        # run the conversion and write the result into the output stream
        client.convertRawDataToStream(pdf_data, output_stream)
    end
rescue Pdfcrowd::Error => why
    # report the error
    STDERR.puts "Pdfcrowd Error: #{why}"

    # rethrow or handle the exception
    raise
end
require "pdfcrowd"

begin
    # demo credentials; substitute your own Pdfcrowd username and API key
    username = "demo"
    api_key = "ce544b6ea52a5621fb9d55f8b542d14d"

    # build the PDF-to-HTML API client
    converter = Pdfcrowd::PdfToHtmlClient.new(username, api_key)

    # turn on the debug log for this conversion
    converter.setDebugLog(true)

    # convert the local PDF and save the resulting HTML to "logo.html"
    converter.convertFileToFile("/path/to/logo.pdf", "logo.html")

    # URL of the debug log
    puts "Debug log url: #{converter.getDebugLogUrl}"

    # conversion credits still available in the account
    puts "Remaining credit count: #{converter.getRemainingCreditCount}"

    # credits consumed by this conversion
    puts "Consumed credit count: #{converter.getConsumedCreditCount}"

    # unique ID of the conversion
    puts "Job id: #{converter.getJobId}"

    # total number of pages in the output document
    puts "Page count: #{converter.getPageCount}"

    # size of the output in bytes
    puts "Output size: #{converter.getOutputSize}"
rescue Pdfcrowd::Error => error
    # print the failure to standard error
    STDERR.puts "Pdfcrowd Error: #{error}"

    # re-raise for the caller (or handle the exception here instead)
    raise
end

Error Handling

begin
    # invoke the Pdfcrowd API here
rescue Pdfcrowd::Error => error
    # full error description
    STDERR.puts "Pdfcrowd Error: #{error}"

    # error code only
    STDERR.puts "Pdfcrowd Error Code: #{error.getCode}"

    # error message only
    STDERR.puts "Pdfcrowd Error Message: #{error.getMessage}"

    # alternatively, handle the error however suits your application
end

Troubleshooting

API Method Reference

For details, refer to the API Method Reference.