HTML to PDF in Python

This page describes how to convert web pages and HTML to PDF in Python using the Pdfcrowd API. The API is easy to use and the integration takes only a few lines of code.

Samples

Installation

Install the client library from PyPI
pip install pdfcrowd

We also offer other installation options.

Authentication

Authentication is needed in order to use the Pdfcrowd API. The credentials used for accessing the API are your Pdfcrowd username and the API key.

You can try out the API without registration using the following demo credentials:
  • Username: demo
  • API key: ce544b6ea52a5621fb9d55f8b542d14d

To get your personal API credentials, you can start a free API trial or buy the API license.

Getting Started

Enter a web page or upload an HTML file to get a PDF preview and integration code that you can copy and paste to your application.

Or you can choose from the examples below and adapt the code to your needs. You can also interactively explore the API in the API Playground.

Examples

import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'example.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the web page; the result is kept in memory in "pdf"
    source_url = 'http://www.example.com'
    pdf = client.convertUrl(source_url)

    # "pdf" now holds the raw PDF data, ready to be sent in an
    # HTTP response, saved to a file, etc.
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # create the API client instance
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create an output stream for the conversion result; the "with" block
    # closes the file even when the conversion raises an error
    with open('example.pdf', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertUrlToStream('http://www.example.com', output_stream)
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the local HTML file and save the resulting PDF to a file
    input_path = '/path/to/MyLayout.html'
    output_path = 'MyLayout.pdf'
    client.convertFileToFile(input_path, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the local HTML file; the result is kept in memory in "pdf"
    input_path = '/path/to/MyLayout.html'
    pdf = client.convertFile(input_path)

    # "pdf" now holds the raw PDF data, ready to be sent in an
    # HTTP response, saved to a file, etc.
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # create the API client instance
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create an output stream for the conversion result; the "with" block
    # closes the file even when the conversion raises an error
    with open('MyLayout.pdf', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertFileToStream('/path/to/MyLayout.html', output_stream)
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the HTML string and save the resulting PDF to a file
    html = '<html><body><h1>Hello World!</h1></body></html>'
    output_path = 'HelloWorld.pdf'
    client.convertStringToFile(html, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the HTML string; the result is kept in memory in "pdf"
    html = '<html><body><h1>Hello World!</h1></body></html>'
    pdf = client.convertString(html)

    # "pdf" now holds the raw PDF data, ready to be sent in an
    # HTTP response, saved to a file, etc.
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # create the API client instance
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create an output stream for the conversion result; the "with" block
    # closes the file even when the conversion raises an error
    with open('HelloWorld.pdf', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertStringToStream('<html><body><h1>Hello World!</h1></body></html>', output_stream)
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # turn on the debug log for this conversion
    client.setDebugLog(True)

    # convert the local HTML file and save the resulting PDF to a file
    input_path = '/path/to/MyLayout.html'
    output_path = 'MyLayout.pdf'
    client.convertFileToFile(input_path, output_path)

    # URL of the debug log
    debug_log_url = client.getDebugLogUrl()
    print('Debug log url: {}'.format(debug_log_url))

    # conversion credits still available in the account
    remaining_credits = client.getRemainingCreditCount()
    print('Remaining credit count: {}'.format(remaining_credits))

    # credits consumed by the conversion
    consumed_credits = client.getConsumedCreditCount()
    print('Consumed credit count: {}'.format(consumed_credits))

    # unique ID of the conversion
    job_id = client.getJobId()
    print('Job id: {}'.format(job_id))

    # total number of pages in the output document
    page_count = client.getPageCount()
    print('Page count: {}'.format(page_count))

    # size of the output in bytes
    output_size = client.getOutputSize()
    print('Output size: {}'.format(output_size))
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise

Advanced Examples

import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # page setup: Letter size, landscape orientation, no margins
    client.setPageSize('Letter')
    client.setOrientation('landscape')
    client.setNoMargins(True)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'letter_landscape.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # header and footer markup (double-quoted so the inner quotes need no escaping)
    header_html = "<a class='pdfcrowd-source-url' data-pdfcrowd-placement='href-and-content'></a>"
    footer_html = "<center><span class='pdfcrowd-page-number'></span></center>"

    # set the header/footer heights and contents, and zero the top/bottom margins
    client.setHeaderHeight('15mm')
    client.setFooterHeight('10mm')
    client.setHeaderHtml(header_html)
    client.setFooterHtml(footer_html)
    client.setMarginTop('0')
    client.setMarginBottom('0')

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'header_footer.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # enable PDF forms for this conversion
    client.setEnablePdfForms(True)

    # convert an HTML string containing an input field and save the PDF
    html = '<html><body>Enter name:<input type=text></body></html>'
    output_path = 'form.pdf'
    client.convertStringToFile(html, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # scale the HTML contents with a factor of 300
    client.setScaleFactor(300)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'zoom_300.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # set the author, title, subject and keywords for the conversion
    client.setAuthor('Pdfcrowd')
    client.setTitle('Hello World')
    client.setSubject('Demo')
    client.setKeywords('Pdfcrowd,demo')

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'with_metadata.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # viewer settings: single-page layout, full-screen mode, fit-page zoom
    client.setPageLayout('single-page')
    client.setPageMode('full-screen')
    client.setInitialZoomType('fit-page')

    # page setup: landscape orientation, no margins
    client.setOrientation('landscape')
    client.setNoMargins(True)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'https://pdfcrowd.com/api/'
    output_path = 'slide_show.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # select the element to convert with the '#main' selector
    client.setElementToConvert('#main')

    # convert the web page and save the resulting PDF to a local file
    source_url = 'https://pdfcrowd.com/api/'
    output_path = 'html_part.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # custom JavaScript that creates a red <h2> and inserts it right after
    # the first <h1>; adjacent string literals are joined into one script
    inject_script = (
        "el=document.createElement('h2'); "
        "el.textContent='Hello from Pdfcrowd API'; "
        "el.style.color='red'; "
        "el_before=document.getElementsByTagName('h1')[0]; "
        "el_before.parentNode.insertBefore(el, el_before.nextSibling)"
    )
    client.setCustomJavascript(inject_script)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'html_inject.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # viewport settings: 992 wide, 'viewport' rendering, 'viewport-fit' scaling
    client.setViewportWidth(992)
    client.setRenderingMode('viewport')
    client.setSmartScalingMode('viewport-fit')

    # drop the page margins
    client.setNoMargins(True)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'https://getbootstrap.com/'
    output_path = 'bootstrap.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys
import io
import zipfile

try:
    # create the API client instance
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create ZIP archive
    in_stream = io.BytesIO()
    with zipfile.ZipFile(in_stream, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        # add HTML content to the archive
        zip_file.writestr('index.html', '''<html>
            <head>
                <style>
                 @font-face
                 {
                     font-family: 'OpenSans';
                     src: url(fonts/OpenSans.ttf) format('truetype');
                 }
        
                 h1
                 {
                     font-family: OpenSans;
                 }
                </style>
            </head>
            <body>
                <h1>Hello World</h1>
                <img src='images/logo.png'>
            </body>
        </html>''')

        # add required local files to the archive
        with open("/your-path-to/fonts/OpenSans.ttf", "rb") as in_file:
            zip_file.writestr("fonts/OpenSans.ttf", in_file.read())
        with open("/your-path-to/images/logo.png", "rb") as in_file:
            zip_file.writestr("images/logo.png", in_file.read())

    # rewind the in-memory archive so it is read from the beginning
    in_stream.seek(0)

    # create an output stream for the conversion result; the "with" block
    # closes the file even when the conversion raises an error
    with open('HelloFromZip.pdf', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertStreamToStream(in_stream, output_stream)
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # custom JavaScript calling libPdfcrowd.highlightHtmlElements with a
    # background color and no border color (double-quoted to avoid escapes)
    highlight_script = "libPdfcrowd.highlightHtmlElements({backgroundColor: 'rgba(255, 191, 0, 0.1)', borderColor:null})"
    client.setCustomJavascript(highlight_script)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'highlight_background.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise
import pdfcrowd
import sys

try:
    # set up the API client with the account username and API key
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # custom JavaScript calling libPdfcrowd.highlightHtmlElements with a
    # border color, padding and margin, and no background color
    highlight_script = "libPdfcrowd.highlightHtmlElements({borderColor: 'orange', backgroundColor: null, padding: '4px', margin: '4px'})"
    client.setCustomJavascript(highlight_script)

    # convert the web page and save the resulting PDF to a local file
    source_url = 'http://www.example.com'
    output_path = 'highlight_borders.pdf'
    client.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as why:
    # write the failure to stderr
    message = 'Pdfcrowd Error: {}\n'.format(why)
    sys.stderr.write(message)

    # propagate the exception to the caller (or handle it here instead)
    raise

Integration Examples

The API can be easily integrated into your environment. You can have our interactive API Playground autogenerate the integration code for you:

You can also check out a complete example in our pdfcrowd-examples GitHub repository:

Common Customizations

The API lets you convert a web page, a local HTML file, or a string containing HTML. The result of the conversion can be stored to a local file, to a stream object or to a variable. See the conversion input section for more details.

The best way to start with the API is to choose one of the examples and once you get it working, you can further customize the code. You can find the most common customizations in the table below.

Page size Change the page size with setPageSize or setPageDimensions. Pass -1 to setPageHeight to get a single-page PDF containing the whole document.
Page orientation Change the page orientation to landscape with setOrientation.
Page margins Adjust the page margins with setPageMargins.
Headers and footers Add headers and footers with setHeaderHtml and setFooterHtml. Set the height with setFooterHeight and setHeaderHeight.
You can learn more in this tutorial.
Zoom Scale the HTML contents with setScaleFactor.
Hide or remove elements You can use the following classes in your HTML code to hide or remove elements from the output:
  • pdfcrowd-remove - sets display:none!important on the element
  • pdfcrowd-hide - sets visibility:hidden!important on the element
Learn about other options.
Use @media print You can switch to the print version of the page (if it exists) with setUsePrintMedia.
Force page break You can force a page break with
<div style="page-break-before:always"></div>
Avoid page break You can avoid a page break inside an element with the following CSS
th, td, img { page-break-inside:avoid }
Run custom JavaScript You can use setOnLoadJavascript or setCustomJavascript to alter the HTML contents with custom JavaScript. In addition to the standard browser APIs, the custom JavaScript code can use helper functions from our JavaScript library.
Custom CSS styling You can alter the CSS styling used during conversion with custom JavaScript or by using the pdfcrowd-body CSS class, which is automatically set on the HTML <body> element. You can, for example, set the H1 font size to 48px by adding the following line to your CSS:
.pdfcrowd-body h1 { font-size: 48px; }
Add PDF signature You can create a PDF containing a digital signature field. Such a PDF can be digitally signed in, for example, Adobe Acrobat or Preview. Learn more in Create Digital Signature in PDF.
Fillable PDF form You can create a fillable PDF containing interactive fields and buttons. Learn more in Create Fillable PDF Form.

Template Rendering

The API enables rendering of HTML templates. The template syntax is based on the Jinja template rendering engine.

The most common constructs are:

  • Data rendering: Invoice: {{ invoice.number }}
  • For loop: {% for invoice in invoices %} ... {% endfor %}
  • If statement: {% if invoice.total > 100 %} ... {% endif %}
  • Data filter: {{ invoice.to.first_name|capitalize }}

The supported input data formats are JSON, XML, CSV and YAML. The data can be uploaded from a file or from a string variable.

Supported template filters: capitalize, center, default, escape, first, forceescape, format, indent, join, last, length, list, lower, replace, reverse, safe, slice, sort, string, striptags, title, trim, truncate, unique, upper, wordcount, wordwrap.

Template Rendering Examples

import pdfcrowd
import sys

try:
    # create the API client instance (demo credentials; replace them with
    # your own Pdfcrowd username and API key)
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # configure the conversion: JSON data used to render the template
    client.setDataString("""{
            "name": "World",
            "product": "Pdfcrowd API"
        }""")

    # run the conversion and write the result to a file
    client.convertStringToFile('Hello {{ name }} from {{ product }}', 'output.pdf')
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # create the API client instance (demo credentials; replace them with
    # your own Pdfcrowd username and API key)
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # configure the conversion: XML data used to render the template
    client.setDataString("""<?xml version="1.0" encoding="UTF-8"?>
        <data>
          <name>World</name>
          <product>Pdfcrowd API</product>
        </data>""")

    # run the conversion and write the result to a file
    client.convertStringToFile('Hello {{ data.name }} from {{ data.product }}', 'output.pdf')
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # create the API client instance (demo credentials; replace them with
    # your own Pdfcrowd username and API key)
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # configure the conversion: YAML data used to render the template
    # (the second line stays at column 0 so it is not indented inside the data)
    client.setDataString("""name: World
product: Pdfcrowd API""")

    # run the conversion and write the result to a file
    client.convertStringToFile('Hello {{ name }} from {{ product }}', 'output.pdf')
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise
import pdfcrowd
import sys

try:
    # create the API client instance (demo credentials; replace them with
    # your own Pdfcrowd username and API key)
    client = pdfcrowd.HtmlToPdfClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # configure the conversion: CSV data used to render the template
    # (the second line stays at column 0 so it is not indented inside the data)
    client.setDataString("""name,product
World,Pdfcrowd API""")

    # run the conversion and write the result to a file
    client.convertStringToFile('Hello {{ name }} from {{ product }}', 'output.pdf')
except pdfcrowd.Error as why:
    # report the error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # rethrow or handle the exception
    raise

Error Handling

try:
    # call the API here; "pass" is only a placeholder that keeps this
    # skeleton valid Python until a real call is added
    pass
except pdfcrowd.Error as why:
    # print error
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))

    # print just error code
    sys.stderr.write('Pdfcrowd Error Code: {}\n'.format(why.getCode()))

    # print just error message
    sys.stderr.write('Pdfcrowd Error Message: {}\n'.format(why.getMessage()))

    # or handle the error in your way

Troubleshooting

API Method Reference

Refer to the API Method Reference for details.