Python HTML to PDF Code Examples

These Python code examples show URL, HTML file, and raw HTML string conversion to PDF with the official PDFCrowd client library.

For installation, authentication, and the basic conversion flow, start with the HTML to PDF in Python documentation.

Basic examples

Webpage to PDF file
Webpage to in-memory PDF
Webpage to PDF stream
HTML file to PDF file
HTML file to in-memory PDF
HTML file to PDF stream
HTML string to PDF file
HTML string to in-memory PDF
HTML string to PDF stream
Get info about the current conversion

Advanced examples

Customize the page size and the orientation
Put the source URL in the header and the page number in the footer
Create fillable PDF form
Zoom the HTML document
Set PDF metadata
Create a PowerPoint-like presentation from an HTML document
Convert an HTML document section
Inject HTML code
Convert a responsive web page as it appears on a large device
Create an in-memory archive (ZIP) and convert it
Renderer debugging - highlight HTML elements
Renderer debugging - borders with spacing around HTML elements

Template rendering examples

Create PDF from JSON data
Create PDF from XML data
Create PDF from YAML data
Create PDF from CSV data

Django examples

Webpage to PDF in Django
HTML file to PDF in Django
HTML string to PDF in Django

Flask examples

Webpage to PDF in Flask
HTML file to PDF in Flask
HTML string to PDF in Flask

Basic examples

Webpage to PDF file

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Choose how the HTML content width maps onto the PDF page width.
    # An exact viewport width (for example '960px') can be given instead
    # when the layout needs fine-tuning.
    viewport_width = 'balanced'
    client.setContentViewportWidth(viewport_width)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'example.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Webpage to in-memory PDF

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Convert the web page; the result is returned rather than saved.
    source_url = 'http://www.example.com'
    pdf = client.convertUrl(source_url)

    # `pdf` now holds the raw PDF data. It can be sent in an HTTP
    # response, written to a file, and so on.

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Webpage to PDF stream

import pdfcrowd
import sys

try:
    # Create an API client instance.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Open the output stream in a `with` block so the file is closed even
    # when the conversion raises an error.
    with open('example.pdf', 'wb') as output_stream:
        # Run the conversion and write the result to the output stream.
        client.convertUrlToStream('http://www.example.com', output_stream)

except pdfcrowd.Error as why:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(why))
    raise

HTML file to PDF file

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Choose how the HTML content width maps onto the PDF page width.
    # An exact viewport width (for example '960px') can be given instead
    # when the layout needs fine-tuning.
    viewport_width = 'balanced'
    client.setContentViewportWidth(viewport_width)

    # Convert the local HTML file and write the PDF to a local file.
    source_path = '/path/to/MyLayout.html'
    target_path = 'MyLayout.pdf'
    client.convertFileToFile(source_path, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

HTML file to in-memory PDF

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Convert the local HTML file; the result is returned rather than saved.
    source_path = '/path/to/MyLayout.html'
    pdf = client.convertFile(source_path)

    # `pdf` now holds the raw PDF data. It can be sent in an HTTP
    # response, written to a file, and so on.

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

HTML file to PDF stream

import pdfcrowd
import sys

try:
    # Create an API client instance.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Open the output stream in a `with` block so the file is closed even
    # when the conversion raises an error.
    with open('MyLayout.pdf', 'wb') as output_stream:
        # Run the conversion and write the result to the output stream.
        client.convertFileToStream('/path/to/MyLayout.html', output_stream)

except pdfcrowd.Error as why:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(why))
    raise

HTML string to PDF file

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Choose how the HTML content width maps onto the PDF page width.
    # An exact viewport width (for example '960px') can be given instead
    # when the layout needs fine-tuning.
    viewport_width = 'balanced'
    client.setContentViewportWidth(viewport_width)

    # Convert the HTML markup and write the PDF to a local file.
    html = '<html><body><h1>Hello World!</h1></body></html>'
    target_path = 'HelloWorld.pdf'
    client.convertStringToFile(html, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

HTML string to in-memory PDF

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Convert the HTML markup; the result is returned rather than saved.
    html = '<html><body><h1>Hello World!</h1></body></html>'
    pdf = client.convertString(html)

    # `pdf` now holds the raw PDF data. It can be sent in an HTTP
    # response, written to a file, and so on.

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

HTML string to PDF stream

import pdfcrowd
import sys

try:
    # Create an API client instance.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Open the output stream in a `with` block so the file is closed even
    # when the conversion raises an error.
    with open('HelloWorld.pdf', 'wb') as output_stream:
        # Run the conversion and write the result to the output stream.
        client.convertStringToStream('<html><body><h1>Hello World!</h1></body></html>', output_stream)

except pdfcrowd.Error as why:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(why))
    raise

Get info about the current conversion

import pdfcrowd
import sys

try:
    # Create an API client instance.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Turn on debug logging for this conversion; the URL of the log is
    # read back through getDebugLogUrl() after the conversion has run.
    client.setDebugLog(True)

    # Run the conversion and save the result to a file.
    client.convertFileToFile('/path/to/MyLayout.html', 'MyLayout.pdf')

    # Print the URL pointing to the debug log for this request.
    print('Debug log url: {}'.format(client.getDebugLogUrl()))

    # Print the number of conversion credits remaining in your account.
    print('Remaining credit count: {}'.format(client.getRemainingCreditCount()))

    # Print the number of credits consumed for this conversion.
    print('Consumed credit count: {}'.format(client.getConsumedCreditCount()))

    # Print the unique identifier assigned to this conversion job.
    print('Job id: {}'.format(client.getJobId()))

    # Print the total number of pages in the output document.
    print('Page count: {}'.format(client.getPageCount()))

    # Print the size of the output data in bytes.
    print('Output size: {}'.format(client.getOutputSize()))

except pdfcrowd.Error as why:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(why))
    raise

Advanced examples

Customize the page size and the orientation

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Page setup: Letter paper, landscape orientation, no margins.
    page_size = 'Letter'
    orientation = 'landscape'
    client.setPageSize(page_size)
    client.setOrientation(orientation)
    client.setNoMargins(True)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'letter_landscape.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Put the source URL in the header and the page number in the footer

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Header markup for the source URL and footer markup for the page number.
    header_html = '<a class=\'pdfcrowd-source-url\' data-pdfcrowd-placement=\'href-and-content\'></a>'
    footer_html = '<center><span class=\'pdfcrowd-page-number\'></span></center>'

    # Set the header/footer heights and content, then zero the top and
    # bottom page margins.
    client.setHeaderHeight('15mm')
    client.setFooterHeight('10mm')
    client.setHeaderHtml(header_html)
    client.setFooterHtml(footer_html)
    client.setMarginTop('0')
    client.setMarginBottom('0')

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'header_footer.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create fillable PDF form

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Enable PDF forms for this conversion.
    client.setEnablePdfForms(True)

    # Convert markup containing a text input and write the PDF to a file.
    form_html = '<html><body>Enter name:<input type=text></body></html>'
    target_path = 'form.pdf'
    client.convertStringToFile(form_html, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Zoom the HTML document

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Zoom the document by setting the scale factor.
    scale_factor = 300
    client.setScaleFactor(scale_factor)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'zoom_300.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Set PDF metadata

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Metadata for the output PDF: author, title, subject and keywords.
    author = 'Pdfcrowd'
    title = 'Hello World'
    subject = 'Demo'
    keywords = 'Pdfcrowd,demo'
    client.setAuthor(author)
    client.setTitle(title)
    client.setSubject(subject)
    client.setKeywords(keywords)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'with_metadata.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create a PowerPoint-like presentation from an HTML document

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Presentation-style settings: single-page layout, full-screen page
    # mode, fit-page initial zoom, landscape orientation and no margins.
    client.setPageLayout('single-page')
    client.setPageMode('full-screen')
    client.setInitialZoomType('fit-page')
    client.setOrientation('landscape')
    client.setNoMargins(True)

    # Convert the web page and write the PDF to a local file.
    source_url = 'https://pdfcrowd.com/api/'
    target_path = 'slide_show.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Convert an HTML document section

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Limit the conversion to the element matching this selector.
    section_selector = '#main'
    client.setElementToConvert(section_selector)

    # Convert the web page and write the PDF to a local file.
    source_url = 'https://pdfcrowd.com/api/'
    target_path = 'html_part.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Inject HTML code

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Custom JavaScript that creates a red <h2> and inserts it right after
    # the first <h1> of the page.
    inject_script = 'el=document.createElement(\'h2\'); el.textContent=\'Hello from Pdfcrowd API\'; el.style.color=\'red\'; el_before=document.getElementsByTagName(\'h1\')[0]; el_before.parentNode.insertBefore(el, el_before.nextSibling)'
    client.setCustomJavascript(inject_script)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'html_inject.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Convert a responsive web page as it appears on a large device

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Use the 'large' content viewport width and drop the page margins.
    viewport_width = 'large'
    client.setContentViewportWidth(viewport_width)
    client.setNoMargins(True)

    # Convert the web page and write the PDF to a local file.
    source_url = 'https://getbootstrap.com/'
    target_path = 'bootstrap.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create an in-memory archive (ZIP) and convert it

import pdfcrowd
import sys
import io
import zipfile

try:
    # Create an API client instance.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Create a ZIP archive in memory.
    in_stream = io.BytesIO()
    with zipfile.ZipFile(in_stream, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        # Add HTML content to the archive.
        zip_file.writestr('index.html', '''<html>
            <head>
                <style>
                 @font-face
                 {
                     font-family: 'OpenSans';
                     src: url(fonts/OpenSans.ttf) format('truetype');
                 }
        
                 h1
                 {
                     font-family: OpenSans;
                 }
                </style>
            </head>
            <body>
                <h1>Hello World</h1>
                <img src='images/logo.png'>
            </body>
        </html>''')

        # Add the local files referenced by the HTML to the archive, under
        # the same relative paths the HTML uses.
        with open("/your-path-to/fonts/OpenSans.ttf", "rb") as in_file:
            zip_file.writestr("fonts/OpenSans.ttf", in_file.read())
        with open("/your-path-to/images/logo.png", "rb") as in_file:
            zip_file.writestr("images/logo.png", in_file.read())

    # Rewind the buffer so the archive is read from its beginning.
    in_stream.seek(0)

    # Open the output stream in a `with` block so the file is closed even
    # when the conversion raises an error.
    with open('HelloFromZip.pdf', 'wb') as output_stream:
        # Run the conversion and write the result to the output stream.
        client.convertStreamToStream(in_stream, output_stream)

except pdfcrowd.Error as why:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(why))
    raise

Renderer debugging - highlight HTML elements

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Custom JavaScript that highlights HTML elements with a translucent
    # background colour and no border colour.
    highlight_script = 'libPdfcrowd.highlightHtmlElements({backgroundColor: \'rgba(255, 191, 0, 0.1)\', borderColor:null})'
    client.setCustomJavascript(highlight_script)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'highlight_background.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Renderer debugging - borders with spacing around HTML elements

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # Custom JavaScript that highlights HTML elements with an orange border
    # colour, no background colour, and 4px padding and margin.
    highlight_script = 'libPdfcrowd.highlightHtmlElements({borderColor: \'orange\', backgroundColor: null, padding: \'4px\', margin: \'4px\'})'
    client.setCustomJavascript(highlight_script)

    # Convert the web page and write the PDF to a local file.
    source_url = 'http://www.example.com'
    target_path = 'highlight_borders.pdf'
    client.convertUrlToFile(source_url, target_path)

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Template rendering examples

Create PDF from JSON data

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # JSON data supplied to the conversion.
    json_data = """{
            "name": "World",
            "product": "Pdfcrowd API"
        }"""
    client.setDataString(json_data)

    # Convert the template string, which references the `name` and
    # `product` fields, and write the PDF to a local file.
    template = 'Hello {{ name }} from {{ product }}'
    client.convertStringToFile(template, 'output.pdf')

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create PDF from XML data

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # XML data supplied to the conversion.
    xml_data = """<?xml version="1.0" encoding="UTF-8"?>
        <data>
          <name>World</name>
          <product>Pdfcrowd API</product>
        </data>"""
    client.setDataString(xml_data)

    # Convert the template string, which references the fields under the
    # `data` root element, and write the PDF to a local file.
    template = 'Hello {{ data.name }} from {{ data.product }}'
    client.convertStringToFile(template, 'output.pdf')

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create PDF from YAML data

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # YAML data supplied to the conversion. The second line of the literal
    # starts at column 0 so that no indentation ends up in the data.
    yaml_data = """name: World
product: Pdfcrowd API"""
    client.setDataString(yaml_data)

    # Convert the template string, which references the `name` and
    # `product` fields, and write the PDF to a local file.
    template = 'Hello {{ name }} from {{ product }}'
    client.convertStringToFile(template, 'output.pdf')

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Create PDF from CSV data

import pdfcrowd
import sys

try:
    # Build the conversion client with the demo credentials.
    client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

    # CSV data supplied to the conversion: a header row, then one record.
    # The second line of the literal starts at column 0 so that no
    # indentation ends up in the data.
    csv_data = """name,product
World,Pdfcrowd API"""
    client.setDataString(csv_data)

    # Convert the template string, which references the `name` and
    # `product` columns, and write the PDF to a local file.
    template = 'Hello {{ name }} from {{ product }}'
    client.convertStringToFile(template, 'output.pdf')

except pdfcrowd.Error as err:
    # Report the failure on stderr, then let the exception propagate.
    sys.stderr.write('PDFCrowd Error: {}\n'.format(err))
    raise

Django examples

Webpage to PDF in Django

import urllib.parse
from django.http import HttpResponse
from django.views.decorators.http import require_POST
import pdfcrowd

# POST is the recommended method for this view.
@require_POST
def convert(request):
    """Convert http://www.example.com to PDF and return it as a download.

    The PDF is written directly into the ``HttpResponse``. If the conversion
    raises ``pdfcrowd.Error``, a plain-text response carrying the error and
    its status code is returned instead.
    """
    try:
        # Build the conversion client with the demo credentials.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Prepare the PDF response and its HTTP headers.
        response = HttpResponse(content_type='application/pdf')
        disposition = "attachment; filename*=UTF-8''" + urllib.parse.quote('example.pdf', safe='')
        for header, value in (
                ('Cache-Control', 'max-age=0'),
                ('Accept-Ranges', 'none'),
                ('Content-Disposition', disposition),
        ):
            response[header] = value

        # Run the conversion, using the response as the output stream.
        client.convertUrlToStream('http://www.example.com', response)
    except pdfcrowd.Error as err:
        # Send the error in the HTTP response.
        return HttpResponse(
            err, status=err.getStatusCode(), content_type='text/plain')
    else:
        return response

HTML file to PDF in Django

import urllib.parse
from django.http import HttpResponse
from django.views.decorators.http import require_POST
import pdfcrowd

# POST is the recommended method for this view.
@require_POST
def convert(request):
    """Convert a local HTML file to PDF and return it as a download.

    The PDF is written directly into the ``HttpResponse``. If the conversion
    raises ``pdfcrowd.Error``, a plain-text response carrying the error and
    its status code is returned instead.
    """
    try:
        # Build the conversion client with the demo credentials.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Prepare the PDF response and its HTTP headers.
        response = HttpResponse(content_type='application/pdf')
        disposition = "attachment; filename*=UTF-8''" + urllib.parse.quote('MyLayout.pdf', safe='')
        for header, value in (
                ('Cache-Control', 'max-age=0'),
                ('Accept-Ranges', 'none'),
                ('Content-Disposition', disposition),
        ):
            response[header] = value

        # Run the conversion, using the response as the output stream.
        client.convertFileToStream('/path/to/MyLayout.html', response)
    except pdfcrowd.Error as err:
        # Send the error in the HTTP response.
        return HttpResponse(
            err, status=err.getStatusCode(), content_type='text/plain')
    else:
        return response

HTML string to PDF in Django

import urllib.parse
from django.http import HttpResponse
from django.views.decorators.http import require_POST
import pdfcrowd

# POST is the recommended method for this view.
@require_POST
def convert(request):
    """Convert an HTML string to PDF and return it as a download.

    The PDF is written directly into the ``HttpResponse``. If the conversion
    raises ``pdfcrowd.Error``, a plain-text response carrying the error and
    its status code is returned instead.
    """
    try:
        # Build the conversion client with the demo credentials.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Prepare the PDF response and its HTTP headers.
        response = HttpResponse(content_type='application/pdf')
        disposition = "attachment; filename*=UTF-8''" + urllib.parse.quote('HelloWorld.pdf', safe='')
        for header, value in (
                ('Cache-Control', 'max-age=0'),
                ('Accept-Ranges', 'none'),
                ('Content-Disposition', disposition),
        ):
            response[header] = value

        # Run the conversion, using the response as the output stream.
        client.convertStringToStream('<html><body><h1>Hello World!</h1></body></html>', response)
    except pdfcrowd.Error as err:
        # Send the error in the HTTP response.
        return HttpResponse(
            err, status=err.getStatusCode(), content_type='text/plain')
    else:
        return response

Flask examples

Webpage to PDF in Flask

import urllib.parse
from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# The recommended method is POST.
@app.route('/', methods=['POST'])
def convert():
    """Convert http://www.example.com to PDF and return it as a download.

    Returns a ``Response`` holding the PDF data. If the conversion raises
    ``pdfcrowd.Error``, a plain-text response carrying the error message and
    its status code is returned instead.
    """
    try:
        # Create an API client instance.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Run the conversion and store the result in the `pdf` variable.
        pdf = client.convertUrl('http://www.example.com')

        # Send the result and set HTTP response headers.
        response = Response(pdf, mimetype='application/pdf')
        response.headers['Cache-Control'] = 'max-age=0'
        response.headers['Accept-Ranges'] = 'none'
        response.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + urllib.parse.quote('example.pdf', safe='')
        return response
    except pdfcrowd.Error as why:
        # Send the error in the HTTP response.
        return Response(
            str(why), status=why.getStatusCode(), mimetype='text/plain')

HTML file to PDF in Flask

import urllib.parse
from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# The recommended method is POST.
@app.route('/', methods=['POST'])
def convert():
    """Convert a local HTML file to PDF and return it as a download.

    Returns a ``Response`` holding the PDF data. If the conversion raises
    ``pdfcrowd.Error``, a plain-text response carrying the error message and
    its status code is returned instead.
    """
    try:
        # Create an API client instance.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Run the conversion and store the result in the `pdf` variable.
        pdf = client.convertFile('/path/to/MyLayout.html')

        # Send the result and set HTTP response headers.
        response = Response(pdf, mimetype='application/pdf')
        response.headers['Cache-Control'] = 'max-age=0'
        response.headers['Accept-Ranges'] = 'none'
        response.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + urllib.parse.quote('MyLayout.pdf', safe='')
        return response
    except pdfcrowd.Error as why:
        # Send the error in the HTTP response.
        return Response(
            str(why), status=why.getStatusCode(), mimetype='text/plain')

HTML string to PDF in Flask

import urllib.parse
from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# The recommended method is POST.
@app.route('/', methods=['POST'])
def convert():
    """Convert an HTML string to PDF and return it as a download.

    Returns a ``Response`` holding the PDF data. If the conversion raises
    ``pdfcrowd.Error``, a plain-text response carrying the error message and
    its status code is returned instead.
    """
    try:
        # Create an API client instance.
        client = pdfcrowd.HtmlToPdfClient('demo', 'demo')

        # Run the conversion and store the result in the `pdf` variable.
        pdf = client.convertString('<html><body><h1>Hello World!</h1></body></html>')

        # Send the result and set HTTP response headers.
        response = Response(pdf, mimetype='application/pdf')
        response.headers['Cache-Control'] = 'max-age=0'
        response.headers['Accept-Ranges'] = 'none'
        response.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + urllib.parse.quote('HelloWorld.pdf', safe='')
        return response
    except pdfcrowd.Error as why:
        # Send the error in the HTTP response.
        return Response(
            str(why), status=why.getStatusCode(), mimetype='text/plain')