This page contains various examples of using the PDF to Text API in Python. The examples are complete and fully functional. Read more about how to convert PDF to Text in Python.
# Example: convert a local PDF file to text and save the output to a file.
import sys

import pdfcrowd

input_path = '/path/to/invoice.pdf'
output_path = 'invoice.txt'

try:
    # instantiate the PDF to Text API client
    converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the input file and store the output in the target file
    converter.convertFileToFile(input_path, output_path)
except pdfcrowd.Error as err:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(err))
    raise
# Example: convert a local PDF file to text and keep the output in memory.
import sys

import pdfcrowd

input_path = '/path/to/invoice.pdf'

try:
    # instantiate the PDF to Text API client
    converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the input file; the output is kept in the "txt" variable
    txt = converter.convertFile(input_path)

    # "txt" now holds the TXT raw data, which can be sent in an
    # HTTP response, saved to a file, etc.
except pdfcrowd.Error as err:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(err))
    raise
# Example: convert a local PDF file to text and write the output into a
# stream.
import sys

import pdfcrowd

try:
    # create the API client instance
    client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create an output stream for the conversion result; the "with" block
    # closes it even when the conversion raises
    with open('invoice.txt', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertFileToStream('/path/to/invoice.pdf', output_stream)
except pdfcrowd.Error as why:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
    raise
# Example: convert a PDF located at a URL to text and save the output to a
# file.
import sys

import pdfcrowd

source_url = 'https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf'
output_path = 'invoice.txt'

try:
    # instantiate the PDF to Text API client
    converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the document at the URL and store the output in the target file
    converter.convertUrlToFile(source_url, output_path)
except pdfcrowd.Error as err:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(err))
    raise
# Example: convert a PDF located at a URL to text and keep the output in
# memory.
import sys

import pdfcrowd

source_url = 'https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf'

try:
    # instantiate the PDF to Text API client
    converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # convert the document at the URL; the output is kept in the "txt" variable
    txt = converter.convertUrl(source_url)

    # "txt" now holds the TXT raw data, which can be sent in an
    # HTTP response, saved to a file, etc.
except pdfcrowd.Error as err:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(err))
    raise
# Example: convert a PDF located at a URL to text and write the output into
# a stream.
import sys

import pdfcrowd

try:
    # create the API client instance
    client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # create an output stream for the conversion result; the "with" block
    # closes it even when the conversion raises
    with open('invoice.txt', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertUrlToStream('https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf', output_stream)
except pdfcrowd.Error as why:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
    raise
# Example: convert in-memory PDF data to text and save the output to a file.
import sys

import pdfcrowd

try:
    # create the API client instance
    client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # read the input PDF into memory; the "with" block closes the file
    # handle as soon as the data has been read
    with open('/path/to/hello_world.pdf', 'rb') as input_file:
        pdf_data = input_file.read()

    # run the conversion and write the result to a file
    client.convertRawDataToFile(pdf_data, 'invoice.txt')
except pdfcrowd.Error as why:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
    raise
# Example: convert in-memory PDF data to text and keep the output in memory.
import sys

import pdfcrowd

try:
    # create the API client instance
    client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # read the input PDF into memory; the "with" block closes the file
    # handle as soon as the data has been read
    with open('/path/to/hello_world.pdf', 'rb') as input_file:
        pdf_data = input_file.read()

    # run the conversion and store the result into the "txt" variable
    txt = client.convertRawData(pdf_data)

    # at this point the "txt" variable contains TXT raw data and
    # can be sent in an HTTP response, saved to a file, etc.
except pdfcrowd.Error as why:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
    raise
# Example: convert in-memory PDF data to text and write the output into a
# stream.
import sys

import pdfcrowd

try:
    # create the API client instance
    client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # read the input PDF first, so an unreadable input does not leave a
    # truncated output file behind; the "with" block closes the file handle
    with open('/path/to/hello_world.pdf', 'rb') as input_file:
        pdf_data = input_file.read()

    # create an output stream for the conversion result; the "with" block
    # closes it even when the conversion raises
    with open('invoice.txt', 'wb') as output_stream:
        # run the conversion and write the result into the output stream
        client.convertRawDataToStream(pdf_data, output_stream)
except pdfcrowd.Error as why:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(why))
    raise
# Example: convert a local PDF file to text with the debug log enabled, then
# print metadata about the finished conversion.
import sys

import pdfcrowd

try:
    # instantiate the PDF to Text API client
    converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

    # conversion settings
    converter.setDebugLog(True)
    converter.setPageBreakMode('default')

    # convert the input file and store the output in the target file
    converter.convertFileToFile('/path/to/invoice.pdf', 'invoice.txt')

    # (message template, getter) pairs, printed in this order
    report = (
        # URL of the debug log
        ('Debug log url: {}', converter.getDebugLogUrl),
        # number of conversion credits remaining in your account
        ('Remaining credit count: {}', converter.getRemainingCreditCount),
        # number of credits used for the conversion
        ('Consumed credit count: {}', converter.getConsumedCreditCount),
        # unique identifier for the conversion
        ('Job id: {}', converter.getJobId),
        # total number of pages in the output document
        ('Page count: {}', converter.getPageCount),
        # size of the output data in bytes
        ('Output size: {}', converter.getOutputSize),
    )
    for template, getter in report:
        print(template.format(getter()))
except pdfcrowd.Error as err:
    # report the API error on stderr, then let it propagate
    sys.stderr.write('Pdfcrowd Error: {}\n'.format(err))
    raise
import urllib.parse

from django.http import HttpResponse
from django.views.decorators.http import require_POST

import pdfcrowd


# the recommended method is POST
@require_POST
def convert(request):
    """Convert a local PDF file to text and return it as a download.

    The conversion output is written straight into the HTTP response, which
    is sent as a text/plain attachment named "invoice.txt". When the API
    client raises pdfcrowd.Error, a text/plain response carrying the error
    message is returned, with the error code used as the HTTP status.
    """
    try:
        # instantiate the PDF to Text API client
        converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # prepare the response and its HTTP headers
        download = HttpResponse(content_type='text/plain')
        download['Cache-Control'] = 'max-age=0'
        download['Accept-Ranges'] = 'none'
        quoted_name = urllib.parse.quote('invoice.txt', safe='')
        download['Content-Disposition'] = "attachment; filename*=UTF-8''" + quoted_name

        # convert the input file, using the response as the output stream
        converter.convertFileToStream('/path/to/invoice.pdf', download)
        return download
    except pdfcrowd.Error as err:
        # send the error in the HTTP response
        return HttpResponse(err.getMessage(),
                            status=err.getCode(),
                            content_type='text/plain')
import urllib.parse

from django.http import HttpResponse
from django.views.decorators.http import require_POST

import pdfcrowd


# the recommended method is POST
@require_POST
def convert(request):
    """Convert a PDF located at a URL to text and return it as a download.

    The conversion output is written straight into the HTTP response, which
    is sent as a text/plain attachment named "invoice.txt". When the API
    client raises pdfcrowd.Error, a text/plain response carrying the error
    message is returned, with the error code used as the HTTP status.
    """
    source_url = 'https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf'
    try:
        # instantiate the PDF to Text API client
        converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # prepare the response and its HTTP headers
        download = HttpResponse(content_type='text/plain')
        download['Cache-Control'] = 'max-age=0'
        download['Accept-Ranges'] = 'none'
        quoted_name = urllib.parse.quote('invoice.txt', safe='')
        download['Content-Disposition'] = "attachment; filename*=UTF-8''" + quoted_name

        # convert the document at the URL, using the response as the output stream
        converter.convertUrlToStream(source_url, download)
        return download
    except pdfcrowd.Error as err:
        # send the error in the HTTP response
        return HttpResponse(err.getMessage(),
                            status=err.getCode(),
                            content_type='text/plain')
import urllib.parse

from django.http import HttpResponse
from django.views.decorators.http import require_POST

import pdfcrowd


# the recommended method is POST
@require_POST
def convert(request):
    """Convert in-memory PDF data to text and return it as a download.

    The input PDF is read from disk into memory and passed to the API as raw
    data. The conversion output is written straight into the HTTP response,
    which is sent as a text/plain attachment named "invoice.txt". When the
    API client raises pdfcrowd.Error, a text/plain response carrying the
    error message is returned, with the error code used as the HTTP status.
    """
    try:
        # create the API client instance
        client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # set HTTP response headers
        response = HttpResponse(content_type='text/plain')
        response['Cache-Control'] = 'max-age=0'
        response['Accept-Ranges'] = 'none'
        response['Content-Disposition'] = "attachment; filename*=UTF-8''" + urllib.parse.quote('invoice.txt', safe='')

        # read the input PDF into memory; the "with" block closes the file
        # handle so it is not leaked on every request
        with open('/path/to/hello_world.pdf', 'rb') as input_file:
            pdf_data = input_file.read()

        # run the conversion and write the result into the output stream
        client.convertRawDataToStream(pdf_data, response)
        return response
    except pdfcrowd.Error as why:
        # send the error in the HTTP response
        return HttpResponse(why.getMessage(),
                            status=why.getCode(),
                            content_type='text/plain')
import urllib.parse

from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# the recommended method is POST
@app.route('/', methods=['POST'])
def convert():
    """Convert a local PDF file to text and return it as a download.

    The conversion output is sent as a text/plain attachment named
    "invoice.txt". When the API client raises pdfcrowd.Error, a text/plain
    response carrying the error message is returned, with the error code
    used as the HTTP status.
    """
    try:
        # instantiate the PDF to Text API client
        converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # convert the input file; the output is kept in the "txt" variable
        txt = converter.convertFile('/path/to/invoice.pdf')

        # wrap the output in a response and set its HTTP headers
        reply = Response(txt, mimetype='text/plain')
        reply.headers['Cache-Control'] = 'max-age=0'
        reply.headers['Accept-Ranges'] = 'none'
        quoted_name = urllib.parse.quote('invoice.txt', safe='')
        reply.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + quoted_name
        return reply
    except pdfcrowd.Error as err:
        # send the error in the HTTP response
        return Response(err.getMessage(),
                        status=err.getCode(),
                        mimetype='text/plain')
import urllib.parse

from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# the recommended method is POST
@app.route('/', methods=['POST'])
def convert():
    """Convert a PDF located at a URL to text and return it as a download.

    The conversion output is sent as a text/plain attachment named
    "invoice.txt". When the API client raises pdfcrowd.Error, a text/plain
    response carrying the error message is returned, with the error code
    used as the HTTP status.
    """
    source_url = 'https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf'
    try:
        # instantiate the PDF to Text API client
        converter = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # convert the document at the URL; the output is kept in the "txt" variable
        txt = converter.convertUrl(source_url)

        # wrap the output in a response and set its HTTP headers
        reply = Response(txt, mimetype='text/plain')
        reply.headers['Cache-Control'] = 'max-age=0'
        reply.headers['Accept-Ranges'] = 'none'
        quoted_name = urllib.parse.quote('invoice.txt', safe='')
        reply.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + quoted_name
        return reply
    except pdfcrowd.Error as err:
        # send the error in the HTTP response
        return Response(err.getMessage(),
                        status=err.getCode(),
                        mimetype='text/plain')
import urllib.parse

from flask import Flask, Response
import pdfcrowd

app = Flask(__name__)


# the recommended method is POST
@app.route('/', methods=['POST'])
def convert():
    """Convert in-memory PDF data to text and return it as a download.

    The input PDF is read from disk into memory and passed to the API as raw
    data. The conversion output is sent as a text/plain attachment named
    "invoice.txt". When the API client raises pdfcrowd.Error, a text/plain
    response carrying the error message is returned, with the error code
    used as the HTTP status.
    """
    try:
        # create the API client instance
        client = pdfcrowd.PdfToTextClient('demo', 'ce544b6ea52a5621fb9d55f8b542d14d')

        # read the input PDF into memory; the "with" block closes the file
        # handle so it is not leaked on every request
        with open('/path/to/hello_world.pdf', 'rb') as input_file:
            pdf_data = input_file.read()

        # run the conversion and store the result into the "txt" variable
        txt = client.convertRawData(pdf_data)

        # send the result and set HTTP response headers
        response = Response(txt, mimetype='text/plain')
        response.headers['Cache-Control'] = 'max-age=0'
        response.headers['Accept-Ranges'] = 'none'
        response.headers['Content-Disposition'] = "attachment; filename*=UTF-8''" + urllib.parse.quote('invoice.txt', safe='')
        return response
    except pdfcrowd.Error as why:
        # send the error in the HTTP response
        return Response(why.getMessage(),
                        status=why.getCode(),
                        mimetype='text/plain')