PDF to Text Java Examples

This page contains various examples of using the PDF to Text API in Java. The examples are complete and fully functional. Read more about how to convert PDF to Text in Java.

Basic examples

Basic examples

Convert a local PDF file to a text file

import com.pdfcrowd.*;
import java.io.*;

public class ApiTest {
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and write the result to a file
            client.convertFileToFile("/path/to/invoice.pdf", "invoice.txt");
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert a local PDF file to in-memory text

import com.pdfcrowd.*;
import java.io.*;

public class ApiTest {
    public static void main(String[] args) throws Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and store the result into the "txt" variable
            byte[] txt = client.convertFile("/path/to/invoice.pdf");

            // at this point the "txt" variable contains TXT raw data and
            // can be sent in an HTTP response, saved to a file, etc.
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert a local PDF file and write the resulting text to an output stream

import com.pdfcrowd.*;
import java.io.*;

/**
 * Example: convert a local PDF file and write the resulting text to an
 * output stream.
 */
public class ApiTest {
    /**
     * Runs the conversion, writing the output into "invoice.txt" through a
     * {@link FileOutputStream}. Failures are reported on stderr and rethrown.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the output file cannot be opened, written or closed
     * @throws Pdfcrowd.Error propagated from the client
     */
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // create an output stream for the conversion result;
            // try-with-resources closes it even when the conversion throws,
            // so the file handle is not leaked on the error path
            try (FileOutputStream outputStream = new FileOutputStream("invoice.txt")) {
                // run the conversion and write the result into the output stream
                client.convertFileToStream("/path/to/invoice.pdf", outputStream);
            }
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert a PDF file from a URL to a text file

import com.pdfcrowd.*;
import java.io.*;

public class ApiTest {
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and write the result to a file
            client.convertUrlToFile("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf", "invoice.txt");
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert a PDF file from a URL to in-memory text

import com.pdfcrowd.*;
import java.io.*;

public class ApiTest {
    public static void main(String[] args) throws Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and store the result into the "txt" variable
            byte[] txt = client.convertUrl("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf");

            // at this point the "txt" variable contains TXT raw data and
            // can be sent in an HTTP response, saved to a file, etc.
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert a PDF file from a URL and write the resulting text to an output stream

import com.pdfcrowd.*;
import java.io.*;

/**
 * Example: convert a PDF available at a URL and write the resulting text to
 * an output stream.
 */
public class ApiTest {
    /**
     * Runs the conversion, writing the output into "invoice.txt" through a
     * {@link FileOutputStream}. Failures are reported on stderr and rethrown.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the output file cannot be opened, written or closed
     * @throws Pdfcrowd.Error propagated from the client
     */
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // create an output stream for the conversion result;
            // try-with-resources closes it even when the conversion throws,
            // so the file handle is not leaked on the error path
            try (FileOutputStream outputStream = new FileOutputStream("invoice.txt")) {
                // run the conversion and write the result into the output stream
                client.convertUrlToStream("https://pdfcrowd.com/static/pdf/apisamples/invoice.pdf", outputStream);
            }
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert an in-memory PDF to a text file

import com.pdfcrowd.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;

public class ApiTest {
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and write the result to a file
            client.convertRawDataToFile(Files.readAllBytes(Paths.get("/path/to/hello_world.pdf")), "invoice.txt");
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert an in-memory PDF to in-memory text

import com.pdfcrowd.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;

public class ApiTest {
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // run the conversion and store the result into the "txt" variable
            byte[] txt = client.convertRawData(Files.readAllBytes(Paths.get("/path/to/hello_world.pdf")));

            // at this point the "txt" variable contains TXT raw data and
            // can be sent in an HTTP response, saved to a file, etc.
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Convert an in-memory PDF and write the resulting text to an output stream

import com.pdfcrowd.*;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Example: convert PDF bytes held in memory and write the resulting text to
 * an output stream.
 */
public class ApiTest {
    /**
     * Runs the conversion, writing the output into "invoice.txt" through a
     * {@link FileOutputStream}. Failures are reported on stderr and rethrown.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the input cannot be read, or the output file
     *         cannot be opened, written or closed
     * @throws Pdfcrowd.Error propagated from the client
     */
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // create an output stream for the conversion result;
            // try-with-resources closes it even when reading the input or
            // the conversion throws, so the file handle is not leaked
            try (FileOutputStream outputStream = new FileOutputStream("invoice.txt")) {
                // run the conversion and write the result into the output stream
                client.convertRawDataToStream(Files.readAllBytes(Paths.get("/path/to/hello_world.pdf")), outputStream);
            }
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Get info about the current conversion

import com.pdfcrowd.*;
import java.io.*;

public class ApiTest {
    public static void main(String[] args) throws IOException, Pdfcrowd.Error {
        try {
            // create the API client instance
            Pdfcrowd.PdfToTextClient client =
                new Pdfcrowd.PdfToTextClient("demo", "ce544b6ea52a5621fb9d55f8b542d14d");

            // configure the conversion
            client.setDebugLog(true);
            client.setPageBreakMode("default");

            // run the conversion and write the result to a file
            client.convertFileToFile("/path/to/invoice.pdf", "invoice.txt");
            
            // print URL to the debug log
            System.out.println("Debug log url: " + client.getDebugLogUrl());
            
            // print the number of available conversion credits in your account
            System.out.println("Remaining credit count: " + client.getRemainingCreditCount());
            
            // print the number of credits consumed by the conversion
            System.out.println("Consumed credit count: " + client.getConsumedCreditCount());
            
            // print the unique ID of the conversion
            System.out.println("Job id: " + client.getJobId());
            
            // print the total number of pages in the output document
            System.out.println("Page count: " + client.getPageCount());
            
            // print the size of the output in bytes
            System.out.println("Output size: " + client.getOutputSize());
        }
        catch(Pdfcrowd.Error why) {
            // report the error
            System.err.println("Pdfcrowd Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
        catch(IOException why) {
            // report the error
            System.err.println("IO Error: " + why);

            // rethrow or handle the exception
            throw why;
        }
    }
}

Advanced examples

Template rendering Examples