PDFCrowd Blog
Product updates, tips & tricks

Convert HTML to PDF in C++

In the previous article in this series we showed how to use the Pdfcrowd API in C to convert web pages and HTML documents to PDF. While it is perfectly fine to use C code in a C++ application, C++ programmers may prefer to integrate the API using modern C++.

You can download the complete C++ code with examples for quick integration into your project.

 

Step by Step Tutorial

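You'll need a C++ compiler with C++11 support and the libcurl development files installed on your system. The code uses libcurl's MIME API (curl_mime_*), which requires libcurl 7.56.0 or newer.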

1. Create a new C++ file and include the following header files:

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <curl/curl.h>

2. Set up the Pdfcrowd API endpoint and your API credentials as constants. We will use the demo credentials in this tutorial:

// Pdfcrowd conversion endpoint and the demo credentials used in this tutorial.
// Both the characters and the pointers are const so the settings cannot be
// reassigned by accident at run time.
const char* const api_url = "https://api.pdfcrowd.com/convert/latest/";
const char* const username = "demo";
const char* const api_key = "ce544b6ea52a5621fb9d55f8b542d14d";

3. Create structures to specify conversion options, files, and HTTP response.

// A single text field of the multipart form: one conversion option
// such as {"page_size", "letter"}.
struct FormField {
    std::string name;   // form field name
    std::string value;  // form field value
};

// A file part of the multipart form, holding the file content in memory.
struct FormFile {
    std::string name;        // form field name the file is sent under
    std::string filename;    // file name reported in the multipart part
    std::vector<char> data;  // raw file content
};

// In-memory buffer that collects the raw bytes of the HTTP response.
using ResponseBody = std::vector<char>;

4. Create a callback function that receives the response data from the API and stores it into a memory buffer.

// libcurl write callback: appends the received chunk to the response buffer.
//
// ptr      - start of the received data (not NUL-terminated)
// size     - size of one data item
// nmemb    - number of data items
// userdata - pointer to the std::vector<char> response buffer (ResponseBody)
//            registered with CURLOPT_WRITEDATA
//
// Returns the number of bytes stored. Returning a value different from
// size * nmemb tells libcurl to abort the transfer with a write error.
size_t write_callback(char* ptr, size_t size, size_t nmemb, void* userdata) {
    std::vector<char>* response_body = static_cast<std::vector<char>*>(userdata);
    const size_t data_size = size * nmemb;
    if (response_body == nullptr) {
        // no buffer to store into; a short count makes libcurl stop the transfer
        return 0;
    }
    if (data_size == 0) {
        return 0;
    }
    try {
        response_body->insert(response_body->end(), ptr, ptr + data_size);
    } catch (...) {
        // This function is called from libcurl's C code, so no exception
        // (e.g. an allocation failure while growing the buffer) may escape.
        // Report the failure to libcurl through a short count instead.
        return 0;
    }
    return data_size;
}

An alternative approach is to write the response directly to the output file. The drawback is that if the conversion fails, the file will contain the error message instead of the PDF output.

5. Create a helper object that handles proper memory deallocation for libcurl:

struct CurlHolder {
    CURL* curl;
    curl_slist* headers;
    curl_mime* mime;

    CurlHolder() {
        curl = curl_easy_init();
        if (curl) {
            mime = curl_mime_init(curl);
        }
        headers = nullptr;
    }

    ~CurlHolder() {
        if(curl) {
            curl_easy_cleanup(curl);
            curl_mime_free(mime);
            if(headers) {
                curl_slist_free_all(headers);
            }
            curl_global_cleanup();
        }
    }

    // make holder non-copyable
    CurlHolder(const CurlHolder&) = delete;
    CurlHolder& operator=(const CurlHolder&) = delete;
};

6. Implement the main conversion function:

long convert(const std::vector<FormField>& fields, const std::vector<FormFile>& files, const char* output_filename) {
    // result -1 means error, otherwise it containse HTTP status code
    // result 200 means success
    // result greater than 200 means the conversion error, details: https://pdfcrowd.com/api/status-codes/
    long result = -1;

    // initialize CURL library
    CurlHolder curl;
    if(!curl.curl) {
        std::cerr << "Failed to initialize libcurl" << std::endl;
        return result;
    }

    // set conversion entry point
    curl_easy_setopt(curl.curl, CURLOPT_URL, api_url);

    // set Pdfcrowd username and API key
    std::ostringstream pdfcrowd_credentials;
    pdfcrowd_credentials << username << ":" << api_key;
    curl_easy_setopt(curl.curl, CURLOPT_USERPWD, pdfcrowd_credentials.str().c_str());
    curl_easy_setopt(curl.curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);

    // set HTTP multipart post data
    curl_easy_setopt(curl.curl, CURLOPT_POST, 1L);

    curl.headers = curl_slist_append(nullptr, "Content-Type: multipart/form-data");
    curl.headers = curl_slist_append(curl.headers, "boundary=----------ThIs_Is_tHe_bOUnDary_$");
    curl_easy_setopt(curl.curl, CURLOPT_HTTPHEADER, curl.headers);

    for (const auto& field : fields) {
        curl_mimepart* part = curl_mime_addpart(curl.mime);
        curl_mime_name(part, field.name.c_str());
        curl_mime_data(part, field.value.c_str(), CURL_ZERO_TERMINATED);
    }

    for (const auto& file : files) {
        curl_mimepart* part = curl_mime_addpart(curl.mime);
        curl_mime_name(part, file.name.c_str());
        curl_mime_filename(part, file.filename.c_str());
        curl_mime_type(part, "application/octet-stream");
        curl_mime_data(part, file.data.data(), file.data.size());
    }

    curl_easy_setopt(curl.curl, CURLOPT_MIMEPOST, curl.mime);

    // set the callback for writing the response into a memory buffer
    ResponseBody response_body;
    curl_easy_setopt(curl.curl, CURLOPT_WRITEDATA, static_cast<void*>(&response_body));
    curl_easy_setopt(curl.curl, CURLOPT_WRITEFUNCTION, write_callback);

    // perform the conversion
    CURLcode res = curl_easy_perform(curl.curl);

    if (res != CURLE_OK) {
        std::cerr << "Failed to perform multipart post: " << curl_easy_strerror(res) << std::endl;
        return result;
    }

    curl_easy_getinfo(curl.curl, CURLINFO_RESPONSE_CODE, &result);
    if (result != 200) {
        // print details about the Pdfcrowd error
        std::cerr << "Pdfcrowd Error Code: " << result << std::endl;
        std::cerr << "Pdfcrowd Error Details: " << response_body.data() << std::endl;
        return result;
    }

    // success, write the result to the output file
    std::ofstream output_file(output_filename, std::ios::out | std::ios::binary);
    if (!output_file.is_open()) {
        std::cerr << "Error opening output file: " << output_filename << std::endl;
        return -1;
    }

    output_file.write(response_body.data(), response_body.size());
    if (!output_file.good()) {
        std::cerr << "Error writing to file: " << output_filename << std::endl;
        result = -1;
    }
    output_file.close();

    return result;
}

Now we can use the convert() function in examples.

Convert URL to PDF

std::vector<FormField> fields = {
    {"input_format", "html"},
    {"output_format", "pdf"},
    {"page_size", "letter"},    
    {"url", "https://example.com/"}
};

convert(fields, std::vector<FormFile>(), "example_url.pdf");

Convert HTML String to PDF

std::vector<FormField> fields = {
     {"input_format", "html"},
     {"output_format", "pdf"},
     {"page_size", "letter"},
     {"text", "<h1>Hello from Pdfcrowd</h1>"}
};

convert(fields, std::vector<FormFile>(), "example_text.pdf");

Convert Local HTML File to PDF

// Conversion options; the document itself is uploaded as a file part.
std::vector<FormField> fields = {
    {"input_format", "html"},
    {"output_format", "pdf"},
    {"page_size", "letter"}
};

// Open in binary mode so the bytes are uploaded exactly as stored on disk
// (text mode may translate line endings or stop early on some platforms).
std::ifstream in_stream("your-file.html", std::ios::in | std::ios::binary);
if(!in_stream) {
    std::cerr << "Read file error" << std::endl;
} else {
    // read the whole file into memory
    std::vector<char> file_data((std::istreambuf_iterator<char>(in_stream)),
                                std::istreambuf_iterator<char>());

    // push_back of a temporary moves the buffer into the vector; elements of
    // a braced initializer list would be copied even with std::move
    std::vector<FormFile> files;
    files.push_back(FormFile{"file", "your-file.html", std::move(file_data)});

    convert(fields, files, "example_file.pdf");
}

Final Tips

  • See the list of all conversion options available to customize your PDF output.
  • You can use the same approach for all Pdfcrowd REST API conversions. For instance, if you want to convert HTML to an image, you can simply replace:
{"output_format", "pdf"}

with

{"output_format", "jpg"}