Convert HTML to PDF in C++
In the previous article in this series we showed how to use the Pdfcrowd API in C to convert web pages and HTML documents to PDF. While it is perfectly fine to use C code in a C++ application, C++ programmers may prefer to integrate the API using modern C++.
You can download the complete C++ code with examples for quick integration into your project.
Step by Step Tutorial
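You'll need a C++11 (or newer) compiler and the libcurl development files installed on your system. The code uses libcurl's MIME API (the curl_mime_* functions), which requires libcurl 7.56.0 or later.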
1. Create a new C++ file and include the following header files:
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <curl/curl.h>
2. Set up the Pdfcrowd API endpoint and your API credentials as constants. We will use the demo credentials in this tutorial:
// Pdfcrowd API endpoint that conversion requests are posted to.
const char* api_url = "https://api.pdfcrowd.com/convert/latest/";
// Demo credentials used in this tutorial; sent together as "username:api_key".
const char* username = "demo";
const char* api_key = "ce544b6ea52a5621fb9d55f8b542d14d";
3. Create structures that hold the conversion options, the files to upload, and the HTTP response body:
// One name/value pair posted as a plain form field of the multipart request.
struct FormField {
std::string name;   // form field name
std::string value;  // form field value
};
// One file uploaded as a part of the multipart request.
struct FormFile {
std::string name;        // form field name the file is posted under
std::string filename;    // file name reported for the uploaded part
std::vector<char> data;  // raw file contents
};
// Byte buffer that accumulates the raw HTTP response in memory.
using ResponseBody = std::vector<char>;
4. Create a callback function that receives the response data from the API and stores it into a memory buffer.
// libcurl write callback: appends each received chunk of the response to the
// ResponseBody registered through CURLOPT_WRITEDATA.
//
// ptr      - received bytes (not null-terminated)
// size     - element size
// nmemb    - element count; the chunk is size * nmemb bytes long
// userdata - pointer to the ResponseBody that collects the response
//
// Returns the number of bytes stored. Returning fewer bytes than were offered
// makes libcurl abort the transfer with a write error.
size_t write_callback(char* ptr, size_t size, size_t nmemb, void* userdata) {
    const size_t data_size = size * nmemb;
    if (data_size == 0) {
        return 0;  // nothing to store
    }

    ResponseBody* response_body = static_cast<ResponseBody*>(userdata);
    if (!response_body) {
        return 0;  // no destination buffer: abort the transfer
    }

    try {
        response_body->insert(response_body->end(), ptr, ptr + data_size);
    } catch (...) {
        // libcurl is C code, so an exception (e.g. std::bad_alloc) must not
        // unwind through it; report the failure via the return value instead.
        return 0;
    }
    return data_size;
}
An alternative approach is to write the response directly to the output file. The drawback is that if the conversion fails, the file will contain the error message instead of the PDF output.
5. Create a helper object that handles proper memory deallocation for libcurl:
struct CurlHolder {
CURL* curl;
curl_slist* headers;
curl_mime* mime;
CurlHolder() {
curl = curl_easy_init();
if (curl) {
mime = curl_mime_init(curl);
}
headers = nullptr;
}
~CurlHolder() {
if(curl) {
curl_easy_cleanup(curl);
curl_mime_free(mime);
if(headers) {
curl_slist_free_all(headers);
}
curl_global_cleanup();
}
}
// make holder non-copyable
CurlHolder(const CurlHolder&) = delete;
CurlHolder& operator=(const CurlHolder&) = delete;
};
6. Implement the main conversion function:
long convert(const std::vector<FormField>& fields, const std::vector<FormFile>& files, const char* output_filename) {
// result -1 means error, otherwise it containse HTTP status code
// result 200 means success
// result greater than 200 means the conversion error, details: https://pdfcrowd.com/api/status-codes/
long result = -1;
// initialize CURL library
CurlHolder curl;
if(!curl.curl) {
std::cerr << "Failed to initialize libcurl" << std::endl;
return result;
}
// set conversion entry point
curl_easy_setopt(curl.curl, CURLOPT_URL, api_url);
// set Pdfcrowd username and API key
std::ostringstream pdfcrowd_credentials;
pdfcrowd_credentials << username << ":" << api_key;
curl_easy_setopt(curl.curl, CURLOPT_USERPWD, pdfcrowd_credentials.str().c_str());
curl_easy_setopt(curl.curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
// set HTTP multipart post data
curl_easy_setopt(curl.curl, CURLOPT_POST, 1L);
curl.headers = curl_slist_append(nullptr, "Content-Type: multipart/form-data");
curl.headers = curl_slist_append(curl.headers, "boundary=----------ThIs_Is_tHe_bOUnDary_$");
curl_easy_setopt(curl.curl, CURLOPT_HTTPHEADER, curl.headers);
for (const auto& field : fields) {
curl_mimepart* part = curl_mime_addpart(curl.mime);
curl_mime_name(part, field.name.c_str());
curl_mime_data(part, field.value.c_str(), CURL_ZERO_TERMINATED);
}
for (const auto& file : files) {
curl_mimepart* part = curl_mime_addpart(curl.mime);
curl_mime_name(part, file.name.c_str());
curl_mime_filename(part, file.filename.c_str());
curl_mime_type(part, "application/octet-stream");
curl_mime_data(part, file.data.data(), file.data.size());
}
curl_easy_setopt(curl.curl, CURLOPT_MIMEPOST, curl.mime);
// set the callback for writing the response into a memory buffer
ResponseBody response_body;
curl_easy_setopt(curl.curl, CURLOPT_WRITEDATA, static_cast<void*>(&response_body));
curl_easy_setopt(curl.curl, CURLOPT_WRITEFUNCTION, write_callback);
// perform the conversion
CURLcode res = curl_easy_perform(curl.curl);
if (res != CURLE_OK) {
std::cerr << "Failed to perform multipart post: " << curl_easy_strerror(res) << std::endl;
return result;
}
curl_easy_getinfo(curl.curl, CURLINFO_RESPONSE_CODE, &result);
if (result != 200) {
// print details about the Pdfcrowd error
std::cerr << "Pdfcrowd Error Code: " << result << std::endl;
std::cerr << "Pdfcrowd Error Details: " << response_body.data() << std::endl;
return result;
}
// success, write the result to the output file
std::ofstream output_file(output_filename, std::ios::out | std::ios::binary);
if (!output_file.is_open()) {
std::cerr << "Error opening output file: " << output_filename << std::endl;
return -1;
}
output_file.write(response_body.data(), response_body.size());
if (!output_file.good()) {
std::cerr << "Error writing to file: " << output_filename << std::endl;
result = -1;
}
output_file.close();
return result;
}
Now we can use the convert() function in examples.
Convert URL to PDF
std::vector<FormField> fields = {
{"input_format", "html"},
{"output_format", "pdf"},
{"page_size", "letter"},
{"url", "https://example.com/"}
};
convert(fields, std::vector<FormFile>(), "example_url.pdf");
Convert HTML String to PDF
std::vector<FormField> fields = {
{"input_format", "html"},
{"output_format", "pdf"},
{"page_size", "letter"},
{"text", "<h1>Hello from Pdfcrowd</h1>"}
};
convert(fields, std::vector<FormFile>(), "example_text.pdf");
Convert Local HTML File to PDF
// Conversion options; the HTML itself is uploaded as a file, so no "url" or
// "text" field is set.
std::vector<FormField> fields = {
    {"input_format", "html"},
    {"output_format", "pdf"},
    {"page_size", "letter"}
};
// Open in binary mode so the bytes are uploaded exactly as stored on disk
// (text mode may translate line endings on some platforms).
std::ifstream in_stream("your-file.html", std::ios::in | std::ios::binary);
if(!in_stream) {
    std::cerr << "Read file error" << std::endl;
} else {
    std::vector<char> file_data((std::istreambuf_iterator<char>(in_stream)),
                                std::istreambuf_iterator<char>());
    // push_back moves the buffer into the vector; a braced initializer list
    // would copy it, because initializer_list elements are const.
    std::vector<FormFile> files;
    files.push_back(FormFile{"file", "your-file.html", std::move(file_data)});
    convert(fields, files, "example_file.pdf");
}
Final Tips
- See the list of all conversion options available to customize your PDF output.
- You can use the same approach for all Pdfcrowd REST API conversions. For instance, if you want to convert HTML to an image, you can simply replace:
{"output_format", "pdf"}
with
{"output_format", "jpg"}