March 13, 2023 Tutorials

Convert HTML to PDF in C

In this article, we will show how to convert a webpage or an HTML document to PDF in C using the Pdfcrowd API.

We will show the convert() function that provides the conversion functionality. We will demonstrate how to use it to create PDF from various input sources, such as a URL, a local HTML file, or an HTML string. By the end of this article, you should have a good understanding of how to integrate the Pdfcrowd API into your C programs for HTML to PDF conversion.

The API is cloud-based and accessible over HTTP. We will use libcurl to communicate with the API. libcurl is a widely used library available on all major platforms.

To quickly incorporate the Pdfcrowd API into your own code, you can download the complete C code with examples.

Step by Step Tutorial

To get started, you will need a C compiler and the libcurl development files installed on your system.

1. Create a new C file and include the following header files:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

2. Define the Pdfcrowd API endpoint and your API credentials as constants. For the purposes of this tutorial, we use demo credentials:

/* URL that every conversion request is posted to. */
#define API_ENDPOINT "https://api.pdfcrowd.com/convert/latest/"
/* Demo credentials, sent as "USERNAME:API_KEY" via HTTP basic authentication;
 * replace them with your own account's values. */
#define USERNAME "demo"
#define API_KEY "ce544b6ea52a5621fb9d55f8b542d14d"

3. Define structures for specifying conversion options, files, and HTTP response.

typedef struct {
    const char *name;
    const char *value;
} FormField;

/* One in-memory file uploaded as a file part of the multipart request. */
typedef struct {
    const char *name;       /* form field name of the part */
    const char *filename;   /* file name reported for the part */
    const char *mime_type;  /* content type reported for the part */
    const char *data;       /* file contents; need not be zero-terminated */
    size_t data_len;        /* number of bytes in data */
} FormFile;

/* Growable memory buffer that accumulates the HTTP response body. */
typedef struct {
    char *buffer;  /* heap buffer grown with realloc(); NULL until data arrives */
    size_t size;   /* bytes stored so far, not counting the trailing zero byte */
} ResponseBody;

4. Create a callback function to receive the response data from the API into a memory buffer:

/*
 * libcurl write callback: appends one chunk of the response to a growing
 * memory buffer.
 *
 * ptr          - the received bytes (not zero-terminated)
 * size, nmemb  - the chunk is size * nmemb bytes long
 * data         - the ResponseBody registered through CURLOPT_WRITEDATA
 *
 * Returns the number of bytes stored. Returning 0 for a non-empty chunk tells
 * libcurl the write failed, which aborts the transfer. On failure the bytes
 * collected so far are left intact in the ResponseBody, so its owner can
 * still free them.
 */
size_t write_callback(char *ptr, size_t size, size_t nmemb, void *data)
{
    size_t realsize = size * nmemb;
    ResponseBody* mem = (ResponseBody*) data;
    char *grown;

    /* refuse a chunk that would overflow "size + realsize + 1" below */
    if (mem->size >= (size_t) -1 - realsize) {
        fprintf(stderr, "Not enough buffer\n");
        return 0;
    }

    /* grow through a temporary: assigning realloc's result straight to
       mem->buffer would lose (leak) the old buffer when realloc fails */
    grown = realloc(mem->buffer, mem->size + realsize + 1);
    if (grown == NULL) {
        fprintf(stderr, "Not enough buffer\n");
        return 0;
    }
    mem->buffer = grown;

    memcpy(&(mem->buffer[mem->size]), ptr, realsize);
    mem->size += realsize;
    /* keep the buffer zero-terminated so it can also be used as a string */
    mem->buffer[mem->size] = 0;

    return realsize;
}

An alternative approach to handling the Pdfcrowd API response is to write it directly to a file with fwrite() instead of accumulating it in the ResponseBody struct. The drawback is that if the conversion fails for some reason, the file will contain the error message instead of the PDF output.

5. Implement the main conversion function:

/*
 * Send one conversion request to the Pdfcrowd API and save the result.
 *
 * fields, num_fields - plain form fields (conversion options) to post
 * files, num_files   - in-memory files to upload as file parts; may be
 *                      NULL when num_files is 0
 * output_filename    - path the converted document is written to on success
 *
 * Returns -1 on a local error (libcurl setup, failed transfer, or the output
 * file could not be written); otherwise the HTTP status code of the response.
 * 200 means the conversion succeeded and the output file was written; any
 * other status is a conversion error, see
 * https://pdfcrowd.com/api/status-codes/
 */
long convert(FormField *fields, int num_fields, FormFile *files, int num_files, const char* output_filename) {
    int i;
    curl_mime *mime = NULL;
    curl_mimepart *part;
    ResponseBody response_body = { 0 };
    CURLcode res;
    long result = -1;

    /* initialize CURL library */
    CURL *curl = curl_easy_init();
    if (!curl) {
        fprintf(stderr, "Failed to initialize libcurl\n");
        return result;
    }

    /* set conversion entry point */
    curl_easy_setopt(curl, CURLOPT_URL, API_ENDPOINT);

    /* set Pdfcrowd username and API key */
    curl_easy_setopt(curl, CURLOPT_USERPWD, USERNAME ":" API_KEY);
    curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);

    /* set HTTP multipart post data */
    curl_easy_setopt(curl, CURLOPT_POST, 1L);

    /* No Content-Type header is set by hand: libcurl derives the
       "multipart/form-data; boundary=..." header from the mime structure,
       and a hand-written boundary would not match the one it generates. */
    mime = curl_mime_init(curl);
    if (!mime) {
        fprintf(stderr, "Failed to create multipart form\n");
        goto cleanup;
    }

    for (i = 0; i < num_fields; i++) {
        part = curl_mime_addpart(mime);
        if (!part
            || curl_mime_name(part, fields[i].name) != CURLE_OK
            || curl_mime_data(part, fields[i].value, CURL_ZERO_TERMINATED) != CURLE_OK) {
            fprintf(stderr, "Failed to add form field: %s\n", fields[i].name);
            goto cleanup;
        }
    }

    for (i = 0; i < num_files; i++) {
        part = curl_mime_addpart(mime);
        if (!part
            || curl_mime_name(part, files[i].name) != CURLE_OK
            || curl_mime_filename(part, files[i].filename) != CURLE_OK
            || curl_mime_type(part, files[i].mime_type) != CURLE_OK
            || curl_mime_data(part, files[i].data, files[i].data_len) != CURLE_OK) {
            fprintf(stderr, "Failed to add form file: %s\n", files[i].filename);
            goto cleanup;
        }
    }

    curl_easy_setopt(curl, CURLOPT_MIMEPOST, mime);

    /* set the callback for writing the response into a memory buffer */
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void*) &response_body);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);

    /* perform the conversion */
    res = curl_easy_perform(curl);

    if (res != CURLE_OK) {
        fprintf(stderr, "Failed to perform multipart post: %s\n", curl_easy_strerror(res));
    } else {
        curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &result);
        if (result != 200) {
            /* print details about the Pdfcrowd error */
            fprintf(stderr, "Pdfcrowd Error Code: %ld\n", result);
            if (response_body.buffer != NULL) {
                fprintf(stderr, "Pdfcrowd Error Details: ");
                fwrite(response_body.buffer, 1, response_body.size, stderr);
                fprintf(stderr, "\n");
            }
        } else {
            /* success, write the result to the output file */
            FILE *fp = fopen(output_filename, "wb");
            if (!fp) {
                fprintf(stderr, "Error opening output file: %s\n", output_filename);
                /* no file was produced, so do not report HTTP 200 as success */
                result = -1;
            } else {
                /* an empty body is treated as a write failure */
                int failed = response_body.size == 0
                    || fwrite(response_body.buffer, response_body.size, 1, fp) != 1;
                /* fclose() flushes buffered data, so its result matters too */
                if (fclose(fp) != 0) {
                    failed = 1;
                }
                if (failed) {
                    fprintf(stderr, "Error writting to file: %s\n", output_filename);
                    result = -1;
                }
            }
        }
    }

cleanup:
    /* do final cleanup; free() and curl_mime_free() both accept NULL */
    free(response_body.buffer);
    curl_easy_cleanup(curl);
    curl_mime_free(mime);
    /* NOTE(review): this tears down libcurl's global state; a program that
       calls curl_global_init() itself or uses libcurl elsewhere probably
       should not do that here - confirm before reusing in a larger program */
    curl_global_cleanup();

    return result;
}

Now we can use the convert() function in examples.

Convert URL to PDF

/* The option list is small and fixed, so it lives on the stack:
   there is no allocation to check for failure or to free afterwards. */
FormField fields[4];
fields[0].name = "input_format";
fields[0].value = "html";

fields[1].name = "output_format";
fields[1].value = "pdf";

fields[2].name = "page_size";
fields[2].value = "letter";

/* the web page to convert */
fields[3].name = "url";
fields[3].value = "https://example.com/";

convert(fields, 4, NULL, 0, "example_url.pdf");

Convert HTML String to PDF

/* The option list is small and fixed, so it lives on the stack:
   there is no allocation to check for failure or to free afterwards. */
FormField fields[4];
fields[0].name = "input_format";
fields[0].value = "html";

fields[1].name = "output_format";
fields[1].value = "pdf";

fields[2].name = "page_size";
fields[2].value = "letter";

/* the HTML markup to convert, passed inline */
fields[3].name = "text";
fields[3].value = "<h1>Hello from Pdfcrowd</h1>";

convert(fields, 4, NULL, 0, "example_text.pdf");

Convert Local HTML File to PDF

To perform this test, we will need a helper function that can read a local file and populate the FormFile structure with the file's contents.

/*
 * Read a whole local file into memory and describe it as an upload part.
 *
 * filename - path of the file to read
 * target   - filled in on success: form field name "file", the given
 *            filename, MIME type "application/octet-stream", and the
 *            file contents with their length
 *
 * Returns 0 on success and 1 on failure; target is left untouched on
 * failure. On success target->data is a malloc()ed buffer the caller must
 * free, and target->filename points at the filename argument (it is not
 * copied), so that string must stay valid while target is in use.
 */
int read_file(const char* filename, FormFile* target) {
    FILE* fp;
    long size;
    char* buffer;
    size_t bytesRead;

    fp = fopen(filename, "rb");
    if (fp == NULL) {
        fprintf(stderr, "Error: could not open file: %s\n", filename);
        return 1;
    }

    /* determine the file size */
    if (fseek(fp, 0, SEEK_END) != 0
        || (size = ftell(fp)) < 0
        || fseek(fp, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Error: could not determine file size\n");
        fclose(fp);
        return 1;
    }

    /* allocate memory for the file contents; malloc(0) may return NULL,
       so request at least one byte to keep empty files from looking like
       an allocation failure */
    buffer = malloc(size > 0 ? (size_t) size : 1);
    if (buffer == NULL) {
        fprintf(stderr, "Error: could not allocate memory\n");
        fclose(fp);
        return 1;
    }

    /* read the file into memory */
    bytesRead = fread(buffer, 1, (size_t) size, fp);
    if (bytesRead != (size_t) size) {
        fprintf(stderr, "Error: could not read file\n");
        free(buffer);
        fclose(fp);
        return 1;
    }

    fclose(fp);

    target->name = "file";
    target->filename = filename;
    target->mime_type = "application/octet-stream";
    target->data = buffer;
    target->data_len = (size_t) size;
    return 0;
}

Now the test will look like:

/* Both lists are small and fixed, so they live on the stack:
   there is no allocation to check for failure or to free afterwards. */
FormFile files[1];
FormField fields[3];
fields[0].name = "input_format";
fields[0].value = "html";

fields[1].name = "output_format";
fields[1].value = "pdf";

fields[2].name = "page_size";
fields[2].value = "letter";

if(read_file("your-file.html", &files[0])) {
    fprintf(stderr, "Error reading file\n");
} else {
    convert(fields, 3, files, 1, "example_file.pdf");
    /* read_file() allocated the file contents with malloc();
       release them once the conversion is done */
    free((void*) files[0].data);
}

Final Tips

See the list of all available conversion options to customize your PDF output.
The same approach can be used for all Pdfcrowd REST API conversions. For example, to convert HTML to an image, just replace:

fields[1].value = "pdf";  /* value of the "output_format" field */

with

fields[1].value = "jpg";  /* request image output instead of PDF */

Pdfcrowd offers API clients for many programming languages, but not for all of them; Swift, Perl, and D, for example, are not covered. By including the convert() function in your own C library and exporting it, you can create a bridge that lets these languages access the Pdfcrowd API functionality.

Step by Step Tutorial

Convert URL to PDF

Convert HTML String to PDF

Convert Local HTML File to PDF

Final Tips

Categories

Most Read