Pdfcrowd - pdfcrowd.rb

raw code

  1 # Copyright (C) 2009-2013 pdfcrowd.com
  2 #
  3 # Permission is hereby granted, free of charge, to any person
  4 # obtaining a copy of this software and associated documentation
  5 # files (the "Software"), to deal in the Software without
  6 # restriction, including without limitation the rights to use,
  7 # copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 # copies of the Software, and to permit persons to whom the
  9 # Software is furnished to do so, subject to the following
 10 # conditions:
 11 #
 12 # The above copyright notice and this permission notice shall be
 13 # included in all copies or substantial portions of the Software.
 14 #
 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 16 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 17 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 18 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 19 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 21 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 22 # OTHER DEALINGS IN THE SOFTWARE.
 23 
 24 require 'net/http'
 25 require 'cgi'
 26 
 27 
 28 module Pdfcrowd
 29   # constants for setPageLayout()
 30   SINGLE_PAGE, CONTINUOUS, CONTINUOUS_FACING = 1, 2, 3
 31 
 32   # constants for setPageMode()
 33   NONE_VISIBLE, THUMBNAILS_VISIBLE, FULLSCREEN = 1, 2, 3
 34 
 35   # constants for setInitialPdfZoomType()
 36   FIT_WIDTH, FIT_HEIGHT, FIT_PAGE = 1, 2, 3
 37 
 38 
 39   #
 40   # Thrown when an error occurs.
 41   #
 42   class Error < RuntimeError
 43     attr_reader :http_code, :error
 44 
 45     def initialize(error, http_code=nil)
 46       super()
 47       @http_code = http_code
 48         @error = error
 49     end
 50 
 51     def to_s()
 52 
 53       @http_code ?  "#{@http_code} - #{@error}" : @error
 54     end
 55   end
 56 
 57 
 58   #
 59   # Pdfcrowd API client.
 60   #
 61   class Client
 62 
 63     #
 64     # Client constructor.
 65     #
 66     # username -- your username at Pdfcrowd
 67     # apikey  -- your API key
 68     #
 69     def initialize(username, apikey, hostname=nil)
 70       useSSL(false)
 71       @fields  = {
 72         'username' => username,
 73         'key' => apikey,
 74         'html_zoom' => 200,
 75         'pdf_scaling_factor' => 1
 76       }
 77       @hostname = hostname || $api_hostname;
 78     end
 79 
 80     #
 81     # Converts a web page.
 82     #
 83     # uri        -- a web page URL
 84     # outstream -- an object having method 'write(data)'; if nil then the
 85     #               return value is a string containing the PDF.
 86     #
 87     def convertURI(uri, outstream=nil)
 88         return call_api_urlencoded('/api/pdf/convert/uri/', uri, outstream)
 89     end
 90 
 91     #
 92     # Converts an in-memory html document.
 93     #
 94     # content    -- a string containing an html document
 95     # outstream -- an object having method 'write(data)'; if nil then the
 96     #               return value is a string containing the PDF.
 97     #
 98     def convertHtml(content, outstream=nil)
 99         return call_api_urlencoded('/api/pdf/convert/html/', content, outstream)
100     end
101 
102     #
103     # Converts an html file.
104     #
105     # fpath      -- a path to an html file
106     # outstream -- an object having method 'write(data)'; if nil then the
107     #               return value is a string containing the PDF.
108     #
109     def convertFile(fpath, outstream=nil)
110         return post_multipart(fpath, outstream)
111     end
112 
113     #
114     # Returns the number of available conversion tokens.
115     #
116     def numTokens()
117       uri = '/api/user/%s/tokens/' % @fields['username']
118       return Integer(call_api_urlencoded(uri))
119     end
120 
121     def useSSL(use_ssl)
122         @use_ssl = use_ssl
123         @api_uri = use_ssl ? HTTPS_API_URI : HTTP_API_URI
124     end
125 
126     def setUsername(username)
127         @fields['username'] = username
128     end
129 
130     def setApiKey(key)
131         @fields['key'] = key
132     end
133 
134     def setPageWidth(value)
135         @fields['width'] = value
136     end
137 
138     def setPageHeight(value)
139         @fields['height'] = value
140     end
141 
142     def setHorizontalMargin(value)
143         @fields['margin_right'] = @fields['margin_left'] = value.to_s()
144     end
145 
146     def setVerticalMargin(value)
147         @fields['margin_top'] = @fields['margin_bottom'] = value.to_s()
148     end
149 
150     def setPageMargins(top, right, bottom, left)
151         @fields['margin_top'] = top.to_s()
152         @fields['margin_right'] = right.to_s()
153         @fields['margin_bottom'] = bottom.to_s()
154         @fields['margin_left'] = left.to_s()
155     end
156 
157 
158     def setEncrypted(val=true)
159         @fields['encrypted'] = val
160     end
161 
162     def setUserPassword(pwd)
163         @fields['user_pwd'] = pwd
164     end
165 
166     def setOwnerPassword(pwd)
167         @fields['owner_pwd'] = pwd
168     end
169 
170     def setNoPrint(val=true)
171         @fields['no_print'] = val
172     end
173 
174     def setNoModify(val=true)
175         @fields['no_modify'] = val
176     end
177 
178     def setNoCopy(val=true)
179         @fields['no_copy'] = val
180     end
181 
182     def setPageLayout(value)
183         assert { value > 0 and value <= 3 }
184         @fields['page_layout'] = value
185     end
186 
187     def setPageMode(value)
188         assert { value > 0 and value <= 3 }
189         @fields['page_mode'] = value
190     end
191 
192 
193     def setFooterText(value)
194         @fields['footer_text'] = value
195     end
196 
197     def enableImages(value=true)
198         @fields['no_images'] = (not value)
199     end
200 
201     def enableBackgrounds(value=true)
202         @fields['no_backgrounds'] = (not value)
203     end
204 
205     def setHtmlZoom(value)
206         @fields['html_zoom'] = value
207     end
208 
209     def enableJavaScript(value=true)
210         @fields['no_javascript'] = (not value)
211     end
212 
213     def enableHyperlinks(value=true)
214         @fields['no_hyperlinks'] = (not value)
215     end
216 
217     def setDefaultTextEncoding(value)
218         @fields['text_encoding'] = value
219     end
220 
221     def usePrintMedia(value=true)
222         @fields['use_print_media'] = value
223     end
224 
225     def setMaxPages(value)
226         @fields['max_pages'] = value
227     end
228 
229     def enablePdfcrowdLogo(value=true)
230         @fields['pdfcrowd_logo'] = value
231     end
232 
233     def setInitialPdfZoomType(value)
234         assert { value>0 and value<=3 }
235         @fields['initial_pdf_zoom_type'] = value
236     end
237 
238     def setInitialPdfExactZoom(value)
239         @fields['initial_pdf_zoom_type'] = 4
240         @fields['initial_pdf_zoom'] = value
241     end
242 
243     def setAuthor(value)
244         @fields['author'] = value
245     end
246 
247     def setFailOnNon200(value)
248         @fields['fail_on_non200'] = value
249     end
250 
251     def setPdfScalingFactor(value)
252         @fields['pdf_scaling_factor'] = value
253     end
254 
255     def setFooterHtml(value)
256         @fields['footer_html'] = value
257     end
258 
259     def setFooterUrl(value)
260         @fields['footer_url'] = value
261     end
262 
263     def setHeaderHtml(value)
264         @fields['header_html'] = value
265     end
266 
267     def setHeaderUrl(value)
268         @fields['header_url'] = value
269     end
270 
271     def setPageBackgroundColor(value)
272         @fields['page_background_color'] = value
273     end
274 
275     def setTransparentBackground(value=true)
276         @fields['transparent_background'] = value
277     end
278 
279     def setPageNumberingOffset(value)
280         @fields['page_numbering_offset'] = value
281     end
282 
283     def setHeaderFooterPageExcludeList(value)
284         @fields['header_footer_page_exclude_list'] = value
285     end
286 
287     def setWatermark(url, offset_x=0, offset_y=0)
288         @fields["watermark_url"] = value
289         @fields["watermark_offset_x"] = offset_x
290         @fields["watermark_offset_y"] = offset_y
291     end
292 
293     def setWatermarkRotation(angle)
294         @fields["watermark_rotation"] = angle
295     end
296 
297     def setWatermarkInBackground(val=True)
298         @fields["watermark_in_background"] = val
299     end
300 
301 
302 
303     # ----------------------------------------------------------------------
304     #
305     #                      Private stuff
306     #
307 
308     private
309 
310     def create_http_obj()
311       if @use_ssl
312         require 'net/https' #apt-get install libopenssl-ruby
313         http = Net::HTTP.new(@hostname, $api_https_port)
314         # OpenSSL::SSL::VERIFY_PEER fails here:
315         # ... certificate verify failed ...
316         http.verify_mode = OpenSSL::SSL::VERIFY_NONE
317         http.use_ssl = @use_ssl
318       else
319         http = Net::HTTP.new(@hostname, $api_http_port)
320       end
321       return http
322     end
323 
324     def call_api_urlencoded(path, src=nil, out_stream=nil)
325       request = Net::HTTP::Post.new(path)
326       request.set_form_data(rename_post_data({'src' => src}))
327       return call_api(request, out_stream)
328     end
329 
330 
331     def call_api(request, out_stream)
332       http = create_http_obj()
333       begin
334         http.start {|conn|
335           conn.request(request) {|response|
336             case response
337             when Net::HTTPSuccess
338               if out_stream
339                 response.read_body do |chunk|
340                   out_stream.write(chunk)
341                 end
342               else
343                 return response.body
344               end
345             else
346               raise Error.new(response.body, response.code)
347             end
348           }
349         }
350       rescue SystemCallError => why
351         raise Error.new("#{why}\n")
352       end
353     end
354 
355     def rename_post_data(extra_data={})
356         result = {}
357         extra_data.each { |key, val| result[key] = val if val }
358         @fields.each { |key, val| result[key] = val if val }
359         result
360     end
361 
362     def encode_multipart_post_data(filename)
363         boundary = '----------ThIs_Is_tHe_bOUnDary_$'
364         body = []
365         for field, value in @fields
366             body << '--' + boundary << 'Content-Disposition: form-data; name="%s"' % field << '' << value.to_s if value
367         end
368         # filename
369         body << '--' + boundary
370         body << 'Content-Disposition: form-data; name="src"; filename="%s"' % filename
371         mime_type = 'application/octet-stream'
372         body << 'Content-Type: ' + mime_type
373         body << ''
374         body << open(filename).read()
375         # finalize
376         body << '--' + boundary + '--'
377         body << ''
378         body = body.join("\r\n")
379         content_type = 'multipart/form-data; boundary=%s' % boundary
380         return content_type, body
381     end
382 
383     def post_multipart(fpath, out_stream)
384       req = Net::HTTP::Post.new('/api/pdf/convert/html/')
385       req.content_type, req.body = encode_multipart_post_data(fpath)
386       return call_api(req, out_stream)
387     end
388 end
389 end
390 
391 
# Minimal assertion helper: evaluates the given block and raises a
# RuntimeError when the block's result is false or nil.
def assert
  return if yield

  raise "Assertion failed !"
end
395 
396 
# Default API endpoint settings, read by Pdfcrowd::Client.
$api_hostname = 'pdfcrowd.com'
$api_http_port, $api_https_port = 80, 443


# Base URIs; computed once from the default hostname when the file is loaded.
API_SELECTOR_BASE = '/api/'
HTTP_API_URI = 'http://' + $api_hostname + API_SELECTOR_BASE
HTTPS_API_URI = 'https://' + $api_hostname + API_SELECTOR_BASE
405 
406 
407 
408 # ---------------------------------------------------------------------------
409 #
410 #                                   Test
411 #
412 
# Self-test, executed only when this file is run directly:
#   ruby pdfcrowd.rb username apikey [hostname [http-port https-port]]
# It performs real API calls and writes the resulting PDFs under
# ../test_files/out relative to this file.
if __FILE__ == $0
  if ARGV.length < 2
    print "usage: ruby pdfcrowd.rb username apikey [hostname [http-port https-port]]\n"
    exit 1
  end

  $api_hostname = ARGV[2] if ARGV.length > 2

  if ARGV.length == 5
    # Command-line arguments are strings; convert them so the ports are
    # integers like the built-in defaults.
    $api_http_port = Integer(ARGV[3])
    $api_https_port = Integer(ARGV[4])
  end

  print "using %s ports %d %d\n" % [$api_hostname, $api_http_port, $api_https_port]

  some_html="<html><body>Uploaded content!</body></html>"
  Dir.chdir(File.dirname($0))
  $test_dir = '../test_files'

  # Opens the output PDF for test +name+ in binary write mode. With a block
  # the file is yielded and closed when the block finishes; without one the
  # open file is returned.
  def out_stream(name, use_ssl, &block)
    fname = $test_dir + "/out/rb_client_#{name}"
    fname += '_ssl' if use_ssl
    File.open(fname + '.pdf', 'wb', &block)
  end

  client = Pdfcrowd::Client.new(ARGV[0], ARGV[1])
  [false, true].each do |use_ssl|
    client.useSSL(use_ssl)
    begin
      ntokens = client.numTokens()
      out_stream('uri', use_ssl) { |out| client.convertURI('http://www.web-to-pdf.com', out) }
      out_stream('content', use_ssl) { |out| client.convertHtml(some_html, out) }
      out_stream('upload', use_ssl) { |out| client.convertFile("#{$test_dir}/in/simple.html", out) }
      out_stream('archive', use_ssl) { |out| client.convertFile("#{$test_dir}/in/archive.tar.gz", out) }
      after_tokens = client.numTokens()
      # Four conversions were made above, so four tokens should be gone.
      if ntokens-4 != after_tokens
        raise RuntimeError, 'got unexpected number of tokens'
      end
      print "remaining tokens: %d \n" % client.numTokens()
    rescue Pdfcrowd::Error => why
      print 'FAILED: ', why
      exit(1)
    end
  end
  # test individual methods
  begin
    [[:setPageWidth, 500],
     [:setPageHeight, -1],
     [:setHorizontalMargin, 0],
     [:setEncrypted, true],
     [:setUserPassword, 'userpwd'],
     [:setOwnerPassword, 'ownerpwd'],
     [:setNoPrint, true],
     [:setNoModify, true],
     [:setNoCopy, true],
     [:setAuthor, "ruby test"],
     [:setFailOnNon200, true],
     [:setPageLayout, Pdfcrowd::CONTINUOUS],
     [:setPageMode, Pdfcrowd::FULLSCREEN],
     [:setFooterText, '%p/%n | source %u'],
     [:enableImages, false],
     [:enableBackgrounds, false],
     [:setHtmlZoom, 300],
     [:enableJavaScript, false],
     [:enableHyperlinks, false],
     [:setDefaultTextEncoding, 'iso-8859-1'],
     [:usePrintMedia, true],
     [:setMaxPages, 1],
     [:enablePdfcrowdLogo, true],
     [:setInitialPdfZoomType, Pdfcrowd::FIT_PAGE],
     [:setInitialPdfExactZoom, 113],
     [:setFooterHtml, '<b>bold</b> and <i>italic</i> <img src="http://s3.pdfcrowd.com/test-resources/logo175x30.png" />'],
     [:setFooterUrl, 'http://s3.pdfcrowd.com/test-resources/footer.html'],
     [:setHeaderHtml, 'page %p out of %n'],
     [:setHeaderUrl, 'http://s3.pdfcrowd.com/test-resources/header.html'],
     [:setPdfScalingFactor, 0.5],
     [:setPageBackgroundColor, 'ee82EE'],
     [:setTransparentBackground, true]].each do |method, arg|
      # A fresh client per option so that settings do not accumulate.
      client = Pdfcrowd::Client.new(ARGV[0], ARGV[1])
      client.setVerticalMargin("1in")
      client.send(method, arg)
      out_stream(method.id2name.downcase(), false) do |out|
        client.convertFile("#{$test_dir}/in/simple.html", out)
      end
    end
  rescue Pdfcrowd::Error => why
    print 'FAILED: ', why
    exit(1)
  end

  # 4 margins
  client = Pdfcrowd::Client.new(ARGV[0], ARGV[1])
  client.setPageMargins('0.25in', '0.5in', '0.75in', '1.0in')
  out_stream('4margins', false) do |out|
    client.convertHtml('<div style="background-color:red;height:100%">4 margins</div>', out)
  end


end