Module pdfimposer

Source Code for Module pdfimposer

   1  #!/usr/bin/env python 
   2  # -*- coding: UTF-8 -*- 
   3   
   4  ######################################################################## 
   5  # 
   6  # pdfimposer - achieve some basic imposition on PDF documents 
   7  # Copyright (C) 2008-2019 Kjö Hansi Glaz <kjo@a4nancy.net.eu.org> 
   8  # 
   9  # This program is  free software; you can redistribute  it and/or modify 
  10  # it under the  terms of the GNU General Public  License as published by 
  11  # the Free Software Foundation; either  version 3 of the License, or (at 
  12  # your option) any later version. 
  13  # 
  14  # This program  is distributed in the  hope that it will  be useful, but 
  15  # WITHOUT   ANY  WARRANTY;   without  even   the  implied   warranty  of 
  16  # MERCHANTABILITY  or FITNESS  FOR A  PARTICULAR PURPOSE.   See  the GNU 
  17  # General Public License for more details. 
  18  # 
  19  # You should have received a copy of the GNU General Public License 
  20  # along with this program.  If not, see <http://www.gnu.org/licenses/>. 
  21  # 
  22  ######################################################################## 
  23   
  24  ######################################################################## 
  25  # 
  26  # pdfimposer.py 
  27  # 
  28  # This python module changes the page layout of PDF documents. It is the
  29  # backend of BookletImposer, but is designed to be easily usable from any
  30  # python script.
  31  # 
  32  ######################################################################## 
  33   
  34  """ 
  35  Converts PDF documents between different page layouts. 
  36   
  37  This module enables to: 
  38   - convert linear (page by page) PDF documents to booklets; 
  39   - revert booklets to linear documents; 
  40   - reduce multiple input PDF pages and put them on one single output page. 
  41   
  42  The `StreamConverter` class works on file-like objects (streams), while
  43  the `FileConverter` class works on files.
  44   
  45  Some convenience functions are also provided. 
  46  """ 
  47  # XXX: File should be ASCII 
  48   
  49  from abc import ABCMeta, abstractmethod 
  50   
  51  import re 
  52  import os 
  53  import types 
  54   
  55  import PyPDF2 as pyPdf 
  56   
  57  # XXX: Fix these translatable strings 
# Use the gettext-style ``_`` if one is already visible (e.g. installed in
# builtins by the application); otherwise fall back to an identity function
# so translatable strings are returned unchanged.
try:
    _
except NameError:
    def _(x): return x

# Markup language used by the docstrings of this module.
__docformat__ = "restructuredtext"

########################################################################

# CONSTANTS


class PageOrientation:
    """Namespace holding the page orientation constants."""

    #: The portrait orientation (page is taller than wide).
    PORTRAIT = False

    #: The landscape orientation (page is wider than tall).
    LANDSCAPE = True
76


class TwoSidedFlip:
    """Namespace naming the paper edge on which two-sided prints flip."""

    #: Pages will be flipped on the short edge.
    SHORT_EDGE = _("short-edge flip")

    #: Pages will be flipped on the long edge.
    LONG_EDGE = _("long-edge flip")
84
########################################################################


class PdfConvError(Exception):
    """
    The base class for all exceptions raised by PdfImposer.

    The attribute "message" contains a message explaining the cause of the
    error.
    """
    def __init__(self, message=None):
        """
        Create the exception.

        :Parameters:
          - `message` An optional object explaining the cause of the error.
            It is stored in the "message" attribute and, when given, is also
            forwarded to `Exception` so that ``str()`` and ``args`` are not
            empty on the base class.
        """
        if message is None:
            Exception.__init__(self)
        else:
            Exception.__init__(self, message)
        self.message = message
98
########################################################################


class MismachingOrientationsError(PdfConvError):
    """
    Raised when the required layout is incompatible with the orientation
    of the input pages.

    The attribute "message" contains the problematic layout.
    """
    def __str__(self):
        """Return a human-readable description naming the layout."""
        template = _("The layout %s is incompatible with the input page "
                     "orientation")
        return template % self.message
112
########################################################################


class UnknownFormatError(PdfConvError):
    """
    Raised when the user tries to set an unknown page format.

    The attribute "message" contains the problematic format.
    """
    def __str__(self):
        """Return a human-readable description naming the format."""
        template = _('The page format "%s" is unknown')
        return template % self.message
125
########################################################################


class UserInterruptError(PdfConvError):
    """
    This exception is raised when the user interrupts the conversion.
    """
    def __str__(self):
        """
        Return a human-readable description of the interruption.

        The text has no placeholder, so it must not be %-formatted with
        the "message" attribute: doing so raises TypeError ("not all
        arguments converted"), including when the message is None.
        """
        return _('User interruption')
135
########################################################################


class AbstractConverter(object, metaclass=ABCMeta):
    """
    The base class for all pdfimposer converter classes.

    It is an abstract class; the following methods must be overridden by
    subclasses:
     - get_input_height
     - get_input_width
     - get_page_count
     - bookletize
     - linearize
     - reduce
    """

    # Known paper formats as (width, height), all in portrait orientation.
    # The values are fed to set_output_width / set_output_height, i.e. they
    # are expressed in default user space units.
    page_formats = {
        "A3": (842, 1192),
        "A4": (595, 842),
        "A5": (420, 595),
        "Tabloid": (792, 1224),
        "Letter": (612, 792),
        "Legal": (612, 1008),
    }

    def __init__(self,
                 layout='2x1',
                 format='A4',
                 flip=TwoSidedFlip.SHORT_EDGE,
                 copy_pages=False):
        """
        Create an AbstractConverter instance.

        :Parameters:
          - `layout` The layout of input pages on one output page
            (see set_layout).
          - `format` The format of the output paper (see
            set_output_format).
          - `flip` Render the output booklet for two-sided printing with
            flipping on the short (default) or long edge. Long-edge flip
            will result in even-numbered output pages being upside-down.
          - `copy_pages` Whether the same group of input pages
            should be copied to fill the corresponding output page or not
            (see set_copy_pages).
        """
        self.layout = None
        self.output_format = None
        self.output_orientation = None

        self.set_layout(layout)
        self.set_output_format(format)
        self.set_copy_pages(copy_pages)
        self.set_two_sided_flip(flip)

        def default_progress_callback(msg, prog):
            print("%s (%i%%)" % (msg, prog*100))

        self.set_progress_callback(default_progress_callback)

    # GETTERS AND SETTERS
    # ===================

    def set_output_height(self, height):
        """
        Set the height of the output page.

        :Parameters:
          - `height` The height of the output page in default user
            space units.
        """
        self.__output_height = int(height)

    def get_output_height(self):
        """
        Get the height of the output page.

        :Returns:
            The height of the output page in default user space units.
        """
        return self.__output_height

    def set_output_width(self, width):
        """
        Set the width of the output page.

        :Parameters:
          - `width` The width of the output page in default user
            space units.
        """
        self.__output_width = int(width)

    def get_output_width(self):
        """
        Get the width of the output page.

        :Returns:
            The width of the output page in default user space units.
        """
        return self.__output_width

    def set_pages_in_width(self, num):
        """
        Set the number of input pages to put in the width on one output page.

        :Parameters:
          - `num` An integer representing the number of pages in width.
        """
        self.__pages_in_width = int(num)

    def get_pages_in_width(self):
        """
        Get the number of input pages to put in the width on one output page.

        :Returns:
            An integer representing the number of pages in width.
        """
        return self.__pages_in_width

    def set_pages_in_height(self, num):
        """
        Set the number of input pages to put in the height on one output page.

        :Parameters:
          - `num` An integer representing the number of pages in height.
        """
        self.__pages_in_height = int(num)

    def get_pages_in_height(self):
        """
        Get the number of input pages to put in the height on one output page.

        :Returns:
            An integer representing the number of pages in height.
        """
        return self.__pages_in_height

    def set_copy_pages(self, copy_pages):
        """
        Set whether the same group of input pages should be copied to fill
        the corresponding output page or not.

        :Parameters:
          - `copy_pages` True to get copies of the same group of input pages
            on one output page. False to get different groups of
            input pages on one output page.
        """
        self.__copy_pages = bool(copy_pages)

    def get_copy_pages(self):
        """
        Get whether the same group of input pages will be copied to fill the
        corresponding output page or not.

        :Returns:
            True if copies of the same group of input pages will get
            copied on one output page. False if different groups of
            input pages will go on one output page.
        """
        return self.__copy_pages

    def set_progress_callback(self, progress_callback):
        """
        Register a progress callback function.

        Register a callback that will be called to inform on the
        progress of the conversion.

        :Parameters:
          - `progress_callback` The callable which is called to report the
            conversion progress. It receives a string for the progress
            message and a number in the range [0, 1] for the progress.
            Any callable is accepted (plain function, bound method,
            functools.partial, ...).

        :Raises TypeError: if `progress_callback` is not callable.
        """
        # An explicit check (instead of an assert on types.FunctionType)
        # still runs under "python -O" and does not reject bound methods.
        if not callable(progress_callback):
            raise TypeError("progress_callback must be callable, got %r"
                            % (progress_callback,))
        self.__progress_callback = progress_callback

    def get_progress_callback(self):
        """
        Get the progress callback function.

        :Returns:
            The callable which is called to report the conversion
            progress.
        """
        return self.__progress_callback

    def set_two_sided_flip(self, flip):
        """
        Set the edge which the paper will be flipped on when printed. Defaults
        to TwoSidedFlip.SHORT_EDGE, where all the output pages are the right
        way up. If your printer can only flip over the long edge, set this to
        TwoSidedFlip.LONG_EDGE. The imposer will rotate all even output pages
        180° to compensate.

        :Parameters:
          - `flip` Either TwoSidedFlip.SHORT_EDGE or TwoSidedFlip.LONG_EDGE.
        """
        assert(flip in (TwoSidedFlip.SHORT_EDGE, TwoSidedFlip.LONG_EDGE))
        self.__two_sided_flip = flip

    def get_two_sided_flip(self):
        """
        Get the edge which the paper will be flipped on when printed.

        :Returns:
            Either TwoSidedFlip.SHORT_EDGE or TwoSidedFlip.LONG_EDGE.
        """
        return self.__two_sided_flip

    # SOME GETTERS THAT CALCULATE THE VALUE THEY RETURN FROM OTHER VALUES
    # ===================================================================

    def get_input_size(self):
        """
        Return the page size of the input document.

        :Returns:
            A tuple (width, height) representing the page size of
            the input document expressed in default user space units.
        """
        return (self.get_input_width(), self.get_input_height())

    @abstractmethod
    def get_input_height(self):
        """
        Return the page height of the input document.

        :Returns:
            The page height of the input document expressed in default
            user space units.
        """
        raise NotImplementedError("get_input_height must be implemented in "
                                  "a subclass.")

    @abstractmethod
    def get_input_width(self):
        """
        Return the page width of the input document.

        :Returns:
            The page width of the input document expressed in default
            user space units.
        """
        raise NotImplementedError("get_input_width must be implemented in "
                                  "a subclass.")

    def get_input_orientation(self):
        """
        Return the page orientation of the input document.

        :Returns:
            A constant from PageOrientation, or None (if square paper).
        """
        if self.get_input_height() > self.get_input_width():
            return PageOrientation.PORTRAIT
        elif self.get_input_height() < self.get_input_width():
            return PageOrientation.LANDSCAPE
        else:
            # XXX: is square
            return None

    def set_layout(self, layout):
        """
        Set the layout of input pages on one output page.

        :Parameters:
          - `layout` A string of the form WxH, or a tuple or list of the form
            (W, H), where W is the number of input pages to put on the width
            of the output page and H is the number of input pages to put in
            the height of an output page.

        :Raises ValueError: if `layout` has neither of these forms, or if
            W or H cannot be converted to an integer.
        """
        if isinstance(layout, str):
            pages_in_width, pages_in_height = layout.split('x')
        elif isinstance(layout, (tuple, list)) and len(layout) == 2:
            pages_in_width, pages_in_height = layout
        else:
            raise ValueError("layout must be a 'WxH' string or a (W, H) "
                             "pair, got %r" % (layout,))
        self.set_pages_in_width(int(pages_in_width))
        self.set_pages_in_height(int(pages_in_height))

    def get_layout(self):
        """
        Return the layout of input pages on one output page.

        :Returns:
            A string of the form WxH, where W is the number of input pages
            to put on the width of the output page and H is the number of
            input pages to put in the height of an output page.
        """
        return str(self.get_pages_in_width()) + \
            'x' + str(self.get_pages_in_height())

    def get_pages_in_sheet(self):
        """
        Calculate the number of input pages that will be put on one output
        page.

        :Returns:
            An integer representing the number of input pages on one
            output page.
        """
        return self.get_pages_in_width() * self.get_pages_in_height()

    def set_output_format(self, format):
        """
        Set the format of the output paper.

        :Parameters:
          - `format` A string representing the name of the desired paper
            format, among the keys of page_formats (e.g. A3, A4, A5).

        :Raises UnknownFormatError: if the given paper format is not
            recognized.
        """
        try:
            width, height = AbstractConverter.page_formats[format]
        except KeyError:
            raise UnknownFormatError(format) from None
        self.set_output_height(height)
        self.set_output_width(width)

    def get_output_format(self):
        """
        Return the format of the output paper.

        :Returns:
            A string representing the name of the paper format
            (e.g. A3, A4, A5), or None if the current output size matches
            no known format.
        """
        # The getters must be *called*: comparing against the bound methods
        # themselves can never match an entry of page_formats.
        size = (self.get_output_width(), self.get_output_height())
        # page_formats stores portrait sizes, and the output size may have
        # been swapped by _set_output_orientation.
        if self._get_output_orientation() == PageOrientation.LANDSCAPE:
            size = (size[1], size[0])
        for name, dimensions in AbstractConverter.page_formats.items():
            if dimensions == size:
                return name
        return None

    def get_input_format(self):
        """
        Return the format of the input paper.

        :Returns:
            A string representing the name of the paper format
            (e.g. A3, A4, A5), or None if the input size matches no known
            format.
        """
        width, height = self.get_input_size()
        # page_formats stores portrait sizes: normalise landscape input.
        if self.get_input_orientation() == PageOrientation.LANDSCAPE:
            size = height, width
        else:
            size = width, height
        for name, dimensions in self.page_formats.items():
            if dimensions == size:
                return name
        return None

    @abstractmethod
    def get_page_count(self):
        """
        Return the number of pages of the input document.

        :Returns:
            The number of pages of the input document.
        """
        raise NotImplementedError("get_page_count must be implemented in "
                                  "a subclass.")

    def get_reduction_factor(self):
        """
        Calculate the reduction factor.

        :Returns:
            The reduction factor to be applied to an input page to
            obtain its size on the output page. It is computed from the
            widths only.
        """
        return float(self.get_output_width()) / \
            (self.get_pages_in_width() * self.get_input_width())

    def get_increasing_factor(self):
        """
        Calculate the increasing factor.

        :Returns:
            The increasing factor to be applied to an input page to
            obtain its size on the output page. It is computed from the
            widths only.
        """
        return float(self.get_pages_in_width() * self.get_output_width()) / \
            self.get_input_width()

    def _set_output_orientation(self, output_orientation):
        """
        Set the orientation of the output paper.

        WARNING: in the current implementation, the orientation of the
        output paper may be automatically adjusted, even if it was set
        manually.

        :Parameters:
          - `output_orientation` A constant from PageOrientation. The value
            is coerced with bool(), so None is handled like
            PageOrientation.PORTRAIT.
        """
        output_orientation = bool(output_orientation)

        w = self.get_output_width()
        h = self.get_output_height()

        # Swap the dimensions only when they contradict the request.
        if (output_orientation == PageOrientation.PORTRAIT and w > h) or \
           (output_orientation == PageOrientation.LANDSCAPE and h > w):
            self.set_output_height(w)
            self.set_output_width(h)

    def _get_output_orientation(self):
        """
        Return the orientation of the output paper.

        WARNING: in the current implementation, the orientation of the
        output paper may be automatically adjusted, even if it was set
        manually.

        :Returns:
            A constant from PageOrientation, or None (if square paper).
        """
        if self.get_output_height() > self.get_output_width():
            return PageOrientation.PORTRAIT
        elif self.get_output_height() < self.get_output_width():
            return PageOrientation.LANDSCAPE
        else:
            return None

    # CONVERSION FUNCTIONS
    # ====================

    @abstractmethod
    def bookletize(self):
        """
        Convert a linear document to a booklet.

        Convert a linear document to a booklet, arranging the pages as
        required.
        """
        raise NotImplementedError("bookletize must be implemented in a "
                                  "subclass.")

    @abstractmethod
    def linearize(self):
        """
        Convert a booklet to a linear document.

        Convert a booklet to a linear document, arranging the pages as
        required.
        """
        raise NotImplementedError("linearize must be implemented in a "
                                  "subclass.")

    @abstractmethod
    def reduce(self):
        """
        Put multiple input pages on one output page.
        """
        raise NotImplementedError("reduce must be implemented in a subclass.")
594
########################################################################


class StreamConverter(AbstractConverter):
    """
    This class performs conversions on file-like objects (streams).
    """

    def __init__(self,
                 input_stream,
                 output_stream,
                 layout='2x1',
                 format='A4',
                 flip=TwoSidedFlip.SHORT_EDGE,
                 copy_pages=False):
        """
        Create a StreamConverter.

        :Parameters:
          - `input_stream` The file-like object from which the input PDF
            document should be read.
          - `output_stream` The file-like object to which the output PDF
            document should be written.
          - `layout` The layout of input pages on one output page (see
            set_layout).
          - `format` The format of the output paper (see set_output_format).
          - `flip` The edge on which the paper is flipped when printing
            two-sided (see set_two_sided_flip).
          - `copy_pages` Whether the same group of input pages should be
            copied to fill the corresponding output page or not (see
            set_copy_pages).
        """
        AbstractConverter.__init__(self, layout, format,
                                   flip, copy_pages)
        self._output_stream = output_stream
        self._input_stream = input_stream

        self._inpdf = pyPdf.PdfFileReader(input_stream)

    def get_input_height(self):
        """Return the media box height of the first input page, as an int."""
        page = self._inpdf.getPage(0)
        height = page.mediaBox.getHeight()
        return int(height)

    def get_input_width(self):
        """Return the media box width of the first input page, as an int."""
        page = self._inpdf.getPage(0)
        width = page.mediaBox.getWidth()
        return int(width)

    def get_page_count(self):
        """Return the number of pages of the input document."""
        return self._inpdf.getNumPages()

    def __fix_page_orientation(self, cmp):
        """
        Adapt the output page orientation.

        :Parameters:
          - `cmp` A comparator function. Takes: number of pages on one
            direction (int), number of pages on the other direction
            (int). Must return: the boolean result of the comparison.

        :Raises MismachingOrientationsError: if the required layout is
            incompatible with the input page orientation.
        """
        if cmp(self.get_pages_in_width(), self.get_pages_in_height()):
            if self.get_input_orientation() == PageOrientation.PORTRAIT:
                if self._get_output_orientation() == PageOrientation.PORTRAIT:
                    self._set_output_orientation(PageOrientation.LANDSCAPE)
            else:
                raise MismachingOrientationsError(self.get_layout())
        elif cmp(self.get_pages_in_height(), self.get_pages_in_width()):
            if self.get_input_orientation() == PageOrientation.LANDSCAPE:
                if self._get_output_orientation() == PageOrientation.LANDSCAPE:
                    self._set_output_orientation(PageOrientation.PORTRAIT)
            else:
                raise MismachingOrientationsError(self.get_layout())
        else:
            # Neither direction satisfies `cmp`: make the output follow
            # the input orientation.
            if self.get_input_orientation() == PageOrientation.LANDSCAPE:
                if self._get_output_orientation() == PageOrientation.PORTRAIT:
                    self._set_output_orientation(PageOrientation.LANDSCAPE)
            else:
                if self._get_output_orientation() == PageOrientation.LANDSCAPE:
                    self._set_output_orientation(PageOrientation.PORTRAIT)

    def __fix_page_orientation_for_booklet(self):
        """
        Adapt the output page orientation to impose
        """
        def is_two_times(op1, op2):
            return op1 == 2 * op2

        self.__fix_page_orientation(is_two_times)

    def __fix_page_orientation_for_linearize(self):
        """
        Adapt the output page orientation to linearize
        """
        def is_half(op1, op2):
            return op2 == 2 * op1

        self.__fix_page_orientation(is_half)

    def __get_sequence_for_booklet(self):
        """
        Calculates the page sequence to impose a booklet.

        :Returns:
            A list of page numbers representing sequence of pages to
            impose a booklet. The list might contain None where blank
            pages should be added.
        """
        n_pages = self.get_page_count()
        pages = list(range(n_pages))

        # Pad with blank pages (None) up to the next multiple of 4.
        # XXX: print a warning if input page number not divisible by 4?
        pages.extend([None] * (-n_pages % 4))

        # When copying, every pair of pages is repeated to fill half of
        # the slots of an output page; otherwise it appears once.
        if self.get_copy_pages():
            copies = self.get_pages_in_sheet() // 2
        else:
            copies = 1

        # Arranges the pages in booklet order
        sequence = []
        while pages:
            sequence.extend([pages.pop(), pages.pop(0)] * copies)
            sequence.extend([pages.pop(0), pages.pop()] * copies)

        return sequence

    def __get_sequence_for_linearize(self, booklet=True):
        """
        Calculates the page sequence to linearize a booklet.

        :Parameters:
          - `booklet` If True (default) compute the positions for a
            booklet; if False the slots are simply numbered in order.

        :Returns:
            A list giving, for each slot of the input document, the
            position passed to insertBlankPage for the extracted page.
            None marks a slot that must be skipped.
        """
        n_slots = self.get_page_count() * self.get_pages_in_sheet()
        if not booklet:
            return list(range(n_slots))

        sequence = []

        def append_and_remove_copies(pages):
            sequence.extend(pages)
            if self.get_copy_pages():
                # Slots holding copies of the same pages are skipped.
                sequence.extend(
                    [None] * (self.get_pages_in_sheet() - len(pages)))

        for i in range(0, n_slots, 4):
            append_and_remove_copies([i // 2, i // 2])
            append_and_remove_copies([i // 2 + 1, i // 2 + 2])
        return sequence

    def __get_sequence_for_reduce(self):
        """
        Calculates the page sequence to linearly impose reduced pages.

        :Returns:
            A list of page numbers representing sequence of pages to
            impose reduced pages. The list might contain None where blank
            pages should be added.
        """
        n_pages = self.get_page_count()
        if self.get_copy_pages():
            # Each input page fills a whole output page with its copies.
            per_sheet = self.get_pages_in_sheet()
            return [page for page in range(n_pages)
                    for _copy in range(per_sheet)]

        sequence = list(range(n_pages))
        remainder = len(sequence) % self.get_pages_in_sheet()
        if remainder != 0:
            # Pad the last output page with blank slots.
            sequence.extend([None] * (self.get_pages_in_sheet() - remainder))
        return sequence

    def __write_output_stream(self, outpdf):
        """
        Writes output to the stream.

        :Parameters:
          - `outpdf` the object to write to the stream. This object must have
            a write() method.
        """
        self.get_progress_callback()(_("writing converted file"), 1)
        outpdf.write(self._output_stream)
        self.get_progress_callback()(_("done"), 1)

    def __do_reduce(self, sequence):
        """
        Do actual imposition job.

        :Parameters:
          - `sequence` a list of page numbers representing the sequence of
            pages to impose. None means blank page.
        """
        # XXX: Translated progress messages
        self.__fix_page_orientation_for_booklet()
        outpdf = pyPdf.PdfFileWriter()

        # These values do not change during the loop; read them once,
        # after the output orientation has been fixed.
        progress = self.get_progress_callback()
        pages_in_width = self.get_pages_in_width()
        pages_in_height = self.get_pages_in_height()
        pages_in_sheet = self.get_pages_in_sheet()
        output_width = self.get_output_width()
        output_height = self.get_output_height()
        rotate_even = self.get_two_sided_flip() == TwoSidedFlip.LONG_EDGE

        current_page = 0
        output_page = 0
        while current_page < len(sequence):
            progress(
                _("creating page %i") %
                ((current_page + pages_in_sheet) / pages_in_sheet),
                float(current_page) / len(sequence)
            )
            page = outpdf.addBlankPage(output_width, output_height)
            # Slots are filled row by row, starting from the top-left one.
            for vert_pos in range(pages_in_height):
                for horiz_pos in range(pages_in_width):
                    if (current_page < len(sequence) and
                            sequence[current_page] is not None):
                        page.mergeScaledTranslatedPage(
                            self._inpdf.getPage(sequence[current_page]),
                            self.get_reduction_factor(),
                            horiz_pos*output_width / pages_in_width,
                            output_height - (
                                (vert_pos + 1) * output_height /
                                pages_in_height)
                        )
                    current_page += 1
            # Every second output page is turned upside-down for printers
            # flipping on the long edge.
            if rotate_even and output_page % 2:
                page.rotateClockwise(180)
            page.compressContentStreams()
            output_page += 1
        self.__write_output_stream(outpdf)

    def bookletize(self):
        """Convert the linear input document to a booklet."""
        self.__do_reduce(self.__get_sequence_for_booklet())

    def reduce(self):
        """Put multiple input pages on one output page."""
        self.__do_reduce(self.__get_sequence_for_reduce())

    def linearize(self, booklet=True):
        """
        Convert a booklet to a linear document.

        :Parameters:
          - `booklet` Whether the input is arranged as a booklet (default).
            It is forwarded to the page sequence computation.
        """
        # XXX: Translated progress messages
        # XXX: Wrong zoom factor e.g. when layout is 2x1

        self.__fix_page_orientation_for_linearize()
        sequence = self.__get_sequence_for_linearize(booklet)
        outpdf = pyPdf.PdfFileWriter()

        output_page = 0
        for input_page in range(0, self.get_page_count()):
            for vert_pos in range(0, self.get_pages_in_height()):
                for horiz_pos in range(0, self.get_pages_in_width()):
                    if sequence[output_page] is not None:
                        self.get_progress_callback()(
                            _("extracting page %i") % (output_page + 1),
                            float(output_page) / len(sequence))
                        page = outpdf.insertBlankPage(
                            self.get_output_width(),
                            self.get_output_height(),
                            sequence[output_page])
                        # Shift the enlarged input page so that the wanted
                        # slot lands on the new output page.
                        page.mergeScaledTranslatedPage(
                            self._inpdf.getPage(input_page),
                            self.get_increasing_factor(),
                            - horiz_pos * self.get_output_width(),
                            (vert_pos - self.get_pages_in_height() + 1) *
                            self.get_output_height()
                        )
                        page.compressContentStreams()
                    output_page += 1
        self.__write_output_stream(outpdf)
890
########################################################################


class FileConverter(StreamConverter):
    """
    This class performs conversions on true files.
    """
    def __init__(self,
                 infile_name,
                 outfile_name=None,
                 layout='2x1',
                 format='A4',
                 flip=TwoSidedFlip.SHORT_EDGE,
                 copy_pages=False,
                 overwrite_outfile_callback=None):
        """
        Create a FileConverter.

        :Parameters:
          - `infile_name` The name of the input PDF file.
          - `outfile_name` The name of the file where the output PDF
            should be written. If omitted, defaults to the
            name of the input PDF with its extension replaced by
            '-conv.pdf'.
          - `layout` The layout of input pages on one output page (see
            set_layout).
          - `format` The format of the output paper (see set_output_format).
          - `flip` The edge on which the paper is flipped when printing
            two-sided (see set_two_sided_flip).
          - `copy_pages` Whether the same group of input pages should be
            copied to fill the corresponding output page or not (see
            set_copy_pages).
          - `overwrite_outfile_callback` A callable which is called
            if outfile_name already exists when trying to open it. It
            receives the absolute output file name and must return False
            not to overwrite the file. If omitted, an existing file is
            overwritten without confirmation.

        :Raises UserInterruptError: if the callback refuses to overwrite
            the existing output file.
        :Raises TypeError: if `overwrite_outfile_callback` is given but
            is not callable.
        """
        # sets [input, output]_stream to None so we can test their presence
        # when closing
        self._input_stream = None
        self._output_stream = None

        # outfile_name is set first (None when not provided) ...
        self.__set_outfile_name(outfile_name or None)
        # ... then infile_name, which derives the output name if needed.
        self.__set_infile_name(infile_name)

        # Setup callback to ask for confirmation before overwriting outfile
        if not overwrite_outfile_callback:
            def overwrite_outfile_callback(filename):
                return True
        elif not callable(overwrite_outfile_callback):
            raise TypeError("overwrite_outfile_callback must be callable, "
                            "got %r" % (overwrite_outfile_callback,))

        # Now initialize a StreamConverter
        self._input_stream = open(self.get_infile_name(), 'rb')
        try:
            outfile_name = self.get_outfile_name()
            if (os.path.exists(outfile_name) and not
                    overwrite_outfile_callback(
                        os.path.abspath(outfile_name))):
                raise UserInterruptError()
            self._output_stream = open(outfile_name, 'wb')
            StreamConverter.__init__(self, self._input_stream,
                                     self._output_stream,
                                     layout, format, flip, copy_pages)
        except BaseException:
            # Do not wait for garbage collection to release the files
            # when the construction fails.
            self.__close_streams()
            raise

    def __del__(self):
        self.__close_streams()

    def __close_streams(self):
        """Close the input and output files, ignoring I/O errors."""
        for attribute in ('_input_stream', '_output_stream'):
            # getattr: the attribute may be missing if __init__ never ran.
            stream = getattr(self, attribute, None)
            if stream:
                try:
                    stream.close()
                except IOError:
                    # XXX: Do something better
                    pass

    # GETTERS AND SETTERS SECTION
    # ===========================

    def __set_infile_name(self, name):
        """
        Sets the name of the input PDF file. Also set the name of output PDF
        file if not already set.

        :Parameters:
          - `name` the name of the input PDF file.
        """
        self.__infile_name = name

        if not self.__outfile_name:
            # Replace a trailing ".ext" by "-conv.pdf"; names without
            # such an extension just get the suffix appended.
            result = re.search(r"(.+)\.\w*$", name)
            if result:
                self.__outfile_name = result.group(1) + '-conv.pdf'
            else:
                self.__outfile_name = name + '-conv.pdf'

    def get_infile_name(self):
        """
        Get the name of the input PDF file.

        :Returns:
            The name of the input PDF file.
        """
        return self.__infile_name

    def __set_outfile_name(self, name):
        """
        Sets the name of the output PDF file.

        :Parameters:
          - `name` the name of the output PDF file.
        """
        self.__outfile_name = name

    def get_outfile_name(self):
        """
        Get the name of the output PDF file.

        :Returns:
            The name of the output PDF file.
        """
        return self.__outfile_name
1021


# Convenience functions
# =====================

def bookletize_on_stream(input_stream,
                         output_stream,
                         layout='2x1',
                         format='A4',
                         flip=TwoSidedFlip.SHORT_EDGE,
                         copy_pages=False):
    """
    Convert a linear document to a booklet.

    Convert a linear document to a booklet, arranging the pages as
    required.

    This is a convenience function around StreamConverter

    :Parameters:
      - `input_stream` The file-like object from which the input PDF
        document should be read.
      - `output_stream` The file-like object to which the output PDF
        document should be written.
      - `layout` The layout of input pages on one output page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `flip` Whether the output paper will be flipped on the short edge
        (default) or the long edge when printing (see set_two_sided_flip).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    # The streams come first in StreamConverter's signature, and the
    # output stream is passed as an object: it must not be called.
    StreamConverter(input_stream, output_stream, layout, format,
                    flip, copy_pages).bookletize()
1056
def bookletize_on_file(input_file,
                       output_file=None,
                       layout='2x1',
                       format='A4',
                       flip=TwoSidedFlip.SHORT_EDGE,
                       copy_pages=False):
    """
    Convert a linear PDF file to a booklet.

    The pages of the input file are rearranged as required to form a
    booklet.

    This is a convenience function around FileConverter.

    :Parameters:
      - `input_file` The name of the input PDF file.
      - `output_file` The name of the file where the output PDF
        should be written. If omitted, defaults to the
        name of the input PDF with '-conv' appended.
      - `layout` The layout of input pages on one output page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `flip` Whether the output paper will be flipped on the short edge
        (default) or the long edge when printing (see set_two_sided_flip).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    converter = FileConverter(input_file, output_file, layout, format,
                              flip, copy_pages)
    converter.bookletize()
1088
def linearize_on_stream(input_stream,
                        output_stream,
                        layout='2x1',
                        format='A4',
                        copy_pages=False):
    """
    Convert a booklet to a linear document.

    The pages of the booklet are rearranged as required to form a
    linear (page by page) document.

    This is a convenience function around StreamConverter. The two-sided
    flip passed to the converter is always TwoSidedFlip.SHORT_EDGE.

    :Parameters:
      - `input_stream` The file-like object from which the input PDF
        document should be read.
      - `output_stream` The file-like object to which the output PDF
        document should be written.
      - `layout` The layout of output pages on one input page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    converter = StreamConverter(input_stream, output_stream, layout,
                                format, TwoSidedFlip.SHORT_EDGE, copy_pages)
    converter.linearize()
1117
def linearize_on_file(input_file,
                      output_file=None,
                      layout='2x1',
                      format='A4',
                      flip=TwoSidedFlip.SHORT_EDGE,
                      copy_pages=False):
    """
    Convert a booklet to a linear PDF file.

    The pages of the booklet are rearranged as required to form a
    linear (page by page) PDF file.

    This is a convenience function around FileConverter.

    :Parameters:
      - `input_file` The name of the input PDF file.
      - `output_file` The name of the file where the output PDF
        should be written. If omitted, defaults to the
        name of the input PDF with '-conv' appended.
      - `layout` The layout of input pages on one output page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `flip` The two-sided flip setting handed to FileConverter;
        defaults to the short edge (see set_two_sided_flip).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    converter = FileConverter(input_file, output_file, layout, format,
                              flip, copy_pages)
    converter.linearize()
1147
def reduce_on_stream(input_stream,
                     output_stream,
                     layout='2x1',
                     format='A4',
                     flip=TwoSidedFlip.SHORT_EDGE,
                     copy_pages=False):
    """
    Put multiple input pages on one output page.

    This is a convenience function around StreamConverter.

    :Parameters:
      - `input_stream` The file-like object from which the input PDF
        document should be read.
      - `output_stream` The file-like object to which the output PDF
        document should be written.
      - `layout` The layout of input pages on one output page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `flip` Whether the output paper will be flipped on the short edge
        (default) or the long edge when printing (see set_two_sided_flip).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    converter = StreamConverter(input_stream, output_stream, layout, format,
                                flip, copy_pages)
    converter.reduce()
1176
def reduce_on_file(input_file,
                   output_file=None,
                   layout='2x1',
                   format='A4',
                   flip=TwoSidedFlip.SHORT_EDGE,
                   copy_pages=False):
    """
    Put multiple input pages on one output page.

    This is a convenience function around FileConverter.

    :Parameters:
      - `input_file` The name of the input PDF file.
      - `output_file` The name of the file where the output PDF
        should be written. If omitted, defaults to the
        name of the input PDF with '-conv' appended.
      - `layout` The layout of input pages on one output page (see
        set_layout).
      - `format` The format of the output paper (see set_output_format).
      - `flip` Whether the output paper will be flipped on the short edge
        (default) or the long edge when printing (see set_two_sided_flip).
      - `copy_pages` Whether the same group of input pages should be copied
        to fill the corresponding output page or not (see
        set_copy_pages).
    """
    converter = FileConverter(input_file, output_file, layout, format,
                              flip, copy_pages)
    converter.reduce()
1205