Module polib
[hide private]
[frames | no frames]

Source Code for Module polib

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # License: MIT (see LICENSE file provided) 
   5  # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: 
   6   
   7  """ 
   8  **polib** allows you to manipulate, create, modify gettext files (pot, po 
   9  and mo files).  You can load existing files, iterate through its entries, 
  10  add, modify entries, comments or metadata, etc... or create new po files 
  11  from scratch. 
  12   
  13  **polib** provides a simple and pythonic API, exporting only three 
  14  convenience functions (*pofile*, *mofile* and *detect_encoding*), and the 
  15  four core classes, *POFile*, *MOFile*, *POEntry* and *MOEntry* for creating 
  16  new files/entries. 
  17   
  18  **Basic example**: 
  19   
  20  >>> import polib 
  21  >>> # load an existing po file 
  22  >>> po = polib.pofile('tests/test_utf8.po') 
  23  >>> for entry in po: 
  24  ...     # do something with entry... 
  25  ...     pass 
  26  >>> # add an entry 
  27  >>> entry = polib.POEntry(msgid='Welcome', msgstr='Bienvenue') 
  28  >>> entry.occurrences = [('welcome.py', '12'), ('anotherfile.py', '34')] 
  29  >>> po.append(entry) 
  30  >>> # to save our modified po file: 
  31  >>> # po.save() 
  32  >>> # or you may want to compile the po file 
  33  >>> # po.save_as_mofile('tests/test_utf8.mo') 
  34  """ 
  35   
  36  __author__    = 'David JEAN LOUIS <izimobil@gmail.com>' 
  37  __version__   = '0.4.2' 
  38  __all__       = ['pofile', 'POFile', 'POEntry', 'mofile', 'MOFile', 'MOEntry', 
  39                   'detect_encoding', 'escape', 'unescape'] 
  40   
  41  import codecs 
  42  import struct 
  43  import textwrap 
  44   
  45  default_encoding = 'utf-8' 
  46   
  47  # function pofile() {{{ 
  48   
def pofile(fpath, **kwargs):
    """
    Parse the po/pot file *fpath* and return the resulting POFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po/pot file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False (the module default encoding is used when omitted)

    **Example**:

    >>> import polib
    >>> po = polib.pofile('tests/test_weird_occurrences.po')
    >>> po #doctest: +ELLIPSIS
    <POFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.po', 'test_utf8.po']:
    ...     orig_po = polib.pofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_po.save(tmpf)
    ...     try:
    ...         new_po = polib.pofile(tmpf)
    ...         for old, new in zip(orig_po, new_po):
    ...             if old.msgid != new.msgid:
    ...                 old.msgid
    ...                 new.msgid
    ...             if old.msgstr != new.msgstr:
    ...                 old.msgid
    ...                 new.msgid
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # Deliberately an equality test rather than a truthiness test: only
    # values equal to True enable the detection.
    if autodetect == True:
        charset = detect_encoding(fpath)
    else:
        charset = kwargs.get('encoding', default_encoding)
    po = _POFileParser(fpath, charset).parse()
    # the wrap width is applied once the file has been parsed
    po.wrapwidth = kwargs.get('wrapwidth', 78)
    return po
94 95 # }}} 96 # function mofile() {{{ 97
def mofile(fpath, **kwargs):
    """
    Parse the mo file *fpath* and return the resulting MOFile instance.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the mo file to parse
      - *wrapwidth*: integer, the wrap width, only useful when -w option was
        passed to xgettext to generate the po file that was used to format
        the mo file (optional, default to 78)
      - *autodetect_encoding*: boolean, if set to False the function will
        not try to detect the po file encoding (optional, default to True)
      - *encoding*: string, an encoding, only relevant if autodetect_encoding
        is set to False (the module default encoding is used when omitted)

    **Example**:

    >>> import polib
    >>> mo = polib.mofile('tests/test_utf8.mo')
    >>> mo #doctest: +ELLIPSIS
    <MOFile instance at ...>
    >>> import os, tempfile
    >>> for fname in ['test_iso-8859-15.mo', 'test_utf8.mo']:
    ...     orig_mo = polib.mofile('tests/'+fname)
    ...     tmpf = tempfile.NamedTemporaryFile().name
    ...     orig_mo.save(tmpf)
    ...     try:
    ...         new_mo = polib.mofile(tmpf)
    ...         for old, new in zip(orig_mo, new_mo):
    ...             if old.msgid != new.msgid:
    ...                 old.msgstr
    ...                 new.msgstr
    ...     finally:
    ...         os.unlink(tmpf)
    """
    autodetect = kwargs.get('autodetect_encoding', True)
    # Deliberately an equality test rather than a truthiness test: only
    # values equal to True enable the detection.
    if autodetect == True:
        # mo files are binary, hence the binary_mode flag
        charset = detect_encoding(fpath, True)
    else:
        charset = kwargs.get('encoding', default_encoding)
    mo = _MOFileParser(fpath, charset).parse()
    # the wrap width is applied once the file has been parsed
    mo.wrapwidth = kwargs.get('wrapwidth', 78)
    return mo
141 142 # }}} 143 # function detect_encoding() {{{ 144
def detect_encoding(fpath, binary_mode=False):
    """
    Try to detect the encoding used by the file *fpath*. The function will
    return polib default *encoding* if it's unable to detect it.

    The file is always read as raw bytes and searched for the
    ``Content-Type: ...; charset=XXX`` header, so the detection neither
    depends on the locale encoding nor fails on bytes that are not valid
    in it. The charset name is returned as a native string.

    **Keyword arguments**:
      - *fpath*: string, full or relative path to the po or mo file to parse.
      - *binary_mode*: boolean, kept for backward compatibility; the file is
        read in binary mode whatever its value.

    **Examples**:

    >>> print(detect_encoding('tests/test_noencoding.po'))
    utf-8
    >>> print(detect_encoding('tests/test_utf8.po'))
    UTF-8
    >>> print(detect_encoding('tests/test_utf8.mo', True))
    UTF-8
    >>> print(detect_encoding('tests/test_iso-8859-15.po'))
    ISO_8859-15
    >>> print(detect_encoding('tests/test_iso-8859-15.mo', True))
    ISO_8859-15
    """
    import re
    # A bytes pattern is required to search the undecoded lines.
    rx = re.compile(
        r'"?Content-Type:.+? charset=([\w_\-:\.]+)'.encode('ascii'))
    # "with" guarantees the handle is released even if reading fails
    with open(fpath, 'rb') as fhandle:
        for line in fhandle:
            match = rx.search(line)
            if match:
                charset = match.group(1).strip()
                if not isinstance(charset, str):
                    # Python 3: the match is bytes; the pattern only
                    # accepts ASCII characters so this cannot fail
                    charset = charset.decode('ascii')
                return charset
    return default_encoding
180 181 # }}} 182 # function escape() {{{ 183
def escape(st):
    """
    Return *st* with backslashes, tabs, carriage returns, newlines and
    double quotes replaced by their backslash escape sequences.

    **Examples**:

    >>> escape('\\t and \\n and \\r and " and \\\\')
    '\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\'
    """
    # The backslash comes first so that the backslashes introduced by the
    # following substitutions are not escaped a second time.
    substitutions = (
        ('\\', '\\\\'),
        ('\t', '\\t'),
        ('\r', '\\r'),
        ('\n', '\\n'),
        ('"', '\\"'),
    )
    escaped = st
    for raw, sequence in substitutions:
        escaped = escaped.replace(raw, sequence)
    return escaped
199 200 # }}} 201 # function unescape() {{{ 202
def unescape(st):
    """
    Unescape special chars and return the given string *st*.

    The escape sequences for the double quote, newline, carriage return,
    tab and backslash are decoded in a single left-to-right pass, so an
    escaped backslash followed by ``n``, ``r``, ``t`` or a double quote is
    never mistaken for another escape sequence, and ``unescape(escape(s))``
    gives back ``s``. Unknown sequences are left untouched.

    **Examples**:

    >>> unescape('\\\\t and \\\\n and \\\\r and \\\\" and \\\\\\\\')
    '\\t and \\n and \\r and " and \\\\'
    """
    import re
    specials = {'n': '\n', 'r': '\r', 't': '\t'}

    def _decode(match):
        # a backslash or a double quote simply loses its leading backslash
        char = match.group(1)
        return specials.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', _decode, st)
218 219 # }}} 220 # class _BaseFile {{{ 221
class _BaseFile(list):
    """
    Common parent class for POFile and MOFile classes.
    This class must **not** be instantiated directly.

    It is a list of entries; subclasses are expected to provide the
    ``translated_entries()`` and ``obsolete_entries()`` methods used here.
    """

    def __init__(self, fpath=None, wrapwidth=78, encoding=default_encoding):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to po or mo file
          - *wrapwidth*: integer, the wrap width, only useful when -w option
            was passed to xgettext to generate the po file that was used to
            format the mo file, default to 78 (optional).
          - *encoding*: string, the encoding used to read/write the file.
        """
        list.__init__(self)
        # the path of the file (may be None for a file built from scratch)
        self.fpath = fpath
        # the width at which lines should be wrapped
        self.wrapwidth = wrapwidth
        # the file encoding
        self.encoding = encoding
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __str__(self):
        """String representation of the file."""
        ret = []
        # metadata first, then the live entries, then the obsolete ones
        entries = [self.metadata_as_entry()] + \
                  [e for e in self if not e.obsolete]
        for entry in entries:
            ret.append(entry.__str__(self.wrapwidth))
        for entry in self.obsolete_entries():
            ret.append(entry.__str__(self.wrapwidth))
        return '\n'.join(ret)

    def __repr__(self):
        """Return the official string representation of the object."""
        return '<%s instance at %x>' % (self.__class__.__name__, id(self))

    def _encode(self, value):
        """Return *value* as bytes, encoding text with the file encoding."""
        if isinstance(value, bytes):
            return value
        return value.encode(self.encoding)

    def metadata_as_entry(self):
        """Return the metadata as an entry"""
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                # Strip whitespace off each line in a multi-line entry
                value = '\n'.join([v.strip() for v in value.split('\n')])
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        return e

    def save(self, fpath=None, repr_method='__str__'):
        """
        Save the file to *fpath*. If no *fpath* is given, the file the
        object was loaded from (``self.fpath``) is rewritten with the
        modified data.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
          - *repr_method*: string, the method to use for output.

        Raises IOError when neither *fpath* nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        # serialise before opening, so that a failure does not truncate
        # the destination file
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        if repr_method == 'to_binary':
            fhandle = open(fpath, 'wb')
        else:
            fhandle = codecs.open(fpath, 'w', self.encoding)
        # "with" closes the handle even if the write fails
        with fhandle:
            fhandle.write(contents)

    def find(self, st, by='msgid'):
        """
        Find entry which msgid (or property identified by the *by*
        attribute) matches the string *st*. Return None if no entry
        matches.

        **Keyword arguments**:
          - *st*: string, the string to search for
          - *by*: string, the comparison attribute

        **Examples**:

        >>> po = pofile('tests/test_utf8.po')
        >>> entry = po.find('Thursday')
        >>> entry.msgstr
        u'Jueves'
        >>> entry = po.find('Some unexistant msgid')
        >>> entry is None
        True
        >>> entry = po.find('Jueves', 'msgstr')
        >>> entry.msgid
        u'Thursday'
        """
        for e in self:
            if getattr(e, by) == st:
                return e
        return None

    def ordered_metadata(self):
        """
        Convenience method that return the metadata ordered. The return
        value is list of tuples (metadata name, metadata_value).

        The well-known headers come first, in their conventional order;
        any other header follows, sorted by name.
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding'
        ]
        ordered_data = []
        for data in data_order:
            if data in metadata:
                ordered_data.append((data, metadata.pop(data)))
        # there are no specs for the rest of the metadata: sort it by name
        # so that the output is deterministic
        for data in sorted(metadata):
            ordered_data.append((data, metadata[data]))
        return ordered_data

    def to_binary(self):
        """
        Return the mofile binary representation, as bytes.

        Strings are encoded with the file encoding before their sizes and
        offsets are computed, since the mo format counts bytes.
        """
        import struct
        # the keys are sorted in the .mo file; sorted() leaves the list
        # returned by translated_entries() untouched
        entries = sorted(self.translated_entries(),
                         key=lambda entry: self._encode(entry.msgid))
        # add metadata entry
        mentry = self.metadata_as_entry()
        mentry.msgstr = mentry.msgstr.replace('\\n', '').lstrip() + '\n'
        entries = [mentry] + entries
        entries_len = len(entries)
        offsets = []
        ids = strs = b''
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            if e.msgid_plural:
                # singular and plural forms are joined by a NUL byte
                msgid = e.msgid + '\0' + e.msgid_plural
                msgstr = '\0'.join([e.msgstr_plural[index]
                                    for index in sorted(e.msgstr_plural)])
            else:
                msgid = e.msgid
                msgstr = e.msgstr
            msgid = self._encode(msgid)
            msgstr = self._encode(msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            # write exactly what was measured above
            ids += msgid + b'\0'
            strs += msgstr + b'\0'
        # The header is 7 32-bit unsigned integers.
        keystart = 7*4+16*entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1+keystart]
            voffsets += [l2, o2+valuestart]
        offsets = koffsets + voffsets
        output = struct.pack("IIIIIII",
                             0x950412de,         # Magic number
                             0,                  # Version
                             entries_len,        # # of entries
                             7*4,                # start of key index
                             7*4+entries_len*8,  # start of value index
                             0, 0)               # size and offset of hash table
        # same layout as an array of unsigned ints
        output += struct.pack('%dI' % len(offsets), *offsets)
        output += ids
        output += strs
        return output
425 426 # }}} 427 # class POFile {{{ 428
class POFile(_BaseFile):
    '''
    Reader/writer for po (or pot) files.
    A POFile is a list of entries and inherits every list method.

    **Example**:

    >>> po = POFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry1.occurrences = [('testfile', 12),('another_file', 1)]
    >>> entry1.comment = "Some useful comment"
    >>> entry2 = POEntry(
    ...     msgid="Peace in some languages",
    ...     msgstr="Pace سلام שלום Hasîtî 和平"
    ... )
    >>> entry2.occurrences = [('testfile', 15),('another_file', 5)]
    >>> entry2.comment = "Another useful comment"
    >>> entry3 = POEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> entry3.comment = "Test string quoting"
    >>> po.append(entry1)
    >>> po.append(entry2)
    >>> po.append(entry3)
    >>> po.header = "Some Header"
    >>> print(po)
    # Some Header
    msgid ""
    msgstr ""
    <BLANKLINE>
    #. Some useful comment
    #: testfile:12 another_file:1
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    #. Another useful comment
    #: testfile:15 another_file:5
    msgid "Peace in some languages"
    msgstr "Pace سلام שלום Hasîtî 和平"
    <BLANKLINE>
    #. Test string quoting
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __str__(self):
        """Return the po file as text: header comment, then the entries."""
        pieces = []
        for line in self.header.split('\n'):
            if line[:1] in [',', ':']:
                # flag and occurrence markers stick to the hash sign
                pieces.append('#%s\n' % line)
            else:
                pieces.append('# %s\n' % line)
        pieces.append(_BaseFile.__str__(self))
        return ''.join(pieces)

    def save_as_mofile(self, fpath):
        """
        Write the binary (mo) representation of the file to *fpath*.

        **Keyword arguments**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Return the percentage of translated messages as an integer;
        obsolete entries are not counted. A file without any live entry
        is reported as 100% translated.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> po.percent_translated()
        50
        >>> po = POFile()
        >>> po.percent_translated()
        100
        """
        live = 0
        for entry in self:
            if not entry.obsolete:
                live += 1
        if live == 0:
            return 100
        done = len(self.translated_entries())
        return int((100.00 / float(live)) * done)

    def translated_entries(self):
        """
        Return the list of entries that are translated and not obsolete.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.translated_entries())
        6
        """
        selected = []
        for entry in self:
            if entry.translated() and not entry.obsolete:
                selected.append(entry)
        return selected

    def untranslated_entries(self):
        """
        Return the list of entries that are neither translated nor
        obsolete.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.untranslated_entries())
        6
        """
        selected = []
        for entry in self:
            if not (entry.translated() or entry.obsolete):
                selected.append(entry)
        return selected

    def fuzzy_entries(self):
        """
        Return the list of entries carrying the 'fuzzy' flag.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.fuzzy_entries())
        2
        """
        selected = []
        for entry in self:
            if 'fuzzy' in entry.flags:
                selected.append(entry)
        return selected

    def obsolete_entries(self):
        """
        Return the list of obsolete entries.

        **Example**:

        >>> import polib
        >>> po = polib.pofile('tests/test_pofile_helpers.po')
        >>> len(po.obsolete_entries())
        4
        """
        selected = []
        for entry in self:
            if entry.obsolete:
                selected.append(entry)
        return selected

    def merge(self, refpot):
        """
        XXX this could not work if encodings are different, needs thinking
        and general refactoring of how polib handles encoding...

        Merge the current pofile with the pot file provided, the way the
        gettext msgmerge utility does:

          - comments of this file will be preserved, but extracted comments
            and occurrences will be discarded
          - any translations or comments in the file will be discarded,
            however dot comments and file positions will be preserved

        Entries of this file whose msgid is no longer in *refpot* are
        marked obsolete.

        **Keyword argument**:
          - *refpot*: object POFile, the reference catalog.

        **Example**:

        >>> import polib
        >>> refpot = polib.pofile('tests/test_merge.pot')
        >>> po = polib.pofile('tests/test_merge_before.po')
        >>> po.merge(refpot)
        >>> expected_po = polib.pofile('tests/test_merge_after.po')
        >>> unicode(po) == unicode(expected_po)
        True
        """
        # update (or create) one entry per reference entry
        for ref_entry in refpot:
            target = self.find(ref_entry.msgid)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(ref_entry)
        # ok, now we must "obsolete" entries that are not in the refpot
        # anymore
        for candidate in self:
            if refpot.find(candidate.msgid) is None:
                candidate.obsolete = True
608 609 # }}} 610 # class MOFile {{{ 611
class MOFile(_BaseFile):
    '''
    Mo file reader/writer.
    MOFile objects inherit the list objects methods.

    **Example**:

    >>> mo = MOFile()
    >>> entry1 = POEntry(
    ...     msgid="Some english text",
    ...     msgstr="Un texte en anglais"
    ... )
    >>> entry2 = POEntry(
    ...     msgid="I need my dirty cheese",
    ...     msgstr="Je veux mon sale fromage"
    ... )
    >>> entry3 = MOEntry(
    ...     msgid='Some entry with quotes " \\"',
    ...     msgstr='Un message unicode avec des quotes " \\"'
    ... )
    >>> mo.append(entry1)
    >>> mo.append(entry2)
    >>> mo.append(entry3)
    >>> print(mo)
    msgid ""
    msgstr ""
    <BLANKLINE>
    msgid "Some english text"
    msgstr "Un texte en anglais"
    <BLANKLINE>
    msgid "I need my dirty cheese"
    msgstr "Je veux mon sale fromage"
    <BLANKLINE>
    msgid "Some entry with quotes \\" \\""
    msgstr "Un message unicode avec des quotes \\" \\""
    <BLANKLINE>
    '''

    def __init__(self, *args, **kwargs):
        """
        MOFile constructor; arguments are passed to the _BaseFile
        constructor. Mo files have two other properties:
          - magic_number: the magic_number of the binary file,
          - version: the version of the mo spec.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Save the string representation of the file to *fpath*.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    def save(self, fpath=None):
        """
        Save the binary representation of the file to *fpath*. As with
        the parent class, the file the object was created with is
        rewritten when *fpath* is omitted, and IOError is raised when no
        path is known at all.

        **Keyword argument**:
          - *fpath*: string, full or relative path to the file (optional).
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always 100.
        """
        return 100

    def translated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Every entry counts as translated: the file itself is returned (not
        a copy).
        """
        return self

    def untranslated_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []

    def fuzzy_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []

    def obsolete_entries(self):
        """
        Convenience method to keep the same interface with POFile instances.
        Always an empty list.
        """
        return []
707 708 # }}} 709 # class _BaseEntry {{{ 710
711 -class _BaseEntry(object):
712 """ 713 Base class for POEntry or MOEntry objects. 714 This class must *not* be instanciated directly. 715 """ 716
717 - def __init__(self, *args, **kwargs):
718 """Base Entry constructor.""" 719 self.msgid = kwargs.get('msgid', '') 720 self.msgstr = kwargs.get('msgstr', '') 721 self.msgid_plural = kwargs.get('msgid_plural', '') 722 self.msgstr_plural = kwargs.get('msgstr_plural', {}) 723 self.obsolete = kwargs.get('obsolete', False) 724 self.encoding = kwargs.get('encoding', default_encoding)
725
726 - def __repr__(self):
727 """Return the official string representation of the object.""" 728 return '<%s instance at %x>' % (self.__class__.__name__, id(self))
729
730 - def __str__(self, wrapwidth=78):
731 """ 732 Common string representation of the POEntry and MOEntry 733 objects. 734 """ 735 if self.obsolete: 736 delflag = '#~ ' 737 else: 738 delflag = '' 739 # write the msgid 740 ret = [] 741 ret += self._str_field("msgid", delflag, "", self.msgid) 742 # write the msgid_plural if any 743 if self.msgid_plural: 744 ret += self._str_field("msgid_plural", delflag, "", self.msgid_plural) 745 if self.msgstr_plural: 746 # write the msgstr_plural if any 747 msgstrs = self.msgstr_plural 748 keys = list(msgstrs) 749 keys.sort() 750 for index in keys: 751 msgstr = msgstrs[index] 752 plural_index = '[%s]' % index 753 ret += self._str_field("msgstr", delflag, plural_index, msgstr) 754 else: 755 # otherwise write the msgstr 756 ret += self._str_field("msgstr", delflag, "", self.msgstr) 757 ret.append('') 758 return '\n'.join(ret)
759
760 - def _str_field(self, fieldname, delflag, plural_index, field):
761 lines = field.splitlines(True) # keep line breaks in strings 762 # potentially, we could do line-wrapping here, but textwrap.wrap 763 # treats whitespace too carelessly for us to use it. 764 if len(lines) > 1: 765 lines = ['']+lines # start with initial empty line 766 else: 767 lines = [field] # needed for the empty string case 768 ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, 769 escape(lines.pop(0)))] 770 for mstr in lines: 771 ret.append('%s"%s"' % (delflag, escape(mstr))) 772 return ret
773 774 # }}} 775 # class POEntry {{{ 776
class POEntry(_BaseEntry):
    """
    Represents a po file entry.

    **Examples**:

    >>> entry = POEntry(msgid='Welcome', msgstr='Bienvenue')
    >>> entry.occurrences = [('welcome.py', 12), ('anotherfile.py', 34)]
    >>> print(entry)
    #: welcome.py:12 anotherfile.py:34
    msgid "Welcome"
    msgstr "Bienvenue"
    <BLANKLINE>
    >>> entry = POEntry()
    >>> entry.occurrences = [('src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c', 32), ('src/eggs.c', 45)]
    >>> entry.comment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.tcomment = 'A plural translation. This is a very very very long line please do not wrap, this is just for testing comment wrapping...'
    >>> entry.flags.append('c-format')
    >>> entry.msgid = 'I have spam but no egg !'
    >>> entry.msgid_plural = 'I have spam and %d eggs !'
    >>> entry.msgstr_plural[0] = "J'ai du jambon mais aucun oeuf !"
    >>> entry.msgstr_plural[1] = "J'ai du jambon et %d oeufs !"
    >>> print(entry)
    #. A plural translation. This is a very very very long line please do not
    #. wrap, this is just for testing comment wrapping...
    # A plural translation. This is a very very very long line please do not wrap,
    # this is just for testing comment wrapping...
    #: src/some-very-long-filename-that-should-not-be-wrapped-even-if-it-is-larger-than-the-wrap-limit.c:32
    #: src/eggs.c:45
    #, c-format
    msgid "I have spam but no egg !"
    msgid_plural "I have spam and %d eggs !"
    msgstr[0] "J'ai du jambon mais aucun oeuf !"
    msgstr[1] "J'ai du jambon et %d oeufs !"
    <BLANKLINE>
    """

    def __init__(self, *args, **kwargs):
        """
        POEntry constructor. On top of the _BaseEntry keyword arguments,
        accepts comment, tcomment, occurrences and flags.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        self.comment = kwargs.get('comment', '')
        self.tcomment = kwargs.get('tcomment', '')
        self.occurrences = kwargs.get('occurrences', [])
        self.flags = kwargs.get('flags', [])

    def __str__(self, wrapwidth=78):
        """
        Return the string representation of the entry: comments,
        occurrences and flags followed by the msgid/msgstr lines.
        Obsolete entries are written without their comment lines.
        """
        if self.obsolete:
            return _BaseEntry.__str__(self)
        ret = []
        # comment first, if any (with text wrapping as xgettext does)
        if self.comment != '':
            for comment in self.comment.split('\n'):
                if wrapwidth > 0 and len(comment) > wrapwidth-3:
                    ret += textwrap.wrap(comment, wrapwidth,
                                         initial_indent='#. ',
                                         subsequent_indent='#. ',
                                         break_long_words=False)
                else:
                    ret.append('#. %s' % comment)
        # translator comment, if any (with text wrapping as xgettext does)
        if self.tcomment != '':
            for tcomment in self.tcomment.split('\n'):
                if wrapwidth > 0 and len(tcomment) > wrapwidth-2:
                    ret += textwrap.wrap(tcomment, wrapwidth,
                                         initial_indent='# ',
                                         subsequent_indent='# ',
                                         break_long_words=False)
                else:
                    ret.append('# %s' % tcomment)
        # occurrences (with text wrapping as xgettext does)
        if self.occurrences:
            filelist = []
            for fpath, lineno in self.occurrences:
                if lineno:
                    filelist.append('%s:%s' % (fpath, lineno))
                else:
                    filelist.append(fpath)
            filestr = ' '.join(filelist)
            if wrapwidth > 0 and len(filestr)+3 > wrapwidth:
                # XXX textwrap split words that contain hyphen, this is not
                # what we want for filenames, so the dirty hack is to
                # temporally replace hyphens with a char that a file cannot
                # contain, like "*"
                lines = textwrap.wrap(filestr.replace('-', '*'),
                                      wrapwidth,
                                      initial_indent='#: ',
                                      subsequent_indent='#: ',
                                      break_long_words=False)
                # end of the replace hack
                for line in lines:
                    ret.append(line.replace('*', '-'))
            else:
                ret.append('#: '+filestr)
        # flags
        if self.flags:
            ret.append('#, %s' % ', '.join(self.flags))
        ret.append(_BaseEntry.__str__(self))
        return '\n'.join(ret)

    def __cmp__(self, other):
        '''
        Called by comparison operations if rich comparison is not defined.

        Entries are ordered on their obsolete status first, then on their
        sorted occurrences and finally on their msgid. The result is
        never 0: entries that cannot be told apart compare as "lower".

        **Tests**:
        >>> a  = POEntry(msgid='a', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> b  = POEntry(msgid='b', occurrences=[('b.py', 1), ('b.py', 3)])
        >>> c1 = POEntry(msgid='c1', occurrences=[('a.py', 1), ('b.py', 1)])
        >>> c2 = POEntry(msgid='c2', occurrences=[('a.py', 1), ('a.py', 3)])
        >>> po = POFile()
        >>> po.append(a)
        >>> po.append(b)
        >>> po.append(c1)
        >>> po.append(c2)
        >>> po.sort()
        >>> print(po) #doctest: +NORMALIZE_WHITESPACE
        #
        msgid ""
        msgstr ""
        <BLANKLINE>
        #: a.py:1 a.py:3
        msgid "c2"
        msgstr ""
        <BLANKLINE>
        #: a.py:1 b.py:1
        msgid "c1"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "a"
        msgstr ""
        <BLANKLINE>
        #: b.py:1 b.py:3
        msgid "b"
        msgstr ""
        <BLANKLINE>
        '''
        def occurrence_key(occurrence):
            """Sort key of an occurrence: file path, then line number."""
            return (occurrence[0], occurrence[1])

        # First: Obsolete test
        if self.obsolete != other.obsolete:
            if self.obsolete:
                return -1
            else:
                return 1
        # sorted() works on copies, the original lists are left untouched
        occ1 = sorted(self.occurrences, key=occurrence_key)
        occ2 = sorted(other.occurrences, key=occurrence_key)
        # Comparing sorted occurrences
        for pos, entry1 in enumerate(occ1):
            try:
                entry2 = occ2[pos]
            except IndexError:
                # other has fewer occurrences
                return 1
            if entry1[0] != entry2[0]:
                if entry1[0] > entry2[0]:
                    return 1
                else:
                    return -1
            if entry1[1] != entry2[1]:
                if entry1[1] > entry2[1]:
                    return 1
                else:
                    return -1
        # Finally: Compare message ID
        if self.msgid > other.msgid:
            return 1
        else:
            return -1

    # Rich comparisons, so that entries can also be ordered (list.sort(),
    # sorted(), ...) by interpreters that ignore __cmp__. Each one gives
    # the result __cmp__ alone would give.

    def __lt__(self, other):
        """Return True if the entry sorts before *other*."""
        return self.__cmp__(other) < 0

    def __le__(self, other):
        """Return True if the entry does not sort after *other*."""
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        """Return True if the entry sorts after *other*."""
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        """Return True if the entry does not sort before *other*."""
        return self.__cmp__(other) >= 0

    def translated(self):
        """
        Return True if the entry has been translated or False.

        Obsolete and fuzzy entries are never considered translated; an
        entry with plural forms is translated only if none of them is
        empty.
        """
        if self.obsolete or 'fuzzy' in self.flags:
            return False
        if self.msgstr != '':
            return True
        if self.msgstr_plural:
            for pos in self.msgstr_plural:
                if self.msgstr_plural[pos] == '':
                    return False
            return True
        return False

    def merge(self, other):
        """
        Merge the current entry with the given pot entry: msgid,
        occurrences, extracted comment, flags and msgid_plural are taken
        from *other*, while existing translations are kept.
        """
        self.msgid = other.msgid
        # copy the lists, so that later changes to one entry do not leak
        # into the other
        self.occurrences = other.occurrences[:]
        self.comment = other.comment
        self.flags = other.flags[:]
        self.msgid_plural = other.msgid_plural
        if other.msgstr_plural:
            for pos in other.msgstr_plural:
                # keep existing translation at pos if any
                if pos not in self.msgstr_plural:
                    self.msgstr_plural[pos] = ''
994 995 # }}} 996 # class MOEntry {{{ 997
class MOEntry(_BaseEntry):
    """
    A single entry of a mo file.

    **Examples**:

    >>> entry = MOEntry()
    >>> entry.msgid = 'translate me !'
    >>> entry.msgstr = 'traduisez moi !'
    >>> print(entry)
    msgid "translate me !"
    msgstr "traduisez moi !"
    <BLANKLINE>
    """

    def __str__(self, wrapwidth=78):
        """
        Return the entry as text; the work is delegated to the base
        class, *wrapwidth* being passed along.
        """
        rendered = _BaseEntry.__str__(self, wrapwidth)
        return rendered
1018 1019 # }}} 1020 # class _POFileParser {{{ 1021
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.

    Each non-empty line of the file is mapped to a two-letter symbol; the
    pair ``(symbol, current_state)`` is looked up in ``self.transitions`` to
    find the handler to call and the state to move to when that handler
    returns a true value.
    """

    # States after which a comment/occurrence/flags/msgid line starts a new
    # entry, meaning the entry being built must be stored first.
    _ENTRY_END_STATES = ('MC', 'MS', 'MX')

    # (line prefix, symbol) pairs, tested in this order by _match_symbol().
    _PREFIX_SYMBOLS = (
        ('#:', 'OC'),              # occurrences line
        ('msgid "', 'MI'),         # msgid
        ('msgstr "', 'MS'),        # msgstr
        ('"', 'MC'),               # continuation line or some metadata
        ('msgid_plural "', 'MP'),  # msgid plural
        ('msgstr[', 'MX'),         # msgstr plural
        ('#, ', 'FL'),             # flags line
        ('# ', 'TC'),              # translator comment line
        ('#.', 'GC'),              # generated comment line
    )

    def __init__(self, fpath, enc=default_encoding):
        """
        Constructor.

        **Keyword arguments**:
          - *fpath*: string, path to the po file
          - *enc*: string, encoding used to read the file; when the codec
            is unknown (``LookupError``) the module default encoding is
            used instead
        """
        try:
            self.fhandle = codecs.open(fpath, 'rU', enc)
        except LookupError:
            enc = default_encoding
            self.fhandle = codecs.open(fpath, 'rU', enc)
        self.instance = POFile(fpath=fpath, encoding=enc)
        self.transitions = {}
        self.current_entry = POEntry()
        self.current_state = 'ST'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_ = ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI']

        self.add('TC', ['ST', 'HE'], 'HE')
        self.add('TC', ['GC', 'OC', 'FL', 'TC', 'MS', 'MP', 'MX', 'MI'], 'TC')
        self.add('GC', all_, 'GC')
        self.add('OC', all_, 'OC')
        self.add('FL', all_, 'FL')
        self.add('MI', ['ST', 'HE', 'GC', 'OC', 'FL', 'TC', 'MS', 'MX'], 'MI')
        self.add('MP', ['TC', 'GC', 'MI'], 'MP')
        self.add('MS', ['MI', 'MP', 'TC'], 'MS')
        self.add('MX', ['MI', 'MX', 'MP', 'TC'], 'MX')
        self.add('MC', ['MI', 'MP', 'MS', 'MX'], 'MC')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated ``POFile`` instance.  Raises ``IOError`` (from
        process()) when a line is not valid in the current state.  The file
        handle is closed in every case, including on error.
        """
        try:
            linenum = 0
            for line in self.fhandle:
                linenum += 1
                line = line.strip()
                if line == '':
                    continue
                if line[:3] == '#~ ':
                    # obsolete entry line: drop the marker, remember the flag
                    line = line[3:]
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0
                self.current_token = line
                symbol = self._match_symbol(line)
                # lines that match no known prefix are ignored
                if symbol is not None:
                    self.process(symbol, linenum)
        finally:
            # close opened file, even when a syntax error aborted the parse
            self.fhandle.close()
        self._finish()
        return self.instance

    def _match_symbol(self, line):
        """Return the two-letter symbol matching *line*, or None."""
        if line == '#':
            # an empty translator comment
            return 'TC'
        for prefix, symbol in self._PREFIX_SYMBOLS:
            if line.startswith(prefix):
                return symbol
        return None

    def _finish(self):
        """Store the last entry and move the header entry into metadata."""
        if self.current_entry:
            # since entries are added when another entry is found, we must
            # add the last entry here
            self.instance.append(self.current_entry)
        # before returning the instance, check if there's metadata and if
        # so extract it in a dict
        try:
            firstentry = self.instance[0]
        except IndexError:
            # nothing was parsed (e.g. empty file): no metadata to extract
            return
        if firstentry.msgid != '':
            return
        # metadata found: remove the entry
        firstentry = self.instance.pop(0)
        self.instance.metadata_is_fuzzy = firstentry.flags
        key = None
        for msg in firstentry.msgstr.splitlines():
            if ':' in msg:
                key, val = msg.split(':', 1)
                self.instance.metadata[key] = val.strip()
            elif key is not None:
                # a line without "key:" continues the previous value
                self.instance.metadata[key] += '\n' + msg.strip()

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.
        Keywords arguments:

        symbol     -- string, the matched token (two chars symbol)
        states     -- list, a list of states (two chars symbols)
        next_state -- the next state the fsm will have after the action
        """
        # the handler depends on next_state only, look it up once
        action = getattr(self, 'handle_%s' % next_state.lower())
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol, linenum):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:
        symbol  -- string, the matched token (two chars symbol)
        linenum -- integer, the current line number of the parsed file

        Raises IOError when no transition exists for the pair or when the
        handler fails.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            raise IOError('Syntax error in po file (line %s)' % linenum)

    # state handlers

    def _flush_entry(self):
        """Store the current entry and start a new one if it is complete."""
        if self.current_state in self._ENTRY_END_STATES:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry()

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        self.current_entry.tcomment += self.current_token[2:]
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        # only "#." is guaranteed by the dispatcher; the separating space
        # is optional, so strip it only when present
        text = self.current_token[2:]
        if text[:1] == ' ':
            text = text[1:]
        self.current_entry.comment += text
        return True

    def handle_oc(self):
        """Handle a file:num occurrence."""
        self._flush_entry()
        # only "#:" is guaranteed by the dispatcher; split() discards any
        # whitespace that follows it
        for occurrence in self.current_token[2:].split():
            # split on the last colon so file names containing a colon
            # (e.g. a drive letter) keep their line number
            parts = occurrence.rsplit(':', 1)
            if len(parts) == 2 and parts[1].isdigit():
                fil, line = parts
            else:
                # no usable line number: keep the reference untouched
                fil, line = occurrence, ''
            self.current_entry.occurrences.append((fil, line))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        # accept flags separated by "," with or without a following space
        for flag in self.current_token[3:].split(','):
            flag = flag.strip()
            if flag:
                self.current_entry.flags.append(flag)
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[7:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[14:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[8:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        token = self.current_token
        # the token looks like: msgstr[N] "text"; locate the brackets and
        # the opening quote instead of assuming a one-character index.
        # A missing "]" or quote raises ValueError, which process() reports
        # as a syntax error.
        close = token.index(']')
        index = token[7:close]
        value = token[token.index('"', close) + 1:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        text = unescape(self.current_token[1:-1])
        if self.current_state == 'MI':
            self.current_entry.msgid += text
        elif self.current_state == 'MP':
            self.current_entry.msgid_plural += text
        elif self.current_state == 'MS':
            self.current_entry.msgstr += text
        elif self.current_state == 'MX':
            msgstr = self.current_entry.msgstr_plural[self.msgstr_index] + text
            self.current_entry.msgstr_plural[self.msgstr_index] = msgstr
        # don't change the current state
        return False
1269 1270 # }}} 1271 # class _MOFileParser {{{ 1272
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """
    # Magic number values as obtained when the first four bytes are read
    # as a little-endian unsigned integer.
    BIG_ENDIAN    = 0xde120495
    LITTLE_ENDIAN = 0x950412de

    def __init__(self, fpath, enc=default_encoding):
        """
        _MOFileParser constructor.

        **Keyword arguments**:
          - *fpath*: string, path to the mo file (opened in binary mode)
          - *enc*: string, encoding handed to the ``MOFile`` instance
        """
        self.fhandle = open(fpath, 'rb')
        self.instance = MOFile(fpath=fpath, encoding=enc)

    def parse_magicnumber(self):
        """
        Parse the magic number and raise an exception if not valid.

        Reads the first four bytes of the file, stores the value on the
        instance and returns the struct format ('<II' or '>II') to use for
        reading the pairs of integers that follow.  Raises IOError when the
        value is not one of the two known magic numbers.
        """
        self.fhandle.seek(0)
        magic_number = self._readbinary('<I', 4)
        if magic_number == self.LITTLE_ENDIAN:
            pair_fmt = '<II'
        elif magic_number == self.BIG_ENDIAN:
            pair_fmt = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        return pair_fmt

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated ``MOFile`` instance.  The file handle is
        closed in every case, including when the file is rejected.
        """
        try:
            ii = self.parse_magicnumber()
            # parse the version number and the number of strings
            self.instance.version, numofstrings = self._readbinary(ii, 8)
            # offsets of the original strings table and of the translated
            # strings table
            msgids_table_offset, msgstrs_table_offset = \
                self._readbinary(ii, 8)
            # move to the msgid table and read (length, offset) of msgids
            self.fhandle.seek(msgids_table_offset)
            msgids_index = [self._readbinary(ii, 8)
                            for _ in range(numofstrings)]
            # move to the msgstr table and read (length, offset) of msgstrs
            self.fhandle.seek(msgstrs_table_offset)
            msgstrs_index = [self._readbinary(ii, 8)
                             for _ in range(numofstrings)]
            # build entries
            # NOTE: strings are used exactly as read from the binary file;
            # no decoding is performed here.
            for i in range(numofstrings):
                id_length, id_offset = msgids_index[i]
                str_length, str_offset = msgstrs_index[i]
                self.fhandle.seek(id_offset)
                msgid = self.fhandle.read(id_length)
                self.fhandle.seek(str_offset)
                msgstr = self.fhandle.read(str_length)
                if i == 0 and not msgid:
                    # the entry with an empty msgid holds the metadata; a
                    # first entry with a real msgid is a normal entry and
                    # must not be discarded
                    self.instance.metadata = self._parse_metadata(msgstr)
                    continue
                entry = MOEntry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file
            self.fhandle.close()
        return self.instance

    def _parse_metadata(self, msgstr):
        """Turn the "key: value" lines of *msgstr* into a dict."""
        metadata = {}
        for line in msgstr.split('\n'):
            tokens = line.split(':', 1)
            key = tokens[0]
            if key == '':
                continue
            if len(tokens) > 1:
                metadata[key] = tokens[1].strip()
            else:
                # a key without any value
                metadata[key] = ''
        return metadata

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        data = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, data)
        if len(tup) == 1:
            return tup[0]
        return tup
# }}}
# __main__ {{{

if __name__ == '__main__':
    """
    **Main function**::
      - to **test** the module just run: *python polib.py [-v]*
      - to **profile** the module: *python polib.py -p <some_pofile.po>*
    """
    import sys
    if len(sys.argv) > 2 and sys.argv[1] == '-p':
        def test(f):
            # Load the file with the parser matching its extension, then
            # render it: this is the work being profiled.  Template files
            # (.pot) are po files too.
            if f.endswith('po') or f.endswith('pot'):
                p = pofile(f)
            else:
                p = mofile(f)
            str(p)
        import profile
        # %r produces a correctly quoted and escaped literal, so paths
        # containing quotes or backslashes do not break the statement
        profile.run('test(%r)' % sys.argv[2])
    else:
        import doctest
        doctest.testmod()

# }}}