A
download csv2po.py
Language: Python
License: GPL
Copyright: Copyright 2003-2006 Zuza Software Foundation
LOC: 163
Project Info
translate
Server: SourceForge
Type: cvs
...late\src\translate\convert\
   .cvsignore
   __init__.py
   convert.py
   csv2po.py
   csv2tbx.py
   dtd2po.py
   html2po.py
   moz2po.py
   mozfunny2prop.py
   nb2po.py
   oo2po.py
   po2csv.py
   po2dtd.py
   po2html.py
   po2moz.py
   po2nb.py
   po2oo.py
   po2prop.py
   po2tmx.py
   po2ts.py
   po2txt.py
   po2xliff.py
   poreplace.py
   pot2po.py
   prop2mozfunny.py
   prop2po.py
   sxw2po.py
   test_convert.py
   test_csv2po.py
   test_dtd2po.py
   test_html2po.py
   test_moz2po.py
   test_mozfunny2prop.py
   test_oo2po.py
   test_po2csv.py
   test_po2dtd.py
   test_po2html.py
   test_po2moz.py
   test_po2oo.py
   test_po2prop.py
   test_po2tmx.py
   test_po2ts.py
   test_po2txt.py
   test_po2xliff.py
   test_pot2po.py
   test_prop2mozfunny.py
   test_prop2po.py
   test_sxw2po.py
   test_ts2po.py
   test_txt2po.py
   test_xliff2po.py
   ts2po.py
   txt2po.py
   xliff2po.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 
# Copyright 2003-2006 Zuza Software Foundation
# 
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with translate; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""converts comma-separated values (.csv) files to gettext .po localization files"""

import sys
from translate.misc import quote
from translate.misc import sparse
from translate.storage import po
from translate.storage import csvl10n

def replacestrings(source, *pairs):
  """applies each (orig, new) replacement pair to source, in the order given, and returns the result

  Later pairs see the output of earlier ones, so the order of pairs matters."""
  result = source
  for pair in pairs:
    old, replacement = pair
    result = result.replace(old, replacement)
  return result

def quotecsvstr(source):
  """returns source wrapped in double quotes after normalising some backslash escapes

  The replacements are applied in order by replacestrings."""
  replacements = (
    ('\\"', '"'),      # backslash + double quote -> bare double quote ...
    ('"', '\\"'),      # ... so that every double quote then gets exactly one backslash
    ("\\\\'", "\\'"),  # two backslashes + single quote -> one backslash + single quote
    ('\\\\n', '\\n'),  # two backslashes + n -> one backslash + n
  )
  escaped = replacestrings(source, *replacements)
  return '"' + escaped + '"'

def simplify(string):
  """returns string with everything except alphanumeric characters removed

  Used to build a loose matching key for strings whose punctuation or
  whitespace may have been mangled. The result is a string of the same type
  as the input, so it can be used as a dictionary key."""
  # joining on an empty string of the input's own type keeps str/unicode
  # inputs as str/unicode, and does not rely on filter() returning a string
  return type(string)().join([char for char in string if char.isalnum()])

class csv2po:
  """a class that takes translations from a .csv file and puts them in a .po file

  If a template po file is given at construction, csv units are merged into
  the matching units of that template (see handlecsvunit). Otherwise a new po
  file is built with one po unit per csv unit (see convertelement)."""
  def __init__(self, templatepo=None, charset=None, duplicatestyle="keep"):
    """construct the converter...

    templatepo: optional po file whose units receive the csv translations
    charset: if not None, csv source and target strings are decoded from it
    duplicatestyle: passed to the po file's removeduplicates after conversion"""
    self.pofile = templatepo
    self.charset = charset
    self.duplicatestyle = duplicatestyle
    if self.pofile is not None:
      # only needed when merging: count of csv units that could not be placed
      self.unmatched = 0
      self.makeindex()

  def makeindex(self):
    """makes indexes required for searching...

    commentindex maps the joined source comments of a unit to that unit,
    sourceindex maps the unquoted msgid to the unit, and simpleindex maps the
    simplified msgid to a list of candidate units."""
    self.commentindex = {}
    self.sourceindex = {}
    self.simpleindex = {}
    self.duplicatecomments = []
    for thepo in self.pofile.units:
      commentparts = []
      for comment in thepo.sourcecomments:
        commentparts.append(comment.replace("#:","",1).strip())
      joinedcomment = " ".join(commentparts)
      unquotedid = po.unquotefrompo(thepo.msgid)
      # the definitive way to match is by source comment (joinedcomment)
      if joinedcomment in self.commentindex:
        # unless more than one thing matches...
        self.duplicatecomments.append(joinedcomment)
      else:
        self.commentindex[joinedcomment] = thepo
      # do simpler matching in case things have been mangled...
      simpleid = simplify(unquotedid)
      # but check for duplicates
      if simpleid in self.simpleindex and not (unquotedid in self.sourceindex):
        # keep a list of them...
        self.simpleindex[simpleid].append(thepo)
      else:
        self.simpleindex[simpleid] = [thepo]
      # also match by standard msgid
      self.sourceindex[unquotedid] = thepo
    # a comment shared by several units cannot identify any one of them
    for comment in self.duplicatecomments:
      if comment in self.commentindex:
        del self.commentindex[comment]

  def convertelement(self,thecsv):
    """converts csv element to po element

    The csv comment becomes the source comment; source and target are split
    on newlines and each line is quoted with quotecsvstr."""
    thepo = po.pounit(encoding="UTF-8")
    thepo.sourcecomments = ["#: " + thecsv.comment + "\n"]
    thepo.msgid = [quotecsvstr(line) for line in thecsv.source.split('\n')]
    thepo.msgstr = [quotecsvstr(line) for line in thecsv.target.split('\n')]
    return thepo

  def handlecsvunit(self, thecsv):
    """handles reintegrating a csv element into the .po file

    The matching po unit is looked up by comment, then by exact source, then
    by simplified source. If no unit (or more than one candidate) is found, a
    message is printed to stderr, self.unmatched is incremented and nothing
    is changed. Otherwise the matched unit's msgstr is updated in place."""
    if len(thecsv.comment.strip()) > 0 and thecsv.comment in self.commentindex:
      thepo = self.commentindex[thecsv.comment]
    elif thecsv.source in self.sourceindex:
      thepo = self.sourceindex[thecsv.source]
    elif simplify(thecsv.source) in self.simpleindex:
      thepolist = self.simpleindex[simplify(thecsv.source)]
      if len(thepolist) > 1:
        # ambiguous: report all the candidates rather than guessing
        csvfilename = getattr(self.csvfile, "filename", "(unknown)")
        matches = "\n  ".join(["possible match: " + po.unquotefrompo(thepo.msgid) for thepo in thepolist])
        print >>sys.stderr, "%s - csv entry not found in pofile, multiple matches found:\n  location\t%s\n  original\t%s\n  translation\t%s\n  %s" % (csvfilename, thecsv.comment, thecsv.source, thecsv.target, matches)
        self.unmatched += 1
        return
      thepo = thepolist[0]
    else:
      csvfilename = getattr(self.csvfile, "filename", "(unknown)")
      print >>sys.stderr, "%s - csv entry not found in pofile:\n  location\t%s\n  original\t%s\n  translation\t%s" % (csvfilename, thecsv.comment, thecsv.source, thecsv.target)
      self.unmatched += 1
      return
    csvtarget = [quotecsvstr(line) for line in thecsv.target.split('\n')]
    if thepo.hasplural():
      # we need to work out whether we matched the singular or the plural
      singularid = po.unquotefrompo(thepo.msgid)
      pluralid = po.unquotefrompo(thepo.msgid_plural)
      # exact matches are tried before the simplified ones
      if thecsv.source == singularid:
        thepo.msgstr[0] = csvtarget
      elif thecsv.source == pluralid:
        thepo.msgstr[1] = csvtarget
      elif simplify(thecsv.source) == simplify(singularid):
        thepo.msgstr[0] = csvtarget
      elif simplify(thecsv.source) == simplify(pluralid):
        thepo.msgstr[1] = csvtarget
      else:
        print >>sys.stderr, "couldn't work out singular or plural: %r, %r, %r" %  \
          (thecsv.source, singularid, pluralid)
        self.unmatched += 1
        return
    else:
      thepo.msgstr = csvtarget

  def convertfile(self, thecsvfile):
    """converts a csvfile to a pofile, and returns it. uses templatepo if given at construction"""
    self.csvfile = thecsvfile
    if self.pofile is None:
      self.pofile = po.pofile()
      mergemode = False
    else:
      mergemode = True
    if self.pofile.units and self.pofile.units[0].isheader():
      # reuse the existing header, filling in the placeholder charset/encoding
      headerpo = self.pofile.units[0]
      headerpo.msgstr = [line.replace("CHARSET", "UTF-8").replace("ENCODING", "8bit") for line in headerpo.msgstr]
    else:
      headerpo = self.pofile.makeheader(charset="UTF-8", encoding="8bit")
    # same fallback as the error messages in handlecsvunit, so that a csv
    # store without a filename attribute does not abort the conversion
    csvfilename = getattr(self.csvfile, "filename", "(unknown)")
    headerpo.othercomments.append("# extracted from %s\n" % csvfilename)
    # only the first csv unit is checked for being a header row
    mightbeheader = True
    for thecsv in self.csvfile.units:
      if self.charset is not None:
        thecsv.source = thecsv.source.decode(self.charset)
        thecsv.target = thecsv.target.decode(self.charset)
      if mightbeheader:
        # ignore typical header strings...
        mightbeheader = False
        if [item.strip().lower() for item in (thecsv.comment, thecsv.source, thecsv.target)] == \
           ["comment", "original", "translation"]:
          continue
        if len(thecsv.comment.strip()) == 0 and thecsv.source.find("Content-Type:") != -1:
          continue
      if mergemode:
        self.handlecsvunit(thecsv)
      else:
        thepo = self.convertelement(thecsv)
        self.pofile.units.append(thepo)
    self.pofile.removeduplicates(self.duplicatestyle)
    return self.pofile

  def getmissing(self):
    """get the number of missing translations...

    Returns the number of units in self.pofile whose isblankmsgstr() is true."""
    # TODO: work out how to print out the following if in verbose mode
    missing = 0
    for thepo in self.pofile.units:
      if thepo.isblankmsgstr():
        missing += 1
    return missing

def convertcsv(inputfile, outputfile, templatefile, charset=None, columnorder=None, duplicatestyle="keep"):
  """reads in inputfile using csvl10n, converts using csv2po, writes to outputfile

  If templatefile is not None it is loaded as a po file and the csv
  translations are merged into it. Returns 0 without writing anything when
  the resulting po file is empty, otherwise writes it out and returns 1."""
  csvstore = csvl10n.csvfile(inputfile, fieldnames=columnorder)
  if templatefile is not None:
    converter = csv2po(po.pofile(templatefile), charset=charset, duplicatestyle=duplicatestyle)
  else:
    converter = csv2po(charset=charset, duplicatestyle=duplicatestyle)
  postore = converter.convertfile(csvstore)
  if postore.isempty():
    return 0
  outputfile.write(str(postore))
  return 1

def main(argv=None):
  """builds the command line option parser for csv2po and runs it on argv"""
  from translate.convert import convert
  # every supported input/output pairing produces po output via convertcsv
  formats = {}
  for outputformat in ("po", "pot", None):
    formats[("csv", outputformat)] = ("po", convertcsv)
  parser = convert.ConvertOptionParser(formats, usetemplates=True, description=__doc__)
  parser.add_option(
    "", "--charset", dest="charset", default=None,
    help="set charset to decode from csv files", metavar="CHARSET")
  parser.add_option(
    "", "--columnorder", dest="columnorder", default=None,
    help="specify the order and position of columns (source,source,target)")
  parser.add_duplicates_option()
  # these option values are appended to the parser's passthrough list
  for optionname in ("charset", "columnorder"):
    parser.passthrough.append(optionname)
  parser.run(argv)

About Koders | Resources | Downloads | Support | Black Duck | Terms of Service | DMCA | Privacy Policy | Contact Us