// Logo Search packages:
// Sourcecode: qpdf version File versions  Download package

// QPDF_String.cc

#include <qpdf/QPDF_String.hh>

#include <qpdf/QUtil.hh>
// DO NOT USE ctype -- it is locale dependent for some things, and
// it's not worth the risk of including it in case it may accidentally
// be used.
#include <string.h>

// See above about ctype.
static bool is_iso_latin1_printable(unsigned char ch)
{
    return (((ch >= 32) && (ch <= 126)) || (ch >= 160));
}

QPDF_String::QPDF_String(std::string const& val) :
    val(val)
{
}

QPDF_String::~QPDF_String()
{
}

std::string
QPDF_String::unparse()
{
    return unparse(false);
}

std::string
QPDF_String::unparse(bool force_binary)
{
    bool use_hexstring = force_binary;
    if (! use_hexstring)
    {
      unsigned int nonprintable = 0;
      int consecutive_printable = 0;
      for (unsigned int i = 0; i < this->val.length(); ++i)
      {
          char ch = this->val[i];
          // Note: do not use locale to determine printability.  The PDF
          // specification accepts arbitrary binary data.  Some locales
          // imply multibyte characters.  We'll consider something
          // printable if it is printable in ISO-Latin-1.  We'll code
          // this manually rather than being rude and setting locale.
          if ((ch == 0) || (! (is_iso_latin1_printable(ch) ||
                         strchr("\n\r\t\b\f", ch))))
          {
            ++nonprintable;
            consecutive_printable = 0;
          }
          else
          {
            if (++consecutive_printable > 5)
            {
                // If there are more than 5 consecutive printable
                // characters, I want to see them as such.
                nonprintable = 0;
                break;
            }
          }
      }

      // Use hex notation if more than 20% of the characters are not
      // printable in the current locale.  Uniformly distributed random
      // characters will not pass this test even with ISO-Latin-1 in
      // which 76% are either printable or in the set of standard
      // escaped characters.
      if (5 * nonprintable > val.length())
      {
          use_hexstring = true;
      }
    }
    std::string result;
    if (use_hexstring)
    {
      result += "<";
      char num[3];
      for (unsigned int i = 0; i < this->val.length(); ++i)
      {
          sprintf(num, "%02x", (unsigned char) this->val[i]);
          result += num;
      }
      result += ">";
    }
    else
    {
      result += "(";
      char num[5];
      for (unsigned int i = 0; i < this->val.length(); ++i)
      {
          char ch = this->val[i];
          switch (ch)
          {
            case '\n':
            result += "\\n";
            break;

            case '\r':
            result += "\\r";
            break;

            case '\t':
            result += "\\t";
            break;

            case '\b':
            result += "\\b";
            break;

            case '\f':
            result += "\\f";
            break;

            case '(':
            result += "\\(";
            break;

            case ')':
            result += "\\)";
            break;

            case '\\':
            result += "\\\\";
            break;

            default:
            if (is_iso_latin1_printable(ch))
            {
                result += this->val[i];
            }
            else
            {
                sprintf(num, "\\%03o", (unsigned char)ch);
                result += num;
            }
            break;
          }
      }
      result += ")";
    }

    return result;
}

std::string
QPDF_String::getVal() const
{
    return this->val;
}

std::string
QPDF_String::getUTF8Val() const
{
    std::string result;
    unsigned int len = this->val.length();
    if ((len >= 2) && (len % 2 == 0) &&
      (this->val[0] == '\xfe') && (this->val[1] == '\xff'))
    {
      // This is a Unicode string using big-endian UTF-16.  This
      // code is not actually correct as it doesn't properly handle
      // characters past 0xffff.
      for (unsigned int i = 2; i < len; i += 2)
      {
          result += QUtil::toUTF8(((unsigned char) this->val[i] << 8) +
                            ((unsigned char) this->val[i+1]));
      }
    }
    else
    {
      for (unsigned int i = 0; i < len; ++i)
      {
          result += QUtil::toUTF8((unsigned char) this->val[i]);
      }
    }
    return result;
}

// Generated by  Doxygen 1.6.0   Back to index