# --- T2-COPYRIGHT-NOTE-BEGIN --- # This copyright note is auto-generated by ./scripts/Create-CopyPatch. # # T2 SDE: package/.../exact-image/no-utf8-pdf.patch # Copyright (C) 2009 The T2 SDE Project # # More information can be found in the files COPYING and README. # # This patch file is dual-licensed. It is available under the license the # patched project is licensed under, as long as it is an OpenSource license # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms # of the GNU General Public License as published by the Free Software # Foundation; either version 2 of the License, or (at your option) any later # version. # --- T2-COPYRIGHT-NOTE-END --- Disable the half-finished UTF-8 decoding in the hOCR-to-PDF text writer for now (reverts codecs/pdf.cc from revision 1516 to 1515). Index: codecs/pdf.cc =================================================================== --- ./codecs/pdf.cc (revision 1516) +++ ./codecs/pdf.cc (revision 1515) @@ -711,12 +711,10 @@ c << "("; // parse string and use proper escape + // TODO: UTF-8 input parsing // TODO: Unicode mappings bool first_newline = true; - - // decode utf8, locally - std::vector utf8 = DecodeUtf8(text.c_str(), text.size()); - for (std::vector::const_iterator it = utf8.begin(); it != utf8.end(); ++it) + for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) { switch (*it) { @@ -739,7 +737,7 @@ // just copy by default: default: - c << (char)*it; + c << *it; } }