i18n.cpp

Go to the documentation of this file.
00001 /*
00002  * i18n.cpp
00003  *
00004  * Copyright (c) 2010 Thomas A. Vaughan
00005  * All rights reserved.
00006  *
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions are met:
00010  *     * Redistributions of source code must retain the above copyright
00011  *       notice, this list of conditions and the following disclaimer.
00012  *     * Redistributions in binary form must reproduce the above copyright
00013  *       notice, this list of conditions and the following disclaimer in the
00014  *       documentation and/or other materials provided with the distribution.
00015  *     * Neither the name of the <organization> nor the
00016  *       names of its contributors may be used to endorse or promote products
00017  *       derived from this software without specific prior written permission.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THOMAS A. VAUGHAN ''AS IS'' AND ANY
00020  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00021  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00022  * DISCLAIMED. IN NO EVENT SHALL THOMAS A. VAUGHAN BE LIABLE FOR ANY
00023  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00024  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00025  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00026  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00028  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *
00030  *
00031  * Basic localized string management.  See i18n.h
00032  */
00033 
00034 // includes --------------------------------------------------------------------
00035 #include "i18n.h"               // always include our own header first
00036 
00037 #include "common/wave_ex.h"
00038 #include "datahash/datahash_text.h"
00039 #include "datahash/datahash_util.h"
00040 #include "perf/perf.h"
00041 #include "resources/resources-internal.h"
00042 #include "util/parsing.h"
00043 #include "util/string-buffer.h"
00044 
00045 
00046 // forward declared (specified in resource section of config file)
00047 const string_resource_t * getI18nResourceTable(void) throw();
00048 
00049 
00050 namespace i18n {
00051 
00052 // interface destructors
00053 Manager::~Manager(void) throw() { }
00054 
00055 
00056 struct two_character_map_t {
00057         enum eConstants {
00058                 eDimension      = 26,   //
00059                 eSize           = eDimension * eDimension
00060         };
00061 
00062         void clear(void) throw() {
00063                         base = 'a';
00064                         memset(map, 0, eSize);
00065                 }
00066 
00067         // WARNING: returns -1 for bad input
00068         int getIndex(IN const char * code) const throw() {
00069                         ASSERT(code, "null");
00070                         char c0 = *code;
00071                         char c1 = *(code + 1);
00072 
00073                         if (c0 < base || c0 >= base + eDimension) {
00074                                 DPRINTF("Bad first character: %s", code);
00075                                 return -1;
00076                         }
00077                         if (c1 < base || c1 >= base + eDimension) {
00078                                 DPRINTF("Bad second character: %s", code);
00079                                 return -1;
00080                         }
00081 
00082                         int offset = (c0 - base) * eDimension + (c1 - base);
00083                         return offset;
00084                 }
00085 
00086         bool isValid(IN const char * code) const throw() {
00087                         int index = this->getIndex(code);
00088                         if (index < 0 || !map[index]) {
00089                                 DPRINTF("Not a valid %s code: '%s'",
00090                                     type, code);
00091                                 return false;
00092                         }
00093                         return true;
00094                 }
00095 
00096         // data fields
00097         char            base;   // 'a' or 'A', typically
00098         byte_t          map[eSize];
00099         char            type[16];
00100 };
00101 
00102 
00103 static two_character_map_t s_countryCodes;
00104 static two_character_map_t s_languageCodes;
00105 
00106 static const char * s_resourceNamespace         = "i18n";
00107 
00108 
00109 ////////////////////////////////////////////////////////////////////////////////
00110 //
00111 //      static helper methods
00112 //
00113 ////////////////////////////////////////////////////////////////////////////////
00114 
00115 static void
00116 loadTable
00117 (
00118 IO two_character_map_t& table,
00119 IN char base,
00120 IN const char * name
00121 )
00122 {
00123         ASSERT(base, "bad character base");
00124         ASSERT(name, "null");
00125 
00126         table.clear();
00127         table.base = base;
00128 
00129         const char * value = getStringResource(s_resourceNamespace, name);
00130         ASSERT(value, "null");
00131 
00132 //      DPRINTF("Resource '%s':\n%s", name, value);
00133 
00134         // okay, now run through each line...
00135         while (*value) {
00136                 int offset = table.getIndex(value);
00137 //              DPRINTF("%c%c : index %d", value[0], value[1], offset);
00138 
00139                 table.map[offset] = 1;
00140 
00141                 // walk forward to end of line
00142                 while (*value && '\n' != *value) {
00143                         ++value;
00144                 }
00145                 if (*value)
00146                         ++value;        // skip newline character
00147         }
00148 }
00149 
00150 
00151 
00152 static void
00153 loadTables
00154 (
00155 void
00156 )
00157 throw()
00158 {
00159         // this static methods loads our static tables from linked-in resource
00160         //      files
00161         static bool s_loaded = false;
00162         if (s_loaded)
00163                 return; // already loaded!
00164         s_loaded = true;        // set flag
00165 
00166         // register our resources manually (TODO: get rid of this!)
00167         {
00168                 const string_resource_t * table = getI18nResourceTable();
00169                 ASSERT(table, "null");
00170 
00171                 AutoStringRegister unused(table);
00172         }
00173 
00174         //int nStrings = getStringResourceCount(s_resourceNamespace);
00175         //DPRINTF("Found %d resources in the i18n library", nStrings);
00176 
00177         strcpy(s_countryCodes.type, "country");
00178         strcpy(s_languageCodes.type, "language");
00179 
00180         loadTable(s_countryCodes, 'A', "country-codes.txt");
00181         loadTable(s_languageCodes, 'a', "language-codes.txt");
00182 }
00183 
00184 
00185 
00186 ////////////////////////////////////////////////////////////////////////////////
00187 //
00188 //      locale_t -- method implementations
00189 //
00190 ////////////////////////////////////////////////////////////////////////////////
00191 
00192 bool
00193 locale_t::isValid
00194 (
00195 void
00196 )
00197 const
00198 throw()
00199 {
00200         char buffer[eBufferSize];
00201         this->getLanguageCode(buffer);
00202 //      DPRINTF("Language code: %s", buffer);
00203         if (!isValidLanguageCode(buffer)) {
00204                 DPRINTF("Language code not valid: '%s'", buffer);
00205                 return false;
00206         }
00207 
00208         this->getCountryCode(buffer);
00209 //      DPRINTF("Country code: %s", buffer);
00210         if (!isValidCountryCode(buffer)) {
00211                 DPRINTF("Country code not valid: '%s'", buffer);
00212                 return false;
00213         }
00214 
00215         this->getEncoding(buffer);
00216 //      DPRINTF("Encoding: %s", buffer);
00217         if (!isValidEncoding(buffer)) {
00218                 DPRINTF("Encoding not valid: '%s'", buffer);
00219                 DPRINTF("  Only 'UTF-8' is supported for now!");
00220                 return false;
00221         }
00222 
00223         // got here?  All elements must be valid!
00224         return true;
00225 }
00226 
00227 
00228 
00229 ////////////////////////////////////////////////////////////////////////////////
00230 //
00231 //      Mgr -- class that implements the i18n::Manager interface
00232 //
00233 ////////////////////////////////////////////////////////////////////////////////
00234 
00235 class Mgr : public Manager {
00236 public:
00237         // constructor, destructor ---------------------------------------------
00238         ~Mgr(void) throw() { }
00239 
00240         // public class methods ------------------------------------------------
00241         void initialize(IN const char * locale);
00242 
00243         // i18n::Manager class interface methods -------------------------------
00244         const char * getLocale(void) const throw() { return m_locale.string; }
00245         void parseStrings(IO nstream::Stream * stream);
00246         void parseFolder(IN nstream::Folder * folder,
00247                                 IN const SetString * extensions,
00248                                 IN const char * filter);
00249         const char * getString(IN const char * id) const throw();
00250 
00251 private:
00252         // private typedefs ----------------------------------------------------
00253         struct string_record_t {
00254                 int     valueOffset;    // localized string
00255                 int     filenameOffset; // pointer to filename
00256         };
00257 
00258         typedef std::map<std::string, string_record_t> string_map_t;
00259         typedef std::map<std::string, int> filename_map_t;
00260 
00261         // private helper methods ----------------------------------------------
00262         int getFilenameOffset(IN const char * filename);
00263         const string_record_t * findString(IN const char * id) const throw();
00264         static nstream::eIterationFlag parseEntry(IN nstream::Entry * entry,
00265                                 IN void * context);
00266 
00267         // private member data -------------------------------------------------
00268         locale_t                m_locale;
00269         filename_map_t          m_filenames;
00270         smart_ptr<StringBuffer> m_sbuf; // list of all strings
00271         string_map_t            m_map;
00272 };
00273 
00274 
00275 
00276 void
00277 Mgr::initialize
00278 (
00279 IN const char * locale
00280 )
00281 {
00282         ASSERT(locale, "null");
00283 
00284         getLocaleFromString(locale, m_locale);
00285         ASSERT_THROW(m_locale.isValid(), "Invalid locale: " << locale);
00286 
00287         m_sbuf = StringBuffer::create();
00288         ASSERT(m_sbuf, "out of memory");
00289 }
00290 
00291 
00292 
00293 ////////////////////////////////////////////////////////////////////////////////
00294 //
00295 //      Mgr -- i18n::Manager class interface methods
00296 //
00297 ////////////////////////////////////////////////////////////////////////////////
00298 
00299 void
00300 Mgr::parseStrings
00301 (
00302 IN nstream::Stream * stream
00303 )
00304 {
00305         ASSERT(m_sbuf, "null");
00306         ASSERT(stream, "null");
00307 
00308         // get the File object associated with this stream
00309         smart_ptr<nstream::File> file = stream->getFile();
00310         ASSERT(file, "null file associated with named stream");
00311         const char * name = file->getName();
00312         //DPRINTF("Parsing file for strings: %s", name);
00313         int filenameOffset = this->getFilenameOffset(name);
00314 
00315         std::istream& instream = stream->getStream();
00316         ASSERT_THROW(!instream.bad(), "bad stream?");
00317 
00318         // keep parsing until eof or we hit "localeInfo"
00319         // NOTE: this isn't a simple hash structure!  We require "localeInfo"
00320         // to be the first token on the line, and we won't pay attention to
00321         // nesting of brackets etc.
00322         std::string line;
00323         eParseBehavior parse = eParse_Strip;
00324         while (instream.good()) {
00325                 line = getNextLineFromStream(instream, parse);
00326                 if (strncmp(line.c_str(), "localeInfo", 10))
00327                         continue;       // skip this line
00328 
00329                 // okay this is a locale ...maybe
00330                 const char * p = line.c_str();
00331                 p = expectFromString(p, "localeInfo", parse);
00332                 p = expectFromString(p, "{", parse);
00333 
00334                 // okay, read the rest as a hash
00335                 smart_ptr<Datahash> hash =
00336                     readHashFromStream("localeInfo", instream);
00337                 ASSERT(hash, "null");
00338 
00339                 // extract locale
00340                 const char * locale = ::getString(hash, "locale");
00341                 ASSERT_THROW(locale,
00342                     "No 'locale' specified in 'localeInfo' block?");
00343                 if (strcmp(locale, m_locale.getString())) {
00344                         DPRINTF("Skipping locale '%s'", locale);
00345                         continue;       // skip this block--wrong locale
00346                 }
00347 
00348                 // okay, this is our locale!
00349                 smart_ptr<Datahash> strings =
00350                     getSubhash(hash, "localizedStrings");
00351                 if (!strings)
00352                         break;  // no localized strings--skip
00353 
00354                 Datahash::iterator_t i;
00355                 strings->getIterator(i);
00356                 std::string key;
00357                 const hash_value_t * phv;
00358                 while ((phv = strings->getNextElementUnsafe(i, key))) {
00359                         if (eHashDataType_String != phv->type)
00360                                 continue;       // not a string--skip
00361 
00362                         const char * val = phv->text.c_str();
00363                         //DPRINTF("string %s --> '%s'", key.c_str(), val);
00364 
00365                         // already seen this key?
00366                         const string_record_t * psr =
00367                             this->findString(key.c_str());
00368                         ASSERT_THROW(!psr, "Duplicate string definition!  " <<
00369                             "String id = '" << key << "'.  Previous definition "
00370                             << "in file " <<
00371                             m_sbuf->getString(psr->filenameOffset) << ", second"
00372                             << " definition in file " <<
00373                             m_sbuf->getString(filenameOffset));
00374 
00375                         string_record_t sr;
00376                         sr.filenameOffset = filenameOffset;
00377                         sr.valueOffset = m_sbuf->appendString(val);
00378                         m_map[key] = sr;
00379                 }
00380         }
00381         //DPRINTF("%d strings in map", (int) m_map.size());
00382 }
00383 
00384 
00385 
00386 void
00387 Mgr::parseFolder
00388 (
00389 IN nstream::Folder * folder,
00390 IN const SetString * extensions,
00391 IN const char * filter
00392 )
00393 {
00394         ASSERT(m_sbuf, "null");
00395         ASSERT(folder, "null");
00396         // ASSERT(extensions) -- can be null!
00397         // ASSERT(filter) -- can be null!
00398 
00399         // recursively walk the folder
00400         bool visitHidden = false; // skip entries beginning with '.'
00401         walkChildFolders(folder, parseEntry, this, extensions, filter,
00402             visitHidden);
00403 }
00404 
00405 
00406 
00407 const char *
00408 Mgr::getString
00409 (
00410 IN const char * id
00411 )
00412 const
00413 throw()
00414 {
00415         ASSERT(id, "null");
00416 
00417         const string_record_t * psr = this->findString(id);
00418         if (!psr)
00419                 return NULL;
00420 
00421         ASSERT(m_sbuf, "null");
00422         return m_sbuf->getString(psr->valueOffset);
00423 }
00424 
00425 
00426 
00427 ////////////////////////////////////////////////////////////////////////////////
00428 //
00429 //      Mgr -- private helper methods
00430 //
00431 ////////////////////////////////////////////////////////////////////////////////
00432 
00433 int
00434 Mgr::getFilenameOffset
00435 (
00436 IN const char * filename
00437 )
00438 {
00439         ASSERT(filename, "null");
00440 
00441         // is this already in our set?
00442         filename_map_t::iterator i = m_filenames.find(filename);
00443         if (m_filenames.end() != i) {
00444                 return i->second;
00445         }
00446 
00447         // not already encountered--add it!
00448         int offset = m_sbuf->appendString(filename);
00449         m_filenames[filename] = offset;
00450         return offset;
00451 }
00452 
00453 
00454 
00455 const Mgr::string_record_t *
00456 Mgr::findString
00457 (
00458 IN const char * id
00459 )
00460 const
00461 throw()
00462 {
00463         ASSERT(id, "null");
00464 
00465         string_map_t::const_iterator i = m_map.find(id);
00466         if (m_map.end() == i)
00467                 return NULL;    // no such string
00468         return &i->second;
00469 }
00470 
00471 
00472 
00473 nstream::eIterationFlag
00474 Mgr::parseEntry
00475 (
00476 IN nstream::Entry * entry,
00477 IN void * context
00478 )
00479 {
00480         ASSERT(entry, "null");
00481         Mgr * pThis = (Mgr *) context;
00482         ASSERT(pThis, "null context?");
00483 
00484         nstream::File * file = dynamic_cast<nstream::File *>(entry);
00485         if (!file)
00486                 return nstream::eIterate_Continue;
00487 
00488         // okay, this is a file we care about!
00489         smart_ptr<nstream::Stream> stream = file->openStream();
00490         ASSERT_THROW(stream, "failed to open stream");
00491 
00492         pThis->parseStrings(stream);
00493         return nstream::eIterate_Continue;
00494 }
00495 
00496 
00497 
00498 ////////////////////////////////////////////////////////////////////////////////
00499 //
00500 //      Public APIs
00501 //
00502 ////////////////////////////////////////////////////////////////////////////////
00503 
00504 smart_ptr<Manager>
00505 Manager::create
00506 (
00507 IN const char * locale
00508 )
00509 {
00510         ASSERT(locale, "null");
00511 
00512         smart_ptr<Mgr> local = new Mgr;
00513         ASSERT(local, "out of memory");
00514 
00515         local->initialize(locale);
00516 
00517         return local;
00518 }
00519 
00520 
00521 
00522 void
00523 getLocaleFromString
00524 (
00525 IN const char * localeString,
00526 OUT locale_t& locale
00527 )
00528 {
00529         ASSERT(localeString, "null");
00530         locale.clear();
00531 
00532         ASSERT_THROW(strlen(localeString) <= locale_t::eMaxLength,
00533             "Locale string is too long: " << localeString);
00534 
00535         strcpy(locale.string, localeString);
00536 
00537         ASSERT_THROW(locale.isValid(),
00538             "Locale string is not valid: " << localeString);
00539 }
00540 
00541 
00542 
00543 bool
00544 isValidCountryCode
00545 (
00546 IN const char * code
00547 )
00548 {
00549         ASSERT(code, "null");
00550 
00551         loadTables();
00552 
00553         return s_countryCodes.isValid(code);
00554 }
00555 
00556 
00557 
00558 bool
00559 isValidLanguageCode
00560 (
00561 IN const char * code
00562 )
00563 {
00564         ASSERT(code, "null");
00565 
00566         loadTables();
00567 
00568         return s_languageCodes.isValid(code);
00569 }
00570 
00571 
00572 
00573 bool
00574 isValidEncoding
00575 (
00576 IN const char * encoding
00577 )
00578 {
00579         ASSERT(encoding, "null");
00580 
00581         // only support 'UTF-8' for now!
00582         return (!strcmp(encoding, "UTF-8"));
00583 }
00584 
00585 
00586 
00587 const char *
00588 getString
00589 (
00590 IN const Manager * mgr,
00591 IN const char * id
00592 )
00593 {
00594         ASSERT(id, "null");
00595         ASSERT(mgr, "null");
00596 
00597         const char * val = mgr->getString(id);
00598         if (val)
00599                 return val;
00600 
00601         // string wasn't found!  Return a temp
00602         const int bufsize = 256;
00603         static char buffer[bufsize];
00604 
00605         snprintf(buffer, bufsize,
00606             "Missing a localized string (id='%s') for locale '%s'",
00607             id, mgr->getLocale());
00608 
00609         return buffer;
00610 }
00611 
00612 
00613 
00614 const char *
00615 getHostLocale
00616 (
00617 void
00618 )
00619 {
00620         const char * val = getenv("LANG");
00621         if (val)
00622                 return val;
00623         DPRINTF("Unable to determine host's locale!");
00624         return NULL;
00625 }
00626 
00627 
00628 
00629 };      // i18n namespace
00630