/***************************************************************************
                old_richtextparser.cpp - The old MSN and Plus parser
                             -------------------
    begin                : April 30, 2008
    copyright            : (C) 2008 by Valerio Pilo
    email                : valerio@kmess.org
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#include "old_richtextparser.h"

#include "../src/contact/contactbase.h"
#include "../src/utils/kmessshared.h"
#include "../src/currentaccount.h"
#include "../src/emoticonmanager.h"
#include "../src/kmessdebug.h"

#include <math.h>

#include <QColor>
#include <QRegExp>
#include <QTextDocument>
#include <QUrl>

#include <KLocale>
#include <KStandardDirs>




// Initialize the list of MSN Plus colors.
// Each entry is an "RRGGBB" hexadecimal triplet without the leading '#'; getHtmlColor() uses a
// numeric color code as the index into this list and prepends the '#' itself.
// Note that a whole lot of colors are still missing from the list
QStringList OldRichTextParser::predefinedColors_(
  QStringList() << "FFFFFF" << "000000" << "00007F" << "009300" << "FF0000"
                << "7F0000" << "9C009C" << "FC7F00" << "FFFF00" << "00FC00"
                << "009393" << "00FFFF" << "2020FC" << "FF00FF" << "7F7F7F"
                << "D2D2D2" << "E7E6E4" << "CFCDD0" << "FFDEA4" << "FFAEB9"
                << "FFA8FF" << "B4B4FC" << "BAFBE5" << "C1FFA3" << "FAFDA2"
                << "B6B4D7" << "A2A0A1" << "F9C152" << "FF6D66" << "FF62FF"
                << "6C6CFF" << "68FFC3" << "8EFF67" << "F9FF57" << "858482"
                << "6E6B7D" << "FFA01E" << "F92611" << "FF20FF" << "202BFF"
                << "1EFFA5" << "60F913" << "FFF813" << "5E6464" << "4B494C"
                << "D98812" << "EB0505" << "DE00DE" << "0000D3" << "03CC88"
                << "59D80D" << "D4C804" << "000268" << "18171C" << "944E00"
                << "9B0008" << "980299" << "01038C" << "01885F" << "389600"
                << "9A9E15" << "473400" << "4D0000" << "5F0162" << "000047"
                << "06502F" << "1C5300" << "544D05" );
// Initialize the result caches: both map an original, unparsed string to its processed form.
// The first is filled by getCleanString(), the second by getFormattedString()
QHash<QString, QString> OldRichTextParser::cleanedStringsCache_;
QHash<QString, QString> OldRichTextParser::formattedStringsCache_;
// Initialize the MSN Plus tag matching regexp.
// It matches "[c=COLOR]text[/c]" and "[a=COLOR]text[/a]", where the closing tag may carry an
// optional "=COLOR" of its own. Captures: 1 = tag letter, 2 = opening color, 3 = enclosed text,
// 4 = closing color (empty when absent). Minimal matching is enabled later, by initialize()
QRegExp                 OldRichTextParser::colorMatch_( "\\[(c|a)=(#?[0-9a-z]+)\\](.*)\\[/\\1(?:=(#?[0-9a-z]+))?\\]",
                                                     Qt::CaseInsensitive );
// Set the id counter for emoticons not yet received: parseMsnString() increments it to build
// the "ce<number>" ids of the placeholder images
int     OldRichTextParser::lastPendingEmoticonId_ = 0;
// Set the pointer to the instance of the Emoticon Manager; it stays null until initialize() is called
EmoticonManager *OldRichTextParser::emoticonManager_ = 0;



// Strip all the MSN Plus! formatting tags out of the given string, in place.
// The [b], [i], [u], [s] tags and the [c]/[a] color tags (opening and closing, with or without
// a color argument) are removed; everything else is left untouched.
// Results are remembered in cleanedStringsCache_, using the unmodified input as key.
void OldRichTextParser::getCleanString( QString &string )
{
  // A string with no opening bracket cannot contain any tag at all
  if( ! string.contains( "[" ) )
  {
    return;
  }

  // Reuse the result of a previous run on the same input
  if( cleanedStringsCache_.contains( string ) )
  {
    string = cleanedStringsCache_.value( string );
    return;
  }

  const QString unparsed( string );

  // Remove the plain style tags one by one, ignoring their case
  static const char *const styleTags[] = { "[b]", "[/b]", "[i]", "[/i]", "[u]", "[/u]", "[s]", "[/s]" };
  static const int         styleTagsCount = sizeof( styleTags ) / sizeof( styleTags[ 0 ] );

  for( int tagIndex = 0; tagIndex < styleTagsCount; ++tagIndex )
  {
    string.replace( QString( styleTags[ tagIndex ] ), QString( "" ), Qt::CaseInsensitive );
  }

  // Remove foreground/background color tags, whatever their color argument is
  const QRegExp colorTags( "\\[/?(c|a)(=#?[0-9a-z,]+)?\\]", Qt::CaseInsensitive );
  string.replace( colorTags, "" );

#ifdef KMESSDEBUG_RICHTEXTPARSER
  kDebug() << "Original:" << unparsed;
  kDebug() << "Parsed:"   << string;
#endif

  // Remember the result for the next time
  cleanedStringsCache_.insert( unparsed, string );

  // Don't let the cache grow past its limit: when it does, throw away the hash's first entry
  if( cleanedStringsCache_.count() > OLD_MSN_PLUS_STRINGCACHESIZE )
  {
    const QString discardedKey( cleanedStringsCache_.constBegin().key() );
    cleanedStringsCache_.remove( discardedKey );
  }
}



// Convert the MSN Plus! formatting tags of the given string into HTML markup, in place.
// [b], [i], [u] and [s] become the HTML tags of the same name; [c] (foreground) and [a]
// (background) color tags become styled spans, or gradients when the closing tag carries a
// second color. The result is wrapped in a <font> element and remembered in
// formattedStringsCache_, using the unmodified input as key.
void OldRichTextParser::getFormattedString( QString &string )
{
  // A string with no opening bracket cannot contain any tag at all
  if( ! string.contains( "[" ) )
  {
    return;
  }

  // Reuse the result of a previous run on the same input
  if( formattedStringsCache_.contains( string ) )
  {
    string = formattedStringsCache_.value( string );
    return;
  }

  const QString unparsed( string );

  // Translate the plain style tags into their HTML equivalents, ignoring their case
  static const char *const styleTags[][ 2 ] =
  {
    { "[b]", "<b>" }, { "[/b]", "</b>" },
    { "[i]", "<i>" }, { "[/i]", "</i>" },
    { "[u]", "<u>" }, { "[/u]", "</u>" },
    { "[s]", "<s>" }, { "[/s]", "</s>" }
  };
  static const int styleTagsCount = sizeof( styleTags ) / sizeof( styleTags[ 0 ] );

  for( int tagIndex = 0; tagIndex < styleTagsCount; ++tagIndex )
  {
    string.replace( QString( styleTags[ tagIndex ][ 0 ] ), QString( styleTags[ tagIndex ][ 1 ] ), Qt::CaseInsensitive );
  }

  // Translate the color tags, one pair at a time, until no more pairs are found
  while( colorMatch_.indexIn( string ) != -1 )
  {
    const int     tagPosition = colorMatch_.pos();
    const int     tagLength   = colorMatch_.matchedLength();
    const bool    isForeground = ( colorMatch_.cap( 1 ).toLower() == "c" );
    const QString openingColor( colorMatch_.cap( 2 ) );
    const QString enclosedText( colorMatch_.cap( 3 ) );
    const QString closingColor( colorMatch_.cap( 4 ) );

    QString markup;

    if( closingColor.isEmpty() )
    {
      // Only one color was given: it's a solid color
      markup = "<span style='";
      markup += QString( isForeground ? "color" : "background-color" );
      markup += ":";
      markup += getHtmlColor( openingColor );
      markup += ";'>";
      markup += enclosedText;
      markup += "</span>";
    }
    else if( isForeground )
    {
      // Two colors on a foreground tag: every character gets its own color
      markup = getHtmlGradient( enclosedText, openingColor, closingColor );
    }
    else
    {
      // Two colors on a background tag: let the style sheet draw the gradient
      markup = "<span style='background-color:qlineargradient(x1:0,y1:0,x2:1,y2:0,"
               "stop:0 ";
      markup += getHtmlColor( openingColor );
      markup += ",stop:1 ";
      markup += getHtmlColor( closingColor );
      markup += ");'>";
      markup += enclosedText;
      markup += "</span>";
    }

    string.replace( tagPosition, tagLength, markup );
  }

  // Enclosing the result in a tag which is rarely used elsewhere greatly reduces the parsing
  // problems caused, for example, by missing closing tags
  string = "<font>" + string + "</font>";

#ifdef KMESSDEBUG_RICHTEXTPARSER
  kDebug() << "Original:" << unparsed;
  kDebug() << "Parsed:"   << string;
#endif

  // Remember the result for the next time
  formattedStringsCache_.insert( unparsed, string );

  // Don't let the cache grow past its limit: when it does, throw away the hash's first entry
  if( formattedStringsCache_.count() > OLD_MSN_PLUS_STRINGCACHESIZE )
  {
    const QString discardedKey( formattedStringsCache_.constBegin().key() );
    formattedStringsCache_.remove( discardedKey );
  }
}



// Turns color codes (english color names, RGB triplets, MSN Plus! palette colors) into an HTML RGB color code
//
// The accepted forms are, in order of evaluation:
// - an integer: it is used as index in the MSN Plus! palette (predefinedColors_);
// - a string starting with '#': it is handed to QColor, after padding it with zeros up to
//   "#RRGGBB" when its length is neither that of "#RGB" nor that of "#RRGGBB";
// - anything else: it is handed to QColor as it is, which resolves color names.
//
// Returns a string made of '#' followed by the hexadecimal color. Black ("#000000") is returned
// when the palette index is out of range or the color cannot be resolved.
QString OldRichTextParser::getHtmlColor( const QString& color )
{
  bool convertedOk;
  const int colorNum = color.toInt( &convertedOk );

  // Find colors in the MSN Plus! palette
  if( convertedOk )
  {
    // toInt() also accepts a leading sign, so negative indexes must be rejected too
    if( colorNum >= 0 && colorNum < predefinedColors_.size() )
    {
      return "#" + predefinedColors_[ colorNum ];
    }

    return "#000000";
  }

  // Find HTML (#RRGGBB/#RGB) or CSS (red,blue) color codes
  const int  colorLength  = color.length();
  const bool needsPadding = ( colorLength > 0 && color[0] == '#' && colorLength != 4 && colorLength != 7 );

  // Pad with zeros incomplete RGB triplets; longer strings are left as they are
  const QColor cssOrHtmlColor( needsPadding ? color.leftJustified( 7, '0' ) : color );
  if( cssOrHtmlColor.isValid() )
  {
    return cssOrHtmlColor.name();
  }

  return "#000000";

/*
  // These are not used with MSN+ Live, though are still parsed for backwards compatibility

  // Find arbitrary RGB triplets
  if( color.contains( "," ) )
  {
    QStringList rgb = color.split( ",", QString::KeepEmptyParts );
    QColor rgbColor( rgb[0].toInt(), rgb[1].toInt(), rgb[2].toInt() );

    if( rgbColor.isValid() )
    {
      return rgbColor.name();
    }
  }
*/
}



// Turns a string into a gradient colored one, using Qt HTML tags
//
// Every character of the text is wrapped in its own <font color> element, with colors linearly
// interpolated from startColor (first character) to endColor (last character). HTML tags, HTML
// entities and MSN Plus! color tags found in the text are not split: each one is output whole,
// inside a single colored span, and does not count as a gradient step.
//
// @param text        The text to colorize
// @param startColor  Color of the first character, in any form accepted by getHtmlColor()
// @param endColor    Color of the last character, in any form accepted by getHtmlColor()
// @return The HTML markup. The text is returned unchanged if it is empty or a color is invalid;
//         it is wrapped in one single colored span if the two colors are equal or if the text
//         contains nothing but tags.
QString OldRichTextParser::getHtmlGradient( const QString& text, const QString& startColor, const QString& endColor )
{
  const QColor start( getHtmlColor( startColor ) );
  const QColor end  ( getHtmlColor( endColor   ) );

  // Skip gradienting wrong colors or empty strings
  if( ! start.isValid() || ! end.isValid() || text.isEmpty() )
  {
    return text;
  }

  QList<int> tags;
  int        searchStart = 0;
  int        levels      = text.length();
  QRegExp    looseTagsMatch( "<[^>]+>|&[a-z]+;|&#[0-9]+;|\\[/?[ca](=#?[0-9a-z]+)?\\]", Qt::CaseInsensitive );

#ifdef KMESSDEBUG_RICHTEXTPARSER
  kDebug() << "Text to gradient:" << text;
#endif

  // Search in the text all the tags to skip, and store their positions for faster parsing
  while( looseTagsMatch.indexIn( text, searchStart, QRegExp::CaretAtOffset ) > -1 )
  {
    const int pos = looseTagsMatch.pos( 0 );
    const int len = looseTagsMatch.matchedLength();

#ifdef KMESSDEBUG_RICHTEXTPARSER
    kDebug() << "Adding tag to skip at pos" << pos << "(len" << len << "):" << looseTagsMatch.cap( 0 );
#endif

    // Add position and length of the tag to allow skipping it later
    tags << pos << len;

    // Continue searching after the tag
    searchStart = pos + len;

    // Tags do not take part in the gradient: only the remaining characters are gradient steps
    levels -= len;
  }

#ifdef KMESSDEBUG_RICHTEXTPARSER
  kDebug() << "Size is" << text.length() << "(" << levels << "stripped) -"
           << "Colored from" << start.name() << "to" << end.name();
#endif

  // With equal colors, or without any character to colorize, there is no gradient to compute:
  // don't waste time, and avoid dividing by a zero number of steps
  if( levels <= 0 || start.rgb() == end.rgb() )
  {
#ifdef KMESSDEBUG_RICHTEXTPARSER
    kDebug() << "No gradient is needed, converting it to solid color.";
#endif

    return "<span style=\"color:" + start.name() + ";\">" + text + "</span>";
  }

  // The first character gets the start color and the last one the end color, so the number of
  // intervals is one less than the number of characters (a single character just gets the start color)
  const int lastStep    = ( levels > 1 ) ? ( levels - 1 ) : 1;
  const int deltaRed    = end.red  () - start.red  ();
  const int deltaGreen  = end.green() - start.green();
  const int deltaBlue   = end.blue () - start.blue ();

  QColor  current( start );
  QString outputText;
  int     indexGradient = 0;

  // Proceed through the entire original string
  for( int indexFullString = 0; indexFullString < text.length(); ++indexFullString )
  {
    // A tag which follows the last character must keep the end color instead of going past it
    const int step = ( indexGradient < lastStep ) ? indexGradient : lastStep;

    // Interpolating between the two colors always yields values within their range, which is
    // required as setRgb() voids the whole color if one of the values is out of range
    current.setRgb( start.red  () + ( deltaRed   * step ) / lastStep,
                    start.green() + ( deltaGreen * step ) / lastStep,
                    start.blue () + ( deltaBlue  * step ) / lastStep );

    // Match the formatting tags: they must be skipped
    if( tags.count() > 0 && tags.first() == indexFullString )
    {
      // Remove the tag position and size from the list
      tags.takeFirst();
      const int matchLength = tags.takeFirst();

      // Output the whole tag as a single color
      outputText += "<span style=\"color:" + current.name() + ";\">" + text.mid( indexFullString, matchLength ) + "</span>";

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "Outputting whole tag:" << text.mid( indexFullString, matchLength );
#endif

      // Move the index on to the tag end
      indexFullString += matchLength - 1;
      continue;
    }

    // Use the <font> to save characters
    outputText += "<font color='" + current.name() + "'>" + text[ indexFullString ] + "</font>";

    indexGradient++;
  }

  return outputText;
}



/**
 * @brief Initialize the class
 */
void OldRichTextParser::initialize()
{
  colorMatch_.setMinimal( true );
  emoticonManager_ = EmoticonManager::instance();
}



/**
 * @brief Replace the very old Messenger Plus characters with HTML markup
 *
 * Replace the oldest MSN+ tags with HTML-compatible formatting. This kind of formatting
 * method uses reserved characters - thing which may cause i18n problems - but many MSN+
 * commands (most notably, the predefined messages with sound) still use it.
 *
 * The recognized control characters are:
 * - 0x02: toggles bold;
 * - 0x03: followed by one or two digits (and optionally a comma and a background color, which
 *         is discarded) it starts a colored text; alone, it ends the colored text;
 * - 0x04: sound tag, removed together with the sound ID which follows it;
 * - 0x05: toggles italic;
 * - 0x1f: toggles underline.
 *
 * Tags left open at the end of the text are closed.
 *
 * @param text  The text to format. The original string will be lost.
 */
void OldRichTextParser::parseMsnPlusString( QString &text )
{
  bool boldFlag      = false;
  bool italicFlag    = false;
  bool underlineFlag = false;
  bool fontFlag      = false;  // True while a <font> tag is open

  QRegExp htmlTest( "^\x04""&#?[a-z0-9]+;" );
  QRegExp fontCapture( "^\x03""([0-9]{1,2})(,([0-9]{1,2}))?" );

  for( int index = 0; index < text.length(); index++ )
  {
    switch( text.at( index ).unicode() )
    {
      case 0x0002: // bold character
        boldFlag = !boldFlag;

        text.replace( index, 1, ( boldFlag ) ? "<b>"  : "</b>" );
        index += ( boldFlag ) ? 2 : 3; // Skip the characters we've just added
        break;

      case 0x0003: // color character
      {
        fontCapture.indexIn( text, index, QRegExp::CaretAtOffset );

        const int captureLength = fontCapture.matchedLength();
        int       replacedChars = 1;
        QString   markup;

        // Font background text is ignored, as it's impossible to render in Qt's HTML subset
        if( captureLength != -1 )
        {
          // Font color open. If a color is already active, close it first: this way only one
          // font tag at a time is open, and the flag always tells whether it needs to be closed
          const QColor color( getHtmlColor( fontCapture.cap(1) ) );
          if( fontFlag )
          {
            markup = "</font>";
          }

          markup       += "<font color='" + color.name() + "'>";
          replacedChars = captureLength;
          fontFlag      = true;
        }
        else if( fontFlag )
        {
          // No color found after the special character, close the html tag
          markup   = "</font>";
          fontFlag = false;
        }
        // else: there is no open tag to close, the special character is simply removed

        text.replace( index, replacedChars, markup );

        // Skip the characters we've just added. With no markup added, this restarts the
        // parsing from the character which took the place of the removed one
        index += markup.length() - 1;
        break;
      }

      case 0x0004:
        // Sound tag: this character is followed by another which identifies the sound ID
        htmlTest.indexIn( text, index, QRegExp::CaretAtOffset );
        if( htmlTest.matchedLength() != -1 )
        {
          // Some sounds IDs are HTML entities: that has to be taken care of, too
          text.replace( index, htmlTest.matchedLength(), "" );
        }
        else
        {
          // we need to delete this character and the following one from the string
          text.replace( index, 2, "" );
        }

        // Restart from where we encountered the starting character
        index -= 1;
        break;

      case 0x0005: // italic character
        italicFlag = !italicFlag;

        text.replace( index, 1, ( italicFlag ) ? "<i>"  : "</i>" );
        index += ( italicFlag ) ? 2 : 3; // Skip the characters we've just added
        break;

      case 0x001f: // underline character
        underlineFlag = !underlineFlag;

        text.replace( index, 1, ( underlineFlag ) ? "<u>"  : "</u>" );
        index += ( underlineFlag ) ? 2 : 3; // Skip the characters we've just added
        break;
    }
  }

  // Close any tag still open. Hopefully, the parser will not complain too much if the closing order is wrong
  if( boldFlag )  text.append( "</b>" );
  if( italicFlag )  text.append( "</i>" );
  if( underlineFlag )  text.append( "</u>" );
  if( fontFlag )  text.append( "</font>" );
}



/**
 * Transform a string into its rich text form
 *
 * This is the one-stop shop for text parsing. This method is capable of single-pass conversion of
 * many things in their rich text equivalents:
 * - web links are made clickable, even geek-style "kmess.org" links and email addresses
 * - emoticons shortcuts are transformed in HTML image tags, even custom ones, and not yet received ones
 * - MSN Plus formatting is turned into HTML formatting
 *
 * The text is HTML-escaped first. Then, at every step, all the enabled regular expressions are
 * tried from the current position, and the one matching at the lowest position (the first of the
 * list, in case of a tie) determines the replacement. Excessively long words are always split
 * with soft hyphens, whatever the given flags are.
 *
 * @param text                 The string to parse; it is modified in place
 * @param showEmoticons        Whether to show MSN emoticons in the parsed string
 * @param showSmallEmoticons   Whether the emoticons should be full-size or small
 * @param showLinks            Whether to enable clickable links and email addresses
 * @param showFormatting       Whether to show or strip away MSN+ formatting tags
 * @param handle               If not null or empty, custom emoticons of this contact will be parsed
 * @param pendingEmoticonTags  If the handle is specified, this must be too: this is a list of pending
 *                             custom emoticons which the contact specified by <code>handle</code> has sent to us.
 *                             The ids of the placeholders inserted in the text are appended to it.
 */
void OldRichTextParser::parseMsnString( QString &text, bool showEmoticons, bool showSmallEmoticons, bool showLinks, bool showFormatting, const QString &handle, QStringList &pendingEmoticonTags )
{
  KMessShared::htmlEscape( text );

  // Links and emoticons are replaced in one loop cycle, traversing the message text
  // Multiple search-replace cyles give unwanted side effects:
  // - smileys can pop up in links like ftp://user:pass@host/ and https://host
  // - emoticon replacements could be replaced by another cycle

  bool                         allowAddingEmoticons = false;

  // Build a collection of all emoticon data
  const QRegExp               &emoticonRegExp       = emoticonManager_->getHtmlPattern();
  const QHash<QString,QString> &emoticonReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons );


  QRegExp customRegExp;
  QRegExp pendingRegExp;
  QHash<QString,QString> customReplacements;
  QRegExp userCustomEmoticons;
  QStringList customEmoticonsBlacklist;

  // List where the ids of the pending emoticons are stored. It is the given one, unless that
  // turns out to be invalid: then the ids go to a local list which is simply thrown away
  QStringList  discardedEmoticonTags;
  QStringList *pendingTags = &pendingEmoticonTags;

  // Get theme of custom emoticons
  // NOTE(review): in well-formed C++ the address of a reference is never null, so compilers may
  // drop these address checks; they are kept for callers which rely on passing "null references"
  if( &handle != 0 && ! handle.isEmpty() )
  {
    // Avoid problems if no list of pending emoticons has been given. The invalid list must not
    // be touched at all, not even to reset it
    if( pendingTags == 0 )
    {
      kWarning() << "The given pending emoticons list is not valid!";
      pendingTags = &discardedEmoticonTags;
    }

    if( handle == CurrentAccount::instance()->getHandle() )
    {
        customRegExp       = emoticonManager_->getHtmlPattern( true );
        customReplacements = emoticonManager_->getHtmlReplacements( showSmallEmoticons, true );
        // We already have all of our emoticons, there are no pending ones
    }
    else
    {
      const ContactBase *contact = CurrentAccount::instance()->getContactByHandle( handle );
      if( contact != 0 )
      {
        customRegExp       = contact->getEmoticonPattern();
        customReplacements = contact->getEmoticonReplacements();
        pendingRegExp      = contact->getPendingEmoticonPattern();
        customEmoticonsBlacklist = contact->getEmoticonBlackList();

        allowAddingEmoticons = true;
        userCustomEmoticons = emoticonManager_->getHtmlPattern( true ); // Used to verify if we've already added an emoticon
      }
    }
  }


  QRegExp linkRegExp;
  linkRegExp.setPattern( "\\b(?:http://|https://|ftp://|sftp://|www\\.)"  // match protocol string
                         "[^ \r\n]+"                                      // followed by the host/path
                       );

  QRegExp emailRegExp;
  emailRegExp.setPattern(
                          "\\b("                   // begin of word, start capture
                          "[a-z0-9_\\-\\.]+"       // match email username
                          "\\@"                    // match '@'
                          "[a-z0-9\\-\\.]+"        // match domain hostname
                          "\\.[a-z0-9]{2,6}"       // match top-level-domain
                          ")"                      // end capture
                          "(?:[^a-zA-Z0-9\\-]|$)"  // not followed by more simple characters, or should find an end-of-line
                        );

  QRegExp geekLinkRegExp;
  geekLinkRegExp.setPattern(
                             "(^|\\b)"                  // look-before test, for start of capture or word delimiter
                             "("                        // begin of word, start capture
                             "([a-z0-9\\-]+\\.)+"       // match simple characters, but it should contain a dot between each part
                             "([a-z]{2,3})"             // finally match domain part 2 or 3 characters
                             "(/[a-z0-9\\-_/\\.?=&]+)?" // match the path on the server and simple query requests
                             ")"                        // end capture
                             "(?:[^a-zA-Z0-9]|$)"       // not followed by more simple characters, or should find an end-of-line
                           );

  QRegExp longWordsRegExp( "([\\w\\d-_\\.]{30})([\\w\\d-_\\.]+)" );
  QRegExp punctuationChars( "(?:[.,;!?\"'])$" );
  QRegExp invalidCcTld( "^(js|hh|cc|ui|fo|so|ko|qt|pp|cf|am|in|gz|ps|ai|rv|rm|wm)$" ); // block typical files instead of listing the whole country code list
  QRegExp topLevelDomain( "^(?:com|org|net|edu|gov)$" );

#ifdef KMESSTEST
  KMESS_ASSERT( emoticonRegExp.isValid() );
  KMESS_ASSERT( emailRegExp.isValid() );
  KMESS_ASSERT( linkRegExp.isValid() );
  KMESS_ASSERT( geekLinkRegExp.isValid() );
  KMESS_ASSERT( longWordsRegExp.isValid() );
#endif

  // Set the filename of the placeholder image for pending emoticons
  static QString pendingEmoticonPlaceholder( KGlobal::dirs()->findResource( "appdata", "pics/empty.png" ) );

  static const int REGEXP_COUNT = 7;
  const QRegExp* regexps[REGEXP_COUNT];

  // Allow to overwrite standard emoticons by parsing custom emoticons before standard ones.
  // A null entry disables the expression; the indexes must match the cases of the switch below
  regexps[0] = ( ( showEmoticons && ! customRegExp  .isEmpty() ) ? &customRegExp   : 0 );
  regexps[1] = ( ( showEmoticons && ! pendingRegExp .isEmpty() ) ? &pendingRegExp  : 0 );
  regexps[2] = ( ( showEmoticons && ! emoticonRegExp.isEmpty() ) ? &emoticonRegExp : 0 );
  regexps[3] = ( showLinks ? &linkRegExp : 0 );
  regexps[4] = ( showLinks ? &emailRegExp : 0 );
  regexps[5] = ( showLinks ? &geekLinkRegExp : 0 );
  regexps[6] = ( &longWordsRegExp );

  QString replacement;
  int lastPos = 0;
  int matches[ REGEXP_COUNT ];
  memset( matches, -1, sizeof( matches ) );

  while( true )
  {
    // Find out which expression matches first
    int matchedRegExp = -1;
    int matchStart    = -1;
    int matchedLength = 0;
    for(int i = 0; i < REGEXP_COUNT; i++)
    {
      if( regexps[ i ] == 0 )
      {
        continue;
      }

      matches[ i ] = regexps[ i ]->indexIn( text, lastPos );
      if( matches[ i ] == -1 || text.length() < matches[ i ] )
      {
        continue;
      }

#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "regexp " << i << " matches at character " << matches[ i ];
#endif

      // See if it's before all other regexps
      if( matches[ i ] < matchStart || matchStart == -1 )
      {
        matchStart    = matches[ i ];
        matchedRegExp = i;
        matchedLength = regexps[ i ]->matchedLength();
      }
    }


    QString link;
    QString linkBefore;
    QString code;
    QString placeholderId;

    // Determine the replacement for the matched expression
    switch( matchedRegExp )
    {

      // Found a custom emoticon
      case 0:
        code = text.mid( matchStart, customRegExp.matchedLength() );  // cap(0) is not const

        // Avoid replacing invalid emoticons with nothing
        if( ! customReplacements.contains( code ) )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kWarning() << "Emoticon replacement for '" << code << "' not found!";
#endif
          replacement = code;
          break;
        }

        // Do not display unwanted emoticons
        if( customEmoticonsBlacklist.contains( code ) )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Ignoring blacklisted emoticon" << code;
#endif
          replacement = code;
          break;
        }

        replacement = customReplacements[ code ];

        // This emoticon is unknown, allow the user to add it by adding an internal KMess link to it
        if( allowAddingEmoticons && userCustomEmoticons.indexIn( code ) == -1 )
        {
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Inserting emoticon addition link:" << code;
#endif
          QString imagePath( replacement );
          QString urlCode( QUrl::toPercentEncoding( code ) );

          // Retrieve the image name from the replacement
          imagePath = imagePath.replace( "\"", "'" );
          imagePath = imagePath.mid( imagePath.indexOf( "src='" ) + 5 );
          imagePath = imagePath.mid( 0, imagePath.indexOf( "'" ) );

          // i18n() will unescape the string: Without this, an emoticon like " 'test " will result in
          // an attribute like this: " title='Add this emoticon: 'test' ", messing up the whole markup
          QString escapedCode( code );
          KMessShared::htmlEscape( escapedCode );

          // The name attribute is required as, if the user adds the emoticon, we'll want to make all links like this unclickable
          replacement = "<a name='newEmoticon_" + urlCode
                        + "' title='" + i18n( "Add this emoticon: %1", escapedCode )
                        + "' href='kmess://emoticon/" + handle + "/" +  urlCode + "/" + QUrl::toPercentEncoding( imagePath )
                        + "'>"
                        + replacement
                        + "</a>";
        }
#ifdef KMESSDEBUG_RICHTEXTPARSER
        else
        {
          kDebug() << "Allow adding emoticons:" << allowAddingEmoticons << "code:" << code;
        }
#endif
        break;


      // Found a custom emoticon, but the image file is still being downloaded
      // Generate a placeholder tag, <img src="empty.png">, and update this tag later when the emoticon is received
      case 1:
        // Generate and store placeholder ID
        placeholderId = "ce" + QString::number( ++lastPendingEmoticonId_ );
        pendingTags->append( placeholderId );

        // Insert placeholder
        code = text.mid( matchStart, pendingRegExp.matchedLength() );  // cap(0) is not const
        replacement = "<img id='" + placeholderId
                    + "' src='" + Qt::escape( pendingEmoticonPlaceholder )

                    + "' alt='" + code
                    + "' contact='" + Qt::escape( handle )
                    + "' width='16' height='16' valign='middle' class='customEmoticonPlaceholder' />";
        break;


      // Found an emoticon
      case 2:
        code = text.mid( matchStart, emoticonRegExp.matchedLength() );  // cap(0) is not const
        if( emoticonReplacements.contains( code ) )
        {
          replacement = emoticonReplacements[ code ];
        }
        else
        {
          // HACK: Replace with the same string, to skip the entire code and continue parsing after it
          // See EmoticonTheme::updateCache()
          replacement = code;
#ifdef KMESSDEBUG_RICHTEXTPARSER
          kDebug() << "Skipping unmatched code '" << code << "'";
#endif
        }

        break;


      // Found a link
      case 3:
        link = linkRegExp.cap(0);
        if( ! link.isEmpty() )
        {
          // filter out punctuation char
          matchedLength = link.length();
          if(  punctuationChars.exactMatch(link)                // matches standard chars at end
          || ( link.endsWith(")") && ! link.contains("(") ) )   // has ")" at end, unless it's a wikipedia disambiguation link
          {
            matchedLength--;
            link.remove( matchedLength, 1 );
          }

          // Filter out long words in links (one example: eBay auctions)
          QString longWordFilter( link );
          longWordFilter.replace( longWordsRegExp, "\\1&shy;\\2" );

          // Create link
          // When www. is found, automatically add http:// to the href
          // This doesn't clash, because http:// links are matched earlier
          replacement = ( link.startsWith("www.") )
                        ? "<a href=\"http://" + link + "\" target=\"_blank\">" + longWordFilter + "</a>"
                        : "<a href=\""        + link + "\" target=\"_blank\">" + longWordFilter + "</a>";
        }
        break;


      // Found a email address
      case 4:
        link = emailRegExp.cap(1);
        if( ! link.isEmpty() )
        {
          matchedLength = link.length();  // filter out punctuation char
          replacement   = "<a href=\"mailto:" + link + "\">" + link + "</a>";
        }
        break;


      // Found a geek-style link
      case 5:
        linkBefore = geekLinkRegExp.cap(1); // matched look-before character
        link       = geekLinkRegExp.cap(2);
        if( ! link.isEmpty() )
        {
          // Avoid matching "index.htm", "test.js" etc.
          // The list can never be complete but filter out 99% of the cases
          QString tld( geekLinkRegExp.cap(4) );
          if( ( tld.length() == 2 && ! invalidCcTld.exactMatch(tld) )
          ||  ( tld.length() == 3 && topLevelDomain.exactMatch(tld) ) )
          {
            matchedLength = linkBefore.length() + link.length();  // filter out punctuation char
            replacement = linkBefore + "<a href=\"http://" + link + "/\" target=\"_blank\">" + link + "</a>";
          }
        }
        break;


      // Found an excessively long word (for example, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaah!)
      case 6:
        matchedLength = longWordsRegExp.cap( 0 ).length();
        replacement = longWordsRegExp.cap( 1 ) + "&shy;" + longWordsRegExp.cap( 2 );
        break;

      // Nothing was found, stop
      case -1:
        break;

      default:
#ifdef KMESSDEBUG_RICHTEXTPARSER
        kWarning() << "result of regular expression " << matchedRegExp << " is unhandled!";
#endif
        break;
    }

    // Nothing was found, stop
    if( matchedRegExp == -1 )
    {
      // C doesn't allow us to break the while loop inside the switch statement, so break again
      break;
    }

    // Process the replacement
    if( replacement.isEmpty() || text.mid(matchStart, matchedLength).isEmpty() )
    {
      // No replacement found, move cursor to next char
      lastPos = matchStart + 1;
    }
    else
    {
      // Replace the original text
#ifdef KMESSDEBUG_RICHTEXTPARSER
      kDebug() << "replacing '" << text.mid( matchStart, matchedLength ) << "' with: " << replacement << " (matched regexp=" << matchedRegExp << ")";
#endif
      text.replace( matchStart, matchedLength, replacement );

      // Continue after the inserted markup, so that it cannot be matched again
      lastPos     = matchStart + replacement.length();
      replacement = QString::null;
    }
  }

  // Replace any "> "s in the message with ">&nbsp;" to avoid missing spaces after emoticons
  text.replace( "> ", ">&nbsp;" );

  // Replace double spaces with double &nbsp;s so that they'll show properly
  text.replace( "  ", "&nbsp;&nbsp;" );

  // Replace the MSN Plus text formatting tags
  if( showFormatting )
  {
    parseMsnPlusString( text );
    getFormattedString( text );
  }
  else
  {
    getCleanString( text );
  }
}



// Constructor. When parseName is true, the string is parsed right away by setString();
// otherwise it is only stored, unparsed, as every version of the string (used during the
// KMess initialization). Nothing is stored for an empty string.
OldFormattedString::OldFormattedString( const QString &string, bool parseName )
: showEmoticons_     ( true  )
, showLinks_         ( false )
, showSmallEmoticons_( true  )
{
  // There is nothing to save nor to parse
  if( string.isEmpty() )
  {
    return;
  }

  if( parseName )
  {
    setString( string );
    return;
  }

  // Parsing is not wanted: all the versions are equal to the given string
  cleaned_   = string;
  escaped_   = string;
  formatted_ = string;
  original_  = string;
}



// Returns the version of the string which corresponds to the given mode.
// The "setting" modes return the formatted version when the current account has the chat (or
// contact list) formatting option enabled; when it's disabled they return the cleaned version,
// or the escaped one for the "escaped" variants. Unknown modes return the original string.
const QString &OldFormattedString::getString( OldFormattingMode mode ) const
{
  // Modes which do not depend on the account settings
  if( mode == OLD_STRING_CLEANED )
  {
    return cleaned_;
  }
  if( mode == OLD_STRING_CLEANED_ESCAPED )
  {
    return escaped_;
  }
  if( mode == OLD_STRING_FORMATTED )
  {
    return formatted_;
  }

  // Modes which follow the chat formatting setting
  if( mode == OLD_STRING_CHAT_SETTING || mode == OLD_STRING_CHAT_SETTING_ESCAPED )
  {
    if( CurrentAccount::instance()->getUseChatFormatting() )
    {
      return formatted_;
    }

    return ( mode == OLD_STRING_CHAT_SETTING ) ? cleaned_ : escaped_;
  }

  // Modes which follow the contact list formatting setting
  if( mode == OLD_STRING_LIST_SETTING || mode == OLD_STRING_LIST_SETTING_ESCAPED )
  {
    if( CurrentAccount::instance()->getUseListFormatting() )
    {
      return formatted_;
    }

    return ( mode == OLD_STRING_LIST_SETTING ) ? cleaned_ : escaped_;
  }

  return original_;
}



// Set a new string and compute all of its versions:
// - original:  the string as it was given;
// - cleaned:   the string without MSN Plus! formatting tags;
// - formatted: the string parsed by OldRichTextParser::parseMsnString() with the emoticon and
//              link flags of this object, and with formatting enabled;
// - escaped:   the cleaned version, HTML-escaped.
// For an empty string, all the versions are empty and no parsing takes place.
void OldFormattedString::setString( const QString &string )
{
  // Start with the new string everywhere
  formatted_ = string;
  original_  = string;
  escaped_   = string;
  cleaned_   = string;

  // An empty string needs no parsing
  if( string.isEmpty() )
  {
    return;
  }

  // Parse the cleaned and formatted versions
  OldRichTextParser::getCleanString( cleaned_ );
  OldRichTextParser::parseMsnString( formatted_, showEmoticons_, showSmallEmoticons_, showLinks_, true );

  // The escaped version derives from the cleaned one
  escaped_ = cleaned_;
  KMessShared::htmlEscape( escaped_ );
}
