sbStringTransformImpl.cpp
Go to the documentation of this file.
1 /*
2 //
3 // BEGIN SONGBIRD GPL
4 //
5 // This file is part of the Songbird web player.
6 //
7 // Copyright(c) 2005-2008 POTI, Inc.
8 // http://songbirdnest.com
9 //
10 // This file may be licensed under the terms of the
11 // GNU General Public License Version 2 (the "GPL").
12 //
13 // Software distributed under the License is distributed
14 // on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either
15 // express or implied. See the GPL for the specific language
16 // governing rights and limitations.
17 //
18 // You should have received a copy of the GPL along with this
19 // program. If not, go to http://www.gnu.org/licenses/gpl.html
20 // or write to the Free Software Foundation, Inc.,
21 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 //
23 // END SONGBIRD GPL
24 //
25 */
26 
27 #include "sbStringTransformImpl.h"
28 
29 #include <nsMemory.h>
30 #include <nsStringGlue.h>
31 
32 #include <prmem.h>
33 
34 #include <glib.h>
35 #include "sbLeadingNumbers.h"
36 
38 {
39 }
40 
42 {
43 }
44 
45 nsresult
47  return NS_OK;
48 }
49 
50 NS_IMETHODIMP
51 sbStringTransformImpl::NormalizeString(const nsAString & aCharset,
52  PRUint32 aTransformFlags,
53  const nsAString & aInput,
54  nsAString & _retval)
55 {
56  nsCString str;
57  CopyUTF16toUTF8(aInput, str);
58 
59  if(aTransformFlags & sbIStringTransform::TRANSFORM_LOWERCASE) {
60  gchar* lowercaseStr = g_utf8_strdown(str.BeginReading(), str.Length());
61  NS_ENSURE_TRUE(lowercaseStr, NS_ERROR_OUT_OF_MEMORY);
62  str.Assign(lowercaseStr);
63  g_free(lowercaseStr);
64  }
65 
66  if(aTransformFlags & sbIStringTransform::TRANSFORM_UPPERCASE) {
67  gchar* uppercaseStr = g_utf8_strup(str.BeginReading(), str.Length());
68  NS_ENSURE_TRUE(uppercaseStr, NS_ERROR_OUT_OF_MEMORY);
69  str.Assign(uppercaseStr);
70  g_free(uppercaseStr);
71  }
72 
73  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONSPACE) {
74  nsString workingStr;
75 
76  PRBool leadingOnly = aTransformFlags &
78  PRBool bypassTest = PR_FALSE;
79 
80  gchar* nonspaceStr = g_utf8_normalize(str.BeginReading(),
81  str.Length(),
82  G_NORMALIZE_ALL);
83  NS_ENSURE_TRUE(nonspaceStr, NS_ERROR_OUT_OF_MEMORY);
84 
85  glong strLen = g_utf8_strlen(nonspaceStr, -1);
86 
87  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
88 
89  gchar* offset = g_utf8_offset_to_pointer(nonspaceStr, currentChar);
90  gunichar unichar = g_utf8_get_char(offset);
91  GUnicodeType unicharType = g_unichar_type(unichar);
92 
93  if(bypassTest ||
94  (unicharType != G_UNICODE_NON_SPACING_MARK &&
95  unicharType != G_UNICODE_COMBINING_MARK &&
96  unicharType != G_UNICODE_ENCLOSING_MARK)) {
97  workingStr += unichar;
98  if(leadingOnly)
99  bypassTest = PR_TRUE;
100  }
101  }
102 
103  g_free(nonspaceStr);
104  CopyUTF16toUTF8(workingStr, str);
105  }
106 
107  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_SYMBOLS) {
108  nsString workingStr;
109 
110  PRBool leadingOnly = aTransformFlags &
112  PRBool bypassTest = PR_FALSE;
113 
114  gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(),
115  str.Length(),
116  G_NORMALIZE_ALL);
117  NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
118 
119  glong strLen = g_utf8_strlen(nosymbolsStr, -1);
120 
121  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
122  gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
123  gunichar unichar = g_utf8_get_char(offset);
124  GUnicodeType unicharType = g_unichar_type(unichar);
125 
127  PRInt32 numberLength;
128  SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
129  if (numberLength > 0) {
130  for (glong copychar=0;copychar < numberLength;copychar++) {
131  gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
132  gunichar unichar = g_utf8_get_char(copyoffset);
133  workingStr += unichar;
134  }
135  currentChar += numberLength-1;
136  if(leadingOnly)
137  bypassTest = PR_TRUE;
138  continue;
139  }
140  }
141 
142  if(bypassTest ||
143  (unicharType != G_UNICODE_CURRENCY_SYMBOL &&
144  unicharType != G_UNICODE_MODIFIER_SYMBOL &&
145  unicharType != G_UNICODE_MATH_SYMBOL &&
146  unicharType != G_UNICODE_OTHER_SYMBOL)) {
147  workingStr += unichar;
148  if(leadingOnly)
149  bypassTest = PR_TRUE;
150  }
151  }
152 
153  g_free(nosymbolsStr);
154  CopyUTF16toUTF8(workingStr, str);
155  }
156 
157  if((aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM) ||
159  nsString workingStr;
160 
161  PRBool leadingOnly = aTransformFlags &
163  PRBool bypassTest = PR_FALSE;
164 
165  gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(),
166  str.Length(),
167  G_NORMALIZE_ALL);
168  NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
169 
170  glong strLen = g_utf8_strlen(nosymbolsStr, -1);
171 
172  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
173 
174  gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
175  gunichar unichar = g_utf8_get_char(offset);
176  GUnicodeType unicharType = g_unichar_type(unichar);
177 
178  if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS) {
179  PRInt32 numberLength;
180  SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
181  if (numberLength > 0) {
182  for (glong copychar=0;copychar < numberLength;copychar++) {
183  gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
184  gunichar unichar = g_utf8_get_char(copyoffset);
185  workingStr += unichar;
186  }
187  currentChar += numberLength-1;
188  if(leadingOnly)
189  bypassTest = PR_TRUE;
190  continue;
191  }
192  }
193 
194  if(bypassTest ||
195  (unicharType == G_UNICODE_LOWERCASE_LETTER ||
196  unicharType == G_UNICODE_MODIFIER_LETTER ||
197  unicharType == G_UNICODE_OTHER_LETTER ||
198  unicharType == G_UNICODE_TITLECASE_LETTER ||
199  unicharType == G_UNICODE_UPPERCASE_LETTER ||
200  unicharType == G_UNICODE_DECIMAL_NUMBER ||
201  unicharType == G_UNICODE_LETTER_NUMBER ||
202  unicharType == G_UNICODE_OTHER_NUMBER) ||
203  (!(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE) &&
204  unichar == ' ')) {
205  workingStr += unichar;
206  if(leadingOnly)
207  bypassTest = PR_TRUE;
208  }
209  }
210 
211  g_free(nosymbolsStr);
212  CopyUTF16toUTF8(workingStr, str);
213  }
214 
215  CopyUTF8toUTF16(str, _retval);
216 
217  return NS_OK;
218 }
219 
/**
 * Charset conversion entry point; not implemented in this class.
 *
 * None of the arguments are read and _retval is never written.
 *
 * \return Always NS_ERROR_NOT_IMPLEMENTED.
 */
220 NS_IMETHODIMP
221 sbStringTransformImpl::ConvertToCharset(const nsAString & aDestCharset,
222  const nsAString & aInput,
223  nsAString & _retval)
224 {
225  return NS_ERROR_NOT_IMPLEMENTED;
226 }
227 
/**
 * Charset detection entry point; not implemented in this class.
 *
 * aInput is not read and _retval is never written.
 *
 * \return Always NS_ERROR_NOT_IMPLEMENTED.
 */
228 NS_IMETHODIMP
229 sbStringTransformImpl::GuessCharset(const nsAString & aInput,
230  nsAString & _retval)
231 {
232  return NS_ERROR_NOT_IMPLEMENTED;
233 }
return NS_OK
const unsigned long TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE
const unsigned long TRANSFORM_IGNORE_LEADING
TRANSFORM_IGNORE_* operates on leading characters and stops as soon as a non-ignored character is found.
Leading Numbers Parsing.
const unsigned long TRANSFORM_IGNORE_SYMBOLS
Ignore symbol characters. This includes but is not limited to #, $, +, -.
const unsigned long TRANSFORM_UPPERCASE
Transform to uppercase.
const unsigned long TRANSFORM_IGNORE_NONALPHANUM
Ignore non-alphanumerical characters.
PRUint32 & offset
void SB_ExtractLeadingNumber(const CHARTYPE *str, PRBool *hasLeadingNumber, PRFloat64 *leadingNumber, PRInt32 *numberLength)
const unsigned long TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS
TRANSFORM_IGNORE_* keeps number symbols in.
const unsigned long TRANSFORM_LOWERCASE
Transform to lowercase.
const unsigned long TRANSFORM_IGNORE_NONSPACE
Ignore non-spacing characters. This includes but is not limited to diacritics.
NS_DECL_SBISTRINGTRANSFORM_IMPL nsresult Init()