sbStringTransformImpl.cpp
Go to the documentation of this file.
1 /*
2  *=BEGIN SONGBIRD GPL
3  *
4  * This file is part of the Songbird web player.
5  *
6  * Copyright(c) 2005-2008 POTI, Inc.
7  * http://www.songbirdnest.com
8  *
9  * This file may be licensed under the terms of the
10  * GNU General Public License Version 2 (the ``GPL'').
11  *
12  * Software distributed under the License is distributed
13  * on an ``AS IS'' basis, WITHOUT WARRANTY OF ANY KIND, either
14  * express or implied. See the GPL for the specific language
15  * governing rights and limitations.
16  *
17  * You should have received a copy of the GPL along with this
18  * program. If not, go to http://www.gnu.org/licenses/gpl.html
19  * or write to the Free Software Foundation, Inc.,
20  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21  *
22  *=END SONGBIRD GPL
23  */
24 
25 #include "sbStringTransformImpl.h"
26 
27 #include <nsMemory.h>
28 #include <nsStringGlue.h>
29 
30 #include <prmem.h>
31 
32 #include <glib.h>
33 #include "sbLeadingNumbers.h"
34 
36 {
37 }
38 
40 {
41 }
42 
43 nsresult
45  return NS_OK;
46 }
47 
// Applies the transformations selected by aTransformFlags to aInput and
// writes the result to _retval.
//
// The input is converted to UTF-8 once; every enabled stage then rewrites
// that working copy, in this fixed order: lowercase, uppercase, removal of
// non-spacing/combining/enclosing marks, removal of symbols, removal of
// non-alphanumeric characters. The final working copy is converted back to
// UTF-16.
//
// @param aCharset        Not referenced anywhere in the visible body.
// @param aTransformFlags Bitmask of sbIStringTransform::TRANSFORM_* values.
// @param aInput          UTF-16 string to transform.
// @param _retval         Receives the transformed UTF-16 string.
// @return NS_OK on success; NS_ERROR_OUT_OF_MEMORY when one of the GLib
//         calls below returns null.
//
// NOTE(review): this listing is missing several source lines (listing lines
// 75, 109, 124, 156 and 160). The gaps are flagged where they occur; verify
// against the original file before relying on those statements.
48 NS_IMETHODIMP
49 sbStringTransformImpl::NormalizeString(const nsAString & aCharset,
50  PRUint32 aTransformFlags,
51  const nsAString & aInput,
52  nsAString & _retval)
53 {
 // Working copy in UTF-8, the encoding the g_utf8_* functions operate on.
54  nsCString str;
55  CopyUTF16toUTF8(aInput, str);
56 
 // Stage 1: lowercase. Runs before the uppercase stage, so when both flags
 // are set the uppercase result is the one that survives.
57  if(aTransformFlags & sbIStringTransform::TRANSFORM_LOWERCASE) {
58  gchar* lowercaseStr = g_utf8_strdown(str.BeginReading(), str.Length());
59  NS_ENSURE_TRUE(lowercaseStr, NS_ERROR_OUT_OF_MEMORY);
 // Copy the result into str before releasing the GLib-owned buffer.
60  str.Assign(lowercaseStr);
61  g_free(lowercaseStr);
62  }
63 
 // Stage 2: uppercase.
64  if(aTransformFlags & sbIStringTransform::TRANSFORM_UPPERCASE) {
65  gchar* uppercaseStr = g_utf8_strup(str.BeginReading(), str.Length());
66  NS_ENSURE_TRUE(uppercaseStr, NS_ERROR_OUT_OF_MEMORY);
67  str.Assign(uppercaseStr);
68  g_free(uppercaseStr);
69  }
70 
 // Stage 3: drop non-spacing, combining and enclosing marks.
71  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONSPACE) {
 // UTF-16 accumulator for the characters that are kept.
72  nsString workingStr;
73 
 // NOTE(review): the right-hand operand of this '&' (listing line 75) is
 // missing from this view; presumably it is
 // sbIStringTransform::TRANSFORM_IGNORE_LEADING - confirm.
74  PRBool leadingOnly = aTransformFlags &
 // Once set, every remaining character is copied without being tested.
76  PRBool bypassTest = PR_FALSE;
77 
 // Normalize first so the per-character type test below sees the
 // normalized form of the text.
 // NOTE(review): a null result is reported as out-of-memory; presumably
 // g_utf8_normalize can also return null for invalid UTF-8 - confirm.
78  gchar* nonspaceStr = g_utf8_normalize(str.BeginReading(),
79  str.Length(),
80  G_NORMALIZE_ALL);
81  NS_ENSURE_TRUE(nonspaceStr, NS_ERROR_OUT_OF_MEMORY);
82 
 // Length in characters, not bytes; the loop indexes by character.
83  glong strLen = g_utf8_strlen(nonspaceStr, -1);
84 
85  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
86 
 // The character position is resolved from the start of the buffer on
 // every iteration.
87  gchar* offset = g_utf8_offset_to_pointer(nonspaceStr, currentChar);
88  gunichar unichar = g_utf8_get_char(offset);
89  GUnicodeType unicharType = g_unichar_type(unichar);
90 
 // Keep the character unless it is one of the three mark types.
91  if(bypassTest ||
92  (unicharType != G_UNICODE_NON_SPACING_MARK &&
93  unicharType != G_UNICODE_COMBINING_MARK &&
94  unicharType != G_UNICODE_ENCLOSING_MARK)) {
 // NOTE(review): unichar is a full code point appended to a UTF-16
 // string; presumably values above 0xFFFF are truncated - confirm.
95  workingStr += unichar;
 // In leading-only mode the first kept character ends the filtering.
96  if(leadingOnly)
97  bypassTest = PR_TRUE;
98  }
99  }
100 
101  g_free(nonspaceStr);
 // Feed the filtered text back into the UTF-8 working copy.
102  CopyUTF16toUTF8(workingStr, str);
103  }
104 
 // Stage 4: drop currency, modifier, math and other symbols.
105  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_SYMBOLS) {
106  nsString workingStr;
107 
 // NOTE(review): the right-hand operand of this '&' (listing line 109) is
 // missing from this view; presumably TRANSFORM_IGNORE_LEADING - confirm.
108  PRBool leadingOnly = aTransformFlags &
110  PRBool bypassTest = PR_FALSE;
111 
112  gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(),
113  str.Length(),
114  G_NORMALIZE_ALL);
115  NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
116 
117  glong strLen = g_utf8_strlen(nosymbolsStr, -1);
118 
119  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
120  gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
121  gunichar unichar = g_utf8_get_char(offset);
122  GUnicodeType unicharType = g_unichar_type(unichar);
123 
 // NOTE(review): the line that opens this block (listing line 124) is
 // missing from this view; presumably it is the same
 // TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS test used in the non-alphanumeric
 // stage further down - confirm.
125  PRInt32 numberLength;
 // Ask whether a number starts at the current position; only its length
 // is requested.
126  SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
127  if (numberLength > 0) {
 // Copy the whole number verbatim, so the characters that belong to
 // it are not subjected to the symbol test below.
128  for (glong copychar=0;copychar < numberLength;copychar++) {
129  gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
130  gunichar unichar = g_utf8_get_char(copyoffset);
131  workingStr += unichar;
132  }
 // Skip the copied characters; the loop increment steps past the last.
133  currentChar += numberLength-1;
134  if(leadingOnly)
135  bypassTest = PR_TRUE;
136  continue;
137  }
138  }
139 
 // Keep the character unless it is one of the four symbol types.
140  if(bypassTest ||
141  (unicharType != G_UNICODE_CURRENCY_SYMBOL &&
142  unicharType != G_UNICODE_MODIFIER_SYMBOL &&
143  unicharType != G_UNICODE_MATH_SYMBOL &&
144  unicharType != G_UNICODE_OTHER_SYMBOL)) {
145  workingStr += unichar;
146  if(leadingOnly)
147  bypassTest = PR_TRUE;
148  }
149  }
150 
151  g_free(nosymbolsStr);
152  CopyUTF16toUTF8(workingStr, str);
153  }
154 
 // Stage 5: keep only letters and numbers (and optionally spaces).
 // NOTE(review): the second operand of this '||' and the opening brace
 // (listing line 156) are missing from this view; presumably it tests
 // TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE - confirm.
155  if((aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM) ||
157  nsString workingStr;
158 
 // NOTE(review): the right-hand operand of this '&' (listing line 160) is
 // missing from this view; presumably TRANSFORM_IGNORE_LEADING - confirm.
159  PRBool leadingOnly = aTransformFlags &
161  PRBool bypassTest = PR_FALSE;
162 
163  gchar* nosymbolsStr = g_utf8_normalize(str.BeginReading(),
164  str.Length(),
165  G_NORMALIZE_ALL);
166  NS_ENSURE_TRUE(nosymbolsStr, NS_ERROR_OUT_OF_MEMORY);
167 
168  glong strLen = g_utf8_strlen(nosymbolsStr, -1);
169 
170  for(glong currentChar = 0; currentChar < strLen; ++currentChar) {
171 
172  gchar* offset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar);
173  gunichar unichar = g_utf8_get_char(offset);
174  GUnicodeType unicharType = g_unichar_type(unichar);
175 
 // With KEEPNUMBERSYMBOLS set, a number found at this position is copied
 // in full before the alphanumeric test gets a chance to drop parts of it.
176  if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS) {
177  PRInt32 numberLength;
178  SB_ExtractLeadingNumber((const gchar *)offset, NULL, NULL, &numberLength);
179  if (numberLength > 0) {
180  for (glong copychar=0;copychar < numberLength;copychar++) {
181  gchar* copyoffset = g_utf8_offset_to_pointer(nosymbolsStr, currentChar+copychar);
182  gunichar unichar = g_utf8_get_char(copyoffset);
183  workingStr += unichar;
184  }
 // Skip the copied characters; the loop increment steps past the last.
185  currentChar += numberLength-1;
186  if(leadingOnly)
187  bypassTest = PR_TRUE;
188  continue;
189  }
190  }
191 
 // Keep the character when filtering is bypassed, when it is a letter or
 // number of any listed type, or when it is a plain space and the
 // IGNORE_SPACE variant of the flag is not set.
192  if(bypassTest ||
193  (unicharType == G_UNICODE_LOWERCASE_LETTER ||
194  unicharType == G_UNICODE_MODIFIER_LETTER ||
195  unicharType == G_UNICODE_OTHER_LETTER ||
196  unicharType == G_UNICODE_TITLECASE_LETTER ||
197  unicharType == G_UNICODE_UPPERCASE_LETTER ||
198  unicharType == G_UNICODE_DECIMAL_NUMBER ||
199  unicharType == G_UNICODE_LETTER_NUMBER ||
200  unicharType == G_UNICODE_OTHER_NUMBER) ||
201  (!(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE) &&
202  unichar == ' ')) {
203  workingStr += unichar;
204  if(leadingOnly)
205  bypassTest = PR_TRUE;
206  }
207  }
208 
209  g_free(nosymbolsStr);
210  CopyUTF16toUTF8(workingStr, str);
211  }
212 
 // Hand the final UTF-8 working copy back to the caller as UTF-16.
213  CopyUTF8toUTF16(str, _retval);
214 
215  return NS_OK;
216 }
217 
// Charset conversion entry point. This implementation is a stub: none of
// the parameters are read, _retval is left untouched, and the call always
// returns NS_ERROR_NOT_IMPLEMENTED.
218 NS_IMETHODIMP
219 sbStringTransformImpl::ConvertToCharset(const nsAString & aDestCharset,
220  const nsAString & aInput,
221  nsAString & _retval)
222 {
223  return NS_ERROR_NOT_IMPLEMENTED;
224 }
225 
// Charset detection entry point. This implementation is a stub: aInput is
// not inspected, _retval is left untouched, and the call always returns
// NS_ERROR_NOT_IMPLEMENTED.
226 NS_IMETHODIMP
227 sbStringTransformImpl::GuessCharset(const nsAString & aInput,
228  nsAString & _retval)
229 {
230  return NS_ERROR_NOT_IMPLEMENTED;
231 }
return NS_OK
const unsigned long TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE
const unsigned long TRANSFORM_IGNORE_LEADING
TRANSFORM_IGNORE_* operates on leading characters and stops as soon as a non-ignored character is found.
Leading Numbers Parsing.
const unsigned long TRANSFORM_IGNORE_SYMBOLS
Ignore symbol characters. This includes but is not limited to #, $, +, -.
const unsigned long TRANSFORM_UPPERCASE
Transform to uppercase.
const unsigned long TRANSFORM_IGNORE_NONALPHANUM
Ignore non-alphanumerical characters.
PRUint32 & offset
void SB_ExtractLeadingNumber(const CHARTYPE *str, PRBool *hasLeadingNumber, PRFloat64 *leadingNumber, PRInt32 *numberLength)
const unsigned long TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS
TRANSFORM_IGNORE_* keeps number symbols in.
const unsigned long TRANSFORM_LOWERCASE
Transform to lowercase.
const unsigned long TRANSFORM_IGNORE_NONSPACE
Ignore non-spacing characters. This includes but is not limited to diacritics.
NS_DECL_SBISTRINGTRANSFORM_IMPL nsresult Init()