sbStringTransformImpl.cpp
Go to the documentation of this file.
1 /*
2 //
3 // BEGIN SONGBIRD GPL
4 //
5 // This file is part of the Songbird web player.
6 //
7 // Copyright(c) 2005-2008 POTI, Inc.
8 // http://songbirdnest.com
9 //
10 // This file may be licensed under the terms of the
11 // GNU General Public License Version 2 (the "GPL").
12 //
13 // Software distributed under the License is distributed
14 // on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either
15 // express or implied. See the GPL for the specific language
16 // governing rights and limitations.
17 //
18 // You should have received a copy of the GPL along with this
19 // program. If not, go to http://www.gnu.org/licenses/gpl.html
20 // or write to the Free Software Foundation, Inc.,
21 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 //
23 // END SONGBIRD GPL
24 //
25 */
26 
27 #include "sbStringTransformImpl.h"
28 
29 #include <nsMemory.h>
30 #include <nsStringGlue.h>
31 
32 #include <prmem.h>
33 #include "sbLeadingNumbers.h"
34 
36 {
37 }
38 
40 {
41 }
42 
// Init(): the declaration summary at the end of this listing gives the
// signature as `nsresult Init()`; the line that carries the function name
// (listing line 44) is missing from this extract.
// The visible body performs no work and simply reports success.
43 nsresult
45  return NS_OK;
46 }
47 
// MakeFlags: builds the flag mask that NormalizeString passes to
// LCMapStringW and fills the per-character-type filter lists.
//
// The line carrying the function name and the aFlags parameter (listing
// line 49) is missing from this extract; the declaration summary at the end
// of this listing gives the first parameter as `PRUint32 aFlags`.
//
// aFlags         bitmask of sbIStringTransform::TRANSFORM_* values.
// aExcludeChars  one array per character-type class (indexed C1/C2/C3);
//                receives the type bits that cause a character to be dropped.
// aIncludeChars  one array per character-type class; receives the type bits
//                a character may match in order to be kept.
//
// Returns the accumulated LCMAP_* flags (0 when no case flag was added).
//
// NOTE(review): every guarding `if` except the innermost one is missing from
// this extract (listing lines 55, 59, 63, 68 and 73-74), so the condition
// for each group below is not visible here - confirm against the real file.
48 unsigned long
50  nsTArray<WORD> aExcludeChars[NTYPES],
51  nsTArray<WORD> aIncludeChars[NTYPES])
52 {
53  DWORD actualFlags = 0;
54 
 // (guard on listing line 55 missing - presumably TRANSFORM_LOWERCASE)
56  actualFlags |= LCMAP_LOWERCASE;
57  }
58 
 // (guard on listing line 59 missing - presumably TRANSFORM_UPPERCASE)
60  actualFlags |= LCMAP_UPPERCASE;
61  }
62 
 // (guard on listing line 63 missing - presumably TRANSFORM_IGNORE_NONSPACE)
 // Drop characters typed as diacritics or non-spacing marks.
64  aExcludeChars[C3].AppendElement(C3_DIACRITIC);
65  aExcludeChars[C3].AppendElement(C3_NONSPACING);
66  }
67 
 // (guard on listing line 68 missing - presumably TRANSFORM_IGNORE_SYMBOLS)
69  aExcludeChars[C3].AppendElement(C3_LEXICAL);
70  aExcludeChars[C3].AppendElement(C3_VOWELMARK);
71  }
72 
 // (guard on listing lines 73-74 missing - presumably one of the
 // TRANSFORM_IGNORE_NONALPHANUM* flags)
 // Drop lexical marks and punctuation; keep only characters typed as
 // alphabetic, digit or European number.
75  aExcludeChars[C3].AppendElement(C3_LEXICAL);
76  aExcludeChars[C1].AppendElement(C1_PUNCT);
77  aIncludeChars[C3].AppendElement(C3_ALPHA);
78  aIncludeChars[C2].AppendElement(C2_EUROPENUMBER);
79  aIncludeChars[C1].AppendElement(C1_DIGIT);
80  aIncludeChars[C1].AppendElement(C1_ALPHA);
 // The IGNORE_SPACE variant additionally drops whitespace characters.
81  if (aFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE) {
82  aExcludeChars[C1].AppendElement(C1_SPACE);
83  }
84  }
85 
86  return actualFlags;
87 }
88 
// Applies the case mapping and character filtering selected by
// aTransformFlags to aInput and stores the result in _retval.
//
// aCharset         not referenced in the lines visible in this listing.
// aTransformFlags  bitmask of sbIStringTransform::TRANSFORM_* values.
// aInput           string to transform.
// _retval          receives the transformed string; it is truncated when
//                  aInput is empty or when a GetStringTypeW call fails.
//
// Returns NS_OK on success, or NS_ERROR_CANNOT_CONVERT_DATA when
// LCMapStringW / FoldStringW report a different character count than their
// sizing call did, or when GetStringTypeW fails.
//
// NOTE(review): finalStr is only written inside the two transform blocks
// below; if neither block runs, _retval ends up empty rather than a copy of
// aInput - confirm callers always pass at least one transform flag.
89 NS_IMETHODIMP
90 sbStringTransformImpl::NormalizeString(const nsAString & aCharset,
91  PRUint32 aTransformFlags,
92  const nsAString & aInput,
93  nsAString & _retval)
94 {
95  nsString finalStr;
96  nsString inStr(aInput);
97 
 // Nothing to transform: hand back an empty string.
98  if(inStr.IsEmpty()) {
99  _retval.Truncate();
100  return NS_OK;
101  }
102 
 // One list of character-type bits per type class: bits that reject a
 // character (exclude) and bits that admit one (include). MakeFlags fills
 // both and returns the flags handed to LCMapStringW below.
103  nsTArray<WORD> excludeChars[NTYPES];
104  nsTArray<WORD> includeChars[NTYPES];
105  DWORD dwFlags = MakeFlags(aTransformFlags,
106  excludeChars,
107  includeChars);
108 
 // Stage 1: case mapping.
109  if(aTransformFlags & sbIStringTransform::TRANSFORM_LOWERCASE ||
110  aTransformFlags & sbIStringTransform::TRANSFORM_UPPERCASE) {
111 
 // First call passes a zero-length destination, so it only reports the
 // number of characters the mapped string needs.
 // NOTE(review): a return value of 0 (failure) is not checked here.
112  WCHAR *wszJunk = {0};
113  int requiredBufferSize = ::LCMapStringW(LOCALE_USER_DEFAULT,
114  dwFlags,
115  inStr.BeginReading(),
116  inStr.Length(),
117  wszJunk,
118  0);
119 
 // Second call performs the mapping into a buffer of that size.
120  nsString bufferStr;
121  int convertedChars =
122  ::LCMapStringW(LOCALE_USER_DEFAULT,
123  dwFlags,
124  inStr.BeginReading(),
125  inStr.Length(),
126  bufferStr.BeginWriting(requiredBufferSize),
127  requiredBufferSize);
128 
129  NS_ENSURE_TRUE(convertedChars == requiredBufferSize,
130  NS_ERROR_CANNOT_CONVERT_DATA);
131 
 // The case-mapped text becomes both the provisional result and the input
 // of the filtering stage.
 // NOTE(review): finalStr is not cleared before the filtering loop below
 // appends to it, so when both stages run the filtered characters appear to
 // be appended after the case-mapped text - confirm this is intended.
132  finalStr = bufferStr;
133  inStr = bufferStr;
134  }
135 
 // Stage 2: character filtering.
 // NOTE(review): listing lines 137-139 and 141 are missing from this
 // extract, so the remainder of this condition and the right-hand side of
 // leadingOnly are not visible here.
136  if(aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONSPACE ||
140  PRBool leadingOnly = aTransformFlags &
142  PRBool bypassTest = PR_FALSE;
 // Sizing call followed by the real MAP_COMPOSITE fold, same two-step
 // pattern as the case mapping above.
143  LPWSTR wszJunk = {0};
144  int requiredBufferSize = ::FoldStringW(MAP_COMPOSITE,
145  inStr.BeginReading(),
146  inStr.Length(),
147  wszJunk,
148  0);
149 
150  nsString bufferStr;
151  int convertedChars =
152  ::FoldStringW(MAP_COMPOSITE,
153  inStr.BeginReading(),
154  inStr.Length(),
155  bufferStr.BeginWriting(requiredBufferSize),
156  requiredBufferSize);
157 
158  NS_ENSURE_TRUE(convertedChars == requiredBufferSize,
159  NS_ERROR_CANNOT_CONVERT_DATA);
160 
 // Fetch the CT_CTYPE1 type bits for every folded character. Each of the
 // three type arrays is heap-allocated and must be released on every exit
 // path from here on.
161  LPWORD ct1 = new WORD[requiredBufferSize];
162  BOOL success = GetStringTypeW(CT_CTYPE1,
163  (LPWSTR) bufferStr.BeginReading(),
164  bufferStr.Length(),
165  &ct1[0]);
166 
167  if(!success) {
168  delete [] ct1;
169  _retval.Truncate();
170  return NS_ERROR_CANNOT_CONVERT_DATA;
171  }
172 
 // Same for the CT_CTYPE2 type bits.
173  LPWORD ct2 = new WORD[requiredBufferSize];
174  success = GetStringTypeW(CT_CTYPE2,
175  (LPWSTR) bufferStr.BeginReading(),
176  bufferStr.Length(),
177  &ct2[0]);
178 
179  if(!success) {
180  delete [] ct1;
181  delete [] ct2;
182  _retval.Truncate();
183  return NS_ERROR_CANNOT_CONVERT_DATA;
184  }
185 
 // Same for the CT_CTYPE3 type bits.
186  LPWORD ct3 = new WORD[requiredBufferSize];
187  success = GetStringTypeW(CT_CTYPE3,
188  (LPWSTR) bufferStr.BeginReading(),
189  bufferStr.Length(),
190  &ct3[0]);
191 
192  if(!success) {
193  delete [] ct1;
194  delete [] ct2;
195  delete [] ct3;
196  _retval.Truncate();
197  return NS_ERROR_CANNOT_CONVERT_DATA;
198  }
199 
 // Indexed by type class so the loops below can pair charTypes[type] with
 // excludeChars[type] / includeChars[type].
200  LPWORD charTypes[NTYPES] = {ct1, ct2, ct3};
201 
 // Walk the folded string and append the characters that pass the filters.
202  for(int current = 0; current < requiredBufferSize; ++current) {
203  PRBool validChar = PR_TRUE;
 // NOTE(review): skipChars is never modified in the visible lines, so the
 // `current += skipChars` at the bottom of the loop adds 0.
204  PRInt32 skipChars = 0;
205 
 // Once bypassTest is set (leadingOnly mode, after the first kept character
 // or number) the filters are skipped and every remaining character is kept.
206  if (!bypassTest) {
 // NOTE(review): listing line 207 is missing from this extract; the brace
 // on listing line 218 closes the block it opens, so the leading-number
 // handling below is presumably conditional on a flag - confirm.
 // NOTE(review): numberLength is not initialised before the call and so
 // relies on SB_ExtractLeadingNumber always writing it - confirm.
208  PRInt32 numberLength;
209  SB_ExtractLeadingNumber(bufferStr.BeginReading() + current, NULL, NULL, &numberLength);
 // A number starts here: copy it through untouched and jump past it. The
 // -1 accounts for the ++current performed by the for loop.
210  if (numberLength > 0) {
211  finalStr.Append(bufferStr.BeginReading() + current, numberLength);
212  current += numberLength-1;
213  if (leadingOnly) {
214  bypassTest = PR_TRUE;
215  }
216  continue;
217  }
218  }
219 
220  // first check if the char is excluded by any of its type flags
221  for (int type = FIRSTTYPE; type <= LASTTYPE && validChar; type++) {
222  PRUint32 excludeCharsLength = excludeChars[type].Length();
223  for(PRUint32 invalid = 0; invalid < excludeCharsLength; ++invalid) {
224  if(excludeChars[type][invalid] & charTypes[type][current]) {
225  validChar = PR_FALSE;
226  break;
227  }
228  }
229  }
230  // next, check if the char is in the included chars arrays. if all
231  // arrays are empty, allow all chars instead of none
232  PRBool found = PR_FALSE;
233  PRBool testedAnything = PR_FALSE;
234  for (int type = FIRSTTYPE;
235  type <= LASTTYPE && validChar && !found;
236  type++) {
237  PRUint32 includeCharsLength = includeChars[type].Length();
238  for(PRUint32 valid = 0; valid < includeCharsLength; ++valid) {
239  testedAnything = PR_TRUE;
240  if (includeChars[type][valid] & charTypes[type][current]) {
241  found = PR_TRUE;
242  break;
243  }
244  }
245  }
 // An include list was consulted but nothing matched: reject the character.
246  if (testedAnything &&
247  !found) {
248  validChar = PR_FALSE;
249  }
250  }
251 
 // Keep the character; in leadingOnly mode the first kept character turns
 // filtering off for the remainder of the string.
252  if(validChar) {
253  if (leadingOnly) {
254  bypassTest = PR_TRUE;
255  }
256  finalStr.Append(bufferStr.CharAt(current));
257  }
258  current += skipChars;
259  }
260 
 // Release the character-type arrays allocated above.
261  delete [] ct1;
262  delete [] ct2;
263  delete [] ct3;
264  }
265 
266  _retval = finalStr;
267 
268  return NS_OK;
269 }
270 
// Charset conversion is not implemented here: none of the arguments are
// read, _retval is left untouched, and the call always returns
// NS_ERROR_NOT_IMPLEMENTED.
271 NS_IMETHODIMP
272 sbStringTransformImpl::ConvertToCharset(const nsAString & aDestCharset,
273  const nsAString & aInput,
274  nsAString & _retval)
275 {
276  return NS_ERROR_NOT_IMPLEMENTED;
277 }
278 
// Charset detection is not implemented here: aInput is not read, _retval is
// left untouched, and the call always returns NS_ERROR_NOT_IMPLEMENTED.
279 NS_IMETHODIMP
280 sbStringTransformImpl::GuessCharset(const nsAString & aInput,
281  nsAString & _retval)
282 {
283  return NS_ERROR_NOT_IMPLEMENTED;
284 }
#define LASTTYPE
#define C2
return NS_OK
const unsigned long TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE
const unsigned long TRANSFORM_IGNORE_LEADING
TRANSFORM_IGNORE_* operates on leading characters and stops as soon as a non-ignored character is fou...
Leading Numbers Parsing.
const unsigned long TRANSFORM_IGNORE_SYMBOLS
Ignore symbol characters. This includes but is not limited to #, $, +, -.
const unsigned long TRANSFORM_UPPERCASE
Transform to uppercase.
const unsigned long TRANSFORM_IGNORE_NONALPHANUM
Ignore non-alphanumerical characters.
unsigned long MakeFlags(PRUint32 aFlags, nsTArray< WORD > aExcludeChars[(2-0+1)], nsTArray< WORD > aIncludeChars[(2-0+1)])
#define NTYPES
#define C1
#define FIRSTTYPE
void SB_ExtractLeadingNumber(const CHARTYPE *str, PRBool *hasLeadingNumber, PRFloat64 *leadingNumber, PRInt32 *numberLength)
const unsigned long TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS
TRANSFORM_IGNORE_* keeps number symbols in.
const unsigned long TRANSFORM_LOWERCASE
Transform to lowercase.
const unsigned long TRANSFORM_IGNORE_NONSPACE
Ignore non-spacing characters. This includes but is not limited to diacritics.
NS_DECL_SBISTRINGTRANSFORM_IMPL nsresult Init()
#define C3