sbStringTransformImpl.mm
Go to the documentation of this file.
1  /*
2 //
3 // BEGIN SONGBIRD GPL
4 //
5 // This file is part of the Songbird web player.
6 //
7 // Copyright(c) 2005-2008 POTI, Inc.
8 // http://songbirdnest.com
9 //
10 // This file may be licensed under the terms of the
11 // GNU General Public License Version 2 (the "GPL").
12 //
13 // Software distributed under the License is distributed
14 // on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either
15 // express or implied. See the GPL for the specific language
16 // governing rights and limitations.
17 //
18 // You should have received a copy of the GPL along with this
19 // program. If not, go to http://www.gnu.org/licenses/gpl.html
20 // or write to the Free Software Foundation, Inc.,
21 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 //
23 // END SONGBIRD GPL
24 //
25 */
26 
27 #include "sbStringTransformImpl.h"
28 
29 #include <nsMemory.h>
30 #include <nsStringGlue.h>
31 
32 #include <prmem.h>
33 
34 #include <CoreFoundation/CoreFoundation.h>
35 #include <Foundation/Foundation.h>
36 
37 #include "sbLeadingNumbers.h"
38 
40 {
41 }
42 
44 {
45 }
46 
47 nsresult
49  return NS_OK;
50 }
51 
/**
 * Normalize |aInput| according to |aTransformFlags| and store the result in
 * |_retval|.
 *
 * The transforms are applied in this order:
 *   1. TRANSFORM_LOWERCASE / TRANSFORM_UPPERCASE case mapping.
 *   2. TRANSFORM_IGNORE_NONSPACE: strip combining marks (diacritics).
 *   3. TRANSFORM_IGNORE_SYMBOLS, TRANSFORM_IGNORE_NONALPHANUM and
 *      TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE: drop the matching
 *      characters. With TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS, anything that
 *      SB_ExtractLeadingNumber recognises as a number is kept verbatim.
 *
 * With TRANSFORM_IGNORE_LEADING the IGNORE_* transforms only act on leading
 * characters and stop at the first character that is not ignored.
 *
 * NOTE(review): the TRANSFORM_IGNORE_LEADING test, the two NONALPHANUM terms
 * of the filter guard and the TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS guard were
 * restored from the flag documentation - confirm against the upstream file.
 *
 * \param aCharset         Not used by this implementation.
 * \param aTransformFlags  Bitwise OR of sbIStringTransform::TRANSFORM_* flags.
 * \param aInput           The string to normalize.
 * \param _retval          Receives the normalized string.
 * \return NS_OK on success, NS_ERROR_OUT_OF_MEMORY if a buffer could not be
 *         allocated.
 */
NS_IMETHODIMP
sbStringTransformImpl::NormalizeString(const nsAString & aCharset,
                                       PRUint32 aTransformFlags,
                                       const nsAString & aInput,
                                       nsAString & _retval)
{
  const PRBool leadingOnly =
    (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_LEADING) != 0;

  // |str| is owned by this method and released on every exit path below.
  NSMutableString *str =
    [[NSMutableString alloc] initWithCharacters:aInput.BeginReading()
                                         length:aInput.Length()];
  NS_ENSURE_TRUE(str, NS_ERROR_OUT_OF_MEMORY);

  // Case mapping is done in place: rebinding |str| to a new (immutable,
  // autoreleased) string would leak the buffer allocated above, break the
  // in-place mutations further down and unbalance the final release.
  if (aTransformFlags & sbIStringTransform::TRANSFORM_LOWERCASE) {
    [str setString:[str lowercaseString]];
  }

  if (aTransformFlags & sbIStringTransform::TRANSFORM_UPPERCASE) {
    [str setString:[str uppercaseString]];
  }

  if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONSPACE) {
    if (leadingOnly) {
      // Perform the full transform on a copy of |str| (so that it shares the
      // case mapping applied above) - then look for the first similar
      // character.
      NSMutableString *stripped = [str mutableCopy];
      CFStringTransform((CFMutableStringRef)stripped,
                        NULL,
                        kCFStringTransformStripCombiningMarks,
                        false);

      // The stripped copy may be empty (input made only of combining marks);
      // there is then nothing to match against and |str| is left alone.
      if ([stripped length] > 0) {
        const unichar firstKept = [stripped characterAtIndex:0];
        const NSUInteger strLength = [str length];

        // Find the first occurrence of the matching non-ignored character.
        // Then remove the characters in front of it from |str|.
        for (NSUInteger i = 0; i < strLength; i++) {
          if ([str characterAtIndex:i] == firstKept) {
            [str deleteCharactersInRange:NSMakeRange(0, i)];
            break;
          }
        }
      }
      [stripped release];
    }
    else {
      // Just transform the whole string
      CFStringTransform((CFMutableStringRef)str,
                        NULL,
                        kCFStringTransformStripCombiningMarks,
                        false);
    }
  }

  const PRUnichar *original = (const PRUnichar *)aInput.BeginReading();
  if ((aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_SYMBOLS) ||
      (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM) ||
      (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE)) {
    NSCharacterSet *symbols = [NSCharacterSet symbolCharacterSet];
    NSCharacterSet *alphaNumSet = [NSCharacterSet alphanumericCharacterSet];
    PRBool bypassSymbolFiltering = false;
    PRBool bypassNonalnumFiltering = false;

    // |current| indexes |str|, |optr| indexes the untouched input. |optr|
    // advances on every iteration; |current| only advances when the character
    // it points at was kept, so both keep designating the same character.
    for (unsigned int current = 0, optr = 0; current < [str length]; ++optr) {
      if (bypassSymbolFiltering &&
          bypassNonalnumFiltering)
        break;

      unichar c = [str characterAtIndex:current];

      if (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS) {
        PRInt32 numberLength = 0;
        SB_ExtractLeadingNumber(original + optr, NULL, NULL, &numberLength);
        if (numberLength > 0) {
          // Skip over the whole number; the loop header supplies the last
          // increment of |optr|.
          current += numberLength;
          optr += numberLength-1;
          if (leadingOnly) {
            break;
          }
          continue;
        }
      }

      // Set once |c| has been deleted, so that a character matching both
      // filters is removed exactly once.
      PRBool removed = false;

      if (!bypassSymbolFiltering &&
          (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_SYMBOLS)) {
        if ([symbols characterIsMember:c]) {
          [str deleteCharactersInRange:NSMakeRange(current, 1)];
          removed = true;
        } else if (leadingOnly) {
          bypassSymbolFiltering = true;
        }
      }

      if (!removed &&
          !bypassNonalnumFiltering &&
          ((aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM) ||
           (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE))) {
        // A space is only dropped when ..._IGNORE_SPACE is requested.
        if (![alphaNumSet characterIsMember:c] &&
            ((c != ' ') ||
             (aTransformFlags & sbIStringTransform::TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE))) {
          [str deleteCharactersInRange:NSMakeRange(current, 1)];
          removed = true;
        } else if (leadingOnly) {
          bypassNonalnumFiltering = true;
        }
      }

      if (!removed) {
        ++current;
      }

      // make sure we're still in sync
      NS_ASSERTION(original[optr] != 0, "error with optr position tracking");
      // but just in case...
      if (!original[optr])
        optr--;
    }
  }

  const NSUInteger outLength = [str length];
  if (outLength == 0) {
    // Nothing left: avoid a zero-sized allocation altogether.
    _retval.Truncate();
    [str release];
    return NS_OK;
  }

  unichar *buf = (unichar *) malloc(sizeof(unichar) * outLength);
  if (!buf) {
    [str release];
    return NS_ERROR_OUT_OF_MEMORY;
  }

  [str getCharacters:buf range:NSMakeRange(0, outLength)];

  _retval.Assign(buf, static_cast<PRUint32>(outLength));
  free(buf);
  [str release];

  return NS_OK;
}
178 
/**
 * Charset conversion entry point; this implementation does not provide it.
 *
 * None of the arguments are read and |_retval| is left untouched.
 *
 * \return NS_ERROR_NOT_IMPLEMENTED, always.
 */
NS_IMETHODIMP
sbStringTransformImpl::ConvertToCharset(const nsAString & aDestCharset,
                                        const nsAString & aInput,
                                        nsAString & _retval)
{
  const nsresult rv = NS_ERROR_NOT_IMPLEMENTED;
  return rv;
}
186 
/**
 * Charset detection entry point; this implementation does not provide it.
 *
 * |aInput| is not read and |_retval| is left untouched.
 *
 * \return NS_ERROR_NOT_IMPLEMENTED, always.
 */
NS_IMETHODIMP
sbStringTransformImpl::GuessCharset(const nsAString & aInput,
                                    nsAString & _retval)
{
  const nsresult rv = NS_ERROR_NOT_IMPLEMENTED;
  return rv;
}
return NS_OK
const unsigned long TRANSFORM_IGNORE_NONALPHANUM_IGNORE_SPACE
const unsigned long TRANSFORM_IGNORE_LEADING
TRANSFORM_IGNORE_* operates on leading characters and stops as soon as a non-ignored character is found.
Leading Numbers Parsing.
const unsigned long TRANSFORM_IGNORE_SYMBOLS
Ignore symbol characters. This includes but is not limited to #, $, +, -.
const unsigned long TRANSFORM_UPPERCASE
Transform to uppercase.
const unsigned long TRANSFORM_IGNORE_NONALPHANUM
Ignore non-alphanumerical characters.
void SB_ExtractLeadingNumber(const CHARTYPE *str, PRBool *hasLeadingNumber, PRFloat64 *leadingNumber, PRInt32 *numberLength)
const unsigned long TRANSFORM_IGNORE_KEEPNUMBERSYMBOLS
TRANSFORM_IGNORE_* keeps number symbols in.
const unsigned long TRANSFORM_LOWERCASE
Transform to lowercase.
const unsigned long TRANSFORM_IGNORE_NONSPACE
Ignore non-spacing characters. This includes but is not limited to diacritics.
NS_DECL_SBISTRINGTRANSFORM_IMPL nsresult Init()
_getSelectedPageStyle s i