sbLeadingNumbers.h
Go to the documentation of this file.
1 /*
2 //
3 // BEGIN SONGBIRD GPL
4 //
5 // This file is part of the Songbird web player.
6 //
7 // Copyright(c) 2005-2008 POTI, Inc.
8 // http://songbirdnest.com
9 //
10 // This file may be licensed under the terms of the
11 // GNU General Public License Version 2 (the "GPL").
12 //
13 // Software distributed under the License is distributed
14 // on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either
15 // express or implied. See the GPL for the specific language
16 // governing rights and limitations.
17 //
18 // You should have received a copy of the GPL along with this
19 // program. If not, go to http://www.gnu.org/licenses/gpl.html
20 // or write to the Free Software Foundation, Inc.,
21 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 //
23 // END SONGBIRD GPL
24 //
25  */
26 
32 #include <prtypes.h>
33 #include <math.h>
34 
35 #ifdef XP_MACOSX
36 #include <Carbon/Carbon.h>
37 #else
38 #ifdef XP_UNIX
39 #include <glib.h>
40 #endif
41 #endif
42 
// Per-platform character types used when instantiating the templates below.
//   UTF16_CHARTYPE   - the type used for UTF-16 strings on this platform.
//   NATIVE_CHAR_TYPE - the type used for the platform's native strings.
// On Mac OS X both map to UniChar, on other Unix platforms to the glib types
// gunichar2 / gunichar, and on Windows to wchar_t. Neither macro is defined
// when none of XP_MACOSX, XP_UNIX or XP_WIN is defined.
#if defined(XP_MACOSX)
#define UTF16_CHARTYPE UniChar
#define NATIVE_CHAR_TYPE UniChar
#elif defined(XP_UNIX)
#define UTF16_CHARTYPE gunichar2
#define NATIVE_CHAR_TYPE gunichar
#elif defined(XP_WIN)
#define UTF16_CHARTYPE wchar_t
#define NATIVE_CHAR_TYPE wchar_t
#endif

// Character classes returned by SB_GetCharType().
#define CHARTYPE_OTHER 0         // anything that cannot be part of a number
#define CHARTYPE_DIGIT 1         // '0' through '9'
#define CHARTYPE_DECIMALPOINT 2  // '.' or ','
#define CHARTYPE_SIGN 3          // '+' or '-'
#define CHARTYPE_EXPONENT 4      // 'e' or 'E'
59 
60 template<class CHARTYPE> inline PRInt32 SB_GetCharType(const CHARTYPE *p) {
61  switch (*p) {
62  case '.':
63  case ',':
64  return CHARTYPE_DECIMALPOINT;
65  case '+':
66  case '-':
67  return CHARTYPE_SIGN;
68  case 'e':
69  case 'E':
70  return CHARTYPE_EXPONENT;
71  }
72  if (*p >= '0' && *p <= '9')
73  return CHARTYPE_DIGIT;
74  return CHARTYPE_OTHER;
75 }
76 
77 template<class CHARTYPE>
78  inline void SB_ExtractLeadingNumber(const CHARTYPE *str,
79  PRBool *hasLeadingNumber,
80  PRFloat64 *leadingNumber,
81  PRInt32 *numberLength) {
82 
83  // it would be nice to be able to do all of this with just sscanf, but
84  // unfortunately that function does not tell us where the parsed number ended,
85  // and we need to know that in order to strip it from the string, so we have
86  // to parse manually. also, we want to handle ',' as '.', which sscanf doesn't
87  // do.
88 
89  PRBool gotDecimalPoint = PR_FALSE;
90  PRBool gotExponent = PR_FALSE;
91  PRBool gotSign = PR_FALSE;
92  PRBool gotExponentSign = PR_FALSE;
93  PRBool gotDigit = PR_FALSE;
94  PRBool gotExponentDigit = PR_FALSE;
95  PRBool abortParsing = PR_FALSE;
96  PRFloat64 value = 0;
97  PRInt32 expValue = 0;
98  PRFloat64 decimalMul = 1;
99  PRInt32 sign = 1;
100  PRInt32 expSign = 1;
101 
102  const CHARTYPE *p = str;
103 
104  while (!abortParsing && *p) {
105  switch (SB_GetCharType(p)) {
106  case CHARTYPE_SIGN:
107  if (!gotExponent) {
108  // if we already had a sign for this number, or if the number part has
109  // already started (already had digits or a decimal point) we can't
110  // accept a sign here, so abort parsing.
111  if (gotSign || gotDigit || gotDecimalPoint) {
112  abortParsing = PR_TRUE;
113  break;
114  }
115  // remember that we got a sign for the number part
116  gotSign = PR_TRUE;
117  switch (*p) {
118  case '+':
119  sign = 1;
120  break;
121  case '-':
122  sign = -1;
123  break;
124  }
125  } else {
126  // if we already had a sign for this exponent, or if the number part
127  // of the exponent has already started (already had a digit in the
128  // exponent) we can't accept a sign here, so abort parsing.
129  if (gotExponentSign || gotExponentDigit) {
130  abortParsing = PR_TRUE;
131  break;
132  }
133  // remember that we got a sign for the exponent part
134  gotExponentSign = PR_TRUE;
135  switch (*p) {
136  case '+':
137  expSign = 1;
138  break;
139  case '-':
140  expSign = -1;
141  break;
142  }
143  }
144  break;
145  case CHARTYPE_DIGIT:
146  // remember that the number part has started
147  if (!gotExponent) {
148  gotDigit = PR_TRUE;
149  if (!gotDecimalPoint) {
150  value *= 10;
151  value += *p - '0';
152  } else {
153  decimalMul *= .1;
154  value += (*p - '0') * decimalMul;
155  }
156  } else {
157  gotExponentDigit = PR_TRUE;
158  expValue *= 10;
159  expValue += *p - '0';
160  }
161  break;
163  if (!gotExponent) {
164  // if we already had a decimal point for this number, we can't have
165  // another one, so abort parsing.
166  if (gotDecimalPoint) {
167  abortParsing = PR_TRUE;
168  break;
169  }
170  // remember that we got a decimal point for the number part
171  gotDecimalPoint = PR_TRUE;
172  } else {
173  // decimal points cannot be part of an exponent, so abort parsing.
174  abortParsing = PR_TRUE;
175  break;
176  }
177  break;
178  case CHARTYPE_EXPONENT:
179  // if we already are in the exponent part, we cannot get another
180  // exponent character, so abort parsing.
181  if (gotExponent) {
182  abortParsing = PR_TRUE;
183  break;
184  }
185  // this is only an exponent character if the next character is either
186  // a digit or a sign (it is safe to dereference p+1, since at worst
187  // it will be a null terminator)
188  switch (SB_GetCharType(p+1)) {
189  case CHARTYPE_DIGIT:
190  case CHARTYPE_SIGN:
191  // remember that we got an exponent.
192  gotExponent = PR_TRUE;
193  break;
194  default:
195  // anything else means this is not an exponent, but just the letter
196  // 'e' or 'E', so abort parsing.
197  abortParsing = PR_TRUE;
198  break;
199  }
200  break;
201  case CHARTYPE_OTHER:
202  // anything else is a character or symbol that isn't part of a valid
203  // number, so abort parsing (this includes utf8 extended characters).
204  abortParsing = PR_TRUE;
205  break;
206  }
207  p++;
208  }
209 
210  // if we stopped the parser on an invalid char, we need to back up one char,
211  // otherwise the whole string was a number and p just points at the terminal
212  // null char.
213  if (abortParsing)
214  p--;
215 
216  // p now points at the first character that isn't part of a valid number.
217  // copy the string, without the number.
218  if (numberLength)
219  *numberLength = p-str;
220 
221  // we may mistakenly think there is a number if we only got an exponent, or
222  // just a sign, or just a decimal point, so in addition to checking that we
223  // parsed at least one character, also make sure we did get digits
224  if (p == str ||
225  !gotDigit) {
226  // no number found
227  if (hasLeadingNumber)
228  *hasLeadingNumber = PR_FALSE;
229  if (leadingNumber)
230  *leadingNumber = 0;
231  if (numberLength)
232  *numberLength = 0;
233  } else {
234 
235  // factor in the exponent
236  if (expValue != 0) {
237  PRFloat64 mul = pow((PRFloat64)10, (PRFloat64)(expValue * expSign));
238  value *= mul;
239  }
240 
241  // factor in the sign
242  value *= sign;
243 
244  if (hasLeadingNumber)
245  *hasLeadingNumber = PR_TRUE;
246  if (leadingNumber)
247  *leadingNumber = value;
248  }
249 }
250 
251 template<class CHARTYPE> inline PRInt32 SB_FindNextNumber(const CHARTYPE *aStr) {
252  if (!aStr)
253  return -1;
254 
255  const CHARTYPE *p = aStr;
256  const CHARTYPE *beginning = NULL;
257  while (*p) {
258  PRInt32 c = SB_GetCharType(p);
259  if (c == CHARTYPE_DIGIT) {
260  if (!beginning)
261  beginning = p;
262  return beginning-aStr;
263  }
264  if (c == CHARTYPE_SIGN ||
265  c == CHARTYPE_DECIMALPOINT) {
266  if (!beginning) {
267  beginning = p;
268  }
269  } else {
270  beginning = NULL;
271  }
272  p++;
273  }
274 
275  return -1;
276 }
277 
#define CHARTYPE_EXPONENT
PRInt32 SB_FindNextNumber(const CHARTYPE *aStr)
#define CHARTYPE_SIGN
#define CHARTYPE_DIGIT
void SB_ExtractLeadingNumber(const CHARTYPE *str, PRBool *hasLeadingNumber, PRFloat64 *leadingNumber, PRInt32 *numberLength)
#define CHARTYPE_DECIMALPOINT
countRef value
Definition: FeedWriter.js:1423
#define CHARTYPE_OTHER
PRInt32 SB_GetCharType(const CHARTYPE *p)