stringOpsSort.C
Go to the documentation of this file.
1 /*---------------------------------------------------------------------------*\
2  ========= |
3  \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
4  \\ / O peration |
5  \\ / A nd | www.openfoam.com
6  \\/ M anipulation |
7 -------------------------------------------------------------------------------
8  Copyright (C) 2017 OpenCFD Ltd.
9 -------------------------------------------------------------------------------
10 Changes for OpenFOAM
11 
12  Code cleanup, reduction, elimination of redundant code.
13 \*---------------------------------------------------------------------------*/
14 
15 //========================================================================
16 // Copyright (c) 1998-2010,2011 Free Software Foundation, Inc.
17 //
18 // Permission is hereby granted, free of charge, to any person obtaining a
19 // copy of this software and associated documentation files (the
20 // "Software"), to deal in the Software without restriction, including
21 // without limitation the rights to use, copy, modify, merge, publish,
22 // distribute, distribute with modifications, sublicense, and/or sell
23 // copies of the Software, and to permit persons to whom the Software is
24 // furnished to do so, subject to the following conditions:
25 //
26 // The above copyright notice and this permission notice shall be included
27 // in all copies or substantial portions of the Software.
28 //
29 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
30 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
32 // IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
33 // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
34 // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
35 // THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36 //
37 // Except as contained in this notice, the name(s) of the above copyright
38 // holders shall not be used in advertising or otherwise to promote the
39 // sale, use or other dealings in this Software without prior written
40 // authorization.
41 //========================================================================
42 
43 //========================================================================
44 // Original Author: Jan-Marten Spit <jmspit@euronet.nl>
45 //========================================================================
46 
47 #include "stringOpsSort.H"
48 #include <cctype>
49 #include <cstring>
50 
51 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
52 
53 // Local tweaks/preferences:
54 //
55 // [DIGITS_ALWAYS_FIRST] : as per original code
56 // This results in "file123.txt" sorting before "file.txt", which is
57 // inconsistent with what 'ls -v' produces
58 // - normally do not want this (Mark Olesen: Oct-2017)
59 #undef DIGITS_ALWAYS_FIRST
60 
61 // [MANUAL_NUMCOMPARE] : handwritten code instead of strncmp
62 // The original code has a mix of strncmp for equality but handwritten code
63 // for greater-than/less-than.
64 //
65 // -> this does to be unneeded, rely on strncmp() return values
66 // (Mark Olesen: Oct-2017)
67 #undef MANUAL_NUMCOMPARE
68 
69 // [DEBUG_NATSTRCMP] : debug info to std::cerr (for development purposes only)
70 #undef DEBUG_NATSTRCMP
71 
72 // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
73 
74 #ifdef DEBUG_NATSTRCMP
75 #include <iostream>
76 
77 template<class T>
78 static inline void debugPrint(const char* text, T item1, T item2)
79 {
80  std::cerr << text << ": <" << item1 << "> <" << item2 << ">\n";
81 }
82 
83 // Character sequences, end pointer is _inclusive_.
84 static inline void debugPrint
85 (
86  const char* b1, const char* e1,
87  const char* b2, const char* e2
88 )
89 {
90  std::cerr << "<";
91  while (b1 < e1) { std::cerr << *b1++; }
92  std::cerr << *e1 << "> ";
93 
94  std::cerr << "<";
95  while (b2 < e2) { std::cerr << *b2++; }
96  std::cerr << *e2 << ">\n";
97 }
98 #endif
99 
100 #ifdef MANUAL_NUMCOMPARE
101 // Manual comparison of identical length (digit) sequences
102 static inline int manual_numcompare(const char* s1, const char* s2)
103 {
104  while (*s1 && *s2)
105  {
106  const int cmp = (*s1 - *s2);
107  if (!cmp) return cmp;
108 
109  ++s1; ++s2;
110  }
111  return 0; // Length check done before in caller.
112 }
113 #endif
114 
115 
116 // ------------------------------------------------------------------------- //
117 
118 int Foam::stringOps::natstrcmp(const char* s1, const char* s2)
119 {
120  #ifdef DEBUG_NATSTRCMP
121  debugPrint("natstrcmp", s1, s2);
122  #endif
123 
124  // States for state engine
125  enum stateType { SCAN, ALPHA, NUMERIC };
126 
127  // Number of leading zeroes
128  unsigned zeros1 = 0;
129  unsigned zeros2 = 0;
130 
131  // Pointers to begin/end of integer sequences (without leading zeros)
132  const char* numbeg1 = nullptr;
133  const char* numbeg2 = nullptr;
134  const char* numend1 = nullptr;
135  const char* numend2 = nullptr;
136 
137  stateType state = SCAN;
138 
139  const char* p1 = s1;
140  const char* p2 = s2;
141 
142  while (*p1 && *p2)
143  {
144  // Bitmask for digits vs alpha
145  // - 0: neither are digits
146  // - 1: p1 is the only digit
147  // - 2: p2 is the only digit
148  // - 3: both p1 and p2 are digits
149 
150  const unsigned digitMask =
151  ((isdigit(*p2) ? 2:0) | (isdigit(*p1) ? 1:0));
152 
153  switch (state)
154  {
155  case SCAN:
156  {
157  #ifdef DEBUG_NATSTRCMP
158  debugPrint("SCAN", *p1, *p2);
159  #endif
160 
161  switch (digitMask)
162  {
163  case 0: // (alpha,alpha)
164  {
165  state = ALPHA;
166 
167  if (*p1 == *p2)
168  {
169  ++p1; ++p2;
170  }
171  else
172  {
173  // Lexical compare
174  return (*p1 - *p2);
175  }
176  break;
177  }
178 
179  #ifdef DIGITS_ALWAYS_FIRST
180  case 0x1: // (digit,alpha) : digit < alpha
181  {
182  return -1;
183  break;
184  }
185  case 0x2: // (alpha,digit) : alpha > digit
186  {
187  return 1;
188  break;
189  }
190  #else /* DIGITS_ALWAYS_FIRST */
191  case 0x1: // (digit,alpha)
192  case 0x2: // (alpha,digit)
193  {
194  // Lexical compare for digits/alpha
195  return (*p1 - *p2);
196  break;
197  }
198  #endif /* DIGITS_ALWAYS_FIRST */
199 
200  default: // (digit,digit)
201  {
202  state = NUMERIC;
203 
204  // State changed from SCAN to NUMERIC, so skip leading
205  // leading zeros so the numeric comparison is
206  // untainted by them, but capture the first occurrence
207  // of leading zeroes for a final tie-break if needed.
208 
209  if (!zeros1)
210  {
211  // First occurrence of any leading zeroes
212  while (*p1 == '0') { ++p1; ++zeros1; }
213  }
214  else
215  {
216  while (*p1 == '0') { ++p1; }
217  }
218 
219  if (!zeros2)
220  {
221  // First occurrence of any leading zeroes
222  while (*p2 == '0') { ++p2; ++zeros2; }
223  }
224  else
225  {
226  while (*p2 == '0') { ++p2; }
227  }
228 
229  if (zeros1 == zeros2)
230  {
231  // Same number of zeros - so irrelevant
232  zeros1 = zeros2 = 0;
233  }
234 
235  if (!isdigit(*p1)) --p1;
236  if (!isdigit(*p2)) --p2;
237 
238  numbeg1 = numend1 = p1;
239  numbeg2 = numend2 = p2;
240 
241  break;
242  }
243  }
244  break;
245  }
246 
247  case ALPHA:
248  {
249  #ifdef DEBUG_NATSTRCMP
250  debugPrint("ALPHA", *p1, *p2);
251  #endif
252 
253  if (digitMask)
254  {
255  state = SCAN;
256  }
257  else // (alpha,alpha)
258  {
259  if (*p1 == *p2)
260  {
261  ++p1; ++p2;
262  }
263  else
264  {
265  // Lexical compare
266  return (*p1 - *p2);
267  }
268  }
269  break;
270  }
271 
272  case NUMERIC:
273  {
274  while (isdigit(*p1)) numend1 = p1++;
275  while (isdigit(*p2)) numend2 = p2++;
276 
277  #ifdef DEBUG_NATSTRCMP
278  debugPrint("NUMERIC", *p1, *p2);
279  debugPrint(numbeg1,numend1, numbeg2,numend2);
280  #endif
281 
282  // Length (minus 1) of each sequence
283  const size_t len1 = (numend1 - numbeg1);
284  const size_t len2 = (numend2 - numbeg2);
285 
286  if (len1 < len2)
287  {
288  return -1;
289  }
290  else if (len1 > len2)
291  {
292  return 1;
293  }
294 
295  // Same number of digits, leading zeros have been skipped.
296  // - so lexical and numerical compares are equivalent.
297 
298  const int cmp = strncmp(numbeg1, numbeg2, len1+1);
299  if (!cmp)
300  {
301  // Identical (digit) sequence - continue
302  state = SCAN;
303  }
304  else
305  {
306  #ifdef MANUAL_STRNCMP
307  return manual_numcompare(numbeg1, numbeg2);
308  #else
309  return cmp;
310  #endif
311  }
312  break;
313  }
314  }
315  }
316 
317  if (zeros1 < zeros2) return -1;
318  if (zeros1 > zeros2) return 1;
319  if (!*p1 && *p2) return -1; // s1 shorter than s2
320  if (*p1 && !*p2) return 1; // s1 longer than s2
321 
322  return 0;
323 }
324 
325 
326 // ************************************************************************* //
stringOpsSort.H
Specialized string sorting.
Foam::stringOps::natstrcmp
int natstrcmp(const char *s1, const char *s2)
'Natural' compare for C-strings
Definition: stringOpsSort.C:118
T
const volScalarField & T
Definition: createFieldRefs.H:2