stringOpsSort.C
Go to the documentation of this file.
1/*---------------------------------------------------------------------------*\
2 ========= |
3 \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
4 \\ / O peration |
5 \\ / A nd | www.openfoam.com
6 \\/ M anipulation |
7-------------------------------------------------------------------------------
8 Copyright (C) 2017 OpenCFD Ltd.
9-------------------------------------------------------------------------------
10Changes for OpenFOAM
11
12 Code cleanup, reduction, elimination of redundant code.
13\*---------------------------------------------------------------------------*/
14
15//========================================================================
16// Copyright (c) 1998-2010,2011 Free Software Foundation, Inc.
17//
18// Permission is hereby granted, free of charge, to any person obtaining a
19// copy of this software and associated documentation files (the
20// "Software"), to deal in the Software without restriction, including
21// without limitation the rights to use, copy, modify, merge, publish,
22// distribute, distribute with modifications, sublicense, and/or sell
23// copies of the Software, and to permit persons to whom the Software is
24// furnished to do so, subject to the following conditions:
25//
26// The above copyright notice and this permission notice shall be included
27// in all copies or substantial portions of the Software.
28//
29// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
30// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
32// IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
33// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
34// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
35// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36//
37// Except as contained in this notice, the name(s) of the above copyright
38// holders shall not be used in advertising or otherwise to promote the
39// sale, use or other dealings in this Software without prior written
40// authorization.
41//========================================================================
42
43//========================================================================
44// Original Author: Jan-Marten Spit <jmspit@euronet.nl>
45//========================================================================
46
47#include "stringOpsSort.H"
48#include <cctype>
49#include <cstring>
50
51// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
52
53// Local tweaks/preferences:
54//
55// [DIGITS_ALWAYS_FIRST] : as per original code
56// This results in "file123.txt" sorting before "file.txt", which is
57// inconsistent with what 'ls -v' produces
58// - normally do not want this (Mark Olesen: Oct-2017)
59#undef DIGITS_ALWAYS_FIRST
60
61// [MANUAL_NUMCOMPARE] : handwritten code instead of strncmp
62// The original code has a mix of strncmp for equality but handwritten code
63// for greater-than/less-than.
64//
65// -> this does to be unneeded, rely on strncmp() return values
66// (Mark Olesen: Oct-2017)
67#undef MANUAL_NUMCOMPARE
68
69// [DEBUG_NATSTRCMP] : debug info to std::cerr (for development purposes only)
70#undef DEBUG_NATSTRCMP
71
72// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
73
74#ifdef DEBUG_NATSTRCMP
75#include <iostream>
76
77template<class T>
78static inline void debugPrint(const char* text, T item1, T item2)
79{
80 std::cerr << text << ": <" << item1 << "> <" << item2 << ">\n";
81}
82
83// Character sequences, end pointer is _inclusive_.
84static inline void debugPrint
85(
86 const char* b1, const char* e1,
87 const char* b2, const char* e2
88)
89{
90 std::cerr << "<";
91 while (b1 < e1) { std::cerr << *b1++; }
92 std::cerr << *e1 << "> ";
93
94 std::cerr << "<";
95 while (b2 < e2) { std::cerr << *b2++; }
96 std::cerr << *e2 << ">\n";
97}
98#endif
99
100#ifdef MANUAL_NUMCOMPARE
101// Manual comparison of identical length (digit) sequences
102static inline int manual_numcompare(const char* s1, const char* s2)
103{
104 while (*s1 && *s2)
105 {
106 const int cmp = (*s1 - *s2);
107 if (!cmp) return cmp;
108
109 ++s1; ++s2;
110 }
111 return 0; // Length check done before in caller.
112}
113#endif
114
115
116// ------------------------------------------------------------------------- //
117
118int Foam::stringOps::natstrcmp(const char* s1, const char* s2)
119{
120 #ifdef DEBUG_NATSTRCMP
121 debugPrint("natstrcmp", s1, s2);
122 #endif
123
124 // States for state engine
125 enum stateType { SCAN, ALPHA, NUMERIC };
126
127 // Number of leading zeroes
128 unsigned zeros1 = 0;
129 unsigned zeros2 = 0;
130
131 // Pointers to begin/end of integer sequences (without leading zeros)
132 const char* numbeg1 = nullptr;
133 const char* numbeg2 = nullptr;
134 const char* numend1 = nullptr;
135 const char* numend2 = nullptr;
136
137 stateType state = SCAN;
138
139 const char* p1 = s1;
140 const char* p2 = s2;
141
142 while (*p1 && *p2)
143 {
144 // Bitmask for digits vs alpha
145 // - 0: neither are digits
146 // - 1: p1 is the only digit
147 // - 2: p2 is the only digit
148 // - 3: both p1 and p2 are digits
149
150 const unsigned digitMask =
151 ((isdigit(*p2) ? 2:0) | (isdigit(*p1) ? 1:0));
152
153 switch (state)
154 {
155 case SCAN:
156 {
157 #ifdef DEBUG_NATSTRCMP
158 debugPrint("SCAN", *p1, *p2);
159 #endif
160
161 switch (digitMask)
162 {
163 case 0: // (alpha,alpha)
164 {
165 state = ALPHA;
166
167 if (*p1 == *p2)
168 {
169 ++p1; ++p2;
170 }
171 else
172 {
173 // Lexical compare
174 return (*p1 - *p2);
175 }
176 break;
177 }
178
179 #ifdef DIGITS_ALWAYS_FIRST
180 case 0x1: // (digit,alpha) : digit < alpha
181 {
182 return -1;
183 break;
184 }
185 case 0x2: // (alpha,digit) : alpha > digit
186 {
187 return 1;
188 break;
189 }
190 #else /* DIGITS_ALWAYS_FIRST */
191 case 0x1: // (digit,alpha)
192 case 0x2: // (alpha,digit)
193 {
194 // Lexical compare for digits/alpha
195 return (*p1 - *p2);
196 break;
197 }
198 #endif /* DIGITS_ALWAYS_FIRST */
199
200 default: // (digit,digit)
201 {
202 state = NUMERIC;
203
204 // State changed from SCAN to NUMERIC, so skip leading
205 // leading zeros so the numeric comparison is
206 // untainted by them, but capture the first occurrence
207 // of leading zeroes for a final tie-break if needed.
208
209 if (!zeros1)
210 {
211 // First occurrence of any leading zeroes
212 while (*p1 == '0') { ++p1; ++zeros1; }
213 }
214 else
215 {
216 while (*p1 == '0') { ++p1; }
217 }
218
219 if (!zeros2)
220 {
221 // First occurrence of any leading zeroes
222 while (*p2 == '0') { ++p2; ++zeros2; }
223 }
224 else
225 {
226 while (*p2 == '0') { ++p2; }
227 }
228
229 if (zeros1 == zeros2)
230 {
231 // Same number of zeros - so irrelevant
232 zeros1 = zeros2 = 0;
233 }
234
235 if (!isdigit(*p1)) --p1;
236 if (!isdigit(*p2)) --p2;
237
238 numbeg1 = numend1 = p1;
239 numbeg2 = numend2 = p2;
240
241 break;
242 }
243 }
244 break;
245 }
246
247 case ALPHA:
248 {
249 #ifdef DEBUG_NATSTRCMP
250 debugPrint("ALPHA", *p1, *p2);
251 #endif
252
253 if (digitMask)
254 {
255 state = SCAN;
256 }
257 else // (alpha,alpha)
258 {
259 if (*p1 == *p2)
260 {
261 ++p1; ++p2;
262 }
263 else
264 {
265 // Lexical compare
266 return (*p1 - *p2);
267 }
268 }
269 break;
270 }
271
272 case NUMERIC:
273 {
274 while (isdigit(*p1)) numend1 = p1++;
275 while (isdigit(*p2)) numend2 = p2++;
276
277 #ifdef DEBUG_NATSTRCMP
278 debugPrint("NUMERIC", *p1, *p2);
279 debugPrint(numbeg1,numend1, numbeg2,numend2);
280 #endif
281
282 // Length (minus 1) of each sequence
283 const size_t len1 = (numend1 - numbeg1);
284 const size_t len2 = (numend2 - numbeg2);
285
286 if (len1 < len2)
287 {
288 return -1;
289 }
290 else if (len1 > len2)
291 {
292 return 1;
293 }
294
295 // Same number of digits, leading zeros have been skipped.
296 // - so lexical and numerical compares are equivalent.
297
298 const int cmp = strncmp(numbeg1, numbeg2, len1+1);
299 if (!cmp)
300 {
301 // Identical (digit) sequence - continue
302 state = SCAN;
303 }
304 else
305 {
306 #ifdef MANUAL_STRNCMP
307 return manual_numcompare(numbeg1, numbeg2);
308 #else
309 return cmp;
310 #endif
311 }
312 break;
313 }
314 }
315 }
316
317 if (zeros1 < zeros2) return -1;
318 if (zeros1 > zeros2) return 1;
319 if (!*p1 && *p2) return -1; // s1 shorter than s2
320 if (*p1 && !*p2) return 1; // s1 longer than s2
321
322 return 0;
323}
324
325
326// ************************************************************************* //
const volScalarField & T
int natstrcmp(const char *s1, const char *s2)
'Natural' compare for C-strings
Specialized string sorting.