path.cxx
Go to the documentation of this file.
1 // ============================================================================
2 // Copyright (c) 2011-2012 University of Pennsylvania
3 // Copyright (c) 2013-2016 Andreas Schuh
4 // All rights reserved.
5 //
6 // See COPYING file for license information or visit
7 // https://cmake-basis.github.io/download.html#license
8 // ============================================================================
9 
10 /**
11  * @file path.cxx
12  * @brief File/directory path related functions.
13  */
14 
15 
16 #include <vector>
17 
18 #include <basis/config.h> // platform macros - must be first
19 
20 #include <stdlib.h> // malloc(), free(), _splitpath_s() (WINDOWS)
21 #include <string.h> // strncmp()
22 #include <cctype> // toupper()
23 #include <algorithm> // transform()
24 
25 #if WINDOWS
26 # include <windows.h> // GetFileAttributes()
27 #else
28 # include <sys/stat.h> // stat(), lstat()
29 #endif
30 
31 #include <basis/except.h> // to throw exceptions
32 
33 #include <basis/os.h>
34 #include <basis/os/path.h>
35 
36 
37 // acceptable in .cxx file
38 using namespace std;
39 
40 
41 namespace basis { namespace os { namespace path {
42 
43 
44 // ===========================================================================
45 // representation
46 // ===========================================================================
47 
// cSeparator is the separator appended when paths are composed; cSeparators
// lists every character that is recognized as a separator when paths are
// parsed. Both the characters and the pointer itself are constant so that
// the table cannot be reseated by accident.
#if WINDOWS
    static const char        cSeparator  = '\\';
    static const char* const cSeparators = "\\/";
#else
    static const char        cSeparator  = '/';
    static const char* const cSeparators = "/";
#endif
55 
56 // ---------------------------------------------------------------------------
// Whether the given character separates path components. The forward slash
// always does; the backslash only when compiled for Windows.
inline bool issep(char c)
{
    #if WINDOWS
    const bool is_backslash = (c == '\\');
    #else
    const bool is_backslash = false;
    #endif
    return is_backslash || c == '/';
}
65 
66 // ---------------------------------------------------------------------------
// Return a copy of str in which every occurrence of the character from
// is substituted by the character to.
static inline string replace(string str, char from, char to)
{
    // str is already a private copy (passed by value), so edit it in place
    std::replace(str.begin(), str.end(), from, to);
    return str;
}
79 
80 // ---------------------------------------------------------------------------
81 string normpath(const string& path)
82 {
83  if (path.empty()) return "";
84  char drive[3] = {'\0', ':', '\0'};
85  size_t i = 0;
86  #if WINDOWS
87  if (path.size() > 1 && path[1] == ':') {
88  drive[0] = path[0];
89  i = 2;
90  }
91  #endif
92  string norm_path = drive;
93  bool abs = issep(path[i]);
94  if (abs) {
95  #if WINDOWS
96  while (i <= path.size() && issep(path[i])) {
97  norm_path += cSeparator;
98  i++;
99  }
100  #else
101  norm_path += cSeparator;
102  #endif
103  }
104  string current;
105  vector<string> parts;
106  while (i <= path.size()) {
107  if (issep(path[i]) || path[i] == '\0') {
108  if (current == "..") {
109  if (!abs && (parts.empty() || parts.back() == "..")) {
110  parts.push_back(current);
111  } else if (!parts.empty()) {
112  parts.pop_back();
113  }
114  } else if (current != "" && current != ".") {
115  parts.push_back(current);
116  }
117  current.clear();
118  } else {
119  current += path[i];
120  }
121  i++;
122  }
123  for (i = 0; i < parts.size(); i++) {
124  norm_path = join(norm_path, parts[i]);
125  }
126  return norm_path.empty() ? "." : norm_path;
127 }
128 
129 // ---------------------------------------------------------------------------
130 string posixpath(const string& path)
131 {
132  #if WINDOWS
133  string norm_path = path;
134  #else
135  string norm_path = replace(path, '\\', '/');
136  #endif
137  norm_path = normpath(norm_path);
138  #if WINDOWS
139  norm_path = replace(norm_path, '\\', '/');
140  #endif
141  return norm_path;
142 }
143 
144 // ---------------------------------------------------------------------------
145 string ntpath(const string& path)
146 {
147  #if WINDOWS
148  string norm_path = path;
149  #else
150  string norm_path = replace(path, '\\', '/');
151  #endif
152  norm_path = normpath(norm_path);
153  #if UNIX
154  norm_path = replace(norm_path, '/', '\\');
155  #endif
156  return norm_path;
157 }
158 
159 // ===========================================================================
160 // components
161 // ===========================================================================
162 
163 // ---------------------------------------------------------------------------
164 void split(const string& path, string& head, string& tail)
165 {
166  size_t last = path.find_last_of(cSeparators);
167  if (last == string::npos) {
168  head = "";
169  tail = path;
170  } else {
171  size_t pos = last;
172  if (last > 0) pos = path.find_last_not_of(cSeparators, last - 1);
173  if (pos == string::npos) head = path.substr(0, last + 1);
174  else head = path.substr(0, pos + 1);
175  tail = path.substr(last + 1);
176  }
177 }
178 
179 // ---------------------------------------------------------------------------
180 vector<string> split(const string& path)
181 {
182  vector<string> parts(2, "");
183  split(path, parts[0], parts[1]);
184  return parts;
185 }
186 
187 // ---------------------------------------------------------------------------
// Split a path into its drive specification (e.g., "C:") and the remainder.
// A drive is only recognized when compiled for Windows; otherwise drive is
// set to the empty string and tail to the whole path.
//
// Both parts are computed before either output is written, so drive or tail
// may reference the same string object as path.
void splitdrive(const string& path, string& drive, string& tail)
{
    string new_drive;
    string new_tail;
#if WINDOWS
    if (path.size() > 1 && path[1] == ':') {
        new_drive = path.substr(0, 2);
        new_tail  = path.substr(2);
    }
    else
#endif
    {
        new_tail = path;
    }
    drive.swap(new_drive);
    tail.swap(new_tail);
}
202 
203 // ---------------------------------------------------------------------------
204 vector<string> splitdrive(const string& path)
205 {
206  vector<string> parts(2, "");
207  splitdrive(path, parts[0], parts[1]);
208  return parts;
209 }
210 
211 // ---------------------------------------------------------------------------
212 void splitext(const string& path, string& head, string& ext, const set<string>* exts, bool icase)
213 {
214  size_t pos = string::npos;
215  // test user supplied extensions only
216  if (exts) {
217  for (set<string>::const_iterator i = exts->begin(); i != exts->end(); ++i) {
218  if (path.size() < i->size()) continue;
219  size_t start = path.size() - i->size();
220  if (start < pos) { // longest match
221  if (icase) {
222  string str = path.substr(start);
223  string ext = *i;
224  std::transform(str.begin(), str.end(), str.begin(), ::toupper);
225  std::transform(ext.begin(), ext.end(), ext.begin(), ::toupper);
226  if (str == ext) pos = start;
227  } else if (path.compare(start, i->size(), *i) == 0) {
228  pos = start;
229  }
230  }
231  }
232  // otherwise, get position of last dot
233  } else {
234  pos = path.find_last_of('.');
235  // leading dot of file name in Posix indicates hidden file,
236  // not start of file extension
237  #if UNIX
238  if (pos != string::npos && (pos == 0 || issep(path[pos - 1]))) {
239  pos = string::npos;
240  }
241  #endif
242  }
243  // split extension
244  if (pos == string::npos) {
245  head = path;
246  ext = "";
247  } else {
248  // tmp variable used for the case that head references the same input
249  // string as path
250  string tmp = path.substr(0, pos);
251  ext = path.substr(pos);
252  head = tmp;
253  }
254 }
255 
256 // ---------------------------------------------------------------------------
257 vector<string> splitext(const string& path, const set<string>* exts)
258 {
259  vector<string> parts(2, "");
260  splitext(path, parts[0], parts[1], exts);
261  return parts;
262 }
263 
264 // ---------------------------------------------------------------------------
265 string dirname(const string& path)
266 {
267  vector<string> parts(2, "");
268  split(path, parts[0], parts[1]);
269  return parts[0];
270 }
271 
272 // ---------------------------------------------------------------------------
273 string basename(const string& path)
274 {
275  vector<string> parts(2, "");
276  split(path, parts[0], parts[1]);
277  return parts[1];
278 }
279 
280 // ---------------------------------------------------------------------------
281 bool hasext(const string& path, const set<string>* exts)
282 {
283  string ext = splitext(path, exts)[1];
284  return exts ? exts->find(ext) != exts->end() : !ext.empty();
285 }
286 
287 // ===========================================================================
288 // conversion
289 // ===========================================================================
290 
291 // ---------------------------------------------------------------------------
292 bool isabs(const string& path)
293 {
294  size_t i = 0;
295  #if WINDOWS
296  if (path.size() > 1 && path[1] == ':') i = 2;
297  #endif
298  return i < path.size() && issep(path[i]);
299 }
300 
301 // ---------------------------------------------------------------------------
302 string abspath(const string& path)
303 {
304  return normpath(join(getcwd(), path));
305 }
306 
307 // ---------------------------------------------------------------------------
308 string relpath(const string& path, const string& base)
309 {
310  // if relative path is given just return it
311  if (!isabs(path)) return path;
312  // normalize paths
313  string norm_path = normpath(path);
314  string norm_base = normpath(join(getcwd(), base));
315  // check if paths are on same drive
316  #if WINDOWS
317  string drive = splitdrive(norm_path)[0];
318  string base_drive = splitdrive(norm_base)[0];
319  if (drive != base_drive) {
320  BASIS_THROW(invalid_argument,
321  "Path is on drive " << drive << ", start is on drive " << base_drive);
322  }
323  #endif
324  // find start of first path component in which paths differ
325  string::const_iterator b = norm_base.begin();
326  string::const_iterator p = norm_path.begin();
327  size_t pos = 0;
328  size_t i = 0;
329  while (b != norm_base.end() && p != norm_path.end()) {
330  if (issep(*p)) {
331  if (!issep(*b)) break;
332  pos = i;
333  } else if (*b != *p) {
334  break;
335  }
336  b++; p++; i++;
337  }
338  // set pos to i (in this case, the size of one of the paths) if the end
339  // of one path was reached, but the other path has a path separator
340  // at this position, this is required below
341  if ((b != norm_base.end() && issep(*b)) ||
342  (p != norm_path.end() && issep(*p))) pos = i;
343  // skip trailing separator of other path if end of one path reached
344  if (b == norm_base.end() && p != norm_path.end() && issep(*p)) p++;
345  if (p == norm_path.end() && b != norm_base.end() && issep(*b)) b++;
346  // if paths are the same, just return a period (.)
347  //
348  // Thanks to the previous skipping of trailing separators, this condition
349  // handles all of the following cases:
350  //
351  // base := "/usr/bin" path := "/usr/bin"
352  // base := "/usr/bin/" path := "/usr/bin/"
353  // base := "/usr/bin" path := "/usr/bin/"
354  // base := "/usr/bin/" path := "/usr/bin"
355  if (b == norm_base.end() && p == norm_path.end()) return ".";
356  // otherwise, pos is the index of the last slash for which both paths
357  // were identical; hence, everything that comes after in the original
358  // path is preserved and for each following component in the base path
359  // a "../" is prepended to the relative path
360  string rel_path;
361  // truncate base path with a separator as for each "*/" path component,
362  // a "../" will be prepended to the relative path
363  if (b != norm_base.end() && !issep(norm_base[norm_base.size() - 1])) {
364  // attention: This operation may invalidate the iterator b!
365  // Therefore, remember position of iterator and get a new one.
366  size_t pos = b - norm_base.begin();
367  norm_base += cSeparator;
368  b = norm_base.begin() + pos;
369  }
370  while (b != norm_base.end()) {
371  if (issep(*b)) {
372  rel_path += "..";
373  rel_path += cSeparator;
374  }
375  b++;
376  }
377  if (pos + 1 < norm_path.size()) rel_path += norm_path.substr(pos + 1);
378  // remove trailing path separator
379  if (issep(rel_path[rel_path.size() - 1])) {
380  rel_path.erase(rel_path.size() - 1);
381  }
382  return rel_path;
383 }
384 
385 // ---------------------------------------------------------------------------
386 string realpath(const string& path)
387 {
388  string curr_path = join(getcwd(), path);
389  #if UNIX
390  // use stringstream and std::getline() to split absolute path at slashes (/)
391  stringstream ss(curr_path);
392  curr_path.clear();
393  string fname;
394  string prev_path;
395  string next_path;
396  char slash;
397  ss >> slash; // root slash
398  while (getline(ss, fname, '/')) {
399  // current absolute path
400  curr_path += '/';
401  curr_path += fname;
402  // if current path is a symbolic link, follow it
403  if (islink(curr_path)) {
404  // for safety reasons, restrict the depth of symbolic links followed
405  for (unsigned int i = 0; i < 100; i++) {
406  next_path = os::readlink(curr_path);
407  if (next_path.empty()) {
408  // if real path could not be determined because of permissions
409  // or invalid path, return the original path
410  break;
411  } else {
412  curr_path = join(prev_path, next_path);
413  if (!islink(next_path)) break;
414  }
415  }
416  // if real path could not be determined with the given maximum number
417  // of loop iterations (endless cycle?) or one of the symbolic links
418  // could not be read, just return original path as absolute path
419  if (islink(next_path)) {
420  return abspath(path);
421  }
422  }
423  // memorize previous path used as base for abspath()
424  prev_path = curr_path;
425  }
426  #endif
427  // normalize path after all symbolic links were resolved
428  return normpath(curr_path);
429 }
430 
431 // ---------------------------------------------------------------------------
432 string join(const string& base, const string& path)
433 {
434  if (base.empty() || isabs(path)) return path;
435  if (issep(base[base.size() - 1])) return base + path;
436  #if WINDOWS
437  return base + '\\' + path;
438  #else
439  return base + '/' + path;
440  #endif
441 }
442 
443 // ===========================================================================
444 // file status
445 // ===========================================================================
446 
447 // ---------------------------------------------------------------------------
// Whether the path refers to an existing file. On Windows, this is any
// existing path which is not a directory; otherwise stat() must succeed and
// report a regular file (symbolic links are followed by stat()).
bool isfile(const std::string path)
{
    #if WINDOWS
    const DWORD info = ::GetFileAttributes(path.c_str());
    // check for failure explicitly rather than relying on all bits of
    // INVALID_FILE_ATTRIBUTES, including the directory bit, being set
    return info != INVALID_FILE_ATTRIBUTES &&
           (info & FILE_ATTRIBUTE_DIRECTORY) == 0;
    #else
    struct stat info;
    if (stat(path.c_str(), &info) != 0) return false;
    return S_ISREG(info.st_mode);
    #endif
}
460 
461 // ---------------------------------------------------------------------------
// Whether the path refers to an existing directory (symbolic links are
// followed by stat() on non-Windows systems).
bool isdir(const std::string path)
{
    #if WINDOWS
    const DWORD info = ::GetFileAttributes(path.c_str());
    // INVALID_FILE_ATTRIBUTES has all bits set, including the directory
    // bit; without this check a non-existent path is reported as directory
    return info != INVALID_FILE_ATTRIBUTES &&
           (info & FILE_ATTRIBUTE_DIRECTORY) != 0;
    #else
    struct stat info;
    if (stat(path.c_str(), &info) != 0) return false;
    return S_ISDIR(info.st_mode);
    #endif
}
474 
475 // ---------------------------------------------------------------------------
// Whether the path refers to an existing file or directory, i.e., whether
// its attributes (Windows) or status (stat()) can be queried.
bool exists(const std::string path)
{
    #if WINDOWS
    return ::GetFileAttributes(path.c_str()) != INVALID_FILE_ATTRIBUTES;
    #else
    struct stat status;
    return stat(path.c_str(), &status) == 0;
    #endif
}
487 
488 // ---------------------------------------------------------------------------
// Whether the path itself is a symbolic link. Always false when compiled
// for Windows; otherwise determined by lstat(), which does not follow links.
bool islink(const string& path)
{
    #if WINDOWS
    return false;
    #else
    struct stat status;
    const bool queried = (lstat(path.c_str(), &status) == 0);
    return queried && S_ISLNK(status.st_mode);
    #endif
}
499 
500 
501 } // namespace path
502 
503 } // namespace os
504 
505 } // namespace basis
Basic exceptions and related helper macros.
std::string ntpath(const std::string &path)
Convert path to Windows representation.
Definition: path.cxx:145
std::string getcwd()
Get absolute path of the (current) working directory.
Definition: os.cxx:47
function normpath(in path)
Clean path, i.e., remove occurrences of "./", duplicate slashes,...
function realpath(in path)
Get canonical file path.
std::string join(const std::string &base, const std::string &path)
Join two paths, e.g., base path and relative path.
Definition: path.cxx:432
System related macro definitions.
STL namespace.
bool hasext(const std::string &path, const std::set< std::string > *exts=NULL)
Test whether a given path has an extension.
Definition: path.cxx:281
bool isabs(const std::string &path)
Test whether a given path is absolute.
Definition: path.cxx:292
Definition: basis.h:34
#define BASIS_THROW(type, msg)
Throw exception with given message.
Definition: except.h:48
void splitdrive(const std::string &path, std::string &drive, std::string &tail)
Get drive specification of Windows path.
Definition: path.cxx:188
bool issep(char c)
Determine if a given character is a path separator.
Definition: path.cxx:57
bool exists(const std::string path)
Test the existence of a file or directory.
Definition: path.cxx:476
bool isfile(const std::string path)
Test whether a given path is the path of an existent file.
Definition: path.cxx:448
std::string basename(const std::string &path)
Get file name.
Definition: path.cxx:273
void split(const std::string &path, std::string &head, std::string &tail)
Split path into two parts.
Definition: path.cxx:164
std::string relpath(const std::string &path, const std::string &base=std::string())
Make path relative.
Definition: path.cxx:308
Operating system dependent functions.
bool islink(const std::string &path)
Whether a given path is a symbolic link.
Definition: path.cxx:489
void splitext(const std::string &path, std::string &head, std::string &ext, const std::set< std::string > *exts=NULL, bool icase=false)
Get file name extension.
Definition: path.cxx:212
function abspath(in path)
Get absolute path given a relative path.
std::string dirname(const std::string &path)
Get file directory.
Definition: path.cxx:265
File/directory path related functions.
std::string readlink(const std::string &path)
Read value of symbolic link.
Definition: os.cxx:147
bool isdir(const std::string path)
Test whether a given path is the path of an existent directory.
Definition: path.cxx:462
std::string posixpath(const std::string &path)
Convert path to Posix (e.g., Unix, Mac OS) representation.
Definition: path.cxx:130