Be careful with Path.GetExtension() and Path.GetFileName()

Be careful with Path.GetExtension() and Path.GetFileName()

Path.GetExtension and Path.GetFileName are very useful, but you will run into some issues if you don’t read the documentation.

One important thing to know about them is that they are very restrictive and tied to the rules of the NTFS file system. They are safe to use with NTFS/FAT32 paths, but not with URIs, HFS+ (OS X) paths, or almost all Linux paths.

Capture

Invalid characters are: double quote ("), less than (<), greater than (>), pipe (|), backspace (\b), null (\0), tab (\t). So if your string contains one of these characters (anywhere, including the directory part of the path!!), Path.GetExtension or Path.GetFileName will throw an exception. This restriction doesn’t make a lot of sense in my opinion: why verify whether the file path contains an invalid character if the only thing we want is the file extension?

Some examples of paths not compatible with Path.GetExtension:

  • test/Document <1>.txt
  • work|2010/meeting.docx
  • http://www.bing.com/test.txt
  • images/family <3/WP2016_01.png

Capture
So, if your app communicates with a Linux server, for example, or if you want to retrieve the file extension of a URI or of a path from another file system, you will have some issues with them.

To solve this issue, I’ve written a helper that is compatible with URIs as well as other file systems. It provides equivalents of Path.GetExtension and Path.GetFileName, plus a RemoveInvalidChars() method:


using System;
using System.IO;
using System.Linq;
using Windows.Networking.Sockets;

namespace Huyn
{
    /// <summary>
    /// Lenient counterparts of <c>Path.GetExtension</c> and <c>Path.GetFileName</c>.
    /// The helpers here never validate the characters of the path, so strings
    /// containing characters such as <c>" &lt; &gt; |</c> are processed instead
    /// of being rejected. Also offers <see cref="RemoveInvalidChars"/> to strip
    /// or replace characters that are not allowed in a file name.
    /// </summary>
    public static class PathHelper
    {
        // Separators used for plain path strings: both slash styles plus the
        // volume separator, so "C:file.txt" yields "file.txt".
        private static readonly char[] _filePathSeparators = { '/', '\\', ':' };

        // Separator used for the LocalPath of a Uri.
        private static readonly char[] _uriSeparators = { '/' };

        // Shared validation for the overloads taking caller-supplied separators:
        // '.' introduces the extension, so it cannot also split path segments.
        private static void EnsureNoDotSeparator(char[] separators)
        {
            if (separators != null && Array.IndexOf(separators, '.') >= 0)
            {
                throw new ArgumentException("separators can't contain '.'");
            }
        }

        #region GetExtension

        /// <summary>
        /// Equivalent of <c>Path.GetExtension</c> that doesn't throw when the
        /// string contains an invalid character (" &lt; &gt; | etc...).
        /// Segments are split on '/', '\' and ':'.
        /// </summary>
        /// <param name="path">The path to inspect; may be null.</param>
        /// <returns>
        /// The extension of the last segment including the leading '.', an empty
        /// string when that segment has no extension or ends with '.', or null
        /// when <paramref name="path"/> is null.
        /// </returns>
        public static string GetExtension(this string path)
        {
            return GetExtensionInternal(path, _filePathSeparators);
        }

        /// <summary>
        /// Returns the extension of the last segment of <c>uri.LocalPath</c>,
        /// splitting segments on '/'.
        /// </summary>
        /// <param name="uri">The URI to inspect; must not be null.</param>
        /// <returns>The extension including the leading '.', or an empty string.</returns>
        /// <remarks>
        /// NOTE(review): Uri.LocalPath is documented to throw
        /// InvalidOperationException for relative URIs - confirm callers only
        /// pass absolute URIs.
        /// </remarks>
        public static string GetExtension(this Uri uri)
        {
            return GetExtensionInternal(uri.LocalPath, _uriSeparators);
        }

        /// <summary>
        /// Same as <see cref="GetExtension(string)"/> but with caller-supplied
        /// segment separators.
        /// </summary>
        /// <param name="path">The path to inspect; may be null.</param>
        /// <param name="separators">
        /// Characters that end the search for an extension. Null means the path
        /// has no separators at all.
        /// </param>
        /// <returns>
        /// The extension including the leading '.', an empty string when there is
        /// none, or null when <paramref name="path"/> is null.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="separators"/> contains '.'.
        /// </exception>
        public static string GetExtension(this string path, char[] separators)
        {
            EnsureNoDotSeparator(separators);
            return GetExtensionInternal(path, separators);
        }

        // Core of the GetExtension overloads. The extension starts at the last
        // '.' of the string, provided no separator follows that '.'.
        private static string GetExtensionInternal(string path, char[] separators)
        {
            if (path == null)
            {
                return null;
            }

            var dotIndex = path.LastIndexOf('.');

            // No dot at all, or a trailing dot ("file."): no extension.
            if (dotIndex < 0 || dotIndex == path.Length - 1)
            {
                return string.Empty;
            }

            // A separator after the last dot means the dot belongs to a parent
            // segment ("folder.v2/readme"), so the last segment has no extension.
            if (separators != null
                && separators.Length > 0
                && path.IndexOfAny(separators, dotIndex + 1) >= 0)
            {
                return string.Empty;
            }

            return path.Substring(dotIndex);
        }

        #endregion

        #region GetFileName

        /// <summary>
        /// Equivalent of <c>Path.GetFileName</c> that doesn't throw when the
        /// string contains an invalid character. Segments are split on '/', '\'
        /// and ':'.
        /// </summary>
        /// <param name="path">The path to inspect; may be null.</param>
        /// <returns>
        /// The text after the last separator, <paramref name="path"/> itself when
        /// it contains no separator, or null when <paramref name="path"/> is null.
        /// </returns>
        public static string GetFileName(this string path)
        {
            return GetFileNameInternal(path, _filePathSeparators);
        }

        /// <summary>
        /// Returns the text after the last '/' of <c>uri.LocalPath</c>.
        /// </summary>
        /// <param name="uri">The URI to inspect; must not be null.</param>
        /// <returns>The last segment of the local path.</returns>
        /// <remarks>
        /// NOTE(review): Uri.LocalPath is documented to throw
        /// InvalidOperationException for relative URIs - confirm callers only
        /// pass absolute URIs.
        /// </remarks>
        public static string GetFileName(this Uri uri)
        {
            return GetFileNameInternal(uri.LocalPath, _uriSeparators);
        }

        /// <summary>
        /// Same as <see cref="GetFileName(string)"/> but with caller-supplied
        /// segment separators.
        /// </summary>
        /// <param name="path">The path to inspect; may be null.</param>
        /// <param name="separators">
        /// Characters that delimit segments. Null means the path has no
        /// separators, in which case the whole path is returned.
        /// </param>
        /// <returns>
        /// The text after the last separator, <paramref name="path"/> itself when
        /// it contains no separator, or null when <paramref name="path"/> is null.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="separators"/> contains '.'.
        /// </exception>
        public static string GetFileName(this string path, char[] separators)
        {
            EnsureNoDotSeparator(separators);
            return GetFileNameInternal(path, separators);
        }

        // Core of the GetFileName overloads.
        private static string GetFileNameInternal(string path, char[] separators)
        {
            // A null separator set is accepted by the public overload, so it is
            // treated as "no separators" (as GetExtensionInternal does) instead
            // of being passed to a search call that would throw.
            if (path == null || separators == null || separators.Length == 0)
            {
                return path;
            }

            var lastSeparator = path.LastIndexOfAny(separators);
            return lastSeparator < 0 ? path : path.Substring(lastSeparator + 1);
        }

        #endregion

        /// <summary>
        /// Removes, or replaces, every character of <paramref name="filename"/>
        /// that is reported by <c>Path.GetInvalidFileNameChars()</c>.
        /// </summary>
        /// <param name="filename">The file name to clean; may be null.</param>
        /// <param name="replacedLetter">
        /// Character substituted for each invalid character. When null (the
        /// default) invalid characters are dropped instead.
        /// </param>
        /// <returns>
        /// The cleaned name, or null when <paramref name="filename"/> is null.
        /// </returns>
        public static string RemoveInvalidChars(string filename, char? replacedLetter = null)
        {
            if (filename == null)
            {
                return null;
            }

            var invalidChars = Path.GetInvalidFileNameChars();

            var cleaned = replacedLetter.HasValue
                ? filename.Select(c => invalidChars.Contains(c) ? replacedLetter.Value : c)
                : filename.Where(c => !invalidChars.Contains(c));

            return new string(cleaned.ToArray());
        }
    }
}


Don’t hesitate to send me feedback on twitter: @rudyhuyn

Comments are closed.