Before I start: Yes, I have checked the other questions and answers on this topic both here and elsewhere.
I have found an example string that .NET will Base64-decode even though it isn't actually Base64-encoded. Here is the example:
Rhinocort Aqueous 64mcg/dose Nasal Spray
The .Net method Convert.FromBase64String does not throw an exception when decoding this string so my IsBase64Encoded method happily returns true for this string.
Interestingly, if I use the cygwin base64 -d command using this string as input, it fails with the message invalid input.
Even more interestingly, the source that I thought belonged to this executable (http://libb64.sourceforge.net/) "decodes" this same string with the same result as I am getting from the .NET Convert.FromBase64String. I will keep looking, hoping to find a clue elsewhere, but right now I'm stumped.
Any ideas?
There's a slightly better solution which also checks the input string length.
I recommend you do a check at the beginning: if the input is null or empty, return false.
http://www.codeproject.com/Questions/177808/How-to-determine-if-a-string-is-Base-decoded-or
When strings do pass Base64 decoding and the decoded data has special characters, then perhaps we can conclude that it was not valid Base64 (this depends on the encoding). Also, sometimes we're expecting the data being passed to be Base64, but sometimes it may not be properly padded with '='. Therefore, one method uses "strict" rules for Base64 and the other is "forgiving".
/// <summary>
/// Exercises both Base64 detectors with inputs that must be rejected and,
/// for each detector, at least one input that must be accepted.
/// </summary>
[TestMethod]
public void CheckForBase64()
{
    // Base64 of a small JSON object whose trailing '=' padding has been stripped,
    // so its length is not a multiple of four.
    const string unpaddedJson = "eyJhIjoiMSIsImIiOiI2N2NiZjA5MC00ZGRiLTQ3OTktOTlmZi1hMjhhYmUyNzQwYjEiLCJmIjoiMSIsImciOiIxIn0";

    // Padding: strict insists on it, forgiving restores it.
    Assert.IsFalse(IsBase64DataStrict(unpaddedJson), "Strict must reject input whose padding is missing.");
    Assert.IsTrue(IsBase64DataForgiving(unpaddedJson), "Forgiving must accept input whose padding is missing.");
    Assert.IsTrue(IsBase64DataStrict(unpaddedJson + "="), "Strict must accept the same input once it is padded.");

    // Inputs that decode without error but not to readable text.
    Assert.IsFalse(IsBase64DataForgiving("testing123"), "Decoded bytes are not valid UTF-8 text.");
    Assert.IsFalse(IsBase64DataStrict("ABBA"), "Decoded bytes contain control characters.");

    // Inputs containing characters outside the Base64 alphabet.
    Assert.IsFalse(IsBase64DataForgiving("6AC648C9-C08F-4F9D-A0A5-3904CF15ED3E"), "'-' is not in the Base64 alphabet.");
    Assert.IsFalse(IsBase64DataStrict("Rhinocort Aqueous 64mcg/dose Nasal Spray"), "Spaces are not in the Base64 alphabet.");
    Assert.IsFalse(IsBase64DataForgiving("Rhinocort Aqueous 64mcg/dose Nasal Spray"), "Spaces are not in the Base64 alphabet.");

    // Degenerate input.
    Assert.IsFalse(IsBase64DataStrict(string.Empty), "Empty input is never Base64 data.");
    Assert.IsFalse(IsBase64DataForgiving(string.Empty), "Empty input is never Base64 data.");
}
/// <summary>
/// Heuristically decides whether <paramref name="data"/> is correctly padded
/// Base64 whose decoded bytes are readable UTF-8 text.
/// </summary>
/// <param name="data">Candidate string; may be null.</param>
/// <returns>
/// <c>true</c> only when the input uses nothing but the Base64 alphabet, has a
/// length that is a multiple of four, carries '=' only as its final one or two
/// characters, decodes without error, and the decoded text contains no
/// replacement character or disallowed control character. Otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// This is a text heuristic: valid Base64 of arbitrary binary data is reported
/// as <c>false</c> when the bytes do not form readable UTF-8.
/// </remarks>
public bool IsBase64DataStrict(string data)
{
    if (string.IsNullOrWhiteSpace(data))
    {
        return false;
    }

    return IsPaddedBase64OfReadableText(data);
}

/// <summary>
/// Same check as <see cref="IsBase64DataStrict"/>, except that missing trailing
/// '=' padding is restored before validation.
/// </summary>
/// <param name="data">Candidate string; may be null.</param>
/// <returns>
/// <c>true</c> when the input, once padded to a multiple of four characters,
/// satisfies the strict rules; otherwise <c>false</c>.
/// </returns>
public bool IsBase64DataForgiving(string data)
{
    if (string.IsNullOrWhiteSpace(data))
    {
        return false;
    }

    // It could be made more forgiving by replacing any spaces with '+' here.

    // This is the forgiving part: top the input up to a multiple of four.
    // A remainder of 1 yields three '=' characters, which the padding rule
    // below rejects, so such input still returns false.
    int remainder = data.Length % 4;
    if (remainder > 0)
    {
        data = data.PadRight(data.Length + 4 - remainder, '=');
    }

    return IsPaddedBase64OfReadableText(data);
}

/// <summary>
/// Applies the strict structural rules to a non-null string, decodes it, and
/// checks that the decoded bytes are readable UTF-8 text.
/// </summary>
private static bool IsPaddedBase64OfReadableText(string data)
{
    // Explicit upper- and lower-case ranges instead of RegexOptions.IgnoreCase,
    // so the alphabet check does not depend on culture-sensitive case folding.
    // The static Regex.IsMatch also avoids constructing a Regex on every call.
    if (Regex.IsMatch(data, @"[^A-Za-z0-9+/=]"))
    {
        return false;
    }

    int length = data.Length;
    if (length % 4 != 0)
    {
        return false;
    }

    // '=' may appear only as the last character or as the last two characters.
    int firstPad = data.IndexOf('=');
    bool paddingOk = firstPad == -1
        || firstPad == length - 1
        || (firstPad == length - 2 && data[length - 1] == '=');
    if (!paddingOk)
    {
        return false;
    }

    byte[] bytes;
    try
    {
        bytes = Convert.FromBase64String(data);
    }
    catch (FormatException)
    {
        // The decoder rejected the input, so it is not Base64.
        return false;
    }

    // Encoding.UTF8 substitutes U+FFFD for invalid byte sequences rather than throwing.
    return IsReadableText(Encoding.UTF8.GetString(bytes));
}

/// <summary>
/// Returns <c>true</c> when every character of <paramref name="text"/> is tab,
/// line feed, carriage return, or lies in U+0020..U+D7FF, U+E000..U+FFFC, or
/// U+10000..U+10FFFF (the latter represented as a surrogate pair).
/// These are the ranges of the XML 1.0 Char production minus U+FFFD.
/// </summary>
private static bool IsReadableText(string text)
{
    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];

        // U+FFFD marks bytes that could not be decoded as UTF-8.
        if (c == '\uFFFD')
        {
            return false;
        }

        // A supplementary-plane character occupies two UTF-16 code units.
        // Accept a well-formed pair as one character and skip its second half.
        if (char.IsHighSurrogate(c))
        {
            if (i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
            {
                i++;
                continue;
            }

            return false;
        }

        // An unpaired low surrogate matches none of these ranges and is rejected.
        bool allowed = c == '\t' || c == '\n' || c == '\r'
            || (c >= 0x20 && c <= 0xD7FF)
            || (c >= 0xE000 && c <= 0xFFFD);
        if (!allowed)
        {
            return false;
        }
    }

    return true;
}