#region Copyright 2013-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#endregion
/* -------------------------------------------------------------------------------
 * DERIVED WORK FROM http://hpop.sourceforge.net/
 * Provided here with a compatible (Apache) license and a few modifications to
 * limit functionality to MIME parsing only.  If desired, the original works are
 * available in public domain at the url above.
 * ---------------------------------------------------------------------------- */
using System;
using System.Collections;
using System.Collections.ObjectModel;
using System.Globalization;
using System.Text;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.IO;
using System.Net.Mail;
using System.Net.Mime;
using System.Text.RegularExpressions;

namespace CSharpTest.Net.Http
{
    /// <summary>
    /// Parses a raw HTML form where ContentType = "multipart/form-data", including one or more attachments.
    /// Caution advised as this is entirely in-memory for the moment and not a viable option for large files.
    /// </summary>
    public sealed class MimeMultiPartData : IEnumerable<MimeMessagePart>
    {
        // Upper bound on the number of bytes buffered from the input stream (100 MiB).
        private const long MaxMessageSize = 100*1024*1024;
        private readonly NameValueCollection _headers;
        private readonly MimeMessagePart _message;

        /// <summary>
        /// Constructs the form data from the input stream and http "Content-Type" header.
        /// </summary>
        /// <param name="input">The stream holding the raw request body.</param>
        /// <param name="contentType">The raw value of the http "Content-Type" header.</param>
        public MimeMultiPartData(Stream input, string contentType)
            : this(input, ContentTypeHeaders(contentType))
        { }

        /// <summary>
        /// Constructs the form data from the input stream and the http headers.
        /// </summary>
        /// <param name="input">The stream holding the raw request body; it is read to its end.</param>
        /// <param name="headers">The http headers; a private copy is taken.</param>
        /// <exception cref="ArgumentNullException">input or headers is null.</exception>
        /// <exception cref="ArgumentException">
        /// The stream holds more than <c>MaxMessageSize</c> bytes, or the content-type is not multipart.
        /// </exception>
        public MimeMultiPartData(Stream input, NameValueCollection headers)
        {
            if (input == null) throw new ArgumentNullException("input");
            if (headers == null) throw new ArgumentNullException("headers");

            _headers = new NameValueCollection(headers);

            using (MemoryStream outStream = new MemoryStream())
            {
                int bytesRead;
                byte[] buffer = new byte[16 * 1024];
                while ((bytesRead = input.Read(buffer, 0, buffer.Length)) > 0)
                {
                    outStream.Write(buffer, 0, bytesRead);
                    if (outStream.Length > MaxMessageSize)
                        throw new ArgumentException("The input stream is too large.");
                }

                _message = new MimeMessagePart(outStream.ToArray(), _headers);
            }

            if (!_message.IsMultiPart)
                throw new ArgumentException("The provided content-type is not a multipart encoding or is not recognized.");
        }

        /// <summary>
        /// The headers originally provided.
        /// </summary>
        public NameValueCollection Headers { get { return _headers; } }

        /// <summary>
        /// The Content-Type header field.<br/>
        /// <br/>
        /// If not set, the ContentType is created by the default "text/plain; charset=us-ascii" which is
        /// defined in RFC 2045 section 5.2.
        /// </summary>
        public ContentType ContentType { get { return _message.ContentType; } }

        /// <summary>
        /// This header describes the Content encoding during transfer.<br/>
        /// <br/>
        /// If no Content-Transfer-Encoding header was present in the message, it is set
        /// to the default of ContentTransferEncoding.SevenBit in accordance to the RFC.
        /// </summary>
        public ContentTransferEncoding ContentTransferEncoding { get { return _message.ContentTransferEncoding; } }

        // Wraps a bare content-type value in a header collection so both constructors share one code path.
        private static NameValueCollection ContentTypeHeaders(string contentType)
        {
            NameValueCollection c = new NameValueCollection();
            c["CONTENT-TYPE"] = contentType;
            return c;
        }

        /// <summary>
        /// Returns an enumerator that iterates through the collection.
        /// </summary>
        /// <returns>
        /// A <see cref="T:System.Collections.Generic.IEnumerator`1"/> that can be used to iterate through the collection.
        /// </returns>
        /// <filterpriority>1</filterpriority>
        public IEnumerator<MimeMessagePart> GetEnumerator()
        { return _message.MessageParts.GetEnumerator(); }

        IEnumerator IEnumerable.GetEnumerator()
        { return _message.MessageParts.GetEnumerator(); }

        /// <summary>
        /// Total number of message parts / Form data found.
        /// </summary>
        public int Count { get { return _message.MessageParts.Count; } }

        /// <summary>
        /// Returns a <see cref="MimeMessagePart"/> element by name, or throws ArgumentOutOfRangeException
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">No part has the given name or file name.</exception>
        public MimeMessagePart this[string name]
        {
            get
            {
                MimeMessagePart p;
                if (TryGetMessagePart(name, out p))
                    return p;

                // The (paramName, message) overload: the parameter name goes first.
                throw new ArgumentOutOfRangeException("name", "The message part does not exist.");
            }
        }

        /// <summary>
        /// Returns true if the element was found and the out parameter messagePart set successfully
        /// </summary>
        /// <param name="name">Part name or file name, compared case-insensitively (ordinal).</param>
        /// <param name="messagePart">The first matching part, or null when none matches.</param>
        public bool TryGetMessagePart(string name, out MimeMessagePart messagePart)
        {
            foreach (MimeMessagePart part in _message.MessageParts)
            {
                if (IsNamed(part, name))
                {
                    messagePart = part;
                    return true;
                }
            }
            messagePart = null;
            return false;
        }

        /// <summary>
        /// Gets all parts for a given name, used in the case of multi-file upload controls.
        /// </summary>
        /// <param name="name">Part name or file name, compared case-insensitively (ordinal).</param>
        public IEnumerable<MimeMessagePart> GetAllPartsByName(string name)
        {
            foreach (MimeMessagePart part in _message.MessageParts)
            {
                if (IsNamed(part, name))
                {
                    yield return part;
                }
            }
        }

        /// <summary>
        /// Returns the unique names for all the message parts
        /// </summary>
        public ICollection<string> Keys
        {
            get
            {
                // A dictionary is used purely to de-duplicate the names.
                Dictionary<string, string> keys = new Dictionary<string, string>();
                foreach (MimeMessagePart part in _message.MessageParts)
                    keys[part.Name] = part.Name;
                return keys.Keys;
            }
        }

        /// <summary>
        /// Gets all the parts that are not of type "text/plain" and/or have a filename value set in content disposition.
        /// </summary>
        public ICollection<MimeMessagePart> GetAttachments()
        {
            List<MimeMessagePart> result = new List<MimeMessagePart>();
            foreach (MimeMessagePart part in _message.MessageParts)
            {
                if (!IsPlainFormField(part))
                    result.Add(part);
            }
            return result;
        }

        /// <summary>
        /// Gets a dictionary of all name/value pairs of parts that have a content-type of "text/plain" and do not have
        /// a filename value set in content disposition.
        /// </summary>
        /// <returns>A case-insensitive dictionary; when a name repeats, the last part wins.</returns>
        public Dictionary<string, string> ToDictionary()
        {
            Dictionary<string, string> result = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
            foreach (MimeMessagePart part in _message.MessageParts)
            {
                if (IsPlainFormField(part))
                    result[part.Name] = part.Text;
            }
            return result;
        }

        // True when the part's Name or FileName equals the requested name (ordinal, ignoring case).
        private static bool IsNamed(MimeMessagePart part, string name)
        {
            return StringComparer.OrdinalIgnoreCase.Equals(name, part.Name)
                || StringComparer.OrdinalIgnoreCase.Equals(name, part.FileName);
        }

        // True for a simple value field: not multipart, "text/plain", and no file name distinct from its name.
        private static bool IsPlainFormField(MimeMessagePart part)
        {
            return !part.IsMultiPart
                && StringComparer.OrdinalIgnoreCase.Equals(part.ContentType.MediaType, "text/plain")
                && part.FileName == part.Name;
        }
    }

    #region MessagePart
    /// <summary>
    /// A MessagePart is a part of an email message used to describe the whole email parse tree.
    /// </summary>
    public sealed class MimeMessagePart
    {
        private readonly MessageHeader _headers;
        private readonly List<MimeMessagePart> _messageParts;

        #region Public properties
        /// <summary>
        /// Returns the collection of headers for a message part
        /// </summary>
        public NameValueCollection Headers { get { return _headers.Headers; } }

        /// <summary>
        /// The Content-Type header field.<br/>
/// <br/>
        /// If not set, the ContentType is created by the default "text/plain; charset=us-ascii" which is
        /// defined in RFC 2045 section 5.2.<br/>
        /// <br/>
        /// If set, the default is overridden.
        /// </summary>
        public ContentType ContentType { get { return _headers.ContentType; } }

        /// <summary>
        /// A human readable description of the body<br/>
        /// <br/>
        /// <see langword="null"/> if no Content-Description header was present in the message.<br/>
        /// </summary>
        public string ContentDescription { get { return _headers.ContentDescription; } }

        /// <summary>
        /// This header describes the Content encoding during transfer.<br/>
        /// <br/>
        /// If no Content-Transfer-Encoding header was present in the message, it is set
        /// to the default of ContentTransferEncoding.SevenBit in accordance to the RFC.
        /// </summary>
        /// <remarks>See RFC 2045 section 6 for details</remarks>
        public ContentTransferEncoding ContentTransferEncoding { get { return _headers.ContentTransferEncoding; } }

        /// <summary>
        /// ID of the content part (like an attached image). Used with MultiPart messages.<br/>
        /// <br/>
        /// <see langword="null"/> if no Content-ID header field was present in the message.
        /// </summary>
        public string ContentId { get { return _headers.ContentId; } }

        /// <summary>
        /// Used to describe if a <see cref="MimeMessagePart"/> is to be displayed or to be though of as an attachment.<br/>
        /// Also contains information about filename if such was sent.<br/>
        /// <br/>
        /// <see langword="null"/> if no Content-Disposition header field was present in the message
        /// </summary>
        public ContentDisposition ContentDisposition { get { return _headers.ContentDisposition; } }

        /// <summary>
        /// This is the encoding used to parse the message body if the <see cref="MimeMessagePart"/><br/>
        /// is not a MultiPart message. It is derived from the <see cref="ContentType"/> character set property.
        /// </summary>
        /// <remarks>Assigned once in the constructor; US-ASCII is used when no character set was given.</remarks>
        public Encoding BodyEncoding { get; private set; }

        /// <summary>
        /// This is the parsed body of this <see cref="MimeMessagePart"/>.<br/>
/// It is parsed in that way, if the body was ContentTransferEncoded, it has been decoded to the
        /// correct bytes.<br/>
        /// <br/>
        /// It will be <see langword="null"/> if this <see cref="MimeMessagePart"/> is a MultiPart message.<br/>
        /// Use <see cref="IsMultiPart"/> to check if this <see cref="MimeMessagePart"/> is a MultiPart message.
        /// </summary>
        public byte[] Body { get; private set; }

        /// <summary>
        /// Describes if this <see cref="MimeMessagePart"/> is a MultiPart message<br/>
        /// <br/>
        /// The <see cref="MimeMessagePart"/> is a MultiPart message if the <see cref="ContentType"/> media type property starts with "multipart/"
        /// </summary>
        public bool IsMultiPart
        {
            get
            {
                string mediaType = ContentType.MediaType;
                return mediaType.StartsWith("multipart/", StringComparison.OrdinalIgnoreCase);
            }
        }

        /// <summary>
        /// A <see cref="MimeMessagePart"/> is considered to be holding text in it's body if the MediaType
        /// starts either "text/" or is equal to "message/rfc822"
        /// </summary>
        public bool IsText
        {
            get
            {
                string mediaType = ContentType.MediaType;
                if (mediaType.StartsWith("text/", StringComparison.OrdinalIgnoreCase))
                    return true;

                return mediaType.Equals("message/rfc822", StringComparison.OrdinalIgnoreCase);
            }
        }

        /// <summary>
        /// A <see cref="MimeMessagePart"/> is considered to be an attachment, if<br/>
        /// - it is not holding <see cref="IsText">text</see> and is not a <see cref="IsMultiPart">MultiPart</see> message<br/>
        /// or<br/>
        /// - it has a Content-Disposition header that says it is an attachment
        /// </summary>
        public bool IsAttachment
        {
            get
            {
                // Anything that is neither text nor a multipart container counts as an attachment.
                if (!IsText && !IsMultiPart)
                    return true;

                // Otherwise the disposition decides; inline is the opposite of attachment.
                ContentDisposition disposition = ContentDisposition;
                return disposition != null && !disposition.Inline;
            }
        }

        /// <summary>
        /// Returns the name parsed from the Content-Disposition header, or <see cref="FileName"/> if none was parsed
        /// </summary>
        public string Name
        {
            get
            {
                string headerName = _headers.Name;
                return headerName != null ? headerName : FileName;
            }
        }

        /// <summary>
        /// Returns the ContentDisposition's FileName field; if absent, the Content-Type "name" parameter;
        /// if that is absent too, the literal "(no name)". Assigned once in the constructor.
        /// </summary>
        public string FileName { get; private set; }

        /// <summary>
        /// If this <see cref="MimeMessagePart"/> is a MultiPart message, then this property
        /// has a list of each of the Multiple parts that the message consists of.<br/>
/// <br/>
        /// It is an empty collection (never <see langword="null"/>) if it is not a MultiPart message.<br/>
        /// Use <see cref="IsMultiPart"/> to check if this <see cref="MimeMessagePart"/> is a MultiPart message.
        /// </summary>
        public ICollection<MimeMessagePart> MessageParts { get { return new ReadOnlyCollection<MimeMessagePart>(_messageParts); } }
        #endregion

        #region Constructors
        /// <summary>
        /// Parses a MIME multi-part encoded content, using 7-bit encoding (www form using "multipart/form-data")
        /// </summary>
        /// <param name="rawBody">The raw, still transfer-encoded bytes of the body.</param>
        /// <param name="headers">The headers that describe <paramref name="rawBody"/>.</param>
        internal MimeMessagePart(byte[] rawBody, NameValueCollection headers)
            : this(rawBody, new MessageHeader(headers))
        { }

        private MimeMessagePart(byte[] rawBody, MessageHeader headers)
        {
            if (rawBody == null)
                throw new ArgumentNullException("rawBody");

            if (headers == null)
                throw new ArgumentNullException("headers");

            _headers = headers;
            FileName = FindFileName(ContentType, ContentDisposition, "(no name)");
            BodyEncoding = ParseBodyEncoding(ContentType.CharSet);

            // Always initialized, so MessageParts is empty rather than null for non-multipart parts
            _messageParts = new List<MimeMessagePart>();
            ParseBody(rawBody);
        }
        #endregion

        #region Parsing

        /// <summary>
        /// Maps a character set name to an <see cref="Encoding"/>; US-ASCII when no name is given.
        /// </summary>
        static Encoding ParseBodyEncoding(string characterSet)
        {
            // Default encoding in Mime messages is US-ASCII
            Encoding encoding = Encoding.ASCII;

            // If the character set was specified, find the encoding that the character
            // set describes, and use that one instead
            if (!string.IsNullOrEmpty(characterSet))
                encoding = EncodingFinder.FindEncoding(characterSet);

            return encoding;
        }

        /// <summary>
        /// Picks the file name: Content-Disposition file name first, then the Content-Type name,
        /// then <paramref name="defaultName"/>.
        /// </summary>
        private static string FindFileName(ContentType contentType, ContentDisposition contentDisposition, string defaultName)
        {
            if (contentType == null)
                throw new ArgumentNullException("contentType");

            if (contentDisposition != null && contentDisposition.FileName != null)
                return contentDisposition.FileName;

            if (contentType.Name != null)
                return contentType.Name;

            return defaultName;
        }

        /// <summary>
        /// Fills either <see cref="MessageParts"/> (multipart) or <see cref="Body"/> (anything else).
        /// </summary>
        private void ParseBody(byte[] rawBody)
        {
            if (IsMultiPart)
            {
                // Parses a MultiPart message
                ParseMultiPartBody(rawBody);
            }
            else
            {
                // Parses a non MultiPart message
                // Decode the body accordingly and set the Body property
                Body = DecodeBody(rawBody, ContentTransferEncoding);
            }
        }

        /// <summary>
        /// Splits a multipart body on its boundary and parses every piece as a nested part.
        /// </summary>
        private void ParseMultiPartBody(byte[] rawBody)
        {
            // Fetch out the boundary used to delimit the messages within the body
            string multipartBoundary = ContentType.Boundary;

            // Fetch the individual MultiPart message parts using the MultiPart boundary
            List<byte[]> bodyParts = GetMultiPartParts(rawBody, multipartBoundary);

            // Now parse each byte array as a message body and add it to the MessageParts property
            foreach (byte[] bodyPart in bodyParts)
            {
                MimeMessagePart messagePart = GetMessagePart(bodyPart);
                _messageParts.Add(messagePart);
            }
        }

        /// <summary>
        /// Builds a part from raw bytes that still contain the part's own header section.
        /// </summary>
        private static MimeMessagePart GetMessagePart(byte[] rawMessageContent)
        {
            // Find the headers and the body parts of the byte array
            MessageHeader headers;
            byte[] body;
            HeaderExtractor.ExtractHeadersAndBody(rawMessageContent, out headers, out body);

            // Create a new MessagePart from the headers and the body
            return new MimeMessagePart(body, headers);
        }

        /// <summary>
        /// Cuts <paramref name="rawBody"/> into the byte ranges found between boundary lines.
        /// </summary>
        /// <param name="rawBody">The complete multipart body.</param>
        /// <param name="multipPartBoundary">The boundary token, without the leading "--".</param>
        /// <returns>One array per part (headers included); empty when no boundary line exists.</returns>
        private static List<byte[]> GetMultiPartParts(byte[] rawBody, string multipPartBoundary)
        {
            // This is the list we want to return
            List<byte[]> messageBodies = new List<byte[]>();

            // Create a stream from which we can find MultiPart boundaries
            using (MemoryStream stream = new MemoryStream(rawBody))
            {
                bool lastMultipartBoundaryEncountered;

                // Find the start of the first message in this multipart
                int firstBoundary = FindPositionOfNextMultiPartBoundary(stream, multipPartBoundary, out lastMultipartBoundaryEncountered);

                // Without any boundary line there are no parts; do not derive offsets from the -1 sentinel
                if (firstBoundary < 0)
                    return messageBodies;

                // Since the method returns the first character on the line containing the MultiPart boundary, we
                // need to add the MultiPart boundary with prepended "--" and appended CRLF pair to the position returned.
                int startLocation = firstBoundary + ("--" + multipPartBoundary + "\r\n").Length;

                // When we have just parsed the last multipart entry, stop parsing on
                while (!lastMultipartBoundaryEncountered)
                {
                    // Find the end location of the current multipart
                    // Since the method returns the first character on the line containing the MultiPart boundary, we
                    // need to go a CRLF pair back, so that we do not get that into the body of the message part
                    int stopLocation = FindPositionOfNextMultiPartBoundary(stream, multipPartBoundary, out lastMultipartBoundaryEncountered) - "\r\n".Length;

                    // If we could not find the next multipart boundary, but we had not yet discovered the last boundary, then
                    // we will consider the rest of the bytes as contained in a last message part.
                    if (stopLocation <= -1)
                    {
                        // Include everything except the last CRLF.
                        stopLocation = (int)stream.Length - "\r\n".Length;

                        // We consider this as the last part
                        lastMultipartBoundaryEncountered = true;

                        // Special case: when the last multipart delimiter is not ending with "--", but is indeed the last
                        // one, then the next multipart would contain nothing, and we should not include such one.
                        if (startLocation >= stopLocation)
                            break;
                    }

                    // We have now found the start and end of a message part
                    int length = stopLocation - startLocation;

                    // Two boundary lines directly after each other give a negative length; there is nothing
                    // between them, so skip instead of failing on a negative array size.
                    if (length >= 0)
                    {
                        byte[] messageBody = new byte[length];
                        Array.Copy(rawBody, startLocation, messageBody, 0, length);
                        messageBodies.Add(messageBody);
                    }

                    // We want to advance to the next message parts start.
                    // We can find this by jumping forward the MultiPart boundary from the last
                    // message parts end position
                    startLocation = stopLocation + ("\r\n" + "--" + multipPartBoundary + "\r\n").Length;
                }
            }

            // We are done
            return messageBodies;
        }

        /// <summary>
        /// Reads lines until one starts with "--" + boundary.
        /// </summary>
        /// <param name="stream">The stream to read from; it is left positioned after the boundary line.</param>
        /// <param name="multiPartBoundary">The boundary token, without the leading "--".</param>
        /// <param name="lastMultipartBoundaryFound">True when the line found is the closing "--boundary--".</param>
        /// <returns>The stream position of the first character of the boundary line, or -1 if none was found.</returns>
        private static int FindPositionOfNextMultiPartBoundary(Stream stream, string multiPartBoundary, out bool lastMultipartBoundaryFound)
        {
            lastMultipartBoundaryFound = false;

            // The MultiPart boundary is the MultiPartBoundary with "--" in front of it
            // which is to be at the very start of a line
            string delimiter = "--" + multiPartBoundary;

            while (true)
            {
                // Get the current position. This is the first position on the line - no characters of the line will
                // have been read yet
                int currentPos = (int)stream.Position;

                // Read the line
                string line = ReadLineAsAscii(stream);

                // If we kept reading until there was no more lines, we did not meet
                // the MultiPart boundary. -1 is then returned to describe this.
                if (line == null)
                    return -1;

                if (line.StartsWith(delimiter, StringComparison.Ordinal))
                {
                    // Check if the found boundary was also the last one
                    lastMultipartBoundaryFound = line.StartsWith(delimiter + "--", StringComparison.Ordinal);
                    return currentPos;
                }
            }
        }

        /// <summary>
        /// Reverses the Content-Transfer-Encoding applied to a body.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">The encoding value is not one of the known members.</exception>
        private static byte[] DecodeBody(byte[] messageBody, ContentTransferEncoding contentTransferEncoding)
        {
            if (messageBody == null)
                throw new ArgumentNullException("messageBody");

            switch (contentTransferEncoding)
            {
                case ContentTransferEncoding.QuotedPrintable:
                    // If encoded in QuotedPrintable, everything in the body is in US-ASCII
                    return QuotedPrintable.DecodeContentTransferEncoding(Encoding.ASCII.GetString(messageBody));

                case ContentTransferEncoding.Base64:
                    // If encoded in Base64, everything in the body is in US-ASCII
                    return Convert.FromBase64String(Encoding.ASCII.GetString(messageBody));

                case ContentTransferEncoding.SevenBit:
                case ContentTransferEncoding.Binary:
                case ContentTransferEncoding.EightBit:
                    // We do not have to do anything
                    return messageBody;

                default:
                    throw new ArgumentOutOfRangeException("contentTransferEncoding");
            }
        }
        #endregion

        #region Public methods

        // Decodes Body with BodyEncoding; only meaningful for non-multipart parts (Body is null otherwise).
        string GetBodyAsText()
        {
            return BodyEncoding.GetString(Body);
        }

        /// <summary>
        /// Gets this MessagePart's <see cref="Body"/> as text, or null if the IsMultiPart property is true.<br/>
/// </summary>
        public string Text { get { return IsMultiPart ? null : GetBodyAsText(); } }

        #endregion

        #region ReadLineAsAscii

        /// <summary>
        /// Reads bytes up to and including the next '\n'; CR and LF bytes are not part of the result.
        /// </summary>
        /// <returns>The line's bytes, or null when the stream ended before any byte was kept.</returns>
        static byte[] ReadLineAsBytes(Stream stream)
        {
            if (stream == null)
                throw new ArgumentNullException("stream");

            using (MemoryStream memoryStream = new MemoryStream())
            {
                while (true)
                {
                    int justRead = stream.ReadByte();
                    if (justRead == -1 && memoryStream.Length > 0)
                        break;

                    // Check if we started at the end of the stream we read from
                    // and we have not read anything from it yet
                    if (justRead == -1 && memoryStream.Length == 0)
                        return null;

                    char readChar = (char)justRead;

                    // Do not write \r or \n
                    if (readChar != '\r' && readChar != '\n')
                        memoryStream.WriteByte((byte)justRead);

                    // Last point in CRLF pair
                    if (readChar == '\n')
                        break;
                }

                return memoryStream.ToArray();
            }
        }

        /// <summary>
        /// Reads one line (see <see cref="ReadLineAsBytes"/>) and decodes it as US-ASCII; null at end of stream.
        /// </summary>
        internal static string ReadLineAsAscii(Stream stream)
        {
            byte[] readFromStream = ReadLineAsBytes(stream);
            return readFromStream != null ? Encoding.ASCII.GetString(readFromStream) : null;
        }
        #endregion

        #region MessageHeader

        /// <summary>
        /// The MIME headers of one part, parsed into typed properties.
        /// </summary>
        sealed class MessageHeader
        {
            public NameValueCollection Headers { get; private set; }
            public ContentTransferEncoding ContentTransferEncoding { get; private set; }
            public ContentType ContentType { get; private set; }
            public string Name { get; private set; }
            public ContentDisposition ContentDisposition { get; private set; }
            public string ContentId { get; private set; }
            public string ContentDescription { get; private set; }

            internal MessageHeader(NameValueCollection headers)
            {
                if (headers == null)
                    throw new ArgumentNullException("headers");

                Headers = headers;

                // 7BIT is the default ContentTransferEncoding (assumed if not set)
                ContentTransferEncoding = ContentTransferEncoding.SevenBit;

                // text/plain; charset=us-ascii is the default ContentType
                ContentType = new ContentType("text/plain; charset=us-ascii");

                // Now parse the actual headers
                ParseHeaders(headers);
            }

            // Feeds every value of every header to ParseHeader; headers without values are skipped.
            private void ParseHeaders(NameValueCollection headers)
            {
                if (headers == null)
                    throw new ArgumentNullException("headers");

                // Now begin to parse the header values
                foreach (string headerName in headers.Keys)
                {
                    string[] headerValues = headers.GetValues(headerName);
                    if (headerValues != null)
                    {
                        foreach (string headerValue in headerValues)
                        {
                            ParseHeader(headerName, headerValue);
                        }
                    }
                }
            }

            // Applies one header to the matching property; header names this class does not know are ignored.
            private void ParseHeader(string headerName, string headerValue)
            {
                if (headerName == null)
                    throw new ArgumentNullException("headerName");

                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                switch (headerName.ToUpperInvariant())
                {
                    // See http://tools.ietf.org/html/rfc2045#section-6
                    // See ContentTransferEncoding class for more details
                    case "CONTENT-TRANSFER-ENCODING":
                        ContentTransferEncoding = HeaderFieldParser.ParseContentTransferEncoding(headerValue.Trim());
                        break;

                    // See http://tools.ietf.org/html/rfc2045#section-5.1
                    // Example: Content-type: text/plain; charset="us-ascii"
                    case "CONTENT-TYPE":
                        ContentType = HeaderFieldParser.ParseContentType(headerValue);
                        break;

                    // See http://tools.ietf.org/html/rfc2183
                    case "CONTENT-DISPOSITION":
                        string name;
                        ContentDisposition = HeaderFieldParser.ParseContentDisposition(headerValue, out name);
                        Name = name ?? ContentDisposition.FileName;
                        break;

                    // See http://tools.ietf.org/html/rfc2045#section-8
                    case "CONTENT-DESCRIPTION":
                        // Human description of for example a file. Can be encoded
                        ContentDescription = EncodedWord.Decode(headerValue.Trim());
                        break;

                    // See http://tools.ietf.org/html/rfc2045#section-7
                    // Example: <foo4*foo1@bar.net>
                    case "CONTENT-ID":
                        ContentId = HeaderFieldParser.ParseId(headerValue);
                        break;
                }
            }
        }
        #endregion

        #region QuotedPrintable

        /// <summary>
        /// Decoder for Quoted-Printable, both the body variant and the encoded-word variant.
        /// </summary>
        static class QuotedPrintable
        {
            /// <summary>
            /// Decodes the encoded-word variant ("_" means space) and converts the bytes with <paramref name="encoding"/>.
            /// </summary>
            public static string DecodeEncodedWord(string toDecode, Encoding encoding)
            {
                if (toDecode == null)
                    throw new ArgumentNullException("toDecode");

                if (encoding == null)
                    throw new ArgumentNullException("encoding");

                // Decode the QuotedPrintable string and return it
                return encoding.GetString(Rfc2047QuotedPrintableDecode(toDecode, true));
            }

            /// <summary>
            /// Decodes the Content-Transfer-Encoding variant ("_" is a literal underscore) to raw bytes.
            /// </summary>
            public static byte[] DecodeContentTransferEncoding(string toDecode)
            {
                if (toDecode == null)
                    throw new ArgumentNullException("toDecode");

                // Decode the QuotedPrintable string and return it
                return Rfc2047QuotedPrintableDecode(toDecode, false);
            }

            // Shared decoder; encodedWordVariant switches on the "_" => 0x20 rule.
            private static byte[] Rfc2047QuotedPrintableDecode(string toDecode, bool encodedWordVariant)
            {
                if (toDecode == null)
                    throw new ArgumentNullException("toDecode");

                // Create a byte array builder which is roughly equivalent to a StringBuilder
                using (MemoryStream byteArrayBuilder = new MemoryStream())
                {
                    // Remove illegal control characters
                    toDecode = RemoveIllegalControlCharacters(toDecode);

                    // Run through the whole string that needs to be decoded
                    for (int i = 0; i < toDecode.Length; i++)
                    {
                        char currentChar = toDecode[i];
                        if (currentChar == '=')
                        {
                            // Check that there is at least two characters behind the equal sign
                            if (toDecode.Length - i < 3)
                            {
                                // We are at the end of the toDecode string, but something is missing. Handle it the way RFC 2045 states
                                WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSignNotLongEnough(toDecode.Substring(i)));

                                // Since it was the last part, we should stop parsing anymore
                                break;
                            }

                            // Decode the Quoted-Printable part
                            string quotedPrintablePart = toDecode.Substring(i, 3);
                            WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSign(quotedPrintablePart));

                            // We now consumed two extra characters. Go forward two extra characters
                            i += 2;
                        }
                        else
                        {
                            // This character is not quoted printable hex encoded.

                            // Could it be the _ character, which represents space
                            // and are we using the encoded word variant of QuotedPrintable
                            if (currentChar == '_' && encodedWordVariant)
                            {
                                // The RFC specifies that the "_" always represents hexadecimal 20 even if the
                                // SPACE character occupies a different code position in the character set in use.
                                byteArrayBuilder.WriteByte(0x20);
                            }
                            else
                            {
                                // This is not encoded at all. This is a literal which should just be included into the output.
                                byteArrayBuilder.WriteByte((byte)currentChar);
                            }
                        }
                    }

                    return byteArrayBuilder.ToArray();
                }
            }

            private static void WriteAllBytesToStream(Stream stream, byte[] toWrite)
            {
                stream.Write(toWrite, 0, toWrite.Length);
            }

            // Drops unpaired CR/LF and every control character except TAB, CR and LF.
            private static string RemoveIllegalControlCharacters(string input)
            {
                if (input == null)
                    throw new ArgumentNullException("input");

                // First we remove any \r or \n which is not part of a \r\n pair
                input = RemoveCarriageReturnAndNewLinewIfNotInPair(input);

                // Here only legal \r\n is left over
                // We now simply keep them, and the \t which is also allowed
                // \x0A = \n
                // \x0D = \r
                // \x09 = \t)
                return Regex.Replace(input, "[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]", "");
            }

            private static string RemoveCarriageReturnAndNewLinewIfNotInPair(string input)
            {
                if (input == null)
                    throw new ArgumentNullException("input");

                // Use this for building up the new string. This is used for performance instead
                // of altering the input string each time a illegal token is found
                StringBuilder newString = new StringBuilder(input.Length);

                for (int i = 0; i < input.Length; i++)
                {
                    // Check for lonely \r
                    // There is a lonely \r if it is the last character in the input or if there
                    // is no \n following it
                    if (input[i] == '\r' && (i + 1 >= input.Length || input[i + 1] != '\n'))
                    {
                        // Illegal token \r found. Do not add it to the new string

                        // Check for lonely \n
                        // There is a lonely \n if \n is the first character or if there
                        // is no \r in front of it
                    }
                    else if (input[i] == '\n' && (i - 1 < 0 || input[i - 1] != '\r'))
                    {
                        // Illegal token \n found. Do not add it to the new string
                    }
                    else
                    {
                        // No illegal tokens found. Simply insert the character we are at
                        // in our new string
                        newString.Append(input[i]);
                    }
                }

                return newString.ToString();
            }

            // Handles an "=" with fewer than two characters after it: the text is passed through as literal US-ASCII.
            private static byte[] DecodeEqualSignNotLongEnough(string decode)
            {
                if (decode == null)
                    throw new ArgumentNullException("decode");

                // We can only decode wrong length equal signs
                if (decode.Length >= 3)
                    throw new ArgumentException("decode must have length lower than 3", "decode");

                // First char must be =
                if (decode[0] != '=')
                    throw new ArgumentException("First part of decode must be an equal sign", "decode");

                // We will now believe that the string sent to us, was actually not encoded
                // Therefore it must be in US-ASCII and we will return the bytes it corresponds to
                return Encoding.ASCII.GetBytes(decode);
            }

            // Decodes one three character "=XY" group: a soft line break, a hex encoded byte, or (if invalid) literal text.
            private static byte[] DecodeEqualSign(string decode)
            {
                if (decode == null)
                    throw new ArgumentNullException("decode");

                // We can only decode the string if it has length 3 - other calls to this function is invalid
                if (decode.Length != 3)
                    throw new ArgumentException("decode must have length 3", "decode");

                // First char must be =
                if (decode[0] != '=')
                    throw new ArgumentException("decode must start with an equal sign", "decode");

                // There are two cases where an equal sign might appear
                // It might be a
                //   - hex-string like =3D, denoting the character with hex value 3D
                //   - it might be the last character on the line before a CRLF
                //     pair, denoting a soft linebreak, which simply
                //     splits the text up, because of the 76 chars per line restriction
                if (decode.Contains("\r\n"))
                {
                    // Soft break detected
                    // We want to return string.Empty which is equivalent to a zero-length byte array
                    return new byte[0];
                }

                // Hex string detected. Both characters are checked explicitly instead of relying on
                // Convert.ToByte(string, 16): that call accepts a leading '+' and throws ArgumentException
                // (not FormatException) on a leading '-', so "=-1" would escape the intended fallback below.
                // Lower case digits (=3d) are accepted, which is a robustness requirement from RFC 2045.
                char high = decode[1];
                char low = decode[2];
                if (Uri.IsHexDigit(high) && Uri.IsHexDigit(low))
                    return new[] { (byte)((Uri.FromHex(high) << 4) | Uri.FromHex(low)) };

                // RFC 2045 says about robust implementation:
                // An "=" followed by a character that is neither a
                // hexadecimal digit (including "abcdef") nor the CR
                // character of a CRLF pair is illegal.  This case can be
                // the result of US-ASCII text having been included in a
                // quoted-printable part of a message without itself
                // having been subjected to quoted-printable encoding.  A
                // reasonable approach by a robust implementation might be
                // to include the "=" character and the following
                // character in the decoded data without any
                // transformation and, if possible, indicate to the user
                // that proper decoding was not possible at this point in
                // the data.

                // So we choose to believe this is actually an un-encoded string
                // Therefore it must be in US-ASCII and we will return the bytes it corresponds to
                return Encoding.ASCII.GetBytes(decode);
            }
        }
        #endregion

        #region HeaderFieldParser

        /// <summary>
        /// Parsers for the values of individual MIME header fields.
        /// </summary>
        static class HeaderFieldParser
        {
            /// <summary>
            /// Maps a Content-Transfer-Encoding value to the enum; unknown values fall back to SevenBit.
            /// </summary>
            public static ContentTransferEncoding ParseContentTransferEncoding(string headerValue)
            {
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                switch (headerValue.Trim().ToUpperInvariant())
                {
                    case "7BIT":
                        return ContentTransferEncoding.SevenBit;

                    case "8BIT":
                        return ContentTransferEncoding.EightBit;

                    case "QUOTED-PRINTABLE":
                        return ContentTransferEncoding.QuotedPrintable;

                    case "BASE64":
                        return ContentTransferEncoding.Base64;

                    case "BINARY":
                        return ContentTransferEncoding.Binary;

                    // If a wrong argument is passed to this parser method, then we assume
                    // default encoding, which is SevenBit.
                    // This is to ensure that we do not throw exceptions, even if the email not MIME valid.
                    default:
                        //DefaultLogger.Log.LogDebug("Wrong ContentTransferEncoding was used. It was: " + headerValue);
                        return ContentTransferEncoding.SevenBit;
                }
            }

            /// <summary>
            /// Maps an importance value to <see cref="MailPriority"/>; unknown values give Normal.
            /// </summary>
            public static MailPriority ParseImportance(string headerValue)
            {
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                switch (headerValue.ToUpperInvariant())
                {
                    case "5":
                    case "HIGH":
                        return MailPriority.High;

                    case "3":
                    case "NORMAL":
                        return MailPriority.Normal;

                    case "1":
                    case "LOW":
                        return MailPriority.Low;

                    default:
                        //DefaultLogger.Log.LogDebug("HeaderFieldParser: Unknown importance value: \"" + headerValue + "\". Using default of normal importance.");
                        return MailPriority.Normal;
                }
            }

            /// <summary>
            /// Parses a Content-Type header value into a <see cref="ContentType"/>.
            /// </summary>
            public static ContentType ParseContentType(string headerValue)
            {
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                // We create an empty Content-Type which we will fill in when we see the values
                ContentType contentType = new ContentType();

                // Now decode the parameters
                List<KeyValuePair<string, string>> parameters = Rfc2231Decoder.Decode(headerValue);

                foreach (KeyValuePair<string, string> keyValuePair in parameters)
                {
                    string key = keyValuePair.Key.ToUpperInvariant().Trim();
                    string value = RemoveQuotesIfAny(keyValuePair.Value.Trim());
                    switch (key)
                    {
                        case "":
                            // This is the MediaType - it has no key since it is the first one mentioned in the
                            // headerValue and has no = in it.

                            // Check for illegal content-type
                            if (value.ToUpperInvariant().Equals("TEXT"))
                                value = "text/plain";

                            contentType.MediaType = value;
                            break;

                        case "BOUNDARY":
                            contentType.Boundary = value;
                            break;

                        case "CHARSET":
                            contentType.CharSet = value;
                            break;

                        case "NAME":
                            contentType.Name = EncodedWord.Decode(value);
                            break;

                        default:
                            // This is to shut up the code help that is saying that contentType.Parameters
                            // can be null - which it cant!
                            if (contentType.Parameters == null)
                                throw new Exception("The ContentType parameters property is null. This will never be thrown.");

                            // We add the unknown value to our parameters list
                            // "Known" unknown values are:
                            // - title
                            // - report-type
                            contentType.Parameters.Add(key, value);
                            break;
                    }
                }

                return contentType;
            }

            /// <summary>
            /// Parses a Content-Disposition header value.
            /// </summary>
            /// <param name="headerValue">The raw header value.</param>
            /// <param name="name">Receives the decoded "name" parameter, or null when there is none.</param>
            /// <exception cref="ArgumentException">A parameter is neither known nor prefixed with "X-".</exception>
            public static ContentDisposition ParseContentDisposition(string headerValue, out string name)
            {
                name = null;
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                // See http://www.ietf.org/rfc/rfc2183.txt for RFC definition

                // Create empty ContentDisposition - we will fill in details as we read them
                ContentDisposition contentDisposition = new ContentDisposition();

                // Now decode the parameters
                List<KeyValuePair<string, string>> parameters = Rfc2231Decoder.Decode(headerValue);

                foreach (KeyValuePair<string, string> keyValuePair in parameters)
                {
                    string key = keyValuePair.Key.ToUpperInvariant().Trim();
                    string value = keyValuePair.Value;
                    switch (key)
                    {
                        case "":
                            // This is the DispositionType - it has no key since it is the first one
                            // and has no = in it.
                            contentDisposition.DispositionType = value;
                            break;

                        // The correct name of the parameter is filename, but some emails also contains the parameter
                        // name, which also holds the name of the file. Therefore we use both names for the same field.
                        case "NAME":
                            contentDisposition.FileName = name = EncodedWord.Decode(RemoveQuotesIfAny(value));
                            break;

                        case "FILENAME":
                            // The filename might be in quotes, and it might be encoded-word encoded
                            contentDisposition.FileName = EncodedWord.Decode(RemoveQuotesIfAny(value));
                            break;

                        case "CREATION-DATE":
                            // Notice that we need to create a new DateTime because of a failure in .NET 2.0.
                            // The failure is: you cannot give contentDisposition a DateTime with a Kind of UTC
                            // It will set the CreationDate correctly, but when trying to read it out it will throw an exception.
                            // It is the same with ModificationDate and ReadDate.
                            // This is fixed in 4.0 - maybe in 3.0 too.
                            // Therefore we create a new DateTime which have a DateTimeKind set to unspecified
                            DateTime creationDate = new DateTime(Rfc2822DateTime.StringToDate(RemoveQuotesIfAny(value)).Ticks);
                            contentDisposition.CreationDate = creationDate;
                            break;

                        case "MODIFICATION-DATE":
                            DateTime modificationDate = new DateTime(Rfc2822DateTime.StringToDate(RemoveQuotesIfAny(value)).Ticks);
                            contentDisposition.ModificationDate = modificationDate;
                            break;

                        case "READ-DATE":
                            DateTime readDate = new DateTime(Rfc2822DateTime.StringToDate(RemoveQuotesIfAny(value)).Ticks);
                            contentDisposition.ReadDate = readDate;
                            break;

                        case "SIZE":
                            // ContentDisposition.Size is a 64 bit value, so parse it as one
                            contentDisposition.Size = long.Parse(RemoveQuotesIfAny(value), CultureInfo.InvariantCulture);
                            break;

                        default:
                            // The key was upper-cased above, so an ordinal comparison is exact
                            if (key.StartsWith("X-", StringComparison.Ordinal))
                            {
                                contentDisposition.Parameters.Add(key, RemoveQuotesIfAny(value));
                                break;
                            }

                            throw new ArgumentException("Unknown parameter in Content-Disposition. Ask developer to fix! Parameter: " + key);
                    }
                }

                return contentDisposition;
            }

            /// <summary>
            /// Strips surrounding whitespace and the enclosing angle brackets from a message/content id.
            /// </summary>
            internal static string ParseId(string headerValue)
            {
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                // Remove whitespace in front and behind since
                // whitespace is allowed there
                // Remove the last > and the first <
                return headerValue.Trim().TrimEnd('>').TrimStart('<');
            }

            /// <summary>
            /// Splits a value holding several ids and returns each one without its angle brackets.
            /// </summary>
            internal static List<string> ParseMultipleIDs(string headerValue)
            {
                if (headerValue == null)
                    throw new ArgumentNullException("headerValue");

                List<string> returner = new List<string>();

                // Split the string by >
                // We cannot use ' ' (space) here since this is a possible value:
                // <test@test.com><test2@test.com>
                string[] ids = headerValue.Trim().Split(new[] { '>' }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string id in ids)
                {
                    returner.Add(ParseId(id));
                }

                return returner;
            }

            /// <summary>
            /// Removes one pair of enclosing double quotes, if present; any other text is returned unchanged.
            /// </summary>
            internal static string RemoveQuotesIfAny(string text)
            {
                if (text == null)
                    throw new ArgumentNullException("text");

                // Check if there are quotes at both ends. At least two characters are required: an empty
                // string has no first character, and a lone quote must not count as both ends.
                if (text.Length >= 2 && text[0] == '"' && text[text.Length - 1] == '"')
                {
                    // Remove quotes at both ends
                    return text.Substring(1, text.Length - 2);
                }

                // If no quotes were found, the text is just returned
                return text;
            }
        }
        #endregion

        #region Rfc2231Decoder

        static class Rfc2231Decoder
        {
            internal static List<KeyValuePair<string, string>> Decode(string
toDecode) { if (toDecode == null) throw new ArgumentNullException("toDecode"); // Normalize the input to take account for missing semicolons after parameters. // Example // text/plain; charset=\"iso-8859-1\" name=\"somefile.txt\" or // text/plain;\tcharset=\"iso-8859-1\"\tname=\"somefile.txt\" // is normalized to // text/plain; charset=\"iso-8859-1\"; name=\"somefile.txt\" // Only works for parameters inside quotes // \s = matches whitespace toDecode = Regex.Replace(toDecode, "=\\s*\"(?[^\"]*)\"\\s", "=\"${value}\"; "); // Normalize // Since the above only works for parameters inside quotes, we need to normalize // the special case with the first parameter. // Example: // attachment filename="foo" // is normalized to // attachment; filename="foo" // ^ = matches start of line (when not inside square bracets []) toDecode = Regex.Replace(toDecode, @"^(?[^;\s]+)\s(?[^;\s]+)", "${first}; ${second}"); // Split by semicolon, but only if not inside quotes List splitted = SplitStringWithCharNotInsideQuotes(toDecode.Trim(), ';'); List> collection = new List>(splitted.Count); foreach (string part in splitted) { // Empty strings should not be processed if (part.Trim().Length == 0) continue; string[] keyValue = part.Trim().Split(new[] { '=' }, 2); if (keyValue.Length == 1) { collection.Add(new KeyValuePair("", keyValue[0])); } else if (keyValue.Length == 2) { collection.Add(new KeyValuePair(keyValue[0], keyValue[1])); } else { throw new ArgumentException("When splitting the part \"" + part + "\" by = there was " + keyValue.Length + " parts. 
Only 1 and 2 are supported"); } } return DecodePairs(collection); } internal static List SplitStringWithCharNotInsideQuotes(string input, char toSplitAt) { List elements = new List(); int lastSplitLocation = 0; bool insideQuote = false; char[] characters = input.ToCharArray(); for (int i = 0; i < characters.Length; i++) { char character = characters[i]; if (character == '\"') insideQuote = !insideQuote; // Only split if we are not inside quotes if (character == toSplitAt && !insideQuote) { // We need to split int length = i - lastSplitLocation; elements.Add(input.Substring(lastSplitLocation, length)); // Update last split location // + 1 so that we do not include the character used to split with next time lastSplitLocation = i + 1; } } // Add the last part elements.Add(input.Substring(lastSplitLocation, input.Length - lastSplitLocation)); return elements; } internal static List> DecodePairs(List> pairs) { if (pairs == null) throw new ArgumentNullException("pairs"); List> resultPairs = new List>(pairs.Count); int pairsCount = pairs.Count; for (int i = 0; i < pairsCount; i++) { KeyValuePair currentPair = pairs[i]; string key = currentPair.Key; string value = HeaderFieldParser.RemoveQuotesIfAny(currentPair.Value); // Is it a continuation parameter? (encoded or not) if (key.EndsWith("*0", StringComparison.OrdinalIgnoreCase) || key.EndsWith("*0*", StringComparison.OrdinalIgnoreCase)) { // This encoding will not be used if we get into the if which tells us // that the whole continuation is not encoded string encoding = "notEncoded - Value here is never used"; // Now lets find out if it is encoded too. if (key.EndsWith("*0*", StringComparison.OrdinalIgnoreCase)) { // It is encoded. // Fetch out the encoding for later use and decode the value // If the value was not encoded as the email specified // encoding will be set to null. This will be used later. 
value = DecodeSingleValue(value, out encoding); // Find the right key to use to store the full value // Remove the start *0 which tells is it is a continuation, and the first one // And remove the * afterwards which tells us it is encoded key = key.Replace("*0*", ""); } else { // It is not encoded, and no parts of the continuation is encoded either // Find the right key to use to store the full value // Remove the start *0 which tells is it is a continuation, and the first one key = key.Replace("*0", ""); } // The StringBuilder will hold the full decoded value from all continuation parts StringBuilder builder = new StringBuilder(); // Append the decoded value builder.Append(value); // Now go trough the next keys to see if they are part of the continuation for (int j = i + 1, continuationCount = 1; j < pairsCount; j++, continuationCount++) { string jKey = pairs[j].Key; string valueJKey = HeaderFieldParser.RemoveQuotesIfAny(pairs[j].Value); if (jKey.Equals(key + "*" + continuationCount)) { // This value part of the continuation is not encoded // Therefore remove qoutes if any and add to our stringbuilder builder.Append(valueJKey); // Remember to increment i, as we have now treated one more KeyValuePair i++; } else if (jKey.Equals(key + "*" + continuationCount + "*")) { // We will not get into this part if the first part was not encoded // Therefore the encoding will only be used if and only if the // first part was encoded, in which case we have remembered the encoding used // Sometimes an email creator says that a string was encoded, but it really // `was not. This is to catch that problem. 
if (encoding != null) { // This value part of the continuation is encoded // the encoding is not given in the current value, // but was given in the first continuation, which we remembered for use here valueJKey = DecodeSingleValue(valueJKey, encoding); } builder.Append(valueJKey); // Remember to increment i, as we have now treated one more KeyValuePair i++; } else { // No more keys for this continuation break; } } // Add the key and the full value as a pair value = builder.ToString(); resultPairs.Add(new KeyValuePair(key, value)); } else if (key.EndsWith("*", StringComparison.OrdinalIgnoreCase)) { // This parameter is only encoded - it is not part of a continuation // We need to change the key from "*" to "" and decode the value // To get the key we want, we remove the last * that denotes // that the value hold by the key was encoded key = key.Replace("*", ""); // Decode the value string throwAway; value = DecodeSingleValue(value, out throwAway); // Now input the new value with the new key resultPairs.Add(new KeyValuePair(key, value)); } else { // Fully normal key - the value is not encoded // Therefore nothing to do, and we can simply pass the pair // as being decoded now resultPairs.Add(currentPair); } } return resultPairs; } private static string DecodeSingleValue(string toDecode, out string encodingUsed) { if (toDecode == null) throw new ArgumentNullException("toDecode"); // Check if input has a part describing the encoding if (toDecode.IndexOf('\'') == -1) { // The input was not encoded (at least not valid) and it is returned as is //DefaultLogger.Log.LogDebug("Rfc2231Decoder: Someone asked me to decode a string which was not encoded - returning raw string. 
Input: " + toDecode); encodingUsed = null; return toDecode; } encodingUsed = toDecode.Substring(0, toDecode.IndexOf('\'')); toDecode = toDecode.Substring(toDecode.LastIndexOf('\'') + 1); return DecodeSingleValue(toDecode, encodingUsed); } private static string DecodeSingleValue(string valueToDecode, string encoding) { if (valueToDecode == null) throw new ArgumentNullException("valueToDecode"); if (encoding == null) throw new ArgumentNullException("encoding"); // The encoding used is the same as QuotedPrintable, we only // need to change % to = // And otherwise make it look like the correct EncodedWord encoding valueToDecode = "=?" + encoding + "?Q?" + valueToDecode.Replace("%", "=") + "?="; return EncodedWord.Decode(valueToDecode); } } #endregion #region Rfc2822DateTime static class Rfc2822DateTime { public static DateTime StringToDate(string inputDate) { if (inputDate == null) throw new ArgumentNullException("inputDate"); // Old date specification allows comments and a lot of whitespace inputDate = StripCommentsAndExcessWhitespace(inputDate); try { // Extract the DateTime DateTime dateTime = ExtractDateTime(inputDate); // If a day-name is specified in the inputDate string, check if it fits with the date ValidateDayNameIfAny(dateTime, inputDate); // Convert the date into UTC dateTime = new DateTime(dateTime.Ticks, DateTimeKind.Utc); // Adjust according to the time zone dateTime = AdjustTimezone(dateTime, inputDate); // Return the parsed date return dateTime; } catch (FormatException e) // Convert.ToDateTime() Failure { throw new ArgumentException("Could not parse date: " + e.Message + ". Input was: \"" + inputDate + "\"", e); } catch (ArgumentException e) { throw new ArgumentException("Could not parse date: " + e.Message + ". 
Input was: \"" + inputDate + "\"", e); } } private static DateTime AdjustTimezone(DateTime dateTime, string dateInput) { // We know that the timezones are always in the last part of the date input string[] parts = dateInput.Split(' '); string lastPart = parts[parts.Length - 1]; // Convert timezones in older formats to [+-]dddd format. lastPart = Regex.Replace(lastPart, @"UT|GMT|EST|EDT|CST|CDT|MST|MDT|PST|PDT|[A-I]|[K-Y]|Z", MatchEvaluator); // Find the timezone specification // Example: Fri, 21 Nov 1997 09:55:06 -0600 // finds -0600 Match match = Regex.Match(lastPart, @"[\+-](?\d\d)(?\d\d)"); if (match.Success) { // We have found that the timezone is in +dddd or -dddd format // Add the number of hours and minutes to our found date int hours = int.Parse(match.Groups["hours"].Value); int minutes = int.Parse(match.Groups["minutes"].Value); int factor = match.Value[0] == '+' ? -1 : 1; dateTime = dateTime.AddHours(factor * hours); dateTime = dateTime.AddMinutes(factor * minutes); return dateTime; } // A timezone of -0000 is the same as doing nothing return dateTime; } private static string MatchEvaluator(Match match) { if (!match.Success) { throw new ArgumentException("Match success are always true"); } switch (match.Value) { // "A" through "I" // are equivalent to "+0100" through "+0900" respectively case "A": return "+0100"; case "B": return "+0200"; case "C": return "+0300"; case "D": return "+0400"; case "E": return "+0500"; case "F": return "+0600"; case "G": return "+0700"; case "H": return "+0800"; case "I": return "+0900"; // "K", "L", and "M" // are equivalent to "+1000", "+1100", and "+1200" respectively case "K": return "+1000"; case "L": return "+1100"; case "M": return "+1200"; // "N" through "Y" // are equivalent to "-0100" through "-1200" respectively case "N": return "-0100"; case "O": return "-0200"; case "P": return "-0300"; case "Q": return "-0400"; case "R": return "-0500"; case "S": return "-0600"; case "T": return "-0700"; case "U": return 
"-0800"; case "V": return "-0900"; case "W": return "-1000"; case "X": return "-1100"; case "Y": return "-1200"; // "Z", "UT" and "GMT" // is equivalent to "+0000" case "Z": case "UT": case "GMT": return "+0000"; // US time zones case "EDT": return "-0400"; // EDT is semantically equivalent to -0400 case "EST": return "-0500"; // EST is semantically equivalent to -0500 case "CDT": return "-0500"; // CDT is semantically equivalent to -0500 case "CST": return "-0600"; // CST is semantically equivalent to -0600 case "MDT": return "-0600"; // MDT is semantically equivalent to -0600 case "MST": return "-0700"; // MST is semantically equivalent to -0700 case "PDT": return "-0700"; // PDT is semantically equivalent to -0700 case "PST": return "-0800"; // PST is semantically equivalent to -0800 default: throw new ArgumentException("Unexpected input"); } } private static DateTime ExtractDateTime(string dateInput) { // Matches the date and time part of a string // Example: Fri, 21 Nov 1997 09:55:06 -0600 // Finds: 21 Nov 1997 09:55:06 // Seconds does not need to be specified // Even though it is illigal, sometimes hours, minutes or seconds are only specified with one digit Match match = Regex.Match(dateInput, @"\d\d? .+ (\d\d\d\d|\d\d) \d?\d:\d?\d(:\d?\d)?"); if (match.Success) { return Convert.ToDateTime(match.Value, CultureInfo.InvariantCulture); } throw new InvalidDataException("The given date does not appear to be in a valid format: " + dateInput); //return DateTime.MinValue; } private static void ValidateDayNameIfAny(DateTime dateTime, string dateInput) { // Check if there is a day name in front of the date // Example: Fri, 21 Nov 1997 09:55:06 -0600 if (dateInput.Length >= 4 && dateInput[3] == ',') { string dayName = dateInput.Substring(0, 3); // If a dayName was specified. 
Check that the dateTime and the dayName // agrees on which day it is // This is just a failure-check and could be left out if ((dateTime.DayOfWeek == DayOfWeek.Monday && !dayName.Equals("Mon")) || (dateTime.DayOfWeek == DayOfWeek.Tuesday && !dayName.Equals("Tue")) || (dateTime.DayOfWeek == DayOfWeek.Wednesday && !dayName.Equals("Wed")) || (dateTime.DayOfWeek == DayOfWeek.Thursday && !dayName.Equals("Thu")) || (dateTime.DayOfWeek == DayOfWeek.Friday && !dayName.Equals("Fri")) || (dateTime.DayOfWeek == DayOfWeek.Saturday && !dayName.Equals("Sat")) || (dateTime.DayOfWeek == DayOfWeek.Sunday && !dayName.Equals("Sun"))) { throw new InvalidDataException("Day-name does not correspond to the weekday of the date: " + dateInput); } } // If no day name was found no checks can be made } private static string StripCommentsAndExcessWhitespace(string input) { // Strip out comments // Also strips out nested comments input = Regex.Replace(input, @"(\((?>\((?)|\)(?<-C>)|.?)*(?(C)(?!))\))", ""); // Reduce any whitespace character to one space only input = Regex.Replace(input, @"\s+", " "); // Remove all initial whitespace input = Regex.Replace(input, @"^\s+", ""); // Remove all ending whitespace input = Regex.Replace(input, @"\s+$", ""); // Remove spaces at colons // Example: 22: 33 : 44 => 22:33:44 input = Regex.Replace(input, @" ?: ?", ":"); return input; } } #endregion #region EncodedWord static class EncodedWord { public static string Decode(string encodedWords) { if (encodedWords == null) throw new ArgumentNullException("encodedWords"); // Notice that RFC2231 redefines the BNF to // encoded-word := "=?" charset ["*" language] "?" encoded-text "?=" // but no usage of this BNF have been spotted yet. It is here to // ease debugging if such a case is discovered. // This is the regex that should fit the BNF // RFC Says that NO WHITESPACE is allowed in this encoding, but there are examples // where whitespace is there, and therefore this regex allows for such. 
const string encodedWordRegex = @"\=\?(?\S+?)\?(?\w)\?(?.+?)\?\="; // \w Matches any word character including underscore. Equivalent to "[A-Za-z0-9_]". // \S Matches any nonwhite space character. Equivalent to "[^ \f\n\r\t\v]". // +? non-gready equivalent to + // (?REGEX) is a named group with name NAME and regular expression REGEX // Any amount of linear-space-white between 'encoded-word's, // even if it includes a CRLF followed by one or more SPACEs, // is ignored for the purposes of display. // http://tools.ietf.org/html/rfc2047#page-12 // Define a regular expression that captures two encoded words with some whitespace between them const string replaceRegex = @"(?" + encodedWordRegex + @")\s+(?" + encodedWordRegex + ")"; // Then, find an occourance of such an expression, but remove the whitespace inbetween when found encodedWords = Regex.Replace(encodedWords, replaceRegex, "${first}${second}"); string decodedWords = encodedWords; MatchCollection matches = Regex.Matches(encodedWords, encodedWordRegex); foreach (Match match in matches) { // If this match was not a success, we should not use it if (!match.Success) continue; string fullMatchValue = match.Value; string encodedText = match.Groups["Content"].Value; string encoding = match.Groups["Encoding"].Value; string charset = match.Groups["Charset"].Value; // Get the encoding which corrosponds to the character set Encoding charsetEncoding = EncodingFinder.FindEncoding(charset); // Store decoded text here when done string decodedText; // Encoding may also be written in lowercase switch (encoding.ToUpperInvariant()) { // RFC: // The "B" encoding is identical to the "BASE64" // encoding defined by RFC 2045. // http://tools.ietf.org/html/rfc2045#section-6.8 case "B": decodedText = charsetEncoding.GetString(Convert.FromBase64String(encodedText)); break; // RFC: // The "Q" encoding is similar to the "Quoted-Printable" content- // transfer-encoding defined in RFC 2045. // There are more details to this. 
Please check // http://tools.ietf.org/html/rfc2047#section-4.2 // case "Q": decodedText = QuotedPrintable.DecodeEncodedWord(encodedText, charsetEncoding); break; default: throw new ArgumentException("The encoding " + encoding + " was not recognized"); } // Repalce our encoded value with our decoded value decodedWords = decodedWords.Replace(fullMatchValue, decodedText); } return decodedWords; } } #endregion #region EncodingFinder static class EncodingFinder { public delegate Encoding FallbackDecoderDelegate(string characterSet); public static FallbackDecoderDelegate FallbackDecoder { private get; set; } private static Dictionary EncodingMap { get; set; } static EncodingFinder() { Reset(); } internal static void Reset() { EncodingMap = new Dictionary(); FallbackDecoder = null; // Some emails incorrectly specify the encoding as utf8, but it should have been utf-8. AddMapping("utf8", Encoding.UTF8); } internal static Encoding FindEncoding(string characterSet) { if (characterSet == null) throw new ArgumentNullException("characterSet"); string charSetUpper = characterSet.ToUpperInvariant(); // Check if the characterSet is explicitly mapped to an encoding if (EncodingMap.ContainsKey(charSetUpper)) return EncodingMap[charSetUpper]; // Try to find the generally find the encoding try { if (charSetUpper.Contains("WINDOWS") || charSetUpper.Contains("CP")) { // It seems the characterSet contains an codepage value, which we should use to parse the encoding charSetUpper = charSetUpper.Replace("CP", ""); // Remove cp charSetUpper = charSetUpper.Replace("WINDOWS", ""); // Remove windows charSetUpper = charSetUpper.Replace("-", ""); // Remove - which could be used as cp-1554 // Now we hope the only thing left in the characterSet is numbers. int codepageNumber = int.Parse(charSetUpper, CultureInfo.InvariantCulture); return Encoding.GetEncoding(codepageNumber); } // It seems there is no codepage value in the characterSet. 
It must be a named encoding return Encoding.GetEncoding(characterSet); } catch (ArgumentException) { // The encoding could not be found generally. // Try to use the FallbackDecoder if it is defined. // Check if it is defined if (FallbackDecoder == null) throw; // It was not defined - throw catched exception // Use the FallbackDecoder Encoding fallbackDecoderResult = FallbackDecoder(characterSet); // Check if the FallbackDecoder had a solution if (fallbackDecoderResult != null) return fallbackDecoderResult; // If no solution was found, throw catched exception throw; } } public static void AddMapping(string characterSet, Encoding encoding) { if (characterSet == null) throw new ArgumentNullException("characterSet"); if (encoding == null) throw new ArgumentNullException("encoding"); // Add the mapping using uppercase EncodingMap.Add(characterSet.ToUpperInvariant(), encoding); } } #endregion #region HeaderExtractor static class HeaderExtractor { private static int FindHeaderEndPosition(byte[] messageContent) { // Convert the byte array into a stream using (Stream stream = new MemoryStream(messageContent)) { while (true) { // Read a line from the stream. 
We know headers are in US-ASCII // therefore it is not problem to read them as such string line = MimeMessagePart.ReadLineAsAscii(stream); // The end of headers is signaled when a blank line is found // or if the line is null - in which case the email is actually an email with // only headers but no body if (string.IsNullOrEmpty(line)) return (int)stream.Position; } } } public static void ExtractHeadersAndBody(byte[] fullRawMessage, out MessageHeader headers, out byte[] body) { if (fullRawMessage == null) throw new ArgumentNullException("fullRawMessage"); // Find the end location of the headers int endOfHeaderLocation = FindHeaderEndPosition(fullRawMessage); // The headers are always in ASCII - therefore we can convert the header part into a string // using US-ASCII encoding string headersString = Encoding.ASCII.GetString(fullRawMessage, 0, endOfHeaderLocation); // Now parse the headers to a NameValueCollection NameValueCollection headersUnparsedCollection = ExtractHeaders(headersString); // Use the NameValueCollection to parse it into a strongly-typed MessageHeader header headers = new MessageHeader(headersUnparsedCollection); // Since we know where the headers end, we also know where the body is // Copy the body part into the body parameter body = new byte[fullRawMessage.Length - endOfHeaderLocation]; Array.Copy(fullRawMessage, endOfHeaderLocation, body, 0, body.Length); } private static NameValueCollection ExtractHeaders(string messageContent) { if (messageContent == null) throw new ArgumentNullException("messageContent"); NameValueCollection headers = new NameValueCollection(); using (StringReader messageReader = new StringReader(messageContent)) { // Read until all headers have ended. // The headers ends when an empty line is encountered // An empty message might actually not have an empty line, in which // case the headers end with null value. 
string line; while (!string.IsNullOrEmpty(line = messageReader.ReadLine())) { // Split into name and value KeyValuePair header = SeparateHeaderNameAndValue(line); // First index is header name string headerName = header.Key; // Second index is the header value. // Use a StringBuilder since the header value may be continued on the next line StringBuilder headerValue = new StringBuilder(header.Value); // Keep reading until we would hit next header // This if for handling multi line headers while (IsMoreLinesInHeaderValue(messageReader)) { // Unfolding is accomplished by simply removing any CRLF // that is immediately followed by WSP // This was done using ReadLine (it discards CRLF) // See http://tools.ietf.org/html/rfc822#section-3.1.1 for more information string moreHeaderValue = messageReader.ReadLine(); // If this exception is ever raised, there is an serious algorithm failure // IsMoreLinesInHeaderValue does not return true if the next line does not exist // This check is only included to stop the nagging "possibly null" code analysis hint if (moreHeaderValue == null) throw new ArgumentException("This will never happen"); // Simply append the line just read to the header value headerValue.Append(moreHeaderValue); } // Now we have the name and full value. 
Add it headers.Add(headerName, headerValue.ToString()); } } return headers; } private static bool IsMoreLinesInHeaderValue(TextReader reader) { int peek = reader.Peek(); if (peek == -1) return false; char peekChar = (char)peek; // A multi line header must have a whitespace character // on the next line if it is to be continued return peekChar == ' ' || peekChar == '\t'; } internal static KeyValuePair SeparateHeaderNameAndValue(string rawHeader) { if (rawHeader == null) throw new ArgumentNullException("rawHeader"); string key = string.Empty; string value = string.Empty; int indexOfColon = rawHeader.IndexOf(':'); // Check if it is allowed to make substring calls if (indexOfColon >= 0 && rawHeader.Length >= indexOfColon + 1) { key = rawHeader.Substring(0, indexOfColon).Trim(); value = rawHeader.Substring(indexOfColon + 1).Trim(); } return new KeyValuePair(key, value); } } #endregion } #endregion #region ContentTransferEncoding /// /// that describes the ContentTransferEncoding header field /// /// See RFC 2045 section 6 for more details public enum ContentTransferEncoding { /// /// 7 bit Encoding /// SevenBit, /// /// 8 bit Encoding /// EightBit, /// /// Quoted Printable Encoding /// QuotedPrintable, /// /// Base64 Encoding /// Base64, /// /// Binary Encoding /// Binary } #endregion }