#region Copyright 2010-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#endregion
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Globalization;
using System.Text;

namespace CSharpTest.Net.Data
{
    /// <summary>
    /// Options that define formatting of the CSV file
    /// </summary>
    [Flags]
    public enum CsvOptions
    {
        /// <summary> No options defined </summary>
        None = 0,
        /// <summary> The first line contains the names of the fields </summary>
        HasFieldHeaders = 1,
    }

    /// <summary>
    /// Provides an interface to CSV/Tab delimited text files.
    /// </summary>
    /// <remarks>
    /// Every field is exposed as a <see cref="String"/>; the typed accessors parse that
    /// string using the format provider supplied at construction.
    /// </remarks>
    public class CsvReader : IDataReader
    {
        // Maps a header name (case-insensitive) to its zero-based column ordinal.
        private readonly Dictionary<string, int> _fieldNames;
        private readonly TextReader _reader;
        private readonly CsvOptions _options;
        private readonly IFormatProvider _formatting;
        private readonly char _delim;
        private readonly int _depth;

        private int _recordCount;
        private bool _closed;
        private string[] _currentFields;

        /// <summary> Constructs the CSV reader for the provided text reader </summary>
        /// <param name="reader">The text reader to read from</param>
        /// <param name="options">Options for parsing the text</param>
        /// <param name="fieldDelim">The character used to delineate fields</param>
        /// <param name="formatter">The format provided used for interpreting numbers and dates</param>
        /// <param name="depth">Provides for nested CSV parsers</param>
        protected CsvReader(TextReader reader, CsvOptions options, char fieldDelim, IFormatProvider formatter, int depth)
        {
            _fieldNames = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
            _delim = fieldDelim;
            _reader = reader;
            _options = options;
            _formatting = formatter;
            _depth = depth;
            _recordCount = 0;
            _closed = false;
            _currentFields = new string[0];

            // Consumes the first record as column names when HasFieldHeaders is set.
            ReadHeader();
        }

        #region ctor overloads

        /// <summary> Constructs the CSV reader for the provided text reader </summary>
        /// <param name="reader">The text reader to read from</param>
        /// <param name="options">Options for parsing the text</param>
        /// <param name="fieldDelim">The character used to delineate fields</param>
        /// <param name="formatter">The format provided used for interpreting numbers and dates</param>
        public CsvReader(TextReader reader, CsvOptions options, char fieldDelim, IFormatProvider formatter)
            : this(reader, options, fieldDelim, formatter, 0)
        { }

        /// <summary> Constructs the CSV reader for the provided text file </summary>
        /// <param name="inputFile">The text file to read from</param>
        public CsvReader(string inputFile)
            : this(inputFile, CsvOptions.HasFieldHeaders)
        { }

        /// <summary> Constructs the CSV reader for the provided text file </summary>
        /// <param name="inputFile">The text file to read from</param>
        /// <param name="options">Options for parsing the text</param>
        public CsvReader(string inputFile, CsvOptions options)
            : this(inputFile, options, ',', CultureInfo.CurrentCulture)
        { }

        /// <summary> Constructs the CSV reader for the provided text file </summary>
        /// <param name="inputFile">The text file to read from</param>
        /// <param name="options">Options for parsing the text</param>
        /// <param name="fieldDelim">The character used to delineate fields</param>
        /// <param name="formatter">The format provided used for interpreting numbers and dates</param>
        public CsvReader(string inputFile, CsvOptions options, char fieldDelim, IFormatProvider formatter)
            : this(new StreamReader(File.Open(inputFile, FileMode.Open, FileAccess.Read, FileShare.Read)), options, fieldDelim, formatter)
        { }

        /// <summary> Constructs the CSV reader for the provided text reader </summary>
        /// <param name="reader">The text reader to read from</param>
        public CsvReader(TextReader reader)
            : this(reader, CsvOptions.HasFieldHeaders)
        { }

        /// <summary> Constructs the CSV reader for the provided text reader </summary>
        /// <param name="reader">The text reader to read from</param>
        /// <param name="options">Options for parsing the text</param>
        public CsvReader(TextReader reader, CsvOptions options)
            : this(reader, options, ',', CultureInfo.CurrentCulture)
        { }

        #endregion

        /// <summary>
        /// Disposes of the reader
        /// </summary>
        public void Dispose()
        {
            Close();
        }

        /// <summary>
        /// Closes the csv reader and disposes the underlying text reader
        /// </summary>
        public void Close()
        {
            _recordCount = -1;
            _currentFields = new string[0];
            _closed = true;
            _reader.Dispose();
        }

        /// <summary>
        /// Returns true if the reader has been closed
        /// </summary>
        public bool IsClosed
        {
            get { return _closed; }
        }

        /// <summary>
        /// Returns the depth (zero based) of the reader when using nested CSV parsers
        /// </summary>
        public int Depth
        {
            get { return _depth; }
        }

        /// <summary>
        /// There is never a second result set; closes the reader and returns false.
        /// </summary>
        bool IDataReader.NextResult()
        {
            Close();
            return false;
        }

        /// <summary>
        /// Provides a single-record parser of CSV content
        /// </summary>
        /// <remarks>
        /// A record ends at a carriage return, a line feed, a carriage return/line feed
        /// pair, or the end of the input, independent of the host platform. Whitespace
        /// before a field is skipped and whitespace after an unquoted field is trimmed.
        /// A field may be enclosed in double quotes, in which case delimiters and line
        /// breaks are kept verbatim and a doubled quote yields one literal quote.
        /// </remarks>
        /// <param name="reader">The text reader positioned at the start of a record</param>
        /// <param name="delim">The character used to delineate fields</param>
        /// <returns>
        /// The fields of the record; an empty array when the input was exhausted
        /// without finding any field.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null</exception>
        public static string[] ReadCsvLine(TextReader reader, Char delim)
        {
            if (reader == null)
            {
                throw new ArgumentNullException("reader");
            }

            const char quote = '"';
            const char carriageReturn = '\r';
            const char lineFeed = '\n';

            // True when a field has been started (or a delimiter promised one) but it has
            // not been stored yet, so it must be emitted if the input ends here.
            bool pending = false;
            List<string> fields = new List<string>();
            StringBuilder sbField = new StringBuilder();

            int next;
            while (-1 != (next = reader.Read()))
            {
                char ch = (char)next;
                bool endOfLine = ch == carriageReturn || ch == lineFeed;

                if (ch == delim || endOfLine)
                {
                    // A delimiter always implies one more field follows, even an empty one.
                    pending = ch == delim;
                    fields.Add(sbField.ToString());
                    sbField.Length = 0;
                }

                if (endOfLine)
                {
                    // Treat CR LF as a single terminator so the LF is not left behind.
                    if (ch == carriageReturn && reader.Peek() == lineFeed)
                    {
                        reader.Read();
                    }
                    break;
                }

                if (ch == delim || Char.IsWhiteSpace(ch))
                {
                    continue;
                }

                // Either branch below starts field content, so the field must be emitted
                // even when the input ends without a trailing delimiter or line break.
                pending = true;

                if (ch == quote)
                {
                    while (true)
                    {
                        ReadUntil(reader, sbField, quote);
                        reader.Read(); // consume the closing (or escaping) quote

                        if (reader.Peek() != quote)
                        {
                            break;
                        }

                        // A doubled quote is an escaped literal quote; keep scanning.
                        sbField.Append((char)reader.Read());
                    }
                }
                else
                {
                    sbField.Append(ch);
                    ReadUntil(reader, sbField, delim, carriageReturn, lineFeed);

                    // Trim trailing whitespace from the unquoted text.
                    int lastws = sbField.Length;
                    while (lastws > 0 && Char.IsWhiteSpace(sbField[lastws - 1]))
                    {
                        lastws--;
                    }
                    if (lastws != sbField.Length)
                    {
                        sbField.Length = lastws;
                    }
                }
            }

            if (pending)
            {
                fields.Add(sbField.ToString());
                sbField.Length = 0;
            }

            return fields.ToArray();
        }

        /// <summary>
        /// Appends characters from the reader to the builder until the next character is
        /// one of the stop characters or the input ends; the stop character is not consumed.
        /// </summary>
        private static void ReadUntil(TextReader reader, StringBuilder sb, params char[] stops)
        {
            int ch;
            while (-1 != (ch = reader.Peek()) && Array.IndexOf(stops, (char)ch) < 0)
            {
                sb.Append((char)reader.Read());
            }
        }

        /// <summary>
        /// Reads the first record as the field names when <see cref="CsvOptions.HasFieldHeaders"/> is set.
        /// </summary>
        private void ReadHeader()
        {
            if ((_options & CsvOptions.HasFieldHeaders) == CsvOptions.HasFieldHeaders)
            {
                string[] lineText = ReadCsvLine(_reader, _delim);
                for (int i = 0; i < lineText.Length; i++)
                {
                    _fieldNames[lineText[i]] = i;
                }
            }
        }

        /// <summary>
        /// Advances the <see cref="CsvReader"/> to the next record.
        /// </summary>
        /// <returns>True if a record was read, false when the input is exhausted</returns>
        public bool Read()
        {
            string[] lineText = ReadCsvLine(_reader, _delim);
            if (lineText.Length == 0 && _reader.Peek() == -1)
            {
                return false;
            }

            // Short records are padded with null so every named field can be addressed.
            if (lineText.Length < _fieldNames.Count)
            {
                Array.Resize(ref lineText, _fieldNames.Count);
            }

            _recordCount++;
            _currentFields = lineText;
            return true;
        }

        /// <summary>
        /// Returns the current record number of the parser
        /// </summary>
        public int RecordsAffected
        {
            get { return _recordCount; }
        }

        /// <summary>
        /// Returns the number of fields defined in this record
        /// </summary>
        public int FieldCount
        {
            get { return Math.Max(_fieldNames.Count, _currentFields.Length); }
        }

        string IDataRecord.GetDataTypeName(int i)
        {
            return GetFieldType(i).Name;
        }

        /// <summary>
        /// Returns typeof(String)
        /// </summary>
        public Type GetFieldType(int i)
        {
            return typeof(String);
        }

        /// <summary>
        /// Returns a DataTable which defines the columns in this CSV file
        /// </summary>
        public DataTable GetSchemaTable()
        {
            DataTable dt = new DataTable();
            for (int i = 0; i < FieldCount; i++)
            {
                dt.Columns.Add(new DataColumn(GetName(i), GetFieldType(i)));
            }
            return dt;
        }

        /// <summary>
        /// Returns the name of the column by ordinal
        /// </summary>
        /// <returns>The header name, or the ordinal as text when the column has no header</returns>
        public string GetName(int i)
        {
            Check.InRange(i, 0, FieldCount - 1);
            foreach (KeyValuePair<string, int> kv in _fieldNames)
            {
                if (kv.Value == i)
                {
                    return kv.Key;
                }
            }
            return i.ToString();
        }

        /// <summary>
        /// Returns the ordinal of the column by name
        /// </summary>
        /// <remarks>
        /// A numeric name is accepted for columns of the current record that lie beyond
        /// the named header columns.
        /// </remarks>
        /// <exception cref="ArgumentOutOfRangeException">The name does not identify a column</exception>
        public int GetOrdinal(string name)
        {
            int value;
            if (_fieldNames.TryGetValue(name, out value))
            {
                return value;
            }

            if (int.TryParse(name, out value))
            {
                if (value >= _fieldNames.Count && value < _currentFields.Length)
                {
                    return value;
                }
            }

            throw new ArgumentOutOfRangeException("name", name, "The field name was not found.");
        }

        /// <summary>
        /// Returns the string content of the field by name
        /// </summary>
        public object this[string name]
        {
            get { return GetValue(GetOrdinal(name)); }
        }

        /// <summary>
        /// Returns the string content of the field by ordinal
        /// </summary>
        public object this[int i]
        {
            get { return GetValue(i); }
        }

        /// <summary>
        /// Returns the string content of the field by ordinal
        /// </summary>
        public object GetValue(int i)
        {
            return _currentFields[i];
        }

        /// <summary>
        /// Returns an object[] containing all the strings for the current record.
        /// </summary>
        public object[] GetValues()
        {
            object[] values = new object[_currentFields.Length];
            GetValues(values);
            return values;
        }

        /// <summary>
        /// Fills an object[] with all the strings for the current record.
        /// </summary>
        /// <param name="values">The array to receive the field values</param>
        /// <returns>
        /// The number of values copied, which is limited by the length of <paramref name="values"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="values"/> is null</exception>
        public int GetValues(object[] values)
        {
            if (values == null)
            {
                throw new ArgumentNullException("values");
            }

            // Copy only what fits rather than throwing on a short destination array.
            int count = Math.Min(values.Length, _currentFields.Length);
            Array.Copy(_currentFields, values, count);
            return count;
        }

        /// <summary>
        /// Gets the string value of the specified field.
        /// </summary>
        public string GetString(string name)
        {
            return _currentFields[GetOrdinal(name)];
        }

        /// <summary>
        /// Gets the string value of the specified field.
        /// </summary>
        public string GetString(int i)
        {
            return _currentFields[i];
        }

        /// <summary>
        /// Return whether the specified field is set to null.
        /// </summary>
        public bool IsDBNull(int i)
        {
            return _currentFields[i] == null;
        }

        /// <summary>
        /// Gets the value of the specified column as a Boolean.
        /// </summary>
        public bool GetBoolean(int i)
        {
            return bool.Parse(GetString(i));
        }

        /// <summary>
        /// Gets the 8-bit unsigned integer value of the specified column.
        /// </summary>
        public byte GetByte(int i)
        {
            return byte.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Reads a stream of bytes from the specified column offset into the buffer as an array, starting at the given buffer offset.
        /// The field text is interpreted as pairs of hexadecimal digits.
        /// </summary>
        /// <returns>
        /// The number of bytes written, or the number of bytes in the field when
        /// <paramref name="buffer"/> is null.
        /// </returns>
        public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length)
        {
            string hex = GetString(i);
            if (buffer == null)
            {
                return hex.Length / 2;
            }

            int ordinal = 0;
            for (int chPos = (int)(fieldOffset * 2); chPos < hex.Length && ordinal < length; chPos += 2, ordinal++)
            {
                buffer[bufferoffset + ordinal] = byte.Parse(hex.Substring(chPos, 2), NumberStyles.AllowHexSpecifier, _formatting);
            }
            return ordinal;
        }

        /// <summary>
        /// Gets the character value of the specified column.
        /// </summary>
        public char GetChar(int i)
        {
            return GetString(i)[0];
        }

        /// <summary>
        /// Reads a stream of characters from the specified column offset into the buffer as an array, starting at the given buffer offset.
        /// </summary>
        /// <returns>
        /// The number of characters written, or the number of characters in the field
        /// when <paramref name="buffer"/> is null.
        /// </returns>
        public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length)
        {
            string chars = GetString(i);
            if (buffer == null)
            {
                return chars.Length;
            }

            // Nothing remains at or past the end of the field.
            if (fieldoffset >= chars.Length)
            {
                return 0;
            }

            int start = (int)fieldoffset;
            int count = Math.Min(chars.Length - start, length);
            chars.CopyTo(start, buffer, bufferoffset, count);
            return count;
        }

        /// <summary>
        /// Returns a <see cref="IDataReader"/> for the specified column ordinal.
        /// The field text is parsed as nested CSV using this reader's options, delimiter and format provider.
        /// </summary>
        public IDataReader GetData(int i)
        {
            return new CsvReader(new StringReader(GetString(i)), _options, _delim, _formatting, _depth + 1);
        }

        /// <summary>
        /// Gets the date and time data value of the specified field.
        /// </summary>
        public DateTime GetDateTime(int i)
        {
            return DateTime.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Gets the fixed-position numeric value of the specified field.
        /// </summary>
        public decimal GetDecimal(int i)
        {
            return decimal.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Gets the double-precision floating point number of the specified field.
        /// </summary>
        public double GetDouble(int i)
        {
            return double.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Gets the single-precision floating point number of the specified field.
        /// </summary>
        public float GetFloat(int i)
        {
            return float.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Returns the GUID value of the specified field.
        /// </summary>
        public Guid GetGuid(int i)
        {
            return new Guid(GetString(i));
        }

        /// <summary>
        /// Gets the 16-bit signed integer value of the specified field.
        /// </summary>
        public short GetInt16(int i)
        {
            return short.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Gets the 32-bit signed integer value of the specified field.
        /// </summary>
        public int GetInt32(int i)
        {
            return int.Parse(GetString(i), _formatting);
        }

        /// <summary>
        /// Gets the 64-bit signed integer value of the specified field.
        /// </summary>
        public long GetInt64(int i)
        {
            return long.Parse(GetString(i), _formatting);
        }
    }
}