#region Copyright 2010-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Globalization;
using System.Text;
namespace CSharpTest.Net.Data
{
/// <summary>
/// Options that define formatting of the CSV file.  The values are bit flags and
/// are tested with bitwise operators, so they may be combined.
/// </summary>
[Flags]
public enum CsvOptions
{
    /// <summary> No options defined </summary>
    None = 0,
    /// <summary> The first line contains the names of the fields </summary>
    HasFieldHeaders = 1,
}
/// <summary>
/// Provides an <see cref="IDataReader"/> interface to CSV/Tab delimited text files.
/// </summary>
public class CsvReader : IDataReader
{
// Maps each header name (compared case-insensitively) to its zero-based column ordinal.
readonly Dictionary<string, int> _fieldNames;
// The underlying text source; disposed when the reader is closed.
readonly TextReader _reader;
// Parsing options supplied at construction.
readonly CsvOptions _options;
// Format provider handed to the numeric/date Parse calls of the typed getters.
readonly IFormatProvider _formatting;
// The character that separates fields within a record.
readonly char _delim;
// Nesting level of this reader; zero for a top-level reader.
readonly int _depth;
// Number of records read so far; set to -1 once the reader is closed.
int _recordCount;
// True once Close() has been called.
bool _closed;
// The fields of the current record; empty until the first successful Read().
string[] _currentFields;
/// <summary> Constructs the CSV reader for the provided text reader </summary>
/// <param name="reader">The text reader to read from</param>
/// <param name="options">Options for parsing the text</param>
/// <param name="fieldDelim">The character used to delineate fields</param>
/// <param name="formatter">The format provider used for interpreting numbers and dates</param>
/// <param name="depth">Provides for nested CSV parsers</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="reader"/> is null</exception>
protected CsvReader(TextReader reader, CsvOptions options, char fieldDelim, IFormatProvider formatter, int depth)
{
    // Fail immediately with a descriptive error instead of a NullReferenceException on first use.
    if (reader == null)
        throw new ArgumentNullException("reader");

    // Header names are matched without regard to case.
    _fieldNames = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    _delim = fieldDelim;
    _reader = reader;
    _options = options;
    _formatting = formatter;
    _depth = depth;
    _recordCount = 0;
    _closed = false;
    _currentFields = new string[0];

    // Consumes the first record as column names when the options request it.
    ReadHeader();
}
#region ctor overloads
/// <summary> Constructs a top-level (depth zero) CSV reader for the provided text reader </summary>
/// <param name="reader">The text reader to read from</param>
/// <param name="options">Options for parsing the text</param>
/// <param name="fieldDelim">The character used to delineate fields</param>
/// <param name="formatter">The format provider used for interpreting numbers and dates</param>
public CsvReader(TextReader reader, CsvOptions options, char fieldDelim, IFormatProvider formatter)
: this(reader, options, fieldDelim, formatter, 0)
{ }
/// <summary>
/// Constructs the CSV reader for the provided text file, treating the first line as field headers
/// </summary>
/// <param name="inputFile">The text file to read from</param>
public CsvReader(string inputFile)
: this(inputFile, CsvOptions.HasFieldHeaders)
{ }
/// <summary>
/// Constructs the CSV reader for the provided text file, using a comma delimiter and the current culture
/// </summary>
/// <param name="inputFile">The text file to read from</param>
/// <param name="options">Options for parsing the text</param>
public CsvReader(string inputFile, CsvOptions options)
: this(inputFile, options, ',', CultureInfo.CurrentCulture)
{ }
/// <summary>
/// Constructs the CSV reader for the provided text file; the file is opened for reading
/// with read sharing and wrapped in a StreamReader
/// </summary>
/// <param name="inputFile">The text file to read from</param>
/// <param name="options">Options for parsing the text</param>
/// <param name="fieldDelim">The character used to delineate fields</param>
/// <param name="formatter">The format provider used for interpreting numbers and dates</param>
public CsvReader(string inputFile, CsvOptions options, char fieldDelim, IFormatProvider formatter)
: this(new StreamReader(File.Open(inputFile, FileMode.Open, FileAccess.Read, FileShare.Read)), options, fieldDelim, formatter)
{ }
/// <summary>
/// Constructs the CSV reader for the provided text reader, treating the first line as field headers
/// </summary>
/// <param name="reader">The text reader to read from</param>
public CsvReader(TextReader reader)
: this(reader, CsvOptions.HasFieldHeaders)
{ }
/// <summary>
/// Constructs the CSV reader for the provided text reader, using a comma delimiter and the current culture
/// </summary>
/// <param name="reader">The text reader to read from</param>
/// <param name="options">Options for parsing the text</param>
public CsvReader(TextReader reader, CsvOptions options)
: this(reader, options, ',', CultureInfo.CurrentCulture)
{ }
#endregion
/// <summary>
/// Disposes of the reader by closing it; equivalent to calling Close()
/// </summary>
public void Dispose()
{
Close();
}
/// <summary>
/// Closes the csv reader: marks it closed, discards the current record, sets the
/// record counter to -1 and disposes the underlying text reader
/// </summary>
public void Close()
{
    // Reset the visible state first; releasing the text reader is the last step.
    _closed = true;
    _currentFields = new string[0];
    _recordCount = -1;

    _reader.Dispose();
}
/// <summary>
/// Returns true if the reader has been closed
/// </summary>
public bool IsClosed
{
get { return _closed; }
}
/// <summary>
/// Returns the depth (zero based) of the reader when using nested CSV parsers
/// </summary>
public int Depth
{
get { return _depth; }
}
/// <summary>
/// Closes the reader and returns false; this reader never advances to another result.
/// </summary>
bool IDataReader.NextResult()
{
Close();
return false;
}
/// <summary>
/// Provides a single-record parser of CSV content
/// </summary>
/// <remarks>
/// Reads characters up to and including the next line terminator (CR, LF or CR LF) or the
/// end of input.  White space before a field is skipped and trailing white space of an
/// unquoted field is trimmed.  A field may be enclosed in double quotes, in which case it
/// may contain delimiters and line terminators, and a doubled quote stands for one quote.
/// </remarks>
/// <param name="reader">The text reader to consume a record from</param>
/// <param name="delim">The character used to delineate fields</param>
/// <returns>The fields of the record; an empty array when no content remained</returns>
public static string[] ReadCsvLine(TextReader reader, Char delim)
{
    const char quote = '"';
    // True while a field is owed to the result: either content is buffered in sbField
    // or a delimiter was just seen, which implies a (possibly empty) field follows.
    bool pending = false;
    List<string> fields = new List<string>();
    StringBuilder sbField = new StringBuilder();
    int next;
    while (-1 != (next = reader.Read()))
    {
        Char ch = (Char)next;
        // Recognize both terminator characters rather than Environment.NewLine[0], so a
        // file parses the same way regardless of the platform it is read on.
        bool endOfLine = ch == '\r' || ch == '\n';
        if (ch == delim || endOfLine)
        {
            pending = ch == delim;
            fields.Add(sbField.ToString());
            sbField.Length = 0;
        }
        if (endOfLine)
        {
            // CR LF is one terminator: swallow the LF so the next call does not
            // mistake it for an empty line.
            if (ch == '\r' && reader.Peek() == '\n')
                reader.Read();
            break;
        }
        if (ch == delim || Char.IsWhiteSpace(ch))
            continue;
        if (ch == quote)
        {
            // A quoted field must be flushed even when the input ends right after it.
            pending = true;
            while (true)
            {
                ReadUntil(reader, sbField, quote, quote);
                reader.Read(); // consume the closing quote (or hit end of input)
                if (reader.Peek() != quote)
                    break;
                // A doubled quote is an escaped quote character; keep one and continue.
                sbField.Append((Char)reader.Read());
            }
        }
        else
        {
            pending = true;
            sbField.Append(ch);
            ReadUnquoted(reader, sbField, delim);
            // Trim trailing white space from the unquoted content.
            int lastws = sbField.Length;
            while (lastws > 0 && Char.IsWhiteSpace(sbField[lastws - 1]))
                lastws--;
            sbField.Length = lastws;
        }
    }
    if (pending)
        fields.Add(sbField.ToString());
    return fields.ToArray();
}
// Appends characters to sb up to, but not including, the next delimiter or line terminator.
static void ReadUnquoted(TextReader reader, StringBuilder sb, char delim)
{
    int ch = reader.Peek();
    while (ch != -1 && ch != delim && ch != '\r' && ch != '\n')
    {
        sb.Append((Char)reader.Read());
        ch = reader.Peek();
    }
}
// Appends characters to sb up to, but not including, the first occurrence of either stop character.
static void ReadUntil(TextReader reader, StringBuilder sb, char stop1, char stop2)
{
    int ch = reader.Peek();
    while (ch != -1 && ch != stop1 && ch != stop2)
    {
        sb.Append((Char)reader.Read());
        ch = reader.Peek();
    }
}
// When the options declare a header line, consumes one record and registers each
// of its values as a column name mapped to its zero-based position.
void ReadHeader()
{
    bool hasHeaders = (_options & CsvOptions.HasFieldHeaders) == CsvOptions.HasFieldHeaders;
    if (!hasHeaders)
        return;

    string[] names = ReadCsvLine(_reader, _delim);
    int ordinal = 0;
    foreach (string name in names)
    {
        // A repeated name keeps the position of its last occurrence.
        _fieldNames[name] = ordinal;
        ordinal++;
    }
}
/// <summary>
/// Advances to the next record.
/// </summary>
/// <returns>true when a record was read; false when the input is exhausted</returns>
public bool Read()
{
    string[] record = ReadCsvLine(_reader, _delim);

    // No fields and nothing left to peek at means the input has been fully consumed.
    bool endOfInput = record.Length == 0 && _reader.Peek() == -1;
    if (endOfInput)
        return false;

    // Short records are padded with nulls so every named column can be indexed.
    int namedColumns = _fieldNames.Count;
    if (record.Length < namedColumns)
        Array.Resize(ref record, namedColumns);

    _currentFields = record;
    _recordCount++;
    return true;
}
/// <summary>
/// Returns the current record number of the parser: the count of records read so far,
/// or -1 once the reader has been closed
/// </summary>
public int RecordsAffected
{
get { return _recordCount; }
}
/// <summary>
/// Returns the number of fields defined in this record: the larger of the number of
/// named columns and the number of fields in the current record
/// </summary>
public int FieldCount
{
    get
    {
        int named = _fieldNames.Count;
        int current = _currentFields.Length;
        return named > current ? named : current;
    }
}
/// <summary>
/// Returns the name of the type reported by GetFieldType for the specified field.
/// </summary>
string IDataRecord.GetDataTypeName(int i)
{
    Type fieldType = GetFieldType(i);
    return fieldType.Name;
}
/// <summary>
/// Returns typeof(String) for every field; the ordinal is not examined
/// </summary>
public Type GetFieldType(int i)
{
return typeof(String);
}
/// <summary>
/// Returns a DataTable which defines the columns in this CSV file: one column per
/// field, named by GetName and typed by GetFieldType
/// </summary>
public DataTable GetSchemaTable()
{
    DataTable schema = new DataTable();
    int ordinal = 0;
    while (ordinal < FieldCount)
    {
        DataColumn column = new DataColumn(GetName(ordinal), GetFieldType(ordinal));
        schema.Columns.Add(column);
        ordinal++;
    }
    return schema;
}
/// <summary>
/// Returns the name of the column by ordinal
/// </summary>
/// <param name="i">The zero-based column ordinal</param>
/// <returns>
/// The header name mapped to the ordinal, or the ordinal formatted as text when no
/// header names that column
/// </returns>
public string GetName(int i)
{
    // NOTE(review): Check.InRange is declared elsewhere; presumably it throws when i
    // falls outside [0, FieldCount - 1] -- confirm against its definition.
    Check.InRange(i, 0, FieldCount - 1);
    foreach (KeyValuePair<string, int> kv in _fieldNames)
    {
        if (kv.Value == i)
            return kv.Key;
    }
    return i.ToString();
}
/// <summary>
/// Returns the ordinal of the column by name
/// </summary>
/// <param name="name">A header name, or the numeric text of an unnamed column's ordinal</param>
/// <exception cref="ArgumentOutOfRangeException">The name matches no column</exception>
public int GetOrdinal(string name)
{
    int ordinal;
    if (_fieldNames.TryGetValue(name, out ordinal))
        return ordinal;

    // Columns beyond the named ones can be addressed by their ordinal written as text.
    bool numeric = int.TryParse(name, out ordinal);
    if (numeric && ordinal >= _fieldNames.Count && ordinal < _currentFields.Length)
        return ordinal;

    throw new ArgumentOutOfRangeException();
}
/// <summary>
/// Returns the string content of the field by name
/// </summary>
public object this[string name]
{
    get
    {
        int ordinal = GetOrdinal(name);
        return GetValue(ordinal);
    }
}
/// <summary>
/// Returns the string content of the field by ordinal
/// </summary>
public object this[int i]
{
get { return GetValue(i); }
}
/// <summary>
/// Returns the string content of the field by ordinal, read directly from the
/// current record
/// </summary>
public object GetValue(int i)
{
return _currentFields[i];
}
/// <summary>
/// Returns a new object[] containing all the strings for the current record.
/// </summary>
public object[] GetValues()
{
    object[] snapshot = new object[_currentFields.Length];
    _currentFields.CopyTo(snapshot, 0);
    return snapshot;
}
/// <summary>
/// Fills an object[] with the strings for the current record.
/// </summary>
/// <param name="values">The array to copy the field values into</param>
/// <returns>The number of values copied</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="values"/> is null</exception>
public int GetValues(object[] values)
{
    if (values == null)
        throw new ArgumentNullException("values");

    // Copy only as many fields as the target can hold instead of throwing when
    // the supplied array is shorter than the record.
    int count = Math.Min(values.Length, _currentFields.Length);
    Array.Copy(_currentFields, 0, values, 0, count);
    return count;
}
/// <summary>
/// Gets the string value of the field with the specified name.
/// </summary>
public string GetString(string name)
{
    int ordinal = GetOrdinal(name);
    return _currentFields[ordinal];
}
/// <summary>
/// Gets the string value of the field at the specified ordinal.
/// </summary>
public string GetString(int i)
{
    string text = _currentFields[i];
    return text;
}
/// <summary>
/// Return whether the specified field is set to null.
/// </summary>
public bool IsDBNull(int i)
{
    string raw = _currentFields[i];
    return raw == null;
}
/// <summary>
/// Gets the value of the specified column as a Boolean, parsed from the field text.
/// </summary>
public bool GetBoolean(int i)
{
    string text = GetString(i);
    return bool.Parse(text);
}
/// <summary>
/// Gets the 8-bit unsigned integer value of the specified column, parsed with the
/// reader's format provider.
/// </summary>
public byte GetByte(int i)
{
    string text = GetString(i);
    return byte.Parse(text, _formatting);
}
/// <summary>
/// Reads a stream of bytes from the specified column offset into the buffer as an array,
/// starting at the given buffer offset.  The field text is interpreted as hexadecimal,
/// two characters per byte.
/// </summary>
/// <param name="i">The zero-based column ordinal</param>
/// <param name="fieldOffset">The index, in bytes, within the field at which to start reading</param>
/// <param name="buffer">The target buffer, or null to query the number of bytes in the field</param>
/// <param name="bufferoffset">The index within the buffer at which to start writing</param>
/// <param name="length">The maximum number of bytes to read</param>
/// <returns>The number of bytes read, or the field's length in bytes when buffer is null</returns>
public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length)
{
    string hex = GetString(i);

    // A null buffer is a request for the available length rather than an error.
    if (buffer == null)
        return hex.Length / 2;

    int ordinal = 0;
    for (int chPos = (int)(fieldOffset * 2); chPos < hex.Length && ordinal < length; chPos += 2, ordinal++)
        buffer[bufferoffset + ordinal] = byte.Parse(hex.Substring(chPos, 2), NumberStyles.AllowHexSpecifier, _formatting);
    return ordinal;
}
/// <summary>
/// Gets the character value of the specified column: the first character of the field text.
/// </summary>
public char GetChar(int i)
{
    string text = GetString(i);
    return text[0];
}
/// <summary>
/// Reads a stream of characters from the specified column offset into the buffer as an array,
/// starting at the given buffer offset.
/// </summary>
/// <param name="i">The zero-based column ordinal</param>
/// <param name="fieldoffset">The index within the field at which to start reading</param>
/// <param name="buffer">The target buffer, or null to query the length of the field</param>
/// <param name="bufferoffset">The index within the buffer at which to start writing</param>
/// <param name="length">The maximum number of characters to read</param>
/// <returns>The number of characters read, or the field's length when buffer is null</returns>
public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length)
{
    string chars = GetString(i);

    // A null buffer is a request for the available length rather than an error.
    if (buffer == null)
        return chars.Length;

    // Nothing remains to read at or beyond the end of the field.
    if (fieldoffset >= chars.Length)
        return 0;

    length = Math.Min(chars.Length - (int)fieldoffset, length);
    // Copy straight out of the string; no intermediate char[] is needed.
    chars.CopyTo((int)fieldoffset, buffer, bufferoffset, length);
    return length;
}
/// <summary>
/// Returns an IDataReader for the specified column ordinal: a nested CSV reader over the
/// field's text, using this reader's options, delimiter and format provider at depth + 1.
/// </summary>
public IDataReader GetData(int i)
{
    TextReader nested = new StringReader(GetString(i));
    return new CsvReader(nested, _options, _delim, _formatting, _depth + 1);
}
/// <summary>
/// Gets the date and time data value of the specified field, parsed with the reader's
/// format provider.
/// </summary>
public DateTime GetDateTime(int i)
{
    string text = GetString(i);
    return DateTime.Parse(text, _formatting);
}
/// <summary>
/// Gets the fixed-position numeric value of the specified field, parsed with the reader's
/// format provider.
/// </summary>
public decimal GetDecimal(int i)
{
    string text = GetString(i);
    return decimal.Parse(text, _formatting);
}
/// <summary>
/// Gets the double-precision floating point number of the specified field, parsed with
/// the reader's format provider.
/// </summary>
public double GetDouble(int i)
{
    string text = GetString(i);
    return double.Parse(text, _formatting);
}
/// <summary>
/// Gets the single-precision floating point number of the specified field, parsed with
/// the reader's format provider.
/// </summary>
public float GetFloat(int i)
{
    string text = GetString(i);
    return float.Parse(text, _formatting);
}
/// <summary>
/// Returns the GUID value of the specified field, constructed from the field text.
/// </summary>
public Guid GetGuid(int i)
{
    string text = GetString(i);
    return new Guid(text);
}
/// <summary>
/// Gets the 16-bit signed integer value of the specified field, parsed with the reader's
/// format provider.
/// </summary>
public short GetInt16(int i)
{
    string text = GetString(i);
    return short.Parse(text, _formatting);
}
/// <summary>
/// Gets the 32-bit signed integer value of the specified field, parsed with the reader's
/// format provider.
/// </summary>
public int GetInt32(int i)
{
    string text = GetString(i);
    return int.Parse(text, _formatting);
}
/// <summary>
/// Gets the 64-bit signed integer value of the specified field, parsed with the reader's
/// format provider.
/// </summary>
public long GetInt64(int i)
{
    string text = GetString(i);
    return long.Parse(text, _formatting);
}
}
}