#region Copyright 2012-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#endregion
using System;
using System.IO;
using System.Collections.Generic;
using CSharpTest.Net.IO;
using CSharpTest.Net.Serialization;
namespace CSharpTest.Net.Collections
{
/// Defines how duplicate keys are handled
public enum DuplicateHandling
{
/// Do nothing and pass-through all duplicates
None = 0,
/// Remove all but the first item of duplicates
FirstValueWins,
/// Remove all but the last item of duplicates
LastValueWins,
/// Throw an error on duplicates
RaisesException,
}
///
/// Creates an ordered enumeration from an unordered enumeration by paginating the data, sorting the page,
/// and then performing a binary-tree grouped mergesort on the resulting pages. When the page size (memoryLimit)
/// is hit, the page will be unloaded to disk and restored on demand if a serializer is provided.
///
public class OrderedEnumeration : IEnumerable
{
private const int DefaultLimit = 0x10000;
private const int LimitMax = int.MaxValue;
private readonly IEnumerable _unordered;
private IComparer _comparer;
private ISerializer _serializer;
private int _memoryLimit;
private DuplicateHandling _duplicateHandling;
private bool _enumerated;
/// Constructs an ordered enumeration from an unordered enumeration
public OrderedEnumeration(IEnumerable unordered) : this(Comparer.Default, unordered, null, DefaultLimit) { }
/// Constructs an ordered enumeration from an unordered enumeration
public OrderedEnumeration(IComparer comparer, IEnumerable unordered) : this(comparer, unordered, null, DefaultLimit) { }
/// Constructs an ordered enumeration from an unordered enumeration
public OrderedEnumeration(IComparer comparer, IEnumerable unordered, ISerializer serializer) : this(comparer, unordered, serializer, DefaultLimit) { }
/// Constructs an ordered enumeration from an unordered enumeration
public OrderedEnumeration(IComparer comparer, IEnumerable unordered, ISerializer serializer, int memoryLimit)
{
_enumerated = false;
_comparer = comparer;
_unordered = unordered;
_serializer = serializer;
_memoryLimit = Check.InRange(memoryLimit, 1, LimitMax);
_duplicateHandling = DuplicateHandling.None;
}
///
/// Gets or sets the comparer to use when ordering the items.
///
public IComparer Comparer
{
get { return _comparer; }
set { _comparer = value ?? Comparer.Default; }
}
///
/// Gets or sets the serializer to use when paging to disk.
///
public ISerializer Serializer
{
get { return _serializer; }
set { _serializer = value; }
}
///
/// Gets or sets the number of instances to keep in memory before sorting/paging to disk.
///
/// You must specify the Serializer before setting this property
public int InMemoryLimit
{
get { return _memoryLimit; }
set { _memoryLimit = Check.InRange(value, 1, LimitMax); }
}
/// Gets or sets the duplicate item handling policy
public DuplicateHandling DuplicateHandling
{
get { return _duplicateHandling; }
set { _duplicateHandling = value; }
}
///
/// Returns an enumerator that iterates through the collection.
///
/// GetEnumerator() may only be called once.
/// Enumeration is out of sequence.
/// Duplicate item in enumeration.
public IEnumerator GetEnumerator()
{
if (_enumerated)
throw new InvalidOperationException();
_enumerated = true;
return new OrderedEnumerator(PagedAndOrdered(), _comparer, _duplicateHandling);
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{ return GetEnumerator(); }
private IEnumerable PagedAndOrdered()
{
T[] items = new T[Math.Min(InMemoryLimit, 2048)];
using (DisposingList resources = new DisposingList())
{
List> orderedSet = new List>();
int count = 0;
foreach (T item in _unordered)
{
if (_memoryLimit > 0 && count == _memoryLimit)
{
if (_serializer != null)
{
TempFile temp = new TempFile();
resources.Add(temp);
Stream io;
resources.Add(io = new FileStream(temp.TempPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read));
MergeSort.Sort(items, _comparer);
foreach (T i in items)
_serializer.WriteTo(i, io);
io.Position = 0;
orderedSet.Add(Read(temp, io));
}
else
{
T[] copy;
MergeSort.Sort(items, out copy, 0, items.Length, _comparer);
orderedSet.Add(items);
items = copy;
}
Array.Clear(items, 0, items.Length);
count = 0;
}
if (count == items.Length)
Array.Resize(ref items, Math.Min(InMemoryLimit, items.Length * 2));
items[count++] = item;
}
if (count != items.Length)
Array.Resize(ref items, count);
MergeSort.Sort(items, _comparer);
IEnumerable result;
if (orderedSet.Count == 0)
result = items;
else
{
orderedSet.Add(items);
result = Merge(_comparer, orderedSet.ToArray());
}
foreach (T item in result)
yield return item;
}
}
private IEnumerable Read(TempFile file, Stream io)
{
using (file)
using (io)
{
for (int i = 0; i < _memoryLimit; i++)
yield return _serializer.ReadFrom(io);
}
}
#region static Merge(...)
///
/// Merges two ordered enumerations based on the comparer provided.
///
public static IEnumerable Merge(IComparer comparer, IEnumerable x, IEnumerable y)
{
using(IEnumerator left = x.GetEnumerator())
using(IEnumerator right = y.GetEnumerator())
{
bool lvalid = left.MoveNext();
bool rvalid = right.MoveNext();
while(lvalid || rvalid)
{
int cmp = !rvalid ? -1 : !lvalid ? 1 : comparer.Compare(left.Current, right.Current);
if (cmp <= 0)
{
yield return left.Current;
lvalid = left.MoveNext();
}
else
{
yield return right.Current;
rvalid = right.MoveNext();
}
}
}
}
///
/// Merges n-number of ordered enumerations based on the default comparer of T.
///
public static IEnumerable Merge(params IEnumerable[] enums)
{
return Merge(Comparer.Default, 0, enums.Length, enums);
}
///
/// Merges n-number of ordered enumerations based on the comparer provided.
///
public static IEnumerable Merge(IComparer comparer, params IEnumerable[] enums)
{
return Merge(comparer, 0, enums.Length, enums);
}
///
/// Merges n-number of ordered enumerations based on the comparer provided.
///
public static IEnumerable Merge(IComparer comparer, DuplicateHandling duplicateHandling, params IEnumerable[] enums)
{
return WithDuplicateHandling(Merge(comparer, enums), comparer, duplicateHandling);
}
private static IEnumerable Merge(IComparer comparer, int start, int count, IEnumerable[] enums)
{
if (count <= 0)
return new T[0];
if (count == 1)
return enums[start];
if (count == 2)
return Merge(comparer, enums[start], enums[start + 1]);
int half = count/2;
return Merge(comparer,
Merge(comparer, start, half, enums),
Merge(comparer, start + half, count - half, enums)
);
}
#endregion
private class OrderedEnumerator : IEnumerator
{
private readonly IEnumerable _ordered;
private IEnumerator _enumerator;
private readonly IComparer _comparer;
private readonly DuplicateHandling _duplicateHandling;
private bool _isValid, _hasNext, _isFirst;
private T _current;
private T _next;
public OrderedEnumerator(
IEnumerable enumerator,
IComparer comparer,
DuplicateHandling duplicateHandling)
{
_ordered = enumerator;
_enumerator = null;
_comparer = comparer;
_duplicateHandling = duplicateHandling;
_isFirst = true;
}
public void Dispose()
{
if (_enumerator != null)
_enumerator.Dispose();
_enumerator = null;
}
public bool MoveNext()
{
if (_isFirst)
{
_isFirst = false;
_enumerator = _ordered.GetEnumerator();
_hasNext = _enumerator.MoveNext();
if (_hasNext)
_next = _enumerator.Current;
}
_isValid = _hasNext;
_current = _next;
if (!_isValid)
return false;
// ReSharper disable RedundantBoolCompare
while ((_hasNext = _enumerator.MoveNext()) == true)
// ReSharper restore RedundantBoolCompare
{
_next = _enumerator.Current;
int cmp = _comparer.Compare(_current, _next);
if (cmp > 0)
throw new InvalidDataException("Enumeration out of sequence.");
if (cmp != 0 || _duplicateHandling == DuplicateHandling.None)
break;
if (_duplicateHandling == DuplicateHandling.RaisesException)
throw new ArgumentException("Duplicate item in enumeration.");
if (_duplicateHandling == DuplicateHandling.LastValueWins)
_current = _next;
}
if (!_hasNext)
_next = default(T);
return true;
}
public T Current
{
get
{
if (!_isValid)
throw new InvalidOperationException();
return _current;
}
}
object System.Collections.IEnumerator.Current { get { return Current; } }
void System.Collections.IEnumerator.Reset()
{ throw new NotSupportedException(); }
}
///
/// Wraps an existing enumeration of Key/value pairs with an assertion about ascending order and handling
/// for duplicate keys.
///
public static IEnumerable WithDuplicateHandling(
IEnumerable items, IComparer comparer, DuplicateHandling duplicateHandling)
{
return new EnumWithDuplicateKeyHandling(items, comparer, duplicateHandling);
}
private class EnumWithDuplicateKeyHandling : IEnumerable
{
private readonly IEnumerable _items;
private readonly IComparer _comparer;
private readonly DuplicateHandling _duplicateHandling;
public EnumWithDuplicateKeyHandling(IEnumerable items, IComparer comparer, DuplicateHandling duplicateHandling)
{
_items = items;
_comparer = comparer;
_duplicateHandling = duplicateHandling;
}
public IEnumerator GetEnumerator()
{
return new OrderedEnumerator(_items, _comparer, _duplicateHandling);
}
[Obsolete]
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
}
}