#region Copyright 2012-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0 /* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #endregion using System; using System.IO; using System.Collections.Generic; using CSharpTest.Net.IO; using CSharpTest.Net.Serialization; namespace CSharpTest.Net.Collections { /// Defines how duplicate keys are handled public enum DuplicateHandling { /// Do nothing and pass-through all duplicates None = 0, /// Remove all but the first item of duplicates FirstValueWins, /// Remove all but the last item of duplicates LastValueWins, /// Throw an error on duplicates RaisesException, } /// /// Creates an ordered enumeration from an unordered enumeration by paginating the data, sorting the page, /// and then performing a binary-tree grouped mergesort on the resulting pages. When the page size (memoryLimit) /// is hit, the page will be unloaded to disk and restored on demand if a serializer is provided. /// public class OrderedEnumeration : IEnumerable { private const int DefaultLimit = 0x10000; private const int LimitMax = int.MaxValue; private readonly IEnumerable _unordered; private IComparer _comparer; private ISerializer _serializer; private int _memoryLimit; private DuplicateHandling _duplicateHandling; private bool _enumerated; /// Constructs an ordered enumeration from an unordered enumeration public OrderedEnumeration(IEnumerable unordered) : this(Comparer.Default, unordered, null, DefaultLimit) { } /// Constructs an ordered enumeration from an unordered enumeration public OrderedEnumeration(IComparer comparer, IEnumerable unordered) : this(comparer, unordered, null, DefaultLimit) { } /// Constructs an ordered enumeration from an unordered enumeration public OrderedEnumeration(IComparer comparer, IEnumerable unordered, ISerializer serializer) : this(comparer, unordered, serializer, DefaultLimit) { } /// Constructs an ordered enumeration from an unordered enumeration public OrderedEnumeration(IComparer comparer, IEnumerable unordered, ISerializer serializer, int memoryLimit) { _enumerated = false; _comparer = comparer; _unordered = unordered; _serializer = serializer; _memoryLimit = Check.InRange(memoryLimit, 1, LimitMax); _duplicateHandling = DuplicateHandling.None; } /// /// Gets or sets the comparer to use when ordering the items. /// public IComparer Comparer { get { return _comparer; } set { _comparer = value ?? Comparer.Default; } } /// /// Gets or sets the serializer to use when paging to disk. /// public ISerializer Serializer { get { return _serializer; } set { _serializer = value; } } /// /// Gets or sets the number of instances to keep in memory before sorting/paging to disk. /// /// You must specify the Serializer before setting this property public int InMemoryLimit { get { return _memoryLimit; } set { _memoryLimit = Check.InRange(value, 1, LimitMax); } } /// Gets or sets the duplicate item handling policy public DuplicateHandling DuplicateHandling { get { return _duplicateHandling; } set { _duplicateHandling = value; } } /// /// Returns an enumerator that iterates through the collection. /// /// GetEnumerator() may only be called once. /// Enumeration is out of sequence. /// Duplicate item in enumeration. public IEnumerator GetEnumerator() { if (_enumerated) throw new InvalidOperationException(); _enumerated = true; return new OrderedEnumerator(PagedAndOrdered(), _comparer, _duplicateHandling); } System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } private IEnumerable PagedAndOrdered() { T[] items = new T[Math.Min(InMemoryLimit, 2048)]; using (DisposingList resources = new DisposingList()) { List> orderedSet = new List>(); int count = 0; foreach (T item in _unordered) { if (_memoryLimit > 0 && count == _memoryLimit) { if (_serializer != null) { TempFile temp = new TempFile(); resources.Add(temp); Stream io; resources.Add(io = new FileStream(temp.TempPath, FileMode.Create, FileAccess.ReadWrite, FileShare.Read)); MergeSort.Sort(items, _comparer); foreach (T i in items) _serializer.WriteTo(i, io); io.Position = 0; orderedSet.Add(Read(temp, io)); } else { T[] copy; MergeSort.Sort(items, out copy, 0, items.Length, _comparer); orderedSet.Add(items); items = copy; } Array.Clear(items, 0, items.Length); count = 0; } if (count == items.Length) Array.Resize(ref items, Math.Min(InMemoryLimit, items.Length * 2)); items[count++] = item; } if (count != items.Length) Array.Resize(ref items, count); MergeSort.Sort(items, _comparer); IEnumerable result; if (orderedSet.Count == 0) result = items; else { orderedSet.Add(items); result = Merge(_comparer, orderedSet.ToArray()); } foreach (T item in result) yield return item; } } private IEnumerable Read(TempFile file, Stream io) { using (file) using (io) { for (int i = 0; i < _memoryLimit; i++) yield return _serializer.ReadFrom(io); } } #region static Merge(...) /// /// Merges two ordered enumerations based on the comparer provided. /// public static IEnumerable Merge(IComparer comparer, IEnumerable x, IEnumerable y) { using(IEnumerator left = x.GetEnumerator()) using(IEnumerator right = y.GetEnumerator()) { bool lvalid = left.MoveNext(); bool rvalid = right.MoveNext(); while(lvalid || rvalid) { int cmp = !rvalid ? -1 : !lvalid ? 1 : comparer.Compare(left.Current, right.Current); if (cmp <= 0) { yield return left.Current; lvalid = left.MoveNext(); } else { yield return right.Current; rvalid = right.MoveNext(); } } } } /// /// Merges n-number of ordered enumerations based on the default comparer of T. /// public static IEnumerable Merge(params IEnumerable[] enums) { return Merge(Comparer.Default, 0, enums.Length, enums); } /// /// Merges n-number of ordered enumerations based on the comparer provided. /// public static IEnumerable Merge(IComparer comparer, params IEnumerable[] enums) { return Merge(comparer, 0, enums.Length, enums); } /// /// Merges n-number of ordered enumerations based on the comparer provided. /// public static IEnumerable Merge(IComparer comparer, DuplicateHandling duplicateHandling, params IEnumerable[] enums) { return WithDuplicateHandling(Merge(comparer, enums), comparer, duplicateHandling); } private static IEnumerable Merge(IComparer comparer, int start, int count, IEnumerable[] enums) { if (count <= 0) return new T[0]; if (count == 1) return enums[start]; if (count == 2) return Merge(comparer, enums[start], enums[start + 1]); int half = count/2; return Merge(comparer, Merge(comparer, start, half, enums), Merge(comparer, start + half, count - half, enums) ); } #endregion private class OrderedEnumerator : IEnumerator { private readonly IEnumerable _ordered; private IEnumerator _enumerator; private readonly IComparer _comparer; private readonly DuplicateHandling _duplicateHandling; private bool _isValid, _hasNext, _isFirst; private T _current; private T _next; public OrderedEnumerator( IEnumerable enumerator, IComparer comparer, DuplicateHandling duplicateHandling) { _ordered = enumerator; _enumerator = null; _comparer = comparer; _duplicateHandling = duplicateHandling; _isFirst = true; } public void Dispose() { if (_enumerator != null) _enumerator.Dispose(); _enumerator = null; } public bool MoveNext() { if (_isFirst) { _isFirst = false; _enumerator = _ordered.GetEnumerator(); _hasNext = _enumerator.MoveNext(); if (_hasNext) _next = _enumerator.Current; } _isValid = _hasNext; _current = _next; if (!_isValid) return false; // ReSharper disable RedundantBoolCompare while ((_hasNext = _enumerator.MoveNext()) == true) // ReSharper restore RedundantBoolCompare { _next = _enumerator.Current; int cmp = _comparer.Compare(_current, _next); if (cmp > 0) throw new InvalidDataException("Enumeration out of sequence."); if (cmp != 0 || _duplicateHandling == DuplicateHandling.None) break; if (_duplicateHandling == DuplicateHandling.RaisesException) throw new ArgumentException("Duplicate item in enumeration."); if (_duplicateHandling == DuplicateHandling.LastValueWins) _current = _next; } if (!_hasNext) _next = default(T); return true; } public T Current { get { if (!_isValid) throw new InvalidOperationException(); return _current; } } object System.Collections.IEnumerator.Current { get { return Current; } } void System.Collections.IEnumerator.Reset() { throw new NotSupportedException(); } } /// /// Wraps an existing enumeration of Key/value pairs with an assertion about ascending order and handling /// for duplicate keys. /// public static IEnumerable WithDuplicateHandling( IEnumerable items, IComparer comparer, DuplicateHandling duplicateHandling) { return new EnumWithDuplicateKeyHandling(items, comparer, duplicateHandling); } private class EnumWithDuplicateKeyHandling : IEnumerable { private readonly IEnumerable _items; private readonly IComparer _comparer; private readonly DuplicateHandling _duplicateHandling; public EnumWithDuplicateKeyHandling(IEnumerable items, IComparer comparer, DuplicateHandling duplicateHandling) { _items = items; _comparer = comparer; _duplicateHandling = duplicateHandling; } public IEnumerator GetEnumerator() { return new OrderedEnumerator(_items, _comparer, _duplicateHandling); } [Obsolete] System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() { return GetEnumerator(); } } } }