#region Copyright 2011-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#endregion
using System;
using System.Collections.Generic;
using System.IO;
using CSharpTest.Net.Interfaces;
using CSharpTest.Net.Serialization;
using CSharpTest.Net.Synchronization;
using CSharpTest.Net.Utils;
namespace CSharpTest.Net.Collections
{
/// Defines the storage type to use
public enum StorageType
{
/// Uses in-memory storage
Memory,
/// Uses a file to store data, (Set by setting the FileName property)
Disk,
/// Uses a custom data store, (Set by setting the StorageSystem property)
Custom
}
/// Determines if the file specified should be created
public enum CreatePolicy
{
/// Does not create a new file
Never,
/// Creates a new file even if one already exists
Always,
/// Creates a new file only if it does not exist
IfNeeded
}
/// Determines the type of node caching used in the tree
public enum CachePolicy
{
/// Does not cache, allways loads from storage.
None,
/// Keeps every loaded object in memory.
All,
/// Keeps a history of objects in memory (see CacheKeepAliveXXX properties)
Recent,
}
///
/// Defines the action to perform when opening a BPlusTree with an existing log file.
///
public enum ExistingLogAction
{
///
/// Infers the default wether or not the data file was created. For newly created data
/// files (CreatePolicy = Always, or IfNeeded and the file is missing) the default will
/// be Truncate. When existing data files are opened the default will ReplayAndCommit.
///
Default,
/// Ignore the existing entries in the log
Ignore,
/// Replay the log entries uncommitted
Replay,
/// Replay the log entries and commit the changes to the store
ReplayAndCommit,
/// Ignore the existing entries and truncate the log
Truncate,
}
///
/// Defines the levels of durability the store will try to achieve. 'Uncommitted changes' in the descriptions below
/// refers to all changes made to the tree since the last call to CommitChanges() on the BPlusTree class.
///
public enum StoragePerformance
{
/// (100k rps) Uncommitted changes will be lost, a crash durring commit may corrupt state.
///
/// No changes are committed until a call to Commit is made, durring the commit a partial write may corrupt the store.
///
Fastest = 1,
/// (30k rps) Uses a system-cached transaction log to recover uncommitted changes after a process crash.
/// Will not corrupt state; however, in a power outage or system failure it may loose some comitted records.
LogFileInCache = 2,
/// (8k rps) Every write will commit changes to the storage file immediately into system cache
/// May corrupt state and/or loose data in the event of a power outage
CommitToCache = 3,
/// (2k rps) Uses a cache-writethrough transaction log to recover uncommitted changes after a power outage or system crash.
/// Complies with ACID durability requirements, can be expensive to recover from the log.
LogFileNoCache = 4,
/// (1k rps) Every write will commit changes to the storage file immediately bypassing system cache (Slowest/Safest)
/// Complies with ACID durability requirements
CommitToDisk = 5,
/// Defaults to using a transaction log in system cache for best performance/durability.
Default = LogFileInCache,
}
/// Determines the binary file format and backwards compatibility
public enum FileVersion
{
/// Version 1 compatable
Version1 = 1,
/// Version 2 compatable
Version2 = 2,
}
///
/// Defines the options nessessary to construct a BPlusTree implementation
///
public abstract class BPlusTreeOptions : ICloneable
{
private readonly ISerializer _keySerializer;
private readonly ISerializer _valueSerializer;
private IComparer _keyComparer;
private StorageType _storageType = StorageType.Memory;
private CreatePolicy _createFile = CreatePolicy.Never;
private bool _readOnly;
private ILockFactory _lockingFactory = new LockFactory();
private ILockStrategy _callLevelLock = IgnoreLocking.Instance;
private int _lockTimeout = 120000;
private int _minimumChildNodes = 12;
private int _maximumChildNodes = 32; // (assumes a key size of apx 100 bytes: (FileBlockSize - StoreageOverhead) / (AvgKeyBytes + ChildLinkSize)
private int _fillChildNodes = 22;
private int _minimumValueNodes = 3;
private int _maximumValueNodes = 8; // (assumes a value size of apx 500 bytes: (FileBlockSize - StoreageOverhead) / (AvgValueBytes + AvgKeyBytes)
private int _fillValueNodes = 4;
private int _fileBlockSize = 4096;
private ExistingLogAction _existingLogAction = ExistingLogAction.Default;
private string _fileName;
private INodeStorage _storageSystem;
private CachePolicy _cachePolicy = CachePolicy.Recent;
private int _keepAliveMinHistory = 10;
private int _keepAliveMaxHistory = 100;
private int _keepAliveTimeout = 60000;
///
/// Constructs the options configuration to initialize a BPlusTree instance
///
protected BPlusTreeOptions(ISerializer keySerializer, ISerializer valueSerializer, IComparer comparer)
{
_keySerializer = Check.NotNull(keySerializer);
_valueSerializer = Check.NotNull(valueSerializer);
KeyComparer = comparer;
TransactionLogLimit = -1;
}
/// Accesses the key serializer given to the constructor
public ISerializer KeySerializer { get { return _keySerializer; } }
/// Accesses the key serializer given to the constructor
public ISerializer ValueSerializer { get { return _valueSerializer; } }
/// Defines a custom IComparer<T> to be used for comparing keys
public IComparer KeyComparer
{
get { return _keyComparer ?? Comparer.Default; }
set
{
try { Check.NotNull(value); }
catch (Exception e)
{ throw new InvalidConfigurationValueException("KeyComparer", "You must speicify a valid IComparer.", e); }
_keyComparer = value;
}
}
///
/// Returns the version this option set is compatable with.
///
public abstract FileVersion FileVersion { get; }
///
/// Calculates default node-threasholds based upon the average number of bytes in key and value
///
public void CalcBTreeOrder(int avgKeySizeBytes, int avgValueSizeBytes)
{
CalculateOrder(avgKeySizeBytes, avgValueSizeBytes);
}
///
/// Calculates default node-threasholds based upon the average number of bytes in key and value
///
protected abstract void CalculateOrder(int avgKeySizeBytes, int avgValueSizeBytes);
///
/// Can be used to explicitly specify the storage type, or by simply providing a file name this
/// will be done for you. If no file name was specified the default is to use a memory store.
///
public StorageType StorageType
{
get { return _storageType; }
set
{
InvalidConfigurationValueException.Assert(Enum.IsDefined(typeof(StorageType), value), "StorageType", "The value is not defined.");
InvalidConfigurationValueException.Assert(value != StorageType.Custom || _storageSystem != null, "StorageType", "Please provide the StorageSystem to be used.");
InvalidConfigurationValueException.Assert(value != StorageType.Disk || _fileName != null, "StorageType", "Please provide the FileName to be used.");
_storageType = value;
}
}
///
/// Sets the BTree into a read-only mode (only supported when opening an existing file)
///
public bool ReadOnly
{
get { return _readOnly; }
set
{
if (value)
{
InvalidConfigurationValueException.Assert(CreateFile == CreatePolicy.Never, "ReadOnly", "ReadOnly can only be used when CreateFile is Never");
InvalidConfigurationValueException.Assert(StorageType == StorageType.Disk, "ReadOnly", "ReadOnly can only be used with the file storage");
InvalidConfigurationValueException.Assert(File.Exists(FileName), "ReadOnly", "ReadOnly can only be used with an existing file");
}
_readOnly = value;
}
}
///
/// Sets the custom implementation of the storage back-end to use for the BTree
///
public INodeStorage StorageSystem
{
get { return _storageType == StorageType.Custom ? _storageSystem : null; }
set { _storageSystem = Check.NotNull(value); _storageType = StorageType.Custom; }
}
///
/// Gets or sets the FileName that should be used to store the BTree
///
public string FileName
{
get { return _fileName; }
set
{
Check.NotNull(value);
try { GC.KeepAlive(Path.GetFullPath(value)); }
catch (Exception e)
{ throw new InvalidConfigurationValueException("FileName", e.Message, e); }
_fileName = value;
_storageType = StorageType.Disk;
}
}
///
/// Gets or sets the file-create policy used when backing with a file storage
///
public CreatePolicy CreateFile
{
get { return _createFile; }
set
{
InvalidConfigurationValueException.Assert(Enum.IsDefined(typeof(CreatePolicy), value), "CreateFile", "The value is not defined.");
_createFile = value;
}
}
///
/// Gets or sets the number of bytes per file-block used in the file storage
///
public int FileBlockSize
{
get { return _fileBlockSize; }
set
{
InvalidConfigurationValueException.Assert(value >= 512 && value <= 0x10000, "FileBlockSize", "The valid range is from 512 bytes to 64 kilobytes in powers of 2.");
_fileBlockSize = value;
}
}
///
/// Gets or sets the number of milliseconds to wait before failing a lock request, the default
/// of two minutes should be more than adequate.
///
public int LockTimeout
{
get { return _lockTimeout; }
set
{
InvalidConfigurationValueException.Assert(value >= -1 && value <= int.MaxValue, "LockTimeout", "The valid range is from -1 to MaxValue.");
_lockTimeout = value;
}
}
///
/// Gets or sets the locking factory to use for accessing shared data. The default is WriterOnlyLocking()
/// which does not perform read locks, rather it will rely on the cache of the btree and may preform dirty
/// reads. You can use any implementation of ILockFactory; however, the SimpleReadWriteLocking seems to
/// perform the most efficiently for both reader/writer locks. Additionally wrapping that instance in a
/// ReserveredWriterLocking() instance will allow reads to continue up until a writer begins the commit
/// process. If you are only accessing the BTree instance from a single thread this can be set to
/// IgnoreLocking. Be careful of using ReaderWriterLocking as the write-intesive nature of the BTree will
/// suffer extreme performance penalties with this lock.
///
public ILockFactory LockingFactory
{
get { return _lockingFactory; }
set { _lockingFactory = Check.NotNull(value); }
}
///
/// Defines a reader/writer lock that used to control exclusive tree access when needed. The public
/// methods for EnableCount(), Clear(), and UnloadCache() each acquire an exclusive (write) lock while
/// all other public methods acquire a shared (read) lock. By default this lock is non-operational
/// (an instance of IgnoreLocking) so if you need the above methods to work while multiple threads are
/// accessing the tree, or if you exclusive access to the tree, specify a lock instance. Since this
/// lock is primarily a read-heavy lock consider using the ReaderWriterLocking or SimpleReadWriteLocking.
///
public virtual ILockStrategy CallLevelLock
{
get { return _callLevelLock; }
set { _callLevelLock = Check.NotNull(value); }
}
///
/// A quick means of setting all the min/max values for the node counts using this value as a basis
/// for the Maximum fields and one-quarter of this value for Minimum fields provided the result is in
/// range.
///
public int BTreeOrder
{
set
{
InvalidConfigurationValueException.Assert(value >= 4 && value <= 256, "BTreeOrder", "The valid range is from 4 to 256.");
MaximumChildNodes = MaximumValueNodes = value;
MinimumChildNodes = MinimumValueNodes = Math.Max(2, value >> 2);
}
}
///
/// The smallest number of child nodes that should be linked to before refactoring the tree to remove
/// this node. In a 'normal' and/or purest B+Tree this is always half of max; however for performance
/// reasons this B+Tree allow any value equal to or less than half of max but at least 2.
///
/// A number in the range of 2 to 128 that is at most half of MaximumChildNodes.
public int MinimumChildNodes
{
get { return _minimumChildNodes; }
set
{
InvalidConfigurationValueException.Assert(value >= 2 && value <= (MaximumChildNodes / 2), "MinimumChildNodes", "The valid range is from 2 to (MaximumChildNodes / 2).");
_minimumChildNodes = value;
_fillChildNodes = ((_maximumChildNodes - _minimumChildNodes) >> 1) + _minimumChildNodes;
}
}
///
/// The largest number of child nodes that should be linked to before refactoring the tree to split
/// this node into two. This property has a side-effect on MinimumChildNodes to ensure that it continues
/// to be at most half of MaximumChildNodes.
///
/// A number in the range of 4 to 256.
public int MaximumChildNodes
{
get { return _maximumChildNodes; }
set
{
InvalidConfigurationValueException.Assert(value >= 4 && value <= 256, "MaximumChildNodes", "The valid range is from 4 to 256.");
_maximumChildNodes = value;
_minimumChildNodes = Math.Min(value, _maximumChildNodes / 2);
_fillChildNodes = ((_maximumChildNodes - _minimumChildNodes) >> 1) + _minimumChildNodes;
}
}
///
/// The smallest number of values that should be contained in this node before refactoring the tree to remove
/// this node. In a 'normal' and/or purest B+Tree this is always half of max; however for performance
/// reasons this B+Tree allow any value equal to or less than half of max but at least 2.
///
/// A number in the range of 2 to 128 that is at most half of MaximumValueNodes.
public int MinimumValueNodes
{
get { return _minimumValueNodes; }
set
{
InvalidConfigurationValueException.Assert(value >= 2 && value <= (MaximumValueNodes / 2), "MinimumValueNodes", "The valid range is from 2 to (MaximumValueNodes / 2).");
_minimumValueNodes = value;
_fillValueNodes = ((_maximumValueNodes - _minimumValueNodes) >> 1) + _minimumValueNodes;
}
}
///
/// The largest number of values that should be contained in this node before refactoring the tree to split
/// this node into two. This property has a side-effect on MinimumValueNodes to ensure that it continues
/// to be at most half of MaximumValueNodes.
///
/// A number in the range of 4 to 256.
public int MaximumValueNodes
{
get { return _maximumValueNodes; }
set
{
InvalidConfigurationValueException.Assert(value >= 4 && value <= 256, "MaximumValueNodes", "The valid range is from 4 to 256.");
_maximumValueNodes = value;
_minimumValueNodes = Math.Min(value, _maximumValueNodes / 2);
_fillValueNodes = ((_maximumValueNodes - _minimumValueNodes) >> 1) + _minimumValueNodes;
}
}
///
/// Determines how long loaded nodes stay in memory, Full keeps all loaded nodes alive and is the
/// most efficient, The default Recent keeps recently visited nodes alive based on the CacheKeepAlive
/// properties, and None does not cache the nodes at all but does maintain a cache of locks for
/// each node visited.
///
public CachePolicy CachePolicy
{
get { return _cachePolicy; }
set
{
InvalidConfigurationValueException.Assert(Enum.IsDefined(typeof(CachePolicy), value), "CachePolicy", "The value is not defined.");
_cachePolicy = value;
}
}
///
/// CacheKeepAliveFactory provides a delegate to inject an implementation of the IObjectKeepAlive
/// interface while then igoring all the other CacheKeepAliveXXX properties.
///
public FactoryMethod CacheKeepAliveFactory
{
get { return _cacheKeepAliveFactory; }
set { _cacheKeepAliveFactory = value; }
}
///
/// Determins minimum number of recently visited nodes to keep alive in memory. This number defines
/// the history size, not the number of distinct nodes. This number will always be kept reguardless
/// of the timeout. Specify a value of 0 to allow the timeout to empty the cache.
///
public int CacheKeepAliveMinimumHistory
{
get { return _keepAliveMinHistory; }
set
{
InvalidConfigurationValueException.Assert(value >= 0 && value <= int.MaxValue, "CacheKeepAliveMinimumHistory", "The valid range is from 0 to MaxValue.");
_keepAliveMinHistory = value;
_keepAliveMaxHistory = Math.Max(_keepAliveMaxHistory, value);
}
}
///
/// Determins maximum number of recently visited nodes to keep alive in memory. This number defines
/// the history size, not the number of distinct nodes. The ceiling is always respected reguardless
/// of the timeout. Specify a value of 0 to disable history keep alive.
///
public int CacheKeepAliveMaximumHistory
{
get { return _keepAliveMaxHistory; }
set
{
InvalidConfigurationValueException.Assert(value >= 0 && value <= int.MaxValue, "CacheKeepAliveMaximumHistory", "The valid range is from 0 to MaxValue.");
_keepAliveMaxHistory = value;
_keepAliveMinHistory = Math.Min(_keepAliveMinHistory, value);
}
}
///
/// If the cache contains more that CacheKeepAliveMinimumHistory items, this timeout will start to
/// remove those items until the cache history is reduced to CacheKeepAliveMinimumHistory. It is
/// important to know that the BPlusTree itself contains no theads and this timeout will not be
/// respected if cache is not in use.
///
public int CacheKeepAliveTimeout
{
get { return _keepAliveTimeout; }
set
{
InvalidConfigurationValueException.Assert(value >= 0 && value <= int.MaxValue, "CacheKeepAliveTimeout", "The valid range is from 0 to MaxValue.");
_keepAliveTimeout = value;
}
}
///
/// Creates a shallow clone of the configuration options.
///
public BPlusTreeOptions Clone() { return (BPlusTreeOptions)MemberwiseClone(); }
object ICloneable.Clone() { return MemberwiseClone(); }
#region INTERNAL USE ONLY
/// Enables or disables the caching and reordering of node write operations
protected void SetStorageCache(bool cached) { UseStorageCache = cached; }
/// Sets the transaction log to use
protected void SetLogFile(ITransactionLog logFile) { LogFile = logFile; }
internal bool UseStorageCache;
internal ITransactionLog LogFile;
private FactoryMethod _cacheKeepAliveFactory;
internal ExistingLogAction ExistingLogAction
{
get { return _existingLogAction; }
set { _existingLogAction = value; }
}
internal long TransactionLogLimit { get; set; }
/// Used to create the correct storage type
internal abstract INodeStorage CreateStorage();
/// The desired fill-size of node that contain children
internal int FillChildNodes { get { return _fillChildNodes; } }
/// The desired fill-size of node that contain values
internal int FillValueNodes { get { return _fillValueNodes; } }
///
/// Creates the keep-alive object reference tracking implementation
///
internal IObjectKeepAlive CreateCacheKeepAlive()
{
if (_cacheKeepAliveFactory != null)
return _cacheKeepAliveFactory();
return new ObjectKeepAlive(
CacheKeepAliveMinimumHistory,
CacheKeepAliveMaximumHistory,
TimeSpan.FromMilliseconds(CacheKeepAliveTimeout));
}
#endregion
}
}