#region Copyright 2010-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Web;
using System.IO;
using System.Xml;
using System.Xml.XPath;
using CSharpTest.Net.Utils;
namespace CSharpTest.Net.Html
{
/// <summary>
/// Represents an html element
/// </summary>
[System.Diagnostics.DebuggerDisplay("{OriginalTag}")]
public class XmlLightElement : IXPathNavigable
{
    /// <summary>Provides tag name assigned to the ROOT node of the hierarchy</summary>
    public static readonly string ROOT = "";
    /// <summary>Provides tag name assigned to the TEXT nodes in the hierarchy</summary>
    public static readonly string TEXT = "!TEXT";
    /// <summary>Provides tag name assigned to the CDATA nodes in the hierarchy</summary>
    public static readonly string CDATA = "![CDATA[";
    /// <summary>Provides tag name assigned to comment nodes in the hierarchy</summary>
    public static readonly string COMMENT = "!--";
    /// <summary>Provides tag name assigned to the CONTROL nodes in the hierarchy</summary>
    public static readonly string CONTROL = "!";
    /// <summary>Provides tag name assigned to processing instruction nodes in the hierarchy</summary>
    public static readonly string PROCESSING = "?";

    // Delimiters that surround the content kept in OriginalTag for CDATA and comment nodes.
    // Each opener is '<' followed by the corresponding tag-name constant above.
    private const string CDataOpen = "<![CDATA[";
    private const string CDataClose = "]]>";
    private const string CommentOpen = "<!--";
    private const string CommentClose = "-->";

    // Built with new string(...) so the two-space token cannot be silently collapsed to one
    // space by tooling; a one-space token would make NormalizeText loop forever.
    private static readonly string DoubleSpace = new string(' ', 2);

    /// <summary>
    /// Creates a new xml element
    /// </summary>
    /// <param name="parent">The parent element, or null; when non-null this element is appended to its Children.</param>
    /// <param name="tagName">The full tag name, including any namespace prefix.</param>
    public XmlLightElement(XmlLightElement parent, string tagName)
        : this(parent, true, tagName, String.Empty)
    { }

    internal XmlLightElement(XmlLightElement parent, bool closed, string tagName, string tagContent)
        : this(parent, closed, tagName, String.Empty, tagContent, null)
    { }

    internal XmlLightElement(XmlLightElement parent, XmlTagInfo tag)
        : this(parent, tag.SelfClosed, tag.FullName, tag.EndingWhitespace, tag.UnparsedTag, tag.Attributes)
    { }

    internal XmlLightElement(XmlLightElement parent, bool closed, string tagName, string closingWs, string tagContent, IEnumerable<XmlLightAttribute> attrs)
    {
        _originalTag = tagContent;
        Parent = parent;
        _tagName = tagName;
        // closingWs is the whitespace found before the '>' of the start tag.
        OpeningTagWhitespace = closingWs;
        ClosingTagWhitespace = String.Empty;
        IsEmpty = closed;
        if (parent != null)
            parent.Children.Add(this);
        Attributes = new XmlLightAttributes(attrs ?? new XmlLightAttribute[0]);
    }

    private XmlLightElement _parent;
    private readonly string _tagName;
    private bool _isEmpty;
    private string _originalTag;
    private string _openingTagWhitespace;
    private string _closingTagWhitespace;

    /// <summary> Returns the tag name of this html element </summary>
    public string TagName
    {
        get { return _tagName; }
    }

    /// <summary> Whitespace appearing before the close of the start tag (&lt;div &gt;); never null when read </summary>
    public string OpeningTagWhitespace
    {
        get { return _openingTagWhitespace ?? String.Empty; }
        set { _openingTagWhitespace = value; }
    }

    /// <summary> Whitespace appearing before the close of the end tag (&lt;/div &gt;); never null when read </summary>
    public string ClosingTagWhitespace
    {
        get { return _closingTagWhitespace ?? String.Empty; }
        set { _closingTagWhitespace = value; }
    }

    /// <summary>
    /// Returns the text in its original format. Where IsSpecialTag == true, this is used to rewrite
    /// the content. The setter asserts (via Check.Assert) that this is a special tag other than ROOT.
    /// </summary>
    public string OriginalTag
    {
        get { return _originalTag; }
        set
        {
            Check.Assert(IsSpecialTag && TagName != ROOT);
            _originalTag = value;
        }
    }

    /// <summary>
    /// Returns the value (if any) of this html element: the html-decoded text of a TEXT node or the
    /// content of a CDATA node without its delimiters. All other nodes return an empty string.
    /// Setting is supported for TEXT, CDATA and COMMENT nodes and rebuilds the stored tag.
    /// </summary>
    /// <exception cref="NotSupportedException">When set on a node that is not TEXT, CDATA or COMMENT.</exception>
    public string Value
    {
        get
        {
            if (TagName == TEXT)
                return HttpUtility.HtmlDecode(OriginalTag);
            if (TagName == CDATA)
                return StripDelimiters(OriginalTag, CDataOpen, CDataClose);
            return String.Empty;
        }
        set
        {
            if (TagName == TEXT)
                _originalTag = HttpUtility.HtmlEncode(value);
            else if (TagName == CDATA)
                _originalTag = CDataOpen + value + CDataClose;
            else if (TagName == COMMENT)
                _originalTag = CommentOpen + value + CommentClose;
            else
                throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Returns the parent (if any) of this html element. The setter asserts (via Check.Assert) that
    /// the element is either currently detached or is being detached (set to null).
    /// </summary>
    public XmlLightElement Parent
    {
        get { return _parent; }
        set
        {
            Check.Assert(_parent == null || value == null);
            _parent = value;
        }
    }

    /// <summary>
    /// Returns the root-level node
    /// </summary>
    public XmlLightElement Document
    {
        get
        {
            XmlLightElement top = this;
            while (top.Parent != null)
                top = top.Parent;
            return top;
        }
    }

    /// <summary>
    /// Deep-scans hierarchy (starting with this element) for the element with the provided id
    /// </summary>
    /// <param name="id">The value of the "id" attribute to look for.</param>
    /// <returns>The first matching element, or null when none is found.</returns>
    public XmlLightElement GetElementById(string id)
    {
        foreach (XmlLightElement found in FindElement(delegate(XmlLightElement e) { return e.Attributes.ContainsKey("id") && e.Attributes["id"] == id; }))
            return found;
        return null;
    }

    /// <summary>
    /// Finds the elements matching the provided criteria. This element is tested first, then its
    /// descendants level by level. Results are produced lazily as the sequence is enumerated.
    /// </summary>
    /// <param name="match">The test applied to each element.</param>
    public IEnumerable<XmlLightElement> FindElement(Predicate<XmlLightElement> match)
    {
        // The list doubles as a work queue: children are appended while the index walks forward.
        List<XmlLightElement> todo = new List<XmlLightElement>();
        todo.Add(this);
        for (int ix = 0; ix < todo.Count; ix++)
        {
            XmlLightElement test = todo[ix];
            if (match(test))
                yield return test;
            todo.AddRange(test.Children);
        }
    }

    /// <summary>
    /// Returns true if the node has a textual value, i.e. text or cdata
    /// </summary>
    public bool IsText { get { return TagName == TEXT || TagName == CDATA; } }

    /// <summary>
    /// Returns true if the node is a comment
    /// </summary>
    public bool IsComment { get { return TagName == COMMENT; } }

    /// <summary>
    /// Returns true if the node is self-closing (i.e. ends with '/>') and has no children
    /// </summary>
    public bool IsEmpty
    {
        get { return _isEmpty && Children.Count == 0; }
        set { _isEmpty = value; }
    }

    /// <summary>
    /// Returns true if the node is not a normal element
    /// </summary>
    public bool IsSpecialTag
    {
        get { return TagName == ROOT || TagName == TEXT || TagName == CDATA || TagName == COMMENT || TagName == CONTROL || TagName == PROCESSING; }
    }

    /// <summary>
    /// Returns the namespace or empty string
    /// </summary>
    public string Namespace
    {
        get { return NamespaceOrNull ?? String.Empty; }
    }

    /// <summary>
    /// Returns the namespace or null
    /// </summary>
    public string NamespaceOrNull
    {
        get
        {
            int colon = TagName.IndexOf(':');
            return colon < 0 ? null : TagName.Substring(0, colon);
        }
    }

    /// <summary>
    /// Returns the name without the namespace prefix
    /// </summary>
    public string LocalName
    {
        get
        {
            int colon = TagName.IndexOf(':');
            return colon < 0 ? TagName : TagName.Substring(colon + 1);
        }
    }

    /// <summary> Returns the children of this html element </summary>
    public readonly List<XmlLightElement> Children = new List<XmlLightElement>();

    /// <summary> Returns the attributes of this html element </summary>
    public readonly XmlLightAttributes Attributes;

    /// <summary> Returns the inner text of this html element with line breaks and runs of spaces collapsed </summary>
    public string InnerText { get { return NormalizeText(GetInnerText()); } }

    /// <summary> Removes this node from its parent element </summary>
    public void Remove()
    {
        Check.Assert(Parent != null);
        int ix = Parent.Children.IndexOf(this);
        Check.Assert(ix >= 0);
        Parent.Children.RemoveAt(ix);
        Parent = null;
    }

    /// <summary>
    /// Returns the next sibling element, or null when there is none
    /// </summary>
    public XmlLightElement NextSibling
    {
        get { return SiblingAt(1); }
    }

    /// <summary>
    /// Returns the previous sibling element, or null when there is none
    /// </summary>
    public XmlLightElement PrevSibling
    {
        get { return SiblingAt(-1); }
    }

    // Returns the sibling 'offset' positions away from this element, or null when this element has
    // no parent, is not listed in the parent's Children, or the position is out of range.
    private XmlLightElement SiblingAt(int offset)
    {
        if (Parent == null)
            return null;
        List<XmlLightElement> siblings = Parent.Children;
        int self = siblings.IndexOf(this);
        // Check before applying the offset; otherwise -1 + 1 would select the first child.
        if (self < 0)
            return null;
        int target = self + offset;
        if (target >= 0 && target < siblings.Count)
            return siblings[target];
        return null;
    }

    // Returns 'markup' without the leading 'open' and trailing 'close' delimiter. A delimiter that
    // is absent is simply not removed, so malformed input never throws.
    private static string StripDelimiters(string markup, string open, string close)
    {
        if (markup == null)
            return String.Empty;
        int start = markup.StartsWith(open, StringComparison.Ordinal) ? open.Length : 0;
        int end = markup.Length;
        // Only strip the closer when it does not overlap the opener that was just skipped.
        if (end - start >= close.Length && markup.EndsWith(close, StringComparison.Ordinal))
            end -= close.Length;
        return markup.Substring(start, end - start);
    }

    // Replaces CR/LF with spaces, collapses runs of spaces to a single space and trims the ends.
    static string NormalizeText(string text)
    {
        text = text.Replace('\r', ' ');
        text = text.Replace('\n', ' ');
        // Each pass halves the runs; the loop ends once no two adjacent spaces remain.
        while (text.IndexOf(DoubleSpace, StringComparison.Ordinal) >= 0)
            text = text.Replace(DoubleSpace, " ");
        return text.Trim();
    }

    // Concatenates the Value of every text/cdata node at or below this element.
    private string GetInnerText()
    {
        using (StringWriter sw = new StringWriter())
        {
            WriteText(sw);
            return sw.ToString();
        }
    }

    private void WriteText(TextWriter wtr)
    {
        if (IsText)
            wtr.Write(Value);
        else
        {
            foreach (XmlLightElement e in Children)
                e.WriteText(wtr);
        }
    }

    /// <summary>
    /// Returns the elements from the given xpath expression
    /// </summary>
    public IEnumerable<XmlLightElement> Select(string xpath)
    {
        foreach (XmlLightNavigator nav in CreateNavigator().Select(xpath))
            yield return nav.Element;
    }

    /// <summary>
    /// Returns the first element from the given xpath expression, or null when nothing matches
    /// </summary>
    public XmlLightElement SelectSingleNode(string xpath)
    {
        foreach (XmlLightElement e in Select(xpath))
            return e;
        return null;
    }

    // Looks up the value of the "xmlns:<prefix>" attribute on this element or its nearest ancestor
    // that declares it. Returns null for a null/empty prefix or when no declaration is found.
    private string FindPrefixUri(string nsPrefix)
    {
        if (String.IsNullOrEmpty(nsPrefix))
            return null;
        string attr = String.Format("xmlns:{0}", nsPrefix);
        XmlLightElement e = this;
        while (e != null)
        {
            string value;
            if (e.Attributes.TryGetValue(attr, out value))
                return value;
            e = e.Parent;
        }
        return null;
    }

    /// <summary>
    /// Writes the text to the xml writer while preserving entities and still ensuring
    /// the remainder of the text is properly encoded.
    /// </summary>
    protected virtual void WriteText(XmlWriter wtr, string encodedValue)
    {
        int currIx = 0;
        // Text between matches of RegexPatterns.HtmlEntity goes through WriteString (which escapes
        // it); each match itself is emitted raw so it is not escaped a second time.
        foreach (Match match in RegexPatterns.HtmlEntity.Matches(encodedValue))
        {
            wtr.WriteString(encodedValue.Substring(currIx, match.Index - currIx));
            wtr.WriteRaw(match.Value);
            currIx = match.Index + match.Length;
        }
        wtr.WriteString(encodedValue.Substring(currIx, encodedValue.Length - currIx));
    }

    /// <summary>
    /// Writes XML to an xml writer to ensure proper formatting
    /// </summary>
    public virtual void WriteXml(XmlWriter wtr)
    {
        if (TagName == TEXT)
        {
            if (OriginalTag.Trim().Length > 0)//non-whitespace?
                WriteText(wtr, OriginalTag);
            return;
        }
        if (TagName == CDATA)
        {
            wtr.WriteCData(Value);
            return;
        }
        if (TagName == COMMENT)
        {
            wtr.WriteComment(StripDelimiters(OriginalTag, CommentOpen, CommentClose));
            return;
        }
        if (IsSpecialTag)
        {
            wtr.WriteRaw(OriginalTag);
            return;
        }

        // A default-namespace declaration on this element takes precedence over the prefix lookup.
        wtr.WriteStartElement(NamespaceOrNull, LocalName,
            Attributes.ContainsKey("xmlns")
            ? Attributes["xmlns"]
            : FindPrefixUri(NamespaceOrNull));

        foreach (XmlLightAttribute kv in Attributes.ToArray())
        {
            // The default namespace was already supplied to WriteStartElement above.
            if (kv.Name == "xmlns")
                continue;

            if (kv.Namespace == "xmlns")
            {
                wtr.WriteAttributeString(kv.Namespace, kv.LocalName, null, kv.Value);
            }
            else
            {
                wtr.WriteAttributeString(kv.NamespaceOrNull, kv.LocalName,
                    FindPrefixUri(kv.NamespaceOrNull),
                    HttpUtility.HtmlDecode(kv.Value));
            }
        }

        foreach (XmlLightElement e in Children)
            e.WriteXml(wtr);

        // IsEmpty is already false whenever children exist.
        if (IsEmpty)
            wtr.WriteEndElement();
        else
            wtr.WriteFullEndElement();
    }

    /// <summary>
    /// Writes the re-constructed innerHTML in a well-formed Xml format, indented by two spaces
    /// </summary>
    /// <remarks>
    /// The XmlTextWriter created here is disposed on exit; per the XmlTextWriter documentation
    /// closing it also closes the underlying <paramref name="wtr"/>.
    /// </remarks>
    public void WriteXml(TextWriter wtr)
    {
        using (XmlTextWriter xw = new XmlTextWriter(wtr))
        {
            xw.Indentation = 2;
            xw.IndentChar = ' ';
            xw.Formatting = System.Xml.Formatting.Indented;
            WriteXml(xw);
            xw.Flush();
        }
    }

    /// <summary>
    /// Writes the modified document in its original formatting
    /// </summary>
    public virtual void WriteUnformatted(TextWriter wtr)
    {
        if (IsSpecialTag)
        {
            wtr.Write(OriginalTag);
            return;
        }

        wtr.Write("<{0}", TagName);
        foreach (XmlLightAttribute kv in Attributes.ToArray())
        {
            string quote = kv.Quote == XmlQuoteStyle.Double ? "\"" :
                           kv.Quote == XmlQuoteStyle.Single ? "'" :
                           String.Empty;
            wtr.Write(kv.Before);
            wtr.Write(kv.Name);
            // An unquoted attribute with no value is written as a bare name.
            if (kv.Quote != XmlQuoteStyle.None || !String.IsNullOrEmpty(kv.Value))
            {
                wtr.Write('=');
                wtr.Write(quote);
                wtr.Write(kv.Value);
                wtr.Write(quote);
            }
        }
        wtr.Write(OpeningTagWhitespace);

        // IsEmpty is already false whenever children exist.
        bool selfClosing = IsEmpty;
        if (selfClosing)
            wtr.Write('/');
        wtr.Write('>');

        foreach (XmlLightElement e in Children)
            e.WriteUnformatted(wtr);

        if (!selfClosing)
            wtr.Write("</{0}{1}>", TagName, ClosingTagWhitespace);
    }

    /// <summary>
    /// Returns the re-constructed innerHTML in a well-formed Xml format
    /// </summary>
    public string InnerXml
    {
        get
        {
            using (StringWriter sw = new StringWriter())
            {
                WriteXml(sw);
                return sw.ToString();
            }
        }
    }

    /// <summary>
    /// Returns a new System.Xml.XPath.XPathNavigator object.
    /// </summary>
    public XPathNavigator CreateNavigator()
    {
        return new XmlLightNavigator(this);
    }
}
}