#region Copyright 2010-2014 by Roger Knapp, Licensed under the Apache License, Version 2.0
/* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#endregion
using System;
using System.Collections.Generic;
using NUnit.Framework;
using CSharpTest.Net.Html;
using System.IO;
using System.Text.RegularExpressions;
using CSharpTest.Net.Utils;
using System.Net;
using System.Xml.XPath;
using CSharpTest.Net.IO;
using XmlDocument = System.Xml.XmlDocument;
using System.Xml;
namespace CSharpTest.Net.Library.Test
{
[TestFixture]
public partial class TestHtmlParser
{
/// <summary>
/// Shared markup fixture: the tests in this class pass it to <c>HtmlLightDocument</c> and to
/// <c>XmlLightParser.ParseText</c>.
/// </summary>
// NOTE(review): the tests query this fixture for /html/head, body[@id='one'], div[@id='two'] and
// p[@class='1'] / [@class='2'], none of which are visible in the literal as it stands here --
// presumably the markup was lost somewhere; confirm against version control.
const string document = @"
|
this is content.
";
/// <summary>
/// Flattens <paramref name="text"/> for whitespace-insensitive comparison: carriage returns,
/// line feeds and tabs become spaces, leading/trailing whitespace is trimmed, and every run
/// of spaces is collapsed to a single space.
/// </summary>
/// <param name="text">The text to flatten; must not be null.</param>
/// <returns>The flattened text.</returns>
private string Normalize(string text)
{
    text = text.Replace('\r', ' ').Replace('\n', ' ').Replace('\t', ' ').Trim();

    // Look for a *pair* of spaces. Searching for (and replacing with) a single space never
    // makes progress and loops forever as soon as the text contains one space. The pair is
    // built programmatically so it cannot be silently collapsed by whitespace-mangling tools.
    string pair = new string(' ', 2);
    while (text.IndexOf(pair, StringComparison.Ordinal) >= 0)
    {
        text = text.Replace(pair, " ");
    }
    return text;
}
/// <summary>
/// Writes a parsed document back out with <c>WriteUnformatted</c>, once to an in-memory string
/// writer and once to a stream-backed writer, and asserts that both outputs equal the input text.
/// </summary>
[Test]
public void TestDocUnformatted()
{
    string docText = @"";
    XmlLightDocument doc = new XmlLightDocument(docText);

    // Pass 1: string-backed writer.
    using (StringWriter text = new StringWriter())
    {
        doc.WriteUnformatted(text);
        Assert.AreEqual(docText, text.ToString());
    }

    // Pass 2: stream-backed writer. The writer is flushed (not disposed) before reading because
    // disposing it would close the underlying MemoryStream; disposal happens after the read.
    using (MemoryStream ms = new MemoryStream())
    using (StreamWriter writer = new StreamWriter(ms))
    {
        doc.WriteUnformatted(writer);
        writer.Flush();
        ms.Position = 0;
        using (StreamReader reader = new StreamReader(ms))
        {
            Assert.AreEqual(docText, reader.ReadToEnd());
        }
    }
}
/// <summary>
/// Builds an html/body tree by hand under an empty <c>HtmlLightDocument</c> and checks the
/// whitespace-normalized <c>InnerXml</c> of the document.
/// </summary>
[Test]
public void TestDocToXml()
{
    HtmlLightDocument doc = new HtmlLightDocument();

    // Constructing an element with a parent is what attaches it to the tree.
    XmlLightElement html = new XmlLightElement(doc, "html");
    XmlLightElement body = new XmlLightElement(html, "body");
    body.IsEmpty = false;
    body.Attributes.Add("id", "bdy");

    string actual = Normalize(doc.InnerXml);
    Assert.AreEqual(" ", actual);
}
/// <summary>
/// Parses an XML string and asserts that the normalized <c>InnerXml</c> of the resulting
/// document equals the original string.
/// </summary>
[Test]
public void TestXmlNamespace()
{
    string xml = @"";
    XmlLightDocument parsed = new XmlLightDocument(xml);

    string actual = Normalize(parsed.InnerXml);

    Assert.AreEqual(xml, actual);
}
/// <summary>
/// Parses a multi-line XML string (single quotes swapped for double quotes first) and asserts
/// that input and <c>InnerXml</c> agree once both are whitespace-normalized.
/// </summary>
[Test]
public void TestXmlNamespacePrefix()
{
    string xml = @"
hello
world
6ft
155 lbs
".Replace('\'', '"');

    XmlLightDocument parsed = new XmlLightDocument(xml);

    string expected = Normalize(xml);
    string actual = Normalize(parsed.InnerXml);
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Parses HTML containing a mix of entity references and stray markup characters, serializes
/// the document through an <see cref="XmlWriter"/> and compares the output with the expected text.
/// </summary>
[Test]
public void TestHtmlEntityRef()
{
    string html = @"
< !"">
this char '<' and this one '>' and this one '&' should be encoded.
We encoded ' ' and à and ' ' and ' ' all by ourselves.
This in not valid xml , nor is , but we still allow it.
This entity name will pass-through &unknown; this will not &whateverthatmeans;
and nor will these &; &h; &l t; &1two; &234; g; -123;.
";
    string expect = @"
this char '<' and this one '>' and this one '&' should be encoded.
We encoded ' ' and à and ' ' and ' ' all by ourselves.
This in not valid xml , nor is , but we still allow it.
This entity name will pass-through &unknown; this will not &whateverthatmeans;
and nor will these &; &#; &h; &l t; &1two; &234; g; &#-123;.
";
    XmlLightDocument doc = new HtmlLightDocument(html);

    // No indentation, no injected newlines and no XML declaration, so the writer
    // adds nothing of its own to the serialized text being compared.
    XmlWriterSettings settings = new XmlWriterSettings();
    settings.CheckCharacters = true;
    settings.Indent = false;
    settings.IndentChars = "";
    settings.NewLineChars = "";
    settings.NewLineHandling = NewLineHandling.None;
    settings.OmitXmlDeclaration = true;
    settings.CloseOutput = false;

    StringWriter buffer = new StringWriter();
    XmlWriter writer = XmlWriter.Create(buffer, settings);
    doc.WriteXml(writer);
    writer.Flush();

    string actual = buffer.ToString();
    Assert.AreEqual(expect, actual);
}
/// <summary>
/// Parses the shared fixture as HTML, writes it to a temp file as XML, validates that file
/// against the XHTML 1.0 Transitional DTD, then re-parses it as XML and asserts that both
/// documents expose the same <c>InnerXml</c>.
/// </summary>
[Test]
public void TestParseDocument()
{
    XmlLightDocument parsed = new HtmlLightDocument(document);

    using (TempFile temp = new TempFile())
    {
        // The writer must be closed before the file is validated and read back.
        using (TextWriter output = new StreamWriter(temp.Open()))
        {
            parsed.WriteXml(output);
        }

        XhtmlValidation validator = new XhtmlValidation(XhtmlDTDSpecification.XhtmlTransitional_10);
        validator.Validate(temp.TempPath);

        XmlLightDocument reloaded = new XmlLightDocument(temp.ReadAllText());
        Assert.AreEqual(parsed.InnerXml, reloaded.InnerXml);
    }
}
/// <summary>
/// Walks the attribute sequences returned by <c>XmlLightParser.ParseAttributes</c> and checks
/// each attribute's name and value in order, including that the sequence ends where expected.
/// </summary>
// NOTE(review): both input literals are empty here although the assertions expect attributes
// a..e and version -- presumably the markup was lost; confirm against version control.
[Test]
public void TestParseAttributes()
{
    // 'var' lets the compiler pick up the generic enumerator type; the non-generic IEnumerator
    // is not in scope in this file and its Current is 'object', so .Name/.Value would not bind.
    // 'using' also guarantees each enumerator is disposed.
    using (var en = XmlLightParser.ParseAttributes("").GetEnumerator())
    {
        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("a", en.Current.Name);
        Assert.AreEqual("1", en.Current.Value);

        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("b", en.Current.Name);
        Assert.AreEqual("2", en.Current.Value);

        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("c", en.Current.Name);
        Assert.AreEqual("3", en.Current.Value);

        // Attribute "d" is expected with a null value, "e" with an empty one.
        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("d", en.Current.Name);
        Assert.AreEqual(null, en.Current.Value);

        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("e", en.Current.Name);
        Assert.AreEqual("", en.Current.Value);

        Assert.IsFalse(en.MoveNext());
    }

    using (var en = XmlLightParser.ParseAttributes("").GetEnumerator())
    {
        Assert.IsTrue(en.MoveNext());
        Assert.AreEqual("version", en.Current.Name);
        Assert.AreEqual("1.0", en.Current.Value);

        Assert.IsFalse(en.MoveNext());
    }
}
/// <summary>
/// Runs <c>XmlLightParser.ParseText</c> over the shared fixture and compares the
/// whitespace-normalized result with the expected plain text.
/// </summary>
[Test]
public void TestParseText()
{
    string rawText = XmlLightParser.ParseText(document);

    Assert.AreEqual("Document Title this is > cdata! Hi, this is content.", Normalize(rawText));
}
/// <summary>
/// Evaluates the same XPath against a <see cref="XmlDocument"/> loaded from the light document's
/// XML and against the light document's own navigator, then checks that element lookup through
/// <c>SelectSingleNode</c> distinguishes "Head" from "head".
/// </summary>
[Test]
public void TestXPath()
{
    XmlDocument reference = new XmlDocument();
    XmlLightDocument doc = new HtmlLightDocument(document);
    string testpath = "/html/body[@id='one' and @class='cls']/../body/div[@id='two' and text() = 'Hi']/@id";

    // The framework's own DOM must be able to resolve the path...
    reference.LoadXml(doc.CreateNavigator().InnerXml);
    Assert.IsNotNull(reference.SelectSingleNode(testpath));

    // ...and so must the light document's navigator, landing on the id attribute.
    XPathNavigator hit = doc.CreateNavigator().SelectSingleNode(testpath);
    Assert.IsNotNull(hit);
    Assert.IsTrue(hit.NodeType == XPathNodeType.Attribute);
    Assert.AreEqual("id", hit.Name);
    Assert.AreEqual("two", hit.Value);

    XmlLightElement wrongCase = doc.SelectSingleNode("/html/Head");
    Assert.IsNull(wrongCase);

    XmlLightElement head = doc.SelectSingleNode("/html/head");
    Assert.IsNotNull(head);
}
/// <summary>
/// Checks sibling links at the edges of the tree, the tag/namespace/local-name properties of an
/// element found by XPath, and how a "prefix:name" tag is split on a detached element.
/// </summary>
[Test]
public void TestXmlElement()
{
    XmlLightDocument doc = new HtmlLightDocument(document);

    // Neither the document nor its first/last child has a sibling beyond the edge.
    Assert.IsNull(doc.PrevSibling);
    Assert.IsNull(doc.Children[0].PrevSibling);
    Assert.IsNull(doc.NextSibling);
    int lastIndex = doc.Children.Count - 1;
    Assert.IsNull(doc.Children[lastIndex].NextSibling);

    XmlLightElement second = doc.SelectSingleNode("/html/body//*[@class='2']");
    Assert.IsNotNull(second);
    Assert.AreEqual("p", second.TagName);
    Assert.IsNotNull(second.PrevSibling);
    Assert.AreEqual("p", second.PrevSibling.TagName);
    Assert.AreEqual("", second.Namespace);
    Assert.AreEqual("p", second.LocalName);

    // A detached element (null parent) whose tag carries a prefix.
    XmlLightElement prefixed = new XmlLightElement(null, "a:b");
    Assert.AreEqual("a", prefixed.Namespace);
    Assert.AreEqual("b", prefixed.LocalName);
}
/// <summary>
/// Exercises the navigator returned for the fixture: sibling movement, position comparison,
/// namespace movement, NameTable/BaseURI sharing between clones, and <c>MoveToId</c>.
/// </summary>
[Test]
public void TestXmlNavigator()
{
    HtmlLightDocument doc = new HtmlLightDocument(document);
    XPathNavigator nav = doc.CreateNavigator().SelectSingleNode("/html/body//p[@class='1']");
    XPathNavigator start = nav.Clone();

    // Step forward and back again; the navigator must end up where the clone still is.
    Assert.IsFalse(nav.MoveToPrevious());
    Assert.IsTrue(nav.MoveToNext());
    Assert.IsTrue(nav.MoveToPrevious());
    Assert.IsTrue(nav.IsSamePosition(start));

    Assert.IsFalse(nav.MoveToFirstNamespace());
    Assert.IsFalse(nav.MoveToNextNamespace());

    Assert.IsTrue(object.ReferenceEquals(nav.NameTable, start.NameTable));
    Assert.IsNotNull(nav.BaseURI);
    Assert.AreEqual(nav.BaseURI, start.BaseURI);

    Assert.IsTrue(nav.MoveToId("one"));
    Assert.AreEqual("body", nav.Name);
    // A failed MoveToId must leave the navigator on the element it was on.
    Assert.IsFalse(nav.MoveToId("none-exists"));
    Assert.AreEqual("body", nav.Name);
}
/// <summary>
/// Checks <c>InnerText</c> for the body element, for a nested div, and for the node selected
/// by the <c>text()</c> XPath step.
/// </summary>
[Test]
public void TestInnerText()
{
    XmlLightDocument doc = new HtmlLightDocument(document);
    XmlLightElement body = doc.SelectSingleNode("/html/body");

    string bodyText = Normalize(body.InnerText);
    Assert.AreEqual("this is > cdata! Hi, this is content.", bodyText);

    XmlLightElement div = body.SelectSingleNode(".//div[@id='two']");
    Assert.AreEqual("Hi", div.InnerText);

    XmlLightElement textNode = body.SelectSingleNode("text()");
    Assert.AreEqual("this is > cdata!", Normalize(textNode.InnerText));
}
/// <summary>
/// Asserts that the sibling following /html/head in the fixture is a comment node and checks
/// its <c>InnerXml</c>.
/// </summary>
[Test]
public void TestComments()
{
    XmlLightDocument doc = new HtmlLightDocument(document);
    XmlLightElement head = doc.SelectSingleNode("/html/head");

    XmlLightElement afterHead = head.NextSibling;

    Assert.IsTrue(afterHead.IsComment);
    Assert.AreEqual("", afterHead.InnerXml);
}
/// <summary>
/// Feeds the same input to <c>XmlLightParser.Parse</c> twice, once into an
/// <c>HtmlLightDocument</c> and once (with <c>AttributeFormat.Xml</c>) into an
/// <c>XmlLightDocument</c>, and checks the differing root element and attributes.
/// </summary>
[Test]
public void TestParsers()
{
    string notxml = "";

    // HTML reader: an html root with one attribute wrapping a single body child.
    HtmlLightDocument htmlDoc = new HtmlLightDocument();
    XmlLightParser.Parse(notxml, htmlDoc);
    Assert.AreEqual("html", htmlDoc.Root.TagName);
    Assert.AreEqual(1, htmlDoc.Root.Attributes.Count);
    Assert.AreEqual("a", htmlDoc.Root.Attributes["id"]);
    Assert.AreEqual(1, htmlDoc.Root.Children.Count);
    Assert.AreEqual("body", htmlDoc.Root.Children[0].TagName);
    Assert.AreEqual("foo", htmlDoc.Root.Children[0].Attributes["bar"]);
    Assert.AreEqual("bar", htmlDoc.Root.Children[0].Attributes["foo"]);

    // XML reader with XML attribute parsing: body is the root here.
    XmlLightDocument xmlDoc = new XmlLightDocument();
    XmlLightParser.Parse(notxml, XmlLightParser.AttributeFormat.Xml, xmlDoc);
    Assert.AreEqual(2, xmlDoc.Root.Attributes.Count);
    // Not recognized in this mode: xmlDoc.Root.Attributes["id"]
    Assert.AreEqual("body", xmlDoc.Root.TagName);
    Assert.AreEqual("foo", xmlDoc.Root.Attributes["bar"]);
    Assert.AreEqual("bar", xmlDoc.Root.Attributes["foo"]);
}
/// <summary>
/// Checks the root element's attribute collection: its count, both the typed and the
/// non-generic enumerator, and removal of the "id" attribute.
/// </summary>
[Test]
public void TestAttributes()
{
    string xml = "";
    XmlLightDocument doc = new XmlLightDocument(xml);

    Assert.AreEqual("root", doc.Root.LocalName);
    Assert.AreEqual(1, doc.Root.Attributes.Count);

    // Both enumeration paths must yield at least one item.
    Assert.IsTrue(doc.Root.Attributes.GetEnumerator().MoveNext());
    System.Collections.IEnumerable untyped = (System.Collections.IEnumerable)doc.Root.Attributes;
    Assert.IsTrue(untyped.GetEnumerator().MoveNext());

    Assert.IsTrue(doc.Root.Attributes.Remove("id"));
    Assert.AreEqual(0, doc.Root.Attributes.Count);
}
/// <summary>
/// Assembles a small tree from individually constructed elements (element, text, comment and a
/// control node that is removed again) and checks the text produced by <c>WriteUnformatted</c>.
/// </summary>
[Test]
public void TestManuallyCreated()
{
    XmlLightElement root = new XmlLightElement(null, "root");

    XmlLightElement child = new XmlLightElement(root, "a");
    child.Attributes["b"] = "c";

    XmlLightElement text = new XmlLightElement(root, XmlLightElement.TEXT);
    text.Value = "Normal & Text";

    XmlLightElement comment = new XmlLightElement(root, XmlLightElement.COMMENT);
    comment.OriginalTag = "";

    // The control node is attached, given its tag, and then detached again.
    XmlLightElement control = new XmlLightElement(root, XmlLightElement.CONTROL);
    control.OriginalTag = " Hey, that isn't valid !>";
    control.Remove();

    StringWriter output = new StringWriter();
    root.WriteUnformatted(output);
    Assert.AreEqual("Normal & <Encoded> Text", output.ToString());
}
/// <summary>
/// Manual (Explicit) timing run: ten passes over every *.htm file under c:\temp\trash, printing
/// the elapsed milliseconds for the HTML document parser, the XML document parser, a parse into
/// a no-op reader, and text-only extraction.
/// </summary>
[Test, Explicit]
public void RunPerfTests()
{
    string[] files = Directory.GetFiles(@"c:\temp\trash", "*.htm", SearchOption.AllDirectories);

    for (int pass = 0; pass < 10; pass++)
    {
        System.Diagnostics.Stopwatch timer;

        // HTML parser
        timer = System.Diagnostics.Stopwatch.StartNew();
        foreach (string file in files)
        {
            new HtmlLightDocument(File.ReadAllText(file));
        }
        Console.WriteLine("HTML = {0}", timer.ElapsedMilliseconds);

        // XML parser
        timer = System.Diagnostics.Stopwatch.StartNew();
        foreach (string file in files)
        {
            new XmlLightDocument(File.ReadAllText(file));
        }
        Console.WriteLine("XHTM = {0}", timer.ElapsedMilliseconds);

        // Parse only: the reader discards every callback, so no DOM is built.
        timer = System.Diagnostics.Stopwatch.StartNew();
        IXmlLightReader sink = new EmptyReader();
        foreach (string file in files)
        {
            XmlLightParser.Parse(File.ReadAllText(file), XmlLightParser.AttributeFormat.Xml, sink);
        }
        Console.WriteLine("NDOM = {0}", timer.ElapsedMilliseconds);

        // Text only
        timer = System.Diagnostics.Stopwatch.StartNew();
        foreach (string file in files)
        {
            XmlLightParser.ParseText(File.ReadAllText(file));
        }
        Console.WriteLine("TEXT = {0}", timer.ElapsedMilliseconds);
    }
}
/// <summary>
/// <c>IXmlLightReader</c> whose callbacks all do nothing. <c>RunPerfTests</c> hands it to
/// <c>XmlLightParser.Parse</c> for its "parse only" timing.
/// </summary>
private class EmptyReader : IXmlLightReader
{
    // Document boundaries
    public void StartDocument() { }
    public void EndDocument() { }

    // Element boundaries
    public void StartTag(XmlTagInfo tag) { }
    public void EndTag(XmlTagInfo tag) { }

    // Content callbacks
    public void AddText(string content) { }
    public void AddCData(string cdata) { }
    public void AddComment(string comment) { }
    public void AddControl(string cdata) { }
    public void AddInstruction(string instruction) { }
}
/// <summary>
/// Constructing an <c>XmlLightDocument</c> from plain text with no root element is expected
/// to throw <see cref="XmlException"/>.
/// </summary>
[Test]
[ExpectedException(typeof(XmlException))]
public void TestXmlNoRootNode()
{
    // The constructor call itself is the act under test; the instance is not used.
    new XmlLightDocument("no xml root node defined");
}
/// <summary>
/// Constructing an <c>XmlLightDocument</c> from this input is expected to throw
/// <see cref="XmlException"/>.
/// </summary>
[Test]
[ExpectedException(typeof(XmlException))]
public void TestXmlNoClosingTag()
{
    // The constructor call itself is the act under test; the instance is not used.
    new XmlLightDocument("");
}
/// <summary>
/// Constructing an <c>XmlLightDocument</c> from this input is expected to throw
/// <see cref="XmlException"/>.
/// </summary>
[Test]
[ExpectedException(typeof(XmlException))]
public void TestXmlWrongClosingTag()
{
    // The constructor call itself is the act under test; the instance is not used.
    new XmlLightDocument("");
}
/// <summary>
/// Constructing an <c>HtmlLightDocument</c> from this input is expected to throw
/// <see cref="ApplicationException"/>.
/// </summary>
[Test]
[ExpectedException(typeof(ApplicationException))]
public void TestRootNodeNotHtml()
{
    // The constructor call itself is the act under test; the instance is not used.
    new HtmlLightDocument("");
}
}
} |