1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Collections;
using HtmlAgilityPack;
namespace EBookGrabber
{
/// <summary>
/// Simple mutable holder for one hyperlink: the value of its <c>href</c> attribute
/// and the inner text of the element that carried it.
/// </summary>
public class HrefElement
{
    /// <summary>
    /// Gets or sets the link target. Defaults to <c>null</c> until assigned.
    /// </summary>
    public string Href { get; set; }

    /// <summary>
    /// Gets or sets the text associated with the link. Defaults to <c>null</c> until assigned.
    /// </summary>
    public string InnerText { get; set; }
}
/// <summary>
/// Wraps an HTML document and exposes the hyperlinks declared by its
/// <c>&lt;a href="..."&gt;</c> elements.
/// </summary>
public class DocumentWithLinks
{
    private HtmlDocument _doc;
    private List<HrefElement> _references;

    /// <summary>
    /// Creates a <see cref="DocumentWithLinks"/> and immediately collects the
    /// anchor links found in <paramref name="doc"/>.
    /// </summary>
    /// <param name="doc">The input HTML document. May not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="doc"/> is null.</exception>
    public DocumentWithLinks(HtmlDocument doc)
    {
        if (doc == null)
        {
            throw new ArgumentNullException("doc");
        }

        _doc = doc;
        _references = CollectAnchorReferences();
    }

    /// <summary>
    /// Builds one <see cref="HrefElement"/> per anchor element that carries an
    /// <c>href</c> attribute, in the order the XPath query returns them.
    /// </summary>
    /// <returns>The collected links; empty when the document has no matching anchors.</returns>
    private List<HrefElement> CollectAnchorReferences()
    {
        List<HrefElement> found = new List<HrefElement>();

        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so the loop is only entered for a non-null result.
        HtmlNodeCollection anchors = _doc.DocumentNode.SelectNodes("//a[@href]");
        if (anchors != null)
        {
            foreach (HtmlNode anchor in anchors)
            {
                // The XPath predicate guarantees the "href" attribute is present.
                found.Add(new HrefElement
                {
                    Href = anchor.Attributes["href"].Value,
                    InnerText = anchor.InnerText
                });
            }
        }

        return found;
    }

    /// <summary>
    /// Gets the links collected from the document at construction time.
    /// The returned list is the internal instance, never null.
    /// </summary>
    public List<HrefElement> References
    {
        get { return _references; }
    }
}
}