新彩天欢迎您!
幻海优品

.NET Core(C#)使用AngleSharp下载网页源码及资源文件(html,css,js,jpg等图片)

1、项目中安装引用AngleSharp

AngleSharp:http://anglesharp.github.io/

通过NuGet获取AngleSharp

1)使用Nuget管理控制台

将AngleSharp集成到项目中的最简单方法是使用NuGet。您可以通过打开包管理器控制台(PM)并键入以下语句来安装AngleSharp:

Install-Package AngleSharp

2)使用Nuget图形管理器

使用NuGet的图形界面管理器搜索"AngleSharp" => 在搜索结果中找到该包后点击"安装"

3)使用.NET CLI命令安装

> dotnet add TodoApi.csproj package AngleSharp

相关文档:VS(Visual Studio)中NuGet的使用

2、下载网页代码及引用的静态文件

    /// <summary>
    /// Computes the MD5 digest of a string and returns it as lowercase hexadecimal.
    /// </summary>
    /// <remarks>
    /// In this file the digest is only used to derive a local file name from a URL.
    /// The text is encoded as UTF-8 so the same input yields the same name on every
    /// platform (the previous <c>Encoding.Default</c> varies by machine on .NET Framework).
    /// </remarks>
    /// <param name="txt">Text to hash.</param>
    /// <returns>The digest as a lowercase hexadecimal string.</returns>
    public static string GenerateMD5(string txt)
    {
        using (MD5 md5 = MD5.Create())
        {
            byte[] digest = md5.ComputeHash(Encoding.UTF8.GetBytes(txt));
            StringBuilder hex = new StringBuilder(digest.Length * 2);
            foreach (byte b in digest)
            {
                hex.Append(b.ToString("x2"));
            }
            return hex.ToString();
        }
    }

    /// <summary>
    /// Downloads the resource referenced by one element attribute into
    /// <c>filePath/dir</c> and rewrites the attribute to <c>/static/dir/&lt;md5&gt;.dir</c>.
    /// </summary>
    /// <param name="item">Element whose attribute references the resource.</param>
    /// <param name="fileName">Ignored on input; kept for signature compatibility.</param>
    /// <param name="sourceUrl">Ignored on input; kept for signature compatibility.</param>
    /// <param name="filePath">Root directory under which the <paramref name="dir"/> folder is created.</param>
    /// <param name="sub">Ignored on input; kept for signature compatibility.</param>
    /// <param name="attr">Name of the attribute holding the URL (for example <c>src</c> or <c>href</c>).</param>
    /// <param name="dir">Sub-directory name, also used as the saved file's extension (<c>js</c>, <c>css</c>, <c>jpg</c>).</param>
    /// <param name="document">Document the element belongs to; its origin is passed to <c>fixUrl</c>.</param>
    /// <returns>
    /// <c>true</c> when the element was skipped and left untouched; <c>false</c> when the
    /// attribute was rewritten (even if the download itself failed and was only logged).
    /// </returns>
    public static bool DwonFile(IElement item, string fileName, string sourceUrl, string filePath,
        string sub, string attr, string dir, IDocument document)
    {
        // Original article: https://www.wonhero.com/article/696/
        string rawUrl = item.GetAttribute(attr);
        if (string.IsNullOrEmpty(rawUrl))
        {
            return true;
        }

        // Inline base64 images have nothing to download.
        if (dir == "jpg" && rawUrl.IndexOf(";base64,", StringComparison.Ordinal) > -1)
        {
            return true;
        }

        // <link> elements are not all stylesheets; skip any href that does not mention ".css".
        // (The previous test compared the index with 0, which only matched hrefs that
        // *start* with ".css", so non-stylesheet links were saved as .css files.)
        if (dir == "css" && rawUrl.IndexOf(".css", StringComparison.OrdinalIgnoreCase) < 0)
        {
            return true;
        }

        sub = Path.Combine(filePath, dir);
        if (!Directory.Exists(sub))
        {
            Directory.CreateDirectory(sub);
        }

        sourceUrl = fixUrl(rawUrl, document.Origin);
        fileName = GenerateMD5(sourceUrl) + "." + dir;
        string localPath = Path.Combine(sub, fileName);

        Console.WriteLine(sourceUrl);
        // Log the attribute value as found in the page next to the URL actually requested.
        log.Info(rawUrl + " = " + sourceUrl);

        // A file with the same hashed name is treated as already downloaded.
        if (!File.Exists(localPath))
        {
            try
            {
                using (WebClient webClient = new WebClient())
                {
                    webClient.DownloadFile(sourceUrl, localPath);
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failed asset is logged and must not abort the whole page.
                log.Info("sourceUrl = " + sourceUrl + " dir = " + dir);
                log.Error(ex);
            }
        }

        item.SetAttribute(attr, "/static/" + dir + "/" + fileName);
        item.SetAttribute("referrerPolicy", "no-referrer");
        return false;
    }

    /// <summary>
    /// Loads <paramref name="url"/> with AngleSharp, downloads the scripts, linked
    /// stylesheets and images it references, and returns the page HTML with those
    /// references rewritten to the local <c>/static/...</c> paths.
    /// </summary>
    /// <param name="url">Address of the page to fetch.</param>
    /// <param name="filePath">Root directory that receives the <c>js</c>, <c>css</c> and <c>jpg</c> folders.</param>
    /// <returns>The serialized HTML of the modified document.</returns>
    public static string GetHtml(string url, string filePath)
    {
        // Original article: https://www.wonhero.com/article/696/
        var requester = new DefaultHttpRequester("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36");
        requester.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        requester.Headers.Add("Referer", "");
        requester.Headers.Add("Accept-Language", "zh-Hans-CN,zh-Hans;q=0.8,en-US;q=0.5,en;q=0.3");

        var configuration = Configuration.Default
            .WithLocaleBasedEncoding()
            .WithDefaultLoader()
            .WithDefaultCookies()
            .With(requester);
        var context = BrowsingContext.New(configuration);

        // This method is synchronous, so the load is awaited by blocking.
        // .Result is kept so failures still surface as AggregateException, as before.
        var document = context.OpenAsync(url).Result;

        // Elements are picked with CSS selectors; each pass rewrites one kind of resource.
        LocalizeElements(document, "script", "src", "js", filePath);
        LocalizeElements(document, "link", "href", "css", filePath);
        LocalizeElements(document, "img", "src", "jpg", filePath);

        return document.ToHtml();
    }

    // Runs DwonFile over every element matching the selector; skipped elements need no follow-up.
    private static void LocalizeElements(IDocument document, string selector, string attr, string dir, string filePath)
    {
        foreach (var element in document.QuerySelectorAll(selector))
        {
            // fileName, sourceUrl and sub are overwritten inside DwonFile, so placeholders suffice.
            DwonFile(element, string.Empty, string.Empty, filePath, string.Empty, attr, dir, document);
        }
    }

相关文档:

.NET Core(C#) AngleSharp 解析百度和谷歌搜索结果html(链接、标题、描述)

.Net(C#) Core安装使用anglesharp解析html的方法及示例代码

免责声明:以上内容(如有图片或视频亦包括在内)有转载其他网站资源,如有侵权请联系删除