在一些需要經常更新頁面數據的網站中,訪問量不大時,一般都是直接發布帶後臺代碼的頁面,每次訪問都要與數據庫交互。但是一旦訪問量增加,服務器的開銷和成本就必須考慮進來了。例如一篇文章,後臺編輯後,文章內容存入數據庫;如果有1000人訪問,而每次都還是從數據庫讀取,那這1000次的IO訪問開銷就顯得比較大了。一個好的方法是:文章確定之後,把它做成靜態頁面,而且這件事交給程序來完成——遞歸遍歷整個網站,將網站內容都訪問一遍,然後為這些頁面生成靜態文本頁面,再將這些頁面發布。這樣對瀏覽者而言,他看到的還是同一個地址、同一篇文章,只是這份是靜態的而已。這樣就提升了網站的效率,節約了資源。
下面附上一份用C#遍歷網站內容、然後生成靜態內容頁面的代碼:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
// URLs whose pages have already been fetched and written to disk.
// A hash set keeps the "already saved?" check O(1) however many pages are crawled.
private readonly HashSet<string> htmlCreatedList = new HashSet<string>();

// Matches href="..." / href='...' (case-insensitive) and captures the link value in group 1.
// The character class rejects values containing '.', '#' or ':', so links with a file
// extension, in-page anchors and absolute/scheme-qualified URLs are not captured.
private static readonly Regex HrefRegex = new Regex(
    "href\\s*=\\s*(?:[\"'](?<1>[^\"'.#:]*)[\"'])",
    RegexOptions.IgnoreCase);

/// <summary>
/// Saves the page at <paramref name="urlString"/> as a static HTML file and then does the
/// same for every page reachable from it through the links found in the downloaded HTML.
/// </summary>
/// <remarks>
/// Pages are visited depth-first, in the order their links appear, and each URL is saved at
/// most once per instance. An explicit work stack is used instead of recursion so that a long
/// chain of linked pages cannot overflow the call stack. Any exception raised while
/// downloading or writing a page propagates to the caller and stops the crawl.
/// </remarks>
/// <param name="urlString">Absolute URL of the page to start from.</param>
public void SaveHtmlCode(string urlString)
{
    Stack<string> pending = new Stack<string>();
    pending.Push(urlString);

    while (pending.Count > 0)
    {
        string currentUrl = pending.Pop();
        if (htmlCreatedList.Contains(currentUrl))
        {
            continue;
        }

        string htmlCode = GetHtmlCodeFromUrl(currentUrl);
        string htmlPath = currentUrl.ToPhysicalPath();

        // CreateDirectory is a no-op when the directory already exists.
        string directoryPath = Path.GetDirectoryName(htmlPath);
        if (!string.IsNullOrEmpty(directoryPath))
        {
            Directory.CreateDirectory(directoryPath);
        }

        File.WriteAllText(htmlPath, htmlCode);
        htmlCreatedList.Add(currentUrl);

        // Push in reverse so the first link on the page is popped (and saved) first,
        // which is the same order a recursive depth-first walk would produce.
        List<string> links = GetUrlLinkFromHtmlCode(htmlCode).ToList();
        for (int i = links.Count - 1; i >= 0; i--)
        {
            string link = links[i].Replace("\\", "/");
            pending.Push(WebConfigInfo.UrlPrefix + link);
        }
    }
}

/// <summary>
/// Downloads the HTML of a page with an HTTP GET request.
/// </summary>
/// <param name="urlString">URL of the page to download.</param>
/// <returns>The response body decoded as UTF-8.</returns>
private string GetHtmlCodeFromUrl(string urlString)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(urlString);

    // The UserAgent property holds only the header VALUE; the original text also embedded
    // the header name ("User-Agent:") and was missing its closing parenthesis.
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
    request.Accept = "*/*";
    request.KeepAlive = true;
    request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

    // using blocks release the connection even when reading the body throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Extracts the distinct hyperlink targets from a piece of HTML.
/// </summary>
/// <param name="htmlCode">HTML to search for links.</param>
/// <returns>
/// The distinct values of the matched href attributes, without the "href=" prefix or the
/// surrounding quotes, and with the HTML entity "&amp;amp;" decoded to "&amp;".
/// </returns>
private IEnumerable<string> GetUrlLinkFromHtmlCode(string htmlCode)
{
    return HrefRegex.Matches(htmlCode)
        .Cast<Match>()
        // Group 1 is the attribute value itself, so no prefix or quote stripping is needed
        // and single-quoted or upper-case HREF attributes are handled the same way.
        .Select(match => match.Groups[1].Value.Replace("&amp;", "&"))
        .Distinct();
}
} |
另外給 string 擴展了一個方法 ToPhysicalPath,用來把頁面鏈接地址轉換成靜態文件的物理路徑。
1
2
3
4
5
6
7
8
9
10
11
|
/// <summary>
/// Maps a page URL to the path of the static HTML file that mirrors it under the
/// "Html" folder of the web application's physical root.
/// </summary>
/// <remarks>
/// The application root is read from System.Web.HttpContext.Current, so a current
/// HTTP context must be available when this method is called.
/// </remarks>
/// <param name="urlString">Absolute URL of the page.</param>
/// <returns>
/// "[application root]\Html\[url path]\Index.html" when the URL has no query string;
/// otherwise "[application root]\Html\[url path]\[query names and values concatenated].html".
/// </returns>
public static string ToPhysicalPath(this string urlString)
{
    System.Uri uri = new System.Uri(urlString);
    string applicationRoot = System.Web.HttpContext.Current.Request.PhysicalApplicationPath;

    string[] pathSegments = uri.AbsolutePath.Split(
        new char[] { '/', '\\' }, StringSplitOptions.RemoveEmptyEntries);
    string[] querys = uri.Query.Split(
        new char[] { '?', '&', '=' }, StringSplitOptions.RemoveEmptyEntries);

    // Assemble the path from individual segments instead of collapsing "\\" after the fact:
    // a single Replace pass leaves a doubled separator when three meet (e.g. for the root
    // URL "/") and would also strip the leading "\\" of a UNC application root.
    string htmlPath = applicationRoot.Replace('/', '\\').TrimEnd('\\') + "\\Html\\";
    if (pathSegments.Length > 0)
    {
        htmlPath += string.Join("\\", pathSegments) + "\\";
    }

    string fileName = querys.Length == 0
        ? "Index.html"
        : string.Join(string.Empty, querys) + ".html";

    // A '/' inside a query value becomes a directory separator, as it did before.
    return htmlPath + fileName.Replace('/', '\\');
} |
總結
以上就是這篇文章的全部內容了,希望本文的內容對大家的學習或者工作具有一定的參考價值,謝謝大家對服務器之家的支持。如果你想了解更多相關內容,請查看下面的相關鏈接。
原文鏈接:https://blog.csdn.net/chenqiangdage/article/details/49821189