Download using Internet Explorer Cookies

From MetaSharp

Jump to: navigation, search

Article Author(s): Audric Thevenet
All Rights Reserved.


Here is the only solution I could come up with for downloading a web page that is only accessible when you are "logged in", by reusing the cookies stored by Internet Explorer:

/// <summary>
/// P/Invoke declaration for the <c>InternetGetCookie</c> function exported by wininet.dll.
/// </summary>
/// <param name="url">URL whose cookies are requested.</param>
/// <param name="cookieName">Name of the cookie to retrieve; the caller in this article passes null.</param>
/// <param name="cookieData">Buffer that receives the cookie text.</param>
/// <param name="size">
/// On input, the size of <paramref name="cookieData"/>; the caller in this article reads it back
/// after a failed call and uses it as the buffer size for a retry.
/// NOTE(review): no CharSet is specified, so strings are presumably marshaled as ANSI; the units of
/// this value (characters vs. bytes) should be confirmed against the WinINet documentation.
/// </param>
/// <returns>
/// true if the call succeeded, false otherwise. Because SetLastError is true, the Win32 error code
/// can be read with Marshal.GetLastWin32Error() after a failure.
/// </returns>
[DllImport("wininet.dll", SetLastError = true)]
public static extern bool InternetGetCookie(string url, string cookieName, StringBuilder cookieData, ref int size);

/// <summary>
/// Builds a <see cref="CookieContainer"/> holding the cookies that InternetGetCookie reports for the given URI.
/// </summary>
/// <param name="uri">URI whose cookies are looked up and to which they are attached in the container.</param>
/// <returns>
/// A container populated with the cookies, or null when the lookup fails (even after one retry with
/// the size reported by the first call) or when no cookie text was returned.
/// </returns>
private static CookieContainer GetUriCookieContainer(Uri uri)
{
    string url = uri.ToString();

    // First attempt with a small default buffer.
    int bufferSize = 256;
    StringBuilder buffer = new StringBuilder(bufferSize);

    bool fetched = InternetGetCookie(url, null, buffer, ref bufferSize);
    if (!fetched)
    {
        // A negative size cannot be used to allocate a retry buffer.
        if (bufferSize < 0)
        {
            return null;
        }

        // Retry once with a buffer of the size written back by the failed call.
        buffer = new StringBuilder(bufferSize);
        fetched = InternetGetCookie(url, null, buffer, ref bufferSize);
        if (!fetched)
        {
            return null;
        }
    }

    // Nothing was returned: report "no cookies" as null.
    if (buffer.Length == 0)
    {
        return null;
    }

    // The cookie text is ';'-separated; it is converted to ',' separators before being handed to SetCookies.
    string cookieHeader = buffer.ToString().Replace(';', ',');

    CookieContainer container = new CookieContainer();
    container.SetCookies(uri, cookieHeader);
    return container;
}

/// <summary>
/// Downloads a web page as a string, sending the Internet Explorer cookies found for <paramref name="rootUrl"/>.
/// </summary>
/// <param name="rootUrl">Root URL of the target site, used to look up the cookies (e.g. "http://www.mysite.com/").</param>
/// <param name="pageUrl">URL of the page to download (e.g. "http://www.mysite.com/mytargetpage.htm").</param>
/// <returns>The response body, read to the end, as a string.</returns>
public static string DownloadString(string rootUrl, string pageUrl)
{
    // May be null when no cookies were found; the request is then sent without a cookie container.
    CookieContainer cookies = GetUriCookieContainer(new Uri(rootUrl));

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUrl);
    // Redirects are not followed: the response for pageUrl itself is what gets read.
    request.AllowAutoRedirect = false;
    request.CookieContainer = cookies;

    // Dispose the response, its stream and the reader deterministically, so the
    // connection is released even if reading the body throws part-way through.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream s = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(s))
    {
        return sr.ReadToEnd();
    }
}

However, for some reason, on large web pages the returned string seems to contain only part of the page. As of today, I have not been able to figure out why.