// StringUtil
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
namespace FashionExchange.Common.Utils
{
public class StringUtil
{
/// <summary>
/// Returns the part of <paramref name="value"/> that precedes the first occurrence of <paramref name="endStr"/>.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="endStr">Marker that terminates the result.</param>
/// <param name="includeEndStr">When true, the marker itself is kept at the end of the result.</param>
/// <param name="ignoreCase">When true, the marker is matched case-insensitively using the invariant culture.</param>
/// <returns>The leading substring, or null when the marker does not occur in <paramref name="value"/>.</returns>
public static string SubstringFromStart(string value, string endStr, bool includeEndStr = false, bool ignoreCase = false)
{
    // The invariant culture is used for the case-insensitive search so the result does not depend on
    // the thread culture, in line with SubstringToEnd, Substring, Remove and ContainsIgnoreCase.
    int endIndex = ignoreCase
        ? value.IndexOf(endStr, StringComparison.InvariantCultureIgnoreCase)
        : value.IndexOf(endStr);
    if (endIndex == -1)
    {
        return null;
    }

    if (includeEndStr)
    {
        endIndex += endStr.Length;
    }

    return value.Substring(0, endIndex);
}
/// <summary>
/// Returns the tail of <paramref name="value"/> that follows the first occurrence of <paramref name="startStr"/>.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="startStr">Marker after which the result begins.</param>
/// <param name="includeStartStr">When true, the result begins with the marker itself.</param>
/// <param name="ignoreCase">When true, the marker is matched case-insensitively using the invariant culture.</param>
/// <returns>The trailing substring, or null when the marker is not found.</returns>
public static string SubstringToEnd(string value, string startStr, bool includeStartStr = false, bool ignoreCase = false)
{
    int markerPosition;
    if (ignoreCase)
    {
        markerPosition = value.IndexOf(startStr, StringComparison.InvariantCultureIgnoreCase);
    }
    else
    {
        markerPosition = value.IndexOf(startStr);
    }

    // IndexOf never returns anything below -1, so a negative value means "not found".
    if (markerPosition < 0)
    {
        return null;
    }

    int from = includeStartStr ? markerPosition : markerPosition + startStr.Length;
    return value.Substring(from);
}
/// <summary>
/// Extracts the text located between the first <paramref name="startStr"/> and the first
/// <paramref name="endStr"/> that follows it.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="startStr">Opening marker.</param>
/// <param name="endStr">Closing marker; only searched for after the opening marker.</param>
/// <param name="includeStartStr">When true, the opening marker is part of the result.</param>
/// <param name="includeEndStr">When true, the closing marker is part of the result.</param>
/// <param name="ignoreCase">When true, both markers are matched case-insensitively using the invariant culture.</param>
/// <returns>The extracted text, or null when either marker is missing.</returns>
public static string Substring(string value, string startStr, string endStr, bool includeStartStr = false, bool includeEndStr = false, bool ignoreCase = false)
{
    int openPosition;
    if (ignoreCase)
    {
        openPosition = value.IndexOf(startStr, StringComparison.InvariantCultureIgnoreCase);
    }
    else
    {
        openPosition = value.IndexOf(startStr);
    }

    if (openPosition < 0)
    {
        return null;
    }

    // The closing marker is looked up only in the text after the opening marker.
    int afterOpen = openPosition + startStr.Length;
    int closePosition;
    if (ignoreCase)
    {
        closePosition = value.IndexOf(endStr, afterOpen, StringComparison.InvariantCultureIgnoreCase);
    }
    else
    {
        closePosition = value.IndexOf(endStr, afterOpen);
    }

    if (closePosition < 0)
    {
        return null;
    }

    int from = includeStartStr ? openPosition : afterOpen;
    int to = includeEndStr ? closePosition + endStr.Length : closePosition;
    return value.Substring(from, to - from);
}
/// <summary>
/// Returns the characters of <paramref name="value"/> from <paramref name="startIndex"/> (inclusive)
/// up to <paramref name="endIndex"/> (exclusive).
/// </summary>
/// <param name="value">Source text.</param>
/// <param name="startIndex">Zero-based index of the first character to return.</param>
/// <param name="endIndex">Zero-based index just past the last character to return.</param>
/// <returns>The requested slice of <paramref name="value"/>.</returns>
public static string Substring(string value, int startIndex, int endIndex)
{
    int length = endIndex - startIndex;
    return value.Substring(startIndex, length);
}
/// <summary>
/// Collects every piece of <paramref name="value"/> that is enclosed by <paramref name="startStr"/> and
/// the next <paramref name="endStr"/>, scanning left to right without overlapping matches.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="startStr">Opening delimiter.</param>
/// <param name="endStr">Closing delimiter.</param>
/// <param name="includeStartStr">When true, each result begins with the opening delimiter.</param>
/// <param name="includeEndStr">When true, each result ends with the closing delimiter.</param>
/// <returns>The matches in order of appearance; empty when there are none.</returns>
public static List<string> Substrings(string value, string startStr, string endStr, bool includeStartStr = false, bool includeEndStr = false)
{
    List<string> matchStrs = new List<string>();

    // Two empty delimiters match at the same position on every pass, so the scan would never advance.
    if (startStr == String.Empty && endStr == String.Empty)
    {
        return matchStrs;
    }

    int startIndex = 0;
    while ((startIndex = value.IndexOf(startStr, startIndex)) != -1)
    {
        int contentStart = startIndex + startStr.Length;
        int endIndex = value.IndexOf(endStr, contentStart);
        if (endIndex == -1)
        {
            break;
        }

        int afterEnd = endIndex + endStr.Length;
        int from = includeStartStr ? startIndex : contentStart;
        int to = includeEndStr ? afterEnd : endIndex;
        matchStrs.Add(value.Substring(from, to - from));

        // Resume the scan right after the closing delimiter.
        startIndex = afterEnd;
    }

    return matchStrs;
}
/// <summary>
/// Removes every occurrence of <paramref name="removeStr"/> from <paramref name="value"/>. The search is
/// restarted after each removal, so occurrences created by joining the surrounding text are removed too.
/// </summary>
/// <param name="value">Text to clean; returned unchanged when null or empty.</param>
/// <param name="removeStr">Text to remove; when null or empty nothing is removed.</param>
/// <param name="ignoreCase">When true, occurrences are matched case-insensitively using the invariant culture.</param>
/// <returns><paramref name="value"/> without any occurrence of <paramref name="removeStr"/>.</returns>
public static string Remove(string value, string removeStr, bool ignoreCase = false)
{
    if (String.IsNullOrEmpty(value) || String.IsNullOrEmpty(removeStr))
    {
        return value;
    }

    // The case-sensitive path keeps its ordinal Contains pre-check. The case-insensitive pre-check
    // was the same IndexOf call the loop performs first, so the loop alone covers it.
    if (!ignoreCase && !value.Contains(removeStr))
    {
        return value;
    }

    while (true)
    {
        int startIndex = ignoreCase
            ? value.IndexOf(removeStr, StringComparison.InvariantCultureIgnoreCase)
            : value.IndexOf(removeStr);
        if (startIndex == -1)
        {
            break;
        }

        // Length is the direct character count; LINQ Count() would enumerate the string each time.
        value = value.Remove(startIndex, removeStr.Length);
    }

    return value;
}
/// <summary>
/// Strips <paramref name="removeStr"/> from the beginning of <paramref name="value"/> when it is present.
/// </summary>
/// <param name="value">Text to trim; returned unchanged when null or empty.</param>
/// <param name="removeStr">Prefix to remove; when null or empty nothing is removed.</param>
/// <param name="ignoreCase">When true, the prefix is matched case-insensitively using the invariant culture.</param>
/// <returns><paramref name="value"/> without the prefix, or <paramref name="value"/> itself when the prefix is absent.</returns>
public static string RemoveStart(string value, string removeStr, bool ignoreCase = false)
{
    if (String.IsNullOrEmpty(value) || String.IsNullOrEmpty(removeStr))
    {
        return value;
    }

    // Invariant culture matches the other case-insensitive helpers of this class (Remove,
    // ContainsIgnoreCase, Substring) and keeps the result independent of the thread culture.
    bool hasPrefix = ignoreCase
        ? value.StartsWith(removeStr, StringComparison.InvariantCultureIgnoreCase)
        : value.StartsWith(removeStr);

    return hasPrefix ? value.Substring(removeStr.Length) : value;
}
/// <summary>
/// Strips <paramref name="removeStr"/> from the end of <paramref name="value"/> when it is present.
/// </summary>
/// <param name="value">Text to trim; returned unchanged when null or empty.</param>
/// <param name="removeStr">Suffix to remove; when null or empty nothing is removed.</param>
/// <param name="ignoreCase">When true, the suffix is matched case-insensitively using the invariant culture.</param>
/// <returns><paramref name="value"/> without the suffix, or <paramref name="value"/> itself when the suffix is absent.</returns>
public static string RemoveEnd(string value, string removeStr, bool ignoreCase = false)
{
    if (String.IsNullOrEmpty(value) || String.IsNullOrEmpty(removeStr))
    {
        return value;
    }

    // Invariant culture matches the other case-insensitive helpers of this class (Remove,
    // ContainsIgnoreCase, Substring) and keeps the result independent of the thread culture.
    bool hasSuffix = ignoreCase
        ? value.EndsWith(removeStr, StringComparison.InvariantCultureIgnoreCase)
        : value.EndsWith(removeStr);

    return hasSuffix ? value.Substring(0, value.Length - removeStr.Length) : value;
}
/// <summary>
/// Tells whether <paramref name="stringList"/> holds an element equal to <paramref name="value"/>.
/// </summary>
/// <param name="stringList">List to search.</param>
/// <param name="value">String to look for.</param>
/// <param name="ignoreCase">When true, elements are compared case-insensitively using the invariant culture.</param>
/// <returns>True when a matching element exists.</returns>
public static bool Contains(List<string> stringList, string value, bool ignoreCase = false)
{
    if (!ignoreCase)
    {
        return stringList.Contains(value);
    }

    // The static String.Equals tolerates null elements, and Exists stops at the first match
    // instead of building a list of every match.
    return stringList.Exists(m => String.Equals(m, value, StringComparison.InvariantCultureIgnoreCase));
}
/// <summary>
/// Tells whether <paramref name="searchString"/> occurs anywhere in <paramref name="value"/>,
/// comparing case-insensitively with the invariant culture.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="searchString">Text to look for.</param>
/// <returns>True when an occurrence is found.</returns>
public static bool ContainsIgnoreCase(string value, string searchString)
{
    int position = value.IndexOf(searchString, StringComparison.InvariantCultureIgnoreCase);
    return position != -1;
}
/// <summary>
/// Encrypts <paramref name="input"/> with Triple DES (ECB mode, PKCS7 padding) and returns the
/// ciphertext as a Base64 string.
/// </summary>
/// <param name="input">Plain text; encoded as UTF-8 before encryption.</param>
/// <param name="key">Key text; its UTF-8 bytes are assigned directly as the Triple DES key.</param>
/// <returns>Base64 representation of the encrypted bytes.</returns>
/// <remarks>
/// NOTE(review): ECB mode encrypts identical plaintext blocks to identical ciphertext blocks. The mode is
/// kept because changing it would presumably make existing ciphertext unreadable - confirm before changing.
/// </remarks>
public static string Encrypt(string input, string key)
{
    byte[] inputArray = Encoding.UTF8.GetBytes(input);

    // using blocks release the provider and the transform even when the key is rejected
    // or the transformation throws.
    using (TripleDESCryptoServiceProvider tripleDES = new TripleDESCryptoServiceProvider())
    {
        tripleDES.Key = Encoding.UTF8.GetBytes(key);
        tripleDES.Mode = CipherMode.ECB;
        tripleDES.Padding = PaddingMode.PKCS7;

        using (ICryptoTransform cTransform = tripleDES.CreateEncryptor())
        {
            byte[] resultArray = cTransform.TransformFinalBlock(inputArray, 0, inputArray.Length);
            return Convert.ToBase64String(resultArray, 0, resultArray.Length);
        }
    }
}
/// <summary>
/// Decrypts a Base64 string that was encrypted with Triple DES (ECB mode, PKCS7 padding) and returns
/// the plain text.
/// </summary>
/// <param name="input">Base64 ciphertext.</param>
/// <param name="key">Key text; its UTF-8 bytes are assigned directly as the Triple DES key.</param>
/// <returns>The decrypted bytes decoded as UTF-8.</returns>
public static string Decrypt(string input, string key)
{
    byte[] inputArray = Convert.FromBase64String(input);

    // using blocks release the provider and the transform even when the key is rejected
    // or the ciphertext cannot be decrypted.
    using (TripleDESCryptoServiceProvider tripleDES = new TripleDESCryptoServiceProvider())
    {
        tripleDES.Key = Encoding.UTF8.GetBytes(key);
        tripleDES.Mode = CipherMode.ECB;
        tripleDES.Padding = PaddingMode.PKCS7;

        using (ICryptoTransform cTransform = tripleDES.CreateDecryptor())
        {
            byte[] resultArray = cTransform.TransformFinalBlock(inputArray, 0, inputArray.Length);
            return Encoding.UTF8.GetString(resultArray);
        }
    }
}
/// <summary>
/// Normalises <paramref name="value"/> with ScraperUtil.NormalizeText and then replaces characters
/// that are unsuitable for file names.
/// </summary>
/// <param name="value">Text to convert; null or empty input is returned as normalised.</param>
/// <returns>
/// The normalised text in which invalid file name characters and '#' become '-', the ampersand becomes
/// "and", '+' becomes "plus" and '%' is removed.
/// </returns>
public static string NormaliseTextForFile(string value)
{
    value = ScraperUtil.NormalizeText(value);

    // NormalizeText hands null/empty input straight back; there is nothing to replace in that
    // case and calling Replace on null would throw.
    if (String.IsNullOrEmpty(value))
    {
        return value;
    }

    // escape invalid filename characters
    foreach (char invalidChar in Path.GetInvalidFileNameChars())
    {
        value = value.Replace(invalidChar, '-');
    }

    return value
        .Replace("#", "-")      // escape hash character
        .Replace("&", "and")    // escape ampersand
        .Replace("+", "plus")   // escape plus character
        .Replace("%", "");      // drop percent character
}
/// <summary>
/// Rewrites selected characters and percent-escapes in <paramref name="url"/>.
/// </summary>
/// <param name="url">Url text; returned unchanged when null or empty.</param>
/// <returns>
/// The url in which '+', "%20" and "%2e" become '-', "%26" becomes "and", and "%27", "%2c" and "%2b"
/// are removed. The hexadecimal escapes are recognised in both lower and upper case.
/// </returns>
public static string NormaliseUrl(string url)
{
    if (String.IsNullOrEmpty(url))
    {
        return url;
    }

    // Percent-escapes are case-insensitive in their hex digits, so both spellings are handled.
    return url
        .Replace("+", "-")                          // replace spaces (+) in url with dashes
        .Replace("%20", "-")                        // replace spaces in url with dashes
        .Replace("%26", "and")                      // replace ampersand in url with and
        .Replace("%27", "")                         // remove ' in url
        .Replace("%2e", "-").Replace("%2E", "-")    // replace dots in url with dashes
        .Replace("%2c", "").Replace("%2C", "")      // remove commas
        .Replace("%2b", "").Replace("%2B", "");     // remove plus signs
}
/// <summary>
/// Lazily enumerates the start positions of every non-overlapping occurrence of
/// <paramref name="searchStr"/> in <paramref name="value"/>, from left to right.
/// </summary>
/// <param name="value">Text to search.</param>
/// <param name="searchStr">Text to look for; an empty string yields no positions.</param>
/// <returns>Zero-based positions in ascending order.</returns>
public static IEnumerable<int> IndexesOf(string value, string searchStr)
{
    // An empty search string is found at every position without advancing, so the sequence
    // would never end.
    if (searchStr == String.Empty)
    {
        yield break;
    }

    int lastIndex = 0;
    int index;
    while ((index = value.IndexOf(searchStr, lastIndex)) != -1)
    {
        yield return index;

        // Continue after the current occurrence so matches never overlap.
        lastIndex = index + searchStr.Length;
    }
}
}
}
// ScraperUtil
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using HtmlAgilityPack;
using log4net;
using System.Xml;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using Newtonsoft.Json.Linq;
using System.Web;
using System.Collections.Specialized;
using OpenQA.Selenium.PhantomJS;
using System.Runtime.Caching;
using OpenQA.Selenium.Firefox;
using OpenQA.Selenium.Remote;
using System.IO.Compression;
using System.Web.Hosting;
namespace FashionExchange.Common.Utils
{
public class ScraperUtil
{
// log4net logger shared by the static helpers of this class; named after the class.
private static ILog log = LogManager.GetLogger(typeof(ScraperUtil).Name);
// Number of retries CheckForRedirectUrl performs after its first attempt.
private const int defaultRetryCount = 3;
// Urls requested through LoadHtml; LoadHtml rejects a url that is already held here two or more times.
// NOTE(review): FixedSizeConcurrentQueue is declared elsewhere; 1000 is presumably its capacity and it
// presumably holds strings - confirm against its definition.
private static FixedSizeConcurrentQueue loadedHtmlUrls = new FixedSizeConcurrentQueue(1000);
// Urls requested through CreateHttpGetRequest; used there for the same repeated-load check.
private static FixedSizeConcurrentQueue loadedHttpGetUrls = new FixedSizeConcurrentQueue(1000);
/// <summary>
/// Requests <paramref name="url"/> without following redirects and reports where the server points to.
/// </summary>
/// <param name="url">Address to probe; it is HTML-decoded before the request is made.</param>
/// <param name="proxy">Optional proxy for the request.</param>
/// <param name="useHead">When true, a HEAD request is sent first; after a protocol error the call is retried without HEAD.</param>
/// <returns>The value of the Location response header, or the decoded url when that header is empty.</returns>
/// <exception cref="WebException">
/// Rethrown when the retries are exhausted, or for any failure other than a timeout or a protocol
/// error on a HEAD request.
/// </exception>
/// <exception cref="IOException">Rethrown when the retries are exhausted.</exception>
public static string CheckForRedirectUrl(string url, WebProxy proxy = null, bool useHead = true)
{
    string responseUrl = String.Empty;
    if (url.Contains("./"))
        FixDotInUri();
    url = WebUtility.HtmlDecode(url);

    // retry if request has been timed out or refused
    for (int i = 0; i <= defaultRetryCount; i++)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            if (useHead)
                request.Method = "HEAD";
            if (proxy != null)
                request.Proxy = proxy;
            request.UserAgent = FashionExchangeSetting.UserAgentChrome;
            request.AllowAutoRedirect = false;

            // using releases the connection even if reading the header throws
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                string location = response.GetResponseHeader("Location");
                responseUrl = String.IsNullOrEmpty(location) ? url : location;
            }
            return responseUrl;
        }
        catch (WebException e)
        {
            // give up if already retried n times
            if (i == defaultRetryCount)
                throw;

            if (e.Status == WebExceptionStatus.Timeout)
                log.Warn(e.Message);
            else if (e.Status == WebExceptionStatus.ProtocolError && useHead)
            {
                log.Warn(e.Message + " Retry without http request = head.");
                useHead = false;
            }
            else
                throw;

            // Only reached when the request is retried: release the failed response so its
            // connection is not held while the next attempt runs.
            if (e.Response != null)
                e.Response.Close();
        }
        catch (IOException e)
        {
            // give up if already retried n times
            if (i == defaultRetryCount)
                throw;
            log.Warn(e.Message);
        }
    }
    return responseUrl;
}
/// <summary>
/// Convenience overload of LoadHtml for callers that do not need the redirected url; every argument is
/// forwarded unchanged to the overload with the out parameter and the reported url is discarded.
/// </summary>
/// <param name="url">Address to load.</param>
/// <param name="cookieContainer">Optional cookie container for the request.</param>
/// <param name="retryOnError">Forwarded retry-on-error switch.</param>
/// <param name="retryOnTimeout">Forwarded retry-on-timeout switch.</param>
/// <param name="retryCount">Forwarded number of retries.</param>
/// <param name="retryDelayInMilisecond">Forwarded pause between retries, in milliseconds.</param>
/// <param name="proxy">Optional proxy.</param>
/// <param name="httpHeaders">Optional header name/value pairs.</param>
/// <param name="checkUrlLoaded">Forwarded repeated-load check switch.</param>
/// <param name="webDriver">Optional Selenium driver used instead of a plain HTTP request.</param>
/// <returns>The document returned by the forwarded call.</returns>
public static HtmlDocument LoadHtml(string url, CookieContainer cookieContainer = null, bool retryOnError = false, bool retryOnTimeout = true, int retryCount = 3, int retryDelayInMilisecond = 0, WebProxy proxy = null, Dictionary<string, string> httpHeaders = null, bool checkUrlLoaded = true, RemoteWebDriver webDriver = null)
{
    string redirectedUrl;
    return LoadHtml(
        url,
        out redirectedUrl,
        cookieContainer: cookieContainer,
        retryOnError: retryOnError,
        retryOnTimeout: retryOnTimeout,
        retryCount: retryCount,
        retryDelayInMilisecond: retryDelayInMilisecond,
        proxy: proxy,
        httpHeaders: httpHeaders,
        checkUrlLoaded: checkUrlLoaded,
        webDriver: webDriver);
}
/// <summary>
/// Loads <paramref name="url"/> into an HtmlDocument, either through <paramref name="webDriver"/> or
/// through plain HTTP GET requests that follow redirects manually, and then rewrites relative
/// a/@href and img/@src values as absolute urls.
/// </summary>
/// <param name="url">Address to load; it is HTML-decoded before the first request.</param>
/// <param name="redirectedUrl">Receives the last address that was requested (the decoded url when no redirect happened).</param>
/// <param name="cookieContainer">Optional cookie container assigned to every request.</param>
/// <param name="retryOnError">When true, any exception triggers a retry.</param>
/// <param name="retryOnTimeout">When true, timeouts and gateway timeouts trigger a retry.</param>
/// <param name="retryCount">Number of retries after the first attempt.</param>
/// <param name="retryDelayInMilisecond">Pause before each retry, in milliseconds; 0 retries immediately.</param>
/// <param name="proxy">Optional proxy.</param>
/// <param name="httpHeaders">Optional header name/value pairs applied through SetRequestHeaders.</param>
/// <param name="checkUrlLoaded">When true, a url that was already requested twice is rejected with an exception.</param>
/// <param name="webDriver">Optional Selenium driver; when supplied the page is loaded through it.</param>
/// <returns>The parsed document.</returns>
public static HtmlDocument LoadHtml(string url, out string redirectedUrl, CookieContainer cookieContainer = null, bool retryOnError = false, bool retryOnTimeout = true, int retryCount = 3, int retryDelayInMilisecond = 0, WebProxy proxy = null, Dictionary<string, string> httpHeaders = null, bool checkUrlLoaded = true, RemoteWebDriver webDriver = null)
{
    if (url.Contains("./"))
        FixDotInUri();

    // check if url has already been loaded in the past
    if (checkUrlLoaded && loadedHtmlUrls.Contains(url) && loadedHtmlUrls.Where(m => m == url).Count() >= 2)
    {
        Exception e = new Exception("Url has already been loaded more than 2 times. Url: " + url);
        e.Data[ParameterInfo.ExceptionData.Url] = url;
        throw e;
    }
    loadedHtmlUrls.Enqueue(url);

    url = WebUtility.HtmlDecode(url);
    redirectedUrl = url;
    bool hasConnectionClosedError = false;

    // retry if request has been timed out or refused
    for (int i = 0; i <= retryCount; i++)
    {
        try
        {
            HtmlDocument htmlDocument = new HtmlDocument();
            List<string> redirectedUrls = new List<string>();
            redirectedUrls.Add(redirectedUrl);

            if (webDriver != null)
            {
                webDriver.Navigate().GoToUrl(url);
                System.Threading.Thread.Sleep(1000 * 5);
                HtmlNode.ElementsFlags.Remove("option");
                HtmlNode.ElementsFlags.Remove("form");
                htmlDocument.LoadHtml(webDriver.PageSource);
                if (url != webDriver.Url)
                    redirectedUrl = webDriver.Url;
            }
            else
            {
                bool redirected;
                do
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(redirectedUrl);
                    request.Method = "GET";
                    request.UserAgent = FashionExchangeSetting.UserAgentChrome;
                    request.AllowAutoRedirect = false;
                    request.CookieContainer = cookieContainer;
                    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
                    /* Sometimes .net framework doesn't handle http request properly and instead throws
                     * web exception "The request was aborted: The connection was closed unexpectedly".
                     * Using a different HttpVersion.Version10 can get around this issue. */
                    if (hasConnectionClosedError)
                        request.ProtocolVersion = HttpVersion.Version10;
                    if (proxy != null)
                        request.Proxy = proxy;
                    SetRequestHeaders(request, httpHeaders);

                    // using releases the connection on every path, including the exceptions thrown below
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        bool isRedirectStatus = response.StatusCode == HttpStatusCode.Redirect
                            || response.StatusCode == HttpStatusCode.TemporaryRedirect
                            || response.StatusCode == HttpStatusCode.MovedPermanently
                            || response.StatusCode == HttpStatusCode.Moved;

                        if (!redirectedUrl.Contains("www.styletread.com.au") && isRedirectStatus)
                        {
                            string location = response.GetResponseHeader("Location");

                            // prepend the root address when the redirect target does not carry one
                            if (!location.StartsWith("http"))
                            {
                                Uri uri = new Uri(redirectedUrl);
                                string domainUrl = new UriBuilder(uri.Scheme, uri.DnsSafeHost).ToString();
                                redirectedUrl = domainUrl + location.TrimStart('/'); // domainUrl will always have '/' at the end
                            }
                            else
                                redirectedUrl = location;

                            // append hash tag
                            if (url.Contains("#") && !redirectedUrl.Contains("#"))
                                redirectedUrl += StringUtil.SubstringToEnd(url, "#", includeStartStr: true);

                            // a target that was already visited means a redirect loop
                            if (redirectedUrls.Contains(redirectedUrl))
                                throw new WebException("Too many automatic redirections were attempted.", WebExceptionStatus.ProtocolError);
                            redirectedUrls.Add(redirectedUrl);
                            redirected = true;
                        }
                        else
                        {
                            redirected = false;
                            HtmlNode.ElementsFlags.Remove("option");
                            HtmlNode.ElementsFlags.Remove("form");
                            htmlDocument.Load(response.GetResponseStream(), Encoding.GetEncoding(response.CharacterSet.Trim('"')));
                        }
                    }
                } while (redirected);
            }

            // convert all img src and a href from relative to absolute urls
            Uri finalUrl = new Uri(redirectedUrl);
            string baseUrl = finalUrl.Scheme + "://" + finalUrl.Authority;
            foreach (HtmlNode linkNode in htmlDocument.DocumentNode.SelectNodes("//a[@href and not(starts-with(@href, 'http'))]") ?? Enumerable.Empty<HtmlNode>())
                ConvertRelativeAttributeToAbsolute(linkNode.Attributes["href"], finalUrl, baseUrl);
            foreach (HtmlNode imgNode in htmlDocument.DocumentNode.SelectNodes("//img[@src and not(starts-with(@src, 'http'))]") ?? Enumerable.Empty<HtmlNode>())
                ConvertRelativeAttributeToAbsolute(imgNode.Attributes["src"], finalUrl, baseUrl);

            return htmlDocument;
        }
        catch (Exception e)
        {
            log.Info("An exception occured while loading url: " + redirectedUrl);
            e.Data[ParameterInfo.ExceptionData.Url] = redirectedUrl;

            // give up if already retried n times
            if (i == retryCount)
                throw;

            WebException webException = e as WebException;
            WebException driverInnerException = e is OpenQA.Selenium.WebDriverException ? e.InnerException as WebException : null;

            // If web exception is connection closed unexpectedly, retry with different Protocol Version (i.e. HttpVersion10)
            // Else retry if request timeout and retryOnTimeout = true, or if retryOnError = true
            if (webException != null && webException.Status == WebExceptionStatus.ConnectionClosed && !hasConnectionClosedError)
                hasConnectionClosedError = true;
            else if (retryOnTimeout
                && webException != null
                && (webException.Status == WebExceptionStatus.Timeout
                    || (webException.Response != null && ((HttpWebResponse)webException.Response).StatusCode == HttpStatusCode.GatewayTimeout)))
                log.Warn(e.Message);
            else if (retryOnTimeout
                && driverInnerException != null
                && driverInnerException.Status == WebExceptionStatus.Timeout)
                log.Warn(e.Message);
            else if (retryOnError)
                log.Warn(e.Message);
            else
                throw;

            if (retryDelayInMilisecond > 0)
            {
                log.Info("Retry loading html page after " + retryDelayInMilisecond + " milliseconds. Retry count: " + (i + 1));
                System.Threading.Thread.Sleep(retryDelayInMilisecond);
            }
            else
            {
                log.Info("Retry loading html page. Retry count: " + (i + 1));
            }
        }
    }
    return null;
}

// Rewrites one relative href/src attribute value as an absolute url based on the address the page was loaded from.
private static void ConvertRelativeAttributeToAbsolute(HtmlAttribute attribute, Uri finalUrl, string baseUrl)
{
    string relative = attribute.Value;
    string pagePath = finalUrl.AbsolutePath;

    if (relative.StartsWith("//"))
    {
        // protocol-relative: only the scheme is missing
        attribute.Value = finalUrl.Scheme + ":" + relative;
    }
    else if (relative.StartsWith("?"))
    {
        // query only: same page, different query string
        attribute.Value = baseUrl + pagePath + relative;
    }
    else if (relative.StartsWith("../"))
    {
        // work out number of times traverse to parent
        int backUpParentCount = 0;
        int relativeStartIndex = 0;
        while (relative.IndexOf("../", relativeStartIndex) == relativeStartIndex)
        {
            relativeStartIndex += "../".Length;
            backUpParentCount++;
        }

        // work out number of slashes we need to traverse and get the position index
        int slashIndex = 0;
        if (pagePath.LastIndexOf('/') != 0)
        {
            slashIndex = pagePath.LastIndexOf('/');
            for (int backUpParentCounter = 0; backUpParentCounter < backUpParentCount; backUpParentCounter++)
            {
                slashIndex = pagePath.LastIndexOf('/', slashIndex - 1);
                if (slashIndex == 0)
                    break;
            }
        }
        attribute.Value = baseUrl + pagePath.Substring(0, slashIndex) + "/" + relative.Substring(relativeStartIndex);
    }
    else if (relative.StartsWith("/"))
    {
        // root-relative
        attribute.Value = baseUrl + relative;
    }
    else if (pagePath == "/")
    {
        attribute.Value = baseUrl + "/" + relative;
    }
    else
    {
        // relative to the parent segment of the page path (a trailing slash is ignored when locating it)
        attribute.Value = baseUrl + pagePath.Substring(0, pagePath.TrimEnd('/').LastIndexOf('/')) + "/" + relative;
    }
}
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET request and parses the response body as XML.
/// </summary>
/// <param name="url">Address to load; it is HTML-decoded before the request is made.</param>
/// <param name="cookieContainer">Optional cookie container assigned to the request.</param>
/// <param name="retryOnError">When true, any exception triggers a retry.</param>
/// <param name="retryOnTimeout">When true, timeouts and gateway timeouts trigger a retry.</param>
/// <param name="retryCount">Number of retries after the first attempt.</param>
/// <param name="retryDelayInMilisecond">Pause before each retry, in milliseconds; 0 retries immediately.</param>
/// <param name="httpHeaders">Optional header name/value pairs applied through SetRequestHeaders.</param>
/// <returns>The parsed XML document.</returns>
public static XmlDocument LoadXml(string url, CookieContainer cookieContainer = null, bool retryOnError = false, bool retryOnTimeout = true, int retryCount = 3, int retryDelayInMilisecond = 0, Dictionary<string, string> httpHeaders = null)
{
    if (url.Contains("./"))
        FixDotInUri();
    url = WebUtility.HtmlDecode(url);

    // retry if request has been timed out or refused
    for (int i = 0; i <= retryCount; i++)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.UserAgent = FashionExchangeSetting.UserAgentChrome;
            request.CookieContainer = cookieContainer;
            SetRequestHeaders(request, httpHeaders);

            // using releases the connection even when the body is not well-formed XML
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            {
                XmlDocument xml = new XmlDocument();
                xml.Load(responseStream);
                return xml;
            }
        }
        catch (Exception e)
        {
            log.InfoFormat("An exception occured while loading xml. Url: {0}", url);
            e.Data[ParameterInfo.ExceptionData.Url] = url;

            // give up if already retried n times
            if (i == retryCount)
                throw;

            WebException webException = e as WebException;
            if (retryOnTimeout
                && webException != null
                && (webException.Status == WebExceptionStatus.Timeout
                    || (webException.Response != null && ((HttpWebResponse)webException.Response).StatusCode == HttpStatusCode.GatewayTimeout)))
                log.Warn(e.Message);
            else if (retryOnError)
                log.Warn(e.Message);
            else
                throw;

            if (retryDelayInMilisecond > 0)
            {
                log.Info("Retry loading xml after " + retryDelayInMilisecond + " milliseconds. Retry count: " + (i + 1));
                System.Threading.Thread.Sleep(retryDelayInMilisecond);
            }
            else
                log.Info("Retry loading xml. Retry count: " + (i + 1));
        }
    }
    return null;
}
/// <summary>
/// Removes every default namespace declaration of the form xmlns="..." from <paramref name="xml"/>.
/// </summary>
/// <param name="xml">XML text.</param>
/// <returns>The XML text without those declarations.</returns>
public static string RemoveXmlNamespaces(string xml)
{
    // Each match is the full declaration, delimiters included, so it can be replaced verbatim.
    List<string> xmlNamespaces = StringUtil.Substrings(xml, "xmlns=\"", "\"", includeStartStr: true, includeEndStr: true);
    foreach (string xmlNamespace in xmlNamespaces)
    {
        xml = xml.Replace(xmlNamespace, String.Empty);
    }
    return xml;
}
/// <summary>
/// Cleans scraped text: HTML entities are decoded, line breaks, tabs and non-breaking spaces become
/// spaces, a fixed list of control characters and symbols is removed, a fixed list of accented letters
/// and typographic characters is replaced by plain equivalents, and whitespace is trimmed and collapsed.
/// </summary>
/// <param name="text">Text to clean; null or empty input is returned unchanged.</param>
/// <returns>The cleaned text.</returns>
public static string NormalizeText(string text)
{
    if (String.IsNullOrEmpty(text))
        return text;

    text = WebUtility.HtmlDecode(text)
        // whitespace
        .Replace("\n", " ")
        .Replace("\r", " ")
        .Replace("\t", " ")
        .Replace("\u00A0", " ")
        // control characters
        .Replace("\u0085", String.Empty)
        .Replace("\u0080", String.Empty)
        .Replace("\u0093", String.Empty)
        .Replace("\u0099", String.Empty)
        .Replace("\u0091", String.Empty)
        .Replace("\u0092", String.Empty)
        .Replace("\u0094", String.Empty)
        .Replace("\u009f", String.Empty)
        .Replace("\u008c", String.Empty)
        // full-width parentheses
        .Replace("\uff08", "(")
        .Replace("\uff09", ")")
        // symbols that are dropped
        .Replace("¢", String.Empty)
        .Replace("„", String.Empty)
        .Replace("€", String.Empty)
        .Replace("™", String.Empty)
        .Replace("®", String.Empty)
        .Replace("“", String.Empty)
        .Replace("–º", String.Empty)
        .Replace("â", String.Empty)
        .Replace("Â", String.Empty)
        // NOTE(review): this replacement is a no-op as written; the search text was presumably the
        // ampersand entity left over from double-encoded input - confirm against the original file.
        .Replace("&", "&")
        // accented letters
        .Replace("è", "e")
        .Replace("é", "e")
        .Replace("ë", "e")
        .Replace("ç", "c")
        .Replace("ć", "c")
        .Replace("Ć", "C")
        .Replace("ô", "o")
        .Replace("ó", "o")
        .Replace("ò", "o")
        .Replace("Ò", "O")
        .Replace("ø", "o")
        .Replace("Ê", "E")
        .Replace("É", "E")
        .Replace("È", "E")
        .Replace("Ë", "E")
        .Replace("ù", "u")
        .Replace("ü", "u")
        .Replace("ä", "a")
        .Replace("Ã", "A")
        .Replace("Å", "A")
        .Replace("å", "a")
        .Replace("à", "a")
        .Replace("á", "a")
        // typographic characters
        .Replace("–", "-")
        .Replace("�", "?")
        .Replace("’", "'")
        .Replace("⁺", "+")
        // NOTE(review): two removals of "hidden" characters (tasks #480 and #4165) stood here, but
        // their search strings are empty in this copy of the file and String.Replace throws
        // ArgumentException for an empty search string. Restore them once the characters are known.
        .Replace("▽", String.Empty)
        .Replace("`", "'")
        .Replace("ï", "i")
        .Replace("Ï", "I")
        .Replace("í", "i")
        .Replace("Í", "I")
        // fractions
        .Replace("⅜", "3/8")
        .Replace("¾", "3/4")
        .Replace("¼", "1/4")
        .Replace("½", "1/2")
        .Replace("⅝", "5/8")
        .Replace("⅛", "1/8")
        .Replace("″", "\"")
        .Replace("ð", String.Empty)
        .Trim();

    // remove extra spaces: collapse runs of spaces into one. The two-space pattern is built
    // explicitly so it cannot be mistaken for (or reduced to) a single space, which would make
    // this loop run forever.
    string doubleSpace = new string(' ', 2);
    while (text.Contains(doubleSpace))
        text = text.Replace(doubleSpace, " ");

    return text;
}
/// <summary>
/// Sends an HTTP GET request to <paramref name="url"/> and returns the response body as text.
/// </summary>
/// <param name="url">Address to request; it is HTML-decoded before the request is made.</param>
/// <param name="cookieContainer">Optional cookie container assigned to the request.</param>
/// <param name="accept">Optional value for the Accept header.</param>
/// <param name="httpHeaders">Optional header name/value pairs applied through SetRequestHeaders.</param>
/// <param name="retryOnError">When true, any exception triggers a retry.</param>
/// <param name="retryOnTimeout">When true, timeouts and gateway timeouts trigger a retry.</param>
/// <param name="retryCount">Number of retries after the first attempt.</param>
/// <param name="retryDelayInMilisecond">Pause before each retry, in milliseconds; 0 retries immediately.</param>
/// <param name="proxy">Optional proxy.</param>
/// <param name="checkUrlLoaded">When true, a url that was already requested twice is rejected with an exception.</param>
/// <returns>The response body.</returns>
public static string CreateHttpGetRequest(string url, CookieContainer cookieContainer = null, string accept = null, Dictionary<string, string> httpHeaders = null, bool retryOnError = false, bool retryOnTimeout = true, int retryCount = 3, int retryDelayInMilisecond = 0, WebProxy proxy = null, bool checkUrlLoaded = true)
{
    // check if url has already been loaded in the past
    if (checkUrlLoaded && loadedHttpGetUrls.Contains(url) && loadedHttpGetUrls.Where(m => m == url).Count() >= 2)
    {
        Exception e = new Exception("Url has already been loaded more than 2 times. Url: " + url);
        e.Data[ParameterInfo.ExceptionData.Url] = url;
        throw e;
    }
    loadedHttpGetUrls.Enqueue(url);

    url = WebUtility.HtmlDecode(url);
    bool hasConnectionClosedError = false;

    // retry if request has been timed out or refused
    for (int i = 0; i <= retryCount; i++)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.UserAgent = FashionExchangeSetting.UserAgentChrome;
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            request.CookieContainer = cookieContainer;
            /* Sometimes .net framework doesn't handle http request properly and instead throws
             * web exception "The request was aborted: The connection was closed unexpectedly".
             * Using a different HttpVersion.Version10 can get around this issue. */
            if (hasConnectionClosedError)
                request.ProtocolVersion = HttpVersion.Version10;
            if (proxy != null)
                request.Proxy = proxy;
            if (!String.IsNullOrEmpty(accept))
                request.Accept = accept;
            SetRequestHeaders(request, httpHeaders);

            // using releases the connection even when reading the body fails
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception e)
        {
            log.InfoFormat("A WebException occured while sending request. Url: {0}", url);
            e.Data[ParameterInfo.ExceptionData.Url] = url;

            // give up if already retried n times
            if (i == retryCount)
                throw;

            WebException webException = e as WebException;

            // If web exception is connection closed unexpectedly, retry with different Protocol Version (i.e. HttpVersion10)
            // Else retry if request timeout and retryOnTimeout = true, or if retryOnError = true
            if (webException != null && webException.Status == WebExceptionStatus.ConnectionClosed && !hasConnectionClosedError)
                hasConnectionClosedError = true;
            else if (retryOnTimeout
                && webException != null
                && (webException.Status == WebExceptionStatus.Timeout
                    || (webException.Response != null && ((HttpWebResponse)webException.Response).StatusCode == HttpStatusCode.GatewayTimeout)))
                log.Warn(e.Message);
            else if (retryOnError)
                log.Warn(e.Message);
            else
                throw;

            if (retryDelayInMilisecond > 0)
            {
                log.Info("Retry sending request after " + retryDelayInMilisecond + " milliseconds. Retry count: " + (i + 1));
                System.Threading.Thread.Sleep(retryDelayInMilisecond);
            }
            else
            {
                // this is the GET helper, so the message says "sending" like the delayed branch above
                log.Info("Retry sending request. Retry count: " + (i + 1));
            }
        }
    }
    return null;
}
/// <summary>
/// Sends <paramref name="postData"/> to <paramref name="url"/> with an HTTP POST request and returns the
/// response body as text.
/// </summary>
/// <param name="url">Address to post to; it is HTML-decoded before the request is made.</param>
/// <param name="postData">Request body; null is sent as an empty body. Encoded as UTF-8.</param>
/// <param name="httpHeaders">Optional header name/value pairs applied through SetRequestHeaders.</param>
/// <param name="cookieContainer">Optional cookie container assigned to the request.</param>
/// <param name="ContentType">Value of the Content-Type header.</param>
/// <param name="retryOnError">When true, any exception triggers a retry.</param>
/// <param name="retryOnTimeout">When true, timeouts and gateway timeouts trigger a retry.</param>
/// <param name="retryCount">Number of retries after the first attempt.</param>
/// <param name="retryDelayInMilisecond">Pause before each retry, in milliseconds; 0 retries immediately.</param>
/// <param name="proxy">Optional proxy.</param>
/// <returns>The response body.</returns>
public static string CreateHttpPostRequest(string url, string postData, Dictionary<string, string> httpHeaders = null, CookieContainer cookieContainer = null, string ContentType = "application/x-www-form-urlencoded; charset=UTF-8", bool retryOnError = false, bool retryOnTimeout = true, int retryCount = 3, int retryDelayInMilisecond = 0, WebProxy proxy = null)
{
    postData = postData ?? String.Empty;
    url = WebUtility.HtmlDecode(url);
    bool hasConnectionClosedError = false;
    byte[] postDataBytes = Encoding.UTF8.GetBytes(postData);

    // retry if request has been timed out or refused
    for (int i = 0; i <= retryCount; i++)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "POST";
            request.ContentLength = postDataBytes.Length;
            request.ContentType = ContentType;
            request.UserAgent = FashionExchangeSetting.UserAgentChrome;
            request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            request.CookieContainer = cookieContainer;
            /* Sometimes .net framework doesn't handle http request properly and instead throws
             * web exception "The request was aborted: The connection was closed unexpectedly".
             * Using a different HttpVersion.Version10 can get around this issue. */
            if (hasConnectionClosedError)
                request.ProtocolVersion = HttpVersion.Version10;
            if (proxy != null)
                request.Proxy = proxy;
            SetRequestHeaders(request, httpHeaders);

            using (Stream postStream = request.GetRequestStream())
            {
                postStream.Write(postDataBytes, 0, postDataBytes.Length);
            }

            // using releases the connection even when reading the body fails
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                return reader.ReadToEnd();
            }
        }
        catch (Exception e)
        {
            log.InfoFormat("An exception occured while posting request. Url: {0}", url);
            e.Data[ParameterInfo.ExceptionData.Url] = url;

            // give up if already retried n times
            if (i == retryCount)
                throw;

            WebException webException = e as WebException;

            // If web exception is connection closed unexpectedly, retry with different Protocol Version (i.e. HttpVersion10)
            // Else retry if request timeout and retryOnTimeout = true, or if retryOnError = true
            if (webException != null && webException.Status == WebExceptionStatus.ConnectionClosed && !hasConnectionClosedError)
                hasConnectionClosedError = true;
            else if (retryOnTimeout
                && webException != null
                && (webException.Status == WebExceptionStatus.Timeout
                    || (webException.Response != null && ((HttpWebResponse)webException.Response).StatusCode == HttpStatusCode.GatewayTimeout)))
                log.Warn(e.Message);
            else if (retryOnError)
                log.Warn(e.Message);
            else
                throw;

            if (retryDelayInMilisecond > 0)
            {
                log.Info("Retry posting request after " + retryDelayInMilisecond + " milliseconds. Retry count: " + (i + 1));
                System.Threading.Thread.Sleep(retryDelayInMilisecond);
            }
            else
            {
                log.Info("Retry posting request. Retry count: " + (i + 1));
            }
        }
    }
    return null;
}
/// <summary>
/// Downloads a resource with an HTTP GET request and writes it to a local file, replacing the file if
/// it already exists.
/// </summary>
/// <param name="destFilePath">Address of the resource to download (despite its name, this is the url that is requested).</param>
/// <param name="localFilePath">Path of the local file that receives the content.</param>
public static void DownloadFile(string destFilePath, string localFilePath)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(destFilePath);
    request.Method = WebRequestMethods.Http.Get;

    // using blocks close the connection and the file even when the transfer fails half way
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    using (FileStream writer = new FileStream(localFilePath, FileMode.Create))
    {
        responseStream.CopyTo(writer);
    }
}
/// <summary>
/// Applies <paramref name="httpHeaders"/> to <paramref name="request"/>. Headers that HttpWebRequest
/// exposes through dedicated members (User-Agent, Referer, Range, Expect, Accept, Connection,
/// Content-Type) are routed to those members; everything else is written to the header collection.
/// </summary>
/// <param name="request">Request to configure.</param>
/// <param name="httpHeaders">Header name/value pairs; null or empty leaves the request untouched. Names are matched case-sensitively.</param>
public static void SetRequestHeaders(HttpWebRequest request, Dictionary<string, string> httpHeaders)
{
    if (httpHeaders == null)
        return;

    foreach (KeyValuePair<string, string> httpHeader in httpHeaders)
    {
        string headerValue = httpHeader.Value;
        switch (httpHeader.Key)
        {
            case "User-Agent":
                request.UserAgent = headerValue;
                break;

            case "Referer":
                request.Referer = headerValue;
                break;

            case "Range":
                {
                    // expected shapes: "unit=start-end" or "start-end"
                    long endRange = Convert.ToInt64(StringUtil.SubstringToEnd(headerValue, "-"));
                    if (headerValue.Contains("="))
                    {
                        string rangeSpecifier = StringUtil.SubstringFromStart(headerValue, "=");
                        long startRange = Convert.ToInt64(StringUtil.Substring(headerValue, "=", "-"));
                        request.AddRange(rangeSpecifier, startRange, endRange);
                    }
                    else
                    {
                        long startRange = Convert.ToInt64(StringUtil.SubstringFromStart(headerValue, "-"));
                        request.AddRange(startRange, endRange);
                    }
                    break;
                }

            case "Expect":
                request.ServicePoint.Expect100Continue = Convert.ToBoolean(headerValue);
                break;

            case "Accept":
                request.Accept = headerValue;
                break;

            case "Connection":
                if (String.Equals(headerValue, "Keep-Alive"))
                {
                    // For some reason Keep-Alive is set only on first request. Below is work around using reflection to set Keep-Alive on every request
                    ServicePoint servicePoint = request.ServicePoint;
                    System.Reflection.PropertyInfo propertyInfo = servicePoint.GetType().GetProperty("HttpBehaviour", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);

                    // The property is a private framework detail; skip the workaround with a warning
                    // instead of failing the request when it cannot be found.
                    if (propertyInfo != null)
                        propertyInfo.SetValue(servicePoint, (byte)0, null);
                    else
                        log.Warn("ServicePoint.HttpBehaviour is not available; Keep-Alive workaround skipped.");
                }
                else if (String.Equals(headerValue, "keep-alive"))
                {
                    // Similar to above but setting Keep-Alive as lower case. Note following reflection code appends keep-alive instead of replacing existing value. Therefore Connection: keep-alive,Keep-Alive. But this only happens on first request for same domain. Subsequent requests will be Connection: keep-alive
                    request.Headers.GetType().InvokeMember(
                        "ChangeInternal",
                        System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.InvokeMethod,
                        Type.DefaultBinder,
                        request.Headers,
                        new object[] { "Connection", "keep-alive" }
                    );
                }
                else
                    request.Headers[httpHeader.Key] = headerValue;
                break;

            case "Proxy-Authorization":
                request.Headers[httpHeader.Key] = headerValue;
                request.PreAuthenticate = true;
                break;

            case "Content-Type":
                request.ContentType = headerValue;
                break;

            default:
                request.Headers[httpHeader.Key] = headerValue;
                break;
        }
    }
}
/*
 * Workaround for a .Net framework behaviour that drops a dot at the end of a uri path segment.
 * For example: http://www.thevetshed.com.au/buy/adjustable-3-8-puppy-kitten-cat-harness-w-lead./CATHARN
 * becomes http://www.thevetshed.com.au/buy/adjustable-3-8-puppy-kitten-cat-harness-w-lead/CATHARN
 *
 * The workaround clears one flag bit on the private parser objects of the http and https schemes
 * through reflection; nothing happens when the private members cannot be found.
 *
 * Reference: http://stackoverflow.com/questions/856885/httpwebrequest-to-url-with-dot-at-the-end
 */
private static void FixDotInUri()
{
    // The CanonicalizeAsFilePath attribute bit
    const int canonicalizeAsFilePath = 0x1000000;

    System.Reflection.MethodInfo syntaxLookup = typeof(UriParser).GetMethod("GetSyntax", System.Reflection.BindingFlags.Static | System.Reflection.BindingFlags.NonPublic);
    System.Reflection.FieldInfo parserFlags = typeof(UriParser).GetField("m_Flags", System.Reflection.BindingFlags.Instance | System.Reflection.BindingFlags.NonPublic);
    if (syntaxLookup == null || parserFlags == null)
    {
        return;
    }

    string[] schemes = { "http", "https" };
    for (int schemeIndex = 0; schemeIndex < schemes.Length; schemeIndex++)
    {
        UriParser parser = (UriParser)syntaxLookup.Invoke(null, new object[] { schemes[schemeIndex] });
        if (parser == null)
        {
            continue;
        }

        int currentFlags = (int)parserFlags.GetValue(parser);
        bool flagIsSet = (currentFlags & canonicalizeAsFilePath) != 0;
        if (flagIsSet)
        {
            parserFlags.SetValue(parser, currentFlags & ~canonicalizeAsFilePath);
        }
    }
}
/// <summary>
/// Posts the configured admin credentials to the admin login page of <paramref name="hostAddress"/>
/// and returns the cookie container that was attached to that request.
/// </summary>
/// <param name="hostAddress">Host prefix; FashionExchangeSetting.AdminLoginPath is appended to it.</param>
/// <returns>The cookie container used for the login request.</returns>
public static CookieContainer GetLoginCookieContainer(string hostAddress)
{
    // login
    string loginUrl = hostAddress + FashionExchangeSetting.AdminLoginPath;

    // Form values must be url-encoded, otherwise a '&', '=', '+' or '%' in the user name or password corrupts the body
    string postData = String.Format(
        "email={0}&password={1}",
        Uri.EscapeDataString(FashionExchangeSetting.AdminUserName ?? String.Empty),
        Uri.EscapeDataString(FashionExchangeSetting.AdminPassword ?? String.Empty));
    byte[] postDataBytes = Encoding.UTF8.GetBytes(postData);

    HttpWebRequest loginReq = (HttpWebRequest)WebRequest.Create(loginUrl);
    loginReq.Method = "POST";
    loginReq.ContentType = "application/x-www-form-urlencoded";
    loginReq.ContentLength = postDataBytes.Length;
    loginReq.AllowAutoRedirect = false;
    loginReq.CookieContainer = new CookieContainer();

    using (Stream postStream = loginReq.GetRequestStream())
    {
        postStream.Write(postDataBytes, 0, postDataBytes.Length);
    }

    // Only the cookies are of interest; dispose the response so its connection is released
    using (loginReq.GetResponse())
    {
    }

    return loginReq.CookieContainer;
}
/// <summary>
/// Fires a POST at the admin "scrape store" endpoint of <paramref name="hostAddress"/> with every
/// option encoded as a form field. The request deliberately times out after two seconds; a timeout
/// while waiting for the response is swallowed because the caller does not wait for completion.
/// </summary>
/// <param name="hostAddress">Host prefix; FashionExchangeSetting.AdminScrapeStorePath is appended to it.</param>
/// <param name="storeIds">Store ids to send; null is treated as an empty list.</param>
/// <param name="loginCookieContainer">Cookie container attached to the request.</param>
/// <exception cref="WebException">Any web failure other than a timeout while waiting for the response.</exception>
public static void ScrapeStore(string hostAddress, int[] storeIds = null, bool scrapeNewUrl = false, double? scrapeExistingUrlAgeInHour = null, int? scrapeExistingUrlPriceDroppedInDay = null, bool uploadProductPhoto = false, bool checkNewsletterProductAvailabilityAndWatermarkPhoto = false, bool uploadNewsletterPhoto = false, bool uploadOfferImage = false, bool syncUserActivity = false, bool reindex = false, bool uploadIndex = false, bool sendPriceAlert = false, bool sendSaleAlert = false, bool deleteOldIndex = false, bool deleteOldPhoto = false, int scraperThreadCount = 10, CookieContainer loginCookieContainer = null, bool notifyTodaysSale = false, bool checkStoreForScrapeAlert = false, bool sendScrapeResultSummary = false, int? deleteProductAgeInDay = null)
{
    if (storeIds == null)
        storeIds = new int[0];

    StringBuilder postData = new StringBuilder(String.Join("&", storeIds.Select(m => ParameterInfo.QueryString.StoreId + "=" + m)));
    // Appends "&name=value"; a null value (unset nullable option) is written as an empty string
    Action<string, object> appendField = (name, value) => postData.Append('&').Append(name).Append('=').Append(value);
    appendField(ParameterInfo.QueryString.ScrapeNewUrl, scrapeNewUrl);
    appendField(ParameterInfo.QueryString.ScrapeExistingUrlAgeInHour, scrapeExistingUrlAgeInHour);
    appendField(ParameterInfo.QueryString.ScrapeExistingUrlPriceDroppedInDay, scrapeExistingUrlPriceDroppedInDay);
    appendField(ParameterInfo.QueryString.UploadProductPhoto, uploadProductPhoto);
    appendField(ParameterInfo.QueryString.CheckNewsletterProductAvailabilityAndWatermarkPhoto, checkNewsletterProductAvailabilityAndWatermarkPhoto);
    appendField(ParameterInfo.QueryString.UploadNewsletterPhoto, uploadNewsletterPhoto);
    appendField(ParameterInfo.QueryString.UploadOfferImage, uploadOfferImage);
    appendField(ParameterInfo.QueryString.SyncUserActivity, syncUserActivity);
    appendField(ParameterInfo.QueryString.Reindex, reindex);
    appendField(ParameterInfo.QueryString.UploadIndex, uploadIndex);
    appendField(ParameterInfo.QueryString.SendPriceAlert, sendPriceAlert);
    appendField(ParameterInfo.QueryString.SendSaleAlert, sendSaleAlert);
    appendField(ParameterInfo.QueryString.DeleteOldIndex, deleteOldIndex);
    appendField(ParameterInfo.QueryString.DeleteOldPhoto, deleteOldPhoto);
    appendField(ParameterInfo.QueryString.ScraperThreadCount, scraperThreadCount);
    appendField(ParameterInfo.QueryString.NotifyTodaysSale, notifyTodaysSale);
    appendField(ParameterInfo.QueryString.CheckStoreForScrapeAlert, checkStoreForScrapeAlert);
    appendField(ParameterInfo.QueryString.SendScrapeResultSummary, sendScrapeResultSummary);
    appendField(ParameterInfo.QueryString.DeleteProductAgeInDay, deleteProductAgeInDay);
    byte[] postDataBytes = Encoding.UTF8.GetBytes(postData.ToString());

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(hostAddress + FashionExchangeSetting.AdminScrapeStorePath);
    request.Method = WebRequestMethods.Http.Post;
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = postDataBytes.Length;
    request.AllowAutoRedirect = false;
    request.CookieContainer = loginCookieContainer;
    request.Timeout = 1000 * 2;

    using (Stream postStream = request.GetRequestStream())
        postStream.Write(postDataBytes, 0, postDataBytes.Length);

    try
    {
        // Dispose the response when the server does answer in time, so the connection is not leaked
        using (request.GetResponse())
        {
        }
    }
    catch (WebException e)
    {
        // Request was set to timeout after 2 seconds because we don't want to wait for the request to complete
        if (e.Status == WebExceptionStatus.Timeout)
            return;
        else
            throw;
    }
}
// NOTE(review): the provider API keys and proxy passwords in this block are hard-coded secrets;
// consider moving them to configuration and rotating them.

// Proxy list per network, filled on first use. Guarded by proxyLock.
private static Dictionary<ProxyNetwork, List<WebProxy>> proxies = new Dictionary<ProxyNetwork, List<WebProxy>>();
// Proxies that managed to download a test url, keyed by (network, authority of the test url). Guarded by proxyLock.
private static Dictionary<Tuple<ProxyNetwork, string>, List<WebProxy>> proxiesValidatedByUrl = new Dictionary<Tuple<ProxyNetwork, string>, List<WebProxy>>();
private static Object proxyLock = new Object();
// One shared generator: a new Random() per call is time-seeded on the .NET Framework, so calls made
// within the same clock tick would receive identical session ids.
private static readonly Random luminatiSessionRandom = new Random();

/// <summary>
/// Returns the proxies of <paramref name="proxyNetwork"/>. List based networks are loaded once and cached;
/// Luminati networks create a proxy on every call. When <paramref name="testUrl"/> is given, only proxies
/// that could download it are returned and the result is cached per network and test url authority.
/// </summary>
/// <param name="proxyNetwork">Network to take the proxies from.</param>
/// <param name="testUrl">Optional url every returned proxy must be able to download.</param>
/// <param name="maxTestUrlAttemptCount">LuminatiGlobalShared only: maximum number of sessions to try.</param>
/// <param name="maxWorkingProxyCount">LuminatiGlobalShared only: stop once this many working proxies were found.</param>
/// <param name="disableCache">LuminatiGlobalShared only: re-validate even when a cached result exists.</param>
/// <exception cref="ArgumentException">disableCache is set for LuminatiGlobalShared without a test url.</exception>
/// <exception cref="KeyNotFoundException">The network has no proxy list.</exception>
public static List<WebProxy> GetProxies(ProxyNetwork proxyNetwork = ProxyNetwork.MyPrivateProxy, string testUrl = null, int maxTestUrlAttemptCount = 30, int maxWorkingProxyCount = 20, bool disableCache = false)
{
    switch (proxyNetwork)
    {
        case ProxyNetwork.LuminatiGlobalShared: // Luminati Global Shared proxies are slightly different because it has huge list of IPs
            return ValidateLuminatiGlobalSharedProxies(testUrl, maxTestUrlAttemptCount, maxWorkingProxyCount, disableCache);
        case ProxyNetwork.LuminatiStatic:
            return new List<WebProxy>() { BuildLuminatiProxy("static", "xoj41myrdax6", false) };
        case ProxyNetwork.LuminatiGlobalSharedJDSports:
            return new List<WebProxy>() { BuildLuminatiProxy("jdsports", "tw53pckkvvie", true) };
        case ProxyNetwork.LuminatiGlobalSharedFootLocker:
            return new List<WebProxy>() { BuildLuminatiProxy("footlocker", "728yatag13w4", true) };
        case ProxyNetwork.LuminatiGlobalSharedVisionDirect:
            return new List<WebProxy>() { BuildLuminatiProxy("visiondirect", "mls5d00ybhdr", true) };
        case ProxyNetwork.LuminatiGlobalSharedASOS:
            return new List<WebProxy>() { BuildLuminatiProxy("asos", "cis8217oa34e", true) };
        case ProxyNetwork.LuminatiStaticNZ:
            return new List<WebProxy>() { BuildLuminatiProxy("static_nz", "cf83xw0jjhgi", false) };
    }

    List<WebProxy> networkProxies = LoadProxyListForNetwork(proxyNetwork);
    if (String.IsNullOrEmpty(testUrl))
        return networkProxies;

    Tuple<ProxyNetwork, string> cacheKey = new Tuple<ProxyNetwork, string>(proxyNetwork, new Uri(testUrl).Authority);
    List<WebProxy> validated;
    lock (proxyLock)
    {
        if (proxiesValidatedByUrl.TryGetValue(cacheKey, out validated))
            return validated;
        // As before, the list is registered first and filled afterwards
        validated = new List<WebProxy>();
        proxiesValidatedByUrl.Add(cacheKey, validated);
    }

    foreach (WebProxy proxy in networkProxies)
    {
        if (ProbeProxyWithTestUrl(proxy, testUrl, " Proxy: ", p => p.Address.AbsoluteUri, false))
            validated.Add(proxy);
    }
    return validated;
}

// Returns the cached proxy list of a list based network, loading it on first use.
private static List<WebProxy> LoadProxyListForNetwork(ProxyNetwork proxyNetwork)
{
    lock (proxyLock)
    {
        List<WebProxy> loaded;
        if (proxies.TryGetValue(proxyNetwork, out loaded))
            return loaded;

        // Fill a local list and cache it only after loading succeeded, so that a failed
        // API call does not leave an empty list in the cache forever.
        loaded = new List<WebProxy>();
        switch (proxyNetwork)
        {
            case ProxyNetwork.MyPrivateProxy:
                {
                    log.Info("Get MyPrivateProxy proxy list...");
                    string proxyListResponse = ScraperUtil.CreateHttpGetRequest("https://api.myprivateproxy.net/v1/fetchProxies/json/full/gkytva0jbisl9olooyzjmpbdfcrpmn9i", checkUrlLoaded: false);
                    // The response is wrapped in an object so it can be parsed with JObject
                    JObject proxyListJson = JObject.Parse("{Wrapper:" + proxyListResponse + "}");
                    foreach (JToken proxyListItem in proxyListJson.SelectToken("Wrapper"))
                    {
                        log.Info("Found proxy: " + proxyListItem["proxy_ip"].ToString() + ":" + proxyListItem["proxy_port"].ToString());
                        WebProxy proxy = new WebProxy(proxyListItem["proxy_ip"].ToString(), Convert.ToInt32(proxyListItem["proxy_port"].ToString()));
                        proxy.Credentials = new NetworkCredential(proxyListItem["username"].ToString(), proxyListItem["password"].ToString());
                        loaded.Add(proxy);
                    }
                    break;
                }
            case ProxyNetwork.ProxyBonanza:
                {
                    string apiUrl = "https://api.proxybonanza.com/v1/userpackages/49575.json";
                    Dictionary<string, string> httpHeaders = new Dictionary<string, string>();
                    httpHeaders.Add("Authorization", "mIAWG1CKaz3cSjWUV2wnAaszmNS6nck6C8kQIDBmOheslgMOFp!43336");
                    JObject proxyListJson = JObject.Parse(ScraperUtil.CreateHttpGetRequest(apiUrl, httpHeaders: httpHeaders, checkUrlLoaded: false));
                    foreach (JToken proxyItem in proxyListJson.SelectToken("data.ippacks"))
                    {
                        log.Info("Found proxy: " + proxyItem["ip"].ToString() + ":" + proxyItem["port_http"].ToString());
                        WebProxy proxy = new WebProxy(proxyItem["ip"].ToString(), Convert.ToInt32(proxyItem["port_http"].ToString()));
                        proxy.Credentials = new NetworkCredential(proxyListJson.SelectToken("data.login").ToString(), proxyListJson.SelectToken("data.password").ToString());
                        loaded.Add(proxy);
                    }
                    break;
                }
            case ProxyNetwork.BinaryLane:
                {
                    WebProxy proxy = new WebProxy("43.229.63.22", 29842);
                    proxy.Credentials = new NetworkCredential("scrapeserver", "95EpLZ");
                    loaded.Add(proxy);
                    break;
                }
            case ProxyNetwork.BinaryLaneScrapeServer:
                {
                    loaded.Add(new WebProxy("43.229.63.22", 29843));
                    break;
                }
            default:
                // Same exception type the dictionary indexer raised for an unknown network
                throw new KeyNotFoundException("No proxy list is available for proxy network " + proxyNetwork);
        }

        proxies[proxyNetwork] = loaded;
        return loaded;
    }
}

// Handles ProxyNetwork.LuminatiGlobalShared: one fresh session proxy without a test url,
// otherwise up to maxTestUrlAttemptCount fresh sessions are probed against the test url.
private static List<WebProxy> ValidateLuminatiGlobalSharedProxies(string testUrl, int maxTestUrlAttemptCount, int maxWorkingProxyCount, bool disableCache)
{
    bool hasTestUrl = !String.IsNullOrEmpty(testUrl);
    if (disableCache && !hasTestUrl)
        throw new ArgumentException("test url is required when disable cache is enabled");
    if (!hasTestUrl)
        return new List<WebProxy>() { BuildLuminatiProxy("zone1", "200fbti3d9xt", true) };

    Tuple<ProxyNetwork, string> cacheKey = new Tuple<ProxyNetwork, string>(ProxyNetwork.LuminatiGlobalShared, new Uri(testUrl).Authority);
    List<WebProxy> working;
    lock (proxyLock)
    {
        if (!disableCache && proxiesValidatedByUrl.TryGetValue(cacheKey, out working))
            return working;
        working = new List<WebProxy>();
        proxiesValidatedByUrl[cacheKey] = working;
    }

    for (int attempt = 0; attempt < maxTestUrlAttemptCount && working.Count < maxWorkingProxyCount; attempt++)
    {
        WebProxy candidate = BuildLuminatiProxy("zone1", "200fbti3d9xt", true);
        // The log entries show the exit IP of the session, looked up through the proxy itself
        if (ProbeProxyWithTestUrl(candidate, testUrl, " Proxy IP: ", p => CreateHttpGetRequest("https://api.ipify.org/", proxy: p, checkUrlLoaded: false), true))
            working.Add(candidate);
    }
    return working;
}

// Creates a Luminati super proxy entry for the given zone. With useSession a random session id
// is added to both the user name and the host name.
private static WebProxy BuildLuminatiProxy(string zone, string password, bool useSession)
{
    string userName = "lum-customer-hl_8238b460-zone-" + zone;
    string proxyAddress = "customer-hl_8238b460";
    if (useSession)
    {
        string sessionId;
        lock (luminatiSessionRandom)
            sessionId = luminatiSessionRandom.Next().ToString();
        userName += "-session-" + sessionId;
        proxyAddress += "-session-" + sessionId;
    }
    proxyAddress += ".zproxy.lum-superproxy.io";

    WebProxy proxy = new WebProxy(proxyAddress, 22225);
    proxy.Credentials = new NetworkCredential(userName, password);
    return proxy;
}

// Downloads testUrl through the proxy with a 10 second timeout and logs the outcome.
// Returns true when the download succeeded. describeProxy is evaluated only when a log line is written.
private static bool ProbeProxyWithTestUrl(WebProxy proxy, string testUrl, string proxyLabel, Func<WebProxy, string> describeProxy, bool reportForbidden)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(testUrl);
        request.UserAgent = FashionExchangeSetting.UserAgentChrome;
        request.Proxy = proxy;
        request.Timeout = 1000 * 10;
        // Only success matters; dispose the response right away so the connection is released
        using (request.GetResponse())
        {
        }
        log.Info("Found working proxy. Test Url: " + testUrl + proxyLabel + describeProxy(proxy));
        return true;
    }
    catch (WebException e)
    {
        HttpStatusCode? statusCode = null;
        if (e.Response != null)
        {
            HttpWebResponse errorResponse = e.Response as HttpWebResponse;
            if (errorResponse != null)
                statusCode = errorResponse.StatusCode;
            e.Response.Close();
        }

        if (e.Status == WebExceptionStatus.Timeout || statusCode == HttpStatusCode.GatewayTimeout)
            log.Info("Timed out downloading test url. Test Url: " + testUrl + proxyLabel + describeProxy(proxy));
        else if (reportForbidden && (statusCode == HttpStatusCode.Unauthorized || statusCode == HttpStatusCode.Forbidden))
            log.Info("Forbidden or unauthorized downloading test url. Test Url: " + testUrl + proxyLabel + describeProxy(proxy));
        else
            log.Info("Error downloading test url. Test Url: " + testUrl + proxyLabel + describeProxy(proxy) + " Exception: " + e.ToString());
        return false;
    }
}
// Shared generator for GetRandomProxy: a new Random() per call is time-seeded on the .NET Framework,
// so threads calling at the same moment would all pick the same proxy.
private static readonly Random randomProxyPicker = new Random();

/// <summary>
/// Picks one proxy at random from the list returned by GetProxies for the same arguments.
/// </summary>
/// <returns>A randomly chosen proxy.</returns>
/// <exception cref="ArgumentOutOfRangeException">GetProxies returned an empty list.</exception>
public static WebProxy GetRandomProxy(ProxyNetwork proxyNetwork = ProxyNetwork.MyPrivateProxy, string testUrl = null, int maxTestUrlAttemptCount = 30, int maxWorkingProxyCount = 20, bool disableCache = false)
{
    List<WebProxy> candidates = GetProxies(proxyNetwork: proxyNetwork, testUrl: testUrl, maxTestUrlAttemptCount: maxTestUrlAttemptCount, maxWorkingProxyCount: maxWorkingProxyCount, disableCache: disableCache);

    int index;
    lock (randomProxyPicker) // Random is not thread safe
        index = randomProxyPicker.Next(candidates.Count);
    return candidates[index];
}
// Builds a zipped Chrome extension (manifest version 2) that routes all traffic through the given
// proxy and answers the proxy's authentication challenge with the proxy's credentials.
// The zip is written to the temporary directory below the application path; its full path is returned.
// proxy: proxy whose host, port and credentials are written into background.js. A proxy without
//        NetworkCredential gets empty credentials.
// fileNamePrefix: prefix of the generated zip file name.
public static string GenerateChromeProxyExtension(WebProxy proxy, string fileNamePrefix)
{
    string temporaryDirectory = (HostingEnvironment.ApplicationPhysicalPath ?? AppDomain.CurrentDomain.BaseDirectory) + FashionExchangeSetting.TemporaryDirectory;
    // CreateDirectory does nothing when the directory already exists
    Directory.CreateDirectory(temporaryDirectory);
    string extensionFullPath = temporaryDirectory + fileNamePrefix + "_ChromeProxy_" + System.Threading.Thread.CurrentThread.ManagedThreadId + "_" + DateTime.Now.ToString("yyyyMMdd_HHmmss") + ".zip";

    // Not every proxy carries credentials (or NetworkCredential ones); avoid a NullReferenceException
    NetworkCredential credential = proxy.Credentials as NetworkCredential;
    string userName = credential != null ? credential.UserName : String.Empty;
    string password = credential != null ? credential.Password : String.Empty;

    using (MemoryStream memoryStream = new MemoryStream())
    {
        using (ZipArchive archive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true))
        {
            ZipArchiveEntry backgroundJS = archive.CreateEntry("background.js");
            using (Stream entryStream = backgroundJS.Open())
            using (StreamWriter streamWriter = new StreamWriter(entryStream))
            {
                // Values are escaped for use inside the double quoted JavaScript string literals.
                // The listener filter must be the "<all_urls>" match pattern; an empty pattern is not valid.
                streamWriter.Write(
@"var config = {{
mode: ""fixed_servers"",
rules: {{
singleProxy: {{
scheme: ""http"",
host: ""{0}"",
port: parseInt({1})
}},
bypassList: [""foobar.com""]
}}
}};
chrome.proxy.settings.set({{value: config, scope: ""regular""}}, function() {{}});
function callbackFn(details) {{
return {{
authCredentials: {{
username: ""{2}"",
password: ""{3}""
}}
}};
}}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{{urls: [""<all_urls>""]}},
['blocking']
);",
                    HttpUtility.JavaScriptStringEncode(proxy.Address.Host),
                    proxy.Address.Port,
                    HttpUtility.JavaScriptStringEncode(userName),
                    HttpUtility.JavaScriptStringEncode(password)
                );
            }

            ZipArchiveEntry manifestJSON = archive.CreateEntry("manifest.json");
            using (Stream entryStream = manifestJSON.Open())
            using (StreamWriter streamWriter = new StreamWriter(entryStream))
            {
                streamWriter.Write(
@"{
""version"": ""1.0.0"",
""manifest_version"": 2,
""name"": ""Chrome Proxy"",
""permissions"": [
""proxy"",
""tabs"",
""unlimitedStorage"",
""storage"",
""<all_urls>"",
""webRequest"",
""webRequestBlocking""
],
""background"": {
""scripts"": [""background.js""]
},
""minimum_chrome_version"":""22.0.0""
}"
                );
            }
        }

        // The archive is complete only after the ZipArchive was disposed; now persist it
        using (FileStream fileStream = new FileStream(extensionFullPath, FileMode.Create))
        {
            memoryStream.Seek(0, SeekOrigin.Begin);
            memoryStream.CopyTo(fileStream);
        }
    }
    return extensionFullPath;
}
// Builds a zipped Chrome extension (manifest version 2) that cancels every request whose url matches
// one of blockUrls and whose resource type is one of blockResourceTypes.
// The zip is written to the temporary directory below the application path; its full path is returned.
// For a list of applicable resource types: https://developer.chrome.com/extensions/webRequest#type-ResourceType
// Throws ArgumentException when blockUrls is null or empty and ArgumentNullException when
// blockResourceTypes is null.
public static string GenerateChromeBlockExtension(List<string> blockUrls, List<string> blockResourceTypes, string fileNamePrefix)
{
    if (blockUrls == null || !blockUrls.Any())
        throw new ArgumentException("At least one block url is required");
    // Fail before touching the file system; previously this surfaced later from the LINQ call
    if (blockResourceTypes == null)
        throw new ArgumentNullException("blockResourceTypes");

    string temporaryDirectory = (HostingEnvironment.ApplicationPhysicalPath ?? AppDomain.CurrentDomain.BaseDirectory) + FashionExchangeSetting.TemporaryDirectory;
    // CreateDirectory does nothing when the directory already exists
    Directory.CreateDirectory(temporaryDirectory);
    string extensionFullPath = temporaryDirectory + fileNamePrefix + "_ChromeBlock_" + System.Threading.Thread.CurrentThread.ManagedThreadId + "_" + DateTime.Now.ToString("yyyyMMdd_HHmmss") + ".zip";

    using (MemoryStream memoryStream = new MemoryStream())
    {
        using (ZipArchive archive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true))
        {
            ZipArchiveEntry backgroundJS = archive.CreateEntry("background.js");
            using (Stream entryStream = backgroundJS.Open())
            using (StreamWriter streamWriter = new StreamWriter(entryStream))
            {
                // JavaScriptStringEncode(value, true) yields a quoted and escaped JavaScript string literal
                streamWriter.Write(
@"chrome.webRequest.onBeforeRequest.addListener(
function(details) {{ return {{cancel: true}}; }},
{{
urls: [{0}],
types: [{1}]
}},
[""blocking""]);",
                    String.Join(",", blockUrls.Select(m => HttpUtility.JavaScriptStringEncode(m, true))),
                    String.Join(",", blockResourceTypes.Select(m => HttpUtility.JavaScriptStringEncode(m, true)))
                );
            }

            ZipArchiveEntry manifestJSON = archive.CreateEntry("manifest.json");
            using (Stream entryStream = manifestJSON.Open())
            using (StreamWriter streamWriter = new StreamWriter(entryStream))
            {
                // The host permission must be the "<all_urls>" match pattern; an empty string is not a valid permission
                streamWriter.Write(
@"{
""version"": ""1.0.0"",
""manifest_version"": 2,
""name"": ""Chrome Block"",
""permissions"": [
""<all_urls>"",
""webRequest"",
""webRequestBlocking""
],
""background"": {
""scripts"": [""background.js""]
},
""minimum_chrome_version"":""22.0.0""
}"
                );
            }
        }

        // The archive is complete only after the ZipArchive was disposed; now persist it
        using (FileStream fileStream = new FileStream(extensionFullPath, FileMode.Create))
        {
            memoryStream.Seek(0, SeekOrigin.Begin);
            memoryStream.CopyTo(fileStream);
        }
    }
    return extensionFullPath;
}
// Multipliers that convert one unit of a currency (keyed by currency code, case-insensitive) into AUD.
// Loaded on the first call of ConvertToAUD and kept for the lifetime of the process.
private static Dictionary<string, decimal> exchangeRates;

/// <summary>
/// Converts <paramref name="price"/> from <paramref name="currencyCode"/> to Australian dollars using
/// USD based quotes retrieved once from the currency layer API.
/// </summary>
/// <param name="currencyCode">Currency code of the United States, United Kingdom, New Zealand or Germany entry of Country.</param>
/// <param name="price">Amount in the given currency.</param>
/// <returns>The amount in AUD.</returns>
/// <exception cref="KeyNotFoundException">No rate is known for the currency code.</exception>
/// <exception cref="InvalidOperationException">The API response lacks an expected quote.</exception>
public static decimal ConvertToAUD(string currencyCode, decimal price)
{
    Dictionary<string, decimal> rates = exchangeRates;
    if (rates == null)
    {
        log.Info("Retrieve exchange rates...");
        // Build the table locally and publish it only when complete, so that neither a failed
        // request nor a concurrent caller ever sees a half filled table.
        rates = new Dictionary<string, decimal>(StringComparer.InvariantCultureIgnoreCase);
        JObject exchangeRateJson = JObject.Parse(ScraperUtil.CreateHttpGetRequest("http://apilayer.net/api/live?access_key=" + FashionExchangeSetting.CurrencyLayerAccessKey + "&currencies=AUD,NZD,EUR,GBP", checkUrlLoaded: false));
        Func<string, decimal> readQuote = path =>
        {
            JToken quote = exchangeRateJson.SelectToken(path);
            if (quote == null)
                throw new InvalidOperationException("Exchange rate response does not contain " + path);
            return Convert.ToDecimal(quote.ToString());
        };

        // All quotes are per USD, so X -> AUD is (USD -> AUD) / (USD -> X)
        decimal usdToAud = readQuote("quotes.USDAUD");
        rates.Add(Country.UnitedStates.CurrencyCode, usdToAud);
        rates.Add(Country.UnitedKingdom.CurrencyCode, usdToAud / readQuote("quotes.USDGBP"));
        rates.Add(Country.NewZealand.CurrencyCode, usdToAud / readQuote("quotes.USDNZD"));
        rates.Add(Country.Germany.CurrencyCode, usdToAud / readQuote("quotes.USDEUR"));

        exchangeRates = rates;
        log.Info("Exchange rates retrieved successfully");
    }
    return price * rates[currencyCode];
}
/// <summary>
/// Sets, replaces or removes one query string parameter of an absolute url and returns the
/// rebuilt url (scheme, authority, path and the remaining query; nothing else is kept).
/// </summary>
/// <param name="url">Absolute url to change.</param>
/// <param name="queryName">Name of the query parameter.</param>
/// <param name="queryValue">New value; null removes the parameter, an empty string keeps it with a blank value.</param>
/// <returns>The url with the updated query string; without "?" when no parameter is left.</returns>
public static string UpdateUrlQuery(string url, string queryName, string queryValue)
{
    Uri parsedUrl = new Uri(url);
    NameValueCollection parameters = HttpUtility.ParseQueryString(parsedUrl.Query);

    // only remove query when query value is null, do not remove if value is blank
    if (queryValue != null)
        parameters.Set(queryName, queryValue);
    else
        parameters.Remove(queryName);

    string urlWithoutQuery = String.Concat(parsedUrl.Scheme, "://", parsedUrl.Authority, parsedUrl.AbsolutePath);
    return parameters.Count == 0
        ? urlWithoutQuery
        : urlWithoutQuery + "?" + parameters.ToString();
}
/// <summary>
/// Converts a dotted IPv4 address into its 32 bit numeric value (first segment most significant).
/// </summary>
/// <param name="ipAddress">IPv4 address such as "1.2.3.4". IPv6 addresses are accepted but have no numeric value here.</param>
/// <returns>The numeric value of the IPv4 address, or 0 for any IPv6 address.</returns>
/// <exception cref="FormatException">A segment is not a number.</exception>
public static long ConvertIPAddressToNumber(string ipAddress)
{
    // on localhost, ip address is ::1. Every other IPv6 address is treated the same way,
    // because it cannot be expressed as four dotted segments and used to throw a FormatException.
    if (ipAddress != null && ipAddress.IndexOf(':') >= 0)
        return 0;

    int[] segments = ipAddress.Split('.').Select(m => Convert.ToInt32(m)).ToArray();
    // The first segment is widened to long before multiplying so that values above Int32.MaxValue do not overflow
    return (long)segments[0] * 16777216 + segments[1] * 65536 + segments[2] * 256 + segments[3];
}
// China IP ranges read from "China IP Address.txt": key = numeric end of a range, value = numeric start.
// Loaded on the first call of IsChinaIPAddress.
private static SortedList<long, long> _chinaIPRanges;

/// <summary>
/// Tells whether <paramref name="ipAddress"/> lies inside one of the ranges listed in
/// "China IP Address.txt" in the bin directory of the application.
/// </summary>
/// <param name="ipAddress">Address to look up.</param>
/// <returns>True when the address is inside a listed range.</returns>
public static bool IsChinaIPAddress(string ipAddress)
{
    SortedList<long, long> ranges = _chinaIPRanges;
    if (ranges == null)
    {
        log.Info("Reading China IP Address.txt");
        // Fill a local list and publish it afterwards, so that concurrent callers never search a list that is still being filled
        ranges = new SortedList<long, long>();
        try
        {
            using (StreamReader reader = new StreamReader(Path.Combine(HostingEnvironment.ApplicationPhysicalPath, "bin", "China IP Address.txt")))
            {
                string line = null;
                while ((line = reader.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line) || line.StartsWith("#"))
                        continue;
                    // The start address is the text before " - ", the end address the text between " - " and " China"
                    string startIP = StringUtil.SubstringFromStart(line, " - ");
                    string endIP = StringUtil.Substring(line, " - ", " China");
                    ranges.Add(ScraperUtil.ConvertIPAddressToNumber(endIP), ScraperUtil.ConvertIPAddressToNumber(startIP));
                }
            }
        }
        catch (Exception e)
        {
            // Best effort: the ranges read before the failure stay in use
            log.Error("Error reading china IP address list: " + e.ToString());
        }
        log.Info("Found " + ranges.Count + " china ip ranges");
        _chinaIPRanges = ranges;
    }

    long ip = ScraperUtil.ConvertIPAddressToNumber(ipAddress);
    // Keys are sorted ascending, so this is the range with the smallest end that is not below the address
    KeyValuePair<long, long> ipRange = ranges.FirstOrDefault(m => ip <= m.Key);
    if (ipRange.Key == 0 && ipRange.Value == 0)
        return false; // default pair: no range ends at or after the address
    return ip >= ipRange.Value;
}
/// <summary>
/// Loads <paramref name="homeUrl"/>, selects the node matching <paramref name="categoryTreeXPath"/>
/// and returns the categories parsed from that node.
/// </summary>
/// <param name="homeUrl">Url of the page containing the category tree.</param>
/// <param name="categoryTreeXPath">XPath of the top node of the category tree.</param>
/// <param name="skipCategoryByExactMatch">Passed on to the tree parser.</param>
/// <param name="skipCategoryByContainMatch">Passed on to the tree parser.</param>
/// <exception cref="InvalidOperationException">The XPath matches no node on the page.</exception>
public static List<Category> ParseCategoryTreeAsList(string homeUrl, string categoryTreeXPath, List<string> skipCategoryByExactMatch = null, List<string> skipCategoryByContainMatch = null)
{
    HtmlDocument homePage = ScraperUtil.LoadHtml(homeUrl, checkUrlLoaded: false);
    HtmlNode treeNode = homePage.DocumentNode.SelectSingleNode(categoryTreeXPath);
    // Fail with a message that names the page and XPath instead of a NullReferenceException inside the parser
    if (treeNode == null)
        throw new InvalidOperationException("Category tree not found. Url: " + homeUrl + " XPath: " + categoryTreeXPath);
    return ParseCategoryTree(treeNode, skipCategoryByExactMatch: skipCategoryByExactMatch, skipCategoryByContainMatch: skipCategoryByContainMatch);
}
/// <summary>
/// Parses the category tree below <paramref name="topTreeNode"/> and adds every leaf category
/// that has a url to <paramref name="siteMap"/> as a category page.
/// </summary>
/// <param name="topTreeNode">Top node of the category tree.</param>
/// <param name="siteMap">Site map receiving the category pages.</param>
/// <param name="skipCategoryByExactMatch">Passed on to the tree parser.</param>
/// <param name="skipCategoryByContainMatch">Passed on to the tree parser.</param>
public static void ParseCategoryTree(HtmlNode topTreeNode, DAL.SiteMap siteMap, List<string> skipCategoryByExactMatch = null, List<string> skipCategoryByContainMatch = null)
{
    List<Category> categories = ParseCategoryTree(topTreeNode, skipCategoryByExactMatch: skipCategoryByExactMatch, skipCategoryByContainMatch: skipCategoryByContainMatch);

    // add leaf categories to SiteMap
    foreach (Category leaf in categories)
    {
        if (leaf.SubCategories.Any() || String.IsNullOrEmpty(leaf.Url))
            continue;

        // Collect the names from the leaf up to the root, then join them root first separated by a space
        List<string> names = new List<string>();
        for (Category current = leaf; current != null; current = current.ParentCategory)
            names.Add(current.Name);
        names.Reverse();

        siteMap.AddCategoryPage(String.Join(" ", names), leaf.Url, siteMap.GetHomePageUrl());
    }
}
// Flattens the html below topTreeNode into a list of categories.
// The tree is walked depth first; every visited <a> node and every node with a non blank direct
// text child becomes a Category whose parent is the most recently added category of lower depth.
// The walk is destructive: visited nodes are removed from the html document as it proceeds.
// The returned list starts with an artificial "Home" category of depth 0 (unless it is removed by
// the skip lists) and no longer contains the categories matched by the skip lists.
// skipCategoryByExactMatch: a category is dropped when this list contains its name or one of its breadcrumb names
//                           (as decided by StringUtil.Contains with ignoreCase).
// skipCategoryByContainMatch: a category is dropped when its name or one of its breadcrumb names
//                             contains one of these values (as decided by StringUtil.ContainsIgnoreCase).
private static List ParseCategoryTree(HtmlNode topTreeNode, List skipCategoryByExactMatch = null, List skipCategoryByContainMatch = null)
{
// remove all empty text nodes
// NOTE(review): an XPath starting with "//" is evaluated from the document root, so this presumably
// also removes blank text nodes outside topTreeNode - confirm this is intended.
foreach (HtmlNode textNode in topTreeNode.SelectNodes("//text()"))
if (String.IsNullOrWhiteSpace(textNode.InnerText.Trim()))
textNode.Remove();
HtmlNode currentNode = topTreeNode;
HtmlNode parentNode = null;
int depth = 0;
// Artificial root so that every parsed category has a parent of lower depth
Category topCategory = new Category();
topCategory.Name = "Home";
topCategory.Depth = depth;
List categoryList = new List();
categoryList.Add(topCategory);
// Keep walking while there is something left below or beside the current node, or the walk has not
// yet returned to a direct child of topTreeNode
while (currentNode.HasChildNodes || currentNode.NextSibling != null || currentNode.ParentNode != topTreeNode)
{
if (currentNode.HasChildNodes)
{
// go down to the first child
parentNode = currentNode;
currentNode = currentNode.FirstChild;
depth++;
}
else if (currentNode.NextSibling != null)
{
// move to the next sibling and drop the node that was just left
currentNode = currentNode.NextSibling;
currentNode.PreviousSibling.Remove();
}
else if (currentNode.ParentNode != null)
{
// last child handled: go back up and drop it, so the parent is not descended into again
currentNode = currentNode.ParentNode;
currentNode.FirstChild.Remove();
parentNode = currentNode.ParentNode;
depth--;
}
else
break;
// if current node is a ahref node, add link and link text to category, otherwise just add text to category
string name = null, url = null;
if (currentNode.Name == "a")
{
name = ScraperUtil.NormalizeText(currentNode.InnerText);
url = currentNode.Attributes["href"]?.Value;
}
else if (!String.IsNullOrWhiteSpace(currentNode.SelectSingleNode("./text()")?.InnerText.Trim()))
{
name = ScraperUtil.NormalizeText(currentNode.SelectSingleNode("./text()").InnerText);
}
if (!String.IsNullOrEmpty(name))
{
Category category = new Category();
category.Name = name;
category.Depth = depth;
category.Url = url;
// NOTE(review): Last() throws when no category of lower depth exists (a named node at depth 0) - confirm this cannot happen
Category parentCategory = categoryList.Where(m => m.Depth < depth).Last(); // parent category is the last added category with lower depth
parentCategory.AddSubCategory(category);
categoryList.Add(category);
// Stop parsing the inner html if the current node is a link: a link may wrap further elements
// around its text, and the complete link text was already taken from InnerText
if (currentNode.Name == "a")
{
currentNode.Remove();
currentNode = parentNode;
parentNode = currentNode.ParentNode;
depth--;
}
}
}
// Apply the skip lists; the index is stepped back after a removal so the element that moved
// into the current position is checked as well
for (int i = 0; i < categoryList.Count; i++)
{
Category category = categoryList[i];
if (skipCategoryByExactMatch != null && (StringUtil.Contains(skipCategoryByExactMatch, category.Name, ignoreCase: true) || skipCategoryByExactMatch.Where(m => StringUtil.Contains(category.Breadcrumb, m, ignoreCase: true)).Any()))
{
categoryList.Remove(category);
i--;
}
else if (skipCategoryByContainMatch != null && (skipCategoryByContainMatch.Any(m => StringUtil.ContainsIgnoreCase(category.Name, m)) || skipCategoryByContainMatch.Where(skipCategory => category.Breadcrumb.Where(breadcrumb => StringUtil.ContainsIgnoreCase(breadcrumb, skipCategory)).Any()).Any()))
{
categoryList.Remove(category);
i--;
}
}
return categoryList;
}
/// <summary>
/// One node of a parsed category tree: a name, an optional url, its depth in the tree and
/// links to its parent and its sub categories.
/// </summary>
public class Category
{
    public Category()
    {
        SubCategories = new List<Category>();
    }

    public string Name { get; set; }
    public int Depth { get; set; }
    public string Url { get; set; }

    /// <summary>Parent category; set when this category is added through AddSubCategory.</summary>
    public Category ParentCategory { get; private set; }

    public List<Category> SubCategories { get; private set; }

    /// <summary>
    /// Names of the ancestors of this category, nearest parent first. The walk stops at the first
    /// ancestor whose depth is 0 or less, which is not included.
    /// </summary>
    public List<string> Breadcrumb
    {
        get
        {
            List<string> ancestorNames = new List<string>();
            for (Category ancestor = ParentCategory; ancestor != null && ancestor.Depth > 0; ancestor = ancestor.ParentCategory)
                ancestorNames.Add(ancestor.Name);
            return ancestorNames;
        }
    }

    /// <summary>Creates a category from the given values and adds it as a sub category of this one.</summary>
    public void AddSubCategory(string name, int depth, string url)
    {
        Category category = new Category();
        category.Name = name;
        category.Depth = depth;
        category.Url = url;
        AddSubCategory(category);
    }

    /// <summary>Adds <paramref name="category"/> as a sub category and makes this category its parent.</summary>
    public void AddSubCategory(Category category)
    {
        category.ParentCategory = this;
        this.SubCategories.Add(category);
    }
}
/// <summary>
/// Decides from the query string of the current request whether the page is a "no index" page:
/// true when there are two or more query parameters, or exactly one that is neither the store id
/// nor the brand name parameter.
/// </summary>
public static bool IsNoIndex()
{
    switch (HttpContext.Current.Request.QueryString.Count)
    {
        case 0:
            return false;
        case 1:
            string onlyKey = HttpContext.Current.Request.QueryString.GetKey(0);
            return onlyKey != ParameterInfo.QueryString.StoreId && onlyKey != ParameterInfo.QueryString.BrandName;
        default:
            return true;
    }
}
}
}
MailUtil
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net.Mail;
using System.Text.RegularExpressions;
using System.IO;
using System.Collections;
namespace FashionExchange.Common.Utils
{
/// <summary>
/// Small helper that collects the parts of an e-mail (addresses, subject, body, attachments)
/// and sends it with an <see cref="SmtpClient"/> created with its default constructor.
/// </summary>
public class MailUtil
{
    public MailUtil()
    {
        Recipients = new ArrayList();
        CCRecipients = new ArrayList();
        BCCRecipients = new ArrayList();
        ReplyToRecipients = new ArrayList();
        Attachments = new ArrayList();
        IsHTML = false;
    }

    /// <summary>"To" addresses; each entry must be a string.</summary>
    public IList Recipients { get; set; }

    /// <summary>"CC" addresses; each entry must be a string.</summary>
    public IList CCRecipients { get; set; }

    /// <summary>"BCC" addresses; each entry must be a string.</summary>
    public IList BCCRecipients { get; set; }

    /// <summary>"Reply-To" addresses; each entry must be a string.</summary>
    public IList ReplyToRecipients { get; set; }

    /// <summary>Attachments; each entry must be an <see cref="Attachment"/>.</summary>
    public IList Attachments { get; set; }

    public string Subject { get; set; }

    public string Body { get; set; }

    /// <summary>Address used as the sender of the message.</summary>
    public string Sender { get; set; }

    /// <summary>Display name used together with <see cref="Sender"/>.</summary>
    public string SenderName { get; set; }

    /// <summary>When false, html tags are stripped from the body before sending.</summary>
    public bool IsHTML { get; set; }

    /// <summary>Adds the UTF-8 encoded text <paramref name="s"/> as an attachment named <paramref name="filename"/>.</summary>
    public void attachFileByString(string s, string filename)
    {
        // The stream stays open on purpose: the attachment reads from it when the mail is sent
        MemoryStream ms = new MemoryStream();
        byte[] b = new UTF8Encoding().GetBytes(s);
        ms.Write(b, 0, b.Length);
        ms.Position = 0;
        Attachments.Add(new Attachment(ms, filename));
    }

    /// <summary>
    /// Builds the message from the current property values and sends it. For a non html mail the
    /// <see cref="Body"/> property is replaced by its plain text version (tags and non-breaking
    /// space entities removed).
    /// </summary>
    public void send()
    {
        // SmtpClient is disposable; disposing it closes the connection to the mail server
        using (SmtpClient smtpClient = new SmtpClient())
        {
            MailMessage mailMessage = new MailMessage();
            foreach (string address in Recipients)
            {
                mailMessage.To.Add(address);
            }
            foreach (string address in CCRecipients)
            {
                mailMessage.CC.Add(address);
            }
            foreach (string address in BCCRecipients)
            {
                mailMessage.Bcc.Add(address);
            }
            foreach (string address in ReplyToRecipients)
            {
                mailMessage.ReplyToList.Add(address);
            }
            foreach (Attachment attachment in Attachments)
            {
                mailMessage.Attachments.Add(attachment);
            }

            // if this is a html format email, send the body as is,
            // else strip the html markup and send plain text
            if (IsHTML)
            {
                mailMessage.IsBodyHtml = true;
            }
            else if (Body != null)
            {
                Body = Regex.Replace(Body, "<[^>]*>", String.Empty);
                // Remove the html entity only; a bare space pattern would delete every space of the text
                Body = Regex.Replace(Body, "&nbsp;", String.Empty);
                mailMessage.IsBodyHtml = false;
            }

            mailMessage.From = new MailAddress(Sender, SenderName);
            mailMessage.Subject = Subject;
            mailMessage.Body = Body;
            // The message itself is not disposed, because that would dispose the attachments kept in Attachments
            smtpClient.Send(mailMessage);
        }
    }

    /// <summary>Sends a fixed test message with a small text attachment.</summary>
    public void test()
    {
        using (SmtpClient smtpClient = new SmtpClient())
        using (MailMessage mail = new MailMessage())
        {
            mail.To.Add(new MailAddress("edchan@live.com"));
            mail.From = new MailAddress("admin@fashionlane.com.au");

            MemoryStream s = new MemoryStream();
            byte[] b = new UnicodeEncoding().GetBytes("ASD");
            s.Write(b, 0, b.Length);
            s.Position = 0;
            // Disposed together with the message
            Attachment attachment = new Attachment(s, "test.txt");
            mail.Attachments.Add(attachment);

            mail.Body = "This is a testing message.";
            mail.Subject = "Test Subject";
            smtpClient.Send(mail);
        }
    }

    /// <summary>
    /// Checks whether <paramref name="emailAddress"/> is accepted by the <see cref="MailAddress"/> constructor.
    /// </summary>
    /// <returns>True when the address could be parsed; false otherwise, including for null or empty input.</returns>
    public static bool IsEmailValid(string emailAddress)
    {
        try
        {
            new MailAddress(emailAddress);
            return true;
        }
        catch (ArgumentException)
        {
            // null or empty address
            return false;
        }
        catch (FormatException)
        {
            return false;
        }
    }
}
}
JustGroupUtil
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace FashionExchange.Common.Utils
{
public class JustGroupUtil
{
/// <summary>
/// Derives the effective price of an item from the promotional text attached to it.
/// </summary>
/// <remarks>
/// Recognised forms (all matched case-insensitively at the start of the text):
/// "Now $PRICE..." gives PRICE; "Take N% Off", "Take A Further N% Off" and
/// "Further N% Off" reduce the base price by N percent; "Take $AMOUNT Off"
/// subtracts AMOUNT from the base price. Any other text leaves the base price
/// unchanged. Numbers are parsed with the invariant culture so that "29.95" means
/// the same on every machine.
/// </remarks>
/// <param name="regularPrice">Regular price; used as the base when there is no sale price.</param>
/// <param name="salePrice">Sale price; used as the base when greater than zero.</param>
/// <param name="discountText">Promotional text; null or empty yields the base price.</param>
/// <returns>The price after applying the discount described by the text.</returns>
/// <exception cref="FormatException">The numeric part of a recognised form cannot be parsed.</exception>
public static decimal GetSalePriceFromDiscountText(decimal regularPrice, decimal salePrice, string discountText)
{
    decimal basePrice = salePrice > 0 ? salePrice : regularPrice;
    if (String.IsNullOrEmpty(discountText))
        return basePrice;

    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    const StringComparison comparison = StringComparison.InvariantCultureIgnoreCase;

    if (discountText.StartsWith("Now $", comparison))
    {
        // Markers that may follow the price, in priority order; the first one
        // present in the text delimits the number.
        string[] terminators = { " All", "! ONLINE ONLY.", "ONLINE ONLY.", ". ", ".End" };
        foreach (string terminator in terminators)
        {
            if (StringUtil.ContainsIgnoreCase(discountText, terminator))
                return Decimal.Parse(StringUtil.Substring(discountText, "Now $", terminator, ignoreCase: true), invariant);
        }
        // No marker: the price runs to the end of the text.
        return Decimal.Parse(StringUtil.SubstringToEnd(discountText, "Now $", ignoreCase: true), invariant);
    }

    if (discountText.StartsWith("Take", comparison) && StringUtil.ContainsIgnoreCase(discountText, "% Off"))
    {
        string prefix = discountText.StartsWith("Take A Further", comparison) ? "Take A Further" : "Take";
        string discountPercent = StringUtil.Substring(discountText, prefix, "% Off", ignoreCase: true);
        return basePrice * (1 - Decimal.Parse(discountPercent, invariant) / 100);
    }

    if (discountText.StartsWith("Further", comparison) && StringUtil.ContainsIgnoreCase(discountText, "% Off"))
    {
        string discountPercent = StringUtil.Substring(discountText, "Further", "% Off", ignoreCase: true);
        return basePrice * (1 - Decimal.Parse(discountPercent, invariant) / 100);
    }

    // The guard looks for the same " Off" marker that the extraction uses, so a
    // text without the leading space falls through instead of failing to parse.
    if (discountText.StartsWith("Take $", comparison) && StringUtil.ContainsIgnoreCase(discountText, " Off"))
    {
        string discountAmount = StringUtil.Substring(discountText, "Take $", " Off", ignoreCase: true);
        return basePrice - Decimal.Parse(discountAmount, invariant);
    }

    return basePrice;
}
}
}
ImageUtil
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Net;
using ImageResizer;
using System.Drawing;
using FashionExchange.Common.Enums;
namespace FashionExchange.Common.Utils
{
public static class ImageUtil
{
/// <summary>
/// Downloads an image into memory without saving it to the local file system.
/// </summary>
/// <param name="url">Image address; HTML-decoded first, and "//host/..." is fetched over http.</param>
/// <returns>The decoded image. The caller is responsible for disposing it.</returns>
public static Image DownloadImage(string url)
{
    url = WebUtility.HtmlDecode(url);
    // Protocol-relative address: default to plain http.
    if (url.StartsWith("//", StringComparison.Ordinal))
        url = "http:" + url;
    WebRequest request = WebRequest.Create(url);
    request.ContentType = "image/jpeg";
    using (WebResponse response = request.GetResponse())
    using (Stream responseStream = response.GetResponseStream())
    {
        // Image.FromStream requires its stream to stay open for the lifetime of
        // the image, but the response is closed when this method returns. Copy
        // the bytes into a memory buffer that outlives the response and is
        // intentionally left open for the returned image.
        MemoryStream buffer = new MemoryStream();
        responseStream.CopyTo(buffer);
        buffer.Position = 0;
        return Image.FromStream(buffer);
    }
}
/// <summary>
/// Downloads an image and saves it unchanged (no resizing) to a local file.
/// </summary>
/// <param name="url">Image address; HTML-decoded first, and "//host/..." is fetched over http.</param>
/// <param name="saveToPath">Target directory; created when it does not exist.</param>
/// <param name="fileName">File name inside <paramref name="saveToPath"/>; an existing file is overwritten.</param>
public static void DownloadImage(string url, string saveToPath, string fileName)
{
    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(saveToPath);
    url = WebUtility.HtmlDecode(url);
    if (url.StartsWith("//", StringComparison.Ordinal))
        url = "http:" + url;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.ContentType = "image/jpeg";
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        HttpStatusCode status = response.StatusCode;
        if (status == HttpStatusCode.OK || status == HttpStatusCode.Moved || status == HttpStatusCode.Redirect)
        {
            // Write the body of this response straight to disk instead of
            // requesting the same address a second time.
            using (Stream responseStream = response.GetResponseStream())
            using (FileStream file = File.Create(Path.Combine(saveToPath, fileName)))
            {
                responseStream.CopyTo(file);
            }
        }
    }
}
/// <summary>
/// Downloads an image and saves it to a local file as a JPEG fitted within the
/// given width and height (ImageResizer <c>FitMode.Max</c>).
/// </summary>
/// <param name="url">Image address; HTML-decoded first, and "//host/..." is fetched over http.</param>
/// <param name="saveToPath">Target directory; created when it does not exist.</param>
/// <param name="fileName">File name inside <paramref name="saveToPath"/>.</param>
/// <param name="width">Width passed to the resize settings.</param>
/// <param name="height">Height passed to the resize settings.</param>
/// <param name="httpHeaders">Headers handed to <c>ScraperUtil.SetRequestHeaders</c>; may be null.</param>
/// <param name="proxy">Proxy for the request; the request default is kept when null.</param>
/// <param name="cookieContainer">Cookie container assigned to the request; may be null.</param>
// NOTE(review): the Dictionary parameter has no type arguments in the text as
// received; they were presumably lost when the source was rendered - confirm.
public static void DownloadImage(string url, string saveToPath, string fileName, int width, int height, Dictionary httpHeaders = null, WebProxy proxy = null, CookieContainer cookieContainer = null)
{
    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(saveToPath);
    url = WebUtility.HtmlDecode(url);
    if (url.StartsWith("//", StringComparison.Ordinal))
        url = "http:" + url;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.UserAgent = FashionExchangeSetting.UserAgentChrome;
    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    request.CookieContainer = cookieContainer;
    if (proxy != null)
        request.Proxy = proxy;
    ScraperUtil.SetRequestHeaders(request, httpHeaders);
    // The using block closes the response even when the resize step throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        HttpStatusCode status = response.StatusCode;
        if (status == HttpStatusCode.OK || status == HttpStatusCode.Moved || status == HttpStatusCode.Redirect)
        {
            string targetFile = Path.Combine(saveToPath, fileName);
            ResizeSettings settings = new ResizeSettings(width, height, FitMode.Max, "jpg");
            ImageBuilder.Current.Build(response.GetResponseStream(), targetFile, settings);
        }
    }
}
}
}
FtpUtil
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using log4net;
namespace FashionExchange.Common.Utils
{
public static class FtpUtil
{
// log4net logger named after the FtpUtil type; used by the retry loops in this class.
private static ILog log = LogManager.GetLogger(typeof(FtpUtil).Name);
// Retry limit consulted by the retry loops in Upload and DeleteFile.
private const int maxRetryCount = 3;
/// <summary>
/// Uploads a local file to an FTP server in binary mode, retrying on
/// <see cref="WebException"/>.
/// </summary>
/// <remarks>
/// The upload is attempted once and then retried up to <c>maxRetryCount</c> times.
/// If the last retry also fails, the <see cref="WebException"/> is rethrown so the
/// caller can tell that nothing was uploaded.
/// </remarks>
/// <param name="server">Server prefix placed before <paramref name="destFilePath"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="localFilePath">Path of the file to send.</param>
/// <param name="destFilePath">Remote path; a leading "/" is added when missing.</param>
/// <exception cref="WebException">Every attempt failed.</exception>
public static void Upload(string server, string username, string password, string localFilePath, string destFilePath)
{
    if (!destFilePath.StartsWith("/", StringComparison.Ordinal))
        destFilePath = "/" + destFilePath;
    int retryCount = 0;
    while (true)
    {
        try
        {
            FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + destFilePath);
            request.Method = WebRequestMethods.Ftp.UploadFile;
            request.Timeout = 1000 * 60 * 60; // set timeout to 1 hour
            request.UseBinary = true;
            request.Credentials = new NetworkCredential(username, password);
            long fileSize = 0;
            byte[] buffer = new byte[8 * 1024];
            using (Stream requestStream = request.GetRequestStream())
            {
                using (FileStream fileStream = File.OpenRead(localFilePath))
                {
                    int count;
                    while ((count = fileStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fileSize += count;
                        requestStream.Write(buffer, 0, count);
                        requestStream.Flush();
                    }
                }
                // if file size is larger than 20mb, abort request and close requestStream, otherwise requestStream doesn't seem to close until timeout
                if (fileSize > 20 * 1000 * 1000)
                {
                    request.Abort();
                    Console.WriteLine("Request aborted");
                }
            }
            return;
        }
        catch (WebException e)
        {
            log.Info("A WebException occured while uploading file: " + destFilePath + " Exception: " + e.ToString());
            if (retryCount == maxRetryCount)
            {
                log.Error("Retried upload file maximum number of times. Retry count: " + retryCount);
                throw;
            }
            log.Info("Retry upload file. Retry count: " + ++retryCount);
        }
    }
}
/// <summary>
/// Returns the raw name lines of an FTP directory listing (the NLST command).
/// </summary>
/// <param name="server">Server prefix placed before <paramref name="directory"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="directory">Remote directory; used as given, with "/" appended.</param>
/// <returns>
/// One element per non-empty line of the server response; an empty array when the
/// server answers with <see cref="FtpStatusCode.ActionNotTakenFileUnavailable"/>.
/// </returns>
/// <exception cref="WebException">The request failed for any other reason.</exception>
public static string[] ListDirectory(string server, string username, string password, string directory)
{
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + directory + "/");
    request.Method = WebRequestMethods.Ftp.ListDirectory;
    request.Credentials = new NetworkCredential(username, password);
    string directoryDetails;
    try
    {
        // using blocks release the connection even when reading fails part-way.
        using (FtpWebResponse response = (FtpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            directoryDetails = reader.ReadToEnd();
        }
    }
    catch (WebException e)
    {
        // Only this one status is treated as "nothing to list"; everything else
        // (including a missing or non-FTP response) is passed on to the caller.
        FtpWebResponse errorResponse = e.Response as FtpWebResponse;
        if (errorResponse == null || errorResponse.StatusCode != FtpStatusCode.ActionNotTakenFileUnavailable)
            throw;
        directoryDetails = String.Empty;
    }
    // Accept bare "\n" line endings as well as "\r\n".
    return directoryDetails.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
}
/// <summary>
/// Lists the entries of an FTP directory with name, type, size and last-write time,
/// parsed from the server's detailed listing (the LIST command).
/// </summary>
/// <param name="server">Server prefix placed before <paramref name="directory"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="directory">Remote directory; used as given, with "/" appended.</param>
/// <returns>
/// One entry per listing line that has the expected number of fields; an empty list
/// when the listing is empty or the server answers with
/// <see cref="FtpStatusCode.ActionNotTakenFileUnavailable"/>.
/// </returns>
/// <exception cref="WebException">The request failed for any other reason.</exception>
// NOTE(review): the generic type arguments of List and the "<DIR>" marker were
// missing from the text as received. Both look like HTML tags and appear to have
// been stripped when the source was rendered; they are restored here - confirm
// against the original repository.
public static List<SimpleFileInfo> ListDirectoryDetails(string server, string username, string password, string directory)
{
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + directory + "/");
    request.Method = WebRequestMethods.Ftp.ListDirectoryDetails;
    request.Credentials = new NetworkCredential(username, password);
    string directoryDetailsResponse;
    try
    {
        // using blocks release the connection even when reading fails part-way.
        using (FtpWebResponse response = (FtpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            directoryDetailsResponse = reader.ReadToEnd();
        }
    }
    catch (WebException e)
    {
        FtpWebResponse errorResponse = e.Response as FtpWebResponse;
        if (errorResponse != null && errorResponse.StatusCode == FtpStatusCode.ActionNotTakenFileUnavailable)
            return new List<SimpleFileInfo>();
        throw;
    }
    List<SimpleFileInfo> fileOrDirectoryInfos = new List<SimpleFileInfo>();
    if (String.IsNullOrEmpty(directoryDetailsResponse))
        return fileOrDirectoryInfos;
    /* There are many different ftp list formats. The most commonly used are Windows and Unix
     * Windows:
     * 08-10-11 12:02PM <DIR> Version2
     * 06-25-09 02:41PM 144700153 image34.gif
     * 06-25-09 02:51PM 144700153 updates.txt
     *
     * Unix:
     * d--x--x--x 2 ftp ftp 4096 Mar 07 2002 bin
     * -rw-r--r-- 1 ftp ftp 659450 Jun 15 05:07 TEST.TXT
     * -rw-r--r-- 1 ftp ftp 101786380 Sep 08 2008 TEST03-05.TXT
     */
    // A Windows listing starts with the date, i.e. with a digit.
    bool isWindowsFormat = Char.IsDigit(directoryDetailsResponse[0]);
    string[] lines = directoryDetailsResponse.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        SimpleFileInfo fileInfo = isWindowsFormat ? ParseWindowsListingLine(line) : ParseUnixListingLine(line);
        if (fileInfo != null)
            fileOrDirectoryInfos.Add(fileInfo);
    }
    return fileOrDirectoryInfos;
}

// Parses one line of a Windows-format listing (date, time, size or "<DIR>", name).
// Returns null for a line with fewer than four fields.
private static SimpleFileInfo ParseWindowsListingLine(string line)
{
    // Limit of 4 keeps spaces inside the name, which is the last field.
    string[] data = line.Split(new char[] { ' ' }, 4, StringSplitOptions.RemoveEmptyEntries);
    if (data.Length < 4)
        return null;
    SimpleFileInfo fileInfo = new SimpleFileInfo();
    fileInfo.Name = data[3];
    fileInfo.IsDirectory = data[2] == "<DIR>";
    if (!fileInfo.IsDirectory)
        fileInfo.Size = Convert.ToInt64(data[2], System.Globalization.CultureInfo.InvariantCulture);
    fileInfo.LastWriteTime = DateTime.ParseExact(data[0] + data[1], "MM-dd-yyhh:mmtt", System.Globalization.CultureInfo.InvariantCulture);
    return fileInfo;
}

// Parses one line of a Unix-format ("ls -l" style) listing.
// Returns null for a line with fewer than nine fields, such as a "total N" header.
private static SimpleFileInfo ParseUnixListingLine(string line)
{
    // Limit of 9 keeps spaces inside the name, which is the last field.
    string[] data = line.Split(new char[] { ' ' }, 9, StringSplitOptions.RemoveEmptyEntries);
    if (data.Length < 9)
        return null;
    SimpleFileInfo fileInfo = new SimpleFileInfo();
    fileInfo.Name = data[8];
    // The first character of the mode string marks a directory, whatever the
    // permission bits that follow (e.g. "drwxr-xr-x" and "d--x--x--x").
    fileInfo.IsDirectory = data[0].StartsWith("d", StringComparison.Ordinal);
    if (!fileInfo.IsDirectory)
        fileInfo.Size = Convert.ToInt64(data[4], System.Globalization.CultureInfo.InvariantCulture);
    // The third date field is a time ("05:07") or a year ("2008").
    // NOTE(review): when the listing gives a time instead of a year, the parsed
    // value takes whatever year ParseExact defaults to - confirm that is acceptable.
    string timestamp = data[5] + " " + data[6] + " " + data[7];
    string format = data[7].Contains(":") ? "MMM d HH:mm" : "MMM d yyyy";
    fileInfo.LastWriteTime = DateTime.ParseExact(timestamp, format, System.Globalization.CultureInfo.InvariantCulture);
    return fileInfo;
}
/// <summary>
/// Creates a directory on an FTP server.
/// </summary>
/// <param name="server">Server prefix placed before <paramref name="directory"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="directory">Remote directory to create; appended to <paramref name="server"/> exactly as given.</param>
/// <exception cref="WebException">The server rejected the request.</exception>
public static void MakeDirectory(string server, string username, string password, string directory)
{
    // The request URI is a plain concatenation; no file-system path handling applies.
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + directory);
    request.Method = WebRequestMethods.Ftp.MakeDirectory;
    request.Credentials = new NetworkCredential(username, password);
    // The response carries no data that is used here; it is only obtained and closed.
    using (WebResponse response = request.GetResponse())
    {
    }
}
/// <summary>
/// Deletes a directory on an FTP server together with everything inside it.
/// </summary>
/// <param name="server">Server prefix placed before <paramref name="directory"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="directory">Remote directory to remove; a leading "/" is added when missing.</param>
/// <exception cref="WebException">Listing, deleting an entry, or removing the directory failed.</exception>
public static void RemoveDirectory(string server, string username, string password, string directory)
{
    if (!directory.StartsWith("/", StringComparison.Ordinal))
        directory = "/" + directory;
    // Empty the directory first: sub-directories recursively, files one by one.
    foreach (SimpleFileInfo entry in ListDirectoryDetails(server, username, password, directory))
    {
        string entryPath = directory + "/" + entry.Name;
        if (entry.IsDirectory)
            RemoveDirectory(server, username, password, entryPath);
        else
            DeleteFile(server, username, password, entryPath);
    }
    // The request URI is a plain concatenation; no file-system path handling applies.
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + directory);
    request.Method = WebRequestMethods.Ftp.RemoveDirectory;
    request.Credentials = new NetworkCredential(username, password);
    using (WebResponse response = request.GetResponse())
    {
    }
}
/// <summary>
/// Deletes a file on an FTP server, retrying on <see cref="WebException"/>.
/// </summary>
/// <remarks>
/// The delete is attempted once and then retried up to <c>maxRetryCount</c> times.
/// If the last retry also fails, the <see cref="WebException"/> is rethrown so the
/// caller can tell that the file was not deleted.
/// </remarks>
/// <param name="server">Server prefix placed before <paramref name="file"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="file">Remote file path; a leading "/" is added when missing.</param>
/// <exception cref="WebException">Every attempt failed.</exception>
public static void DeleteFile(string server, string username, string password, string file)
{
    if (!file.StartsWith("/", StringComparison.Ordinal))
        file = "/" + file;
    // The request URI is a plain concatenation; no file-system path handling applies.
    string requestUri = server + file;
    int retryCount = 0;
    while (true)
    {
        try
        {
            FtpWebRequest request = (FtpWebRequest)WebRequest.Create(requestUri);
            request.Method = WebRequestMethods.Ftp.DeleteFile;
            request.Credentials = new NetworkCredential(username, password);
            using (WebResponse response = request.GetResponse())
            {
            }
            return;
        }
        catch (WebException e)
        {
            log.Info("A WebException occured while deleting file: " + requestUri + " Exception: " + e.ToString());
            if (retryCount == maxRetryCount)
            {
                log.Error("Retried delete file maximum number of times. Retry count: " + retryCount);
                throw;
            }
            log.Info("Retry delete file. Retry count: " + ++retryCount);
        }
    }
}
/// <summary>
/// Renames a file or directory on an FTP server.
/// </summary>
/// <param name="server">Server prefix placed before <paramref name="name"/> to form the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="name">Current remote path; a leading "/" is added when missing.</param>
/// <param name="newName">Value assigned to the request's <c>RenameTo</c> property, exactly as given.</param>
/// <exception cref="WebException">The server rejected the request.</exception>
public static void Rename(string server, string username, string password, string name, string newName)
{
    if (!name.StartsWith("/", StringComparison.Ordinal))
        name = "/" + name;
    // The request URI is a plain concatenation; no file-system path handling applies.
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(server + name);
    request.Method = WebRequestMethods.Ftp.Rename;
    request.RenameTo = newName;
    request.Credentials = new NetworkCredential(username, password);
    using (WebResponse response = request.GetResponse())
    {
    }
}
/// <summary>
/// Downloads a file from an FTP server to a local file in binary mode, retrying on
/// <see cref="WebException"/>.
/// </summary>
/// <param name="destFilePath">Address passed to <see cref="WebRequest.Create(string)"/> as the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <param name="localFilePath">Local file to write; created or overwritten on every attempt.</param>
/// <param name="maxAttemptCount">Total number of attempts; nothing is downloaded when it is less than one.</param>
/// <exception cref="WebException">The last attempt failed.</exception>
public static void DownloadFile(string destFilePath, string username, string password, string localFilePath, int maxAttemptCount = 3)
{
    for (int attempt = 1; attempt <= maxAttemptCount; attempt++)
    {
        try
        {
            FtpWebRequest request = (FtpWebRequest)WebRequest.Create(destFilePath);
            request.Credentials = new NetworkCredential(username, password);
            request.UseBinary = true;
            request.Method = WebRequestMethods.Ftp.DownloadFile;
            using (FtpWebResponse response = (FtpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (FileStream writer = new FileStream(localFilePath, FileMode.Create))
            {
                responseStream.CopyTo(writer);
            }
            return;
        }
        // The filter lets the exception from the final attempt reach the caller.
        catch (WebException e) when (attempt < maxAttemptCount)
        {
            log.Info("A WebException occured while downloading file: " + destFilePath + " Attempt: " + attempt + " Exception: " + e.ToString());
        }
    }
}
/// <summary>
/// Downloads a file from an FTP server and returns its content as text.
/// </summary>
/// <param name="destFilePath">Address passed to <see cref="WebRequest.Create(string)"/> as the request URI.</param>
/// <param name="username">FTP user name.</param>
/// <param name="password">FTP password.</param>
/// <returns>The whole response, read with the default <see cref="StreamReader"/> settings.</returns>
public static string DownloadFileAsString(string destFilePath, string username, string password)
{
    NetworkCredential credentials = new NetworkCredential(username, password);
    FtpWebRequest request = (FtpWebRequest)WebRequest.Create(destFilePath);
    request.Method = WebRequestMethods.Ftp.DownloadFile;
    request.Credentials = credentials;
    using (FtpWebResponse response = (FtpWebResponse)request.GetResponse())
    {
        Stream content = response.GetResponseStream();
        using (StreamReader reader = new StreamReader(content))
        {
            return reader.ReadToEnd();
        }
    }
}
// One entry of an FTP directory listing, as filled in by ListDirectoryDetails.
public class SimpleFileInfo
{
// Entry name taken from the last field of the listing line.
public string Name;
// True when the listing line describes a directory.
public bool IsDirectory;
// Size field from the listing line; assigned for files only, so it keeps its default for directories.
public long Size;
// Date/time parsed from the listing line.
public DateTime LastWriteTime;
}
}
}
Comments
Post a Comment