ProductFeed
using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
namespace FashionExchange.Common.ProductFeeds
{
/// <summary>
/// Base class for importing a store's product feed into the database.
/// Subclasses implement <see cref="ParseProductFeed"/> and the field accessors
/// (GetName, GetPrice, ...); this class drives the import, tracks counters and
/// errors, and builds the <see cref="ScrapeResult"/> summary.
/// </summary>
public abstract class ProductFeed
{
    /// <summary>Logger named after the concrete feed class.</summary>
    protected ILog log { get { return LogManager.GetLogger(this.GetType().Name); } }

    /// <summary>
    /// The store for this feed, looked up by the concrete class name on every access.
    /// </summary>
    protected Store Store
    {
        get { return StoreManager.GetStoreByScraperClassName(this.GetType().Name); }
    }

    /// <summary>
    /// When true, <see cref="ValidateAndUpdateProduct"/> merges the first size of a repeated
    /// product into the product already stored. Defaults to false.
    /// </summary>
    protected virtual bool ProductSizeOnSeparateRow
    {
        get { return false; }
    }

    private HashSet<string> _existingProductNames = null;

    /// <summary>
    /// Names of the store's products, loaded from the database on first access
    /// (case-insensitive). Feed parsers remove each name they process; whatever is
    /// left after parsing is deleted by <see cref="Import"/>.
    /// </summary>
    protected HashSet<string> ExistingProductNames
    {
        get
        {
            if (_existingProductNames == null)
            {
                _existingProductNames = new HashSet<string>(
                    ProductManager.GetProductsByStore(Store.Id).Select(m => m.Name),
                    StringComparer.InvariantCultureIgnoreCase);
            }
            return _existingProductNames;
        }
    }

    private HashSet<string> _updatedProductNames = null;

    /// <summary>
    /// Names of products that were added or updated during this import (case-insensitive).
    /// </summary>
    protected HashSet<string> UpdatedProductNames
    {
        get
        {
            if (_updatedProductNames == null)
                _updatedProductNames = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
            return _updatedProductNames;
        }
    }

    protected int errorCount = 0, warningCount = 0;
    protected int productUpdatedCount = 0, productAddedCount = 0, productDeletedCount = 0;

    /// <summary>
    /// Errors collected during the import, keyed by stack trace. Each value pairs the
    /// exception with a list of locations where it occurred (the parsers in this file
    /// store line numbers there).
    /// </summary>
    protected Dictionary<string, KeyValuePair<Exception, List<string>>> scrapeErrorDict =
        new Dictionary<string, KeyValuePair<Exception, List<string>>>();

    /// <summary>Optional proxy for subclasses; null by default.</summary>
    protected virtual WebProxy Proxy { get { return null; } }

    /// <summary>
    /// Runs the import: parses the feed, deletes products that are still listed in
    /// <see cref="ExistingProductNames"/> afterwards, and returns a summary of the run.
    /// Exceptions thrown by parsing or deletion are logged and recorded, not rethrown.
    /// </summary>
    /// <returns>Counters, store statistics and the collected errors for this run.</returns>
    public ScrapeResult Import()
    {
        log.InfoFormat("Importing product feed. Store name: {0}", Store.Name);
        ScrapeResult result = new ScrapeResult();
        result.StartDateTime = DateTime.Now;
        try
        {
            ParseProductFeed();
            // delete products in database but not in product feed
            // NOTE(review): a product whose feed row failed to parse is never removed from
            // ExistingProductNames and is therefore deleted here - confirm this is intended.
            foreach (string productName in ExistingProductNames)
            {
                Product product = ProductManager.GetProductByStoreAndName(Store.Id, productName);
                if (product == null)
                {
                    // The lookup returns null when no product matches; skip instead of
                    // passing null to DeleteProduct and aborting the remaining deletions.
                    log.WarnFormat("Product to delete not found: {0}", productName);
                    continue;
                }
                ProductManager.DeleteProduct(product);
                productDeletedCount++;
                log.InfoFormat("Product deleted[{0}/{1}]: {2}", productDeletedCount, ExistingProductNames.Count, productName);
            }
        }
        catch (Exception e)
        {
            log.ErrorFormat("An error has occured while importing product feed. {0}", e.ToString());
            // Dictionary.Add throws on a duplicate or null key; an exception escaping this
            // catch block would lose the whole result, so guard both cases.
            string errorKey = e.StackTrace ?? e.ToString();
            if (!scrapeErrorDict.ContainsKey(errorKey))
                scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>()));
            errorCount++;
        }
        log.InfoFormat("Import product feed completed. Store name: {0}", Store.Name);
        log.InfoFormat("Products Added={0} Updated={1} Deleted={2}", productAddedCount, productUpdatedCount, productDeletedCount);
        log.InfoFormat("Warning={0} Error={1}", warningCount, errorCount);

        // return result
        // Resolve the store and its product query once instead of once per statistic.
        Store store = Store;
        var products = ProductManager.GetProductsByStore(store.Id);
        result.Store = store;
        result.ScrapeType = StoreManager.GetScrapeType(ScrapeTypeCode.ProductFeed);
        result.ProductAddedCount = productAddedCount;
        result.ProductDeletedCount = productDeletedCount;
        result.ProductUpdatedCount = productUpdatedCount;
        result.SaleProductCount = products.Where(m => m.SalePrice != null).Count();
        result.TotalProductCount = products.Count();
        result.WarningCount = warningCount;
        result.ErrorCount = errorCount;
        result.ProductWithProductSizeCount = products.Where(m => m.ProductSizes.Any()).Count();
        result.ProductSizeAvailableCount = products.Where(m => m.ProductSizes.Any()).SelectMany(m => m.ProductSizes).Where(m => m.Available).Count();
        result.ProductSizeWithColourCount = products.Where(m => m.ProductSizes.Any()).SelectMany(m => m.ProductSizes).Where(m => m.Colour != null).Count();
        result.TotalProductSizeCount = products.Where(m => m.ProductSizes.Any()).SelectMany(m => m.ProductSizes).Count();
        result.ProductWithBrandCount = products.Where(m => m.BrandUnparsed != null).Count();
        result.UniqueBrandCount = products.Where(m => m.BrandUnparsed != null).Select(m => m.BrandUnparsed).Distinct().Count();
        result.EndDateTime = DateTime.Now;

        foreach (KeyValuePair<string, KeyValuePair<Exception, List<string>>> scrapeErrorOuterPair in scrapeErrorDict)
        {
            KeyValuePair<Exception, List<string>> scrapeErrorPair = scrapeErrorOuterPair.Value;
            ScrapeError scrapeError = new ScrapeError();
            // Only the first three recorded locations are kept on the error row.
            scrapeError.Url1 = scrapeErrorPair.Value.FirstOrDefault();
            scrapeError.Url2 = scrapeErrorPair.Value.Skip(1).FirstOrDefault();
            scrapeError.Url3 = scrapeErrorPair.Value.Skip(2).FirstOrDefault();
            scrapeError.Exception = scrapeErrorPair.Key.GetType().ToString();
            scrapeError.Message = scrapeErrorPair.Key.Message;
            scrapeError.StackTrace = scrapeErrorPair.Key.StackTrace;
            // An error recorded without any location still counts as one occurrence.
            scrapeError.Count = scrapeErrorPair.Value.Any() ? scrapeErrorPair.Value.Count : 1;
            result.ScrapeErrors.Add(scrapeError);
        }
        return result;
    }

    /// <summary>Downloads and processes the feed; implemented per affiliate network.</summary>
    protected abstract void ParseProductFeed();

    /// <summary>
    /// Builds a <see cref="Product"/> from one feed record using the abstract field accessors.
    /// Text fields are normalized, blank brand/category become null, prices are rounded to
    /// two decimals, and a sale price is only kept when it is positive and below the price.
    /// </summary>
    /// <param name="data">One feed record, in whatever shape the subclass accessors expect.</param>
    /// <returns>The populated, unsaved product.</returns>
    protected Product CreateProductObjects(object data)
    {
        Product product = new Product();
        product.Name = ScraperUtil.NormalizeText(GetName(data));
        product.BrandUnparsed = ScraperUtil.NormalizeText(GetBrand(data));
        if (String.IsNullOrWhiteSpace(product.BrandUnparsed))
            product.BrandUnparsed = null;
        product.CategoryUnparsed = ScraperUtil.NormalizeText(GetCategory(data));
        if (String.IsNullOrWhiteSpace(product.CategoryUnparsed))
            product.CategoryUnparsed = null;
        //product.Description = ScraperUtil.NormalizeText(GetDescription(data));
        product.Price = Math.Round(GetPrice(data), 2, MidpointRounding.AwayFromZero);
        decimal salePrice = Math.Round(GetSalePrice(data), 2, MidpointRounding.AwayFromZero);
        if (salePrice > 0 && salePrice < product.Price)
            product.SalePrice = salePrice;

        foreach (ProductSize productSize in GetSizes(data))
        {
            // Sizes without a size label are dropped.
            if (String.IsNullOrWhiteSpace(productSize.Size))
                continue;
            productSize.Size = ScraperUtil.NormalizeText(productSize.Size);
            if (!String.IsNullOrWhiteSpace(productSize.Colour))
                productSize.Colour = ScraperUtil.NormalizeText(productSize.Colour);
            else
                productSize.Colour = null;
            product.ProductSizes.Add(productSize);
        }

        product.Url = GetProductUrl(data);
        product.Store = Store;
        // The image URL is still read (so accessor failures surface as before) even though
        // the photo download below is currently disabled.
        string imageUrl = GetImageUrl(data);
        //try
        //{
        //    ProductManager.DownloadProductPhoto(imageUrl, product, proxy: Proxy);
        //}
        //catch (Exception e)
        //{
        //    log.WarnFormat("Error downloading / processing product image. Product ignored. ImageUrl {0} {1}", imageUrl, e.ToString());
        //    throw;
        //}
        return product;
    }

    /// <summary>Returns true when the feed record should be skipped.</summary>
    protected abstract bool IgnoreProduct(object data);
    /// <summary>Returns the product page URL for the feed record.</summary>
    protected abstract string GetProductUrl(object data);
    /// <summary>Returns the product name for the feed record.</summary>
    protected abstract string GetName(object data);
    /// <summary>Returns the regular price for the feed record.</summary>
    protected abstract decimal GetPrice(object data);
    /// <summary>Returns the sale price for the feed record (the implementations in this file return 0 when there is none).</summary>
    protected abstract decimal GetSalePrice(object data);
    /// <summary>Returns the raw brand text for the feed record.</summary>
    protected abstract string GetBrand(object data);
    /// <summary>Returns the raw category text for the feed record.</summary>
    protected abstract string GetCategory(object data);
    /// <summary>Returns the description for the feed record.</summary>
    protected abstract string GetDescription(object data);
    /// <summary>Returns the image URL for the feed record.</summary>
    protected abstract string GetImageUrl(object data);
    /// <summary>Returns the sizes carried by the feed record; may be empty.</summary>
    protected abstract IEnumerable<ProductSize> GetSizes(object data);

    /// <summary>
    /// Saves a product built from the feed.
    /// 1. If a product with the same name already exists in the database:
    ///    1.1 If it was already updated during this import:
    ///        1.1.1 Log a warning and increment the warning count when the prices differ.
    ///        1.1.2 If <see cref="ProductSizeOnSeparateRow"/> is set and the feed product has a size,
    ///              merge that size into the stored product; otherwise just log.
    ///    1.2 Otherwise update the stored product and remember its name.
    /// 2. If no such product exists, add it as a new product.
    /// </summary>
    /// <param name="product">Product created from the current feed record.</param>
    protected void ValidateAndUpdateProduct(Product product)
    {
        Product productFromDB = ProductManager.GetProductByStoreAndName(Store.Id, product.Name);
        if (productFromDB != null)
        {
            if (UpdatedProductNames.Contains(product.Name))
            {
                if (productFromDB.Price != product.Price || productFromDB.SalePrice != product.SalePrice)
                {
                    log.Warn("Product to merge has different price. Existing Product Url: " + productFromDB.Url + " New Product Url: " + product.Url);
                    warningCount++;
                }
                if (ProductSizeOnSeparateRow && product.ProductSizes.Any())
                {
                    // Only the first size of the feed product is merged.
                    ProductSize sizeFromFeed = product.ProductSizes.First();
                    ProductSize sizeFromDB = productFromDB.ProductSizes.FirstOrDefault(m => String.Equals(m.Size, sizeFromFeed.Size, StringComparison.InvariantCultureIgnoreCase) && String.Equals(m.Colour, sizeFromFeed.Colour, StringComparison.InvariantCultureIgnoreCase));
                    if (sizeFromDB == null)
                    {
                        ProductSize size = new ProductSize();
                        size.Colour = sizeFromFeed.Colour;
                        size.Size = sizeFromFeed.Size;
                        size.Available = sizeFromFeed.Available;
                        size.DateUpdated = DateTime.Now;
                        size.DateCreated = DateTime.Now;
                        productFromDB.ProductSizes.Add(size);
                    }
                    else
                    {
                        sizeFromDB.DateUpdated = DateTime.Now;
                        // NOTE(review): an existing size is always marked available, regardless
                        // of sizeFromFeed.Available - confirm this is intended.
                        sizeFromDB.Available = true;
                    }
                    ProductManager.Detach(product);
                    ProductManager.UpdateProduct(productFromDB, true, false);
                    // NOTE(review): "Size added" is also logged when an existing size was refreshed.
                    if (!String.IsNullOrEmpty(sizeFromFeed.Colour))
                        log.Info("Size added. Size: " + sizeFromFeed.Size + " Colour: " + sizeFromFeed.Colour);
                    else
                        log.Info("Size added: " + sizeFromFeed.Size);
                }
                else
                    log.Info("Product already updated");
            }
            else
            {
                ProductManager.UpdateProduct(product, true, true);
                productUpdatedCount++;
                UpdatedProductNames.Add(product.Name);
                log.Info("Product updated");
            }
        }
        else
        {
            ProductManager.NewProduct(product, true);
            productAddedCount++;
            UpdatedProductNames.Add(product.Name);
            log.Info("Product added");
        }
    }

    /// <summary>Logs a single product.</summary>
    protected void LogProduct(Product product)
    {
        LogProduct(new List<Product>() { product });
    }

    /// <summary>
    /// Logs name, price(s) and URL of each product, numbered from 1. The sale price is
    /// included only when it is greater than zero.
    /// </summary>
    protected void LogProduct(IEnumerable<Product> products)
    {
        int productNum = 1;
        foreach (Product p in products)
        {
            if (p.SalePrice > 0)
                log.InfoFormat("{0}. {1} Regular Price: ${2} Sale Price: ${3} {4}", productNum, p.Name, p.Price, p.SalePrice, p.Url);
            else
                log.InfoFormat("{0}. {1} ${2} {3}", productNum, p.Name, p.Price, p.Url);
            productNum++;
        }
    }

    /// <summary>
    /// Builds a temporary feed file name of the form
    /// "yyyy.MM.dd_HH.mm_{network}_{store name with spaces as underscores}_{country code}.{extension}".
    /// The country code is left empty when none of the store's AU/NZ/US flags is set.
    /// </summary>
    /// <param name="affiliateNetworkName">Name of the affiliate network, used verbatim.</param>
    /// <param name="fileExtension">Extension with or without leading dots; may be empty.</param>
    protected string GetProductFeedFileName(string affiliateNetworkName, string fileExtension)
    {
        Store store = Store;
        string countryCode = null;
        if (store.AUSite)
            countryCode = Country.Australia.CountryCode;
        else if (store.NZSite)
            countryCode = Country.NewZealand.CountryCode;
        else if (store.USSite)
            countryCode = Country.UnitedStates.CountryCode;

        // Invariant culture keeps the timestamp stable regardless of the thread's calendar/culture.
        string timestamp = DateTime.Now.ToString("yyyy.MM.dd_HH.mm", System.Globalization.CultureInfo.InvariantCulture);
        return timestamp + "_" + affiliateNetworkName + "_" + store.Name.Replace(" ", "_") + "_" + countryCode + "." + fileExtension.TrimStart('.');
    }
}
}
CommissionFactoryProductFeed
using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using Microsoft.VisualBasic.FileIO;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Web.Hosting;
using System.Xml;
namespace FashionExchange.Common.ProductFeeds
{
/// <summary>
/// Product feed importer for comma-delimited feeds downloaded from
/// <see cref="ProductFeedUrl"/>. Field accessors read fixed column positions
/// of each record; subclasses supply the URL and may override the accessors.
/// </summary>
public abstract class CommissionFactoryProductFeed : ProductFeed
{
    /// <summary>
    /// Downloads the feed to a temporary file, processes every record after the header,
    /// and deletes the temporary file. Failures on individual records are logged and
    /// recorded in scrapeErrorDict; parsing then continues with the next record.
    /// </summary>
    protected override void ParseProductFeed()
    {
        string productFeedFileNameWithPath = DownloadProductFeed();
        log.InfoFormat("Product feed file name: {0}", productFeedFileNameWithPath);
        FileInfo productFeed = new FileInfo(productFeedFileNameWithPath);
        using (TextFieldParser parser = new TextFieldParser(productFeed.OpenText()))
        {
            parser.TextFieldType = FieldType.Delimited;
            parser.SetDelimiters(",");
            int lineNumber = 0;
            while (!parser.EndOfData)
            {
                try
                {
                    lineNumber++;
                    log.Info("Parsing product feed line: " + lineNumber);
                    // Always consume the record first. Skipping the header without reading it
                    // leaves it in the parser, where it is then handled as a product row.
                    string[] data = parser.ReadFields();
                    if (lineNumber == 1)
                    {
                        log.InfoFormat("Not a product");
                        continue;
                    }
                    if (data == null || data.Length < 5)
                    {
                        log.InfoFormat("Not a product");
                        continue;
                    }
                    if (IgnoreProduct(data))
                    {
                        continue;
                    }
                    Product product = CreateProductObjects(data);
                    LogProduct(product);
                    ValidateAndUpdateProduct(product);
                    ExistingProductNames.Remove(product.Name);
                }
                catch (Exception e)
                {
                    log.Error("Error parsing product: " + e.ToString());
                    RecordLineError(e, lineNumber);
                    errorCount++;
                }
                finally
                {
                    // discard all changes / caching to database context
                    DataContext.DisposeDataContext();
                }
            }
        }
        // delete temporary product feed files
        File.Delete(productFeedFileNameWithPath);
        log.InfoFormat("Temporary product feed file deleted: {0}", productFeedFileNameWithPath);
    }

    /// <summary>
    /// Records an exception against a record number, grouping repeated failures that
    /// share the same stack trace under a single entry.
    /// </summary>
    private void RecordLineError(Exception e, int lineNumber)
    {
        // StackTrace is the grouping key; fall back to the full text so the key is never null.
        string errorKey = e.StackTrace ?? e.ToString();
        KeyValuePair<Exception, List<string>> existing;
        if (scrapeErrorDict.TryGetValue(errorKey, out existing))
            existing.Value.Add(lineNumber.ToString());
        else
            scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>() { lineNumber.ToString() }));
    }

    /// <summary>URL the feed is downloaded from.</summary>
    protected abstract string ProductFeedUrl { get; }

    /// <summary>
    /// Downloads the feed into the application's temporary directory.
    /// </summary>
    /// <returns>Full path of the downloaded file.</returns>
    private string DownloadProductFeed()
    {
        log.InfoFormat("Downloading product feed: {0}", ProductFeedUrl);
        string tempProductFeedFileName = GetProductFeedFileName("CommissionFactory", "csv");
        string tempProductFeedFileNameWithPath = HostingEnvironment.ApplicationPhysicalPath + FashionExchangeSetting.TemporaryDirectory + tempProductFeedFileName;
        ScraperUtil.DownloadFile(ProductFeedUrl, tempProductFeedFileNameWithPath);
        log.Info("Download product feed completed");
        return tempProductFeedFileNameWithPath;
    }

    /// <summary>Product name: column 5.</summary>
    protected override string GetName(object data)
    {
        return ((string[])data)[5];
    }

    /// <summary>
    /// Price: column 30 when it is not blank, otherwise column 17.
    /// Parsed with the invariant culture because the feed is machine-generated.
    /// </summary>
    protected override decimal GetPrice(object data)
    {
        string[] dataArray = (string[])data;
        string rawPrice = !String.IsNullOrWhiteSpace(dataArray[30]) ? dataArray[30] : dataArray[17];
        return Convert.ToDecimal(rawPrice, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Sale price: column 17 when it is not blank and its text differs from column 30;
    /// otherwise 0.
    /// </summary>
    protected override decimal GetSalePrice(object data)
    {
        string[] dataArray = (string[])data;
        if (!String.IsNullOrWhiteSpace(dataArray[17]) && dataArray[30] != dataArray[17])
            return Convert.ToDecimal(dataArray[17], System.Globalization.CultureInfo.InvariantCulture);
        return 0;
    }

    /// <summary>Description: column 7.</summary>
    protected override string GetDescription(object data)
    {
        return ((string[])data)[7];
    }

    /// <summary>Brand: column 18, truncated to at most 50 characters.</summary>
    protected override string GetBrand(object data)
    {
        string brand = ((string[])data)[18];
        return brand.Length > 50 ? brand.Substring(0, 50) : brand;
    }

    /// <summary>Category: columns 6 and 35 joined by a space.</summary>
    protected override string GetCategory(object data)
    {
        string[] dataArray = (string[])data;
        return dataArray[6] + " " + dataArray[35];
    }

    /// <summary>Product URL: column 9.</summary>
    protected override string GetProductUrl(object data)
    {
        return ((string[])data)[9];
    }

    /// <summary>Image URL: column 10.</summary>
    protected override string GetImageUrl(object data)
    {
        return ((string[])data)[10];
    }

    /// <summary>No records are ignored by default.</summary>
    protected override bool IgnoreProduct(object data)
    {
        return false;
    }

    /// <summary>This feed type supplies no sizes by default.</summary>
    protected override IEnumerable<ProductSize> GetSizes(object data)
    {
        return Enumerable.Empty<ProductSize>();
    }

    /// <summary>Colour: column 19.</summary>
    protected string GetColour(object data)
    {
        return ((string[])data)[19];
    }
}
}
CommissionJunctionProductFeed
using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using Microsoft.VisualBasic.FileIO;
using SevenZip;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Text;
using System.Web.Hosting;
using System.Xml;
namespace FashionExchange.Common.ProductFeeds
{
/// <summary>
/// Product feed importer for tab-delimited feeds that are downloaded over FTP as an
/// archive and extracted with SevenZip. Field accessors read fixed column positions
/// of each row; subclasses supply the feed URLs and may override the colour accessor.
/// </summary>
public abstract class CommissionJunctionProductFeed : ProductFeed
{
    /// <summary>URLs of the feed archives to import, processed in order.</summary>
    protected abstract List<string> ProductFeedUrls { get; }

    /// <summary>
    /// Downloads one feed archive into the temporary directory, extracts its first
    /// entry to a ".txt" file next to it, and deletes the archive.
    /// </summary>
    /// <param name="productFeedUrl">FTP URL of the feed archive.</param>
    /// <returns>Full path of the extracted feed file.</returns>
    protected string DownloadProductFeed(string productFeedUrl)
    {
        log.InfoFormat("Downloading product feed: {0}", productFeedUrl);
        // An empty extension yields a name ending in "."; the real extension is appended below.
        string archiveFileName = GetProductFeedFileName("CommissionJunction", "");
        string temporaryDirectory = HostingEnvironment.ApplicationPhysicalPath + FashionExchangeSetting.TemporaryDirectory;
        string archiveFullPath = temporaryDirectory + archiveFileName + "zip";

        // download product feed from Commission Junction ftp and save in temporary directory
        // NOTE(review): FTP credentials are hard-coded in source; they should be moved to
        // protected configuration and rotated.
        FtpUtil.DownloadFile(productFeedUrl, "4564956", "Crpo?kzj", archiveFullPath);
        log.Info("Download product feed completed");

        log.InfoFormat("Decompressing product feed: {0}", archiveFullPath);
        string productFeedFullPath = temporaryDirectory + archiveFileName + "txt";
        SevenZipExtractor.SetLibraryPath(FashionExchangeSetting.SevenZipDLLFullPath);
        using (SevenZipExtractor extractor = new SevenZipExtractor(archiveFullPath))
        {
            extractor.PreserveDirectoryStructure = false;
            // File.Create truncates an existing file; File.OpenWrite would leave stale
            // trailing bytes if a longer file with the same name were still present.
            using (FileStream fileStream = File.Create(productFeedFullPath))
            {
                extractor.ExtractFile(0, fileStream);
            }
        }
        log.Info("Decompress product feed completed");

        File.Delete(archiveFullPath);
        log.InfoFormat("Product feed archive deleted: {0}", archiveFullPath);
        return productFeedFullPath;
    }

    /// <summary>
    /// Downloads and processes every feed in <see cref="ProductFeedUrls"/>. The first line
    /// of each file is skipped as a header; each remaining line is split on tabs and
    /// imported. Failures on individual rows are logged and recorded in scrapeErrorDict;
    /// parsing then continues with the next row.
    /// </summary>
    protected override void ParseProductFeed()
    {
        List<string> productFeedUrls = ProductFeedUrls;
        int feedNumber = 0;
        foreach (string productFeedUrl in productFeedUrls)
        {
            // A running counter stays correct even if the same URL is listed twice.
            feedNumber++;
            log.InfoFormat("Parsing product feed [{0}/{1}]", feedNumber, productFeedUrls.Count);
            string productFeedFileNameWithPath = DownloadProductFeed(productFeedUrl);
            log.InfoFormat("Product feed file name: {0}", productFeedFileNameWithPath);
            FileInfo productFeed = new FileInfo(productFeedFileNameWithPath);
            using (StreamReader reader = productFeed.OpenText())
            {
                int lineNumber = 0;
                string line = null;
                reader.ReadLine(); // read first header line
                while ((line = reader.ReadLine()) != null)
                {
                    try
                    {
                        log.InfoFormat("Parsing product {0}", ++lineNumber);
                        string[] data = line.Split('\t');
                        if (IgnoreProduct(data))
                        {
                            continue;
                        }
                        Product product = CreateProductObjects(data);
                        LogProduct(product);
                        ValidateAndUpdateProduct(product);
                        ExistingProductNames.Remove(product.Name);
                    }
                    catch (Exception e)
                    {
                        log.Error("Error parsing product: " + e.ToString());
                        RecordLineError(e, lineNumber);
                        errorCount++;
                    }
                    finally
                    {
                        // discard all changes / caching to database context
                        DataContext.DisposeDataContext();
                    }
                }
            }

            // delete temporary product feed files
            File.Delete(productFeedFileNameWithPath);
            log.InfoFormat("Temporary product feed file deleted: {0}", productFeedFileNameWithPath);

            // The visible download path never creates a ".gz" file, so only delete and
            // report it when it is actually there.
            string gzFileNameWithPath = productFeedFileNameWithPath + ".gz";
            if (File.Exists(gzFileNameWithPath))
            {
                File.Delete(gzFileNameWithPath);
                log.InfoFormat("Temporary product feed file deleted: {0}", gzFileNameWithPath);
            }
        }
    }

    /// <summary>
    /// Records an exception against a row number, grouping repeated failures that
    /// share the same stack trace under a single entry.
    /// </summary>
    private void RecordLineError(Exception e, int lineNumber)
    {
        // StackTrace is the grouping key; fall back to the full text so the key is never null.
        string errorKey = e.StackTrace ?? e.ToString();
        KeyValuePair<Exception, List<string>> existing;
        if (scrapeErrorDict.TryGetValue(errorKey, out existing))
            existing.Value.Add(lineNumber.ToString());
        else
            scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>() { lineNumber.ToString() }));
    }

    /// <summary>
    /// Product URL: the URL-decoded result of StringUtil.SubstringToEnd(column 7, "?url=").
    /// </summary>
    protected override string GetProductUrl(object data)
    {
        return System.Web.HttpUtility.UrlDecode(StringUtil.SubstringToEnd(((string[])data)[7], "?url="));
    }

    /// <summary>Product name: column 5.</summary>
    protected override string GetName(object data)
    {
        return ((string[])data)[5];
    }

    /// <summary>
    /// Price: column 15, parsed with the invariant culture because the feed is machine-generated.
    /// </summary>
    protected override decimal GetPrice(object data)
    {
        return Convert.ToDecimal(((string[])data)[15], System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Sale price: column 16 when it is not blank and its text differs from column 15;
    /// otherwise 0.
    /// </summary>
    protected override decimal GetSalePrice(object data)
    {
        string[] dataArray = (string[])data;
        if (!String.IsNullOrWhiteSpace(dataArray[16]) && dataArray[16] != dataArray[15])
            return Convert.ToDecimal(dataArray[16], System.Globalization.CultureInfo.InvariantCulture);
        return 0;
    }

    /// <summary>Description: column 6.</summary>
    protected override string GetDescription(object data)
    {
        return ((string[])data)[6];
    }

    /// <summary>Brand: column 25.</summary>
    protected override string GetBrand(object data)
    {
        return ((string[])data)[25];
    }

    /// <summary>Category: columns 36 and 24 joined by a space.</summary>
    protected override string GetCategory(object data)
    {
        string[] dataArray = (string[])data;
        return dataArray[36] + " " + dataArray[24];
    }

    /// <summary>Image URL: column 9.</summary>
    protected override string GetImageUrl(object data)
    {
        return ((string[])data)[9];
    }

    /// <summary>
    /// A row is kept only when column 12 is "in stock" and column 28 is "yes";
    /// every other row is ignored.
    /// </summary>
    protected override bool IgnoreProduct(object data)
    {
        string[] dataArray = (string[])data;
        bool keep = dataArray[12] == "in stock" && dataArray[28] == "yes";
        return !keep;
    }

    /// <summary>
    /// Returns a single available size built from column 39 and <see cref="GetColour"/>.
    /// </summary>
    protected override IEnumerable<ProductSize> GetSizes(object data)
    {
        return new List<ProductSize>() {
            new ProductSize() {
                Colour = GetColour(data),
                Size = ((string[])data)[39],
                Available = true
            }
        };
    }

    /// <summary>Colour: column 35.</summary>
    protected virtual string GetColour(object data)
    {
        return ((string[])data)[35];
    }
}
}
Comments
Post a Comment