Product Feed

ProductFeed


using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;

namespace FashionExchange.Common.ProductFeeds
{
    public abstract class ProductFeed
    {
        /// <summary>
        /// Logger named after the concrete feed class. Looked up through
        /// <c>LogManager</c> on every access.
        /// </summary>
        protected ILog log
        {
            get
            {
                string loggerName = this.GetType().Name;
                return LogManager.GetLogger(loggerName);
            }
        }

        /// <summary>
        /// Store this feed belongs to, resolved through <c>StoreManager</c> from the
        /// concrete class name on every access.
        /// </summary>
        protected Store Store
        {
            get
            {
                string scraperClassName = this.GetType().Name;
                return StoreManager.GetStoreByScraperClassName(scraperClassName);
            }
        }

        /// <summary>
        /// Whether ValidateAndUpdateProduct should merge the size of a repeated product
        /// name into the stored product. Defaults to <c>false</c>; feeds override it.
        /// </summary>
        protected virtual bool ProductSizeOnSeparateRow
        {
            get { return false; }
        }

        // Backing field for ExistingProductNames; populated on first access.
        private HashSet<string> _existingProductNames = null;

        /// <summary>
        /// Names of the products stored for <see cref="Store"/>, compared
        /// case-insensitively. Loaded from <c>ProductManager</c> on first access.
        /// Feed parsers remove a name once its product has been processed; Import()
        /// deletes the products whose names are still present afterwards.
        /// </summary>
        protected HashSet<string> ExistingProductNames
        {
            get
            {
                if (_existingProductNames == null)
                {
                    _existingProductNames = new HashSet<string>(
                        ProductManager.GetProductsByStore(Store.Id).Select(m => m.Name),
                        StringComparer.InvariantCultureIgnoreCase);
                }
                return _existingProductNames;
            }
        }

        // Backing field for UpdatedProductNames; created on first access.
        private HashSet<string> _updatedProductNames = null;

        /// <summary>
        /// Names of the products added or updated so far (case-insensitive).
        /// ValidateAndUpdateProduct uses it to recognise a product name that appears
        /// more than once.
        /// </summary>
        protected HashSet<string> UpdatedProductNames
        {
            get
            {
                if (_updatedProductNames == null)
                    _updatedProductNames = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

                return _updatedProductNames;
            }
        }

        // Running totals reported in the ScrapeResult built by Import().
        protected int errorCount = 0, warningCount = 0;
        protected int productUpdatedCount = 0, productAddedCount = 0, productDeletedCount = 0;

        // Errors grouped by stack trace: key = stack trace text, value = the exception
        // paired with the list of feed row numbers it was recorded for.
        protected Dictionary<string, KeyValuePair<Exception, List<string>>> scrapeErrorDict = new Dictionary<string, KeyValuePair<Exception, List<string>>>();

        /// <summary>Proxy for feed-related downloads; <c>null</c> unless a subclass overrides it.</summary>
        protected virtual WebProxy Proxy { get { return null; } }

        /// <summary>
        /// Runs a full import for this feed's store: parses the feed, deletes stored
        /// products whose names were not seen in the feed, and collects statistics and
        /// grouped errors.
        /// </summary>
        /// <returns>
        /// A <c>ScrapeResult</c> with start/end times, product counters, size and brand
        /// statistics, and one <c>ScrapeError</c> per distinct recorded error.
        /// </returns>
        /// <remarks>
        /// An exception escaping ParseProductFeed() or the delete loop is logged and
        /// counted as one error; it is not rethrown.
        /// NOTE(review): ExistingProductNames is built lazily, so if the parser never
        /// processed a row, the delete loop sees every stored product of the store.
        /// Confirm that an empty feed is meant to wipe the catalogue.
        /// </remarks>
        public ScrapeResult Import()
        {
            log.InfoFormat("Importing product feed. Store name: {0}", Store.Name);

            ScrapeResult result = new ScrapeResult();
            result.StartDateTime = DateTime.Now;

            try
            {
                ParseProductFeed();

                // delete products in database but not in product feed
                int obsoleteProductCount = ExistingProductNames.Count;
                foreach (string productName in ExistingProductNames)
                {
                    Product product = ProductManager.GetProductByStoreAndName(Store.Id, productName);
                    if (product == null)
                    {
                        // The lookup returns null when nothing matches (ValidateAndUpdateProduct
                        // relies on that), so skip the name and keep deleting the rest.
                        log.WarnFormat("Product to delete not found: {0}", productName);
                        continue;
                    }

                    ProductManager.DeleteProduct(product);
                    productDeletedCount++;
                    log.InfoFormat("Product deleted[{0}/{1}]: {2}", productDeletedCount, obsoleteProductCount, productName);
                }
            }
            catch (Exception e)
            {
                log.ErrorFormat("An error has occured while importing product feed. {0}", e.ToString());

                // Dictionary.Add throws on a null or duplicate key; an exception thrown here
                // would escape Import(), so only add when the key is new.
                string errorKey = e.StackTrace ?? e.GetType().FullName;
                if (!scrapeErrorDict.ContainsKey(errorKey))
                    scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>()));

                errorCount++;
            }

            // Resolve the store and the product query once for the reporting section.
            Store store = Store;

            log.InfoFormat("Import product feed completed. Store name: {0}", store.Name);
            log.InfoFormat("Products Added={0} Updated={1} Deleted={2}", productAddedCount, productUpdatedCount, productDeletedCount);
            log.InfoFormat("Warning={0} Error={1}", warningCount, errorCount);

            var products = ProductManager.GetProductsByStore(store.Id);
            var productsWithSizes = products.Where(m => m.ProductSizes.Any());
            var productSizes = productsWithSizes.SelectMany(m => m.ProductSizes);
            var productsWithBrand = products.Where(m => m.BrandUnparsed != null);

            // return result
            result.Store = store;
            result.ScrapeType = StoreManager.GetScrapeType(ScrapeTypeCode.ProductFeed);
            result.ProductAddedCount = productAddedCount;
            result.ProductDeletedCount = productDeletedCount;
            result.ProductUpdatedCount = productUpdatedCount;
            result.SaleProductCount = products.Where(m => m.SalePrice != null).Count();
            result.TotalProductCount = products.Count();
            result.WarningCount = warningCount;
            result.ErrorCount = errorCount;
            result.ProductWithProductSizeCount = productsWithSizes.Count();
            result.ProductSizeAvailableCount = productSizes.Where(m => m.Available).Count();
            result.ProductSizeWithColourCount = productSizes.Where(m => m.Colour != null).Count();
            result.TotalProductSizeCount = productSizes.Count();
            result.ProductWithBrandCount = productsWithBrand.Count();
            result.UniqueBrandCount = productsWithBrand.Select(m => m.BrandUnparsed).Distinct().Count();
            result.EndDateTime = DateTime.Now;

            foreach (KeyValuePair<Exception, List<string>> scrapeErrorPair in scrapeErrorDict.Values)
            {
                Exception exception = scrapeErrorPair.Key;
                List<string> locations = scrapeErrorPair.Value;

                ScrapeError scrapeError = new ScrapeError();
                // The first three recorded locations (feed row numbers) go into the Url fields.
                scrapeError.Url1 = locations.FirstOrDefault();
                scrapeError.Url2 = locations.Skip(1).FirstOrDefault();
                scrapeError.Url3 = locations.Skip(2).FirstOrDefault();
                scrapeError.Exception = exception.GetType().ToString();
                scrapeError.Message = exception.Message;
                scrapeError.StackTrace = exception.StackTrace;
                // An error recorded without a location (the catch above) still counts once.
                scrapeError.Count = locations.Count > 0 ? locations.Count : 1;

                result.ScrapeErrors.Add(scrapeError);
            }

            return result;
        }

        /// <summary>
        /// Reads the store's product feed. Called once by Import(); an exception that
        /// escapes this method is logged and counted as an error there.
        /// </summary>
        protected abstract void ParseProductFeed();

        /// <summary>
        /// Builds a <c>Product</c> from one raw feed row using the abstract accessors
        /// (GetName, GetBrand, GetCategory, GetPrice, GetSalePrice, GetSizes,
        /// GetProductUrl, GetImageUrl).
        /// </summary>
        /// <param name="data">Raw feed row; passed unchanged to every accessor.</param>
        /// <returns>The populated product, assigned to <see cref="Store"/>. It is not saved here.</returns>
        protected Product CreateProductObjects(object data)
        {
            Product product = new Product();
            product.Name = ScraperUtil.NormalizeText(GetName(data));

            // Blank brand / category are stored as null rather than as empty text.
            string brand = ScraperUtil.NormalizeText(GetBrand(data));
            product.BrandUnparsed = String.IsNullOrWhiteSpace(brand) ? null : brand;

            string category = ScraperUtil.NormalizeText(GetCategory(data));
            product.CategoryUnparsed = String.IsNullOrWhiteSpace(category) ? null : category;

            // The description is not imported (GetDescription is not called here).
            decimal regularPrice = Math.Round(GetPrice(data), 2, MidpointRounding.AwayFromZero);
            product.Price = regularPrice;

            // A sale price is only kept when it is positive and below the regular price.
            decimal salePrice = Math.Round(GetSalePrice(data), 2, MidpointRounding.AwayFromZero);
            bool isDiscounted = salePrice > 0 && salePrice < regularPrice;
            if (isDiscounted)
            {
                product.SalePrice = salePrice;
            }

            foreach (ProductSize candidate in GetSizes(data))
            {
                // Entries without a size label are dropped.
                if (String.IsNullOrWhiteSpace(candidate.Size))
                {
                    continue;
                }

                candidate.Size = ScraperUtil.NormalizeText(candidate.Size);
                candidate.Colour = String.IsNullOrWhiteSpace(candidate.Colour)
                    ? null
                    : ScraperUtil.NormalizeText(candidate.Colour);

                product.ProductSizes.Add(candidate);
            }

            product.Url = GetProductUrl(data);
            product.Store = Store;

            // The image URL is still read from the row, but nothing consumes it: the
            // photo download that used it (ProductManager.DownloadProductPhoto) is disabled.
            GetImageUrl(data);

            return product;
        }

        // Accessors implemented by each feed format. "data" is the raw feed row handed
        // to CreateProductObjects by the concrete parser.

        /// <summary>Returns <c>true</c> when the row must be skipped by the parser.</summary>
        protected abstract bool IgnoreProduct(object data);

        /// <summary>Returns the product page URL for the row.</summary>
        protected abstract string GetProductUrl(object data);

        /// <summary>Returns the product name for the row (normalised by CreateProductObjects).</summary>
        protected abstract string GetName(object data);

        /// <summary>Returns the regular price for the row (rounded to 2 decimals by CreateProductObjects).</summary>
        protected abstract decimal GetPrice(object data);

        /// <summary>
        /// Returns the sale price for the row. CreateProductObjects ignores it unless
        /// it is greater than 0 and lower than the regular price.
        /// </summary>
        protected abstract decimal GetSalePrice(object data);

        /// <summary>Returns the brand text for the row; blank values are stored as null.</summary>
        protected abstract string GetBrand(object data);

        /// <summary>Returns the category text for the row; blank values are stored as null.</summary>
        protected abstract string GetCategory(object data);

        /// <summary>Returns the description for the row. Not called by CreateProductObjects at present.</summary>
        protected abstract string GetDescription(object data);

        /// <summary>Returns the image URL for the row. Read by CreateProductObjects, but the result is currently unused.</summary>
        protected abstract string GetImageUrl(object data);

        /// <summary>
        /// Returns the sizes described by the row. Entries with a blank
        /// <c>Size</c> are discarded by CreateProductObjects.
        /// </summary>
        protected abstract IEnumerable<ProductSize> GetSizes(object data);

        /// <summary>
        /// Saves a product parsed from the feed.
        /// <list type="bullet">
        /// <item>No stored product with that name for the store: the product is added.</item>
        /// <item>Stored, and first seen in this run: the stored product is updated.</item>
        /// <item>Stored, and already seen in this run: a warning is counted when the
        /// prices differ, and, when <see cref="ProductSizeOnSeparateRow"/> is set and
        /// the feed product has a size, its first size is merged into the stored product.</item>
        /// </list>
        /// </summary>
        /// <param name="product">Product built from the current feed row.</param>
        protected void ValidateAndUpdateProduct(Product product)
        {
            Product storedProduct = ProductManager.GetProductByStoreAndName(Store.Id, product.Name);

            // Case 1: unknown product -> insert.
            if (storedProduct == null)
            {
                ProductManager.NewProduct(product, true);
                productAddedCount++;
                UpdatedProductNames.Add(product.Name);
                log.Info("Product added");
                return;
            }

            // Case 2: known product, first occurrence in this run -> update.
            if (!UpdatedProductNames.Contains(product.Name))
            {
                ProductManager.UpdateProduct(product, true, true);
                productUpdatedCount++;
                UpdatedProductNames.Add(product.Name);
                log.Info("Product updated");
                return;
            }

            // Case 3: the same name was already handled in this run.
            bool pricesMatch = storedProduct.Price == product.Price
                && storedProduct.SalePrice == product.SalePrice;
            if (!pricesMatch)
            {
                log.Warn("Product to merge has different price. Existing Product Url: " + storedProduct.Url + " New Product Url: " + product.Url);
                warningCount++;
            }

            if (!ProductSizeOnSeparateRow || !product.ProductSizes.Any())
            {
                log.Info("Product already updated");
                return;
            }

            // Only the first size of the feed product is merged.
            ProductSize feedSize = product.ProductSizes.First();
            ProductSize storedSize = storedProduct.ProductSizes.FirstOrDefault(
                s => String.Equals(s.Size, feedSize.Size, StringComparison.InvariantCultureIgnoreCase)
                  && String.Equals(s.Colour, feedSize.Colour, StringComparison.InvariantCultureIgnoreCase));

            if (storedSize != null)
            {
                // Size/colour already stored: refresh it. Availability is set to true
                // regardless of the value on the feed size.
                storedSize.DateUpdated = DateTime.Now;
                storedSize.Available = true;
            }
            else
            {
                ProductSize addedSize = new ProductSize();
                addedSize.Colour = feedSize.Colour;
                addedSize.Size = feedSize.Size;
                addedSize.Available = feedSize.Available;
                addedSize.DateUpdated = DateTime.Now;
                addedSize.DateCreated = DateTime.Now;
                storedProduct.ProductSizes.Add(addedSize);
            }

            // The feed product is detached before the stored product is saved.
            ProductManager.Detach(product);
            ProductManager.UpdateProduct(storedProduct, true, false);

            if (String.IsNullOrEmpty(feedSize.Colour))
            {
                log.Info("Size added: " + feedSize.Size);
            }
            else
            {
                log.Info("Size added. Size: " + feedSize.Size + " Colour: " + feedSize.Colour);
            }
        }

        /// <summary>Logs a single product; shorthand for the collection overload.</summary>
        /// <param name="product">Product to log.</param>
        protected void LogProduct(Product product)
        {
            LogProduct(new List<Product>() { product });
        }

        /// <summary>
        /// Logs one numbered line per product: name, price(s) and URL. The sale price
        /// is included only when it is greater than 0.
        /// </summary>
        /// <param name="products">Products to log, numbered from 1 in enumeration order.</param>
        protected void LogProduct(IEnumerable<Product> products)
        {
            int productNum = 1;
            foreach (Product p in products)
            {
                if (p.SalePrice > 0)
                    log.InfoFormat("{0}. {1} Regular Price: ${2} Sale Price: ${3} {4}", productNum, p.Name, p.Price, p.SalePrice, p.Url);
                else
                    log.InfoFormat("{0}. {1} ${2} {3}", productNum, p.Name, p.Price, p.Url);
                productNum++;
            }
        }

        /// <summary>
        /// Builds the temporary file name for a downloaded feed:
        /// <c>{yyyy.MM.dd_HH.mm}_{network}_{store name}_{country code}.{extension}</c>.
        /// </summary>
        /// <param name="affiliateNetworkName">Network label embedded in the name.</param>
        /// <param name="fileExtension">
        /// Extension with or without a leading dot. An empty (or null) value yields a
        /// name ending in "." so the caller can append the extension itself.
        /// </param>
        /// <returns>File name only, without a directory.</returns>
        protected string GetProductFeedFileName(string affiliateNetworkName, string fileExtension)
        {
            // Resolve the store once instead of on every property access.
            Store store = Store;

            // Stays null (rendered as empty text) when none of the site flags is set.
            string countryCode = null;
            if (store.AUSite)
                countryCode = Country.Australia.CountryCode;
            else if (store.NZSite)
                countryCode = Country.NewZealand.CountryCode;
            else if (store.USSite)
                countryCode = Country.UnitedStates.CountryCode;

            // Invariant culture keeps the timestamp Gregorian and ASCII whatever the
            // thread culture is.
            string timestamp = DateTime.Now.ToString("yyyy.MM.dd_HH.mm", System.Globalization.CultureInfo.InvariantCulture);

            // Spaces become underscores, as do characters that cannot appear in a file
            // name (a store name containing '/' or ':' would otherwise produce an
            // unusable path).
            string storeName = store.Name.Replace(" ", "_");
            foreach (char invalidChar in System.IO.Path.GetInvalidFileNameChars())
                storeName = storeName.Replace(invalidChar, '_');

            string extension = (fileExtension ?? String.Empty).TrimStart('.');

            return timestamp + "_" + affiliateNetworkName + "_" + storeName + "_" + countryCode + "." + extension;
        }
    }
}


CommissionFactoryProductFeed


using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using Microsoft.VisualBasic.FileIO;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Web.Hosting;
using System.Xml;

namespace FashionExchange.Common.ProductFeeds
{
    public abstract class CommissionFactoryProductFeed : ProductFeed
    {
        /// <summary>
        /// Downloads the comma-delimited feed and processes it row by row: each
        /// product row is converted with CreateProductObjects, saved through
        /// ValidateAndUpdateProduct, and its name is removed from ExistingProductNames.
        /// </summary>
        /// <remarks>
        /// A failure on one row is logged, grouped by stack trace in scrapeErrorDict
        /// together with the row number, and counted; parsing then continues. The
        /// downloaded file is deleted when parsing ends, whether or not it succeeded.
        /// </remarks>
        protected override void ParseProductFeed()
        {
            string productFeedFileNameWithPath = DownloadProductFeed();

            log.InfoFormat("Product feed file name: {0}", productFeedFileNameWithPath);

            try
            {
                FileInfo productFeed = new FileInfo(productFeedFileNameWithPath);

                using (TextFieldParser parser = new TextFieldParser(productFeed.OpenText()))
                {
                    parser.TextFieldType = FieldType.Delimited;
                    parser.SetDelimiters(",");

                    int lineNumber = 0;
                    while (!parser.EndOfData)
                    {
                        try
                        {
                            lineNumber++;
                            log.Info("Parsing product feed line: " + lineNumber);

                            // Always consume the row before deciding to skip it. Skipping the
                            // header without reading it left it in the parser, so it was then
                            // handled as a product and recorded a spurious error on every run.
                            string[] data = parser.ReadFields();

                            // Row 1 is the header; very short rows are not products either.
                            if (lineNumber == 1 || data == null || data.Length < 5)
                            {
                                log.InfoFormat("Not a product");
                                continue;
                            }

                            if (IgnoreProduct(data))
                            {
                                continue;
                            }

                            Product product = CreateProductObjects(data);

                            LogProduct(product);

                            ValidateAndUpdateProduct(product);

                            ExistingProductNames.Remove(product.Name);
                        }
                        catch (Exception e)
                        {
                            log.Error("Error parsing product: " + e.ToString());

                            // Group identical failures by stack trace and remember the row number.
                            string errorKey = e.StackTrace ?? e.GetType().FullName;
                            KeyValuePair<Exception, List<string>> recorded;
                            if (scrapeErrorDict.TryGetValue(errorKey, out recorded))
                                recorded.Value.Add(lineNumber.ToString());
                            else
                                scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>() { lineNumber.ToString() }));

                            errorCount++;
                        }
                        finally
                        {
                            // discard all changes / caching to database context
                            DataContext.DisposeDataContext();
                        }
                    }
                }
            }
            finally
            {
                // delete temporary product feed files, also when parsing failed
                File.Delete(productFeedFileNameWithPath);
                log.InfoFormat("Temporary product feed file deleted: {0}", productFeedFileNameWithPath);
            }
        }

        /// <summary>
        /// Address the feed is downloaded from; DownloadProductFeed passes it to
        /// <c>ScraperUtil.DownloadFile</c>.
        /// </summary>
        protected abstract string ProductFeedUrl { get; }

        /// <summary>
        /// Downloads the feed at <see cref="ProductFeedUrl"/> into the application's
        /// temporary directory.
        /// </summary>
        /// <returns>Full path of the downloaded CSV file.</returns>
        private string DownloadProductFeed()
        {
            log.InfoFormat("Downloading product feed: {0}", ProductFeedUrl);

            // <application path> + <temporary directory setting> + generated file name,
            // joined by plain concatenation.
            string feedFileName = GetProductFeedFileName("CommissionFactory", "csv");
            string temporaryDirectory = HostingEnvironment.ApplicationPhysicalPath + FashionExchangeSetting.TemporaryDirectory;
            string feedFullPath = temporaryDirectory + feedFileName;

            ScraperUtil.DownloadFile(ProductFeedUrl, feedFullPath);
            log.Info("Download product feed completed");

            return feedFullPath;
        }

        /// <summary>Product name: column 5 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetName(object data)
        {
            string[] fields = (string[])data;
            return fields[5];
        }

        /// <summary>
        /// Regular price: column 30 when it is not blank, otherwise column 17.
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        /// <returns>The parsed price.</returns>
        /// <exception cref="FormatException">The selected column is not a number.</exception>
        protected override decimal GetPrice(object data)
        {
            string[] dataArray = (string[])data;
            string priceText = String.IsNullOrWhiteSpace(dataArray[30]) ? dataArray[17] : dataArray[30];

            // Feed files are machine-written, so parse with the invariant culture instead
            // of whatever culture the worker thread runs under.
            // NOTE(review): assumes the feed uses '.' as the decimal separator - confirm.
            return Convert.ToDecimal(priceText, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Sale price: column 17 when it is not blank and its text differs from
        /// column 30; otherwise 0 (no sale price).
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        /// <exception cref="FormatException">Column 17 is used but is not a number.</exception>
        protected override decimal GetSalePrice(object data)
        {
            string[] dataArray = (string[])data;

            bool hasDistinctPrice = !String.IsNullOrWhiteSpace(dataArray[17]) && dataArray[30] != dataArray[17];
            if (!hasDistinctPrice)
                return 0;

            // Invariant culture: the feed is machine-written, so parsing must not depend
            // on the culture of the worker thread.
            // NOTE(review): assumes the feed uses '.' as the decimal separator - confirm.
            return Convert.ToDecimal(dataArray[17], System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>Description: column 7 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetDescription(object data)
        {
            string[] fields = (string[])data;
            return fields[7];
        }

        /// <summary>Brand: column 18 of the feed row, cut to its first 50 characters.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetBrand(object data)
        {
            string brand = ((string[])data)[18];
            return brand.Length > 50 ? brand.Substring(0, 50) : brand;
        }

        /// <summary>Category text: columns 6 and 35 of the feed row joined by a single space.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetCategory(object data)
        {
            string[] fields = (string[])data;
            return String.Concat(fields[6], " ", fields[35]);
        }

        /// <summary>Product URL: column 9 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetProductUrl(object data)
        {
            string[] fields = (string[])data;
            return fields[9];
        }

        /// <summary>Image URL: column 10 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetImageUrl(object data)
        {
            string[] fields = (string[])data;
            return fields[10];
        }

        /// <summary>No row is ignored by default; always returns <c>false</c>.</summary>
        /// <param name="data">Feed row (not inspected).</param>
        protected override bool IgnoreProduct(object data)
        {
            return false;
        }

        /// <summary>This feed provides no size information by default: returns an empty sequence.</summary>
        /// <param name="data">Feed row (not inspected).</param>
        protected override IEnumerable<ProductSize> GetSizes(object data)
        {
            return Enumerable.Empty<ProductSize>();
        }

        /// <summary>Colour: column 19 of the feed row. Not used inside this class.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected string GetColour(object data)
        {
            string[] fields = (string[])data;
            return fields[19];
        }
    }
}


CommissionJunctionProductFeed


using FashionExchange.Common.BLL;
using FashionExchange.Common.DAL;
using FashionExchange.Common.Enums;
using FashionExchange.Common.Utils;
using log4net;
using Microsoft.VisualBasic.FileIO;
using SevenZip;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net;
using System.Text;
using System.Web.Hosting;
using System.Xml;

namespace FashionExchange.Common.ProductFeeds
{
    public abstract class CommissionJunctionProductFeed : ProductFeed
    {
        /// <summary>
        /// Addresses of the feed archives for the store; ParseProductFeed downloads
        /// and parses each of them in order.
        /// </summary>
        protected abstract List<string> ProductFeedUrls { get; }

        /// <summary>
        /// Downloads a feed archive over FTP into the temporary directory, extracts
        /// its first entry to a ".txt" file next to it, and deletes the archive.
        /// </summary>
        /// <param name="productFeedUrl">FTP address of the archive.</param>
        /// <returns>Full path of the extracted feed file.</returns>
        protected string DownloadProductFeed(string productFeedUrl)
        {
            log.InfoFormat("Downloading product feed: {0}", productFeedUrl);

            // Called with an empty extension, GetProductFeedFileName returns a name that
            // ends in ".", so the extensions are appended here without a dot.
            string archiveFileName = GetProductFeedFileName("CommissionJunction", "");
            string temporaryDirectory = HostingEnvironment.ApplicationPhysicalPath + FashionExchangeSetting.TemporaryDirectory;
            string archiveFullPath = temporaryDirectory + archiveFileName + "zip";
            string productFeedFullPath = temporaryDirectory + archiveFileName + "txt";

            try
            {
                // download product feed from Commission Junction ftp and save in temporary directory
                // SECURITY(review): the FTP user name and password are hard-coded in source.
                // Move them to protected configuration and rotate the password.
                FtpUtil.DownloadFile(productFeedUrl, "4564956", "Crpo?kzj", archiveFullPath);

                log.Info("Download product feed completed");
                log.InfoFormat("Decompressing product feed: {0}", archiveFullPath);

                SevenZipExtractor.SetLibraryPath(FashionExchangeSetting.SevenZipDLLFullPath);
                using (SevenZipExtractor extractor = new SevenZipExtractor(archiveFullPath))
                {
                    extractor.PreserveDirectoryStructure = false;

                    // File.Create truncates an existing file. File.OpenWrite would keep the
                    // tail of a longer left-over file with the same name and corrupt the feed.
                    using (FileStream fileStream = File.Create(productFeedFullPath))
                    {
                        extractor.ExtractFile(0, fileStream);
                    }
                }
                log.Info("Decompress product feed completed");
            }
            finally
            {
                // Remove the archive even when the download or extraction failed.
                File.Delete(archiveFullPath);
                log.InfoFormat("Product feed archive deleted: {0}", archiveFullPath);
            }

            return productFeedFullPath;
        }
        /// <summary>
        /// Downloads and parses every feed in <see cref="ProductFeedUrls"/>. Each feed
        /// is a tab-separated text file whose first line is a header; every other line
        /// is converted with CreateProductObjects, saved through
        /// ValidateAndUpdateProduct, and its name removed from ExistingProductNames.
        /// </summary>
        /// <remarks>
        /// A failure on one line is logged, grouped by stack trace in scrapeErrorDict
        /// with the product number, and counted; parsing continues with the next line.
        /// The temporary files of a feed are deleted whether or not parsing succeeded.
        /// </remarks>
        protected override void ParseProductFeed()
        {
            // Read the property once: it is abstract and may build a new list each time.
            List<string> productFeedUrls = ProductFeedUrls;

            // A running counter stays correct when the same URL is listed twice;
            // IndexOf would report the first position for both.
            int feedNumber = 0;
            foreach (string productFeedUrl in productFeedUrls)
            {
                feedNumber++;
                log.InfoFormat("Parsing product feed [{0}/{1}]", feedNumber, productFeedUrls.Count);

                string productFeedFileNameWithPath = DownloadProductFeed(productFeedUrl);

                log.InfoFormat("Product feed file name: {0}", productFeedFileNameWithPath);

                try
                {
                    FileInfo productFeed = new FileInfo(productFeedFileNameWithPath);

                    using (StreamReader reader = productFeed.OpenText())
                    {
                        int lineNumber = 0;
                        string line = null;

                        reader.ReadLine(); // read first header line
                        while ((line = reader.ReadLine()) != null)
                        {
                            try
                            {
                                log.InfoFormat("Parsing product {0}", ++lineNumber);

                                string[] data = line.Split('\t');

                                if (IgnoreProduct(data))
                                {
                                    continue;
                                }

                                Product product = CreateProductObjects(data);

                                LogProduct(product);

                                ValidateAndUpdateProduct(product);

                                ExistingProductNames.Remove(product.Name);
                            }
                            catch (Exception e)
                            {
                                log.Error("Error parsing product: " + e.ToString());

                                // Group identical failures by stack trace and remember the product number.
                                string errorKey = e.StackTrace ?? e.GetType().FullName;
                                KeyValuePair<Exception, List<string>> recorded;
                                if (scrapeErrorDict.TryGetValue(errorKey, out recorded))
                                    recorded.Value.Add(lineNumber.ToString());
                                else
                                    scrapeErrorDict.Add(errorKey, new KeyValuePair<Exception, List<string>>(e, new List<string>() { lineNumber.ToString() }));

                                errorCount++;
                            }
                            finally
                            {
                                // discard all changes / caching to database context
                                DataContext.DisposeDataContext();
                            }
                        }
                    }
                }
                finally
                {
                    // delete temporary product feed files, also when parsing failed
                    File.Delete(productFeedFileNameWithPath);
                    log.InfoFormat("Temporary product feed file deleted: {0}", productFeedFileNameWithPath);

                    // DownloadProductFeed in this class only writes "...zip" and "...txt",
                    // so this is expected to be a no-op; kept in case a ".gz" is left behind.
                    File.Delete(productFeedFileNameWithPath + ".gz");
                    log.InfoFormat("Temporary product feed file deleted: {0}", productFeedFileNameWithPath + ".gz");
                }
            }
        }

        /// <summary>
        /// Product URL, taken from column 7: the portion selected by
        /// <c>StringUtil.SubstringToEnd(..., "?url=")</c>, URL-decoded.
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetProductUrl(object data)
        {
            string trackingLink = ((string[])data)[7];
            // presumably yields the text following "?url=" - verify against StringUtil
            string encodedTarget = StringUtil.SubstringToEnd(trackingLink, "?url=");
            return System.Web.HttpUtility.UrlDecode(encodedTarget);
        }

        /// <summary>Product name: column 5 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetName(object data)
        {
            string[] fields = (string[])data;
            return fields[5];
        }

        /// <summary>Regular price: column 15 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        /// <exception cref="FormatException">Column 15 is not a number.</exception>
        protected override decimal GetPrice(object data)
        {
            string priceText = ((string[])data)[15];

            // The feed is machine-written, so parse with the invariant culture instead
            // of the culture of the worker thread.
            // NOTE(review): assumes the feed uses '.' as the decimal separator - confirm.
            return Convert.ToDecimal(priceText, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Sale price: column 16 when it is not blank and its text differs from
        /// column 15; otherwise 0 (no sale price).
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        /// <exception cref="FormatException">Column 16 is used but is not a number.</exception>
        protected override decimal GetSalePrice(object data)
        {
            string[] dataArray = (string[])data;

            bool hasDistinctPrice = !String.IsNullOrWhiteSpace(dataArray[16]) && dataArray[16] != dataArray[15];
            if (!hasDistinctPrice)
                return 0;

            // Invariant culture: parsing must not depend on the thread culture.
            // NOTE(review): assumes the feed uses '.' as the decimal separator - confirm.
            return Convert.ToDecimal(dataArray[16], System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>Description: column 6 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetDescription(object data)
        {
            string[] fields = (string[])data;
            return fields[6];
        }

        /// <summary>Brand: column 25 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetBrand(object data)
        {
            string[] fields = (string[])data;
            return fields[25];
        }

        /// <summary>Category text: columns 36 and 24 of the feed row joined by a single space.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetCategory(object data)
        {
            string[] fields = (string[])data;
            return String.Concat(fields[36], " ", fields[24]);
        }

        /// <summary>Image URL: column 9 of the feed row.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override string GetImageUrl(object data)
        {
            string[] fields = (string[])data;
            return fields[9];
        }

        /// <summary>
        /// A row is kept only when column 12 is exactly "in stock" and column 28 is
        /// exactly "yes"; every other row is ignored.
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        /// <returns><c>true</c> when the row must be skipped.</returns>
        protected override bool IgnoreProduct(object data)
        {
            string[] fields = (string[])data;
            bool keepRow = fields[12] == "in stock" && fields[28] == "yes";
            return !keepRow;
        }

        /// <summary>
        /// Returns the single size described by the row: size text from column 39,
        /// colour from <see cref="GetColour"/>, always marked as available.
        /// </summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected override IEnumerable<ProductSize> GetSizes(object data)
        {
            return new List<ProductSize>() {
                new ProductSize() {
                    Colour = GetColour(data),
                    Size = ((string[])data)[39],
                    Available = true
                }
            };
        }

        /// <summary>Colour: column 35 of the feed row. Subclasses may override it.</summary>
        /// <param name="data">Feed row as a <c>string[]</c>.</param>
        protected virtual string GetColour(object data)
        {
            string[] fields = (string[])data;
            return fields[35];
        }
    }
}


Comments

Popular posts from this blog

Web Scraping material

Scrapy Splash

Utility