Skip to content
Advertisement

Data from an Parser is storing 2x the same data instead of 1x some data and the other 1x the other data

I’ve made a parser and it’s supposed to take data from 2 pages in a webshop and save those data but it’s now only saving the data from 1 page two times instead of the data from both the pages each one time. Can someone figure out why only the second page’s data is saved instead of both pages data? output when i run the command and what he is supposed to save what the command is saving

`<?php

namespace AppConsoleCommands;

use AppModelsPageParse;
use IlluminateConsoleCommand;
use AppModelsPage;
use AppModelsItem;
use AppModelsWebshop;
use IlluminateNotificationsNotification;
use AppHttpRequests;
use IlluminateHttpRequest;

class Parser extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'Parser';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Command description';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * @return int
     * @param  IlluminateHttpRequest  $request
     */
    public function handle()
    {
        date_default_timezone_set('Europe/Amsterdam');
        $pages = Page::all();
        $this->info('Paginas opgehaald');
        foreach($pages as $page){
                $this->parsePage($page, $pages);
                $this->info('Nu bij pagina:'.$page->url);
        }
    }


    private function parsePage($page, $pages)
    {
        libxml_use_internal_errors(true);

        $this->info('Pagina: '.$page->id.' '.$page->url);

        $pageContent = file_get_contents($page->url);
        $dom = new DOMDocument();
        $dom->loadHTML($pageContent);
        $xpath = new DOMXPath($dom);
        libxml_clear_errors();

        //Parse price
        $price = $xpath->query($page->parser->xpath_price)->item(0);
        $this->info('Price: '.$price->textContent);

        //Parse stock
        $stock = $xpath->query($page->parser->xpath_stock)->item(0);
        $this->info('Stock: '.$stock->textContent);

        //save pageparse
            $this->saveParse($page,$price,$stock);
        //check if variable has changed and if so change the data. If not changed just save the data.
        $page_parses = PageParse::all();
        foreach($page_parses as $page_parse){
            if($page_parse->price != $xpath->query($page->parser->xpath_price)->item(0) or $page_parse->stock != $xpath->query($page->parser->xpath_stock)->item(0)) {
            $this->updateParse($page, $price, $stock, $page_parse);
        }else{
               // do nothing
        }
        }

   }


    private function saveParse($page,$price,$stock)
    {
            $page_parse = new PageParse();
            $page_parse->parse_date = date('Y-m-d H:i:s');
            $page_parse->price = $price->textContent;
            $page_parse->stock = $stock->textContent;
            $page_parse->page_id = $page->id;
            $page_parse->save();
    }

    private function updateParse($page,$price,$stock,$page_parse){
        $page_parse = PageParse::find($page_parse->id);
        $page_parse->parse_date = date('Y-m-d H:i:s');
        $page_parse->price = $price->textContent;
        $page_parse->stock = $stock->textContent;
        $page_parse->page_id = $page->id;
        $page_parse->save();
    }
}`

Advertisement

Answer

I think it’s because a mistake here:

foreach($page_parses as $page_parse){
            if($page_parse->price != $xpath->query($page->parser->xpath_price)->item(0) or $page_parse->stock != $xpath->query($page->parser->xpath_stock)->item(0)) {
            $this->updateParse($page, $price, $stock, $page_parse);
        }else{

You iterate all of parsed pages every time a new page is parsed and compare different links to each other. For example consider your first link is parsed, and you have it in db. Next when you parse the second page, you compare second page date with first one and actually they are different. So you edit every item even page_id. That will change first record in database.

User contributions licensed under: CC BY-SA
2 People found this is helpful
Advertisement