I’m trying to create an array of Amazon product variants using PHP’s DOM extension. My desired array should look like this:
["Variant Name":"ASIN number"]
Here is my code:
// Download the product page.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
$html = curl_exec($ch); // the statement terminator was missing here, which is a parse error
curl_close($ch);

// Parse the markup and select the variant swatch elements.
$dom = new DomDocument();
$dom->loadHTML($html);
$dom_xpath = new DOMXpath($dom);
$variants = $dom_xpath->query('//*[@class="swatchAvailable" or @class="swatchSelect"]');

// The containers must exist before properties are assigned to them:
// PHP 8 throws an Error instead of silently creating an object from null.
$json1 = new stdClass();
$json = new stdClass();

foreach ($variants as $data) {
    $input = $data->getAttribute("data-defaultasin");
    $inputn = $data->getAttribute("title");
    if (!empty($input)) {
        // Take the first ten characters of the attribute as the ASIN.
        preg_match_all('/(.{10})/', $input, $output);
        // Turn the "Click to select " prefix into a delimiter and keep what follows it.
        $output1 = str_replace("Click to select ", "|", $inputn);
        $split = explode("|", $output1);
        $json1->SizeVariant3[] = $split[1];
        $json1->SizeVariant4[] = $output[0][0];
        // Rebuilt on every pass so VariantB always maps every name seen so far to its ASIN.
        $json->VariantB = array_combine($json1->SizeVariant3, $json1->SizeVariant4);
    }
}
My code works, but it contains some mistakes and might not work for all Amazon products, so I need suggestions for improvement. Also, my output is a JSON object:
{ "2031 Deep Blue": "B08LZH84TN", "2031 Khaki": "B08LZHMQXS", }
while I want it to be an array as I mentioned above.
Advertisement
Answer
<?php
/**
 * Fetch a URL with cURL and return the body together with transfer metadata.
 *
 * @param string|null $url Address to request; surrounding whitespace is trimmed.
 * @return object Object with three properties: `response` (the body, or false when
 *                the transfer failed), `info` (curl_getinfo() data cast to an object)
 *                and `errors` (the curl_error() message, empty on success).
 */
function curl( $url=NULL ){
    # download a copy from internet - https://curl.haxx.se/docs/caextract.html
    $cacert='c:/wwwroot/cacert.pem';

    # Normalise once: avoids passing NULL to string functions and makes the
    # scheme test and the request itself use the same value.
    $url=trim( (string)$url );

    $curl=curl_init();
    if( strtolower( (string)parse_url( $url, PHP_URL_SCHEME ) )==='https' ){
        curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
        curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
        # CAINFO takes a bundle file; CAPATH expects a directory, so it is not set here.
        curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
    }
    curl_setopt( $curl, CURLOPT_URL, $url );
    curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
    curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $curl, CURLOPT_FAILONERROR, true );
    curl_setopt( $curl, CURLOPT_HEADER, false );
    curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true );
    curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
    curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Curly-Wurly Ding-Dong' );
    curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
    # An empty string asks cURL to accept every encoding it supports.
    curl_setopt( $curl, CURLOPT_ENCODING, '' );

    $res=(object)array(
        'response'  =>  curl_exec( $curl ),
        'info'      =>  (object)curl_getinfo( $curl ),
        'errors'    =>  curl_error( $curl )
    );
    curl_close( $curl );
    return $res;
}

/**
 * Build a "variant name => ASIN" map from the HTML of a product page.
 *
 * Elements whose class attribute is exactly "swatchAvailable" or "swatchSelect"
 * are read; the name comes from the `title` attribute with the
 * "Click to select " prefix removed, the ASIN from `data-defaultasin`.
 *
 * @param string $html Raw page markup.
 * @return array Associative array keyed by variant name; empty when nothing matches.
 */
function extractVariants( $html ){
    $variants=array();
    if( !is_string( $html ) || $html==='' ) return $variants;

    # Real-world markup is rarely valid; collect parser warnings instead of emitting them.
    $previous=libxml_use_internal_errors( true );
    $dom=new DOMDocument;
    $dom->validateOnParse=false;
    $dom->recover=true;
    $dom->strictErrorChecking=false;
    $dom->loadHTML( $html );
    libxml_clear_errors();
    libxml_use_internal_errors( $previous );

    $xp=new DOMXPath( $dom );
    $expr='//*[@class="swatchAvailable" or @class="swatchSelect"]';
    $col=$xp->query( $expr );

    if( $col && $col->length > 0 ){
        foreach( $col as $node ){
            $asin=trim( $node->getAttribute('data-defaultasin') );
            $title=trim( str_replace( array( 'Click to select ', '|' ), '', $node->getAttribute('title') ) );

            # A swatch without an ASIN or a name cannot contribute a usable entry.
            if( $asin==='' || $title==='' ) continue;

            $variants[$title]=$asin;
        }
    }
    return $variants;
}

$url='https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1';
$res=curl( $url );

if( $res->info->http_code==200 && is_string( $res->response ) && $res->response!=='' ){
    $tmp=extractVariants( $res->response );
    # The names originate from a remote page, so escape them before printing into HTML.
    printf( '<pre>%s</pre>', htmlspecialchars( print_r( $tmp, true ), ENT_QUOTES, 'UTF-8' ) );
} else {
    # Report the failure rather than finishing silently with no output.
    printf(
        '<pre>Request failed (HTTP %d): %s</pre>',
        $res->info->http_code,
        htmlspecialchars( (string)$res->errors, ENT_QUOTES, 'UTF-8' )
    );
}
?>
Which outputs:
Array ( [2031 Deep Blue] => B08LZH84TN [2031 Khaki] => B08LZHMQXS [2031 Light Grey] => B08LZFGGRL [2031 Navy] => B08LZNGD5H [2031 Deep Grey] => B08LZHZXDW [2031 Wine Red] => B08LZHHGPD [8636 All White] => B07PHQ69B7 [2031 All Black] => B08LZKXC3G )
When tested with a different URL, it yielded:
Array ( [Wine] => B07NYYZSWG [Gold] => B07H4P7TZP [Rose Gold] => B07H4ZMTML [Silver Glitz] => B07P27Y9SQ )