I’m trying to create an array of Amazon product variants using PHP’s DOM extension. My desired array should look like this:
JavaScript
x
["Variant Name":"ASIN number"]
Here is my code:
JavaScript
// Download the product page; CURLOPT_RETURNTRANSFER makes curl_exec() return
// the body as a string instead of echoing it.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, 'https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
$html = curl_exec($ch);
curl_close($ch);

// Create the result holders up front: assigning a property on an undefined
// variable throws an Error on PHP 8.
$json1 = new stdClass();
$json1->SizeVariant3 = array(); // variant names
$json1->SizeVariant4 = array(); // matching ASINs
$json = new stdClass();
$json->VariantB = array();

// curl_exec() returns false on failure, and loadHTML() rejects an empty string.
if (is_string($html) && $html !== '') {
    // Real-world markup is rarely valid HTML; collect libxml's parse warnings
    // internally instead of letting them leak into the output.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $dom_xpath = new DOMXPath($dom);
    $variants = $dom_xpath->query('//*[@class="swatchAvailable" or @class="swatchSelect"]');
    foreach ($variants as $data) {
        $input = $data->getAttribute("data-defaultasin");
        $inputn = $data->getAttribute("title");

        // Keep only the first 10 characters of the attribute (the ASIN);
        // skip swatches whose attribute is missing or too short.
        if (!preg_match('/^.{10}/', $input, $output)) {
            continue;
        }

        // Drop the "Click to select " prefix. A title without the prefix is
        // used as-is rather than reading an undefined array offset.
        $name = trim(str_replace("Click to select ", "", $inputn));
        if ($name === '') {
            continue;
        }

        $json1->SizeVariant3[] = $name;
        $json1->SizeVariant4[] = $output[0];
    }

    // Build the name => ASIN map once, after every swatch has been collected.
    if (count($json1->SizeVariant3) > 0) {
        $json->VariantB = array_combine($json1->SizeVariant3, $json1->SizeVariant4);
    }
}
My code works, but it contains some mistakes and might not work for all Amazon products, so I need suggestions and improvements. Also, my output is a JSON object:
JavaScript
{
"2031 Deep Blue": "B08LZH84TN",
"2031 Khaki": "B08LZHMQXS",
}
while I want it to be an array as I mentioned above.
Advertisement
Answer
JavaScript
<?php
/**
 * Perform a GET request with cURL and return the outcome as a single object.
 *
 * For https URLs peer and host verification are enabled. When $cacert points
 * to a readable CA bundle it is used for verification; otherwise libcurl's
 * default certificate store is used, so the request does not fail merely
 * because the bundle file is absent on this machine.
 *
 * @param string|null $url    Address to fetch. Surrounding whitespace is trimmed.
 * @param string      $cacert Path to a PEM CA bundle file
 *                            (e.g. from https://curl.haxx.se/docs/caextract.html).
 *
 * @return object Object with three properties:
 *                - response: the body as a string, or false when the transfer failed
 *                - info:     curl_getinfo() data cast to an object (http_code, ...)
 *                - errors:   curl_error() message, '' when no error occurred
 */
function curl( $url=NULL, $cacert='c:/wwwroot/cacert.pem' ){
    # Normalise once: trim() and parse_url() on NULL are deprecated since PHP 8.1.
    $url=trim( (string)$url );

    $curl=curl_init();
    if( parse_url( $url,PHP_URL_SCHEME )=='https' ){
        curl_setopt( $curl, CURLOPT_SSL_VERIFYPEER, true );
        curl_setopt( $curl, CURLOPT_SSL_VERIFYHOST, 2 );
        # CURLOPT_CAINFO takes a bundle *file*. CURLOPT_CAPATH expects a directory
        # of hashed certificates, so it must not be given the same file path.
        if( is_string( $cacert ) && is_readable( $cacert ) ){
            curl_setopt( $curl, CURLOPT_CAINFO, $cacert );
        }
    }
    curl_setopt( $curl, CURLOPT_URL, $url );
    curl_setopt( $curl, CURLOPT_AUTOREFERER, true );
    curl_setopt( $curl, CURLOPT_FOLLOWLOCATION, true );
    curl_setopt( $curl, CURLOPT_FAILONERROR, true );    # HTTP status >= 400 is reported as a failure
    curl_setopt( $curl, CURLOPT_HEADER, false );
    curl_setopt( $curl, CURLINFO_HEADER_OUT, false );
    curl_setopt( $curl, CURLOPT_RETURNTRANSFER, true ); # return the body instead of echoing it
    curl_setopt( $curl, CURLOPT_CONNECTTIMEOUT, 20 );
    curl_setopt( $curl, CURLOPT_TIMEOUT, 60 );
    curl_setopt( $curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Curly-Wurly Ding-Dong' );
    curl_setopt( $curl, CURLOPT_MAXREDIRS, 10 );
    curl_setopt( $curl, CURLOPT_ENCODING, '' );         # '' = accept every encoding libcurl supports

    # Gather everything before the handle is released.
    $res=(object)array(
        'response' => curl_exec( $curl ),
        'info'     => (object)curl_getinfo( $curl ),
        'errors'   => curl_error( $curl )
    );

    # curl_close() has no effect from PHP 8.0 onwards; only older versions need it.
    if( PHP_VERSION_ID < 80000 ){
        curl_close( $curl );
    }
    return $res;
}
# Fetch the product page and list its colour/size variants as "title => ASIN".
$url='https://www.amazon.co.uk/dp/B08LZHMQXS?psc=1';
$res=curl( $url );

# Parse only a successful, non-empty response: loadHTML() throws on an empty string in PHP 8.
if( (int)$res->info->http_code===200 && is_string( $res->response ) && $res->response!=='' ){
    # Scraped markup is rarely valid; keep libxml's parse warnings out of the output.
    libxml_use_internal_errors( true );
    $dom=new DOMDocument;
    $dom->validateOnParse=false;
    $dom->recover=true;
    $dom->strictErrorChecking=false;
    $dom->loadHTML( $res->response );
    libxml_clear_errors();

    $xp=new DOMXPath( $dom );
    $expr='//*[@class="swatchAvailable" or @class="swatchSelect"]';
    $tmp=array();
    $col=$xp->query( $expr );

    if( $col && $col->length > 0 ){
        foreach( $col as $node ){
            $asin=$node->getAttribute('data-defaultasin');
            $title=str_replace( array( 'Click to select ', '|' ), '', $node->getAttribute('title') );

            # A swatch without an ASIN or a title cannot form a usable entry.
            if( $asin==='' || $title==='' ){
                continue;
            }
            $tmp[$title]=$asin;
        }
    }

    # The values originate from a remote page, so escape them before emitting HTML.
    printf('<pre>%s</pre>',htmlspecialchars( print_r($tmp,true), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' ));
}else{
    # Report the failure instead of silently printing nothing.
    printf(
        '<pre>Request failed (HTTP %d): %s</pre>',
        (int)$res->info->http_code,
        htmlspecialchars( (string)$res->errors, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8' )
    );
}
?>
Which outputs:
JavaScript
Array
(
[2031 Deep Blue] => B08LZH84TN
[2031 Khaki] => B08LZHMQXS
[2031 Light Grey] => B08LZFGGRL
[2031 Navy] => B08LZNGD5H
[2031 Deep Grey] => B08LZHZXDW
[2031 Wine Red] => B08LZHHGPD
[8636 All White] => B07PHQ69B7
[2031 All Black] => B08LZKXC3G
)
When tested with a different URL, it yielded:
JavaScript
Array
(
[Wine] => B07NYYZSWG
[Gold] => B07H4P7TZP
[Rose Gold] => B07H4ZMTML
[Silver Glitz] => B07P27Y9SQ
)