Skip to content
Advertisement

Fetching image URLs from Wikipedia

I’m using the Wikipedia API to scrape images. The API returns its data as JSON, in which each image URL looks like this: “https://upload.wikimedia.org/wikipedia/en/f/f7/Canada%27s_Aviation_Hall_of_Fame_logo.jpg”. The part of the URL that is the same for all images is “https://upload.wikimedia.org/wikipedia/en/”. The PHP code is as follows:

<form action="" method="get">
    <input type="text" name="search">
    
    <input type="submit" value="Search">
</form>

<?php
// Asker's attempt: when a search term is submitted, request the MediaWiki
// "allimages" list with cURL and print an <img> tag for every regex match
// found in the raw response text.

// The @ operator silences the notice raised when 'search' is not in $_GET.
if(@$_GET['search']){
    // The user's term is title-cased with ucwords() and concatenated into the
    // query string as-is (no URL encoding is applied here).
    $api_url="https://en.wikipedia.org/w/api.php?action=query&format=json&list=allimages&aifrom=".ucwords($_GET['search'])."&ailimit=500";
    // NOTE(review): the search argument is the empty string '', not a space
    // ' ' — presumably a space was intended so that spaces become %20.
    $api_url=str_replace('', '%20', $api_url);
    $curl=curl_init();
    curl_setopt($curl, CURLOPT_URL, $api_url);
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($curl,CURLOPT_RETURNTRANSFER, true);
    $output=curl_exec($curl);
    curl_close($curl);
    // The pattern contains only the fixed URL prefix (with '!' as delimiter and
    // unescaped dots), so every entry of $data[0] is just that prefix rather
    // than a complete image URL. The response is searched as plain text; it is
    // never decoded as JSON.
    preg_match_all('!//upload.wikimedia.org/wikipedia/en/!', $output, $data);
    // Opens a <pre> element; no closing </pre> is emitted in this snippet.
    echo '<pre>';
    // $data[0] holds the full-pattern matches; one <img> is printed per match.
    foreach ($data[0] as $list) {
        echo "<img src='$list'/>";
        # code...
    }

    }



?>

How can I get the remaining part of the url correctly?

Advertisement

Answer

You need to decode the JSON response with json_decode and then read each image’s URL from the decoded array:

/**
 * Fetch image URLs from the English Wikipedia "allimages" API list.
 *
 * @param string     $search Title to start enumerating from (sent as `aifrom`).
 * @param int|string $limit  Maximum number of images to request (sent as `ailimit`).
 *
 * @return array|false List of image URL strings (possibly empty), or false when
 *                     the HTTP request fails or the body is not a JSON object/array.
 */
function get_wiki_image( $search, $limit) {

  // Build the query string with http_build_query() so that spaces, '&', '#'
  // and other reserved characters in $search are percent-encoded instead of
  // corrupting the request. The separator is passed explicitly because the
  // arg_separator.output ini setting may be configured to something else.
  $query = http_build_query(
    [
      'action'  => 'query',
      'format'  => 'json',
      'list'    => 'allimages',
      'aifrom'  => $search,
      'ailimit' => $limit,
    ],
    '',
    '&',
    PHP_QUERY_RFC3986
  );

  // Single slash before api.php (the path is /w/api.php, not /w//api.php).
  $url = 'https://en.wikipedia.org/w/api.php?' . $query;

  // TLS certificate verification is deliberately left at PHP's default
  // (enabled); turning verify_peer off would let a man-in-the-middle alter
  // the response. @ keeps a failed request a quiet `false` for the caller.
  $content = @file_get_contents($url);
  if ($content === false) {
    return false;
  }

  $data = json_decode($content, true);
  if (!is_array($data)) {
    // Empty or malformed body: treat it as a failed lookup.
    return false;
  }

  // An API error response has no query/allimages section; that is a valid
  // reply with zero images rather than a transport failure.
  if (!isset($data['query']['allimages']) || !is_array($data['query']['allimages'])) {
    return [];
  }

  $ret = [];
  foreach ($data['query']['allimages'] as $img) {
    // Skip any entry that does not carry a url field.
    if (isset($img['url'])) {
      $ret[] = $img['url'];
    }
  }
  return $ret;

}

// Read the search term defensively: the parameter may be absent (first page
// load) or an array (?search[]=x), neither of which ucwords() should receive.
$search = (isset($_GET['search']) && is_string($_GET['search']))
  ? ucwords(trim($_GET['search']))
  : '';

// Only query the API when a non-empty term was actually submitted.
if ($search !== '') {
  $images = get_wiki_image($search,500);

  // get_wiki_image() returns false on failure; iterate only over a real array.
  if (is_array($images)) {
    foreach($images as $img) {
      // Escape for the single-quoted HTML attribute so a quote or angle
      // bracket in the remote value cannot break out of the src attribute.
      $src = htmlspecialchars($img, ENT_QUOTES, 'UTF-8');
      echo "<img src='{$src}'>";
    }
  }
}
User contributions licensed under: CC BY-SA
3 people found this helpful
Advertisement