Passing parameters from command line to script

I am writing a program to scrape the following website: https://filmstoon.in/

From it, I want to find several movies (Batman Begins, Iron Man, Expendables 3) and a TV series (Game of Thrones), and scrape each one's title, host URL and meta URL. I managed to do this; however, the code is hard-coded for those specific titles. The code:

// Third-party HTML parser; presumably the source of file_get_html() and the
// simple_html_dom class that the functions below call.
include  ("simple_html_dom.php");
// 0 disables PHP's execution time limit, so a long scrape is not cut off.
ini_set('max_execution_time', 0);
// Time zone used by every date() call that stamps the output rows.
date_default_timezone_set('Europe/Vilnius');
// Hard-coded pages to scrape: $link is the TV-series page (read by getting_url()
// through `global $link`); $link1..$link3 are the individual movie pages.
$link = "https://filmstoon.in/series/game-of-thrones/";
$link1 = "https://filmstoon.in/batman-begins/";
$link2 = "https://filmstoon.in/iron-man/";
$link3 = "https://filmstoon.in/expendables-3/";

//TV Series
/**
 * Plain data holder for one scraped episode page.
 *
 * All three fields start out unset (null) and are filled in through the
 * matching setter; the getters simply hand the stored value back.
 */
class episode
{
    /** Title text of the episode. */
    private $title;

    /** URL of the embedded player (the iframe source). */
    private $host_url;

    /** URL of the episode page the data was scraped from. */
    private $linking_url;

    // ---- readers ----

    public function getTitle()
    {
        return $this->title;
    }

    public function getHost_url()
    {
        return $this->host_url;
    }

    public function getLinking_url()
    {
        return $this->linking_url;
    }

    // ---- writers ----

    public function setTitle($title)
    {
        $this->title = $title;
    }

    public function setHost_url($host_url)
    {
        $this->host_url = $host_url;
    }

    public function setLinking_url($linking_url)
    {
        $this->linking_url = $linking_url;
    }
}

/**
 * Scrapes every episode of the series page and reports each one.
 *
 * For each episode link returned by getting_url(), the episode page is
 * scraped with info_from_linking(); the resulting page URL, player URL and
 * title are then printed (echo_to_server) and appended to the log file
 * (writeToFile), in that order.
 */
function main(){
  foreach (getting_url() as $episodeUrl) {
    $episode = info_from_linking($episodeUrl);

    // Read the three fields once and reuse them for both outputs.
    $pageUrl   = $episode->getLinking_url();
    $playerUrl = $episode->getHost_url();
    $name      = $episode->getTitle();

    echo_to_server($pageUrl, $playerUrl, $name);
    writeToFile($pageUrl, $playerUrl, $name);
  }
}

/**
 * Collects the episode links listed on a series page.
 *
 * @param string|null $url Series page to read. When omitted, the global
 *                         $link is used, so existing no-argument calls
 *                         behave as before.
 * @return array List of href values of every `.les-content a` element;
 *               empty when the page could not be loaded.
 */
function getting_url($url = null){
  if ($url === null) {
    global $link;
    $url = $link;
  }

  $html = file_get_html($url);
  if (!$html) {
    // file_get_html() yields false when the page cannot be fetched or parsed;
    // calling ->find() on that would be a fatal error.
    trigger_error("getting_url: could not load {$url}", E_USER_WARNING);
    return array();
  }

  $array_url = array();
  foreach ($html->find('.les-content a') as $anchor) {
    $array_url[] = $anchor->href;
  }
  return $array_url;
}

/**
 * Scrapes a single episode page into an episode object.
 *
 * The returned object always carries the page URL. Title and player URL are
 * filled in only when the corresponding elements exist, so a page that fails
 * to load or has an unexpected layout yields an episode with those fields
 * left null instead of aborting the whole run with a fatal error.
 *
 * @param string $episode_link URL of the episode page.
 * @return episode
 */
function info_from_linking($episode_link){
  $episode = new episode;
  $episode->setLinking_url($episode_link);

  $page = file_get_html($episode_link);
  if (!$page) {
    // Fetch/parse failed: nothing more can be extracted.
    return $episode;
  }

  // Asking for index 0 returns null (not an empty array) when nothing matches.
  $mainDiv = $page->find('div[class="main-content main-detail"]', 0);
  if (!$mainDiv) {
    return $episode;
  }

  $titleNode = $mainDiv->find('h3[itemprop="name"]', 0);
  if ($titleNode) {
    $episode->setTitle($titleNode->plaintext);
  }

  // Index 1: the player is taken from the second iframe inside the container.
  $playerFrame = $mainDiv->find('iframe', 1);
  if ($playerFrame) {
    $episode->setHost_url($playerFrame->src);
  }

  return $episode;
}

/**
 * Prints one tab-separated result row, terminated by a newline.
 *
 * Columns: current timestamp (m/d/Y H:i), page URL, player URL, title.
 *
 * @param string $linking_url URL of the scraped page.
 * @param string $host_url    URL of the embedded player.
 * @param string $title       Title of the episode.
 */
function echo_to_server($linking_url, $host_url, $title){
  $date = date('m/d/Y H:i', time());
  // The separators must be the escape sequences \t and \n; without the
  // backslashes the row contains the literal letters "t" and "n".
  echo "{$date} \t {$linking_url} \t {$host_url} \t {$title} \n";
}

/**
 * Appends one result record to scrape.txt in the current working directory.
 *
 * The record is a positional array (timestamp, page URL, player URL, title)
 * written in print_r() format.
 *
 * @param string $linking_url URL of the scraped page.
 * @param string $host_url    URL of the embedded player.
 * @param string $title       Title of the episode.
 */
function writeToFile($linking_url, $host_url, $title){
  $record = array(
    date('m/d/Y H:i', time()),
    $linking_url,
    $host_url,
    $title,
  );
  $dump = print_r($record, true);
  file_put_contents('scrape.txt', $dump, FILE_APPEND);
}
// Run the TV-series part: scrape the series page stored in $link and report every episode.
main();

//Movies
/**
 * Scrapes one movie page, prints the result row and appends it to scrape.txt.
 *
 * Nothing is printed or written when the page cannot be fetched or does not
 * contain the expected main container.
 *
 * @param string $url URL of the movie page.
 */
function get_content_movies($url){
    $htmlContent = file_get_contents($url);
    if ($htmlContent === false) {
      // file_get_contents() has already raised a warning; there is nothing to parse.
      return;
    }

    $dom = new simple_html_dom();
    $dom->load($htmlContent);

    // Look the container up once; index 0 yields null when it is absent.
    $file = $dom->find('div[class="main-content main-detail"]', 0);
    if (!$file) {
      return;
    }

    $titleNode = $file->find('h3[itemprop="name"]', 0);
    // Index 1: the player is taken from the second iframe inside the container.
    $frameNode = $file->find('iframe', 1);
    $metaNode = $dom->find('meta[property="og:url"]', 0);

    // Missing elements leave the corresponding column empty.
    $title = $titleNode ? $titleNode->plaintext : null;
    $host_url = $frameNode ? $frameNode->src : null;
    $meta_link = $metaNode ? $metaNode->content : null;

    $date = date('m/d/Y H:i', time());
    // Separators must be the escape sequences \t and \n, not the letters "t" and "n".
    echo "{$date} \t {$host_url} \t {$meta_link} \t {$title} \n";

    $result = array($title, $host_url, $meta_link, $date);
    $output = 'scrape.txt';
    file_put_contents($output, print_r($result, true), FILE_APPEND);
}
// Scrape each of the three hard-coded movie pages in turn.
get_content_movies($link1);
get_content_movies($link2);
get_content_movies($link3);

JavaScript
​x
 
include  ("simple_html_dom.php");ini_set('max_execution_time', 0);date_default_timezone_set('Europe/Vilnius');$link = "https://filmstoon.in/series/game-of-thrones/";$link1 = "https://filmstoon.in/batman-begins/";$link2 = "https://filmstoon.in/iron-man/";$link3 = "https://filmstoon.in/expendables-3/";​//TV Seriesclass episode{  private $title;  private $host_url;  private $linking_url;  public function setTitle($title){    $this->title = $title;  }  public function getTitle(){    return $this->title;  }  public function setHost_url($host_url){    $this->host_url = $host_url;  }  public function getHost_url(){    return $this->host_url;  }  public function setLinking_url($linking_url){    $this->linking_url = $linking_url;  }  public function getLinking_url(){    return $this->linking_url;  }}​function main(){  $array_url = getting_url();  foreach($array_url as $single_link){    $episodeObject = info_from_linking($single_link);    echo_to_server($episodeObject->getLinking_url(), $episodeObject->getHost_url(),  $episodeObject->getTitle());    writeToFile($episodeObject->getLinking_url(), $episodeObject->getHost_url(),  $episodeObject->getTitle());  }}​function getting_url(){  global $link;  $html = file_get_html($link);  $array_url = array();  foreach($html->find('.les-content a') as $divClass) {    $linking_url = $divClass->href;    array_push($array_url, $linking_url);  }  return $array_url;}​function info_from_linking($episode_link){  $inside_linking = file_get_html($episode_link);  $mainDiv = $inside_linking->find('div[class="main-content main-detail"]')[0];  $title = $mainDiv->find('h3[itemprop="name"]',0)->plaintext;  $host_url = $mainDiv->find('iframe',1)->src;  $class = new episode;  $class->setTitle($title);  $class->setHost_url($host_url);  $class->setLinking_url($episode_link);  return $class;}​function echo_to_server($linking_url, $host_url, $title){  $date = date('m/d/Y H:i', time());  echo "{$date} t {$linking_url} t {$host_url} t {$title} n";}​function 
writeToFile($linking_url, $host_url, $title){  $date = date('m/d/Y H:i', time());  $result = array($date, $linking_url, $host_url, $title);  $output = 'scrape.txt';  file_put_contents($output, print_r($result, true), FILE_APPEND);}main();​//Moviesfunction get_content_movies($url){    $htmlContent = file_get_contents($url);    $dom = new simple_html_dom();    $dom->load($htmlContent);    if(count($dom->find('div[class="main-content main-detail"]'))>0){      $file = $dom->find('div[class="main-content main-detail"]')[0];      $title = $file->find('h3[itemprop="name"]',0)->plaintext;      $host_url = $file->find('iframe',1)->src;      $meta_link = $dom->find('meta[property="og:url"]',0)->content;      $date = date('m/d/Y H:i', time());      echo "{$date} t {$host_url} t {$meta_link} t {$title} n";      $result = array($title, $host_url, $meta_link, $date);      $output = 'scrape.txt';      file_put_contents($output, print_r($result, true), FILE_APPEND);    }}get_content_movies($link1);get_content_movies($link2);get_content_movies($link3);​

Everything works fine, however, I would like to make it so that if I write:

php crawler.php batman begins

JavaScript
 
php crawler.php batman begins​

or any other title while executing the code in cmd, it would specifically find that movie/tv series and execute the script that I wrote.

So far, the only ideas I’ve had for doing this are either to scrape the entire site, store it in a database (a .txt file, for example) and then look up the content in it using $argc and $argv, or to go to the main page and use its search function: the argument I write on the command line would be passed to the search form, and the script would then run on the result.

However, I cannot wrap my head around how to implement either of these ideas, since I am quite new to this.

Answer

/**
 * Joins the command-line arguments into a single string.
 *
 * Element 0 of $argv (the script name) is skipped; the remaining elements
 * are concatenated with $seperator between them.
 *
 * @param array  $argv      Argument vector as provided by PHP's CLI.
 * @param string $seperator Text placed between consecutive arguments.
 * @return string The joined arguments, or '' when none were given.
 */
function getArgumentValues($argv, $seperator){
  if (!is_array($argv)) {
    // $argv is not populated outside the CLI; treat that as "no arguments".
    return '';
  }
  // array_slice() returns an empty list when only the script name is present,
  // so no undefined index is read.
  return implode($seperator, array_slice($argv, 1));
}

/**
 * Finds a title on the listing page and scrapes the page it links to.
 *
 * The search term is built from the command-line arguments. The first link
 * on the listing page whose `oldtitle` attribute starts with that term
 * (case-insensitive) is followed. Series links are handed to main(); movie
 * pages are scraped here, printed as a tab-separated row and appended to
 * scrape.txt.
 *
 * @param string $linkMovies URL (or path) of the listing page to search.
 * @param array  $argv       Command-line argument vector.
 * @return null Always null; the results are emitted as side effects.
 */
function get_content_movies($linkMovies, $argv){
    $argvValue = getArgumentValues($argv, " ");
    if ($argvValue === '') {
      // An empty search term would match the first title on the page.
      return null;
    }

    $htmlContent = file_get_contents($linkMovies);
    if ($htmlContent === false) {
      return null;
    }

    // "~" is used as delimiter so the quotes and slashes of the HTML need no
    // escaping; preg_quote() keeps characters such as "(" or "?" typed on the
    // command line from being interpreted as regex syntax.
    $pattern = '~href="(.*?)".*?oldtitle="' . preg_quote($argvValue, '~') . '~i';
    if (!preg_match($pattern, $htmlContent, $search)) {
      return null;
    }
    $key = $search[1];

    if (preg_match("/series/", $key)) {
      // main() reads the series page from the global $link (via getting_url()),
      // so point it at the page that was just found.
      $GLOBALS['link'] = $key;
      main();
      return null;
    }

    $htmlContent = file_get_contents($key);
    if ($htmlContent === false) {
      return null;
    }

    preg_match('~<h3 itemprop="name">(.*)</h3>~iU', $htmlContent, $title);
    preg_match('~<iframe.*data-lazy-src="(.*)".*></iframe>~iU', $htmlContent, $embed_url);
    preg_match('~<meta.*property="og:url".*content="(.*)".*/>~iU', $htmlContent, $meta_url);

    // A pattern that did not match leaves its array empty; use '' for that column.
    $titleText = isset($title[1]) ? $title[1] : '';
    $embedLink = isset($embed_url[1]) ? $embed_url[1] : '';
    $metaLink = isset($meta_url[1]) ? $meta_url[1] : '';

    $date = date('m/d/Y H:i', time());
    // Separators must be the escape sequences \t and \n, not the letters "t" and "n".
    echo "{$date} \t {$embedLink} \t {$metaLink} \t {$titleText} \n";

    $result = array($date, $embedLink, $metaLink, $titleText);
    $output = 'scrape.txt';
    file_put_contents($output, print_r($result, true), FILE_APPEND);
    return null;
}
// Search the listing page for the title given on the command line ($argv) and scrape the match.
// NOTE(review): $linkMovies is not defined in the code shown here — presumably the URL of the
// site's listing page; confirm it is set before this call.
get_content_movies($linkMovies, $argv);

JavaScript
 
function getArgumentValues($argv, $seperator){$values = "$argv[1]";  foreach($argv as $key=>$value){    if($key>1){      $values.="$seperator$value";      }    }return $values;}​function get_content_movies($linkMovies, $argv){    $htmlContent = file_get_contents($linkMovies);    $argvValue = getArgumentValues($argv, " ");    if(!preg_match("/href="(.*?)".*?oldtitle="$argvValue/i", $htmlContent, $search)){      return null;    };    $key = array_values($search)[1];    $htmlContent = file_get_contents($key);    if(preg_match("/series/", $key)){      main();    }    else{    preg_match('/<h3 itemprop="name">(.*)</h3>/iSU', $htmlContent, $title);    preg_match('/<iframe.*data-lazy-src="(.*)".*></iframe>/iSU', $htmlContent, $embed_url);    preg_match('/<meta.*property="og:url".*content="(.*)".*/>/iSU', $htmlContent, $meta_url);    $date = date('m/d/Y H:i', time());    echo "{$date} t {$embed_url[1]} t {$meta_url[1]} t {$title[1]} n";    $result = array($date, $embed_url[1], $meta_url[1], $title[1]);    $output = 'scrape.txt';    file_put_contents($output, print_r($result, true), FILE_APPEND);    }}get_content_movies($linkMovies, $argv);​

Advertisement

Answer