Skip to content
Advertisement

Parse Large (100MB) JSON with PHP using salsify’s JsonStreamerParser in PHPProBid

I have a JSON file:

curl https://api.mercadolibre.com/sites/MLM/categories/all  > categoriesMLM.gz

which contains an object of objects (about 60000+). I’ve also installed https://github.com/salsify/jsonstreamingparser via composer to save every item into a database in PHPProBid as written in the following code:

public function SaveCategoryByStream(){
    require_once dirname(__FILE__).'/../../../../../../vendor/autoload.php';
    ini_set('memory_limit', '4024M');
    ini_set('max_execution_time', 0);
    $testfile = '/home/richi/Desktop/categoriesMLM.json';

    $listener = new JsonStreamingParserListenerGeoJsonListener(function ($category) {
        
        $category_name = Array(
            $category['name']
        );
        $category['id'] = preg_replace('/[^0-9]/', '', $category['id']);
        $id = $order_id = Array(
            $category['id']
        );
        $parent_id = $category['path_from_root'];
        if($parent_id == null){

        }else{
            $parent_id = end($category['path_from_root']);
            $parent_id = prev($category['path_from_root']);
            $parent_id = preg_replace('/[^0-9]/', '', $parent_id['id']);
        }
        $new_category = Array(
            'parent_id' => $parent_id,
            'id' => $id,
            'order_id' => $order_id,
            'name' => $category_name,
            'full_name' => $category_name
        );
        try {
            $categoriesService = new ServiceTableRelationalCategories();
            $categoriesService->save($new_category);
            $parent_id = $id;//['id'];
            //Saving all children categories of that specific category
            for($ii = 0; $ii < count($category['children_categories']); $ii++){
                $this->SaveChildrenCategory($parent_id, $category['children_categories'][$ii], $categoriesService);
            }
        }catch(Exception $e){
            echo $e;
        }
    });
    $stream = fopen($testfile, 'r');
    try {
        $parser = new JsonStreamingParserParser($stream, $listener);
        $parser->parse();
        fclose($stream);
    } catch (Exception $e) {
        fclose($stream);
        throw $e;
    }

    $controller = 'Mercadolibre';
    $headline = $this->_('MeliSync');
    $filter = 'first_time';
    return array(
        'controller' => $controller,
        'headline'   => $headline,
        'messages'   => $this->_flashMessenger->getMessages(),
        'filter'     => $filter
    );

}

Once decompressed, the JSON file looks something like this:

{
  "MLM5320": {
    "id": "MLM5320",
    "name": "Accesorios para Autos",
    "picture": "http://resources.mlstatic.com/category/images/a076507d-2b2a-43a8-b08a-86a2197c3a82.png",
    "permalink": null,
    "total_items_in_this_category": 0,
    "path_from_root": [
      {
        "id": "MLM1747",
        "name": "Accesorios para Vehículos"
      },
      {
        "id": "MLM5320",
        "name": "Accesorios para Autos"
      }
    ],
    "children_categories": [
      {
        "id": "MLM92132",
        "name": "Accesorios para el Exterior",
        "total_items_in_this_category": 119421
      },
      {
        "id": "MLM92130",
        "name": "Accesorios para el Interior",
        "total_items_in_this_category": 152424
      },
      {
        "id": "MLM92140",
        "name": "Otros",
        "total_items_in_this_category": 0
      }
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "buy_it_now",
        "auction"
      ],
      "catalog_domain": null,
      "coverage_areas": "not_allowed",
      "currencies": [
        "USD",
        "MXN"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "not_specified",
        "new",
        "used"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": false,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": null,
      "mirror_master_category": null,
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "me1",
        "not_specified",
        "custom"
      ],
      "shipping_options": [
        "carrier",
        "custom"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": false,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  },"MLM187169": {
    "id": "MLM187169",
    "name": "Alimentos para Bebés",
    "picture": null,
    "permalink": null,
    "total_items_in_this_category": 1752,
    "path_from_root": [
      {
        "id": "MLM1403",
        "name": "Alimentos y Bebidas"
      },
      {
        "id": "MLM187169",
        "name": "Alimentos para Bebés"
      }
    ],
    "children_categories": [
      {
        "id": "MLM189061",
        "name": "Leche",
        "total_items_in_this_category": 934
      },
      {
        "id": "MLM189062",
        "name": "Papilla",
        "total_items_in_this_category": 193
      },
      {
        "id": "MLM189060",
        "name": "Otros",
        "total_items_in_this_category": 619
      }
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "auction",
        "buy_it_now"
      ],
      "catalog_domain": null,
      "coverage_areas": "not_allowed",
      "currencies": [
        "USD",
        "MXN"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "used",
        "not_specified",
        "new"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": false,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": null,
      "mirror_master_category": "MLM39965",
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "custom",
        "not_specified",
        "me2",
        "me1"
      ],
      "shipping_options": [
        "carrier",
        "custom"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": true,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  },
  "MLM1403": {
    "id": "MLM1403",
    "name": "Alimentos y Bebidas",
    "picture": "http://resources.mlstatic.com/category/images/57ad1885-5f74-4ac0-b54c-d0b4c575a371.png",
    "permalink": "http://home.mercadolibre.com.mx/alimentos-bebidas/",
    "total_items_in_this_category": 72668,
    "path_from_root": [
      {
        "id": "MLM1403",
        "name": "Alimentos y Bebidas"
      }
    ],
    "children_categories": [
      {
        "id": "MLM187169",
        "name": "Alimentos para Bebés",
        "total_items_in_this_category": 1584
      },
      {
        "id": "MLM189607",
        "name": "Bebidas con Alcohol",
        "total_items_in_this_category": 4593
      },
      {
        "id": "MLM178700",
        "name": "Bebidas sin Alcohol",
        "total_items_in_this_category": 12501
      },
      {
        "id": "MLM1423",
        "name": "Comestibles",
        "total_items_in_this_category": 37780
      },
      {
        "id": "MLM187587",
        "name": "Kit de Preparación de Cerveza",
        "total_items_in_this_category": 300
      },
      {
        "id": "MLM191759",
        "name": "Lúpulo",
        "total_items_in_this_category": 0
      },
      {
        "id": "MLM189225",
        "name": "Yerba Mate",
        "total_items_in_this_category": 195
      },
      {
        "id": "MLM1417",
        "name": "Otros",
        "total_items_in_this_category": 15146
      }
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "buy_it_now",
        "auction"
      ],
      "catalog_domain": null,
      "coverage_areas": "not_allowed",
      "currencies": [
        "USD",
        "MXN"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "used",
        "not_specified",
        "new"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": false,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": null,
      "mirror_master_category": null,
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "me2",
        "not_specified",
        "me1",
        "custom"
      ],
      "shipping_options": [
        "carrier",
        "custom"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": false,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  },
  "MLM92150": {
    "id": "MLM92150",
    "name": "Accesorios para Cuatrimotos",
    "picture": "http://resources.mlstatic.com/category/images/61189af5-4e70-45b6-89fa-95339ccc2166.png",
    "permalink": null,
    "total_items_in_this_category": 2360,
    "path_from_root": [
      {
        "id": "MLM1747",
        "name": "Accesorios para Vehículos"
      },
      {
        "id": "MLM92150",
        "name": "Accesorios para Cuatrimotos"
      }
    ],
    "children_categories": [
      {
        "id": "MLM165695",
        "name": "Caja",
        "total_items_in_this_category": 136
      },
      {
        "id": "MLM189905",
        "name": "Fundas",
        "total_items_in_this_category": 52
      },
      {
        "id": "MLM165696",
        "name": "Rampas",
        "total_items_in_this_category": 69
      },
      {
        "id": "MLM165694",
        "name": "Ropa",
        "total_items_in_this_category": 165
      },
      {
        "id": "MLM165697",
        "name": "Winch",
        "total_items_in_this_category": 451
      },
      {
        "id": "MLM165698",
        "name": "Otros",
        "total_items_in_this_category": 1486
      }
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "buy_it_now",
        "auction"
      ],
      "catalog_domain": null,
      "coverage_areas": "not_allowed",
      "currencies": [
        "MXN",
        "USD"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "not_specified",
        "used",
        "new"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": false,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": null,
      "mirror_master_category": null,
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "custom",
        "me1",
        "not_specified"
      ],
      "shipping_options": [
        "carrier",
        "custom"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": false,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  },
  "MLM189061": {
    "id": "MLM189061",
    "name": "Leche",
    "picture": null,
    "permalink": null,
    "total_items_in_this_category": 114,
    "path_from_root": [
      {
        "id": "MLM1403",
        "name": "Alimentos y Bebidas"
      },
      {
        "id": "MLM187169",
        "name": "Alimentos para Bebés"
      },
      {
        "id": "MLM189061",
        "name": "Leche"
      }
    ],
    "children_categories": [
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "auction",
        "buy_it_now"
      ],
      "catalog_domain": "MLM-BABIES_FORMULA",
      "coverage_areas": "not_allowed",
      "currencies": [
        "USD",
        "MXN"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "used",
        "not_specified",
        "new"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": true,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": "MLM189058",
      "mirror_master_category": "MLM189058",
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "custom",
        "not_specified",
        "me2",
        "me1"
      ],
      "shipping_options": [
        "carrier",
        "custom"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": true,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  },
  "MLM1071": {
    "id": "MLM1071",
    "name": "Animales y Mascotas",
    "picture": "http://resources.mlstatic.com/category/images/0b18438b-f56c-421a-a020-f581f40f4c24.png",
    "permalink": "http://home.mercadolibre.com.mx/animales/",
    "total_items_in_this_category": 0,
    "path_from_root": [
      {
        "id": "MLM1071",
        "name": "Animales y Mascotas"
      }
    ],
    "children_categories": [
      {
        "id": "MLM1100",
        "name": "Aves",
        "total_items_in_this_category": 0
      },
      {
        "id": "MLM189310",
        "name": "Conejos",
        "total_items_in_this_category": 4006
      },
      {
        "id": "MLM1117",
        "name": "Equinos",
        "total_items_in_this_category": 4128
      },
      {
        "id": "MLM1081",
        "name": "Gatos",
        "total_items_in_this_category": 35580
      },
      {
        "id": "MLM3615",
        "name": "Libros de animales",
        "total_items_in_this_category": 10917
      },
      {
        "id": "MLM1091",
        "name": "Peces",
        "total_items_in_this_category": 2218
      },
      {
        "id": "MLM1072",
        "name": "Perros",
        "total_items_in_this_category": 0
      },
      {
        "id": "MLM1111",
        "name": "Reptiles y Anfibios",
        "total_items_in_this_category": 420
      },
      {
        "id": "MLM1105",
        "name": "Roedores",
        "total_items_in_this_category": 8093
      },
      {
        "id": "MLM1126",
        "name": "Otros",
        "total_items_in_this_category": 2736
      }
    ],
    "attribute_types": "attributes",
    "settings": {
      "adult_content": false,
      "buying_allowed": true,
      "buying_modes": [
        "buy_it_now",
        "auction"
      ],
      "catalog_domain": null,
      "coverage_areas": "not_allowed",
      "currencies": [
        "USD",
        "MXN"
      ],
      "fragile": false,
      "immediate_payment": "required",
      "item_conditions": [
        "new",
        "used",
        "not_specified"
      ],
      "items_reviews_allowed": false,
      "listing_allowed": false,
      "max_description_length": 50000,
      "max_pictures_per_item": 12,
      "max_pictures_per_item_var": 10,
      "max_sub_title_length": 70,
      "max_title_length": 60,
      "maximum_price": null,
      "minimum_price": null,
      "mirror_category": null,
      "mirror_master_category": null,
      "mirror_slave_categories": [
      ],
      "price": "required",
      "reservation_allowed": "not_allowed",
      "restrictions": [
      ],
      "rounded_address": false,
      "seller_contact": "not_allowed",
      "shipping_modes": [
        "custom",
        "not_specified",
        "me1"
      ],
      "shipping_options": [
        "custom",
        "carrier"
      ],
      "shipping_profile": "optional",
      "show_contact_information": false,
      "simple_shipping": "optional",
      "stock": "required",
      "sub_vertical": null,
      "subscribable": false,
      "tags": [
      ],
      "vertical": null,
      "vip_subdomain": "articulo"
    },
    "meta_categ_id": null,
    "attributable": false
  }
}

or in a human-readable way:

{
   "MLMXXXXX":{...},
   "MLMXXXY":{...},
    ...
}

Nonetheless, when I call that function it gets stuck after about saving 3552. I also read that GeoJsonListener loads JSON in memory. My question is how can I create a Listener that loads each object individually instead of loading the whole JSON in memory.

Here’s the output of 3552th Item:

{ id: 'MLM45922',
  name: 'Mitsubishi',
  picture: null,
  permalink: null,
  total_items_in_this_category: 39,
  path_from_root: 
   [ { id: 'MLM1747', name: 'Accesorios para Vehículos' },
     { id: 'MLM179617', name: 'Tuning y Performance' },
     { id: 'MLM179724', name: 'Performance' },
     { id: 'MLM4859', name: 'Filtros Alto Flujo' },
     { id: 'MLM45922', name: 'Mitsubishi' } ],
  children_categories: [],
  attribute_types: 'attributes',
  settings: 
   { adult_content: false,
     buying_allowed: true,
     buying_modes: [ 'buy_it_now', 'auction' ],
     catalog_domain: null,
     coverage_areas: 'not_allowed',
     currencies: [ 'USD', 'MXN' ],
     fragile: false,
     immediate_payment: 'required',
     item_conditions: [ 'used', 'not_specified', 'new' ],
     items_reviews_allowed: false,
     listing_allowed: true,
     max_description_length: 50000,
     max_pictures_per_item: 12,
     max_pictures_per_item_var: 10,
     max_sub_title_length: 70,
     max_title_length: 60,
     maximum_price: null,
     minimum_price: null,
     mirror_category: null,
     mirror_master_category: null,
     mirror_slave_categories: [],
     price: 'required',
     reservation_allowed: 'not_allowed',
     restrictions: [],
     rounded_address: false,
     seller_contact: 'not_allowed',
     shipping_modes: [ 'not_specified', 'custom', 'me1', 'me2' ],
     shipping_options: [ 'custom', 'carrier' ],
     shipping_profile: 'optional',
     show_contact_information: false,
     simple_shipping: 'optional',
     stock: 'required',
     sub_vertical: null,
     subscribable: false,
     tags: [],
     vertical: null,
     vip_subdomain: 'articulo' },
  meta_categ_id: null,
  attributable: false }

Advertisement

Answer

The GeoJSONListener does exactly what you want to do – it keeps the second level of the objects in memory. That way it loads each MLM object in your file by itself – it does not load the complete file into memory.

Testing out the code you have included on the file you’ve referenced (and reducing memory limit to 32M, since a streaming parser shouldn’t need 4G of memory), it parses through the whole file, reading through 27200 objects before I cancelled the process, in about 10 minutes on an old Macbook.

This leads me to believe that the problem isn’t related to your JSON parser or how you’re parsing the file, and can possibly be caused by something else (like your host / web server not honouring calls to set_time_limit or your database layer locking or barfing on some content.

User contributions licensed under: CC BY-SA
6 People found this is helpful
Advertisement