DEV Community

Eko Priyanto
Eko Priyanto

Posted on

Scrape masjid di SIMAS Kemenag

Image description

Scrape masjid di SIMAS Kemenag dengan PHP


<?php

/**
 * Returns the text located between the first occurrence of $start and the
 * next occurrence of $end that follows it.
 *
 * @param string $string Text to search.
 * @param string $start  Opening delimiter (not included in the result).
 * @param string $end    Closing delimiter (not included in the result).
 *
 * @return string The enclosed text, or '' when $start is empty, $start is not
 *                found, or $end does not occur after $start.
 */
function get_string_between($string, $start, $end){
    // Callers may hand over a non-string (e.g. the false of a failed
    // download); normalise it once so the string functions below stay quiet.
    $string = (string) $string;

    // An empty opening delimiter never yielded a match; keep returning ''.
    if ($start === '') return '';

    $ini = strpos($string, $start);
    // Strict check: position 0 is a valid match, only false means "not found".
    if ($ini === false) return '';

    $ini += strlen($start);
    $stop = strpos($string, $end, $ini);
    // Without this guard a missing $end became a negative length and
    // substr() returned an arbitrary slice of the input.
    if ($stop === false) return '';

    return substr($string, $ini, $stop - $ini);
}

/**
 * Extracts the fields of a SIMAS mosque profile page from its raw HTML.
 *
 * Every field is located by fixed marker strings, so a field whose markers
 * are absent from $html comes back empty instead of containing unrelated
 * markup.
 *
 * @param string $html Raw HTML of the profile page.
 *
 * @return array Associative array with the string keys 'nama_masjid',
 *               'didirikan', 'alamat' and 'sejarah_masjid', plus 'url_gambar'
 *               holding the list of matched image URLs.
 */
function extract_masjid_data($html) {
    $masjid_data = [];

    // Mosque name
    $masjid_data['nama_masjid'] = trim(get_string_between($html, '<h1 class="masjid-title">', '</h1>'));

    // Founded ("didirikan"): first paragraph of the calendar block
    $didirikan_html = masjid_html_section($html, '<div class="masjid-alamat-calendar">', '</div>');
    $masjid_data['didirikan'] = trim(strip_tags(get_string_between($didirikan_html, '<p>', '</p>')));

    // Address: first paragraph of the location block
    $alamat_html = masjid_html_section($html, '<div class="masjid-alamat-location">', '<p class="masjid-alamat-nav">');
    $alamat = get_string_between($alamat_html, '<p>', '</p>');
    // Line breaks have to become separators BEFORE strip_tags() runs;
    // afterwards the <br/> tags are gone and the address lines would be
    // glued together with nothing in between.
    $alamat = str_ireplace(['<br/>', '<br />', '<br>'], ', ', $alamat);
    $masjid_data['alamat'] = trim(strip_tags($alamat));

    // Mosque history
    $sejarah_html = masjid_html_section($html, '<div class="masjid-sejarah show-less" id="content-sejarah">', '</div>');
    $masjid_data['sejarah_masjid'] = trim(strip_tags($sejarah_html));

    // Image URLs; the dots of the host name are escaped so they only match
    // literal dots.
    preg_match_all('~<img src="(https://simas\.kemenag\.go\.id/uploads/masjid-img/.*?)"~', $html, $matches);
    $masjid_data['url_gambar'] = $matches[1];

    return $masjid_data;
}

/**
 * Returns the part of $html that begins at $start_marker (inclusive) and
 * ends right before the next $end_marker, or '' when either marker is
 * missing.
 */
function masjid_html_section($html, $start_marker, $end_marker) {
    $from = strpos($html, $start_marker);
    // A false position would otherwise be used as offset 0 and slice the
    // top of the document.
    if ($from === false) return '';

    $to = strpos($html, $end_marker, $from);
    if ($to === false) return '';

    return substr($html, $from, $to - $from);
}

// Page to scrape. To work from a saved copy instead, set this to the path of
// your HTML file (e.g. 'data.html'); file_get_contents() accepts both.
$source = 'https://simas.kemenag.go.id/profil/masjid/25';

$html = file_get_contents($source);
if ($html === false) {
    // file_get_contents() returns false when the download fails; stop here
    // rather than parsing nothing and printing empty fields as if the scrape
    // had succeeded.
    file_put_contents('php://stderr', "Failed to fetch {$source}\n");
    exit(1);
}

$masjid_data = extract_masjid_data($html);

// Output the data
echo "Nama Masjid: " . $masjid_data['nama_masjid'] . "\n";
echo "Didirikan: " . $masjid_data['didirikan'] . "\n";
echo "Alamat: " . $masjid_data['alamat'] . "\n";
echo "Sejarah Masjid: " . $masjid_data['sejarah_masjid'] . "\n";
echo "URL Gambar:\n";
foreach ($masjid_data['url_gambar'] as $url) {
    echo $url . "\n";
}


Enter fullscreen mode Exit fullscreen mode

Top comments (0)