Scrape masjid di SIMAS Kemenag dengan PHP
<?php
/**
 * Returns the text located between the first occurrence of $start and the
 * first occurrence of $end that follows it.
 *
 * @param string $string Text to search in.
 * @param string $start  Opening delimiter.
 * @param string $end    Closing delimiter.
 * @return string The enclosed text, or '' when either delimiter is empty or
 *                cannot be found.
 */
function get_string_between($string, $start, $end)
{
    $string = (string) $string;
    $start = (string) $start;
    $end = (string) $end;

    // An empty delimiter cannot enclose anything.
    if ($start === '' || $end === '') {
        return '';
    }

    // Strict comparison: a match at offset 0 is valid and must not be
    // confused with "not found" (false).
    $from = strpos($string, $start);
    if ($from === false) {
        return '';
    }
    $from += strlen($start);

    // Without this check a missing closing delimiter would produce a negative
    // length and substr() would return an arbitrary slice of the input.
    $to = strpos($string, $end, $from);
    if ($to === false) {
        return '';
    }

    return substr($string, $from, $to - $from);
}
/**
 * Extracts mosque profile fields from the HTML of a profile page.
 *
 * Each field is located by a fixed HTML marker. A field whose markers are not
 * present in $html is returned as an empty string (or an empty array for the
 * image list) instead of being filled with unrelated page content.
 *
 * @param string $html Raw HTML of the profile page.
 * @return array Map with the keys 'nama_masjid', 'didirikan', 'alamat',
 *               'sejarah_masjid' (strings) and 'url_gambar' (list of strings).
 */
function extract_masjid_data($html)
{
    $html = (string) $html;

    // Returns the part of $html that starts at $open_marker (marker included)
    // and stops right before the next $close_marker, or '' if either is missing.
    $slice = static function ($open_marker, $close_marker) use ($html) {
        $from = strpos($html, $open_marker);
        if ($from === false) {
            return '';
        }
        $to = strpos($html, $close_marker, $from);
        if ($to === false) {
            return '';
        }
        return substr($html, $from, $to - $from);
    };

    $masjid_data = [];

    // Mosque name
    $masjid_data['nama_masjid'] = trim(get_string_between($html, '<h1 class="masjid-title">', '</h1>'));

    // Founding date: first <p> inside the calendar block
    $didirikan_html = $slice('<div class="masjid-alamat-calendar">', '</div>');
    $masjid_data['didirikan'] = trim(strip_tags(get_string_between($didirikan_html, '<p>', '</p>')));

    // Address: first <p> inside the location block, which ends where the
    // navigation paragraph begins
    $alamat_html = $slice('<div class="masjid-alamat-location">', '<p class="masjid-alamat-nav">');
    $alamat_raw = get_string_between($alamat_html, '<p>', '</p>');
    // Line breaks must become ", " BEFORE strip_tags(); afterwards the <br>
    // tags are already gone and the address parts would run together.
    $alamat_raw = (string) preg_replace('/\s*<br\s*\/?>\s*/i', ', ', $alamat_raw);
    $masjid_data['alamat'] = trim(strip_tags($alamat_raw));

    // Mosque history: all text of the history block
    $sejarah_html = $slice('<div class="masjid-sejarah show-less" id="content-sejarah">', '</div>');
    $masjid_data['sejarah_masjid'] = trim(strip_tags($sejarah_html));

    // Image URLs: dots in the host name are escaped so they match literally
    $matches = [];
    preg_match_all('/<img src="(https:\/\/simas\.kemenag\.go\.id\/uploads\/masjid-img\/.*?)"/', $html, $matches);
    $masjid_data['url_gambar'] = isset($matches[1]) ? $matches[1] : [];

    return $masjid_data;
}
// Page to scrape. To parse a saved copy instead, set $source to a local file
// path such as 'data.html'.
$source = 'https://simas.kemenag.go.id/profil/masjid/25';
$html = file_get_contents($source);
if ($html === false) {
    // file_get_contents() returns false on failure; stop here instead of
    // printing a profile made of empty fields.
    echo "Gagal mengambil halaman: " . $source . "\n";
    exit(1);
}
$masjid_data = extract_masjid_data($html);
// Output the data
echo "Nama Masjid: " . $masjid_data['nama_masjid'] . "\n";
echo "Didirikan: " . $masjid_data['didirikan'] . "\n";
echo "Alamat: " . $masjid_data['alamat'] . "\n";
echo "Sejarah Masjid: " . $masjid_data['sejarah_masjid'] . "\n";
echo "URL Gambar:\n";
foreach ($masjid_data['url_gambar'] as $url) {
    echo $url . "\n";
}
Top comments (0)