Showing posts with label photo of the day. Show all posts
Showing posts with label photo of the day. Show all posts

Friday, January 11, 2013

National Geographic Photo of the Day using Python

A small snippet of code to get the National Geographic Photo of the Day.
It uses mechanize and Beautiful Soup 4 to help with the scraping.

It also has a function for walking backwards to pick up previous photos.

# -*- coding: utf-8 -*-
import os
import re
import sys

import mechanize
from bs4 import BeautifulSoup

MASTER_URL = "http://photography.nationalgeographic.com/photography/photo-of-the-day/?source=NavPhoPOD"

class POD_Browser(mechanize.Browser):
    """
    mechanize browser preconfigured for the photo-of-the-day pages.

    Constructing an instance applies the handler/debug settings listed in
    __init__ and then immediately opens MASTER_URL, so a new browser
    already has that page loaded as its current response.
    """
    def __init__(self, *args, **kwargs):
        # Explicit base-class call; all arguments are forwarded untouched.
        mechanize.Browser.__init__(self, *args, **kwargs)

        # (setter, value) pairs, applied in the order listed.
        settings = (
            (self.set_handle_robots, False),
            (self.set_debug_redirects, False),
            (self.set_debug_http, False),
            (self.set_handle_equiv, True),
            (self.set_handle_gzip, True),
            (self.set_handle_redirect, True),
        )
        for apply_setting, value in settings:
            apply_setting(value)

        # Load the landing page as part of construction.
        self.open(MASTER_URL)

class Session(object):
    """
    A photo-of-the-day scraping session.

    Owns one POD_Browser (which opens MASTER_URL when constructed). The
    methods work on whatever page that browser currently has loaded:
    they can download the wallpaper linked from it and step the browser
    back to the previous day's page.
    """
    def __init__(self):
        self.browser = POD_Browser()

    def downloadPhotoOfTheDay(self):
        """
        Search the page looking for a Wallpaper link. Not all pages have
        Wallpaper link, and we (politely) don't download the image in that
        case.

        Each matching link is saved under the last path component of its
        URL, relative to the current working directory. A file that
        already exists under that name is left alone. The name of every
        newly saved file is printed to stdout.
        """
        page = self.browser.response().read()
        # Name the parser explicitly: without it Beautiful Soup picks
        # whichever parser happens to be installed (and warns about it),
        # so results could differ between machines.
        soup = BeautifulSoup(page, 'html.parser')

        tags = soup.find_all('a', text=re.compile('Download Wallpaper'))
        for t in tags:
            href = t.get('href')
            if not href:
                # An anchor without a target gives us nothing to fetch.
                continue
            filename = href.split('/')[-1]
            # If we've already downloaded the file, don't download it again.
            if not os.path.exists(filename):
                filename, _headers = self.browser.retrieve(href, filename)
                # Parenthesised single-argument form prints identically
                # on Python 2 and Python 3.
                print(filename)
                sys.stdout.flush()

    def goPrevious(self):
        """
        Find the previous link and go back a day.

        Looks for a link whose text matches 'Previous' on the current
        page and follows it. Any error raised by the browser when no such
        link exists (mechanize documents LinkNotFoundError for find_link)
        is propagated to the caller.
        """
        link = self.browser.find_link(text_regex=re.compile('Previous'))
        self.browser.follow_link(link)

    def downloadPriorPhotos(self, start=1, num=10):
        """
        Download some prior photos.
        You can start 7 days back and get 14 days of photos with
        start = 7, num = 14

        After each download the browser steps back one more day, so when
        the call returns it is positioned start + num pages before where
        it began.

        :param start: How many days to go back before starting
        :param num: How many days to look at.

        """
        # range (not xrange) keeps this runnable on both Python 2 and 3;
        # the counts involved are small.
        for _ in range(start):
            self.goPrevious()

        for _ in range(num):
            self.downloadPhotoOfTheDay()
            self.goPrevious()

if __name__ == '__main__':
    # Script entry point: download the photo linked from the starting page.
    session = Session()
    session.downloadPhotoOfTheDay()
    # To fetch the last week's worth instead, uncomment the next line.
    #session.downloadPriorPhotos(num = 7)