This post and its code were last updated on 21st November 2019.

The other day, I wanted to retrieve posts from my own blog corresponding to a list of movies from IMDB. If a movie was not found in my blog, the script should retrieve the information from Wikipedia instead. I thought I could cook up a Python script for it in an hour or so. It turned out to be more than a full day's work. The reasons for the long time taken were my level of Python skills, the unavailability of public APIs for IMDB, and learning how to call the IMDB package, the Wikipedia package, the WordPress APIs (initially I went with using RSS from my blog) and so on. I wanted to include Google search results, but that meant I needed a Google Cloud account, and the unofficial web-scraping package was not working.

Notes:

  1. To read RSS from my blog I used the Feedparser package; later I moved to calling the WordPress REST API V2 through the Requests library – building the request parameters from a dictionary object with Requests is far more convenient than constructing ‘get’ requests by hand.
  2. To access Wikipedia API I am using Wikipedia package which gives a nice wrapper around the APIs.
  3. For drawing tables in the terminal, I am using the Tabulate package. Initially, I tried TerminalTables but its error handling was poor. Tabulate has the ability to generate HTML Tables with a quick change and that will be useful for my next version (web page).
  4. The code is not yet Pythonic; for example, empty strings and lists are tested with len() and not the bool() function.
  5. The current version is just quick-and-dirty (QnD) work, and the performance of data retrieval from IMDB is poor.
Sample output from the program for a few movie searches.
Sample output from the program for a few movie searches.
Movie Listing Web Version with Flask & Python
A teaser of the next version – a webpage built with Flask and Bootstrap style
#Usage: python3 movies.py MOVIENAME NUMBER_OF_MOVIES
#Install dependencies: pip install tabulate wikipedia IMDbPY requests

#CONSTANTS
# Default number of movies to look up when no count is given on the command line.
MOVIES_COUNT = 5
# WordPress REST API (v2) posts endpoint of the blog that is searched for reviews.
BLOG_SEARCH_URL = "https://venkatarangan.com/blog/wp-json/wp/v2/posts"
# Placeholder for the "Review" column when no review link is available.
REVIEW_NOT_FOUND = "No reviews"
# Column headings of the results table; the order matches each row built in main().
HEADERS = ["Year", "Title", "Director", "Genre", "Languages", "Rating", "Review"]
# Placeholder returned by retrievekey() when a movie has no value for a field.
NOT_AVAILABLE = "N.A."
# Placeholder for the "Year" column when the release year is unknown.
YEAR_UNKNOWN = "N.A."
# Table style name for tabulate's tablefmt argument.
TABULATEFORMAT = "fancy_grid"
# Movie searched for when the user supplies an empty name.
DEFAULT_MOVIE = 'Puli (2015)'
# URL prefix to which an IMDb movie ID is appended to form the title page link.
IMDB_BASE="https://www.imdb.com/title/tt"
#----------------------------------------------------------#

def retrievekey(minfo, keyname):
    """Return a short display string for one field of a movie record.

    Args:
        minfo: Movie-like object that offers ``get(key)`` and a ``data``
            mapping (main() passes the object returned by ``ia.get_movie``).
        keyname: Name of the field to read. ``"director"`` is special-cased:
            the name of the first listed director is returned.

    Returns:
        * for ``"director"``: the first director's name, or ``"Unknown"``
          when it cannot be determined;
        * for a list-like value: its first two items joined with ``","``;
        * for a string value: the string itself;
        * for a missing value: ``NOT_AVAILABLE``;
        * for anything else (e.g. a float rating): ``str(value)``.
    """
    if keyname == "director":
        try:
            # Only the first listed director is shown.
            name = minfo.data.get("director")[0].data.get('name')
        except (AttributeError, TypeError, LookupError):
            # No director list, an empty list, or an entry without ``data``.
            return "Unknown"
        return name if name is not None else "Unknown"

    value = minfo.get(keyname)
    if value is None:
        return NOT_AVAILABLE
    if isinstance(value, str):
        # A plain string must not be sliced and comma-joined character by
        # character (that would turn "Puli" into "P,u").
        return value
    try:
        # List-like field (genres, languages, ...): keep the first two entries.
        return ",".join(value[:2])
    except TypeError:
        # Not sliceable/joinable, probably a float, so convert it to a string.
        text = str(value)
        return NOT_AVAILABLE if text == "None" else text
#end def

def PrintResults(_items):
    """Print the collected movie rows as a table.

    Args:
        _items: List of rows; each row is a list whose entries line up with
            ``HEADERS``.

    The rows are rendered with ``tabulate`` using the ``TABULATEFORMAT``
    style. If rendering fails for any reason, the raw list is printed
    instead so the results are not lost.
    """
    try:
        print(tabulate(_items, HEADERS, tablefmt=TABULATEFORMAT), flush=True)
    except Exception:
        # Best-effort fallback; ``Exception`` (rather than a bare except)
        # lets Ctrl-C and SystemExit pass through.
        print(_items, flush=True)
#end def

def Wiki_Read(_movietitle):
    """Return the URL of the Wikipedia page found for ``_movietitle``.

    Args:
        _movietitle: Title to look up, passed to ``wikipedia.page``.

    Returns:
        The page URL, or ``""`` when the lookup fails for any reason
        (the caller treats an empty string as "nothing found").
    """
    try:
        return wikipedia.page(_movietitle).url
    except Exception:
        # Best-effort lookup: any failure means "no page". Catching
        # ``Exception`` instead of using a bare except keeps Ctrl-C working
        # while the network call is in progress.
        return ""
#end def
    
def Blog_Read(_movie_title, _movieyear):
    """Search the blog for a post whose title contains ``_movie_title``.

    Args:
        _movie_title: Title to look for; main() passes "Title (Year)".
        _movieyear: Release year. Currently unused; kept so existing callers
            keep working.

    Returns:
        The link of the first post (in the order returned by the server)
        whose title contains the movie title, or ``""`` when nothing matches
        or the request fails.
    """
    import html  # stdlib; imported locally so the block is self-contained

    def _normalise(text):
        # Decode HTML entities, lowercase, and drop straight and curly
        # apostrophes so both sides of the comparison are treated alike.
        return html.unescape(text).lower().replace("'", "").replace("\u2019", "")

    _wanted = _normalise(_movie_title)
    if not _wanted:
        # An empty needle would "match" every post title.
        return ""

    _params_dict = {
        'search': _movie_title,
        # NOTE(review): the WordPress REST posts endpoint documents
        # ``categories`` (term IDs), not ``category``; this filter is probably
        # ignored by the server - confirm and replace with the category ID.
        'category': 'movies',
        'orderby': 'date',
        # ``per_page`` is the REST API page-size parameter
        # (``posts_per_page`` belongs to WP_Query and is not recognised).
        'per_page': 20,
    }
    try:
        # Without a timeout a stalled connection would hang the script.
        _response = requests.get(BLOG_SEARCH_URL, params=_params_dict, timeout=10)
        if _response.status_code != 200:
            return ""
        _posts = _response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: no review found.
        return ""

    if not isinstance(_posts, list):
        return ""

    for _post in _posts:
        try:
            _title = _normalise(_post['title']['rendered'])
            _link = _post['link']
        except (KeyError, TypeError, AttributeError):
            # Entry without the expected fields; skip it.
            continue
        if _wanted in _title:
            return _link
    return ""
#end def

def main():
    """Look up movies on IMDb and print a table with a review link for each.

    The movie name is read from ``sys.argv[1]`` (or prompted for when it is
    absent) and the number of movies from ``sys.argv[2]`` (default
    ``MOVIES_COUNT``). For every movie found, the review link is taken from
    the blog, then Wikipedia, then the IMDb title page, in that order.
    Movies without a known year get ``YEAR_UNKNOWN`` and ``REVIEW_NOT_FOUND``.
    """
    # Sort key for titles without a year, so they are listed first.
    unknown_year_sort_key = 2100

    try:
        _movie_name = sys.argv[1]
    except IndexError:
        _movie_name = input("Enter a movie name:")

    try:
        _movies_count = int(sys.argv[2])
    except (IndexError, ValueError):
        # Count missing or not a number: use the default.
        _movies_count = MOVIES_COUNT

    if _movie_name == "":
        _movie_name = DEFAULT_MOVIE

    searchtitle = "Fetching latest (Max:" + str(_movies_count) + ") movies titled: " + _movie_name
    print(searchtitle)

    # NOTE(review): one result more than requested is fetched - presumably to
    # compensate for non-movie hits that are filtered out below; confirm.
    titles = ia.search_movie(_movie_name, _movies_count + 1)
    movies = [t for t in titles if t.data.get('kind') == 'movie']
    movies_sorted = sorted(
        movies,
        key=lambda mv: mv.data.get('year', unknown_year_sort_key),
        reverse=True,
    )

    movies_items = []
    for m in movies_sorted:
        movie_id = m.movieID
        title = m.data['title']
        year = m.data.get('year')

        if not movies_items:
            print("Progress:", end='', flush=True)

        movie_moreinfo = ia.get_movie(movie_id)
        print("*", end='', flush=True)

        genres = retrievekey(movie_moreinfo, 'genre')
        languages = retrievekey(movie_moreinfo, 'lang')
        director = retrievekey(movie_moreinfo, 'director')
        rating = retrievekey(movie_moreinfo, 'rating')

        if year is None:
            # The year is checked before it is turned into text; comparing
            # the text form against a number could never be true.
            year_text = YEAR_UNKNOWN
            review = REVIEW_NOT_FOUND
        else:
            year_text = str(year)
            fulltitle = title + " (" + year_text + ")"
            review = Blog_Read(fulltitle, year_text)
            if review == "":
                review = Wiki_Read(fulltitle)
            if review == "":
                review = IMDB_BASE + movie_id

        movies_items.append(
            [year_text, title, director, genres, languages, rating, review]
        )

    if not movies_items:
        print("No Movies found")
    print("\n", flush=True)

    PrintResults(movies_items)
#end def

#----------------------------------------------------------#
if __name__ == "__main__":
    # Only the imports are guarded: a failure inside main() must surface as
    # its own error instead of being reported as a missing package.
    try:
        import sys
        import requests
        import wikipedia
        from imdb import IMDb
        from tabulate import tabulate
    except ImportError as err:
        print("One or more packages not found.")
        print(err)
        # Non-zero status so callers can tell that the script did not run.
        raise SystemExit(1)

    ia = IMDb()
    main()

Write A Comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.