Python3 urllib Wrapper: HTTP-Request mit Python

Dieser Beitrag ist Teil meiner Sourcecode a Day-Aktion.

Heute gibt’s eine simple Wrapper-Klasse für urllib aus Python3:

# -*- coding: utf-8 -*-
#-------------------------------------------------------------------------------
# Name:        NetConnector
# Purpose:     Python urllib wrapper
#
# Author:      Alexander Thiemann
#
# Created:     10.10.2011
# Copyright:   (c) Alexander Thiemann 2011
#-------------------------------------------------------------------------------

import urllib.request, urllib.parse, urllib.error
import http.cookiejar
import time, sys

class NetConnector:
    """Thin wrapper around ``urllib`` that keeps cookies between requests.

    Every instance owns a cookie jar and an opener built on top of it, so
    cookies set by one response are sent along with later requests made
    through the same instance.
    """

    def __init__(self, encoding='utf-8', maxTries=2):
        """Create the cookie-aware opener.

        Args:
            encoding: Codec used to encode POST bodies and to decode
                response bodies.
            maxTries: Total number of attempts ``request`` makes before it
                gives up on a connection error (``urllib.error.URLError``).

        Note:
            The opener is also installed globally through
            ``urllib.request.install_opener``, so plain
            ``urllib.request.urlopen`` calls made afterwards use it too.
        """
        self.userAgent = 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.9) Gecko/2008060309 Firefox/3.0'

        self.encoding = encoding
        self.maxTries = maxTries
        self.cookiejar = http.cookiejar.CookieJar()
        self.opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.cookiejar))

        urllib.request.install_opener(self.opener)
        self.opener.addheaders = [('User-agent', self.userAgent)]

    def request(self, url, params=None, noencode=False, internalCount=1):
        """Fetch ``url`` and return the decoded response body.

        Args:
            url: Address to open.
            params: Optional request data. When empty or ``None`` the URL is
                opened without a body; otherwise the data is sent as the
                request body.
            noencode: When true, ``params["query"]`` is sent as the body
                as-is (only encoded to bytes) instead of URL-encoding the
                whole mapping.
            internalCount: Number of the current attempt; used by the retry
                logic and normally left at its default by callers.

        Returns:
            The response body decoded with ``self.encoding``, or ``""`` when
            an HTTP error occurred or every connection attempt failed.
        """
        print("URL: " + url)

        try:
            if params:
                if noencode:
                    payload = params["query"].encode(self.encoding)
                else:
                    payload = urllib.parse.urlencode(params).encode(self.encoding)
                    print("Params: " + str(payload))

                response = self.opener.open(url, payload)
            else:
                response = self.opener.open(url)

            # The context manager closes the response even when reading or
            # decoding the body raises.
            with response:
                return response.read().decode(self.encoding)

        except urllib.error.HTTPError as e:
            # HTTPError is a subclass of URLError, so it has to be handled
            # first; HTTP status errors are reported but not retried.
            print("HTTP Error: "  + str(e.code))
            return ""

        except urllib.error.URLError as e:
            print(e)

            if internalCount >= self.maxTries:
                print("[error] connection error. returning ''")
                return ""

            # Back off a little longer with every attempt.
            print("[error] connection error. Sleeping " + str(internalCount) +  " seconds.")
            time.sleep(internalCount)
            return self.request(url, params, noencode, internalCount + 1)

Verwendung:

connector = NetConnector()
# Without parameters a GET request is sent; the response body is returned.
connector.request("http://google.de")
# With parameters a POST request is sent; the response body is returned.
connector.request("http://google.de", {'a': 'asdasd'})

Das Ganze kodiert automatisch POST-Parameter und verwaltet Cookies 🙂