Package Halberd :: Module clientlib
[hide private]
[frames | no frames]

Source Code for Module Halberd.clientlib

  1  # -*- coding: iso-8859-1 -*- 
  2   
  3  """HTTP/HTTPS client module. 
  4   
  5  @var default_timeout: Default timeout for socket operations. 
  6  @type default_timeout: C{float} 
  7   
  8  @var default_bufsize: Default number of bytes to try to read from the network. 
  9  @type default_bufsize: C{int} 
 10   
 11  @var default_template: Request template, must be filled by L{HTTPClient} 
 12  @type default_template: C{str} 
 13  """ 
 14   
 15  # Copyright (C) 2004, 2005, 2006, 2010  Juan M. Bello Rivas <jmbr@superadditive.com> 
 16  # 
 17  # This program is free software; you can redistribute it and/or modify 
 18  # it under the terms of the GNU General Public License as published by 
 19  # the Free Software Foundation; either version 2 of the License, or 
 20  # (at your option) any later version. 
 21  # 
 22  # This program is distributed in the hope that it will be useful, 
 23  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 24  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 25  # GNU General Public License for more details. 
 26  # 
 27  # You should have received a copy of the GNU General Public License 
 28  # along with this program; if not, write to the Free Software 
 29  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 30   
 31   
 32  import time 
 33  import socket 
 34  import urlparse 
 35   
 36  from itertools import takewhile 
 37   
 38  import Halberd.ScanTask 
 39   
 40   
 41  default_timeout = 2 
 42   
 43  default_bufsize = 1024 
 44   
 45  # WARNING - Changing the HTTP request method in the following template will 
 46  # require updating tests/test_clientlib.py accordingly. 
 47  default_template = """\ 
 48  GET %(request)s HTTP/1.1\r\n\ 
 49  Host: %(hostname)s%(port)s\r\n\ 
 50  Pragma: no-cache\r\n\ 
 51  Cache-control: no-cache\r\n\ 
 52  User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20050414 Firefox/1.0.3\r\n\ 
 53  Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg,\ 
 54   application/x-shockwave-flash, */*\r\n\ 
 55  Accept-Language: en-us,en;q=0.5\r\n\ 
 56  Accept-Encoding: gzip,deflate\r\n\ 
 57  Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n\ 
 58  Keep-Alive: 300\r\n\ 
 59  Connection: keep-alive\r\n\r\n\ 
 60  """ 
 61   
 62   
class HTTPError(Exception):
    """Generic HTTP exception.

    @ivar msg: Description of the error. Any object accepted by C{str} will
        do (a string or another exception instance, for example).
    """

    def __init__(self, msg=''):
        """Store the error description.

        @param msg: Description of the error. It defaults to an empty string
            so that the exception classes can also be raised without
            arguments.
        """
        Exception.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return str(self.msg)

    def __deepcopy__(self, memo):
        # copy.deepcopy() hands back this very instance instead of a duplicate.
        return self
class HTTPSError(HTTPError):
    """Generic HTTPS exception"""
class InvalidURL(HTTPError):
    """Invalid or unsupported URL"""
class TimedOut(HTTPError):
    """Operation timed out"""
class ConnectionRefused(HTTPError):
    """Unable to reach webserver"""
class UnknownReply(HTTPError):
    """The remote host didn't return an HTTP reply"""
class HTTPClient:
    """Special-purpose HTTP client.

    @ivar timeout: Timeout for socket operations (expressed in seconds).
    B{WARNING}: changing this value is strongly discouraged.
    @type timeout: C{float}

    @ivar bufsize: Buffer size for network I/O.
    @type bufsize: C{int}

    @ivar template: Template of the HTTP request to be sent to the target.
    @type template: C{str}

    @ivar _recv: Reference to a callable responsible for reading data from the
    network.
    @type _recv: C{callable}
    """

    timeout = default_timeout
    bufsize = default_bufsize
    template = default_template

    def __init__(self):
        """Initializes the object.

        Creates the TCP socket, applies L{timeout} to it and selects the
        socket's C{recv} as the reading callable.
        """
        self.schemes = ['http']
        self.default_port = 80
        # _timeout_exceptions MUST be converted to a tuple before using it with
        # except.
        self._timeout_exceptions = [socket.timeout]

        self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._sock.settimeout(self.timeout)

        self._recv = self._sock.recv

    def getHeaders(self, address, urlstr):
        """Talk to the target webserver and fetch MIME headers.

        @param address: The target's network address (host part only; the
            port is taken from the URL).
        @type address: C{str}

        @param urlstr: URL to use.
        @type urlstr: C{str}

        @return: The time when the client started reading the server's response
        and the MIME headers that were sent, or C{None} if the reply was empty.
        @rtype: C{tuple}
        """
        self._putRequest(address, urlstr)

        timestamp, headers = self._getReply()
        if not headers:
            return None

        # Remove HTTP response and leave only the MIME headers.
        headers = headers.splitlines()[1:]
        # Anything past the first blank line belongs to the body.
        headers = list(takewhile(lambda x: x != '', headers))
        # Joining with a trailing CRLF element terminates the header block
        # with an empty line.
        headers.append('\r\n')
        headers = '\r\n'.join(headers)

        return timestamp, headers

    def _putRequest(self, address, urlstr):
        """Sends an HTTP request to the target webserver.

        This method connects to the target server and sends the HTTP request.

        @param address: Target address.
        @type address: C{str}

        @param urlstr: A valid Unified Resource Locator.
        @type urlstr: C{str}

        @raise InvalidURL: In case the URL scheme is not one of L{schemes} or
            the port is not numeric.
        @raise ConnectionRefused: If it can't reach the target webserver.
        @raise TimedOut: If we cannot send the data within the specified time.
        """
        scheme, netloc, url, params, query, fragment = \
            urlparse.urlparse(urlstr)

        if scheme not in self.schemes:
            raise InvalidURL('%s is not a supported protocol' % scheme)

        hostname, port = self._getHostAndPort(netloc)
        # NOTE: address and hostname may not be the same. The caller is
        # responsible for checking that.

        req = self._fillTemplate(hostname, port, url, params, query, fragment)

        self._connect((address, port))

        self._sendAll(req)

    def _getHostAndPort(self, netloc):
        """Determine the hostname and port to connect to from an URL

        @param netloc: Relevant part of the parsed URL.
        @type netloc: C{str}

        @return: Hostname (C{str}) and port (C{int})
        @rtype: C{tuple}

        @raise InvalidURL: If the text after the first colon is not made of
            digits only.
        """
        try:
            hostname, portnum = netloc.split(':', 1)
        except ValueError:
            # No colon at all: fall back to the client's default port.
            hostname, port = netloc, self.default_port
        else:
            if portnum.isdigit():
                port = int(portnum)
            else:
                raise InvalidURL('%s is not a valid port number' % portnum)

        return hostname, port

    def _fillTemplate(self, hostname, port, url, params='', query='',
                      fragment=''):
        """Fills the request template with relevant information.

        @param hostname: Target host to reach.
        @type hostname: C{str}

        @param port: Remote port.
        @type port: C{int}

        @param url: URL to use as source.
        @type url: C{str}

        @param params: Parameters of the last path element (after C{;}).
        @type params: C{str}

        @param query: Query string (after C{?}).
        @type query: C{str}

        @param fragment: Fragment identifier (after C{#}).
        @type fragment: C{str}

        @return: A request ready to be sent
        @rtype: C{str}
        """
        urlstr = url or '/'
        if params:
            urlstr += ';' + params
        if query:
            urlstr += '?' + query
        if fragment:
            urlstr += '#' + fragment

        # The port is only spelled out in the Host header when it differs
        # from the client's default one.
        if port == self.default_port:
            p = ''
        else:
            p = ':' + str(port)

        values = {'request': urlstr, 'hostname': hostname, 'port': p}

        return self.template % values

    def _connect(self, addr):
        """Connect to the target address.

        @param addr: The target's address.
        @type addr: C{tuple}

        @raise ConnectionRefused: If it can't reach the target webserver.
        """
        try:
            self._sock.connect(addr)
        except socket.error:
            raise ConnectionRefused('Connection refused')

    def _sendAll(self, data):
        """Sends a string to the socket.

        @param data: Request to send.
        @type data: C{str}

        @raise TimedOut: If the data cannot be written in time.
        """
        try:
            self._sock.sendall(data)
        except socket.timeout:
            raise TimedOut('timed out while writing to the network')

    def _getReply(self):
        """Read a reply from the server.

        Reading stops at the end of the header block, when the remote end
        closes the connection, or once L{timeout} seconds have elapsed since
        the call started; in the last case whatever arrived so far is used.

        @return: Time when the data started arriving plus the received data.
        @rtype: C{tuple}

        @raise UnknownReply: If the remote server doesn't return a valid HTTP
        reply.
        @raise TimedOut: In case a read operation raises one of the
        exceptions listed in C{_timeout_exceptions}.
        """
        data = ''
        timestamp = None
        stoptime = time.time() + self.timeout
        while time.time() < stoptime:
            try:
                chunk = self._recv(self.bufsize)
            except tuple(self._timeout_exceptions) as exc:
                raise TimedOut(exc)

            if not chunk:
                # The remote end closed the connection.
                break

            if timestamp is None:
                # Only the arrival of the first chunk is recorded.
                timestamp = time.time()

            data += chunk
            idx = data.find('\r\n\r\n')
            if idx != -1:
                # Keep the status line and headers, drop any body bytes.
                data = data[:idx]
                break

        if not data.startswith('HTTP/'):
            raise UnknownReply('Invalid protocol')

        return timestamp, data

    def __del__(self):
        # __init__ may have failed before the socket was created, in which
        # case there is nothing to close.
        sock = getattr(self, '_sock', None)
        if sock:
            sock.close()
class HTTPSClient(HTTPClient):
    """Special-purpose HTTPS client.

    @ivar keyfile: Path to an SSL key file (or C{None}).
    @ivar certfile: Path to an SSL certificate file (or C{None}).
    """

    def __init__(self):
        """Initializes the object on top of L{HTTPClient}.

        Adds C{https} to the accepted schemes, switches the default port to
        443 and registers C{socket.sslerror} as a timeout exception.
        """
        HTTPClient.__init__(self)

        self.schemes.append('https')

        self.default_port = 443

        # Both are set by _connect() once the SSL negotiation succeeds.
        self._recv = None
        self._sslsock = None
        self._timeout_exceptions.append(socket.sslerror)

        # Path to an SSL key file and certificate.
        self.keyfile = None
        self.certfile = None

    def _connect(self, addr):
        """Connect to the target web server.

        @param addr: The target's address.
        @type addr: C{tuple}

        @raise ConnectionRefused: If it can't reach the target webserver.
        @raise HTTPSError: In case there's some mistake during the SSL
        negotiation.
        """
        HTTPClient._connect(self, addr)
        try:
            self._sslsock = socket.ssl(self._sock, self.keyfile, self.certfile)
        except socket.sslerror as exc:
            raise HTTPSError(exc)

        # From now on replies are read through the SSL layer.
        self._recv = self._sslsock.read

    def _sendAll(self, data):
        """Sends a string through the SSL connection.

        C{write} reports how many bytes it accepted, so the remainder is
        written again until the whole request has been handed over.

        @param data: Request to send.
        @type data: C{str}

        @raise HTTPSError: If the SSL layer stops accepting data before the
        whole request has been written.
        """
        remaining = data
        while remaining:
            sent = self._sslsock.write(remaining)
            if not sent:
                # Nothing was accepted; bail out instead of looping forever.
                raise HTTPSError('unable to write the whole request to the '
                                 'network')
            remaining = remaining[sent:]
def clientFactory(scantask):
    """HTTP/HTTPS client factory.

    @param scantask: Object describing where the target is and how to reach it.
    @type scantask: C{instanceof(ScanTask)}

    @return: A client instance appropriate for the specified URL. HTTPS
    clients get the task's key and certificate files assigned.
    @rtype: L{HTTPClient} or L{HTTPSClient}

    @raise InvalidURL: If the URL starts with neither C{http://} nor
    C{https://}.
    """
    url = scantask.url
    keyfile = scantask.keyfile
    certfile = scantask.certfile

    if url.startswith('http://'):
        return HTTPClient()

    if url.startswith('https://'):
        httpsclient = HTTPSClient()
        httpsclient.keyfile = keyfile
        httpsclient.certfile = certfile
        return httpsclient

    # The exception classes take a message; raising the bare class would
    # fail while instantiating it.
    raise InvalidURL('%s is not a supported URL' % url)


# vim: ts=4 sw=4 et