1
2
3 """HTTP/HTTPS client module.
4
5 @var default_timeout: Default timeout for socket operations.
6 @type default_timeout: C{float}
7
8 @var default_bufsize: Default number of bytes to try to read from the network.
9 @type default_bufsize: C{int}
10
11 @var default_template: Request template, must be filled by L{HTTPClient}
12 @type default_template: C{str}
13 """
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 import time
33 import socket
34 import urlparse
35
36 from itertools import takewhile
37
38 import Halberd.ScanTask
39
40
# Default timeout for socket operations, expressed in seconds.
default_timeout = 2

# Default number of bytes to try to read from the network in one call.
default_bufsize = 1024

# Request template.  The %(request)s, %(hostname)s and %(port)s placeholders
# are filled in by HTTPClient._fillTemplate.  Every physical line ends with a
# backslash, so the only line breaks in the resulting string are the explicit
# \r\n sequences; the final header is followed by the blank line (\r\n\r\n)
# that terminates the request.
# NOTE(review): the Accept value is split across two physical lines; as shown
# here nothing separates "image/pjpeg," from "application/...".  Confirm
# against the pristine file that no leading space was lost on the second line.
default_template = """\
GET %(request)s HTTP/1.1\r\n\
Host: %(hostname)s%(port)s\r\n\
Pragma: no-cache\r\n\
Cache-control: no-cache\r\n\
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.7) Gecko/20050414 Firefox/1.0.3\r\n\
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg,\
application/x-shockwave-flash, */*\r\n\
Accept-Language: en-us,en;q=0.5\r\n\
Accept-Encoding: gzip,deflate\r\n\
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n\
Keep-Alive: 300\r\n\
Connection: keep-alive\r\n\r\n\
"""
61
62
64 """Generic HTTP exception"""
65
68
71
74
76 """Generic HTTPS exception"""
77
79 """Invalid or unsupported URL"""
80
82 """Operation timed out"""
83
85 """Unable to reach webserver"""
86
88 """The remote host didn't return an HTTP reply"""
89
90
92 """Special-purpose HTTP client.
93
94 @ivar timeout: Timeout for socket operations (expressed in seconds).
95 B{WARNING}: changing this value is strongly discouraged.
96 @type timeout: C{float}
97
98 @ivar bufsize: Buffer size for network I/O.
99 @type bufsize: C{int}
100
101 @ivar template: Template of the HTTP request to be sent to the target.
102 @type template: C{str}
103
104 @ivar _recv: Reference to a callable responsible for reading data from the
105 network.
106 @type _recv: C{callable}
107 """
108 timeout = default_timeout
109 bufsize = default_bufsize
110 template = default_template
111
113 """Initializes the object.
114 """
115 self.schemes = ['http']
116 self.default_port = 80
117
118
119 self._timeout_exceptions = [socket.timeout]
120
121 self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
122 self._sock.settimeout(self.timeout)
123
124 self._recv = self._sock.recv
125
127 """Talk to the target webserver and fetch MIME headers.
128
129 @param address: The target's network address.
130 @type address: C{str}
131
132 @param urlstr: URL to use.
133 @type urlstr: C{str}
134
135 @return: The time when the client started reading the server's response
136 and the MIME headers that were sent.
137 @rtype: C{tuple}
138 """
139 self._putRequest(address, urlstr)
140
141 timestamp, headers = self._getReply()
142 if not headers:
143 return None
144
145
146 headers = headers.splitlines()[1:]
147 headers = list(takewhile(lambda x: x != '', headers))
148 headers.append('\r\n')
149 headers = '\r\n'.join(headers)
150
151 return timestamp, headers
152
154 """Sends an HTTP request to the target webserver.
155
156 This method connects to the target server, sends the HTTP request and
157 records a timestamp.
158
159 @param address: Target address.
160 @type address: C{str}
161
162 @param urlstr: A valid Unified Resource Locator.
163 @type urlstr: C{str}
164
165 @raise InvalidURL: In case the URL scheme is not HTTP or HTTPS
166 @raise ConnectionRefused: If it can't reach the target webserver.
167 @raise TimedOut: If we cannot send the data within the specified time.
168 """
169 scheme, netloc, url, params, query, fragment = urlparse.urlparse(urlstr)
170
171 if scheme not in self.schemes:
172 raise InvalidURL, '%s is not a supported protocol' % scheme
173
174 hostname, port = self._getHostAndPort(netloc)
175
176
177
178 req = self._fillTemplate(hostname, port, url, params, query, fragment)
179
180 self._connect((address, port))
181
182 self._sendAll(req)
183
185 """Determine the hostname and port to connect to from an URL
186
187 @param netloc: Relevant part of the parsed URL.
188 @type netloc: C{str}
189
190 @return: Hostname (C{str}) and port (C{int})
191 @rtype: C{tuple}
192 """
193 try:
194 hostname, portnum = netloc.split(':', 1)
195 except ValueError:
196 hostname, port = netloc, self.default_port
197 else:
198 if portnum.isdigit():
199 port = int(portnum)
200 else:
201 raise InvalidURL, '%s is not a valid port number' % portnum
202
203 return hostname, port
204
205 - def _fillTemplate(self, hostname, port, url, params='', query='', fragment=''):
206 """Fills the request template with relevant information.
207
208 @param hostname: Target host to reach.
209 @type hostname: C{str}
210
211 @param port: Remote port.
212 @type port: C{int}
213
214 @param url: URL to use as source.
215 @type url: C{str}
216
217 @return: A request ready to be sent
218 @rtype: C{str}
219 """
220 urlstr = url or '/'
221 if params:
222 urlstr += ';' + params
223 if query:
224 urlstr += '?' + query
225 if fragment:
226 urlstr += '#' + fragment
227
228 if port == self.default_port:
229 p = ''
230 else:
231 p = ':' + str(port)
232
233 values = {'request': urlstr, 'hostname': hostname, 'port': p}
234
235 return self.template % values
236
238 """Connect to the target address.
239
240 @param addr: The target's address.
241 @type addr: C{tuple}
242
243 @raise ConnectionRefused: If it can't reach the target webserver.
244 """
245 try:
246 self._sock.connect(addr)
247 except socket.error:
248 raise ConnectionRefused, 'Connection refused'
249
251 """Sends a string to the socket.
252 """
253 try:
254 self._sock.sendall(data)
255 except socket.timeout:
256 raise TimedOut, 'timed out while writing to the network'
257
259 """Read a reply from the server.
260
261 @return: Time when the data started arriving plus the received data.
262 @rtype: C{tuple}
263
264 @raise UnknownReply: If the remote server doesn't return a valid HTTP
265 reply.
266 @raise TimedOut: In case reading from the network takes too much time.
267 """
268 data = ''
269 timestamp = None
270 stoptime = time.time() + self.timeout
271 while time.time() < stoptime:
272 try:
273 chunk = self._recv(self.bufsize)
274 except tuple(self._timeout_exceptions), msg:
275 raise TimedOut, msg
276
277 if not chunk:
278
279 break
280
281 if not timestamp:
282 timestamp = time.time()
283
284 data += chunk
285 idx = data.find('\r\n\r\n')
286 if idx != -1:
287 data = data[:idx]
288 break
289
290 if not data.startswith('HTTP/'):
291 raise UnknownReply, 'Invalid protocol'
292
293 return timestamp, data
294
296 if self._sock:
297 self._sock.close()
298
299
301 """Special-purpose HTTPS client.
302 """
303
def __init__(self):
    """Initializes the object on top of the plain HTTP client setup.
    """
    HTTPClient.__init__(self)

    # HTTPS is accepted in addition to whatever the base class allows.
    self.schemes.append('https')
    self.default_port = 443

    # The SSL layer and the reader bound to it only exist after connecting.
    self._recv = self._sslsock = None
    self._timeout_exceptions.append(socket.sslerror)

    # Optional key and certificate files handed to the SSL layer.
    self.keyfile = self.certfile = None
318
def _connect(self, addr):
    """Connects to the target web server and starts SSL on the socket.

    @param addr: The target's address.
    @type addr: C{tuple}

    @raise ConnectionRefused: If it can't reach the target webserver.
    @raise HTTPSError: In case there's some mistake during the SSL
    negotiation.
    """
    HTTPClient._connect(self, addr)

    try:
        tunnel = socket.ssl(self._sock, self.keyfile, self.certfile)
    except socket.sslerror as err:
        raise HTTPSError(err)

    self._sslsock = tunnel
    # From now on replies are read through the SSL layer.
    self._recv = tunnel.read
335
337 """Sends a string to the socket.
338 """
339
340 self._sslsock.write(data)
341
342
344 """HTTP/HTTPS client factory.
345
346 @param scantask: Object describing where the target is and how to reach it.
347 @type scantask: C{instanceof(ScanTask)}
348
349 @return: The appropriate client class for the specified URL.
350 @rtype: C{class}
351 """
352 url = scantask.url
353 keyfile = scantask.keyfile
354 certfile = scantask.certfile
355
356 if url.startswith('http://'):
357 return HTTPClient()
358 elif url.startswith('https://'):
359 httpsclient = HTTPSClient()
360 httpsclient.keyfile = keyfile
361 httpsclient.certfile = certfile
362 return httpsclient
363 else:
364 raise InvalidURL
365
366
367
368