@@ -1051,7 +1051,7 @@ AbstractBasicAuthHandler Objects
10511051 *headers* should be the error headers.
10521052
10531053 *host* is either an authority (e.g. ``"python.org"``) or a URL containing an
1054- authority component (e.g. ``"http://python.org/"``). In either case, the
1054+ authority component (e.g. ``"https://python.org/"``). In either case, the
10551055 authority must not contain a userinfo component (so, ``"python.org"`` and
10561056 ``"python.org:80"`` are fine, ``"joe:password@python.org"`` is not).
10571057
@@ -1247,10 +1247,14 @@ This example gets the python.org main page and displays the first 300 bytes of
12471247it::
12481248
12491249 >>> import urllib.request
1250- >>> with urllib.request.urlopen('http://www.python.org/') as f:
1251- ... print(f.read(300))
1252- ...
1253- b'<!doctype html>\n<!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 7]> <html class="no-js ie7 lt-ie8 lt-ie9"> <![endif]-->\n<!--[if IE 8]> <html class="no-js ie8 lt-ie9">
1250+ >>> with urllib.request.urlopen('https://www.python.org/') as f:
1251+ ... # The response may be compressed (for example, 'gzip').
1252+ ... print(f.headers.get('Content-Encoding'))
1253+ ... data = f.read()
1254+ ... if f.headers.get('Content-Encoding') == 'gzip':
1255+ ... import gzip
1256+ ... data = gzip.decompress(data)
1257+ ... print(data[:300].decode('utf-8', errors='replace'))
12541258
12551259Note that reading from the response returned by urlopen yields a bytes object. This is because there is no way
12561260for urlopen to automatically determine the encoding of the byte stream
@@ -1267,26 +1271,30 @@ For additional information, see the W3C document: https://www.w3.org/Internation
12671271As the python.org website uses *utf-8* encoding as specified in its meta tag, we
12681272will use the same for decoding the bytes object::
12691273
1270- >>> with urllib.request.urlopen('http://www.python.org/') as f:
1271- ... print(f.read(100).decode('utf-8'))
1274+ >>> with urllib.request.urlopen('https://www.python.org/') as f:
1275+ ... # Check for compression and decode appropriately.
1276+ ... enc = f.headers.get('Content-Encoding')
1277+ ... data = f.read()
1278+ ... if enc == 'gzip':
1279+ ... import gzip
1280+ ... data = gzip.decompress(data)
1281+ ... print(data[:100].decode('utf-8', errors='replace'))
12721282 ...
1273- <!doctype html>
1274- <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->
1275- <!-
12761283
12771284It is also possible to achieve the same result without using the
12781285:term:`context manager` approach::
12791286
12801287 >>> import urllib.request
1281- >>> f = urllib.request.urlopen('http://www.python.org/')
1288+ >>> f = urllib.request.urlopen('https://www.python.org/')
12821289 >>> try:
1283- ... print(f.read(100).decode('utf-8'))
1290+ ... enc = f.headers.get('Content-Encoding')
1291+ ... data = f.read()
1292+ ... if enc == 'gzip':
1293+ ... import gzip
1294+ ... data = gzip.decompress(data)
1295+ ... print(data[:100].decode('utf-8', errors='replace'))
12841296 ... finally:
12851297 ... f.close()
1286- ...
1287- <!doctype html>
1288- <!--[if lt IE 7]> <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> <![endif]-->
1289- <!--
12901298
12911299In the following example, we are sending a data-stream to the stdin of a CGI
12921300and reading the data it returns to us. Note that this example will only work
@@ -1357,7 +1365,7 @@ Use the *headers* argument to the :class:`Request` constructor, or::
13571365
13581366 import urllib.request
13591367 req = urllib.request.Request('http://www.example.com/')
1360- req.add_header('Referer', 'http://www.python.org/')
1368+ req.add_header('Referer', 'https://www.python.org/')
13611369 # Customize the default User-Agent header value:
13621370 req.add_header('User-Agent', 'urllib-example/0.1 (Contact: . . .)')
13631371 with urllib.request.urlopen(req) as f:
@@ -1386,7 +1394,7 @@ containing parameters::
13861394 >>> import urllib.request
13871395 >>> import urllib.parse
13881396 >>> params = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
1389- >>> url = "http://www.musi-cal.com/cgi-bin/query?%s" % params
1397+ >>> url = "https://www.python.org/?%s" % params
13901398 >>> with urllib.request.urlopen(url) as f:
13911399 ... print(f.read().decode('utf-8'))
13921400 ...
@@ -1398,7 +1406,7 @@ from urlencode is encoded to bytes before it is sent to urlopen as data::
13981406 >>> import urllib.parse
13991407 >>> data = urllib.parse.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0})
14001408 >>> data = data.encode('ascii')
1401- >>> with urllib.request.urlopen("http://requestb.in/xrbl82xr", data) as f:
1409+ >>> with urllib.request.urlopen("https://httpbin.org/post", data) as f:
14021410 ... print(f.read().decode('utf-8'))
14031411 ...
14041412
@@ -1408,15 +1416,15 @@ environment settings::
14081416 >>> import urllib.request
14091417 >>> proxies = {'http': 'http://proxy.example.com:8080/'}
14101418 >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
1411- >>> with opener.open("http://www.python.org") as f:
1419+ >>> with opener.open("https://www.python.org") as f:
14121420 ... f.read().decode('utf-8')
14131421 ...
14141422
14151423The following example uses no proxies at all, overriding environment settings::
14161424
14171425 >>> import urllib.request
1418- >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
1419- >>> with opener.open("http://www.python.org/") as f:
1426+ >>> opener = urllib.request.build_opener(urllib.request.ProxyHandler({}))
1427+ >>> with opener.open("https://www.python.org/") as f:
14201428 ... f.read().decode('utf-8')
14211429 ...
14221430
@@ -1449,7 +1457,7 @@ some point in the future.
14491457 The following example illustrates the most common usage scenario::
14501458
14511459 >>> import urllib.request
1452- >>> local_filename, headers = urllib.request.urlretrieve('http://python.org/')
1460+ >>> local_filename, headers = urllib.request.urlretrieve('https://python.org/')
14531461 >>> html = open(local_filename)
14541462 >>> html.close()
14551463
0 commit comments