{
 "cells": [
  {"cell_type": "markdown", "metadata": {}, "source": ["# Кодировки"]},
  {
   "cell_type": "code", "execution_count": 4, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "# All imports are gathered in this cell so a fresh kernel can run the notebook top to bottom.\n",
    "import codecs\n",
    "import sys\n",
    "from string import ascii_lowercase, ascii_uppercase, digits, ascii_letters, punctuation, whitespace\n",
    "\n",
    "import chardet"
   ]
  },
  {
   "cell_type": "code", "execution_count": 8, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["28"]}, "execution_count": 8, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "x = 10\n",
    "sys.getsizeof(x)"
   ]
  },
  {
   "cell_type": "code", "execution_count": 9, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "def f():\n",
    " \"\"\"\n",
    " docs\n",
    " \"\"\"\n",
    " pass"
   ]
  },
  {
   "cell_type": "code", "execution_count": 10, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["'\\n docs\\n '"]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "# The docstring is stored verbatim, including its newlines and indentation.\n",
    "f.__doc__"
   ]
  },
  {
   "cell_type": "code", "execution_count": 7, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["[49, 80, 84, 88, 92, 96, 100]"]}, "execution_count": 7, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "# Size of every prefix of the string; once the emoji is included the recorded sizes grow by 4 bytes per character.\n",
    "beer = \"🍺 some\"\n",
    "[sys.getsizeof(beer[:index]) for index in range(len(beer) + 1)]"
   ]
  },
  {
   "cell_type": "code", "execution_count": 9, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "easy = \"easy\"\n",
    "изич = \"изич\"\n",
    "易易易易 = \"易易易易\""
   ]
  },
  {
   "cell_type": "code", "execution_count": 10, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["[49, 50, 51, 52, 53]"]}, "execution_count": 10, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "# ASCII-only text: the recorded sizes grow by 1 byte per character.\n",
    "[sys.getsizeof(easy[:index]) for index in range(len(easy) + 1)]"
   ]
  },
  {
   "cell_type": "code", "execution_count": 11, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["[49, 76, 78, 80, 82]"]}, "execution_count": 11, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "# Cyrillic text: the recorded sizes grow by 2 bytes per character.\n",
    "[sys.getsizeof(изич[:index]) for index in range(len(изич) + 1)]"
   ]
  },
  {
   "cell_type": "code", "execution_count": 12, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["[49, 76, 78, 80, 82]"]}, "execution_count": 12, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "# Chinese text: the recorded sizes also grow by 2 bytes per character.\n",
    "[sys.getsizeof(易易易易[:index]) for index in range(len(易易易易) + 1)]"
   ]
  },
  {
   "cell_type": "code", "execution_count": 13, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(90, 98)"]}, "execution_count": 13, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "sys.getsizeof(изич + easy), sys.getsizeof(易易易易 + изич + easy)"
   ]
  },
  {
   "cell_type": "code", "execution_count": 14, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "({'confidence': 1.0, 'encoding': 'ascii', 'language': ''},\n",
       " {'confidence': 0.938125, 'encoding': 'utf-8', 'language': ''},\n",
       " {'confidence': 0.938125, 'encoding': 'utf-8', 'language': ''})"
      ]
     },
     "execution_count": 14, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "chardet.detect(easy.encode()), chardet.detect(изич.encode()), chardet.detect(易易易易.encode())"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Задание строк"]},
  {
   "cell_type": "code", "execution_count": 15, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "str1 = \"think about 'it'\"\n",
    "\n",
    "str2 = 'синк эбаут \"ит\"'\n",
    "\n",
    "str3 = \"\"\"想想吧 想想吧 想想吧\n",
    "想想吧\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code", "execution_count": 16, "metadata": {"collapsed": false},
   "outputs": [
    {
     "name": "stdout", "output_type": "stream",
     "text": [
      "think about 'it'\n",
      "синк эбаут \"ит\" \t \" '\n"
     ]
    }
   ],
   "source": [
    "str_escape = str1 + \"\\n\" + str2 + \" \\t \\\" \\'\"\n",
    "print(str_escape)"
   ]
  },
  {
   "cell_type": "code", "execution_count": 17, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["\\n \\t \\\" \\'\n"]}],
   "source": [
    "# Raw string: the backslashes are kept literally, as the printed output shows.\n",
    "raw_str = r\"\\n \\t \\\" \\'\"\n",
    "print(raw_str)"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Строки и Unicode"]},
  {
   "cell_type": "code", "execution_count": 19, "metadata": {"collapsed": false},
   "outputs": [
    {"name": "stdout", "output_type": "stream", "text": ["EASY\n"]},
    {"data": {"text/plain": ["b'EASY'"]}, "execution_count": 19, "metadata": {}, "output_type": "execute_result"}
   ],
   "source": [
    "u_str = \"\\u0045\\u0041\\u0053\\u0059\"\n",
    "print(u_str)\n",
    "codecs.encode(u_str)"
   ]
  },
  {
   "cell_type": "code", "execution_count": 20, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["('A', 48)"]}, "execution_count": 20, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "chr(65), ord('0')"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Функции строк стандартной библиотеки str"]},
  {
   "cell_type": "code", "execution_count": 21, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "text = \"\"\"Given a string representing one Unicode character,\n",
    "return an integer representing the Unicode code point of that character.\n",
    "For example, ord('a') returns the integer 97 and ord('€') (Euro sign) returns 8364.\n",
    "This is the inverse of chr().\"\"\""
   ]
  },
  {
   "cell_type": "code", "execution_count": 22, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(-1, 78, 10)"]}, "execution_count": 22, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "text.find(\"€\", 180), text.index(\"ing\", 70, 100), text.count(\"a\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": 23, "metadata": {"collapsed": false},
   "outputs": [
    {
     "name": "stdout", "output_type": "stream",
     "text": [
      "Given a string representing one UniEASYode EASYharaEASYter,\n",
      "return an integer representing the UniEASYode EASYode point of that EASYharaEASYter.\n",
      "For example, ord('a') returns the integer 97 and ord('€') (Euro sign) returns 8364.\n",
      "This is the inverse of EASYhr().\n"
     ]
    }
   ],
   "source": [
    "print(text.replace(\"c\", \"EASY\"))"
   ]
  },
  {
   "cell_type": "code", "execution_count": 24, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(False, True)"]}, "execution_count": 24, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "text.startswith(\"Givens\"), text.endswith(\".\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": 25, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(True, True, True, False, True, False)"]}, "execution_count": 25, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "\"easy100500\".isalnum(), \"想想吧\".isalpha(), \"1000\".isdigit(), \"AbCd\".islower(), \" \".isspace(), \"Sad sad\".istitle()"
   ]
  },
  {
   "cell_type": "code", "execution_count": 26, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["('test test', 'TEST TEST', 'Test test', 'Test Test', 'Test Test')"]}, "execution_count": 26, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "\"tEST tEST\".lower(), \"tEST tEST\".upper(), \"tEST tEST\".capitalize(), \"tEST tEST\".title(), \"tEST tEST\".swapcase()"
   ]
  },
  {
   "cell_type": "code", "execution_count": 27, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Given a string representing one Unicode character,\\nreturn an integer representing the Unicode code point of that character.\\nFor example, ord('a') returns the integer 97 and ord('€') (Euro sign) returns 8364.\\nThis is the inverse of chr().\""
      ]
     },
     "execution_count": 27, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "(\"\\n\" + text).strip()"
   ]
  },
  {
   "cell_type": "code", "execution_count": 28, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"Given a string representing one Unicode character, return an integer representing the Unicode code point of that character. For example, ord('a') returns the integer 97 and ord('€') (Euro sign) returns 8364. This is the inverse of chr().\""
      ]
     },
     "execution_count": 28, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "# split() with no arguments breaks on any whitespace run, so the join yields a single-line text.\n",
    "' '.join(text.split())"
   ]
  },
  {
   "cell_type": "code", "execution_count": 29, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Given a string representing one Unicode character,\\nreturn an ',\n",
       " 'integer',\n",
       " \" representing the Unicode code point of that character.\\nFor example, ord('a') returns the integer 97 and ord('€') (Euro sign) returns 8364.\\nThis is the inverse of chr().\")"
      ]
     },
     "execution_count": 29, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "text.partition(\"integer\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": 30, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["('100 ', ' 100', ' 100 ')"]}, "execution_count": 30, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "str(100).ljust(10), str(100).rjust(10), str(100).center(10)"
   ]
  },
  {
   "cell_type": "code", "execution_count": 12, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["'some text 10'"]}, "execution_count": 12, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "x = 10\n",
    "f\"some text {x}\""
   ]
  },
  {
   "cell_type": "code", "execution_count": 31, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(True, True)"]}, "execution_count": 31, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "\"integer\" in text, \"Gives\" not in text"
   ]
  },
  {
   "cell_type": "code", "execution_count": 32, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["(158, 158)"]}, "execution_count": 32, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "text.rfind(\"int\"), text.rindex(\"int\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": 33, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(\"Given a string representing one Unicode character,\\nreturn an integer representing the Unicode code point of that character.\\nFor example, ord('a') returns the \",\n",
       " 'int',\n",
       " \"eger 97 and ord('€') (Euro sign) returns 8364.\\nThis is the inverse of chr().\")"
      ]
     },
     "execution_count": 33, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "text.rpartition(\"int\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": 22, "metadata": {"collapsed": false},
   "outputs": [{"data": {"text/plain": ["True"]}, "execution_count": 22, "metadata": {}, "output_type": "execute_result"}],
   "source": [
    "\"test\".encode(\"utf-8\") in \"some test\".encode(\"utf-8\")"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Форматирование строк"]},
  {"cell_type": "markdown", "metadata": {}, "source": ["### Новый стиль"]},
  {
   "cell_type": "code", "execution_count": 34, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["fn: Anton, sn: Emelianov\n"]}],
   "source": [
    "name = \"Anton\"\n",
    "second_name = \"Emelianov\"\n",
    "print(\"fn: {}, sn: {}\".format(name, second_name))"
   ]
  },
  {
   "cell_type": "code", "execution_count": 35, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["fn: Anton, sn: Emelianov\n"]}],
   "source": [
    "print(\"fn: {1}, sn: {0}\".format(second_name, name))"
   ]
  },
  {
   "cell_type": "code", "execution_count": 36, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["fn: Anton, sn: Emelianov\n"]}],
   "source": [
    "print(\"fn: {name}, sn: {second_name}\".format(second_name=second_name, name=name))"
   ]
  },
  {
   "cell_type": "code", "execution_count": 37, "metadata": {"collapsed": false},
   "outputs": [
    {
     "name": "stdout", "output_type": "stream",
     "text": [
      "left 10\n",
      "10 right\n",
      "left 10 right\n"
     ]
    }
   ],
   "source": [
    "print(\"left{:>10}\".format(10))\n",
    "print(\"{:<10}right\".format(10))\n",
    "print(\"left{:^10}right\".format(10))"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["### Старый стиль"]},
  {
   "cell_type": "code", "execution_count": 38, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["old school 1900 year in town Moscow\n"]}],
   "source": [
    "print(\"old school %d year in town %s\" % (1900, \"Moscow\"))"
   ]
  },
  {
   "cell_type": "code", "execution_count": 39, "metadata": {"collapsed": false},
   "outputs": [{"name": "stdout", "output_type": "stream", "text": ["old school in town Moscow\n"]}],
   "source": [
    "# A single value is wrapped in a one-element tuple; (\"Moscow\") without the comma is just a string.\n",
    "print(\"old school in town %s\" % (\"Moscow\",))"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Модуль string"]},
  {
   "cell_type": "code", "execution_count": 41, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('abcdefghijklmnopqrstuvwxyz',\n",
       " 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
       " '0123456789',\n",
       " 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',\n",
       " '!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~',\n",
       " ' \\t\\n\\r\\x0b\\x0c')"
      ]
     },
     "execution_count": 41, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "# These constants are imported from the string module in the first code cell.\n",
    "ascii_lowercase, ascii_uppercase, digits, ascii_letters, punctuation, whitespace"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Тип bytes"]},
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "byte_str = b\"\\xd0\\xbb\\xd0\\xb5\\xd0\\xb3\\xd0\\xba\\xd0\\xbe\""
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "\"легко\".encode(\"utf-8\")"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Файлы"]},
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "fin = open(\"buckets.py\", \"r\", encoding=\"utf-8\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "type(fin)"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "# The writer uses the same explicit encoding as the reader, so the copy does not depend on the platform default.\n",
    "fout = open(\"buckets_copy.py\", \"w\", encoding=\"utf-8\")\n",
    "# Iterating the file object yields one line at a time without loading the whole file first.\n",
    "for line in fin:\n",
    "    # Each line keeps its own newline, so end=\"\" stops print from adding a second one.\n",
    "    print(line, end=\"\")\n",
    "    fout.write(line)"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": true},
   "outputs": [],
   "source": [
    "# Both handles were opened without a with-block, so they are closed by hand here.\n",
    "fin.close()\n",
    "fout.close()"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "fin.closed, fout.closed"
   ]
  },
  {
   "cell_type": "code", "execution_count": 23, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "with open(\"buckets.py\", \"r\", encoding=\"utf-8\") as fin:\n",
    " ff = fin.read()"
   ]
  },
  {
   "cell_type": "code", "execution_count": 24, "metadata": {"collapsed": false},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'class Buckets(object):\\n def __init__(self, length, default):\\n self.default = default\\n self.buckets = [default] * length\\n\\n def add(self, index, element):\\n self.buckets[index].append(element)\\n\\n def find(self, index, element):\\n return element in self.buckets[index]\\n\\n def clear(self, index):\\n self.buckets[index] = self.default\\n'"
      ]
     },
     "execution_count": 24, "metadata": {}, "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the text that the with-block in the previous cell read into ff.\n",
    "ff"
   ]
  },
  {"cell_type": "markdown", "metadata": {}, "source": ["# Потоки ввода-вывода"]},
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "stdin = sys.stdin.readline()"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "sys.stdout.write(\"End of lecture! Yeah!\")"
   ]
  },
  {
   "cell_type": "code", "execution_count": null, "metadata": {"collapsed": false},
   "outputs": [],
   "source": [
    "sys.stderr.write(\"Dead\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
  "language_info": {
   "codemirror_mode": {"name": "ipython", "version": 3},
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}