|
1 | 1 | { |
2 | 2 | "metadata": { |
3 | 3 | "name": "", |
4 | | - "signature": "sha256:d5895f75b2ac58db150d7b521682366a447ffb2fb0b7db7e551edd40e6d1ab10" |
| 4 | + "signature": "sha256:8dc4f91bc6a88e15ab0d25fac35b9a7645a7149b5ab4e1e15b2b372362e82ae2" |
5 | 5 | }, |
6 | 6 | "nbformat": 3, |
7 | 7 | "nbformat_minor": 0, |
|
855 | 855 | "collapsed": false, |
856 | 856 | "input": [ |
857 | 857 | "import random\n", |
858 | | - "import copy\n", |
859 | 858 | "import timeit\n", |
| 859 | + "from collections import defaultdict\n", |
860 | 860 | "\n", |
861 | 861 | "\n", |
862 | | - "\n", |
863 | | - "def add_element_check1(my_dict, elements):\n", |
| 862 | + "def add_element_check1(elements):\n", |
| 863 | + " d = dict()\n", |
864 | 864 | " for e in elements:\n", |
865 | | - " if e not in my_dict:\n", |
866 | | - " my_dict[e] = 1\n", |
| 865 | + " if e not in d:\n", |
| 866 | + " d[e] = 1\n", |
867 | 867 | " else:\n", |
868 | | - " my_dict[e] += 1\n", |
| 868 | + " d[e] += 1\n", |
| 869 | + " return d\n", |
869 | 870 | " \n", |
870 | | - "def add_element_check2(my_dict, elements):\n", |
| 871 | + "def add_element_check2(elements):\n", |
| 872 | + " d = dict()\n", |
871 | 873 | " for e in elements:\n", |
872 | | - " if e not in my_dict:\n", |
873 | | - " my_dict[e] = 0\n", |
874 | | - " my_dict[e] += 1 \n", |
875 | | - "\n", |
876 | | - "def add_element_except(my_dict, elements):\n", |
| 874 | + " if e not in d:\n", |
| 875 | + " d[e] = 0\n", |
| 876 | + " d[e] += 1 \n", |
| 877 | + " return d\n", |
| 878 | + " \n", |
| 879 | + "def add_element_except(elements):\n", |
| 880 | + " d = dict()\n", |
877 | 881 | " for e in elements:\n", |
878 | 882 | " try:\n", |
879 | | - " my_dict[e] += 1\n", |
| 883 | + " d[e] += 1\n", |
880 | 884 | " except KeyError:\n", |
881 | | - " my_dict[e] = 1\n", |
| 885 | + " d[e] = 1\n", |
| 886 | + " return d\n", |
882 | 887 | " \n", |
| 888 | + "def add_element_defaultdict(elements):\n", |
| 889 | + " d = defaultdict(int)\n", |
| 890 | + " for e in elements:\n", |
| 891 | + " d[e] += 1\n", |
| 892 | + " return d\n", |
883 | 893 | "\n", |
884 | 894 | "random.seed(123)\n", |
885 | | - "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", |
886 | | - "empty_dict = {}\n", |
887 | 895 | "\n", |
888 | 896 | "print('Results for 100 integers in range 1-10') \n", |
889 | | - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", |
890 | | - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", |
891 | | - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", |
892 | | - " \n", |
893 | | - "print('\\nResults for 1000 integers in range 1-10') \n", |
894 | | - "rand_ints = [random.randrange(1, 10) for i in range(1000)]\n", |
895 | | - "empty_dict = {}\n", |
896 | | - "\n", |
897 | | - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", |
898 | | - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", |
899 | | - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", |
| 897 | + "rand_ints = [random.randrange(1, 10) for i in range(100)]\n", |
| 898 | + "%timeit add_element_check1(rand_ints)\n", |
| 899 | + "%timeit add_element_check2(rand_ints)\n", |
| 900 | + "%timeit add_element_except(rand_ints)\n", |
| 901 | + "%timeit add_element_defaultdict(rand_ints)\n", |
| 902 | + "\n", |
| 903 | + "print('\\nResults for 1000 integers in range 1-5') \n", |
| 904 | + "rand_ints = [random.randrange(1, 5) for i in range(1000)]\n", |
| 905 | + "%timeit add_element_check1(rand_ints)\n", |
| 906 | + "%timeit add_element_check2(rand_ints)\n", |
| 907 | + "%timeit add_element_except(rand_ints)\n", |
| 908 | + "%timeit add_element_defaultdict(rand_ints)\n", |
900 | 909 | "\n", |
901 | 910 | "print('\\nResults for 1000 integers in range 1-1000') \n", |
902 | | - "rand_ints = [random.randrange(1, 10) for i in range(1000)]\n", |
903 | | - "empty_dict = {}\n", |
904 | | - "\n", |
905 | | - "%timeit add_element_check1(copy.deepcopy(empty_dict), rand_ints)\n", |
906 | | - "%timeit add_element_check2(copy.deepcopy(empty_dict), rand_ints)\n", |
907 | | - "%timeit add_element_except(copy.deepcopy(empty_dict), rand_ints)\n", |
908 | | - "\n", |
909 | | - "#\n", |
910 | | - "# Python 3.4.0\n", |
911 | | - "# MacOS X 10.9.2\n", |
912 | | - "# 2.5 GHz Intel Core i5\n", |
913 | | - "# 4 GB 1600 Mhz DDR3\n", |
914 | | - "#" |
| 911 | + "rand_ints = [random.randrange(1, 1000) for i in range(1000)]\n", |
| 912 | + "%timeit add_element_check1(rand_ints)\n", |
| 913 | + "%timeit add_element_check2(rand_ints)\n", |
| 914 | + "%timeit add_element_except(rand_ints)\n", |
| 915 | + "%timeit add_element_defaultdict(rand_ints)" |
915 | 916 | ], |
916 | 917 | "language": "python", |
917 | 918 | "metadata": {}, |
|
921 | 922 | "stream": "stdout", |
922 | 923 | "text": [ |
923 | 924 | "Results for 100 integers in range 1-10\n", |
924 | | - "100000 loops, best of 3: 16.6 \u00b5s per loop" |
| 925 | + "10000 loops, best of 3: 24.6 \u00b5s per loop" |
| 926 | + ] |
| 927 | + }, |
| 928 | + { |
| 929 | + "output_type": "stream", |
| 930 | + "stream": "stdout", |
| 931 | + "text": [ |
| 932 | + "\n", |
| 933 | + "10000 loops, best of 3: 26.2 \u00b5s per loop" |
925 | 934 | ] |
926 | 935 | }, |
927 | 936 | { |
928 | 937 | "output_type": "stream", |
929 | 938 | "stream": "stdout", |
930 | 939 | "text": [ |
931 | 940 | "\n", |
932 | | - "100000 loops, best of 3: 17.6 \u00b5s per loop" |
| 941 | + "10000 loops, best of 3: 25.4 \u00b5s per loop" |
933 | 942 | ] |
934 | 943 | }, |
935 | 944 | { |
936 | 945 | "output_type": "stream", |
937 | 946 | "stream": "stdout", |
938 | 947 | "text": [ |
939 | 948 | "\n", |
940 | | - "100000 loops, best of 3: 17.9 \u00b5s per loop" |
| 949 | + "10000 loops, best of 3: 23 \u00b5s per loop" |
941 | 950 | ] |
942 | 951 | }, |
943 | 952 | { |
|
946 | 955 | "text": [ |
947 | 956 | "\n", |
948 | 957 | "\n", |
949 | | - "Results for 1000 integers in range 1-10\n", |
950 | | - "10000 loops, best of 3: 135 \u00b5s per loop" |
| 958 | + "Results for 1000 integers in range 1-5\n", |
| 959 | + "1000 loops, best of 3: 236 \u00b5s per loop" |
951 | 960 | ] |
952 | 961 | }, |
953 | 962 | { |
954 | 963 | "output_type": "stream", |
955 | 964 | "stream": "stdout", |
956 | 965 | "text": [ |
957 | 966 | "\n", |
958 | | - "10000 loops, best of 3: 125 \u00b5s per loop" |
| 967 | + "1000 loops, best of 3: 235 \u00b5s per loop" |
959 | 968 | ] |
960 | 969 | }, |
961 | 970 | { |
962 | 971 | "output_type": "stream", |
963 | 972 | "stream": "stdout", |
964 | 973 | "text": [ |
965 | 974 | "\n", |
966 | | - "10000 loops, best of 3: 105 \u00b5s per loop" |
| 975 | + "1000 loops, best of 3: 207 \u00b5s per loop" |
| 976 | + ] |
| 977 | + }, |
| 978 | + { |
| 979 | + "output_type": "stream", |
| 980 | + "stream": "stdout", |
| 981 | + "text": [ |
| 982 | + "\n", |
| 983 | + "10000 loops, best of 3: 177 \u00b5s per loop" |
967 | 984 | ] |
968 | 985 | }, |
969 | 986 | { |
|
973 | 990 | "\n", |
974 | 991 | "\n", |
975 | 992 | "Results for 1000 integers in range 1-1000\n", |
976 | | - "10000 loops, best of 3: 122 \u00b5s per loop" |
| 993 | + "1000 loops, best of 3: 268 \u00b5s per loop" |
| 994 | + ] |
| 995 | + }, |
| 996 | + { |
| 997 | + "output_type": "stream", |
| 998 | + "stream": "stdout", |
| 999 | + "text": [ |
| 1000 | + "\n", |
| 1001 | + "1000 loops, best of 3: 377 \u00b5s per loop" |
977 | 1002 | ] |
978 | 1003 | }, |
979 | 1004 | { |
980 | 1005 | "output_type": "stream", |
981 | 1006 | "stream": "stdout", |
982 | 1007 | "text": [ |
983 | 1008 | "\n", |
984 | | - "10000 loops, best of 3: 123 \u00b5s per loop" |
| 1009 | + "1000 loops, best of 3: 511 \u00b5s per loop" |
985 | 1010 | ] |
986 | 1011 | }, |
987 | 1012 | { |
988 | 1013 | "output_type": "stream", |
989 | 1014 | "stream": "stdout", |
990 | 1015 | "text": [ |
991 | 1016 | "\n", |
992 | | - "10000 loops, best of 3: 104 \u00b5s per loop" |
| 1017 | + "1000 loops, best of 3: 410 \u00b5s per loop" |
993 | 1018 | ] |
994 | 1019 | }, |
995 | 1020 | { |
|
1000 | 1025 | ] |
1001 | 1026 | } |
1002 | 1027 | ], |
1003 | | - "prompt_number": 13 |
| 1028 | + "prompt_number": 16 |
| 1029 | + }, |
| 1030 | + { |
| 1031 | + "cell_type": "markdown", |
| 1032 | + "metadata": {}, |
| 1033 | + "source": [ |
| 1034 | + "### Conclusion" |
| 1035 | + ] |
1004 | 1036 | }, |
1005 | 1037 | { |
1006 | 1038 | "cell_type": "markdown", |
1007 | 1039 | "metadata": {}, |
1008 | 1040 | "source": [ |
1009 | | - "### Conclusion\n", |
1010 | | - "Interestingly, the `try-except` loop pays off if we have more elements (here: 1000 integers instead of 100) as dictionary keys to check. Also, it doesn't matter much whether the elements exist or do not exist in the dictionary, yet." |
| 1041 | + "We see from the results that the `try-except` variant is faster than then the `if element in my_dict` alternative if we have a low number of unique elements (here: 1000 integers in the range 1-5), which makes sense: the `except`-block is skipped if an element is already added as a key to the dictionary. However, in this case the `collections.defaultdict` has even a better performance. \n", |
| 1042 | + "However, if we are having a relative large number of unique entries(here: 1000 integers in range 1-1000), the `if element in my_dict` approach outperforms the alternative approaches." |
1011 | 1043 | ] |
1012 | 1044 | }, |
1013 | 1045 | { |
|
0 commit comments