forked from MariaNattestad/BioCoding_Tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpython intro.py
More file actions
484 lines (300 loc) · 14.1 KB
/
python intro.py
File metadata and controls
484 lines (300 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
########################################################################################
##
## An intro to Python for biology
##
########################################################################################
########################################################################################
######################## Variables and print statements ################################
########################################################################################
# this is a comment, use these to tell people what your code does
mystring = "Homo sapiens"  # a string: letters, words, sentences, anything in quotes
myinteger = 4  # an integer
myfloat = 4.0  # a float (a number with a decimal point)
# print(...) with parentheses around one value works in both Python 2 and Python 3
print("This is a string: %s, this is a float: %f, and this is an integer: %d" % (mystring, myfloat, myinteger))
print("We are %s and there are %d bases in our DNA" % (mystring, myinteger))
# use %s when you need a string
# use %d when you need an integer
# use %f when you need a float (a number with a decimal point)
########################################################################################
################################### Basic Math #########################################
########################################################################################
myfloat = 2.0
yourfloat = 3.0
print(myfloat/yourfloat)
myint = 2
yourint = 3
print(myint/yourint)
# The answer is 2/3 = 0.666666667, but Python 2 (like many other programming languages) has a quirk: it thinks that any math you do with integers can only output another integer, so it prints 0.
# Python 3 changed this: there, / always gives 0.666666667 and // is the operator that rounds down to a whole number.
# Let's explore this and see if we can find tricks to deal with it
myint = 2
yourint = 3
# Which of these work and why?
print(float(myint)/yourint)  # 1
print(myint/float(yourint))  # 2
# the outer parentheses belong to print; the inner ones make the sum happen before the division
print((myint+0.0)/yourint)  # 3
print(myint/yourint + 0.0)  # 4
print(myint*1.0/yourint)  # 5
print(myint/yourint*1.0)  # 6
########################################################################################
########################### Lists, strings, and for loops ##############################
########################################################################################
####### LISTS #######
mylist = [6, 3, 6, 7, 2, 6, 2, 9, 7, 0]
print(mylist)
# indices start counting at 0, so mylist[0] is the first item
print(mylist[0])
print(mylist[1])
print(mylist[2])
###### STRINGS ######
# strings are like lists of letters, so you can get individual letters using the bracket notation
mystring = "ACGT"
print(len(mystring))  ## => 4
print(mystring[0])
print(mystring[1])
print(mystring[2])
print(mystring[3])
# print(mystring[4])  # Error: "string index out of range"
DNA = "CAACGGGCAATATGTCTCTGTGTG"
print("Your DNA is %d bases long" % (len(DNA)))
print(DNA[7])
######### lists can contain anything #########
mylist = [1, "anything", 2.45, 5, 10, "Hello world"]
print(mylist[4])
####### you can also make lists longer ######
print(mylist)
mylist.append(56)
print(mylist)
# this means you can start with an empty list and add to it:
nucleotides = []
nucleotides.append("A")
nucleotides.append("C")
nucleotides.append("G")
nucleotides.append("T")
print(nucleotides)
########## QUESTION 3: Which of these are correct? Which give errors? ###########
################ indexing lists ###################
# list(...) makes sure we get a real list of the numbers 0 through 9 in both Python 2 and Python 3
mylist = list(range(10))
print(mylist)
print(mylist[0:10])
print(mylist[:])
print(mylist[1:10])
print(mylist[4:8])
print(mylist[3:])
print(mylist[0:10:2])  # what does the third number do?
print(mylist[0:7:3])
print(mylist[::-1])  # what does this do?
###### Two different versions of a "for loop" #######
# everything indented under the "for" line runs once for every letter in DNA
for letter in DNA:
    print(letter)
# the variable letter can be anything, try switching it out with tomato
for tomato in DNA:
    print(tomato)
# we only call it letter so it makes sense. This becomes important when you have many more variables. At that point, naming them after fruits suddenly isn't funny anymore.
# you can also make a for loop like this:
# range(len(DNA)) counts from 0 up to (but not including) len(DNA), so i loops through those numbers
for i in range(len(DNA)):
    print(i)
# when you do this, you get the bases in the DNA by saying DNA[i]
for i in range(len(DNA)):
    print("Letter number %d in your DNA is %s" % (i, DNA[i]))
# sometimes we really care about the indices, like when we are dealing with 2 or more lists in parallel
DNA = "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC"
RNA = "AGCUUUUCAUUCUGACUGCAACGGGCAAUAUGUCUCUGUGUGGAUUAAAAAAAGAGUGUCUGAUAGCAGC"
length = len(DNA)
# the same index i picks out the matching base from both strings
for i in range(length):
    print("DNA: %s, RNA: %s" % (DNA[i], RNA[i]))
# range can do many things:
for b in range(0, 4, 1):  # start=0, stop=4, step=1
    print(b)
for j in range(2, 10, 2):
    print(j)
########## Now you try to print (1,3,5,7,9) using a for loop ###########
########### You can also make empty strings and add to them #############
DNA = "AGCTTTTCATTC"
# just like lists, you can also add to strings:
mystring = str()  # makes an empty string, just like mylist = [] makes an empty list
for i in range(len(DNA)):
    mystring += DNA[i]
print(mystring)
# how is this different from making a list?
mylist = []
for i in range(len(DNA)):
    mylist.append(DNA[i])
print(mylist)
# if you do make a list and want it to be a string, you can always just do this:
mystring = str()
for item in mylist:
    mystring += item
print(mystring)
# (a shortcut that gives the same result in one line: "".join(mylist))
########################################################################################
################################## If statements #######################################
########################################################################################
# What do you think this does?
correct = True  # this is another kind of variable, called a boolean: it can only be True or False
if correct:  # means the same as: if correct == True
    print("You are right!")
else:
    print("You are wrong!")
# You can set as many cases as you want using elif (short for "else if")
mystring = "ABCYSGD"
for letter in mystring:
    # only the first matching case runs; else catches everything that matched none of them
    if letter == "A":
        print("Alpha")
    elif letter == "B":
        print("Bravo")
    elif letter == "C":
        print("Charlie")
    elif letter == "D":
        print("Delta")
    else:
        print("Ummm, I don't know what %s is in military speak" % (letter))
# You can also use these if statements for input validation
# If you ask the user for a string of DNA, it would be nice to check that the string looks like DNA before you do a lot of work with it
############### Challenge: Loop through this string of DNA and output a message to the user for every letter that isn't A, T, C, or G
DNA = "AGCUUUUCATKCTGACUUNNNAACGGGCAATAUGTCTCTGTHTGGATTAAAAAAAGAGTGTCMGATAGCAGC"
# This is called "Input validation"
# Now that you can validate a user's input, let's learn how to actually ask the user for input. It's pretty simple:
# Python 2 calls this function raw_input and Python 3 calls it input, so pick whichever one exists
try:
    ask_user = raw_input
except NameError:
    ask_user = input
DNA = ask_user('Enter your DNA: ')
# whatever the user writes before pressing enter gets stored in the variable DNA
######## We can also check multiple things at once #########
letter = "A"
# "or" means the case matches when at least one of the comparisons is true
if letter == "A" or letter == "G":
    print("Purine")
elif letter == "U" or letter == "T" or letter == "C":
    print("Pyrimidine")
else:
    print("Not a nucleotide")
########################################################################################
##################################### Functions ########################################
########################################################################################
mylist = [1, 2, 3, 4, 100]
# built-in functions:
# when you run a file (instead of typing at the interactive prompt) a result is only shown if you print it
print(min(mylist))
print(max(mylist))
print(sum(mylist))
# General syntax for making your own functions:
def function_name(argument1, argument2, etc):  # don't forget the colon
    """Template function: return argument1 + argument2 * etc.

    The three arguments can be anything that supports + and *.
    Multiplication happens before addition, so function_name(3, 4, 2)
    is 3 + (4 * 2) = 11.
    """
    # do something with the arguments
    something = argument1 + argument2 * etc
    return something


result = function_name(3, 4, 2)
# an example:
def double(mylist):
    """Return a new list holding every item of mylist multiplied by 2.

    mylist itself is not changed. Anything you can loop over works:
    a string is doubled letter by letter, because "A" * 2 is "AA".
    """
    doubled = []
    for item in mylist:
        doubled.append(item * 2)
    return doubled
result = double(mylist)
print(result)
# What happens if we call double() and pass it a string?
# Try it!
# write a function that takes in a list, loops through it, and adds each item to a string
# we saw some code that did that already, so just turn it into a function
# make sure to return the string at the end
########################################################################################
################################### Dictionaries #######################################
########################################################################################
# a dictionary is very useful, it is a data structure (meaning it keeps data organized)
# it has a bunch of key:value pairs just like a real dictionary, every word has a definition
# you can also think of it as a decoder, every key is a code, and every value
# stored with the code is what that code means
my_dictionary = {"key": "value", "secondkey": "secondvalue", "thirdkey": "thirdvalue"}
print(my_dictionary["secondkey"])
# list(...) makes the keys and values print as plain lists in both Python 2 and Python 3
print(list(my_dictionary.keys()))
print(list(my_dictionary.values()))
for key in my_dictionary:
    print("Key: %s, Value: %s" % (key, my_dictionary[key]))
# notice that the order of the dictionary may not be how we started it
# (older versions of Python don't remember the order; Python 3.7 and newer keep the order you added things in)
# dictionaries are not sorted: they find a key by hashing it, which is why they are really fast to search
# we use a key in a dictionary just like we normally use an index in a list or array
# the useful part is when we use the dictionary as a decoder:
mystring = "ABCYSGD"
military = {"A": "Alpha", "B": "Bravo", "C": "Charlie", "D": "Delta"}
for letter in mystring:
    print(military.get(letter, "Ummm, I don't know what %s is in military speak" % letter))
# remember we did the same thing with if statements before?
# military is a dictionary: the perfect decoder
# .get() is a method that all dictionaries have when they are created,
# the first argument is the key (or the code) and the second is the default
# the default is what you get back when the key isn't found
# military["Y"] # gives an error: you can try it
# but military.get("Y","?") # doesn't crash your application
# DNA has a recipe for proteins with the code being each set of three nucleotides
# then this is made into RNA (all the T's become U's)
# (bases or letters: AGA, ACU, GUU, etc.)
# the code "UUA" means the amino acid "L" or "Leucine"
# this website has all the codes, can you make them into a dictionary?
# https://www.manylabs.org/file/lessonMedia/69/geneticCode.png
# Hint: we are gonna use this for the third project!
########################################################################################
############################## File input/output #####################################
########################################################################################
############## INPUT ###############
# open a file
# "with" closes the file for you when the indented part is done, even if something goes wrong in the middle
with open("filename", 'r') as f:  # 'r' means read
    for line in f:
        print(line)
############## OUTPUT ###############
# "with" closes the file safely at the end of the indented part, which makes sure the last part you wrote is saved
with open("filename", 'w') as f2:  # 'w' means write, so you overwrite the file
    f2.write("Hello world")
    f2.write("\n")  # \n means newline, as in pressing enter
    for i in range(10):
        f2.write("%d\t" % i)  # \t means tab
# once f2 is closed, you can reuse the name
with open("filename2", 'a') as f2:  # 'a' means append, so you add to the file each time
    f2.write("Hello world")
    f2.write("\n")  # \n means newline, as in pressing enter
    for i in range(10):
        f2.write("%d\t" % i)  # \t means tab
########################################################################################
###################################### Numpy #########################################
########################################################################################
# Try to find a function that would take the average, like mean(), average(), or avg(). Can you find one?
# If you want to do something simple and can't find a function for it, you can search online.
# Many of the simple math and science functions are in a module named numpy:
import numpy as np
# np is a common nickname for numpy, so now when you want to use it, you say:
print(np.mean(mylist))
# Modules are packages of functions and other useful things, including something called an array
# arrays are a bit like lists, but they are only for numbers,
# that way you can do math on an entire array at once instead of looping through the list and doubling every element like we did before
mylist = [1, 2, 3, 4, 100]
myarray = np.array(mylist)  # this takes mylist and makes it into an array, once again np is just the nickname for numpy
print("doubling a list:")
print(mylist*2)  # a list times 2 is the same list repeated twice
print("doubling an array:")
print(myarray*2)  # an array times 2 is every number multiplied by 2
# many things we do are easier with arrays, including anything involving math
# An important thing to keep in mind is how to start an empty array if you don't want to just make a list into an array
# if you have a sentence, and you want to save the lengths of all the words:
sentence = "if you have a sentence, and you want to save the lengths of all the words"
# one of many built-in tricks:
words = sentence.split()
# this one looks like a function but flipped around. This is because it's a built-in method for a string.
# sentence is a string, and all strings have a bunch of methods that you can call by doing sentence.methodname()
# Methods are functions that belong to objects, so split() is a method that all strings have when they are created, and you can use them whenever you need to
# Other functions we made don't belong to any particular object, so they are very flexible.
# using a list:
lengths = []
for word in words:
    lengths.append(len(word))
print(lengths)
# you can then convert this to an array:
lengths = np.array(lengths)
# using an array:
# an array cannot grow like a list, so make it the right size first (filled with zeros) and then fill in each spot
lengths = np.zeros(len(words))
for i in range(len(words)):
    lengths[i] = len(words[i])
print(lengths)
# using the array seems harder in this case, but sometimes it might come in handy.
# I usually start with lists and convert them to arrays right before I have to do math on them
######## useful numpy functions ###########
print(np.std(myarray))
print(np.random.rand(10))  # multiply, add, or round to get the random numbers you need
# I want 100 random numbers from 10 up to and including 15, but only integers
print(np.floor(np.random.rand(100)*6+10))
# np.floor() rounds down, you can also use np.ceil() to round up, or np.around() to round to whichever whole number is closest (exact halves go to the nearest even number)
# np.floor() still gives floats like 10.0; np.random.randint(10, 16, size=100) would give whole numbers from 10 to 15 directly
# There are tons of numpy functions, just search online.
# Anything you have ever seen in a math class will probably exist in numpy