forked from deepspeedai/DeepSpeed
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstage3_test.py
More file actions
90 lines (64 loc) · 2.34 KB
/
stage3_test.py
File metadata and controls
90 lines (64 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
import torch
import deepspeed
###################################
# Setup
###################################
class VerboseLinear(torch.nn.Linear):
    """``torch.nn.Linear`` that prints a marker before and after construction.

    The two markers bracket the parent constructor call, so anything printed
    while the parent builds the layer shows up between them.
    """

    def __init__(self, *args, **kwargs):
        """Construct the layer, announcing the start and end of construction.

        Args:
            *args: Positional arguments forwarded unchanged to
                ``torch.nn.Linear.__init__`` (e.g. ``in_features``,
                ``out_features``).
            **kwargs: Keyword arguments forwarded unchanged to
                ``torch.nn.Linear.__init__``.
        """
        print('Begin VerboseLinear.__init__')
        # Forward positionals too, so this class is a drop-in replacement for
        # torch.nn.Linear regardless of how the caller spells the arguments.
        super().__init__(*args, **kwargs)
        print('End VerboseLinear.__init__')
class LinearStack(torch.nn.Module):
    """Small stack of linear layers followed by an identity module.

    Layout, in the order data flows through it:
      * ``input_layer``  - ``VerboseLinear`` mapping ``input_dim`` -> ``hidden_dim``
      * ``layers``       - ``num_layers`` bias-free ``torch.nn.Linear`` modules,
                           each mapping ``hidden_dim`` -> ``hidden_dim``
      * ``output_layer`` - ``torch.nn.Linear`` mapping ``hidden_dim`` -> ``output_dim``
      * ``identity``     - ``torch.nn.Identity``
    """

    def __init__(self, input_dim=2, hidden_dim=4, output_dim=4, num_layers=2):
        """Create every sub-module and record the three dimensions.

        Args:
            input_dim: Feature size accepted by the input layer.
            hidden_dim: Feature size used between the inner layers.
            output_dim: Feature size produced by the output layer.
            num_layers: How many hidden (bias-free) linear layers to create.
        """
        super().__init__()

        # Keep the sizes on the instance so callers can read them back.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim

        self.input_layer = VerboseLinear(in_features=input_dim, out_features=hidden_dim)

        # The hidden layers are built first and only then wrapped in the
        # ModuleList, one fresh Linear per requested layer.
        hidden_layers = [
            torch.nn.Linear(in_features=hidden_dim, out_features=hidden_dim, bias=False)
            for _ in range(num_layers)
        ]
        self.layers = torch.nn.ModuleList(hidden_layers)

        self.output_layer = torch.nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.identity = torch.nn.Identity()

    def forward(self, x):
        """Pass ``x`` through input layer, hidden layers, output layer, identity.

        Args:
            x: Input handed to ``input_layer``.

        Returns:
            The result of ``identity`` applied to the output layer's result.
        """
        activations = self.input_layer(x)
        for hidden_layer in self.layers:
            activations = hidden_layer(activations)
        return self.identity(self.output_layer(activations))
###################################
# DRIVER
###################################
def test_driver():
    """Build a ``LinearStack`` inside ``deepspeed.zero.Init`` and run one step.

    The step is a single forward pass on a random input followed by a single
    backward pass with a random upstream gradient. Progress banners are
    printed along the way; nothing is returned and nothing is asserted.
    """
    print()
    print('BUILDING MODEL')
    with deepspeed.zero.Init():
        net = LinearStack()
    print()

    # Debug aid (disabled): list parameter names from net.named_parameters()
    # split by a per-parameter partitioning flag. The original snippet read
    # `p._partitioned` - verify that attribute exists before re-enabling.

    net.train()

    # Draw the input first and the upstream gradient second, then make sure
    # neither tensor is tracked by autograd.
    sample = torch.rand(1, net.input_dim)
    upstream_grad = torch.rand(1, net.output_dim)
    upstream_grad.requires_grad_(False)
    sample.requires_grad_(False)

    print()
    print('BEGINNING FORWARD')
    print()

    prediction = net(sample)
    prediction.backward(upstream_grad)

    # Debug aid (disabled): the same partitioned / full parameter listing as
    # above can be repeated here to inspect the state after backward.
#samyamspeed.disable()
# Run the driver unconditionally. There is no `__main__` guard, so this call
# also fires whenever the module is imported, not only when run as a script.
test_driver()