support svd
pingbowen23 committed Apr 29, 2024
commit f93952c6d7477f4913b4ae5a7a7a400eabe6bdf3
51 changes: 37 additions & 14 deletions bitdelta/diff2.py
@@ -121,8 +121,8 @@ def compress_submodule(name, subname, module, submodule):
setattr(module, subname, compressed)

# TODO: choose the compression ratio based on thresh
param_dict = dict()
for name, module in finetuned_model.named_modules():

if "vision" in name:
continue

@@ -162,9 +162,32 @@ def compress_submodule(name, subname, module, submodule):
mask , coeff = compressed.mask, compressed.coeff
delta = (unpack(mask)*2-1) * coeff
delta = delta.T
elif args.choice == "svd":
dim = 1024

if "mlp" in name:
dim = int(1024 * 1.45)

U , S , V = decomposition((f - p).clone().detach(),dim=dim)
param_dict[f"{name}.{subname}" + ".base"] = p
param_dict[f"{name}.{subname}" + ".U"] = U.to(p.dtype)
param_dict[f"{name}.{subname}" + ".S"] = S.to(p.dtype)
param_dict[f"{name}.{subname}" + ".V"] = V.to(p.dtype)
# if "llava" in args.finetuned_model.lower():
# U , S , V = decomposition((f - p).clone().detach(),dim=1024)
# param_dict[f"{name}.{subname}" + ".base"] = p
# param_dict[f"{name}.{subname}" + ".U"] = U.to(p.dtype)
# param_dict[f"{name}.{subname}" + ".S"] = S.to(p.dtype)
# param_dict[f"{name}.{subname}" + ".V"] = V.to(p.dtype)

finetuned_model.get_submodule(f"{name}.{subname}").weight.copy_(p.to(p.dtype) + delta.to(p.dtype))
# import pdb ; pdb.set_trace()

# if "llava" in args.finetuned_model.lower():
# torch.save(param_dict, "/home/pingbowen/workspace/delta-compression/saved_model/llava_svd.pt")
if args.choice == "svd":
torch.save(param_dict, args.svd_dict)


finetuned_model.to(torch.bfloat16)
finetuned_model.save_pretrained(save_dir)

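Note: a minimal, self-contained sketch of what the new svd branch computes, under the assumption that decomposition wraps a rank-truncated torch.svd as in the hunk below. The tensor names W_base and W_ft are illustrative stand-ins for p and f, and the small sizes are for speed only; the real code keeps rank 1024 for attention projections and int(1024 * 1.45) = 1484 for mlp projections.

import torch

def truncated_svd(delta, dim):
    # torch.svd returns V (not V^T); keep only the top-`dim` components
    U, S, V = torch.svd(delta.to(torch.float32))
    return U[:, :dim], S[:dim], V[:, :dim]

W_base = torch.randn(512, 512)                     # stand-in for the base weight p
W_ft = W_base + 0.01 * torch.randn(512, 512)       # stand-in for the finetuned weight f

U, S, V = truncated_svd(W_ft - W_base, dim=128)    # rank would be 1024 / 1484 in the real model
delta_hat = U @ torch.diag(S) @ V.t()              # low-rank approximation of f - p
print((W_ft - (W_base + delta_hat)).abs().mean())  # residual approximation error
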
@@ -231,26 +254,26 @@ def decomposition(masked_input_tensor,dim=None,name=None,attn_outlier=0.1,mlp_ou
if dim is not None:
U , S , V = U[:, :dim],S[:dim] ,V[:, :dim]

if "self_attn" in name:
outlier_U = get_outlier(U[:,64:], percent=attn_outlier)
outlier_V = get_outlier(V[:,64:], percent=attn_outlier)
# if "self_attn" in name:
# outlier_U = get_outlier(U[:,64:], percent=attn_outlier)
# outlier_V = get_outlier(V[:,64:], percent=attn_outlier)

set_zero(U[:,64:], outlier_U)
# import pdb; pdb.set_trace()
set_zero(V[:,64:], outlier_V)
# set_zero(U[:,64:], outlier_U)
# # import pdb; pdb.set_trace()
# set_zero(V[:,64:], outlier_V)

else:
outlier_U = get_outlier(U[:,128:], percent=mlp_outlier)
outlier_V = get_outlier(V[:,128:], percent=mlp_outlier)
# else:
# outlier_U = get_outlier(U[:,128:], percent=mlp_outlier)
# outlier_V = get_outlier(V[:,128:], percent=mlp_outlier)

set_zero(U[:,128:], outlier_U)
set_zero(V[:,128:], outlier_V)
# set_zero(U[:,128:], outlier_U)
# set_zero(V[:,128:], outlier_V)

# max_val, min_val, mean_abs_val = round(torch.max(U).item(),4), round(torch.min(U).item(),4), round(torch.mean(torch.abs(U)).item(),4)

# print(f"max_val {max_val} pos_min {round(torch.min(outlier[outlier > 0]).item(),4)} mean_abs_val {mean_abs_val} ratio {round(torch.min(outlier[outlier > 0]).item() / mean_abs_val,4)}")
# import pdb; pdb.set_trace()
return U, S, V , outlier_U, outlier_V
return U, S, V # , outlier_U, outlier_V

def save_full_model(base_model_name, finetuned_model_name, diff_dir, save_dir, device,layers=None,ori_diff=None):
base_model = get_model(base_model_name, device)
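Note: a hedged sketch of how the dictionary written to args.svd_dict could be consumed afterwards. The file name and layer prefix below are placeholders, but the .base/.U/.S/.V key layout follows the hunk above.

import torch

sd = torch.load("llama_chat_svd.pt")            # placeholder path; the real one comes from --svd_dict
prefix = "model.layers.0.self_attn.q_proj"      # placeholder for an f"{name}.{subname}" key

base = sd[f"{prefix}.base"]
U, S, V = sd[f"{prefix}.U"], sd[f"{prefix}.S"], sd[f"{prefix}.V"]
W = base + (U @ torch.diag(S) @ V.t()).to(base.dtype)  # reconstruct the compressed weight
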
1 change: 0 additions & 1 deletion bitdelta/train2.py
@@ -27,7 +27,6 @@
finetuned_model = get_model(args.finetuned_model, args.finetuned_model_device, args.finetuned_model_memory_map)


# import pdb;pdb.set_trace()
print(f"compressing diff...")
compress_diff(base_model, finetuned_model, args.save_dir,args)

11 changes: 7 additions & 4 deletions bitdelta/utils.py
@@ -4,8 +4,10 @@
from transformers import AutoConfig, AutoModelForCausalLM,AutoTokenizer
from accelerate import infer_auto_device_map, init_empty_weights
import os
from llava.model import *
from llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
try:
    from llava.model import *
    from llava.constants import DEFAULT_IMAGE_PATCH_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
except ImportError:  # llava is optional; only needed when handling LLaVA models
    pass

def load_llava(path,device):
tokenizer = AutoTokenizer.from_pretrained(path, use_fast=False)
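Note: a common companion pattern for the guarded import above, shown as a hypothetical sketch (the HAS_LLAVA flag is not in the repo): recording whether the optional dependency imported lets downstream code branch explicitly instead of failing at use time.

try:
    import llava  # optional dependency; only needed for LLaVA checkpoints
    HAS_LLAVA = True
except ImportError:
    HAS_LLAVA = False
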
@@ -44,14 +46,15 @@ def load_llava(path,device):

def parse_args():
parser = argparse.ArgumentParser(description="BitDelta")

#
# models
parser.add_argument(
"--finetuned_model", type=str, default="lmsys/vicuna-7b-v1.5-16k"
)
parser.add_argument("--base_model", type=str, default="meta-llama/Llama-2-7b-hf")

# train params
parser.add_argument("--svd_dict", type=str, default="")
parser.add_argument("--dataset_name", type=str, default="c4")
parser.add_argument("--subset", type=str, default="en")
parser.add_argument("--data_dir", type=str, default="en")
@@ -66,7 +69,7 @@ def parse_args():
parser.add_argument("--train", action="store_true")
parser.add_argument("--attn_outlier", type=float,default=1e-4)
parser.add_argument("--mlp_outlier", type=float,default=1e-4)
parser.add_argument("--choice", type=str,choices=['mix','bit','rank'],default=None)
parser.add_argument("--choice", type=str,choices=['mix','bit','svd'],default=None)

# device management
parser.add_argument("--base_model_device", type=str, default="0")
28 changes: 18 additions & 10 deletions run.sh
@@ -1,25 +1,33 @@
MODEL_SAVE_DIR=./../save/test
MODEL_SAVE_DIR=/home/pingbowen/workspace/delta-compression/save/test

mkdir -p $MODEL_SAVE_DIR

values=(0.05 0.2 0.4 0.5 0.75)

# for value in ${values[@]}
# do
CUDA_VISIBLE_DEVICES=5,6 python \
pretrained_model=(/data/public/opensource_models/codellama/codellama-7b-python-hf/ /data/public/opensource_models/meta-llama/Llama-2-7b-hf/)
finetuned_model=(/data/groups/QY_LLM_Other/OSS_Code_LLM/Magicoder-S-CL-7B/ /data/public/opensource_models/meta-llama/Llama-2-7b-chat-hf/)
svd_dict=(/home/pingbowen/workspace/delta-compression/saved_model/magicoder_svd.pt /home/pingbowen/workspace/delta-compression/saved_model/llama_chat_svd.pt)
for (( i=0; i<2; i++ )); do

gpu0=$((2 * i))
gpu1=$((2 * i + 1))

CUDA_VISIBLE_DEVICES="$gpu0,$gpu1" python \
bitdelta/train2.py \
--base_model /data/public/opensource_models/meta-llama/Llama-2-7b-hf/ \
--finetuned_model /data/public/opensource_models/WizardLM/WizardMath-7B-V1.0/ \
--base_model ${pretrained_model[$i]} \
--finetuned_model ${finetuned_model[$i]} \
--save_dir $MODEL_SAVE_DIR \
--batch_size 4 \
--num_steps 200 \
--save_full_model True \
--attn_outlier 0.2 \
--mlp_outlier 0.1 \
--choice bit
--svd_dict ${svd_dict[$i]} \
--choice svd &
# &> test.log
# done

# /data/public/opensource_models/meta-llama/Llama-2-7b-chat-hf/
done
wait
# /data/public/opensource_models/codellama/codellama-7b-python-hf/ /data/groups/QY_LLM_Other/OSS_Code_LLM/Magicoder-S-CL-7B/
# /home/pingbowen/models/vicuna-13b-v1.5 , /home/pingbowen/models/Llava-v1.5
# /data/public/opensource_models/WizardLM/WizardMath-7B-V1.0/
# /data/public/opensource_models/meta-llama/Llama-2-7b-hf/ /data/public/opensource_models/meta-llama/Llama-2-7b-chat-hf/
13 changes: 8 additions & 5 deletions run_tailor.sh
@@ -1,11 +1,14 @@
python \
tailor.py \
--finetuned_model_name /data/public/opensource_models/meta-llama/Llama-2-7b-chat-hf \
--save_dir /home/pingbowen/workspace/delta-compression/BitDelta/tailor_model/7b_chat \
CUDA_VISIBLE_DEVICES=2,3 python tailor.py \
--pretrained_model_name /data/public/opensource_models/meta-llama/Llama-2-7b-hf/ \
--finetuned_model_name /data/public/opensource_models/meta-llama/Llama-2-7b-chat-hf/ \
--dim 128 \
--scale_factor 1.45 \
--save_dir /home/pingbowen/save/Llama-2-7b-chat_svd


# &

# /data/public/opensource_models/codellama/codellama-7b-python-hf/
# /data/groups/QY_LLM_Other/OSS_Code_LLM/Magicoder-S-CL-7B/
# python3 tailor.py \
# --finetuned_model_name /data/public/wangshuo/exp/ft-en-metameth-llama-2-7b/ckpts/checkpoints/epoch_2_hf \
# --save_dir /home/pingbowen/workspace/delta-compression/BitDelta/tailor_model/math_lora_7b \
163 changes: 26 additions & 137 deletions tailor.py
@@ -15,22 +15,26 @@
import re
import random
import numpy as np
import math

parser = argparse.ArgumentParser()
parser.add_argument('--finetuned_model_name', type=str, required=True, help='finetuned model name')
parser.add_argument('--save_dir', type=str, required=True, help='finetuned model name')
parser.add_argument('--pretrained_model_name', type=str, help='pretrained model name')
parser.add_argument('--finetuned_model_name', type=str, help='finetuned model name')
parser.add_argument('--save_dir', type=str, help='directory to save the tailored model')
parser.add_argument('--dim', type=int, help='SVD rank kept for self_attn projections')
parser.add_argument('--scale_factor', type=float, default=1.45, help='rank multiplier applied to mlp projections')
args = parser.parse_args()

pretrained_model_name = "/data/public/opensource_models/meta-llama/Llama-2-7b-hf"
device = "cuda" if torch.cuda.is_available() else "cpu"

finetuned_model_name = args.finetuned_model_name # /data/public/wangshuo/exp/ft-en-magicoder-llama-2-7b/ckpts/checkpoints/epoch_2_hf
pretrained_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=pretrained_model_name,
device_map="cpu")
pretrained_model_name = args.pretrained_model_name

finetuned_model_name = args.finetuned_model_name
pretrained_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=pretrained_model_name,torch_dtype=torch.bfloat16).to(device)
pretrained_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=pretrained_model_name)
finetuned_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=finetuned_model_name,
device_map="cpu")

finetuned_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=finetuned_model_name,torch_dtype=torch.bfloat16).to(device)
finetuned_tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=finetuned_model_name)

def set_random_seed(seed: int = 0):
"""
set random seed
@@ -46,145 +50,30 @@ def set_random_seed(seed: int = 0):
torch.backends.cudnn.benchmark = False

set_random_seed(seed=0)
# scale_factor = finetuned_model.config.intermediate_size / finetuned_model.config.hidden_size


scale_factor = 1.45
def get_param_names_to_merge(input_param_names: list, exclude_param_names_regex: list):
"""
get the names of parameters that need to be merged
:param input_param_names: list, names of input parameters
:param exclude_param_names_regex: list, regular expression of names of parameters that need to be excluded
:return:
"""
param_names_to_merge = []
for param_name in input_param_names:
exclude = any([re.match(exclude_pattern, param_name) for exclude_pattern in exclude_param_names_regex])
if not exclude:
param_names_to_merge.append(param_name)
return param_names_to_merge


# import pdb
# pdb.set_trace()
scale_factor = args.scale_factor

def decomposition(masked_input_tensor,dim):

U , S , V = torch.svd(masked_input_tensor)
U , S , V = torch.svd(masked_input_tensor.to(torch.float32))
U , S , V = U[:, :dim],S[:dim],V[:, :dim]
# return torch.mm(U, torch.diag(S)), V.t()
return torch.mm(U, torch.mm(torch.diag(S), V.t())) #return lora_B, lora_A
return U @ torch.diag(S) @ V.t() # rank-dim reconstruction of the input tensor

# dim = 1024
dim = 128
# dim = 64
print("----------------------dim: ",dim)
print("----------------------dim: ",dim)
print("----------------------dim: ",dim)
print("----------------------dim: ",dim)
print("----------------------dim: ",dim)
print("----------------------dim: ",dim)

peft_dict = {}
malign_dict = {}
other_dict = {}

task_vector_param_dict = {}
pretrained_param_dict = {param_name: param_value for param_name, param_value in pretrained_model.named_parameters()}
finetuned_param_dict = {param_name: param_value for param_name, param_value in finetuned_model.named_parameters()}
param_names_to_merge = get_param_names_to_merge(input_param_names=list(pretrained_param_dict.keys()), exclude_param_names_regex=[])
with torch.no_grad():
for param_name in param_names_to_merge:
if "self_attn" in param_name or "mlp" in param_name:
# import pdb ;pdb.set_trace()
if "mlp" in param_name:
dim = math.ceil(dim * scale_factor)

delta = decomposition(finetuned_param_dict[param_name] - pretrained_param_dict[param_name],dim=dim)
finetuned_model.get_submodule(param_name.replace(".weight", "")).weight.copy_(pretrained_model.get_submodule(param_name.replace(".weight", "")).weight + delta)
# print(f"name {param_name} data {task_vector_param_dict[param_name]} ")

for k,v in finetuned_model.state_dict().items():
dim = args.dim
if ".weight" in k:
if "self_attn" in k or "mlp" in k:
if "mlp" in k:
dim = int(dim * scale_factor)
p = pretrained_model.get_submodule(k.replace(".weight", "")).weight
delta = decomposition(v - p,dim).to(v.dtype)
# import pdb; pdb.set_trace()
finetuned_model.get_submodule(k.replace(".weight", "")).weight.copy_(p + delta)

finetuned_model.save_pretrained(save_directory=args.save_dir)
finetuned_tokenizer.save_pretrained(save_directory=args.save_dir)
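
Note: a worked example of the per-parameter rank budget in the loop above, assuming the --dim 128 and --scale_factor 1.45 values used in run_tailor.sh; the layer keys are illustrative Llama-style names, not read from a checkpoint.

base_dim, scale_factor = 128, 1.45

for k in ["model.layers.0.self_attn.q_proj.weight",
          "model.layers.0.mlp.gate_proj.weight"]:
    dim = base_dim                     # reset for every parameter, as in the loop above
    if "mlp" in k:
        dim = int(dim * scale_factor)  # int(128 * 1.45) = 185
    print(k, "->", dim)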

# for param_name, param_value in tqdm(task_vector_param_dict.items()):
# if "self_attn" in param_name or "mlp" in param_name:
# lora_B, lora_A = decomposition(param_value,dim=dim)
# lora_A = lora_A * (dim/16) ### compensate the LoRA scaling; the alpha can be unified to 16 later
# peft_key = "base_model.model." + param_name.split(".weight")[0]
# print(peft_key+".lora_A.weight")
# peft_dict[peft_key+".lora_A.weight"] = lora_A.contiguous()
# peft_dict[peft_key+".lora_B.weight"] = lora_B.contiguous()


# other_dict = {k: v.to(torch.float16) for k, v in other_dict.items()}

# other_para_path = "/home/wanghanqing/projects/exp/mAlign_exp/lang_LoRAs/peft_ver/trim_lora/code/other_param"
# torch.save(other_dict, os.path.join(other_para_path, "other.pt"))
# torch.save(other_dict, os.path.join(other_para_path, "pretrain_other.pt"))


peft_dict = {k: v.to(torch.float16) for k, v in peft_dict.items()}

# layernum = 40
# for lnum in range(layernum):
# peft_pfx = f"base_model.model.model.layers.{lnum}"
# delta_pfx = f"encoder.layers.{lnum}"
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_q_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.self_attn.q_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_q_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.self_attn.q_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_k_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.self_attn.k_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_k_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.self_attn.k_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_v_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.self_attn.v_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.project_v_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.self_attn.v_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.attention_out_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.self_attn.o_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.self_att.self_attention.attention_out_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.self_attn.o_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_in.w_0_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.mlp.gate_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_in.w_0_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.mlp.gate_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_in.w_1_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.mlp.up_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_in.w_1_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.mlp.up_proj.lora_B.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_out_lora.lora_A.weight"] = peft_dict[f"{peft_pfx}.mlp.down_proj.lora_A.weight"].contiguous()
# malign_dict[f"{delta_pfx}.ffn.ffn.w_out_lora.lora_B.weight"] = peft_dict[f"{peft_pfx}.mlp.down_proj.lora_B.weight"].contiguous()





malign_dict = {k: v.to(torch.float16) for k, v in malign_dict.items()}

# import pdb
# pdb.set_trace()

output_peft_path = "/home/wanghanqing/projects/exp/mAlign_exp/lang_LoRAs/peft_ver/trim_lora/dim256_2/code"
output_malign_path = "/home/wanghanqing/projects/exp/mAlign_exp/mAlign_LoRAs/trim_lora/dim256_2/code"

# torch.save(peft_dict, os.path.join(output_peft_path, "adapter_model.bin"))
# torch.save(malign_dict, os.path.join(output_malign_path, "lora.pt"))


print("--end--")





# num , masked_input_tensor = 0,input_tensor
# if "self_attn" in param_name or "mlp" in param_name:
# if "mlp" in param_name:
# dim = math.ceil(dim * scale_factor)
# thresh_hold = 0.06752
# num, masked_input_tensor = decomposition(input_tensor,dim=dim)





# for param_name, param_value in finetuned_model.named_parameters():
# if param_name in masked_param_dict:
# param_value.data.copy_(masked_param_dict[param_name])

# logger.info(f"saving model at {save_model_path}...")
# os.makedirs(save_model_path, exist_ok=True)
# finetuned_model.save_pretrained(save_directory=save_model_path)
# finetuned_tokenizer.save_pretrained(save_directory=save_model_path)
# logger.info(f"model is saved")