diffTableTool/table_diff.py at main · eagleLiu82/diffTableTool

History

1755 lines (1498 loc) · 73.1 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

import argparse

import sqlite3

from typing import List, Optional, Dict, Any, Union

from abc import ABC, abstractmethod

import logging

import sys

import os

import importlib

# 新增 Union 类型用于 run_comparison 参数类型提示

# Configure logging for the whole module: INFO level, with a
# "timestamp - level - message" line format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Module-level logger used by the adapters and by TableComparator below.
logger = logging.getLogger(__name__)

class DatabaseAdapter(ABC):
    """Abstract base class that every database backend adapter derives from."""

    @abstractmethod
    def connect(self, **kwargs):
        """Open a database connection from backend-specific keyword arguments."""

    @abstractmethod
    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the list of column names of ``table_name``."""

    @abstractmethod
    def execute_query(self, query: str):
        """Execute ``query`` against the database."""

    @abstractmethod
    def close(self):
        """Close the database connection."""

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key column names of ``table_name``.

        This default implementation reports no keys (a fresh empty list);
        subclasses may override it.
        """
        no_keys: List[str] = []
        return no_keys

class SQLiteAdapter(DatabaseAdapter):
    """Adapter for SQLite databases, built on the standard-library ``sqlite3`` module."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open the SQLite database named by the ``db_path`` keyword argument.

        :param kwargs: ``db_path`` is read and handed to ``sqlite3.connect``
        :return: the opened connection (also stored on ``self.connection``)
        """
        db_path = kwargs.get('db_path')
        logger.info(f"连接到SQLite数据库: {db_path}")
        self.connection = sqlite3.connect(db_path)
        return self.connection

    def _table_info(self, table_name: str) -> list:
        """Return the raw ``PRAGMA table_info`` rows for ``table_name``."""
        # NOTE(review): table_name is interpolated unquoted, exactly as before;
        # quoting here could break callers that already pass a quoted name.
        cursor = self.connection.execute(f"PRAGMA table_info({table_name})")
        return cursor.fetchall()

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name`` in declaration order.

        :param table_name: name of the table to inspect
        :return: column names (index 1 of each ``PRAGMA table_info`` row)
        """
        logger.info(f"获取SQLite表 {table_name} 的字段")
        fields = [row[1] for row in self._table_info(table_name)]
        logger.info(f"表 {table_name} 的字段: {fields}")
        return fields

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name`` in key order.

        The ``pk`` value (index 5) of a ``PRAGMA table_info`` row is 0 for
        non-key columns and otherwise the 1-based position of the column
        inside the primary key. Sorting on it makes a composite key declared
        as ``PRIMARY KEY (b, a)`` come back as ``['b', 'a']`` instead of in
        column-declaration order.

        :param table_name: name of the table to inspect
        :return: primary-key column names, empty when the table has no key
        """
        logger.info(f"获取SQLite表 {table_name} 的主键")
        key_rows = [row for row in self._table_info(table_name) if row[5] > 0]
        key_rows.sort(key=lambda row: row[5])
        primary_keys = [row[1] for row in key_rows]
        logger.info(f"表 {table_name} 的主键: {primary_keys}")
        return primary_keys

    def execute_query(self, query: str):
        """Execute ``query`` and return the resulting ``sqlite3`` cursor."""
        logger.info(f"执行SQLite查询: {query}")
        return self.connection.execute(query)

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            logger.info("关闭SQLite数据库连接")
            self.connection.close()

class MySQLAdapter(DatabaseAdapter):
    """Adapter for MySQL databases, built on ``mysql-connector-python``."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open a MySQL connection.

        :param kwargs: ``host`` (default ``localhost``), ``port`` (default 3306),
            ``user``, ``password`` and ``database``
        :return: the opened connection (also stored on ``self.connection``)
        :raises ImportError: when ``mysql.connector`` cannot be imported
        """
        try:
            import mysql.connector
        except ImportError as exc:
            raise ImportError("需要安装mysql-connector-python库: pip install mysql-connector-python") from exc

        host = kwargs.get('host', 'localhost')
        port = kwargs.get('port', 3306)
        user = kwargs.get('user')
        password = kwargs.get('password')
        database = kwargs.get('database')

        logger.info(f"连接到MySQL数据库: {host}:{port}, 用户: {user}, 数据库: {database}")
        self.connection = mysql.connector.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database,
            buffered=True,  # buffered results avoid cursor problems
        )
        return self.connection

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name`` as reported by ``DESCRIBE``.

        :param table_name: name of the table to inspect
        :return: column names (first value of each ``DESCRIBE`` row)
        """
        logger.info(f"获取MySQL表 {table_name} 的字段")
        cursor = self.connection.cursor(buffered=True)
        try:
            cursor.execute(f"DESCRIBE {table_name}")
            fields = [row[0] for row in cursor.fetchall()]
        finally:
            # Close the cursor even when the statement fails (e.g. unknown table).
            cursor.close()
        logger.info(f"表 {table_name} 的字段: {fields}")
        return fields

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name``.

        :param table_name: name of the table to inspect
        :return: value at index 4 (``Column_name``) of each ``SHOW KEYS`` row
            whose key name is ``PRIMARY``
        """
        logger.info(f"获取MySQL表 {table_name} 的主键")
        cursor = self.connection.cursor(buffered=True)
        try:
            cursor.execute(f"SHOW KEYS FROM {table_name} WHERE Key_name = 'PRIMARY'")
            primary_keys = [row[4] for row in cursor.fetchall()]
        finally:
            # Close the cursor even when the statement fails.
            cursor.close()
        logger.info(f"表 {table_name} 的主键: {primary_keys}")
        return primary_keys

    def execute_query(self, query: str):
        """Execute ``query`` and return the open buffered cursor holding its result.

        The caller owns the returned cursor. When execution fails the cursor
        is closed here before the exception propagates.
        """
        logger.info(f"执行MySQL查询: {query}")
        cursor = self.connection.cursor(buffered=True)
        try:
            cursor.execute(query)
        except Exception:
            cursor.close()
            raise
        return cursor

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            logger.info("关闭MySQL数据库连接")
            self.connection.close()

class PostgreSQLAdapter(DatabaseAdapter):
    """Adapter for PostgreSQL databases, built on ``psycopg2``."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open a PostgreSQL connection.

        :param kwargs: ``host`` (default ``localhost``), ``port`` (default 5432),
            ``user``, ``password`` and ``database``
        :return: the opened connection (also stored on ``self.connection``)
        :raises ImportError: when ``psycopg2`` cannot be imported
        """
        try:
            import psycopg2
        except ImportError as exc:
            raise ImportError("需要安装psycopg2库: pip install psycopg2") from exc

        host = kwargs.get('host', 'localhost')
        port = kwargs.get('port', 5432)
        user = kwargs.get('user')
        password = kwargs.get('password')
        database = kwargs.get('database')

        logger.info(f"连接到PostgreSQL数据库: {host}:{port}, 用户: {user}, 数据库: {database}")
        self.connection = psycopg2.connect(
            host=host,
            port=port,
            user=user,
            password=password,
            database=database
        )
        return self.connection

    def _reset_transaction(self):
        """Roll back the current transaction after a failed statement.

        Once a statement fails, PostgreSQL rejects every further statement in
        the same transaction until it is rolled back. A failure of the
        rollback itself is only logged, so the original error stays visible.
        """
        try:
            self.connection.rollback()
        except Exception as rollback_error:
            logger.warning(f"回滚失败的事务时出错: {rollback_error}")

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name``.

        ``information_schema.columns`` is queried first (restricted to the
        connected database). If that fails or finds nothing, ``pg_attribute``
        is queried directly.

        :param table_name: name of the table to inspect
        :return: column names in ordinal order
        :raises ValueError: when neither lookup yields any column
        """
        logger.info(f"获取PostgreSQL表 {table_name} 的字段")
        cursor = self.connection.cursor()
        try:
            # First attempt: information_schema for the current database.
            try:
                cursor.execute("""
                    SELECT column_name
                    FROM information_schema.columns
                    WHERE table_name = %s AND table_catalog = %s
                    ORDER BY ordinal_position
                """, (table_name, self.connection.info.dbname))
                fields_result = cursor.fetchall()
                if fields_result:
                    fields = [row[0] for row in fields_result]
                    logger.info(f"通过information_schema获取到表 {table_name} 的字段: {fields}")
                    return fields
            except Exception as e:
                logger.warning(f"通过information_schema获取字段失败: {e}")
                # Without this the fallback below would fail in the aborted transaction.
                self._reset_transaction()

            # Second attempt: read the system catalog directly.
            try:
                cursor.execute("""
                    SELECT a.attname AS column_name
                    FROM pg_class c
                    JOIN pg_attribute a ON a.attrelid = c.oid
                    JOIN pg_type t ON a.atttypid = t.oid
                    LEFT JOIN pg_attrdef d ON d.adrelid = c.oid AND d.adnum = a.attnum
                    LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
                    WHERE c.relname = %s
                    AND a.attnum > 0
                    AND NOT a.attisdropped
                    ORDER BY a.attnum
                """, (table_name,))
                fields = [row[0] for row in cursor.fetchall()]
                if fields:
                    logger.info(f"通过pg_attribute获取到表 {table_name} 的字段: {fields}")
                    return fields
            except Exception as e:
                logger.warning(f"通过pg_attribute获取字段失败: {e}")
                self._reset_transaction()
        finally:
            cursor.close()

        # Both lookups failed or returned nothing: report the table as missing.
        raise ValueError(f"表 '{table_name}' 不存在")

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name``.

        :param table_name: table name, resolved by the server via ``::regclass``
        :return: primary-key column names
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取PostgreSQL表 {table_name} 的主键")
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute("""
                SELECT a.attname
                FROM pg_index i
                JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
                WHERE i.indrelid = %s::regclass AND i.indisprimary
            """, (table_name,))
            primary_keys = [row[0] for row in cursor.fetchall()]
            logger.info(f"表 {table_name} 的主键: {primary_keys}")
            return primary_keys
        except Exception as e:
            # Leave the connection usable for later statements.
            self._reset_transaction()
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            if "does not exist" in error_msg or "not found" in error_msg or "unknown" in error_msg:
                raise ValueError(f"表 '{table_name}' 不存在") from e
            raise RuntimeError(f"获取表 '{table_name}' 主键信息时出错: {str(e)}") from e
        finally:
            if cursor is not None:
                cursor.close()

    def execute_query(self, query: str):
        """Execute ``query`` and return the open cursor holding its result."""
        logger.info(f"执行PostgreSQL查询: {query}")
        cursor = self.connection.cursor()
        cursor.execute(query)
        return cursor

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            logger.info("关闭PostgreSQL数据库连接")
            self.connection.close()

class OracleAdapter(DatabaseAdapter):
    """Adapter for Oracle databases, built on the ``oracledb`` driver."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open an Oracle connection.

        The DSN is built from ``service_name`` when it is given, otherwise
        ``database`` is used as the SID.

        :param kwargs: ``host`` (default ``localhost``), ``port`` (default 1521),
            ``user``, ``password``, ``database`` and ``service_name``
        :return: the opened connection (also stored on ``self.connection``)
        :raises ImportError: when ``oracledb`` cannot be imported
        """
        try:
            import oracledb
        except ImportError as exc:
            raise ImportError("需要安装oracledb库: pip install oracledb") from exc

        host = kwargs.get('host', 'localhost')
        port = kwargs.get('port', 1521)
        user = kwargs.get('user')
        password = kwargs.get('password')
        database = kwargs.get('database')
        service_name = kwargs.get('service_name')

        logger.info(f"连接到Oracle数据库: {host}:{port}, 用户: {user}, 数据库: {database}")

        # Build the DSN.
        if service_name:
            dsn = oracledb.makedsn(host, port, service_name=service_name)
        else:
            dsn = oracledb.makedsn(host, port, sid=database)

        self.connection = oracledb.connect(
            user=user,
            password=password,
            dsn=dsn
        )
        return self.connection

    @staticmethod
    def _split_owner(table_name: str):
        """Split ``owner.table`` into ``(owner, table)``.

        Any name that does not have exactly two dot-separated parts is
        returned whole as ``(None, table_name)``.
        """
        parts = table_name.split('.')
        if len(parts) == 2:
            return parts[0], parts[1]
        return None, table_name

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name``.

        ``all_tab_columns`` is used when an owner prefix is given, otherwise
        ``user_tab_columns``. Names are upper-cased by the query.

        :param table_name: ``table`` or ``owner.table``
        :return: column names ordered by ``column_id``
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取Oracle表 {table_name} 的字段")
        try:
            cursor = self.connection.cursor()
            owner, table = self._split_owner(table_name)

            # oracledb takes ":name" bind variables; "%s" is not a valid placeholder.
            if owner:
                cursor.execute("""
                    SELECT column_name
                    FROM all_tab_columns
                    WHERE table_name = UPPER(:tbl) AND owner = UPPER(:own)
                    ORDER BY column_id
                """, {"tbl": table, "own": owner})
            else:
                cursor.execute("""
                    SELECT column_name
                    FROM user_tab_columns
                    WHERE table_name = UPPER(:tbl)
                    ORDER BY column_id
                """, {"tbl": table})

            fields = [row[0] for row in cursor.fetchall()]
            logger.info(f"表 {table_name} 的字段: {fields}")
            return fields
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            if "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg:
                raise ValueError(f"表 '{table_name}' 不存在") from e
            raise RuntimeError(f"获取表 '{table_name}' 字段信息时出错: {str(e)}") from e

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name``.

        :param table_name: ``table`` or ``owner.table``
        :return: primary-key column names ordered by position in the key
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取Oracle表 {table_name} 的主键")
        try:
            cursor = self.connection.cursor()
            owner, table = self._split_owner(table_name)

            # oracledb takes ":name" bind variables; "%s" is not a valid placeholder.
            if owner:
                cursor.execute("""
                    SELECT cols.column_name
                    FROM all_constraints cons
                    JOIN all_cons_columns cols ON cons.constraint_name = cols.constraint_name AND cons.owner = cols.owner
                    WHERE cols.table_name = UPPER(:tbl) AND cols.owner = UPPER(:own) AND cons.constraint_type = 'P'
                    ORDER BY cols.position
                """, {"tbl": table, "own": owner})
            else:
                cursor.execute("""
                    SELECT column_name
                    FROM user_cons_columns
                    WHERE table_name = UPPER(:tbl) AND constraint_name IN (
                        SELECT constraint_name
                        FROM user_constraints
                        WHERE constraint_type = 'P'
                    )
                    ORDER BY position
                """, {"tbl": table})

            primary_keys = [row[0] for row in cursor.fetchall()]
            logger.info(f"表 {table_name} 的主键: {primary_keys}")
            return primary_keys
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            if "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg:
                raise ValueError(f"表 '{table_name}' 不存在") from e
            raise RuntimeError(f"获取表 '{table_name}' 主键信息时出错: {str(e)}") from e

    def execute_query(self, query: str):
        """Execute ``query`` and return the open cursor holding its result."""
        logger.info(f"执行Oracle查询: {query}")
        cursor = self.connection.cursor()
        cursor.execute(query)
        return cursor

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            logger.info("关闭Oracle数据库连接")
            self.connection.close()

class MSSQLAdapter(DatabaseAdapter):
    """Adapter for Microsoft SQL Server databases, built on ``pymssql``."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open a SQL Server connection.

        The server address is ``host`` alone for port 1433 and ``host:port``
        for any other port.

        :param kwargs: ``host`` (default ``localhost``), ``port`` (default 1433),
            ``user``, ``password`` and ``database``
        :return: the opened connection (also stored on ``self.connection``)
        :raises ImportError: when ``pymssql`` cannot be imported
        """
        try:
            import pymssql
        except ImportError:
            raise ImportError("需要安装pymssql库: pip install pymssql")

        host = kwargs.get('host', 'localhost')
        port = kwargs.get('port', 1433)
        user = kwargs.get('user')
        password = kwargs.get('password')
        database = kwargs.get('database')

        # Build the server address.
        server = host if port == 1433 else f"{host}:{port}"

        logger.info(f"连接到MSSQL数据库: {server}, 用户: {user}, 数据库: {database}")
        self.connection = pymssql.connect(
            server=server,
            user=user,
            password=password,
            database=database
        )
        return self.connection

    @staticmethod
    def _split_schema(table_name: str):
        """Split ``schema.table`` into ``(schema, table)``.

        Any name without exactly two dot-separated parts is kept whole and
        paired with the ``dbo`` schema.
        """
        pieces = table_name.split('.')
        if len(pieces) == 2:
            return pieces[0], pieces[1]
        return 'dbo', table_name

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name`` from ``INFORMATION_SCHEMA.COLUMNS``.

        :param table_name: ``table`` or ``schema.table``
        :return: column names ordered by ordinal position
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取MSSQL表 {table_name} 的字段")
        try:
            cursor = self.connection.cursor()
            schema, table = self._split_schema(table_name)
            cursor.execute("""
                SELECT COLUMN_NAME
                FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_NAME = %s AND TABLE_SCHEMA = %s
                ORDER BY ORDINAL_POSITION
            """, (table, schema))
            fields = [record[0] for record in cursor.fetchall()]
            logger.info(f"表 {table_name} 的字段: {fields}")
            return fields
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            looks_missing = "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg
            if not looks_missing:
                raise RuntimeError(f"获取表 '{table_name}' 字段信息时出错: {str(e)}")
            raise ValueError(f"表 '{table_name}' 不存在")

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name``.

        :param table_name: ``table`` or ``schema.table``
        :return: primary-key column names ordered by ordinal position
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取MSSQL表 {table_name} 的主键")
        try:
            cursor = self.connection.cursor()
            schema, table = self._split_schema(table_name)
            cursor.execute("""
                SELECT COLUMN_NAME
                FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
                WHERE OBJECTPROPERTY(OBJECT_ID(CONSTRAINT_SCHEMA + '.' + QUOTENAME(CONSTRAINT_NAME)), 'IsPrimaryKey') = 1
                AND TABLE_NAME = %s AND TABLE_SCHEMA = %s
                ORDER BY ORDINAL_POSITION
            """, (table, schema))
            primary_keys = [record[0] for record in cursor.fetchall()]
            logger.info(f"表 {table_name} 的主键: {primary_keys}")
            return primary_keys
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            looks_missing = "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg
            if not looks_missing:
                raise RuntimeError(f"获取表 '{table_name}' 主键信息时出错: {str(e)}")
            raise ValueError(f"表 '{table_name}' 不存在")

    def execute_query(self, query: str):
        """Execute ``query`` and return the open cursor holding its result."""
        logger.info(f"执行MSSQL查询: {query}")
        result_cursor = self.connection.cursor()
        result_cursor.execute(query)
        return result_cursor

    def close(self):
        """Close the connection if one was opened."""
        if not self.connection:
            return
        logger.info("关闭MSSQL数据库连接")
        self.connection.close()

class DMAdapter(DatabaseAdapter):
    """Adapter for Dameng (DM) databases, built on the ``dmPython`` driver."""

    def __init__(self):
        # Set by connect(); stays None until a connection has been opened.
        self.connection = None

    def connect(self, **kwargs):
        """Open a Dameng connection.

        :param kwargs: ``host`` (default ``localhost``), ``port`` (default 5236),
            ``user``, ``password`` and ``database`` (passed as ``schema``)
        :return: the opened connection (also stored on ``self.connection``)
        :raises ImportError: when ``dmPython`` cannot be imported
        """
        try:
            dmPython = importlib.import_module('dmPython')
        except ImportError as exc:
            raise ImportError("需要安装dmPython库，参考达梦官方文档进行安装") from exc

        host = kwargs.get('host', 'localhost')
        port = kwargs.get('port', 5236)
        user = kwargs.get('user')
        password = kwargs.get('password')
        database = kwargs.get('database')

        logger.info(f"连接到达梦数据库: {host}:{port}, 用户: {user}, 数据库: {database}")
        self.connection = dmPython.connect(
            user=user,
            password=password,
            server=host,
            port=port,
            schema=database
        )
        return self.connection

    @staticmethod
    def _split_schema(table_name: str):
        """Split ``schema.table`` into ``(schema, table)``.

        Any name that does not have exactly two dot-separated parts is
        returned whole as ``(None, table_name)``.
        """
        parts = table_name.split('.')
        if len(parts) == 2:
            return parts[0], parts[1]
        return None, table_name

    def get_table_fields(self, table_name: str) -> List[str]:
        """Return the column names of ``table_name``.

        ``ALL_TAB_COLUMNS`` is used when a schema prefix is given, otherwise
        ``USER_TAB_COLUMNS``. Names are upper-cased by the query.

        :param table_name: ``table`` or ``schema.table``
        :return: column names ordered by ``COLUMN_ID``
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取达梦数据库表 {table_name} 的字段")
        try:
            # The driver module is not re-imported here: only the connection is used.
            cursor = self.connection.cursor()
            schema, table = self._split_schema(table_name)

            if schema:
                cursor.execute("""
                    SELECT COLUMN_NAME
                    FROM ALL_TAB_COLUMNS
                    WHERE TABLE_NAME = UPPER(?) AND OWNER = UPPER(?)
                    ORDER BY COLUMN_ID
                """, (table, schema))
            else:
                cursor.execute("""
                    SELECT COLUMN_NAME
                    FROM USER_TAB_COLUMNS
                    WHERE TABLE_NAME = UPPER(?)
                    ORDER BY COLUMN_ID
                """, (table,))

            fields = [row[0] for row in cursor.fetchall()]
            logger.info(f"表 {table_name} 的字段: {fields}")
            return fields
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            if "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg:
                raise ValueError(f"表 '{table_name}' 不存在") from e
            raise RuntimeError(f"获取表 '{table_name}' 字段信息时出错: {str(e)}") from e

    def get_primary_keys(self, table_name: str) -> List[str]:
        """Return the primary-key columns of ``table_name``.

        :param table_name: ``table`` or ``schema.table``
        :return: primary-key column names ordered by position in the key
        :raises ValueError: when the error text indicates the table is missing
        :raises RuntimeError: for any other failure
        """
        logger.info(f"获取达梦数据库表 {table_name} 的主键")
        try:
            # The driver module is not re-imported here: only the connection is used.
            cursor = self.connection.cursor()
            schema, table = self._split_schema(table_name)

            if schema:
                cursor.execute("""
                    SELECT COLS.COLUMN_NAME
                    FROM ALL_CONSTRAINTS CONS
                    JOIN ALL_CONS_COLUMNS COLS ON CONS.CONSTRAINT_NAME = COLS.CONSTRAINT_NAME AND CONS.OWNER = COLS.OWNER
                    WHERE COLS.TABLE_NAME = UPPER(?) AND COLS.OWNER = UPPER(?) AND CONS.CONSTRAINT_TYPE = 'P'
                    ORDER BY COLS.POSITION
                """, (table, schema))
            else:
                cursor.execute("""
                    SELECT COLUMN_NAME
                    FROM USER_CONS_COLUMNS
                    WHERE TABLE_NAME = UPPER(?) AND CONSTRAINT_NAME IN (
                        SELECT CONSTRAINT_NAME
                        FROM USER_CONSTRAINTS
                        WHERE CONSTRAINT_TYPE = 'P'
                    )
                    ORDER BY POSITION
                """, (table,))

            primary_keys = [row[0] for row in cursor.fetchall()]
            logger.info(f"表 {table_name} 的主键: {primary_keys}")
            return primary_keys
        except Exception as e:
            # Decide from the message whether this is a missing-table error.
            error_msg = str(e).lower()
            if "not exist" in error_msg or "not found" in error_msg or "invalid" in error_msg:
                raise ValueError(f"表 '{table_name}' 不存在") from e
            raise RuntimeError(f"获取表 '{table_name}' 主键信息时出错: {str(e)}") from e

    def execute_query(self, query: str):
        """Execute ``query`` and return the open cursor holding its result."""
        logger.info(f"执行达梦数据库查询: {query}")
        cursor = self.connection.cursor()
        cursor.execute(query)
        return cursor

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            logger.info("关闭达梦数据库连接")
            self.connection.close()

def get_database_adapter(db_type: str) -> DatabaseAdapter:
    """Create a new adapter instance for the given database type.

    :param db_type: one of ``sqlite``, ``mysql``, ``postgresql``, ``oracle``,
        ``mssql`` or ``dm``
    :return: a freshly constructed, not yet connected adapter
    :raises ValueError: when ``db_type`` is not one of the supported keys
    """
    logger.info(f"获取数据库适配器: {db_type}")

    # Registry mapping each supported type key to its adapter class.
    registry = {
        'sqlite': SQLiteAdapter,
        'mysql': MySQLAdapter,
        'postgresql': PostgreSQLAdapter,
        'oracle': OracleAdapter,
        'mssql': MSSQLAdapter,
        'dm': DMAdapter
    }

    adapter_class = registry.get(db_type)
    if adapter_class is None:
        raise ValueError(f"不支持的数据库类型: {db_type}")
    return adapter_class()

# Custom argparse action for comma-separated option values.
class CommaSeparatedArgsAction(argparse.Action):
    """Store a comma-separated option value as a list of trimmed, non-empty items.

    Values that are not a single string (for example the list produced when
    the option is declared with ``nargs``) are stored unchanged.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        parsed = values
        if isinstance(values, str):
            # Split on commas, trim each piece and drop the empty ones.
            trimmed = (piece.strip() for piece in values.split(','))
            parsed = [piece for piece in trimmed if piece]
        setattr(namespace, self.dest, parsed)

class TableComparator:

"""

数据库表对比工具类

支持对比同一数据库中的两个表，可以指定字段、排除字段和设置WHERE条件

"""

def __init__(self, db_adapter: DatabaseAdapter, db_adapter2: DatabaseAdapter = None):
    """
    Set up the comparator with its database adapters and empty settings.

    :param db_adapter: adapter for the source database
    :param db_adapter2: adapter for the target database; when None the
        source adapter is used for both sides
    """
    # Adapters: side 2 falls back to the side-1 adapter.
    self.db1 = db_adapter
    self.db2 = db_adapter if db_adapter2 is None else db_adapter2

    # Tables to compare, filled in by set_tables().
    self.table1 = None
    self.table2 = None

    # Field selection: explicit fields and fields to leave out.
    self.fields = []
    self.exclude_fields = []

    # WHERE condition shared by both tables, plus per-table conditions.
    self.where_condition = None
    self.where_condition1 = None
    self.where_condition2 = None

    logger.info("TableComparator初始化完成")

def set_tables(self, table1: str, table2: str):
    """
    Record the two tables to compare.

    :param table1: name of the first table
    :param table2: name of the second table
    """
    logger.info(f"设置对比表: {table1} 和 {table2}")
    self.table1, self.table2 = table1, table2

def set_fields(self, fields: List[str]):
    """
    Record the explicit list of fields to compare.

    :param fields: field names to compare
    """
    logger.info(f"设置对比字段: {fields}")
    setattr(self, 'fields', fields)

def set_exclude_fields(self, exclude_fields: List[str]):
    """
    Record the fields to leave out of the comparison.

    :param exclude_fields: field names to exclude
    """
    logger.info(f"设置排除字段: {exclude_fields}")
    setattr(self, 'exclude_fields', exclude_fields)

def set_where_condition(self, where_condition: str):
    """
    Record the WHERE condition shared by both tables.

    :param where_condition: WHERE condition text
    """
    logger.info(f"设置WHERE条件: {where_condition}")
    setattr(self, 'where_condition', where_condition)

def set_where_condition1(self, where_condition: str):
    """
    Record the WHERE condition for the first table only.

    :param where_condition: WHERE condition text
    """
    first_table = self.table1
    logger.info(f"设置表 {first_table} 的WHERE条件: {where_condition}")
    self.where_condition1 = where_condition

def set_where_condition2(self, where_condition: str):
    """
    Record the WHERE condition for the second table only.

    :param where_condition: WHERE condition text
    """
    second_table = self.table2
    logger.info(f"设置表 {second_table} 的WHERE条件: {where_condition}")
    self.where_condition2 = where_condition

def get_table_fields(self, table_name: str, db_index: int = 1) -> List[str]:
    """
    Fetch every field name of a table through the matching adapter.

    :param table_name: name of the table
    :param db_index: 1 selects the source database; any other value selects
        the target database
    :return: list of field names
    :raises ValueError: when the adapter error text looks like a missing table
    :raises RuntimeError: for any other adapter error
    """
    logger.info(f"获取表 {table_name} 的所有字段")
    try:
        adapter = self.db1 if db_index == 1 else self.db2
        fields = adapter.get_table_fields(table_name)
        logger.info(f"表 {table_name} 的字段: {fields}")
        return fields
    except Exception as e:
        # Classify the failure from the lower-cased error text.
        error_msg = str(e).lower()
        missing_markers = ("no such table", "doesn't exist", "not found", "unknown", "表")
        if any(marker in error_msg for marker in missing_markers):
            raise ValueError(f"表 '{table_name}' 不存在")
        raise RuntimeError(f"获取表 '{table_name}' 字段信息时出错: {str(e)}")

def get_comparison_fields(self) -> List[str]:
    """
    Work out the final list of fields to compare.

    When explicit fields were set, each one must exist in both tables and
    the list is returned as given. Otherwise the fields common to both
    tables are used, in the column order of the first table, minus any
    excluded fields; primary-key columns of the first table that are
    missing from that list are then appended.

    :return: list of fields to compare
    :raises ValueError: when an explicitly requested field is missing from
        either table
    """
    logger.info("获取对比字段列表")

    if self.fields:
        # Explicit fields: use them as given, after validating them.
        logger.info(f"使用指定的字段: {self.fields}")
        try:
            fields1 = self.get_table_fields(self.table1, 1)
            fields2 = self.get_table_fields(self.table2, 2)
        except Exception as e:
            logger.error(f"获取表字段时出错: {str(e)}")
            raise

        missing_in_table1 = [f for f in self.fields if f not in fields1]
        missing_in_table2 = [f for f in self.fields if f not in fields2]
        if missing_in_table1:
            raise ValueError(f"字段 {', '.join(missing_in_table1)} 在表 '{self.table1}' 中不存在")
        if missing_in_table2:
            raise ValueError(f"字段 {', '.join(missing_in_table2)} 在表 '{self.table2}' 中不存在")

        comparison_fields = list(self.fields)
    else:
        # No explicit fields: start from the fields both tables share.
        logger.info(f"获取表 {self.table1} 的字段")
        fields1 = self.get_table_fields(self.table1, 1)
        logger.info(f"获取表 {self.table2} 的字段")
        fields2 = self.get_table_fields(self.table2, 2)

        # Keep table1's column order instead of the arbitrary order of a set
        # intersection, so generated queries and results are the same on
        # every run. dict.fromkeys drops duplicates without reordering.
        in_table2 = set(fields2)
        common_fields = list(dict.fromkeys(f for f in fields1 if f in in_table2))
        logger.info(f"两个表的公共字段: {common_fields}")

        if self.exclude_fields:
            logger.info(f"排除字段: {self.exclude_fields}")
            common_lookup = set(common_fields)
            # Excluded names that are not common fields only trigger a warning.
            non_existing_exclude_fields = [f for f in self.exclude_fields if f not in common_lookup]
            if non_existing_exclude_fields:
                logger.warning(f"要排除的字段 {', '.join(non_existing_exclude_fields)} 不存在于两个表的公共字段中")

            excluded = set(self.exclude_fields)
            common_fields = [f for f in common_fields if f not in excluded]
            logger.info(f"排除后剩余字段: {common_fields}")

        comparison_fields = common_fields

    logger.info(f"最终对比字段: {comparison_fields}")

    # Primary keys are appended only when no explicit fields were given;
    # with explicit fields the caller's list is used exactly as given.
    if not self.fields:
        primary_keys = self.db1.get_primary_keys(self.table1)
        for pk in primary_keys or []:
            if pk not in comparison_fields:
                logger.info(f"添加主键字段 {pk} 到比较字段中")
                comparison_fields.append(pk)

    return comparison_fields

def build_query(self, fields: List[str], table_name: str, db_index: int = 1) -> str:
    """
    Build the SELECT statement for one side of the comparison.

    The statement selects ``fields`` plus any primary-key columns of
    ``table_name`` that are missing from the list. A WHERE clause is added
    when a condition applies, and the result is ordered by the primary key
    when there is one. Without a primary key, only PostgreSQL queries are
    ordered (by every selected field).

    The per-table WHERE condition is chosen by matching ``table_name``
    against the configured tables: first on the full name, then on the name
    without its schema prefix. If both configured tables match (for example
    the same table name in two databases), ``db_index`` decides. A missing
    per-table condition falls back to the shared condition.

    :param fields: fields to select
    :param table_name: table to query
    :param db_index: 1 selects the source database; any other value selects
        the target database
    :return: the SQL text
    """
    logger.info(f"为表 {table_name} 构建查询，字段: {fields}")

    db = self.db1 if db_index == 1 else self.db2
    primary_keys = db.get_primary_keys(table_name)

    # Make sure the key columns are selected so key-based matching can find them.
    query_fields = list(fields)
    for pk in primary_keys:
        if pk not in query_fields:
            query_fields.append(pk)

    field_list = ', '.join(query_fields)
    query = f"SELECT {field_list} FROM {table_name}"

    logger.info(f"表名匹配详情 - 当前表名: '{table_name}', self.table1: '{self.table1}', self.table2: '{self.table2}'")
    logger.info(f"WHERE条件值 - where_condition1: {self.where_condition1}, where_condition2: {self.where_condition2}, where_condition: {self.where_condition}")

    where_condition = self.where_condition
    if self.table1 and self.table2:
        # Try the full names first, then the names without a schema prefix.
        matched_side = None
        for strip_schema in (False, True):
            if strip_schema:
                current, first, second = (
                    name.split('.')[-1] for name in (table_name, self.table1, self.table2)
                )
                logger.info(f"简化表名匹配 - 当前表名: '{current}', 表1名: '{first}', 表2名: '{second}'")
            else:
                current, first, second = table_name, self.table1, self.table2

            is_first = current == first
            is_second = current == second
            if is_first and is_second:
                # Ambiguous name: the requested side decides.
                matched_side = 1 if db_index == 1 else 2
            elif is_first:
                matched_side = 1
            elif is_second:
                matched_side = 2
            if matched_side is not None:
                break

        if matched_side == 1:
            where_condition = self.where_condition1 or self.where_condition
            logger.info(f"匹配到表1，使用表1的WHERE条件: {where_condition}")
        elif matched_side == 2:
            where_condition = self.where_condition2 or self.where_condition
            logger.info(f"匹配到表2，使用表2的WHERE条件: {where_condition}")
        else:
            logger.info(f"未匹配到特定表，使用通用WHERE条件: {where_condition}")
    else:
        logger.info(f"表名未设置，使用通用WHERE条件: {where_condition}")

    if where_condition:
        query += f" WHERE {where_condition}"
        logger.info(f"最终添加的WHERE条件: {where_condition}")

    if primary_keys:
        # Order by the primary key.
        order_by_fields = ', '.join(primary_keys)
        query += f" ORDER BY {order_by_fields}"
        logger.info(f"添加ORDER BY主键: {order_by_fields}")
    elif isinstance(db, PostgreSQLAdapter):
        # No key: for PostgreSQL, order by every selected field.
        order_by_fields = ', '.join(query_fields)
        query += f" ORDER BY {order_by_fields}"
        logger.info(f"添加ORDER BY所有字段: {order_by_fields}")
    # Other databases without a primary key get no ORDER BY.

    logger.info(f"构建完成的查询: {query}")
    return query

def compare(self) -> Dict[str, Any]:

"""

执行表对比

:return: 对比结果

"""

try:

logger.info("开始执行表对比")

# 获取两个表的所有字段

logger.info(f"获取表 {self.table1} 的字段")

fields1 = self.get_table_fields(self.table1, 1)

logger.info(f"获取表 {self.table2} 的字段")

fields2 = self.get_table_fields(self.table2, 2)

# 只有在用户没有指定字段且没有指定排除字段时，才检查字段一致性

if not self.fields and not self.exclude_fields:

# 检查字段是否完全一致

if set(fields1) != set(fields2):

# 获取公共字段

common_fields = list(set(fields1) & set(fields2))

# 字段不一致，返回字段不匹配

logger.warning(f"表 {self.table1} 和 {self.table2} 字段不一致")

# 获取差异字段

only_in_table1 = list(set(fields1) - set(fields2))

only_in_table2 = list(set(fields2) - set(fields1))

result = {

'fields': [],

'table1_row_count': 0,

'table2_row_count': 0,

'differences': [{

'type': 'field_mismatch',

'message': f'表 {self.table1} 和 {self.table2} 字段不一致',

'details': {

'table1_fields': fields1,

'table2_fields': fields2,

'only_in_table1': only_in_table1,

'only_in_table2': only_in_table2,

'common_fields': common_fields

}

}],

'row_differences': [],

'table1_fields': fields1,

'table2_fields': fields2,

'only_in_table1': only_in_table1,

'only_in_table2': only_in_table2,

'common_fields': common_fields

}

return result

# 获取要对比的字段

logger.info("获取对比字段")

comparison_fields = self.get_comparison_fields()

logger.info(f"对比字段: {comparison_fields}")

# 如果指定了字段或有公共字段，但最终没有可对比字段

if not comparison_fields:

logger.error("没有找到可对比的字段")

raise ValueError("没有找到可对比的字段")

# 构建查询语句

logger.info("构建查询语句")

query1 = self.build_query(comparison_fields, self.table1, 1)

query2 = self.build_query(comparison_fields, self.table2, 2)

# 执行查询获取游标，但不立即获取所有数据

logger.info("执行查询1")

cursor1 = self.db1.execute_query(query1)

logger.info("执行查询2")

cursor2 = self.db2.execute_query(query2)

# 获取主键字段

primary_keys1 = self.db1.get_primary_keys(self.table1)

primary_keys2 = self.db2.get_primary_keys(self.table2)

common_primary_keys = list(set(primary_keys1) & set(primary_keys2))

# 准备结果

result = {

'fields': comparison_fields,

'table1_row_count': 0,

'table2_row_count': 0,

'differences': [],

'row_differences': [],

'table1_fields': fields1, # 添加表1的所有字段

'table2_fields': fields2 # 添加表2的所有字段

}

# 如果两个表都有主键，且主键字段一致，并且主键字段在比较字段中，则按主键进行匹配对比

if common_primary_keys and all(pk in comparison_fields for pk in common_primary_keys):

logger.info(f"使用主键 {common_primary_keys} 进行匹配对比")

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

table_diff.py

Latest commit

History

table_diff.py

File metadata and controls