Bulk uploading huge amounts of data using cx_Oracle can be quite a pain. A faster way to do this is to use SQLAlchemy to insert or update a large number of entries in the database. It has a function called bulk_insert_mappings which can be used for this purpose. First, you define a class that mimics the structure of the table you want to insert data into. The method accepts two parameters: the first is the class that was just defined; the second is a list of dictionaries of the values you want to upload.
import sqlalchemy
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker

# Declarative base shared by every mapped class in this module.
Base = declarative_base()
metadata = Base.metadata

# Thread-local session registry; bound to a real engine in init_db().
session = scoped_session(sessionmaker())
engine = None  # set by init_db()
class test_table(Base):
    """ORM mapping for the T_MATT_TEST_DATA staging table.

    Every column is stored as a short string; ``index`` is the primary key.
    """

    # BUG FIX: SQLAlchemy's declarative system requires the dunder name
    # __tablename__; a plain `tablename` attribute leaves the mapping
    # without a table name and raises InvalidRequestError at class
    # creation time.
    __tablename__ = "T_MATT_TEST_DATA"

    index = Column(String(40), primary_key=True)
    row_num = Column(String(40))
    load_date = Column(String(40))
    cyc_id = Column(String(40))
def init_db(db_name='sqlite:///test.db'):
    """Create the engine, bind the scoped session, and (re)create all tables.

    Args:
        db_name: SQLAlchemy database URL, e.g. ``'sqlite:///test.db'`` or
            ``'oracle+cx_oracle://user:password@dsn'``.
    """
    global engine
    # BUG FIX: the original ignored db_name entirely and interpolated the
    # undefined globals `db` and `dsn_tns` into a hard-coded Oracle URL,
    # which raised NameError. Build the engine from the URL parameter so
    # the default SQLite database (and any other backend) works.
    engine = create_engine(db_name)
    session.remove()
    session.configure(bind=engine, autoflush=False, expire_on_commit=False)
    # Drop and recreate so every run starts from an empty table.
    metadata.drop_all(engine)
    metadata.create_all(engine)

init_db()
# df is the dataframe which has the data you want to load.
# Build one mapping dict per dataframe row, then hand the whole list to
# bulk_insert_mappings in a single call.
mappings = [
    {
        "index": '{}'.format(df_index),
        "row_num": '{}'.format(df_row['row_num']),
        "load_date": '{}'.format(df_row['load_date']),
        "cyc_id": '{}'.format(df_row['cyc_id']),
    }
    for df_index, df_row in df.iterrows()
]
session.bulk_insert_mappings(test_table, mappings)
session.commit()
If the dataframe is too large (more than 10,000 rows), it is recommended to split the dataframe into smaller chunks and upload it one chunk at a time.
def chunker(seq, size):
    """Lazily yield consecutive slices of *seq* of length *size*.

    The final slice may be shorter when ``len(seq)`` is not a multiple of
    *size*. Works on anything sliceable (lists, strings, dataframes, ...).
    """
    return (seq[start:start + size] for start in range(0, len(seq), size))
batch_size = 10000  # test different batch sizes
# Upload the dataframe one chunk at a time, committing after each chunk so
# a very large frame never has to be flushed in a single transaction.
for chunk_df in chunker(df, batch_size):
    rows = []
    for df_index, df_row in chunk_df.iterrows():
        rows.append(
            dict(
                index='{}'.format(df_index),
                row_num='{}'.format(df_row['row_num']),
                load_date='{}'.format(df_row['load_date']),
                cyc_id='{}'.format(df_row['cyc_id']),
            )
        )
    session.bulk_insert_mappings(test_table, rows)
    session.commit()