leovega wrote:
Hi Spamart
I'm trying to load a massive data from CSV (30000 records) and me too get a
...
but the records are loaded at an even rate; you have to wait about 1 minute per 1000 records, more or less.
I just finished bulk uploading more than 500000 records; the speed is somewhere between 1000 and 4000 records per second.
While the script is running, I open a pgAdmin3 connection to the server, look at the target database's properties, and refresh the row count every second; that is where the speed figure comes from.
My trick is :
1. Create a new regular (not OpenERP-specific) table and import the CSV into it.
2. Prepare multiple PostgreSQL and OpenERP connections.
3. Arrrgggh ... to make it short, here is my dirty code:
Code:
#!/usr/bin/python
import os, psycopg2, sys, xmlrpclib, string, time
from threading import Thread
# Name of the target OpenERP database, read from standard input
# (for example: echo pbx01 | ./uldata.py).
dbname = sys.stdin.read().strip()
# PostgreSQL role used for the psql call and the psycopg2 connections.
dbuser = 'bino'
# OpenERP credentials used for the XML-RPC login and object calls.
erpuser, erppasswd = 'admin', 'admin'
def myfunc(ctr_code,ctr_name,db_con,erp_con):
    """Create one ``pbx.country`` record plus a ``pbx.area`` record per area.

    The areas are read from the staging table ``garea`` (rows whose ``ccode``
    equals ``ctr_code``) and each one is created through ``erp_con`` with its
    ``ccode`` field set to the id returned for the new country.

    Reads the module-level globals ``dbname``, ``dbuid`` and ``erppasswd``;
    they must be set before this function is called.

    Args:
        ctr_code: Country code; stored as ``code`` and used to look up areas.
        ctr_name: Country name; stored as ``name``.
        db_con: Open DB-API connection (the script passes a psycopg2
            connection) to the database holding ``garea``.
        erp_con: Object exposing ``execute(db, uid, password, model, method,
            values)`` (the script passes an ``xmlrpclib.ServerProxy``).
    """
    ctr_data = {
        'code': ctr_code,
        'name': ctr_name,
    }
    ctrid = erp_con.execute(dbname, dbuid, erppasswd, 'pbx.country', 'create', ctr_data)

    cur_a = db_con.cursor()
    try:
        cur_a.execute("SELECT code,name FROM garea WHERE ccode = %(xccode)s", dict(xccode=ctr_code))
        for area in cur_a.fetchall():
            area_code = area[0]
            area_name = area[1]
            if area_code is None:
                # No area code: the row stands for the whole country.
                area_name = ctr_name + " (All Area)"
            area_data = {
                'ccode': ctrid,
                'code': area_code,
                'name': area_name,
            }
            erp_con.execute(dbname, dbuid, erppasswd, 'pbx.area', 'create', area_data)
    finally:
        # Release the cursor even when the query or a create call fails.
        cur_a.close()
# Load the staging tables used as the import source.
# NOTE(review): dbname comes from stdin and is interpolated unquoted into a
# shell command line -- only feed this script trusted input.
os.system("psql -d %s -U %s -f './prefx.sql'" % (dbname,dbuser))

# Log in once to obtain the numeric user id required by the object endpoint.
sock = xmlrpclib.ServerProxy('http://localhost:8069/xmlrpc/common',allow_none=1)
dbuid = sock.login(dbname, erpuser, erppasswd)
sock = xmlrpclib.ServerProxy('http://localhost:8069/xmlrpc/object',allow_none=1)

# Read every country row up front, then release the reader connection.
con_dict = {'vdbname': dbname , 'vdbuser': dbuser}
con_c = psycopg2.connect("dbname=%(vdbname)s user=%(vdbuser)s" %con_dict)
cur_c = con_c.cursor()
try:
    cur_c.execute("SELECT * FROM genctr")
    countries = cur_c.fetchall()
finally:
    cur_c.close()
    con_c.close()

# Number of worker threads; each one owns one DB connection and one proxy.
POOL_SIZE = 6


def _load_batch(rows, db_con, erp_con):
    """Run myfunc for each row in ``rows`` using this worker's own connections.

    A failure on one row is reported on stderr and the remaining rows are
    still processed.
    """
    for row in rows:
        try:
            myfunc(row[0], row[1], db_con, erp_con)
        except Exception:
            # Print the traceback and keep going with the next country.
            sys.excepthook(*sys.exc_info())


# Prepare one connection, one proxy and one work list per worker.
con_pool = []
erp_pool = []
batches = []
for i in range(POOL_SIZE):
    con_pool.append(psycopg2.connect("dbname=%(vdbname)s user=%(vdbuser)s" %con_dict))
    erp_pool.append(xmlrpclib.ServerProxy('http://localhost:8069/xmlrpc/object',allow_none=1))
    batches.append([])

# Distribute the countries round-robin: row i goes to worker i % POOL_SIZE.
for index, ctr in enumerate(countries):
    ctr_code = ctr[0]
    ctr_name = ctr[1]
    print(ctr_code + " | " + ctr_name)
    batches[index % POOL_SIZE].append(ctr)

# A connection/proxy pair is only ever used by the single thread it is
# handed to, so no two threads talk over the same socket at the same time.
workers = [
    Thread(target=_load_batch, args=(batches[i], con_pool[i], erp_pool[i]))
    for i in range(POOL_SIZE)
]
for worker in workers:
    worker.start()
for worker in workers:
    worker.join()

# All workers are done; release the pooled database connections.
for pooled_con in con_pool:
    pooled_con.close()
I run it with
Code:
bino@erp:~/mydoc/openerp/smdr$ echo pbx01 |./uldata.py
Note (1):
pbx01 ==> is my openerp database name
uldata.py ==> the name of my script
Note (2): This script imports 2 CSV files into 2 OpenERP tables that have a one2many relationship.
Note (3): This script is not general purpose, so it is not safe to copy and paste this code as-is.
Sincerely
-bino-