""" You need a couple of packages to query the database and write a CSV file """ import psycopg2 import csv """ You will need this database name and host information to create a connection to the database """ database_config = { 'dbname': 'cipo', 'host': 'idb1', } # This is a list of corporate entities we are searching for who are patent owners (assigneees). Feel free to edit the names to find patent documents owned by companies you are interested in companies = [ 'MAZDA MOTOR CORPORATION', 'TOYOTA JIDOSHA KABUSHIKI KAISHA', 'SUBARU CORPORATION', ] # This section of code uses the psycopg2 package to connect to the database con = psycopg2.connect(**database_config) cur = con.cursor() con.set_client_encoding('UTF8') # This executes a SQL statement that creates a temporary table with our list of corporate entities cur.execute('CREATE TEMPORARY TABLE _company (coname TEXT)') for coname in companies: cur.execute(u'INSERT INTO _company VALUES (%s)', (coname,)) # This SQL statement joins our list of corporations with the database assigneee table to filter the results to only the companies we are looking for. This is a more efficient approach than looping through companies and running multiple SELECT statements cur.execute("SELECT doc_id, name FROM assignee INNER JOIN _company ON assignee.name ILIKE '%'||_company.coname||'%'") # This next section of code goes line by line through the results and adds them to a dictionary data type in python, where the patent document id is the key and the name of the company is the value. It also prints it out so you can see the data. mylist = dict() while result := cur.fetchone(): print(result) mylist[result[0]] = result[1] # This next section sets up a CSV that we will use to store the results of our final query with open('mycipopythonresults.csv', mode='w', encoding='ascii', newline='') as csv_file: myheader = ['id','title'] writer = csv.writer(csv_file) writer.writerow(myheader) # This section goes through each item in the dictionary that we created earlier. For each key (docID), it queries the database to find the title information of their patents. Then it writes that information into the CSV file. It also prints it out so you can see the data. for x in mylist.keys(): cur.execute("SELECT doc_id, title.text_en, title.text_fr FROM title WHERE title.doc_id=%s", (x,)) while finalresult := cur.fetchone(): print(finalresult) writer.writerow(finalresult) # Finally, all the connections to the database are closed cur.close() con.close()