sql - Why sometime adding conditions to query accelerate it in Postgresql? -
i have 2 tables, generated django orm - core_instauser
, core_instauser_followers
. please see below create table statements
query getting followers few accounts , ordering column (counts_followed_by) taking more 30 seconds:
# select # t3."id" # "core_instauser_followers" # inner join "core_instauser" t3 on ("core_instauser_followers"."to_instauser_id" = t3."id") # "core_instauser_followers"."from_instauser_id" in (14275, 30533081) # order t3."counts_followed_by" desc # limit 10; id -------- 23358 17461 34360 34201 30624 12475 306799 19215 21042 27073 (10 rows) time: 32850.160 ms
but if add conditions, not changing result, query take 0.3 seconds - faster in 100:
# select # t3."id" # "core_instauser_followers" # inner join "core_instauser" t3 on ("core_instauser_followers"."to_instauser_id" = t3."id") # ("core_instauser_followers"."from_instauser_id" in (14275, 30533081) , t3."count_media" > 0 , # t3."counts_follows" > -1 , t3."counts_followed_by" > -1) # order t3."counts_followed_by" desc # limit 10; id -------- 23358 17461 34360 34201 30624 12475 306799 19215 21042 27073 (10 rows) time: 295.934 ms
all columns in tables have indexes.
why happening?
upd
sql creating tables:
-- core_instauser create table core_instauser ( id integer not null, uid character varying(100) not null, username character varying(100) not null, full_name character varying(100) not null, profile_picture character varying(255) not null, counts_followed_by integer, counts_follows integer, count_media integer, owner_id integer, hidden boolean not null, loaded boolean not null, update_time timestamp time zone, has_avatar boolean, follow_rate double precision, deleted boolean not null, bio text not null, count_loaded_followers integer not null, has_bio boolean, has_full_name boolean, has_website boolean, website text not null ); create sequence core_instauser_id_seq start 1 increment 1 no minvalue no maxvalue cache 1; alter table core_instauser alter column id set default nextval('core_instauser_id_seq'::regclass); alter table core_instauser add constraint core_instauser_pkey primary key (id); alter table core_instauser add constraint core_instauser_uid_key unique (uid); create index core_instauser_count_media_480f209b0ba2dbd4_uniq on core_instauser using btree (count_media); create index core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser using btree (counts_followed_by); create index core_instauser_counts_follows_66136283704427b2_uniq on core_instauser using btree (counts_follows); -- core_instauser_followers create table core_instauser_followers ( id integer not null, from_instauser_id integer not null, to_instauser_id integer not null ); create sequence core_instauser_followers_id_seq start 1 increment 1 no minvalue no maxvalue cache 1; alter table core_instauser_followers alter column id set default nextval('core_instauser_followers_id_seq'::regclass); alter table core_instauser_followers add constraint core_instauser_followers_from_instauser_id_to_instauser_id_key unique (from_instauser_id, to_instauser_id); alter table core_instauser_followers add constraint core_instauser_followers_pkey primary key (id); create index core_instauser_followers_f865d5f5 on core_instauser_followers using btree (from_instauser_id); create index core_instauser_followers_f9b32b2c on core_instauser_followers using btree (to_instauser_id); alter table core_instauser_followers add constraint core_in_from_instauser_id_2ac1cc9fc9c44a79_fk_core_instauser_id foreign key (from_instauser_id) references core_instauser(id) deferrable deferred; alter table core_instauser_followers add constraint core_inst_to_instauser_id_4236828dfe87cfb8_fk_core_instauser_id foreign key (to_instauser_id) references core_instauser(id) deferrable deferred;
explains of queries:
first slow query without additional conditions
# explain analyze select # t3."id" # "core_instauser_followers" # inner join "core_instauser" t3 on ("core_instauser_followers"."to_instauser_id" = t3."id") # "core_instauser_followers"."from_instauser_id" in (14275, 30533081) # order t3."counts_followed_by" desc # limit 10; limit (cost=1.13..32396.65 rows=10 width=8) (actual time=37561.457..37683.384 rows=10 loops=1) -> nested loop (cost=1.13..48956112.71 rows=15112 width=8) (actual time=37561.455..37683.369 rows=10 loops=1) -> index scan backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3 (cost=0.56..4942183.77 rows=31451512 width=8) (actual time=0.066..4153.129 rows=4492685 loops=1) -> index scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers (cost=0.57..1.39 rows=1 width=4) (actual time=0.006..0.006 rows=0 loops=4492685) index cond: ((from_instauser_id = ('{14275,30533081}'::integer[])) , (to_instauser_id = t3.id)) heap fetches: 10 total runtime: 37683.475 ms (7 rows)
fast query addional conditions
# explain analyze select # t3."id" # "core_instauser_followers" # inner join "core_instauser" t3 on ("core_instauser_followers"."to_instauser_id" = t3."id") # ("core_instauser_followers"."from_instauser_id" in (14275, 30533081) , t3."count_media" > -1 , # t3."counts_follows" > -1 , t3."counts_followed_by" > -1) # order t3."counts_followed_by" desc # limit 10; limit (cost=1.13..36969.96 rows=10 width=8) (actual time=24.635..222.119 rows=10 loops=1) -> nested loop (cost=1.13..35453106.76 rows=9590 width=8) (actual time=24.633..222.100 rows=10 loops=1) -> index scan backward using core_instauser_counts_followed_by_33a853f6d98098dc_uniq on core_instauser t3 (cost=0.56..5029740.19 rows=19958436 width=8) (actual time=0.037..60.866 rows=13387 loops=1) index cond: (counts_followed_by > (-1)) filter: ((count_media > (-1)) , (counts_follows > (-1))) -> index scan using core_instauser_followers_from_instauser_id_to_instauser_id_key on core_instauser_followers (cost=0.57..1.51 rows=1 width=4) (actual time=0.009..0.009 rows=0 loops=13387) index cond: ((from_instauser_id = ('{14275,30533081}'::integer[])) , (to_instauser_id = t3.id)) heap fetches: 10 total runtime: 222.208 ms (9 rows)
upd 2
all filtered columns in second query (count_media, counts_follows, counts_followed_by) have values more or equal 0, new conditions must not affect on result
# select count(*) core_instauser counts_followed_by < 0 or count_media < 0 or counts_follows < 0; count ------- 0 (1 row) time: 5.551 ms
when added predicate, told optimizer interested in particular rows (core_instauser).
the old query had match every row in core_instauser. every rows=4492685 found had check other table match.
the new query restricts core_instauser table rows=13387 , had search other table 13387 times.
when adding predicate same amount of rows or less actual table got.
now question why different query gets same result. in case, happened rows in core_instauser table have match core_instauser_followers. answer might different.
we can compare 2 bags of balls numbers on them. 1 bag has 1,2 , 3. other bag has 1 , 2.
now join bags match.
select * bag1 join bag2 on (number) join 2 rows: 1 , 2.
now restrict bag1 balls less 3:
select * bag1 join bag2 bag1.number < 3. result not change.
Comments
Post a Comment