2 years ago

#60273

test-img

MeganCole

data restructure for ggplot geom_bar() stacked bar plotting

avg data frame

structure(list(cluster = 1:10, `B cells` = c(0.0369711424087593, 
0.00526325696315245, 0.0601665087700304, 0.0231936137674591, 
0.00766480549892195, 0.0285649960414246, 0.0044030329888148, 
0.00345795624392323, 0.00309644760567017, 0.00757469580646642
), DCreg = c(0.0304752063136609, 0.174423402403555, 0.0163287878795231, 
0.0192154395050034, 0.124511133655915, 0.0296144152010606, 0.205920199256583, 
0.114542510479173, 0.485649315606826, 0.0260997195368302), `Dendritic cells` = c(0.156500506395882, 
0.0106345235402551, 0.185348445999056, 0.395476210792188, 0.0719924126421944, 
0.104614178324861, 0.0226961213600642, 0.00292885066859525, 0.0122661582750054, 
0.118394797602606), `Dendritic cells CD103` = c(0.0482626330670718, 
0.0140976438812366, 0.030373962919268, 0.0614351282717271, 0.189884617234425, 
0.35658217311524, 0.0170390739879794, 0.0042469791834164, 0.0233514821789908, 
0.0619204360724114), Endothelium = c(0.11337268119519, 0.027025412632833, 
0.43869939276274, 0.0662483745710424, 0.0331520081202891, 0.164940771021627, 
0.050135082662031, 0.00351285357934976, 0.0201434603120533, 0.0658151087814588
), Epithelium = c(0.00418217375070304, 0.000413203430326014, 
0.0104665752013841, 0.00525017082076173, 0.00415698684351819, 
0.0333637286413386, 0.000431569929321054, 0, 0.0011976402913935, 
0.000419107154908937), Fibroblasts = c(0.00612607297867521, 0.0116371963351148, 
0.0108995123396445, 0.0117009481628146, 0.00674570810846355, 
0.0145571600114712, 0.0120879220427041, 0.00272604244680674, 
0.00772202564316953, 0.0272894372187893), `Macrophages other` = c(0.00101589948056542, 
0.000645130694683314, 0, 0, 0.000639755622911849, 0, 0.000197788594031649, 
0.00136588418173722, 0, 0.000420171738310913), `Macrophages type 1` = c(0.221136736926214, 
0.0101728310491049, 0.0295121583899105, 0.0455316207473085, 0.0230660380060092, 
0.0222078529371378, 0.015179095607796, 0.00459851371158574, 0.0112212936162074, 
0.02937463664781), `Macrophages type 2` = c(0.0411011962682536, 
0.0522714029078864, 0.012334445025602, 0.0568282306829578, 0.0453391303748083, 
0.0181451496347937, 0.239616155787136, 0.0115489617356957, 0.04981525808734, 
0.462030477544264), Neutrophils = c(0.0766806635700175, 0.00442125133471751, 
0.0476726698091672, 0.0236749605376406, 0.00911361867045396, 
0.0236169696110325, 0.00537803767758349, 0.0032239571528306, 
0.00201957474248881, 0.0160311845078706), `NK cells` = c(0, 0, 
0.000108464194313773, 0, 0, 8.99698299254026e-05, 0.000114169258081956, 
0, 4.57749702462694e-05, 2.78396436525612e-05), `T cells CD4` = c(0.0330641154468336, 
0.0213946654236908, 0.0323515137814534, 0.148686432010321, 0.0500449048718068, 
0.0685338874314457, 0.0273478878575203, 0.00472971607890761, 
0.0328998359523529, 0.0354818425253482), `T cells CD8` = c(0.0172498783937768, 
0.00877876825324442, 0.0156948623402281, 0.0207354640030442, 
0.0145536348676947, 0.0146643634343241, 0.0155197086731341, 0.00171509323694132, 
0.0135851481885585, 0.0159896002840603), `T reg cells` = c(0.00451599932441037, 
0.0058712074137469, 0.00274652046695111, 0.0167445990360021, 
0.0127422536359504, 0.0142171857157357, 0.00996063310868601, 
0.00089148571457417, 0.0113706843090688, 0.00663049091849752), 
    Tumour = c(0.0765887917753441, 0.651476092235795, 0.0173767962070959, 
    0.0647526184622169, 0.395840854655601, 0.0472273714361081, 
    0.368387800802699, 0.839842321316499, 0.323145170321728, 
    0.111585860905902), Unclassified = c(0.132756302704642, 0.00147401150065844, 
    0.0899193839136316, 0.0405261886295129, 0.0105521371910369, 
    0.0590598276124738, 0.00558572040583437, 0.000668874269964592, 
    0.00247072989889988, 0.0149145931108126)), class = "data.frame", row.names = c(NA, 
-10L))
cluster     B cells      DCreg Dendritic cells Dendritic cells CD103 Endothelium   Epithelium Neutrophils     NK cells T cells CD4 T cells CD8  T reg cells     Tumour Unclassified          
1        1 0.036971142 0.03047521     0.156500506           0.048262633 0.113372681 0.0041821738 0.076680664 0.000000e+00 0.033064115 0.017249878 0.0045159993 0.07658879 0.1327563027  
2        2 0.005263257 0.17442340     0.010634524           0.014097644 0.027025413 0.0004132034 0.004421251 0.000000e+00 0.021394665 0.008778768 0.0058712074 0.65147609 0.0014740115 
3        3 0.060166509 0.01632879     0.185348446           0.030373963 0.438699393 0.0104665752 0.047672670 1.084642e-04 0.032351514 0.015694862 0.0027465205 0.01737680 0.0899193839 
4        4 0.023193614 0.01921544     0.395476211           0.061435128 0.066248375 0.0052501708 0.023674961 0.000000e+00 0.148686432 0.020735464 0.0167445990 0.06475262 0.0405261886  
5        5 0.007664805 0.12451113     0.071992413           0.189884617 0.033152008 0.0041569868 0.009113619 0.000000e+00 0.050044905 0.014553635 0.0127422536 0.39584085 0.0105521372  
6        6 0.028564996 0.02961442     0.104614178           0.356582173 0.164940771 0.0333637286 0.023616970 8.996983e-05 0.068533887 0.014664363 0.0142171857 0.04722737 0.0590598276 
7        7 0.004403033 0.20592020     0.022696121           0.017039074 0.050135083 0.0004315699 0.005378038 1.141693e-04 0.027347888 0.015519709 0.0099606331 0.36838780 0.0055857204  
8        8 0.003457956 0.11454251     0.002928851           0.004246979 0.003512854 0.0000000000 0.003223957 0.000000e+00 0.004729716 0.001715093 0.0008914857 0.83984232 0.0006688743  
9        9 0.003096448 0.48564932     0.012266158           0.023351482 0.020143460 0.0011976403 0.002019575 4.577497e-05 0.032899836 0.013585148 0.0113706843 0.32314517 0.0024707299  
10      10 0.007574696 0.02609972     0.118394798           0.061920436 0.065815109 0.0004191072 0.016031185 2.783964e-05 0.035481843 0.015989600 0.0066304909 0.11158586 0.0149145931  

I have the data frame above and am trying to create a stacked bar chart with ggplot's geom_bar(), where each bar represents one cluster (10 clusters, so 10 bars) and each bar is filled according to the proportion of each cell type contributing to that cluster (the proportion values for each cluster add up to 1).

I started by reshaping the data into long format:

# Reshape to long format: every column except `cluster` is stacked into
# name/value pairs (the default output column names of pivot_longer()).
avgt <- pivot_longer(avg, cols = -cluster)

Which gave me this layout:

   cluster name                  value
 1       1 B cells               0.0370 
 2       1 DCreg                 0.0305 
 3       1 Dendritic cells       0.157  
 4       1 Dendritic cells CD103 0.0483 
 5       1 Endothelium           0.113  
 6       1 Epithelium            0.00418
 7       1 Fibroblasts           0.00613
 8       1 Macrophages other     0.00102
 9       1 Macrophages type 1    0.221  
10       1 Macrophages type 2    0.0411 

However, I am not sure what to do next: if I use the 'cluster' column for x and the 'name' column for 'fill', I get, as expected, equal proportions for each cell type, because the 'value' column is not used anywhere in the plot.

# Horizontal stacked bar chart: one bar per cluster, filled by cell type
# (`name`), with the stacked axis labelled as percentages.
# NOTE(review): no y aesthetic is mapped here, so geom_bar() falls back to its
# default count stat and tallies rows. Each cluster/name pair occurs exactly
# once in avgt, so every cell type gets an equal share of each bar and the
# `value` column is never used. To plot the actual proportions, map y = value
# and draw with geom_col(position = "fill") (equivalently
# geom_bar(stat = "identity", position = "fill")).
p = ggplot(avgt, aes(x = as.factor(cluster), fill = as.factor(name)))+
  # position = "fill" rescales each bar's stack to a total height of 1
  geom_bar(position = "fill") +
  theme_classic()+
  # show the 0-1 proportion axis as percentages
  scale_y_continuous(labels = scales::percent) +
  # flip so clusters run down the vertical axis and bars extend horizontally
  coord_flip() +
  # enlarge axis text and titles; axis.text already covers both axes, so the
  # separate axis.text.y entry sets the same size again
  theme(axis.text.y = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        axis.text=element_text(size=20)) +
  theme(legend.text = element_text(size = 20)) +
  # xlab()/ylab() are given in pre-flip terms: after coord_flip() the
  # "Community" label ends up on the vertical axis
  xlab("Community")+
  ylab("Percentage distribution") +
  # empty string blanks the legend title
  labs( fill = "")
# print the plot
p

geom_bar() stacked plot result

Any ideas on how I can get this to work?

Thanks in advance

r

ggplot2

geom-bar

0 Answers

Your Answer

Accepted video resources